Search in sources :

Example 6 with Jerry

use of jodd.jerry.Jerry in project jodd by oblac.

the class StuckTest method testStuck.

/**
 * Regression test for a document that used to blow the stack: parses the
 * gzipped fixture {@code stuck.html.gz} and checks that selecting anchors and
 * reading their HTML completes without a {@link StackOverflowError}.
 *
 * @throws IOException if the fixture cannot be read or decompressed.
 */
@Test
public void testStuck() throws IOException {
    File file = new File(testDataRoot, "stuck.html.gz");
    ByteArrayOutputStream out = new ByteArrayOutputStream();
    // Both streams are declared as resources so they are closed even when
    // the GZIP header is invalid or the copy fails half-way.
    try (InputStream fileIn = new FileInputStream(file);
         InputStream in = new GZIPInputStream(fileIn)) {
        StreamUtil.copy(in, out);
    }
    Jerry.JerryParser jerryParser = new Jerry.JerryParser();
    // Debugging hint: cast jerryParser.getDOMBuilder() to LagartoDOMBuilder and
    // call setParsingErrorLogLevelName("ERROR") to surface parsing errors.
    Jerry doc = jerryParser.parse(out.toString("UTF-8"));
    // Walk the anchors; the selector/html() path is what must not recurse unboundedly.
    try {
        doc.$("a").each(($this, index) -> {
            assertEquals("Go to Database Directory", $this.html().trim());
            // Presumably stops the iteration after the first match (jQuery-style) - confirm.
            return false;
        });
    } catch (StackOverflowError stackOverflowError) {
        fail("stack overflow!");
    }
}
Also used : GZIPInputStream(java.util.zip.GZIPInputStream) FileInputStream(java.io.FileInputStream) InputStream(java.io.InputStream) ByteArrayOutputStream(java.io.ByteArrayOutputStream) Jerry(jodd.jerry.Jerry) File(java.io.File) Test(org.junit.Test)

Example 7 with Jerry

use of jodd.jerry.Jerry in project jodd by oblac.

the class ParsingProblemsTest method testPreserveCC.

/**
 * Round-trips {@code preserve-cc.html} through Jerry with HTML mode enabled
 * and conditional-comment handling switched off, and expects the rendered
 * markup to equal the input exactly.
 *
 * @throws IOException if the fixture file cannot be read.
 */
@Test
public void testPreserveCC() throws IOException {
    String source = FileUtil.readString(new File(testDataRoot, "preserve-cc.html"));

    Jerry.JerryParser parser = new Jerry.JerryParser();
    LagartoDOMBuilder domBuilder = (LagartoDOMBuilder) parser.getDOMBuilder();
    domBuilder.enableHtmlMode();
    domBuilder.getConfig().setEnableConditionalComments(false);

    String rendered = parser.parse(source).html();
    assertEquals(source, rendered);
}
Also used : LagartoDOMBuilder(jodd.lagarto.dom.LagartoDOMBuilder) Jerry(jodd.jerry.Jerry) File(java.io.File) Test(org.junit.Test)

Example 8 with Jerry

use of jodd.jerry.Jerry in project ignite by apache.

the class GridJavadocAntTask method processFile.

/**
 * Processes file (validating and cleaning up Javadoc's HTML).
 * <p>
 * Steps, in order:
 * <ol>
 *   <li>read the file as UTF-8;</li>
 *   <li>when {@code verify} is set, parse it with Jerry and check for required/forbidden sections;</li>
 *   <li>split the content into text, comment, instruction, open-tag and close-tag tokens;</li>
 *   <li>first pass: apply {@code fixColors} to open tags and text, and expand
 *       {@code </head>} and {@code </body>} with extra markup;</li>
 *   <li>second pass: apply {@code fixBrackets} to everything except the span that starts at a
 *       {@code <pre name=...>} tag and ends just before its closing {@code </pre} tag;</li>
 *   <li>run the remaining {@code fix*} helpers over the result and write it back via
 *       {@code replaceFile}.</li>
 * </ol>
 *
 * @param file File to cleanup.
 * @throws IOException Thrown in case of any I/O error, on unexpected end of input inside a
 *      tag/comment, or on an empty tag ({@code <>}).
 * @throws IllegalArgumentException If JavaDoc HTML validation failed.
 */
private void processFile(String file) throws IOException {
    // Precondition only; checked solely when assertions are enabled.
    assert file != null;
    String fileContent = readFileToString(file, Charset.forName("UTF-8"));
    if (verify) {
        // Parse HTML.
        Jerry doc = Jerry.jerry(fileContent);
        if (file.endsWith("overview-summary.html")) {
            // Try to find Other Packages section.
            Jerry otherPackages = doc.find("div.contentContainer table.overviewSummary caption span:contains('Other Packages')");
            if (otherPackages.size() > 0)
                throw new IllegalArgumentException("'Other Packages' section should not be present, " + "all packages should have corresponding documentation groups: " + file);
        } else if (!isViewHtml(file)) {
            // Try to find a class description block.
            Jerry descBlock = doc.find("div.contentContainer div.description ul.blockList li.blockList div.block");
            if (descBlock.size() == 0)
                throw new IllegalArgumentException("Class doesn't have description in file: " + file);
        }
    }
    // Tokenize the raw content character by character.
    GridJavadocCharArrayLexReader lexer = new GridJavadocCharArrayLexReader(fileContent.toCharArray());
    Collection<GridJavadocToken> toks = new ArrayList<>();
    // Accumulates the characters of the token currently being read.
    StringBuilder tokBuf = new StringBuilder();
    int ch;
    while ((ch = lexer.read()) != GridJavadocCharArrayLexReader.EOF) {
        // Instruction, tag or comment.
        if (ch == '<') {
            // A '<' ends any pending text run; emit it as a text token first.
            if (tokBuf.length() > 0) {
                toks.add(new GridJavadocToken(GridJavadocTokenType.TOKEN_TEXT, tokBuf.toString()));
                tokBuf.setLength(0);
            }
            tokBuf.append('<');
            ch = lexer.read();
            if (ch == GridJavadocCharArrayLexReader.EOF)
                throw new IOException("Unexpected EOF: " + file);
            // Instruction or comment.
            if (ch == '!') {
                // Consume up to (not including) the first '>'.
                // NOTE(review): this stops at the first '>' even inside "<!-- ... -->", so a comment
                // containing '>' would be cut short and its tail lexed as text - confirm this is acceptable.
                for (; ch != GridJavadocCharArrayLexReader.EOF && ch != '>'; ch = lexer.read()) tokBuf.append((char) ch);
                if (ch == GridJavadocCharArrayLexReader.EOF)
                    throw new IOException("Unexpected EOF: " + file);
                assert ch == '>';
                tokBuf.append('>');
                String val = tokBuf.toString();
                // "<!--" prefix marks a comment; any other "<!..." is treated as an instruction.
                toks.add(new GridJavadocToken(val.startsWith("<!--") ? GridJavadocTokenType.TOKEN_COMM : GridJavadocTokenType.TOKEN_INSTR, val));
                tokBuf.setLength(0);
            } else // Tag.
            {
                // Consume up to (not including) the first '>'.
                for (; ch != GridJavadocCharArrayLexReader.EOF && ch != '>'; ch = lexer.read()) tokBuf.append((char) ch);
                if (ch == GridJavadocCharArrayLexReader.EOF)
                    throw new IOException("Unexpected EOF: " + file);
                assert ch == '>';
                tokBuf.append('>');
                // Length 2 means the buffer is exactly "<>", i.e. a tag with no name.
                if (tokBuf.length() <= 2)
                    throw new IOException("Invalid HTML in [file=" + file + ", html=" + tokBuf + ']');
                String val = tokBuf.toString();
                toks.add(new GridJavadocToken(val.startsWith("</") ? GridJavadocTokenType.TOKEN_CLOSE_TAG : GridJavadocTokenType.TOKEN_OPEN_TAG, val));
                tokBuf.setLength(0);
            }
        } else
            tokBuf.append((char) ch);
    }
    // Trailing text after the last tag.
    if (tokBuf.length() > 0)
        toks.add(new GridJavadocToken(GridJavadocTokenType.TOKEN_TEXT, tokBuf.toString()));
    // First pass: per-token rewrites.
    for (GridJavadocToken tok : toks) {
        String val = tok.value();
        switch(tok.type()) {
            case TOKEN_COMM:
                {
                    // Comments are left untouched.
                    break;
                }
            case TOKEN_OPEN_TAG:
                {
                    tok.update(fixColors(tok.value()));
                    break;
                }
            case TOKEN_CLOSE_TAG:
                {
                    // Replace </head> with favicon/stylesheet/script includes followed by </head>.
                    if ("</head>".equalsIgnoreCase(val))
                        tok.update("<link rel='shortcut icon' href='https://ignite.apache.org/favicon.ico'/>\n" + "<link type='text/css' rel='stylesheet' href='" + SH_URL + "/styles/shCore.css'/>\n" + "<link type='text/css' rel='stylesheet' href='" + SH_URL + "/styles/shThemeDefault.css'/>\n" + "<script type='text/javascript' src='" + SH_URL + "/scripts/shCore.js'></script>\n" + "<script type='text/javascript' src='" + SH_URL + "/scripts/shLegacy.js'></script>\n" + "<script type='text/javascript' src='" + SH_URL + "/scripts/shBrushJava.js'></script>\n" + "<script type='text/javascript' src='" + SH_URL + "/scripts/shBrushPlain.js'></script>\n" + "<script type='text/javascript' src='" + SH_URL + "/scripts/shBrushJScript.js'></script>\n" + "<script type='text/javascript' src='" + SH_URL + "/scripts/shBrushBash.js'></script>\n" + "<script type='text/javascript' src='" + SH_URL + "/scripts/shBrushXml.js'></script>\n" + "<script type='text/javascript' src='" + SH_URL + "/scripts/shBrushScala.js'></script>\n" + "<script type='text/javascript' src='" + SH_URL + "/scripts/shBrushGroovy.js'></script>\n" + "</head>\n");
                    // Replace </body> with a footer marker and an inline script block followed by </body>.
                    else if ("</body>".equalsIgnoreCase(val))
                        tok.update("<!--FOOTER-->" + "<script type='text/javascript'>" + "SyntaxHighlighter.all();" + "dp.SyntaxHighlighter.HighlightAll('code');" + "!function(d,s,id){var js,fjs=d.getElementsByTagName(s)[0],p=/^http:/.test(d.location)?'http':'https';if(!d.getElementById(id)){js=d.createElement(s);js.id=id;js.src=p+'://platform.twitter.com/widgets.js';fjs.parentNode.insertBefore(js,fjs);}}(document, 'script', 'twitter-wjs');" + "</script>\n" + "</body>\n");
                    break;
                }
            case TOKEN_INSTR:
                {
                    // Instructions are left untouched.
                    break;
                }
            case TOKEN_TEXT:
                {
                    tok.update(fixColors(val));
                    break;
                }
            default:
                assert false;
        }
    }
    // Final output.
    StringBuilder buf = new StringBuilder();
    // Pending segment not yet flushed into buf.
    StringBuilder tmp = new StringBuilder();
    // True between a "<pre name=" open tag and the next "</pre" close tag.
    boolean inPre = false;
    // Second pass for unstructured replacements.
    for (GridJavadocToken tok : toks) {
        String val = tok.value();
        switch(tok.type()) {
            case TOKEN_INSTR:
            case TOKEN_TEXT:
            case TOKEN_COMM:
                {
                    tmp.append(val);
                    break;
                }
            case TOKEN_OPEN_TAG:
                {
                    if (val.toLowerCase().startsWith("<pre name=")) {
                        inPre = true;
                        // Everything collected before the <pre name=...> tag goes through fixBrackets.
                        buf.append(fixBrackets(tmp.toString()));
                        tmp.setLength(0);
                    }
                    // The tag itself is appended after the flush, so it starts the next segment.
                    tmp.append(val);
                    break;
                }
            case TOKEN_CLOSE_TAG:
                {
                    if (val.toLowerCase().startsWith("</pre") && inPre) {
                        inPre = false;
                        // The <pre name=...> segment is copied verbatim, bypassing fixBrackets.
                        buf.append(tmp.toString());
                        tmp.setLength(0);
                    }
                    // The closing tag is appended after the flush, so it belongs to the following segment.
                    tmp.append(val);
                    break;
                }
            default:
                assert false;
        }
    }
    // Flush the remaining segment through fixBrackets, then apply the whole-document fixes.
    String s = buf.append(fixBrackets(tmp.toString())).toString();
    s = fixExternalLinks(s);
    s = fixDeprecated(s);
    s = fixNullable(s);
    s = fixTodo(s);
    replaceFile(file, s);
}
Also used : ArrayList(java.util.ArrayList) IOException(java.io.IOException) Jerry(jodd.jerry.Jerry)

Aggregations

Jerry (jodd.jerry.Jerry)8 File (java.io.File)7 Test (org.junit.Test)7 LagartoDOMBuilder (jodd.lagarto.dom.LagartoDOMBuilder)3 IOException (java.io.IOException)2 Document (jodd.lagarto.dom.Document)2 Element (jodd.lagarto.dom.Element)2 ByteArrayOutputStream (java.io.ByteArrayOutputStream)1 FileInputStream (java.io.FileInputStream)1 InputStream (java.io.InputStream)1 ArrayList (java.util.ArrayList)1 GZIPInputStream (java.util.zip.GZIPInputStream)1