Search in sources:

Example 1 with Tidy

use of org.w3c.tidy.Tidy in project jmeter by apache.

the class XPathUtil method makeTidyParser.

/**
 * Builds a {@link Tidy} instance configured from the supplied flags.
 * <p>
 * Input and output encodings are both set to UTF-8 and the "make clean"
 * option is always switched on.
 *
 * @param quiet        value passed to {@code Tidy.setQuiet}
 * @param showWarnings value passed to {@code Tidy.setShowWarnings}
 * @param isXml        value passed to {@code Tidy.setXmlTags}
 * @param stringWriter when non-null, Tidy's error output is redirected to
 *                     a {@link PrintWriter} wrapping this writer; when null
 *                     the error output is left at Tidy's default
 * @return the configured Tidy parser
 */
public static Tidy makeTidyParser(boolean quiet, boolean showWarnings, boolean isXml, StringWriter stringWriter) {
    final String utf8 = StandardCharsets.UTF_8.name();

    final Tidy parser = new Tidy();
    parser.setInputEncoding(utf8);
    parser.setOutputEncoding(utf8);
    parser.setQuiet(quiet);
    parser.setShowWarnings(showWarnings);
    parser.setMakeClean(true);
    parser.setXmlTags(isXml);

    // No writer supplied: keep whatever error output Tidy uses by default.
    if (stringWriter == null) {
        return parser;
    }
    parser.setErrout(new PrintWriter(stringWriter));
    return parser;
}
Also used : Tidy(org.w3c.tidy.Tidy) PrintWriter(java.io.PrintWriter)

Example 2 with Tidy

use of org.w3c.tidy.Tidy in project jmeter by apache.

the class JTidyHTMLParser method getTidyParser.

/**
 * Creates a quiet {@link Tidy} instance for use as an HTML parser.
 * <p>
 * Warnings are suppressed, the output encoding is fixed to UTF-8, and
 * start/end trace messages are written to the debug log.
 *
 * @param encoding name of the character encoding used to read the input
 * @return a newly created <code>tidy</code> HTML parser
 */
private static Tidy getTidyParser(String encoding) {
    log.debug("Start : getParser");

    final Tidy parser = new Tidy();
    parser.setInputEncoding(encoding);
    parser.setOutputEncoding(StandardCharsets.UTF_8.name());
    parser.setQuiet(true);
    parser.setShowWarnings(false);

    // Guarded so the string concatenation only happens when debug is on.
    if (log.isDebugEnabled()) {
        log.debug("getParser : tidy parser created - " + parser);
    }

    log.debug("End   : getParser");
    return parser;
}
Also used : Tidy(org.w3c.tidy.Tidy)

Example 3 with Tidy

use of org.w3c.tidy.Tidy in project hudson-2.x by hudson.

the class JDKInstaller method locateStage1.

/**
 * Performs the first stage of locating the download.
 * <p>
 * Fetches the product detail page for {@code id}, finds the form whose
 * {@code action} contains {@code ViewFilteredProducts}, and POSTs that form
 * back with the platform option matching the requested platform/CPU, the
 * first language option (when a language selector is present) and every
 * {@code <input>} of the form.
 *
 * @param platform the platform used to filter the platform options
 * @param cpu      the CPU used to rank the remaining options as primary or
 *                 secondary choices
 * @return the connection on which the form has been posted
 * @throws IOException if a page cannot be accessed, the expected form or
 *                     platform selector is missing, or no option matches the
 *                     requested platform/CPU combination
 */
// dom4j doesn't do generics, apparently... should probably switch to XOM
@SuppressWarnings("unchecked")
private HttpURLConnection locateStage1(Platform platform, CPU cpu) throws IOException {
    URL url = new URL("https://cds.sun.com/is-bin/INTERSHOP.enfinity/WFS/CDS-CDS_Developer-Site/en_US/-/USD/ViewProductDetail-Start?ProductRef=" + id);
    String cookie;
    Element form;
    try {
        HttpURLConnection con = (HttpURLConnection) ProxyConfiguration.open(url);
        cookie = con.getHeaderField("Set-Cookie");
        LOGGER.fine("Cookie=" + cookie);
        Tidy tidy = new Tidy();
        tidy.setErrout(new PrintWriter(new NullWriter()));
        DOMReader domReader = new DOMReader();
        Document dom = domReader.read(tidy.parseDOM(con.getInputStream(), null));
        form = null;
        for (Element e : (List<Element>) dom.selectNodes("//form")) {
            String action = e.attributeValue("action");
            LOGGER.fine("Found form:" + action);
            // A <form> without an action attribute yields null here.
            if (action != null && action.contains("ViewFilteredProducts")) {
                form = e;
                break;
            }
        }
    } catch (IOException e) {
        throw new IOException2("Failed to access " + url, e);
    }
    if (form == null) {
        // Fail with a descriptive error instead of a NullPointerException below.
        throw new IOException("Couldn't find the ViewFilteredProducts form in " + url);
    }
    url = new URL(form.attributeValue("action"));
    try {
        HttpURLConnection con = (HttpURLConnection) ProxyConfiguration.open(url);
        con.setRequestMethod("POST");
        con.setDoOutput(true);
        con.setRequestProperty("Cookie", cookie);
        con.setRequestProperty("Content-Type", "application/x-www-form-urlencoded");
        PrintStream os = new PrintStream(con.getOutputStream());
        try {
            // select platform
            String primary = null, secondary = null;
            Element p = (Element) form.selectSingleNode(".//select[@id='dnld_platform']");
            if (p == null) {
                throw new AbortException("Couldn't find the platform selector in the form at " + url);
            }
            for (Element opt : (List<Element>) p.elements("option")) {
                String value = opt.attributeValue("value");
                if (value == null) {
                    // an option without a value attribute cannot be matched or submitted
                    continue;
                }
                String vcap = value.toUpperCase(Locale.ENGLISH);
                if (!platform.is(vcap)) {
                    continue;
                }
                switch(cpu.accept(vcap)) {
                    case PRIMARY:
                        primary = value;
                        break;
                    case SECONDARY:
                        secondary = value;
                        break;
                    case UNACCEPTABLE:
                        break;
                }
            }
            // fall back to the secondary match when there is no primary one
            if (primary == null) {
                primary = secondary;
            }
            if (primary == null) {
                throw new AbortException("Couldn't find the right download for " + platform + " and " + cpu + " combination");
            }
            os.print(p.attributeValue("name") + '=' + primary);
            LOGGER.fine("Platform choice:" + primary);
            // select language
            Element l = (Element) form.selectSingleNode(".//select[@id='dnld_language']");
            if (l != null) {
                Element firstLanguage = l.element("option");
                if (firstLanguage != null) {
                    os.print("&" + l.attributeValue("name") + "=" + firstLanguage.attributeValue("value"));
                }
            }
            // the rest
            for (Element e : (List<Element>) form.selectNodes(".//input")) {
                os.print('&');
                os.print(e.attributeValue("name"));
                os.print('=');
                String value = e.attributeValue("value");
                if (value == null) {
                    // assume this is a checkbox
                    os.print("on");
                } else {
                    os.print(URLEncoder.encode(value, "UTF-8"));
                }
            }
        } finally {
            // release the request body stream on the failure paths as well
            os.close();
        }
        return con;
    } catch (IOException e) {
        throw new IOException2("Failed to access " + url, e);
    }
}
Also used : PrintStream(java.io.PrintStream) Element(org.dom4j.Element) IOException(java.io.IOException) Document(org.dom4j.Document) Tidy(org.w3c.tidy.Tidy) URL(java.net.URL) NullWriter(org.apache.commons.io.output.NullWriter) DOMReader(org.dom4j.io.DOMReader) HttpURLConnection(java.net.HttpURLConnection) ArrayList(java.util.ArrayList) List(java.util.List) IOException2(hudson.util.IOException2) PrintWriter(java.io.PrintWriter) AbortException(hudson.AbortException)

Example 4 with Tidy

use of org.w3c.tidy.Tidy in project jmeter by apache.

the class XPathUtil method tidyDoc.

/**
 * Parses the given stream into a DOM document with Tidy and normalizes it.
 * <p>
 * Tidy's error output is captured in memory. When Tidy reports parse
 * errors, that output is logged: at error level followed by a
 * {@link TidyException} if {@code report_errors} is true, otherwise at
 * warn level and the document is still returned.
 *
 * @param stream        the input to parse
 * @param quiet         set the Tidy quiet flag?
 * @param showWarnings  show Tidy warnings?
 * @param report_errors log errors and throw TidyException?
 * @param isXML         treat the document as XML?
 * @param out           OutputStream handed to Tidy, null if no output is required
 * @return the normalized document
 *
 * @throws TidyException if Tidy reports parse errors and report_errors is true
 */
private static Document tidyDoc(InputStream stream, boolean quiet, boolean showWarnings, boolean report_errors, boolean isXML, OutputStream out) throws TidyException {
    final StringWriter tidyMessages = new StringWriter();
    final Tidy parser = makeTidyParser(quiet, showWarnings, isXML, tidyMessages);

    final Document result = parser.parseDOM(stream, out);
    result.normalize();

    // Clean parse: nothing to log or report.
    if (parser.getParseErrors() <= 0) {
        return result;
    }

    // Errors present but the caller only wants them logged.
    if (!report_errors) {
        log.warn("Tidy errors: {}", tidyMessages);
        return result;
    }

    log.error("TidyException: {}", tidyMessages);
    throw new TidyException(parser.getParseErrors(), parser.getParseWarnings());
}
Also used : StringWriter(java.io.StringWriter) Document(org.w3c.dom.Document) Tidy(org.w3c.tidy.Tidy)

Example 5 with Tidy

use of org.w3c.tidy.Tidy in project jmeter by apache.

the class HtmlParsingUtils method getParser.

/**
 * Creates a quiet {@link Tidy} instance for use as an HTML parser.
 * <p>
 * Both input and output encodings are UTF-8, warnings are suppressed, and
 * start/end trace messages are written to the debug log.
 *
 * @return a newly created <code>tidy</code> HTML parser
 */
public static Tidy getParser() {
    log.debug("Start : getParser1");

    final String utf8 = StandardCharsets.UTF_8.name();
    final Tidy parser = new Tidy();
    parser.setInputEncoding(utf8);
    parser.setOutputEncoding(utf8);
    parser.setQuiet(true);
    parser.setShowWarnings(false);

    // Guarded so the string concatenation only happens when debug is on.
    if (log.isDebugEnabled()) {
        log.debug("getParser1 : tidy parser created - " + parser);
    }

    log.debug("End : getParser1");
    return parser;
}
Also used : Tidy(org.w3c.tidy.Tidy)

Aggregations

Tidy (org.w3c.tidy.Tidy): 7, PrintWriter (java.io.PrintWriter): 3, StringWriter (java.io.StringWriter): 3, ByteArrayInputStream (java.io.ByteArrayInputStream): 2, IOException (java.io.IOException): 2, AbortException (hudson.AbortException): 1, IOException2 (hudson.util.IOException2): 1, ByteArrayOutputStream (java.io.ByteArrayOutputStream): 1, PrintStream (java.io.PrintStream): 1, HttpURLConnection (java.net.HttpURLConnection): 1, URL (java.net.URL): 1, ArrayList (java.util.ArrayList): 1, List (java.util.List): 1, JPanel (javax.swing.JPanel): 1, NullWriter (org.apache.commons.io.output.NullWriter): 1, Document (org.dom4j.Document): 1, Element (org.dom4j.Element): 1, DOMReader (org.dom4j.io.DOMReader): 1, Document (org.w3c.dom.Document): 1, Node (org.w3c.tidy.Node): 1