Search in sources :

Example 1 with Parser

use of org.ccil.cowan.tagsoup.Parser in project android_frameworks_base by ParanoidAndroid.

the class HtmlToSpannedConverter method fromHtml.

/**
 * Converts an HTML string into displayable styled text.
 *
 * <p>Parsing is delegated to TagSoup, so real-world (including malformed) HTML is accepted.
 *
 * @param source      the HTML to convert
 * @param imageGetter asked for a representation of each {@code <img>} tag; pass null to opt out
 * @param tagHandler  invoked for tags that are not otherwise understood; pass null to opt out
 * @return the styled text produced by {@code HtmlToSpannedConverter.convert()}
 */
public static Spanned fromHtml(String source, ImageGetter imageGetter, TagHandler tagHandler) {
    final Parser tagSoup = new Parser();
    try {
        tagSoup.setProperty(Parser.schemaProperty, HtmlParser.schema);
    } catch (org.xml.sax.SAXException unexpected) {
        // setProperty only declares SAXNotRecognizedException and SAXNotSupportedException,
        // both of which are SAXExceptions. Neither should happen for the schema property.
        throw new RuntimeException(unexpected);
    }
    return new HtmlToSpannedConverter(source, imageGetter, tagHandler, tagSoup).convert();
}
Also used : Parser(org.ccil.cowan.tagsoup.Parser)

Example 2 with Parser

use of org.ccil.cowan.tagsoup.Parser in project XobotOS by xamarin.

the class HtmlToSpannedConverter method fromHtml.

/**
 * Produces displayable styled text from an HTML string.
 *
 * <p>TagSoup does the parsing, which lets this cope with the broken HTML found in the wild.
 *
 * @param source      the HTML string to render
 * @param imageGetter supplies a representation for every &lt;img&gt; tag, or null to skip images
 * @param tagHandler  handles unknown tags, or null if unknown tags need no handling
 * @return the result of running the converter over {@code source}
 */
public static Spanned fromHtml(String source, ImageGetter imageGetter, TagHandler tagHandler) {
    final Parser htmlParser = new Parser();
    try {
        htmlParser.setProperty(Parser.schemaProperty, HtmlParser.schema);
    } catch (org.xml.sax.SAXNotRecognizedException notRecognized) {
        // Not expected: the schema property is one the parser knows about.
        throw new RuntimeException(notRecognized);
    } catch (org.xml.sax.SAXNotSupportedException notSupported) {
        // Not expected either.
        throw new RuntimeException(notSupported);
    }
    final HtmlToSpannedConverter spannedConverter =
            new HtmlToSpannedConverter(source, imageGetter, tagHandler, htmlParser);
    return spannedConverter.convert();
}
Also used : Parser(org.ccil.cowan.tagsoup.Parser)

Example 3 with Parser

use of org.ccil.cowan.tagsoup.Parser in project camel by apache.

the class ManualGenerator method grabBodyContent.

/**
 * Downloads {@code page}, runs it through TagSoup and returns the serialized markup of the
 * {@code <div class="wiki-content maincontent">} element (the element itself plus everything
 * nested inside it). Content outside that element is dropped.
 *
 * <p>After a successful parse the connection's last-modified timestamp is written to a
 * {@code .manualCache-*} file under {@code targetDir}.
 *
 * <p>The HTTP stream, the connection and the cache-file writer are always released, including
 * when parsing or writing fails.
 *
 * @return the extracted markup as a string
 * @throws MalformedURLException if {@code page} is not a valid URL
 * @throws RuntimeException wrapping any failure raised while fetching, parsing or caching
 */
private String grabBodyContent() throws MalformedURLException, IOException {
    URL url = new URL(page);
    File file = new File(targetDir, ".manualCache-" + url.getFile().substring(1));
    try {
        HttpURLConnection con = (HttpURLConnection) url.openConnection();
        try {
            XMLReader parser = new Parser();
            parser.setFeature(Parser.namespacesFeature, false);
            parser.setFeature(Parser.namespacePrefixesFeature, false);
            parser.setProperty(Parser.schemaProperty, new org.ccil.cowan.tagsoup.HTMLSchema() {

                {
                    //problem with nested lists that the confluence {toc} macro creates
                    elementType("ul", M_LI, M_BLOCK | M_LI, 0);
                }
            });
            StringWriter w = new StringWriter();
            XMLWriter xmlWriter = new XMLWriter(w) {

                // Nesting depth at which the wanted div was opened; MAX_VALUE while outside it.
                int inDiv = Integer.MAX_VALUE;

                // Current element nesting depth.
                int count;

                @Override
                public void characters(char[] ch, int start, int len) throws SAXException {
                    if (inDiv <= count) {
                        super.characters(ch, start, len);
                    }
                }

                @Override
                public void startElement(String uri, String localName, String qName, Attributes atts) throws SAXException {
                    count++;
                    if ("div".equalsIgnoreCase(qName) && "wiki-content maincontent".equalsIgnoreCase(atts.getValue("class"))) {
                        inDiv = count;
                    }
                    if (inDiv <= count) {
                        super.startElement(uri, localName, qName, atts);
                    }
                }

                @Override
                public void endElement(String uri, String localName, String qName) throws SAXException {
                    if (inDiv <= count) {
                        super.endElement(uri, localName, qName);
                    }
                    count--;
                    // Leaving the wanted div: stop emitting until it is seen again.
                    if (inDiv > count) {
                        inDiv = Integer.MAX_VALUE;
                    }
                }
            };
            xmlWriter.setOutputProperty(XMLWriter.OMIT_XML_DECLARATION, "yes");
            xmlWriter.setOutputProperty(XMLWriter.METHOD, "html");
            parser.setContentHandler(xmlWriter);
            long date = con.getLastModified();
            BufferedInputStream in = new BufferedInputStream(con.getInputStream());
            try {
                parser.parse(new InputSource(in));
            } finally {
                try {
                    in.close();
                } catch (IOException ignored) {
                    // The stream is only read from; a failure to close it must not
                    // discard an otherwise successful parse or mask a parse error.
                }
            }
            FileWriter writer = new FileWriter(file);
            try {
                writer.write(Long.toString(date));
            } finally {
                writer.close();
            }
            return w.toString();
        } finally {
            con.disconnect();
        }
    } catch (Throwable e) {
        e.printStackTrace();
        throw new RuntimeException("Failed", e);
    }
}
Also used : InputSource(org.xml.sax.InputSource) FileWriter(java.io.FileWriter) Attributes(org.xml.sax.Attributes) XMLWriter(org.ccil.cowan.tagsoup.XMLWriter) URL(java.net.URL) Parser(org.ccil.cowan.tagsoup.Parser) HttpURLConnection(java.net.HttpURLConnection) StringWriter(java.io.StringWriter) BufferedInputStream(java.io.BufferedInputStream) File(java.io.File) XMLReader(org.xml.sax.XMLReader)

Example 4 with Parser

use of org.ccil.cowan.tagsoup.Parser in project camel by apache.

the class TidyMarkupDataFormat method createTagSoupParser.

/**
 * Creates and configures the TagSoup parser.
 *
 * <p>Namespace handling is switched off first; any configured parser features and parser
 * properties are applied next, and finally the parsing schema is set when one is configured.
 *
 * @return the configured reader
 * @throws IllegalArgumentException if any feature or property cannot be applied
 */
protected XMLReader createTagSoupParser() throws CamelException {
    final XMLReader tagSoup = new Parser();
    try {
        tagSoup.setFeature(Parser.namespacesFeature, false);
        tagSoup.setFeature(Parser.namespacePrefixesFeature, false);

        // User-supplied feature switches, if any.
        if (getParserFeatures() != null) {
            for (Entry<String, Boolean> feature : getParserFeatures().entrySet()) {
                tagSoup.setFeature(feature.getKey(), feature.getValue());
            }
        }

        // User-supplied parser properties, if any.
        if (getParserProperties() != null) {
            for (Entry<String, Object> property : getParserProperties().entrySet()) {
                tagSoup.setProperty(property.getKey(), property.getValue());
            }
        }

        // Apply the configured schema last; nothing is set when none is configured.
        if (getParsingSchema() != null) {
            tagSoup.setProperty(Parser.schemaProperty, getParsingSchema());
        }
    } catch (Exception cause) {
        throw new IllegalArgumentException("Problem configuring the parser", cause);
    }
    return tagSoup;
}
Also used : XMLReader(org.xml.sax.XMLReader) CamelException(org.apache.camel.CamelException) Parser(org.ccil.cowan.tagsoup.Parser)

Example 5 with Parser

use of org.ccil.cowan.tagsoup.Parser in project platform_frameworks_base by android.

the class HtmlToSpannedConverter method fromHtml.

/**
 * Turns the given HTML string into displayable styled text.
 *
 * <p>TagSoup is used for parsing so that real HTML, with all the brokenness found in the
 * wild, can be handled.
 *
 * @param source      the HTML string
 * @param flags       passed through unchanged to the converter
 * @param imageGetter requested for a representation of each &lt;img&gt; tag; use null if
 *                    images are not wanted
 * @param tagHandler  handles unknown tags; specify null if that is not wanted
 * @return the converted text
 */
public static Spanned fromHtml(String source, int flags, ImageGetter imageGetter, TagHandler tagHandler) {
    final Parser tagSoupParser = new Parser();
    try {
        tagSoupParser.setProperty(Parser.schemaProperty, HtmlParser.schema);
    } catch (org.xml.sax.SAXException shouldNotHappen) {
        // Covers the two declared failures (property not recognized / not supported);
        // neither is expected for the schema property.
        throw new RuntimeException(shouldNotHappen);
    }
    final HtmlToSpannedConverter htmlConverter =
            new HtmlToSpannedConverter(source, imageGetter, tagHandler, tagSoupParser, flags);
    return htmlConverter.convert();
}
Also used : Parser(org.ccil.cowan.tagsoup.Parser)

Aggregations

Parser (org.ccil.cowan.tagsoup.Parser): 18, InputSource (org.xml.sax.InputSource): 5, IOException (java.io.IOException): 4, SAXException (org.xml.sax.SAXException): 4, XMLReader (org.xml.sax.XMLReader): 4, HtmlParser (org.apache.sling.commons.html.HtmlParser): 3, HttpURLConnection (java.net.HttpURLConnection): 2, Transformer (javax.xml.transform.Transformer): 2, DOMResult (javax.xml.transform.dom.DOMResult): 2, SAXSource (javax.xml.transform.sax.SAXSource): 2, BufferedInputStream (java.io.BufferedInputStream): 1, File (java.io.File): 1, FileWriter (java.io.FileWriter): 1, StringWriter (java.io.StringWriter): 1, URL (java.net.URL): 1, DocumentBuilder (javax.xml.parsers.DocumentBuilder): 1, DocumentBuilderFactory (javax.xml.parsers.DocumentBuilderFactory): 1, ParserConfigurationException (javax.xml.parsers.ParserConfigurationException): 1, TransformerException (javax.xml.transform.TransformerException): 1, XPath (javax.xml.xpath.XPath): 1