Search in sources :

Example 16 with Parser

use of org.ccil.cowan.tagsoup.Parser in project Lucee by lucee.

the class XMLUtilImpl method parse.

/**
 * Builds a DOM {@link Document} from an XML or HTML input source.
 * <p>
 * XML input is parsed by a namespace-aware {@link DocumentBuilder}; validation is enabled only
 * when a validator source is supplied. HTML input is read with the TagSoup {@link Parser} and
 * copied into a {@link DOMResult} through a default {@link Transformer}.
 *
 * @param xml source to parse
 * @param validator source handed to the entity resolver on the XML path; {@code null} turns validation off
 * @param isHtml {@code true} to treat the input as HTML, {@code false} to treat it as XML
 * @return parsed Document
 * @throws SAXException if parsing fails; parser-configuration errors and every exception raised
 *         while transforming HTML are wrapped in it
 * @throws IOException if the XML source cannot be read
 */
public final Document parse(InputSource xml, InputSource validator, boolean isHtml) throws SAXException, IOException {
    if (isHtml) {
        // HTML: let TagSoup emit SAX events and collect them into a DOM tree.
        XMLReader htmlReader = new Parser();
        htmlReader.setFeature(Parser.namespacesFeature, true);
        htmlReader.setFeature(Parser.namespacePrefixesFeature, true);
        try {
            Transformer copier = TransformerFactory.newInstance().newTransformer();
            DOMResult dom = new DOMResult();
            copier.transform(new SAXSource(htmlReader, xml), dom);
            return getDocument(dom.getNode());
        } catch (Exception ex) {
            throw new SAXException(ex);
        }
    }

    // XML: configure the factory depending on whether a validator was supplied.
    final boolean validate = validator != null;
    DocumentBuilderFactory dbf = newDocumentBuilderFactory();
    if (validate) {
        setAttributeEL(dbf, VALIDATION_SCHEMA, Boolean.TRUE);
        setAttributeEL(dbf, VALIDATION_SCHEMA_FULL_CHECKING, Boolean.TRUE);
    } else {
        setAttributeEL(dbf, NON_VALIDATING_DTD_EXTERNAL, Boolean.FALSE);
        setAttributeEL(dbf, NON_VALIDATING_DTD_GRAMMAR, Boolean.FALSE);
    }
    dbf.setNamespaceAware(true);
    dbf.setValidating(validate);
    try {
        DocumentBuilder docBuilder = dbf.newDocumentBuilder();
        docBuilder.setEntityResolver(new XMLEntityResolverDefaultHandler(validator));
        docBuilder.setErrorHandler(new ThrowingErrorHandler(true, true, false));
        return docBuilder.parse(xml);
    } catch (ParserConfigurationException ex) {
        throw new SAXException(ex);
    }
}
Also used : DocumentBuilderFactory(javax.xml.parsers.DocumentBuilderFactory) Transformer(javax.xml.transform.Transformer) DOMResult(javax.xml.transform.dom.DOMResult) SAXSource(javax.xml.transform.sax.SAXSource) DocumentBuilder(javax.xml.parsers.DocumentBuilder) ParserConfigurationException(javax.xml.parsers.ParserConfigurationException) XMLReader(org.xml.sax.XMLReader) PageException(lucee.runtime.exp.PageException) SAXException(org.xml.sax.SAXException) TransformerException(javax.xml.transform.TransformerException) IOException(java.io.IOException) SAXParseException(org.xml.sax.SAXParseException) ParserConfigurationException(javax.xml.parsers.ParserConfigurationException) SAXException(org.xml.sax.SAXException) Parser(org.ccil.cowan.tagsoup.Parser)

Example 17 with Parser

use of org.ccil.cowan.tagsoup.Parser in project knime-core by knime.

the class TipsAndNewsInjector method prepareData.

/**
 * {@inheritDoc}
 * <p>
 * Fetches the page behind {@code m_tipsAndNewsUrl}, parses it with TagSoup into a DOM tree and
 * picks the news, tips and news-graphic {@code div} nodes out of it via XPath. Relative URLs in
 * every node that was found are fixed up with {@code fixRelativeURLs}.
 *
 * @throws Exception if connecting, reading, transforming or XPath evaluation fails
 */
@Override
protected void prepareData() throws Exception {
    HttpURLConnection conn = (HttpURLConnection) m_tipsAndNewsUrl.openConnection();
    DOMResult res = new DOMResult();
    try {
        conn.setReadTimeout(5000);
        conn.setConnectTimeout(2000);
        conn.connect();
        // parse tips&tricks from webpage
        XMLReader reader = new Parser();
        reader.setFeature(Parser.namespacesFeature, false);
        reader.setFeature(Parser.namespacePrefixesFeature, false);
        // Run the transformer with this class's loader as context class loader and restore the
        // previous one afterwards (presumably needed for transformer lookup - confirm).
        final ClassLoader cl = Thread.currentThread().getContextClassLoader();
        Thread.currentThread().setContextClassLoader(getClass().getClassLoader());
        try {
            Transformer transformer = m_transformerFactory.newTransformer();
            transformer.transform(new SAXSource(reader, new InputSource(conn.getInputStream())), res);
        } finally {
            Thread.currentThread().setContextClassLoader(cl);
        }
    } finally {
        // Release the connection on failure as well, not only after a successful download.
        conn.disconnect();
    }
    XPath xpath = m_xpathFactory.newXPath();
    Node news = (Node) xpath.evaluate("//div[@id='knime-client-news']", res.getNode(), XPathConstants.NODE);
    m_news = checkIfNewsExist(news);
    if (m_news != null) {
        fixRelativeURLs(m_news, xpath);
    }
    m_tips = (Node) xpath.evaluate("//div[@id='col-right']//div[@class='contentWrapper']//div[@class='view-content']", res.getNode(), XPathConstants.NODE);
    if (m_tips != null) {
        fixRelativeURLs(m_tips, xpath);
    }
    m_newsGraphic = (Node) xpath.evaluate("//div[@id='news-graphic']", res.getNode(), XPathConstants.NODE);
    if (m_newsGraphic != null) {
        fixRelativeURLs(m_newsGraphic, xpath);
    }
}
Also used : XPath(javax.xml.xpath.XPath) InputSource(org.xml.sax.InputSource) HttpURLConnection(java.net.HttpURLConnection) DOMResult(javax.xml.transform.dom.DOMResult) Transformer(javax.xml.transform.Transformer) SAXSource(javax.xml.transform.sax.SAXSource) Node(org.w3c.dom.Node) XMLReader(org.xml.sax.XMLReader) Parser(org.ccil.cowan.tagsoup.Parser)

Example 18 with Parser

use of org.ccil.cowan.tagsoup.Parser in project ForPDA by RadiationX.

the class HtmlToSpannedConverter method fromHtml.

/**
 * Converts an HTML string into displayable styled text.
 * <p>
 * The markup is read with a TagSoup {@link Parser} configured with {@code HtmlParser.schema},
 * so real-world, broken HTML is tolerated.
 *
 * @param source HTML markup to convert
 * @param flags forwarded unchanged to the converter
 * @param imageGetter asked for a representation of each {@code <img>} tag; pass {@code null}
 *        if images are not wanted
 * @param tagHandler handles unknown tags; pass {@code null} if that is not wanted
 * @return the styled text produced by the converter
 */
public static Spanned fromHtml(String source, int flags, ImageGetter imageGetter, TagHandler tagHandler) {
    Parser tagSoup = new Parser();
    try {
        tagSoup.setProperty(Parser.schemaProperty, HtmlParser.schema);
    } catch (org.xml.sax.SAXNotRecognizedException | org.xml.sax.SAXNotSupportedException e) {
        // Should not happen.
        throw new RuntimeException(e);
    }
    return new HtmlToSpannedConverter(source, imageGetter, tagHandler, tagSoup, flags).convert();
}
Also used : Parser(org.ccil.cowan.tagsoup.Parser)

Aggregations

Parser (org.ccil.cowan.tagsoup.Parser)18 InputSource (org.xml.sax.InputSource)5 IOException (java.io.IOException)4 SAXException (org.xml.sax.SAXException)4 XMLReader (org.xml.sax.XMLReader)4 HtmlParser (org.apache.sling.commons.html.HtmlParser)3 HttpURLConnection (java.net.HttpURLConnection)2 Transformer (javax.xml.transform.Transformer)2 DOMResult (javax.xml.transform.dom.DOMResult)2 SAXSource (javax.xml.transform.sax.SAXSource)2 BufferedInputStream (java.io.BufferedInputStream)1 File (java.io.File)1 FileWriter (java.io.FileWriter)1 StringWriter (java.io.StringWriter)1 URL (java.net.URL)1 DocumentBuilder (javax.xml.parsers.DocumentBuilder)1 DocumentBuilderFactory (javax.xml.parsers.DocumentBuilderFactory)1 ParserConfigurationException (javax.xml.parsers.ParserConfigurationException)1 TransformerException (javax.xml.transform.TransformerException)1 XPath (javax.xml.xpath.XPath)1