Use of org.ccil.cowan.tagsoup.Parser in project Lucee by lucee.
Class XMLUtilImpl, method parse.
/**
 * Parses an XML or HTML input into a DOM {@link Document}.
 * <p>
 * XML input is read by a namespace-aware {@link DocumentBuilder}; validation is switched on only
 * when a {@code validator} source is supplied. HTML input is read with the TagSoup {@link Parser}
 * and copied into a DOM tree through a {@link Transformer}.
 *
 * @param xml the XML or HTML input to parse
 * @param validator optional validator source handed to the entity resolver; {@code null} turns
 *        validation off
 * @param isHtml {@code true} to treat the input as HTML, {@code false} to treat it as XML
 * @return parsed Document
 * @throws SAXException if parsing fails; also wraps parser-configuration errors and any exception
 *         raised while transforming HTML into a DOM tree
 * @throws IOException if reading the input fails
 */
public final Document parse(InputSource xml, InputSource validator, boolean isHtml) throws SAXException, IOException {
    if (isHtml) {
        // HTML path: TagSoup acts as the SAX reader, an identity transform builds the DOM.
        XMLReader htmlReader = new Parser();
        htmlReader.setFeature(Parser.namespacesFeature, true);
        htmlReader.setFeature(Parser.namespacePrefixesFeature, true);
        try {
            Transformer identity = TransformerFactory.newInstance().newTransformer();
            DOMResult dom = new DOMResult();
            identity.transform(new SAXSource(htmlReader, xml), dom);
            return getDocument(dom.getNode());
        } catch (Exception e) {
            throw new SAXException(e);
        }
    }

    // XML path.
    final boolean validate = validator != null;
    DocumentBuilderFactory dbf = newDocumentBuilderFactory();
    if (validate) {
        setAttributeEL(dbf, VALIDATION_SCHEMA, Boolean.TRUE);
        setAttributeEL(dbf, VALIDATION_SCHEMA_FULL_CHECKING, Boolean.TRUE);
    } else {
        setAttributeEL(dbf, NON_VALIDATING_DTD_EXTERNAL, Boolean.FALSE);
        setAttributeEL(dbf, NON_VALIDATING_DTD_GRAMMAR, Boolean.FALSE);
    }
    dbf.setNamespaceAware(true);
    dbf.setValidating(validate);
    try {
        DocumentBuilder docBuilder = dbf.newDocumentBuilder();
        docBuilder.setEntityResolver(new XMLEntityResolverDefaultHandler(validator));
        docBuilder.setErrorHandler(new ThrowingErrorHandler(true, true, false));
        return docBuilder.parse(xml);
    } catch (ParserConfigurationException e) {
        throw new SAXException(e);
    }
}
Use of org.ccil.cowan.tagsoup.Parser in project knime-core by knime.
Class TipsAndNewsInjector, method prepareData.
/**
 * {@inheritDoc}
 * <p>
 * Fetches the page at {@code m_tipsAndNewsUrl}, parses it with TagSoup into a DOM tree and
 * extracts the news, tips and news-graphic nodes with XPath. The HTTP connection is disconnected
 * in all cases, including when connecting or parsing fails.
 */
@Override
protected void prepareData() throws Exception {
    HttpURLConnection conn = (HttpURLConnection) m_tipsAndNewsUrl.openConnection();
    DOMResult res = new DOMResult();
    try {
        conn.setReadTimeout(5000);
        conn.setConnectTimeout(2000);
        conn.connect();
        // parse tips&tricks from webpage
        XMLReader reader = new Parser();
        reader.setFeature(Parser.namespacesFeature, false);
        reader.setFeature(Parser.namespacePrefixesFeature, false);
        // Swap in this class's loader as context class loader for the duration of the transform
        // and restore the previous one afterwards.
        // NOTE(review): presumably needed so the transformer implementation is resolved from
        // this bundle - confirm.
        final ClassLoader cl = Thread.currentThread().getContextClassLoader();
        Thread.currentThread().setContextClassLoader(getClass().getClassLoader());
        try {
            Transformer transformer = m_transformerFactory.newTransformer();
            transformer.transform(new SAXSource(reader, new InputSource(conn.getInputStream())), res);
        } finally {
            Thread.currentThread().setContextClassLoader(cl);
        }
    } finally {
        // Release the connection even if connecting or parsing threw.
        conn.disconnect();
    }
    XPath xpath = m_xpathFactory.newXPath();
    Node news = (Node) xpath.evaluate("//div[@id='knime-client-news']", res.getNode(), XPathConstants.NODE);
    m_news = checkIfNewsExist(news);
    if (m_news != null) {
        fixRelativeURLs(m_news, xpath);
    }
    m_tips = (Node) xpath.evaluate("//div[@id='col-right']//div[@class='contentWrapper']//div[@class='view-content']", res.getNode(), XPathConstants.NODE);
    if (m_tips != null) {
        fixRelativeURLs(m_tips, xpath);
    }
    m_newsGraphic = (Node) xpath.evaluate("//div[@id='news-graphic']", res.getNode(), XPathConstants.NODE);
    if (m_newsGraphic != null) {
        fixRelativeURLs(m_newsGraphic, xpath);
    }
}
Use of org.ccil.cowan.tagsoup.Parser in project ForPDA by RadiationX.
Class HtmlToSpannedConverter, method fromHtml.
/**
 * Builds displayable styled text from an HTML string.
 * <p>
 * The supplied {@code imageGetter} and {@code tagHandler} are forwarded to the converter together
 * with a TagSoup {@link Parser} configured with {@code HtmlParser.schema}; per the original
 * contract, pass {@code null} for either if image resolution or unknown-tag handling is not
 * wanted.
 * <p>
 * This uses TagSoup to handle real HTML, including all of the brokenness found in the wild.
 *
 * @param source the HTML text to convert
 * @param flags conversion flags passed through to the converter
 * @param imageGetter resolver for {@code <img>} tags, may be {@code null}
 * @param tagHandler handler for unknown tags, may be {@code null}
 * @return the result of the converter's {@code convert()} call
 */
public static Spanned fromHtml(String source, int flags, ImageGetter imageGetter, TagHandler tagHandler) {
    Parser tagSoup = new Parser();
    try {
        tagSoup.setProperty(Parser.schemaProperty, HtmlParser.schema);
    } catch (org.xml.sax.SAXNotRecognizedException | org.xml.sax.SAXNotSupportedException e) {
        // Should not happen.
        throw new RuntimeException(e);
    }
    return new HtmlToSpannedConverter(source, imageGetter, tagHandler, tagSoup, flags).convert();
}
Aggregations