Use of com.gargoylesoftware.htmlunit.ObjectInstantiationException in project htmlunit by HtmlUnit.
The class HtmlUnitNekoHTMLErrorHandler, method parse.
/**
 * Parses the WebResponse into an object tree representation.
 *
 * <p>If the response provides no content charset, ISO-8859-1 is used to decode the
 * content; otherwise the response charset is used and the scanner's
 * {@code IGNORE_SPECIFIED_CHARSET} feature is enabled.</p>
 *
 * @param webResponse the response data
 * @param page the HtmlPage to add the nodes
 * @param xhtml if true use the XHtml parser
 * @param createdByJavascript if true the (script) tag was created by javascript
 * @throws IOException if there is an IO error
 * @throws ObjectInstantiationException if a parser feature cannot be set
 * @throws RuntimeException if the parser reports an {@code XNIException}; the extracted
 *         nested exception is attached as the cause
 */
@Override
public void parse(final WebResponse webResponse, final HtmlPage page, final boolean xhtml, final boolean createdByJavascript) throws IOException {
    final URL url = webResponse.getWebRequest().getUrl();
    final HtmlUnitNekoDOMBuilder domBuilder = new HtmlUnitNekoDOMBuilder(this, page, url, null, createdByJavascript);

    Charset charset = webResponse.getContentCharsetOrNull();
    try {
        if (charset == null) {
            charset = StandardCharsets.ISO_8859_1;
        } else {
            domBuilder.setFeature(HTMLScanner.IGNORE_SPECIFIED_CHARSET, true);
        }

        // xml content is different
        if (xhtml) {
            domBuilder.setFeature(HTMLScanner.ALLOW_SELFCLOSING_TAGS, true);
            domBuilder.setFeature(HTMLScanner.SCRIPT_STRIP_CDATA_DELIMS, true);
            domBuilder.setFeature(HTMLScanner.STYLE_STRIP_CDATA_DELIMS, true);
        }
    } catch (final Exception e) {
        throw new ObjectInstantiationException("Error setting HTML parser feature", e);
    }

    try (InputStream content = webResponse.getContentAsStream()) {
        final String encoding = charset.name();
        final XMLInputSource in = new XMLInputSource(null, url.toString(), null, content, encoding);

        page.registerParsingStart();
        try {
            domBuilder.parse(in);
        } catch (final XNIException e) {
            // extract enclosed exception
            final Throwable origin = extractNestedException(e);
            throw new RuntimeException("Failed parsing content from " + url, origin);
        } finally {
            // Paired with registerParsingStart() above: the end hook must only run when the
            // start hook ran, so it is not invoked if opening the content stream failed.
            page.registerParsingEnd();
        }
    }
}
Aggregations