Use of org.cyberneko.html.parsers.SAXParser in project Java-readability by basis-technology-corp.
Class NekoJsoupParser, method parse.
/**
 * Parses HTML text with the NekoHTML SAX parser.
 *
 * @param data    markup to parse; it is fed to the parser as a character stream
 * @param baseUri value passed to the {@code Document} constructor
 * @return the {@code Document} that was handed to the content handler
 * @throws SAXException if the parser reports a SAX error
 * @throws IOException  if reading the input fails
 */
public Document parse(String data, String baseUri) throws SAXException, IOException {
    // InputSource(Reader) registers the reader as the character stream.
    InputSource input = new InputSource(new StringReader(data));

    SAXParser parser = new SAXParser();
    Document result = new Document(baseUri);

    // The handler receives the SAX events together with the document instance.
    Handler contentHandler = new Handler(result);
    parser.setContentHandler(contentHandler);
    parser.setErrorHandler(new LocalErrorHandler());

    parser.parse(input);
    return result;
}
Use of org.cyberneko.html.parsers.SAXParser in project Java-readability by basis-technology-corp.
Class NekoJsoupParser, method parse.
/**
 * Parses HTML from a byte stream with the NekoHTML SAX parser.
 *
 * @param data    stream of markup bytes; it is registered as the input source's byte stream
 * @param baseUri value passed to the {@code Document} constructor
 * @return the {@code Document} that was handed to the content handler
 * @throws SAXException if the parser reports a SAX error
 * @throws IOException  if reading the stream fails
 */
public Document parse(InputStream data, String baseUri) throws SAXException, IOException {
    // InputSource(InputStream) registers the stream as the byte stream.
    InputSource input = new InputSource(data);

    SAXParser parser = new SAXParser();
    Document result = new Document(baseUri);

    // The handler receives the SAX events together with the document instance.
    Handler contentHandler = new Handler(result);
    parser.setContentHandler(contentHandler);
    parser.setErrorHandler(new LocalErrorHandler());

    parser.parse(input);
    return result;
}
Use of org.cyberneko.html.parsers.SAXParser in project gocd by gocd.
Class HtmlSaxParserContext, method createParser.
/**
 * Creates a NekoHTML SAX parser with the element-name and attribute-name
 * case properties both set to {@code "lower"}.
 *
 * @return the configured parser
 * @throws SAXException if either property cannot be set; the original
 *                      exception is attached as the cause
 */
@Override
protected AbstractSAXParser createParser() throws SAXException {
    SAXParser parser = new SAXParser();
    try {
        parser.setProperty("http://cyberneko.org/html/properties/names/elems", "lower");
        parser.setProperty("http://cyberneko.org/html/properties/names/attrs", "lower");
        return parser;
    } catch (SAXException ex) {
        // Chain the original exception so its stack trace is preserved for callers.
        throw new SAXException("Problem while creating HTML SAX Parser: " + ex.toString(), ex);
    }
}
Use of org.cyberneko.html.parsers.SAXParser in project gradle by gradle.
Class ApacheDirectoryListingParser, method parse.
/**
 * Parses an HTML directory listing and returns the entries that remain after
 * the collected anchor hrefs are resolved against the base URI and passed
 * through {@code filterNonDirectChilds}.
 *
 * @param baseURI     URI of the listing; trailing slashes are added before use
 * @param content     stream containing the listing body
 * @param contentType content type of the listing; must start with {@code text/html}
 * @return the result of {@code filterNonDirectChilds} for the resolved hrefs
 * @throws Exception if the content type is missing or unsupported
 *                   ({@code ResourceException}), or if decoding or parsing fails
 */
public List<String> parse(URI baseURI, InputStream content, String contentType) throws Exception {
    baseURI = addTrailingSlashes(baseURI);

    // Only HTML listings are understood; reject everything else up front.
    boolean isHtml = contentType != null && contentType.startsWith("text/html");
    if (!isHtml) {
        String message = String.format("Unsupported ContentType %s for directory listing '%s'", contentType, baseURI);
        throw new ResourceException(baseURI, message);
    }

    // Decode using the charset from the content type, with "utf-8" passed as the default.
    String charset = UriTextResource.extractCharacterEncoding(contentType, "utf-8");
    Reader reader = new InputStreamReader(content, charset);

    SAXParser parser = new SAXParser();
    AnchorListerHandler anchors = new AnchorListerHandler();
    parser.setContentHandler(anchors);
    parser.parse(new InputSource(reader));

    return filterNonDirectChilds(baseURI, resolveURIs(baseURI, anchors.getHrefs()));
}
Use of org.cyberneko.html.parsers.SAXParser in project nokogiri by sparklemotion.
Class HtmlSaxParserContext, method createParser.
/**
 * Creates a NekoHTML SAX parser with the element-name and attribute-name
 * case properties both set to {@code "lower"}.
 *
 * @return the configured parser
 * @throws SAXException if either property cannot be set; the original
 *                      exception is attached as the cause
 */
@Override
protected AbstractSAXParser createParser() throws SAXException {
    SAXParser parser = new SAXParser();
    try {
        parser.setProperty("http://cyberneko.org/html/properties/names/elems", "lower");
        parser.setProperty("http://cyberneko.org/html/properties/names/attrs", "lower");
        return parser;
    } catch (SAXException ex) {
        // Chain the original exception so its stack trace is preserved for callers.
        throw new SAXException("Problem while creating HTML SAX Parser: " + ex.toString(), ex);
    }
}
Aggregations