Use of org.ccil.cowan.tagsoup.Parser in the project android_frameworks_base by ParanoidAndroid.
In class HtmlToSpannedConverter, method fromHtml:
/**
 * Converts an HTML string into styled, displayable text.
 *
 * <p>Parsing is delegated to TagSoup so that real-world HTML, with all of the
 * brokenness found in the wild, is accepted.
 *
 * @param source the HTML to convert
 * @param imageGetter asked for a representation of every {@code <img>} tag in the
 *        HTML; pass {@code null} if you don't want this
 * @param tagHandler asked to handle unknown tags; pass {@code null} if you don't
 *        want this
 * @return the styled text produced by the converter
 * @throws RuntimeException if the parser rejects the schema property (not expected)
 */
public static Spanned fromHtml(String source, ImageGetter imageGetter, TagHandler tagHandler) {
    Parser tagSoup = new Parser();
    try {
        tagSoup.setProperty(Parser.schemaProperty, HtmlParser.schema);
    } catch (org.xml.sax.SAXException e) {
        // setProperty only declares SAXNotRecognizedException and
        // SAXNotSupportedException, both SAXException subclasses. Neither is
        // expected for the schema property, so surface them as unchecked.
        throw new RuntimeException(e);
    }
    return new HtmlToSpannedConverter(source, imageGetter, tagHandler, tagSoup).convert();
}
Use of org.ccil.cowan.tagsoup.Parser in the project XobotOS by xamarin.
In class HtmlToSpannedConverter, method fromHtml:
/**
 * Converts an HTML string into styled, displayable text.
 *
 * <p>Parsing is delegated to TagSoup so that real-world HTML, with all of the
 * brokenness found in the wild, is accepted.
 *
 * @param source the HTML to convert
 * @param imageGetter asked for a representation of every {@code <img>} tag in the
 *        HTML; pass {@code null} if you don't want this
 * @param tagHandler asked to handle unknown tags; pass {@code null} if you don't
 *        want this
 * @return the styled text produced by the converter
 * @throws RuntimeException if the parser rejects the schema property (not expected)
 */
public static Spanned fromHtml(String source, ImageGetter imageGetter, TagHandler tagHandler) {
    Parser tagSoup = new Parser();
    try {
        tagSoup.setProperty(Parser.schemaProperty, HtmlParser.schema);
    } catch (org.xml.sax.SAXException e) {
        // setProperty only declares SAXNotRecognizedException and
        // SAXNotSupportedException, both SAXException subclasses. Neither is
        // expected for the schema property, so surface them as unchecked.
        throw new RuntimeException(e);
    }
    return new HtmlToSpannedConverter(source, imageGetter, tagHandler, tagSoup).convert();
}
Use of org.ccil.cowan.tagsoup.Parser in the project camel by apache.
In class ManualGenerator, method grabBodyContent:
/**
 * Fetches {@code page} over HTTP, runs it through TagSoup and returns only the
 * markup found inside the {@code <div class="wiki-content maincontent">} element.
 *
 * <p>After a successful parse, the connection's last-modified timestamp is written
 * to a {@code .manualCache-*} file under {@code targetDir}.
 *
 * @return the serialized content of the main wiki-content div
 * @throws MalformedURLException if {@code page} is not a valid URL
 * @throws IOException declared for callers; failures inside the download/parse
 *         section are rethrown as {@link RuntimeException} instead
 * @throws RuntimeException wrapping any failure while downloading, parsing or
 *         writing the cache file
 */
private String grabBodyContent() throws MalformedURLException, IOException {
    URL url = new URL(page);
    File file = new File(targetDir, ".manualCache-" + url.getFile().substring(1));
    try {
        HttpURLConnection con = (HttpURLConnection) url.openConnection();
        XMLReader parser = new Parser();
        parser.setFeature(Parser.namespacesFeature, false);
        parser.setFeature(Parser.namespacePrefixesFeature, false);
        parser.setProperty(Parser.schemaProperty, new org.ccil.cowan.tagsoup.HTMLSchema() {
            {
                // problem with nested lists that the confluence {toc} macro creates
                elementType("ul", M_LI, M_BLOCK | M_LI, 0);
            }
        });
        StringWriter w = new StringWriter();
        XMLWriter xmlWriter = new XMLWriter(w) {
            // Element depth at which the wanted div was opened;
            // Integer.MAX_VALUE means "not currently inside it".
            int inDiv = Integer.MAX_VALUE;
            // Current element nesting depth.
            int count;

            public void characters(char[] ch, int start, int len) throws SAXException {
                // Forward text only while inside the wanted div.
                if (inDiv <= count) {
                    super.characters(ch, start, len);
                }
            }

            public void startElement(String uri, String localName, String qName, Attributes atts) throws SAXException {
                count++;
                if ("div".equalsIgnoreCase(qName) && "wiki-content maincontent".equalsIgnoreCase(atts.getValue("class"))) {
                    inDiv = count;
                }
                if (inDiv <= count) {
                    super.startElement(uri, localName, qName, atts);
                }
            }

            public void endElement(String uri, String localName, String qName) throws SAXException {
                if (inDiv <= count) {
                    super.endElement(uri, localName, qName);
                }
                count--;
                // Dropped below the depth of the wanted div: we have left it.
                if (inDiv > count) {
                    inDiv = Integer.MAX_VALUE;
                }
            }
        };
        xmlWriter.setOutputProperty(XMLWriter.OMIT_XML_DECLARATION, "yes");
        xmlWriter.setOutputProperty(XMLWriter.METHOD, "html");
        parser.setContentHandler(xmlWriter);
        long date = con.getLastModified();

        // Close the response stream even when parsing fails, so the underlying
        // connection is not leaked.
        BufferedInputStream in = new BufferedInputStream(con.getInputStream());
        try {
            parser.parse(new InputSource(in));
        } finally {
            in.close();
        }

        // Close the cache file even when the write fails.
        FileWriter writer = new FileWriter(file);
        try {
            writer.write(Long.toString(date));
        } finally {
            writer.close();
        }
        return w.toString();
    } catch (Throwable e) {
        e.printStackTrace();
        throw new RuntimeException("Failed", e);
    }
}
Use of org.ccil.cowan.tagsoup.Parser in the project camel by apache.
In class TidyMarkupDataFormat, method createTagSoupParser:
/**
 * Builds and configures the TagSoup parser.
 *
 * <p>Namespace and namespace-prefix reporting are switched off first. Any
 * configured parser features and properties are applied next, and the configured
 * parsing schema (if there is one) is applied last.
 *
 * @return the configured reader
 * @throws IllegalArgumentException if any feature or property cannot be applied
 */
protected XMLReader createTagSoupParser() throws CamelException {
    XMLReader tagSoup = new Parser();
    try {
        tagSoup.setFeature(Parser.namespacesFeature, false);
        tagSoup.setFeature(Parser.namespacePrefixesFeature, false);

        // Caller-supplied features; applied after the two settings above.
        if (getParserFeatures() != null) {
            for (Entry<String, Boolean> feature : getParserFeatures().entrySet()) {
                tagSoup.setFeature(feature.getKey(), feature.getValue());
            }
        }

        // Caller-supplied properties.
        if (getParserProperties() != null) {
            for (Entry<String, Object> property : getParserProperties().entrySet()) {
                tagSoup.setProperty(property.getKey(), property.getValue());
            }
        }

        // Apply the parsing schema only when one is configured; otherwise the
        // parser keeps whatever schema it already has.
        Object schema = this.getParsingSchema();
        if (schema != null) {
            tagSoup.setProperty(Parser.schemaProperty, schema);
        }
    } catch (Exception e) {
        throw new IllegalArgumentException("Problem configuring the parser", e);
    }
    return tagSoup;
}
Use of org.ccil.cowan.tagsoup.Parser in the project platform_frameworks_base by android.
In class HtmlToSpannedConverter, method fromHtml:
/**
 * Converts an HTML string into styled, displayable text.
 *
 * <p>Parsing is delegated to TagSoup so that real-world HTML, with all of the
 * brokenness found in the wild, is accepted.
 *
 * @param source the HTML to convert
 * @param flags handed unchanged to the {@code HtmlToSpannedConverter} constructor
 * @param imageGetter asked for a representation of every {@code <img>} tag in the
 *        HTML; pass {@code null} if you don't want this
 * @param tagHandler asked to handle unknown tags; pass {@code null} if you don't
 *        want this
 * @return the styled text produced by the converter
 * @throws RuntimeException if the parser rejects the schema property (not expected)
 */
public static Spanned fromHtml(String source, int flags, ImageGetter imageGetter, TagHandler tagHandler) {
    Parser tagSoup = new Parser();
    try {
        tagSoup.setProperty(Parser.schemaProperty, HtmlParser.schema);
    } catch (org.xml.sax.SAXException e) {
        // setProperty only declares SAXNotRecognizedException and
        // SAXNotSupportedException, both SAXException subclasses. Neither is
        // expected for the schema property, so surface them as unchecked.
        throw new RuntimeException(e);
    }
    return new HtmlToSpannedConverter(source, imageGetter, tagHandler, tagSoup, flags).convert();
}
Aggregations