Use of org.cyberneko.html.filters.Purifier in project zm-mailbox by Zimbra.
From the class HtmlDefang, method defang.
/**
 * Parses the given HTML through a three-stage filter chain (purifier, defang
 * filter, writer) and sends the result to {@code out}.
 *
 * @param source HTML source
 * @param neuterImages <tt>true</tt> to remove images; handed to the {@code DefangFilter}
 * @param out destination the {@code DefangWriter} is constructed over
 * @throws IOException declared for I/O failures propagated out of the parse
 */
protected void defang(XMLInputSource source, boolean neuterImages, Writer out) throws IOException {
    // Configuration keys understood by the HTML parser configuration.
    final String filtersProperty = "http://cyberneko.org/html/properties/filters";
    final String elementNamesProperty = "http://cyberneko.org/html/properties/names/elems";
    final String balanceTagsFeature = "http://cyberneko.org/html/features/balance-tags";
    final String namespacesFeature = "http://xml.org/sax/features/namespaces";

    // Build the individual stages. DefangWriter is used here in place of the
    // stock org.cyberneko.html.filters.Writer.
    // TODO: confirm that utf-8 is the right output encoding.
    DefangWriter serializer = new DefangWriter(out, "utf-8");
    DefangFilter defangFilter = new DefangFilter(neuterImages);
    Purifier htmlPurifier = new HtmlPurifier();

    // Chain order: purify first, then defang, then serialize.
    XMLDocumentFilter[] chain = new XMLDocumentFilter[] { htmlPurifier, defangFilter, serializer };

    // Create the HTML parser configuration and install the chain.
    XMLParserConfiguration config = new HTMLConfiguration();
    config.setProperty(filtersProperty, chain);
    // Element-name handling is set to "match"; tag balancing and namespace
    // processing are both switched off.
    config.setProperty(elementNamesProperty, "match");
    config.setFeature(balanceTagsFeature, false);
    config.setFeature(namespacesFeature, false);

    // Run the document through the chain.
    config.parse(source);
}
Aggregations