Use of org.cyberneko.html.HTMLConfiguration in project zm-mailbox by Zimbra.
Class HtmlDefang, method defang.
/**
 * Parses {@code source} with a NekoHTML configuration and pushes the document
 * through a three-stage filter chain (purifier, defang filter, defang writer),
 * with the last stage writing to {@code out}.
 *
 * @param source HTML source
 * @param neuterImages <tt>true</tt> to remove images (forwarded to {@code DefangFilter})
 * @param out destination handed to the {@code DefangWriter} at the end of the chain
 * @throws IOException declared for failures raised while parsing
 */
protected void defang(XMLInputSource source, boolean neuterImages, Writer out) throws IOException {
    // Final stage of the chain: serializes the filtered document to the caller's writer.
    // TODO: utf-8 right?
    DefangWriter serializer = new DefangWriter(out, "utf-8");
    DefangFilter defangFilter = new DefangFilter(neuterImages);
    Purifier purifierFilter = new HtmlPurifier();

    // Stages run in array order: purify, then defang, then write.
    XMLDocumentFilter[] chain = { purifierFilter, defangFilter, serializer };

    // Configure the HTML parser: install the chain, set element-name handling
    // to "match", and switch off tag balancing and namespace processing.
    XMLParserConfiguration htmlConfig = new HTMLConfiguration();
    htmlConfig.setProperty("http://cyberneko.org/html/properties/filters", chain);
    htmlConfig.setProperty("http://cyberneko.org/html/properties/names/elems", "match");
    htmlConfig.setFeature("http://cyberneko.org/html/features/balance-tags", false);
    htmlConfig.setFeature("http://xml.org/sax/features/namespaces", false);

    // Run the parse; output is produced as a side effect of the filter chain.
    htmlConfig.parse(source);
}
Use of org.cyberneko.html.HTMLConfiguration in project gocd by gocd.
Class HtmlDomParserContext, method initParser.
/**
 * Creates the NekoHTML-based DOM parser for this context and configures it.
 *
 * <p>The parser is built around an {@code HTMLConfiguration} that reports to
 * this context's error handler. Only the element-validity filter is installed
 * in the filter chain.
 *
 * @param runtime the Ruby runtime (not used by this implementation)
 */
@Override
protected void initParser(Ruby runtime) {
    XMLParserConfiguration config = new HTMLConfiguration();
    // Only the validity filter is installed; the previously constructed
    // RemoveNSAttrsFilter was never added to the chain, so it is no longer created.
    XMLDocumentFilter elementValidityCheckFilter = new ElementValidityCheckFilter(errorHandler);
    XMLDocumentFilter[] filters = { elementValidityCheckFilter };
    config.setErrorHandler(this.errorHandler);
    parser = new NokogiriDomParser(config);
    // see http://nekohtml.sourceforge.net/settings.html for details
    setProperty("http://cyberneko.org/html/properties/default-encoding", java_encoding);
    setProperty("http://cyberneko.org/html/properties/names/elems", "lower");
    setProperty("http://cyberneko.org/html/properties/names/attrs", "lower");
    setProperty("http://cyberneko.org/html/properties/filters", filters);
    setFeature("http://cyberneko.org/html/features/report-errors", true);
    setFeature("http://xml.org/sax/features/namespaces", false);
}
Use of org.cyberneko.html.HTMLConfiguration in project gate-core by GateNLP.
Class ControllerMetadataViewer, method setTarget.
/**
 * Points this viewer at a controller and renders its metadata page.
 *
 * <p>The controller must carry a {@code gate.app.MetadataURL} feature. The
 * metadata document is read from that URL, and {@code long-desc.html} and
 * {@code icon.png} are resolved relative to it. An HTML page is assembled from
 * the pipeline name, the icon and the long description, parsed with NekoHTML
 * (element names forced to lower case), and handed to the display.
 *
 * @param target the object to display; must be a non-null {@code Controller}
 * @throws NullPointerException if {@code target} is {@code null}
 * @throws IllegalArgumentException if {@code target} is not a controller, has no
 *         {@code gate.app.MetadataURL} feature, or if anything fails while
 *         loading or rendering the metadata (the failure is attached as the cause)
 */
@Override
public void setTarget(Object target) {
    if (target == null) {
        throw new NullPointerException("received a null target");
    }
    if (!(target instanceof Controller)) {
        throw new IllegalArgumentException("not a controller");
    }
    Controller controller = (Controller) target;
    if (!controller.getFeatures().containsKey("gate.app.MetadataURL")) {
        throw new IllegalArgumentException("no gate.app.MetadataURL feature");
    }
    try {
        URL metadata = (URL) controller.getFeatures().get("gate.app.MetadataURL");
        URL longDesc = new URL(metadata, "long-desc.html");
        URL iconDesc = new URL(metadata, "icon.png");

        // Close the metadata stream once parsed; previously it was opened and never closed.
        Document document;
        try (java.io.InputStream metadataStream = metadata.openStream()) {
            document = builder.parse(metadataStream);
        }
        Node text = document.getDocumentElement().getElementsByTagName("pipeline-name").item(0).getFirstChild();
        Font font = Gate.getUserConfig().getFont(GateConstants.TEXT_COMPONENTS_FONT);

        // Assemble the page: styled header with icon and pipeline name, then the long description.
        StringBuilder page = new StringBuilder();
        page.append("<!DOCTYPE html>");
        page.append("<html>");
        page.append("<head>");
        page.append("<style type='text/css'>body { font-family: ").append(font.getFamily()).append("; font-size: ").append(font.getSize()).append("pt }</style>");
        page.append("</head>");
        page.append("<body>");
        page.append("<h1><img style='vertical-align: middle;' src='").append(StringEscapeUtils.escapeHtml(iconDesc.toString())).append("'/> ").append(StringEscapeUtils.escapeHtml(text.getTextContent())).append("</h1>");
        page.append(IOUtils.toString(longDesc, "UTF-8"));
        page.append("</body></html>");

        // parse using NekoHTML
        HTMLConfiguration config = new HTMLConfiguration();
        // Force element names to lower case to match XHTML requirements
        // as that is what Flying Saucer expects
        config.setProperty("http://cyberneko.org/html/properties/names/elems", "lower");
        DOMParser htmlParser = new DOMParser(config);
        htmlParser.parse(new InputSource(new StringReader(page.toString())));
        display.setDocument(htmlParser.getDocument(), longDesc.toString());
    } catch (Exception e) {
        // Any failure while loading or rendering is reported as an invalid target.
        throw new IllegalArgumentException(e);
    }
}
Use of org.cyberneko.html.HTMLConfiguration in project nokogiri by sparklemotion.
Class HtmlDomParserContext, method initParser.
/**
 * Instantiates the DOM parser backing this context on top of a NekoHTML
 * configuration, then applies the encoding, name-casing, filter and feature
 * settings.
 *
 * @param runtime the Ruby runtime (not referenced in this method)
 */
@Override
protected void initParser(Ruby runtime) {
    XMLParserConfiguration nekoConfig = new HTMLConfiguration();
    // The filter chain consists solely of the element-validity check.
    XMLDocumentFilter[] filterChain = { new ElementValidityCheckFilter(errorHandler) };

    nekoConfig.setErrorHandler(this.errorHandler);
    parser = new NokogiriDomParser(nekoConfig);

    // Setting names are documented at http://nekohtml.sourceforge.net/settings.html
    setProperty("http://cyberneko.org/html/properties/default-encoding", java_encoding);
    setProperty("http://cyberneko.org/html/properties/names/elems", "lower");
    setProperty("http://cyberneko.org/html/properties/names/attrs", "lower");
    setProperty("http://cyberneko.org/html/properties/filters", filterChain);

    setFeature("http://cyberneko.org/html/features/report-errors", true);
    setFeature("http://xml.org/sax/features/namespaces", false);
}
Aggregations