Use of javax.xml.transform.sax.SAXTransformerFactory in the Apache Tika project.
From class TikaGUI, method getHtmlHandler:
/**
 * Creates and returns a content handler that turns XHTML input to
 * simplified HTML output that can be correctly parsed and displayed
 * by {@link JEditorPane}.
 * <p>
 * The returned content handler is set to output <code>html</code>
 * to the given writer. The XHTML namespace is removed from the output
 * to prevent the serializer from using the {@code <tag/>} empty element
 * syntax that causes extra {@code ">"} characters to be displayed.
 * The {@code <head>} tags are dropped to prevent the serializer from
 * generating a {@code <META>} content type tag that makes
 * {@link JEditorPane} fail thinking that the document character set
 * is inconsistent.
 * <p>
 * Additionally, it will use ImageSavingParser to re-write
 * {@code embedded:(image)} image links to be the {@code file:} URI of the
 * saved file so that they can be loaded. The attributes passed in by the
 * caller are never modified; rewritten values go into a private copy.
 *
 * @param writer output writer
 * @return HTML content handler
 * @throws TransformerConfigurationException if an error occurs
 */
private ContentHandler getHtmlHandler(Writer writer) throws TransformerConfigurationException {
    SAXTransformerFactory factory = (SAXTransformerFactory) SAXTransformerFactory.newInstance();
    TransformerHandler handler = factory.newTransformerHandler();
    handler.getTransformer().setOutputProperty(OutputKeys.METHOD, "html");
    handler.setResult(new StreamResult(writer));
    return new ContentHandlerDecorator(handler) {
        @Override
        public void startElement(String uri, String localName, String name, Attributes atts)
                throws SAXException {
            // Drop the <head> start tag itself (see the method Javadoc for why).
            if ("head".equals(localName)) {
                return;
            }
            // Strip the XHTML namespace so the serializer emits plain HTML tags.
            if (XHTMLContentHandler.XHTML.equals(uri)) {
                uri = null;
            }
            Attributes outAtts = "img".equals(localName) ? rewriteEmbeddedImageSources(atts) : atts;
            super.startElement(uri, localName, name, outAtts);
        }

        @Override
        public void endElement(String uri, String localName, String name) throws SAXException {
            // Mirror startElement: no end tag for <head>, no XHTML namespace.
            if ("head".equals(localName)) {
                return;
            }
            if (XHTMLContentHandler.XHTML.equals(uri)) {
                uri = null;
            }
            super.endElement(uri, localName, name);
        }

        @Override
        public void startPrefixMapping(String prefix, String uri) {
            // Intentionally ignored: prefix mappings are not forwarded to the serializer.
        }

        @Override
        public void endPrefixMapping(String prefix) {
            // Intentionally ignored: prefix mappings are not forwarded to the serializer.
        }

        /**
         * Returns attributes in which every {@code src} value starting with
         * {@code embedded:} has been replaced by the URI of the file returned
         * by {@code imageParser.requestSave}. The given attributes are left
         * untouched; a copy is made only when at least one value is rewritten.
         */
        private Attributes rewriteEmbeddedImageSources(Attributes atts) {
            final String embeddedPrefix = "embedded:";
            AttributesImpl rewritten = null;
            for (int i = 0; i < atts.getLength(); i++) {
                if (!"src".equals(atts.getLocalName(i))) {
                    continue;
                }
                String src = atts.getValue(i);
                if (src == null || !src.startsWith(embeddedPrefix)) {
                    continue;
                }
                String filename = src.substring(embeddedPrefix.length());
                try {
                    File img = imageParser.requestSave(filename);
                    if (rewritten == null) {
                        // The copy keeps attribute order, so index i stays valid.
                        rewritten = new AttributesImpl(atts);
                    }
                    rewritten.setValue(i, img.toURI().toString());
                } catch (IOException e) {
                    System.err.println("Error creating temp image file " + filename + ": " + e);
                    // The html viewer will show a broken image too to alert them
                }
            }
            return rewritten != null ? rewritten : atts;
        }
    };
}
Use of javax.xml.transform.sax.SAXTransformerFactory in the Apache Tika project.
From class TikaGUI, method getXmlContentHandler:
/**
 * Builds a content handler that serializes the SAX events it receives
 * as XML to the given writer.
 *
 * @param writer destination of the serialized XML
 * @return content handler whose output method is set to {@code xml}
 * @throws TransformerConfigurationException if the transformer handler
 *         cannot be created
 */
private ContentHandler getXmlContentHandler(Writer writer) throws TransformerConfigurationException {
    final SAXTransformerFactory serializerFactory =
            (SAXTransformerFactory) SAXTransformerFactory.newInstance();
    final TransformerHandler xmlSerializer = serializerFactory.newTransformerHandler();

    // Select the output method first, then attach the destination.
    xmlSerializer.getTransformer().setOutputProperty(OutputKeys.METHOD, "xml");
    final StreamResult destination = new StreamResult(writer);
    xmlSerializer.setResult(destination);

    return xmlSerializer;
}
Aggregations