Use of org.apache.tika.sax.ExpandedTitleContentHandler in the Apache Tika project.
Class TikaResource, method produceOutput.
/**
 * Prepares a parse of the supplied stream and returns a {@link StreamingOutput}
 * that runs the parse when the response body is written.
 *
 * <p>The parser, metadata and parse context are set up eagerly (and the request
 * is logged) before the {@code StreamingOutput} is returned; the parse itself
 * happens inside {@code write}.
 *
 * @param is          stream handed to the parse call
 * @param httpHeaders request headers passed to {@code fillMetadata} and {@code fillParseContext}
 * @param info        request URI information; its path is passed to the parse call
 * @param format      value set as the transformer's {@code OutputKeys.METHOD} property
 * @return a streaming body whose {@code write} serializes the parse events as UTF-8
 */
private StreamingOutput produceOutput(final InputStream is, final MultivaluedMap<String, String> httpHeaders, final UriInfo info, final String format) {
    final Parser tikaParser = createParser();
    final Metadata meta = new Metadata();
    final ParseContext parseContext = new ParseContext();
    fillMetadata(tikaParser, meta, parseContext, httpHeaders);
    fillParseContext(parseContext, httpHeaders, tikaParser);
    logRequest(LOG, info, meta);

    return new StreamingOutput() {
        @Override
        public void write(OutputStream outputStream) throws IOException, WebApplicationException {
            final Writer out = new OutputStreamWriter(outputStream, UTF_8);
            final ContentHandler sink;
            try {
                sink = newExpandedTitleSerializer(out, format);
            } catch (TransformerConfigurationException e) {
                // Surface transformer set-up failures as a JAX-RS error, keeping the cause.
                throw new WebApplicationException(e);
            }
            parse(tikaParser, LOG, info.getPath(), is, sink, meta, parseContext);
        }
    };
}

/**
 * Creates a transformer-backed content handler that writes to {@code out},
 * wrapped in an {@link ExpandedTitleContentHandler}.
 *
 * @param out    destination for the serialized output
 * @param method value for {@code OutputKeys.METHOD}
 * @return the wrapped content handler
 * @throws TransformerConfigurationException if the transformer handler cannot be created
 */
private static ContentHandler newExpandedTitleSerializer(Writer out, String method) throws TransformerConfigurationException {
    SAXTransformerFactory saxFactory = (SAXTransformerFactory) SAXTransformerFactory.newInstance();
    TransformerHandler serializer = saxFactory.newTransformerHandler();
    serializer.getTransformer().setOutputProperty(OutputKeys.METHOD, method);
    serializer.getTransformer().setOutputProperty(OutputKeys.INDENT, "yes");
    serializer.getTransformer().setOutputProperty(OutputKeys.ENCODING, UTF_8.name());
    serializer.setResult(new StreamResult(out));
    return new ExpandedTitleContentHandler(serializer);
}
Use of org.apache.tika.sax.ExpandedTitleContentHandler in the Apache Camel project.
Class TikaProducer, method getContentHandler.
/**
 * Chooses the SAX content handler that matches the output format of the given
 * configuration and directs its output to {@code outputStream}.
 *
 * <ul>
 *   <li>{@code xml} - the handler returned by {@code getTransformerHandler(outputStream, "xml", true)}</li>
 *   <li>{@code text} - a {@code BodyContentHandler} over a writer using {@code this.encoding}</li>
 *   <li>{@code textMain} - a {@code BoilerpipeContentHandler} over a writer using {@code this.encoding}</li>
 *   <li>{@code html} - an {@code ExpandedTitleContentHandler} wrapping
 *       {@code getTransformerHandler(outputStream, "html", true)}</li>
 * </ul>
 *
 * @param configuration supplies the output format to switch on
 * @param outputStream  destination the returned handler writes to
 * @return the content handler for the configured format; never {@code null}
 * @throws TransformerConfigurationException declared for the transformer-handler creation
 * @throws UnsupportedEncodingException      declared for the writer creation with {@code this.encoding}
 * @throws IllegalArgumentException          if the format is not one of the handled constants
 */
private ContentHandler getContentHandler(TikaConfiguration configuration, OutputStream outputStream) throws TransformerConfigurationException, UnsupportedEncodingException {
    TikaParseOutputFormat outputFormat = configuration.getTikaParseOutputFormat();
    switch (outputFormat) {
    case xml:
        return getTransformerHandler(outputStream, "xml", true);
    case text:
        return new BodyContentHandler(new OutputStreamWriter(outputStream, this.encoding));
    case textMain:
        return new BoilerpipeContentHandler(new OutputStreamWriter(outputStream, this.encoding));
    case html:
        return new ExpandedTitleContentHandler(getTransformerHandler(outputStream, "html", true));
    default:
        // Report the value that was actually switched on (taken from the
        // 'configuration' argument), not a value read from another configuration object.
        throw new IllegalArgumentException(String.format("Unknown format %s", outputFormat));
    }
}
Aggregations