Use of org.apache.tika.sax.RichTextContentHandler in the Apache Tika project.
Class UnpackerResource, method process.
/**
 * Parses the supplied stream and returns the unpacked output as a map of file name to bytes.
 *
 * <p>Before parsing, a {@code MyEmbeddedDocumentExtractor} constructed around a counter and
 * the result map is registered on the parse context. NOTE(review): that extractor is
 * presumably what fills the map and bumps the counter; its implementation is not visible
 * here, so confirm against the class.
 *
 * <p>When {@code saveAll} is {@code true}, the text written by the content handler (UTF-8)
 * is stored under {@code TEXT_FILENAME} and the metadata, rendered by {@code metadataToCsv},
 * is stored under {@code META_FILENAME}.
 *
 * @param is          the document stream handed to the parser
 * @param httpHeaders request headers; their header map is passed to
 *                    {@code TikaResource.fillMetadata}
 * @param info        request URI info, used for request logging and as the path passed to
 *                    {@code TikaResource.parse}
 * @param saveAll     whether the text and metadata entries are added to the result
 * @return the result map, including the text and metadata entries when {@code saveAll} is set
 * @throws WebApplicationException with status {@code NO_CONTENT} when the counter is still
 *                                 zero after parsing and {@code saveAll} is {@code false}
 * @throws Exception               anything raised by the helper calls made here
 */
private Map<String, byte[]> process(InputStream is, @Context HttpHeaders httpHeaders, @Context UriInfo info, boolean saveAll) throws Exception {
    final Metadata metadata = new Metadata();
    final ParseContext parseContext = new ParseContext();

    Parser parser = TikaResource.createParser();
    if (parser instanceof DigestingParser) {
        // Digesting adds nothing when unpacking, so use the parser it wraps.
        parser = ((DigestingParser) parser).getWrappedParser();
    }
    TikaResource.fillMetadata(parser, metadata, parseContext, httpHeaders.getRequestHeaders());
    TikaResource.logRequest(LOG, info, metadata);

    // The text buffer is only written to when saveAll selects the text-producing handler.
    final ByteArrayOutputStream textBuffer = new ByteArrayOutputStream();
    final ContentHandler handler = saveAll
            ? new BodyContentHandler(new RichTextContentHandler(new OutputStreamWriter(textBuffer, UTF_8)))
            : new DefaultHandler();

    final Map<String, byte[]> results = new HashMap<>();
    final MutableInt counter = new MutableInt();
    parseContext.set(EmbeddedDocumentExtractor.class, new MyEmbeddedDocumentExtractor(counter, results));
    TikaResource.parse(parser, LOG, info.getPath(), is, handler, metadata, parseContext);

    if (!saveAll) {
        // With nothing counted and no text/metadata requested there is nothing to return.
        if (counter.intValue() == 0) {
            throw new WebApplicationException(Response.Status.NO_CONTENT);
        }
        return results;
    }

    results.put(TEXT_FILENAME, textBuffer.toByteArray());
    final ByteArrayOutputStream metadataCsv = new ByteArrayOutputStream();
    metadataToCsv(metadata, metadataCsv);
    results.put(META_FILENAME, metadataCsv.toByteArray());
    return results;
}
Use of org.apache.tika.sax.RichTextContentHandler in the Apache Tika project.
Class TikaResource, method produceText.
/**
 * Builds a streaming response body that parses the request stream and writes the resulting
 * text to the response as UTF-8.
 *
 * <p>The parser, metadata and parse context are prepared, and the request is logged, as soon
 * as this method is called. The parse itself runs only when {@code write} is invoked on the
 * returned {@link StreamingOutput}.
 *
 * @param is          the document stream to parse
 * @param httpHeaders request headers, passed to {@code fillMetadata} and
 *                    {@code fillParseContext}
 * @param info        request URI info, used for request logging and as the path passed to
 *                    {@code parse}
 * @return a {@link StreamingOutput} that performs the parse into the response stream
 */
public StreamingOutput produceText(final InputStream is, MultivaluedMap<String, String> httpHeaders, final UriInfo info) {
    final Metadata requestMetadata = new Metadata();
    final ParseContext parseContext = new ParseContext();
    final Parser textParser = createParser();

    fillMetadata(textParser, requestMetadata, parseContext, httpHeaders);
    fillParseContext(parseContext, httpHeaders, textParser);
    logRequest(LOG, info, requestMetadata);

    return new StreamingOutput() {
        @Override
        public void write(OutputStream outputStream) throws IOException, WebApplicationException {
            // Handler output goes to the response stream, encoded as UTF-8.
            RichTextContentHandler richText =
                    new RichTextContentHandler(new OutputStreamWriter(outputStream, UTF_8));
            BodyContentHandler bodyHandler = new BodyContentHandler(richText);
            parse(textParser, LOG, info.getPath(), is, bodyHandler, requestMetadata, parseContext);
        }
    };
}
Aggregations