Use of org.apache.commons.io.input.TaggedInputStream in the Apache Tika project.
The class RTFParser, method parse.
/**
 * Parses an RTF document from {@code stream}, emitting its content as XHTML
 * SAX events to {@code handler}.
 *
 * <p>The content type in {@code metadata} is set to {@code application/rtf}
 * before extraction starts. The input is read through a
 * {@link TaggedInputStream} so that an {@link IOException} raised by the
 * underlying stream can be told apart from any other {@link IOException}
 * raised during extraction.
 *
 * @param stream   the RTF document to read
 * @param handler  receiver of the generated XHTML SAX events
 * @param metadata document metadata; its content type is overwritten here
 * @param context  parse context handed to the embedded-object handler
 * @throws IOException   if reading from {@code stream} fails
 * @throws SAXException  if raised by the extraction calls (not caught here)
 * @throws TikaException if extraction fails with an {@link IOException}
 *                       that did not originate from {@code stream}
 */
public void parse(InputStream stream, ContentHandler handler, Metadata metadata, ParseContext context) throws IOException, SAXException, TikaException {
    metadata.set(Metadata.CONTENT_TYPE, "application/rtf");
    TaggedInputStream tagged = new TaggedInputStream(stream);
    try {
        XHTMLContentHandler xhtmlHandler = new XHTMLContentHandler(handler, metadata);
        RTFEmbObjHandler embObjHandler = new RTFEmbObjHandler(xhtmlHandler, metadata, context, getMemoryLimitInKb());
        final TextExtractor ert = new TextExtractor(xhtmlHandler, metadata, embObjHandler);
        // Read through the tagged wrapper, not the raw stream: only exceptions
        // thrown by the wrapper carry the tag that throwIfCauseOf() checks for.
        ert.extract(tagged);
    } catch (IOException e) {
        // Rethrows the original IOException when it came from the input stream.
        tagged.throwIfCauseOf(e);
        // Any other IOException is reported as a parse failure.
        throw new TikaException("Error parsing an RTF document", e);
    }
}
Aggregations