Use of org.apache.tika.sax.EndDocumentShieldingContentHandler in the Apache Tika project.
Class OpenDocumentParser, method parse.
/**
 * Parses a zip-packaged document, reading it through a {@link ZipFile} when one
 * is available and falling back to a {@link ZipInputStream} otherwise.
 *
 * <p>The zip source is chosen as follows:
 * <ol>
 *   <li>if {@code stream} is a {@code TikaInputStream} whose open container is
 *       already a {@code ZipFile}, that container is reused;</li>
 *   <li>else, if the {@code TikaInputStream} reports that it has a file, a new
 *       {@code ZipFile} is opened on that file;</li>
 *   <li>otherwise {@code stream} is wrapped in a {@code ZipInputStream}.</li>
 * </ol>
 *
 * <p>Whichever zip resource is used is closed before this method returns,
 * including a {@code ZipFile} that was obtained from the open container.
 * If both the processing and the close fail, the processing exception is the
 * one thrown and the close failure is attached to it as a suppressed exception.
 *
 * @param stream      the document to read
 * @param baseHandler receives the XHTML SAX events
 * @param metadata    passed to the XHTML handler and to the zip handling helpers
 * @param context     passed to the zip handling helpers
 * @throws IOException   if opening, reading or closing the zip source fails
 * @throws SAXException  propagated from the content handlers or helpers
 * @throws TikaException propagated from the zip handling helpers
 */
public void parse(InputStream stream, ContentHandler baseHandler, Metadata metadata, ParseContext context) throws IOException, SAXException, TikaException {
    // Build the handlers before any zip resource is opened, so that nothing
    // sits between opening a resource and the try block that closes it.
    XHTMLContentHandler xhtml = new XHTMLContentHandler(baseHandler, metadata);
    // As we don't know which of the metadata or the content we'll hit first,
    // hold back the endDocument call until everything has been processed.
    EndDocumentShieldingContentHandler handler = new EndDocumentShieldingContentHandler(xhtml);

    // Use a File if we can, and an already open zip is even better.
    ZipFile zipFile = null;
    if (stream instanceof TikaInputStream) {
        TikaInputStream tis = (TikaInputStream) stream;
        Object container = tis.getOpenContainer();
        if (container instanceof ZipFile) {
            zipFile = (ZipFile) container;
        } else if (tis.hasFile()) {
            zipFile = new ZipFile(tis.getFile());
        }
    }

    if (zipFile != null) {
        // try-with-resources: a failure in close() no longer hides a failure
        // raised while handling the zip contents.
        try (ZipFile zip = zipFile) {
            handleZipFile(zip, metadata, context, handler);
        }
    } else {
        // NOTE(review): closing the ZipInputStream also closes the wrapped
        // caller-supplied stream, as the original code did - confirm that
        // callers expect this.
        try (ZipInputStream zipStream = new ZipInputStream(stream)) {
            handleZipStream(zipStream, metadata, context, handler);
        }
    }

    // Only now forward the end of the document, and only if it was requested.
    if (handler.getEndDocumentWasCalled()) {
        handler.reallyEndDocument();
    }
}
Aggregations