Use of org.apache.commons.compress.archivers.zip.ZipArchiveEntry in the Apache Tika project.
Class IWorkPackageParser, method parse.
/**
 * Walks the zip package entry by entry and parses every entry whose name is
 * listed in {@code IWORK_CONTENT_ENTRIES}.
 *
 * <p>For each such entry the document type is detected from the start of the
 * entry data. When a type is found, its content type is added to the metadata
 * and an XHTML document is started and ended on {@code handler}; the entry is
 * fed through the SAX parser only when a type-specific content handler exists
 * (no handler is created for {@code ENCRYPTED}).
 *
 * @param stream   the raw zip stream; it is not closed by this method
 * @param handler  receiver of the generated XHTML SAX events
 * @param metadata metadata object that receives the detected content type
 * @param context  parse context that supplies the SAX parser
 * @throws IOException   if reading from the zip stream fails
 * @throws SAXException  propagated from the content handlers or the SAX parser
 * @throws TikaException if the detected type is not one of the handled kinds
 */
public void parse(InputStream stream, ContentHandler handler, Metadata metadata, ParseContext context) throws IOException, SAXException, TikaException {
    ZipArchiveInputStream archive = new ZipArchiveInputStream(stream);

    for (ZipArchiveEntry current = archive.getNextZipEntry();
            current != null;
            current = archive.getNextZipEntry()) {

        // Only the known iWork content entries are of interest.
        if (!IWORK_CONTENT_ENTRIES.contains(current.getName())) {
            continue;
        }

        // Mark before detection so the bytes consumed while sniffing the type
        // can be replayed to the SAX parser afterwards.
        InputStream buffered = new BufferedInputStream(archive, 4096);
        buffered.mark(4096);
        IWORKDocumentType detected = IWORKDocumentType.detectType(buffered);
        buffered.reset();

        if (detected == null) {
            continue;
        }

        XHTMLContentHandler xhtml = new XHTMLContentHandler(handler, metadata);
        ContentHandler delegate;
        switch (detected) {
            case KEYNOTE:
                delegate = new KeynoteContentHandler(xhtml, metadata);
                break;
            case NUMBERS:
                delegate = new NumbersContentHandler(xhtml, metadata);
                break;
            case PAGES:
                delegate = new PagesContentHandler(xhtml, metadata);
                break;
            case ENCRYPTED:
                // We can't do anything for the file right now
                delegate = null;
                break;
            default:
                throw new TikaException("Unhandled iWorks file " + detected);
        }

        metadata.add(Metadata.CONTENT_TYPE, detected.getType().toString());

        xhtml.startDocument();
        if (delegate != null) {
            // The shield wrapper sits between the SAX parser and the entry stream.
            context.getSAXParser().parse(
                    new CloseShieldInputStream(buffered),
                    new OfflineContentHandler(delegate));
        }
        xhtml.endDocument();
    }
    // Don't close the zip InputStream (TIKA-1117).
}
Aggregations