use of org.apache.tika.exception.TikaMemoryLimitException in project tika by apache.
the class CompressorParser method parse.
public void parse(InputStream stream, ContentHandler handler, Metadata metadata, ParseContext context) throws IOException, SAXException, TikaException {
// should not be closed
if (stream.markSupported()) {
stream = new CloseShieldInputStream(stream);
} else {
// Ensure that the stream supports the mark feature
stream = new BufferedInputStream(new CloseShieldInputStream(stream));
}
CompressorInputStream cis;
try {
CompressorParserOptions options = context.get(CompressorParserOptions.class, new CompressorParserOptions() {
public boolean decompressConcatenated(Metadata metadata) {
return false;
}
});
CompressorStreamFactory factory = new CompressorStreamFactory(options.decompressConcatenated(metadata), memoryLimitInKb);
cis = factory.createCompressorInputStream(stream);
} catch (CompressorException e) {
if (e.getCause() != null && e.getCause() instanceof MemoryLimitException) {
throw new TikaMemoryLimitException(e.getMessage());
}
throw new TikaException("Unable to uncompress document stream", e);
}
MediaType type = getMediaType(cis);
if (!type.equals(MediaType.OCTET_STREAM)) {
metadata.set(CONTENT_TYPE, type.toString());
}
XHTMLContentHandler xhtml = new XHTMLContentHandler(handler, metadata);
xhtml.startDocument();
try {
Metadata entrydata = new Metadata();
String name = metadata.get(Metadata.RESOURCE_NAME_KEY);
if (name != null) {
if (name.endsWith(".tbz")) {
name = name.substring(0, name.length() - 4) + ".tar";
} else if (name.endsWith(".tbz2")) {
name = name.substring(0, name.length() - 5) + ".tar";
} else if (name.endsWith(".bz")) {
name = name.substring(0, name.length() - 3);
} else if (name.endsWith(".bz2")) {
name = name.substring(0, name.length() - 4);
} else if (name.endsWith(".xz")) {
name = name.substring(0, name.length() - 3);
} else if (name.endsWith(".zlib")) {
name = name.substring(0, name.length() - 5);
} else if (name.endsWith(".pack")) {
name = name.substring(0, name.length() - 5);
} else if (name.length() > 0) {
name = GzipUtils.getUncompressedFilename(name);
}
entrydata.set(Metadata.RESOURCE_NAME_KEY, name);
}
// Use the delegate parser to parse the compressed document
EmbeddedDocumentExtractor extractor = EmbeddedDocumentUtil.getEmbeddedDocumentExtractor(context);
if (extractor.shouldParseEmbedded(entrydata)) {
extractor.parseEmbedded(cis, xhtml, entrydata, true);
}
} finally {
cis.close();
}
xhtml.endDocument();
}
Aggregations