Use of org.apache.tika.extractor.ParsingEmbeddedDocumentExtractor in the Apache Tika project.
Class AutoDetectParser, method parse.
/**
 * Detects the media type of the given stream and delegates parsing to the
 * superclass implementation.
 *
 * <p>The detected type is written to {@code Metadata.CONTENT_TYPE}. When a
 * non-null {@code handler} is supplied it is wrapped in a
 * {@link SecureContentHandler} (TIKA-216: zip bomb prevention). If the
 * context carries no {@link EmbeddedDocumentExtractor}, a
 * {@link ParsingEmbeddedDocumentExtractor} is installed, and this parser is
 * registered as the context {@link Parser} when none is set.
 *
 * @param stream   the document stream to parse
 * @param handler  receiver of the parse events; may be {@code null}, in which
 *                 case no secure wrapper is created
 * @param metadata document metadata; updated with the detected content type
 * @param context  parse context; may be updated with a default parser and
 *                 embedded document extractor
 * @throws IOException   if the stream cannot be read
 * @throws SAXException  if the content handler fails to process an event
 * @throws TikaException if parsing fails, including a SAX failure that the
 *                       secure handler converts into a Tika exception
 */
public void parse(InputStream stream, ContentHandler handler, Metadata metadata, ParseContext context) throws IOException, SAXException, TikaException {
    TemporaryResources tmp = new TemporaryResources();
    try {
        TikaInputStream tis = TikaInputStream.get(stream, tmp);

        // Automatically detect the MIME type of the document
        MediaType type = detector.detect(tis, metadata);
        metadata.set(Metadata.CONTENT_TYPE, type.toString());

        // TIKA-216: Zip bomb prevention
        SecureContentHandler sch = handler != null ? new SecureContentHandler(handler, tis) : null;

        // Install a default embedded-document extractor (backed by this parser
        // unless the context already names one) only if the caller hasn't
        // specified one.
        if (context.get(EmbeddedDocumentExtractor.class) == null) {
            Parser p = context.get(Parser.class);
            if (p == null) {
                context.set(Parser.class, this);
            }
            context.set(EmbeddedDocumentExtractor.class, new ParsingEmbeddedDocumentExtractor(context));
        }

        try {
            // Parse the document
            super.parse(tis, sch, metadata, context);
        } catch (SAXException e) {
            // Convert zip bomb exceptions to TikaExceptions. The wrapper is
            // absent when no handler was supplied; guard so the original
            // SAXException is not masked by a NullPointerException.
            if (sch != null) {
                sch.throwIfCauseOf(e);
            }
            throw e;
        }
    } finally {
        // Always release temporary resources tied to the wrapped stream
        tmp.dispose();
    }
}
Use of org.apache.tika.extractor.ParsingEmbeddedDocumentExtractor in the Apache Tika project.
Class MockParser, method getEmbeddedDocumentExtractor.
/**
 * Resolves the extractor to use for embedded documents.
 *
 * <p>An extractor already present in the context is returned as-is.
 * Otherwise a new {@link ParsingEmbeddedDocumentExtractor} bound to the
 * context is returned, after registering a fresh {@code MockParser} as the
 * context {@link Parser} if none is set. The newly created extractor is not
 * stored back into the context.
 *
 * @param context the parse context to consult (and possibly update with a parser)
 * @return the configured extractor, or a newly created parsing extractor
 */
protected EmbeddedDocumentExtractor getEmbeddedDocumentExtractor(ParseContext context) {
    EmbeddedDocumentExtractor configured = context.get(EmbeddedDocumentExtractor.class);
    if (configured != null) {
        // Caller supplied an extractor; honor it.
        return configured;
    }

    // The fallback extractor parses through the context's Parser, so make
    // sure one is registered before constructing it.
    if (context.get(Parser.class) == null) {
        context.set(Parser.class, new MockParser());
    }
    return new ParsingEmbeddedDocumentExtractor(context);
}
Aggregations