Use of org.apache.tika.parser.ParserDecorator in the Apache Tika project.
Class TIAParsingExample, method testCompositeDocument.
/**
 * Example: registers a custom {@link ParserDecorator} in the {@link ParseContext}
 * under the {@code Parser.class} key and then runs an {@link AutoDetectParser}
 * over an empty in-memory stream with that context.
 *
 * @throws Exception if the parse call fails
 */
public static void testCompositeDocument() throws Exception {
    Parser autoDetect = new AutoDetectParser();

    // Decorator whose parse method is where component documents would be handled.
    Parser componentParser = new ParserDecorator(autoDetect) {
        private static final long serialVersionUID = 4424210691523343833L;

        @Override
        public void parse(InputStream stream, ContentHandler handler, Metadata metadata, ParseContext context)
                throws IOException, SAXException, TikaException {
            // custom processing of the component document
        }
    };

    ParseContext parseContext = new ParseContext();
    parseContext.set(Parser.class, componentParser);

    InputStream emptyInput = new ByteArrayInputStream(new byte[0]);
    ContentHandler sink = new DefaultHandler();
    Metadata meta = new Metadata();
    autoDetect.parse(emptyInput, sink, meta, parseContext);
}
Use of org.apache.tika.parser.ParserDecorator in the Apache Tika project.
Class TikaConfigSerializer, method addParser.
/**
 * Adds the given parser, and recursively its component parsers, beneath
 * {@code rootElement}.
 *
 * <p>If {@code parser} is a {@link ParserDecorator} whose class name starts with
 * {@code ParserDecorator}'s own name followed by {@code $}, the decorator is
 * unwrapped and passed along separately as the decoration. Component parsers are
 * collected only for a {@link CompositeParser}, and not for a {@link DefaultParser}
 * in {@code Mode.CURRENT}. The parser's own entry is skipped for a plain
 * {@code CompositeParser} and for a {@code DefaultParser} in {@code Mode.STATIC} or
 * {@code Mode.STATIC_FULL}; in that case the children are added to
 * {@code rootElement} directly.
 *
 * @param mode        the serialization mode
 * @param rootElement the element under which output is added
 * @param doc         the document passed through to the element-writing overload
 * @param parser      the parser to add (possibly decorated)
 * @throws Exception if adding the parser or any child fails
 */
private static void addParser(Mode mode, Element rootElement, Document doc, Parser parser) throws Exception {
    // If the parser is decorated, is it a kind where we output the parser inside?
    final String decoratorPrefix = ParserDecorator.class.getName() + "$";
    ParserDecorator decoration = null;
    Parser target = parser;
    if (parser instanceof ParserDecorator && parser.getClass().getName().startsWith(decoratorPrefix)) {
        decoration = (ParserDecorator) parser;
        target = decoration.getWrappedParser();
    }

    final boolean isDefault = target instanceof DefaultParser;
    // In CURRENT mode a DefaultParser is output on its own, without its children
    final boolean parserOnly = (mode == Mode.CURRENT) && isDefault;

    List<Parser> children = Collections.emptyList();
    boolean outputParser = true;
    if (!parserOnly && target instanceof CompositeParser) {
        children = ((CompositeParser) target).getAllComponentParsers();

        // Special case for a naked composite
        final boolean nakedComposite = target.getClass().equals(CompositeParser.class);
        // Special case for making Default to static
        final boolean staticDefault = isDefault && (mode == Mode.STATIC || mode == Mode.STATIC_FULL);
        outputParser = !(nakedComposite || staticDefault);
    }

    // Children hang off the parser's own element when one is written, else off the given root
    Element parent = rootElement;
    if (outputParser) {
        parent = addParser(mode, rootElement, doc, target, decoration);
    }
    for (Parser child : children) {
        addParser(mode, parent, doc, child);
    }
    // TODO Parser Exclusions
}
Use of org.apache.tika.parser.ParserDecorator in the Apache Tika project.
Class TikaParserConfigTest, method testParserExcludeFromDefault.
/**
 * Verifies that the DefaultParser loaded from {@code TIKA-1558-blacklist.xml}
 * neither advertises the PE/ELF executable media types nor contains an
 * {@link ExecutableParser}, whereas a freshly built DefaultParser does both.
 */
@Test
public void testParserExcludeFromDefault() throws Exception {
    TikaConfig config = getConfig("TIKA-1558-blacklist.xml");
    assertNotNull(config.getParser());
    assertNotNull(config.getDetector());
    CompositeParser configuredComposite = (CompositeParser) config.getParser();

    MediaType peExeType = MediaType.application("x-msdownload");
    MediaType elfType = MediaType.application("x-elf");

    // Get the DefaultParser from the config (looked up via the XML media type, then unwrapped)
    ParserDecorator wrappedFromConfig =
            (ParserDecorator) configuredComposite.getParsers().get(MediaType.APPLICATION_XML);
    assertNotNull(wrappedFromConfig);
    DefaultParser fromConfig = (DefaultParser) wrappedFromConfig.getWrappedParser();

    // Get a fresh "default" DefaultParser
    DefaultParser freshDefault = new DefaultParser(config.getMediaTypeRegistry());

    // The default one will offer the Executable Parser
    assertContains(peExeType, freshDefault.getSupportedTypes(context));
    assertContains(elfType, freshDefault.getSupportedTypes(context));
    boolean executableFound = false;
    for (Parser candidate : freshDefault.getParsers().values()) {
        if (!(candidate instanceof ExecutableParser)) {
            continue;
        }
        executableFound = true;
        break;
    }
    assertTrue(executableFound);

    // The one from the config won't
    assertNotContained(peExeType, fromConfig.getSupportedTypes(context));
    assertNotContained(elfType, fromConfig.getSupportedTypes(context));
    for (Parser candidate : fromConfig.getParsers().values()) {
        boolean isExecutable = candidate instanceof ExecutableParser;
        if (isExecutable) {
            fail("Shouldn't have the Executable Parser from config");
        }
    }
}
Aggregations