use of org.apache.tika.parser.CompositeParser in project tika by apache.
the class TikaConfigTest method testUnknownParser.
/**
 * Checks that the {@link LoadErrorHandler} handed to the service loader
 * determines how a configuration naming an unresolvable parser class is
 * treated: tolerated for IGNORE and WARN, rejected for THROW.
 */
@Test
public void testUnknownParser() throws Exception {
    ClassLoader classLoader = getClass().getClassLoader();
    Path unknownParserConfig = Paths.get(new URI(getConfigPath("TIKA-1700-unknown-parser.xml")));

    // IGNORE first, then WARN: both must still yield a usable config whose
    // composite parser reports exactly one component parser.
    LoadErrorHandler[] tolerantHandlers = {LoadErrorHandler.IGNORE, LoadErrorHandler.WARN};
    for (LoadErrorHandler handler : tolerantHandlers) {
        TikaConfig lenient = new TikaConfig(unknownParserConfig, new ServiceLoader(classLoader, handler));
        assertNotNull(lenient);
        assertNotNull(lenient.getParser());
        CompositeParser composite = (CompositeParser) lenient.getParser();
        assertEquals(1, composite.getAllComponentParsers().size());
    }

    // THROW: building the config itself has to fail.
    ServiceLoader strictLoader = new ServiceLoader(classLoader, LoadErrorHandler.THROW);
    try {
        new TikaConfig(unknownParserConfig, strictLoader);
        fail("Shouldn't get here, invalid parser class");
    } catch (TikaException expected) {
        // This is the outcome the test is looking for; nothing to verify.
    }
}
use of org.apache.tika.parser.CompositeParser in project tika by apache.
the class TikaConfigTest method defaultParserWithExcludes.
/**
 * TIKA-1445 It should be possible to exclude DefaultParser from
 * certain types, so another parser explicitly listed will take them.
 *
 * @throws Exception if the configuration cannot be loaded; left to propagate
 *         so the test runner reports the original exception and stack trace
 */
@Test
public void defaultParserWithExcludes() throws Exception {
    TikaConfig config = getConfig("TIKA-1445-default-except.xml");
    CompositeParser cp = (CompositeParser) config.getParser();
    List<Parser> parsers = cp.getAllComponentParsers();
    Parser p;

    // Will be the three parsers defined in the xml
    assertEquals(3, parsers.size());

    // Should have a wrapped DefaultParser, not the main DefaultParser,
    // as it is excluded from handling certain classes
    p = parsers.get(0);
    assertTrue(p.toString(), p instanceof ParserDecorator);
    assertEquals(DefaultParser.class, ((ParserDecorator) p).getWrappedParser().getClass());

    // Should have two others which claim things, which they wouldn't
    // otherwise handle
    p = parsers.get(1);
    assertTrue(p.toString(), p instanceof ParserDecorator);
    assertEquals(EmptyParser.class, ((ParserDecorator) p).getWrappedParser().getClass());
    assertEquals("hello/world", p.getSupportedTypes(null).iterator().next().toString());

    p = parsers.get(2);
    assertTrue(p.toString(), p instanceof ParserDecorator);
    assertEquals(ErrorParser.class, ((ParserDecorator) p).getWrappedParser().getClass());
    assertEquals("fail/world", p.getSupportedTypes(null).iterator().next().toString());
}
use of org.apache.tika.parser.CompositeParser in project tika by apache.
the class ExternalParsersFactory method attachExternalParsers.
/**
 * Unfinished stub: looks up the parser of {@code config} and, when it is a
 * {@link CompositeParser}, fetches its media-type-to-parser map. Nothing is
 * done with that map yet, and {@code parsers} is not read at all.
 *
 * @param parsers external parsers meant to be attached (currently unused)
 * @param config  configuration whose parser is inspected
 */
public static void attachExternalParsers(List<ExternalParser> parsers, TikaConfig config) {
    Parser configured = config.getParser();
    if (!(configured instanceof CompositeParser)) {
        return;
    }
    // Fetched but not yet used; the actual attachment logic is still missing.
    Map<MediaType, Parser> byMediaType = ((CompositeParser) configured).getParsers();
    // TODO
}
use of org.apache.tika.parser.CompositeParser in project tika by apache.
the class TikaParserConfigTest method testMimeExcludeInclude.
/**
 * Loads {@code TIKA-1558-blacklist.xml} and checks the resulting parser
 * set: a decorated DefaultParser that no longer advertises PDF or JPEG, and
 * a decorated EmptyParser that advertises PDF only.
 */
@Test
public void testMimeExcludeInclude() throws Exception {
    TikaConfig config = getConfig("TIKA-1558-blacklist.xml");
    assertNotNull(config.getParser());
    assertNotNull(config.getDetector());

    Parser topLevel = config.getParser();
    MediaType pdf = MediaType.application("pdf");
    MediaType jpeg = MediaType.image("jpeg");

    // The top-level parser is a plain CompositeParser holding two components
    assertEquals(CompositeParser.class, topLevel.getClass());
    CompositeParser composite = (CompositeParser) topLevel;
    assertEquals(2, composite.getAllComponentParsers().size());

    // Each component must be a decorator before it is cast
    List<Parser> components = composite.getAllComponentParsers();
    assertTrue(components.get(0) instanceof ParserDecorator);
    assertTrue(components.get(1) instanceof ParserDecorator);
    ParserDecorator defaultWithExcludes = (ParserDecorator) composite.getAllComponentParsers().get(0);
    ParserDecorator pdfOverride = (ParserDecorator) composite.getAllComponentParsers().get(1);

    // First component: DefaultParser whose decorator hides PDF and JPEG,
    // although the wrapped parser itself still supports both
    assertEquals(DefaultParser.class, defaultWithExcludes.getWrappedParser().getClass());
    assertNotContained(pdf, defaultWithExcludes.getSupportedTypes(context));
    assertContains(pdf, defaultWithExcludes.getWrappedParser().getSupportedTypes(context));
    assertNotContained(jpeg, defaultWithExcludes.getSupportedTypes(context));
    assertContains(jpeg, defaultWithExcludes.getWrappedParser().getSupportedTypes(context));

    // Second component: EmptyParser whose decorator claims PDF and nothing
    // else, while the wrapped parser does not list PDF
    assertEquals(EmptyParser.class, pdfOverride.getWrappedParser().getClass());
    assertEquals(1, pdfOverride.getSupportedTypes(context).size());
    assertContains(pdf, pdfOverride.getSupportedTypes(context));
    assertNotContained(pdf, pdfOverride.getWrappedParser().getSupportedTypes(context));
}
use of org.apache.tika.parser.CompositeParser in project tika by apache.
the class TikaParserConfigTest method defaultParserBlacklist.
/**
 * TIKA-1558: parsers can be excluded from what DefaultParser picks up.
 * First confirms the default configuration exposes an XMLParser, then
 * confirms that the configuration loaded from
 * {@code TIKA-1558-blacklistsub.xml} exposes no XMLParser instance
 * (subclasses included, since the check uses {@code instanceof}).
 */
@Test
public void defaultParserBlacklist() throws Exception {
    TikaConfig defaultConfig = new TikaConfig();
    assertNotNull(defaultConfig.getParser());
    assertNotNull(defaultConfig.getDetector());

    List<Parser> defaultParsers = ((CompositeParser) defaultConfig.getParser()).getAllComponentParsers();
    boolean foundXml = false;
    // Stop scanning as soon as one XMLParser has been seen
    for (int i = 0; i < defaultParsers.size() && !foundXml; i++) {
        foundXml = defaultParsers.get(i) instanceof XMLParser;
    }
    assertTrue("Default config should include an XMLParser.", foundXml);

    // This custom TikaConfig should exclude XMLParser and all of its subclasses.
    TikaConfig blacklistConfig = getConfig("TIKA-1558-blacklistsub.xml");
    List<Parser> remainingParsers = ((CompositeParser) blacklistConfig.getParser()).getAllComponentParsers();
    for (Parser candidate : remainingParsers) {
        if (candidate instanceof XMLParser) {
            fail("Custom config should not include an XMLParser (" + candidate.getClass() + ").");
        }
    }
}
Aggregations