Example use of org.apache.tika.parser.DefaultParser in the Apache Tika project.
From class TikaParserConfigTest, method testParserExcludeFromDefault.
/**
 * Checks that the DefaultParser configured by TIKA-1558-blacklist.xml leaves out
 * the ExecutableParser, while a freshly constructed DefaultParser still offers it.
 */
@Test
public void testParserExcludeFromDefault() throws Exception {
    // Media types that the ExecutableParser is expected to claim
    MediaType peExecutable = MediaType.application("x-msdownload");
    MediaType elfBinary = MediaType.application("x-elf");

    TikaConfig blacklistConfig = getConfig("TIKA-1558-blacklist.xml");
    assertNotNull(blacklistConfig.getParser());
    assertNotNull(blacklistConfig.getDetector());
    CompositeParser composite = (CompositeParser) blacklistConfig.getParser();

    // The configured DefaultParser sits behind a decorator registered for XML
    ParserDecorator decorated =
            (ParserDecorator) composite.getParsers().get(MediaType.APPLICATION_XML);
    assertNotNull(decorated);
    DefaultParser configuredDefault = (DefaultParser) decorated.getWrappedParser();

    // A DefaultParser built directly, without the exclusions from the config file
    DefaultParser freshDefault = new DefaultParser(blacklistConfig.getMediaTypeRegistry());

    // The fresh instance advertises the executable types...
    assertContains(peExecutable, freshDefault.getSupportedTypes(context));
    assertContains(elfBinary, freshDefault.getSupportedTypes(context));

    // ...and holds an ExecutableParser among its parsers
    boolean execParserOffered = false;
    for (Parser candidate : freshDefault.getParsers().values()) {
        execParserOffered = candidate instanceof ExecutableParser;
        if (execParserOffered) {
            break;
        }
    }
    assertTrue(execParserOffered);

    // The configured instance neither advertises those types...
    assertNotContained(peExecutable, configuredDefault.getSupportedTypes(context));
    assertNotContained(elfBinary, configuredDefault.getSupportedTypes(context));

    // ...nor holds an ExecutableParser
    for (Parser candidate : configuredDefault.getParsers().values()) {
        if (candidate instanceof ExecutableParser) {
            fail("Shouldn't have the Executable Parser from config");
        }
    }
}
Example use of org.apache.tika.parser.DefaultParser in the Apache Tika project.
From class TesseractOCRParserTest, method offersNoTypesIfNotFound.
/**
 * When the configured Tesseract path is invalid, the TesseractOCRParser must
 * report no supported types, so that the DefaultParser resolves PNG to the
 * standard ImageParser instead.
 */
@Test
public void offersNoTypesIfNotFound() throws Exception {
    TesseractOCRParser ocrParser = new TesseractOCRParser();
    DefaultParser fallbackParser = new DefaultParser();

    // Point the OCR configuration at a location where Tesseract cannot exist
    TesseractOCRConfig brokenConfig = new TesseractOCRConfig();
    brokenConfig.setTesseractPath("/made/up/path");
    ParseContext ctx = new ParseContext();
    ctx.set(TesseractOCRConfig.class, brokenConfig);

    // The OCR parser offers nothing under this context
    int offeredTypeCount = ocrParser.getSupportedTypes(ctx).size();
    assertEquals(0, offeredTypeCount);

    // So the DefaultParser maps PNG to the plain ImageParser
    MediaType pngType = MediaType.image("png");
    Class<?> chosenParserClass = fallbackParser.getParsers(ctx).get(pngType).getClass();
    assertEquals(ImageParser.class, chosenParserClass);
}
Aggregations