use of org.apache.tika.parser.CompositeParser in project tika by apache.
the class TikaParserConfigTest method testParserExcludeFromDefault.
/**
 * Checks that the DefaultParser obtained from the "TIKA-1558-blacklist.xml"
 * config offers neither the PE/ELF executable media types nor an
 * ExecutableParser instance, while a DefaultParser constructed directly
 * from the same media type registry offers both.
 */
@Test
public void testParserExcludeFromDefault() throws Exception {
    TikaConfig config = getConfig("TIKA-1558-blacklist.xml");
    assertNotNull(config.getParser());
    assertNotNull(config.getDetector());
    CompositeParser topLevel = (CompositeParser) config.getParser();

    MediaType peExe = MediaType.application("x-msdownload");
    MediaType elf = MediaType.application("x-elf");

    // The configured DefaultParser is reached through the XML media type
    // entry, where it sits behind a decorator that has to be unwrapped.
    ParserDecorator decorated =
            (ParserDecorator) topLevel.getParsers().get(MediaType.APPLICATION_XML);
    assertNotNull(decorated);
    DefaultParser fromConfig = (DefaultParser) decorated.getWrappedParser();

    // Baseline: a DefaultParser built directly, sharing the config's registry
    DefaultParser pristine = new DefaultParser(config.getMediaTypeRegistry());

    // The baseline advertises both executable types...
    assertContains(peExe, pristine.getSupportedTypes(context));
    assertContains(elf, pristine.getSupportedTypes(context));

    // ...and holds at least one ExecutableParser instance
    boolean sawExecutable = false;
    for (Parser candidate : pristine.getParsers().values()) {
        sawExecutable = candidate instanceof ExecutableParser;
        if (sawExecutable) {
            break;
        }
    }
    assertTrue(sawExecutable);

    // The configured parser advertises neither type...
    assertNotContained(peExe, fromConfig.getSupportedTypes(context));
    assertNotContained(elf, fromConfig.getSupportedTypes(context));

    // ...and must not hold an ExecutableParser anywhere
    for (Parser candidate : fromConfig.getParsers().values()) {
        if (!(candidate instanceof ExecutableParser)) {
            continue;
        }
        fail("Shouldn't have the Executable Parser from config");
    }
}
use of org.apache.tika.parser.CompositeParser in project tika by apache.
the class TikaCLI method displaySupportedTypes.
/**
 * Prints all the known media types, aliases and matching parser classes.
 * <p>
 * For every type in the registry this writes the type itself, each alias,
 * the supertype (when there is one) and the class name of the parser
 * registered for the type (when there is one) to standard output.
 * <p>
 * When the registered parser is a {@link CompositeParser}, the composite is
 * asked for the parser it holds for that type so that the concrete class is
 * reported. If the composite has no entry for the type, the composite's own
 * class name is printed instead of failing with a NullPointerException.
 */
private void displaySupportedTypes() {
    AutoDetectParser parser = new AutoDetectParser();
    MediaTypeRegistry registry = parser.getMediaTypeRegistry();
    Map<MediaType, Parser> parsers = parser.getParsers();
    for (MediaType type : registry.getTypes()) {
        System.out.println(type);
        for (MediaType alias : registry.getAliases(type)) {
            System.out.println(" alias: " + alias);
        }
        MediaType supertype = registry.getSupertype(type);
        if (supertype != null) {
            System.out.println(" supertype: " + supertype);
        }
        Parser p = parsers.get(type);
        if (p instanceof CompositeParser) {
            // Look one level down for the concrete parser; keep the
            // composite itself when it has nothing registered for the type.
            Parser nested = ((CompositeParser) p).getParsers().get(type);
            if (nested != null) {
                p = nested;
            }
        }
        if (p != null) {
            System.out.println(" parser: " + p.getClass().getName());
        }
    }
}
use of org.apache.tika.parser.CompositeParser in project tika by apache.
the class TikaMimeTypes method getMediaTypes.
/**
 * Builds one {@code MediaTypeDetails} entry per media type known to the
 * registry of the current {@code TikaResource} config.
 * <p>
 * Each entry carries the type, its aliases, its supertype (omitted when the
 * supertype is absent or is {@code MediaType.OCTET_STREAM}) and the class
 * name of the parser registered for the type, if any. When that parser is a
 * {@link CompositeParser}, the composite is asked for the parser it holds
 * for the type; if it has no entry, the composite's own class name is
 * recorded instead of failing with a NullPointerException.
 *
 * @return the details for every registered media type, in the registry's
 *         iteration order
 */
protected List<MediaTypeDetails> getMediaTypes() {
    MediaTypeRegistry registry = TikaResource.getConfig().getMediaTypeRegistry();
    Map<MediaType, Parser> parsers = ((CompositeParser) TikaResource.getConfig().getParser()).getParsers();
    List<MediaTypeDetails> types = new ArrayList<TikaMimeTypes.MediaTypeDetails>(registry.getTypes().size());
    for (MediaType type : registry.getTypes()) {
        MediaTypeDetails details = new MediaTypeDetails();
        details.type = type;
        details.aliases = registry.getAliases(type).toArray(new MediaType[0]);
        MediaType supertype = registry.getSupertype(type);
        if (supertype != null && !MediaType.OCTET_STREAM.equals(supertype)) {
            details.supertype = supertype;
        }
        Parser p = parsers.get(type);
        if (p instanceof CompositeParser) {
            // Look one level down for the concrete parser; keep the
            // composite itself when it has nothing registered for the type.
            Parser nested = ((CompositeParser) p).getParsers().get(type);
            if (nested != null) {
                p = nested;
            }
        }
        if (p != null) {
            details.parser = p.getClass().getName();
        }
        types.add(details);
    }
    return types;
}
use of org.apache.tika.parser.CompositeParser in project tika by apache.
the class TikaConfigTest method parserWithChildParsers.
/**
 * TIKA-1653 If one parser has child parsers, those child parsers shouldn't
 * show up at the top level as well.
 * <p>
 * Any exception thrown by the body, including a TikaException, propagates
 * out of the test so that the runner reports it with its full stack trace
 * rather than a flattened message.
 */
@Test
public void parserWithChildParsers() throws Exception {
    TikaConfig config = getConfig("TIKA-1653-norepeat.xml");
    CompositeParser cp = (CompositeParser) config.getParser();
    List<Parser> parsers = cp.getAllComponentParsers();

    // Just 2 top level parsers
    assertEquals(2, parsers.size());

    // First: a CompositeParser with 2 child parsers
    Parser p = parsers.get(0);
    assertTrue(p.toString(), p instanceof CompositeParser);
    assertEquals(2, ((CompositeParser) p).getAllComponentParsers().size());

    // Second: a decorator wrapping an EmptyParser, reporting hello/world
    // as its first supported type
    p = parsers.get(1);
    assertTrue(p.toString(), p instanceof ParserDecorator);
    assertEquals(EmptyParser.class, ((ParserDecorator) p).getWrappedParser().getClass());
    assertEquals("hello/world", p.getSupportedTypes(null).iterator().next().toString());
}
use of org.apache.tika.parser.CompositeParser in project tika by apache.
the class TIAParsingExample method useCompositeParser.
/**
 * Example of assembling a CompositeParser by hand: an HtmlParser is
 * registered for text/html, an XMLParser for application/xml, and a
 * TXTParser is set as the fallback. An empty stream whose metadata declares
 * the content type text/html is then parsed through the composite.
 *
 * @throws Exception if the parse call fails
 */
public static void useCompositeParser() throws Exception {
    // Media type -> parser table handed to the composite
    Map<MediaType, Parser> byType = new HashMap<MediaType, Parser>();
    byType.put(MediaType.parse("text/html"), new HtmlParser());
    byType.put(MediaType.parse("application/xml"), new XMLParser());

    CompositeParser composite = new CompositeParser();
    composite.setParsers(byType);
    composite.setFallback(new TXTParser());

    // Declare the content type up front via the metadata
    Metadata meta = new Metadata();
    meta.set(Metadata.CONTENT_TYPE, "text/html");

    InputStream emptyInput = new ByteArrayInputStream(new byte[0]);
    composite.parse(emptyInput, new DefaultHandler(), meta, new ParseContext());
}
Aggregations