Use of org.apache.tika.parser.xml.XMLParser in project tika by apache.
Class TikaParserConfigTest, method defaultParserBlacklist.
/**
 * TIKA-1558: it should be possible to exclude Parsers from being picked up by
 * DefaultParser.
 *
 * <p>The test first checks that the default configuration lists an
 * {@code XMLParser} among its component parsers, then loads
 * {@code TIKA-1558-blacklistsub.xml} and checks that none of that
 * configuration's component parsers is an {@code XMLParser} or a subclass of it.
 *
 * @throws Exception on any failure raised while building or querying the configs
 */
@Test
public void defaultParserBlacklist() throws Exception {
    // Baseline: the default configuration must provide a parser and a detector.
    TikaConfig defaultConfig = new TikaConfig();
    assertNotNull(defaultConfig.getParser());
    assertNotNull(defaultConfig.getDetector());

    // Stop scanning as soon as the first XMLParser instance is seen.
    CompositeParser defaultComposite = (CompositeParser) defaultConfig.getParser();
    boolean xmlParserFound = false;
    for (Parser candidate : defaultComposite.getAllComponentParsers()) {
        if (candidate instanceof XMLParser) {
            xmlParserFound = true;
            break;
        }
    }
    assertTrue("Default config should include an XMLParser.", xmlParserFound);

    // The custom configuration is expected to exclude XMLParser and, because the
    // check uses instanceof, every subclass of it as well.
    TikaConfig customConfig = getConfig("TIKA-1558-blacklistsub.xml");
    CompositeParser customComposite = (CompositeParser) customConfig.getParser();
    for (Parser candidate : customComposite.getAllComponentParsers()) {
        if (candidate instanceof XMLParser) {
            fail("Custom config should not include an XMLParser (" + candidate.getClass() + ").");
        }
    }
}
Use of org.apache.tika.parser.xml.XMLParser in project tika by apache.
Class TIAParsingExample, method useCompositeParser.
/**
 * Example: assembles a {@code CompositeParser} by hand and runs it once.
 *
 * <p>The parser maps {@code text/html} to an {@code HtmlParser} and
 * {@code application/xml} to an {@code XMLParser}, with a {@code TXTParser}
 * as fallback. It is then invoked on an empty byte stream whose metadata
 * declares the content type {@code text/html}.
 *
 * @throws Exception on any failure raised by the parse call
 */
public static void useCompositeParser() throws Exception {
    // Describe the (empty) input document up front.
    Metadata metadata = new Metadata();
    metadata.set(Metadata.CONTENT_TYPE, "text/html");
    InputStream stream = new ByteArrayInputStream(new byte[0]);

    // Explicit media type -> parser routing table.
    MediaType htmlType = MediaType.parse("text/html");
    MediaType xmlType = MediaType.parse("application/xml");
    Map<MediaType, Parser> routing = new HashMap<MediaType, Parser>();
    routing.put(htmlType, new HtmlParser());
    routing.put(xmlType, new XMLParser());

    CompositeParser composite = new CompositeParser();
    composite.setParsers(routing);
    composite.setFallback(new TXTParser());

    // DefaultHandler ignores all SAX events; the example only shows the wiring.
    ContentHandler handler = new DefaultHandler();
    ParseContext context = new ParseContext();
    composite.parse(stream, handler, metadata, context);
}
Aggregations