Use of org.apache.tika.parser.DefaultParser in the Apache Tika project.
Class TikaConfigSerializer, method addParsers.
/**
 * Appends a {@code <parsers>} element to {@code rootElement} and fills it
 * from the parser held by {@code config}.
 *
 * <p>In {@code Mode.MINIMAL} nothing is written when the configured parser
 * is a {@link DefaultParser}; for any other parser, {@code MINIMAL} is
 * handled as {@code Mode.CURRENT}.
 *
 * @param mode        serialization mode requested by the caller
 * @param rootElement element that receives the new {@code parsers} child
 * @param doc         document used to create the new element
 * @param config      configuration whose parser is serialized
 * @throws Exception  propagated from the delegated {@code addParser} call
 */
private static void addParsers(Mode mode, Element rootElement, Document doc, TikaConfig config) throws Exception {
    final Parser configuredParser = config.getParser();

    Mode effectiveMode = mode;
    if (mode == Mode.MINIMAL) {
        if (configuredParser instanceof DefaultParser) {
            // Everything is at its default, so there is nothing to write.
            return;
        }
        // A non-default parser has to be described, so fall back to CURRENT.
        effectiveMode = Mode.CURRENT;
    }

    final Element parsersNode = doc.createElement("parsers");
    rootElement.appendChild(parsersNode);
    addParser(effectiveMode, parsersNode, doc, configuredParser);
}
Use of org.apache.tika.parser.DefaultParser in the Apache Tika project.
Class Activator, method start.
/**
 * Registers a {@link DefaultDetector} under the {@link Detector} class name
 * and a {@link DefaultParser} under the {@link Parser} class name, both
 * built with this Activator's class loader and registered with an empty
 * {@link Properties} instance. The values returned by the two
 * registrations are kept in {@code detectorService} and
 * {@code parserService}.
 *
 * @param context context used to register the two services
 * @throws Exception if construction or registration fails
 */
@Override
public void start(BundleContext context) throws Exception {
    final ClassLoader activatorLoader = Activator.class.getClassLoader();

    // The detector is registered first, then the parser.
    detectorService = context.registerService(
            Detector.class.getName(), new DefaultDetector(activatorLoader), new Properties());
    parserService = context.registerService(
            Parser.class.getName(), new DefaultParser(activatorLoader), new Properties());
}
Use of org.apache.tika.parser.DefaultParser in the Apache Tika project.
Class TesseractOCRParserTest, method offersTypesIfFound.
/*
 When the test is able to run (see canRun()), check the media types the
 TesseractOCRParser reports and that DefaultParser maps PNG to it.
 */
@Test
public void offersTypesIfFound() throws Exception {
    TesseractOCRParser ocrParser = new TesseractOCRParser();
    DefaultParser compositeParser = new DefaultParser();
    ParseContext context = new ParseContext();
    MediaType pngType = MediaType.image("png");

    // Skip the remaining checks unless canRun() holds
    // (presumably: Tesseract is installed and on the path).
    assumeTrue(canRun());

    // The OCR parser is expected to report exactly 8 supported types, PNG among them.
    assertEquals(8, ocrParser.getSupportedTypes(context).size());
    assertTrue(ocrParser.getSupportedTypes(context).contains(pngType));

    // The parser DefaultParser has mapped to PNG must be the TesseractOCRParser.
    Class<?> chosenForPng = compositeParser.getParsers(context).get(pngType).getClass();
    assertEquals(TesseractOCRParser.class, chosenForPng);
}
Use of org.apache.tika.parser.DefaultParser in the Apache Tika project.
Class ServiceLoaderUtilsTest, method testOrdering.
/**
 * Checks the relative position of three parsers in the list returned by
 * {@code DefaultParser.getAllComponentParsers()}: the non-Tika
 * {@code VorbisParser} must come after {@code FictionBookParser}, which in
 * turn must come after {@code DcXMLParser}. Parsers that come later are the
 * ones that overwrite earlier ones and are therefore preferred.
 */
@Test
public void testOrdering() throws Exception {
    DefaultParser defaultParser = new DefaultParser();

    // -1 means "not seen in the component list".
    int vorbisPos = -1;
    int fictionBookPos = -1;
    int dcXmlPos = -1;

    int position = 0;
    for (Parser component : defaultParser.getAllComponentParsers()) {
        // Class.toString() yields "class <fully.qualified.Name>".
        String description = component.getClass().toString();
        switch (description) {
            case "class org.gagravarr.tika.VorbisParser":
                vorbisPos = position;
                break;
            case "class org.apache.tika.parser.xml.FictionBookParser":
                fictionBookPos = position;
                break;
            case "class org.apache.tika.parser.xml.DcXMLParser":
                dcXmlPos = position;
                break;
            default:
                break;
        }
        position++;
    }

    assertNotEquals(vorbisPos, fictionBookPos);
    assertNotEquals(fictionBookPos, dcXmlPos);
    assertTrue(vorbisPos > fictionBookPos);
    assertTrue(fictionBookPos > dcXmlPos);
}
Use of org.apache.tika.parser.DefaultParser in the Apache Tika project.
Class TikaConfigSerializer, method addParser.
/**
 * Writes {@code parser}, and where applicable its component parsers, under
 * {@code rootElement}.
 *
 * <ul>
 *   <li>A {@link ParserDecorator} whose class name starts with
 *       {@code ParserDecorator$} is unwrapped: the wrapped parser is the
 *       one written, and the decorator is handed to the five-argument
 *       {@code addParser} overload.</li>
 *   <li>In {@code Mode.CURRENT} a {@link DefaultParser} is written on its
 *       own, without its children.</li>
 *   <li>Any other {@link CompositeParser} has its component parsers written
 *       recursively. The composite itself is left out when it is exactly
 *       {@code CompositeParser}, or when it is a {@code DefaultParser} and
 *       the mode is {@code STATIC} or {@code STATIC_FULL}; the children are
 *       then attached directly to {@code rootElement}.</li>
 * </ul>
 *
 * @param mode        serialization mode
 * @param rootElement element under which output is attached
 * @param doc         document passed through to the element-building overload
 * @param parser      parser to serialize
 * @throws Exception  propagated from the delegated {@code addParser} calls
 */
private static void addParser(Mode mode, Element rootElement, Document doc, Parser parser) throws Exception {
    // Decorators declared inside ParserDecorator itself are written around the parser they wrap.
    final String nestedDecoratorPrefix = ParserDecorator.class.getName() + "$";
    ParserDecorator decoration = null;
    Parser target = parser;
    if (parser instanceof ParserDecorator
            && parser.getClass().getName().startsWith(nestedDecoratorPrefix)) {
        decoration = (ParserDecorator) parser;
        target = decoration.getWrappedParser();
    }

    final boolean isDefault = target instanceof DefaultParser;
    // In CURRENT mode a DefaultParser is emitted as a single element, children omitted.
    final boolean defaultWithoutChildren = (mode == Mode.CURRENT) && isDefault;

    List<Parser> children = Collections.emptyList();
    boolean emitSelf = true;
    if (!defaultWithoutChildren && target instanceof CompositeParser) {
        children = ((CompositeParser) target).getAllComponentParsers();

        // A plain CompositeParser gets no element of its own.
        final boolean nakedComposite = target.getClass().equals(CompositeParser.class);
        // Static modes replace the DefaultParser by an explicit list of its children.
        final boolean defaultMadeStatic =
                isDefault && (mode == Mode.STATIC || mode == Mode.STATIC_FULL);
        emitSelf = !(nakedComposite || defaultMadeStatic);
    }

    // Children hang off the parser's own element when it is emitted, else off rootElement.
    Element childParent = rootElement;
    if (emitSelf) {
        childParent = addParser(mode, rootElement, doc, target, decoration);
    }
    for (Parser child : children) {
        addParser(mode, childParent, doc, child);
    }
    // TODO Parser Exclusions
}
Aggregations