Search in sources :

Example 6 with DefaultParser

use of org.apache.tika.parser.DefaultParser in project tika by apache.

the class TikaConfigSerializer method addParsers.

/**
 * Writes a {@code parsers} element under {@code rootElement} describing the
 * parser held by {@code config}.
 *
 * <p>In {@code MINIMAL} mode nothing is written when the configured parser is a
 * {@link DefaultParser}; for any other parser, {@code MINIMAL} is handled as
 * {@code CURRENT}.
 *
 * @param mode        the serialization mode
 * @param rootElement the element that receives the new {@code parsers} child
 * @param doc         the document used to create the new element
 * @param config      the configuration whose parser is serialized
 * @throws Exception  if serializing the parser fails
 */
private static void addParsers(Mode mode, Element rootElement, Document doc, TikaConfig config) throws Exception {
    Parser parser = config.getParser();
    Mode effectiveMode = mode;
    if (effectiveMode == Mode.MINIMAL) {
        if (parser instanceof DefaultParser) {
            // Everything is at its default, so there is nothing to output
            return;
        }
        // A non-default parser has to be written out as it currently is
        effectiveMode = Mode.CURRENT;
    }
    Element parsersElement = doc.createElement("parsers");
    rootElement.appendChild(parsersElement);
    addParser(effectiveMode, parsersElement, doc, parser);
}
Also used : Element(org.w3c.dom.Element) Parser(org.apache.tika.parser.Parser) CompositeParser(org.apache.tika.parser.CompositeParser) DefaultParser(org.apache.tika.parser.DefaultParser)

Example 7 with DefaultParser

use of org.apache.tika.parser.DefaultParser in project tika by apache.

the class Activator method start.

/**
 * Registers a {@link DefaultDetector} and a {@link DefaultParser}, both built
 * with this activator's class loader, as services on the given bundle context.
 *
 * @param context the bundle context the services are registered with
 * @throws Exception if creating or registering either service fails
 */
@Override
public void start(BundleContext context) throws Exception {
    // Both services are created against the class loader that loaded this activator
    ClassLoader loader = Activator.class.getClassLoader();
    detectorService = context.registerService(
            Detector.class.getName(), new DefaultDetector(loader), new Properties());
    parserService = context.registerService(
            Parser.class.getName(), new DefaultParser(loader), new Properties());
}
Also used : DefaultDetector(org.apache.tika.detect.DefaultDetector) BundleActivator(org.osgi.framework.BundleActivator) Properties(java.util.Properties) Parser(org.apache.tika.parser.Parser) DefaultParser(org.apache.tika.parser.DefaultParser)

Example 8 with DefaultParser

use of org.apache.tika.parser.DefaultParser in project tika by apache.

the class TesseractOCRParserTest method offersTypesIfFound.

/*
    If Tesseract is found, test that the OCR parser reports the expected number
    of supported types (PNG among them) and that DefaultParser picks it for PNG.
     */
@Test
public void offersTypesIfFound() throws Exception {
    TesseractOCRParser ocrParser = new TesseractOCRParser();
    DefaultParser fallbackParser = new DefaultParser();
    ParseContext context = new ParseContext();
    MediaType pngType = MediaType.image("png");
    // Skip the remaining checks unless canRun() reports that the test can run.
    assumeTrue(canRun());
    // The OCR parser is expected to advertise 8 supported types, PNG included.
    int supportedCount = ocrParser.getSupportedTypes(context).size();
    assertEquals(8, supportedCount);
    boolean handlesPng = ocrParser.getSupportedTypes(context).contains(pngType);
    assertTrue(handlesPng);
    // DefaultParser will now select the TesseractOCRParser for PNG.
    Class<?> chosenForPng = fallbackParser.getParsers(context).get(pngType).getClass();
    assertEquals(TesseractOCRParser.class, chosenForPng);
}
Also used : ParseContext(org.apache.tika.parser.ParseContext) MediaType(org.apache.tika.mime.MediaType) DefaultParser(org.apache.tika.parser.DefaultParser) Test(org.junit.Test) TikaTest(org.apache.tika.TikaTest)

Example 9 with DefaultParser

use of org.apache.tika.parser.DefaultParser in project tika by apache.

the class ServiceLoaderUtilsTest method testOrdering.

/**
 * Checks that non-Tika parsers come after Tika parsers in the component list
 * of {@link DefaultParser}, so that they overwrite the Tika parsers and are
 * preferred.
 */
@Test
public void testOrdering() throws Exception {
    DefaultParser defaultParser = new DefaultParser();
    // -1 means "not seen in the component list"
    int vorbisIndex = -1;
    int fictIndex = -1;
    int dcxmlIndex = -1;
    int position = 0;
    for (Parser candidate : defaultParser.getAllComponentParsers()) {
        // A parser matches at most one of these descriptions
        String description = candidate.getClass().toString();
        if ("class org.gagravarr.tika.VorbisParser".equals(description)) {
            vorbisIndex = position;
        } else if ("class org.apache.tika.parser.xml.FictionBookParser".equals(description)) {
            fictIndex = position;
        } else if ("class org.apache.tika.parser.xml.DcXMLParser".equals(description)) {
            dcxmlIndex = position;
        }
        position++;
    }
    assertNotEquals(vorbisIndex, fictIndex);
    assertNotEquals(fictIndex, dcxmlIndex);
    // Expected order in the list: DcXMLParser, then FictionBookParser, then VorbisParser
    assertTrue(vorbisIndex > fictIndex);
    assertTrue(fictIndex > dcxmlIndex);
}
Also used : DefaultParser(org.apache.tika.parser.DefaultParser) Parser(org.apache.tika.parser.Parser) Test(org.junit.Test) TikaTest(org.apache.tika.TikaTest)

Example 10 with DefaultParser

use of org.apache.tika.parser.DefaultParser in project tika by apache.

the class TikaConfigSerializer method addParser.

/**
 * Serializes {@code parser}, followed by any component parsers it exposes,
 * under {@code rootElement}.
 *
 * <p>A parser whose class is nested inside {@link ParserDecorator} is unwrapped
 * first and the decoration is handed on to the element-writing overload. A
 * {@link DefaultParser} in {@code CURRENT} mode is written without children.
 * Other {@link CompositeParser}s have their component parsers written
 * recursively. The composite itself is left out when it is exactly
 * {@code CompositeParser}, or when it is a {@code DefaultParser} and the mode
 * is {@code STATIC} or {@code STATIC_FULL}.
 *
 * @param mode        the serialization mode
 * @param rootElement the element under which the parser is written
 * @param doc         the document passed on to the element-writing overload
 * @param parser      the parser to serialize
 * @throws Exception  if serialization fails
 */
private static void addParser(Mode mode, Element rootElement, Document doc, Parser parser) throws Exception {
    // Unwrap decorations that are nested classes of ParserDecorator; the
    // wrapped parser is then what gets inspected and written.
    ParserDecorator decoration = null;
    if (parser instanceof ParserDecorator
            && parser.getClass().getName().startsWith(ParserDecorator.class.getName() + "$")) {
        decoration = (ParserDecorator) parser;
        parser = decoration.getWrappedParser();
    }

    boolean isDefault = parser instanceof DefaultParser;
    // In CURRENT mode a DefaultParser is written on its own, without children
    boolean parserOnly = mode == Mode.CURRENT && isDefault;

    boolean outputParser = true;
    List<Parser> children = Collections.emptyList();
    if (!parserOnly && parser instanceof CompositeParser) {
        children = ((CompositeParser) parser).getAllComponentParsers();
        // A naked composite is not written itself, only its children
        boolean nakedComposite = parser.getClass().equals(CompositeParser.class);
        // Making Default static: list the children instead of the DefaultParser
        boolean staticDefault = isDefault && (mode == Mode.STATIC || mode == Mode.STATIC_FULL);
        outputParser = !(nakedComposite || staticDefault);
    }

    // Children are attached to the parser's own element when one was written,
    // otherwise directly to the incoming root element.
    Element parent = rootElement;
    if (outputParser) {
        parent = addParser(mode, rootElement, doc, parser, decoration);
    }
    for (Parser child : children) {
        addParser(mode, parent, doc, child);
    }
// TODO Parser Exclusions
}
Also used : CompositeParser(org.apache.tika.parser.CompositeParser) ParserDecorator(org.apache.tika.parser.ParserDecorator) Parser(org.apache.tika.parser.Parser) DefaultParser(org.apache.tika.parser.DefaultParser)

Aggregations

DefaultParser (org.apache.tika.parser.DefaultParser): 12, Parser (org.apache.tika.parser.Parser): 10, Test (org.junit.Test): 8, CompositeParser (org.apache.tika.parser.CompositeParser): 7, MediaType (org.apache.tika.mime.MediaType): 5, EmptyParser (org.apache.tika.parser.EmptyParser): 4, ParserDecorator (org.apache.tika.parser.ParserDecorator): 4, TikaTest (org.apache.tika.TikaTest): 3, ParseContext (org.apache.tika.parser.ParseContext): 3, ExecutableParser (org.apache.tika.parser.executable.ExecutableParser): 3, XMLParser (org.apache.tika.parser.xml.XMLParser): 3, TikaException (org.apache.tika.exception.TikaException): 2, AutoDetectParser (org.apache.tika.parser.AutoDetectParser): 2, InputStream (java.io.InputStream): 1, StringWriter (java.io.StringWriter): 1, HashSet (java.util.HashSet): 1, Properties (java.util.Properties): 1, SolrException (org.apache.solr.common.SolrException): 1, NamedList (org.apache.solr.common.util.NamedList): 1, TikaConfig (org.apache.tika.config.TikaConfig): 1