Search in sources :

Example 16 with MediaType

use of org.apache.tika.mime.MediaType in project tika by apache.

the class MediaTypeExample method main.

/**
 * Prints the media type {@code image/svg+xml} and then each successive supertype
 * reported by the default {@link MediaTypeRegistry}, one per line on standard output.
 * The walk stops as soon as the current type is {@code null}.
 *
 * @param args command-line arguments (unused)
 * @throws Exception declared for convenience; nothing is thrown explicitly here
 */
public static void main(String[] args) throws Exception {
    MediaTypeRegistry defaultRegistry = MediaTypeRegistry.getDefaultRegistry();
    for (MediaType current = MediaType.parse("image/svg+xml");
            current != null;
            current = defaultRegistry.getSupertype(current)) {
        System.out.println(current);
    }
}
Also used : MediaType(org.apache.tika.mime.MediaType) MediaTypeRegistry(org.apache.tika.mime.MediaTypeRegistry)

Example 17 with MediaType

use of org.apache.tika.mime.MediaType in project tika by apache.

the class MediaTypeExample method listAllTypes.

/**
 * Prints every media type returned by the default {@link MediaTypeRegistry},
 * each followed by the set of aliases the registry reports for it.
 * Output goes to standard output, one type per line.
 */
public static void listAllTypes() {
    MediaTypeRegistry defaultRegistry = MediaTypeRegistry.getDefaultRegistry();
    for (MediaType known : defaultRegistry.getTypes()) {
        Set<MediaType> otherNames = defaultRegistry.getAliases(known);
        System.out.println(known + ", also known as " + otherNames);
    }
}
Also used : MediaType(org.apache.tika.mime.MediaType) MediaTypeRegistry(org.apache.tika.mime.MediaTypeRegistry)

Example 18 with MediaType

use of org.apache.tika.mime.MediaType in project tika by apache.

the class SourceCodeParserTest method testSupportTypes.

/**
 * Checks that the source code parser advertises the Java, Groovy and C++ source
 * media types, and that it does not advertise {@code text/html}.
 */
@Test
public void testSupportTypes() throws Exception {
    Set<MediaType> advertised = sourceCodeParser.getSupportedTypes(new ParseContext());

    // Types the parser is expected to claim.
    MediaType javaSource = new MediaType("text", "x-java-source");
    assertTrue(advertised.contains(javaSource));
    MediaType groovySource = new MediaType("text", "x-groovy");
    assertTrue(advertised.contains(groovySource));
    MediaType cppSource = new MediaType("text", "x-c++src");
    assertTrue(advertised.contains(cppSource));

    // A type it must not claim; queried through a fresh call, as in the original test.
    Set<MediaType> advertisedAgain = sourceCodeParser.getSupportedTypes(new ParseContext());
    assertFalse(advertisedAgain.contains(new MediaType("text", "html")));
}
Also used : ParseContext(org.apache.tika.parser.ParseContext) MediaType(org.apache.tika.mime.MediaType) Test(org.junit.Test) TikaTest(org.apache.tika.TikaTest)

Example 19 with MediaType

use of org.apache.tika.mime.MediaType in project tika by apache.

the class ForkParserIntegrationTest method testParserHandlingOfNonSerializable.

/**
 * If we supply a non serializable object on the ParseContext,
 * check we get a helpful exception back.
 * <p>
 * The test registers an anonymous {@link Detector} on the context, runs a
 * {@link ForkParser} over a test document and expects a {@link TikaException}
 * whose cause is a {@link NotSerializableException} and whose message names the
 * ParseContext as the thing that could not be serialized.
 * <p>
 * Both the test document stream and the fork parser are released in
 * {@code finally} blocks so nothing is leaked whether or not the parse fails.
 */
@Test
public void testParserHandlingOfNonSerializable() throws Exception {
    ForkParser parser = new ForkParser(ForkParserIntegrationTest.class.getClassLoader(), tika.getParser());
    ParseContext context = new ParseContext();
    // Anonymous detector: the test relies on this object failing serialization.
    context.set(Detector.class, new Detector() {

        @Override
        public MediaType detect(InputStream input, Metadata metadata) {
            return MediaType.OCTET_STREAM;
        }
    });
    try {
        ContentHandler output = new BodyContentHandler();
        InputStream stream = ForkParserIntegrationTest.class.getResourceAsStream("/test-documents/testTXT.txt");
        try {
            parser.parse(stream, output, new Metadata(), context);
        } finally {
            // getResourceAsStream returns null when the resource is missing.
            if (stream != null) {
                stream.close();
            }
        }
        fail("Should have blown up with a non serializable ParseContext");
    } catch (TikaException e) {
        // Check the right details
        assertNotNull(e.getCause());
        assertEquals(NotSerializableException.class, e.getCause().getClass());
        assertEquals("Unable to serialize ParseContext to pass to the Forked Parser", e.getMessage());
    } finally {
        parser.close();
    }
}
Also used : ForkParser(org.apache.tika.fork.ForkParser) BodyContentHandler(org.apache.tika.sax.BodyContentHandler) NotSerializableException(java.io.NotSerializableException) Detector(org.apache.tika.detect.Detector) TikaException(org.apache.tika.exception.TikaException) InputStream(java.io.InputStream) ParseContext(org.apache.tika.parser.ParseContext) Metadata(org.apache.tika.metadata.Metadata) MediaType(org.apache.tika.mime.MediaType) ContentHandler(org.xml.sax.ContentHandler) Test(org.junit.Test)

Example 20 with MediaType

use of org.apache.tika.mime.MediaType in project tika by apache.

the class TikaParsers method parserAsHTML.

/**
 * Appends an HTML fragment describing one parser to {@code html}.
 * <p>
 * The fragment consists of a heading {@code <hN>} (N = {@code level}) with the
 * parser's short name, a paragraph with its class name and, if the parser is
 * decorated, a paragraph saying so (including the decorator when one is set).
 * For a composite parser each child parser is then rendered recursively, one
 * heading level deeper, inside an indented {@code <div>}. For a non-composite
 * parser the supported media types are listed, but only when
 * {@code withMimeTypes} is true.
 *
 * @param p             details of the parser to render
 * @param withMimeTypes whether to list supported media types of non-composite parsers
 * @param html          buffer the markup is appended to
 * @param level         heading level used for this parser's title
 */
private void parserAsHTML(ParserDetails p, boolean withMimeTypes, StringBuffer html, int level) {
    // Title, sized by nesting depth.
    html.append("<h").append(level).append(">")
        .append(p.shortName)
        .append("</h").append(level).append(">");

    html.append("<p>Class: ").append(p.className).append("</p>");

    if (p.isDecorated) {
        html.append("<p>Decorated Parser");
        if (p.decoratedBy != null) {
            html.append(" - ").append(p.decoratedBy);
        }
        html.append("</p>");
    }

    if (p.isComposite) {
        // Children are rendered one heading level down, visually indented.
        html.append("<p>Composite Parser</p>")
            .append("<div style=\"margin-left: 1em\">\n");
        for (Parser child : p.childParsers) {
            parserAsHTML(new ParserDetails(child), withMimeTypes, html, level + 1);
        }
        html.append("</div>\n");
    } else if (withMimeTypes) {
        html.append("<p>Mime Types:").append("<ul>");
        for (MediaType supported : p.supportedTypes) {
            html.append("<li>").append(supported.toString()).append("</li>");
        }
        html.append("</ul>").append("</p>");
    }

    html.append("\n");
}
Also used : MediaType(org.apache.tika.mime.MediaType) Parser(org.apache.tika.parser.Parser) CompositeParser(org.apache.tika.parser.CompositeParser)

Aggregations

MediaType (org.apache.tika.mime.MediaType) 88, Test (org.junit.Test) 28, Metadata (org.apache.tika.metadata.Metadata) 27, InputStream (java.io.InputStream) 23, TikaInputStream (org.apache.tika.io.TikaInputStream) 17, Parser (org.apache.tika.parser.Parser) 17, ParseContext (org.apache.tika.parser.ParseContext) 16, IOException (java.io.IOException) 15, TikaException (org.apache.tika.exception.TikaException) 13, CompositeParser (org.apache.tika.parser.CompositeParser) 13, ContentHandler (org.xml.sax.ContentHandler) 13, AutoDetectParser (org.apache.tika.parser.AutoDetectParser) 12, BodyContentHandler (org.apache.tika.sax.BodyContentHandler) 12, TikaTest (org.apache.tika.TikaTest) 10, Detector (org.apache.tika.detect.Detector) 10, HashSet (java.util.HashSet) 8, ByteArrayInputStream (java.io.ByteArrayInputStream) 7, TikaConfig (org.apache.tika.config.TikaConfig) 7, MediaTypeRegistry (org.apache.tika.mime.MediaTypeRegistry) 7, ArrayList (java.util.ArrayList) 6