Search in sources :

Example 56 with Metadata

use of org.apache.tika.metadata.Metadata in project tika by apache.

the class AudioParserTest method testAU.

/**
 * Runs the bundled AU sample through the {@link Tika} facade and checks the
 * reported content type, the audio format metadata values, and that the
 * extracted text is empty.
 */
@Test
public void testAU() throws Exception {
    Metadata auMetadata = new Metadata();
    Tika facade = new Tika();
    String extractedText = facade.parseToString(
            AudioParserTest.class.getResourceAsStream("/test-documents/testAU.au"),
            auMetadata);

    // Detected media type
    assertEquals("audio/basic", auMetadata.get(Metadata.CONTENT_TYPE));

    // Audio format details reported as metadata
    assertEquals("44100.0", auMetadata.get("samplerate"));
    assertEquals("2", auMetadata.get("channels"));
    assertEquals("16", auMetadata.get("bits"));
    assertEquals("PCM_SIGNED", auMetadata.get("encoding"));

    // The sample is expected to yield no text at all
    assertEquals("", extractedText);
}
Also used: Metadata(org.apache.tika.metadata.Metadata) Tika(org.apache.tika.Tika) Test(org.junit.Test)

Example 57 with Metadata

use of org.apache.tika.metadata.Metadata in project tika by apache.

the class MidiParserTest method testMID.

/**
 * Runs the bundled MIDI sample through the {@link Tika} facade and checks the
 * reported content type, the MIDI metadata values, and that the extracted
 * text contains "Untitled".
 */
@Test
public void testMID() throws Exception {
    Metadata midiMetadata = new Metadata();
    Tika facade = new Tika();
    String extractedText = facade.parseToString(
            MidiParserTest.class.getResourceAsStream("/test-documents/testMID.mid"),
            midiMetadata);

    // Detected media type
    assertEquals("audio/midi", midiMetadata.get(Metadata.CONTENT_TYPE));

    // MIDI details reported as metadata
    assertEquals("2", midiMetadata.get("tracks"));
    assertEquals("0", midiMetadata.get("patches"));
    assertEquals("PPQ", midiMetadata.get("divisionType"));

    // Text pulled out of the file
    assertContains("Untitled", extractedText);
}
Also used : Metadata(org.apache.tika.metadata.Metadata) Tika(org.apache.tika.Tika) Test(org.junit.Test)

Example 58 with Metadata

use of org.apache.tika.metadata.Metadata in project tika by apache.

the class DWGParserTest method testParser.

/**
 * Parses the given stream with {@link DWGParser} and verifies the content
 * type, the document properties (via both the {@code TikaCoreProperties} keys
 * and the older {@code Metadata} keys) and the extracted body text.
 *
 * @param input the DWG document to parse; it is closed before this method returns
 * @throws Exception if parsing or closing the stream fails
 */
@SuppressWarnings("deprecation")
private void testParser(InputStream input) throws Exception {
    // try-with-resources instead of a manual finally block: a failure while
    // closing (or a null stream) can no longer replace the exception or
    // assertion error raised by the body, which is the one worth reporting.
    try (InputStream stream = input) {
        Metadata metadata = new Metadata();
        ContentHandler handler = new BodyContentHandler();
        new DWGParser().parse(stream, handler, metadata);

        assertEquals("image/vnd.dwg", metadata.get(Metadata.CONTENT_TYPE));

        // Properties read through the TikaCoreProperties keys
        assertEquals("The quick brown fox jumps over the lazy dog", metadata.get(TikaCoreProperties.TITLE));
        assertEquals("Gym class featuring a brown fox and lazy dog", metadata.get(TikaCoreProperties.DESCRIPTION));
        assertEquals("Nevin Nollop", metadata.get(TikaCoreProperties.CREATOR));
        assertEquals("Pangram, fox, dog", metadata.get(TikaCoreProperties.KEYWORDS));
        // Only the first 11 characters of the comments are compared
        assertEquals("Lorem ipsum", metadata.get(TikaCoreProperties.COMMENTS).substring(0, 11));
        assertEquals("http://www.alfresco.com", metadata.get(TikaCoreProperties.RELATION));

        // Check some of the old style metadata too
        assertEquals("The quick brown fox jumps over the lazy dog", metadata.get(Metadata.TITLE));
        assertEquals("Gym class featuring a brown fox and lazy dog", metadata.get(Metadata.SUBJECT));

        // Extracted text
        String content = handler.toString();
        assertContains("The quick brown fox jumps over the lazy dog", content);
        assertContains("Gym class", content);
        assertContains("www.alfresco.com", content);
    }
}
Also used: BodyContentHandler(org.apache.tika.sax.BodyContentHandler) Metadata(org.apache.tika.metadata.Metadata) ContentHandler(org.xml.sax.ContentHandler)

Example 59 with Metadata

use of org.apache.tika.metadata.Metadata in project tika by apache.

the class EnviHeaderParserTest method testParseGlobalMetadata.

/**
 * Parses the bundled ENVI header sample with {@link EnviHeaderParser} into an
 * XML handler and checks that the serialized output contains the expected
 * header lines and the content-type entry.
 */
@Test
public void testParseGlobalMetadata() throws Exception {
    Parser parser = new EnviHeaderParser();
    ToXMLContentHandler handler = new ToXMLContentHandler();
    Metadata metadata = new Metadata();

    try (InputStream stream = EnviHeaderParser.class.getResourceAsStream("/test-documents/envi_test_header.hdr")) {
        // Fail with a readable message if the fixture is missing from the classpath
        assertNotNull("Test ENVI file not found", stream);
        parser.parse(stream, handler, metadata, new ParseContext());
    }

    // Check content of test file
    String content = handler.toString();
    assertContains("<body><p>ENVI</p>", content);
    assertContains("<p>samples = 2400</p>", content);
    assertContains("<p>lines   = 2400</p>", content);
    assertContains("<p>map info = {Sinusoidal, 1.5000, 1.5000, -10007091.3643, 5559289.2856, 4.6331271653e+02, 4.6331271653e+02, , units=Meters}</p>", content);
    assertContains("content=\"application/envi.hdr\"", content);
    assertContains("projection info = {16, 6371007.2, 0.000000, 0.0, 0.0, Sinusoidal, units=Meters}", content);
}
Also used : ToXMLContentHandler(org.apache.tika.sax.ToXMLContentHandler) InputStream(java.io.InputStream) Metadata(org.apache.tika.metadata.Metadata) ParseContext(org.apache.tika.parser.ParseContext) Parser(org.apache.tika.parser.Parser) Test(org.junit.Test)

Example 60 with Metadata

use of org.apache.tika.metadata.Metadata in project tika by apache.

the class DWGParserTest method testDWG2010CustomPropertiesParser.

/**
 * Parses the DWG 2010 custom-properties sample twice: once through the shared
 * {@code testParser} checks, and once directly to verify the two custom
 * property values.
 */
@Test
public void testDWG2010CustomPropertiesParser() throws Exception {
    final String resource = "/test-documents/testDWG2010_custom_props.dwg";

    // Standard checks first; the stream is handed over to testParser
    testParser(DWGParserTest.class.getResourceAsStream(resource));

    // Then re-open the same resource and look at the custom properties
    try (InputStream dwg = DWGParserTest.class.getResourceAsStream(resource)) {
        Metadata props = new Metadata();
        ContentHandler sink = new BodyContentHandler();
        DWGParser dwgParser = new DWGParser();
        dwgParser.parse(dwg, sink, props, null);

        assertEquals("valueforcustomprop1", props.get("customprop1"));
        assertEquals("valueforcustomprop2", props.get("customprop2"));
    }
}
Also used: BodyContentHandler(org.apache.tika.sax.BodyContentHandler) InputStream(java.io.InputStream) Metadata(org.apache.tika.metadata.Metadata) ContentHandler(org.xml.sax.ContentHandler) Test(org.junit.Test)

Aggregations

Metadata (org.apache.tika.metadata.Metadata): 651, Test (org.junit.Test): 467, InputStream (java.io.InputStream): 320, ParseContext (org.apache.tika.parser.ParseContext): 283, BodyContentHandler (org.apache.tika.sax.BodyContentHandler): 269, TikaTest (org.apache.tika.TikaTest): 257, ContentHandler (org.xml.sax.ContentHandler): 229, AutoDetectParser (org.apache.tika.parser.AutoDetectParser): 154, ByteArrayInputStream (java.io.ByteArrayInputStream): 143, Parser (org.apache.tika.parser.Parser): 136, TikaInputStream (org.apache.tika.io.TikaInputStream): 133, IOException (java.io.IOException): 66, DefaultHandler (org.xml.sax.helpers.DefaultHandler): 59, TikaException (org.apache.tika.exception.TikaException): 48, ExcelParserTest (org.apache.tika.parser.microsoft.ExcelParserTest): 36, WordParserTest (org.apache.tika.parser.microsoft.WordParserTest): 36, StringWriter (java.io.StringWriter): 33, Tika (org.apache.tika.Tika): 29, MediaType (org.apache.tika.mime.MediaType): 29, SAXException (org.xml.sax.SAXException): 29