Search in sources :

Example 56 with DefaultHandler

use of org.xml.sax.helpers.DefaultHandler in project tika by apache.

the class BPGParserTest method testBPG.

/**
 * Tests a very basic BPG file, without much metadata.
 *
 * <p>Parses {@code /test-documents/testBPG.bpg} with the content type preset to
 * {@code image/x-bpg} and checks the width, length, bits-per-sample and colour
 * mode values recorded in the metadata.
 *
 * @throws Exception if parsing fails or the test resource cannot be read or closed
 */
@Test
public void testBPG() throws Exception {
    Metadata metadata = new Metadata();
    metadata.set(Metadata.CONTENT_TYPE, "image/x-bpg");
    // This method opens the stream, so it is responsible for closing it,
    // including when parse() throws.
    try (InputStream stream = getClass().getResourceAsStream("/test-documents/testBPG.bpg")) {
        parser.parse(stream, new DefaultHandler(), metadata, new ParseContext());
    }
    assertEquals("100", metadata.get(Metadata.IMAGE_WIDTH));
    assertEquals("75", metadata.get(Metadata.IMAGE_LENGTH));
    assertEquals("10", metadata.get(Metadata.BITS_PER_SAMPLE));
    assertEquals("YCbCr Colour", metadata.get(Photoshop.COLOR_MODE));
}
Also used : InputStream(java.io.InputStream) Metadata(org.apache.tika.metadata.Metadata) ParseContext(org.apache.tika.parser.ParseContext) DefaultHandler(org.xml.sax.helpers.DefaultHandler) Test(org.junit.Test)

Example 57 with DefaultHandler

use of org.xml.sax.helpers.DefaultHandler in project tika by apache.

the class JpegParserTest method testJPEGoddTagComponent.

/**
 * Parses {@code /test-documents/testJPEG_oddTagComponent.jpg} and checks that no
 * title or description is reported while the image dimensions are still extracted.
 *
 * @throws Exception if parsing fails or the test resource cannot be read or closed
 */
@Test
public void testJPEGoddTagComponent() throws Exception {
    Metadata metadata = new Metadata();
    metadata.set(Metadata.CONTENT_TYPE, "image/jpeg");
    // This method opens the stream, so it is responsible for closing it,
    // including when parse() throws.
    try (InputStream stream = getClass().getResourceAsStream("/test-documents/testJPEG_oddTagComponent.jpg")) {
        parser.parse(stream, new DefaultHandler(), metadata, new ParseContext());
    }
    assertEquals(null, metadata.get(TikaCoreProperties.TITLE));
    assertEquals(null, metadata.get(TikaCoreProperties.DESCRIPTION));
    assertEquals("251", metadata.get(Metadata.IMAGE_WIDTH));
    assertEquals("384", metadata.get(Metadata.IMAGE_LENGTH));
}
Also used : InputStream(java.io.InputStream) Metadata(org.apache.tika.metadata.Metadata) ParseContext(org.apache.tika.parser.ParseContext) DefaultHandler(org.xml.sax.helpers.DefaultHandler) Test(org.junit.Test)

Example 58 with DefaultHandler

use of org.xml.sax.helpers.DefaultHandler in project tika by apache.

the class JpegParserTest method testJPEGEmptyEXIFDateTime.

/**
 * Parses {@code /test-documents/testJPEG_EXIF_emptyDateTime.jpg} and checks that
 * the horizontal and vertical resolution values are still reported.
 *
 * @throws Exception if parsing fails or the test resource cannot be read or closed
 */
@Test
public void testJPEGEmptyEXIFDateTime() throws Exception {
    Metadata metadata = new Metadata();
    metadata.set(Metadata.CONTENT_TYPE, "image/jpeg");
    // This method opens the stream, so it is responsible for closing it,
    // including when parse() throws.
    try (InputStream stream = getClass().getResourceAsStream("/test-documents/testJPEG_EXIF_emptyDateTime.jpg")) {
        parser.parse(stream, new DefaultHandler(), metadata, new ParseContext());
    }
    assertEquals("300.0", metadata.get(TIFF.RESOLUTION_HORIZONTAL));
    assertEquals("300.0", metadata.get(TIFF.RESOLUTION_VERTICAL));
}
Also used : InputStream(java.io.InputStream) Metadata(org.apache.tika.metadata.Metadata) ParseContext(org.apache.tika.parser.ParseContext) DefaultHandler(org.xml.sax.helpers.DefaultHandler) Test(org.junit.Test)

Example 59 with DefaultHandler

use of org.xml.sax.helpers.DefaultHandler in project tika by apache.

the class JpegParserTest method testJPEGGeo2.

/**
 * Test for an image with the geographic information stored in a slightly
 * different way, see TIKA-915 for details.
 *
 * <p>NOTE(review): an earlier version of this comment described the test as
 * disabled pending a fix to the underlying library, but the method carries
 * {@code @Test} and no ignore annotation is visible here. Confirm whether it is
 * meant to run.
 *
 * @throws Exception if parsing fails or the test resource cannot be read or closed
 */
@Test
public void testJPEGGeo2() throws Exception {
    Metadata metadata = new Metadata();
    metadata.set(Metadata.CONTENT_TYPE, "image/jpeg");
    // This method opens the stream, so it is responsible for closing it,
    // including when parse() throws.
    try (InputStream stream = getClass().getResourceAsStream("/test-documents/testJPEG_GEO_2.jpg")) {
        parser.parse(stream, new DefaultHandler(), metadata, new ParseContext());
    }
    // Geo tags should be present and not rounded; the expected values below
    // carry six decimal places.
    assertEquals("51.575762", metadata.get(Metadata.LATITUDE));
    assertEquals("-1.567886", metadata.get(Metadata.LONGITUDE));
}
Also used : InputStream(java.io.InputStream) Metadata(org.apache.tika.metadata.Metadata) ParseContext(org.apache.tika.parser.ParseContext) DefaultHandler(org.xml.sax.helpers.DefaultHandler) Test(org.junit.Test)

Example 60 with DefaultHandler

use of org.xml.sax.helpers.DefaultHandler in project tika by apache.

the class TiffParserTest method testTIFF.

/**
 * Parses {@code /test-documents/testTIFF.tif} and checks the description, the
 * EXIF/TIFF tag values, and the keywords/subject values taken from embedded XMP.
 *
 * @throws Exception if parsing fails or the test resource cannot be read or closed
 */
@Test
public void testTIFF() throws Exception {
    Metadata metadata = new Metadata();
    metadata.set(Metadata.CONTENT_TYPE, "image/tiff");
    // This method opens the stream, so it is responsible for closing it,
    // including when parse() throws.
    try (InputStream stream = getClass().getResourceAsStream("/test-documents/testTIFF.tif")) {
        parser.parse(stream, new DefaultHandler(), metadata, new ParseContext());
    }
    assertEquals(
            "Licensed to the Apache Software Foundation (ASF) under one or "
                    + "more contributor license agreements.  See the NOTICE file "
                    + "distributed with this work for additional information regarding "
                    + "copyright ownership.",
            metadata.get(TikaCoreProperties.DESCRIPTION));
    // All EXIF/TIFF tags
    assertEquals("Inch", metadata.get(Metadata.RESOLUTION_UNIT));
    // Core EXIF/TIFF tags
    assertEquals("100", metadata.get(Metadata.IMAGE_WIDTH));
    assertEquals("75", metadata.get(Metadata.IMAGE_LENGTH));
    assertEquals("8", metadata.get(Metadata.BITS_PER_SAMPLE));
    assertEquals("3", metadata.get(Metadata.SAMPLES_PER_PIXEL));
    // Embedded XMP: the same two terms are expected under both keywords and subject
    List<String> keywords = Arrays.asList(metadata.getValues(TikaCoreProperties.KEYWORDS));
    assertTrue("got " + keywords, keywords.contains("cat"));
    assertTrue("got " + keywords, keywords.contains("garden"));
    List<String> subject = Arrays.asList(metadata.getValues(Metadata.SUBJECT));
    assertTrue("got " + subject, subject.contains("cat"));
    assertTrue("got " + subject, subject.contains("garden"));
}
Also used : InputStream(java.io.InputStream) Metadata(org.apache.tika.metadata.Metadata) ParseContext(org.apache.tika.parser.ParseContext) DefaultHandler(org.xml.sax.helpers.DefaultHandler) Test(org.junit.Test)

Aggregations

DefaultHandler (org.xml.sax.helpers.DefaultHandler)148 InputStream (java.io.InputStream)65 Metadata (org.apache.tika.metadata.Metadata)59 ParseContext (org.apache.tika.parser.ParseContext)52 Test (org.junit.Test)44 Attributes (org.xml.sax.Attributes)41 SAXParser (javax.xml.parsers.SAXParser)40 SAXException (org.xml.sax.SAXException)39 ByteArrayInputStream (java.io.ByteArrayInputStream)32 SAXParserFactory (javax.xml.parsers.SAXParserFactory)29 IOException (java.io.IOException)26 InputSource (org.xml.sax.InputSource)23 ParserConfigurationException (javax.xml.parsers.ParserConfigurationException)22 Parser (org.apache.tika.parser.Parser)22 TikaInputStream (org.apache.tika.io.TikaInputStream)20 ContentHandler (org.xml.sax.ContentHandler)20 File (java.io.File)19 AutoDetectParser (org.apache.tika.parser.AutoDetectParser)17 BodyContentHandler (org.apache.tika.sax.BodyContentHandler)16 FileInputStream (java.io.FileInputStream)15