Search in sources :

Example 61 with DefaultHandler

use of org.xml.sax.helpers.DefaultHandler in project tika by apache.

the class WebPParserTest method testSimple.

/*
        Two photos in test-documents (testWebp_Alpha_Lossy.webp and testWebp_Alpha_Lossless.webp)
        are in the public domain.  These files were retrieved from:
        https://github.com/drewnoakes/metadata-extractor-images/tree/master/webp
        These photos are also available here:
        https://developers.google.com/speed/webp/gallery2#webp_links
        Credits for the photo:
        "Free Stock Photo in High Resolution - Yellow Rose 3 - Flowers"
        Image Author: Jon Sullivan
     */
/**
 * Parses the lossy and the lossless alpha WebP sample files and checks the
 * metadata values reported for each.
 */
@Test
public void testSimple() throws Exception {
    Metadata metadata = new Metadata();
    // try-with-resources closes the stream even if parsing throws
    try (InputStream stream = getClass().getResourceAsStream("/test-documents/testWebp_Alpha_Lossy.webp")) {
        parser.parse(stream, new DefaultHandler(), metadata, new ParseContext());
    }
    assertEquals("301", metadata.get("Image Height"));
    assertEquals("400", metadata.get("Image Width"));
    assertEquals("true", metadata.get("Has Alpha"));
    assertEquals("false", metadata.get("Is Animation"));
    assertEquals("image/webp", metadata.get(Metadata.CONTENT_TYPE));

    // Fresh metadata so values from the lossy file cannot leak into the second check
    metadata = new Metadata();
    try (InputStream stream = getClass().getResourceAsStream("/test-documents/testWebp_Alpha_Lossless.webp")) {
        parser.parse(stream, new DefaultHandler(), metadata, new ParseContext());
    }
    //unfortunately, there isn't much metadata in lossless
    assertEquals("image/webp", metadata.get(Metadata.CONTENT_TYPE));
}
Also used : InputStream(java.io.InputStream) Metadata(org.apache.tika.metadata.Metadata) ParseContext(org.apache.tika.parser.ParseContext) DefaultHandler(org.xml.sax.helpers.DefaultHandler) Test(org.junit.Test)

Example 62 with DefaultHandler

use of org.xml.sax.helpers.DefaultHandler in project tika by apache.

the class ICNSParserTest method testICNS.

/**
 * Tests a file with multiple icons and masks.
 *
 * NOTE(review): the metadata values are set before parsing and nothing is
 * asserted afterwards, so this test currently only checks that parsing
 * completes without throwing - confirm whether assertions were intended.
 */
@Test
public void testICNS() throws Exception {
    Metadata metadata = new Metadata();
    metadata.set(Metadata.CONTENT_TYPE, "image/icns");
    metadata.set("Icons count", "2");
    metadata.set("Icons details", "16x16 (24 bpp), 32x32 (24 bpp)");
    metadata.set("Masked icon count", "2");
    metadata.set("Masked icon details", "16x16 (8 bpp), 32x32 (8 bpp)");
    // try-with-resources closes the stream even if parsing throws
    try (InputStream stream = getClass().getResourceAsStream("/test-documents/testICNS.icns")) {
        parser.parse(stream, new DefaultHandler(), metadata, new ParseContext());
    }
}
Also used : InputStream(java.io.InputStream) Metadata(org.apache.tika.metadata.Metadata) ParseContext(org.apache.tika.parser.ParseContext) DefaultHandler(org.xml.sax.helpers.DefaultHandler) Test(org.junit.Test)

Example 63 with DefaultHandler

use of org.xml.sax.helpers.DefaultHandler in project tika by apache.

the class ImageParserTest method testBMP.

/**
 * Parses testBMP.bmp with the content type preset to image/bmp and checks
 * the metadata values reported for it.
 */
@Test
public void testBMP() throws Exception {
    Metadata metadata = new Metadata();
    metadata.set(Metadata.CONTENT_TYPE, "image/bmp");
    // try-with-resources closes the stream even if parsing throws
    try (InputStream stream = getClass().getResourceAsStream("/test-documents/testBMP.bmp")) {
        parser.parse(stream, new DefaultHandler(), metadata, new ParseContext());
    }
    assertEquals("75", metadata.get("height"));
    assertEquals("100", metadata.get("width"));
    assertEquals("8 8 8", metadata.get("Data BitsPerSample"));
    assertEquals("1.0", metadata.get("Dimension PixelAspectRatio"));
    //TODO: figure out why we're getting 0.35273367 in Ubuntu, but not Windows
    //assertEquals("0", metadata.get("Dimension VerticalPhysicalPixelSpacing"));
    //assertEquals("0", metadata.get("Dimension HorizontalPhysicalPixelSpacing"));
    assertEquals("BI_RGB", metadata.get("Compression CompressionTypeName"));
    assertEquals("image/bmp", metadata.get("Content-Type"));
    assertEquals("100", metadata.get(Metadata.IMAGE_WIDTH));
    assertEquals("75", metadata.get(Metadata.IMAGE_LENGTH));
    assertEquals("8 8 8", metadata.get(Metadata.BITS_PER_SAMPLE));
}
Also used : InputStream(java.io.InputStream) Metadata(org.apache.tika.metadata.Metadata) ParseContext(org.apache.tika.parser.ParseContext) DefaultHandler(org.xml.sax.helpers.DefaultHandler) Test(org.junit.Test) TikaTest(org.apache.tika.TikaTest)

Example 64 with DefaultHandler

use of org.xml.sax.helpers.DefaultHandler in project tika by apache.

the class ImageParserTest method testGIF.

/**
 * Parses testGIF.gif with the content type preset to image/gif and checks
 * the metadata values reported for it, including the embedded comment text.
 */
@Test
public void testGIF() throws Exception {
    Metadata metadata = new Metadata();
    metadata.set(Metadata.CONTENT_TYPE, "image/gif");
    // try-with-resources closes the stream even if parsing throws
    try (InputStream stream = getClass().getResourceAsStream("/test-documents/testGIF.gif")) {
        parser.parse(stream, new DefaultHandler(), metadata, new ParseContext());
    }
    assertEquals("75", metadata.get("height"));
    assertEquals("100", metadata.get("width"));
    assertEquals("true", metadata.get("Compression Lossless"));
    assertEquals("Normal", metadata.get("Dimension ImageOrientation"));
    assertEquals("lzw", metadata.get("Compression CompressionTypeName"));
    assertEquals("0", metadata.get("Dimension HorizontalPixelOffset"));
    assertEquals("imageLeftPosition=0, imageTopPosition=0, imageWidth=100, imageHeight=75, interlaceFlag=false", metadata.get("ImageDescriptor"));
    assertEquals("Index", metadata.get("Data SampleFormat"));
    assertEquals("3", metadata.get("Chroma NumChannels"));
    assertEquals("1", metadata.get("Compression NumProgressiveScans"));
    assertEquals("RGB", metadata.get("Chroma ColorSpaceType"));
    assertEquals("Licensed to the Apache Software Foundation (ASF) under one or more contributor license agreements.  See the NOTICE file distributed with this work for additional information regarding copyright ownership.", metadata.get("CommentExtensions CommentExtension"));
    assertEquals("value=Licensed to the Apache Software Foundation (ASF) under one or more contributor license agreements.  See the NOTICE file distributed with this work for additional information regarding copyright ownership., encoding=ISO-8859-1, compression=none", metadata.get("Text TextEntry"));
    assertEquals("true", metadata.get("Chroma BlackIsZero"));
    assertEquals("disposalMethod=none, userInputFlag=false, transparentColorFlag=false, delayTime=0, transparentColorIndex=0", metadata.get("GraphicControlExtension"));
    assertEquals("0", metadata.get("Dimension VerticalPixelOffset"));
    assertEquals("image/gif", metadata.get("Content-Type"));
    assertEquals("100", metadata.get(Metadata.IMAGE_WIDTH));
    assertEquals("75", metadata.get(Metadata.IMAGE_LENGTH));
    assertEquals("Licensed to the Apache Software Foundation (ASF) under one or more contributor license agreements.  See the NOTICE file distributed with this work for additional information regarding copyright ownership.", metadata.get(TikaCoreProperties.COMMENTS));
}
Also used : InputStream(java.io.InputStream) Metadata(org.apache.tika.metadata.Metadata) ParseContext(org.apache.tika.parser.ParseContext) DefaultHandler(org.xml.sax.helpers.DefaultHandler) Test(org.junit.Test) TikaTest(org.apache.tika.TikaTest)

Example 65 with DefaultHandler

use of org.xml.sax.helpers.DefaultHandler in project tika by apache.

the class HtmlParserTest method assertScriptLink.

/**
 * Parses the given HTML and asserts that exactly one script element with a
 * src attribute is reported, and that its src equals the expected URL.
 *
 * @param html the HTML markup to parse
 * @param url the src value expected on the single script element
 */
private void assertScriptLink(String html, String url) throws Exception {
    final List<String> scriptSources = new ArrayList<String>();

    // Records the src attribute of every script element the parser reports
    DefaultHandler scriptCollector = new DefaultHandler() {

        @Override
        public void startElement(String uri, String localName, String qName, Attributes attributes) {
            if (!qName.equals("script")) {
                return;
            }
            String src = attributes.getValue("", "src");
            if (src != null) {
                scriptSources.add(src);
            }
        }
    };

    // IdentityHtmlMapper is needed to extract <script> tags
    ParseContext parseContext = new ParseContext();
    parseContext.set(HtmlMapper.class, IdentityHtmlMapper.INSTANCE);

    Metadata htmlMetadata = new Metadata();
    htmlMetadata.set(Metadata.CONTENT_TYPE, "text/html");

    HtmlParser htmlParser = new HtmlParser();
    ByteArrayInputStream input = new ByteArrayInputStream(html.getBytes(UTF_8));
    htmlParser.parse(input, scriptCollector, htmlMetadata, parseContext);

    assertEquals(1, scriptSources.size());
    assertEquals(url, scriptSources.get(0));
}
Also used : ByteArrayInputStream(java.io.ByteArrayInputStream) ParseContext(org.apache.tika.parser.ParseContext) Metadata(org.apache.tika.metadata.Metadata) ArrayList(java.util.ArrayList) Attributes(org.xml.sax.Attributes) DefaultHandler(org.xml.sax.helpers.DefaultHandler)

Aggregations

DefaultHandler (org.xml.sax.helpers.DefaultHandler): 148, InputStream (java.io.InputStream): 65, Metadata (org.apache.tika.metadata.Metadata): 59, ParseContext (org.apache.tika.parser.ParseContext): 52, Test (org.junit.Test): 44, Attributes (org.xml.sax.Attributes): 41, SAXParser (javax.xml.parsers.SAXParser): 40, SAXException (org.xml.sax.SAXException): 39, ByteArrayInputStream (java.io.ByteArrayInputStream): 32, SAXParserFactory (javax.xml.parsers.SAXParserFactory): 29, IOException (java.io.IOException): 26, InputSource (org.xml.sax.InputSource): 23, ParserConfigurationException (javax.xml.parsers.ParserConfigurationException): 22, Parser (org.apache.tika.parser.Parser): 22, TikaInputStream (org.apache.tika.io.TikaInputStream): 20, ContentHandler (org.xml.sax.ContentHandler): 20, File (java.io.File): 19, AutoDetectParser (org.apache.tika.parser.AutoDetectParser): 17, BodyContentHandler (org.apache.tika.sax.BodyContentHandler): 16, FileInputStream (java.io.FileInputStream): 15