Use of org.xml.sax.helpers.DefaultHandler in the Apache Tika project.
Class WebPParserTest, method testSimple.
/*
Two photos in test-documents (testWebp_Alpha_Lossy.webp and testWebp_Alpha_Lossless.webp)
are in the public domain. These files were retrieved from:
https://github.com/drewnoakes/metadata-extractor-images/tree/master/webp
These photos are also available here:
https://developers.google.com/speed/webp/gallery2#webp_links
Credits for the photo:
"Free Stock Photo in High Resolution - Yellow Rose 3 - Flowers"
Image Author: Jon Sullivan
*/
/**
 * Parses the lossy and the lossless WebP sample documents and checks the
 * metadata values reported for each of them.
 */
@Test
public void testSimple() throws Exception {
    // Lossy sample: dimensions, alpha and animation flags are all checked.
    Metadata metadata = new Metadata();
    // try-with-resources closes the stream even when parsing throws; the
    // previous closeQuietly call was skipped whenever parsing or an assertion failed.
    try (InputStream stream =
            getClass().getResourceAsStream("/test-documents/testWebp_Alpha_Lossy.webp")) {
        parser.parse(stream, new DefaultHandler(), metadata, new ParseContext());
    }
    assertEquals("301", metadata.get("Image Height"));
    assertEquals("400", metadata.get("Image Width"));
    assertEquals("true", metadata.get("Has Alpha"));
    assertEquals("false", metadata.get("Is Animation"));
    assertEquals("image/webp", metadata.get(Metadata.CONTENT_TYPE));

    // Lossless sample: parsed into a fresh Metadata so nothing carries over.
    metadata = new Metadata();
    try (InputStream stream =
            getClass().getResourceAsStream("/test-documents/testWebp_Alpha_Lossless.webp")) {
        parser.parse(stream, new DefaultHandler(), metadata, new ParseContext());
    }
    //unfortunately, there isn't much metadata in lossless
    assertEquals("image/webp", metadata.get(Metadata.CONTENT_TYPE));
}
Use of org.xml.sax.helpers.DefaultHandler in the Apache Tika project.
Class ICNSParserTest, method testICNS.
/**
 * Tests a file with multiple icons and masks.
 *
 * <p>Pre-populates a {@link Metadata} with content type and icon/mask
 * entries, then runs the parser over {@code testICNS.icns}. The test passes
 * as long as parsing completes without throwing.
 */
@Test
public void testICNS() throws Exception {
    Metadata metadata = new Metadata();
    metadata.set(Metadata.CONTENT_TYPE, "image/icns");
    // NOTE(review): these four entries are written before parsing and nothing
    // is asserted afterwards; presumably they were meant as expected values --
    // confirm against the parser's output and turn them into assertions.
    metadata.set("Icons count", "2");
    metadata.set("Icons details", "16x16 (24 bpp), 32x32 (24 bpp)");
    metadata.set("Masked icon count", "2");
    metadata.set("Masked icon details", "16x16 (8 bpp), 32x32 (8 bpp)");
    // try-with-resources: the stream was previously never closed.
    try (InputStream stream = getClass().getResourceAsStream("/test-documents/testICNS.icns")) {
        parser.parse(stream, new DefaultHandler(), metadata, new ParseContext());
    }
}
Use of org.xml.sax.helpers.DefaultHandler in the Apache Tika project.
Class ImageParserTest, method testBMP.
/**
 * Parses {@code testBMP.bmp} and checks both the raw image metadata keys and
 * the corresponding {@link Metadata} constants.
 */
@Test
public void testBMP() throws Exception {
    Metadata metadata = new Metadata();
    metadata.set(Metadata.CONTENT_TYPE, "image/bmp");
    // try-with-resources: the stream was previously never closed.
    try (InputStream stream = getClass().getResourceAsStream("/test-documents/testBMP.bmp")) {
        parser.parse(stream, new DefaultHandler(), metadata, new ParseContext());
    }

    // Raw keys as reported by the parser.
    assertEquals("75", metadata.get("height"));
    assertEquals("100", metadata.get("width"));
    assertEquals("8 8 8", metadata.get("Data BitsPerSample"));
    assertEquals("1.0", metadata.get("Dimension PixelAspectRatio"));
    //TODO: figure out why we're getting 0.35273367 in Ubuntu, but not Windows
    //assertEquals("0", metadata.get("Dimension VerticalPhysicalPixelSpacing"));
    //assertEquals("0", metadata.get("Dimension HorizontalPhysicalPixelSpacing"));
    assertEquals("BI_RGB", metadata.get("Compression CompressionTypeName"));
    assertEquals("image/bmp", metadata.get("Content-Type"));

    // The same values exposed through the Metadata constants.
    assertEquals("100", metadata.get(Metadata.IMAGE_WIDTH));
    assertEquals("75", metadata.get(Metadata.IMAGE_LENGTH));
    assertEquals("8 8 8", metadata.get(Metadata.BITS_PER_SAMPLE));
}
Use of org.xml.sax.helpers.DefaultHandler in the Apache Tika project.
Class ImageParserTest, method testGIF.
/**
 * Parses {@code testGIF.gif} and checks the raw image metadata keys, the
 * embedded comment text, and the corresponding {@link Metadata} /
 * {@link TikaCoreProperties} entries.
 */
@Test
public void testGIF() throws Exception {
    // Comment text expected in the sample file; it is checked under three different keys below.
    final String licenseText = "Licensed to the Apache Software Foundation (ASF) under one or more contributor license agreements. See the NOTICE file distributed with this work for additional information regarding copyright ownership.";

    Metadata metadata = new Metadata();
    metadata.set(Metadata.CONTENT_TYPE, "image/gif");
    // try-with-resources: the stream was previously never closed.
    try (InputStream stream = getClass().getResourceAsStream("/test-documents/testGIF.gif")) {
        parser.parse(stream, new DefaultHandler(), metadata, new ParseContext());
    }

    // Raw keys as reported by the parser.
    assertEquals("75", metadata.get("height"));
    assertEquals("100", metadata.get("width"));
    assertEquals("true", metadata.get("Compression Lossless"));
    assertEquals("Normal", metadata.get("Dimension ImageOrientation"));
    assertEquals("lzw", metadata.get("Compression CompressionTypeName"));
    assertEquals("0", metadata.get("Dimension HorizontalPixelOffset"));
    assertEquals("imageLeftPosition=0, imageTopPosition=0, imageWidth=100, imageHeight=75, interlaceFlag=false", metadata.get("ImageDescriptor"));
    assertEquals("Index", metadata.get("Data SampleFormat"));
    assertEquals("3", metadata.get("Chroma NumChannels"));
    assertEquals("1", metadata.get("Compression NumProgressiveScans"));
    assertEquals("RGB", metadata.get("Chroma ColorSpaceType"));
    assertEquals(licenseText, metadata.get("CommentExtensions CommentExtension"));
    assertEquals("value=" + licenseText + ", encoding=ISO-8859-1, compression=none", metadata.get("Text TextEntry"));
    assertEquals("true", metadata.get("Chroma BlackIsZero"));
    assertEquals("disposalMethod=none, userInputFlag=false, transparentColorFlag=false, delayTime=0, transparentColorIndex=0", metadata.get("GraphicControlExtension"));
    assertEquals("0", metadata.get("Dimension VerticalPixelOffset"));
    assertEquals("image/gif", metadata.get("Content-Type"));

    // The same values exposed through the Metadata / TikaCoreProperties constants.
    assertEquals("100", metadata.get(Metadata.IMAGE_WIDTH));
    assertEquals("75", metadata.get(Metadata.IMAGE_LENGTH));
    assertEquals(licenseText, metadata.get(TikaCoreProperties.COMMENTS));
}
Use of org.xml.sax.helpers.DefaultHandler in the Apache Tika project.
Class HtmlParserTest, method assertScriptLink.
/**
 * Parses {@code html} with a new {@link HtmlParser} and asserts that exactly
 * one {@code script} element carrying a {@code src} attribute was reported,
 * and that the attribute value equals {@code url}.
 *
 * @param html the HTML document to parse, encoded as UTF-8 before parsing
 * @param url  the expected value of the single collected {@code src} attribute
 */
private void assertScriptLink(String html, String url) throws Exception {
    final List<String> scriptSources = new ArrayList<String>();

    // Handler that records the src attribute of every script element it sees.
    DefaultHandler collector = new DefaultHandler() {
        @Override
        public void startElement(String uri, String localName, String qName, Attributes attributes) {
            if (!qName.equals("script")) {
                return;
            }
            String src = attributes.getValue("", "src");
            if (src != null) {
                scriptSources.add(src);
            }
        }
    };

    Metadata htmlMetadata = new Metadata();
    htmlMetadata.set(Metadata.CONTENT_TYPE, "text/html");

    // The identity mapper is required for <script> tags to be extracted.
    ParseContext parseContext = new ParseContext();
    parseContext.set(HtmlMapper.class, IdentityHtmlMapper.INSTANCE);

    new HtmlParser().parse(
            new ByteArrayInputStream(html.getBytes(UTF_8)),
            collector,
            htmlMetadata,
            parseContext);

    assertEquals(1, scriptSources.size());
    assertEquals(url, scriptSources.get(0));
}
Aggregations