use of org.xml.sax.ContentHandler in project tika by apache.
the class FontParsersTest method testTTFParsing.
/**
 * Feeds a TrueType font to {@link AutoDetectParser} and verifies the content type,
 * the title/date properties and the font-specific metadata keys it reports.
 */
@Test
public void testTTFParsing() throws Exception {
    Metadata fontMetadata = new Metadata();
    ParseContext parseContext = new ParseContext();
    ContentHandler bodyHandler = new BodyContentHandler();
    // No font parser is named explicitly: the type is expected to be auto-detected.
    Parser autoDetect = new AutoDetectParser();

    try (TikaInputStream fontStream = TikaInputStream.get(
            FontParsersTest.class.getResource("/test-documents/testTrueType3.ttf"))) {
        autoDetect.parse(fontStream, bodyHandler, fontMetadata, parseContext);
    }

    // Content type, title and dates
    assertEquals("application/x-font-ttf", fontMetadata.get(Metadata.CONTENT_TYPE));
    assertEquals("Open Sans Bold", fontMetadata.get(TikaCoreProperties.TITLE));
    assertEquals("2010-12-30T11:04:00Z", fontMetadata.get(Metadata.CREATION_DATE));
    assertEquals("2010-12-30T11:04:00Z", fontMetadata.get(TikaCoreProperties.CREATED));
    assertEquals("2011-05-05T12:37:53Z", fontMetadata.get(TikaCoreProperties.MODIFIED));

    // Font naming metadata
    assertEquals("Open Sans Bold", fontMetadata.get(MET_FONT_NAME));
    assertEquals("Open Sans", fontMetadata.get(MET_FONT_FAMILY_NAME));
    assertEquals("Bold", fontMetadata.get(MET_FONT_SUB_FAMILY_NAME));
    assertEquals("OpenSans-Bold", fontMetadata.get(MET_PS_NAME));

    // Only the first nine characters of these two values are compared.
    String copyright = fontMetadata.get("Copyright");
    assertEquals("Digitized", copyright.substring(0, 9));
    String trademark = fontMetadata.get("Trademark");
    assertEquals("Open Sans", trademark.substring(0, 9));

    // These keys are expected to be absent for this font.
    assertEquals(null, fontMetadata.get(MET_FONT_FULL_NAME));
    assertEquals(null, fontMetadata.get(MET_FONT_WEIGHT));
    assertEquals(null, fontMetadata.get(MET_FONT_VERSION));

    // The parser currently produces no body text.
    assertEquals("", bodyHandler.toString());
}
use of org.xml.sax.ContentHandler in project tika by apache.
the class HDFParserTest method testHDF4.
/**
 * Runs {@link HDFParser} over the bundled HDF4 sample file and checks a few of the
 * metadata values it produces.
 */
@Test
public void testHDF4() throws Exception {
    // The test is a no-op when the reported Java version starts with "1.5".
    String javaVersion = System.getProperty("java.version");
    if (javaVersion.startsWith("1.5")) {
        return;
    }

    Metadata hdfMetadata = new Metadata();
    ContentHandler bodyHandler = new BodyContentHandler();
    Parser hdfParser = new HDFParser();

    /*
     * The sample is a publicly available HDF4 file from the HDF4 examples:
     *
     * http://www.hdfgroup.org/training/hdf4_chunking/Chunkit/bin/input54kmdata.hdf
     */
    try (InputStream hdfStream = HDFParser.class.getResourceAsStream("/test-documents/test.hdf")) {
        ParseContext parseContext = new ParseContext();
        hdfParser.parse(hdfStream, bodyHandler, hdfMetadata, parseContext);
    }

    assertNotNull(hdfMetadata);
    assertEquals("Direct read of HDF4 file through CDM library", hdfMetadata.get("_History"));
    assertEquals("Ascending", hdfMetadata.get("Pass"));
    assertEquals("Hierarchical Data Format, version 4", hdfMetadata.get("File-Type-Description"));
}
use of org.xml.sax.ContentHandler in project tika by apache.
the class GribParserTest method testParseGlobalMetadata.
/**
 * Parses a GRIB2 sample with {@link GribParser} and checks that the extracted text
 * contains the "dimensions:" and "variables:" markers.
 */
@Test
public void testParseGlobalMetadata() throws Exception {
    ContentHandler bodyHandler = new BodyContentHandler();
    Metadata gribMetadata = new Metadata();
    Parser gribParser = new GribParser();

    try (InputStream gribStream = GribParser.class.getResourceAsStream(
            "/test-documents/gdas1.forecmwf.2014062612.grib2")) {
        ParseContext parseContext = new ParseContext();
        gribParser.parse(gribStream, bodyHandler, gribMetadata, parseContext);
    }

    assertNotNull(gribMetadata);

    String extractedText = bodyHandler.toString();
    assertTrue(extractedText.contains("dimensions:"));
    assertTrue(extractedText.contains("variables:"));
}
use of org.xml.sax.ContentHandler in project tika by apache.
the class DWGParserTest method testParser.
/**
 * Parses the supplied DWG stream with {@link DWGParser} and verifies the expected
 * metadata values and body text.
 *
 * @param input the DWG document to parse; it is closed by this method
 * @throws Exception if parsing fails or the stream cannot be read or closed
 */
@SuppressWarnings("deprecation")
private void testParser(InputStream input) throws Exception {
    // try-with-resources instead of a manual finally block: a failure while closing is
    // attached as a suppressed exception rather than replacing the parse/assertion
    // failure, and a null stream no longer triggers a masking NullPointerException.
    try (InputStream stream = input) {
        Metadata metadata = new Metadata();
        ContentHandler handler = new BodyContentHandler();
        new DWGParser().parse(stream, handler, metadata);

        assertEquals("image/vnd.dwg", metadata.get(Metadata.CONTENT_TYPE));
        assertEquals("The quick brown fox jumps over the lazy dog", metadata.get(TikaCoreProperties.TITLE));
        assertEquals("Gym class featuring a brown fox and lazy dog", metadata.get(TikaCoreProperties.DESCRIPTION));
        assertEquals("Gym class featuring a brown fox and lazy dog", metadata.get(Metadata.SUBJECT));
        assertEquals("Nevin Nollop", metadata.get(TikaCoreProperties.CREATOR));
        assertEquals("Pangram, fox, dog", metadata.get(TikaCoreProperties.KEYWORDS));
        // Only the first eleven characters of the comments are compared.
        assertEquals("Lorem ipsum", metadata.get(TikaCoreProperties.COMMENTS).substring(0, 11));
        assertEquals("http://www.alfresco.com", metadata.get(TikaCoreProperties.RELATION));

        // Check some of the old style metadata too
        assertEquals("The quick brown fox jumps over the lazy dog", metadata.get(Metadata.TITLE));
        assertEquals("Gym class featuring a brown fox and lazy dog", metadata.get(Metadata.SUBJECT));

        // The body text should carry the title, part of the description and the URL.
        String content = handler.toString();
        assertContains("The quick brown fox jumps over the lazy dog", content);
        assertContains("Gym class", content);
        assertContains("www.alfresco.com", content);
    }
}
use of org.xml.sax.ContentHandler in project tika by apache.
the class DWGParserTest method testDWG2010CustomPropertiesParser.
/**
 * Exercises the DWG 2010 sample that carries custom properties: first through the
 * shared {@code testParser} checks, then by parsing it again and reading the two
 * custom property values from the metadata.
 */
@Test
public void testDWG2010CustomPropertiesParser() throws Exception {
    // First pass: the standard checks in testParser must hold for this file.
    testParser(DWGParserTest.class.getResourceAsStream("/test-documents/testDWG2010_custom_props.dwg"));

    // Second pass: custom properties with alternate padding must be picked up.
    try (InputStream dwgStream = DWGParserTest.class.getResourceAsStream(
            "/test-documents/testDWG2010_custom_props.dwg")) {
        ContentHandler bodyHandler = new BodyContentHandler();
        Metadata dwgMetadata = new Metadata();
        DWGParser dwgParser = new DWGParser();
        dwgParser.parse(dwgStream, bodyHandler, dwgMetadata, null);

        assertEquals("valueforcustomprop1", dwgMetadata.get("customprop1"));
        assertEquals("valueforcustomprop2", dwgMetadata.get("customprop2"));
    }
}
Aggregations