use of org.apache.tika.sax.BodyContentHandler in project tika by apache.
the class GribParserTest method testParseGlobalMetadata.
/**
 * Runs {@link GribParser} over the bundled GRIB2 sample document and checks
 * that the extracted body text contains the "dimensions:" and "variables:"
 * section markers.
 *
 * @throws Exception if the sample cannot be read or parsed
 */
@Test
public void testParseGlobalMetadata() throws Exception {
    Parser gribParser = new GribParser();
    Metadata parsedMetadata = new Metadata();
    ContentHandler bodyHandler = new BodyContentHandler();

    // The sample stream is closed as soon as parsing has finished.
    try (InputStream gribStream =
            GribParser.class.getResourceAsStream("/test-documents/gdas1.forecmwf.2014062612.grib2")) {
        ParseContext context = new ParseContext();
        gribParser.parse(gribStream, bodyHandler, parsedMetadata, context);
    }

    assertNotNull(parsedMetadata);

    // Everything the handler collected during the parse, as plain text.
    String extractedText = bodyHandler.toString();
    boolean hasDimensions = extractedText.contains("dimensions:");
    assertTrue(hasDimensions);
    boolean hasVariables = extractedText.contains("variables:");
    assertTrue(hasVariables);
}
use of org.apache.tika.sax.BodyContentHandler in project tika by apache.
the class DWGParserTest method testParser.
/**
 * Parses the given DWG stream with {@link DWGParser} and verifies the
 * reported content type, the document properties (title, description,
 * subject, creator, keywords, comments, relation) and that the extracted
 * body text contains the title, part of the description and the link.
 *
 * <p>The stream is managed with try-with-resources, so it is closed on every
 * path. Unlike a manual {@code finally { input.close(); }}, a failure while
 * closing does not replace an earlier assertion or parse failure, and a
 * {@code null} stream (missing test resource) does not trigger an extra
 * {@code NullPointerException} during cleanup.
 *
 * @param input DWG document to parse; closed by this method
 * @throws Exception if parsing fails or the stream cannot be closed
 */
@SuppressWarnings("deprecation")
private void testParser(InputStream input) throws Exception {
    try (InputStream dwg = input) {
        Metadata metadata = new Metadata();
        ContentHandler handler = new BodyContentHandler();
        new DWGParser().parse(dwg, handler, metadata);

        assertEquals("image/vnd.dwg", metadata.get(Metadata.CONTENT_TYPE));
        assertEquals("The quick brown fox jumps over the lazy dog", metadata.get(TikaCoreProperties.TITLE));
        assertEquals("Gym class featuring a brown fox and lazy dog", metadata.get(TikaCoreProperties.DESCRIPTION));
        assertEquals("Gym class featuring a brown fox and lazy dog", metadata.get(Metadata.SUBJECT));
        assertEquals("Nevin Nollop", metadata.get(TikaCoreProperties.CREATOR));
        assertEquals("Pangram, fox, dog", metadata.get(TikaCoreProperties.KEYWORDS));
        // Only the first 11 characters of the comment are compared.
        assertEquals("Lorem ipsum", metadata.get(TikaCoreProperties.COMMENTS).substring(0, 11));
        assertEquals("http://www.alfresco.com", metadata.get(TikaCoreProperties.RELATION));

        // Check some of the old style metadata too
        assertEquals("The quick brown fox jumps over the lazy dog", metadata.get(Metadata.TITLE));
        assertEquals("Gym class featuring a brown fox and lazy dog", metadata.get(Metadata.SUBJECT));

        // The same values must also show up in the extracted body text.
        String content = handler.toString();
        assertContains("The quick brown fox jumps over the lazy dog", content);
        assertContains("Gym class", content);
        assertContains("www.alfresco.com", content);
    }
}
use of org.apache.tika.sax.BodyContentHandler in project tika by apache.
the class DWGParserTest method testDWG2010CustomPropertiesParser.
/**
 * Exercises {@link DWGParser} on the DWG 2010 sample that carries custom
 * properties: first through the shared {@code testParser} checks, then by
 * asserting that both custom properties appear in the metadata under their
 * own names.
 *
 * @throws Exception if the sample cannot be read or parsed
 */
@Test
public void testDWG2010CustomPropertiesParser() throws Exception {
    final String sampleResource = "/test-documents/testDWG2010_custom_props.dwg";

    // Standard parsing first; testParser closes the stream it is given.
    InputStream standardInput = DWGParserTest.class.getResourceAsStream(sampleResource);
    testParser(standardInput);

    // Then the custom properties (with alternate padding, per the original note),
    // read from a fresh stream over the same resource.
    try (InputStream customPropsInput = DWGParserTest.class.getResourceAsStream(sampleResource)) {
        Metadata customMetadata = new Metadata();
        ContentHandler bodyHandler = new BodyContentHandler();
        DWGParser dwgParser = new DWGParser();
        // No ParseContext is supplied for this call.
        dwgParser.parse(customPropsInput, bodyHandler, customMetadata, null);

        String firstCustomValue = customMetadata.get("customprop1");
        assertEquals("valueforcustomprop1", firstCustomValue);
        String secondCustomValue = customMetadata.get("customprop2");
        assertEquals("valueforcustomprop2", secondCustomValue);
    }
}
use of org.apache.tika.sax.BodyContentHandler in project tika by apache.
the class HtmlParserTest method testDetectOfCharset.
/**
 * Regression test for TIKA-334: an HTML document with a non-ASCII title,
 * supplied as UTF-8 bytes without any charset declaration in the markup,
 * must yield that same title in the parsed metadata.
 *
 * @see <a href="https://issues.apache.org/jira/browse/TIKA-334">TIKA-334</a>
 */
@Test
public void testDetectOfCharset() throws Exception {
    String html = "<html><head><title>Ž</title></head><body></body></html>";
    byte[] utf8Html = html.getBytes(UTF_8);

    Metadata parsedMetadata = new Metadata();
    HtmlParser htmlParser = new HtmlParser();
    htmlParser.parse(
            new ByteArrayInputStream(utf8Html),
            new BodyContentHandler(),
            parsedMetadata,
            new ParseContext());

    String detectedTitle = parsedMetadata.get(TikaCoreProperties.TITLE);
    assertEquals("Ž", detectedTitle);
}
use of org.apache.tika.sax.BodyContentHandler in project tika by apache.
the class HtmlParserTest method testParseEmpty.
/**
 * Feeds a zero-length input to {@link HtmlParser} and checks that the body
 * handler ends up with an empty string.
 *
 * @throws Exception if parsing fails
 */
@Test
public void testParseEmpty() throws Exception {
    byte[] noBytes = new byte[0];
    ContentHandler bodyHandler = new BodyContentHandler();

    HtmlParser htmlParser = new HtmlParser();
    htmlParser.parse(new ByteArrayInputStream(noBytes), bodyHandler, new Metadata(), new ParseContext());

    String extractedText = bodyHandler.toString();
    assertEquals("", extractedText);
}
Aggregations