use of org.xml.sax.helpers.DefaultHandler in project tika by apache.
the class DcXMLParserTest method testXMLParserNonAsciiChars.
/**
 * Parses the {@code /test-documents/testXML.xml} resource with {@link DcXMLParser} and
 * verifies that the value stored under {@link TikaCoreProperties#RIGHTS} matches the
 * expected text, which contains accented (non-ASCII) characters.
 */
@Test
public void testXMLParserNonAsciiChars() throws Exception {
    final String expected = "Archimède et Lius à Châteauneuf testing chars en été";
    final String resourcePath = "/test-documents/testXML.xml";
    try (InputStream xml = DcXMLParserTest.class.getResourceAsStream(resourcePath)) {
        Metadata parsed = new Metadata();
        DcXMLParser dcParser = new DcXMLParser();
        // The SAX events are irrelevant here; only the populated metadata is inspected.
        dcParser.parse(xml, new DefaultHandler(), parsed);
        String actualRights = parsed.get(TikaCoreProperties.RIGHTS);
        assertEquals(expected, actualRights);
    }
}
use of org.xml.sax.helpers.DefaultHandler in project tika by apache.
the class RFC822ParserTest method getDate.
/**
 * Builds a two-header mail message whose {@code Date:} header carries {@code dateString},
 * parses it with {@link RFC822Parser}, and returns the date stored under
 * {@link TikaCoreProperties#CREATED}.
 *
 * @param dateString raw text placed after the {@code "Date: "} header prefix
 * @return the result of {@code getDate(TikaCoreProperties.CREATED)} on the parsed metadata
 * @throws Exception if parsing the message or handling the stream fails
 */
private Date getDate(String dateString) throws Exception {
    StringBuilder message = new StringBuilder();
    message.append("From: dev@tika.apache.org\n");
    message.append("Date: ").append(dateString).append("\n");
    byte[] rawMessage = message.toString().getBytes(StandardCharsets.UTF_8);

    Parser mailParser = new RFC822Parser();
    Metadata extracted = new Metadata();
    try (InputStream in = TikaInputStream.get(rawMessage)) {
        // Content events are discarded; only the metadata is of interest.
        mailParser.parse(in, new DefaultHandler(), extracted, new ParseContext());
    }
    return extracted.getDate(TikaCoreProperties.CREATED);
}
use of org.xml.sax.helpers.DefaultHandler in project tika by apache.
the class HtmlParserTest method testParseAscii.
/**
 * Parses {@code /test-documents/testHTML.html} with {@link HtmlParser}, sending SAX events
 * both to a {@link BodyContentHandler} and to a handler that records anchor attributes,
 * then checks the extracted metadata, the recorded {@code href}/{@code name} values and
 * the body text.
 */
@Test
public void testParseAscii() throws Exception {
    final StringWriter hrefValues = new StringWriter();
    final StringWriter nameValues = new StringWriter();
    final ContentHandler bodyHandler = new BodyContentHandler();
    final Metadata parsed = new Metadata();
    final String resourcePath = "/test-documents/testHTML.html";

    try (InputStream html = HtmlParserTest.class.getResourceAsStream(resourcePath)) {
        // Records the href of each <a> element, or its name when no href is present.
        ContentHandler anchorCollector = new DefaultHandler() {
            @Override
            public void startElement(String uri, String localName, String qName, Attributes attrs)
                    throws SAXException {
                if (!"a".equals(localName)) {
                    return;
                }
                String hrefAttr = attrs.getValue("href");
                if (hrefAttr != null) {
                    hrefValues.append(hrefAttr);
                    return;
                }
                String nameAttr = attrs.getValue("name");
                if (nameAttr != null) {
                    nameValues.append(nameAttr);
                }
            }
        };
        ContentHandler combined = new TeeContentHandler(bodyHandler, anchorCollector);
        new HtmlParser().parse(html, combined, parsed, new ParseContext());
    }

    // Metadata expected from the test document.
    assertEquals("Title : Test Indexation Html", parsed.get(TikaCoreProperties.TITLE));
    assertEquals("Tika Developers", parsed.get("Author"));
    assertEquals("5", parsed.get("refresh"));
    assertEquals("51.2312", parsed.get(Geographic.LATITUDE));
    assertEquals("-5.1987", parsed.get(Geographic.LONGITUDE));

    // Anchor attributes captured by the collector handler.
    assertEquals("http://www.apache.org/", hrefValues.toString());
    assertEquals("test-anchor", nameValues.toString());

    // Body text must contain each expected snippet.
    String bodyText = bodyHandler.toString();
    String[] expectedSnippets = {"Test Indexation Html", "Indexation du fichier"};
    for (String snippet : expectedSnippets) {
        assertTrue("Did not contain expected text:" + snippet, bodyText.contains(snippet));
    }
}
use of org.xml.sax.helpers.DefaultHandler in project tika by apache.
the class BPGParserTest method testBPG_Geo.
/**
 * Tests a file with geographic information in it.
 *
 * <p>Parses {@code /test-documents/testBPG_GEO.bpg} and checks the image width, length,
 * bits per sample and colour mode. The geographic and EXIF assertions are kept but
 * disabled behind {@code if (false)} until that data is extracted, see TIKA-1495.
 */
@Test
public void testBPG_Geo() throws Exception {
    Metadata metadata = new Metadata();
    metadata.set(Metadata.CONTENT_TYPE, "image/x-bpg");
    // Close the resource stream once parsing is done instead of leaking it.
    try (InputStream stream = getClass().getResourceAsStream("/test-documents/testBPG_GEO.bpg")) {
        parser.parse(stream, new DefaultHandler(), metadata, new ParseContext());
    }
    assertEquals("100", metadata.get(Metadata.IMAGE_WIDTH));
    assertEquals("68", metadata.get(Metadata.IMAGE_LENGTH));
    assertEquals("10", metadata.get(Metadata.BITS_PER_SAMPLE));
    assertEquals("YCbCr Colour", metadata.get(Photoshop.COLOR_MODE));
    // TODO Get the geographic data to be properly extracted, see TIKA-1495
    if (false) {
        assertEquals("12.54321", metadata.get(Metadata.LATITUDE));
        assertEquals("-54.1234", metadata.get(Metadata.LONGITUDE));
    }
    // TODO Get the exif data to be properly extracted, see TIKA-1495
    if (false) {
        // 1/1600
        assertEquals("6.25E-4", metadata.get(Metadata.EXPOSURE_TIME));
        assertEquals("5.6", metadata.get(Metadata.F_NUMBER));
        assertEquals("false", metadata.get(Metadata.FLASH_FIRED));
        assertEquals("194.0", metadata.get(Metadata.FOCAL_LENGTH));
        assertEquals("400", metadata.get(Metadata.ISO_SPEED_RATINGS));
        assertEquals("Canon", metadata.get(Metadata.EQUIPMENT_MAKE));
        assertEquals("Canon EOS 40D", metadata.get(Metadata.EQUIPMENT_MODEL));
        assertEquals("Adobe Photoshop CS3 Macintosh", metadata.get(Metadata.SOFTWARE));
        assertEquals("240.0", metadata.get(Metadata.RESOLUTION_HORIZONTAL));
        assertEquals("240.0", metadata.get(Metadata.RESOLUTION_VERTICAL));
        assertEquals("Inch", metadata.get(Metadata.RESOLUTION_UNIT));
    }
}
use of org.xml.sax.helpers.DefaultHandler in project tika by apache.
the class BPGParserTest method testBPG_Commented.
/**
 * Tests a file with comments.
 *
 * <p>Parses {@code /test-documents/testBPG_commented.bpg} and checks the image width,
 * length, bits per sample and colour mode. The comment and EXIF assertions are kept but
 * disabled behind {@code if (false)} until that data is extracted, see TIKA-1495.
 */
@Test
public void testBPG_Commented() throws Exception {
    Metadata metadata = new Metadata();
    metadata.set(Metadata.CONTENT_TYPE, "image/x-bpg");
    // Close the resource stream once parsing is done instead of leaking it.
    try (InputStream stream = getClass().getResourceAsStream("/test-documents/testBPG_commented.bpg")) {
        parser.parse(stream, new DefaultHandler(), metadata, new ParseContext());
    }
    assertEquals("103", metadata.get(Metadata.IMAGE_WIDTH));
    assertEquals("77", metadata.get(Metadata.IMAGE_LENGTH));
    assertEquals("10", metadata.get(Metadata.BITS_PER_SAMPLE));
    assertEquals("YCbCr Colour", metadata.get(Photoshop.COLOR_MODE));
    // TODO Get the exif comment data to be properly extracted, see TIKA-1495
    if (false) {
        assertEquals("Tosteberga Ängar", metadata.get(TikaCoreProperties.TITLE));
        assertEquals("Bird site in north eastern Skåne, Sweden.\n(new line)", metadata.get(TikaCoreProperties.DESCRIPTION));
        List<String> keywords = Arrays.asList(metadata.getValues(Metadata.KEYWORDS));
        assertTrue(keywords.contains("coast"));
        assertTrue(keywords.contains("bird watching"));
        assertEquals(keywords, Arrays.asList(metadata.getValues(TikaCoreProperties.KEYWORDS)));
    }
    // TODO Get the exif data to be properly extracted, see TIKA-1495
    if (false) {
        // 1/1000000
        assertEquals("1.0E-6", metadata.get(Metadata.EXPOSURE_TIME));
        assertEquals("2.8", metadata.get(Metadata.F_NUMBER));
        assertEquals("4.6", metadata.get(Metadata.FOCAL_LENGTH));
        assertEquals("114", metadata.get(Metadata.ISO_SPEED_RATINGS));
        assertEquals(null, metadata.get(Metadata.EQUIPMENT_MAKE));
        assertEquals(null, metadata.get(Metadata.EQUIPMENT_MODEL));
        assertEquals(null, metadata.get(Metadata.SOFTWARE));
        assertEquals("1", metadata.get(Metadata.ORIENTATION));
        assertEquals("300.0", metadata.get(Metadata.RESOLUTION_HORIZONTAL));
        assertEquals("300.0", metadata.get(Metadata.RESOLUTION_VERTICAL));
        assertEquals("Inch", metadata.get(Metadata.RESOLUTION_UNIT));
    }
}
Aggregations