use of org.apache.tika.sax.BodyContentHandler in project tika by apache.
the class ODFParserTest method testOO3.
/**
 * Parses the testODFwithOOo3.odt sample with every parser returned by
 * getParsers() and verifies the reported content type together with a set
 * of text fragments that must appear in the extracted body.
 */
@Test
public void testOO3() throws Exception {
    // Fragments that must all be present in the extracted text, checked in this order.
    final String[] expectedFragments = {
        "Tika is part of the Lucene project.",
        "Solr",
        "one embedded",
        "Rectangle Title",
        "a blue background and dark border"
    };

    for (Parser odfParser : getParsers()) {
        // A fresh stream, handler and metadata per parser so the runs do not share state.
        try (InputStream odt = ODFParserTest.class.getResourceAsStream("/test-documents/testODFwithOOo3.odt")) {
            ContentHandler body = new BodyContentHandler();
            Metadata meta = new Metadata();
            odfParser.parse(odt, body, meta, new ParseContext());

            assertEquals("application/vnd.oasis.opendocument.text", meta.get(Metadata.CONTENT_TYPE));

            String text = body.toString();
            for (String fragment : expectedFragments) {
                assertContains(fragment, text);
            }
        }
    }
}
use of org.apache.tika.sax.BodyContentHandler in project tika by apache.
the class ArParserTest method testEmbedded.
/**
 * Tests that the ParseContext parser is correctly fired for all the
 * embedded entries.
 *
 * Two single-entry archives are parsed in turn with the same parser, handler
 * and metadata; the tracker is reset between the two runs so that each set of
 * assertions only sees the entry of the archive parsed immediately before.
 */
@Test
public void testEmbedded() throws Exception {
    // Should auto-detect!
    Parser parser = new AutoDetectParser();
    ContentHandler handler = new BodyContentHandler();
    Metadata metadata = new Metadata();

    try (InputStream stream = ArParserTest.class.getResourceAsStream("/test-documents/testARofText.ar")) {
        parser.parse(stream, handler, metadata, trackingContext);
    }
    assertSingleTrackedEntry("testTXT.txt");

    tracker.reset();

    try (InputStream stream = ArParserTest.class.getResourceAsStream("/test-documents/testARofSND.ar")) {
        parser.parse(stream, handler, metadata, trackingContext);
    }
    assertSingleTrackedEntry("testAU.au");
}

/**
 * Asserts that the tracker recorded exactly one embedded entry with the given
 * file name, a modification time starting with "201", and no media type or
 * creation time.
 *
 * @param expectedFilename name the single tracked entry must have
 */
private void assertSingleTrackedEntry(String expectedFilename) {
    assertEquals(1, tracker.filenames.size());
    assertEquals(1, tracker.mediatypes.size());
    assertEquals(1, tracker.modifiedAts.size());
    assertEquals(expectedFilename, tracker.filenames.get(0));

    String modifiedAt = tracker.modifiedAts.get(0);
    assertTrue("Modified at " + modifiedAt, modifiedAt.startsWith("201"));

    // Every recorded media type and creation time is expected to be null.
    for (String type : tracker.mediatypes) {
        assertNull(type);
    }
    for (String crt : tracker.createdAts) {
        assertNull(crt);
    }
}
use of org.apache.tika.sax.BodyContentHandler in project tika by apache.
the class Bzip2ParserTest method testEmbedded.
/**
 * Tests that the ParseContext parser is correctly
 * fired for all the embedded entries.
 *
 * The bzip2 container is expected to expose exactly one embedded entry (the
 * tar file), for which the tracker records no name, media type or timestamps.
 */
@Test
public void testEmbedded() throws Exception {
    // Should auto-detect!
    Parser parser = new AutoDetectParser();
    ContentHandler handler = new BodyContentHandler();
    Metadata metadata = new Metadata();
    // Load the fixture through this test's own class rather than an unrelated
    // test class; the path is absolute, so the same resource is resolved.
    try (InputStream stream = getClass().getResourceAsStream("/test-documents/test-documents.tbz2")) {
        parser.parse(stream, handler, metadata, trackingContext);
    }
    // Should find a single entry, for the (compressed) tar file
    assertEquals(1, tracker.filenames.size());
    assertEquals(1, tracker.mediatypes.size());
    assertEquals(1, tracker.modifiedAts.size());
    assertEquals(null, tracker.filenames.get(0));
    assertEquals(null, tracker.mediatypes.get(0));
    assertEquals(null, tracker.createdAts.get(0));
    assertEquals(null, tracker.modifiedAts.get(0));
    // Tar file starts with the directory name
    assertEquals("test-documents/", new String(tracker.lastSeenStart, 0, 15, US_ASCII));
}
use of org.apache.tika.sax.BodyContentHandler in project tika by apache.
the class Bzip2ParserTest method testBzip2Parsing.
/**
 * Parses test-documents.tbz2 with an auto-detecting parser and the recursing
 * context, then verifies the detected content type and that the extracted
 * text contains each archived file name followed by a phrase from that file.
 */
@Test
public void testBzip2Parsing() throws Exception {
    // Entry name / content phrase pairs, checked strictly in this order.
    final String[] expectedSnippets = {
        "test-documents/testEXCEL.xls", "Sample Excel Worksheet",
        "test-documents/testHTML.html", "Test Indexation Html",
        "test-documents/testOpenOffice2.odt", "This is a sample Open Office document",
        "test-documents/testPDF.pdf", "Apache Tika",
        "test-documents/testPPT.ppt", "Sample Powerpoint Slide",
        "test-documents/testRTF.rtf", "indexation Word",
        "test-documents/testTXT.txt", "Test d'indexation de Txt",
        "test-documents/testWORD.doc", "This is a sample Microsoft Word Document",
        "test-documents/testXML.xml", "Rida Benjelloun"
    };

    // Should auto-detect!
    Parser autoDetect = new AutoDetectParser();
    ContentHandler body = new BodyContentHandler();
    Metadata meta = new Metadata();
    try (InputStream archive = Bzip2ParserTest.class.getResourceAsStream("/test-documents/test-documents.tbz2")) {
        autoDetect.parse(archive, body, meta, recursingContext);
    }

    assertEquals("application/x-bzip2", meta.get(Metadata.CONTENT_TYPE));

    String text = body.toString();
    for (String snippet : expectedSnippets) {
        assertContains(snippet, text);
    }
}
use of org.apache.tika.sax.BodyContentHandler in project tika by apache.
the class CompressParserTest method testEmbedded.
/**
 * Tests that the ParseContext parser is correctly
 * fired for all the embedded entries.
 *
 * The .tar.Z container is expected to expose exactly one embedded entry (the
 * tar file), for which the tracker records no name, media type or
 * modification time.
 */
@Test
public void testEmbedded() throws Exception {
    // Should auto-detect!
    Parser parser = new AutoDetectParser();
    ContentHandler handler = new BodyContentHandler();
    Metadata metadata = new Metadata();
    // try-with-resources instead of a manual finally: a null stream (resource
    // not found) is simply not closed, so the real failure is not masked by a
    // NullPointerException thrown from the cleanup code.
    try (InputStream stream = ZipParserTest.class.getResourceAsStream("/test-documents/test-documents.tar.Z")) {
        parser.parse(stream, handler, metadata, trackingContext);
    }
    // Should find a single entry, for the (compressed) tar file
    assertEquals(1, tracker.filenames.size());
    assertEquals(1, tracker.mediatypes.size());
    assertEquals(1, tracker.modifiedAts.size());
    assertEquals(null, tracker.filenames.get(0));
    assertEquals(null, tracker.mediatypes.get(0));
    assertEquals(null, tracker.modifiedAts.get(0));
    // Tar file starts with the directory name
    assertEquals("test-documents/", new String(tracker.lastSeenStart, 0, 15, US_ASCII));
}
Aggregations