use of org.apache.tika.sax.BodyContentHandler in project tika by apache.
the class Mp3ParserTest method testMp3ParsingLyrics.
/**
 * Parses an MP3 that carries lyrics alongside its ID3v2 tags and
 * verifies that the tag metadata and the extracted text are both present.
 */
@Test
public void testMp3ParsingLyrics() throws Exception {
    Metadata meta = new Metadata();
    ContentHandler body = new BodyContentHandler();
    // No format-specific parser is named here; detection is left to AutoDetectParser.
    Parser autoParser = new AutoDetectParser();

    try (InputStream in = Mp3ParserTest.class.getResourceAsStream("/test-documents/testMP3lyrics.mp3")) {
        autoParser.parse(in, body, meta, new ParseContext());
    }

    // Detected type and tag-derived metadata.
    assertEquals("audio/mpeg", meta.get(Metadata.CONTENT_TYPE));
    assertEquals("Test Title", meta.get(TikaCoreProperties.TITLE));
    assertEquals("Test Artist", meta.get(TikaCoreProperties.CREATOR));
    assertEquals("Test Artist", meta.get(Metadata.AUTHOR));

    // Each of these values must also appear in the extracted body text.
    String text = body.toString();
    String[] expectedSnippets = {
        "Test Title", "Test Artist", "Test Album", "2008", "Test Comment", "Rock"
    };
    for (String snippet : expectedSnippets) {
        assertContains(snippet, text);
    }

    // Audio stream properties.
    assertEquals("MPEG 3 Layer III Version 1", meta.get("version"));
    assertEquals("44100", meta.get("samplerate"));
    assertEquals("2", meta.get("channels"));
    checkDuration(meta, 1);
}
use of org.apache.tika.sax.BodyContentHandler in project tika by apache.
the class ODFParserTest method testOO2Metadata.
/**
 * Companion to {@link #testOO2()}: parses a second OO2 document whose
 * metadata differs, and checks every metadata field that is (or is not) set.
 */
@Test
public void testOO2Metadata() throws Exception {
    try (InputStream in = ODFParserTest.class.getResourceAsStream("/test-documents/testOpenOffice2.odf")) {
        Metadata meta = new Metadata();
        ContentHandler body = new BodyContentHandler();
        OpenDocumentParser odfParser = new OpenDocumentParser();
        odfParser.parse(in, body, meta);

        // Document type and dates.
        assertEquals("application/vnd.oasis.opendocument.formula", meta.get(Metadata.CONTENT_TYPE));
        assertEquals(null, meta.get(TikaCoreProperties.MODIFIED));
        assertEquals("2006-01-27T11:55:22", meta.get(Metadata.CREATION_DATE));

        // Title, and the description mirrored into the subject fields.
        assertEquals("The quick brown fox jumps over the lazy dog", meta.get(TikaCoreProperties.TITLE));
        assertEquals("Gym class featuring a brown fox and lazy dog", meta.get(TikaCoreProperties.DESCRIPTION));
        assertEquals("Gym class featuring a brown fox and lazy dog", meta.get(OfficeOpenXMLCore.SUBJECT));
        assertEquals("Gym class featuring a brown fox and lazy dog", meta.get(Metadata.SUBJECT));

        // Editing information and keywords.
        assertEquals("PT0S", meta.get(Metadata.EDIT_TIME));
        assertEquals("1", meta.get("editing-cycles"));
        assertEquals("OpenOffice.org/2.2$Win32 OpenOffice.org_project/680m14$Build-9134", meta.get("generator"));
        assertEquals("Pangram, fox, dog", meta.get(Metadata.KEYWORDS));

        // User defined metadata: "custom:Info 1" .. "custom:Info 4", checked in order.
        String[] customValues = {"Text 1", "2", "false", "true"};
        for (int i = 0; i < customValues.length; i++) {
            assertEquals(customValues[i], meta.get("custom:Info " + (i + 1)));
        }

        // The document carries no statistics, so every count must be absent.
        assertEquals(null, meta.get(Metadata.PAGE_COUNT));
        assertEquals(null, meta.get(Metadata.PARAGRAPH_COUNT));
        assertEquals(null, meta.get(Metadata.WORD_COUNT));
        assertEquals(null, meta.get(Metadata.CHARACTER_COUNT));
        assertEquals(null, meta.get(Metadata.TABLE_COUNT));
        assertEquals(null, meta.get(Metadata.OBJECT_COUNT));
        assertEquals(null, meta.get(Metadata.IMAGE_COUNT));
        String[] rawStatKeys = {
            "nbTab", "nbObject", "nbImg", "nbPage", "nbPara", "nbWord", "nbCharacter"
        };
        for (String key : rawStatKeys) {
            assertEquals(null, meta.get(key));
        }

        // Maths (formula) content is not currently supported, so the body is empty.
        assertEquals("", body.toString().trim());
    }
}
use of org.apache.tika.sax.BodyContentHandler in project tika by apache.
the class ODFParserTest method testNullStylesInODTFooter.
// TIKA-1600: a null pointer encountered while parsing must not break extraction.
@Test
public void testNullStylesInODTFooter() throws Exception {
    Parser odfParser = new OpenDocumentParser();
    try (InputStream in = ODFParserTest.class.getResourceAsStream("/test-documents/testODT-TIKA-6000.odt")) {
        Metadata meta = new Metadata();
        ContentHandler body = new BodyContentHandler();
        odfParser.parse(in, body, meta, getNonRecursingParseContext());

        assertEquals("application/vnd.oasis.opendocument.text", meta.get(Metadata.CONTENT_TYPE));

        // Phrases that must appear in the extracted text, checked in this order.
        String text = body.toString();
        String[] expectedPhrases = {
            "Utilisation de ce document",
            "Copyright and License",
            "Changer la langue",
            "La page d’accueil permet de faire une recherche simple"
        };
        for (String phrase : expectedPhrases) {
            assertContains(phrase, text);
        }
    }
}
use of org.apache.tika.sax.BodyContentHandler in project tika by apache.
the class ODFParserTest method testODTFooter.
// Checks that text from both pages and from the footer is present in the extracted content.
@Test
public void testODTFooter() throws Exception {
    try (InputStream in = ODFParserTest.class.getResourceAsStream("/test-documents/testFooter.odt")) {
        Metadata meta = new Metadata();
        ContentHandler body = new BodyContentHandler();
        AutoDetectParser autoParser = new AutoDetectParser();
        autoParser.parse(in, body, meta);

        String text = body.toString();
        String[] expectedPhrases = {
            "Here is some text...",
            "Here is some text on page 2",
            "Here is footer text"
        };
        for (String phrase : expectedPhrases) {
            assertContains(phrase, text);
        }
    }
}
use of org.apache.tika.sax.BodyContentHandler in project tika by apache.
the class ODFParserTest method testNPEFromFile.
// Parses testNPEOpenDocument.odt through a TikaInputStream and checks the detected type and text.
@Test
public void testNPEFromFile() throws Exception {
    OpenDocumentParser odfParser = new OpenDocumentParser();
    try (TikaInputStream in = TikaInputStream.get(getClass().getResource("/test-documents/testNPEOpenDocument.odt"))) {
        ContentHandler body = new BodyContentHandler();
        Metadata meta = new Metadata();
        odfParser.parse(in, body, meta, new ParseContext());

        assertEquals("application/vnd.oasis.opendocument.text", meta.get(Metadata.CONTENT_TYPE));
        assertContains("primero hay que generar un par de claves", body.toString());
    }
}
Aggregations