use of org.apache.tika.sax.BodyContentHandler in project tika by apache.
the class Mp3ParserTest method testTIKA474.
/**
 * Regression test for TIKA-474: parsing must complete without error
 * (though possibly without all of the content) when the file contains a
 * very large ID3 frame and the file has been cut off before the ID3 tags
 * finish. The sample file embeds JPEG data in the ID3 frame and is
 * truncated part-way through that JPEG data.
 */
@Test
public void testTIKA474() throws Exception {
    // No MP3-specific parser is requested; the type should be auto-detected.
    Metadata extracted = new Metadata();
    ContentHandler bodyText = new BodyContentHandler();
    Parser autoDetect = new AutoDetectParser();
    ParseContext context = new ParseContext();

    try (InputStream in = Mp3ParserTest.class.getResourceAsStream("/test-documents/testMP3truncated.mp3")) {
        autoDetect.parse(in, bodyText, extracted, context);
    }

    // The header fields at the start of the file should still be readable.
    assertEquals("audio/mpeg", extracted.get(Metadata.CONTENT_TYPE));
    assertEquals("Girl you have no faith in medicine", extracted.get(TikaCoreProperties.TITLE));
    assertEquals("The White Stripes", extracted.get(TikaCoreProperties.CREATOR));
    assertEquals("The White Stripes", extracted.get(Metadata.AUTHOR));

    // The same tag values should also show up in the extracted body text.
    String content = bodyText.toString();
    String[] expectedSnippets = {
        "Girl you have no faith in medicine",
        "The White Stripes",
        "Elephant",
        "2003"
    };
    for (String snippet : expectedSnippets) {
        assertContains(snippet, content);
    }

    // The file has no audio frames, so none of these can be determined.
    String[] audioKeys = {"version", "samplerate", "channels"};
    for (String key : audioKeys) {
        assertEquals(null, extracted.get(key));
    }
}
use of org.apache.tika.sax.BodyContentHandler in project tika by apache.
the class Mp3ParserTest method testMp3ParsingID3v1v2.
/**
 * When a file carries both ID3v1 and ID3v2 tags, the values reported
 * should be the ones taken from ID3v2.
 */
@Test
public void testMp3ParsingID3v1v2() throws Exception {
    // Rely on type auto-detection rather than naming the MP3 parser.
    Parser autoDetect = new AutoDetectParser();
    Metadata extracted = new Metadata();
    ContentHandler bodyText = new BodyContentHandler();
    ParseContext context = new ParseContext();

    try (InputStream in = Mp3ParserTest.class.getResourceAsStream("/test-documents/testMP3id3v1_v2.mp3")) {
        autoDetect.parse(in, bodyText, extracted, context);
    }

    // Tag-derived metadata.
    assertEquals("audio/mpeg", extracted.get(Metadata.CONTENT_TYPE));
    assertEquals("Test Title", extracted.get(TikaCoreProperties.TITLE));
    assertEquals("Test Artist", extracted.get(TikaCoreProperties.CREATOR));
    assertEquals("Test Artist", extracted.get(Metadata.AUTHOR));

    // Tag values echoed into the body text.
    String content = bodyText.toString();
    String[] expectedSnippets = {
        "Test Title",
        "Test Artist",
        "Test Album",
        "2008",
        "Test Comment",
        "Rock"
    };
    for (String snippet : expectedSnippets) {
        assertContains(snippet, content);
    }

    // Audio stream details.
    assertEquals("MPEG 3 Layer III Version 1", extracted.get("version"));
    assertEquals("44100", extracted.get("samplerate"));
    assertEquals("1", extracted.get("channels"));
    checkDuration(extracted, 2);
}
use of org.apache.tika.sax.BodyContentHandler in project tika by apache.
the class Mp3ParserTest method testTIKA1589_noId3ReturnsDurationCorrectly.
/**
 * TIKA-1589: parses the {@code testMP3noid3.mp3} sample (going by its
 * name, an MP3 without ID3 tags) and checks the duration that ends up
 * in the metadata.
 */
@Test
public void testTIKA1589_noId3ReturnsDurationCorrectly() throws Exception {
    // Rely on type auto-detection rather than naming the MP3 parser.
    Metadata extracted = new Metadata();
    Parser autoDetect = new AutoDetectParser();
    ContentHandler bodyText = new BodyContentHandler();
    ParseContext context = new ParseContext();

    try (InputStream in = Mp3ParserTest.class.getResourceAsStream("/test-documents/testMP3noid3.mp3")) {
        autoDetect.parse(in, bodyText, extracted, context);
    }

    // The duration is compared as the exact string stored in the metadata.
    String reportedDuration = extracted.get(XMPDM.DURATION);
    assertEquals("2455.510986328125", reportedDuration);
}
use of org.apache.tika.sax.BodyContentHandler in project tika by apache.
the class Mp3ParserTest method testMp3ParsingID3v1.
/**
 * A file that has only ID3v1 tags should still yield some information.
 */
@Test
public void testMp3ParsingID3v1() throws Exception {
    // Rely on type auto-detection rather than naming the MP3 parser.
    ContentHandler bodyText = new BodyContentHandler();
    Metadata extracted = new Metadata();
    Parser autoDetect = new AutoDetectParser();
    ParseContext context = new ParseContext();

    try (InputStream in = Mp3ParserTest.class.getResourceAsStream("/test-documents/testMP3id3v1.mp3")) {
        autoDetect.parse(in, bodyText, extracted, context);
    }

    // Tag-derived metadata.
    assertEquals("audio/mpeg", extracted.get(Metadata.CONTENT_TYPE));
    assertEquals("Test Title", extracted.get(TikaCoreProperties.TITLE));
    assertEquals("Test Artist", extracted.get(TikaCoreProperties.CREATOR));
    assertEquals("Test Artist", extracted.get(Metadata.AUTHOR));

    // Tag values echoed into the body text.
    String content = bodyText.toString();
    String[] expectedSnippets = {
        "Test Title",
        "Test Artist",
        "Test Album",
        "2008",
        "Test Comment",
        "Rock"
    };
    for (String snippet : expectedSnippets) {
        assertContains(snippet, content);
    }

    // Audio stream details.
    assertEquals("MPEG 3 Layer III Version 1", extracted.get("version"));
    assertEquals("44100", extracted.get("samplerate"));
    assertEquals("1", extracted.get("channels"));
    checkDuration(extracted, 2);
}
use of org.apache.tika.sax.BodyContentHandler in project tika by apache.
the class Mp3ParserTest method testTIKA424.
/**
 * Checks the complicated set of ID3v2.4 tags in the MP3 attached to
 * TIKA-424. That file cannot be distributed with Tika, so unless it has
 * been downloaded and made available as a test resource this test
 * returns without checking anything.
 */
@Test
public void testTIKA424() throws Exception {
    // Rely on type auto-detection rather than naming the MP3 parser.
    Metadata extracted = new Metadata();
    ContentHandler bodyText = new BodyContentHandler();
    Parser autoDetect = new AutoDetectParser();

    try (InputStream in = Mp3ParserTest.class.getResourceAsStream("/test-documents/test2.mp3")) {
        if (in != null) {
            autoDetect.parse(in, bodyText, extracted, new ParseContext());
        } else {
            // Sample file is not present: skip the test.
            return;
        }
    }

    // Tag-derived metadata.
    assertEquals("audio/mpeg", extracted.get(Metadata.CONTENT_TYPE));
    assertEquals("Plus loin vers l'ouest", extracted.get(TikaCoreProperties.TITLE));
    assertEquals("Merzhin", extracted.get(TikaCoreProperties.CREATOR));
    assertEquals("Merzhin", extracted.get(Metadata.AUTHOR));

    // The title should also appear in the body text.
    String content = bodyText.toString();
    assertContains("Plus loin vers l'ouest", content);

    // Audio stream details.
    assertEquals("MPEG 3 Layer III Version 1", extracted.get("version"));
    assertEquals("44100", extracted.get("samplerate"));
    assertEquals("2", extracted.get("channels"));
}
Aggregations