Use of org.apache.tika.io.TikaInputStream in the tika project by apache.
Class TestContainerAwareDetector, method testOpenContainer.
/**
 * Checks the open-container state of a stream around detection: nothing is
 * attached before {@code detect} is called, and an {@code NPOIFSFileSystem}
 * is attached afterwards for the PowerPoint test document.
 */
@Test
public void testOpenContainer() throws Exception {
    final String resourcePath = "/test-documents/testPPT.ppt";
    try (TikaInputStream tis = TikaInputStream.get(TestContainerAwareDetector.class.getResource(resourcePath))) {
        // A freshly created stream has no container attached yet.
        assertNull(tis.getOpenContainer());

        MediaType expectedType = MediaType.parse("application/vnd.ms-powerpoint");
        assertEquals(expectedType, detector.detect(tis, new Metadata()));

        // After detection the stream exposes the container that was opened.
        Object container = tis.getOpenContainer();
        assertTrue(container instanceof NPOIFSFileSystem);
    }
}
Use of org.apache.tika.io.TikaInputStream in the tika project by apache.
Class TestContainerAwareDetector, method getTruncatedFile.
/**
 * Reads the first {@code n} bytes of a test document into memory and wraps
 * them in a {@link TikaInputStream}.
 *
 * @param name file name of the resource, looked up under {@code /test-documents/}
 * @param n    number of leading bytes to keep
 * @return a stream over exactly the first {@code n} bytes of the resource
 * @throws IOException if the resource does not exist, cannot be read, or
 *                     ends before {@code n} bytes have been read
 */
private TikaInputStream getTruncatedFile(String name, int n) throws IOException {
    String resourcePath = "/test-documents/" + name;
    try (InputStream input = TestContainerAwareDetector.class.getResourceAsStream(resourcePath)) {
        // getResourceAsStream signals a missing resource with null; report it
        // explicitly instead of failing later with a NullPointerException.
        if (input == null) {
            throw new IOException("Test resource not found: " + resourcePath);
        }
        byte[] bytes = new byte[n];
        int m = 0;
        // A single read may return fewer bytes than requested, so keep
        // reading until the buffer is full.
        while (m < bytes.length) {
            int i = input.read(bytes, m, bytes.length - m);
            if (i == -1) {
                throw new IOException("Unexpected end of stream");
            }
            m += i;
        }
        return TikaInputStream.get(bytes);
    }
}
Use of org.apache.tika.io.TikaInputStream in the tika project by apache.
Class TestContainerAwareDetector, method assertRemovalTempfiles.
/**
 * Runs detection on the named test document and asserts that the count of
 * temporary files after the stream is closed equals the count taken before
 * the stream was opened.
 *
 * @param fileName name of the document under {@code /test-documents/}
 */
private void assertRemovalTempfiles(String fileName) throws Exception {
    final int tempFilesBefore = countTemporaryFiles();
    final String resourcePath = "/test-documents/" + fileName;
    try (TikaInputStream tis = TikaInputStream.get(TestContainerAwareDetector.class.getResource(resourcePath))) {
        detector.detect(tis, new Metadata());
    }
    // Counted only once the try block has closed the stream.
    final int tempFilesAfter = countTemporaryFiles();
    assertEquals(tempFilesBefore, tempFilesAfter);
}
Use of org.apache.tika.io.TikaInputStream in the tika project by apache.
Class RFC822ParserTest, method testGetAttachmentsAsEmbeddedResources.
/**
 * TIKA-1222: when requested, every attachment of the mail must be
 * delivered to the handler as an embedded resource.
 */
@Test
public void testGetAttachmentsAsEmbeddedResources() throws Exception {
    TrackingHandler handler = new TrackingHandler();
    ContainerExtractor extractor = new ParserContainerExtractor();
    try (TikaInputStream mail = TikaInputStream.get(getStream("test-documents/testRFC822-multipart"))) {
        assertEquals(true, extractor.isSupported(mail));
        extractor.extract(mail, extractor, handler);
    }

    // All 3 parts must have been reported
    final int expectedParts = 3;
    assertEquals(expectedParts, handler.filenames.size());
    assertEquals(expectedParts, handler.mediaTypes.size());

    // None of the parts carries a filename
    for (int part = 0; part < expectedParts; part++) {
        assertEquals(null, handler.filenames.get(part));
    }

    // Every part does carry a media type
    MediaType[] expectedTypes = {
        MediaType.TEXT_PLAIN,
        MediaType.TEXT_HTML,
        MediaType.image("gif")
    };
    for (int part = 0; part < expectedTypes.length; part++) {
        assertEquals(expectedTypes[part], handler.mediaTypes.get(part));
    }
}
Use of org.apache.tika.io.TikaInputStream in the tika project by apache.
Class SQLite3ParserTest, method testParserContainerExtractor.
/**
 * Extracts the resources embedded in the SQLite test file through a
 * {@code ParserContainerExtractor} and checks the raw bytes that reach the
 * handler.
 */
@Test
public void testParserContainerExtractor() throws Exception {
    // The assertions below expect 4 embedded resources, in this order:
    //   0: an OLE2 compound document (verified through its leading bytes)
    //   1: data containing "PNG"
    //   2: data containing "PK"
    //   3: data containing "PNG"
    // NOTE(review): presumably these are the .doc and .docx files, each
    // followed by the image embedded in it -- confirm against the test file.
    ParserContainerExtractor ex = new ParserContainerExtractor();
    ByteCopyingHandler byteCopier = new ByteCopyingHandler();
    try (TikaInputStream is = TikaInputStream.get(getResourceAsStream(TEST_FILE1))) {
        ex.extract(is, ex, byteCopier);
    }
    assertEquals(4, byteCopier.bytes.size());

    // Resource 0 is checked byte-wise further down, so only resources 1..3
    // are decoded as text; at most the first 1000 bytes of each are used.
    String[] strings = new String[4];
    for (int i = 1; i < byteCopier.bytes.size(); i++) {
        byte[] byteArr = byteCopier.bytes.get(i);
        strings[i] = new String(byteArr, 0, Math.min(byteArr.length, 1000), UTF_8);
    }

    // OLE2 signature D0 CF 11 E0 A1 B1 1A E1, followed by two zero bytes
    byte[] oleBytes = new byte[] { (byte) -48, (byte) -49, (byte) 17, (byte) -32, (byte) -95, (byte) -79, (byte) 26, (byte) -31, (byte) 0, (byte) 0 };
    byte[] firstResource = byteCopier.bytes.get(0);
    for (int i = 0; i < oleBytes.length; i++) {
        assertEquals(oleBytes[i], firstResource[i]);
    }

    assertContains("PNG", strings[1]);
    assertContains("PK", strings[2]);
    assertContains("PNG", strings[3]);
}
Aggregations