Use of org.apache.tika.Tika in project tika by apache.
Class AudioParserTest, method testAIFF.
/**
 * Parses the bundled AIFF sample through the Tika facade and verifies the
 * detected content type and the audio properties recorded in the metadata.
 * The sample is expected to yield no text content.
 */
@Test
public void testAIFF() throws Exception {
    final String resource = "/test-documents/testAIFF.aif";
    final Metadata audioMetadata = new Metadata();
    final Tika facade = new Tika();

    // parseToString fills audioMetadata as a side effect of extracting the text.
    final String extractedText =
            facade.parseToString(AudioParserTest.class.getResourceAsStream(resource), audioMetadata);

    assertEquals("audio/x-aiff", audioMetadata.get(Metadata.CONTENT_TYPE));
    assertEquals("44100.0", audioMetadata.get("samplerate"));
    assertEquals("2", audioMetadata.get("channels"));
    assertEquals("16", audioMetadata.get("bits"));
    assertEquals("PCM_SIGNED", audioMetadata.get("encoding"));
    assertEquals("", extractedText);
}
Use of org.apache.tika.Tika in project tika by apache.
Class ObjectRecognitionParserTest, method jpegTesorflowTest.
/**
 * Parses the cat image with the Tika configuration loaded from
 * {@code CONFIG_FILE} and checks that every expected object label appears both
 * in the extracted text and in the metadata values stored under
 * {@code ObjectRecognitionParser.MD_KEY}.
 *
 * @throws IOException   if a resource cannot be read
 * @throws TikaException if the configuration cannot be built
 * @throws SAXException  if the configuration XML cannot be parsed
 */
@Ignore("If tensorflow not available Ignore")
@Test
public void jpegTesorflowTest() throws IOException, TikaException, SAXException {
    try (InputStream stream = loader.getResourceAsStream(CONFIG_FILE)) {
        // A JUnit assertion is used instead of the `assert` keyword so the check
        // still runs when JVM assertions are disabled.
        Assert.assertNotNull("Config resource not found: " + CONFIG_FILE, stream);
        Tika tika = new Tika(new TikaConfig(stream));
        Metadata metadata = new Metadata();
        try (InputStream imageStream = loader.getResourceAsStream(CAT_IMAGE)) {
            Assert.assertNotNull("Image resource not found: " + CAT_IMAGE, imageStream);
            // The reader returned by Tika.parse owns the parsing resources, so it is
            // closed explicitly rather than left to the garbage collector.
            try (Reader reader = tika.parse(imageStream, metadata)) {
                List<String> lines = IOUtils.readLines(reader);
                String text = StringUtils.join(lines, " ");
                String[] expectedObjects = { "Egyptian cat", "tabby, tabby cat" };
                String metaValues = StringUtils.join(metadata.getValues(ObjectRecognitionParser.MD_KEY), " ");
                for (String expectedObject : expectedObjects) {
                    String message = "'" + expectedObject + "' must have been detected";
                    Assert.assertTrue(message, text.contains(expectedObject));
                    Assert.assertTrue(message, metaValues.contains(expectedObject));
                }
            }
        }
    }
}
Use of org.apache.tika.Tika in project tika by apache.
Class ObjectRecognitionParserTest, method testREST.
/**
 * Parses the cat image with the Tika configuration loaded from
 * {@code CONFIG_REST_FILE} and checks that every expected object label appears
 * both in the extracted text and in the metadata values stored under
 * {@code ObjectRecognitionParser.MD_KEY}.
 *
 * @throws Exception if a resource cannot be read, the configuration cannot be
 *                   built, or parsing fails
 */
@Ignore("Configure Rest API service")
@Test
public void testREST() throws Exception {
    try (InputStream stream = loader.getResourceAsStream(CONFIG_REST_FILE)) {
        // A JUnit assertion is used instead of the `assert` keyword so the check
        // still runs when JVM assertions are disabled.
        Assert.assertNotNull("Config resource not found: " + CONFIG_REST_FILE, stream);
        Tika tika = new Tika(new TikaConfig(stream));
        Metadata metadata = new Metadata();
        try (InputStream imageStream = loader.getResourceAsStream(CAT_IMAGE)) {
            Assert.assertNotNull("Image resource not found: " + CAT_IMAGE, imageStream);
            // The reader returned by Tika.parse owns the parsing resources, so it is
            // closed explicitly rather than left to the garbage collector.
            try (Reader reader = tika.parse(imageStream, metadata)) {
                String text = IOUtils.toString(reader);
                String[] expectedObjects = { "Egyptian cat", "tabby, tabby cat" };
                String metaValues = StringUtils.join(metadata.getValues(ObjectRecognitionParser.MD_KEY), " ");
                for (String expectedObject : expectedObjects) {
                    String message = "'" + expectedObject + "' must have been detected";
                    Assert.assertTrue(message, text.contains(expectedObject));
                    Assert.assertTrue(message, metaValues.contains(expectedObject));
                }
            }
        }
    }
}
Use of org.apache.tika.Tika in project tika by apache.
Class FLVParserTest, method testFLV.
/**
 * Parses the bundled FLV sample through the Tika facade and verifies that no
 * text is extracted, that the content type is detected, and that the expected
 * video/audio properties are recorded in the metadata.
 */
@Test
public void testFLV() throws Exception {
    final String resource = "/test-documents/testFLV.flv";
    final Metadata flvMetadata = new Metadata();
    final Tika facade = new Tika();

    // parseToString fills flvMetadata as a side effect of extracting the text.
    final String extractedText =
            facade.parseToString(FLVParserTest.class.getResourceAsStream(resource), flvMetadata);

    assertEquals("", extractedText);
    assertEquals("video/x-flv", flvMetadata.get(Metadata.CONTENT_TYPE));
    assertEquals("true", flvMetadata.get("hasVideo"));
    assertEquals("false", flvMetadata.get("stereo"));
    assertEquals("true", flvMetadata.get("hasAudio"));
    assertEquals("120.0", flvMetadata.get("height"));
    assertEquals("16.0", flvMetadata.get("audiosamplesize"));
}
Use of org.apache.tika.Tika in project ddf by codice.
Class URLResourceReader, method getMimeType.
/**
 * Resolves a mime type for a resource in two stages.
 *
 * <ol>
 *   <li>If a {@code mimeTypeMapper} is configured, look up the file extension of
 *       {@code productName}.</li>
 *   <li>If that gave nothing usable (null, empty, or {@code DEFAULT_MIME_TYPE})
 *       and the URI uses the file scheme, let Apache Tika detect the type from
 *       the URL.</li>
 * </ol>
 *
 * A null result, or the value {@code "content/unknown"}, is replaced by
 * {@code "application/unknown"}, so this method never returns null.
 *
 * @param resourceURI location of the resource; its scheme is inspected only when
 *                    the mapper did not produce a usable mime type
 * @param productName name whose file extension is passed to the mapper
 * @return the resolved mime type, or {@code "application/unknown"}
 * @throws MimeTypeResolutionException declared for the mapper lookup
 * @throws IOException                 declared for URL conversion and Tika detection
 */
private String getMimeType(URI resourceURI, String productName) throws MimeTypeResolutionException, IOException {
    String resolved = null;

    // Stage 1: extension-based lookup through the configured mapper, if any.
    if (mimeTypeMapper != null) {
        String extension = FilenameUtils.getExtension(productName);
        resolved = mimeTypeMapper.getMimeTypeForFileExtension(extension);
    } else {
        LOGGER.debug("mimeTypeMapper is NULL");
    }

    // Stage 2: fall back to Tika only when stage 1 was inconclusive AND the URI is
    // file-based. The && keeps the scheme lookup from running when the mapper
    // already produced a usable type.
    boolean mapperInconclusive =
            resolved == null || resolved.isEmpty() || resolved.equals(DEFAULT_MIME_TYPE);
    boolean detectWithTika =
            mapperInconclusive && URL_FILE_SCHEME.equalsIgnoreCase(resourceURI.getScheme());

    if (!detectWithTika) {
        LOGGER.debug("mimeType = {} set by MimeTypeMapper", resolved);
    } else {
        resolved = new Tika().detect(resourceURI.toURL());
        LOGGER.debug("Tika determined mimeType for url = {}", resolved);
    }

    // Normalise "nothing found" so that null is never returned.
    String result = (resolved == null || resolved.equals("content/unknown"))
            ? "application/unknown"
            : resolved;
    LOGGER.debug("mimeType set to: {}", result);
    return result;
}
Aggregations