Use of org.apache.tika.detect.DefaultProbDetector in project ddf by codice.
Class OperationsMetacardSupport, method guessMimeType.
/**
 * Refines a MIME type when the caller supplied only the generic default.
 *
 * <p>If {@code mimeTypeRaw} is not {@link ContentItem#DEFAULT_MIME_TYPE} it is returned unchanged.
 * Otherwise the content is inspected in up to three steps:
 *
 * <ol>
 *   <li>the configured MIME type mapper is asked for a guess based on the content stream and the
 *       extension of {@code fileName};
 *   <li>if the type is still the default, a {@code DefaultProbDetector} inspects the content;
 *   <li>if the result is {@code text/plain}, the first non-blank line is examined: a leading
 *       {@code <} yields {@code text/xml}, a leading <code>{</code> or {@code [} yields
 *       {@code application/json}.
 * </ol>
 *
 * <p>Failures in steps 2 and 3, and a {@code MimeTypeResolutionException} in step 1, are logged at
 * debug level and leave the current best guess in place.
 *
 * <p>Package-private for unit testing.
 *
 * @param mimeTypeRaw the MIME type reported for the content
 * @param fileName the file name whose extension is passed to the MIME type mapper
 * @param tmpContentPath path of the file holding the content to inspect
 * @return the refined MIME type, or {@code mimeTypeRaw} if nothing better was found
 * @throws IOException if the content stream used for the mapper-based guess cannot be opened or
 *     closed
 */
String guessMimeType(String mimeTypeRaw, String fileName, Path tmpContentPath) throws IOException {
    if (!ContentItem.DEFAULT_MIME_TYPE.equals(mimeTypeRaw)) {
        return mimeTypeRaw;
    }

    // Step 1: mapper-based guess from the stream content and the file extension.
    try (InputStream contentStream =
            com.google.common.io.Files.asByteSource(tmpContentPath.toFile()).openStream()) {
        String mimeTypeGuess =
                frameworkProperties
                        .getMimeTypeMapper()
                        .guessMimeType(contentStream, FilenameUtils.getExtension(fileName));
        if (StringUtils.isNotEmpty(mimeTypeGuess)) {
            mimeTypeRaw = mimeTypeGuess;
        }
    } catch (MimeTypeResolutionException e) {
        LOGGER.debug(MIME_TYPE_MSG, e);
    }

    // Step 2: fall back to Tika's probabilistic detector when the mapper found nothing.
    if (ContentItem.DEFAULT_MIME_TYPE.equals(mimeTypeRaw)) {
        Detector detector = new DefaultProbDetector();
        try (InputStream contentStream = TikaInputStream.get(tmpContentPath)) {
            MediaType mediaType = detector.detect(contentStream, new Metadata());
            mimeTypeRaw = mediaType.toString();
        } catch (IOException e) {
            LOGGER.debug(MIME_TYPE_MSG, e);
        }
    }

    // Step 3: plain text may really be XML or JSON; peek at the first non-blank line.
    if ("text/plain".equals(mimeTypeRaw)) {
        try (InputStream contentStream =
                        com.google.common.io.Files.asByteSource(tmpContentPath.toFile()).openStream();
                BufferedReader reader =
                        new BufferedReader(
                                new InputStreamReader(
                                        contentStream, java.nio.charset.StandardCharsets.UTF_8))) {
            String firstLine =
                    reader.lines()
                            .map(String::trim)
                            .filter(StringUtils::isNotEmpty)
                            .findFirst()
                            .orElse("");
            if (firstLine.startsWith("<")) {
                mimeTypeRaw = "text/xml";
            } else if (firstLine.startsWith("{") || firstLine.startsWith("[")) {
                mimeTypeRaw = "application/json";
            }
        } catch (IOException | java.io.UncheckedIOException e) {
            // BufferedReader.lines() reports read failures as UncheckedIOException, so both
            // forms must be caught to keep this sniffing step best-effort.
            LOGGER.debug(MIME_TYPE_MSG, e);
        }
    }
    return mimeTypeRaw;
}
Use of org.apache.tika.detect.DefaultProbDetector in project tika by apache.
Class ProbabilisticMimeDetectionTestWithTika, method setUp.
/**
 * Prepares the fixture: stores the default media type registry, creates a probabilistic
 * selector with all three priors set to 0.5, and wraps a {@code DefaultProbDetector} built on
 * that selector in the {@code Tika} instance used by the tests.
 */
@Before
public void setUp() {
    MimeTypes mimeTypes = MimeTypes.getDefaultMimeTypes();
    registry = mimeTypes.getMediaTypeRegistry();

    // The selector is configured through its builder; magic, extension and metadata
    // priors are all given the same value.
    proSelector =
            new ProbabilisticMimeDetectionSelector(
                    mimeTypes,
                    new ProbabilisticMimeDetectionSelector.Builder()
                            .priorMagicFileType(0.5f)
                            .priorExtensionFileType(0.5f)
                            .priorMetaFileType(0.5f));

    // A default Tika, except that detection goes through our own detector.
    ServiceLoader serviceLoader = new ServiceLoader();
    DefaultProbDetector probDetector = new DefaultProbDetector(proSelector, serviceLoader);
    tika = new Tika(probDetector);
}
Aggregations