use of org.apache.tika.mime.MimeTypeException in project tika by apache.
the class TensorflowRESTVideoRecogniser method getApiUri.
/**
 * Builds the URI of the recognition endpoint for the given document.
 *
 * <p>The file extension matching the document's {@code Content-Type} metadata
 * entry is appended as the {@code ext} query parameter; it is required for
 * OpenCV in InceptionAPI to decode the video.
 *
 * @param metadata metadata of the document; its {@code Content-Type} entry is
 *        used to look up the extension
 * @return the configured API URI with the {@code ext} query parameter added,
 *         or the unmodified API URI when the content type cannot be resolved
 */
@Override
protected URI getApiUri(Metadata metadata) {
    TikaConfig config = TikaConfig.getDefaultConfig();
    try {
        MimeType mimeType = config.getMimeRepository().forName(metadata.get("Content-Type"));
        String ext = mimeType.getExtension();
        return UriBuilder.fromUri(apiUri).queryParam("ext", ext).build();
    } catch (MimeTypeException e) {
        // Keep the cause in the log so an unresolvable content type can be diagnosed.
        LOG.error("Can't find extension from metadata", e);
        return apiUri;
    }
}
use of org.apache.tika.mime.MimeTypeException in project nutch by apache.
the class MimeUtil method autoResolveContentType.
/**
 * A facade for trying all the possible mime type resolution strategies
 * available within Tika. First, the mime type provided in
 * <code>typeName</code> is cleaned with {@link #cleanMimeType(String)}. Then
 * the cleaned mime type is looked up in the underlying Tika {@link MimeTypes}
 * registry by its cleaned name. If the {@link MimeType} is found and is not
 * the default <code>application/octet-stream</code>, that mime type is used;
 * otherwise URL resolution is used to try and determine the mime type.
 * Additionally, if <code>mime.type.magic</code> is enabled in
 * {@link NutchConfiguration}, mime type magic resolution is used to try and
 * obtain a better-than-the-default approximation of the {@link MimeType}.
 *
 * <p>The final fallback to {@link MimeTypes#OCTET_STREAM} is only applied when
 * mime type magic is enabled; with magic disabled the result of the
 * name/URL resolution is returned as is.
 *
 * @param typeName
 *          The original mime type, returned from a {@link ProtocolOutput}.
 * @param url
 *          The URL that Nutch was trying to crawl.
 * @param data
 *          The byte data, returned from the crawl, if any.
 * @return The automatically guessed {@link MimeType} name.
 * @throws RuntimeException
 *           if URL-based detection fails; the original exception is the cause.
 */
public String autoResolveContentType(String typeName, String url, byte[] data) {
    String retType;
    MimeType type = null;
    String cleanedMimeType = MimeUtil.cleanMimeType(typeName);

    // first try to get the type from the cleaned type name
    if (cleanedMimeType != null) {
        try {
            type = mimeTypes.forName(cleanedMimeType);
            cleanedMimeType = type.getName();
        } catch (MimeTypeException mte) {
            // Seems to be a malformed mime type name...
            cleanedMimeType = null;
        }
    }

    // if the lookup returned nothing, or only the default type, then guess a
    // mime type from the url pattern
    if (type == null || type.getName().equals(MimeTypes.OCTET_STREAM)) {
        try {
            retType = tika.detect(url);
        } catch (Exception e) {
            String message = "Problem loading default Tika configuration";
            LOG.error(message, e);
            throw new RuntimeException(e);
        }
    } else {
        retType = type.getName();
    }

    if (this.mimeMagic) {
        String magicType = null;
        // pass URL (file name) and (cleansed) content type from protocol to Tika
        Metadata tikaMeta = new Metadata();
        tikaMeta.add(Metadata.RESOURCE_NAME_KEY, url);
        tikaMeta.add(Metadata.CONTENT_TYPE, (cleanedMimeType != null ? cleanedMimeType : typeName));
        try (InputStream stream = TikaInputStream.get(data)) {
            magicType = mimeTypes.detect(stream, tikaMeta).toString();
        } catch (IOException e) {
            // Magic detection is best effort: keep the type resolved so far,
            // but leave a trace instead of swallowing the failure silently.
            LOG.debug("Mime type magic detection failed", e);
        }

        // If magic is enabled and it yields a specific type (neither the
        // default binary type nor plain text) that differs from the current
        // mime type, take the mime type returned by the magic.
        if (magicType != null
                && !magicType.equals(MimeTypes.OCTET_STREAM)
                && !magicType.equals(MimeTypes.PLAIN_TEXT)
                && retType != null
                && !retType.equals(magicType)) {
            retType = magicType;
        }

        // if the type is still unknown after all the resolution strategies,
        // go for the default type
        if (retType == null) {
            retType = MimeTypes.OCTET_STREAM;
        }
    }
    return retType;
}
use of org.apache.tika.mime.MimeTypeException in project xwiki-platform by xwiki.
the class ImageFilter method getFileName.
/**
 * Determines the file name of the image referenced by the given attribute.
 *
 * <p>Two kinds of attribute values are handled:
 * <ul>
 * <li>a {@code data:} URI, when the owner document carries the
 * {@code EMBEDDED_IMAGES} user data map: the decoded image bytes are stored in
 * that map under the returned file name. If the data URI has no file name, one
 * is generated from the URI's hash code plus the extension registered for its
 * mime type (if it declares one);</li>
 * <li>a value without {@code "://"}: treated as a relative path, whose last
 * path segment is URL-decoded and returned.</li>
 * </ul>
 *
 * @param source the attribute holding the image reference
 * @return the image file name, or {@code null} when the value is neither a
 *         handled data URI nor a relative path
 * @throws MimeTypeException if the mime type declared by the data URI cannot
 *         be looked up
 */
private String getFileName(Attr source) throws MimeTypeException {
    String value = source.getValue();
    String fileName = null;
    @SuppressWarnings("unchecked")
    Map<String, byte[]> embeddedImages =
        (Map<String, byte[]>) source.getOwnerDocument().getUserData(EMBEDDED_IMAGES);
    if (embeddedImages != null && value.startsWith("data:")) {
        // An image embedded using the Data URI scheme.
        DataUri dataURI = DataUri.parse(value, Charset.forName(UTF_8));
        fileName = dataURI.getFilename();
        if (StringUtils.isEmpty(fileName)) {
            // Widen to long before abs(): Math.abs(Integer.MIN_VALUE) is still
            // negative and would yield a file name starting with '-'.
            fileName = String.valueOf(Math.abs((long) dataURI.hashCode()));
            if (!StringUtils.isEmpty(dataURI.getMime())) {
                String extension = MimeTypes.getDefaultMimeTypes().forName(dataURI.getMime()).getExtension();
                fileName += extension;
            }
        }
        embeddedImages.put(fileName, dataURI.getData());
    } else if (!value.contains("://")) {
        // A relative path.
        int separator = value.lastIndexOf('/');
        fileName = separator < 0 ? value : value.substring(separator + 1);
        try {
            // We have to decode the image file name in case it contains URL special characters.
            fileName = URLDecoder.decode(fileName, UTF_8);
        } catch (Exception e) {
            // Decoding failed (unsupported encoding or a malformed escape
            // sequence). Deliberately fall back to the encoded image file name.
        }
    }
    return fileName;
}
Aggregations