use of org.apache.tika.mime.MimeType in project ddf by codice.
the class TikaMimeTypeResolver method getFileExtensionForMimeType.
/**
 * Looks up the file extension registered for the given MIME type in the
 * configured MIME repository.
 *
 * @param contentType the MIME type name to resolve; may be null or empty
 * @return the extension reported by the repository, or {@code null} when
 *         {@code contentType} is empty or the lookup threw an exception
 */
@Override
public String getFileExtensionForMimeType(String contentType) {
    LOGGER.trace("ENTERING: getFileExtensionForMimeType()");

    // The repository is fetched up front, regardless of whether a lookup will happen.
    final MimeTypes repository = config.getMimeRepository();
    String fileExtension = null;

    if (StringUtils.isNotEmpty(contentType)) {
        try {
            fileExtension = repository.forName(contentType).getExtension();
        } catch (Exception e) {
            // Best effort: a failed lookup is logged and reported as "no extension".
            LOGGER.debug("Exception caught getting file extension for mime type {}", contentType, e);
        }
    }

    LOGGER.debug("mimeType = {}, file extension = [{}]", contentType, fileExtension);
    LOGGER.trace("EXITING: getFileExtensionForMimeType()");
    return fileExtension;
}
use of org.apache.tika.mime.MimeType in project Xponents by OpenSextant.
the class HyperLink method setMIMEType.
/**
 * Records the MIME type of a found link, e.g. once the content has been
 * downloaded and its Content-Type is known. That type may differ from what
 * the URL path suggested.
 *
 * <p>When the type (stripped of any {@code ;parameters}) resolves to a known
 * MIME type, the path extension is re-fixed from that type's extension.
 * Call this before saving content to disk.
 *
 * @param t
 *            the new MIME type; {@code null} is stored and nothing else happens
 */
public void setMIMEType(String t) {
    mimeType = t;
    if (t == null) {
        return;
    }

    // Keep only the bare type: everything before the first ';' (parameters dropped).
    final String bareType = t.split(";", 2)[0];

    try {
        final MimeType resolved = defaultMIME.forName(bareType);
        if (resolved != null) {
            fixPathExtension(resolved.getExtension());
        }
    } catch (MimeTypeException ignore) {
        // Type name could not be resolved; the path is left as it was.
    }
}
use of org.apache.tika.mime.MimeType in project tika by apache.
the class AbstractPOIFSExtractor method handleEmbeddedOfficeDoc.
/**
 * Handle an office document that's embedded at the POIFS level.
 *
 * <p>If {@code dir} contains a {@code "Package"} entry it is treated as OOXML:
 * the entry is type-detected and handed to {@code handleEmbeddedResource}.
 * Otherwise the directory is treated as regular OLE2: metadata is built from
 * the detected {@link POIFSDocumentType} and, if the embedded document should
 * be parsed, it is passed to {@code embeddedDocumentUtil.parseEmbedded}.
 *
 * @param dir          the POIFS directory holding the embedded document
 * @param resourceName name to use for the resource; when {@code null},
 *                     {@code dir.getName()} is used instead
 * @param xhtml        handler that receives the embedded document's output
 * @throws IOException   declared by the signature; IOExceptions raised in the
 *                       OLE2 branch are caught and recorded instead of thrown
 * @throws SAXException  declared by the signature for the handler calls
 * @throws TikaException declared by the signature for the parsing calls
 */
protected void handleEmbeddedOfficeDoc(DirectoryEntry dir, String resourceName, XHTMLContentHandler xhtml) throws IOException, SAXException, TikaException {
    if (dir.hasEntry("Package")) {
        // It's OOXML (has a ZipFile):
        Entry ooxml = dir.getEntry("Package");
        try (TikaInputStream stream = TikaInputStream.get(new DocumentInputStream((DocumentEntry) ooxml))) {
            ZipContainerDetector detector = new ZipContainerDetector();
            MediaType type = null;
            try {
                //if there's a stream error while detecting...
                type = detector.detect(stream, new Metadata());
            } catch (Exception e) {
                EmbeddedDocumentUtil.recordEmbeddedStreamException(e, parentMetadata);
                return;
            }
            handleEmbeddedResource(stream, null, dir.getName(), dir.getStorageClsid(), type.toString(), xhtml, true);
            return;
        }
    }
    // It's regular OLE2:
    // What kind of document is it?
    Metadata metadata = new Metadata();
    metadata.set(Metadata.EMBEDDED_RELATIONSHIP_ID, dir.getName());
    if (dir.getStorageClsid() != null) {
        metadata.set(Metadata.EMBEDDED_STORAGE_CLASS_ID, dir.getStorageClsid().toString());
    }
    POIFSDocumentType type = POIFSDocumentType.detectType(dir);
    TikaInputStream embedded = null;
    String rName = (resourceName == null) ? dir.getName() : resourceName;
    try {
        if (type == POIFSDocumentType.OLE10_NATIVE) {
            try {
                // Try to un-wrap the OLE10Native record:
                Ole10Native ole = Ole10Native.createFromEmbeddedOleObject((DirectoryNode) dir);
                if (ole.getLabel() != null) {
                    metadata.set(Metadata.RESOURCE_NAME_KEY, rName + '/' + ole.getLabel());
                }
                if (ole.getCommand() != null) {
                    metadata.add(TikaCoreProperties.ORIGINAL_RESOURCE_NAME, ole.getCommand());
                }
                if (ole.getFileName() != null) {
                    metadata.add(TikaCoreProperties.ORIGINAL_RESOURCE_NAME, ole.getFileName());
                }
                byte[] data = ole.getDataBuffer();
                embedded = TikaInputStream.get(data);
            } catch (Ole10NativeException ex) {
                // Not a valid OLE10Native record, skip it
            } catch (Exception e) {
                EmbeddedDocumentUtil.recordEmbeddedStreamException(e, parentMetadata);
                return;
            }
        } else if (type == POIFSDocumentType.COMP_OBJ) {
            try {
                //TODO: figure out if the equivalent of OLE 1.0's
                //getCommand() and getFileName() exist for OLE 2.0 to populate
                //TikaCoreProperties.ORIGINAL_RESOURCE_NAME
                // Grab the contents and process
                DocumentEntry contentsEntry;
                try {
                    contentsEntry = (DocumentEntry) dir.getEntry("CONTENTS");
                } catch (FileNotFoundException ioe) {
                    // Fall back to the mixed-case entry name
                    contentsEntry = (DocumentEntry) dir.getEntry("Contents");
                }
                byte[] contents;
                // Read the whole entry into memory, then release the POIFS stream.
                try (DocumentInputStream inp = new DocumentInputStream(contentsEntry)) {
                    contents = new byte[contentsEntry.getSize()];
                    inp.readFully(contents);
                }
                embedded = TikaInputStream.get(contents);
                // Try to work out what it is
                MediaType mediaType = getDetector().detect(embedded, new Metadata());
                String extension = type.getExtension();
                try {
                    MimeType mimeType = getMimeTypes().forName(mediaType.toString());
                    extension = mimeType.getExtension();
                } catch (MimeTypeException mte) {
                    // No details on this type are known
                }
                // Record what we can do about it
                // NOTE(review): this stores mediaType.getType(), whereas forName() above is
                // given mediaType.toString() -- confirm the full type/subtype is not intended.
                metadata.set(Metadata.CONTENT_TYPE, mediaType.getType().toString());
                // NOTE(review): no '.' separator is added here, unlike the generic branch
                // below -- presumably MimeType.getExtension() already carries it; verify
                // the fallback from type.getExtension() as well.
                metadata.set(Metadata.RESOURCE_NAME_KEY, rName + extension);
            } catch (Exception e) {
                EmbeddedDocumentUtil.recordEmbeddedStreamException(e, parentMetadata);
                return;
            }
        } else {
            metadata.set(Metadata.CONTENT_TYPE, type.getType().toString());
            metadata.set(Metadata.RESOURCE_NAME_KEY, rName + '.' + type.getExtension());
        }
        // Should we parse it?
        if (embeddedDocumentUtil.shouldParseEmbedded(metadata)) {
            if (embedded == null) {
                // Make a TikaInputStream that just
                // passes the root directory of the
                // embedded document, and is otherwise
                // empty (byte[0]):
                embedded = TikaInputStream.get(new byte[0]);
                embedded.setOpenContainer(dir);
            }
            embeddedDocumentUtil.parseEmbedded(embedded, xhtml, metadata, true);
        }
    } catch (IOException e) {
        // NOTE(review): recorded on the local metadata, while the inner handlers
        // record on parentMetadata -- confirm this difference is intentional.
        EmbeddedDocumentUtil.recordEmbeddedStreamException(e, metadata);
    } finally {
        if (embedded != null) {
            embedded.close();
        }
    }
}
use of org.apache.tika.mime.MimeType in project tika by apache.
the class EmbeddedDocumentUtil method getExtension.
/**
 * Works out a file extension for the given stream.
 *
 * <p>The {@code Content-Type} already present in {@code metadata} is tried
 * first. If it is absent or cannot be resolved, the configured detector is
 * run on the stream, and on success the detected type is written back to
 * {@code metadata}.
 *
 * @param is       stream handed to the detector when the metadata type is unusable
 * @param metadata source of the declared content type; updated when detection is used
 * @return the extension of the resolved MIME type, or {@code ".bin"} when
 *         no MIME type could be resolved
 */
public String getExtension(TikaInputStream is, Metadata metadata) {
    final String declaredType = metadata.get(Metadata.CONTENT_TYPE);
    final TikaConfig tikaConfig = getConfig();
    final MimeTypes repository = tikaConfig.getMimeRepository();

    MimeType resolved = null;
    boolean fromDetection = false;

    if (declaredType != null) {
        try {
            resolved = repository.forName(declaredType);
        } catch (MimeTypeException e) {
            //swallow
        }
    }

    if (resolved == null) {
        final Detector detector = tikaConfig.getDetector();
        try {
            final MediaType sniffed = detector.detect(is, metadata);
            resolved = repository.forName(sniffed.toString());
            // Flag is raised before reset() so that a failing reset still keeps the result.
            fromDetection = true;
            is.reset();
        } catch (IOException e) {
            //swallow
        } catch (MimeTypeException e) {
            //swallow
        }
    }

    if (resolved == null) {
        return ".bin";
    }
    if (fromDetection) {
        //set or correct the mime type
        metadata.set(Metadata.CONTENT_TYPE, resolved.toString());
    }
    return resolved.getExtension();
}
use of org.apache.tika.mime.MimeType in project tika by apache.
the class TensorflowRESTVideoRecogniser method getApiUri.
/**
 * Builds the API URI for a video, adding an {@code ext} query parameter taken
 * from the MIME type named by the metadata's {@code Content-Type}.
 * The extension is required for OpenCv in InceptionAPI to decode video.
 *
 * @param metadata metadata whose {@code "Content-Type"} value names the MIME type
 * @return {@code apiUri} with the {@code ext} query parameter, or the plain
 *         {@code apiUri} when the MIME type lookup fails
 */
@Override
protected URI getApiUri(Metadata metadata) {
    TikaConfig config = TikaConfig.getDefaultConfig();
    try {
        MimeType mimeType = config.getMimeRepository().forName(metadata.get("Content-Type"));
        return UriBuilder.fromUri(apiUri).queryParam("ext", mimeType.getExtension()).build();
    } catch (MimeTypeException e) {
        // Keep the cause in the log so the failing content type can be diagnosed.
        LOG.error("Can't find extension from metadata", e);
        return apiUri;
    }
}
Aggregations