use of org.apache.tika.mime.MimeTypeException in project lucene-solr by apache.
the class ExtractingRequestHandler method inform.
/**
 * Finishes initialisation once the core is available: loads the Tika config,
 * the parse-context config and the configured date formats from
 * {@code initArgs} (when present), falls back to defaults for anything that
 * was not configured, and finally creates the factory.
 *
 * @param core the core whose resource loader is used to resolve config resources
 * @throws SolrException with {@code SERVER_ERROR} when a configuration cannot be loaded
 */
@Override
public void inform(SolrCore core) {
  if (initArgs != null) {
    // A relative location is resolved against the config dir; an absolute one is used as-is.
    final String tikaLocation = (String) initArgs.get(CONFIG_LOCATION);
    if (tikaLocation != null) {
      File tikaFile = new File(tikaLocation);
      if (!tikaFile.isAbsolute()) {
        tikaFile = new File(core.getResourceLoader().getConfigDir(), tikaFile.getPath());
      }
      try {
        config = new TikaConfig(tikaFile);
      } catch (Exception e) {
        throw new SolrException(ErrorCode.SERVER_ERROR, e);
      }
    }

    final String contextLocation = (String) initArgs.get(PARSE_CONTEXT_CONFIG);
    if (contextLocation != null) {
      try {
        parseContextConfig = new ParseContextConfig(core.getResourceLoader(), contextLocation);
      } catch (Exception e) {
        throw new SolrException(ErrorCode.SERVER_ERROR, e);
      }
    }

    final NamedList formatArgs = (NamedList) initArgs.get(DATE_FORMATS);
    if (formatArgs != null && formatArgs.size() > 0) {
      dateFormats = new HashSet<>();
      for (Iterator<Map.Entry> entries = formatArgs.iterator(); entries.hasNext(); ) {
        final String pattern = (String) entries.next().getValue();
        log.info("Adding Date Format: " + pattern);
        dateFormats.add(pattern);
      }
    }
  }

  // Nothing configured (or no init args at all): use the default Tika config.
  if (config == null) {
    try {
      config = getDefaultConfig(core.getResourceLoader().getClassLoader());
    } catch (MimeTypeException | IOException e) {
      throw new SolrException(ErrorCode.SERVER_ERROR, e);
    }
  }

  if (parseContextConfig == null) {
    parseContextConfig = new ParseContextConfig();
  }

  factory = createFactory();
}
use of org.apache.tika.mime.MimeTypeException in project sling by apache.
the class TikaMimeTypeProvider method getExtension.
/**
 * Looks up the file extension registered for the given MIME type.
 *
 * @param mimeType the MIME type name to look up
 * @return the extension without its leading ".", or {@code null} when the
 *         lookup fails or no usable extension is registered
 */
public String getExtension(String mimeType) {
  String result = null;
  try {
    final String dotted = types.forName(mimeType).getExtension();
    // Only a value with something after the first character is usable;
    // the first character (the ".") is dropped.
    if (dotted != null && dotted.length() > 1) {
      result = dotted.substring(1);
    }
  } catch (MimeTypeException e) {
    // ignore: the lookup failed, so the null fallback is returned
  }
  return result;
}
use of org.apache.tika.mime.MimeTypeException in project nifi by apache.
the class IdentifyMimeType method onTrigger.
/**
 * Detects the MIME type of the next FlowFile's content and records it, along
 * with the matching file extension, as the {@code mime.type} core attribute
 * and the {@code mime.extension} attribute before routing the FlowFile to
 * {@code REL_SUCCESS}.
 *
 * <p>When no type was detected, {@code application/octet-stream} and an empty
 * extension are used instead.
 *
 * @param context the process context, consulted for {@code USE_FILENAME_IN_DETECTION}
 * @param session the session used to fetch, read, update and transfer the FlowFile
 */
@Override
public void onTrigger(final ProcessContext context, final ProcessSession session) {
    FlowFile flowFile = session.get();
    if (flowFile == null) {
        return;
    }
    final ComponentLog logger = getLogger();
    final AtomicReference<String> mimeTypeRef = new AtomicReference<>(null);
    final String filename = flowFile.getAttribute(CoreAttributes.FILENAME.key());
    session.read(flowFile, new InputStreamCallback() {
        @Override
        public void process(final InputStream stream) throws IOException {
            // The TikaInputStream is managed by try-with-resources as well, so
            // anything it allocates during detection is released when the
            // callback finishes, even if detection throws.
            try (final InputStream in = new BufferedInputStream(stream);
                 final TikaInputStream tikaStream = TikaInputStream.get(in)) {
                Metadata metadata = new Metadata();
                if (filename != null && context.getProperty(USE_FILENAME_IN_DETECTION).asBoolean()) {
                    // Give the detector the file name as an additional hint
                    metadata.add(TikaMetadataKeys.RESOURCE_NAME_KEY, filename);
                }
                // Get mime type
                MediaType mediatype = detector.detect(tikaStream, metadata);
                mimeTypeRef.set(mediatype.toString());
            }
        }
    });
    String mimeType = mimeTypeRef.get();
    String extension = "";
    try {
        MimeType mimetype;
        mimetype = config.getMimeRepository().forName(mimeType);
        extension = mimetype.getExtension();
    } catch (MimeTypeException ex) {
        // Extension stays empty; the FlowFile is still tagged with the MIME type below
        logger.warn("MIME type extension lookup failed: {}", new Object[] { ex });
    }
    // Workaround for bug in Tika - https://issues.apache.org/jira/browse/TIKA-1563
    if (mimeType != null && mimeType.equals("application/gzip") && extension.equals(".tgz")) {
        extension = ".gz";
    }
    if (mimeType == null) {
        flowFile = session.putAttribute(flowFile, CoreAttributes.MIME_TYPE.key(), "application/octet-stream");
        flowFile = session.putAttribute(flowFile, "mime.extension", "");
        logger.info("Unable to identify MIME Type for {}; setting to application/octet-stream", new Object[] { flowFile });
    } else {
        flowFile = session.putAttribute(flowFile, CoreAttributes.MIME_TYPE.key(), mimeType);
        flowFile = session.putAttribute(flowFile, "mime.extension", extension);
        logger.info("Identified {} as having MIME Type {}", new Object[] { flowFile, mimeType });
    }
    session.getProvenanceReporter().modifyAttributes(flowFile);
    session.transfer(flowFile, REL_SUCCESS);
}
use of org.apache.tika.mime.MimeTypeException in project ddf by codice.
the class DownloadManager method run.
/**
 * Downloads the resource at {@code url} into a file named
 * {@code outputFileName} plus the extension registered for the response's
 * content type.
 *
 * <p>A single connection is used for both the content type and the body, so
 * the reported type always belongs to the bytes that are written. Failures
 * are logged and otherwise ignored.
 */
@Override
public void run() {
    String mimeType = null;
    try {
        // NOTE(review): assumes the url field is a java.net.URL - confirm.
        // Fully qualified so the block does not rely on an import it cannot see.
        final java.net.URLConnection connection = url.openConnection();
        try (ReadableByteChannel byteChannel = Channels.newChannel(connection.getInputStream())) {
            mimeType = connection.getContentType();
            String fileExtension = allTypes.forName(mimeType).getExtension();
            LOGGER.debug("downloading product from: {}", url.toString());
            LOGGER.debug("mimetype is: {}", mimeType);
            LOGGER.debug("File Extension is: {}", fileExtension);
            try (FileOutputStream fileOutputStream = new FileOutputStream(outputFileName + fileExtension)) {
                // Long.MAX_VALUE: copy until the source channel is exhausted
                fileOutputStream.getChannel().transferFrom(byteChannel, 0, Long.MAX_VALUE);
            } catch (IOException e) {
                LOGGER.info("Error opening stream for {}", outputFileName, e);
            }
        }
    } catch (IOException e) {
        LOGGER.info("Error downloading file from url: {}", url, e);
    } catch (MimeTypeException e) {
        LOGGER.info("Error determining file extension from mimetype: {}", mimeType, e);
    }
}
use of org.apache.tika.mime.MimeTypeException in project tika by apache.
the class TikaCLI method compareFileMagic.
/**
 * Compares our mime types registry with the File(1) tool's
 * directory of (uncompiled) Magic entries.
 * (Well, those with mimetypes anyway)
 * <p>
 * The result is printed to standard output: the number of types and aliases
 * Tika knows, the number of types found in the magic directory, the types
 * File knows that Tika does not, and the types for which neither the Tika
 * type itself, its children nor its ancestors have magic.
 *
 * @param magicDir Path to the magic directory
 * @throws IllegalArgumentException if the directory does not look like a
 *         File magic directory or cannot be listed
 * @throws Exception if a magic file cannot be read
 */
private void compareFileMagic(String magicDir) throws Exception {
    Set<String> tikaLacking = new TreeSet<String>();
    Set<String> tikaNoMagic = new TreeSet<String>();
    // Sanity check: a real magic directory holds at least these three entries
    File dir = new File(magicDir);
    boolean looksPlausible = (new File(dir, "elf")).exists()
            && (new File(dir, "mime")).exists()
            && (new File(dir, "vorbis")).exists();
    if (!looksPlausible) {
        throw new IllegalArgumentException(magicDir + " doesn't seem to hold uncompressed file magic entries");
    }
    // listFiles() returns null when the path cannot be listed
    File[] magicFiles = dir.listFiles();
    if (magicFiles == null) {
        throw new IllegalArgumentException(magicDir + " could not be listed");
    }
    // Find all the mimetypes in the directory
    final String marker = "!:mime";
    Set<String> fileMimes = new HashSet<String>();
    for (File mf : magicFiles) {
        if (!mf.isFile()) {
            continue;
        }
        // try-with-resources so the reader is closed even when reading fails
        try (BufferedReader r = new BufferedReader(new InputStreamReader(new FileInputStream(mf), UTF_8))) {
            String line;
            while ((line = r.readLine()) != null) {
                if (line.startsWith("!:mime") || line.startsWith("#!:mime")) {
                    // Take everything after the marker; a bare marker line
                    // carries no type and is skipped instead of failing
                    String mime = line.substring(line.indexOf(marker) + marker.length()).trim();
                    if (!mime.isEmpty()) {
                        fileMimes.add(mime);
                    }
                }
            }
        }
    }
    // See how those compare to the Tika ones
    TikaConfig config = TikaConfig.getDefaultConfig();
    MimeTypes mimeTypes = config.getMimeRepository();
    MediaTypeRegistry registry = config.getMediaTypeRegistry();
    for (String mime : fileMimes) {
        try {
            final MimeType type = mimeTypes.getRegisteredMimeType(mime);
            if (type == null) {
                // Tika doesn't know about this one
                tikaLacking.add(mime);
            } else {
                // Tika knows about this one!
                // Does Tika have magic for it?
                boolean hasMagic = type.hasMagic();
                // How about the children?
                if (!hasMagic) {
                    for (MediaType child : registry.getChildTypes(type.getType())) {
                        MimeType childType = mimeTypes.getRegisteredMimeType(child.toString());
                        if (childType != null && childType.hasMagic()) {
                            hasMagic = true;
                            break;
                        }
                    }
                }
                // How about the parents?
                MimeType parentType = type;
                while (parentType != null && !hasMagic) {
                    if (parentType.hasMagic()) {
                        // Has magic, fine
                        hasMagic = true;
                    } else {
                        // Check the parent next. The lookup must start from the
                        // type currently being examined, otherwise the walk never
                        // climbs and cannot terminate on a magic-less parent.
                        MediaType parent = registry.getSupertype(parentType.getType());
                        if (parent == null
                                || MediaType.APPLICATION_XML.equals(parent)
                                || MediaType.TEXT_PLAIN.equals(parent)
                                || MediaType.OCTET_STREAM.equals(parent)) {
                            // Stop checking parents if we hit a top level type
                            parentType = null;
                        } else {
                            parentType = mimeTypes.getRegisteredMimeType(parent.toString());
                        }
                    }
                }
                if (!hasMagic) {
                    tikaNoMagic.add(mime);
                }
            }
        } catch (MimeTypeException e) {
            // Broken entry in the file magic directory
            // Silently skip
        }
    }
    // Check how many tika knows about
    int tikaTypes = 0;
    int tikaAliases = 0;
    for (MediaType type : registry.getTypes()) {
        tikaTypes++;
        tikaAliases += registry.getAliases(type).size();
    }
    // Report
    System.out.println("Tika knows about " + tikaTypes + " unique mime types");
    System.out.println("Tika knows about " + (tikaTypes + tikaAliases) + " mime types including aliases");
    System.out.println("The File Magic directory knows about " + fileMimes.size() + " unique mime types");
    System.out.println();
    System.out.println("The following mime types are known to File but not Tika:");
    for (String mime : tikaLacking) {
        System.out.println(" " + mime);
    }
    System.out.println();
    System.out.println("The following mime types from File have no Tika magic (but their children might):");
    for (String mime : tikaNoMagic) {
        System.out.println(" " + mime);
    }
}
Aggregations