Search in sources :

Example 1 with MimeType

use of org.apache.tika.mime.MimeType in project sling by apache.

the class TikaMimeTypeProvider method getExtension.

/**
 * Looks up the file extension registered for the given MIME type name.
 *
 * @param mimeType the MIME type name to resolve
 * @return the extension without its leading ".", or {@code null} when the
 *         name cannot be resolved or no usable extension is registered
 */
public String getExtension(String mimeType) {
    final String dottedExtension;
    try {
        dottedExtension = types.forName(mimeType).getExtension();
    } catch (MimeTypeException ignored) {
        // The name could not be resolved to a type; report "no extension".
        return null;
    }
    // Anything of length 0 or 1 has nothing left after the leading ".".
    if (dottedExtension == null || dottedExtension.length() <= 1) {
        return null;
    }
    // Drop the leading "."
    return dottedExtension.substring(1);
}
Also used : MimeTypeException(org.apache.tika.mime.MimeTypeException) MimeType(org.apache.tika.mime.MimeType)

Example 2 with MimeType

use of org.apache.tika.mime.MimeType in project nifi by apache.

the class IdentifyMimeType method onTrigger.

/**
 * Detects the MIME type of the incoming FlowFile's content and records it,
 * together with the matching file extension, as FlowFile attributes before
 * routing the FlowFile to {@code REL_SUCCESS}.
 *
 * <p>When no MIME type could be determined the FlowFile is tagged with
 * {@code application/octet-stream} and an empty extension.
 *
 * @param context the process context, consulted for the
 *                {@code USE_FILENAME_IN_DETECTION} property
 * @param session the session used to read, update and transfer the FlowFile
 */
@Override
public void onTrigger(final ProcessContext context, final ProcessSession session) {
    FlowFile flowFile = session.get();
    if (flowFile == null) {
        return;
    }
    final ComponentLog logger = getLogger();
    final AtomicReference<String> mimeTypeRef = new AtomicReference<>(null);
    final String filename = flowFile.getAttribute(CoreAttributes.FILENAME.key());
    session.read(flowFile, new InputStreamCallback() {

        @Override
        public void process(final InputStream stream) throws IOException {
            // Manage the TikaInputStream in the same try-with-resources header so that
            // whatever it allocates during detection is released, not only the
            // underlying buffered stream.
            try (final InputStream in = new BufferedInputStream(stream);
                    final TikaInputStream tikaStream = TikaInputStream.get(in)) {
                Metadata metadata = new Metadata();
                if (filename != null && context.getProperty(USE_FILENAME_IN_DETECTION).asBoolean()) {
                    // Let the detector take the file name into account as a hint
                    metadata.add(TikaMetadataKeys.RESOURCE_NAME_KEY, filename);
                }
                // Get mime type
                MediaType mediatype = detector.detect(tikaStream, metadata);
                mimeTypeRef.set(mediatype.toString());
            }
        }
    });
    String mimeType = mimeTypeRef.get();
    String extension = "";
    try {
        MimeType mimetype;
        mimetype = config.getMimeRepository().forName(mimeType);
        extension = mimetype.getExtension();
    } catch (MimeTypeException ex) {
        // Keep the empty default extension; the lookup failure is not fatal
        logger.warn("MIME type extension lookup failed: {}", new Object[] { ex });
    }
    // Workaround for bug in Tika - https://issues.apache.org/jira/browse/TIKA-1563
    if (mimeType != null && mimeType.equals("application/gzip") && extension.equals(".tgz")) {
        extension = ".gz";
    }
    if (mimeType == null) {
        flowFile = session.putAttribute(flowFile, CoreAttributes.MIME_TYPE.key(), "application/octet-stream");
        flowFile = session.putAttribute(flowFile, "mime.extension", "");
        logger.info("Unable to identify MIME Type for {}; setting to application/octet-stream", new Object[] { flowFile });
    } else {
        flowFile = session.putAttribute(flowFile, CoreAttributes.MIME_TYPE.key(), mimeType);
        flowFile = session.putAttribute(flowFile, "mime.extension", extension);
        logger.info("Identified {} as having MIME Type {}", new Object[] { flowFile, mimeType });
    }
    session.getProvenanceReporter().modifyAttributes(flowFile);
    session.transfer(flowFile, REL_SUCCESS);
}
Also used : FlowFile(org.apache.nifi.flowfile.FlowFile) BufferedInputStream(java.io.BufferedInputStream) TikaInputStream(org.apache.tika.io.TikaInputStream) InputStream(java.io.InputStream) Metadata(org.apache.tika.metadata.Metadata) TikaInputStream(org.apache.tika.io.TikaInputStream) AtomicReference(java.util.concurrent.atomic.AtomicReference) IOException(java.io.IOException) ComponentLog(org.apache.nifi.logging.ComponentLog) MimeType(org.apache.tika.mime.MimeType) BufferedInputStream(java.io.BufferedInputStream) MimeTypeException(org.apache.tika.mime.MimeTypeException) InputStreamCallback(org.apache.nifi.processor.io.InputStreamCallback) MediaType(org.apache.tika.mime.MediaType)

Example 3 with MimeType

use of org.apache.tika.mime.MimeType in project tika by apache.

the class MimeUtilTest method assertResult.

/**
 * Asserts that the default Tika MIME repository maps the given content type
 * to the expected extension.
 *
 * @param contentType the content type name to look up
 * @param expected    the extension the looked-up type is expected to report
 * @throws MimeTypeException if the content type name cannot be resolved
 */
private void assertResult(String contentType, String expected) throws MimeTypeException {
    MimeType resolved = TikaConfig.getDefaultConfig().getMimeRepository().forName(contentType);
    String actualExtension = resolved.getExtension();
    assertEquals(expected, actualExtension);
}
Also used : TikaConfig(org.apache.tika.config.TikaConfig) MimeTypes(org.apache.tika.mime.MimeTypes) MimeType(org.apache.tika.mime.MimeType)

Example 4 with MimeType

use of org.apache.tika.mime.MimeType in project tika by apache.

the class TikaCLI method compareFileMagic.

/**
 * Compares our mime types registry with the File(1) tool's
 * directory of (uncompiled) Magic entries.
 * (Well, those with mimetypes anyway)
 *
 * <p>Prints to standard output how many types Tika knows about, which mime
 * types appear in the magic directory but are not registered in Tika, and
 * which are registered but have no magic on the type itself, on any of its
 * child types, or on any of its ancestors.
 *
 * @param magicDir Path to the magic directory
 * @throws IllegalArgumentException if the directory does not look like an
 *         uncompiled magic directory or cannot be listed
 * @throws Exception if a magic file cannot be read
 */
private void compareFileMagic(String magicDir) throws Exception {
    Set<String> tikaLacking = new TreeSet<String>();
    Set<String> tikaNoMagic = new TreeSet<String>();
    // Sanity check: a real magic directory holds these well-known entries
    File dir = new File(magicDir);
    boolean looksPlausible = (new File(dir, "elf")).exists() && (new File(dir, "mime")).exists() && (new File(dir, "vorbis")).exists();
    if (!looksPlausible) {
        throw new IllegalArgumentException(magicDir + " doesn't seem to hold uncompressed file magic entries");
    }
    // listFiles() returns null when the path is not a listable directory
    File[] magicFiles = dir.listFiles();
    if (magicFiles == null) {
        throw new IllegalArgumentException(magicDir + " could not be listed");
    }
    // Find all the mimetypes in the directory
    Set<String> fileMimes = new HashSet<String>();
    for (File mf : magicFiles) {
        if (!mf.isFile()) {
            continue;
        }
        // try-with-resources so the reader is closed even if reading fails
        try (BufferedReader r = new BufferedReader(new InputStreamReader(new FileInputStream(mf), UTF_8))) {
            String line;
            while ((line = r.readLine()) != null) {
                // Strip exactly the prefix that matched, so a bare "!:mime"
                // line cannot run past the end of the string
                String mime;
                if (line.startsWith("!:mime")) {
                    mime = line.substring("!:mime".length()).trim();
                } else if (line.startsWith("#!:mime")) {
                    mime = line.substring("#!:mime".length()).trim();
                } else {
                    continue;
                }
                // A marker without a value names no type; don't count it
                if (!mime.isEmpty()) {
                    fileMimes.add(mime);
                }
            }
        }
    }
    // See how those compare to the Tika ones
    TikaConfig config = TikaConfig.getDefaultConfig();
    MimeTypes mimeTypes = config.getMimeRepository();
    MediaTypeRegistry registry = config.getMediaTypeRegistry();
    for (String mime : fileMimes) {
        try {
            final MimeType type = mimeTypes.getRegisteredMimeType(mime);
            if (type == null) {
                // Tika doesn't know about this one
                tikaLacking.add(mime);
            } else {
                // Tika knows about this one!
                // Does Tika have magic for it?
                boolean hasMagic = type.hasMagic();
                // How about the children?
                if (!hasMagic) {
                    for (MediaType child : registry.getChildTypes(type.getType())) {
                        MimeType childType = mimeTypes.getRegisteredMimeType(child.toString());
                        if (childType != null && childType.hasMagic()) {
                            hasMagic = true;
                        }
                    }
                }
                // How about the parents?
                MimeType parentType = type;
                while (parentType != null && !hasMagic) {
                    if (parentType.hasMagic()) {
                        // Has magic, fine
                        hasMagic = true;
                    } else {
                        // Check the parent next. Step up from the type currently
                        // being examined (not from the starting type), otherwise the
                        // walk keeps revisiting the same parent and never ends.
                        MediaType parent = registry.getSupertype(parentType.getType());
                        // Compare by value rather than by reference
                        if (MediaType.APPLICATION_XML.equals(parent) || MediaType.TEXT_PLAIN.equals(parent) || MediaType.OCTET_STREAM.equals(parent)) {
                            // Stop checking parents if we hit a top level type
                            parent = null;
                        }
                        if (parent != null) {
                            parentType = mimeTypes.getRegisteredMimeType(parent.toString());
                        } else {
                            parentType = null;
                        }
                    }
                }
                if (!hasMagic) {
                    tikaNoMagic.add(mime);
                }
            }
        } catch (MimeTypeException e) {
            // Broken entry in the file magic directory
            // Silently skip
        }
    }
    // Check how many tika knows about
    int tikaTypes = 0;
    int tikaAliases = 0;
    for (MediaType type : registry.getTypes()) {
        tikaTypes++;
        tikaAliases += registry.getAliases(type).size();
    }
    // Report
    System.out.println("Tika knows about " + tikaTypes + " unique mime types");
    System.out.println("Tika knows about " + (tikaTypes + tikaAliases) + " mime types including aliases");
    System.out.println("The File Magic directory knows about " + fileMimes.size() + " unique mime types");
    System.out.println();
    System.out.println("The following mime types are known to File but not Tika:");
    for (String mime : tikaLacking) {
        System.out.println("  " + mime);
    }
    System.out.println();
    System.out.println("The following mime types from File have no Tika magic (but their children might):");
    for (String mime : tikaNoMagic) {
        System.out.println("  " + mime);
    }
}
Also used : InputStreamReader(java.io.InputStreamReader) TikaConfig(org.apache.tika.config.TikaConfig) MediaTypeRegistry(org.apache.tika.mime.MediaTypeRegistry) MimeTypes(org.apache.tika.mime.MimeTypes) FileInputStream(java.io.FileInputStream) MimeType(org.apache.tika.mime.MimeType) TreeSet(java.util.TreeSet) MimeTypeException(org.apache.tika.mime.MimeTypeException) BufferedReader(java.io.BufferedReader) MediaType(org.apache.tika.mime.MediaType) File(java.io.File) HashSet(java.util.HashSet)

Example 5 with MimeType

use of org.apache.tika.mime.MimeType in project alliance by codice.

the class GetRelatedFilesRequestImpl method storeThumbnail.

/**
 * Uploads the metacard's thumbnail with an HTTP PUT to the configured
 * location, naming the file {@code <id>-THUMBNAIL<extension>} where the
 * extension is derived from the detected media type of the thumbnail bytes.
 *
 * @param metacard the metacard whose thumbnail should be stored
 * @return the name of the stored file, or {@code null} when the metacard has
 *         no thumbnail or the server did not answer with a success status
 * @throws IOException if detecting the media type or executing the request fails
 * @throws MimeTypeException if the detected media type name cannot be resolved
 */
private String storeThumbnail(Metacard metacard) throws IOException, MimeTypeException {
    String id = metacard.getId();
    byte[] thumbnailBytes = metacard.getThumbnail();
    if (thumbnailBytes == null) {
        // Nothing to upload; use the same "not stored" result as a failed PUT
        LOGGER.debug("No thumbnail available for {}", id);
        return null;
    }
    final MediaType mediaType;
    // Close the detection stream once the media type has been determined
    try (TikaInputStream tis = TikaInputStream.get(thumbnailBytes)) {
        mediaType = DETECTOR.detect(tis, new Metadata());
    }
    MimeType mimeType = TikaConfig.getDefaultConfig().getMimeRepository().forName(mediaType.toString());
    String fileName = id + "-THUMBNAIL" + mimeType.getExtension();
    String urlStr = DEFAULT_PROTOCOL + "://" + location.host_name + (port == null ? "" : ":" + port) + location.path_name + "/" + fileName;
    LOGGER.debug("Storing thumbnail for {} at location: {}", metacard.getTitle(), urlStr);
    HttpPut httpPut = new HttpPut(urlStr);
    HttpEntity entity = new ByteArrayEntity(thumbnailBytes);
    httpPut.setEntity(entity);
    Header contentTypeHeader = new BasicHeader("Content-Type", mediaType.toString());
    httpPut.addHeader(contentTypeHeader);
    // NOTE(review): the response entity is never consumed or closed here;
    // confirm that the configured httpClient releases the connection.
    HttpResponse response = httpClient.execute(httpPut);
    int statusCode = response.getStatusLine().getStatusCode();
    if (!(statusCode == HttpStatus.SC_OK || statusCode == HttpStatus.SC_CREATED || statusCode == HttpStatus.SC_ACCEPTED || statusCode == HttpStatus.SC_NO_CONTENT)) {
        // Any other status means the file was not stored
        fileName = null;
        LOGGER.debug("Unable to PUT file: code: {}, status: {}", statusCode, response.getStatusLine().getReasonPhrase());
    }
    return fileName;
}
Also used : HttpEntity(org.apache.http.HttpEntity) Metadata(org.apache.tika.metadata.Metadata) TikaInputStream(org.apache.tika.io.TikaInputStream) HttpResponse(org.apache.http.HttpResponse) MimeType(org.apache.tika.mime.MimeType) HttpPut(org.apache.http.client.methods.HttpPut) ByteArrayEntity(org.apache.http.entity.ByteArrayEntity) Header(org.apache.http.Header) BasicHeader(org.apache.http.message.BasicHeader) MediaType(org.apache.tika.mime.MediaType) BasicHeader(org.apache.http.message.BasicHeader)

Aggregations

MimeType (org.apache.tika.mime.MimeType)12 MimeTypeException (org.apache.tika.mime.MimeTypeException)8 MediaType (org.apache.tika.mime.MediaType)5 MimeTypes (org.apache.tika.mime.MimeTypes)5 IOException (java.io.IOException)4 TikaConfig (org.apache.tika.config.TikaConfig)4 TikaInputStream (org.apache.tika.io.TikaInputStream)4 Metadata (org.apache.tika.metadata.Metadata)4 InputStream (java.io.InputStream)3 BufferedInputStream (java.io.BufferedInputStream)2 TikaException (org.apache.tika.exception.TikaException)2 BufferedReader (java.io.BufferedReader)1 File (java.io.File)1 FileInputStream (java.io.FileInputStream)1 FileNotFoundException (java.io.FileNotFoundException)1 InputStreamReader (java.io.InputStreamReader)1 HttpURLConnection (java.net.HttpURLConnection)1 URL (java.net.URL)1 URLConnection (java.net.URLConnection)1 HashSet (java.util.HashSet)1