Search in sources :

Example 1 with MimeTypes

use of org.apache.tika.mime.MimeTypes in project Asqatasun by Asqatasun.

the class UploadAuditSetUpFormValidator method validateFiles.

/**
 * Controls whether the uploaded files are of an authorized (HTML) mime type
 * and whether their size is under the {@code maxFileSize} limit.
 * <p>
 * Every violation is recorded in {@code errors} against the field of the
 * offending file ({@code ID_INPUT_FILE_PREFIX[i]}). When none of the
 * submitted files has a size greater than zero, a general
 * "no file uploaded" error is recorded instead.
 *
 * @param uploadAuditSetUpCommand the command carrying the uploaded files
 * @param errors the error holder that receives the rejections
 */
private void validateFiles(AuditSetUpCommand uploadAuditSetUpCommand, Errors errors) {
    boolean emptyFile = true;
    Metadata metadata = new Metadata();
    MimeTypes mimeTypes = TikaConfig.getDefaultConfig().getMimeRepository();
    CommonsMultipartFile[] files = uploadAuditSetUpCommand.getFileInputList();
    for (int i = 0; i < files.length; i++) {
        String field = ID_INPUT_FILE_PREFIX + "[" + i + "]";
        try {
            CommonsMultipartFile cmf = files[i];
            if (cmf.getSize() > maxFileSize) {
                // The limit is reported to the user as size / 1,000,000.
                Long maxFileSizeInMega = maxFileSize / 1000000;
                String[] arg = { maxFileSizeInMega.toString() };
                errors.rejectValue(field, FILE_SIZE_EXCEEDED_MSG_BUNDLE_KEY, arg, "{0}");
            }
            if (cmf.getSize() > 0) {
                emptyFile = false;
                String mime;
                // The stream opened on the upload is ours to release: close it
                // as soon as detection is done so that no file handle leaks.
                // A failure while closing is handled like a read failure below.
                try (BufferedInputStream in = new BufferedInputStream(cmf.getInputStream())) {
                    mime = mimeTypes.detect(in, metadata).toString();
                }
                LOGGER.debug("mime  " + mime + "  " + cmf.getOriginalFilename());
                if (!authorizedMimeType.contains(mime)) {
                    errors.rejectValue(field, NOT_HTML_MSG_BUNDLE_KEY);
                }
            }
        } catch (IOException ex) {
            // An unreadable upload cannot be checked, so it is rejected as not HTML.
            LOGGER.warn(ex);
            errors.rejectValue(field, NOT_HTML_MSG_BUNDLE_KEY);
        }
    }
    if (emptyFile) {
        // if no file is uploaded
        LOGGER.debug("emptyFiles");
        errors.rejectValue(GENERAL_ERROR_MSG_KEY, NO_FILE_UPLOADED_MSG_BUNDLE_KEY);
    }
}
Also used : BufferedInputStream(java.io.BufferedInputStream) Metadata(org.apache.tika.metadata.Metadata) IOException(java.io.IOException) MimeTypes(org.apache.tika.mime.MimeTypes) CommonsMultipartFile(org.springframework.web.multipart.commons.CommonsMultipartFile)

Example 2 with MimeTypes

use of org.apache.tika.mime.MimeTypes in project tika by apache.

the class MimeUtilTest method assertResult.

/**
 * Looks up {@code contentType} in the mime repository of the default Tika
 * configuration and asserts that the extension reported for it equals
 * {@code expected}.
 *
 * @param contentType the mime type name to look up
 * @param expected the extension the lookup is expected to yield
 * @throws MimeTypeException if the repository lookup rejects the name
 */
private void assertResult(String contentType, String expected) throws MimeTypeException {
    MimeType resolved = TikaConfig.getDefaultConfig().getMimeRepository().forName(contentType);
    String actualExtension = resolved.getExtension();
    assertEquals(expected, actualExtension);
}
Also used : TikaConfig(org.apache.tika.config.TikaConfig) MimeTypes(org.apache.tika.mime.MimeTypes) MimeType(org.apache.tika.mime.MimeType)

Example 3 with MimeTypes

use of org.apache.tika.mime.MimeTypes in project tika by apache.

the class TikaCLI method compareFileMagic.

/**
 * Compares our mime types registry with the File(1) tool's
 *  directory of (uncompiled) Magic entries.
 * (Well, those with mimetypes anyway)
 * <p>
 * Every {@code !:mime} / {@code #!:mime} line found in the files of the
 * directory is collected, then looked up in the Tika mime repository. The
 * report printed to standard output lists the types Tika does not know,
 * and the known types for which neither the type itself, nor one of its
 * children, nor one of its (non top level) parents has magic.
 *
 * @param magicDir Path to the magic directory
 * @throws IllegalArgumentException if the directory does not contain the
 *         {@code elf}, {@code mime} and {@code vorbis} entries
 * @throws Exception if one of the magic files cannot be read
 */
private void compareFileMagic(String magicDir) throws Exception {
    Set<String> tikaLacking = new TreeSet<String>();
    Set<String> tikaNoMagic = new TreeSet<String>();
    // Sanity check
    File dir = new File(magicDir);
    boolean looksPlausible = new File(dir, "elf").exists()
            && new File(dir, "mime").exists()
            && new File(dir, "vorbis").exists();
    if (!looksPlausible) {
        throw new IllegalArgumentException(magicDir + " doesn't seem to hold uncompressed file magic entries");
    }
    // Find all the mimetypes in the directory
    Set<String> fileMimes = new HashSet<String>();
    for (File mf : dir.listFiles()) {
        if (mf.isFile()) {
            // try-with-resources: the reader is released even if a read fails
            try (BufferedReader r = new BufferedReader(new InputStreamReader(new FileInputStream(mf), UTF_8))) {
                String line;
                while ((line = r.readLine()) != null) {
                    if (line.startsWith("!:mime") || line.startsWith("#!:mime")) {
                        // Guard the offset: a bare "!:mime" line is only 6 characters long
                        String mime = line.length() > 7 ? line.substring(7).trim() : "";
                        if (!mime.isEmpty()) {
                            fileMimes.add(mime);
                        }
                    }
                }
            }
        }
    }
    // See how those compare to the Tika ones
    TikaConfig config = TikaConfig.getDefaultConfig();
    MimeTypes mimeTypes = config.getMimeRepository();
    MediaTypeRegistry registry = config.getMediaTypeRegistry();
    for (String mime : fileMimes) {
        try {
            final MimeType type = mimeTypes.getRegisteredMimeType(mime);
            if (type == null) {
                // Tika doesn't know about this one
                tikaLacking.add(mime);
            } else {
                // Tika knows about this one!
                // Does Tika have magic for it?
                boolean hasMagic = type.hasMagic();
                // How about the children?
                if (!hasMagic) {
                    for (MediaType child : registry.getChildTypes(type.getType())) {
                        MimeType childType = mimeTypes.getRegisteredMimeType(child.toString());
                        if (childType != null && childType.hasMagic()) {
                            hasMagic = true;
                            break;
                        }
                    }
                }
                // How about the parents?
                MimeType parentType = type;
                while (parentType != null && !hasMagic) {
                    if (parentType.hasMagic()) {
                        // Has magic, fine
                        hasMagic = true;
                    } else {
                        // Check the parent next. The supertype must be taken from
                        // the type currently being examined, otherwise the walk
                        // never moves up the hierarchy and may never terminate.
                        MediaType parent = registry.getSupertype(parentType.getType());
                        if (MediaType.APPLICATION_XML.equals(parent)
                                || MediaType.TEXT_PLAIN.equals(parent)
                                || MediaType.OCTET_STREAM.equals(parent)) {
                            // Stop checking parents if we hit a top level type
                            parent = null;
                        }
                        if (parent != null) {
                            parentType = mimeTypes.getRegisteredMimeType(parent.toString());
                        } else {
                            parentType = null;
                        }
                    }
                }
                if (!hasMagic) {
                    tikaNoMagic.add(mime);
                }
            }
        } catch (MimeTypeException ignored) {
            // Broken entry in the file magic directory
            // Silently skip
        }
    }
    // Check how many tika knows about
    int tikaTypes = 0;
    int tikaAliases = 0;
    for (MediaType type : registry.getTypes()) {
        tikaTypes++;
        tikaAliases += registry.getAliases(type).size();
    }
    // Report
    System.out.println("Tika knows about " + tikaTypes + " unique mime types");
    System.out.println("Tika knows about " + (tikaTypes + tikaAliases) + " mime types including aliases");
    System.out.println("The File Magic directory knows about " + fileMimes.size() + " unique mime types");
    System.out.println();
    System.out.println("The following mime types are known to File but not Tika:");
    for (String mime : tikaLacking) {
        System.out.println("  " + mime);
    }
    System.out.println();
    System.out.println("The following mime types from File have no Tika magic (but their children might):");
    for (String mime : tikaNoMagic) {
        System.out.println("  " + mime);
    }
}
Also used : InputStreamReader(java.io.InputStreamReader) TikaConfig(org.apache.tika.config.TikaConfig) MediaTypeRegistry(org.apache.tika.mime.MediaTypeRegistry) MimeTypes(org.apache.tika.mime.MimeTypes) FileInputStream(java.io.FileInputStream) MimeType(org.apache.tika.mime.MimeType) TreeSet(java.util.TreeSet) MimeTypeException(org.apache.tika.mime.MimeTypeException) BufferedReader(java.io.BufferedReader) MediaType(org.apache.tika.mime.MediaType) File(java.io.File) HashSet(java.util.HashSet)

Example 4 with MimeTypes

use of org.apache.tika.mime.MimeTypes in project ddf by codice.

the class TikaMimeTypeResolver method getFileExtensionForMimeType.

/**
 * Asks the configured mime repository for the file extension associated
 * with the given mime type.
 *
 * @param contentType the mime type name to look up
 * @return the extension reported by the repository, or {@code null} when
 *         {@code contentType} is empty or the lookup fails
 */
@Override
public String getFileExtensionForMimeType(String contentType) {
    LOGGER.trace("ENTERING: getFileExtensionForMimeType()");
    final MimeTypes repository = config.getMimeRepository();
    String resolvedExtension = null;
    if (StringUtils.isNotEmpty(contentType)) {
        try {
            resolvedExtension = repository.forName(contentType).getExtension();
        } catch (Exception e) {
            // Best effort: a failed lookup is logged and reported as "no extension".
            LOGGER.debug("Exception caught getting file extension for mime type {}", contentType, e);
        }
    }
    LOGGER.debug("mimeType = {},   file extension = [{}]", contentType, resolvedExtension);
    LOGGER.trace("EXITING: getFileExtensionForMimeType()");
    return resolvedExtension;
}
Also used : MimeTypes(org.apache.tika.mime.MimeTypes) MimeType(org.apache.tika.mime.MimeType)

Example 5 with MimeTypes

use of org.apache.tika.mime.MimeTypes in project Asqatasun by Asqatasun.

the class AddScenarioFormValidator method checkScenarioFileTypeAndSize.

/**
 * Checks that a scenario file was uploaded, that it is not empty, that its
 * size does not exceed {@code maxFileSize} and that its detected mime type
 * is one of the authorized ones. Each failure is recorded in
 * {@code errors}, both on the general error key and on the scenario file
 * field.
 *
 * @param addScenarioCommand the command carrying the uploaded scenario file
 * @param errors the error holder that receives the rejections
 * @return  whether the scenario handled by the current AddScenarioCommand
 * has a correct type and size
 */
public boolean checkScenarioFileTypeAndSize(AddScenarioCommand addScenarioCommand, Errors errors) {
    if (addScenarioCommand.getScenarioFile() == null) {
        // if no file uploaded
        LOGGER.debug("empty Scenario File");
        errors.rejectValue(GENERAL_ERROR_MSG_KEY, MANDATORY_FIELD_MSG_BUNDLE_KEY);
        errors.rejectValue(SCENARIO_FILE_KEY, NO_SCENARIO_UPLOADED_MSG_BUNDLE_KEY);
        return false;
    }
    Metadata metadata = new Metadata();
    MimeTypes mimeTypes = TikaConfig.getDefaultConfig().getMimeRepository();
    String mime;
    try {
        CommonsMultipartFile cmf = addScenarioCommand.getScenarioFile();
        if (cmf.getSize() > maxFileSize) {
            // The limit is reported to the user as size / 1,000,000.
            Long maxFileSizeInMega = maxFileSize / 1000000;
            String[] arg = { maxFileSizeInMega.toString() };
            errors.rejectValue(GENERAL_ERROR_MSG_KEY, MANDATORY_FIELD_MSG_BUNDLE_KEY);
            errors.rejectValue(SCENARIO_FILE_KEY, FILE_SIZE_EXCEEDED_MSG_BUNDLE_KEY, arg, "{0}");
            return false;
        } else if (cmf.getSize() > 0) {
            // The stream opened on the upload is ours to release: close it as
            // soon as detection is done so that no file handle leaks. A failure
            // while closing is handled like a read failure below.
            try (BufferedInputStream in = new BufferedInputStream(cmf.getInputStream())) {
                mime = mimeTypes.detect(in, metadata).toString();
            }
            LOGGER.debug("mime  " + mime + "  " + cmf.getOriginalFilename());
            if (!authorizedMimeType.contains(mime)) {
                errors.rejectValue(GENERAL_ERROR_MSG_KEY, MANDATORY_FIELD_MSG_BUNDLE_KEY);
                errors.rejectValue(SCENARIO_FILE_KEY, NOT_SCENARIO_MSG_BUNDLE_KEY);
                return false;
            }
        } else {
            // A zero-length upload is treated as if no scenario had been uploaded.
            LOGGER.debug("File with size null");
            errors.rejectValue(GENERAL_ERROR_MSG_KEY, MANDATORY_FIELD_MSG_BUNDLE_KEY);
            errors.rejectValue(SCENARIO_FILE_KEY, NO_SCENARIO_UPLOADED_MSG_BUNDLE_KEY);
            return false;
        }
    } catch (IOException ex) {
        // An unreadable upload cannot be checked, so it is rejected as not a scenario.
        LOGGER.warn(ex);
        errors.rejectValue(SCENARIO_FILE_KEY, NOT_SCENARIO_MSG_BUNDLE_KEY);
        errors.rejectValue(GENERAL_ERROR_MSG_KEY, MANDATORY_FIELD_MSG_BUNDLE_KEY);
        return false;
    }
    return true;
}
Also used : BufferedInputStream(java.io.BufferedInputStream) Metadata(org.apache.tika.metadata.Metadata) IOException(java.io.IOException) MimeTypes(org.apache.tika.mime.MimeTypes) CommonsMultipartFile(org.springframework.web.multipart.commons.CommonsMultipartFile)

Aggregations

MimeTypes (org.apache.tika.mime.MimeTypes)10 MimeType (org.apache.tika.mime.MimeType)4 IOException (java.io.IOException)3 TikaConfig (org.apache.tika.config.TikaConfig)3 MediaType (org.apache.tika.mime.MediaType)3 BufferedInputStream (java.io.BufferedInputStream)2 File (java.io.File)2 URL (java.net.URL)2 Tika (org.apache.tika.Tika)2 Detector (org.apache.tika.detect.Detector)2 Metadata (org.apache.tika.metadata.Metadata)2 MimeTypeException (org.apache.tika.mime.MimeTypeException)2 CommonsMultipartFile (org.springframework.web.multipart.commons.CommonsMultipartFile)2 BufferedReader (java.io.BufferedReader)1 FileInputStream (java.io.FileInputStream)1 InputStream (java.io.InputStream)1 InputStreamReader (java.io.InputStreamReader)1 HashSet (java.util.HashSet)1 TreeSet (java.util.TreeSet)1 Response (javax.ws.rs.core.Response)1