Search in sources :

Example 1 with MediaTypeRegistry

use of org.apache.tika.mime.MediaTypeRegistry in project tika by apache.

the class MediaTypeExample method main.

public static void main(String[] args) throws Exception {
    MediaTypeRegistry registry = MediaTypeRegistry.getDefaultRegistry();
    // Start at SVG and print each type on the way up the supertype chain,
    // stopping once the registry reports no further supertype.
    for (MediaType current = MediaType.parse("image/svg+xml");
            current != null;
            current = registry.getSupertype(current)) {
        System.out.println(current);
    }
}
Also used : MediaType(org.apache.tika.mime.MediaType) MediaTypeRegistry(org.apache.tika.mime.MediaTypeRegistry)

Example 2 with MediaTypeRegistry

use of org.apache.tika.mime.MediaTypeRegistry in project tika by apache.

the class MediaTypeExample method listAllTypes.

public static void listAllTypes() {
    MediaTypeRegistry registry = MediaTypeRegistry.getDefaultRegistry();
    // One output line per type returned by the registry, followed by its alias set.
    for (MediaType known : registry.getTypes()) {
        System.out.println(known + ", also known as " + registry.getAliases(known));
    }
}
Also used : MediaType(org.apache.tika.mime.MediaType) MediaTypeRegistry(org.apache.tika.mime.MediaTypeRegistry)

Example 3 with MediaTypeRegistry

use of org.apache.tika.mime.MediaTypeRegistry in project tika by apache.

the class TikaCLI method compareFileMagic.

/**
     * Compares our mime types registry with the File(1) tool's
     *  directory of (uncompiled) Magic entries.
     * (Well, those with mimetypes anyway)
     * <p>
     * Prints a report to standard output listing the mime types that File
     *  declares but Tika does not know, and those Tika knows but has no
     *  magic for (on the type itself, its children or its parents).
     *
     * @param magicDir Path to the magic directory
     * @throws IllegalArgumentException if the directory does not look like an
     *  uncompiled file magic directory, or cannot be listed
     * @throws Exception if one of the magic files cannot be read
     */
private void compareFileMagic(String magicDir) throws Exception {
    Set<String> tikaLacking = new TreeSet<String>();
    Set<String> tikaNoMagic = new TreeSet<String>();
    // Sanity check: a real magic directory holds these three well known entries
    File dir = new File(magicDir);
    boolean looksPlausible = new File(dir, "elf").exists()
            && new File(dir, "mime").exists()
            && new File(dir, "vorbis").exists();
    if (!looksPlausible) {
        throw new IllegalArgumentException(magicDir + " doesn't seem to hold uncompressed file magic entries");
    }
    // listFiles() returns null rather than throwing when the listing fails
    File[] magicFiles = dir.listFiles();
    if (magicFiles == null) {
        throw new IllegalArgumentException(magicDir + " could not be listed");
    }
    // Find all the mimetypes in the directory
    Set<String> fileMimes = new HashSet<String>();
    for (File mf : magicFiles) {
        if (!mf.isFile()) {
            continue;
        }
        // try-with-resources so the reader is closed even if a read fails
        try (BufferedReader r = new BufferedReader(new InputStreamReader(new FileInputStream(mf), UTF_8))) {
            String line;
            while ((line = r.readLine()) != null) {
                if (line.startsWith("!:mime") || line.startsWith("#!:mime")) {
                    // The value starts at offset 7 for both forms: "#!:mime" is
                    //  7 characters, "!:mime" is 6 plus one separator character.
                    // Lines too short to carry a value are skipped.
                    String mime = line.length() > 7 ? line.substring(7).trim() : "";
                    if (!mime.isEmpty()) {
                        fileMimes.add(mime);
                    }
                }
            }
        }
    }
    // See how those compare to the Tika ones
    TikaConfig config = TikaConfig.getDefaultConfig();
    MimeTypes mimeTypes = config.getMimeRepository();
    MediaTypeRegistry registry = config.getMediaTypeRegistry();
    for (String mime : fileMimes) {
        try {
            final MimeType type = mimeTypes.getRegisteredMimeType(mime);
            if (type == null) {
                // Tika doesn't know about this one
                tikaLacking.add(mime);
            } else {
                // Tika knows about this one!
                // Does Tika have magic for it?
                boolean hasMagic = type.hasMagic();
                // How about the children?
                if (!hasMagic) {
                    for (MediaType child : registry.getChildTypes(type.getType())) {
                        MimeType childType = mimeTypes.getRegisteredMimeType(child.toString());
                        if (childType != null && childType.hasMagic()) {
                            hasMagic = true;
                            // One child with magic is enough
                            break;
                        }
                    }
                }
                // How about the parents?
                MimeType parentType = type;
                while (parentType != null && !hasMagic) {
                    if (parentType.hasMagic()) {
                        // Has magic, fine
                        hasMagic = true;
                    } else {
                        // Check the parent next. This must climb from the type
                        //  currently being inspected, not from the original one,
                        //  otherwise the walk never advances and cannot terminate.
                        MediaType parent = registry.getSupertype(parentType.getType());
                        if (MediaType.APPLICATION_XML.equals(parent)
                                || MediaType.TEXT_PLAIN.equals(parent)
                                || MediaType.OCTET_STREAM.equals(parent)) {
                            // Stop checking parents if we hit a top level type
                            parent = null;
                        }
                        if (parent != null) {
                            parentType = mimeTypes.getRegisteredMimeType(parent.toString());
                        } else {
                            parentType = null;
                        }
                    }
                }
                if (!hasMagic) {
                    tikaNoMagic.add(mime);
                }
            }
        } catch (MimeTypeException ignored) {
            // Broken entry in the file magic directory
            // Deliberately skipped, as the report only covers parseable names
        }
    }
    // Check how many tika knows about
    int tikaTypes = 0;
    int tikaAliases = 0;
    for (MediaType type : registry.getTypes()) {
        tikaTypes++;
        tikaAliases += registry.getAliases(type).size();
    }
    // Report
    System.out.println("Tika knows about " + tikaTypes + " unique mime types");
    System.out.println("Tika knows about " + (tikaTypes + tikaAliases) + " mime types including aliases");
    System.out.println("The File Magic directory knows about " + fileMimes.size() + " unique mime types");
    System.out.println();
    System.out.println("The following mime types are known to File but not Tika:");
    for (String mime : tikaLacking) {
        System.out.println("  " + mime);
    }
    System.out.println();
    System.out.println("The following mime types from File have no Tika magic (but their children might):");
    for (String mime : tikaNoMagic) {
        System.out.println("  " + mime);
    }
}
Also used : InputStreamReader(java.io.InputStreamReader) TikaConfig(org.apache.tika.config.TikaConfig) MediaTypeRegistry(org.apache.tika.mime.MediaTypeRegistry) MimeTypes(org.apache.tika.mime.MimeTypes) FileInputStream(java.io.FileInputStream) MimeType(org.apache.tika.mime.MimeType) TreeSet(java.util.TreeSet) MimeTypeException(org.apache.tika.mime.MimeTypeException) BufferedReader(java.io.BufferedReader) MediaType(org.apache.tika.mime.MediaType) File(java.io.File) HashSet(java.util.HashSet)

Example 4 with MediaTypeRegistry

use of org.apache.tika.mime.MediaTypeRegistry in project ddf by codice.

the class TikaInputTransformer method getSupportedMimeTypes.

/**
 * Builds the list of supported mime types from the default Tika registry.
 * Every registered type is added first, then every alias of those types,
 * and finally two hard-coded entries.
 *
 * @return the collected mime type list
 */
private List<String> getSupportedMimeTypes() {
    MediaTypeRegistry registry = MediaTypeRegistry.getDefaultRegistry();
    Set<MediaType> registeredTypes = registry.getTypes();
    Set<MediaType> aliasTypes = new HashSet<>();
    List<String> supported = new ArrayList<>(registeredTypes.size());

    // First pass: add each registered type and gather its aliases along the way.
    for (MediaType registered : registeredTypes) {
        addMediaTypetoMimeTypes(registered, supported);
        aliasTypes.addAll(registry.getAliases(registered));
    }

    // Second pass: the aliases, with duplicates already collapsed by the set.
    for (MediaType alias : aliasTypes) {
        addMediaTypetoMimeTypes(alias, supported);
    }

    // Two types appended unconditionally.
    supported.add("image/jp2");
    supported.add("application/vnd.ms-visio.viewer");

    LOGGER.debug("supported mime types: {}", supported);
    return supported;
}
Also used : ArrayList(java.util.ArrayList) MediaType(org.apache.tika.mime.MediaType) MediaTypeRegistry(org.apache.tika.mime.MediaTypeRegistry) HashSet(java.util.HashSet)

Example 5 with MediaTypeRegistry

use of org.apache.tika.mime.MediaTypeRegistry in project tika by apache.

the class TikaCLI method displaySupportedTypes.

/**
     * Lists every media type in the registry on standard output, each followed
     * by its aliases, its supertype (if any) and the class of the parser
     * registered for it (if any).
     */
private void displaySupportedTypes() {
    AutoDetectParser autoDetect = new AutoDetectParser();
    MediaTypeRegistry registry = autoDetect.getMediaTypeRegistry();
    Map<MediaType, Parser> parsersByType = autoDetect.getParsers();
    for (MediaType mediaType : registry.getTypes()) {
        System.out.println(mediaType);
        for (MediaType alias : registry.getAliases(mediaType)) {
            System.out.println("  alias:     " + alias);
        }

        MediaType parent = registry.getSupertype(mediaType);
        if (parent != null) {
            System.out.println("  supertype: " + parent);
        }

        Parser handler = parsersByType.get(mediaType);
        if (handler == null) {
            // No parser registered for this type, so nothing more to print
            continue;
        }
        if (handler instanceof CompositeParser) {
            // Look one level inside a composite to report the parser it maps to this type
            handler = ((CompositeParser) handler).getParsers().get(mediaType);
        }
        System.out.println("  parser:    " + handler.getClass().getName());
    }
}
Also used : CompositeParser(org.apache.tika.parser.CompositeParser) AutoDetectParser(org.apache.tika.parser.AutoDetectParser) MediaType(org.apache.tika.mime.MediaType) MediaTypeRegistry(org.apache.tika.mime.MediaTypeRegistry) Parser(org.apache.tika.parser.Parser) DigestingParser(org.apache.tika.parser.DigestingParser) NetworkParser(org.apache.tika.parser.NetworkParser) ForkParser(org.apache.tika.fork.ForkParser)

Aggregations

MediaTypeRegistry (org.apache.tika.mime.MediaTypeRegistry)8 MediaType (org.apache.tika.mime.MediaType)7 ArrayList (java.util.ArrayList)2 HashSet (java.util.HashSet)2 TikaConfig (org.apache.tika.config.TikaConfig)2 CompositeParser (org.apache.tika.parser.CompositeParser)2 Parser (org.apache.tika.parser.Parser)2 BufferedInputStream (java.io.BufferedInputStream)1 BufferedReader (java.io.BufferedReader)1 File (java.io.File)1 FileInputStream (java.io.FileInputStream)1 InputStreamReader (java.io.InputStreamReader)1 TreeSet (java.util.TreeSet)1 PasswordRequiredException (org.apache.commons.compress.PasswordRequiredException)1 ArchiveEntry (org.apache.commons.compress.archivers.ArchiveEntry)1 ArchiveException (org.apache.commons.compress.archivers.ArchiveException)1 ArchiveInputStream (org.apache.commons.compress.archivers.ArchiveInputStream)1 ArchiveStreamFactory (org.apache.commons.compress.archivers.ArchiveStreamFactory)1 StreamingNotSupportedException (org.apache.commons.compress.archivers.StreamingNotSupportedException)1 ArArchiveInputStream (org.apache.commons.compress.archivers.ar.ArArchiveInputStream)1