Search in sources :

Example 86 with MediaType

use of org.apache.tika.mime.MediaType in project tika by apache.

the class TikaMimeTypes method getMimeTypesPlain.

/**
 * Renders the media types returned by {@code getMediaTypes()} as plain text.
 *
 * <p>Each type is written on its own line, followed by indented lines for
 * every alias, the supertype (when present) and the parser (when present).
 *
 * @return the plain-text listing; empty when there are no media types
 */
@GET
@Produces("text/plain")
public String getMimeTypesPlain() {
    // The buffer never leaves this method, so the unsynchronized
    // StringBuilder is sufficient and avoids StringBuffer's locking.
    StringBuilder text = new StringBuilder();
    for (MediaTypeDetails type : getMediaTypes()) {
        text.append(type.type.toString());
        text.append("\n");
        for (MediaType alias : type.aliases) {
            text.append("  alias:     ").append(alias).append("\n");
        }
        if (type.supertype != null) {
            text.append("  supertype: ").append(type.supertype.toString()).append("\n");
        }
        if (type.parser != null) {
            text.append("  parser:    ").append(type.parser).append("\n");
        }
    }
    return text.toString();
}
Also used : MediaType(org.apache.tika.mime.MediaType) Produces(javax.ws.rs.Produces) GET(javax.ws.rs.GET)

Example 87 with MediaType

use of org.apache.tika.mime.MediaType in project tika by apache.

the class TikaParsers method parserAsMap.

/**
 * Fills {@code details} with a map representation of the given parser.
 *
 * <p>The entries "name", "composite" and "decorated" are always written.
 * A composite parser additionally gets a "children" list holding one map
 * per child parser, built recursively. A non-composite parser gets a
 * "supportedTypes" list of media type strings, but only when
 * {@code withMimeTypes} is set.
 *
 * @param p             the parser description to convert
 * @param withMimeTypes whether to list supported media types for
 *                      non-composite parsers
 * @param details       the map that receives the entries
 */
private void parserAsMap(ParserDetails p, boolean withMimeTypes, Map<String, Object> details) {
    details.put("name", p.className);
    details.put("composite", p.isComposite);
    details.put("decorated", p.isDecorated);

    if (p.isComposite) {
        // Composite parsers report their children instead of media types.
        List<Map<String, Object>> children = new ArrayList<Map<String, Object>>();
        for (Parser child : p.childParsers) {
            Map<String, Object> childDetails = new HashMap<String, Object>();
            parserAsMap(new ParserDetails(child), withMimeTypes, childDetails);
            children.add(childDetails);
        }
        details.put("children", children);
        return;
    }

    if (!withMimeTypes) {
        return;
    }

    List<String> typeNames = new ArrayList<String>(p.supportedTypes.size());
    for (MediaType supported : p.supportedTypes) {
        typeNames.add(supported.toString());
    }
    details.put("supportedTypes", typeNames);
}
Also used : HashMap(java.util.HashMap) ArrayList(java.util.ArrayList) MediaType(org.apache.tika.mime.MediaType) ArrayList(java.util.ArrayList) List(java.util.List) HashMap(java.util.HashMap) Map(java.util.Map) Parser(org.apache.tika.parser.Parser) CompositeParser(org.apache.tika.parser.CompositeParser)

Example 88 with MediaType

use of org.apache.tika.mime.MediaType in project ddf by codice.

the class MimeTypeMapperImpl method guessMimeType.

/**
 * Guesses a mime type by asking each registered resolver for the mime type
 * mapped to the file extension.
 *
 * <p>When no file extension is supplied, the stream content is buffered and
 * run through a {@link DefaultDetector}; the extension mapped to the detected
 * media type is then used instead. For the XML extension the root namespace
 * of the stream is read once and only resolvers whose schema equals that
 * namespace are consulted.
 *
 * @param is            the content to inspect
 * @param fileExtension the file extension, may be null or empty
 * @return the first non-empty mime type returned by a resolver, or
 *         {@code null} when no resolver produced one
 * @throws MimeTypeResolutionException if a resolver throws while resolving
 */
@Override
public String guessMimeType(InputStream is, String fileExtension) throws MimeTypeResolutionException {
    LOGGER.trace("ENTERING: guessMimeType()");
    String mimeType = null;
    LOGGER.debug("Looping through{} MimeTypeResolvers", mimeTypeResolvers.size());
    // This is to force the TikaMimeTypeResolver to be called
    // after the CustomMimeTypeResolvers to prevent Tika default mapping
    // from being used when a CustomMimeTypeResolver may be more appropriate.
    List<MimeTypeResolver> sortedResolvers = sortResolvers(mimeTypeResolvers);
    if (StringUtils.isEmpty(fileExtension)) {
        // No extension supplied: detect the media type from the content and
        // derive an extension from it. The content is buffered first so the
        // stream can be re-opened after detection has consumed it.
        try (TemporaryFileBackedOutputStream tfbos = new TemporaryFileBackedOutputStream()) {
            IOUtils.copy(is, tfbos);
            try (InputStream inputStream = tfbos.asByteSource().openStream()) {
                Detector detector = new DefaultDetector();
                MediaType mediaType = detector.detect(inputStream, new Metadata());
                fileExtension = getFileExtensionForMimeType(mediaType.toString()).replace(".", "");
            } finally {
                // NOTE(review): this stream is opened from tfbos, which is closed when
                // the enclosing try exits - confirm it remains readable afterwards.
                is = tfbos.asByteSource().openStream();
            }
        } catch (Exception e) {
            // Best effort: detection failures are not fatal, but keep the cause in the log.
            LOGGER.debug("Failed to guess mimeType for file without extension.", e);
        }
    }
    // Constant-first comparison: fileExtension is still null here when the caller
    // passed null and detection above failed, which previously caused an NPE.
    boolean isXml = XML_FILE_EXTENSION.equals(fileExtension);
    // If file has XML extension, then read root element namespace once so
    // each MimeTypeResolver does not have to open the stream and read the namespace
    String namespace = null;
    if (isXml) {
        try {
            namespace = XMLUtils.getRootNamespace(IOUtils.toString(is));
        } catch (IOException ioe) {
            LOGGER.debug("Could not read namespace from input stream.", ioe);
        }
        LOGGER.debug("namespace = {}", namespace);
    }
    // Once a mime type is found for the given file extension, exit the loop.
    for (MimeTypeResolver resolver : sortedResolvers) {
        LOGGER.debug("Calling MimeTypeResolver {}", resolver.getName());
        try {
            if (isXml) {
                // For XML, a resolver is only consulted when it declares a schema
                // equal to the root namespace read above; all others are skipped.
                if (namespace != null && resolver.hasSchema()) {
                    if (namespace.equals(resolver.getSchema())) {
                        mimeType = resolver.getMimeTypeForFileExtension(fileExtension);
                    }
                }
            } else {
                mimeType = resolver.getMimeTypeForFileExtension(fileExtension);
            }
        } catch (Exception e) {
            LOGGER.debug("Error resolving mime type for file extension: {}", fileExtension);
            throw new MimeTypeResolutionException(e);
        }
        if (StringUtils.isNotEmpty(mimeType)) {
            // The second placeholder was missing, so the resolver name was never logged.
            LOGGER.debug("mimeType [{}] retrieved from MimeTypeResolver: {}", mimeType, resolver.getName());
            break;
        }
    }
    LOGGER.debug("mimeType = {},   file extension = [{}]", mimeType, fileExtension);
    LOGGER.trace("EXITING: guessMimeType()");
    return mimeType;
}
Also used : MimeTypeResolver(ddf.mime.MimeTypeResolver) DefaultDetector(org.apache.tika.detect.DefaultDetector) MimeTypeResolutionException(ddf.mime.MimeTypeResolutionException) Detector(org.apache.tika.detect.Detector) DefaultDetector(org.apache.tika.detect.DefaultDetector) TemporaryFileBackedOutputStream(org.codice.ddf.platform.util.TemporaryFileBackedOutputStream) InputStream(java.io.InputStream) Metadata(org.apache.tika.metadata.Metadata) MediaType(org.apache.tika.mime.MediaType) IOException(java.io.IOException) IOException(java.io.IOException) MimeTypeResolutionException(ddf.mime.MimeTypeResolutionException)

Aggregations

MediaType (org.apache.tika.mime.MediaType)88 Test (org.junit.Test)28 Metadata (org.apache.tika.metadata.Metadata)27 InputStream (java.io.InputStream)23 TikaInputStream (org.apache.tika.io.TikaInputStream)17 Parser (org.apache.tika.parser.Parser)17 ParseContext (org.apache.tika.parser.ParseContext)16 IOException (java.io.IOException)15 TikaException (org.apache.tika.exception.TikaException)13 CompositeParser (org.apache.tika.parser.CompositeParser)13 ContentHandler (org.xml.sax.ContentHandler)13 AutoDetectParser (org.apache.tika.parser.AutoDetectParser)12 BodyContentHandler (org.apache.tika.sax.BodyContentHandler)12 TikaTest (org.apache.tika.TikaTest)10 Detector (org.apache.tika.detect.Detector)10 HashSet (java.util.HashSet)8 ByteArrayInputStream (java.io.ByteArrayInputStream)7 TikaConfig (org.apache.tika.config.TikaConfig)7 MediaTypeRegistry (org.apache.tika.mime.MediaTypeRegistry)7 ArrayList (java.util.ArrayList)6