Search in sources :

Example 46 with MediaType

use of org.apache.tika.mime.MediaType in project winery by eclipse.

the class CsarImporter method importFile.

/**
 * Imports a file from the filesystem to the repository.
 *
 * The mime type is first looked up in the given TOSCA meta file, using the path of {@code p} relative to
 * {@code rootPath}. If that lookup yields no media type, the type is determined via
 * {@code BackendUtils.getMimeType}. If no mime type can be determined at all, a message is added to
 * {@code errors} and the file is not stored.
 *
 * @param p                       the file to read from
 * @param repositoryFileReference the "file" to put the content to
 * @param tmf                     the TOSCAMetaFile object used to determine the mimetype. Must not be null.
 * @param rootPath                used to make the path p relative in order to determine the mime type
 * @param errors                  list where import errors should be stored to
 * @throws NullPointerException  if any argument is null
 * @throws IllegalStateException if the file cannot be read or its content cannot be put into the repository
 */
private static void importFile(Path p, RepositoryFileReference repositoryFileReference, TOSCAMetaFile tmf, Path rootPath, final List<String> errors) {
    Objects.requireNonNull(p);
    Objects.requireNonNull(repositoryFileReference);
    Objects.requireNonNull(tmf);
    Objects.requireNonNull(rootPath);
    Objects.requireNonNull(errors);
    try (InputStream is = Files.newInputStream(p);
        BufferedInputStream bis = new BufferedInputStream(is)) {
        // rootPath.relativize(p) is the path of p as seen from rootPath. The reverse call
        // (p.relativize(rootPath)) would produce a path leading from the file back up to the root.
        String relativePath = rootPath.relativize(p).toString();
        MediaType mediaType = MediaType.parse(tmf.getMimeType(relativePath));
        if (mediaType == null) {
            // Manually find out mime type
            try {
                mediaType = BackendUtils.getMimeType(bis, p.getFileName().toString());
            } catch (IOException e) {
                errors.add(String.format("No MimeType given for %1$s (%2$s)", p.getFileName(), e.getMessage()));
                return;
            }
            if (mediaType == null) {
                errors.add(String.format("No MimeType given for %1$s", p.getFileName()));
                return;
            }
        }
        try {
            RepositoryFactory.getRepository().putContentToFile(repositoryFileReference, bis, mediaType);
        } catch (IllegalArgumentException | IOException e) {
            throw new IllegalStateException(e);
        }
    } catch (IOException e1) {
        throw new IllegalStateException("Could not work on generated temporary files", e1);
    }
}
Also used : BufferedInputStream(java.io.BufferedInputStream) ZipInputStream(java.util.zip.ZipInputStream) InputStream(java.io.InputStream) MediaType(org.apache.tika.mime.MediaType) IOException(java.io.IOException)

Example 47 with MediaType

use of org.apache.tika.mime.MediaType in project winery by eclipse.

the class AbstractRepository method getMimeType.

/**
 * Returns the mime type of the given file reference.
 *
 * If the mime file belonging to {@code ref} exists, its content is read as UTF-8 and returned.
 * Otherwise the mime type is determined using BackendUtils.getMimeType on the content of
 * {@code ref}; if that succeeds, the mime type is persisted using setMimeType.
 *
 * @param ref the file reference to get the mime type for
 * @return the mime type as string, or null if it could not be determined
 * @throws IOException if reading the mime file or the referenced file fails
 */
@Override
public String getMimeType(RepositoryFileReference ref) throws IOException {
    RepositoryFileReference mimeFileRef = this.getMimeFileRef(ref);
    String mimeType;
    if (this.exists(mimeFileRef)) {
        // try-with-resources: the stream is closed even if reading it throws
        try (InputStream is = this.newInputStream(mimeFileRef)) {
            mimeType = IOUtils.toString(is, "UTF-8");
        }
    } else {
        // repository has been manipulated manually,
        // create mimetype information
        MediaType mediaType;
        try (InputStream is = this.newInputStream(ref);
            BufferedInputStream bis = new BufferedInputStream(is)) {
            mediaType = BackendUtils.getMimeType(bis, ref.getFileName());
        }
        if (mediaType != null) {
            // successful execution
            this.setMimeType(ref, mediaType);
            mimeType = mediaType.toString();
        } else {
            AbstractRepository.LOGGER.debug("Could not determine mimetype");
            mimeType = null;
        }
    }
    return mimeType;
}
Also used : RepositoryFileReference(org.eclipse.winery.common.RepositoryFileReference) BufferedInputStream(java.io.BufferedInputStream) InputStream(java.io.InputStream) MediaType(org.apache.tika.mime.MediaType)

Example 48 with MediaType

use of org.apache.tika.mime.MediaType in project winery by eclipse.

the class WriterUtils method storeTypes.

/**
 * Stores an XSD import definition for the given namespace and id and puts every regular file found
 * directly in {@code path} into the repository next to that definition, using the XSD media type.
 *
 * @param path      directory whose regular files are put into the repository
 * @param namespace namespace of the import
 * @param id        id of the import; the import location is set to {@code id + ".xsd"}
 * @throws IllegalStateException if listing, reading or storing a file fails
 */
public static void storeTypes(Path path, String namespace, String id) {
    LOGGER.debug("Store type: {}", id);
    try {
        MediaType mediaType = MediaTypes.MEDIATYPE_XSD;
        TImport.Builder builder = new TImport.Builder(Namespaces.XML_NS);
        builder.setNamespace(namespace);
        builder.setLocation(id + ".xsd");
        GenericImportId rid = new XSDImportId(namespace, id, false);
        TDefinitions definitions = BackendUtils.createWrapperDefinitions(rid);
        definitions.getImport().add(builder.build());
        CsarImporter.storeDefinitions(rid, definitions);
        RepositoryFileReference ref = BackendUtils.getRefOfDefinitions(rid);
        List<File> files;
        // Files.list holds an open directory handle until the stream is closed
        try (java.util.stream.Stream<Path> entries = Files.list(path)) {
            files = entries.filter(Files::isRegularFile).map(Path::toFile).collect(Collectors.toList());
        }
        for (File file : files) {
            RepositoryFileReference fileRef = new RepositoryFileReference(ref.getParent(), file.getName());
            // close each file stream once its content has been handed to the repository
            try (BufferedInputStream stream = new BufferedInputStream(new FileInputStream(file))) {
                RepositoryFactory.getRepository().putContentToFile(fileRef, stream, mediaType);
            }
        }
    } catch (IllegalArgumentException | IOException e) {
        throw new IllegalStateException(e);
    }
}
Also used : XSDImportId(org.eclipse.winery.common.ids.definitions.imports.XSDImportId) TImport(org.eclipse.winery.model.tosca.TImport) GenericImportId(org.eclipse.winery.common.ids.definitions.imports.GenericImportId) RepositoryFileReference(org.eclipse.winery.common.RepositoryFileReference) MediaType(org.apache.tika.mime.MediaType) Files(java.nio.file.Files) TDefinitions(org.eclipse.winery.model.tosca.TDefinitions)

Example 49 with MediaType

use of org.apache.tika.mime.MediaType in project cxf by apache.

the class TikaContentExtractor method extract.

/**
 * Extract the content and metadata from the input stream, optionally using a media type hint
 * for the type of content.
 *
 * If no hint is given, a detector is configured and the stream supports mark, the media type is
 * detected from the stream. When more than one parser is configured, the first parser supporting
 * the media type is used (or simply the first parser if no media type is known); a single
 * configured parser is used without checking its supported types.
 *
 * @param in input stream to extract the metadata from
 * @param handler custom ContentHandler
 * @param mtHint JAX-RS MediaType of the stream content, may be null
 * @param context custom context, may be null in which case a new ParseContext is used
 * @return the extracted content and metadata or null if extraction is not possible
 *         or was unsuccessful
 */
public TikaContent extract(final InputStream in, ContentHandler handler, javax.ws.rs.core.MediaType mtHint, ParseContext context) {
    if (in == null) {
        return null;
    }
    final Metadata metadata = new Metadata();
    // Default the context before parser selection so that getSupportedTypes never receives null
    if (context == null) {
        context = new ParseContext();
    }
    try {
        // Try to validate that input stream media type is supported by the parser
        MediaType mediaType = null;
        if (mtHint != null) {
            mediaType = MediaType.parse(mtHint.toString());
        } else if (detector != null && in.markSupported()) {
            mediaType = detector.detect(in, metadata);
        }
        if (mediaType != null) {
            metadata.set(HttpHeaders.CONTENT_TYPE, mediaType.toString());
        }
        Parser parser = null;
        if (parsers.size() == 1) {
            parser = parsers.get(0);
        } else {
            for (Parser p : parsers) {
                if (mediaType != null && !p.getSupportedTypes(context).contains(mediaType)) {
                    continue;
                }
                parser = p;
                break;
            }
        }
        if (parser == null) {
            return null;
        }
        if (context.get(Parser.class) == null) {
            // to process the embedded attachments
            context.set(Parser.class, parser instanceof AutoDetectParser ? parser : new AutoDetectParser());
        }
        try {
            parser.parse(in, handler, metadata, context);
        } catch (Exception ex) {
            // not ready to accept null handlers so lets retry with IgnoreContentHandler.
            // NOTE(review): the retry reuses the same stream, which the failed attempt may have
            // partially consumed - confirm this is acceptable for the configured parsers.
            if (handler == null) {
                handler = new IgnoreContentHandler();
                parser.parse(in, handler, metadata, context);
            } else {
                throw ex;
            }
        }
        return new TikaContent(handler, metadata, mediaType);
    } catch (final IOException ex) {
        LOG.log(Level.WARNING, "Unable to extract media type from input stream", ex);
    } catch (final SAXException | TikaException ex) {
        LOG.log(Level.WARNING, "Unable to parse input stream", ex);
    }
    return null;
}
Also used : TikaException(org.apache.tika.exception.TikaException) Metadata(org.apache.tika.metadata.Metadata) ParseContext(org.apache.tika.parser.ParseContext) MediaType(org.apache.tika.mime.MediaType) AutoDetectParser(org.apache.tika.parser.AutoDetectParser) IOException(java.io.IOException) SAXException(org.xml.sax.SAXException) Parser(org.apache.tika.parser.Parser)

Example 50 with MediaType

use of org.apache.tika.mime.MediaType in project uPortal by Jasig.

the class JaxbPortalDataHandlerService method importDataArchive.

/**
 * Detects the media type of the given resource stream, wraps the stream in the matching
 * archive or compressor stream and delegates to the importDataArchive overload selected by
 * the wrapper's type. The buffered stream is closed quietly when done.
 *
 * @param archive        the resource being imported; its filename is passed to media type detection
 * @param resourceStream the content of the resource
 * @param options        import options handed on to the delegate
 * @throws RuntimeException if the media type is not a recognized archive type or the stream
 *                          cannot be loaded
 */
protected void importDataArchive(Resource archive, InputStream resourceStream, BatchImportOptions options) {
    BufferedInputStream buffered = null;
    try {
        // Reuse the stream if it is already buffered, otherwise wrap it
        buffered = resourceStream instanceof BufferedInputStream
                ? (BufferedInputStream) resourceStream
                : new BufferedInputStream(resourceStream);

        // Mark with a read limit of 100MB before the media type is determined.
        // TODO see if there is a buffered stream that will write to a file once the buffer
        // fills up
        buffered.mark(100 * 1024 * 1024);
        final MediaType detected = getMediaType(buffered, archive.getFilename());

        // Exactly one of the two wrappers is set by the chain below
        ArchiveInputStream asArchive = null;
        CompressorInputStream asCompressed = null;
        if (MT_JAVA_ARCHIVE.equals(detected)) {
            asArchive = new JarArchiveInputStream(buffered);
        } else if (MediaType.APPLICATION_ZIP.equals(detected)) {
            asArchive = new ZipArchiveInputStream(buffered);
        } else if (MT_CPIO.equals(detected)) {
            asArchive = new CpioArchiveInputStream(buffered);
        } else if (MT_AR.equals(detected)) {
            asArchive = new ArArchiveInputStream(buffered);
        } else if (MT_TAR.equals(detected)) {
            asArchive = new TarArchiveInputStream(buffered);
        } else if (MT_BZIP2.equals(detected)) {
            asCompressed = new BZip2CompressorInputStream(buffered);
        } else if (MT_GZIP.equals(detected)) {
            asCompressed = new GzipCompressorInputStream(buffered);
        } else if (MT_PACK200.equals(detected)) {
            asCompressed = new Pack200CompressorInputStream(buffered);
        } else if (MT_XZ.equals(detected)) {
            asCompressed = new XZCompressorInputStream(buffered);
        } else {
            throw new RuntimeException("Unrecognized archive media type: " + detected);
        }

        // Static argument types are kept so the same overloads are selected as before
        if (asArchive != null) {
            importDataArchive(archive, asArchive, options);
        } else {
            importDataArchive(archive, asCompressed, options);
        }
    } catch (IOException e) {
        throw new RuntimeException("Could not load InputStream for resource: " + archive, e);
    } finally {
        IOUtils.closeQuietly(buffered);
    }
}
Also used : JarArchiveInputStream(org.apache.commons.compress.archivers.jar.JarArchiveInputStream) GzipCompressorInputStream(org.apache.commons.compress.compressors.gzip.GzipCompressorInputStream) ZipArchiveInputStream(org.apache.commons.compress.archivers.zip.ZipArchiveInputStream) ArArchiveInputStream(org.apache.commons.compress.archivers.ar.ArArchiveInputStream) CompressorInputStream(org.apache.commons.compress.compressors.CompressorInputStream) XZCompressorInputStream(org.apache.commons.compress.compressors.xz.XZCompressorInputStream) Pack200CompressorInputStream(org.apache.commons.compress.compressors.pack200.Pack200CompressorInputStream) BZip2CompressorInputStream(org.apache.commons.compress.compressors.bzip2.BZip2CompressorInputStream) IOException(java.io.IOException) TarArchiveInputStream(org.apache.commons.compress.archivers.tar.TarArchiveInputStream) ArchiveInputStream(org.apache.commons.compress.archivers.ArchiveInputStream) CpioArchiveInputStream(org.apache.commons.compress.archivers.cpio.CpioArchiveInputStream) BufferedInputStream(java.io.BufferedInputStream) MediaType(org.apache.tika.mime.MediaType)

Aggregations

MediaType (org.apache.tika.mime.MediaType): 95, Metadata (org.apache.tika.metadata.Metadata): 29, Test (org.junit.Test): 28, InputStream (java.io.InputStream): 26, IOException (java.io.IOException): 18, Parser (org.apache.tika.parser.Parser): 18, TikaInputStream (org.apache.tika.io.TikaInputStream): 17, ParseContext (org.apache.tika.parser.ParseContext): 17, TikaException (org.apache.tika.exception.TikaException): 14, AutoDetectParser (org.apache.tika.parser.AutoDetectParser): 14, CompositeParser (org.apache.tika.parser.CompositeParser): 13, ContentHandler (org.xml.sax.ContentHandler): 13, BodyContentHandler (org.apache.tika.sax.BodyContentHandler): 12, Detector (org.apache.tika.detect.Detector): 11, TikaTest (org.apache.tika.TikaTest): 10, HashSet (java.util.HashSet): 8, ByteArrayInputStream (java.io.ByteArrayInputStream): 7, ArrayList (java.util.ArrayList): 7, TikaConfig (org.apache.tika.config.TikaConfig): 7, MediaTypeRegistry (org.apache.tika.mime.MediaTypeRegistry): 7