Search in sources :

Example 11 with ArchiveInputStream

use of org.apache.commons.compress.archivers.ArchiveInputStream in project stanbol by apache.

the class IndexInstallTask method execute.

/**
 * Installs the Solr index described by the resource of this task on the
 * {@link ManagedSolrServer} selected by the resource's server-name attribute.
 * <p>
 * The resource is either a "properties" reference file (the index archive
 * path is read from it, falling back to a default name derived from the
 * index name) or an archive stream that is handed to the server directly.
 * The finished state is set to {@link ResourceState#INSTALLED} on success and
 * to {@link ResourceState#IGNORED} when the index name is missing, the target
 * server is not active, or the installation fails with an exception.
 *
 * @param ctx the installation context; used to record a failure message
 */
@Override
public void execute(InstallationContext ctx) {
    String indexName = (String) getResource().getAttribute(INDEX_NAME);
    if (indexName == null) {
        // Fixed: this is the install task (the message previously said "remove")
        // and the two concatenated message parts were missing a separating space.
        log.error("Unable to install Managed Index because the required Property '{}' " + "used to define the name of the Index is missing", INDEX_NAME);
        setFinishedState(ResourceState.IGNORED);
        return;
    }

    String serverName = (String) getResource().getAttribute(ManagedIndexConstants.SERVER_NAME);
    ManagedSolrServer server = managedServers.get(serverName);
    if (server == null) {
        log.warn("Unable to install Managed Solr Index {} because the {} " + "Server {} is currently not active!", new Object[] { indexName, serverName == null ? "default" : "", serverName != null ? serverName : "" });
        // needs still to be installed
        setFinishedState(ResourceState.IGNORED);
        return;
    }

    // We have an index name and a server to install it on ... let's do the work.
    String archiveFormat = (String) getResource().getAttribute(PROPERTY_ARCHIVE_FORMAT);
    InputStream is = null;
    try {
        is = getResource().getInputStream();
        if ("properties".equals(archiveFormat)) {
            // The resource is only a reference file pointing to the real archive.
            Properties props = new Properties();
            InputStreamReader reader = new InputStreamReader(is, "UTF-8");
            try {
                props.load(reader);
            } finally {
                IOUtils.closeQuietly(reader);
            }
            //TODO install to the right server!
            String indexPath = props.getProperty(INDEX_ARCHIVES);
            if (indexPath == null) {
                indexPath = indexName + '.' + ConfigUtils.SOLR_INDEX_ARCHIVE_EXTENSION;
                log.info("Property \"" + INDEX_ARCHIVES + "\" not present within the SolrIndex references file. Will use the default name \"" + indexPath + "\"");
            }
            server.updateIndex(indexName, indexPath, props);
            setFinishedState(ResourceState.INSTALLED);
        } else {
            // The resource itself is the index archive: stream it to the server.
            ArchiveInputStream ais = null;
            try {
                ais = ConfigUtils.getArchiveInputStream(archiveFormat, is);
                server.updateIndex(indexName, ais);
                // we are done ... set the state to installed!
                setFinishedState(ResourceState.INSTALLED);
            } finally {
                IOUtils.closeQuietly(ais);
            }
        }
    } catch (Exception e) {
        // Task boundary: any failure is logged, reported to the installation
        // context and the resource is marked as ignored instead of propagating.
        // Fixed typo in the message label ("arviceFormat" -> "archiveFormat").
        String message = String.format("Unable to install SolrIndexArchive for index name '%s'!" + " (resource=%s, archiveFormat=%s)", indexName, getResource().getURL(), archiveFormat);
        log.error(message, e);
        ctx.log("%s! Reason: %s", message, e.getMessage());
        setFinishedState(ResourceState.IGNORED);
    } finally {
        // Harmless if the stream was already closed through a wrapper above.
        IOUtils.closeQuietly(is);
    }
}
Also used : ArchiveInputStream(org.apache.commons.compress.archivers.ArchiveInputStream) InputStreamReader(java.io.InputStreamReader) ArchiveInputStream(org.apache.commons.compress.archivers.ArchiveInputStream) InputStream(java.io.InputStream) Properties(java.util.Properties) ManagedSolrServer(org.apache.stanbol.commons.solr.managed.ManagedSolrServer)

Example 12 with ArchiveInputStream

use of org.apache.commons.compress.archivers.ArchiveInputStream in project tika by apache.

the class PackageParser method parse.

/**
 * Parses an archive document: opens it as an {@link ArchiveInputStream},
 * updates the media type in the metadata, and hands every non-directory
 * entry to {@code parseEntry} between the XHTML start/end document events.
 * <p>
 * Formats that cannot be read as a stream are only supported for 7z, which
 * is spooled to a temporary file first (optionally using a password obtained
 * from a {@link PasswordProvider} in the context).
 * <p>
 * The supplied {@code stream} is never closed by this method; the archive
 * stream and any temporary resources created here are always released,
 * including when opening or parsing fails.
 *
 * @param stream   the document stream; wrapped in a buffer if it lacks mark support
 * @param handler  receiver of the generated XHTML SAX events
 * @param metadata document metadata; passed to the media type update and the password provider
 * @param context  parse context supplying optional config, stream factory and password provider
 * @throws IOException   if reading the stream or releasing resources fails
 * @throws SAXException  if the content handler fails
 * @throws TikaException if the archive cannot be unpacked; an
 *         {@link EncryptedDocumentException} is thrown when a password is
 *         required or an encrypted zip entry is encountered
 */
public void parse(InputStream stream, ContentHandler handler, Metadata metadata, ParseContext context) throws IOException, SAXException, TikaException {
    //lazily load the MediaTypeRegistry at parse time
    //only want to call getDefaultConfig() once, and can't
    //load statically because of the ForkParser
    TikaConfig config = context.get(TikaConfig.class);
    MediaTypeRegistry mediaTypeRegistry = null;
    if (config != null) {
        mediaTypeRegistry = config.getMediaTypeRegistry();
    } else {
        if (bufferedMediaTypeRegistry == null) {
            //buffer this for next time.
            synchronized (lock) {
                //now that we're locked, check again
                if (bufferedMediaTypeRegistry == null) {
                    bufferedMediaTypeRegistry = TikaConfig.getDefaultConfig().getMediaTypeRegistry();
                }
            }
        }
        mediaTypeRegistry = bufferedMediaTypeRegistry;
    }
    // Ensure that the stream supports the mark feature
    if (!stream.markSupported()) {
        stream = new BufferedInputStream(stream);
    }
    TemporaryResources tmp = new TemporaryResources();
    ArchiveInputStream ais = null;
    // Set once ais is ready; any exit before that must release tmp (see finally).
    boolean archiveOpened = false;
    try {
        ArchiveStreamFactory factory = context.get(ArchiveStreamFactory.class, new ArchiveStreamFactory());
        // At the end we want to close the archive stream to release
        // any associated resources, but the underlying document stream
        // should not be closed
        ais = factory.createArchiveInputStream(new CloseShieldInputStream(stream));
        archiveOpened = true;
    } catch (StreamingNotSupportedException sne) {
        // Most archive formats work on streams, but a few need files
        if (!sne.getFormat().equals(ArchiveStreamFactory.SEVEN_Z)) {
            throw new TikaException("Unknown non-streaming format " + sne.getFormat(), sne);
        }
        // Rework as a file, and wrap
        stream.reset();
        TikaInputStream tstream = TikaInputStream.get(stream, tmp);
        // Seven Zip supports passwords, was one given?
        String password = null;
        PasswordProvider provider = context.get(PasswordProvider.class);
        if (provider != null) {
            password = provider.getPassword(metadata);
        }
        SevenZFile sevenz;
        if (password == null) {
            sevenz = new SevenZFile(tstream.getFile());
        } else {
            sevenz = new SevenZFile(tstream.getFile(), password.getBytes("UnicodeLittleUnmarked"));
        }
        // Pending a fix for COMPRESS-269 / TIKA-1525, this bit is a little nasty
        ais = new SevenZWrapper(sevenz);
        archiveOpened = true;
    } catch (ArchiveException e) {
        throw new TikaException("Unable to unpack document stream", e);
    } finally {
        // Covers every failure path above, including I/O errors while spooling
        // the 7z stream to a temporary file, which previously leaked tmp.
        if (!archiveOpened) {
            tmp.close();
        }
    }

    XHTMLContentHandler xhtml;
    try {
        // Everything that can fail after the archive was opened runs inside
        // this try so that ais and tmp are released on any error.
        updateMediaType(ais, mediaTypeRegistry, metadata);
        // Use the delegate parser to parse the contained document
        EmbeddedDocumentExtractor extractor = EmbeddedDocumentUtil.getEmbeddedDocumentExtractor(context);
        xhtml = new XHTMLContentHandler(handler, metadata);
        xhtml.startDocument();
        try {
            for (ArchiveEntry entry = ais.getNextEntry(); entry != null; entry = ais.getNextEntry()) {
                if (!entry.isDirectory()) {
                    parseEntry(ais, entry, extractor, metadata, xhtml);
                }
            }
        } catch (UnsupportedZipFeatureException zfe) {
            // If it's an encrypted document of unknown password, report as such
            if (zfe.getFeature() == Feature.ENCRYPTION) {
                throw new EncryptedDocumentException(zfe);
            }
            // Otherwise throw the exception
            throw new TikaException("UnsupportedZipFeature", zfe);
        } catch (PasswordRequiredException pre) {
            throw new EncryptedDocumentException(pre);
        }
    } finally {
        // Release temporary resources even if closing the archive stream fails.
        try {
            ais.close();
        } finally {
            tmp.close();
        }
    }
    xhtml.endDocument();
}
Also used : StreamingNotSupportedException(org.apache.commons.compress.archivers.StreamingNotSupportedException) TikaException(org.apache.tika.exception.TikaException) EncryptedDocumentException(org.apache.tika.exception.EncryptedDocumentException) TikaConfig(org.apache.tika.config.TikaConfig) EmbeddedDocumentExtractor(org.apache.tika.extractor.EmbeddedDocumentExtractor) TemporaryResources(org.apache.tika.io.TemporaryResources) TikaInputStream(org.apache.tika.io.TikaInputStream) MediaTypeRegistry(org.apache.tika.mime.MediaTypeRegistry) ZipArchiveEntry(org.apache.commons.compress.archivers.zip.ZipArchiveEntry) ArchiveEntry(org.apache.commons.compress.archivers.ArchiveEntry) PasswordRequiredException(org.apache.commons.compress.PasswordRequiredException) ArchiveException(org.apache.commons.compress.archivers.ArchiveException) XHTMLContentHandler(org.apache.tika.sax.XHTMLContentHandler) PasswordProvider(org.apache.tika.parser.PasswordProvider) UnsupportedZipFeatureException(org.apache.commons.compress.archivers.zip.UnsupportedZipFeatureException) ArchiveStreamFactory(org.apache.commons.compress.archivers.ArchiveStreamFactory) ArArchiveInputStream(org.apache.commons.compress.archivers.ar.ArArchiveInputStream) TarArchiveInputStream(org.apache.commons.compress.archivers.tar.TarArchiveInputStream) JarArchiveInputStream(org.apache.commons.compress.archivers.jar.JarArchiveInputStream) ArchiveInputStream(org.apache.commons.compress.archivers.ArchiveInputStream) CpioArchiveInputStream(org.apache.commons.compress.archivers.cpio.CpioArchiveInputStream) ZipArchiveInputStream(org.apache.commons.compress.archivers.zip.ZipArchiveInputStream) DumpArchiveInputStream(org.apache.commons.compress.archivers.dump.DumpArchiveInputStream) SevenZFile(org.apache.commons.compress.archivers.sevenz.SevenZFile) BufferedInputStream(java.io.BufferedInputStream) CloseShieldInputStream(org.apache.commons.io.input.CloseShieldInputStream)

Aggregations

ArchiveInputStream (org.apache.commons.compress.archivers.ArchiveInputStream)12 TarArchiveInputStream (org.apache.commons.compress.archivers.tar.TarArchiveInputStream)6 IOException (java.io.IOException)5 InputStream (java.io.InputStream)5 ZipArchiveInputStream (org.apache.commons.compress.archivers.zip.ZipArchiveInputStream)5 File (java.io.File)4 ArchiveEntry (org.apache.commons.compress.archivers.ArchiveEntry)4 ArchiveStreamFactory (org.apache.commons.compress.archivers.ArchiveStreamFactory)4 BufferedInputStream (java.io.BufferedInputStream)3 ArchiveException (org.apache.commons.compress.archivers.ArchiveException)3 TarArchiveEntry (org.apache.commons.compress.archivers.tar.TarArchiveEntry)3 FileInputStream (java.io.FileInputStream)2 LinkedList (java.util.LinkedList)2 CancellationException (java.util.concurrent.CancellationException)2 ArArchiveInputStream (org.apache.commons.compress.archivers.ar.ArArchiveInputStream)2 CpioArchiveInputStream (org.apache.commons.compress.archivers.cpio.CpioArchiveInputStream)2 JarArchiveInputStream (org.apache.commons.compress.archivers.jar.JarArchiveInputStream)2 ZipArchiveEntry (org.apache.commons.compress.archivers.zip.ZipArchiveEntry)2 BZip2CompressorInputStream (org.apache.commons.compress.compressors.bzip2.BZip2CompressorInputStream)2 GzipCompressorInputStream (org.apache.commons.compress.compressors.gzip.GzipCompressorInputStream)2