Search in sources :

Example 11 with CompressorStreamFactory

use of org.apache.commons.compress.compressors.CompressorStreamFactory in project languagetool by languagetool-org.

the class CommonCrawlToNgram3 method indexInputFile.

/**
 * Reads the input file through a decompressing stream, passes its lines to
 * {@code indexLine} and afterwards hands {@code unigramToCount},
 * {@code bigramToCount} and {@code trigramToCount} to {@code writeToDisk}.
 *
 * <p>The data is decoded through a {@code Reader} and only complete lines are
 * indexed: whatever follows the last {@code '\n'} of a chunk is carried over
 * and prepended to the next chunk. This keeps lines and multi-byte characters
 * that straddle a read boundary intact.
 *
 * @throws IOException if the input cannot be opened or read
 * @throws CompressorException if the decompressing stream cannot be created
 */
private void indexInputFile() throws IOException, CompressorException {
    // Every stream is a resource of its own so that nothing stays open when a
    // later stage (e.g. creating the compressor stream) fails.
    // NOTE(review): the platform default charset is kept to match the previous
    // behaviour - confirm whether the input should be pinned to UTF-8 instead.
    try (FileInputStream fin = new FileInputStream(input);
         BufferedInputStream in = new BufferedInputStream(fin);
         CompressorInputStream compressed = new CompressorStreamFactory().createCompressorInputStream(in);
         java.io.Reader reader = new java.io.InputStreamReader(compressed, java.nio.charset.Charset.defaultCharset())) {
        final char[] buffer = new char[8192];
        // Holds the trailing, not yet terminated line of the previous chunk(s).
        final StringBuilder pending = new StringBuilder();
        int n;
        while ((n = reader.read(buffer)) != -1) {
            int lastNewline = -1;
            for (int i = n - 1; i >= 0; i--) {
                if (buffer[i] == '\n') {
                    lastNewline = i;
                    break;
                }
            }
            if (lastNewline < 0) {
                // No line end in this chunk: keep collecting.
                pending.append(buffer, 0, n);
                continue;
            }
            pending.append(buffer, 0, lastNewline + 1);
            indexLine(pending.toString().split("\n"));
            pending.setLength(0);
            pending.append(buffer, lastNewline + 1, n - lastNewline - 1);
        }
        if (pending.length() > 0) {
            // Last line of the input without a terminating newline.
            indexLine(new String[] { pending.toString() });
        }
    }
    writeToDisk(1, unigramToCount);
    writeToDisk(2, bigramToCount);
    writeToDisk(3, trigramToCount);
}
Also used : CompressorStreamFactory(org.apache.commons.compress.compressors.CompressorStreamFactory) CompressorInputStream(org.apache.commons.compress.compressors.CompressorInputStream)

Example 12 with CompressorStreamFactory

use of org.apache.commons.compress.compressors.CompressorStreamFactory in project cloudstack by apache.

the class VhdProcessor method checkCompressed.

/**
 * Tells whether a compressor input stream can be created for the given file.
 *
 * @param fileName path of the file to probe
 * @return {@code true} if {@code CompressorStreamFactory} produced a stream for
 *         the file; {@code false} if the file was not found or the factory
 *         threw a {@code CompressorException} (the message is logged as a warning)
 * @throws IOException for any other I/O failure, including one raised while
 *         closing the opened stream
 */
private boolean checkCompressed(String fileName) throws IOException {
    BufferedInputStream buffered = null;
    CompressorInputStream detected = null;
    try {
        buffered = new BufferedInputStream(new FileInputStream(fileName));
        detected = new CompressorStreamFactory().createCompressorInputStream(buffered);
        return true;
    } catch (CompressorException | FileNotFoundException e) {
        s_logger.warn(e.getMessage());
        return false;
    } finally {
        // Close the outermost stream that was successfully created.
        if (detected != null) {
            detected.close();
        } else if (buffered != null) {
            buffered.close();
        }
    }
}
Also used : BufferedInputStream(java.io.BufferedInputStream) CompressorException(org.apache.commons.compress.compressors.CompressorException) FileNotFoundException(java.io.FileNotFoundException) CompressorStreamFactory(org.apache.commons.compress.compressors.CompressorStreamFactory) CompressorInputStream(org.apache.commons.compress.compressors.CompressorInputStream) FileInputStream(java.io.FileInputStream)

Example 13 with CompressorStreamFactory

use of org.apache.commons.compress.compressors.CompressorStreamFactory in project logging-log4j2 by apache.

the class CommonsCompressAction method execute.

/**
 * Compresses a file.
 *
 * @param name the compressor name, i.e. "gz", "bzip2", "xz", "pack200", or "deflate".
 * @param source file to compress, may not be null.
 * @param destination compressed file, may not be null.
 * @param deleteSource if true, attempt to delete file on completion. Failure to delete does not cause an exception
 *            to be thrown or affect return value.
 *
 * @return true if source file compressed, false if the source file does not exist.
 * @throws IOException on IO exception, or wrapping the {@code CompressorException} raised when the
 *             compressor stream cannot be created.
 */
public static boolean execute(final String name, final File source, final File destination, final boolean deleteSource) throws IOException {
    if (!source.exists()) {
        return false;
    }
    LOGGER.debug("Starting {} compression of {}", name, source.getPath());
    // The destination stream is a resource of its own: if creating the compressor
    // stream fails, the already opened file handle is still closed.
    try (final FileInputStream input = new FileInputStream(source);
        final FileOutputStream fileOutput = new FileOutputStream(destination);
        final BufferedOutputStream output = new BufferedOutputStream(new CompressorStreamFactory().createCompressorOutputStream(name, fileOutput))) {
        IOUtils.copy(input, output, BUF_SIZE);
        LOGGER.debug("Finished {} compression of {}", name, source.getPath());
    } catch (final CompressorException e) {
        throw new IOException(e);
    }
    if (deleteSource) {
        // Deleting the source is best effort: failures are logged, never propagated.
        try {
            if (Files.deleteIfExists(source.toPath())) {
                LOGGER.debug("Deleted {}", source.toString());
            } else {
                LOGGER.warn("Unable to delete {} after {} compression. File did not exist", source.toString(), name);
            }
        } catch (Exception ex) {
            LOGGER.warn("Unable to delete {} after {} compression, {}", source.toString(), name, ex.getMessage());
        }
    }
    return true;
}
Also used : CompressorException(org.apache.commons.compress.compressors.CompressorException) FileOutputStream(java.io.FileOutputStream) CompressorStreamFactory(org.apache.commons.compress.compressors.CompressorStreamFactory) IOException(java.io.IOException) BufferedOutputStream(java.io.BufferedOutputStream) FileInputStream(java.io.FileInputStream) CompressorException(org.apache.commons.compress.compressors.CompressorException) IOException(java.io.IOException)

Example 14 with CompressorStreamFactory

use of org.apache.commons.compress.compressors.CompressorStreamFactory in project lucene-solr by apache.

the class StreamUtilsTest method rawGzipFile.

/**
 * Creates {@code testfile.<ext>} in {@code testDir} and passes a gzip compressor
 * output stream on that file to {@code writeText}.
 *
 * @param ext extension appended to the file name
 * @return path of the created file
 */
private Path rawGzipFile(String ext) throws Exception {
    final Path target = testDir.resolve("testfile." + ext);
    final CompressorStreamFactory factory = new CompressorStreamFactory();
    final OutputStream gzipOut = factory.createCompressorOutputStream(CompressorStreamFactory.GZIP, Files.newOutputStream(target));
    writeText(gzipOut);
    return target;
}
Also used : Path(java.nio.file.Path) OutputStream(java.io.OutputStream) CompressorStreamFactory(org.apache.commons.compress.compressors.CompressorStreamFactory)

Example 15 with CompressorStreamFactory

use of org.apache.commons.compress.compressors.CompressorStreamFactory in project tika by apache.

the class CompressorParser method parse.

/**
 * Wraps the given stream in a compressor input stream and passes the
 * decompressed content to the embedded document extractor.
 *
 * <p>If the metadata carries a resource name, a name with the compression
 * suffix removed is set on the metadata of the embedded entry.
 *
 * @param stream the compressed document stream; it is shielded and therefore not closed here
 * @param handler receives the XHTML SAX events
 * @param metadata document metadata; the content type is set unless it is {@code OCTET_STREAM}
 * @param context parse context, queried for {@code CompressorParserOptions}
 * @throws TikaException if the compressor stream cannot be created; a
 *         {@code TikaMemoryLimitException} when the cause is a {@code MemoryLimitException}
 */
public void parse(InputStream stream, ContentHandler handler, Metadata metadata, ParseContext context) throws IOException, SAXException, TikaException {
    // The compressor stream is closed at the end of this method, but the
    // underlying document stream should not be closed, hence the shield.
    boolean canMark = stream.markSupported();
    InputStream shielded = new CloseShieldInputStream(stream);
    // Ensure that the stream supports the mark feature
    stream = canMark ? shielded : new BufferedInputStream(shielded);

    CompressorInputStream decompressed;
    try {
        // Used when the context does not provide options.
        CompressorParserOptions fallback = new CompressorParserOptions() {

            public boolean decompressConcatenated(Metadata metadata) {
                return false;
            }
        };
        CompressorParserOptions options = context.get(CompressorParserOptions.class, fallback);
        boolean concatenated = options.decompressConcatenated(metadata);
        decompressed = new CompressorStreamFactory(concatenated, memoryLimitInKb).createCompressorInputStream(stream);
    } catch (CompressorException e) {
        if (e.getCause() instanceof MemoryLimitException) {
            throw new TikaMemoryLimitException(e.getMessage());
        }
        throw new TikaException("Unable to uncompress document stream", e);
    }

    MediaType detectedType = getMediaType(decompressed);
    if (!detectedType.equals(MediaType.OCTET_STREAM)) {
        metadata.set(CONTENT_TYPE, detectedType.toString());
    }

    XHTMLContentHandler xhtml = new XHTMLContentHandler(handler, metadata);
    xhtml.startDocument();
    try {
        Metadata entryMetadata = new Metadata();
        String resourceName = metadata.get(Metadata.RESOURCE_NAME_KEY);
        if (resourceName != null) {
            // Ordered rules: { suffix to strip, replacement }. The first match wins.
            String[][] suffixRules = {
                { ".tbz", ".tar" },
                { ".tbz2", ".tar" },
                { ".bz", "" },
                { ".bz2", "" },
                { ".xz", "" },
                { ".zlib", "" },
                { ".pack", "" }
            };
            boolean rewritten = false;
            for (String[] rule : suffixRules) {
                if (resourceName.endsWith(rule[0])) {
                    resourceName = resourceName.substring(0, resourceName.length() - rule[0].length()) + rule[1];
                    rewritten = true;
                    break;
                }
            }
            if (!rewritten && resourceName.length() > 0) {
                // Anything else is handed to GzipUtils.
                resourceName = GzipUtils.getUncompressedFilename(resourceName);
            }
            entryMetadata.set(Metadata.RESOURCE_NAME_KEY, resourceName);
        }
        // Use the delegate parser to parse the compressed document
        EmbeddedDocumentExtractor extractor = EmbeddedDocumentUtil.getEmbeddedDocumentExtractor(context);
        if (extractor.shouldParseEmbedded(entryMetadata)) {
            extractor.parseEmbedded(decompressed, xhtml, entryMetadata, true);
        }
    } finally {
        decompressed.close();
    }
    xhtml.endDocument();
}
Also used : TikaException(org.apache.tika.exception.TikaException) EmbeddedDocumentExtractor(org.apache.tika.extractor.EmbeddedDocumentExtractor) Metadata(org.apache.tika.metadata.Metadata) CompressorStreamFactory(org.apache.commons.compress.compressors.CompressorStreamFactory) CompressorInputStream(org.apache.commons.compress.compressors.CompressorInputStream) SnappyCompressorInputStream(org.apache.commons.compress.compressors.snappy.SnappyCompressorInputStream) XZCompressorInputStream(org.apache.commons.compress.compressors.xz.XZCompressorInputStream) BZip2CompressorInputStream(org.apache.commons.compress.compressors.bzip2.BZip2CompressorInputStream) GzipCompressorInputStream(org.apache.commons.compress.compressors.gzip.GzipCompressorInputStream) DeflateCompressorInputStream(org.apache.commons.compress.compressors.deflate.DeflateCompressorInputStream) LZMACompressorInputStream(org.apache.commons.compress.compressors.lzma.LZMACompressorInputStream) FramedSnappyCompressorInputStream(org.apache.commons.compress.compressors.snappy.FramedSnappyCompressorInputStream) ZCompressorInputStream(org.apache.commons.compress.compressors.z.ZCompressorInputStream) Pack200CompressorInputStream(org.apache.commons.compress.compressors.pack200.Pack200CompressorInputStream) XHTMLContentHandler(org.apache.tika.sax.XHTMLContentHandler) MemoryLimitException(org.apache.commons.compress.MemoryLimitException) TikaMemoryLimitException(org.apache.tika.exception.TikaMemoryLimitException) BufferedInputStream(java.io.BufferedInputStream) CompressorException(org.apache.commons.compress.compressors.CompressorException) TikaMemoryLimitException(org.apache.tika.exception.TikaMemoryLimitException) MediaType(org.apache.tika.mime.MediaType) CloseShieldInputStream(org.apache.commons.io.input.CloseShieldInputStream)

Aggregations

CompressorStreamFactory (org.apache.commons.compress.compressors.CompressorStreamFactory)15 FileInputStream (java.io.FileInputStream)8 CompressorException (org.apache.commons.compress.compressors.CompressorException)7 BufferedInputStream (java.io.BufferedInputStream)6 IOException (java.io.IOException)6 CompressorInputStream (org.apache.commons.compress.compressors.CompressorInputStream)6 OutputStream (java.io.OutputStream)4 ByteArrayOutputStream (java.io.ByteArrayOutputStream)3 File (java.io.File)3 Path (java.nio.file.Path)3 InputStream (java.io.InputStream)2 ArchiveStreamFactory (org.apache.commons.compress.archivers.ArchiveStreamFactory)2 TarArchiveEntry (org.apache.commons.compress.archivers.tar.TarArchiveEntry)2 TarArchiveOutputStream (org.apache.commons.compress.archivers.tar.TarArchiveOutputStream)2 ZipArchiveOutputStream (org.apache.commons.compress.archivers.zip.ZipArchiveOutputStream)2 MediaType (org.apache.tika.mime.MediaType)2 Test (org.junit.Test)2 FilestoreModel (com.gitblit.models.FilestoreModel)1 BufferedOutputStream (java.io.BufferedOutputStream)1 FileNotFoundException (java.io.FileNotFoundException)1