Search in sources:

Example 1 with XZInputStream

use of org.tukaani.xz.XZInputStream in project caffeine by ben-manes.

the class AbstractTraceReader method readFile.

/**
 * Opens the file at {@code filePath} and returns a stream over it, decompressing if required.
 *
 * <p>The stream is probed in order as an XZ stream, as a format accepted by
 * {@link CompressorStreamFactory}, and as a format accepted by {@link ArchiveStreamFactory}.
 * The first probe that succeeds supplies the returned stream; when none does, the buffered
 * raw stream is returned as is.
 *
 * @param filePath the location handed to {@code openFile}
 * @return the decoding stream of the first successful probe, or the buffered raw stream
 * @throws IOException if the file cannot be opened or the stream cannot be rewound
 */
private InputStream readFile(String filePath) throws IOException {
    BufferedInputStream buffered = new BufferedInputStream(openFile(filePath), BUFFER_SIZE);
    // Remember the start of the stream so that a failed probe can rewind to it.
    buffered.mark(100);

    // Probe 1: XZ.
    try {
        return new XZInputStream(buffered);
    } catch (IOException notXz) {
        buffered.reset();
    }

    // Probe 2: any compressor format the factory recognizes.
    try {
        return new CompressorStreamFactory().createCompressorInputStream(buffered);
    } catch (CompressorException notCompressed) {
        buffered.reset();
    }

    // Probe 3: any archive format the factory recognizes.
    try {
        return new ArchiveStreamFactory().createArchiveInputStream(buffered);
    } catch (ArchiveException notArchive) {
        buffered.reset();
    }

    // Nothing matched: hand back the rewound raw data.
    return buffered;
}
Also used : ArchiveStreamFactory(org.apache.commons.compress.archivers.ArchiveStreamFactory) XZInputStream(org.tukaani.xz.XZInputStream) BufferedInputStream(java.io.BufferedInputStream) CompressorException(org.apache.commons.compress.compressors.CompressorException) CompressorStreamFactory(org.apache.commons.compress.compressors.CompressorStreamFactory) IOException(java.io.IOException) ArchiveException(org.apache.commons.compress.archivers.ArchiveException)

Example 2 with XZInputStream

use of org.tukaani.xz.XZInputStream in project buck by facebook.

the class XzStepTest method testXzStep.

/**
 * Runs an {@code XzStep} over a test data file and checks that the step exits with code 0 and
 * that decompressing its output yields content identical to the source file.
 */
@Test
public void testXzStep() throws IOException {
    final Path uncompressedPath = TestDataHelper.getTestDataScenario(this, "xz_with_rm_and_check").resolve("xzstep.data");
    final File compressedFile = tmp.newFile("xzstep.data.xz");
    ProjectFilesystem filesystem = new ProjectFilesystem(tmp.getRoot().toPath());
    // A low compression level is used for faster testing.
    int compressionLevel = 1;
    boolean keep = true;
    XzStep xzStep = new XzStep(filesystem, uncompressedPath, compressedFile.toPath(), compressionLevel, keep, XZ.CHECK_CRC32);

    ExecutionContext executionContext = TestExecutionContext.newInstance();
    int exitCode = xzStep.execute(executionContext).getExitCode();
    assertEquals(0, exitCode);

    ByteSource expected = PathByteSource.asByteSource(uncompressedPath);
    // Each call opens a fresh decompressing view over the step's output file.
    ByteSource actual = new ByteSource() {

        @Override
        public InputStream openStream() throws IOException {
            return new XZInputStream(new FileInputStream(compressedFile));
        }
    };
    boolean identical = expected.contentEquals(actual);
    assertTrue("Decompressed file must be identical to original.", identical);
}
Also used : Path(java.nio.file.Path) ExecutionContext(com.facebook.buck.step.ExecutionContext) TestExecutionContext(com.facebook.buck.step.TestExecutionContext) XZInputStream(org.tukaani.xz.XZInputStream) PathByteSource(com.facebook.buck.io.PathByteSource) ByteSource(com.google.common.io.ByteSource) ProjectFilesystem(com.facebook.buck.io.ProjectFilesystem) File(java.io.File) FileInputStream(java.io.FileInputStream) Test(org.junit.Test)

Example 3 with XZInputStream

use of org.tukaani.xz.XZInputStream in project languagetool by languagetool-org.

the class CommonCrawlToNgram method indexInputFile.

/**
 * Reads the XZ-compressed {@code input} and passes its text to {@code indexLine} in batches of
 * complete lines, calling {@code writeAndEvaluate} once before and once after the run.
 *
 * <p>Text is decoded through a {@link java.io.Reader} so that a multi-byte character spanning
 * two read chunks is decoded correctly, and the unfinished tail of each chunk is carried over
 * to the next one so that a line is never handed to {@code indexLine} in two pieces.
 *
 * @throws IOException if the input cannot be opened, is not valid XZ data, or cannot be read
 */
void indexInputFile() throws IOException {
    // run now so we have a baseline
    writeAndEvaluate();
    // Every stream is a declared resource, so the file handle is released even if the
    // XZInputStream constructor rejects the header and throws.
    try (FileInputStream fin = new FileInputStream(input);
        XZInputStream xzIn = new XZInputStream(new BufferedInputStream(fin));
        // The platform default charset is what the previous byte-to-String conversion used.
        java.io.Reader reader = new java.io.InputStreamReader(xzIn, java.nio.charset.Charset.defaultCharset())) {
        final char[] buffer = new char[8192];
        // Text read so far that has not yet been terminated by a line feed.
        final StringBuilder pending = new StringBuilder();
        int n;
        while ((n = reader.read(buffer)) != -1) {
            pending.append(buffer, 0, n);
            int lastLineEnd = pending.lastIndexOf("\n");
            if (lastLineEnd < 0) {
                // No complete line yet; keep accumulating.
                continue;
            }
            String completeLines = pending.substring(0, lastLineEnd);
            pending.delete(0, lastLineEnd + 1);
            if (!completeLines.isEmpty()) {
                indexLine(completeLines.split("\n"));
            }
        }
        // The final line may lack a trailing line feed.
        if (pending.length() > 0) {
            indexLine(new String[] { pending.toString() });
        }
    }
    writeAndEvaluate();
}
Also used : XZInputStream(org.tukaani.xz.XZInputStream)

Example 4 with XZInputStream

use of org.tukaani.xz.XZInputStream in project nifi by apache.

the class CompressContent method onTrigger.

/**
 * Compresses or decompresses the content of one flow file taken from the session.
 *
 * <p>The mode is read from the {@code MODE} property and the format from the
 * {@code COMPRESSION_FORMAT} property. When the format property equals
 * {@code COMPRESSION_FORMAT_ATTRIBUTE}, the format is instead looked up from the flow file's
 * mime type attribute via {@code compressionFormatMimeTypeMap}: a missing attribute routes the
 * flow file to {@code REL_FAILURE}, and an unmapped mime type routes it unchanged to
 * {@code REL_SUCCESS}.
 *
 * <p>After the content is rewritten, the mime type attribute is removed (decompress) or set
 * (otherwise), the filename extension is optionally stripped or appended according to
 * {@code UPDATE_FILENAME}, a provenance modify-content event is reported, and the flow file is
 * transferred to {@code REL_SUCCESS}. A {@code ProcessException} raised along the way is logged
 * and the flow file is transferred to {@code REL_FAILURE}.
 *
 * @param context supplies the processor property values
 * @param session supplies the flow file and is used to rewrite, re-attribute and route it
 */
@Override
public void onTrigger(final ProcessContext context, final ProcessSession session) {
    FlowFile flowFile = session.get();
    if (flowFile == null) {
        // Nothing to process.
        return;
    }
    final ComponentLog logger = getLogger();
    final long sizeBeforeCompression = flowFile.getSize();
    final String compressionMode = context.getProperty(MODE).getValue();
    String compressionFormatValue = context.getProperty(COMPRESSION_FORMAT).getValue();
    // Resolve the format from the flow file's mime type when configured to do so.
    if (compressionFormatValue.equals(COMPRESSION_FORMAT_ATTRIBUTE)) {
        final String mimeType = flowFile.getAttribute(CoreAttributes.MIME_TYPE.key());
        if (mimeType == null) {
            logger.error("No {} attribute exists for {}; routing to failure", new Object[] { CoreAttributes.MIME_TYPE.key(), flowFile });
            session.transfer(flowFile, REL_FAILURE);
            return;
        }
        compressionFormatValue = compressionFormatMimeTypeMap.get(mimeType);
        if (compressionFormatValue == null) {
            logger.info("Mime Type of {} is '{}', which does not indicate a supported Compression Format; routing to success without decompressing", new Object[] { flowFile, mimeType });
            session.transfer(flowFile, REL_SUCCESS);
            return;
        }
    }
    // Final copy so the anonymous callback below can capture the resolved format.
    final String compressionFormat = compressionFormatValue;
    // Set by the callback in compress mode; read afterwards to set the mime type attribute.
    final AtomicReference<String> mimeTypeRef = new AtomicReference<>(null);
    final StopWatch stopWatch = new StopWatch(true);
    // Filename extension associated with the format; empty for formats not listed here.
    final String fileExtension;
    switch(compressionFormat.toLowerCase()) {
        case COMPRESSION_FORMAT_GZIP:
            fileExtension = ".gz";
            break;
        case COMPRESSION_FORMAT_LZMA:
            fileExtension = ".lzma";
            break;
        case COMPRESSION_FORMAT_XZ_LZMA2:
            fileExtension = ".xz";
            break;
        case COMPRESSION_FORMAT_BZIP2:
            fileExtension = ".bz2";
            break;
        case COMPRESSION_FORMAT_SNAPPY:
            fileExtension = ".snappy";
            break;
        case COMPRESSION_FORMAT_SNAPPY_FRAMED:
            fileExtension = ".sz";
            break;
        default:
            fileExtension = "";
            break;
    }
    try {
        flowFile = session.write(flowFile, new StreamCallback() {

            @Override
            public void process(final InputStream rawIn, final OutputStream rawOut) throws IOException {
                final OutputStream compressionOut;
                final InputStream compressionIn;
                final OutputStream bufferedOut = new BufferedOutputStream(rawOut, 65536);
                final InputStream bufferedIn = new BufferedInputStream(rawIn, 65536);
                try {
                    if (MODE_COMPRESS.equalsIgnoreCase(compressionMode)) {
                        // Compressing: read the input as is and wrap the output in an encoder.
                        compressionIn = bufferedIn;
                        switch(compressionFormat.toLowerCase()) {
                            case COMPRESSION_FORMAT_GZIP:
                                final int compressionLevel = context.getProperty(COMPRESSION_LEVEL).asInteger();
                                compressionOut = new GZIPOutputStream(bufferedOut, compressionLevel);
                                mimeTypeRef.set("application/gzip");
                                break;
                            case COMPRESSION_FORMAT_LZMA:
                                compressionOut = new LzmaOutputStream.Builder(bufferedOut).build();
                                mimeTypeRef.set("application/x-lzma");
                                break;
                            case COMPRESSION_FORMAT_XZ_LZMA2:
                                compressionOut = new XZOutputStream(bufferedOut, new LZMA2Options());
                                mimeTypeRef.set("application/x-xz");
                                break;
                            case COMPRESSION_FORMAT_SNAPPY:
                                compressionOut = new SnappyOutputStream(bufferedOut);
                                mimeTypeRef.set("application/x-snappy");
                                break;
                            case COMPRESSION_FORMAT_SNAPPY_FRAMED:
                                compressionOut = new SnappyFramedOutputStream(bufferedOut);
                                mimeTypeRef.set("application/x-snappy-framed");
                                break;
                            // NOTE(review): any format name not matched above also lands here and is
                            // labelled with the bzip2 mime type - confirm this is intended.
                            case COMPRESSION_FORMAT_BZIP2:
                            default:
                                mimeTypeRef.set("application/x-bzip2");
                                compressionOut = new CompressorStreamFactory().createCompressorOutputStream(compressionFormat.toLowerCase(), bufferedOut);
                                break;
                        }
                    } else {
                        // Any other mode: write the output as is and wrap the input in a decoder.
                        compressionOut = bufferedOut;
                        switch(compressionFormat.toLowerCase()) {
                            case COMPRESSION_FORMAT_LZMA:
                                compressionIn = new LzmaInputStream(bufferedIn, new Decoder());
                                break;
                            case COMPRESSION_FORMAT_XZ_LZMA2:
                                compressionIn = new XZInputStream(bufferedIn);
                                break;
                            case COMPRESSION_FORMAT_BZIP2:
                                // need this two-arg constructor to support concatenated streams
                                compressionIn = new BZip2CompressorInputStream(bufferedIn, true);
                                break;
                            case COMPRESSION_FORMAT_GZIP:
                                compressionIn = new GzipCompressorInputStream(bufferedIn, true);
                                break;
                            case COMPRESSION_FORMAT_SNAPPY:
                                compressionIn = new SnappyInputStream(bufferedIn);
                                break;
                            case COMPRESSION_FORMAT_SNAPPY_FRAMED:
                                compressionIn = new SnappyFramedInputStream(bufferedIn);
                                break;
                            default:
                                // Let the factory resolve the format by name.
                                compressionIn = new CompressorStreamFactory().createCompressorInputStream(compressionFormat.toLowerCase(), bufferedIn);
                        }
                    }
                } catch (final Exception e) {
                    // Stream construction failed: close the buffered output and surface the
                    // failure as an IOException, the only checked type this callback declares.
                    closeQuietly(bufferedOut);
                    throw new IOException(e);
                }
                // Copy everything through the chosen pair of streams; both are closed on exit.
                try (final InputStream in = compressionIn;
                    final OutputStream out = compressionOut) {
                    final byte[] buffer = new byte[8192];
                    int len;
                    // The loop ends on the first read that returns 0 or a negative value.
                    while ((len = in.read(buffer)) > 0) {
                        out.write(buffer, 0, len);
                    }
                    out.flush();
                }
            }
        });
        stopWatch.stop();
        final long sizeAfterCompression = flowFile.getSize();
        if (MODE_DECOMPRESS.equalsIgnoreCase(compressionMode)) {
            // Decompressed: drop the mime type and optionally strip the format's extension.
            flowFile = session.removeAttribute(flowFile, CoreAttributes.MIME_TYPE.key());
            if (context.getProperty(UPDATE_FILENAME).asBoolean()) {
                final String filename = flowFile.getAttribute(CoreAttributes.FILENAME.key());
                // The extension match is case-insensitive with respect to the filename.
                if (filename.toLowerCase().endsWith(fileExtension)) {
                    flowFile = session.putAttribute(flowFile, CoreAttributes.FILENAME.key(), filename.substring(0, filename.length() - fileExtension.length()));
                }
            }
        } else {
            // Compressed: record the mime type chosen by the callback and optionally append
            // the format's extension.
            flowFile = session.putAttribute(flowFile, CoreAttributes.MIME_TYPE.key(), mimeTypeRef.get());
            if (context.getProperty(UPDATE_FILENAME).asBoolean()) {
                final String filename = flowFile.getAttribute(CoreAttributes.FILENAME.key());
                flowFile = session.putAttribute(flowFile, CoreAttributes.FILENAME.key(), filename + fileExtension);
            }
        }
        logger.info("Successfully {}ed {} using {} compression format; size changed from {} to {} bytes", new Object[] { compressionMode.toLowerCase(), flowFile, compressionFormat, sizeBeforeCompression, sizeAfterCompression });
        session.getProvenanceReporter().modifyContent(flowFile, stopWatch.getDuration(TimeUnit.MILLISECONDS));
        session.transfer(flowFile, REL_SUCCESS);
    } catch (final ProcessException e) {
        // Only ProcessException is handled here; the flow file is routed to failure.
        logger.error("Unable to {} {} using {} compression format due to {}; routing to failure", new Object[] { compressionMode.toLowerCase(), flowFile, compressionFormat, e });
        session.transfer(flowFile, REL_FAILURE);
    }
}
Also used : GzipCompressorInputStream(org.apache.commons.compress.compressors.gzip.GzipCompressorInputStream) SnappyFramedOutputStream(org.xerial.snappy.SnappyFramedOutputStream) BufferedOutputStream(org.apache.nifi.stream.io.BufferedOutputStream) GZIPOutputStream(org.apache.nifi.stream.io.GZIPOutputStream) SnappyFramedOutputStream(org.xerial.snappy.SnappyFramedOutputStream) SnappyOutputStream(org.xerial.snappy.SnappyOutputStream) OutputStream(java.io.OutputStream) XZOutputStream(org.tukaani.xz.XZOutputStream) LzmaOutputStream(lzma.streams.LzmaOutputStream) CompressorStreamFactory(org.apache.commons.compress.compressors.CompressorStreamFactory) Decoder(lzma.sdk.lzma.Decoder) XZOutputStream(org.tukaani.xz.XZOutputStream) LzmaInputStream(lzma.streams.LzmaInputStream) BZip2CompressorInputStream(org.apache.commons.compress.compressors.bzip2.BZip2CompressorInputStream) BufferedInputStream(org.apache.nifi.stream.io.BufferedInputStream) GZIPOutputStream(org.apache.nifi.stream.io.GZIPOutputStream) SnappyInputStream(org.xerial.snappy.SnappyInputStream) BufferedOutputStream(org.apache.nifi.stream.io.BufferedOutputStream) FlowFile(org.apache.nifi.flowfile.FlowFile) XZInputStream(org.tukaani.xz.XZInputStream) LzmaInputStream(lzma.streams.LzmaInputStream) XZInputStream(org.tukaani.xz.XZInputStream) BufferedInputStream(org.apache.nifi.stream.io.BufferedInputStream) SnappyInputStream(org.xerial.snappy.SnappyInputStream) SnappyFramedInputStream(org.xerial.snappy.SnappyFramedInputStream) BZip2CompressorInputStream(org.apache.commons.compress.compressors.bzip2.BZip2CompressorInputStream) GzipCompressorInputStream(org.apache.commons.compress.compressors.gzip.GzipCompressorInputStream) InputStream(java.io.InputStream) SnappyFramedInputStream(org.xerial.snappy.SnappyFramedInputStream) AtomicReference(java.util.concurrent.atomic.AtomicReference) IOException(java.io.IOException) ComponentLog(org.apache.nifi.logging.ComponentLog) StreamCallback(org.apache.nifi.processor.io.StreamCallback) 
ProcessException(org.apache.nifi.processor.exception.ProcessException) IOException(java.io.IOException) StopWatch(org.apache.nifi.util.StopWatch) ProcessException(org.apache.nifi.processor.exception.ProcessException) LZMA2Options(org.tukaani.xz.LZMA2Options) SnappyOutputStream(org.xerial.snappy.SnappyOutputStream)

Example 5 with XZInputStream

use of org.tukaani.xz.XZInputStream in project ant by apache.

the class Unxz method extract.

/**
 * Expands the XZ-compressed source resource into the destination file.
 *
 * <p>Nothing is done unless the source resource's last-modified time is later than the
 * destination file's.
 *
 * @throws BuildException wrapping any {@link IOException} raised while expanding
 */
@Override
protected void extract() {
    if (srcResource.getLastModified() <= dest.lastModified()) {
        // Destination is at least as recent as the source.
        return;
    }
    log("Expanding " + srcResource.getName() + " to " + dest.getAbsolutePath());
    try (XZInputStream xzStream = new XZInputStream(srcResource.getInputStream());
        OutputStream destStream = Files.newOutputStream(dest.toPath())) {
        byte[] chunk = new byte[BUFFER_SIZE];
        int bytesRead;
        // Copy decompressed bytes until the XZ stream reports end of input.
        while ((bytesRead = xzStream.read(chunk, 0, chunk.length)) != -1) {
            destStream.write(chunk, 0, bytesRead);
        }
    } catch (IOException ioe) {
        String msg = "Problem expanding xz " + ioe.getMessage();
        throw new BuildException(msg, ioe, getLocation());
    }
}
Also used : XZInputStream(org.tukaani.xz.XZInputStream) OutputStream(java.io.OutputStream) IOException(java.io.IOException) BuildException(org.apache.tools.ant.BuildException)

Aggregations

XZInputStream (org.tukaani.xz.XZInputStream)5 IOException (java.io.IOException)3 OutputStream (java.io.OutputStream)2 CompressorStreamFactory (org.apache.commons.compress.compressors.CompressorStreamFactory)2 PathByteSource (com.facebook.buck.io.PathByteSource)1 ProjectFilesystem (com.facebook.buck.io.ProjectFilesystem)1 ExecutionContext (com.facebook.buck.step.ExecutionContext)1 TestExecutionContext (com.facebook.buck.step.TestExecutionContext)1 ByteSource (com.google.common.io.ByteSource)1 BufferedInputStream (java.io.BufferedInputStream)1 File (java.io.File)1 FileInputStream (java.io.FileInputStream)1 InputStream (java.io.InputStream)1 Path (java.nio.file.Path)1 AtomicReference (java.util.concurrent.atomic.AtomicReference)1 Decoder (lzma.sdk.lzma.Decoder)1 LzmaInputStream (lzma.streams.LzmaInputStream)1 LzmaOutputStream (lzma.streams.LzmaOutputStream)1 ArchiveException (org.apache.commons.compress.archivers.ArchiveException)1 ArchiveStreamFactory (org.apache.commons.compress.archivers.ArchiveStreamFactory)1