Search in sources :

Example 1 with SnappyFramedOutputStream

Use of org.xerial.snappy.SnappyFramedOutputStream in the Apache NiFi project.

The onTrigger method of the class CompressContent.

/**
 * Compresses or decompresses the content of a single FlowFile pulled from the session.
 *
 * <p>The mode is read from the {@code MODE} property and the format from the
 * {@code COMPRESSION_FORMAT} property. When the format property equals
 * {@code COMPRESSION_FORMAT_ATTRIBUTE}, the format is instead looked up in
 * {@code compressionFormatMimeTypeMap} using the FlowFile's mime type attribute: a missing
 * attribute routes the FlowFile to {@code REL_FAILURE}, and a mime type with no mapping routes
 * it, untouched, to {@code REL_SUCCESS}.
 *
 * <p>After the content has been rewritten, the mime type attribute is removed (decompress) or
 * set to the format's mime type (compress), and, if {@code UPDATE_FILENAME} is true, the
 * format's file extension is stripped from or appended to the filename attribute. A
 * {@link ProcessException} raised anywhere in that sequence routes the FlowFile to
 * {@code REL_FAILURE}.
 *
 * <p>All case normalisation uses {@link java.util.Locale#ROOT} so that matching against the
 * format constants and file extensions does not depend on the JVM's default locale.
 *
 * @param context supplies the processor's property values
 * @param session supplies the FlowFile and receives the rewritten content, attribute changes,
 *                provenance event and routing decision
 */
@Override
public void onTrigger(final ProcessContext context, final ProcessSession session) {
    FlowFile flowFile = session.get();
    if (flowFile == null) {
        return;
    }
    final ComponentLog logger = getLogger();
    final long sizeBeforeCompression = flowFile.getSize();
    final String compressionMode = context.getProperty(MODE).getValue();
    String compressionFormatValue = context.getProperty(COMPRESSION_FORMAT).getValue();
    if (compressionFormatValue.equals(COMPRESSION_FORMAT_ATTRIBUTE)) {
        // The format is not fixed by configuration; derive it from the FlowFile's mime type.
        final String mimeType = flowFile.getAttribute(CoreAttributes.MIME_TYPE.key());
        if (mimeType == null) {
            logger.error("No {} attribute exists for {}; routing to failure", new Object[] { CoreAttributes.MIME_TYPE.key(), flowFile });
            session.transfer(flowFile, REL_FAILURE);
            return;
        }
        compressionFormatValue = compressionFormatMimeTypeMap.get(mimeType);
        if (compressionFormatValue == null) {
            logger.info("Mime Type of {} is '{}', which does not indicate a supported Compression Format; routing to success without decompressing", new Object[] { flowFile, mimeType });
            session.transfer(flowFile, REL_SUCCESS);
            return;
        }
    }
    final String compressionFormat = compressionFormatValue;
    // Lower-case once, locale-independently; the default-locale toLowerCase() can map 'I' to a
    // dotless 'ı' (e.g. under Turkish), which would miss the case labels below.
    final String normalizedFormat = compressionFormat.toLowerCase(java.util.Locale.ROOT);
    final AtomicReference<String> mimeTypeRef = new AtomicReference<>(null);
    final StopWatch stopWatch = new StopWatch(true);
    final String fileExtension;
    switch(normalizedFormat) {
        case COMPRESSION_FORMAT_GZIP:
            fileExtension = ".gz";
            break;
        case COMPRESSION_FORMAT_LZMA:
            fileExtension = ".lzma";
            break;
        case COMPRESSION_FORMAT_XZ_LZMA2:
            fileExtension = ".xz";
            break;
        case COMPRESSION_FORMAT_BZIP2:
            fileExtension = ".bz2";
            break;
        case COMPRESSION_FORMAT_SNAPPY:
            fileExtension = ".snappy";
            break;
        case COMPRESSION_FORMAT_SNAPPY_FRAMED:
            fileExtension = ".sz";
            break;
        default:
            // Unrecognised format: no extension is added or stripped.
            fileExtension = "";
            break;
    }
    try {
        flowFile = session.write(flowFile, new StreamCallback() {

            @Override
            public void process(final InputStream rawIn, final OutputStream rawOut) throws IOException {
                final OutputStream compressionOut;
                final InputStream compressionIn;
                final OutputStream bufferedOut = new BufferedOutputStream(rawOut, 65536);
                final InputStream bufferedIn = new BufferedInputStream(rawIn, 65536);
                try {
                    if (MODE_COMPRESS.equalsIgnoreCase(compressionMode)) {
                        // Compressing: read the input as-is and wrap the output in an encoder.
                        compressionIn = bufferedIn;
                        switch(normalizedFormat) {
                            case COMPRESSION_FORMAT_GZIP:
                                final int compressionLevel = context.getProperty(COMPRESSION_LEVEL).asInteger();
                                compressionOut = new GZIPOutputStream(bufferedOut, compressionLevel);
                                mimeTypeRef.set("application/gzip");
                                break;
                            case COMPRESSION_FORMAT_LZMA:
                                compressionOut = new LzmaOutputStream.Builder(bufferedOut).build();
                                mimeTypeRef.set("application/x-lzma");
                                break;
                            case COMPRESSION_FORMAT_XZ_LZMA2:
                                compressionOut = new XZOutputStream(bufferedOut, new LZMA2Options());
                                mimeTypeRef.set("application/x-xz");
                                break;
                            case COMPRESSION_FORMAT_SNAPPY:
                                compressionOut = new SnappyOutputStream(bufferedOut);
                                mimeTypeRef.set("application/x-snappy");
                                break;
                            case COMPRESSION_FORMAT_SNAPPY_FRAMED:
                                compressionOut = new SnappyFramedOutputStream(bufferedOut);
                                mimeTypeRef.set("application/x-snappy-framed");
                                break;
                            case COMPRESSION_FORMAT_BZIP2:
                            default:
                                // bzip2 and any other format name are handed to the stream factory;
                                // the mime type recorded is bzip2's in both cases.
                                mimeTypeRef.set("application/x-bzip2");
                                compressionOut = new CompressorStreamFactory().createCompressorOutputStream(normalizedFormat, bufferedOut);
                                break;
                        }
                    } else {
                        // Decompressing: wrap the input in a decoder and write the output as-is.
                        compressionOut = bufferedOut;
                        switch(normalizedFormat) {
                            case COMPRESSION_FORMAT_LZMA:
                                compressionIn = new LzmaInputStream(bufferedIn, new Decoder());
                                break;
                            case COMPRESSION_FORMAT_XZ_LZMA2:
                                compressionIn = new XZInputStream(bufferedIn);
                                break;
                            case COMPRESSION_FORMAT_BZIP2:
                                // need this two-arg constructor to support concatenated streams
                                compressionIn = new BZip2CompressorInputStream(bufferedIn, true);
                                break;
                            case COMPRESSION_FORMAT_GZIP:
                                compressionIn = new GzipCompressorInputStream(bufferedIn, true);
                                break;
                            case COMPRESSION_FORMAT_SNAPPY:
                                compressionIn = new SnappyInputStream(bufferedIn);
                                break;
                            case COMPRESSION_FORMAT_SNAPPY_FRAMED:
                                compressionIn = new SnappyFramedInputStream(bufferedIn);
                                break;
                            default:
                                compressionIn = new CompressorStreamFactory().createCompressorInputStream(normalizedFormat, bufferedIn);
                        }
                    }
                } catch (final Exception e) {
                    // Any failure while building the codec stream is surfaced as an IOException.
                    closeQuietly(bufferedOut);
                    throw new IOException(e);
                }
                // Both streams are closed when the copy finishes or fails.
                try (final InputStream in = compressionIn;
                    final OutputStream out = compressionOut) {
                    final byte[] buffer = new byte[8192];
                    int len;
                    // Stops at end of stream (-1) and also on a zero-length read.
                    while ((len = in.read(buffer)) > 0) {
                        out.write(buffer, 0, len);
                    }
                    out.flush();
                }
            }
        });
        stopWatch.stop();
        final long sizeAfterCompression = flowFile.getSize();
        if (MODE_DECOMPRESS.equalsIgnoreCase(compressionMode)) {
            flowFile = session.removeAttribute(flowFile, CoreAttributes.MIME_TYPE.key());
            if (context.getProperty(UPDATE_FILENAME).asBoolean()) {
                final String filename = flowFile.getAttribute(CoreAttributes.FILENAME.key());
                // Case-insensitive extension match; strip it from the original-cased filename.
                if (filename.toLowerCase(java.util.Locale.ROOT).endsWith(fileExtension)) {
                    flowFile = session.putAttribute(flowFile, CoreAttributes.FILENAME.key(), filename.substring(0, filename.length() - fileExtension.length()));
                }
            }
        } else {
            flowFile = session.putAttribute(flowFile, CoreAttributes.MIME_TYPE.key(), mimeTypeRef.get());
            if (context.getProperty(UPDATE_FILENAME).asBoolean()) {
                final String filename = flowFile.getAttribute(CoreAttributes.FILENAME.key());
                flowFile = session.putAttribute(flowFile, CoreAttributes.FILENAME.key(), filename + fileExtension);
            }
        }
        logger.info("Successfully {}ed {} using {} compression format; size changed from {} to {} bytes", new Object[] { compressionMode.toLowerCase(java.util.Locale.ROOT), flowFile, compressionFormat, sizeBeforeCompression, sizeAfterCompression });
        session.getProvenanceReporter().modifyContent(flowFile, stopWatch.getDuration(TimeUnit.MILLISECONDS));
        session.transfer(flowFile, REL_SUCCESS);
    } catch (final ProcessException e) {
        logger.error("Unable to {} {} using {} compression format due to {}; routing to failure", new Object[] { compressionMode.toLowerCase(java.util.Locale.ROOT), flowFile, compressionFormat, e });
        session.transfer(flowFile, REL_FAILURE);
    }
}
Also used : GzipCompressorInputStream(org.apache.commons.compress.compressors.gzip.GzipCompressorInputStream) SnappyFramedOutputStream(org.xerial.snappy.SnappyFramedOutputStream) BufferedOutputStream(org.apache.nifi.stream.io.BufferedOutputStream) GZIPOutputStream(org.apache.nifi.stream.io.GZIPOutputStream) SnappyFramedOutputStream(org.xerial.snappy.SnappyFramedOutputStream) SnappyOutputStream(org.xerial.snappy.SnappyOutputStream) OutputStream(java.io.OutputStream) XZOutputStream(org.tukaani.xz.XZOutputStream) LzmaOutputStream(lzma.streams.LzmaOutputStream) CompressorStreamFactory(org.apache.commons.compress.compressors.CompressorStreamFactory) Decoder(lzma.sdk.lzma.Decoder) XZOutputStream(org.tukaani.xz.XZOutputStream) LzmaInputStream(lzma.streams.LzmaInputStream) BZip2CompressorInputStream(org.apache.commons.compress.compressors.bzip2.BZip2CompressorInputStream) BufferedInputStream(org.apache.nifi.stream.io.BufferedInputStream) GZIPOutputStream(org.apache.nifi.stream.io.GZIPOutputStream) SnappyInputStream(org.xerial.snappy.SnappyInputStream) BufferedOutputStream(org.apache.nifi.stream.io.BufferedOutputStream) FlowFile(org.apache.nifi.flowfile.FlowFile) XZInputStream(org.tukaani.xz.XZInputStream) LzmaInputStream(lzma.streams.LzmaInputStream) XZInputStream(org.tukaani.xz.XZInputStream) BufferedInputStream(org.apache.nifi.stream.io.BufferedInputStream) SnappyInputStream(org.xerial.snappy.SnappyInputStream) SnappyFramedInputStream(org.xerial.snappy.SnappyFramedInputStream) BZip2CompressorInputStream(org.apache.commons.compress.compressors.bzip2.BZip2CompressorInputStream) GzipCompressorInputStream(org.apache.commons.compress.compressors.gzip.GzipCompressorInputStream) InputStream(java.io.InputStream) SnappyFramedInputStream(org.xerial.snappy.SnappyFramedInputStream) AtomicReference(java.util.concurrent.atomic.AtomicReference) IOException(java.io.IOException) ComponentLog(org.apache.nifi.logging.ComponentLog) StreamCallback(org.apache.nifi.processor.io.StreamCallback) 
ProcessException(org.apache.nifi.processor.exception.ProcessException) IOException(java.io.IOException) StopWatch(org.apache.nifi.util.StopWatch) ProcessException(org.apache.nifi.processor.exception.ProcessException) LZMA2Options(org.tukaani.xz.LZMA2Options) SnappyOutputStream(org.xerial.snappy.SnappyOutputStream)

Aggregations

IOException (java.io.IOException)1 InputStream (java.io.InputStream)1 OutputStream (java.io.OutputStream)1 AtomicReference (java.util.concurrent.atomic.AtomicReference)1 Decoder (lzma.sdk.lzma.Decoder)1 LzmaInputStream (lzma.streams.LzmaInputStream)1 LzmaOutputStream (lzma.streams.LzmaOutputStream)1 CompressorStreamFactory (org.apache.commons.compress.compressors.CompressorStreamFactory)1 BZip2CompressorInputStream (org.apache.commons.compress.compressors.bzip2.BZip2CompressorInputStream)1 GzipCompressorInputStream (org.apache.commons.compress.compressors.gzip.GzipCompressorInputStream)1 FlowFile (org.apache.nifi.flowfile.FlowFile)1 ComponentLog (org.apache.nifi.logging.ComponentLog)1 ProcessException (org.apache.nifi.processor.exception.ProcessException)1 StreamCallback (org.apache.nifi.processor.io.StreamCallback)1 BufferedInputStream (org.apache.nifi.stream.io.BufferedInputStream)1 BufferedOutputStream (org.apache.nifi.stream.io.BufferedOutputStream)1 GZIPOutputStream (org.apache.nifi.stream.io.GZIPOutputStream)1 StopWatch (org.apache.nifi.util.StopWatch)1 LZMA2Options (org.tukaani.xz.LZMA2Options)1 XZInputStream (org.tukaani.xz.XZInputStream)1