Example 1 with CompressionOutputStream

Use of org.apache.hadoop.io.compress.CompressionOutputStream in project hadoop by apache.

From the class TestLz4CompressorDecompressor, method testCompressorDecopressorLogicWithCompressionStreams.

// Test the compress/decompress round trip through the CompressionOutputStream/CompressionInputStream API.
@Test
public void testCompressorDecopressorLogicWithCompressionStreams() {
    DataOutputStream deflateOut = null;
    DataInputStream inflateIn = null;
    int BYTE_SIZE = 1024 * 100;
    byte[] bytes = generate(BYTE_SIZE);
    int bufferSize = 262144;
    int compressionOverhead = (bufferSize / 6) + 32;
    try {
        DataOutputBuffer compressedDataBuffer = new DataOutputBuffer();
        CompressionOutputStream deflateFilter = new BlockCompressorStream(compressedDataBuffer, new Lz4Compressor(bufferSize), bufferSize, compressionOverhead);
        deflateOut = new DataOutputStream(new BufferedOutputStream(deflateFilter));
        deflateOut.write(bytes, 0, bytes.length);
        deflateOut.flush();
        deflateFilter.finish();
        DataInputBuffer deCompressedDataBuffer = new DataInputBuffer();
        deCompressedDataBuffer.reset(compressedDataBuffer.getData(), 0, compressedDataBuffer.getLength());
        CompressionInputStream inflateFilter = new BlockDecompressorStream(deCompressedDataBuffer, new Lz4Decompressor(bufferSize), bufferSize);
        inflateIn = new DataInputStream(new BufferedInputStream(inflateFilter));
        byte[] result = new byte[BYTE_SIZE];
        // readFully() fills the whole buffer; a plain read() may return short.
        inflateIn.readFully(result);
        assertArrayEquals("original array does not equal compressed/decompressed array", bytes, result);
    } catch (IOException e) {
        fail("testLz4CompressorDecopressorLogicWithCompressionStreams ex error !!!");
    } finally {
        try {
            if (deflateOut != null)
                deflateOut.close();
            if (inflateIn != null)
                inflateIn.close();
        } catch (Exception e) {
            // ignore errors raised while closing the streams
        }
    }
}
Also used: CompressionOutputStream(org.apache.hadoop.io.compress.CompressionOutputStream) Lz4Compressor(org.apache.hadoop.io.compress.lz4.Lz4Compressor) CompressionInputStream(org.apache.hadoop.io.compress.CompressionInputStream) DataOutputStream(java.io.DataOutputStream) BlockDecompressorStream(org.apache.hadoop.io.compress.BlockDecompressorStream) IOException(java.io.IOException) DataInputStream(java.io.DataInputStream) DataInputBuffer(org.apache.hadoop.io.DataInputBuffer) Lz4Decompressor(org.apache.hadoop.io.compress.lz4.Lz4Decompressor) BlockCompressorStream(org.apache.hadoop.io.compress.BlockCompressorStream) BufferedInputStream(java.io.BufferedInputStream) DataOutputBuffer(org.apache.hadoop.io.DataOutputBuffer) BufferedOutputStream(java.io.BufferedOutputStream) Test(org.junit.Test)
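
The test above wires BlockCompressorStream and BlockDecompressorStream by hand. Below is a minimal sketch of the same LZ4 round trip driven through the codec API instead; the class and method names are hypothetical, and it assumes the Hadoop LZ4 support (native or bundled, depending on the Hadoop version) is loadable on the platform:

import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.io.IOUtils;
import org.apache.hadoop.io.compress.CompressionInputStream;
import org.apache.hadoop.io.compress.CompressionOutputStream;
import org.apache.hadoop.io.compress.Lz4Codec;

public class Lz4CodecRoundTrip {
    // Compress and decompress a byte array entirely in memory via Lz4Codec.
    public static byte[] roundTrip(byte[] original) throws Exception {
        Lz4Codec codec = new Lz4Codec();
        codec.setConf(new Configuration());

        // Compress into an in-memory buffer; finish() flushes the last block.
        ByteArrayOutputStream compressed = new ByteArrayOutputStream();
        CompressionOutputStream out = codec.createOutputStream(compressed);
        out.write(original);
        out.finish();
        out.close();

        // Decompress and collect the reconstructed bytes.
        ByteArrayOutputStream restored = new ByteArrayOutputStream();
        CompressionInputStream in =
            codec.createInputStream(new ByteArrayInputStream(compressed.toByteArray()));
        IOUtils.copyBytes(in, restored, 4096, true);
        return restored.toByteArray();
    }
}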

Example 2 with CompressionOutputStream

Use of org.apache.hadoop.io.compress.CompressionOutputStream in project carbondata by apache.

From the class CSVInputFormatTest, method generateCompressFiles.

/**
 * Generates the compressed CSV test fixtures; it only needs to be run once.
 * @throws Exception if reading the source file or writing a compressed file fails
 */
public void generateCompressFiles() throws Exception {
    String pwd = new File("src/test/resources/csv").getCanonicalPath();
    String inputFile = pwd + "/data.csv";
    FileInputStream input = new FileInputStream(inputFile);
    Configuration conf = new Configuration();
    // .gz
    String outputFile = pwd + "/data.csv.gz";
    FileOutputStream output = new FileOutputStream(outputFile);
    GzipCodec gzip = new GzipCodec();
    gzip.setConf(conf);
    CompressionOutputStream outputStream = gzip.createOutputStream(output);
    int i = -1;
    while ((i = input.read()) != -1) {
        outputStream.write(i);
    }
    outputStream.close();
    input.close();
    // .bz2
    input = new FileInputStream(inputFile);
    outputFile = pwd + "/data.csv.bz2";
    output = new FileOutputStream(outputFile);
    BZip2Codec bzip2 = new BZip2Codec();
    bzip2.setConf(conf);
    outputStream = bzip2.createOutputStream(output);
    i = -1;
    while ((i = input.read()) != -1) {
        outputStream.write(i);
    }
    outputStream.close();
    input.close();
    // .snappy
    input = new FileInputStream(inputFile);
    outputFile = pwd + "/data.csv.snappy";
    output = new FileOutputStream(outputFile);
    SnappyCodec snappy = new SnappyCodec();
    snappy.setConf(conf);
    outputStream = snappy.createOutputStream(output);
    i = -1;
    while ((i = input.read()) != -1) {
        outputStream.write(i);
    }
    outputStream.close();
    input.close();
    //.lz4
    input = new FileInputStream(inputFile);
    outputFile = pwd + "/data.csv.lz4";
    output = new FileOutputStream(outputFile);
    Lz4Codec lz4 = new Lz4Codec();
    lz4.setConf(conf);
    outputStream = lz4.createOutputStream(output);
    i = -1;
    while ((i = input.read()) != -1) {
        outputStream.write(i);
    }
    outputStream.close();
    input.close();
}
Also used: Lz4Codec(org.apache.hadoop.io.compress.Lz4Codec) CompressionOutputStream(org.apache.hadoop.io.compress.CompressionOutputStream) Configuration(org.apache.hadoop.conf.Configuration) FileOutputStream(java.io.FileOutputStream) GzipCodec(org.apache.hadoop.io.compress.GzipCodec) BZip2Codec(org.apache.hadoop.io.compress.BZip2Codec) File(java.io.File) SnappyCodec(org.apache.hadoop.io.compress.SnappyCodec) FileInputStream(java.io.FileInputStream)
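
The generator above only writes the fixtures. Below is a hypothetical companion sketch for the read side, letting CompressionCodecFactory select the codec from the file extension (.gz, .bz2, .snappy, .lz4); the class name is illustrative:

import java.io.FileInputStream;
import java.io.InputStream;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.compress.CompressionCodec;
import org.apache.hadoop.io.compress.CompressionCodecFactory;

public class CompressedCsvReader {
    // Open a possibly-compressed file, decompressing transparently when a
    // codec is registered for its extension.
    public static InputStream open(String file) throws Exception {
        CompressionCodecFactory factory = new CompressionCodecFactory(new Configuration());
        CompressionCodec codec = factory.getCodec(new Path(file));
        InputStream raw = new FileInputStream(file);
        // No codec matched the extension: hand back the raw stream.
        return codec == null ? raw : codec.createInputStream(raw);
    }
}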

Example 3 with CompressionOutputStream

Use of org.apache.hadoop.io.compress.CompressionOutputStream in project hadoop by apache.

From the class TestSnappyCompressorDecompressor, method testSnappyCompressorDecopressorLogicWithCompressionStreams.

@Test
public void testSnappyCompressorDecopressorLogicWithCompressionStreams() {
    int BYTE_SIZE = 1024 * 100;
    byte[] bytes = BytesGenerator.get(BYTE_SIZE);
    int bufferSize = 262144;
    int compressionOverhead = (bufferSize / 6) + 32;
    DataOutputStream deflateOut = null;
    DataInputStream inflateIn = null;
    try {
        DataOutputBuffer compressedDataBuffer = new DataOutputBuffer();
        CompressionOutputStream deflateFilter = new BlockCompressorStream(compressedDataBuffer, new SnappyCompressor(bufferSize), bufferSize, compressionOverhead);
        deflateOut = new DataOutputStream(new BufferedOutputStream(deflateFilter));
        deflateOut.write(bytes, 0, bytes.length);
        deflateOut.flush();
        deflateFilter.finish();
        DataInputBuffer deCompressedDataBuffer = new DataInputBuffer();
        deCompressedDataBuffer.reset(compressedDataBuffer.getData(), 0, compressedDataBuffer.getLength());
        CompressionInputStream inflateFilter = new BlockDecompressorStream(deCompressedDataBuffer, new SnappyDecompressor(bufferSize), bufferSize);
        inflateIn = new DataInputStream(new BufferedInputStream(inflateFilter));
        byte[] result = new byte[BYTE_SIZE];
        // readFully() fills the whole buffer; a plain read() may return short.
        inflateIn.readFully(result);
        Assert.assertArrayEquals("original array does not equal compressed/decompressed array", bytes, result);
    } catch (IOException e) {
        fail("testSnappyCompressorDecopressorLogicWithCompressionStreams ex error !!!");
    } finally {
        try {
            if (deflateOut != null)
                deflateOut.close();
            if (inflateIn != null)
                inflateIn.close();
        } catch (Exception e) {
            // ignore errors raised while closing the streams
        }
    }
}
Also used: CompressionOutputStream(org.apache.hadoop.io.compress.CompressionOutputStream) CompressionInputStream(org.apache.hadoop.io.compress.CompressionInputStream) DataOutputStream(java.io.DataOutputStream) BlockDecompressorStream(org.apache.hadoop.io.compress.BlockDecompressorStream) IOException(java.io.IOException) DataInputStream(java.io.DataInputStream) DataInputBuffer(org.apache.hadoop.io.DataInputBuffer) BlockCompressorStream(org.apache.hadoop.io.compress.BlockCompressorStream) BufferedInputStream(java.io.BufferedInputStream) DataOutputBuffer(org.apache.hadoop.io.DataOutputBuffer) BufferedOutputStream(java.io.BufferedOutputStream) Test(org.junit.Test)
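
Examples 1 and 3 differ only in the Compressor/Decompressor pair, so the round trip factors naturally over the org.apache.hadoop.io.compress interfaces. Here is a sketch of that generalization (an illustrative refactor, not code from either test; the class name is hypothetical):

import java.io.BufferedInputStream;
import java.io.DataInputStream;
import java.io.IOException;
import org.apache.hadoop.io.DataInputBuffer;
import org.apache.hadoop.io.DataOutputBuffer;
import org.apache.hadoop.io.compress.BlockCompressorStream;
import org.apache.hadoop.io.compress.BlockDecompressorStream;
import org.apache.hadoop.io.compress.CompressionOutputStream;
import org.apache.hadoop.io.compress.Compressor;
import org.apache.hadoop.io.compress.Decompressor;

public class BlockStreamRoundTrip {
    // Push src through any block compressor/decompressor pair and return
    // the reconstructed bytes for comparison against the original.
    public static byte[] roundTrip(byte[] src, Compressor compressor,
            Decompressor decompressor, int bufferSize) throws IOException {
        DataOutputBuffer compressed = new DataOutputBuffer();
        CompressionOutputStream out = new BlockCompressorStream(
                compressed, compressor, bufferSize, (bufferSize / 6) + 32);
        out.write(src, 0, src.length);
        out.finish();

        DataInputBuffer in = new DataInputBuffer();
        in.reset(compressed.getData(), 0, compressed.getLength());
        DataInputStream inflated = new DataInputStream(new BufferedInputStream(
                new BlockDecompressorStream(in, decompressor, bufferSize)));
        byte[] dst = new byte[src.length];
        inflated.readFully(dst);
        return dst;
    }
}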

Example 4 with CompressionOutputStream

Use of org.apache.hadoop.io.compress.CompressionOutputStream in project hadoop by apache.

From the class TestZStandardCompressorDecompressor, method compressDecompressLoop.

private void compressDecompressLoop(int rawDataSize) throws IOException {
    byte[] rawData = generate(rawDataSize);
    ByteArrayOutputStream baos = new ByteArrayOutputStream(rawDataSize + 12);
    CompressionOutputStream deflateFilter = new CompressorStream(baos, new ZStandardCompressor(), 4096);
    DataOutputStream deflateOut = new DataOutputStream(new BufferedOutputStream(deflateFilter));
    deflateOut.write(rawData, 0, rawData.length);
    deflateOut.flush();
    deflateFilter.finish();
    byte[] compressedResult = baos.toByteArray();
    int compressedSize = compressedResult.length;
    ZStandardDecompressor.ZStandardDirectDecompressor decompressor = new ZStandardDecompressor.ZStandardDirectDecompressor(4096);
    ByteBuffer inBuf = ByteBuffer.allocateDirect(compressedSize);
    ByteBuffer outBuf = ByteBuffer.allocateDirect(8096);
    inBuf.put(compressedResult, 0, compressedSize);
    inBuf.flip();
    ByteBuffer expected = ByteBuffer.wrap(rawData);
    outBuf.clear();
    while (!decompressor.finished()) {
        decompressor.decompress(inBuf, outBuf);
        if (outBuf.remaining() == 0) {
            outBuf.flip();
            while (outBuf.remaining() > 0) {
                assertEquals(expected.get(), outBuf.get());
            }
            outBuf.clear();
        }
    }
    outBuf.flip();
    while (outBuf.remaining() > 0) {
        assertEquals(expected.get(), outBuf.get());
    }
    outBuf.clear();
    assertEquals(0, expected.remaining());
}
Also used: CompressionOutputStream(org.apache.hadoop.io.compress.CompressionOutputStream) CompressorStream(org.apache.hadoop.io.compress.CompressorStream) DataOutputStream(java.io.DataOutputStream) ByteArrayOutputStream(java.io.ByteArrayOutputStream) BufferedOutputStream(java.io.BufferedOutputStream) ByteBuffer(java.nio.ByteBuffer)
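
The flip()/clear() discipline in the loop above is the subtle part of driving a direct decompressor. Below is a small, self-contained helper (illustrative only, plain java.nio, not part of the original test) that shows the same drain step in isolation:

import java.io.ByteArrayOutputStream;
import java.nio.ByteBuffer;

final class BufferDrain {
    // Copy everything written into buf so far out to sink, then make buf
    // writable again for the next decompress() call.
    static void drain(ByteBuffer buf, ByteArrayOutputStream sink) {
        buf.flip();                                  // switch to read mode
        byte[] chunk = new byte[buf.remaining()];
        buf.get(chunk);                              // consume readable bytes
        sink.write(chunk, 0, chunk.length);
        buf.clear();                                 // back to write mode
    }
}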

Aggregations

CompressionOutputStream (org.apache.hadoop.io.compress.CompressionOutputStream): 15
CompressionInputStream (org.apache.hadoop.io.compress.CompressionInputStream): 9
ByteArrayOutputStream (java.io.ByteArrayOutputStream): 8
Configuration (org.apache.hadoop.conf.Configuration): 8
ByteArrayInputStream (java.io.ByteArrayInputStream): 7
Test (org.junit.Test): 7
BufferedOutputStream (java.io.BufferedOutputStream): 5
DataOutputStream (java.io.DataOutputStream): 5
IOException (java.io.IOException): 4
DataOutputBuffer (org.apache.hadoop.io.DataOutputBuffer): 4
Compressor (org.apache.hadoop.io.compress.Compressor): 4
BufferedInputStream (java.io.BufferedInputStream): 3
DataInputStream (java.io.DataInputStream): 3
DataInputBuffer (org.apache.hadoop.io.DataInputBuffer): 3
Decompressor (org.apache.hadoop.io.compress.Decompressor): 3
SnappyCodec (org.apache.hadoop.io.compress.SnappyCodec): 3
File (java.io.File): 2
FileInputStream (java.io.FileInputStream): 2
FileOutputStream (java.io.FileOutputStream): 2
BZip2Codec (org.apache.hadoop.io.compress.BZip2Codec): 2