Search in sources :

Example 1 with SnappyCodec

Use of org.apache.parquet.hadoop.codec.SnappyCodec in project parquet-mr by Apache.

From the class TestSnappyCodec, method TestSnappyStream.

/**
 * Round-trips a 1 MiB buffer (1024 blocks of 1024 bytes) through the streams
 * created by {@link SnappyCodec}.
 *
 * <p>The input is written to the codec's output stream one block at a time, and
 * the test checks that:
 * <ul>
 *   <li>the bytes emitted by the codec stream equal the result of compressing
 *       the whole buffer with a single {@code Snappy.compress} call;</li>
 *   <li>decompressing through the codec's input stream, and through
 *       {@code Snappy.uncompress}, both reproduce the original input.</li>
 * </ul>
 *
 * @throws IOException if any stream operation or Snappy call fails
 */
@Test
public void TestSnappyStream() throws IOException {
    SnappyCodec codec = new SnappyCodec();
    codec.setConf(new Configuration());
    int blockSize = 1024;
    int inputSize = blockSize * 1024;
    // Fill the input with a repeating 0..255 byte pattern.
    byte[] input = new byte[inputSize];
    for (int i = 0; i < inputSize; ++i) {
        input[i] = (byte) i;
    }
    ByteArrayOutputStream compressedStream = new ByteArrayOutputStream();
    CompressionOutputStream compressor = codec.createOutputStream(compressedStream);
    byte[] codecCompressed;
    try {
        // Feed the input to the codec one block at a time.
        int bytesCompressed = 0;
        while (bytesCompressed < inputSize) {
            int len = Math.min(inputSize - bytesCompressed, blockSize);
            compressor.write(input, bytesCompressed, len);
            bytesCompressed += len;
        }
        compressor.finish();
        // Snapshot the output right after finish(), exactly as before, so the
        // comparison below is unaffected by the close() that follows.
        codecCompressed = compressedStream.toByteArray();
    } finally {
        compressor.close();
    }
    byte[] rawCompressed = Snappy.compress(input);
    // Validate that the result from the codec is the same as if we compressed the
    // buffer directly.
    assertArrayEquals(rawCompressed, codecCompressed);
    ByteArrayInputStream inputStream = new ByteArrayInputStream(codecCompressed);
    CompressionInputStream decompressor = codec.createInputStream(inputStream);
    byte[] codecDecompressed = new byte[inputSize];
    try {
        int bytesDecompressed = 0;
        while (bytesDecompressed < inputSize) {
            // Never ask for more than the space left in the destination buffer.
            int len = Math.min(inputSize - bytesDecompressed, blockSize);
            int numBytes = decompressor.read(codecDecompressed, bytesDecompressed, len);
            // The java.io.InputStream contract signals end of stream with -1, not 0.
            // Stop on either so a short stream fails the assertion below instead of
            // walking the offset backwards.
            if (numBytes <= 0) {
                break;
            }
            bytesDecompressed += numBytes;
        }
    } finally {
        decompressor.close();
    }
    byte[] rawDecompressed = Snappy.uncompress(rawCompressed);
    assertArrayEquals(input, rawDecompressed);
    assertArrayEquals(input, codecDecompressed);
}
Also used : CompressionOutputStream(org.apache.hadoop.io.compress.CompressionOutputStream) Configuration(org.apache.hadoop.conf.Configuration) ByteArrayInputStream(java.io.ByteArrayInputStream) CompressionInputStream(org.apache.hadoop.io.compress.CompressionInputStream) ByteArrayOutputStream(java.io.ByteArrayOutputStream) SnappyCodec(org.apache.parquet.hadoop.codec.SnappyCodec) Test(org.junit.Test)

Aggregations

ByteArrayInputStream (java.io.ByteArrayInputStream): 1, ByteArrayOutputStream (java.io.ByteArrayOutputStream): 1, Configuration (org.apache.hadoop.conf.Configuration): 1, CompressionInputStream (org.apache.hadoop.io.compress.CompressionInputStream): 1, CompressionOutputStream (org.apache.hadoop.io.compress.CompressionOutputStream): 1, SnappyCodec (org.apache.parquet.hadoop.codec.SnappyCodec): 1, Test (org.junit.Test): 1