Search in sources :

Example 6 with CompressionOutputStream

use of org.apache.hadoop.io.compress.CompressionOutputStream in project parquet-mr by apache.

The class TestSnappyCodec defines the method TestSnappyStream.

@Test
public void TestSnappyStream() throws IOException {
    SnappyCodec codec = new SnappyCodec();
    codec.setConf(new Configuration());
    int blockSize = 1024;
    int inputSize = blockSize * 1024;
    // Fill the input with a repeating 0..255 byte pattern.
    byte[] input = new byte[inputSize];
    for (int i = 0; i < inputSize; ++i) {
        input[i] = (byte) i;
    }
    // Compress through the codec stream in blockSize chunks.
    ByteArrayOutputStream compressedStream = new ByteArrayOutputStream();
    try (CompressionOutputStream compressor = codec.createOutputStream(compressedStream)) {
        int bytesCompressed = 0;
        while (bytesCompressed < inputSize) {
            int len = Math.min(inputSize - bytesCompressed, blockSize);
            compressor.write(input, bytesCompressed, len);
            bytesCompressed += len;
        }
        compressor.finish();
    }
    byte[] rawCompressed = Snappy.compress(input);
    byte[] codecCompressed = compressedStream.toByteArray();
    // Validate that the result from the codec is the same as if we compressed the
    // buffer directly.
    assertArrayEquals(rawCompressed, codecCompressed);
    // Decompress through the codec stream; read() returns -1 (i.e. <= 0 here)
    // at end of stream, so guard on > 0 rather than != 0 to avoid corrupting
    // the offset if the stream ends before inputSize bytes are produced.
    ByteArrayInputStream inputStream = new ByteArrayInputStream(codecCompressed);
    byte[] codecDecompressed = new byte[inputSize];
    int bytesDecompressed = 0;
    try (CompressionInputStream decompressor = codec.createInputStream(inputStream)) {
        int numBytes;
        while (bytesDecompressed < inputSize
                && (numBytes = decompressor.read(codecDecompressed, bytesDecompressed, blockSize)) > 0) {
            bytesDecompressed += numBytes;
        }
    }
    assertEquals(inputSize, bytesDecompressed);
    byte[] rawDecompressed = Snappy.uncompress(rawCompressed);
    assertArrayEquals(input, rawDecompressed);
    assertArrayEquals(input, codecDecompressed);
}
Also used : CompressionOutputStream(org.apache.hadoop.io.compress.CompressionOutputStream) Configuration(org.apache.hadoop.conf.Configuration) ByteArrayInputStream(java.io.ByteArrayInputStream) CompressionInputStream(org.apache.hadoop.io.compress.CompressionInputStream) ByteArrayOutputStream(java.io.ByteArrayOutputStream) SnappyCodec(org.apache.parquet.hadoop.codec.SnappyCodec) Test(org.junit.Test)

Example 7 with CompressionOutputStream

use of org.apache.hadoop.io.compress.CompressionOutputStream in project hbase by apache.

The class CompressionTestBase defines the method codecTest.

/**
 * Round-trips {@code input} through {@code codec} and verifies the decompressed
 * bytes match the original, optionally asserting the exact compressed size.
 *
 * @param codec the compression codec under test (must be {@link Configurable})
 * @param input the chunks to compress, concatenated in order
 * @param expectedCompressedSize exact expected compressed length, or null to skip the check
 * @throws Exception on any compression/decompression failure
 */
protected void codecTest(final CompressionCodec codec, final byte[][] input, final Integer expectedCompressedSize) throws Exception {
    // We do this in Compression.java
    ((Configurable) codec).getConf().setInt("io.file.buffer.size", 32 * 1024);
    // Compress
    long start = EnvironmentEdgeManager.currentTime();
    Compressor compressor = codec.createCompressor();
    ByteArrayOutputStream baos = new ByteArrayOutputStream();
    int inLen = 0;
    // try-with-resources so the stream is closed even if a write throws.
    try (CompressionOutputStream out = codec.createOutputStream(baos, compressor)) {
        for (int i = 0; i < input.length; i++) {
            out.write(input[i]);
            inLen += input[i].length;
        }
    }
    long end = EnvironmentEdgeManager.currentTime();
    final byte[] compressed = baos.toByteArray();
    LOG.info("{} compressed {} bytes to {} bytes in {} ms", codec.getClass().getSimpleName(), inLen, compressed.length, end - start);
    if (expectedCompressedSize != null) {
        assertTrue("Expected compressed size does not match: (expected=" + expectedCompressedSize + ", actual=" + compressed.length + ")", expectedCompressedSize == compressed.length);
    }
    // Decompress
    final byte[] plain = new byte[inLen];
    Decompressor decompressor = codec.createDecompressor();
    start = EnvironmentEdgeManager.currentTime();
    try (CompressionInputStream in = codec.createInputStream(new ByteArrayInputStream(compressed), decompressor)) {
        IOUtils.readFully(in, plain, 0, plain.length);
    }
    end = EnvironmentEdgeManager.currentTime();
    LOG.info("{} decompressed {} bytes to {} bytes in {} ms", codec.getClass().getSimpleName(), compressed.length, plain.length, end - start);
    // Decompressed bytes should equal the original
    int offset = 0;
    for (int i = 0; i < input.length; i++) {
        assertTrue("Comparison failed at offset " + offset, Bytes.compareTo(plain, offset, input[i].length, input[i], 0, input[i].length) == 0);
        offset += input[i].length;
    }
}
Also used : CompressionOutputStream(org.apache.hadoop.io.compress.CompressionOutputStream) Decompressor(org.apache.hadoop.io.compress.Decompressor) CompressionInputStream(org.apache.hadoop.io.compress.CompressionInputStream) ByteArrayInputStream(java.io.ByteArrayInputStream) Compressor(org.apache.hadoop.io.compress.Compressor) ByteArrayOutputStream(java.io.ByteArrayOutputStream)

Example 8 with CompressionOutputStream

use of org.apache.hadoop.io.compress.CompressionOutputStream in project hbase by apache.

The class TestCompressionTest defines the method nativeCodecTest.

/**
 * Verify CompressionTest.testCompression() on a native codec.
 *
 * @param codecName short codec name understood by CompressionTest
 * @param libName native library to load via System.loadLibrary, or null if none
 * @param codecClassName fully-qualified codec class name to instantiate
 */
private void nativeCodecTest(String codecName, String libName, String codecClassName) {
    if (isCompressionAvailable(codecClassName)) {
        try {
            if (libName != null) {
                System.loadLibrary(libName);
            }
            try {
                Configuration conf = new Configuration();
                CompressionCodec codec = (CompressionCodec) ReflectionUtils.newInstance(conf.getClassByName(codecClassName), conf);
                DataOutputBuffer compressedDataBuffer = new DataOutputBuffer();
                byte[] data = new byte[1024];
                // try-with-resources closes both wrappers even if the native
                // write path throws; finish() flushes the codec trailer first.
                try (CompressionOutputStream deflateFilter = codec.createOutputStream(compressedDataBuffer);
                     DataOutputStream deflateOut = new DataOutputStream(new BufferedOutputStream(deflateFilter))) {
                    deflateOut.write(data, 0, data.length);
                    deflateOut.flush();
                    deflateFilter.finish();
                }
                // Codec class, codec nativelib and Hadoop nativelib with codec JNIs are present
                assertTrue(CompressionTest.testCompression(codecName));
            } catch (UnsatisfiedLinkError e) {
                // Hadoop nativelib does not have codec JNIs.
                // cannot assert the codec here because the current logic of
                // CompressionTest checks only classloading, not the codec
                // usage.
                LOG.debug("No JNI for codec '" + codecName + "' " + e.getMessage());
            } catch (Exception e) {
                // NOTE(review): failures other than missing JNI are logged but
                // do not fail the test — confirm this best-effort behavior is intended.
                LOG.error(codecName, e);
            }
        } catch (UnsatisfiedLinkError e) {
            // nativelib is not available
            LOG.debug("Native lib not available: " + codecName);
            assertFalse(CompressionTest.testCompression(codecName));
        }
    } else {
        // Compression Codec class is not available
        LOG.debug("Codec class not available: " + codecName);
        assertFalse(CompressionTest.testCompression(codecName));
    }
}
Also used : CompressionOutputStream(org.apache.hadoop.io.compress.CompressionOutputStream) Configuration(org.apache.hadoop.conf.Configuration) DataOutputStream(java.io.DataOutputStream) DataOutputBuffer(org.apache.hadoop.io.DataOutputBuffer) CompressionCodec(org.apache.hadoop.io.compress.CompressionCodec) BufferedOutputStream(java.io.BufferedOutputStream) IOException(java.io.IOException)

Example 9 with CompressionOutputStream

use of org.apache.hadoop.io.compress.CompressionOutputStream in project hadoop by apache.

The class TestZStandardCompressorDecompressor defines the method testCompressorDecompressorLogicWithCompressionStreams.

// test compress/decompress process through
// CompressionOutputStream/CompressionInputStream api
@Test
public void testCompressorDecompressorLogicWithCompressionStreams() throws Exception {
    DataOutputStream deflateOut = null;
    DataInputStream inflateIn = null;
    int byteSize = 1024 * 100;
    byte[] bytes = generate(byteSize);
    int bufferSize = IO_FILE_BUFFER_SIZE_DEFAULT;
    try {
        // Compress the generated bytes through the zstd compressor stream.
        DataOutputBuffer compressedDataBuffer = new DataOutputBuffer();
        CompressionOutputStream deflateFilter = new CompressorStream(compressedDataBuffer, new ZStandardCompressor(), bufferSize);
        deflateOut = new DataOutputStream(new BufferedOutputStream(deflateFilter));
        deflateOut.write(bytes, 0, bytes.length);
        deflateOut.flush();
        deflateFilter.finish();
        // Decompress through the zstd decompressor stream.
        DataInputBuffer deCompressedDataBuffer = new DataInputBuffer();
        deCompressedDataBuffer.reset(compressedDataBuffer.getData(), 0, compressedDataBuffer.getLength());
        CompressionInputStream inflateFilter = new DecompressorStream(deCompressedDataBuffer, new ZStandardDecompressor(bufferSize), bufferSize);
        inflateIn = new DataInputStream(new BufferedInputStream(inflateFilter));
        byte[] result = new byte[byteSize];
        // readFully guarantees the whole buffer is filled; a plain read() may
        // return fewer bytes and leave the tail of `result` as zeros.
        inflateIn.readFully(result);
        assertArrayEquals("original array not equals compress/decompressed array", result, bytes);
    } finally {
        IOUtils.closeQuietly(deflateOut);
        IOUtils.closeQuietly(inflateIn);
    }
}
Also used : DecompressorStream(org.apache.hadoop.io.compress.DecompressorStream) CompressionOutputStream(org.apache.hadoop.io.compress.CompressionOutputStream) CompressorStream(org.apache.hadoop.io.compress.CompressorStream) CompressionInputStream(org.apache.hadoop.io.compress.CompressionInputStream) DataOutputStream(java.io.DataOutputStream) DataInputStream(java.io.DataInputStream) DataInputBuffer(org.apache.hadoop.io.DataInputBuffer) BufferedInputStream(java.io.BufferedInputStream) DataOutputBuffer(org.apache.hadoop.io.DataOutputBuffer) BufferedOutputStream(java.io.BufferedOutputStream) Test(org.junit.Test)

Example 10 with CompressionOutputStream

use of org.apache.hadoop.io.compress.CompressionOutputStream in project hadoop by apache.

The class TestZStandardCompressorDecompressor defines the method testCompressionCompressesCorrectly.

@Test
public void testCompressionCompressesCorrectly() throws Exception {
    int uncompressedSize = (int) FileUtils.sizeOf(uncompressedFile);
    byte[] bytes = FileUtils.readFileToByteArray(uncompressedFile);
    assertEquals(uncompressedSize, bytes.length);
    Configuration conf = new Configuration();
    ZStandardCodec codec = new ZStandardCodec();
    codec.setConf(conf);
    ByteArrayOutputStream baos = new ByteArrayOutputStream();
    Compressor compressor = codec.createCompressor();
    // try-with-resources guarantees the stream is closed even on failure;
    // finish() flushes the trailer before close.
    try (CompressionOutputStream outputStream = codec.createOutputStream(baos, compressor)) {
        outputStream.write(bytes, 0, bytes.length);
        outputStream.finish();
    }
    assertEquals(uncompressedSize, compressor.getBytesRead());
    assertTrue(compressor.finished());
    // just make sure we can decompress the file
    ByteArrayOutputStream byteArrayOutputStream = new ByteArrayOutputStream();
    ByteArrayInputStream bais = new ByteArrayInputStream(baos.toByteArray());
    Decompressor decompressor = codec.createDecompressor();
    byte[] buffer = new byte[100];
    try (CompressionInputStream inputStream = codec.createInputStream(bais, decompressor)) {
        // Always request the full buffer length. The original reused `n` as
        // both the requested length and the byte count, shrinking the read
        // window after every partial read and spinning forever on a 0-byte read.
        int n;
        while ((n = inputStream.read(buffer, 0, buffer.length)) != -1) {
            byteArrayOutputStream.write(buffer, 0, n);
        }
    }
    assertArrayEquals(bytes, byteArrayOutputStream.toByteArray());
}
Also used : CompressionOutputStream(org.apache.hadoop.io.compress.CompressionOutputStream) Decompressor(org.apache.hadoop.io.compress.Decompressor) Configuration(org.apache.hadoop.conf.Configuration) ByteArrayInputStream(java.io.ByteArrayInputStream) CompressionInputStream(org.apache.hadoop.io.compress.CompressionInputStream) Compressor(org.apache.hadoop.io.compress.Compressor) ZStandardCodec(org.apache.hadoop.io.compress.ZStandardCodec) ByteArrayOutputStream(java.io.ByteArrayOutputStream) Test(org.junit.Test)

Aggregations

CompressionOutputStream (org.apache.hadoop.io.compress.CompressionOutputStream)15 CompressionInputStream (org.apache.hadoop.io.compress.CompressionInputStream)9 ByteArrayOutputStream (java.io.ByteArrayOutputStream)8 Configuration (org.apache.hadoop.conf.Configuration)8 ByteArrayInputStream (java.io.ByteArrayInputStream)7 Test (org.junit.Test)7 BufferedOutputStream (java.io.BufferedOutputStream)5 DataOutputStream (java.io.DataOutputStream)5 IOException (java.io.IOException)4 DataOutputBuffer (org.apache.hadoop.io.DataOutputBuffer)4 Compressor (org.apache.hadoop.io.compress.Compressor)4 BufferedInputStream (java.io.BufferedInputStream)3 DataInputStream (java.io.DataInputStream)3 DataInputBuffer (org.apache.hadoop.io.DataInputBuffer)3 Decompressor (org.apache.hadoop.io.compress.Decompressor)3 SnappyCodec (org.apache.hadoop.io.compress.SnappyCodec)3 File (java.io.File)2 FileInputStream (java.io.FileInputStream)2 FileOutputStream (java.io.FileOutputStream)2 BZip2Codec (org.apache.hadoop.io.compress.BZip2Codec)2