Example 6 with CompressionInputStream

Use of org.apache.hadoop.io.compress.CompressionInputStream in project hadoop by apache.

From the class TestSnappyCompressorDecompressor, method testSnappyCompressorDecopressorLogicWithCompressionStreams.

@Test
public void testSnappyCompressorDecopressorLogicWithCompressionStreams() {
    // Compress 100 KB of generated bytes through a BlockCompressorStream,
    // then read them back through a BlockDecompressorStream.
    int BYTE_SIZE = 1024 * 100;
    byte[] bytes = BytesGenerator.get(BYTE_SIZE);
    int bufferSize = 262144;
    // Worst-case expansion allowance for an incompressible block.
    int compressionOverhead = (bufferSize / 6) + 32;
    DataOutputStream deflateOut = null;
    DataInputStream inflateIn = null;
    try {
        DataOutputBuffer compressedDataBuffer = new DataOutputBuffer();
        CompressionOutputStream deflateFilter = new BlockCompressorStream(compressedDataBuffer, new SnappyCompressor(bufferSize), bufferSize, compressionOverhead);
        deflateOut = new DataOutputStream(new BufferedOutputStream(deflateFilter));
        deflateOut.write(bytes, 0, bytes.length);
        deflateOut.flush();
        // finish() compresses and emits whatever is still buffered in the stream.
        deflateFilter.finish();
        // Wrap the compressed bytes in an input buffer for the read-back pass.
        DataInputBuffer deCompressedDataBuffer = new DataInputBuffer();
        deCompressedDataBuffer.reset(compressedDataBuffer.getData(), 0, compressedDataBuffer.getLength());
        CompressionInputStream inflateFilter = new BlockDecompressorStream(deCompressedDataBuffer, new SnappyDecompressor(bufferSize), bufferSize);
        inflateIn = new DataInputStream(new BufferedInputStream(inflateFilter));
        byte[] result = new byte[BYTE_SIZE];
        // readFully() rather than read(): a single read() is not guaranteed
        // to fill the buffer.
        inflateIn.readFully(result);
        // JUnit's argument order is (message, expected, actual).
        Assert.assertArrayEquals("original array not equal to compressed/decompressed array", bytes, result);
    } catch (IOException e) {
        fail("testSnappyCompressorDecopressorLogicWithCompressionStreams failed with an IOException");
    } finally {
        try {
            if (deflateOut != null)
                deflateOut.close();
            if (inflateIn != null)
                inflateIn.close();
        } catch (Exception e) {
            // Best-effort cleanup; close failures are irrelevant to the test.
        }
    }
}
Also used: CompressionOutputStream (org.apache.hadoop.io.compress.CompressionOutputStream), CompressionInputStream (org.apache.hadoop.io.compress.CompressionInputStream), BlockCompressorStream (org.apache.hadoop.io.compress.BlockCompressorStream), BlockDecompressorStream (org.apache.hadoop.io.compress.BlockDecompressorStream), DataOutputBuffer (org.apache.hadoop.io.DataOutputBuffer), DataInputBuffer (org.apache.hadoop.io.DataInputBuffer), DataOutputStream (java.io.DataOutputStream), DataInputStream (java.io.DataInputStream), BufferedOutputStream (java.io.BufferedOutputStream), BufferedInputStream (java.io.BufferedInputStream), IOException (java.io.IOException), Test (org.junit.Test)
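The same round trip reads more simply with try-with-resources, which closes both streams without the nested finally block; closing a CompressionOutputStream also calls finish() for you. A minimal sketch, assuming the bytes, bufferSize, and compressionOverhead values from the test above:

DataOutputBuffer compressed = new DataOutputBuffer();
try (DataOutputStream out = new DataOutputStream(new BufferedOutputStream(
        new BlockCompressorStream(compressed, new SnappyCompressor(bufferSize),
                bufferSize, compressionOverhead)))) {
    out.write(bytes);
    // close() finishes the compression stream, so no explicit finish() is needed
}
DataInputBuffer source = new DataInputBuffer();
source.reset(compressed.getData(), 0, compressed.getLength());
byte[] result = new byte[bytes.length];
try (DataInputStream in = new DataInputStream(new BufferedInputStream(
        new BlockDecompressorStream(source, new SnappyDecompressor(bufferSize),
                bufferSize)))) {
    in.readFully(result);
}
Assert.assertArrayEquals(bytes, result);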

Example 7 with CompressionInputStream

Use of org.apache.hadoop.io.compress.CompressionInputStream in project hadoop by apache.

From the class TestZStandardCompressorDecompressor, method testReadingWithAStream.

@Test
public void testReadingWithAStream() throws Exception {
    FileInputStream inputStream = FileUtils.openInputStream(compressedFile);
    ZStandardCodec codec = new ZStandardCodec();
    codec.setConf(CONFIGURATION);
    Decompressor decompressor = codec.createDecompressor();
    // Wrap the raw file stream so reads return decompressed bytes.
    CompressionInputStream cis = codec.createInputStream(inputStream, decompressor);
    ByteArrayOutputStream baos = new ByteArrayOutputStream();
    byte[] resultOfDecompression;
    try {
        // Drain the stream in 100-byte chunks; read() returns -1 at EOF.
        byte[] buffer = new byte[100];
        int n;
        while ((n = cis.read(buffer, 0, buffer.length)) != -1) {
            baos.write(buffer, 0, n);
        }
        resultOfDecompression = baos.toByteArray();
    } finally {
        IOUtils.closeQuietly(baos);
        IOUtils.closeQuietly(cis);
    }
    // Compare against the known-good plaintext, hex-encoded for readable diffs.
    byte[] expected = FileUtils.readFileToByteArray(uncompressedFile);
    assertEquals(bytesToHex(expected), bytesToHex(resultOfDecompression));
}
Also used: Decompressor (org.apache.hadoop.io.compress.Decompressor), CompressionInputStream (org.apache.hadoop.io.compress.CompressionInputStream), ZStandardCodec (org.apache.hadoop.io.compress.ZStandardCodec), ByteArrayOutputStream (java.io.ByteArrayOutputStream), FileInputStream (java.io.FileInputStream), Test (org.junit.Test)
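Production code normally borrows decompressors from CodecPool rather than instantiating one per stream, since decompressors (especially native ones) are comparatively expensive to create. A sketch of the same read path with pooling, assuming the codec and compressedFile from the test above:

Decompressor decompressor = CodecPool.getDecompressor(codec);
try (CompressionInputStream cis =
        codec.createInputStream(FileUtils.openInputStream(compressedFile), decompressor)) {
    // ... drain cis exactly as in the loop above ...
} finally {
    // Hand the decompressor back so later callers can reuse it.
    CodecPool.returnDecompressor(decompressor);
}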

Example 8 with CompressionInputStream

Use of org.apache.hadoop.io.compress.CompressionInputStream in project hadoop by apache.

From the class TestZStandardCompressorDecompressor, method testDecompressingOutput.

@Test
public void testDecompressingOutput() throws Exception {
    byte[] expectedDecompressedResult = FileUtils.readFileToByteArray(uncompressedFile);
    ZStandardCodec codec = new ZStandardCodec();
    codec.setConf(CONFIGURATION);
    CompressionInputStream inputStream = codec.createInputStream(FileUtils.openInputStream(compressedFile), codec.createDecompressor());
    byte[] toDecompress = new byte[100];
    ByteArrayOutputStream baos = new ByteArrayOutputStream();
    byte[] decompressedResult;
    int totalFileSize = 0;
    int result;
    try {
        // Always request the full buffer: reusing the previous read's return
        // value as the next request length would permanently shrink the reads
        // after any short read.
        while ((result = inputStream.read(toDecompress, 0, toDecompress.length)) != -1) {
            baos.write(toDecompress, 0, result);
            totalFileSize += result;
        }
        decompressedResult = baos.toByteArray();
    } finally {
        // Close the compression stream as well, not only the byte sink.
        IOUtils.closeQuietly(baos);
        IOUtils.closeQuietly(inputStream);
    }
    // The stream should yield exactly as many bytes as were accumulated.
    assertEquals(totalFileSize, decompressedResult.length);
    assertEquals(bytesToHex(expectedDecompressedResult), bytesToHex(decompressedResult));
}
Also used: CompressionInputStream (org.apache.hadoop.io.compress.CompressionInputStream), ZStandardCodec (org.apache.hadoop.io.compress.ZStandardCodec), ByteArrayOutputStream (java.io.ByteArrayOutputStream), Test (org.junit.Test)
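These two tests exercise only the read path. For symmetry, the write path uses the same codec's createOutputStream; a minimal sketch of how compressedFile could have been produced, assuming the same CONFIGURATION and file fields as the tests above:

ZStandardCodec codec = new ZStandardCodec();
codec.setConf(CONFIGURATION);
try (CompressionOutputStream out =
        codec.createOutputStream(new FileOutputStream(compressedFile))) {
    out.write(FileUtils.readFileToByteArray(uncompressedFile));
    // finish() writes the final zstd frame; close() would also trigger it.
    out.finish();
}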

Example 9 with CompressionInputStream

Use of org.apache.hadoop.io.compress.CompressionInputStream in project apex-malhar by apache.

From the class AbstractFileOutputOperatorTest, method checkSnappyFile.

private void checkSnappyFile(File file, List<Long> offsets, int startVal, int totalWindows, int totalRecords) throws IOException {
    FileInputStream fis;
    InputStream gss = null;
    Configuration conf = new Configuration();
    CompressionCodec codec = (CompressionCodec) ReflectionUtils.newInstance(SnappyCodec.class, conf);
    CompressionInputStream snappyIs = null;
    BufferedReader br = null;
    int numWindows = 0;
    try {
        fis = new FileInputStream(file);
        gss = fis;
        long startOffset = 0;
        // The file is a sequence of independently compressed segments; each
        // offset marks the end of one segment.
        for (long offset : offsets) {
            // Skip initial case in case file is not yet created
            if (offset == 0) {
                continue;
            }
            long limit = offset - startOffset;
            // Restrict the decompressor to the current segment's bytes.
            LimitInputStream lis = new LimitInputStream(gss, limit);
            snappyIs = codec.createInputStream(lis);
            br = new BufferedReader(new InputStreamReader(snappyIs));
            String eline = "" + (startVal + numWindows * 2);
            int count = 0;
            String line;
            while ((line = br.readLine()) != null) {
                Assert.assertEquals("File line", eline, line);
                ++count;
                // Each window contributes totalRecords lines, and the expected
                // value advances by 2 per window.
                if ((count % totalRecords) == 0) {
                    ++numWindows;
                    eline = "" + (startVal + numWindows * 2);
                }
            }
            startOffset = offset;
        }
    } catch (Exception e) {
        // A swallowed exception would let a broken file pass the check, so
        // fail explicitly instead of only printing the stack trace.
        e.printStackTrace();
        Assert.fail("Exception while checking snappy file: " + e);
    } finally {
        // Close only the outermost open wrapper; it closes the streams beneath it.
        if (br != null) {
            br.close();
        } else {
            if (snappyIs != null) {
                snappyIs.close();
            } else if (gss != null) {
                gss.close();
            }
        }
    }
    Assert.assertEquals("Total", totalWindows, numWindows);
}
Also used: Configuration (org.apache.hadoop.conf.Configuration), CompressionInputStream (org.apache.hadoop.io.compress.CompressionInputStream), CompressionCodec (org.apache.hadoop.io.compress.CompressionCodec), SnappyCodec (org.apache.hadoop.io.compress.SnappyCodec), LimitInputStream (com.google.common.io.LimitInputStream), FileInputStream (java.io.FileInputStream), InputStream (java.io.InputStream), InputStreamReader (java.io.InputStreamReader), BufferedReader (java.io.BufferedReader), GZIPInputStream (java.util.zip.GZIPInputStream), CipherInputStream (javax.crypto.CipherInputStream), IOException (java.io.IOException), NoSuchAlgorithmException (java.security.NoSuchAlgorithmException), ConstraintViolationException (javax.validation.ConstraintViolationException)
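Note that com.google.common.io.LimitInputStream was deprecated and later removed from Guava; on current Guava versions the equivalent is ByteStreams.limit. A drop-in sketch for the two lines above, assuming Guava 15 or newer on the classpath:

// ByteStreams.limit returns an InputStream that ends after `limit` bytes.
InputStream lis = ByteStreams.limit(gss, limit);
snappyIs = codec.createInputStream(lis);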

Example 10 with CompressionInputStream

Use of org.apache.hadoop.io.compress.CompressionInputStream in project parquet-mr by apache.

From the class TestSnappyCodec, method TestSnappyStream.

@Test
public void TestSnappyStream() throws IOException {
    SnappyCodec codec = new SnappyCodec();
    codec.setConf(new Configuration());
    // 1 MB of repeating byte values, written through the codec in 1 KB chunks.
    int blockSize = 1024;
    int inputSize = blockSize * 1024;
    byte[] input = new byte[inputSize];
    for (int i = 0; i < inputSize; ++i) {
        input[i] = (byte) i;
    }
    ByteArrayOutputStream compressedStream = new ByteArrayOutputStream();
    CompressionOutputStream compressor = codec.createOutputStream(compressedStream);
    int bytesCompressed = 0;
    while (bytesCompressed < inputSize) {
        int len = Math.min(inputSize - bytesCompressed, blockSize);
        compressor.write(input, bytesCompressed, len);
        bytesCompressed += len;
    }
    compressor.finish();
    byte[] rawCompressed = Snappy.compress(input);
    byte[] codecCompressed = compressedStream.toByteArray();
    // Validate that the result from the codec is the same as if we compressed the
    // buffer directly.
    assertArrayEquals(rawCompressed, codecCompressed);
    ByteArrayInputStream inputStream = new ByteArrayInputStream(codecCompressed);
    CompressionInputStream decompressor = codec.createInputStream(inputStream);
    byte[] codecDecompressed = new byte[inputSize];
    int bytesDecompressed = 0;
    int numBytes;
    // Loop until EOF: read() returns -1 at end of stream, not 0, and bounding
    // the request keeps the copy inside the output buffer.
    while (bytesDecompressed < inputSize && (numBytes = decompressor.read(codecDecompressed, bytesDecompressed, Math.min(blockSize, inputSize - bytesDecompressed))) != -1) {
        bytesDecompressed += numBytes;
    }
    byte[] rawDecompressed = Snappy.uncompress(rawCompressed);
    assertArrayEquals(input, rawDecompressed);
    assertArrayEquals(input, codecDecompressed);
}
Also used: CompressionOutputStream (org.apache.hadoop.io.compress.CompressionOutputStream), Configuration (org.apache.hadoop.conf.Configuration), ByteArrayInputStream (java.io.ByteArrayInputStream), CompressionInputStream (org.apache.hadoop.io.compress.CompressionInputStream), ByteArrayOutputStream (java.io.ByteArrayOutputStream), SnappyCodec (org.apache.parquet.hadoop.codec.SnappyCodec), Test (org.junit.Test)
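The assertArrayEquals(rawCompressed, codecCompressed) check passes because this Parquet codec buffers its input and emits one raw snappy block on finish(), byte-identical to one-shot Snappy.compress; Hadoop's BlockCompressorStream in Example 6, by contrast, adds block-length framing. When the decompressed size is not known in advance, the stream can also be drained into a ByteArrayOutputStream instead of a pre-sized array; a sketch using only the objects already defined in the test:

ByteArrayOutputStream decompressed = new ByteArrayOutputStream();
byte[] chunk = new byte[blockSize];
int n;
// read() returns -1 once the compressed stream is exhausted.
while ((n = decompressor.read(chunk, 0, chunk.length)) != -1) {
    decompressed.write(chunk, 0, n);
}
assertArrayEquals(input, decompressed.toByteArray());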

Aggregations

CompressionInputStream (org.apache.hadoop.io.compress.CompressionInputStream): 20 uses
Test (org.junit.Test): 13
CompressionOutputStream (org.apache.hadoop.io.compress.CompressionOutputStream): 9
ByteArrayOutputStream (java.io.ByteArrayOutputStream): 8
ByteArrayInputStream (java.io.ByteArrayInputStream): 7
Configuration (org.apache.hadoop.conf.Configuration): 7
Decompressor (org.apache.hadoop.io.compress.Decompressor): 7
CompressionCodec (org.apache.hadoop.io.compress.CompressionCodec): 6
FileInputStream (java.io.FileInputStream): 5
IOException (java.io.IOException): 5
BufferedOutputStream (java.io.BufferedOutputStream): 4
ZStandardCodec (org.apache.hadoop.io.compress.ZStandardCodec): 4
BufferedInputStream (java.io.BufferedInputStream): 3
DataInputStream (java.io.DataInputStream): 3
DataOutputStream (java.io.DataOutputStream): 3
InputStream (java.io.InputStream): 3
FileSystem (org.apache.hadoop.fs.FileSystem): 3
DataInputBuffer (org.apache.hadoop.io.DataInputBuffer): 3
DataOutputBuffer (org.apache.hadoop.io.DataOutputBuffer): 3
Compressor (org.apache.hadoop.io.compress.Compressor): 3