Example 1 with Decompressor

Use of org.apache.hadoop.io.compress.Decompressor in the Apache Hadoop project.

From the class TestZlibCompressorDecompressor, method testZlibCompressorDecompressorWithCompressionLevels:

@Test
public void testZlibCompressorDecompressorWithCompressionLevels() {
    Configuration conf = new Configuration();
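    // request a specific compression level; ZlibFactory reads this key when it builds the compressor below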
    conf.set("zlib.compress.level", "FOUR");
    if (ZlibFactory.isNativeZlibLoaded(conf)) {
        byte[] rawData;
        int tryNumber = 5;
        int BYTE_SIZE = 10 * 1024;
        Compressor zlibCompressor = ZlibFactory.getZlibCompressor(conf);
        Decompressor zlibDecompressor = ZlibFactory.getZlibDecompressor(conf);
        rawData = generate(BYTE_SIZE);
        try {
            for (int i = 0; i < tryNumber; i++) compressDecompressZlib(rawData, (ZlibCompressor) zlibCompressor, (ZlibDecompressor) zlibDecompressor);
            zlibCompressor.reinit(conf);
        } catch (Exception ex) {
            fail("testZlibCompressorDecompressorWithConfiguration ex error " + ex);
        }
    } else {
        // native zlib is not loaded here, so this assertion is guaranteed to fail with a clear message
        assertTrue("ZlibFactory is not using native libs as requested", ZlibFactory.isNativeZlibLoaded(conf));
    }
}
Also used: org.apache.hadoop.io.compress.zlib.ZlibDecompressor.ZlibDirectDecompressor, org.apache.hadoop.io.compress.Decompressor, org.apache.hadoop.conf.Configuration, org.apache.hadoop.io.compress.Compressor, java.io.IOException, org.junit.Test
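
The helper compressDecompressZlib called in the loop is defined elsewhere in the test class and is not shown on this page. Below is a minimal sketch of the round trip such a helper performs, using only the public Compressor/Decompressor API; the method name compressDecompressRoundTrip and its assertions are illustrative, not the helper's actual body.

import java.io.IOException;
import org.apache.hadoop.io.compress.Compressor;
import org.apache.hadoop.io.compress.Decompressor;
import static org.junit.Assert.assertArrayEquals;

// Hypothetical sketch of what compressDecompressZlib does; the real helper may differ.
static void compressDecompressRoundTrip(byte[] rawData, Compressor compressor,
        Decompressor decompressor) throws IOException {
    byte[] compressed = new byte[rawData.length * 2];
    byte[] restored = new byte[rawData.length];
    // feed all input, signal end of stream, then drain the compressed bytes
    compressor.setInput(rawData, 0, rawData.length);
    compressor.finish();
    int compressedSize = 0;
    while (!compressor.finished()) {
        compressedSize += compressor.compress(compressed, compressedSize, compressed.length - compressedSize);
    }
    // feed the compressed bytes and drain the restored output
    decompressor.setInput(compressed, 0, compressedSize);
    int restoredSize = 0;
    while (!decompressor.finished()) {
        restoredSize += decompressor.decompress(restored, restoredSize, restored.length - restoredSize);
    }
    assertArrayEquals("round trip must restore the original bytes", rawData, restored);
    // both objects are stateful; reset them before the next iteration reuses them
    compressor.reset();
    decompressor.reset();
}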

Example 2 with Decompressor

Use of org.apache.hadoop.io.compress.Decompressor in the Apache Hadoop project.

From the class TestZlibCompressorDecompressor, method testBuiltInGzipDecompressorExceptions:

@Test
public void testBuiltInGzipDecompressorExceptions() {
    BuiltInGzipDecompressor decompresser = new BuiltInGzipDecompressor();
    try {
        decompresser.setInput(null, 0, 1);
    } catch (NullPointerException ex) {
    // expected
    } catch (Exception ex) {
        fail("testBuiltInGzipDecompressorExceptions npe error " + ex);
    }
    try {
        decompresser.setInput(new byte[] { 0 }, 0, -1);
    } catch (ArrayIndexOutOfBoundsException ex) {
    // expected
    } catch (Exception ex) {
        fail("testBuiltInGzipDecompressorExceptions aioob error" + ex);
    }
    assertTrue("decompresser.getBytesRead error", decompresser.getBytesRead() == 0);
    assertTrue("decompresser.getRemaining error", decompresser.getRemaining() == 0);
    decompresser.reset();
    decompresser.end();
    InputStream decompStream = null;
    try {
        // bytes 0 and 1 are invalid: the gzip magic must be 31, -117 (0x1f, 0x8b)
        int buffSize = 1 * 1024;
        byte[] buffer = new byte[buffSize];
        Decompressor decompressor = new BuiltInGzipDecompressor();
        DataInputBuffer gzbuf = new DataInputBuffer();
        decompStream = new DecompressorStream(gzbuf, decompressor);
        gzbuf.reset(new byte[] { 0, 0, 1, 1, 1, 1, 11, 1, 1, 1, 1 }, 11);
        decompStream.read(buffer);
    } catch (IOException ioex) {
    // expected
    } catch (Exception ex) {
        fail("invalid 0 and 1 byte in gzip stream" + ex);
    }
    // byte 2 (the compression method) is invalid: it must be 8 (deflate)
    try {
        int buffSize = 1 * 1024;
        byte[] buffer = new byte[buffSize];
        Decompressor decompressor = new BuiltInGzipDecompressor();
        DataInputBuffer gzbuf = new DataInputBuffer();
        decompStream = new DecompressorStream(gzbuf, decompressor);
        gzbuf.reset(new byte[] { 31, -117, 7, 1, 1, 1, 1, 11, 1, 1, 1, 1 }, 11);
        decompStream.read(buffer);
    } catch (IOException ioex) {
    // expected
    } catch (Exception ex) {
        fail("invalid 2 byte in gzip stream" + ex);
    }
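    // byte 3 is the FLG field; -32 (0xE0) sets reserved flag bits, which is invalid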
    try {
        int buffSize = 1 * 1024;
        byte[] buffer = new byte[buffSize];
        Decompressor decompressor = new BuiltInGzipDecompressor();
        DataInputBuffer gzbuf = new DataInputBuffer();
        decompStream = new DecompressorStream(gzbuf, decompressor);
        gzbuf.reset(new byte[] { 31, -117, 8, -32, 1, 1, 1, 11, 1, 1, 1, 1 }, 11);
        decompStream.read(buffer);
    } catch (IOException ioex) {
    // expected
    } catch (Exception ex) {
        fail("invalid 3 byte in gzip stream" + ex);
    }
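    // byte 3 (FLG) = 4 sets the FEXTRA bit, so the decompressor expects an extra field that the junk bytes here cannot supply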
    try {
        int buffSize = 1 * 1024;
        byte[] buffer = new byte[buffSize];
        Decompressor decompressor = new BuiltInGzipDecompressor();
        DataInputBuffer gzbuf = new DataInputBuffer();
        decompStream = new DecompressorStream(gzbuf, decompressor);
        gzbuf.reset(new byte[] { 31, -117, 8, 4, 1, 1, 1, 11, 1, 1, 1, 1 }, 11);
        decompStream.read(buffer);
    } catch (IOException ioex) {
    // expected
    } catch (Exception ex) {
        fail("invalid 3 byte make hasExtraField" + ex);
    }
}
Also used: org.apache.hadoop.io.compress.DecompressorStream, org.apache.hadoop.io.compress.zlib.ZlibDecompressor.ZlibDirectDecompressor, org.apache.hadoop.io.compress.Decompressor, org.apache.hadoop.io.DataInputBuffer, java.io.InputStream, java.io.IOException, org.junit.Test
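
For contrast with the corrupted headers above, here is a hedged sketch (not part of the test) of the happy path: a well-formed stream produced by java.util.zip.GZIPOutputStream begins with exactly the bytes the decompressor checks, 31 and -117 (the gzip magic) followed by 8 (the deflate method), and decodes cleanly through the same DecompressorStream plumbing. The method name gzipRoundTrip is illustrative.

import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.util.zip.GZIPOutputStream;
import org.apache.hadoop.io.DataInputBuffer;
import org.apache.hadoop.io.compress.DecompressorStream;
import org.apache.hadoop.io.compress.zlib.BuiltInGzipDecompressor;

// Illustrative sketch: compress with the JDK, decompress with Hadoop's built-in gzip decompressor.
static byte[] gzipRoundTrip(byte[] raw) throws IOException {
    ByteArrayOutputStream compressed = new ByteArrayOutputStream();
    try (GZIPOutputStream gzos = new GZIPOutputStream(compressed)) {
        gzos.write(raw);
    }
    byte[] gz = compressed.toByteArray();
    // gz[0] == 31 and gz[1] == -117 (magic), gz[2] == 8 (deflate), gz[3] == flags
    DataInputBuffer gzbuf = new DataInputBuffer();
    gzbuf.reset(gz, gz.length);
    InputStream decompStream = new DecompressorStream(gzbuf, new BuiltInGzipDecompressor());
    ByteArrayOutputStream restored = new ByteArrayOutputStream();
    byte[] buffer = new byte[1024];
    int n;
    while ((n = decompStream.read(buffer)) != -1) {
        restored.write(buffer, 0, n);
    }
    decompStream.close();
    return restored.toByteArray();
}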

Example 3 with Decompressor

Use of org.apache.hadoop.io.compress.Decompressor in the Apache Hadoop project.

From the class TestZStandardCompressorDecompressor, method testReadingWithAStream:

@Test
public void testReadingWithAStream() throws Exception {
    FileInputStream inputStream = FileUtils.openInputStream(compressedFile);
    ZStandardCodec codec = new ZStandardCodec();
    codec.setConf(CONFIGURATION);
    Decompressor decompressor = codec.createDecompressor();
    CompressionInputStream cis = codec.createInputStream(inputStream, decompressor);
    ByteArrayOutputStream baos = new ByteArrayOutputStream();
    byte[] resultOfDecompression;
    try {
        byte[] buffer = new byte[100];
        int n;
        while ((n = cis.read(buffer, 0, buffer.length)) != -1) {
            baos.write(buffer, 0, n);
        }
        resultOfDecompression = baos.toByteArray();
    } finally {
        IOUtils.closeQuietly(baos);
        IOUtils.closeQuietly(cis);
    }
    byte[] expected = FileUtils.readFileToByteArray(uncompressedFile);
    assertEquals(bytesToHex(expected), bytesToHex(resultOfDecompression));
}
Also used: org.apache.hadoop.io.compress.Decompressor, org.apache.hadoop.io.compress.CompressionInputStream, org.apache.hadoop.io.compress.ZStandardCodec, java.io.ByteArrayOutputStream, java.io.FileInputStream, org.junit.Test
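
The test builds its decompressor directly with codec.createDecompressor(). Long-running readers normally borrow the instance from org.apache.hadoop.io.compress.CodecPool instead and return it when done. A minimal sketch of that pattern, reusing this test's codec and inputStream variables (error handling around the read loop is elided):

// Borrow a pooled decompressor rather than constructing one per stream.
Decompressor decompressor = CodecPool.getDecompressor(codec);
try {
    CompressionInputStream cis = codec.createInputStream(inputStream, decompressor);
    // ... read from cis exactly as in the test above ...
} finally {
    // return it exactly once so the pool can hand it to the next caller
    CodecPool.returnDecompressor(decompressor);
}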

Example 4 with Decompressor

Use of org.apache.hadoop.io.compress.Decompressor in the Apache Hadoop project.

From the class TestLineRecordReader, method testMultipleClose (this variant exercises the new org.apache.hadoop.mapreduce API):

@Test
public void testMultipleClose() throws IOException {
    URL testFileUrl = getClass().getClassLoader().getResource("recordSpanningMultipleSplits.txt.bz2");
    assertNotNull("Cannot find recordSpanningMultipleSplits.txt.bz2", testFileUrl);
    File testFile = new File(testFileUrl.getFile());
    Path testFilePath = new Path(testFile.getAbsolutePath());
    long testFileSize = testFile.length();
    Configuration conf = new Configuration();
    conf.setInt(org.apache.hadoop.mapreduce.lib.input.LineRecordReader.MAX_LINE_LENGTH, Integer.MAX_VALUE);
    TaskAttemptContext context = new TaskAttemptContextImpl(conf, new TaskAttemptID());
    // read all records in the file, then close the reader twice to verify close() is idempotent
    FileSplit split = new FileSplit(testFilePath, 0, testFileSize, null);
    LineRecordReader reader = new LineRecordReader();
    reader.initialize(split, context);
    //noinspection StatementWithEmptyBody
    while (reader.nextKeyValue()) ;
    reader.close();
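    // the second close() must be a no-op; in particular it must not return the decompressor to the pool again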
    reader.close();
    BZip2Codec codec = new BZip2Codec();
    codec.setConf(conf);
    Set<Decompressor> decompressors = new HashSet<Decompressor>();
    for (int i = 0; i < 10; ++i) {
        decompressors.add(CodecPool.getDecompressor(codec));
    }
    assertEquals(10, decompressors.size());
}
Also used: org.apache.hadoop.fs.Path, org.apache.hadoop.io.compress.Decompressor, org.apache.hadoop.conf.Configuration, org.apache.hadoop.mapreduce.TaskAttemptID, org.apache.hadoop.mapreduce.TaskAttemptContext, org.apache.hadoop.io.compress.BZip2Codec, java.net.URL, org.apache.hadoop.mapreduce.task.TaskAttemptContextImpl, java.io.File, java.util.HashSet, org.junit.Test
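
The final assertion is what makes the double close() meaningful: CodecPool recycles returned instances, so if close() had returned the reader's decompressor to the pool twice, two of the ten getDecompressor calls could have received the same object and the Set would hold fewer than 10 elements. A short sketch of the reuse behavior this relies on (a general description of the pool, not a quote from its Javadoc):

Decompressor d1 = CodecPool.getDecompressor(codec);
CodecPool.returnDecompressor(d1);
Decompressor d2 = CodecPool.getDecompressor(codec);
// d2 is typically the same recycled instance as d1; returning d1 twice
// would let two independent callers end up sharing one decompressor.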

Example 5 with Decompressor

Use of org.apache.hadoop.io.compress.Decompressor in the Apache Hadoop project.

From the class TestLineRecordReader, method testMultipleClose (a distinct test of the same name that exercises the old org.apache.hadoop.mapred API):

@Test
public void testMultipleClose() throws IOException {
    URL testFileUrl = getClass().getClassLoader().getResource("recordSpanningMultipleSplits.txt.bz2");
    assertNotNull("Cannot find recordSpanningMultipleSplits.txt.bz2", testFileUrl);
    File testFile = new File(testFileUrl.getFile());
    Path testFilePath = new Path(testFile.getAbsolutePath());
    long testFileSize = testFile.length();
    Configuration conf = new Configuration();
    conf.setInt(org.apache.hadoop.mapreduce.lib.input.LineRecordReader.MAX_LINE_LENGTH, Integer.MAX_VALUE);
    FileSplit split = new FileSplit(testFilePath, 0, testFileSize, (String[]) null);
    LineRecordReader reader = new LineRecordReader(conf, split);
    LongWritable key = new LongWritable();
    Text value = new Text();
    //noinspection StatementWithEmptyBody
    while (reader.next(key, value)) ;
    reader.close();
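    // as in Example 4, the second close() must be a no-op and must not double-return the decompressor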
    reader.close();
    BZip2Codec codec = new BZip2Codec();
    codec.setConf(conf);
    Set<Decompressor> decompressors = new HashSet<Decompressor>();
    for (int i = 0; i < 10; ++i) {
        decompressors.add(CodecPool.getDecompressor(codec));
    }
    assertEquals(10, decompressors.size());
}
Also used: org.apache.hadoop.fs.Path, org.apache.hadoop.io.compress.Decompressor, org.apache.hadoop.conf.Configuration, org.apache.hadoop.io.Text, org.apache.hadoop.io.compress.BZip2Codec, java.net.URL, org.apache.hadoop.io.LongWritable, java.io.File, java.util.HashSet, org.junit.Test
