
Example 66 with DataInputBuffer

Use of org.apache.hadoop.io.DataInputBuffer in project hadoop by apache, from the class TestCodec, method testGzipCompatibility.

@Test
public void testGzipCompatibility() throws IOException {
    Random r = new Random();
    long seed = r.nextLong();
    r.setSeed(seed);
    LOG.info("seed: " + seed);
    DataOutputBuffer dflbuf = new DataOutputBuffer();
    GZIPOutputStream gzout = new GZIPOutputStream(dflbuf);
    byte[] b = new byte[r.nextInt(128 * 1024 + 1)];
    r.nextBytes(b);
    gzout.write(b);
    gzout.close();
    DataInputBuffer gzbuf = new DataInputBuffer();
    gzbuf.reset(dflbuf.getData(), dflbuf.getLength());
    Configuration conf = new Configuration();
    // don't use native libs: with native zlib disabled, GzipCodec must
    // fall back to the pure-Java BuiltInGzipDecompressor
    ZlibFactory.setNativeZlibLoaded(false);
    CompressionCodec codec = ReflectionUtils.newInstance(GzipCodec.class, conf);
    Decompressor decom = codec.createDecompressor();
    assertNotNull(decom);
    assertEquals(BuiltInGzipDecompressor.class, decom.getClass());
    InputStream gzin = codec.createInputStream(gzbuf, decom);
    // reuse the output buffer to collect the decompressed bytes
    dflbuf.reset();
    IOUtils.copyBytes(gzin, dflbuf, 4096);
    final byte[] dflchk = Arrays.copyOf(dflbuf.getData(), dflbuf.getLength());
    assertArrayEquals(b, dflchk);
}
Also used: DataInputBuffer (org.apache.hadoop.io.DataInputBuffer), BuiltInGzipDecompressor (org.apache.hadoop.io.compress.zlib.BuiltInGzipDecompressor), Random (java.util.Random), Configuration (org.apache.hadoop.conf.Configuration), GZIPOutputStream (java.util.zip.GZIPOutputStream), GZIPInputStream (java.util.zip.GZIPInputStream), BufferedInputStream (java.io.BufferedInputStream), ByteArrayInputStream (java.io.ByteArrayInputStream), DataInputStream (java.io.DataInputStream), FileInputStream (java.io.FileInputStream), InputStream (java.io.InputStream), DataOutputBuffer (org.apache.hadoop.io.DataOutputBuffer), Test (org.junit.Test)
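
The idiom above leans on Hadoop's resettable in-memory buffers: DataOutputBuffer accumulates written bytes in a growable array, and DataInputBuffer.reset(data, length) wraps that same array for reading without copying. A minimal standalone sketch of the round trip (the Text payload and class name are illustrative, not part of the test):

import org.apache.hadoop.io.DataInputBuffer;
import org.apache.hadoop.io.DataOutputBuffer;
import org.apache.hadoop.io.Text;

public class BufferRoundTrip {
    public static void main(String[] args) throws Exception {
        DataOutputBuffer out = new DataOutputBuffer();
        // serialize a Writable into the growable in-memory buffer
        new Text("hello").write(out);
        DataInputBuffer in = new DataInputBuffer();
        // reuse the backing array directly; only the first getLength() bytes
        // are valid, since getData() returns the whole backing array
        in.reset(out.getData(), out.getLength());
        Text copy = new Text();
        copy.readFields(in);
        System.out.println(copy); // prints "hello"
    }
}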

Example 67 with DataInputBuffer

Use of org.apache.hadoop.io.DataInputBuffer in project hadoop by apache, from the class TestCodec, method codecTest.

private static void codecTest(Configuration conf, int seed, int count, String codecClass) throws IOException {
    // Create the codec
    CompressionCodec codec = null;
    try {
        codec = (CompressionCodec) ReflectionUtils.newInstance(conf.getClassByName(codecClass), conf);
    } catch (ClassNotFoundException cnfe) {
        throw new IOException("Illegal codec: " + codecClass, cnfe);
    }
    LOG.info("Created a Codec object of type: " + codecClass);
    // Generate data
    DataOutputBuffer data = new DataOutputBuffer();
    RandomDatum.Generator generator = new RandomDatum.Generator(seed);
    for (int i = 0; i < count; ++i) {
        generator.next();
        RandomDatum key = generator.getKey();
        RandomDatum value = generator.getValue();
        key.write(data);
        value.write(data);
    }
    LOG.info("Generated " + count + " records");
    // Compress data
    DataOutputBuffer compressedDataBuffer = new DataOutputBuffer();
    CompressionOutputStream deflateFilter = codec.createOutputStream(compressedDataBuffer);
    DataOutputStream deflateOut = new DataOutputStream(new BufferedOutputStream(deflateFilter));
    deflateOut.write(data.getData(), 0, data.getLength());
    deflateOut.flush();
    deflateFilter.finish();
    LOG.info("Finished compressing data");
    // De-compress data
    DataInputBuffer deCompressedDataBuffer = new DataInputBuffer();
    deCompressedDataBuffer.reset(compressedDataBuffer.getData(), 0, compressedDataBuffer.getLength());
    CompressionInputStream inflateFilter = codec.createInputStream(deCompressedDataBuffer);
    DataInputStream inflateIn = new DataInputStream(new BufferedInputStream(inflateFilter));
    // Check
    DataInputBuffer originalData = new DataInputBuffer();
    originalData.reset(data.getData(), 0, data.getLength());
    DataInputStream originalIn = new DataInputStream(new BufferedInputStream(originalData));
    for (int i = 0; i < count; ++i) {
        RandomDatum k1 = new RandomDatum();
        RandomDatum v1 = new RandomDatum();
        k1.readFields(originalIn);
        v1.readFields(originalIn);
        RandomDatum k2 = new RandomDatum();
        RandomDatum v2 = new RandomDatum();
        k2.readFields(inflateIn);
        v2.readFields(inflateIn);
        assertTrue("original and compressed-then-decompressed-output not equal", k1.equals(k2) && v1.equals(v2));
        // original and compressed-then-decompressed-output have the same hashCode
        Map<RandomDatum, String> m = new HashMap<RandomDatum, String>();
        m.put(k1, k1.toString());
        m.put(v1, v1.toString());
        String result = m.get(k2);
        assertEquals("k1 and k2 hashcode not equal", result, k1.toString());
        result = m.get(v2);
        assertEquals("v1 and v2 hashcode not equal", result, v1.toString());
    }
    // De-compress data byte-at-a-time
    originalData.reset(data.getData(), 0, data.getLength());
    deCompressedDataBuffer.reset(compressedDataBuffer.getData(), 0, compressedDataBuffer.getLength());
    inflateFilter = codec.createInputStream(deCompressedDataBuffer);
    // Check
    originalIn = new DataInputStream(new BufferedInputStream(originalData));
    int expected;
    do {
        expected = originalIn.read();
        assertEquals("Inflated stream read by byte does not match", expected, inflateFilter.read());
    } while (expected != -1);
    LOG.info("SUCCESS! Completed checking " + count + " records");
}
Also used: HashMap (java.util.HashMap), DataOutputStream (java.io.DataOutputStream), IOException (java.io.IOException), DataInputStream (java.io.DataInputStream), DataInputBuffer (org.apache.hadoop.io.DataInputBuffer), BufferedInputStream (java.io.BufferedInputStream), DataOutputBuffer (org.apache.hadoop.io.DataOutputBuffer), BufferedOutputStream (java.io.BufferedOutputStream), RandomDatum (org.apache.hadoop.io.RandomDatum)
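
Since codecTest is parameterized by the codec class name, the same round trip can exercise any codec on the classpath. A condensed sketch of the core compress/decompress flow, assuming DefaultCodec (the zlib-backed codec bundled with Hadoop) and an illustrative roundTrip helper:

import java.io.InputStream;
import java.util.Arrays;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.io.DataInputBuffer;
import org.apache.hadoop.io.DataOutputBuffer;
import org.apache.hadoop.io.IOUtils;
import org.apache.hadoop.io.compress.CompressionCodec;
import org.apache.hadoop.io.compress.CompressionOutputStream;
import org.apache.hadoop.io.compress.DefaultCodec;
import org.apache.hadoop.util.ReflectionUtils;

public class CodecRoundTrip {
    public static byte[] roundTrip(byte[] input) throws Exception {
        CompressionCodec codec = ReflectionUtils.newInstance(DefaultCodec.class, new Configuration());
        // compress into an in-memory buffer; finish() writes the stream trailer
        DataOutputBuffer compressed = new DataOutputBuffer();
        CompressionOutputStream cout = codec.createOutputStream(compressed);
        cout.write(input);
        cout.finish();
        // decompress by wrapping the same backing array
        DataInputBuffer inBuf = new DataInputBuffer();
        inBuf.reset(compressed.getData(), 0, compressed.getLength());
        InputStream cin = codec.createInputStream(inBuf);
        DataOutputBuffer plain = new DataOutputBuffer();
        IOUtils.copyBytes(cin, plain, 4096);
        return Arrays.copyOf(plain.getData(), plain.getLength());
    }
}

If roundTrip(input) returns bytes equal to input, the codec round trip is lossless, which is exactly the property the test asserts record by record.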

Example 68 with DataInputBuffer

Use of org.apache.hadoop.io.DataInputBuffer in project hadoop by apache, from the class TestCodec, method GzipConcatTest.

void GzipConcatTest(Configuration conf, Class<? extends Decompressor> decomClass) throws IOException {
    Random r = new Random();
    long seed = r.nextLong();
    r.setSeed(seed);
    LOG.info(decomClass + " seed: " + seed);
    final int CONCAT = r.nextInt(4) + 3;
    final int BUFLEN = 128 * 1024;
    DataOutputBuffer dflbuf = new DataOutputBuffer();
    DataOutputBuffer chkbuf = new DataOutputBuffer();
    byte[] b = new byte[BUFLEN];
    for (int i = 0; i < CONCAT; ++i) {
        GZIPOutputStream gzout = new GZIPOutputStream(dflbuf);
        r.nextBytes(b);
        int len = r.nextInt(BUFLEN);
        int off = r.nextInt(BUFLEN - len);
        chkbuf.write(b, off, len);
        gzout.write(b, off, len);
        gzout.close();
    }
    final byte[] chk = Arrays.copyOf(chkbuf.getData(), chkbuf.getLength());
    CompressionCodec codec = ReflectionUtils.newInstance(GzipCodec.class, conf);
    Decompressor decom = codec.createDecompressor();
    assertNotNull(decom);
    assertEquals(decomClass, decom.getClass());
    DataInputBuffer gzbuf = new DataInputBuffer();
    gzbuf.reset(dflbuf.getData(), dflbuf.getLength());
    InputStream gzin = codec.createInputStream(gzbuf, decom);
    dflbuf.reset();
    IOUtils.copyBytes(gzin, dflbuf, 4096);
    final byte[] dflchk = Arrays.copyOf(dflbuf.getData(), dflbuf.getLength());
    assertArrayEquals(chk, dflchk);
}
Also used: BuiltInGzipDecompressor (org.apache.hadoop.io.compress.zlib.BuiltInGzipDecompressor), DataInputBuffer (org.apache.hadoop.io.DataInputBuffer), Random (java.util.Random), GZIPOutputStream (java.util.zip.GZIPOutputStream), GZIPInputStream (java.util.zip.GZIPInputStream), BufferedInputStream (java.io.BufferedInputStream), ByteArrayInputStream (java.io.ByteArrayInputStream), DataInputStream (java.io.DataInputStream), FileInputStream (java.io.FileInputStream), InputStream (java.io.InputStream), DataOutputBuffer (org.apache.hadoop.io.DataOutputBuffer)
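
The gzip format allows several compressed members back to back in a single stream, and a conforming decompressor must emit the concatenation of all members; the test above verifies that the decompressor under test honors member boundaries. A minimal sketch of producing such a two-member stream (class and method names are illustrative):

import java.io.ByteArrayOutputStream;
import java.util.zip.GZIPOutputStream;

public class ConcatGzip {
    // Returns a single gzip stream containing two independent members.
    // Decompressing it should yield "hello" followed by "world".
    public static byte[] twoMembers() throws Exception {
        ByteArrayOutputStream out = new ByteArrayOutputStream();
        for (String s : new String[] { "hello", "world" }) {
            GZIPOutputStream gz = new GZIPOutputStream(out); // starts a new member
            gz.write(s.getBytes("UTF-8"));
            gz.close(); // writes the member trailer; closing the byte sink is a no-op
        }
        return out.toByteArray();
    }
}

Feeding twoMembers() through codec.createInputStream as in the test should decompress to the bytes of "helloworld".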

Example 69 with DataInputBuffer

Use of org.apache.hadoop.io.DataInputBuffer in project hadoop by apache, from the class TestCompressionStreamReuse, method resetStateTest.

private void resetStateTest(Configuration conf, int seed, int count, String codecClass) throws IOException {
    // Create the codec
    CompressionCodec codec = null;
    try {
        codec = (CompressionCodec) ReflectionUtils.newInstance(conf.getClassByName(codecClass), conf);
    } catch (ClassNotFoundException cnfe) {
        throw new IOException("Illegal codec: " + codecClass, cnfe);
    }
    LOG.info("Created a Codec object of type: " + codecClass);
    // Generate data
    DataOutputBuffer data = new DataOutputBuffer();
    RandomDatum.Generator generator = new RandomDatum.Generator(seed);
    for (int i = 0; i < count; ++i) {
        generator.next();
        RandomDatum key = generator.getKey();
        RandomDatum value = generator.getValue();
        key.write(data);
        value.write(data);
    }
    LOG.info("Generated " + count + " records");
    // Compress data
    DataOutputBuffer compressedDataBuffer = new DataOutputBuffer();
    DataOutputStream deflateOut = new DataOutputStream(new BufferedOutputStream(compressedDataBuffer));
    CompressionOutputStream deflateFilter = codec.createOutputStream(deflateOut);
    deflateFilter.write(data.getData(), 0, data.getLength());
    deflateFilter.finish();
    deflateFilter.flush();
    LOG.info("Finished compressing data");
    // reset the compressor so the same stream can be reused
    deflateFilter.resetState();
    LOG.info("Finished resetting the compressor");
    // re-generate data
    data.reset();
    generator = new RandomDatum.Generator(seed);
    for (int i = 0; i < count; ++i) {
        generator.next();
        RandomDatum key = generator.getKey();
        RandomDatum value = generator.getValue();
        key.write(data);
        value.write(data);
    }
    DataInputBuffer originalData = new DataInputBuffer();
    DataInputStream originalIn = new DataInputStream(new BufferedInputStream(originalData));
    originalData.reset(data.getData(), 0, data.getLength());
    // re-compress data
    compressedDataBuffer.reset();
    deflateOut = new DataOutputStream(new BufferedOutputStream(compressedDataBuffer));
    deflateFilter = codec.createOutputStream(deflateOut);
    deflateFilter.write(data.getData(), 0, data.getLength());
    deflateFilter.finish();
    deflateFilter.flush();
    LOG.info("Finished re-compressing data");
    // De-compress data
    DataInputBuffer deCompressedDataBuffer = new DataInputBuffer();
    deCompressedDataBuffer.reset(compressedDataBuffer.getData(), 0, compressedDataBuffer.getLength());
    CompressionInputStream inflateFilter = codec.createInputStream(deCompressedDataBuffer);
    DataInputStream inflateIn = new DataInputStream(new BufferedInputStream(inflateFilter));
    // Check
    for (int i = 0; i < count; ++i) {
        RandomDatum k1 = new RandomDatum();
        RandomDatum v1 = new RandomDatum();
        k1.readFields(originalIn);
        v1.readFields(originalIn);
        RandomDatum k2 = new RandomDatum();
        RandomDatum v2 = new RandomDatum();
        k2.readFields(inflateIn);
        v2.readFields(inflateIn);
        assertTrue("original and compressed-then-decompressed-output not equal", k1.equals(k2) && v1.equals(v2));
    }
    LOG.info("SUCCESS! Completed checking " + count + " records");
}
Also used: DataOutputStream (java.io.DataOutputStream), IOException (java.io.IOException), DataInputStream (java.io.DataInputStream), DataInputBuffer (org.apache.hadoop.io.DataInputBuffer), BufferedInputStream (java.io.BufferedInputStream), DataOutputBuffer (org.apache.hadoop.io.DataOutputBuffer), BufferedOutputStream (java.io.BufferedOutputStream), RandomDatum (org.apache.hadoop.io.RandomDatum)
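
resetState() returns the compressor behind a CompressionOutputStream to its initial state without touching the underlying stream, so one stream object can emit a series of independently decompressible streams; Hadoop's record-compressed SequenceFile writer relies on the same resetState/write/finish cycle. A minimal sketch of that reuse pattern, assuming DefaultCodec and that the stream accepts new writes after finish() plus resetState():

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.io.DataOutputBuffer;
import org.apache.hadoop.io.compress.CompressionCodec;
import org.apache.hadoop.io.compress.CompressionOutputStream;
import org.apache.hadoop.io.compress.DefaultCodec;
import org.apache.hadoop.util.ReflectionUtils;

public class ReuseSketch {
    public static void main(String[] args) throws Exception {
        CompressionCodec codec = ReflectionUtils.newInstance(DefaultCodec.class, new Configuration());
        DataOutputBuffer sink = new DataOutputBuffer();
        CompressionOutputStream out = codec.createOutputStream(sink);
        out.write("first payload".getBytes("UTF-8"));
        out.finish();     // complete the first compressed stream
        out.resetState(); // rewind the compressor for a fresh stream
        sink.reset();     // discard the first stream's bytes
        out.write("second payload".getBytes("UTF-8"));
        out.finish();     // sink now holds a new, self-contained stream
    }
}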

Example 70 with DataInputBuffer

Use of org.apache.hadoop.io.DataInputBuffer in project hadoop by apache, from the class TestZStandardCompressorDecompressor, method testCompressorDecompressorLogicWithCompressionStreams.

// test compress/decompress process through
// CompressionOutputStream/CompressionInputStream api
@Test
public void testCompressorDecompressorLogicWithCompressionStreams() throws Exception {
    DataOutputStream deflateOut = null;
    DataInputStream inflateIn = null;
    int byteSize = 1024 * 100;
    byte[] bytes = generate(byteSize);
    int bufferSize = IO_FILE_BUFFER_SIZE_DEFAULT;
    try {
        DataOutputBuffer compressedDataBuffer = new DataOutputBuffer();
        CompressionOutputStream deflateFilter = new CompressorStream(compressedDataBuffer, new ZStandardCompressor(), bufferSize);
        deflateOut = new DataOutputStream(new BufferedOutputStream(deflateFilter));
        deflateOut.write(bytes, 0, bytes.length);
        deflateOut.flush();
        deflateFilter.finish();
        DataInputBuffer deCompressedDataBuffer = new DataInputBuffer();
        deCompressedDataBuffer.reset(compressedDataBuffer.getData(), 0, compressedDataBuffer.getLength());
        CompressionInputStream inflateFilter = new DecompressorStream(deCompressedDataBuffer, new ZStandardDecompressor(bufferSize), bufferSize);
        inflateIn = new DataInputStream(new BufferedInputStream(inflateFilter));
        byte[] result = new byte[byteSize];
        // use readFully: a single read() is not guaranteed to fill the buffer
        inflateIn.readFully(result);
        assertArrayEquals("original array does not match compressed-then-decompressed output", result, bytes);
    } finally {
        IOUtils.closeQuietly(deflateOut);
        IOUtils.closeQuietly(inflateIn);
    }
}
Also used: DecompressorStream (org.apache.hadoop.io.compress.DecompressorStream), CompressionOutputStream (org.apache.hadoop.io.compress.CompressionOutputStream), CompressorStream (org.apache.hadoop.io.compress.CompressorStream), CompressionInputStream (org.apache.hadoop.io.compress.CompressionInputStream), DataOutputStream (java.io.DataOutputStream), DataInputStream (java.io.DataInputStream), DataInputBuffer (org.apache.hadoop.io.DataInputBuffer), BufferedInputStream (java.io.BufferedInputStream), DataOutputBuffer (org.apache.hadoop.io.DataOutputBuffer), BufferedOutputStream (java.io.BufferedOutputStream), Test (org.junit.Test)
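
This test builds its streams directly from raw compressor/decompressor objects via the CompressorStream/DecompressorStream wrappers; going through ZStandardCodec wires up the same pieces. A hedged sketch of the codec-based equivalent (assumes the native zstd library is available to Hadoop; class name is illustrative):

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.io.DataInputBuffer;
import org.apache.hadoop.io.DataOutputBuffer;
import org.apache.hadoop.io.compress.CompressionInputStream;
import org.apache.hadoop.io.compress.CompressionOutputStream;
import org.apache.hadoop.io.compress.ZStandardCodec;
import org.apache.hadoop.util.ReflectionUtils;

public class ZstdViaCodec {
    public static void main(String[] args) throws Exception {
        ZStandardCodec codec = ReflectionUtils.newInstance(ZStandardCodec.class, new Configuration());
        DataOutputBuffer compressed = new DataOutputBuffer();
        CompressionOutputStream out = codec.createOutputStream(compressed);
        out.write("zstd round trip".getBytes("UTF-8"));
        out.finish();
        DataInputBuffer in = new DataInputBuffer();
        in.reset(compressed.getData(), 0, compressed.getLength());
        CompressionInputStream cin = codec.createInputStream(in);
        byte[] buf = new byte[64];
        int n = cin.read(buf); // one read suffices for this tiny payload
        System.out.println(new String(buf, 0, n, "UTF-8"));
    }
}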

Aggregations

DataInputBuffer (org.apache.hadoop.io.DataInputBuffer): 112
Test (org.junit.Test): 49
DataOutputBuffer (org.apache.hadoop.io.DataOutputBuffer): 45
IOException (java.io.IOException): 24
Text (org.apache.hadoop.io.Text): 20
Path (org.apache.hadoop.fs.Path): 16
Configuration (org.apache.hadoop.conf.Configuration): 13
IntWritable (org.apache.hadoop.io.IntWritable): 11
Random (java.util.Random): 10
DataInputStream (java.io.DataInputStream): 9
BufferedInputStream (java.io.BufferedInputStream): 8
HashMap (java.util.HashMap): 8
DataOutputStream (java.io.DataOutputStream): 6
LongWritable (org.apache.hadoop.io.LongWritable): 6
SerializationFactory (org.apache.hadoop.io.serializer.SerializationFactory): 6
IFile (org.apache.tez.runtime.library.common.sort.impl.IFile): 6
BufferedOutputStream (java.io.BufferedOutputStream): 5
BytesWritable (org.apache.hadoop.io.BytesWritable): 5
FSDataInputStream (org.apache.hadoop.fs.FSDataInputStream): 4
Credentials (org.apache.hadoop.security.Credentials): 4