Example 6 with RandomDatum

Use of org.apache.hadoop.io.RandomDatum in the hadoop project by Apache.

The class TestCodec, method codecTest:

private static void codecTest(Configuration conf, int seed, int count, String codecClass) throws IOException {
    // Create the codec
    CompressionCodec codec = null;
    try {
        codec = (CompressionCodec) ReflectionUtils.newInstance(conf.getClassByName(codecClass), conf);
    } catch (ClassNotFoundException cnfe) {
        throw new IOException("Illegal codec!", cnfe);
    }
    LOG.info("Created a Codec object of type: " + codecClass);
    // Generate data
    DataOutputBuffer data = new DataOutputBuffer();
    RandomDatum.Generator generator = new RandomDatum.Generator(seed);
    for (int i = 0; i < count; ++i) {
        generator.next();
        RandomDatum key = generator.getKey();
        RandomDatum value = generator.getValue();
        key.write(data);
        value.write(data);
    }
    LOG.info("Generated " + count + " records");
    // Compress data
    DataOutputBuffer compressedDataBuffer = new DataOutputBuffer();
    CompressionOutputStream deflateFilter = codec.createOutputStream(compressedDataBuffer);
    DataOutputStream deflateOut = new DataOutputStream(new BufferedOutputStream(deflateFilter));
    deflateOut.write(data.getData(), 0, data.getLength());
    deflateOut.flush();
    deflateFilter.finish();
    LOG.info("Finished compressing data");
    // De-compress data
    DataInputBuffer deCompressedDataBuffer = new DataInputBuffer();
    deCompressedDataBuffer.reset(compressedDataBuffer.getData(), 0, compressedDataBuffer.getLength());
    CompressionInputStream inflateFilter = codec.createInputStream(deCompressedDataBuffer);
    DataInputStream inflateIn = new DataInputStream(new BufferedInputStream(inflateFilter));
    // Check
    DataInputBuffer originalData = new DataInputBuffer();
    originalData.reset(data.getData(), 0, data.getLength());
    DataInputStream originalIn = new DataInputStream(new BufferedInputStream(originalData));
    for (int i = 0; i < count; ++i) {
        RandomDatum k1 = new RandomDatum();
        RandomDatum v1 = new RandomDatum();
        k1.readFields(originalIn);
        v1.readFields(originalIn);
        RandomDatum k2 = new RandomDatum();
        RandomDatum v2 = new RandomDatum();
        k2.readFields(inflateIn);
        v2.readFields(inflateIn);
        assertTrue("original and compressed-then-decompressed-output not equal", k1.equals(k2) && v1.equals(v2));
        // original and compressed-then-decompressed-output have the same hashCode
        Map<RandomDatum, String> m = new HashMap<RandomDatum, String>();
        m.put(k1, k1.toString());
        m.put(v1, v1.toString());
        String result = m.get(k2);
        assertEquals("k1 and k2 hashcode not equal", result, k1.toString());
        result = m.get(v2);
        assertEquals("v1 and v2 hashcode not equal", result, v1.toString());
    }
    // De-compress data byte-at-a-time
    originalData.reset(data.getData(), 0, data.getLength());
    deCompressedDataBuffer.reset(compressedDataBuffer.getData(), 0, compressedDataBuffer.getLength());
    inflateFilter = codec.createInputStream(deCompressedDataBuffer);
    // Check
    originalIn = new DataInputStream(new BufferedInputStream(originalData));
    int expected;
    do {
        expected = originalIn.read();
        assertEquals("Inflated stream read by byte does not match", expected, inflateFilter.read());
    } while (expected != -1);
    LOG.info("SUCCESS! Completed checking " + count + " records");
}
Also used: HashMap(java.util.HashMap) DataOutputStream(java.io.DataOutputStream) IOException(java.io.IOException) DataInputStream(java.io.DataInputStream) DataInputBuffer(org.apache.hadoop.io.DataInputBuffer) BufferedInputStream(java.io.BufferedInputStream) DataOutputBuffer(org.apache.hadoop.io.DataOutputBuffer) BufferedOutputStream(java.io.BufferedOutputStream) RandomDatum(org.apache.hadoop.io.RandomDatum)
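
For context, codecTest above is a private helper; it is typically driven by small JUnit test methods that pass in a concrete codec class name. A minimal, hypothetical driver is sketched below; it assumes conf, seed, and count are fields of the test class, and uses DefaultCodec, a real Hadoop codec, as the target:

@Test
public void testDefaultCodec() throws IOException {
    // Exercise both an empty input and a populated one.
    codecTest(conf, seed, 0, "org.apache.hadoop.io.compress.DefaultCodec");
    codecTest(conf, seed, count, "org.apache.hadoop.io.compress.DefaultCodec");
}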

Example 7 with RandomDatum

Use of org.apache.hadoop.io.RandomDatum in the hadoop project by Apache.

The class TestCompressionStreamReuse, method resetStateTest:

private void resetStateTest(Configuration conf, int seed, int count, String codecClass) throws IOException {
    // Create the codec
    CompressionCodec codec = null;
    try {
        codec = (CompressionCodec) ReflectionUtils.newInstance(conf.getClassByName(codecClass), conf);
    } catch (ClassNotFoundException cnfe) {
        throw new IOException("Illegal codec!", cnfe);
    }
    LOG.info("Created a Codec object of type: " + codecClass);
    // Generate data
    DataOutputBuffer data = new DataOutputBuffer();
    RandomDatum.Generator generator = new RandomDatum.Generator(seed);
    for (int i = 0; i < count; ++i) {
        generator.next();
        RandomDatum key = generator.getKey();
        RandomDatum value = generator.getValue();
        key.write(data);
        value.write(data);
    }
    LOG.info("Generated " + count + " records");
    // Compress data
    DataOutputBuffer compressedDataBuffer = new DataOutputBuffer();
    DataOutputStream deflateOut = new DataOutputStream(new BufferedOutputStream(compressedDataBuffer));
    CompressionOutputStream deflateFilter = codec.createOutputStream(deflateOut);
    deflateFilter.write(data.getData(), 0, data.getLength());
    deflateFilter.finish();
    deflateFilter.flush();
    LOG.info("Finished compressing data");
    // reset the deflater so the same stream can be reused
    deflateFilter.resetState();
    LOG.info("Finished resetting deflater");
    // re-generate data
    data.reset();
    generator = new RandomDatum.Generator(seed);
    for (int i = 0; i < count; ++i) {
        generator.next();
        RandomDatum key = generator.getKey();
        RandomDatum value = generator.getValue();
        key.write(data);
        value.write(data);
    }
    DataInputBuffer originalData = new DataInputBuffer();
    DataInputStream originalIn = new DataInputStream(new BufferedInputStream(originalData));
    originalData.reset(data.getData(), 0, data.getLength());
    // re-compress data
    compressedDataBuffer.reset();
    deflateOut = new DataOutputStream(new BufferedOutputStream(compressedDataBuffer));
    deflateFilter = codec.createOutputStream(deflateOut);
    deflateFilter.write(data.getData(), 0, data.getLength());
    deflateFilter.finish();
    deflateFilter.flush();
    LOG.info("Finished re-compressing data");
    // De-compress data
    DataInputBuffer deCompressedDataBuffer = new DataInputBuffer();
    deCompressedDataBuffer.reset(compressedDataBuffer.getData(), 0, compressedDataBuffer.getLength());
    CompressionInputStream inflateFilter = codec.createInputStream(deCompressedDataBuffer);
    DataInputStream inflateIn = new DataInputStream(new BufferedInputStream(inflateFilter));
    // Check
    for (int i = 0; i < count; ++i) {
        RandomDatum k1 = new RandomDatum();
        RandomDatum v1 = new RandomDatum();
        k1.readFields(originalIn);
        v1.readFields(originalIn);
        RandomDatum k2 = new RandomDatum();
        RandomDatum v2 = new RandomDatum();
        k2.readFields(inflateIn);
        v2.readFields(inflateIn);
        assertTrue("original and compressed-then-decompressed-output not equal", k1.equals(k2) && v1.equals(v2));
    }
    LOG.info("SUCCESS! Completed checking " + count + " records");
}
Also used: DataOutputStream(java.io.DataOutputStream) IOException(java.io.IOException) DataInputStream(java.io.DataInputStream) DataInputBuffer(org.apache.hadoop.io.DataInputBuffer) BufferedInputStream(java.io.BufferedInputStream) DataOutputBuffer(org.apache.hadoop.io.DataOutputBuffer) BufferedOutputStream(java.io.BufferedOutputStream) RandomDatum(org.apache.hadoop.io.RandomDatum)
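
resetStateTest exercises stream reuse: compress once, call resetState(), recompress with the same CompressionOutputStream, then verify the second round decompresses back to the original records. A hedged sketch of how it might be invoked, again assuming conf, seed, and count are fields of the test class:

@Test
public void testGzipCompressStreamReuse() throws IOException {
    resetStateTest(conf, seed, count, "org.apache.hadoop.io.compress.GzipCodec");
}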

Example 8 with RandomDatum

Use of org.apache.hadoop.io.RandomDatum in the SSM project by Intel-bigdata.

The class TestArrayFile, method main:

/**
 * For debugging and testing.
 */
public static void main(String[] args) throws Exception {
    int count = 1024 * 1024;
    boolean create = true;
    boolean check = true;
    String file = TEST_FILE;
    String usage = "Usage: TestArrayFile [-count N] [-nocreate] [-nocheck] file";
    if (args.length == 0) {
        System.err.println(usage);
        System.exit(-1);
    }
    Configuration conf = new Configuration();
    int i = 0;
    Path fpath = null;
    FileSystem fs = null;
    try {
        for (; i < args.length; i++) {
            // parse command line
            if (args[i] == null) {
                continue;
            } else if (args[i].equals("-count")) {
                count = Integer.parseInt(args[++i]);
            } else if (args[i].equals("-nocreate")) {
                create = false;
            } else if (args[i].equals("-nocheck")) {
                check = false;
            } else {
                // file is required parameter
                file = args[i];
                fpath = new Path(file);
            }
        }
        if (fpath == null) {
            // file is a required argument
            System.err.println(usage);
            System.exit(-1);
        }
        fs = fpath.getFileSystem(conf);
        LOG.info("count = " + count);
        LOG.info("create = " + create);
        LOG.info("check = " + check);
        LOG.info("file = " + file);
        RandomDatum[] data = generate(count);
        if (create) {
            writeTest(fs, data, file);
        }
        if (check) {
            readTest(fs, data, file, conf);
        }
    } finally {
        if (fs != null) {
            fs.close();
        }
    }
}
Also used: Path(org.apache.hadoop.fs.Path) Configuration(org.apache.hadoop.conf.Configuration) FileSystem(org.apache.hadoop.fs.FileSystem) RandomDatum(org.apache.hadoop.io.RandomDatum)
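
The generate(count) helper called in main is not shown in this excerpt. A minimal sketch of what such a helper could look like, built from the same RandomDatum.Generator API used in the earlier examples (the real TestArrayFile defines its own version):

private static RandomDatum[] generate(int count) {
    RandomDatum[] data = new RandomDatum[count];
    // No seed: the no-arg Generator draws from a fresh java.util.Random.
    RandomDatum.Generator generator = new RandomDatum.Generator();
    for (int i = 0; i < count; i++) {
        generator.next();
        data[i] = generator.getValue();
    }
    return data;
}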

Aggregations

RandomDatum (org.apache.hadoop.io.RandomDatum): 8 uses
DataOutputBuffer (org.apache.hadoop.io.DataOutputBuffer): 4 uses
BufferedInputStream (java.io.BufferedInputStream): 3 uses
DataInputStream (java.io.DataInputStream): 3 uses
IOException (java.io.IOException): 3 uses
Configuration (org.apache.hadoop.conf.Configuration): 3 uses
FileSystem (org.apache.hadoop.fs.FileSystem): 3 uses
DataInputBuffer (org.apache.hadoop.io.DataInputBuffer): 3 uses
BufferedOutputStream (java.io.BufferedOutputStream): 2 uses
DataOutputStream (java.io.DataOutputStream): 2 uses
HashMap (java.util.HashMap): 2 uses
Test (org.junit.Test): 2 uses
Random (java.util.Random): 1 use
Path (org.apache.hadoop.fs.Path): 1 use
Before (org.junit.Before): 1 use