Example 11 with BytesWritable

Use of org.apache.hadoop.io.BytesWritable in project hadoop by apache.

The class TestMRSequenceFileAsBinaryOutputFormat, method testBinary.

@Test
public void testBinary() throws IOException, InterruptedException {
    Configuration conf = new Configuration();
    Job job = Job.getInstance(conf);
    Path outdir = new Path(System.getProperty("test.build.data", "/tmp"), "outseq");
    Random r = new Random();
    long seed = r.nextLong();
    r.setSeed(seed);
    FileOutputFormat.setOutputPath(job, outdir);
    SequenceFileAsBinaryOutputFormat.setSequenceFileOutputKeyClass(job, IntWritable.class);
    SequenceFileAsBinaryOutputFormat.setSequenceFileOutputValueClass(job, DoubleWritable.class);
    SequenceFileAsBinaryOutputFormat.setCompressOutput(job, true);
    SequenceFileAsBinaryOutputFormat.setOutputCompressionType(job, CompressionType.BLOCK);
    BytesWritable bkey = new BytesWritable();
    BytesWritable bval = new BytesWritable();
    TaskAttemptContext context = MapReduceTestUtil.createDummyMapTaskAttemptContext(job.getConfiguration());
    OutputFormat<BytesWritable, BytesWritable> outputFormat = new SequenceFileAsBinaryOutputFormat();
    OutputCommitter committer = outputFormat.getOutputCommitter(context);
    committer.setupJob(job);
    RecordWriter<BytesWritable, BytesWritable> writer = outputFormat.getRecordWriter(context);
    IntWritable iwritable = new IntWritable();
    DoubleWritable dwritable = new DoubleWritable();
    DataOutputBuffer outbuf = new DataOutputBuffer();
    LOG.info("Creating data by SequenceFileAsBinaryOutputFormat");
    try {
        for (int i = 0; i < RECORDS; ++i) {
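            // Serialize the random int into raw bytes for the BytesWritable key;
            // the random double below becomes the BytesWritable value the same way.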
            iwritable = new IntWritable(r.nextInt());
            iwritable.write(outbuf);
            bkey.set(outbuf.getData(), 0, outbuf.getLength());
            outbuf.reset();
            dwritable = new DoubleWritable(r.nextDouble());
            dwritable.write(outbuf);
            bval.set(outbuf.getData(), 0, outbuf.getLength());
            outbuf.reset();
            writer.write(bkey, bval);
        }
    } finally {
        writer.close(context);
    }
    committer.commitTask(context);
    committer.commitJob(job);
    InputFormat<IntWritable, DoubleWritable> iformat = new SequenceFileInputFormat<IntWritable, DoubleWritable>();
    int count = 0;
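    // Re-seed so the read-back pass regenerates the identical random sequence for verification.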
    r.setSeed(seed);
    SequenceFileInputFormat.setInputPaths(job, outdir);
    LOG.info("Reading data by SequenceFileInputFormat");
    for (InputSplit split : iformat.getSplits(job)) {
        RecordReader<IntWritable, DoubleWritable> reader = iformat.createRecordReader(split, context);
        MapContext<IntWritable, DoubleWritable, BytesWritable, BytesWritable> mcontext =
            new MapContextImpl<IntWritable, DoubleWritable, BytesWritable, BytesWritable>(
                job.getConfiguration(), context.getTaskAttemptID(), reader, null, null,
                MapReduceTestUtil.createDummyReporter(), split);
        reader.initialize(split, mcontext);
        try {
            int sourceInt;
            double sourceDouble;
            while (reader.nextKeyValue()) {
                sourceInt = r.nextInt();
                sourceDouble = r.nextDouble();
                iwritable = reader.getCurrentKey();
                dwritable = reader.getCurrentValue();
                assertEquals("Keys don't match: " + "*" + iwritable.get() + ":" + sourceInt + "*", sourceInt, iwritable.get());
                assertTrue("Vals don't match: " + "*" + dwritable.get() + ":" + sourceDouble + "*", Double.compare(dwritable.get(), sourceDouble) == 0);
                ++count;
            }
        } finally {
            reader.close();
        }
    }
    assertEquals("Some records not found", RECORDS, count);
}
Also used: Path (org.apache.hadoop.fs.Path), OutputCommitter (org.apache.hadoop.mapreduce.OutputCommitter), Configuration (org.apache.hadoop.conf.Configuration), MapContextImpl (org.apache.hadoop.mapreduce.task.MapContextImpl), SequenceFileInputFormat (org.apache.hadoop.mapreduce.lib.input.SequenceFileInputFormat), BytesWritable (org.apache.hadoop.io.BytesWritable), TaskAttemptContext (org.apache.hadoop.mapreduce.TaskAttemptContext), DoubleWritable (org.apache.hadoop.io.DoubleWritable), Random (java.util.Random), DataOutputBuffer (org.apache.hadoop.io.DataOutputBuffer), Job (org.apache.hadoop.mapreduce.Job), InputSplit (org.apache.hadoop.mapreduce.InputSplit), IntWritable (org.apache.hadoop.io.IntWritable), Test (org.junit.Test)
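
The heart of testBinary is serializing an ordinary Writable into a BytesWritable through a DataOutputBuffer. A minimal standalone sketch of that pattern; the class and method names here are illustrative, not part of the Hadoop test above.

import java.io.IOException;
import org.apache.hadoop.io.BytesWritable;
import org.apache.hadoop.io.DataOutputBuffer;
import org.apache.hadoop.io.IntWritable;

public class WritableToBytesSketch {
    // Serialize a Writable into raw bytes and wrap them in a BytesWritable,
    // the same write-phase pattern testBinary uses for its keys and values.
    static BytesWritable toBytesWritable(IntWritable value) throws IOException {
        DataOutputBuffer buf = new DataOutputBuffer();
        value.write(buf);                              // raw serialized form
        BytesWritable result = new BytesWritable();
        result.set(buf.getData(), 0, buf.getLength()); // copy only the valid prefix
        return result;
    }

    public static void main(String[] args) throws IOException {
        BytesWritable b = toBytesWritable(new IntWritable(42));
        System.out.println(b.getLength());             // 4: an int serializes to four bytes
    }
}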

Example 12 with BytesWritable

Use of org.apache.hadoop.io.BytesWritable in project hadoop by apache.

The class TestBinaryPartitioner, method testDefaultOffsets.

@Test
public void testDefaultOffsets() {
    Configuration conf = new Configuration();
    BinaryPartitioner<?> partitioner = ReflectionUtils.newInstance(BinaryPartitioner.class, conf);
    BinaryComparable key1 = new BytesWritable(new byte[] { 1, 2, 3, 4, 5 });
    BinaryComparable key2 = new BytesWritable(new byte[] { 1, 2, 3, 4, 5 });
    int partition1 = partitioner.getPartition(key1, null, 10);
    int partition2 = partitioner.getPartition(key2, null, 10);
    assertEquals(partition1, partition2);
    key1 = new BytesWritable(new byte[] { 1, 2, 3, 4, 5 });
    key2 = new BytesWritable(new byte[] { 6, 2, 3, 4, 5 });
    partition1 = partitioner.getPartition(key1, null, 10);
    partition2 = partitioner.getPartition(key2, null, 10);
    assertTrue(partition1 != partition2);
    key1 = new BytesWritable(new byte[] { 1, 2, 3, 4, 5 });
    key2 = new BytesWritable(new byte[] { 1, 2, 3, 4, 6 });
    partition1 = partitioner.getPartition(key1, null, 10);
    partition2 = partitioner.getPartition(key2, null, 10);
    assertTrue(partition1 != partition2);
}
Also used: BinaryComparable (org.apache.hadoop.io.BinaryComparable), Configuration (org.apache.hadoop.conf.Configuration), BytesWritable (org.apache.hadoop.io.BytesWritable), Test (org.junit.Test)
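
The first assertion passes because BytesWritable is a BinaryComparable: equality, ordering, and hashing are all defined over the valid byte contents rather than object identity. A quick illustrative check, not part of the test suite:

import org.apache.hadoop.io.BytesWritable;

public class ContentEqualitySketch {
    public static void main(String[] args) {
        BytesWritable a = new BytesWritable(new byte[] { 1, 2, 3, 4, 5 });
        BytesWritable b = new BytesWritable(new byte[] { 1, 2, 3, 4, 5 });
        // equals() and hashCode() depend on the bytes, not the instance,
        // so partitioners that hash key bytes place these on the same partition.
        System.out.println(a.equals(b));                  // true
        System.out.println(a.hashCode() == b.hashCode()); // true
    }
}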

Example 13 with BytesWritable

Use of org.apache.hadoop.io.BytesWritable in project hadoop by apache.

The class TestBinaryPartitioner, method testUpperBound.

@Test
public void testUpperBound() {
    Configuration conf = new Configuration();
    BinaryPartitioner.setRightOffset(conf, 4);
    BinaryPartitioner<?> partitioner = ReflectionUtils.newInstance(BinaryPartitioner.class, conf);
    BinaryComparable key1 = new BytesWritable(new byte[] { 1, 2, 3, 4, 5 });
    BinaryComparable key2 = new BytesWritable(new byte[] { 1, 2, 3, 4, 6 });
    int partition1 = partitioner.getPartition(key1, null, 10);
    int partition2 = partitioner.getPartition(key2, null, 10);
    assertTrue(partition1 != partition2);
}
Also used: BinaryComparable (org.apache.hadoop.io.BinaryComparable), Configuration (org.apache.hadoop.conf.Configuration), BytesWritable (org.apache.hadoop.io.BytesWritable), Test (org.junit.Test)
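
Both offsets can also be set in a single call. A hedged usage sketch, assuming the companion setter BinaryPartitioner.setOffsets(conf, left, right); per the BinaryPartitioner javadoc, offsets are interpreted like Python slice indices (inclusive, with negative values counting from the end of the key):

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.io.BytesWritable;
import org.apache.hadoop.mapreduce.lib.partition.BinaryPartitioner;
import org.apache.hadoop.util.ReflectionUtils;

public class OffsetPartitionSketch {
    public static void main(String[] args) {
        Configuration conf = new Configuration();
        // Partition on key bytes 1 through 3 only; bytes outside that
        // window are ignored when computing the partition.
        BinaryPartitioner.setOffsets(conf, 1, 3);
        BinaryPartitioner<?> partitioner =
            ReflectionUtils.newInstance(BinaryPartitioner.class, conf);
        BytesWritable key1 = new BytesWritable(new byte[] { 0, 2, 3, 4, 5 });
        BytesWritable key2 = new BytesWritable(new byte[] { 9, 2, 3, 4, 7 });
        // Identical middle bytes, so the same partition despite differing ends.
        System.out.println(partitioner.getPartition(key1, null, 10)
            == partitioner.getPartition(key2, null, 10)); // true
    }
}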

Example 14 with BytesWritable

Use of org.apache.hadoop.io.BytesWritable in project hadoop by apache.

The class TestJoinTupleWritable, method testNestedIterable.

@Test
public void testNestedIterable() throws Exception {
    Random r = new Random();
    Writable[] writs = {
        new BooleanWritable(r.nextBoolean()),
        new FloatWritable(r.nextFloat()),
        new FloatWritable(r.nextFloat()),
        new IntWritable(r.nextInt()),
        new LongWritable(r.nextLong()),
        new BytesWritable("dingo".getBytes()),
        new LongWritable(r.nextLong()),
        new IntWritable(r.nextInt()),
        new BytesWritable("yak".getBytes()),
        new IntWritable(r.nextInt())
    };
    TupleWritable sTuple = makeTuple(writs);
    assertTrue("Bad count", writs.length == verifIter(writs, sTuple, 0));
}
Also used: FloatWritable (org.apache.hadoop.io.FloatWritable), Random (java.util.Random), BooleanWritable (org.apache.hadoop.io.BooleanWritable), Writable (org.apache.hadoop.io.Writable), LongWritable (org.apache.hadoop.io.LongWritable), BytesWritable (org.apache.hadoop.io.BytesWritable), IntWritable (org.apache.hadoop.io.IntWritable), Test (org.junit.Test)
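
One caveat worth noting when reading bytes back out of a BytesWritable such as the "dingo" entry above: getBytes() returns the internal backing buffer, which can be longer than the valid data, so it must always be paired with getLength(); copyBytes() returns a right-sized copy instead. A small sketch of the gotcha (class name illustrative):

import org.apache.hadoop.io.BytesWritable;

public class PaddingGotchaSketch {
    public static void main(String[] args) {
        BytesWritable w = new BytesWritable("dingo".getBytes());
        w.setSize(3); // shrink the valid region; the backing array keeps its capacity
        // getBytes() exposes the whole backing buffer, so pair it with
        // getLength(), or take a right-sized copy with copyBytes().
        System.out.println(w.getLength());                         // 3
        System.out.println(w.getBytes().length >= w.getLength());  // true
        System.out.println(new String(w.copyBytes()));             // "din"
    }
}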

Example 15 with BytesWritable

Use of org.apache.hadoop.io.BytesWritable in project hadoop by apache.

The class TestJoinTupleWritable, method testWritable.

@Test
public void testWritable() throws Exception {
    Random r = new Random();
    Writable[] writs = {
        new BooleanWritable(r.nextBoolean()),
        new FloatWritable(r.nextFloat()),
        new FloatWritable(r.nextFloat()),
        new IntWritable(r.nextInt()),
        new LongWritable(r.nextLong()),
        new BytesWritable("dingo".getBytes()),
        new LongWritable(r.nextLong()),
        new IntWritable(r.nextInt()),
        new BytesWritable("yak".getBytes()),
        new IntWritable(r.nextInt())
    };
    TupleWritable sTuple = makeTuple(writs);
    ByteArrayOutputStream out = new ByteArrayOutputStream();
    sTuple.write(new DataOutputStream(out));
    ByteArrayInputStream in = new ByteArrayInputStream(out.toByteArray());
    TupleWritable dTuple = new TupleWritable();
    dTuple.readFields(new DataInputStream(in));
    assertTrue("Failed to write/read tuple", sTuple.equals(dTuple));
}
Also used: DataOutputStream (java.io.DataOutputStream), Writable (org.apache.hadoop.io.Writable), LongWritable (org.apache.hadoop.io.LongWritable), BooleanWritable (org.apache.hadoop.io.BooleanWritable), BytesWritable (org.apache.hadoop.io.BytesWritable), FloatWritable (org.apache.hadoop.io.FloatWritable), IntWritable (org.apache.hadoop.io.IntWritable), ByteArrayOutputStream (java.io.ByteArrayOutputStream), DataInputStream (java.io.DataInputStream), Random (java.util.Random), ByteArrayInputStream (java.io.ByteArrayInputStream), Test (org.junit.Test)
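
The byte-stream round trip in testWritable is not specific to TupleWritable; the same cycle works for any Writable. A generic helper sketch (names illustrative):

import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.DataInputStream;
import java.io.DataOutputStream;
import java.io.IOException;
import org.apache.hadoop.io.BytesWritable;
import org.apache.hadoop.io.Writable;

public class RoundTripSketch {
    // Serialize src, then deserialize into dst: the cycle the test performs.
    static <W extends Writable> W roundTrip(Writable src, W dst) throws IOException {
        ByteArrayOutputStream out = new ByteArrayOutputStream();
        src.write(new DataOutputStream(out));
        dst.readFields(new DataInputStream(new ByteArrayInputStream(out.toByteArray())));
        return dst;
    }

    public static void main(String[] args) throws IOException {
        BytesWritable src = new BytesWritable("yak".getBytes());
        BytesWritable copy = roundTrip(src, new BytesWritable());
        System.out.println(src.equals(copy)); // true
    }
}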

Aggregations

Classes most often used alongside BytesWritable in the indexed sources, with usage counts:

BytesWritable (org.apache.hadoop.io.BytesWritable): 339
Test (org.junit.Test): 92
Text (org.apache.hadoop.io.Text): 81
LongWritable (org.apache.hadoop.io.LongWritable): 66
IntWritable (org.apache.hadoop.io.IntWritable): 54
ObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector): 51
ArrayList (java.util.ArrayList): 48
List (java.util.List): 48
Path (org.apache.hadoop.fs.Path): 47
IOException (java.io.IOException): 42
Configuration (org.apache.hadoop.conf.Configuration): 41
FloatWritable (org.apache.hadoop.io.FloatWritable): 37
Writable (org.apache.hadoop.io.Writable): 36
BooleanWritable (org.apache.hadoop.io.BooleanWritable): 35
FileSystem (org.apache.hadoop.fs.FileSystem): 28
SequenceFile (org.apache.hadoop.io.SequenceFile): 27
DoubleWritable (org.apache.hadoop.hive.serde2.io.DoubleWritable): 26
ShortWritable (org.apache.hadoop.hive.serde2.io.ShortWritable): 26
HiveDecimalWritable (org.apache.hadoop.hive.serde2.io.HiveDecimalWritable): 25
Random (java.util.Random): 24