
Example 51 with IntWritable

Use of org.apache.hadoop.io.IntWritable in project hadoop by apache.

From the class TestMRSequenceFileAsBinaryOutputFormat, method testBinary:
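
The test writes random IntWritable/DoubleWritable pairs as raw bytes through SequenceFileAsBinaryOutputFormat, then reads them back with a typed SequenceFileInputFormat, replaying the same random seed to verify the round trip. (RECORDS and LOG are fields of the enclosing test class, elided from this snippet.)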

@Test
public void testBinary() throws IOException, InterruptedException {
    Configuration conf = new Configuration();
    Job job = Job.getInstance(conf);
    Path outdir = new Path(System.getProperty("test.build.data", "/tmp"), "outseq");
    Random r = new Random();
    long seed = r.nextLong();
    r.setSeed(seed);
    FileOutputFormat.setOutputPath(job, outdir);
    SequenceFileAsBinaryOutputFormat.setSequenceFileOutputKeyClass(job, IntWritable.class);
    SequenceFileAsBinaryOutputFormat.setSequenceFileOutputValueClass(job, DoubleWritable.class);
    SequenceFileAsBinaryOutputFormat.setCompressOutput(job, true);
    SequenceFileAsBinaryOutputFormat.setOutputCompressionType(job, CompressionType.BLOCK);
    BytesWritable bkey = new BytesWritable();
    BytesWritable bval = new BytesWritable();
    TaskAttemptContext context = MapReduceTestUtil.createDummyMapTaskAttemptContext(job.getConfiguration());
    OutputFormat<BytesWritable, BytesWritable> outputFormat = new SequenceFileAsBinaryOutputFormat();
    OutputCommitter committer = outputFormat.getOutputCommitter(context);
    committer.setupJob(job);
    RecordWriter<BytesWritable, BytesWritable> writer = outputFormat.getRecordWriter(context);
    IntWritable iwritable = new IntWritable();
    DoubleWritable dwritable = new DoubleWritable();
    DataOutputBuffer outbuf = new DataOutputBuffer();
    LOG.info("Creating data by SequenceFileAsBinaryOutputFormat");
    try {
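        // Write each record by serializing the typed writables to raw bytes;
        // SequenceFileAsBinaryOutputFormat stores those bytes verbatim.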
        for (int i = 0; i < RECORDS; ++i) {
            iwritable = new IntWritable(r.nextInt());
            iwritable.write(outbuf);
            bkey.set(outbuf.getData(), 0, outbuf.getLength());
            outbuf.reset();
            dwritable = new DoubleWritable(r.nextDouble());
            dwritable.write(outbuf);
            bval.set(outbuf.getData(), 0, outbuf.getLength());
            outbuf.reset();
            writer.write(bkey, bval);
        }
    } finally {
        writer.close(context);
    }
    committer.commitTask(context);
    committer.commitJob(job);
    InputFormat<IntWritable, DoubleWritable> iformat = new SequenceFileInputFormat<IntWritable, DoubleWritable>();
    int count = 0;
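    // Re-seed so the verification pass replays exactly the random values that were written.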
    r.setSeed(seed);
    SequenceFileInputFormat.setInputPaths(job, outdir);
    LOG.info("Reading data by SequenceFileInputFormat");
    for (InputSplit split : iformat.getSplits(job)) {
        RecordReader<IntWritable, DoubleWritable> reader = iformat.createRecordReader(split, context);
        MapContext<IntWritable, DoubleWritable, BytesWritable, BytesWritable> mcontext =
            new MapContextImpl<IntWritable, DoubleWritable, BytesWritable, BytesWritable>(
                job.getConfiguration(), context.getTaskAttemptID(), reader,
                null, null, MapReduceTestUtil.createDummyReporter(), split);
        reader.initialize(split, mcontext);
        try {
            int sourceInt;
            double sourceDouble;
            while (reader.nextKeyValue()) {
                sourceInt = r.nextInt();
                sourceDouble = r.nextDouble();
                iwritable = reader.getCurrentKey();
                dwritable = reader.getCurrentValue();
                assertEquals("Keys don't match: " + "*" + iwritable.get() + ":" + sourceInt + "*", sourceInt, iwritable.get());
                assertTrue("Vals don't match: " + "*" + dwritable.get() + ":" + sourceDouble + "*", Double.compare(dwritable.get(), sourceDouble) == 0);
                ++count;
            }
        } finally {
            reader.close();
        }
    }
    assertEquals("Some records not found", RECORDS, count);
}
Also used: Path (org.apache.hadoop.fs.Path), OutputCommitter (org.apache.hadoop.mapreduce.OutputCommitter), Configuration (org.apache.hadoop.conf.Configuration), MapContextImpl (org.apache.hadoop.mapreduce.task.MapContextImpl), SequenceFileInputFormat (org.apache.hadoop.mapreduce.lib.input.SequenceFileInputFormat), BytesWritable (org.apache.hadoop.io.BytesWritable), TaskAttemptContext (org.apache.hadoop.mapreduce.TaskAttemptContext), DoubleWritable (org.apache.hadoop.io.DoubleWritable), Random (java.util.Random), DataOutputBuffer (org.apache.hadoop.io.DataOutputBuffer), Job (org.apache.hadoop.mapreduce.Job), InputSplit (org.apache.hadoop.mapreduce.InputSplit), IntWritable (org.apache.hadoop.io.IntWritable), Test (org.junit.Test)
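
The serialization idiom in the write loop generalizes to any Writable. A minimal sketch of a reusable helper, with the hypothetical name toBytesWritable (not part of the Hadoop test):

private static BytesWritable toBytesWritable(Writable w) throws IOException {
    // Serialize the writable into a growable in-memory buffer...
    DataOutputBuffer buf = new DataOutputBuffer();
    w.write(buf);
    // ...then wrap the valid prefix of the backing array in a BytesWritable.
    BytesWritable bytes = new BytesWritable();
    bytes.set(buf.getData(), 0, buf.getLength());
    return bytes;
}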

Example 52 with IntWritable

Use of org.apache.hadoop.io.IntWritable in project hadoop by apache.

From the class TestJoinTupleWritable, method testNestedIterable:
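
The test builds a TupleWritable from a heterogeneous Writable array via the test helper makeTuple and checks that the verifIter helper visits every element, including nested ones.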

@Test
public void testNestedIterable() throws Exception {
    Random r = new Random();
    Writable[] writs = {
        new BooleanWritable(r.nextBoolean()),
        new FloatWritable(r.nextFloat()),
        new FloatWritable(r.nextFloat()),
        new IntWritable(r.nextInt()),
        new LongWritable(r.nextLong()),
        new BytesWritable("dingo".getBytes()),
        new LongWritable(r.nextLong()),
        new IntWritable(r.nextInt()),
        new BytesWritable("yak".getBytes()),
        new IntWritable(r.nextInt())
    };
    TupleWritable sTuple = makeTuple(writs);
    assertTrue("Bad count", writs.length == verifIter(writs, sTuple, 0));
}
Also used: Random (java.util.Random), Writable (org.apache.hadoop.io.Writable), BooleanWritable (org.apache.hadoop.io.BooleanWritable), FloatWritable (org.apache.hadoop.io.FloatWritable), IntWritable (org.apache.hadoop.io.IntWritable), LongWritable (org.apache.hadoop.io.LongWritable), BytesWritable (org.apache.hadoop.io.BytesWritable), Test (org.junit.Test)

Example 53 with IntWritable

Use of org.apache.hadoop.io.IntWritable in project hadoop by apache.

From the class TestJoinTupleWritable, method testWritable:
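
The test serializes a TupleWritable to an in-memory byte stream with write(DataOutput), reads it back with readFields(DataInput), and asserts the deserialized tuple equals the original.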

@Test
public void testWritable() throws Exception {
    Random r = new Random();
    Writable[] writs = {
        new BooleanWritable(r.nextBoolean()),
        new FloatWritable(r.nextFloat()),
        new FloatWritable(r.nextFloat()),
        new IntWritable(r.nextInt()),
        new LongWritable(r.nextLong()),
        new BytesWritable("dingo".getBytes()),
        new LongWritable(r.nextLong()),
        new IntWritable(r.nextInt()),
        new BytesWritable("yak".getBytes()),
        new IntWritable(r.nextInt())
    };
    TupleWritable sTuple = makeTuple(writs);
    ByteArrayOutputStream out = new ByteArrayOutputStream();
    sTuple.write(new DataOutputStream(out));
    ByteArrayInputStream in = new ByteArrayInputStream(out.toByteArray());
    TupleWritable dTuple = new TupleWritable();
    dTuple.readFields(new DataInputStream(in));
    assertTrue("Failed to write/read tuple", sTuple.equals(dTuple));
}
Also used: ByteArrayInputStream (java.io.ByteArrayInputStream), ByteArrayOutputStream (java.io.ByteArrayOutputStream), DataInputStream (java.io.DataInputStream), DataOutputStream (java.io.DataOutputStream), Random (java.util.Random), Writable (org.apache.hadoop.io.Writable), BooleanWritable (org.apache.hadoop.io.BooleanWritable), FloatWritable (org.apache.hadoop.io.FloatWritable), IntWritable (org.apache.hadoop.io.IntWritable), LongWritable (org.apache.hadoop.io.LongWritable), BytesWritable (org.apache.hadoop.io.BytesWritable), Test (org.junit.Test)
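
The same write/readFields round trip applies to any Writable. A minimal generic sketch, with the hypothetical name roundTrip (not part of the test):

static <T extends Writable> T roundTrip(T src, T dst) throws IOException {
    // Serialize the source writable into an in-memory buffer.
    ByteArrayOutputStream out = new ByteArrayOutputStream();
    src.write(new DataOutputStream(out));
    // Repopulate the destination writable from the serialized bytes.
    dst.readFields(new DataInputStream(new ByteArrayInputStream(out.toByteArray())));
    return dst;
}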

Example 54 with IntWritable

Use of org.apache.hadoop.io.IntWritable in project hadoop by apache.

From the class TestValueIterReset, method test1:

/**
   * Test the case where we do a mark outside of a reset. Test for both the
   * file and the memory cache.
   * @param key the key whose values are being iterated
   * @param values a markable iterator over the values for the key
   * @return the number of mismatches detected
   * @throws IOException
   */
private static int test1(IntWritable key, MarkableIterator<IntWritable> values) throws IOException {
    IntWritable i;
    int errors = 0;
    int count = 0;
    ArrayList<IntWritable> expectedValues = new ArrayList<IntWritable>();
    ArrayList<IntWritable> expectedValues1 = new ArrayList<IntWritable>();
    LOG.info("Executing TEST:1 for Key:" + key);
    values.mark();
    LOG.info("TEST:1. Marking");
    while (values.hasNext()) {
        i = values.next();
        LOG.info(key + ":" + i);
        expectedValues.add(i);
        if (count == 2) {
            break;
        }
        count++;
    }
    values.reset();
    LOG.info("TEST:1. Reset");
    count = 0;
    while (values.hasNext()) {
        i = values.next();
        LOG.info(key + ":" + i);
        if (count < expectedValues.size()) {
            if (i.get() != expectedValues.get(count).get()) {
                errors++;
                LOG.info("TEST:1. Check:1 Expected: " + expectedValues.get(count) + ", Got: " + i);
                return errors;
            }
        }
        // We have moved past the first mark, but are still in the memory cache
        if (count == 3) {
            values.mark();
            LOG.info("TEST:1. Marking -- " + key + ": " + i);
        }
        if (count >= 3) {
            expectedValues1.add(i);
        }
        if (count == 5) {
            break;
        }
        count++;
    }
    if (count < expectedValues.size()) {
        LOG.info(("TEST:1 Check:2. Iterator returned lesser values"));
        errors++;
        return errors;
    }
    values.reset();
    count = 0;
    LOG.info("TEST:1. Reset");
    expectedValues.clear();
    while (values.hasNext()) {
        i = values.next();
        LOG.info(key + ":" + i);
        if (count < expectedValues1.size()) {
            if (i.get() != expectedValues1.get(count).get()) {
                errors++;
                LOG.info("TEST:1. Check:3 Expected: " + expectedValues1.get(count) + ", Got: " + i);
                return errors;
            }
        }
        // We have moved past the previous mark and are now in the file cache
        if (count == 25) {
            values.mark();
            LOG.info("TEST:1. Marking -- " + key + ":" + i);
        }
        if (count >= 25) {
            expectedValues.add(i);
        }
        count++;
    }
    if (count < expectedValues1.size()) {
        LOG.info(("TEST:1 Check:4. Iterator returned fewer values"));
        errors++;
        return errors;
    }
    values.reset();
    LOG.info("TEST:1. Reset");
    count = 0;
    while (values.hasNext()) {
        i = values.next();
        LOG.info(key + ":" + i);
        if (i.get() != expectedValues.get(count).get()) {
            errors++;
            LOG.info("TEST:1. Check:5 Expected: " + expectedValues.get(count) + ", Got: " + i);
            return errors;
        }
        count++;
    }
    LOG.info("TEST:1 Done");
    return errors;
}
Also used: ArrayList (java.util.ArrayList), IntWritable (org.apache.hadoop.io.IntWritable)
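
For context, test1 receives its iterator from a reducer that wraps the framework's value iterable, following the MarkableIterator javadoc. A sketch of that wiring (the reduce body here is our illustration, not the actual test reducer):

public void reduce(IntWritable key, Iterable<IntWritable> values, Context context)
        throws IOException, InterruptedException {
    // Wrap the framework's value iterator so it supports mark()/reset().
    MarkableIterator<IntWritable> mitr =
        new MarkableIterator<IntWritable>(values.iterator());
    // Count mismatches; a real test would assert the count is zero.
    int errors = test1(key, mitr);
    context.write(key, new IntWritable(errors));
}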

Example 55 with IntWritable

Use of org.apache.hadoop.io.IntWritable in project hadoop by apache.

From the class TestJoinDatamerge, method writeSimpleSrc:
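
The helper writes interleaved IntWritable key/value pairs across srcs SequenceFiles, appending a duplicate key once per writer so the join tests see repeated keys.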

private static Path[] writeSimpleSrc(Path testdir, Configuration conf, int srcs) throws IOException {
    SequenceFile.Writer[] out = null;
    Path[] src = new Path[srcs];
    try {
        out = createWriters(testdir, conf, srcs, src);
        final int capacity = srcs * 2 + 1;
        IntWritable key = new IntWritable();
        IntWritable val = new IntWritable();
        for (int k = 0; k < capacity; ++k) {
            for (int i = 0; i < srcs; ++i) {
                key.set(k % srcs == 0 ? k * srcs : k * srcs + i);
                val.set(10 * k + i);
                out[i].append(key, val);
                if (i == k) {
                    // add duplicate key
                    out[i].append(key, val);
                }
            }
        }
    } finally {
        if (out != null) {
            for (int i = 0; i < srcs; ++i) {
                if (out[i] != null) {
                    out[i].close();
                }
            }
        }
    }
    return src;
}
Also used: Path (org.apache.hadoop.fs.Path), IntWritable (org.apache.hadoop.io.IntWritable)
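
createWriters is a helper of the enclosing test class and is not shown here. A plausible minimal sketch, assuming the SequenceFile.createWriter option API; the file naming is illustrative and this is not the actual Hadoop test code:

private static SequenceFile.Writer[] createWriters(Path testdir,
        Configuration conf, int srcs, Path[] src) throws IOException {
    SequenceFile.Writer[] out = new SequenceFile.Writer[srcs];
    for (int i = 0; i < srcs; ++i) {
        // One IntWritable/IntWritable SequenceFile per source path.
        src[i] = new Path(testdir, "src" + i);
        out[i] = SequenceFile.createWriter(conf,
            SequenceFile.Writer.file(src[i]),
            SequenceFile.Writer.keyClass(IntWritable.class),
            SequenceFile.Writer.valueClass(IntWritable.class));
    }
    return out;
}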

Aggregations

IntWritable (org.apache.hadoop.io.IntWritable): 338
Test (org.junit.Test): 120
Text (org.apache.hadoop.io.Text): 115
LongWritable (org.apache.hadoop.io.LongWritable): 79
Path (org.apache.hadoop.fs.Path): 66
FloatWritable (org.apache.hadoop.io.FloatWritable): 58
DoubleWritable (org.apache.hadoop.hive.serde2.io.DoubleWritable): 56
ObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector): 56
BooleanWritable (org.apache.hadoop.io.BooleanWritable): 51
ShortWritable (org.apache.hadoop.hive.serde2.io.ShortWritable): 50
ByteWritable (org.apache.hadoop.hive.serde2.io.ByteWritable): 47
BytesWritable (org.apache.hadoop.io.BytesWritable): 45
SequenceFile (org.apache.hadoop.io.SequenceFile): 41
ArrayList (java.util.ArrayList): 40
Writable (org.apache.hadoop.io.Writable): 39
TimestampWritable (org.apache.hadoop.hive.serde2.io.TimestampWritable): 37
Configuration (org.apache.hadoop.conf.Configuration): 35
IOException (java.io.IOException): 30
DeferredObject (org.apache.hadoop.hive.ql.udf.generic.GenericUDF.DeferredObject): 29
Random (java.util.Random): 28