
Example 16 with IntWritable

Use of org.apache.hadoop.io.IntWritable in project hadoop by apache.

The class TestMRMultipleOutputs, method _testMultipleOutputs.

protected void _testMultipleOutputs(boolean withCounters) throws Exception {
    String input = "a\nb\nc\nd\ne\nc\nd\ne";
    Configuration conf = createJobConf();
    Job job = MapReduceTestUtil.createJob(conf, IN_DIR, OUT_DIR, 2, 1, input);
    job.setJobName("mo");
    MultipleOutputs.addNamedOutput(job, TEXT, TextOutputFormat.class, LongWritable.class, Text.class);
    MultipleOutputs.addNamedOutput(job, SEQUENCE, SequenceFileOutputFormat.class, IntWritable.class, Text.class);
    MultipleOutputs.setCountersEnabled(job, withCounters);
    job.setMapperClass(MOMap.class);
    job.setReducerClass(MOReduce.class);
    job.waitForCompletion(true);
    // assert number of named output part files
    int namedOutputCount = 0;
    int valueBasedOutputCount = 0;
    FileSystem fs = OUT_DIR.getFileSystem(conf);
    FileStatus[] statuses = fs.listStatus(OUT_DIR);
    for (FileStatus status : statuses) {
        String fileName = status.getPath().getName();
        if (fileName.equals("text-m-00000") || fileName.equals("text-m-00001") || fileName.equals("text-r-00000") || fileName.equals("sequence_A-m-00000") || fileName.equals("sequence_A-m-00001") || fileName.equals("sequence_B-m-00000") || fileName.equals("sequence_B-m-00001") || fileName.equals("sequence_B-r-00000") || fileName.equals("sequence_C-r-00000")) {
            namedOutputCount++;
        } else if (fileName.equals("a-r-00000") || fileName.equals("b-r-00000") || fileName.equals("c-r-00000") || fileName.equals("d-r-00000") || fileName.equals("e-r-00000")) {
            valueBasedOutputCount++;
        }
    }
    assertEquals(9, namedOutputCount);
    assertEquals(5, valueBasedOutputCount);
    // assert TextOutputFormat files correctness
    BufferedReader reader = new BufferedReader(new InputStreamReader(
            fs.open(new Path(FileOutputFormat.getOutputPath(job), "text-r-00000"))));
    int count = 0;
    String line = reader.readLine();
    while (line != null) {
        assertTrue(line.endsWith(TEXT));
        line = reader.readLine();
        count++;
    }
    reader.close();
    assertTrue(count > 0);
    // assert SequenceOutputFormat files correctness
    SequenceFile.Reader seqReader = new SequenceFile.Reader(fs,
            new Path(FileOutputFormat.getOutputPath(job), "sequence_B-r-00000"), conf);
    assertEquals(IntWritable.class, seqReader.getKeyClass());
    assertEquals(Text.class, seqReader.getValueClass());
    count = 0;
    IntWritable key = new IntWritable();
    Text value = new Text();
    while (seqReader.next(key, value)) {
        assertEquals(SEQUENCE, value.toString());
        count++;
    }
    seqReader.close();
    assertTrue(count > 0);
    if (withCounters) {
        CounterGroup counters = job.getCounters().getGroup(MultipleOutputs.class.getName());
        assertEquals(9, counters.size());
        assertEquals(4, counters.findCounter(TEXT).getValue());
        assertEquals(2, counters.findCounter(SEQUENCE + "_A").getValue());
        assertEquals(4, counters.findCounter(SEQUENCE + "_B").getValue());
        assertEquals(2, counters.findCounter(SEQUENCE + "_C").getValue());
        assertEquals(2, counters.findCounter("a").getValue());
        assertEquals(2, counters.findCounter("b").getValue());
        assertEquals(4, counters.findCounter("c").getValue());
        assertEquals(4, counters.findCounter("d").getValue());
        assertEquals(4, counters.findCounter("e").getValue());
    }
}
Also used: Path(org.apache.hadoop.fs.Path) FileStatus(org.apache.hadoop.fs.FileStatus) Configuration(org.apache.hadoop.conf.Configuration) InputStreamReader(java.io.InputStreamReader) CounterGroup(org.apache.hadoop.mapreduce.CounterGroup) BufferedReader(java.io.BufferedReader) Text(org.apache.hadoop.io.Text) SequenceFile(org.apache.hadoop.io.SequenceFile) FileSystem(org.apache.hadoop.fs.FileSystem) Job(org.apache.hadoop.mapreduce.Job) IntWritable(org.apache.hadoop.io.IntWritable)
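
For context, here is a minimal reducer sketch in the spirit of the MOReduce class referenced above, assuming the job's output types are LongWritable/Text as in this test. The exact record-writing logic in Hadoop's MOReduce differs; the named outputs "text" and "sequence" stand in for the test's TEXT and SEQUENCE constants.

import java.io.IOException;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.output.MultipleOutputs;

public class MOReduce extends Reducer<LongWritable, Text, LongWritable, Text> {

    private MultipleOutputs<LongWritable, Text> mos;

    @Override
    protected void setup(Context context) {
        // Bind MultipleOutputs to the task context once per task
        mos = new MultipleOutputs<LongWritable, Text>(context);
    }

    @Override
    protected void reduce(LongWritable key, Iterable<Text> values, Context context)
            throws IOException, InterruptedException {
        for (Text value : values) {
            // Record goes to the "text" named output declared via addNamedOutput()
            mos.write("text", key, value);
            // Same API with an explicit base path: records land in files such
            // as sequence_B-r-00000
            mos.write("sequence", new IntWritable(2), value, "sequence_B");
            // Value-based output: using the record value as the base path
            // yields files such as a-r-00000 ... e-r-00000
            mos.write(key, value, value.toString());
        }
    }

    @Override
    protected void cleanup(Context context) throws IOException, InterruptedException {
        // Close all named-output record writers, or output may be lost
        mos.close();
    }
}

When setCountersEnabled(job, true) is in effect, MultipleOutputs keeps one counter per named output or base path in the group named after the MultipleOutputs class, which is exactly what the withCounters branch of the test asserts.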

Example 17 with IntWritable

Use of org.apache.hadoop.io.IntWritable in project hadoop by apache.

The class TestMRSequenceFileAsBinaryOutputFormat, method testBinary.

@Test
public void testBinary() throws IOException, InterruptedException {
    Configuration conf = new Configuration();
    Job job = Job.getInstance(conf);
    Path outdir = new Path(System.getProperty("test.build.data", "/tmp"), "outseq");
    Random r = new Random();
    long seed = r.nextLong();
    r.setSeed(seed);
    FileOutputFormat.setOutputPath(job, outdir);
    SequenceFileAsBinaryOutputFormat.setSequenceFileOutputKeyClass(job, IntWritable.class);
    SequenceFileAsBinaryOutputFormat.setSequenceFileOutputValueClass(job, DoubleWritable.class);
    SequenceFileAsBinaryOutputFormat.setCompressOutput(job, true);
    SequenceFileAsBinaryOutputFormat.setOutputCompressionType(job, CompressionType.BLOCK);
    BytesWritable bkey = new BytesWritable();
    BytesWritable bval = new BytesWritable();
    TaskAttemptContext context = MapReduceTestUtil.createDummyMapTaskAttemptContext(job.getConfiguration());
    OutputFormat<BytesWritable, BytesWritable> outputFormat = new SequenceFileAsBinaryOutputFormat();
    OutputCommitter committer = outputFormat.getOutputCommitter(context);
    committer.setupJob(job);
    RecordWriter<BytesWritable, BytesWritable> writer = outputFormat.getRecordWriter(context);
    IntWritable iwritable = new IntWritable();
    DoubleWritable dwritable = new DoubleWritable();
    DataOutputBuffer outbuf = new DataOutputBuffer();
    LOG.info("Creating data by SequenceFileAsBinaryOutputFormat");
    try {
        for (int i = 0; i < RECORDS; ++i) {
            iwritable = new IntWritable(r.nextInt());
            iwritable.write(outbuf);
            bkey.set(outbuf.getData(), 0, outbuf.getLength());
            outbuf.reset();
            dwritable = new DoubleWritable(r.nextDouble());
            dwritable.write(outbuf);
            bval.set(outbuf.getData(), 0, outbuf.getLength());
            outbuf.reset();
            writer.write(bkey, bval);
        }
    } finally {
        writer.close(context);
    }
    committer.commitTask(context);
    committer.commitJob(job);
    InputFormat<IntWritable, DoubleWritable> iformat = new SequenceFileInputFormat<IntWritable, DoubleWritable>();
    int count = 0;
    r.setSeed(seed);
    SequenceFileInputFormat.setInputPaths(job, outdir);
    LOG.info("Reading data by SequenceFileInputFormat");
    for (InputSplit split : iformat.getSplits(job)) {
        RecordReader<IntWritable, DoubleWritable> reader = iformat.createRecordReader(split, context);
        MapContext<IntWritable, DoubleWritable, BytesWritable, BytesWritable> mcontext =
                new MapContextImpl<IntWritable, DoubleWritable, BytesWritable, BytesWritable>(
                        job.getConfiguration(), context.getTaskAttemptID(), reader, null, null,
                        MapReduceTestUtil.createDummyReporter(), split);
        reader.initialize(split, mcontext);
        try {
            int sourceInt;
            double sourceDouble;
            while (reader.nextKeyValue()) {
                sourceInt = r.nextInt();
                sourceDouble = r.nextDouble();
                iwritable = reader.getCurrentKey();
                dwritable = reader.getCurrentValue();
                assertEquals("Keys don't match: " + "*" + iwritable.get() + ":" + sourceInt + "*", sourceInt, iwritable.get());
                assertTrue("Vals don't match: " + "*" + dwritable.get() + ":" + sourceDouble + "*", Double.compare(dwritable.get(), sourceDouble) == 0);
                ++count;
            }
        } finally {
            reader.close();
        }
    }
    assertEquals("Some records not found", RECORDS, count);
}
Also used : Path(org.apache.hadoop.fs.Path) OutputCommitter(org.apache.hadoop.mapreduce.OutputCommitter) Configuration(org.apache.hadoop.conf.Configuration) MapContextImpl(org.apache.hadoop.mapreduce.task.MapContextImpl) SequenceFileInputFormat(org.apache.hadoop.mapreduce.lib.input.SequenceFileInputFormat) BytesWritable(org.apache.hadoop.io.BytesWritable) TaskAttemptContext(org.apache.hadoop.mapreduce.TaskAttemptContext) DoubleWritable(org.apache.hadoop.io.DoubleWritable) Random(java.util.Random) DataOutputBuffer(org.apache.hadoop.io.DataOutputBuffer) Job(org.apache.hadoop.mapreduce.Job) InputSplit(org.apache.hadoop.mapreduce.InputSplit) IntWritable(org.apache.hadoop.io.IntWritable) Test(org.junit.Test)
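
The four-line serialize-into-BytesWritable pattern inside the write loop is the core of this example. A small hypothetical helper (WritableBytes is not part of Hadoop) that captures it:

import java.io.IOException;
import org.apache.hadoop.io.BytesWritable;
import org.apache.hadoop.io.DataOutputBuffer;
import org.apache.hadoop.io.Writable;

public final class WritableBytes {

    private WritableBytes() {
    }

    /** Serializes {@code w} into {@code dest}, reusing {@code buf} as scratch space. */
    public static void toBytes(Writable w, DataOutputBuffer buf, BytesWritable dest)
            throws IOException {
        // Discard any previous contents of the scratch buffer
        buf.reset();
        // Raw Writable serialization into the buffer
        w.write(buf);
        // Copy the serialized bytes into dest's backing array
        dest.set(buf.getData(), 0, buf.getLength());
    }
}

With it, the loop body above reduces to WritableBytes.toBytes(iwritable, outbuf, bkey) and WritableBytes.toBytes(dwritable, outbuf, bval), followed by writer.write(bkey, bval).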

Example 18 with IntWritable

Use of org.apache.hadoop.io.IntWritable in project hadoop by apache.

The class TestJoinTupleWritable, method testNestedIterable.

@Test
public void testNestedIterable() throws Exception {
    Random r = new Random();
    Writable[] writs = {
        new BooleanWritable(r.nextBoolean()),
        new FloatWritable(r.nextFloat()),
        new FloatWritable(r.nextFloat()),
        new IntWritable(r.nextInt()),
        new LongWritable(r.nextLong()),
        new BytesWritable("dingo".getBytes()),
        new LongWritable(r.nextLong()),
        new IntWritable(r.nextInt()),
        new BytesWritable("yak".getBytes()),
        new IntWritable(r.nextInt())
    };
    TupleWritable sTuple = makeTuple(writs);
    assertTrue("Bad count", writs.length == verifIter(writs, sTuple, 0));
}
Also used: FloatWritable(org.apache.hadoop.io.FloatWritable) Random(java.util.Random) BooleanWritable(org.apache.hadoop.io.BooleanWritable) Writable(org.apache.hadoop.io.Writable) LongWritable(org.apache.hadoop.io.LongWritable) BytesWritable(org.apache.hadoop.io.BytesWritable) IntWritable(org.apache.hadoop.io.IntWritable) Test(org.junit.Test)
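
verifIter is a helper defined elsewhere in TestJoinTupleWritable. As a rough illustration of what such a recursive walk looks like, here is a hypothetical countLeaves sketch; TupleWritable's iterator visits only the positions that were actually written:

import org.apache.hadoop.io.Writable;
import org.apache.hadoop.mapreduce.lib.join.TupleWritable;

public final class TupleWalk {

    private TupleWalk() {
    }

    /** Counts non-tuple leaves, descending into nested tuples. */
    public static int countLeaves(TupleWritable tuple) {
        int n = 0;
        for (Writable w : tuple) {
            if (w instanceof TupleWritable) {
                // Recurse into a nested tuple
                n += countLeaves((TupleWritable) w);
            } else {
                n++;
            }
        }
        return n;
    }
}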

Example 19 with IntWritable

Use of org.apache.hadoop.io.IntWritable in project hadoop by apache.

The class TestJoinTupleWritable, method testWritable.

@Test
public void testWritable() throws Exception {
    Random r = new Random();
    Writable[] writs = {
        new BooleanWritable(r.nextBoolean()),
        new FloatWritable(r.nextFloat()),
        new FloatWritable(r.nextFloat()),
        new IntWritable(r.nextInt()),
        new LongWritable(r.nextLong()),
        new BytesWritable("dingo".getBytes()),
        new LongWritable(r.nextLong()),
        new IntWritable(r.nextInt()),
        new BytesWritable("yak".getBytes()),
        new IntWritable(r.nextInt())
    };
    TupleWritable sTuple = makeTuple(writs);
    ByteArrayOutputStream out = new ByteArrayOutputStream();
    sTuple.write(new DataOutputStream(out));
    ByteArrayInputStream in = new ByteArrayInputStream(out.toByteArray());
    TupleWritable dTuple = new TupleWritable();
    dTuple.readFields(new DataInputStream(in));
    assertTrue("Failed to write/read tuple", sTuple.equals(dTuple));
}
Also used: DataOutputStream(java.io.DataOutputStream) Writable(org.apache.hadoop.io.Writable) LongWritable(org.apache.hadoop.io.LongWritable) BooleanWritable(org.apache.hadoop.io.BooleanWritable) BytesWritable(org.apache.hadoop.io.BytesWritable) FloatWritable(org.apache.hadoop.io.FloatWritable) IntWritable(org.apache.hadoop.io.IntWritable) ByteArrayOutputStream(java.io.ByteArrayOutputStream) DataInputStream(java.io.DataInputStream) Random(java.util.Random) ByteArrayInputStream(java.io.ByteArrayInputStream) Test(org.junit.Test)
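
The write/readFields round trip above works for any Writable, not just TupleWritable. A small hypothetical helper that captures the pattern:

import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.DataInputStream;
import java.io.DataOutputStream;
import java.io.IOException;
import org.apache.hadoop.io.Writable;

public final class WritableRoundTrip {

    private WritableRoundTrip() {
    }

    /** Serializes {@code src} and deserializes the bytes into {@code dest}. */
    public static <T extends Writable> T roundTrip(T src, T dest) throws IOException {
        ByteArrayOutputStream out = new ByteArrayOutputStream();
        // Writable serialization is defined over DataOutput/DataInput
        src.write(new DataOutputStream(out));
        dest.readFields(new DataInputStream(new ByteArrayInputStream(out.toByteArray())));
        return dest;
    }
}

The test's final assertion then becomes assertTrue(sTuple.equals(WritableRoundTrip.roundTrip(sTuple, new TupleWritable()))).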

Example 20 with IntWritable

Use of org.apache.hadoop.io.IntWritable in project hadoop by apache.

The class TestValueIterReset, method test1.

/**
   * Test the case where we mark outside of a reset, exercising both the
   * file and in-memory value caches.
   * @param key the reduce key currently being processed
   * @param values a markable iterator over the values for the given key
   * @return the number of mismatches detected
   * @throws IOException if reading from the value iterator fails
   */
private static int test1(IntWritable key, MarkableIterator<IntWritable> values) throws IOException {
    IntWritable i;
    int errors = 0;
    int count = 0;
    ArrayList<IntWritable> expectedValues = new ArrayList<IntWritable>();
    ArrayList<IntWritable> expectedValues1 = new ArrayList<IntWritable>();
    LOG.info("Executing TEST:1 for Key:" + key);
    values.mark();
    LOG.info("TEST:1. Marking");
    while (values.hasNext()) {
        i = values.next();
        LOG.info(key + ":" + i);
        expectedValues.add(i);
        if (count == 2) {
            break;
        }
        count++;
    }
    values.reset();
    LOG.info("TEST:1. Reset");
    count = 0;
    while (values.hasNext()) {
        i = values.next();
        LOG.info(key + ":" + i);
        if (count < expectedValues.size()) {
            if (i != expectedValues.get(count)) {
                errors++;
                LOG.info("TEST:1. Check:1 Expected: " + expectedValues.get(count) + ", Got: " + i);
                return errors;
            }
        }
        // We have moved past the first mark, but are still in the memory cache
        if (count == 3) {
            values.mark();
            LOG.info("TEST:1. Marking -- " + key + ": " + i);
        }
        if (count >= 3) {
            expectedValues1.add(i);
        }
        if (count == 5) {
            break;
        }
        count++;
    }
    if (count < expectedValues.size()) {
        LOG.info(("TEST:1 Check:2. Iterator returned lesser values"));
        errors++;
        return errors;
    }
    values.reset();
    count = 0;
    LOG.info("TEST:1. Reset");
    expectedValues.clear();
    while (values.hasNext()) {
        i = values.next();
        LOG.info(key + ":" + i);
        if (count < expectedValues1.size()) {
            if (i != expectedValues1.get(count)) {
                errors++;
                LOG.info("TEST:1. Check:3 Expected: " + expectedValues1.get(count) + ", Got: " + i);
                return errors;
            }
        }
        // We have moved past the previous mark and are now in the file cache
        if (count == 25) {
            values.mark();
            LOG.info("TEST:1. Marking -- " + key + ":" + i);
        }
        if (count >= 25) {
            expectedValues.add(i);
        }
        count++;
    }
    if (count < expectedValues1.size()) {
        LOG.info(("TEST:1 Check:4. Iterator returned fewer values"));
        errors++;
        return errors;
    }
    values.reset();
    LOG.info("TEST:1. Reset");
    count = 0;
    while (values.hasNext()) {
        i = values.next();
        LOG.info(key + ":" + i);
        if (i != expectedValues.get(count)) {
            errors++;
            LOG.info("TEST:1. Check:5 Expected: " + expectedValues.get(count) + ", Got: " + i);
            return errors;
        }
    }
    LOG.info("TEST:1 Done");
    return errors;
}
Also used: ArrayList(java.util.ArrayList) IntWritable(org.apache.hadoop.io.IntWritable)
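
test1 receives its MarkableIterator from a reducer. A minimal sketch of that plumbing, assuming IntWritable keys and values as in TestValueIterReset; in the real test the reducer and test1 live in the same class, so the call below resolves:

import java.io.IOException;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.mapreduce.MarkableIterator;
import org.apache.hadoop.mapreduce.Reducer;

public class ValueIterResetReducer
        extends Reducer<IntWritable, IntWritable, IntWritable, IntWritable> {

    @Override
    protected void reduce(IntWritable key, Iterable<IntWritable> values,
            Context context) throws IOException, InterruptedException {
        // Wrapping the framework's value iterator adds mark()/reset() support;
        // replayed values come from an in-memory cache that spills to a file,
        // which is why test1 exercises both caches.
        MarkableIterator<IntWritable> mitr =
                new MarkableIterator<IntWritable>(values.iterator());
        int errors = test1(key, mitr); // the scenario defined above
        context.write(key, new IntWritable(errors));
    }
}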

Aggregations

IntWritable (org.apache.hadoop.io.IntWritable): 312
Test (org.junit.Test): 116
Text (org.apache.hadoop.io.Text): 102
LongWritable (org.apache.hadoop.io.LongWritable): 70
Path (org.apache.hadoop.fs.Path): 64
ObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector): 55
FloatWritable (org.apache.hadoop.io.FloatWritable): 48
DoubleWritable (org.apache.hadoop.hive.serde2.io.DoubleWritable): 47
ShortWritable (org.apache.hadoop.hive.serde2.io.ShortWritable): 43
BooleanWritable (org.apache.hadoop.io.BooleanWritable): 42
ByteWritable (org.apache.hadoop.hive.serde2.io.ByteWritable): 40
SequenceFile (org.apache.hadoop.io.SequenceFile): 39
BytesWritable (org.apache.hadoop.io.BytesWritable): 37
Writable (org.apache.hadoop.io.Writable): 35
ArrayList (java.util.ArrayList): 34
Configuration (org.apache.hadoop.conf.Configuration): 33
Random (java.util.Random): 29
TimestampWritable (org.apache.hadoop.hive.serde2.io.TimestampWritable): 29
IOException (java.io.IOException): 28
DeferredJavaObject (org.apache.hadoop.hive.ql.udf.generic.GenericUDF.DeferredJavaObject): 28