Usage of org.apache.hadoop.io.IntWritable in the Apache Hadoop project:
class TestMRMultipleOutputs, method _testMultipleOutputs.
/**
 * Runs a job configured with two named outputs ({@code TEXT} and
 * {@code SEQUENCE}) plus value-based outputs, then verifies the part files
 * produced, the contents of the text and sequence outputs, and optionally
 * the MultipleOutputs counters.
 *
 * @param withCounters whether MultipleOutputs counters are enabled and asserted
 * @throws Exception if the job or any file-system access fails
 */
protected void _testMultipleOutputs(boolean withCounters) throws Exception {
  String input = "a\nb\nc\nd\ne\nc\nd\ne";
  Configuration conf = createJobConf();
  Job job = MapReduceTestUtil.createJob(conf, IN_DIR, OUT_DIR, 2, 1, input);
  job.setJobName("mo");
  MultipleOutputs.addNamedOutput(job, TEXT, TextOutputFormat.class,
      LongWritable.class, Text.class);
  MultipleOutputs.addNamedOutput(job, SEQUENCE, SequenceFileOutputFormat.class,
      IntWritable.class, Text.class);
  MultipleOutputs.setCountersEnabled(job, withCounters);
  job.setMapperClass(MOMap.class);
  job.setReducerClass(MOReduce.class);
  job.waitForCompletion(true);

  // Assert the number of named-output and value-based part files.
  int namedOutputCount = 0;
  int valueBasedOutputCount = 0;
  FileSystem fs = OUT_DIR.getFileSystem(conf);
  FileStatus[] statuses = fs.listStatus(OUT_DIR);
  for (FileStatus status : statuses) {
    String fileName = status.getPath().getName();
    if (fileName.equals("text-m-00000")
        || fileName.equals("text-m-00001")
        || fileName.equals("text-r-00000")
        || fileName.equals("sequence_A-m-00000")
        || fileName.equals("sequence_A-m-00001")
        || fileName.equals("sequence_B-m-00000")
        || fileName.equals("sequence_B-m-00001")
        || fileName.equals("sequence_B-r-00000")
        || fileName.equals("sequence_C-r-00000")) {
      namedOutputCount++;
    } else if (fileName.equals("a-r-00000")
        || fileName.equals("b-r-00000")
        || fileName.equals("c-r-00000")
        || fileName.equals("d-r-00000")
        || fileName.equals("e-r-00000")) {
      valueBasedOutputCount++;
    }
  }
  assertEquals(9, namedOutputCount);
  assertEquals(5, valueBasedOutputCount);

  // Assert TextOutputFormat file correctness. try-with-resources closes the
  // stream even when an assertion inside the loop fails (the original leaked
  // the reader on any assertion failure).
  int count = 0;
  try (BufferedReader reader = new BufferedReader(new InputStreamReader(
      fs.open(new Path(FileOutputFormat.getOutputPath(job), "text-r-00000"))))) {
    String line = reader.readLine();
    while (line != null) {
      assertTrue(line.endsWith(TEXT));
      line = reader.readLine();
      count++;
    }
  }
  assertTrue("text output should not be empty", count > 0);

  // Assert SequenceFileOutputFormat file correctness; SequenceFile.Reader is
  // Closeable, so close it deterministically as well.
  try (SequenceFile.Reader seqReader = new SequenceFile.Reader(fs,
      new Path(FileOutputFormat.getOutputPath(job), "sequence_B-r-00000"), conf)) {
    assertEquals(IntWritable.class, seqReader.getKeyClass());
    assertEquals(Text.class, seqReader.getValueClass());
    count = 0;
    IntWritable key = new IntWritable();
    Text value = new Text();
    while (seqReader.next(key, value)) {
      assertEquals(SEQUENCE, value.toString());
      count++;
    }
  }
  assertTrue("sequence output should not be empty", count > 0);

  if (withCounters) {
    CounterGroup counters =
        job.getCounters().getGroup(MultipleOutputs.class.getName());
    assertEquals(9, counters.size());
    assertEquals(4, counters.findCounter(TEXT).getValue());
    assertEquals(2, counters.findCounter(SEQUENCE + "_A").getValue());
    assertEquals(4, counters.findCounter(SEQUENCE + "_B").getValue());
    assertEquals(2, counters.findCounter(SEQUENCE + "_C").getValue());
    assertEquals(2, counters.findCounter("a").getValue());
    assertEquals(2, counters.findCounter("b").getValue());
    assertEquals(4, counters.findCounter("c").getValue());
    assertEquals(4, counters.findCounter("d").getValue());
    assertEquals(4, counters.findCounter("e").getValue());
  }
}
Usage of org.apache.hadoop.io.IntWritable in the Apache Hadoop project:
class TestMRSequenceFileAsBinaryOutputFormat, method testBinary.
/**
 * Writes random int/double pairs through SequenceFileAsBinaryOutputFormat as
 * raw serialized bytes, then reads them back via SequenceFileInputFormat and
 * checks every record against a replay of the same seeded random sequence.
 */
@Test
public void testBinary() throws IOException, InterruptedException {
  Configuration conf = new Configuration();
  Job job = Job.getInstance(conf);
  Path outdir = new Path(System.getProperty("test.build.data", "/tmp"), "outseq");
  Random random = new Random();
  long seed = random.nextLong();
  random.setSeed(seed);
  FileOutputFormat.setOutputPath(job, outdir);
  SequenceFileAsBinaryOutputFormat.setSequenceFileOutputKeyClass(job, IntWritable.class);
  SequenceFileAsBinaryOutputFormat.setSequenceFileOutputValueClass(job, DoubleWritable.class);
  SequenceFileAsBinaryOutputFormat.setCompressOutput(job, true);
  SequenceFileAsBinaryOutputFormat.setOutputCompressionType(job, CompressionType.BLOCK);
  BytesWritable rawKey = new BytesWritable();
  BytesWritable rawValue = new BytesWritable();
  TaskAttemptContext context =
      MapReduceTestUtil.createDummyMapTaskAttemptContext(job.getConfiguration());
  OutputFormat<BytesWritable, BytesWritable> outputFormat =
      new SequenceFileAsBinaryOutputFormat();
  OutputCommitter committer = outputFormat.getOutputCommitter(context);
  committer.setupJob(job);
  RecordWriter<BytesWritable, BytesWritable> writer = outputFormat.getRecordWriter(context);
  IntWritable intKey = new IntWritable();
  DoubleWritable doubleValue = new DoubleWritable();
  DataOutputBuffer buffer = new DataOutputBuffer();
  LOG.info("Creating data by SequenceFileAsBinaryOutputFormat");
  try {
    for (int i = 0; i < RECORDS; ++i) {
      // Serialize each writable into the shared buffer, snapshot the bytes,
      // then reset the buffer for the next value.
      intKey = new IntWritable(random.nextInt());
      intKey.write(buffer);
      rawKey.set(buffer.getData(), 0, buffer.getLength());
      buffer.reset();
      doubleValue = new DoubleWritable(random.nextDouble());
      doubleValue.write(buffer);
      rawValue.set(buffer.getData(), 0, buffer.getLength());
      buffer.reset();
      writer.write(rawKey, rawValue);
    }
  } finally {
    writer.close(context);
  }
  committer.commitTask(context);
  committer.commitJob(job);

  // Re-seed and replay the identical random sequence while reading back.
  InputFormat<IntWritable, DoubleWritable> inputFormat =
      new SequenceFileInputFormat<IntWritable, DoubleWritable>();
  int recordCount = 0;
  random.setSeed(seed);
  SequenceFileInputFormat.setInputPaths(job, outdir);
  LOG.info("Reading data by SequenceFileInputFormat");
  for (InputSplit split : inputFormat.getSplits(job)) {
    RecordReader<IntWritable, DoubleWritable> reader =
        inputFormat.createRecordReader(split, context);
    MapContext<IntWritable, DoubleWritable, BytesWritable, BytesWritable> mcontext =
        new MapContextImpl<IntWritable, DoubleWritable, BytesWritable, BytesWritable>(
            job.getConfiguration(), context.getTaskAttemptID(), reader, null, null,
            MapReduceTestUtil.createDummyReporter(), split);
    reader.initialize(split, mcontext);
    try {
      while (reader.nextKeyValue()) {
        int expectedInt = random.nextInt();
        double expectedDouble = random.nextDouble();
        intKey = reader.getCurrentKey();
        doubleValue = reader.getCurrentValue();
        assertEquals("Keys don't match: " + "*" + intKey.get() + ":" + expectedInt + "*",
            expectedInt, intKey.get());
        assertTrue("Vals don't match: " + "*" + doubleValue.get() + ":" + expectedDouble + "*",
            Double.compare(doubleValue.get(), expectedDouble) == 0);
        ++recordCount;
      }
    } finally {
      reader.close();
    }
  }
  assertEquals("Some records not found", RECORDS, recordCount);
}
Usage of org.apache.hadoop.io.IntWritable in the Apache Hadoop project:
class TestJoinTupleWritable, method testNestedIterable.
/**
 * Checks that iterating a TupleWritable built from a mixed bag of writables
 * visits exactly the original elements (verifIter performs the recursive
 * element-by-element comparison).
 */
@Test
public void testNestedIterable() throws Exception {
  Random rand = new Random();
  Writable[] elements = {
      new BooleanWritable(rand.nextBoolean()),
      new FloatWritable(rand.nextFloat()),
      new FloatWritable(rand.nextFloat()),
      new IntWritable(rand.nextInt()),
      new LongWritable(rand.nextLong()),
      new BytesWritable("dingo".getBytes()),
      new LongWritable(rand.nextLong()),
      new IntWritable(rand.nextInt()),
      new BytesWritable("yak".getBytes()),
      new IntWritable(rand.nextInt())
  };
  TupleWritable tuple = makeTuple(elements);
  assertTrue("Bad count", elements.length == verifIter(elements, tuple, 0));
}
Usage of org.apache.hadoop.io.IntWritable in the Apache Hadoop project:
class TestJoinTupleWritable, method testWritable.
/**
 * Round-trips a TupleWritable through write()/readFields() over an in-memory
 * byte stream and asserts the deserialized tuple equals the original.
 */
@Test
public void testWritable() throws Exception {
  Random rand = new Random();
  Writable[] elements = {
      new BooleanWritable(rand.nextBoolean()),
      new FloatWritable(rand.nextFloat()),
      new FloatWritable(rand.nextFloat()),
      new IntWritable(rand.nextInt()),
      new LongWritable(rand.nextLong()),
      new BytesWritable("dingo".getBytes()),
      new LongWritable(rand.nextLong()),
      new IntWritable(rand.nextInt()),
      new BytesWritable("yak".getBytes()),
      new IntWritable(rand.nextInt())
  };
  TupleWritable written = makeTuple(elements);
  ByteArrayOutputStream bytes = new ByteArrayOutputStream();
  written.write(new DataOutputStream(bytes));
  TupleWritable restored = new TupleWritable();
  restored.readFields(
      new DataInputStream(new ByteArrayInputStream(bytes.toByteArray())));
  assertTrue("Failed to write/read tuple", written.equals(restored));
}
Usage of org.apache.hadoop.io.IntWritable in the Apache Hadoop project:
class TestValueIterReset, method test1.
/**
 * Tests mark/reset behavior of a {@link MarkableIterator}: marks before a
 * reset, re-marks at increasing offsets, and verifies each replayed window
 * matches the values recorded on the previous pass. Exercises both the
 * in-memory and the file-backed value caches.
 *
 * @param key the reduce key being iterated (used only for logging)
 * @param values markable iterator over the key's values
 * @return the number of mismatches detected (0 means the test passed)
 * @throws IOException if the underlying iterator fails
 */
private static int test1(IntWritable key, MarkableIterator<IntWritable> values) throws IOException {
  IntWritable i;
  int errors = 0;
  int count = 0;
  // Values recorded on one pass, checked against the replay after reset().
  ArrayList<IntWritable> expectedValues = new ArrayList<IntWritable>();
  ArrayList<IntWritable> expectedValues1 = new ArrayList<IntWritable>();
  LOG.info("Executing TEST:1 for Key:" + key);
  values.mark();
  LOG.info("TEST:1. Marking");
  // Pass 1: mark at the start, record the first three values.
  while (values.hasNext()) {
    i = values.next();
    LOG.info(key + ":" + i);
    // NOTE(review): the iterator likely returns the same reused IntWritable
    // instance each time (standard Hadoop value reuse), so this list may hold
    // repeated references to one mutating object — confirm intended.
    expectedValues.add(i);
    if (count == 2) {
      break;
    }
    count++;
  }
  values.reset();
  LOG.info("TEST:1. Reset");
  count = 0;
  // Pass 2: replay from the first mark, verify the recorded prefix, then
  // re-mark at offset 3 (past the first mark, still in the memory cache)
  // and record values 3..5.
  while (values.hasNext()) {
    i = values.next();
    LOG.info(key + ":" + i);
    if (count < expectedValues.size()) {
      // NOTE(review): '!=' is a reference comparison, not value equality;
      // this only detects mismatches under the object-reuse assumption
      // above — verify this is the intended check.
      if (i != expectedValues.get(count)) {
        errors++;
        LOG.info("TEST:1. Check:1 Expected: " + expectedValues.get(count) + ", Got: " + i);
        return errors;
      }
    }
    // We have moved passed the first mark, but still in the memory cache
    if (count == 3) {
      values.mark();
      LOG.info("TEST:1. Marking -- " + key + ": " + i);
    }
    if (count >= 3) {
      expectedValues1.add(i);
    }
    if (count == 5) {
      break;
    }
    count++;
  }
  if (count < expectedValues.size()) {
    LOG.info(("TEST:1 Check:2. Iterator returned lesser values"));
    errors++;
    return errors;
  }
  values.reset();
  count = 0;
  LOG.info("TEST:1. Reset");
  expectedValues.clear();
  // Pass 3: replay from the second mark, verify, then re-mark at offset 25
  // (deep enough to land in the file-backed cache) and record the tail.
  while (values.hasNext()) {
    i = values.next();
    LOG.info(key + ":" + i);
    if (count < expectedValues1.size()) {
      if (i != expectedValues1.get(count)) {
        errors++;
        LOG.info("TEST:1. Check:3 Expected: " + expectedValues1.get(count) + ", Got: " + i);
        return errors;
      }
    }
    // cache
    if (count == 25) {
      values.mark();
      LOG.info("TEST:1. Marking -- " + key + ":" + i);
    }
    if (count >= 25) {
      expectedValues.add(i);
    }
    count++;
  }
  if (count < expectedValues1.size()) {
    LOG.info(("TEST:1 Check:4. Iterator returned fewer values"));
    errors++;
    return errors;
  }
  values.reset();
  LOG.info("TEST:1. Reset");
  count = 0;
  // Pass 4: replay from the third mark and verify the recorded tail.
  // NOTE(review): count is never incremented in this loop, so every value is
  // compared against expectedValues.get(0) — verify this is intentional.
  while (values.hasNext()) {
    i = values.next();
    LOG.info(key + ":" + i);
    if (i != expectedValues.get(count)) {
      errors++;
      LOG.info("TEST:1. Check:5 Expected: " + expectedValues.get(count) + ", Got: " + i);
      return errors;
    }
  }
  LOG.info("TEST:1 Done");
  return errors;
}
End of aggregated usage examples.