Use of org.apache.hadoop.io.IntWritable in project hadoop by apache.
From the class TestMRSequenceFileAsBinaryOutputFormat, method testBinary.
@Test
public void testBinary() throws IOException, InterruptedException {
  Configuration conf = new Configuration();
  Job job = Job.getInstance(conf);
  Path outdir = new Path(System.getProperty("test.build.data", "/tmp"), "outseq");
  Random r = new Random();
  long seed = r.nextLong();
  r.setSeed(seed);
  FileOutputFormat.setOutputPath(job, outdir);
  SequenceFileAsBinaryOutputFormat.setSequenceFileOutputKeyClass(job, IntWritable.class);
  SequenceFileAsBinaryOutputFormat.setSequenceFileOutputValueClass(job, DoubleWritable.class);
  SequenceFileAsBinaryOutputFormat.setCompressOutput(job, true);
  SequenceFileAsBinaryOutputFormat.setOutputCompressionType(job, CompressionType.BLOCK);
  BytesWritable bkey = new BytesWritable();
  BytesWritable bval = new BytesWritable();
  TaskAttemptContext context =
      MapReduceTestUtil.createDummyMapTaskAttemptContext(job.getConfiguration());
  OutputFormat<BytesWritable, BytesWritable> outputFormat =
      new SequenceFileAsBinaryOutputFormat();
  OutputCommitter committer = outputFormat.getOutputCommitter(context);
  committer.setupJob(job);
  RecordWriter<BytesWritable, BytesWritable> writer = outputFormat.getRecordWriter(context);
  IntWritable iwritable = new IntWritable();
  DoubleWritable dwritable = new DoubleWritable();
  DataOutputBuffer outbuf = new DataOutputBuffer();
  LOG.info("Creating data by SequenceFileAsBinaryOutputFormat");
  try {
    for (int i = 0; i < RECORDS; ++i) {
      iwritable = new IntWritable(r.nextInt());
      iwritable.write(outbuf);
      bkey.set(outbuf.getData(), 0, outbuf.getLength());
      outbuf.reset();
      dwritable = new DoubleWritable(r.nextDouble());
      dwritable.write(outbuf);
      bval.set(outbuf.getData(), 0, outbuf.getLength());
      outbuf.reset();
      writer.write(bkey, bval);
    }
  } finally {
    writer.close(context);
  }
  committer.commitTask(context);
  committer.commitJob(job);
  InputFormat<IntWritable, DoubleWritable> iformat =
      new SequenceFileInputFormat<IntWritable, DoubleWritable>();
  int count = 0;
  r.setSeed(seed);
  SequenceFileInputFormat.setInputPaths(job, outdir);
  LOG.info("Reading data by SequenceFileInputFormat");
  for (InputSplit split : iformat.getSplits(job)) {
    RecordReader<IntWritable, DoubleWritable> reader = iformat.createRecordReader(split, context);
    MapContext<IntWritable, DoubleWritable, BytesWritable, BytesWritable> mcontext =
        new MapContextImpl<IntWritable, DoubleWritable, BytesWritable, BytesWritable>(
            job.getConfiguration(), context.getTaskAttemptID(), reader, null, null,
            MapReduceTestUtil.createDummyReporter(), split);
    reader.initialize(split, mcontext);
    try {
      int sourceInt;
      double sourceDouble;
      while (reader.nextKeyValue()) {
        sourceInt = r.nextInt();
        sourceDouble = r.nextDouble();
        iwritable = reader.getCurrentKey();
        dwritable = reader.getCurrentValue();
        assertEquals("Keys don't match: " + "*" + iwritable.get() + ":" + sourceInt + "*",
            sourceInt, iwritable.get());
        assertTrue("Vals don't match: " + "*" + dwritable.get() + ":" + sourceDouble + "*",
            Double.compare(dwritable.get(), sourceDouble) == 0);
        ++count;
      }
    } finally {
      reader.close();
    }
  }
  assertEquals("Some records not found", RECORDS, count);
}
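The essence of testBinary is the round trip between typed Writables and the raw bytes that SequenceFileAsBinaryOutputFormat stores. The standalone sketch below is not part of the Hadoop test and uses an illustrative class name; it shows that conversion in isolation using only org.apache.hadoop.io classes.

// Illustrative sketch (not from the Hadoop source): round-trip an IntWritable
// through the raw-bytes form used by SequenceFileAsBinaryOutputFormat.
import java.io.IOException;
import org.apache.hadoop.io.BytesWritable;
import org.apache.hadoop.io.DataInputBuffer;
import org.apache.hadoop.io.DataOutputBuffer;
import org.apache.hadoop.io.IntWritable;

public class IntWritableRawRoundTrip {
  public static void main(String[] args) throws IOException {
    IntWritable original = new IntWritable(42);

    // Serialize the typed key into a reusable buffer and wrap it in a BytesWritable,
    // as the test does before calling writer.write(bkey, bval).
    DataOutputBuffer outbuf = new DataOutputBuffer();
    original.write(outbuf);
    BytesWritable raw = new BytesWritable();
    raw.set(outbuf.getData(), 0, outbuf.getLength());
    outbuf.reset();

    // Deserialize the raw bytes back into an IntWritable, which is effectively
    // what SequenceFileInputFormat does on the read side of the test.
    DataInputBuffer inbuf = new DataInputBuffer();
    inbuf.reset(raw.getBytes(), 0, raw.getLength());
    IntWritable restored = new IntWritable();
    restored.readFields(inbuf);

    System.out.println(original.get() == restored.get()); // prints true
  }
}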
Use of org.apache.hadoop.io.IntWritable in project hadoop by apache.
From the class TestJoinTupleWritable, method testNestedIterable.
@Test
public void testNestedIterable() throws Exception {
  Random r = new Random();
  Writable[] writs = {
    new BooleanWritable(r.nextBoolean()),
    new FloatWritable(r.nextFloat()),
    new FloatWritable(r.nextFloat()),
    new IntWritable(r.nextInt()),
    new LongWritable(r.nextLong()),
    new BytesWritable("dingo".getBytes()),
    new LongWritable(r.nextLong()),
    new IntWritable(r.nextInt()),
    new BytesWritable("yak".getBytes()),
    new IntWritable(r.nextInt())
  };
  TupleWritable sTuple = makeTuple(writs);
  assertTrue("Bad count", writs.length == verifIter(writs, sTuple, 0));
}
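The makeTuple and verifIter helpers referenced above are not reproduced on this page. As a rough illustration of what the verification has to do, the hypothetical countLeaves method below walks a TupleWritable recursively, descending into nested tuples; the name and structure are illustrative and are not the test's actual helper code.

// Illustrative sketch, not taken from the Hadoop test: recursively walk a
// TupleWritable and count the leaf Writables, descending into nested tuples.
import org.apache.hadoop.io.Writable;
import org.apache.hadoop.mapreduce.lib.join.TupleWritable;

class TupleWalkSketch {
  static int countLeaves(TupleWritable tuple) {
    int n = 0;
    // TupleWritable implements Iterable<Writable>, so nested tuples can be visited directly.
    for (Writable w : tuple) {
      if (w instanceof TupleWritable) {
        n += countLeaves((TupleWritable) w);
      } else {
        n++;
      }
    }
    return n;
  }
}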
Use of org.apache.hadoop.io.IntWritable in project hadoop by apache.
From the class TestJoinTupleWritable, method testWritable.
@Test
public void testWritable() throws Exception {
  Random r = new Random();
  Writable[] writs = {
    new BooleanWritable(r.nextBoolean()),
    new FloatWritable(r.nextFloat()),
    new FloatWritable(r.nextFloat()),
    new IntWritable(r.nextInt()),
    new LongWritable(r.nextLong()),
    new BytesWritable("dingo".getBytes()),
    new LongWritable(r.nextLong()),
    new IntWritable(r.nextInt()),
    new BytesWritable("yak".getBytes()),
    new IntWritable(r.nextInt())
  };
  TupleWritable sTuple = makeTuple(writs);
  ByteArrayOutputStream out = new ByteArrayOutputStream();
  sTuple.write(new DataOutputStream(out));
  ByteArrayInputStream in = new ByteArrayInputStream(out.toByteArray());
  TupleWritable dTuple = new TupleWritable();
  dTuple.readFields(new DataInputStream(in));
  assertTrue("Failed to write/read tuple", sTuple.equals(dTuple));
}
Use of org.apache.hadoop.io.IntWritable in project hadoop by apache.
From the class TestValueIterReset, method test1.
/**
 * Test the case where we do a mark outside of a reset. Test for both the file
 * and memory caches.
 *
 * @param key the key whose values are being iterated
 * @param values a markable iterator over the values for the key
 * @return the number of check failures detected
 * @throws IOException if iterating the values fails
 */
private static int test1(IntWritable key, MarkableIterator<IntWritable> values) throws IOException {
  IntWritable i;
  int errors = 0;
  int count = 0;
  ArrayList<IntWritable> expectedValues = new ArrayList<IntWritable>();
  ArrayList<IntWritable> expectedValues1 = new ArrayList<IntWritable>();
  LOG.info("Executing TEST:1 for Key:" + key);
  values.mark();
  LOG.info("TEST:1. Marking");
  while (values.hasNext()) {
    i = values.next();
    LOG.info(key + ":" + i);
    expectedValues.add(i);
    if (count == 2) {
      break;
    }
    count++;
  }
  values.reset();
  LOG.info("TEST:1. Reset");
  count = 0;
  while (values.hasNext()) {
    i = values.next();
    LOG.info(key + ":" + i);
    if (count < expectedValues.size()) {
      if (i != expectedValues.get(count)) {
        errors++;
        LOG.info("TEST:1. Check:1 Expected: " + expectedValues.get(count) + ", Got: " + i);
        return errors;
      }
    }
    // We have moved past the first mark, but are still in the memory cache
    if (count == 3) {
      values.mark();
      LOG.info("TEST:1. Marking -- " + key + ": " + i);
    }
    if (count >= 3) {
      expectedValues1.add(i);
    }
    if (count == 5) {
      break;
    }
    count++;
  }
  if (count < expectedValues.size()) {
    LOG.info(("TEST:1 Check:2. Iterator returned fewer values"));
    errors++;
    return errors;
  }
  values.reset();
  count = 0;
  LOG.info("TEST:1. Reset");
  expectedValues.clear();
  while (values.hasNext()) {
    i = values.next();
    LOG.info(key + ":" + i);
    if (count < expectedValues1.size()) {
      if (i != expectedValues1.get(count)) {
        errors++;
        LOG.info("TEST:1. Check:3 Expected: " + expectedValues1.get(count) + ", Got: " + i);
        return errors;
      }
    }
    // Marking this far into the iteration should exercise the file-backed cache
    if (count == 25) {
      values.mark();
      LOG.info("TEST:1. Marking -- " + key + ":" + i);
    }
    if (count >= 25) {
      expectedValues.add(i);
    }
    count++;
  }
  if (count < expectedValues1.size()) {
    LOG.info(("TEST:1 Check:4. Iterator returned fewer values"));
    errors++;
    return errors;
  }
  values.reset();
  LOG.info("TEST:1. Reset");
  count = 0;
  while (values.hasNext()) {
    i = values.next();
    LOG.info(key + ":" + i);
    if (i != expectedValues.get(count)) {
      errors++;
      LOG.info("TEST:1. Check:5 Expected: " + expectedValues.get(count) + ", Got: " + i);
      return errors;
    }
  }
  LOG.info("TEST:1 Done");
  return errors;
}
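In the full test, the MarkableIterator handed to test1 is built inside a reducer from the framework-supplied value iterator. A minimal reducer wiring this up might look like the sketch below; the reducer class and what it writes are illustrative, not the test's actual reducer.

// Illustrative reducer (not the test's actual reducer): wraps the framework's
// value iterator in a MarkableIterator so mark()/reset() can replay values.
import java.io.IOException;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.mapreduce.MarkableIterator;
import org.apache.hadoop.mapreduce.Reducer;

public class MarkResetReducer
    extends Reducer<IntWritable, IntWritable, IntWritable, IntWritable> {
  @Override
  public void reduce(IntWritable key, Iterable<IntWritable> values, Context context)
      throws IOException, InterruptedException {
    MarkableIterator<IntWritable> mitr =
        new MarkableIterator<IntWritable>(values.iterator());
    mitr.mark();                         // remember the current position
    while (mitr.hasNext()) {
      context.write(key, mitr.next());   // first pass over the values
    }
    mitr.reset();                        // rewind to the mark
    while (mitr.hasNext()) {
      mitr.next();                       // second pass over the same values
    }
  }
}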
Use of org.apache.hadoop.io.IntWritable in project hadoop by apache.
From the class TestJoinDatamerge, method writeSimpleSrc.
private static Path[] writeSimpleSrc(Path testdir, Configuration conf, int srcs) throws IOException {
  SequenceFile.Writer[] out = null;
  Path[] src = new Path[srcs];
  try {
    out = createWriters(testdir, conf, srcs, src);
    final int capacity = srcs * 2 + 1;
    IntWritable key = new IntWritable();
    IntWritable val = new IntWritable();
    for (int k = 0; k < capacity; ++k) {
      for (int i = 0; i < srcs; ++i) {
        key.set(k % srcs == 0 ? k * srcs : k * srcs + i);
        val.set(10 * k + i);
        out[i].append(key, val);
        if (i == k) {
          // add duplicate key
          out[i].append(key, val);
        }
      }
    }
  } finally {
    if (out != null) {
      for (int i = 0; i < srcs; ++i) {
        if (out[i] != null)
          out[i].close();
      }
    }
  }
  return src;
}