Use of org.apache.hadoop.io.IntWritable in project hadoop by apache.
The class TestJoinDatamerge, method writeSimpleSrc.
private static Path[] writeSimpleSrc(Path testdir, Configuration conf, int srcs) throws IOException {
  SequenceFile.Writer[] out = null;
  Path[] src = new Path[srcs];
  try {
    out = createWriters(testdir, conf, srcs, src);
    final int capacity = srcs * 2 + 1;
    IntWritable key = new IntWritable();
    IntWritable val = new IntWritable();
    for (int k = 0; k < capacity; ++k) {
      for (int i = 0; i < srcs; ++i) {
        key.set(k % srcs == 0 ? k * srcs : k * srcs + i);
        val.set(10 * k + i);
        out[i].append(key, val);
        if (i == k) {
          // add duplicate key
          out[i].append(key, val);
        }
      }
    }
  } finally {
    if (out != null) {
      for (int i = 0; i < srcs; ++i) {
        if (out[i] != null) {
          out[i].close();
        }
      }
    }
  }
  return src;
}
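The createWriters helper is not part of this excerpt. A minimal sketch of what it could look like, assuming it simply opens one IntWritable/IntWritable SequenceFile per source under testdir and records each path in src (the file-naming scheme below is an assumption):

private static SequenceFile.Writer[] createWriters(Path testdir, Configuration conf,
    int srcs, Path[] src) throws IOException {
  FileSystem fs = testdir.getFileSystem(conf);
  SequenceFile.Writer[] out = new SequenceFile.Writer[srcs];
  for (int i = 0; i < srcs; ++i) {
    // assumed naming scheme; the real helper may name the files differently
    src[i] = new Path(testdir, "src" + i);
    out[i] = SequenceFile.createWriter(fs, conf, src[i],
        IntWritable.class, IntWritable.class);
  }
  return out;
}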
Use of org.apache.hadoop.io.IntWritable in project hadoop by apache.
The class TestJoinDatamerge, method testNestedJoin.
@Test
public void testNestedJoin() throws Exception {
  // outer(inner(S1,...,Sn),outer(S1,...,Sn))
  final int SOURCES = 3;
  final int ITEMS = (SOURCES + 1) * (SOURCES + 1);
  Configuration conf = new Configuration();
  Path base = cluster.getFileSystem().makeQualified(new Path("/nested"));
  int[][] source = new int[SOURCES][];
  for (int i = 0; i < SOURCES; ++i) {
    source[i] = new int[ITEMS];
    for (int j = 0; j < ITEMS; ++j) {
      source[i][j] = (i + 2) * (j + 1);
    }
  }
  Path[] src = new Path[SOURCES];
  SequenceFile.Writer[] out = createWriters(base, conf, SOURCES, src);
  IntWritable k = new IntWritable();
  for (int i = 0; i < SOURCES; ++i) {
    IntWritable v = new IntWritable();
    v.set(i);
    for (int j = 0; j < ITEMS; ++j) {
      k.set(source[i][j]);
      out[i].append(k, v);
    }
    out[i].close();
  }
  out = null;
  StringBuilder sb = new StringBuilder();
  sb.append("outer(inner(");
  for (int i = 0; i < SOURCES; ++i) {
    sb.append(CompositeInputFormat.compose(SequenceFileInputFormat.class, src[i].toString()));
    if (i + 1 != SOURCES) {
      sb.append(",");
    }
  }
  sb.append("),outer(");
  sb.append(CompositeInputFormat.compose(MapReduceTestUtil.Fake_IF.class, "foobar"));
  sb.append(",");
  for (int i = 0; i < SOURCES; ++i) {
    sb.append(CompositeInputFormat.compose(SequenceFileInputFormat.class, src[i].toString()));
    sb.append(",");
  }
  sb.append(CompositeInputFormat.compose(MapReduceTestUtil.Fake_IF.class, "raboof") + "))");
  conf.set(CompositeInputFormat.JOIN_EXPR, sb.toString());
  MapReduceTestUtil.Fake_IF.setKeyClass(conf, IntWritable.class);
  MapReduceTestUtil.Fake_IF.setValClass(conf, IntWritable.class);
  Job job = Job.getInstance(conf);
  Path outf = new Path(base, "out");
  FileOutputFormat.setOutputPath(job, outf);
  job.setInputFormatClass(CompositeInputFormat.class);
  job.setMapperClass(Mapper.class);
  job.setReducerClass(Reducer.class);
  job.setNumReduceTasks(0);
  job.setOutputKeyClass(IntWritable.class);
  job.setOutputValueClass(TupleWritable.class);
  job.setOutputFormatClass(SequenceFileOutputFormat.class);
  job.waitForCompletion(true);
  assertTrue("Job failed", job.isSuccessful());
  FileStatus[] outlist = cluster.getFileSystem().listStatus(outf,
      new Utils.OutputFileUtils.OutputFilesFilter());
  assertEquals(1, outlist.length);
  assertTrue(0 < outlist[0].getLen());
  SequenceFile.Reader r =
      new SequenceFile.Reader(cluster.getFileSystem(), outlist[0].getPath(), conf);
  TupleWritable v = new TupleWritable();
  while (r.next(k, v)) {
    assertFalse(((TupleWritable) v.get(1)).has(0));
    assertFalse(((TupleWritable) v.get(1)).has(SOURCES + 1));
    boolean chk = true;
    int ki = k.get();
    for (int i = 2; i < SOURCES + 2; ++i) {
      if ((ki % i) == 0 && ki <= i * ITEMS) {
        assertEquals(i - 2, ((IntWritable) ((TupleWritable) v.get(1)).get(i - 1)).get());
      } else {
        chk = false;
      }
    }
    if (chk) {
      // present in all sources; chk inner
      assertTrue(v.has(0));
      for (int i = 0; i < SOURCES; ++i) {
        assertTrue(((TupleWritable) v.get(0)).has(i));
      }
    } else {
      // should not be present in inner join
      assertFalse(v.has(0));
    }
  }
  r.close();
  base.getFileSystem(conf).delete(base, true);
}
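The join expression built above nests an inner join of the real sources inside an outer join that also includes two Fake_IF sources. For orientation, a minimal sketch of the same mechanism for a flat, two-source inner join (the paths are illustrative):

// CompositeInputFormat.compose(op, inputFormat, paths...) builds the join
// expression string; each map input value is then a TupleWritable with one
// slot per joined source.
Configuration conf = new Configuration();
conf.set(CompositeInputFormat.JOIN_EXPR,
    CompositeInputFormat.compose("inner", SequenceFileInputFormat.class,
        "/data/a", "/data/b"));
Job job = Job.getInstance(conf);
job.setInputFormatClass(CompositeInputFormat.class);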
Use of org.apache.hadoop.io.IntWritable in project hadoop by apache.
The class TestInputSampler, method testSplitSampler.
/**
 * Verify SplitSampler contract, that an equal number of records are taken
 * from the first splits.
 */
@Test
// IntWritable comparator not typesafe
@SuppressWarnings("unchecked")
public void testSplitSampler() throws Exception {
  final int TOT_SPLITS = 15;
  final int NUM_SPLITS = 5;
  final int STEP_SAMPLE = 5;
  final int NUM_SAMPLES = NUM_SPLITS * STEP_SAMPLE;
  InputSampler.Sampler<IntWritable, NullWritable> sampler =
      new InputSampler.SplitSampler<IntWritable, NullWritable>(NUM_SAMPLES, NUM_SPLITS);
  int[] inits = new int[TOT_SPLITS];
  for (int i = 0; i < TOT_SPLITS; ++i) {
    inits[i] = i * STEP_SAMPLE;
  }
  Job ignored = Job.getInstance();
  Object[] samples = sampler.getSample(
      new TestInputSamplerIF(100000, TOT_SPLITS, inits), ignored);
  assertEquals(NUM_SAMPLES, samples.length);
  Arrays.sort(samples, new IntWritable.Comparator());
  for (int i = 0; i < NUM_SAMPLES; ++i) {
    assertEquals(i, ((IntWritable) samples[i]).get());
  }
}
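Outside of a unit test, a Sampler is usually not driven directly; its sample feeds a partition file for total-order sorting. A sketch of that typical wiring, assuming IntWritable keys and an illustrative partition-file path:

// InputSampler.writePartitionFile runs the sampler over the job's input and
// writes split points that TotalOrderPartitioner reads at runtime.
Job job = Job.getInstance(new Configuration());
job.setPartitionerClass(TotalOrderPartitioner.class);
TotalOrderPartitioner.setPartitionFile(job.getConfiguration(),
    new Path("/tmp/partitions.lst"));
InputSampler.Sampler<IntWritable, NullWritable> sampler =
    new InputSampler.SplitSampler<IntWritable, NullWritable>(1000, 10);
InputSampler.writePartitionFile(job, sampler);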
Use of org.apache.hadoop.io.IntWritable in project hadoop by apache.
The class TestInputSampler, method testIntervalSampler.
/**
 * Verify IntervalSampler contract, that samples are taken at regular
 * intervals from the given splits.
 */
@Test
// IntWritable comparator not typesafe
@SuppressWarnings("unchecked")
public void testIntervalSampler() throws Exception {
  final int TOT_SPLITS = 16;
  final int PER_SPLIT_SAMPLE = 4;
  final int NUM_SAMPLES = TOT_SPLITS * PER_SPLIT_SAMPLE;
  final double FREQ = 1.0 / TOT_SPLITS;
  InputSampler.Sampler<IntWritable, NullWritable> sampler =
      new InputSampler.IntervalSampler<IntWritable, NullWritable>(FREQ, NUM_SAMPLES);
  int[] inits = new int[TOT_SPLITS];
  for (int i = 0; i < TOT_SPLITS; ++i) {
    inits[i] = i;
  }
  Job ignored = Job.getInstance();
  Object[] samples = sampler.getSample(
      new TestInputSamplerIF(NUM_SAMPLES, TOT_SPLITS, inits), ignored);
  assertEquals(NUM_SAMPLES, samples.length);
  Arrays.sort(samples, new IntWritable.Comparator());
  for (int i = 0; i < NUM_SAMPLES; ++i) {
    assertEquals(i, ((IntWritable) samples[i]).get());
  }
}
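SplitSampler and IntervalSampler are two of the three built-in samplers; the third, RandomSampler, follows the same generic signature. A one-line sketch with illustrative parameters (sample roughly 10% of records, at most 100 samples, from at most 10 splits):

InputSampler.Sampler<IntWritable, NullWritable> random =
    new InputSampler.RandomSampler<IntWritable, NullWritable>(0.1, 100, 10);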
Use of org.apache.hadoop.io.IntWritable in project hadoop by apache.
The class ExternalMapperReducer, method reduce.
public void reduce(WritableComparable key, Iterator<Writable> values,
    OutputCollector<WritableComparable, IntWritable> output, Reporter reporter)
    throws IOException {
  int count = 0;
  while (values.hasNext()) {
    count++;
    values.next();
  }
  output.collect(key, new IntWritable(count));
}
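This reducer uses the older org.apache.hadoop.mapred API (OutputCollector, Reporter). The same value-counting logic expressed against the newer org.apache.hadoop.mapreduce API would look roughly like the following sketch (the class name is hypothetical):

public static class CountingReducer
    extends Reducer<WritableComparable, Writable, WritableComparable, IntWritable> {
  @Override
  protected void reduce(WritableComparable key, Iterable<Writable> values, Context context)
      throws IOException, InterruptedException {
    // count the values for this key and emit the total
    int count = 0;
    for (Writable ignored : values) {
      count++;
    }
    context.write(key, new IntWritable(count));
  }
}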