Use of org.apache.hadoop.io.IntWritable in project hadoop by apache:
the class TestJoinDatamerge, method testNestedJoin.
@Test
public void testNestedJoin() throws Exception {
  // outer(inner(S1,...,Sn), outer(S1,...,Sn))
  final int SOURCES = 3;
  final int ITEMS = (SOURCES + 1) * (SOURCES + 1);
  Configuration conf = new Configuration();
  Path base = cluster.getFileSystem().makeQualified(new Path("/nested"));
  int[][] source = new int[SOURCES][];
  for (int i = 0; i < SOURCES; ++i) {
    source[i] = new int[ITEMS];
    for (int j = 0; j < ITEMS; ++j) {
      source[i][j] = (i + 2) * (j + 1);
    }
  }
  Path[] src = new Path[SOURCES];
  SequenceFile.Writer[] out = createWriters(base, conf, SOURCES, src);
  IntWritable k = new IntWritable();
  for (int i = 0; i < SOURCES; ++i) {
    IntWritable v = new IntWritable();
    v.set(i);
    for (int j = 0; j < ITEMS; ++j) {
      k.set(source[i][j]);
      out[i].append(k, v);
    }
    out[i].close();
  }
  out = null;
  StringBuilder sb = new StringBuilder();
  sb.append("outer(inner(");
  for (int i = 0; i < SOURCES; ++i) {
    sb.append(CompositeInputFormat.compose(SequenceFileInputFormat.class,
        src[i].toString()));
    if (i + 1 != SOURCES) {
      sb.append(",");
    }
  }
  sb.append("),outer(");
  sb.append(CompositeInputFormat.compose(MapReduceTestUtil.Fake_IF.class, "foobar"));
  sb.append(",");
  for (int i = 0; i < SOURCES; ++i) {
    sb.append(CompositeInputFormat.compose(SequenceFileInputFormat.class,
        src[i].toString()));
    sb.append(",");
  }
  sb.append(CompositeInputFormat.compose(MapReduceTestUtil.Fake_IF.class, "raboof") + "))");
  conf.set(CompositeInputFormat.JOIN_EXPR, sb.toString());
  MapReduceTestUtil.Fake_IF.setKeyClass(conf, IntWritable.class);
  MapReduceTestUtil.Fake_IF.setValClass(conf, IntWritable.class);
  Job job = Job.getInstance(conf);
  Path outf = new Path(base, "out");
  FileOutputFormat.setOutputPath(job, outf);
  job.setInputFormatClass(CompositeInputFormat.class);
  job.setMapperClass(Mapper.class);
  job.setReducerClass(Reducer.class);
  job.setNumReduceTasks(0);
  job.setOutputKeyClass(IntWritable.class);
  job.setOutputValueClass(TupleWritable.class);
  job.setOutputFormatClass(SequenceFileOutputFormat.class);
  job.waitForCompletion(true);
  assertTrue("Job failed", job.isSuccessful());
  FileStatus[] outlist = cluster.getFileSystem().listStatus(outf,
      new Utils.OutputFileUtils.OutputFilesFilter());
  assertEquals(1, outlist.length);
  assertTrue(0 < outlist[0].getLen());
  SequenceFile.Reader r = new SequenceFile.Reader(cluster.getFileSystem(),
      outlist[0].getPath(), conf);
  TupleWritable v = new TupleWritable();
  while (r.next(k, v)) {
    assertFalse(((TupleWritable) v.get(1)).has(0));
    assertFalse(((TupleWritable) v.get(1)).has(SOURCES + 1));
    boolean chk = true;
    int ki = k.get();
    for (int i = 2; i < SOURCES + 2; ++i) {
      if ((ki % i) == 0 && ki <= i * ITEMS) {
        assertEquals(i - 2, ((IntWritable) ((TupleWritable) v.get(1)).get(i - 1)).get());
      } else {
        chk = false;
      }
    }
    if (chk) {
      // present in all sources; chk inner
      assertTrue(v.has(0));
      for (int i = 0; i < SOURCES; ++i) {
        assertTrue(((TupleWritable) v.get(0)).has(i));
      }
    } else {
      // should not be present in inner join
      assertFalse(v.has(0));
    }
  }
  r.close();
  base.getFileSystem(conf).delete(base, true);
}
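The test builds its nested expression string by hand so it can interleave real and fake sources. For a flat join, CompositeInputFormat can compose the whole expression in one call; the sketch below is a minimal fragment assuming two pre-sorted, identically partitioned SequenceFile inputs at the hypothetical paths /data/a and /data/b.

Configuration conf = new Configuration();
// compose("inner", ...) expands to an inner(tbl(...),tbl(...)) expression
// equivalent to what the test assembles with a StringBuilder
conf.set(CompositeInputFormat.JOIN_EXPR,
    CompositeInputFormat.compose("inner", SequenceFileInputFormat.class,
        new Path("/data/a"), new Path("/data/b")));
Job job = Job.getInstance(conf);
job.setInputFormatClass(CompositeInputFormat.class);
// each record arriving at the mapper is (key, TupleWritable of joined values)
job.setMapOutputValueClass(TupleWritable.class);

Note that this map-side join requires every source to be sorted on the join key and partitioned into the same number of files, which is why the test writes its inputs with SequenceFile writers it controls.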
Use of org.apache.hadoop.io.IntWritable in project hadoop by apache:
the class TestInputSampler, method testSplitSampler.
/**
* Verify SplitSampler contract, that an equal number of records are taken
* from the first splits.
*/
@Test
// IntWritable comparator not typesafe
@SuppressWarnings("unchecked")
public void testSplitSampler() throws Exception {
  final int TOT_SPLITS = 15;
  final int NUM_SPLITS = 5;
  final int STEP_SAMPLE = 5;
  final int NUM_SAMPLES = NUM_SPLITS * STEP_SAMPLE;
  InputSampler.Sampler<IntWritable, NullWritable> sampler =
      new InputSampler.SplitSampler<IntWritable, NullWritable>(NUM_SAMPLES, NUM_SPLITS);
  int[] inits = new int[TOT_SPLITS];
  for (int i = 0; i < TOT_SPLITS; ++i) {
    inits[i] = i * STEP_SAMPLE;
  }
  Job ignored = Job.getInstance();
  Object[] samples = sampler.getSample(
      new TestInputSamplerIF(100000, TOT_SPLITS, inits), ignored);
  assertEquals(NUM_SAMPLES, samples.length);
  Arrays.sort(samples, new IntWritable.Comparator());
  for (int i = 0; i < NUM_SAMPLES; ++i) {
    assertEquals(i, ((IntWritable) samples[i]).get());
  }
}
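Outside of tests, a SplitSampler is typically used to seed a TotalOrderPartitioner before a total-order sort. A minimal hypothetical driver fragment (the partition-file path is made up):

Job job = Job.getInstance(new Configuration());
job.setPartitionerClass(TotalOrderPartitioner.class);
TotalOrderPartitioner.setPartitionFile(job.getConfiguration(),
    new Path("/tmp/partitions"));  // hypothetical location
InputSampler.Sampler<IntWritable, NullWritable> sampler =
    new InputSampler.SplitSampler<IntWritable, NullWritable>(1000, 10);
// samples keys from the job's configured input and writes the
// (numReduceTasks - 1) split points to the partition file
InputSampler.writePartitionFile(job, sampler);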
Use of org.apache.hadoop.io.IntWritable in project hadoop by apache:
the class TestInputSampler, method testIntervalSampler.
/**
* Verify IntervalSampler contract, that samples are taken at regular
* intervals from the given splits.
*/
@Test
// IntWritable comparator not typesafe
@SuppressWarnings("unchecked")
public void testIntervalSampler() throws Exception {
  final int TOT_SPLITS = 16;
  final int PER_SPLIT_SAMPLE = 4;
  final int NUM_SAMPLES = TOT_SPLITS * PER_SPLIT_SAMPLE;
  final double FREQ = 1.0 / TOT_SPLITS;
  InputSampler.Sampler<IntWritable, NullWritable> sampler =
      new InputSampler.IntervalSampler<IntWritable, NullWritable>(FREQ, NUM_SAMPLES);
  int[] inits = new int[TOT_SPLITS];
  for (int i = 0; i < TOT_SPLITS; ++i) {
    inits[i] = i;
  }
  Job ignored = Job.getInstance();
  Object[] samples = sampler.getSample(
      new TestInputSamplerIF(NUM_SAMPLES, TOT_SPLITS, inits), ignored);
  assertEquals(NUM_SAMPLES, samples.length);
  Arrays.sort(samples, new IntWritable.Comparator());
  for (int i = 0; i < NUM_SAMPLES; ++i) {
    assertEquals(i, ((IntWritable) samples[i]).get());
  }
}
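The interval behavior being verified reduces to a ratio test: keep a record whenever the fraction kept so far drops below the target frequency. A rough stand-alone paraphrase of that rule (not the library source):

final double freq = 1.0 / 16;
long records = 0;
long kept = 0;
List<Integer> samples = new ArrayList<Integer>();
for (int key = 0; key < 64; ++key) {  // stand-in for iterating a split
  ++records;
  // keep this record if the kept/records ratio has fallen below freq
  if ((double) kept / records < freq) {
    ++kept;
    samples.add(key);
  }
}
// with freq = 1/16 this keeps keys 0, 16, 32, 48

Because the rule is deterministic, samples land at evenly spaced keys, which is why the test can assert the sorted samples are exactly 0..NUM_SAMPLES-1.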
Use of org.apache.hadoop.io.IntWritable in project hadoop by apache:
the class ExternalMapperReducer, method reduce.
public void reduce(WritableComparable key, Iterator<Writable> values,
    OutputCollector<WritableComparable, IntWritable> output, Reporter reporter)
    throws IOException {
  int count = 0;
  while (values.hasNext()) {
    count++;
    values.next();
  }
  output.collect(key, new IntWritable(count));
}
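This reducer simply counts how many values arrived for each key. A matching old-API (org.apache.hadoop.mapred) mapper would emit one placeholder value per input record; the class below is a hypothetical sketch, not part of ExternalMapperReducer:

// hypothetical mapper that pairs with the counting reducer above
public static class CountOneMapper extends MapReduceBase
    implements Mapper<WritableComparable, Writable,
                      WritableComparable, IntWritable> {
  private static final IntWritable ONE = new IntWritable(1);

  public void map(WritableComparable key, Writable value,
      OutputCollector<WritableComparable, IntWritable> output,
      Reporter reporter) throws IOException {
    // emit the key with a placeholder; the reducer counts occurrences
    output.collect(key, ONE);
  }
}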
Use of org.apache.hadoop.io.IntWritable in project hadoop by apache:
the class TestComparators, method configure.
@Before
public void configure() throws Exception {
  Path testdir = new Path(TEST_DIR.getAbsolutePath());
  Path inDir = new Path(testdir, "in");
  Path outDir = new Path(testdir, "out");
  FileSystem fs = FileSystem.get(conf);
  fs.delete(testdir, true);
  conf.setInputFormat(SequenceFileInputFormat.class);
  FileInputFormat.setInputPaths(conf, inDir);
  FileOutputFormat.setOutputPath(conf, outDir);
  conf.setOutputKeyClass(IntWritable.class);
  conf.setOutputValueClass(Text.class);
  conf.setMapOutputValueClass(IntWritable.class);
  // set up two map jobs, so we can test merge phase in Reduce also
  conf.setNumMapTasks(2);
  conf.set(MRConfig.FRAMEWORK_NAME, MRConfig.LOCAL_FRAMEWORK_NAME);
  conf.setOutputFormat(SequenceFileOutputFormat.class);
  if (!fs.mkdirs(testdir)) {
    throw new IOException("Mkdirs failed to create " + testdir.toString());
  }
  if (!fs.mkdirs(inDir)) {
    throw new IOException("Mkdirs failed to create " + inDir.toString());
  }
  // set up input data in 2 files
  Path inFile = new Path(inDir, "part0");
  SequenceFile.Writer writer = SequenceFile.createWriter(fs, conf, inFile,
      IntWritable.class, IntWritable.class);
  writer.append(new IntWritable(11), new IntWritable(999));
  writer.append(new IntWritable(23), new IntWritable(456));
  writer.append(new IntWritable(10), new IntWritable(780));
  writer.close();
  inFile = new Path(inDir, "part1");
  writer = SequenceFile.createWriter(fs, conf, inFile,
      IntWritable.class, IntWritable.class);
  writer.append(new IntWritable(45), new IntWritable(100));
  writer.append(new IntWritable(18), new IntWritable(200));
  writer.append(new IntWritable(27), new IntWritable(300));
  writer.close();
  jc = new JobClient(conf);
}
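With the input in place, the individual tests then swap comparators on this JobConf. For illustration, a descending IntWritable comparator and its registration might look like this (a sketch in the spirit of the test, not quoted from it):

// raw comparator that inverts the natural IntWritable order
public static class DecreasingIntComparator extends IntWritable.Comparator {
  public int compare(byte[] b1, int s1, int l1, byte[] b2, int s2, int l2) {
    return -super.compare(b1, s1, l1, b2, s2, l2);
  }
}

// later, in the job setup: sort keys with the custom comparator
// during the shuffle (old-API hook on JobConf)
conf.setOutputKeyComparatorClass(DecreasingIntComparator.class);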