use of org.apache.hadoop.io.NullWritable in project hadoop by apache.
the class TestInputSampler method testMapredIntervalSampler.
/**
* Verify IntervalSampler in mapred.lib.InputSampler, which is added back
* for binary compatibility of M/R 1.x
*/
@Test(timeout = 30000)
// IntWritable comparator not typesafe
@SuppressWarnings("unchecked")
public void testMapredIntervalSampler() throws Exception {
final int TOT_SPLITS = 16;
final int PER_SPLIT_SAMPLE = 4;
final int NUM_SAMPLES = TOT_SPLITS * PER_SPLIT_SAMPLE;
final double FREQ = 1.0 / TOT_SPLITS;
org.apache.hadoop.mapred.lib.InputSampler.Sampler<IntWritable, NullWritable> sampler = new org.apache.hadoop.mapred.lib.InputSampler.IntervalSampler<IntWritable, NullWritable>(FREQ, NUM_SAMPLES);
int[] inits = new int[TOT_SPLITS];
for (int i = 0; i < TOT_SPLITS; ++i) {
inits[i] = i;
}
Job ignored = Job.getInstance();
Object[] samples = sampler.getSample(new TestInputSamplerIF(NUM_SAMPLES, TOT_SPLITS, inits), ignored);
assertEquals(NUM_SAMPLES, samples.length);
Arrays.sort(samples, new IntWritable.Comparator());
for (int i = 0; i < NUM_SAMPLES; ++i) {
assertEquals(i, ((IntWritable) samples[i]).get());
}
}
use of org.apache.hadoop.io.NullWritable in project hadoop by apache.
the class TestInputSampler method testMapredSplitSampler.
/**
* Verify SplitSampler contract in mapred.lib.InputSampler, which is added
* back for binary compatibility of M/R 1.x
*/
@Test(timeout = 30000)
// IntWritable comparator not typesafe
@SuppressWarnings("unchecked")
public void testMapredSplitSampler() throws Exception {
final int TOT_SPLITS = 15;
final int NUM_SPLITS = 5;
final int STEP_SAMPLE = 5;
final int NUM_SAMPLES = NUM_SPLITS * STEP_SAMPLE;
org.apache.hadoop.mapred.lib.InputSampler.Sampler<IntWritable, NullWritable> sampler = new org.apache.hadoop.mapred.lib.InputSampler.SplitSampler<IntWritable, NullWritable>(NUM_SAMPLES, NUM_SPLITS);
int[] inits = new int[TOT_SPLITS];
for (int i = 0; i < TOT_SPLITS; ++i) {
inits[i] = i * STEP_SAMPLE;
}
Object[] samples = sampler.getSample(new TestMapredInputSamplerIF(100000, TOT_SPLITS, inits), new JobConf());
assertEquals(NUM_SAMPLES, samples.length);
Arrays.sort(samples, new IntWritable.Comparator());
for (int i = 0; i < NUM_SAMPLES; ++i) {
// mapred.lib.InputSampler.SplitSampler has a sampling step
assertEquals(i % STEP_SAMPLE + TOT_SPLITS * (i / STEP_SAMPLE), ((IntWritable) samples[i]).get());
}
}
use of org.apache.hadoop.io.NullWritable in project hadoop by apache.
the class TestTotalOrderPartitioner method testTotalOrderWithCustomSerialization.
@Test
public void testTotalOrderWithCustomSerialization() throws Exception {
TotalOrderPartitioner<String, NullWritable> partitioner = new TotalOrderPartitioner<String, NullWritable>();
Configuration conf = new Configuration();
conf.setStrings(CommonConfigurationKeys.IO_SERIALIZATIONS_KEY, JavaSerialization.class.getName(), WritableSerialization.class.getName());
conf.setClass(MRJobConfig.KEY_COMPARATOR, JavaSerializationComparator.class, Comparator.class);
Path p = TestTotalOrderPartitioner.<String>writePartitionFile("totalordercustomserialization", conf, splitJavaStrings);
conf.setClass(MRJobConfig.MAP_OUTPUT_KEY_CLASS, String.class, Object.class);
try {
partitioner.setConf(conf);
NullWritable nw = NullWritable.get();
for (Check<String> chk : testJavaStrings) {
assertEquals(chk.data.toString(), chk.part, partitioner.getPartition(chk.data, nw, splitJavaStrings.length + 1));
}
} finally {
p.getFileSystem(conf).delete(p, true);
}
}
use of org.apache.hadoop.io.NullWritable in project hadoop by apache.
the class TestTotalOrderPartitioner method testTotalOrderCustomComparator.
@Test
public void testTotalOrderCustomComparator() throws Exception {
TotalOrderPartitioner<Text, NullWritable> partitioner = new TotalOrderPartitioner<Text, NullWritable>();
Configuration conf = new Configuration();
Text[] revSplitStrings = Arrays.copyOf(splitStrings, splitStrings.length);
Arrays.sort(revSplitStrings, new ReverseStringComparator());
Path p = TestTotalOrderPartitioner.<Text>writePartitionFile("totalordercustomcomparator", conf, revSplitStrings);
conf.setBoolean(TotalOrderPartitioner.NATURAL_ORDER, false);
conf.setClass(MRJobConfig.MAP_OUTPUT_KEY_CLASS, Text.class, Object.class);
conf.setClass(MRJobConfig.KEY_COMPARATOR, ReverseStringComparator.class, RawComparator.class);
ArrayList<Check<Text>> revCheck = new ArrayList<Check<Text>>();
revCheck.add(new Check<Text>(new Text("aaaaa"), 9));
revCheck.add(new Check<Text>(new Text("aaabb"), 9));
revCheck.add(new Check<Text>(new Text("aabbb"), 9));
revCheck.add(new Check<Text>(new Text("aaaaa"), 9));
revCheck.add(new Check<Text>(new Text("babbb"), 8));
revCheck.add(new Check<Text>(new Text("baabb"), 8));
revCheck.add(new Check<Text>(new Text("yai"), 1));
revCheck.add(new Check<Text>(new Text("yak"), 1));
revCheck.add(new Check<Text>(new Text("z"), 0));
revCheck.add(new Check<Text>(new Text("ddngo"), 4));
revCheck.add(new Check<Text>(new Text("hi"), 3));
try {
partitioner.setConf(conf);
NullWritable nw = NullWritable.get();
for (Check<Text> chk : revCheck) {
assertEquals(chk.data.toString(), chk.part, partitioner.getPartition(chk.data, nw, splitStrings.length + 1));
}
} finally {
p.getFileSystem(conf).delete(p, true);
}
}
use of org.apache.hadoop.io.NullWritable in project hadoop by apache.
the class TestTotalOrderPartitioner method testTotalOrderBinarySearch.
@Test
public void testTotalOrderBinarySearch() throws Exception {
TotalOrderPartitioner<Text, NullWritable> partitioner = new TotalOrderPartitioner<Text, NullWritable>();
Configuration conf = new Configuration();
Path p = TestTotalOrderPartitioner.<Text>writePartitionFile("totalorderbinarysearch", conf, splitStrings);
conf.setBoolean(TotalOrderPartitioner.NATURAL_ORDER, false);
conf.setClass(MRJobConfig.MAP_OUTPUT_KEY_CLASS, Text.class, Object.class);
try {
partitioner.setConf(conf);
NullWritable nw = NullWritable.get();
for (Check<Text> chk : testStrings) {
assertEquals(chk.data.toString(), chk.part, partitioner.getPartition(chk.data, nw, splitStrings.length + 1));
}
} finally {
p.getFileSystem(conf).delete(p, true);
}
}
Aggregations