Search in sources :

Example 76 with NullWritable

use of org.apache.hadoop.io.NullWritable in project hadoop by apache.

From the class TestInputSampler, the method testMapredIntervalSampler:

/**
   * Verify IntervalSampler in mapred.lib.InputSampler, which is added back
   * for binary compatibility of M/R 1.x
   */
@Test(timeout = 30000)
// IntWritable comparator not typesafe
@SuppressWarnings("unchecked")
public void testMapredIntervalSampler() throws Exception {
    final int totSplits = 16;
    final int perSplitSample = 4;
    final int numSamples = totSplits * perSplitSample;
    // With freq = 1/totSplits the sampler keeps one record per interval.
    final double freq = 1.0 / totSplits;
    org.apache.hadoop.mapred.lib.InputSampler.Sampler<IntWritable, NullWritable> sampler =
        new org.apache.hadoop.mapred.lib.InputSampler.IntervalSampler<IntWritable, NullWritable>(
            freq, numSamples);
    // Stagger each split's starting key by its split index so the sampled
    // keys, taken together, form a contiguous range.
    int[] inits = new int[totSplits];
    for (int split = 0; split < totSplits; ++split) {
        inits[split] = split;
    }
    Job ignored = Job.getInstance();
    Object[] samples =
        sampler.getSample(new TestInputSamplerIF(numSamples, totSplits, inits), ignored);
    assertEquals(numSamples, samples.length);
    Arrays.sort(samples, new IntWritable.Comparator());
    // After sorting, the samples must be exactly 0, 1, ..., numSamples - 1.
    for (int idx = 0; idx < numSamples; ++idx) {
        assertEquals(idx, ((IntWritable) samples[idx]).get());
    }
}
Also used : NullWritable(org.apache.hadoop.io.NullWritable) Job(org.apache.hadoop.mapreduce.Job) IntWritable(org.apache.hadoop.io.IntWritable) Test(org.junit.Test)

Example 77 with NullWritable

use of org.apache.hadoop.io.NullWritable in project hadoop by apache.

From the class TestInputSampler, the method testMapredSplitSampler:

/**
   * Verify SplitSampler contract in mapred.lib.InputSampler, which is added
   * back for binary compatibility of M/R 1.x
   */
@Test(timeout = 30000)
// IntWritable comparator not typesafe
@SuppressWarnings("unchecked")
public void testMapredSplitSampler() throws Exception {
    final int totSplits = 15;
    final int numSplits = 5;
    final int stepSample = 5;
    final int numSamples = numSplits * stepSample;
    org.apache.hadoop.mapred.lib.InputSampler.Sampler<IntWritable, NullWritable> sampler =
        new org.apache.hadoop.mapred.lib.InputSampler.SplitSampler<IntWritable, NullWritable>(
            numSamples, numSplits);
    // Offset each split's keys by the sampling step so the expected values
    // below are easy to compute from the sample index.
    int[] inits = new int[totSplits];
    for (int split = 0; split < totSplits; ++split) {
        inits[split] = split * stepSample;
    }
    Object[] samples = sampler.getSample(
        new TestMapredInputSamplerIF(100000, totSplits, inits), new JobConf());
    assertEquals(numSamples, samples.length);
    Arrays.sort(samples, new IntWritable.Comparator());
    for (int idx = 0; idx < numSamples; ++idx) {
        // mapred.lib.InputSampler.SplitSampler has a sampling step
        assertEquals(idx % stepSample + totSplits * (idx / stepSample),
            ((IntWritable) samples[idx]).get());
    }
}
Also used : NullWritable(org.apache.hadoop.io.NullWritable) JobConf(org.apache.hadoop.mapred.JobConf) IntWritable(org.apache.hadoop.io.IntWritable) Test(org.junit.Test)

Example 78 with NullWritable

use of org.apache.hadoop.io.NullWritable in project hadoop by apache.

From the class TestTotalOrderPartitioner, the method testTotalOrderWithCustomSerialization:

@Test
public void testTotalOrderWithCustomSerialization() throws Exception {
    // Exercise TotalOrderPartitioner with plain String keys, which requires
    // Java serialization to be registered alongside Writable serialization.
    TotalOrderPartitioner<String, NullWritable> partitioner =
        new TotalOrderPartitioner<String, NullWritable>();
    Configuration conf = new Configuration();
    conf.setStrings(CommonConfigurationKeys.IO_SERIALIZATIONS_KEY,
        JavaSerialization.class.getName(),
        WritableSerialization.class.getName());
    conf.setClass(MRJobConfig.KEY_COMPARATOR,
        JavaSerializationComparator.class, Comparator.class);
    Path partitionFile = TestTotalOrderPartitioner.<String>writePartitionFile(
        "totalordercustomserialization", conf, splitJavaStrings);
    conf.setClass(MRJobConfig.MAP_OUTPUT_KEY_CLASS, String.class, Object.class);
    try {
        partitioner.setConf(conf);
        NullWritable nullValue = NullWritable.get();
        // Every test key must land in its expected partition.
        for (Check<String> check : testJavaStrings) {
            assertEquals(check.data.toString(), check.part,
                partitioner.getPartition(check.data, nullValue, splitJavaStrings.length + 1));
        }
    } finally {
        // Remove the temporary partition file regardless of test outcome.
        partitionFile.getFileSystem(conf).delete(partitionFile, true);
    }
}
Also used : Path(org.apache.hadoop.fs.Path) Configuration(org.apache.hadoop.conf.Configuration) JavaSerialization(org.apache.hadoop.io.serializer.JavaSerialization) NullWritable(org.apache.hadoop.io.NullWritable) WritableSerialization(org.apache.hadoop.io.serializer.WritableSerialization) Test(org.junit.Test)

Example 79 with NullWritable

use of org.apache.hadoop.io.NullWritable in project hadoop by apache.

From the class TestTotalOrderPartitioner, the method testTotalOrderCustomComparator:

@Test
public void testTotalOrderCustomComparator() throws Exception {
    // Exercise TotalOrderPartitioner with a custom (reversed) comparator:
    // split points are sorted in reverse order, so partition indices run
    // opposite to the natural string ordering.
    TotalOrderPartitioner<Text, NullWritable> partitioner =
        new TotalOrderPartitioner<Text, NullWritable>();
    Configuration conf = new Configuration();
    Text[] revSplitStrings = Arrays.copyOf(splitStrings, splitStrings.length);
    Arrays.sort(revSplitStrings, new ReverseStringComparator());
    Path partitionFile = TestTotalOrderPartitioner.<Text>writePartitionFile(
        "totalordercustomcomparator", conf, revSplitStrings);
    conf.setBoolean(TotalOrderPartitioner.NATURAL_ORDER, false);
    conf.setClass(MRJobConfig.MAP_OUTPUT_KEY_CLASS, Text.class, Object.class);
    conf.setClass(MRJobConfig.KEY_COMPARATOR,
        ReverseStringComparator.class, RawComparator.class);
    // Expected (key, partition) pairs under the reversed ordering.
    String[] keys = {
        "aaaaa", "aaabb", "aabbb", "aaaaa", "babbb", "baabb",
        "yai", "yak", "z", "ddngo", "hi" };
    int[] parts = { 9, 9, 9, 9, 8, 8, 1, 1, 0, 4, 3 };
    ArrayList<Check<Text>> revCheck = new ArrayList<Check<Text>>();
    for (int i = 0; i < keys.length; ++i) {
        revCheck.add(new Check<Text>(new Text(keys[i]), parts[i]));
    }
    try {
        partitioner.setConf(conf);
        NullWritable nullValue = NullWritable.get();
        for (Check<Text> check : revCheck) {
            assertEquals(check.data.toString(), check.part,
                partitioner.getPartition(check.data, nullValue, splitStrings.length + 1));
        }
    } finally {
        // Remove the temporary partition file regardless of test outcome.
        partitionFile.getFileSystem(conf).delete(partitionFile, true);
    }
}
Also used : Path(org.apache.hadoop.fs.Path) Configuration(org.apache.hadoop.conf.Configuration) ArrayList(java.util.ArrayList) Text(org.apache.hadoop.io.Text) NullWritable(org.apache.hadoop.io.NullWritable) Test(org.junit.Test)

Example 80 with NullWritable

use of org.apache.hadoop.io.NullWritable in project hadoop by apache.

From the class TestTotalOrderPartitioner, the method testTotalOrderBinarySearch:

@Test
public void testTotalOrderBinarySearch() throws Exception {
    // With NATURAL_ORDER disabled the partitioner must fall back to binary
    // search over the split points instead of the trie-based lookup.
    TotalOrderPartitioner<Text, NullWritable> partitioner =
        new TotalOrderPartitioner<Text, NullWritable>();
    Configuration conf = new Configuration();
    Path partitionFile = TestTotalOrderPartitioner.<Text>writePartitionFile(
        "totalorderbinarysearch", conf, splitStrings);
    conf.setBoolean(TotalOrderPartitioner.NATURAL_ORDER, false);
    conf.setClass(MRJobConfig.MAP_OUTPUT_KEY_CLASS, Text.class, Object.class);
    try {
        partitioner.setConf(conf);
        NullWritable nullValue = NullWritable.get();
        // Every test key must land in its expected partition.
        for (Check<Text> check : testStrings) {
            assertEquals(check.data.toString(), check.part,
                partitioner.getPartition(check.data, nullValue, splitStrings.length + 1));
        }
    } finally {
        // Remove the temporary partition file regardless of test outcome.
        partitionFile.getFileSystem(conf).delete(partitionFile, true);
    }
}
Also used : Path(org.apache.hadoop.fs.Path) Configuration(org.apache.hadoop.conf.Configuration) Text(org.apache.hadoop.io.Text) NullWritable(org.apache.hadoop.io.NullWritable) Test(org.junit.Test)

Aggregations

NullWritable (org.apache.hadoop.io.NullWritable)101 Test (org.junit.Test)65 Configuration (org.apache.hadoop.conf.Configuration)41 Path (org.apache.hadoop.fs.Path)41 File (java.io.File)29 FileSystem (org.apache.hadoop.fs.FileSystem)26 SequenceFile (org.apache.hadoop.io.SequenceFile)22 JobConf (org.apache.hadoop.mapred.JobConf)22 RouteBuilder (org.apache.camel.builder.RouteBuilder)18 MockEndpoint (org.apache.camel.component.mock.MockEndpoint)18 ArrayFile (org.apache.hadoop.io.ArrayFile)18 Text (org.apache.hadoop.io.Text)16 InputSplit (org.apache.hadoop.mapred.InputSplit)16 LongWritable (org.apache.hadoop.io.LongWritable)15 IntWritable (org.apache.hadoop.io.IntWritable)10 Writer (org.apache.hadoop.io.SequenceFile.Writer)9 CharacteristicSetWritable (org.apache.jena.hadoop.rdf.types.CharacteristicSetWritable)8 IOException (java.io.IOException)7 StructObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector)7 FloatWritable (org.apache.hadoop.io.FloatWritable)7