Search in sources :

Example 6 with RawKeyValueIterator

Use of org.apache.hadoop.mapred.RawKeyValueIterator in the Apache Hadoop project.

From the class TestMerger, method testMergeShouldReturnProperProgress.

/**
 * Merges the supplied segments and checks the progress reported by the
 * resulting iterator after every {@code next()} call, including the call
 * that signals the end of the input.
 *
 * @param segments the segments to merge; the expected values below assume
 *                 2 segments holding 3 keys each (6 keys in total)
 * @throws IOException if the merge or a read from the merged iterator fails
 */
@SuppressWarnings({ "unchecked" })
public void testMergeShouldReturnProperProgress(List<Segment<Text, Text>> segments) throws IOException {
    Path mergeTmpDir = new Path("localpath");
    Class<Text> mapKeyClass = (Class<Text>) jobConf.getMapOutputKeyClass();
    Class<Text> mapValueClass = (Class<Text>) jobConf.getMapOutputValueClass();
    RawComparator<Text> keyComparator = jobConf.getOutputKeyComparator();
    Counter reads = new Counter();
    Counter writes = new Counter();
    Progress phase = new Progress();
    RawKeyValueIterator merged = Merger.merge(conf, fs, mapKeyClass, mapValueClass, segments, 2, mergeTmpDir, keyComparator, getReporter(), reads, writes, phase);
    final float epsilon = 0.00001f;

    // With 6 keys in total, every key actually read advances progress by 1/6.
    // Merge setup has already read the first key of each segment, so the
    // iterator starts out at 2/6 before next() is ever called.
    Assert.assertEquals(2 / 6.0f, merged.getProgress().get(), epsilon);

    // Expected progress after each successful next(), in call order.
    final float[] progressAfterNext = {
        // Served from a key that merge setup already read: no movement.
        2 / 6.0f,
        // Each of these reads one fresh key.
        3 / 6.0f,
        4 / 6.0f,
        // One segment is now exhausted, so this call hands back the key
        // already cached from the other segment: no movement.
        4 / 6.0f,
        // Back to reading one fresh key per call.
        5 / 6.0f,
        1.0f
    };
    for (float expectedProgress : progressAfterNext) {
        Assert.assertTrue(merged.next());
        Assert.assertEquals(expectedProgress, merged.getProgress().get(), epsilon);
    }

    // The input is drained: next() reports false, progress stays complete,
    // and the iterator exposes no key and an empty value buffer.
    Assert.assertFalse(merged.next());
    Assert.assertEquals(1.0f, merged.getProgress().get(), epsilon);
    Assert.assertTrue(merged.getKey() == null);
    Assert.assertEquals(0, merged.getValue().getData().length);
}
Also used : CompressAwarePath(org.apache.hadoop.mapreduce.task.reduce.MergeManagerImpl.CompressAwarePath) Path(org.apache.hadoop.fs.Path) Progress(org.apache.hadoop.util.Progress) Counter(org.apache.hadoop.mapred.Counters.Counter) Text(org.apache.hadoop.io.Text) RawKeyValueIterator(org.apache.hadoop.mapred.RawKeyValueIterator)

Example 7 with RawKeyValueIterator

Use of org.apache.hadoop.mapred.RawKeyValueIterator in the Apache Tez project.

From the class MRTask, method createReduceContext.

/**
 * Builds the {@code Reducer.Context} handed to a new-API reducer.
 *
 * <p>The supplied {@link TezRawKeyValueIterator} is wrapped in an anonymous
 * {@link RawKeyValueIterator} whose methods simply forward to it; that
 * wrapper is passed to {@link ReduceContextImpl}, and the resulting reduce
 * context is converted to a reducer context through {@link WrappedReducer}.
 *
 * @param reducer           the reducer the context is created for (not read by this method)
 * @param job               configuration passed to the reduce context
 * @param taskId            task attempt id passed to the reduce context
 * @param rIter             iterator that every wrapper call is forwarded to
 * @param inputKeyCounter   counter passed to the reduce context
 * @param inputValueCounter counter passed to the reduce context
 * @param output            record writer passed to the reduce context
 * @param committer         output committer passed to the reduce context
 * @param reporter          status reporter passed to the reduce context
 * @param comparator        key comparator passed to the reduce context
 * @param keyClass          key class passed to the reduce context
 * @param valueClass        value class passed to the reduce context
 * @return the reducer context wrapping the newly created reduce context
 * @throws IOException          declared by this method; propagated from the calls it makes
 * @throws InterruptedException declared by this method; propagated from the calls it makes
 */
protected static <INKEY, INVALUE, OUTKEY, OUTVALUE> org.apache.hadoop.mapreduce.Reducer<INKEY, INVALUE, OUTKEY, OUTVALUE>.Context createReduceContext(org.apache.hadoop.mapreduce.Reducer<INKEY, INVALUE, OUTKEY, OUTVALUE> reducer, Configuration job, TaskAttemptID taskId, final TezRawKeyValueIterator rIter, org.apache.hadoop.mapreduce.Counter inputKeyCounter, org.apache.hadoop.mapreduce.Counter inputValueCounter, org.apache.hadoop.mapreduce.RecordWriter<OUTKEY, OUTVALUE> output, org.apache.hadoop.mapreduce.OutputCommitter committer, org.apache.hadoop.mapreduce.StatusReporter reporter, RawComparator<INKEY> comparator, Class<INKEY> keyClass, Class<INVALUE> valueClass) throws IOException, InterruptedException {
    // Pure pass-through adapter: every call goes straight to rIter.
    final RawKeyValueIterator delegatingIterator = new RawKeyValueIterator() {

        @Override
        public DataInputBuffer getKey() throws IOException {
            return rIter.getKey();
        }

        @Override
        public DataInputBuffer getValue() throws IOException {
            return rIter.getValue();
        }

        @Override
        public boolean next() throws IOException {
            return rIter.next();
        }

        @Override
        public Progress getProgress() {
            return rIter.getProgress();
        }

        @Override
        public void close() throws IOException {
            rIter.close();
        }
    };

    org.apache.hadoop.mapreduce.ReduceContext<INKEY, INVALUE, OUTKEY, OUTVALUE> baseContext =
        new ReduceContextImpl<INKEY, INVALUE, OUTKEY, OUTVALUE>(
            job, taskId, delegatingIterator, inputKeyCounter, inputValueCounter,
            output, committer, reporter, comparator, keyClass, valueClass);

    // Guarded so the message string is only built when debug logging is on.
    if (LOG.isDebugEnabled()) {
        LOG.debug("Using key class: " + keyClass + ", valueClass: " + valueClass);
    }

    return new WrappedReducer<INKEY, INVALUE, OUTKEY, OUTVALUE>().getReducerContext(baseContext);
}
Also used : ReduceContextImpl(org.apache.hadoop.mapreduce.task.ReduceContextImpl) TezRawKeyValueIterator(org.apache.tez.runtime.library.common.sort.impl.TezRawKeyValueIterator) RawKeyValueIterator(org.apache.hadoop.mapred.RawKeyValueIterator) WrappedReducer(org.apache.hadoop.mapreduce.lib.reduce.WrappedReducer)

Aggregations

RawKeyValueIterator (org.apache.hadoop.mapred.RawKeyValueIterator) 7, WrappedReducer (org.apache.hadoop.mapreduce.lib.reduce.WrappedReducer) 4, ReduceContextImpl (org.apache.hadoop.mapreduce.task.ReduceContextImpl) 4, CustomOutputCommitter (org.apache.hadoop.CustomOutputCommitter) 2, Configuration (org.apache.hadoop.conf.Configuration) 2, Path (org.apache.hadoop.fs.Path) 2, NullWritable (org.apache.hadoop.io.NullWritable) 2, SleepReducer (org.apache.hadoop.mapred.gridmix.SleepJob.SleepReducer) 2, Counter (org.apache.hadoop.mapreduce.Counter) 2, OutputCommitter (org.apache.hadoop.mapreduce.OutputCommitter) 2, StatusReporter (org.apache.hadoop.mapreduce.StatusReporter) 2, TaskAttemptID (org.apache.hadoop.mapreduce.TaskAttemptID) 2, GenericCounter (org.apache.hadoop.mapreduce.counters.GenericCounter) 2, DummyReporter (org.apache.hadoop.mapreduce.task.TaskAttemptContextImpl.DummyReporter) 2, Progress (org.apache.hadoop.util.Progress) 2, TezRawKeyValueIterator (org.apache.tez.runtime.library.common.sort.impl.TezRawKeyValueIterator) 2, Test (org.junit.Test) 2, IOException (java.io.IOException) 1, ArrayList (java.util.ArrayList) 1, FSDataOutputStream (org.apache.hadoop.fs.FSDataOutputStream) 1