Example 1 with WrappedReducer

Use of org.apache.hadoop.mapreduce.lib.reduce.WrappedReducer in project hadoop by apache.

The class TestGridMixClasses, method testSleepReducer.

/*
   * test SleepReducer
   */
@Test(timeout = 3000)
public void testSleepReducer() throws Exception {
    Configuration conf = new Configuration();
    conf.setInt(JobContext.NUM_REDUCES, 2);
    CompressionEmulationUtil.setCompressionEmulationEnabled(conf, true);
    conf.setBoolean(FileOutputFormat.COMPRESS, true);
    conf.setBoolean(MRJobConfig.MAP_OUTPUT_COMPRESS, true);
    TaskAttemptID taskId = new TaskAttemptID();
    RawKeyValueIterator input = new FakeRawKeyValueReducerIterator();
    Counter counter = new GenericCounter();
    Counter inputValueCounter = new GenericCounter();
    RecordWriter<NullWritable, NullWritable> output = new LoadRecordReduceWriter();
    OutputCommitter committer = new CustomOutputCommitter();
    StatusReporter reporter = new DummyReporter();
    RawComparator<GridmixKey> comparator = new FakeRawComparator();
    ReduceContext<GridmixKey, NullWritable, NullWritable, NullWritable> reducecontext = new ReduceContextImpl<GridmixKey, NullWritable, NullWritable, NullWritable>(conf, taskId, input, counter, inputValueCounter, output, committer, reporter, comparator, GridmixKey.class, NullWritable.class);
    org.apache.hadoop.mapreduce.Reducer<GridmixKey, NullWritable, NullWritable, NullWritable>.Context context = new WrappedReducer<GridmixKey, NullWritable, NullWritable, NullWritable>().getReducerContext(reducecontext);
    SleepReducer test = new SleepReducer();
    long start = System.currentTimeMillis();
    test.setup(context);
    long sleeper = context.getCurrentKey().getReduceOutputBytes();
    // status has been changed
    assertEquals("Sleeping... " + sleeper + " ms left", context.getStatus());
    // should sleep 0.9 sec
    assertTrue(System.currentTimeMillis() >= (start + sleeper));
    test.cleanup(context);
    // status has been changed again
    assertEquals("Slept for " + sleeper, context.getStatus());
}
Also used: Configuration(org.apache.hadoop.conf.Configuration) ReduceContextImpl(org.apache.hadoop.mapreduce.task.ReduceContextImpl) TaskAttemptID(org.apache.hadoop.mapreduce.TaskAttemptID) GenericCounter(org.apache.hadoop.mapreduce.counters.GenericCounter) Counter(org.apache.hadoop.mapreduce.Counter) CustomOutputCommitter(org.apache.hadoop.CustomOutputCommitter) OutputCommitter(org.apache.hadoop.mapreduce.OutputCommitter) DummyReporter(org.apache.hadoop.mapreduce.task.TaskAttemptContextImpl.DummyReporter) NullWritable(org.apache.hadoop.io.NullWritable) RawKeyValueIterator(org.apache.hadoop.mapred.RawKeyValueIterator) SleepReducer(org.apache.hadoop.mapred.gridmix.SleepJob.SleepReducer) WrappedReducer(org.apache.hadoop.mapreduce.lib.reduce.WrappedReducer) StatusReporter(org.apache.hadoop.mapreduce.StatusReporter) Test(org.junit.Test)

Example 2 with WrappedReducer

Use of org.apache.hadoop.mapreduce.lib.reduce.WrappedReducer in project hadoop by apache.

The class TestGridMixClasses, method testLoadJobLoadReducer.

/*
   * test LoadReducer
   */
@Test(timeout = 3000)
public void testLoadJobLoadReducer() throws Exception {
    LoadJob.LoadReducer test = new LoadJob.LoadReducer();
    Configuration conf = new Configuration();
    conf.setInt(JobContext.NUM_REDUCES, 2);
    CompressionEmulationUtil.setCompressionEmulationEnabled(conf, true);
    conf.setBoolean(FileOutputFormat.COMPRESS, true);
    conf.setBoolean(MRJobConfig.MAP_OUTPUT_COMPRESS, true);
    TaskAttemptID taskid = new TaskAttemptID();
    RawKeyValueIterator input = new FakeRawKeyValueIterator();
    Counter counter = new GenericCounter();
    Counter inputValueCounter = new GenericCounter();
    LoadRecordWriter output = new LoadRecordWriter();
    OutputCommitter committer = new CustomOutputCommitter();
    StatusReporter reporter = new DummyReporter();
    RawComparator<GridmixKey> comparator = new FakeRawComparator();
    ReduceContext<GridmixKey, GridmixRecord, NullWritable, GridmixRecord> reduceContext = new ReduceContextImpl<GridmixKey, GridmixRecord, NullWritable, GridmixRecord>(conf, taskid, input, counter, inputValueCounter, output, committer, reporter, comparator, GridmixKey.class, GridmixRecord.class);
    // consume the first key/value pair up front; run() will read the rest
    reduceContext.nextKeyValue();
    org.apache.hadoop.mapreduce.Reducer<GridmixKey, GridmixRecord, NullWritable, GridmixRecord>.Context context = new WrappedReducer<GridmixKey, GridmixRecord, NullWritable, GridmixRecord>().getReducerContext(reduceContext);
    // test.setup(context);
    test.run(context);
    // run() reads the remaining 9 records (one was already consumed above)
    assertEquals(9, counter.getValue());
    assertEquals(10, inputValueCounter.getValue());
    assertEquals(1, output.getData().size());
    GridmixRecord record = output.getData().values().iterator().next();
    assertEquals(1593, record.getSize());
}
Also used: Configuration(org.apache.hadoop.conf.Configuration) ReduceContextImpl(org.apache.hadoop.mapreduce.task.ReduceContextImpl) TaskAttemptID(org.apache.hadoop.mapreduce.TaskAttemptID) GenericCounter(org.apache.hadoop.mapreduce.counters.GenericCounter) Counter(org.apache.hadoop.mapreduce.Counter) CustomOutputCommitter(org.apache.hadoop.CustomOutputCommitter) OutputCommitter(org.apache.hadoop.mapreduce.OutputCommitter) DummyReporter(org.apache.hadoop.mapreduce.task.TaskAttemptContextImpl.DummyReporter) NullWritable(org.apache.hadoop.io.NullWritable) RawKeyValueIterator(org.apache.hadoop.mapred.RawKeyValueIterator) WrappedReducer(org.apache.hadoop.mapreduce.lib.reduce.WrappedReducer) SleepReducer(org.apache.hadoop.mapred.gridmix.SleepJob.SleepReducer) StatusReporter(org.apache.hadoop.mapreduce.StatusReporter) Test(org.junit.Test)

Example 3 with WrappedReducer

Use of org.apache.hadoop.mapreduce.lib.reduce.WrappedReducer in project tez by apache.

The class MRCombiner, method createReduceContext.

private static <KEYIN, VALUEIN, KEYOUT, VALUEOUT> org.apache.hadoop.mapreduce.Reducer<KEYIN, VALUEIN, KEYOUT, VALUEOUT>.Context createReduceContext(Configuration conf, TaskAttemptID mrTaskAttemptID, final TezRawKeyValueIterator rawIter, Counter combineInputRecordsCounter, Counter combineOutputRecordsCounter, RecordWriter<KEYOUT, VALUEOUT> recordWriter, MRTaskReporter reporter, RawComparator<KEYIN> comparator, Class<KEYIN> keyClass, Class<VALUEIN> valClass) throws InterruptedException, IOException {
    // Adapt Tez's TezRawKeyValueIterator to the MapReduce RawKeyValueIterator interface.
    RawKeyValueIterator r = new RawKeyValueIterator() {

        @Override
        public boolean next() throws IOException {
            return rawIter.next();
        }

        @Override
        public DataInputBuffer getValue() throws IOException {
            return rawIter.getValue();
        }

        @Override
        public Progress getProgress() {
            return rawIter.getProgress();
        }

        @Override
        public DataInputBuffer getKey() throws IOException {
            return rawIter.getKey();
        }

        @Override
        public void close() throws IOException {
            rawIter.close();
        }
    };
    // No input key counter or output committer is needed when combining, hence the nulls.
    ReduceContext<KEYIN, VALUEIN, KEYOUT, VALUEOUT> rContext = new ReduceContextImpl<KEYIN, VALUEIN, KEYOUT, VALUEOUT>(conf, mrTaskAttemptID, r, null, combineInputRecordsCounter, recordWriter, null, reporter, comparator, keyClass, valClass);
    org.apache.hadoop.mapreduce.Reducer<KEYIN, VALUEIN, KEYOUT, VALUEOUT>.Context reducerContext = new WrappedReducer<KEYIN, VALUEIN, KEYOUT, VALUEOUT>().getReducerContext(rContext);
    return reducerContext;
}
Also used: ReduceContextImpl(org.apache.hadoop.mapreduce.task.ReduceContextImpl) Reducer(org.apache.hadoop.mapred.Reducer) WrappedReducer(org.apache.hadoop.mapreduce.lib.reduce.WrappedReducer) TezRawKeyValueIterator(org.apache.tez.runtime.library.common.sort.impl.TezRawKeyValueIterator) RawKeyValueIterator(org.apache.hadoop.mapred.RawKeyValueIterator)
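
The context returned by createReduceContext is what ultimately drives the combiner. A minimal sketch of how such a context is plausibly consumed; reducerClass and conf are assumed names here, not Tez's exact code:

    // Hypothetical caller: instantiate the job's combiner class and run it
    // against the wrapped context returned by createReduceContext().
    org.apache.hadoop.mapreduce.Reducer reducer =
        org.apache.hadoop.util.ReflectionUtils.newInstance(reducerClass, conf);
    // run() iterates the key groups in the context, invoking reduce() once per key.
    reducer.run(reducerContext);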

Example 4 with WrappedReducer

Use of org.apache.hadoop.mapreduce.lib.reduce.WrappedReducer in project cdap by caskdata.

The class ReducerWrapper, method createAutoFlushingContext.

private WrappedReducer.Context createAutoFlushingContext(final Context context, final BasicMapReduceTaskContext basicMapReduceContext) {
    // NOTE: we will change auto-flush to take into account size of buffered data, so no need to do/test a lot with
    //       current approach
    final int flushFreq = context.getConfiguration().getInt("c.reducer.flush.freq", 10000);
    @SuppressWarnings("unchecked") WrappedReducer.Context flushingContext = new WrappedReducer().new Context(context) {

        private int processedRecords = 0;

        @Override
        public boolean nextKey() throws IOException, InterruptedException {
            boolean result = super.nextKey();
            if (++processedRecords > flushFreq) {
                try {
                    LOG.trace("Flushing dataset operations...");
                    basicMapReduceContext.flushOperations();
                } catch (Exception e) {
                    LOG.error("Failed to persist changes", e);
                    throw Throwables.propagate(e);
                }
                processedRecords = 0;
            }
            return result;
        }
    };
    return flushingContext;
}
Also used: RuntimeContext(co.cask.cdap.api.RuntimeContext) WrappedReducer(org.apache.hadoop.mapreduce.lib.reduce.WrappedReducer) IOException(java.io.IOException)
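
A sketch of how the returned flushing context would be used; delegate stands in for the user's reducer instance, and this caller is an assumption, not shown in the CDAP source above:

    // Hypothetical caller: run the delegate reducer against the flushing context
    // so dataset operations are persisted after every flushFreq key groups.
    WrappedReducer.Context flushingContext =
        createAutoFlushingContext(context, basicMapReduceContext);
    delegate.run(flushingContext);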

Example 5 with WrappedReducer

Use of org.apache.hadoop.mapreduce.lib.reduce.WrappedReducer in project ignite by apache.

The class HadoopV2ReduceTask, method run0.

/**
 * {@inheritDoc}
 */
@SuppressWarnings({ "ConstantConditions", "unchecked" })
@Override
public void run0(HadoopV2TaskContext taskCtx) throws IgniteCheckedException {
    OutputFormat outputFormat = null;
    Exception err = null;
    JobContextImpl jobCtx = taskCtx.jobContext();
    // Set mapper index for combiner tasks
    if (!reduce && taskCtx.taskInfo().hasMapperIndex())
        HadoopMapperUtils.mapperIndex(taskCtx.taskInfo().mapperIndex());
    else
        HadoopMapperUtils.clearMapperIndex();
    try {
        outputFormat = reduce || !taskCtx.job().info().hasReducer() ? prepareWriter(jobCtx) : null;
        Reducer reducer;
        if (reduce)
            reducer = ReflectionUtils.newInstance(jobCtx.getReducerClass(), jobCtx.getConfiguration());
        else
            reducer = ReflectionUtils.newInstance(jobCtx.getCombinerClass(), jobCtx.getConfiguration());
        try {
            reducer.run(new WrappedReducer().getReducerContext(hadoopContext()));
            if (!reduce)
                taskCtx.onMapperFinished();
        } finally {
            closeWriter();
        }
        commit(outputFormat);
    } catch (InterruptedException e) {
        err = e;
        Thread.currentThread().interrupt();
        throw new IgniteInterruptedCheckedException(e);
    } catch (Exception e) {
        err = e;
        throw new IgniteCheckedException(e);
    } finally {
        if (!reduce)
            HadoopMapperUtils.clearMapperIndex();
        if (err != null)
            abort(outputFormat);
    }
}
Also used: IgniteInterruptedCheckedException(org.apache.ignite.internal.IgniteInterruptedCheckedException) JobContextImpl(org.apache.hadoop.mapred.JobContextImpl) IgniteCheckedException(org.apache.ignite.IgniteCheckedException) OutputFormat(org.apache.hadoop.mapreduce.OutputFormat) WrappedReducer(org.apache.hadoop.mapreduce.lib.reduce.WrappedReducer) Reducer(org.apache.hadoop.mapreduce.Reducer)

Aggregations

WrappedReducer (org.apache.hadoop.mapreduce.lib.reduce.WrappedReducer): 7 uses
RawKeyValueIterator (org.apache.hadoop.mapred.RawKeyValueIterator): 4 uses
ReduceContextImpl (org.apache.hadoop.mapreduce.task.ReduceContextImpl): 4 uses
RuntimeContext (co.cask.cdap.api.RuntimeContext): 2 uses
IOException (java.io.IOException): 2 uses
CustomOutputCommitter (org.apache.hadoop.CustomOutputCommitter): 2 uses
Configuration (org.apache.hadoop.conf.Configuration): 2 uses
NullWritable (org.apache.hadoop.io.NullWritable): 2 uses
SleepReducer (org.apache.hadoop.mapred.gridmix.SleepJob.SleepReducer): 2 uses
Counter (org.apache.hadoop.mapreduce.Counter): 2 uses
OutputCommitter (org.apache.hadoop.mapreduce.OutputCommitter): 2 uses
StatusReporter (org.apache.hadoop.mapreduce.StatusReporter): 2 uses
TaskAttemptID (org.apache.hadoop.mapreduce.TaskAttemptID): 2 uses
GenericCounter (org.apache.hadoop.mapreduce.counters.GenericCounter): 2 uses
DummyReporter (org.apache.hadoop.mapreduce.task.TaskAttemptContextImpl.DummyReporter): 2 uses
TezRawKeyValueIterator (org.apache.tez.runtime.library.common.sort.impl.TezRawKeyValueIterator): 2 uses
Test (org.junit.Test): 2 uses
JobContextImpl (org.apache.hadoop.mapred.JobContextImpl): 1 use
Reducer (org.apache.hadoop.mapred.Reducer): 1 use
OutputFormat (org.apache.hadoop.mapreduce.OutputFormat): 1 use
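
All five projects repeat one pattern: build (or obtain) a low-level ReduceContext, then adapt it to the public Reducer.Context API with WrappedReducer.getReducerContext() before handing it to a Reducer. A minimal generic sketch of that shared step; the class and method names here are illustrative, not from any of the projects above:

    import java.io.IOException;

    import org.apache.hadoop.mapreduce.ReduceContext;
    import org.apache.hadoop.mapreduce.Reducer;
    import org.apache.hadoop.mapreduce.lib.reduce.WrappedReducer;

    public final class WrappedReducerPattern {

        // Adapt an already-built ReduceContext so that any new-API Reducer
        // can be driven against it, outside the normal MapReduce task runtime.
        static <KIN, VIN, KOUT, VOUT> void drive(
                Reducer<KIN, VIN, KOUT, VOUT> reducer,
                ReduceContext<KIN, VIN, KOUT, VOUT> reduceContext)
                throws IOException, InterruptedException {
            Reducer<KIN, VIN, KOUT, VOUT>.Context context =
                new WrappedReducer<KIN, VIN, KOUT, VOUT>().getReducerContext(reduceContext);
            // run() calls setup(), then reduce() once per key group, then cleanup(),
            // exactly as the framework itself would.
            reducer.run(context);
        }
    }

This indirection is why the tests and engines above can feed a Reducer from fake iterators, a Tez shuffle, or Ignite's own I/O: the Reducer only ever sees the Context facade.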