Search in sources :

Example 6 with TezRawKeyValueIterator

use of org.apache.tez.runtime.library.common.sort.impl.TezRawKeyValueIterator in project tez by apache.

the class MRCombiner method createReduceContext.

/**
 * Builds the new-API {@code Reducer.Context} that a combiner runs against.
 *
 * <p>The Tez iterator is adapted to Hadoop's {@link RawKeyValueIterator} by plain
 * delegation, handed to a {@link ReduceContextImpl}, and the result is wrapped via
 * {@link WrappedReducer#getReducerContext}.
 *
 * <p>Note: {@code combineOutputRecordsCounter} is accepted but never referenced in this
 * method body, and the fourth and seventh {@code ReduceContextImpl} constructor
 * arguments are passed as {@code null}.
 *
 * @param conf configuration handed to the reduce context
 * @param mrTaskAttemptID task attempt id handed to the reduce context
 * @param rawIter Tez iterator supplying the raw key/value stream
 * @param combineInputRecordsCounter counter handed to the reduce context
 * @param combineOutputRecordsCounter unused by this method
 * @param recordWriter writer handed to the reduce context as its output
 * @param reporter reporter handed to the reduce context
 * @param comparator raw comparator for keys
 * @param keyClass key class
 * @param valClass value class
 * @return the reducer context wrapping the newly created reduce context
 * @throws InterruptedException declared by the reduce context construction
 * @throws IOException declared by the reduce context construction
 */
private static <KEYIN, VALUEIN, KEYOUT, VALUEOUT> org.apache.hadoop.mapreduce.Reducer<KEYIN, VALUEIN, KEYOUT, VALUEOUT>.Context createReduceContext(Configuration conf, TaskAttemptID mrTaskAttemptID, final TezRawKeyValueIterator rawIter, Counter combineInputRecordsCounter, Counter combineOutputRecordsCounter, RecordWriter<KEYOUT, VALUEOUT> recordWriter, MRTaskReporter reporter, RawComparator<KEYIN> comparator, Class<KEYIN> keyClass, Class<VALUEIN> valClass) throws InterruptedException, IOException {
    // Pure pass-through adapter: every call is forwarded unchanged to the Tez iterator.
    final RawKeyValueIterator delegatingIter = new RawKeyValueIterator() {

        @Override
        public DataInputBuffer getKey() throws IOException {
            return rawIter.getKey();
        }

        @Override
        public DataInputBuffer getValue() throws IOException {
            return rawIter.getValue();
        }

        @Override
        public boolean next() throws IOException {
            return rawIter.next();
        }

        @Override
        public Progress getProgress() {
            return rawIter.getProgress();
        }

        @Override
        public void close() throws IOException {
            rawIter.close();
        }
    };

    final ReduceContext<KEYIN, VALUEIN, KEYOUT, VALUEOUT> combineContext =
        new ReduceContextImpl<KEYIN, VALUEIN, KEYOUT, VALUEOUT>(
            conf,
            mrTaskAttemptID,
            delegatingIter,
            null,
            combineInputRecordsCounter,
            recordWriter,
            null,
            reporter,
            comparator,
            keyClass,
            valClass);

    final WrappedReducer<KEYIN, VALUEIN, KEYOUT, VALUEOUT> wrapper =
        new WrappedReducer<KEYIN, VALUEIN, KEYOUT, VALUEOUT>();
    return wrapper.getReducerContext(combineContext);
}
Also used : ReduceContextImpl(org.apache.hadoop.mapreduce.task.ReduceContextImpl) Reducer(org.apache.hadoop.mapred.Reducer) WrappedReducer(org.apache.hadoop.mapreduce.lib.reduce.WrappedReducer) TezRawKeyValueIterator(org.apache.tez.runtime.library.common.sort.impl.TezRawKeyValueIterator) RawKeyValueIterator(org.apache.hadoop.mapred.RawKeyValueIterator)

Example 7 with TezRawKeyValueIterator

use of org.apache.tez.runtime.library.common.sort.impl.TezRawKeyValueIterator in project tez by apache.

the class MergeManager method close.

/**
 * Shuts the merge manager down and, if requested, performs the final merge.
 *
 * <p>Only the first invocation does any work: {@code isShutdown} is flipped atomically and
 * later calls return {@code null} immediately. The first call closes the mergers,
 * snapshots and clears the in-memory and on-disk output collections, logs the in-memory
 * fetch statistics, and then optionally runs {@code finalMerge} over the snapshots.
 *
 * @param tryFinalMerge whether to run the final merge over the collected outputs
 * @return the iterator produced by the final merge, or {@code null} if this manager was
 *         already shut down or {@code tryFinalMerge} is {@code false}
 * @throws Throwable whatever the mergers or the final merge throw; an
 *         {@link InterruptedException} from the final merge is rethrown after the
 *         interrupt flag has been restored
 */
public TezRawKeyValueIterator close(boolean tryFinalMerge) throws Throwable {
    // getAndSet makes shutdown idempotent: only the first caller proceeds.
    if (isShutdown.getAndSet(true)) {
        return null;
    }

    // Wait for on-going merges to complete
    if (memToMemMerger != null) {
        memToMemMerger.close();
    }
    inMemoryMerger.close();
    onDiskMerger.close();

    // Snapshot the pending outputs, then clear the live collections.
    List<MapOutput> memory = new ArrayList<MapOutput>(inMemoryMergedMapOutputs);
    inMemoryMergedMapOutputs.clear();
    memory.addAll(inMemoryMapOutputs);
    inMemoryMapOutputs.clear();
    List<FileChunk> disk = new ArrayList<FileChunk>(onDiskMapOutputs);
    onDiskMapOutputs.clear();

    if (statsInMemTotal.count > 0) {
        // Average is total size over number of fetches; count is known to be > 0 here.
        // (Previously this divided size by itself and always reported 1.0.)
        float avgSize = statsInMemTotal.size / (float) statsInMemTotal.count;
        LOG.info("TotalInMemFetchStats: count={}, totalSize={}, min={}, max={}, avg={}", statsInMemTotal.count, statsInMemTotal.size, statsInMemTotal.minSize, statsInMemTotal.maxSize, avgSize);
    }

    // NOTE(review): the original comment here was truncated to "shuffle exception / error."
    // Presumably callers pass false when closing after a shuffle failure - confirm.
    if (!tryFinalMerge) {
        return null;
    }

    try {
        TezRawKeyValueIterator kvIter = finalMerge(conf, rfs, memory, disk);
        this.finalMergeComplete = true;
        return kvIter;
    } catch (InterruptedException e) {
        // Cleanup the disk segments
        if (cleanup) {
            cleanup(localFS, disk);
            // onDiskMapOutputs was cleared above; this covers whatever it holds now.
            cleanup(localFS, onDiskMapOutputs);
        }
        // reset interrupt status
        Thread.currentThread().interrupt();
        throw e;
    }
}
Also used : ArrayList(java.util.ArrayList) FileChunk(org.apache.hadoop.io.FileChunk) TezRawKeyValueIterator(org.apache.tez.runtime.library.common.sort.impl.TezRawKeyValueIterator)

Example 8 with TezRawKeyValueIterator

use of org.apache.tez.runtime.library.common.sort.impl.TezRawKeyValueIterator in project tez by apache.

the class OrderedGroupedKVInput method getReader.

/**
 * Returns a {@link KeyValuesReader} over this input.
 *
 * <p>This method blocks until the input is ready, i.e. the copy and merge stages are
 * complete. Callers can use {@code isInputReady} to find out beforehand whether this
 * call is going to block.
 *
 * <p>If the input has zero physical inputs, an empty reader is returned whose
 * {@code next()} always yields {@code false} and whose key/value accessors throw.
 *
 * <p>NOTE: all values for the current K-V pair must be read before invoking
 * {@code moveToNext}. Once {@code moveToNext()} is called, the value iterator from the
 * previous K-V pair will throw an exception.
 *
 * @return a KVReader over the sorted input
 * @throws IOInterruptedException if the wait for input readiness was interrupted
 * @throws IOException declared for reader creation
 * @throws TezException declared for reader creation
 */
@Override
public KeyValuesReader getReader() throws IOException, TezException {
    // The whole method cannot be synchronized: it is called from user code and may block.
    final TezRawKeyValueIterator iterSnapshot;
    synchronized (this) {
        iterSnapshot = rawIter;
        final boolean noInputs = getNumPhysicalInputs() == 0;
        if (noInputs) {
            // Nothing will ever arrive: hand back a reader that is immediately exhausted.
            return new KeyValuesReader() {

                @Override
                public Object getCurrentKey() throws IOException {
                    throw new RuntimeException("No data available in Input");
                }

                @Override
                public Iterable<Object> getCurrentValues() throws IOException {
                    throw new RuntimeException("No data available in Input");
                }

                @Override
                public boolean next() throws IOException {
                    getContext().notifyProgress();
                    hasCompletedProcessing();
                    completedProcessing = true;
                    return false;
                }
            };
        }
    }

    // The iterator has not been published yet, so wait (outside the lock) for readiness.
    if (iterSnapshot == null) {
        try {
            waitForInputReady();
        } catch (InterruptedException interrupted) {
            // Restore the interrupt flag before translating to the IO-flavoured exception.
            Thread.currentThread().interrupt();
            throw new IOInterruptedException("Interrupted while waiting for input ready", interrupted);
        }
    }

    // Read the values iterator under the lock, after any wait above has finished.
    @SuppressWarnings("rawtypes") final ValuesIterator readyValues;
    synchronized (this) {
        readyValues = vIter;
    }
    return new OrderedGroupedKeyValuesReader(readyValues, getContext());
}
Also used : IOInterruptedException(org.apache.tez.runtime.library.api.IOInterruptedException) KeyValuesReader(org.apache.tez.runtime.library.api.KeyValuesReader) ValuesIterator(org.apache.tez.runtime.library.common.ValuesIterator) TezRawKeyValueIterator(org.apache.tez.runtime.library.common.sort.impl.TezRawKeyValueIterator)

Example 9 with TezRawKeyValueIterator

use of org.apache.tez.runtime.library.common.sort.impl.TezRawKeyValueIterator in project tez by apache.

the class ReduceProcessor method runNewReducer.

/**
 * Runs a new-API ({@code org.apache.hadoop.mapreduce}) reducer over the given input.
 *
 * <p>The reducer class is taken from the task attempt context and instantiated
 * reflectively. The input's iterator is wrapped so that every {@code next()} call pushes
 * the iterator's progress to {@code reporter}; output records are forwarded to
 * {@code out}. After the reducer returns normally, progress is set to 1.0.
 *
 * @param job job configuration used to instantiate the reducer and build the context
 * @param reporter receives progress updates
 * @param input source of the raw key/value iterator
 * @param comparator raw comparator for keys
 * @param keyClass key class
 * @param valueClass value class
 * @param out destination for the reducer's output records
 * @throws IOException declared by the iterator, the reducer run, or the writer
 * @throws InterruptedException declared by the reducer run or the writer
 * @throws ClassNotFoundException declared when resolving the reducer class
 * @throws TezException declared when obtaining the input iterator
 */
void runNewReducer(JobConf job, final MRTaskReporter reporter, OrderedGroupedInputLegacy input, RawComparator comparator, Class keyClass, Class valueClass, final KeyValueWriter out) throws IOException, InterruptedException, ClassNotFoundException, TezException {
    // make a task context so we can get the classes
    org.apache.hadoop.mapreduce.TaskAttemptContext taskContext = getTaskAttemptContext();
    // make a reducer
    org.apache.hadoop.mapreduce.Reducer reducer = (org.apache.hadoop.mapreduce.Reducer) ReflectionUtils.newInstance(taskContext.getReducerClass(), job);
    // wrap value iterator to report progress.
    final TezRawKeyValueIterator rawIter = input.getIterator();
    TezRawKeyValueIterator rIter = new TezRawKeyValueIterator() {

        @Override
        public void close() throws IOException {
            rawIter.close();
        }

        @Override
        public DataInputBuffer getKey() throws IOException {
            return rawIter.getKey();
        }

        @Override
        public Progress getProgress() {
            return rawIter.getProgress();
        }

        @Override
        public boolean isSameKey() throws IOException {
            return rawIter.isSameKey();
        }

        @Override
        public DataInputBuffer getValue() throws IOException {
            return rawIter.getValue();
        }

        @Override
        public boolean hasNext() throws IOException {
            return rawIter.hasNext();
        }

        @Override
        public boolean next() throws IOException {
            // The only non-trivial delegate: advance, then publish the updated progress.
            boolean ret = rawIter.next();
            reporter.setProgress(rawIter.getProgress().getProgress());
            return ret;
        }
    };
    // Bridges the new-API RecordWriter onto the Tez KeyValueWriter; close is a no-op here.
    org.apache.hadoop.mapreduce.RecordWriter trackedRW = new org.apache.hadoop.mapreduce.RecordWriter() {

        @Override
        public void write(Object key, Object value) throws IOException, InterruptedException {
            out.write(key, value);
        }

        @Override
        public void close(TaskAttemptContext context) throws IOException, InterruptedException {
        }
    };
    org.apache.hadoop.mapreduce.Reducer.Context reducerContext = createReduceContext(reducer, job, taskAttemptId, rIter, reduceInputKeyCounter, reduceInputValueCounter, trackedRW, committer, reporter, comparator, keyClass, valueClass);
    reducer.run(reducerContext);
    // Set progress to 1.0f if there was no exception,
    reporter.setProgress(1.0f);
    trackedRW.close(reducerContext);
}
Also used : TaskAttemptContext(org.apache.hadoop.mapreduce.TaskAttemptContext) Reducer(org.apache.hadoop.mapred.Reducer) TezRawKeyValueIterator(org.apache.tez.runtime.library.common.sort.impl.TezRawKeyValueIterator)

Example 10 with TezRawKeyValueIterator

use of org.apache.tez.runtime.library.common.sort.impl.TezRawKeyValueIterator in project tez by apache.

the class MRTask method createReduceContext.

/**
 * Builds the new-API {@code Reducer.Context} for a reduce task.
 *
 * <p>The Tez iterator is adapted to Hadoop's {@link RawKeyValueIterator} by plain
 * delegation, handed to a {@link ReduceContextImpl} together with the counters, output,
 * committer and reporter, and the result is wrapped via
 * {@link WrappedReducer#getReducerContext}. The {@code reducer} argument itself is not
 * referenced in the method body.
 *
 * @param reducer the reducer the context is intended for (unused here)
 * @param job configuration handed to the reduce context
 * @param taskId task attempt id handed to the reduce context
 * @param rIter Tez iterator supplying the raw key/value stream
 * @param inputKeyCounter input key counter handed to the reduce context
 * @param inputValueCounter input value counter handed to the reduce context
 * @param output record writer handed to the reduce context
 * @param committer output committer handed to the reduce context
 * @param reporter status reporter handed to the reduce context
 * @param comparator raw comparator for keys
 * @param keyClass key class
 * @param valueClass value class
 * @return the reducer context wrapping the newly created reduce context
 * @throws IOException declared by the reduce context construction
 * @throws InterruptedException declared by the reduce context construction
 */
protected static <INKEY, INVALUE, OUTKEY, OUTVALUE> org.apache.hadoop.mapreduce.Reducer<INKEY, INVALUE, OUTKEY, OUTVALUE>.Context createReduceContext(org.apache.hadoop.mapreduce.Reducer<INKEY, INVALUE, OUTKEY, OUTVALUE> reducer, Configuration job, TaskAttemptID taskId, final TezRawKeyValueIterator rIter, org.apache.hadoop.mapreduce.Counter inputKeyCounter, org.apache.hadoop.mapreduce.Counter inputValueCounter, org.apache.hadoop.mapreduce.RecordWriter<OUTKEY, OUTVALUE> output, org.apache.hadoop.mapreduce.OutputCommitter committer, org.apache.hadoop.mapreduce.StatusReporter reporter, RawComparator<INKEY> comparator, Class<INKEY> keyClass, Class<INVALUE> valueClass) throws IOException, InterruptedException {
    // Pure pass-through adapter: every call is forwarded unchanged to the Tez iterator.
    final RawKeyValueIterator delegatingIter = new RawKeyValueIterator() {

        @Override
        public DataInputBuffer getKey() throws IOException {
            return rIter.getKey();
        }

        @Override
        public DataInputBuffer getValue() throws IOException {
            return rIter.getValue();
        }

        @Override
        public boolean next() throws IOException {
            return rIter.next();
        }

        @Override
        public Progress getProgress() {
            return rIter.getProgress();
        }

        @Override
        public void close() throws IOException {
            rIter.close();
        }
    };

    final org.apache.hadoop.mapreduce.ReduceContext<INKEY, INVALUE, OUTKEY, OUTVALUE> baseContext =
        new ReduceContextImpl<INKEY, INVALUE, OUTKEY, OUTVALUE>(
            job,
            taskId,
            delegatingIter,
            inputKeyCounter,
            inputValueCounter,
            output,
            committer,
            reporter,
            comparator,
            keyClass,
            valueClass);

    // Guarded so the message is only concatenated when debug logging is on.
    if (LOG.isDebugEnabled()) {
        LOG.debug("Using key class: " + keyClass + ", valueClass: " + valueClass);
    }

    final WrappedReducer<INKEY, INVALUE, OUTKEY, OUTVALUE> wrapper =
        new WrappedReducer<INKEY, INVALUE, OUTKEY, OUTVALUE>();
    return wrapper.getReducerContext(baseContext);
}
Also used : ReduceContextImpl(org.apache.hadoop.mapreduce.task.ReduceContextImpl) TezRawKeyValueIterator(org.apache.tez.runtime.library.common.sort.impl.TezRawKeyValueIterator) RawKeyValueIterator(org.apache.hadoop.mapred.RawKeyValueIterator) WrappedReducer(org.apache.hadoop.mapreduce.lib.reduce.WrappedReducer)

Aggregations

TezRawKeyValueIterator (org.apache.tez.runtime.library.common.sort.impl.TezRawKeyValueIterator)10 ArrayList (java.util.ArrayList)3 Path (org.apache.hadoop.fs.Path)3 Writer (org.apache.tez.runtime.library.common.sort.impl.IFile.Writer)3 FSDataOutputStream (org.apache.hadoop.fs.FSDataOutputStream)2 FileChunk (org.apache.hadoop.io.FileChunk)2 RawKeyValueIterator (org.apache.hadoop.mapred.RawKeyValueIterator)2 Reducer (org.apache.hadoop.mapred.Reducer)2 WrappedReducer (org.apache.hadoop.mapreduce.lib.reduce.WrappedReducer)2 ReduceContextImpl (org.apache.hadoop.mapreduce.task.ReduceContextImpl)2 TezIndexRecord (org.apache.tez.runtime.library.common.sort.impl.TezIndexRecord)2 DiskSegment (org.apache.tez.runtime.library.common.sort.impl.TezMerger.DiskSegment)2 Segment (org.apache.tez.runtime.library.common.sort.impl.TezMerger.Segment)2 TezSpillRecord (org.apache.tez.runtime.library.common.sort.impl.TezSpillRecord)2 IOException (java.io.IOException)1 ExecutionException (java.util.concurrent.ExecutionException)1 FileStatus (org.apache.hadoop.fs.FileStatus)1 DataInputBuffer (org.apache.hadoop.io.DataInputBuffer)1 RawComparator (org.apache.hadoop.io.RawComparator)1 TaskAttemptContext (org.apache.hadoop.mapreduce.TaskAttemptContext)1