Search in sources :

Example 1 with KeyValuesReader

use of org.apache.tez.runtime.library.api.KeyValuesReader in project tez by apache.

the class TestSortedGroupedMergedInput method testSkippedKey3.

/**
 * Skips keys 2 and 3 without touching them and, for key 1, reads all values for the key
 * but doesn't trigger the last hasNext() call on the values iterator.
 *
 * Three readers each supply keys 1..4 with two values per key (each value equal to its
 * key), so every key that is actually read is expected to expose six values.
 */
@Test(timeout = 5000)
public void testSkippedKey3() throws Exception {
    SortedTestKeyValuesReader kvsReader1 = new SortedTestKeyValuesReader(new int[] { 1, 2, 3, 4 }, new int[][] { { 1, 1 }, { 2, 2 }, { 3, 3 }, { 4, 4 } });
    SortedTestKeyValuesReader kvsReader2 = new SortedTestKeyValuesReader(new int[] { 1, 2, 3, 4 }, new int[][] { { 1, 1 }, { 2, 2 }, { 3, 3 }, { 4, 4 } });
    SortedTestKeyValuesReader kvsReader3 = new SortedTestKeyValuesReader(new int[] { 1, 2, 3, 4 }, new int[][] { { 1, 1 }, { 2, 2 }, { 3, 3 }, { 4, 4 } });
    SortedTestInput sInput1 = new SortedTestInput(kvsReader1);
    SortedTestInput sInput2 = new SortedTestInput(kvsReader2);
    SortedTestInput sInput3 = new SortedTestInput(kvsReader3);
    List<Input> sInputs = new LinkedList<Input>();
    sInputs.add(sInput1);
    sInputs.add(sInput2);
    sInputs.add(sInput3);
    OrderedGroupedMergedKVInput input = new OrderedGroupedMergedKVInput(createMergedInputContext(), sInputs);
    KeyValuesReader kvsReader = input.getReader();
    int keyCount = 0;
    while (kvsReader.next()) {
        keyCount++;
        if (keyCount == 2 || keyCount == 3) {
            // Skip these keys: neither the key nor its values are requested.
            continue;
        }
        Integer key = (Integer) kvsReader.getCurrentKey();
        assertEquals(Integer.valueOf(keyCount), key);
        Iterator<Object> valuesIter = kvsReader.getCurrentValues().iterator();
        int valCount = 0;
        while (valuesIter.hasNext()) {
            valCount++;
            Integer val = (Integer) valuesIter.next();
            assertEquals(Integer.valueOf(keyCount), val);
            if (keyCount == 1 && valCount == 6) {
                // Avoid last hasNext on iterator
                break;
            }
        }
        assertEquals(6, valCount);
    }
    // Guard against the reader terminating early after a skipped or partially consumed key:
    // without this check the loop could exit after key 1 and the test would still pass.
    assertEquals(4, keyCount);
    getNextFromFinishedReader(kvsReader);
}
Also used : Input(org.apache.tez.runtime.api.Input) KeyValuesReader(org.apache.tez.runtime.library.api.KeyValuesReader) LinkedList(java.util.LinkedList) Test(org.junit.Test)

Example 2 with KeyValuesReader

use of org.apache.tez.runtime.library.api.KeyValuesReader in project tez by apache.

the class TestSortedGroupedMergedInput method testSimpleConcatenatedMergedKeyValuesInput.

/**
 * Reads three identical inputs (keys 1, 2, 3 with two values each) through a
 * ConcatenatedMergedKeyValuesInput and checks that every key of every input is surfaced
 * individually: nine keys in total, two values per key.
 */
@Test(timeout = 5000)
public void testSimpleConcatenatedMergedKeyValuesInput() throws Exception {
    SortedTestKeyValuesReader kvsReader1 = new SortedTestKeyValuesReader(new int[] { 1, 2, 3 }, new int[][] { { 1, 1 }, { 2, 2 }, { 3, 3 } });
    SortedTestKeyValuesReader kvsReader2 = new SortedTestKeyValuesReader(new int[] { 1, 2, 3 }, new int[][] { { 1, 1 }, { 2, 2 }, { 3, 3 } });
    SortedTestKeyValuesReader kvsReader3 = new SortedTestKeyValuesReader(new int[] { 1, 2, 3 }, new int[][] { { 1, 1 }, { 2, 2 }, { 3, 3 } });
    SortedTestInput sInput1 = new SortedTestInput(kvsReader1);
    SortedTestInput sInput2 = new SortedTestInput(kvsReader2);
    SortedTestInput sInput3 = new SortedTestInput(kvsReader3);
    List<Input> sInputs = new LinkedList<Input>();
    sInputs.add(sInput1);
    sInputs.add(sInput2);
    sInputs.add(sInput3);
    MergedInputContext mockContext = createMergedInputContext();
    ConcatenatedMergedKeyValuesInput input = new ConcatenatedMergedKeyValuesInput(mockContext, sInputs);
    KeyValuesReader kvsReader = input.getReader();
    int keyCount = 0;
    while (kvsReader.next()) {
        keyCount++;
        Integer key = (Integer) kvsReader.getCurrentKey();
        // All three inputs carry the same keys 1, 2, 3, so the concatenated stream is
        // expected to repeat that sequence once per input.
        assertEquals(Integer.valueOf((keyCount - 1) % 3 + 1), key);
        Iterator<Object> valuesIter = kvsReader.getCurrentValues().iterator();
        int valCount = 0;
        while (valuesIter.hasNext()) {
            valCount++;
            Integer val = (Integer) valuesIter.next();
            // In the fixture data every value equals the key it belongs to.
            assertEquals(key, val);
        }
        assertEquals(2, valCount);
    }
    assertEquals(9, keyCount);
    // one for each reader change and one to exit
    verify(mockContext, times(4)).notifyProgress();
    getNextFromFinishedReader(kvsReader);
}
Also used : Input(org.apache.tez.runtime.api.Input) KeyValuesReader(org.apache.tez.runtime.library.api.KeyValuesReader) LinkedList(java.util.LinkedList) MergedInputContext(org.apache.tez.runtime.api.MergedInputContext) Test(org.junit.Test)

Example 3 with KeyValuesReader

use of org.apache.tez.runtime.library.api.KeyValuesReader in project tez by apache.

the class TestSortedGroupedMergedInput method testSimple.

/**
 * Merges three identical sorted inputs (keys 1, 2, 3 with two values each) through an
 * OrderedGroupedMergedKVInput and checks that each key arrives in order with all six of
 * its values, every value being equal to the key.
 */
@Test(timeout = 5000)
public void testSimple() throws Exception {
    List<Input> mergedSources = new LinkedList<Input>();
    mergedSources.add(new SortedTestInput(
        new SortedTestKeyValuesReader(new int[] { 1, 2, 3 }, new int[][] { { 1, 1 }, { 2, 2 }, { 3, 3 } })));
    mergedSources.add(new SortedTestInput(
        new SortedTestKeyValuesReader(new int[] { 1, 2, 3 }, new int[][] { { 1, 1 }, { 2, 2 }, { 3, 3 } })));
    mergedSources.add(new SortedTestInput(
        new SortedTestKeyValuesReader(new int[] { 1, 2, 3 }, new int[][] { { 1, 1 }, { 2, 2 }, { 3, 3 } })));

    MergedInputContext context = createMergedInputContext();
    KeyValuesReader mergedReader = new OrderedGroupedMergedKVInput(context, mergedSources).getReader();

    int seenKeys = 0;
    while (mergedReader.next()) {
        seenKeys++;
        Integer expected = Integer.valueOf(seenKeys);
        Integer currentKey = (Integer) mergedReader.getCurrentKey();
        assertEquals(expected, currentKey);

        int seenValues = 0;
        for (Object rawValue : mergedReader.getCurrentValues()) {
            seenValues++;
            Integer currentValue = (Integer) rawValue;
            assertEquals(expected, currentValue);
        }
        assertEquals(6, seenValues);
    }

    // one for each reader change and one to exit
    verify(context, times(4)).notifyProgress();
    getNextFromFinishedReader(mergedReader);
}
Also used : Input(org.apache.tez.runtime.api.Input) KeyValuesReader(org.apache.tez.runtime.library.api.KeyValuesReader) LinkedList(java.util.LinkedList) MergedInputContext(org.apache.tez.runtime.api.MergedInputContext) Test(org.junit.Test)

Example 4 with KeyValuesReader

use of org.apache.tez.runtime.library.api.KeyValuesReader in project tez by apache.

the class TestSortedGroupedMergedInput method testSkippedKey2.

/**
 * Merges three inputs with partially overlapping key sets ({2, 4}, {1, 2, 3} and
 * {4, 5, 6, 7}) and skips key 4 without reading its key or values.
 *
 * Every other key must arrive in ascending order with the number of values contributed by
 * the inputs that contain it: four values for key 2 (present in two inputs), two values
 * for each of the remaining keys.
 */
@Test(timeout = 5000)
public void testSkippedKey2() throws Exception {
    SortedTestKeyValuesReader kvsReader1 = new SortedTestKeyValuesReader(new int[] { 2, 4 }, new int[][] { { 2, 2 }, { 4, 4 } });
    SortedTestKeyValuesReader kvsReader2 = new SortedTestKeyValuesReader(new int[] { 1, 2, 3 }, new int[][] { { 1, 1 }, { 2, 2 }, { 3, 3 } });
    SortedTestKeyValuesReader kvsReader3 = new SortedTestKeyValuesReader(new int[] { 4, 5, 6, 7 }, new int[][] { { 4, 4 }, { 5, 5 }, { 6, 6 }, { 7, 7 } });
    SortedTestInput sInput1 = new SortedTestInput(kvsReader1);
    SortedTestInput sInput2 = new SortedTestInput(kvsReader2);
    SortedTestInput sInput3 = new SortedTestInput(kvsReader3);
    List<Input> sInputs = new LinkedList<Input>();
    sInputs.add(sInput1);
    sInputs.add(sInput2);
    sInputs.add(sInput3);
    OrderedGroupedMergedKVInput input = new OrderedGroupedMergedKVInput(createMergedInputContext(), sInputs);
    KeyValuesReader kvsReader = input.getReader();
    int keyCount = 0;
    while (kvsReader.next()) {
        keyCount++;
        if (keyCount == 4) {
            // Skip key 4: neither the key nor its values are requested.
            continue;
        }
        Integer key = (Integer) kvsReader.getCurrentKey();
        assertEquals(Integer.valueOf(keyCount), key);
        Iterator<Object> valuesIter = kvsReader.getCurrentValues().iterator();
        int valCount = 0;
        while (valuesIter.hasNext()) {
            valCount++;
            Integer val = (Integer) valuesIter.next();
            assertEquals(Integer.valueOf(keyCount), val);
        }
        // keyCount == 4 never reaches this point because of the continue above.
        if (keyCount == 2) {
            assertEquals(4, valCount);
        } else if (keyCount == 1 || keyCount == 3 || (keyCount >= 5 && keyCount <= 7)) {
            assertEquals(2, valCount);
        } else {
            fail("Unexpected key");
        }
    }
    // Guard against the reader terminating early after the skipped key: without this check
    // the loop could end right after key 4 and the test would still pass.
    assertEquals(7, keyCount);
    getNextFromFinishedReader(kvsReader);
}
Also used : Input(org.apache.tez.runtime.api.Input) KeyValuesReader(org.apache.tez.runtime.library.api.KeyValuesReader) LinkedList(java.util.LinkedList) Test(org.junit.Test)

Example 5 with KeyValuesReader

use of org.apache.tez.runtime.library.api.KeyValuesReader in project tez by apache.

the class ReduceProcessor method run.

/**
 * Runs the reduce task: validates that exactly one input and one output are supplied,
 * starts both, resolves the intermediate key/value classes and grouping comparator from
 * the job configuration, and then delegates to the new-API or old-API reducer.
 *
 * @param _inputs  logical inputs for this processor; must contain exactly one entry, which
 *                 must be an OrderedGroupedInputLegacy
 * @param _outputs logical outputs for this processor; must contain exactly one entry, which
 *                 must be an MROutputLegacy or an OrderedPartitionedKVOutput
 * @throws IOException if the number or type of inputs/outputs is invalid, or if the
 *                     new-API reducer fails with a ClassNotFoundException
 * @throws Exception   propagated from the inputs, outputs or reducer invocation
 */
@Override
public void run(Map<String, LogicalInput> _inputs, Map<String, LogicalOutput> _outputs) throws Exception {
    this.inputs = _inputs;
    this.outputs = _outputs;
    progressHelper = new ProgressHelper(this.inputs, processorContext, this.getClass().getSimpleName());
    LOG.info("Running reduce: " + processorContext.getUniqueIdentifier());
    if (_outputs.size() != 1) {
        throw new IOException("Invalid number of _outputs" + ", outputCount=" + _outputs.size());
    }
    if (_inputs.size() != 1) {
        throw new IOException("Invalid number of _inputs" + ", inputCount=" + _inputs.size());
    }
    LogicalInput in = _inputs.values().iterator().next();
    in.start();
    // Block until the single input reports ready before starting the output.
    List<Input> pendingInputs = new LinkedList<Input>();
    pendingInputs.add(in);
    processorContext.waitForAllInputsReady(pendingInputs);
    LOG.info("Input is ready for consumption. Starting Output");
    LogicalOutput out = _outputs.values().iterator().next();
    out.start();
    initTask(out);
    progressHelper.scheduleProgressTaskService(0, 100);
    this.statusUpdate();
    Class keyClass = ConfigUtils.getIntermediateInputKeyClass(jobConf);
    Class valueClass = ConfigUtils.getIntermediateInputValueClass(jobConf);
    LOG.info("Using keyClass: " + keyClass);
    LOG.info("Using valueClass: " + valueClass);
    RawComparator comparator = ConfigUtils.getInputKeySecondaryGroupingComparator(jobConf);
    LOG.info("Using comparator: " + comparator);
    reduceInputKeyCounter = mrReporter.getCounter(TaskCounter.REDUCE_INPUT_GROUPS);
    reduceInputValueCounter = mrReporter.getCounter(TaskCounter.REDUCE_INPUT_RECORDS);
    // Sanity check
    if (!(in instanceof OrderedGroupedInputLegacy)) {
        throw new IOException("Illegal input to reduce: " + in.getClass());
    }
    OrderedGroupedInputLegacy shuffleInput = (OrderedGroupedInputLegacy) in;
    KeyValuesReader kvReader = shuffleInput.getReader();
    KeyValueWriter kvWriter = null;
    if ((out instanceof MROutputLegacy)) {
        kvWriter = ((MROutputLegacy) out).getWriter();
    } else if ((out instanceof OrderedPartitionedKVOutput)) {
        kvWriter = ((OrderedPartitionedKVOutput) out).getWriter();
    } else {
        // Report the offending output's class (the message previously printed the input's class).
        throw new IOException("Illegal output to reduce: " + out.getClass());
    }
    if (useNewApi) {
        try {
            // The new-API path is handed the shuffle input itself rather than kvReader.
            runNewReducer(jobConf, mrReporter, shuffleInput, comparator, keyClass, valueClass, kvWriter);
        } catch (ClassNotFoundException cnfe) {
            throw new IOException(cnfe);
        }
    } else {
        runOldReducer(jobConf, mrReporter, kvReader, comparator, keyClass, valueClass, kvWriter);
    }
    done();
}
Also used : OrderedGroupedInputLegacy(org.apache.tez.runtime.library.input.OrderedGroupedInputLegacy) ProgressHelper(org.apache.tez.common.ProgressHelper) LogicalOutput(org.apache.tez.runtime.api.LogicalOutput) MROutputLegacy(org.apache.tez.mapreduce.output.MROutputLegacy) OrderedPartitionedKVOutput(org.apache.tez.runtime.library.output.OrderedPartitionedKVOutput) IOException(java.io.IOException) LinkedList(java.util.LinkedList) KeyValueWriter(org.apache.tez.runtime.library.api.KeyValueWriter) RawComparator(org.apache.hadoop.io.RawComparator) LogicalInput(org.apache.tez.runtime.api.LogicalInput) Input(org.apache.tez.runtime.api.Input) KeyValuesReader(org.apache.tez.runtime.library.api.KeyValuesReader)

Aggregations

KeyValuesReader (org.apache.tez.runtime.library.api.KeyValuesReader): 13, LinkedList (java.util.LinkedList): 9, Input (org.apache.tez.runtime.api.Input): 9, Test (org.junit.Test): 8, ArrayList (java.util.ArrayList): 2, BytesColumnVector (org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector): 2, ColumnVector (org.apache.hadoop.hive.ql.exec.vector.ColumnVector): 2, BinarySortableSerDe (org.apache.hadoop.hive.serde2.binarysortable.BinarySortableSerDe): 2, BinarySortableDeserializeRead (org.apache.hadoop.hive.serde2.binarysortable.fast.BinarySortableDeserializeRead): 2, LazyBinaryDeserializeRead (org.apache.hadoop.hive.serde2.lazybinary.fast.LazyBinaryDeserializeRead): 2, ObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector): 2, StructObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector): 2, MergedInputContext (org.apache.tez.runtime.api.MergedInputContext): 2, KeyValueReader (org.apache.tez.runtime.library.api.KeyValueReader): 2, IOException (java.io.IOException): 1, VectorExpressionWriter (org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpressionWriter): 1, RawComparator (org.apache.hadoop.io.RawComparator): 1, ProgressHelper (org.apache.tez.common.ProgressHelper): 1, MROutputLegacy (org.apache.tez.mapreduce.output.MROutputLegacy): 1, LogicalInput (org.apache.tez.runtime.api.LogicalInput): 1