use of org.apache.tez.runtime.library.api.KeyValuesReader in project tez by apache.
the class TestSortedGroupedMergedInput method testSkippedKey3.
/**
 * Reads all values for a key, but doesn't trigger the last hasNext() call.
 *
 * <p>Three identical sorted inputs are merged, so each key is expected to carry six values.
 * The second and third keys returned by the merged reader are skipped without touching
 * their values. For the first key the value loop exits right after the sixth value, so the
 * iterator never receives a trailing hasNext(); the fourth key is drained normally.
 */
@Test(timeout = 5000)
public void testSkippedKey3() throws Exception {
  SortedTestKeyValuesReader firstReader = new SortedTestKeyValuesReader(
      new int[] { 1, 2, 3, 4 },
      new int[][] { { 1, 1 }, { 2, 2 }, { 3, 3 }, { 4, 4 } });
  SortedTestKeyValuesReader secondReader = new SortedTestKeyValuesReader(
      new int[] { 1, 2, 3, 4 },
      new int[][] { { 1, 1 }, { 2, 2 }, { 3, 3 }, { 4, 4 } });
  SortedTestKeyValuesReader thirdReader = new SortedTestKeyValuesReader(
      new int[] { 1, 2, 3, 4 },
      new int[][] { { 1, 1 }, { 2, 2 }, { 3, 3 }, { 4, 4 } });

  List<Input> sources = new LinkedList<Input>();
  sources.add(new SortedTestInput(firstReader));
  sources.add(new SortedTestInput(secondReader));
  sources.add(new SortedTestInput(thirdReader));

  OrderedGroupedMergedKVInput mergedInput =
      new OrderedGroupedMergedKVInput(createMergedInputContext(), sources);
  KeyValuesReader mergedReader = mergedInput.getReader();

  int keysSeen = 0;
  while (mergedReader.next()) {
    keysSeen++;
    boolean skipThisKey = keysSeen == 2 || keysSeen == 3;
    if (!skipThisKey) {
      Integer currentKey = (Integer) mergedReader.getCurrentKey();
      assertEquals(Integer.valueOf(keysSeen), currentKey);

      int valuesSeen = 0;
      for (Iterator<Object> it = mergedReader.getCurrentValues().iterator(); it.hasNext();) {
        valuesSeen++;
        Integer currentValue = (Integer) it.next();
        assertEquals(Integer.valueOf(keysSeen), currentValue);
        // Leave the loop before the iterator is asked hasNext() one final time.
        if (keysSeen == 1 && valuesSeen == 6) {
          break;
        }
      }
      assertEquals(6, valuesSeen);
    }
  }
  getNextFromFinishedReader(mergedReader);
}
use of org.apache.tez.runtime.library.api.KeyValuesReader in project tez by apache.
the class TestSortedGroupedMergedInput method testSimpleConcatenatedMergedKeyValuesInput.
/**
 * Verifies that ConcatenatedMergedKeyValuesInput presents its three inputs back to back
 * instead of grouping equal keys across inputs.
 *
 * <p>Each input holds keys 1, 2, 3 with two values per key, so the merged reader is expected
 * to return nine keys in total (the sequence 1, 2, 3 once per input), each with exactly two
 * values equal to the key. The mocked context must see four notifyProgress() calls.
 */
@Test(timeout = 5000)
public void testSimpleConcatenatedMergedKeyValuesInput() throws Exception {
  SortedTestKeyValuesReader kvsReader1 = new SortedTestKeyValuesReader(
      new int[] { 1, 2, 3 }, new int[][] { { 1, 1 }, { 2, 2 }, { 3, 3 } });
  SortedTestKeyValuesReader kvsReader2 = new SortedTestKeyValuesReader(
      new int[] { 1, 2, 3 }, new int[][] { { 1, 1 }, { 2, 2 }, { 3, 3 } });
  SortedTestKeyValuesReader kvsReader3 = new SortedTestKeyValuesReader(
      new int[] { 1, 2, 3 }, new int[][] { { 1, 1 }, { 2, 2 }, { 3, 3 } });
  SortedTestInput sInput1 = new SortedTestInput(kvsReader1);
  SortedTestInput sInput2 = new SortedTestInput(kvsReader2);
  SortedTestInput sInput3 = new SortedTestInput(kvsReader3);
  List<Input> sInputs = new LinkedList<Input>();
  sInputs.add(sInput1);
  sInputs.add(sInput2);
  sInputs.add(sInput3);
  MergedInputContext mockContext = createMergedInputContext();
  ConcatenatedMergedKeyValuesInput input = new ConcatenatedMergedKeyValuesInput(mockContext, sInputs);
  KeyValuesReader kvsReader = input.getReader();

  // Every input above contributes the same three keys.
  final int keysPerInput = 3;
  int keyCount = 0;
  while (kvsReader.next()) {
    keyCount++;
    // Inputs are concatenated, not merged, so the key sequence 1, 2, 3 restarts per input.
    Integer expected = Integer.valueOf((keyCount - 1) % keysPerInput + 1);
    Integer key = (Integer) kvsReader.getCurrentKey();
    assertEquals(expected, key);
    Iterator<Object> valuesIter = kvsReader.getCurrentValues().iterator();
    int valCount = 0;
    while (valuesIter.hasNext()) {
      valCount++;
      Integer val = (Integer) valuesIter.next();
      // The fixture stores each key's own number as both of its values.
      assertEquals(expected, val);
    }
    // No grouping across inputs: only the two values from a single input per key.
    assertEquals(2, valCount);
  }
  assertEquals(9, keyCount);
  // one for each reader change and one to exit
  verify(mockContext, times(4)).notifyProgress();
  getNextFromFinishedReader(kvsReader);
}
use of org.apache.tez.runtime.library.api.KeyValuesReader in project tez by apache.
the class TestSortedGroupedMergedInput method testSimple.
/**
 * Merges three identical sorted inputs (keys 1, 2, 3 with two values each) through
 * OrderedGroupedMergedKVInput and checks that every key arrives in order with all six of
 * its values, each equal to the key. Also checks that the mocked context received four
 * notifyProgress() calls.
 */
@Test(timeout = 5000)
public void testSimple() throws Exception {
  SortedTestKeyValuesReader firstReader = new SortedTestKeyValuesReader(
      new int[] { 1, 2, 3 }, new int[][] { { 1, 1 }, { 2, 2 }, { 3, 3 } });
  SortedTestKeyValuesReader secondReader = new SortedTestKeyValuesReader(
      new int[] { 1, 2, 3 }, new int[][] { { 1, 1 }, { 2, 2 }, { 3, 3 } });
  SortedTestKeyValuesReader thirdReader = new SortedTestKeyValuesReader(
      new int[] { 1, 2, 3 }, new int[][] { { 1, 1 }, { 2, 2 }, { 3, 3 } });

  List<Input> sources = new LinkedList<Input>();
  sources.add(new SortedTestInput(firstReader));
  sources.add(new SortedTestInput(secondReader));
  sources.add(new SortedTestInput(thirdReader));

  MergedInputContext mockContext = createMergedInputContext();
  OrderedGroupedMergedKVInput mergedInput = new OrderedGroupedMergedKVInput(mockContext, sources);
  KeyValuesReader mergedReader = mergedInput.getReader();

  int keysSeen = 0;
  while (mergedReader.next()) {
    keysSeen++;
    Integer expected = Integer.valueOf(keysSeen);
    Integer currentKey = (Integer) mergedReader.getCurrentKey();
    assertEquals(expected, currentKey);

    int valuesSeen = 0;
    for (Object rawValue : mergedReader.getCurrentValues()) {
      valuesSeen++;
      Integer currentValue = (Integer) rawValue;
      assertEquals(expected, currentValue);
    }
    // Two values from each of the three inputs.
    assertEquals(6, valuesSeen);
  }
  // one for each reader change and one to exit
  verify(mockContext, times(4)).notifyProgress();
  getNextFromFinishedReader(mergedReader);
}
use of org.apache.tez.runtime.library.api.KeyValuesReader in project tez by apache.
the class TestSortedGroupedMergedInput method testSkippedKey2.
/**
 * Merges three sorted inputs with partially overlapping key sets ({2, 4}, {1, 2, 3} and
 * {4, 5, 6, 7}) and skips the fourth key without reading its values. Every other key must
 * equal its position in the merged stream and carry two values per input that contains it.
 */
@Test(timeout = 5000)
public void testSkippedKey2() throws Exception {
  SortedTestKeyValuesReader firstReader = new SortedTestKeyValuesReader(
      new int[] { 2, 4 }, new int[][] { { 2, 2 }, { 4, 4 } });
  SortedTestKeyValuesReader secondReader = new SortedTestKeyValuesReader(
      new int[] { 1, 2, 3 }, new int[][] { { 1, 1 }, { 2, 2 }, { 3, 3 } });
  SortedTestKeyValuesReader thirdReader = new SortedTestKeyValuesReader(
      new int[] { 4, 5, 6, 7 }, new int[][] { { 4, 4 }, { 5, 5 }, { 6, 6 }, { 7, 7 } });

  List<Input> sources = new LinkedList<Input>();
  sources.add(new SortedTestInput(firstReader));
  sources.add(new SortedTestInput(secondReader));
  sources.add(new SortedTestInput(thirdReader));

  OrderedGroupedMergedKVInput mergedInput =
      new OrderedGroupedMergedKVInput(createMergedInputContext(), sources);
  KeyValuesReader mergedReader = mergedInput.getReader();

  int keysSeen = 0;
  while (mergedReader.next()) {
    keysSeen++;
    if (keysSeen == 4) {
      // Move on to the next key without reading this one's key or values.
      continue;
    }
    Integer expected = Integer.valueOf(keysSeen);
    Integer currentKey = (Integer) mergedReader.getCurrentKey();
    assertEquals(expected, currentKey);

    int valuesSeen = 0;
    for (Iterator<Object> it = mergedReader.getCurrentValues().iterator(); it.hasNext();) {
      valuesSeen++;
      Integer currentValue = (Integer) it.next();
      assertEquals(expected, currentValue);
    }

    switch (keysSeen) {
      case 1:
      case 3:
      case 5:
      case 6:
      case 7:
        // Key present in a single input only.
        assertEquals(2, valuesSeen);
        break;
      case 2:
        // Key present in two inputs.
        assertEquals(4, valuesSeen);
        break;
      case 4:
        fail("Key 4 should have been skipped");
        break;
      default:
        fail("Unexpected key");
        break;
    }
  }
  getNextFromFinishedReader(mergedReader);
}
use of org.apache.tez.runtime.library.api.KeyValuesReader in project tez by apache.
the class ReduceProcessor method run.
/**
 * Runs the reduce phase using exactly one logical input and one logical output.
 *
 * <p>The input is started and awaited, the output is started and the task initialised, then
 * the key/value classes and grouping comparator are read from the job configuration. The
 * reducer is executed through either the new or the old API depending on {@code useNewApi},
 * and {@code done()} is called afterwards.
 *
 * @param _inputs map of logical inputs; must contain exactly one entry, which must be an
 *     {@code OrderedGroupedInputLegacy}
 * @param _outputs map of logical outputs; must contain exactly one entry, which must be an
 *     {@code MROutputLegacy} or an {@code OrderedPartitionedKVOutput}
 * @throws IOException if the number of inputs or outputs is not one, if the input or output
 *     is of an unsupported type, or if the new-API reducer raises a
 *     {@code ClassNotFoundException} (wrapped as the cause)
 * @throws Exception if any of the invoked steps fails
 */
@Override
public void run(Map<String, LogicalInput> _inputs, Map<String, LogicalOutput> _outputs) throws Exception {
  this.inputs = _inputs;
  this.outputs = _outputs;
  progressHelper = new ProgressHelper(this.inputs, processorContext, this.getClass().getSimpleName());
  LOG.info("Running reduce: " + processorContext.getUniqueIdentifier());

  // A reduce task is wired to exactly one output and one input.
  if (_outputs.size() != 1) {
    throw new IOException("Invalid number of _outputs" + ", outputCount=" + _outputs.size());
  }
  if (_inputs.size() != 1) {
    throw new IOException("Invalid number of _inputs" + ", inputCount=" + _inputs.size());
  }

  LogicalInput in = _inputs.values().iterator().next();
  in.start();
  List<Input> pendingInputs = new LinkedList<Input>();
  pendingInputs.add(in);
  processorContext.waitForAllInputsReady(pendingInputs);
  LOG.info("Input is ready for consumption. Starting Output");

  LogicalOutput out = _outputs.values().iterator().next();
  out.start();
  initTask(out);
  progressHelper.scheduleProgressTaskService(0, 100);
  this.statusUpdate();

  Class keyClass = ConfigUtils.getIntermediateInputKeyClass(jobConf);
  Class valueClass = ConfigUtils.getIntermediateInputValueClass(jobConf);
  LOG.info("Using keyClass: " + keyClass);
  LOG.info("Using valueClass: " + valueClass);
  RawComparator comparator = ConfigUtils.getInputKeySecondaryGroupingComparator(jobConf);
  LOG.info("Using comparator: " + comparator);

  reduceInputKeyCounter = mrReporter.getCounter(TaskCounter.REDUCE_INPUT_GROUPS);
  reduceInputValueCounter = mrReporter.getCounter(TaskCounter.REDUCE_INPUT_RECORDS);

  // Sanity check
  if (!(in instanceof OrderedGroupedInputLegacy)) {
    throw new IOException("Illegal input to reduce: " + in.getClass());
  }
  OrderedGroupedInputLegacy shuffleInput = (OrderedGroupedInputLegacy) in;
  // Obtained up front; only the old-API path below consumes it directly.
  KeyValuesReader kvReader = shuffleInput.getReader();

  KeyValueWriter kvWriter = null;
  if ((out instanceof MROutputLegacy)) {
    kvWriter = ((MROutputLegacy) out).getWriter();
  } else if ((out instanceof OrderedPartitionedKVOutput)) {
    kvWriter = ((OrderedPartitionedKVOutput) out).getWriter();
  } else {
    // Report the offending output's class (the message previously printed the input's class).
    throw new IOException("Illegal output to reduce: " + out.getClass());
  }

  if (useNewApi) {
    try {
      runNewReducer(jobConf, mrReporter, shuffleInput, comparator, keyClass, valueClass, kvWriter);
    } catch (ClassNotFoundException cnfe) {
      throw new IOException(cnfe);
    }
  } else {
    runOldReducer(jobConf, mrReporter, kvReader, comparator, keyClass, valueClass, kvWriter);
  }
  done();
}
Aggregations