Use of org.apache.tez.runtime.library.api.KeyValuesReader in project hive by apache.
The class KeyValuesInputMerger, method next().
/**
 * @return true if there are more key-values and advances to next key-values
 * @throws IOException
 */
@Override
public boolean next() throws IOException {
  // add the previous nextKVReader back to queue
  if (!nextKVReaders.isEmpty()) {
    for (KeyValuesReader kvReader : nextKVReaders) {
      addToQueue(kvReader);
    }
    nextKVReaders.clear();
  }
  KeyValuesReader nextKVReader = null;
  // get the new nextKVReader with lowest key
  nextKVReader = pQueue.poll();
  if (nextKVReader != null) {
    nextKVReaders.add(nextKVReader);
  }
  while (pQueue.peek() != null) {
    KeyValuesReader equalValueKVReader = pQueue.poll();
    if (pQueue.comparator().compare(nextKVReader, equalValueKVReader) == 0) {
      nextKVReaders.add(equalValueKVReader);
    } else {
      pQueue.add(equalValueKVReader);
      break;
    }
  }
  return !(nextKVReaders.isEmpty());
}
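For context, the merged input that KeyValuesInputMerger feeds is consumed through the standard Tez KeyValuesReader contract: next() advances to the next key, getCurrentKey() returns it, and getCurrentValues() returns the grouped values. The following is a minimal, hypothetical consumer sketch; the class and method names are illustrative and not part of Hive or Tez.

import java.io.IOException;

import org.apache.tez.runtime.library.api.KeyValuesReader;

// Hypothetical consumer: iterate keys, then the grouped values for each key.
public class KeyValuesReaderDrain {
  static void drain(KeyValuesReader reader) throws IOException {
    while (reader.next()) {
      Object key = reader.getCurrentKey();
      for (Object value : reader.getCurrentValues()) {
        // A real consumer would process (key, value) here.
      }
    }
  }
}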
Use of org.apache.tez.runtime.library.api.KeyValuesReader in project hive by apache.
The class ReduceRecordSource, method init().
void init(JobConf jconf, Operator<?> reducer, boolean vectorized, TableDesc keyTableDesc,
    TableDesc valueTableDesc, Reader reader, boolean handleGroupKey, byte tag,
    VectorizedRowBatchCtx batchContext, long vectorizedVertexNum,
    int vectorizedTestingReducerBatchSize) throws Exception {
  this.vectorizedVertexNum = vectorizedVertexNum;
  if (vectorizedTestingReducerBatchSize > VectorizedRowBatch.DEFAULT_SIZE) {
    // For now, we don't go higher than the default batch size unless we do more work
    // to verify every vectorized operator downstream can handle a larger batch size.
    vectorizedTestingReducerBatchSize = VectorizedRowBatch.DEFAULT_SIZE;
  }
  this.vectorizedTestingReducerBatchSize = vectorizedTestingReducerBatchSize;
  ObjectInspector keyObjectInspector;
  this.reducer = reducer;
  this.vectorized = vectorized;
  this.keyTableDesc = keyTableDesc;
  if (reader instanceof KeyValueReader) {
    this.reader = new KeyValuesFromKeyValue((KeyValueReader) reader);
  } else {
    this.reader = new KeyValuesFromKeyValues((KeyValuesReader) reader);
  }
  this.handleGroupKey = handleGroupKey;
  this.tag = tag;
  try {
    inputKeySerDe = ReflectionUtils.newInstance(keyTableDesc.getSerDeClass(), null);
    inputKeySerDe.initialize(null, keyTableDesc.getProperties(), null);
    keyObjectInspector = inputKeySerDe.getObjectInspector();
    if (vectorized) {
      keyStructInspector = (StructObjectInspector) keyObjectInspector;
      firstValueColumnOffset = keyStructInspector.getAllStructFieldRefs().size();
    }
    // We should initialize the SerDe with the TypeInfo when available.
    this.valueTableDesc = valueTableDesc;
    inputValueSerDe = (AbstractSerDe) ReflectionUtils.newInstance(valueTableDesc.getSerDeClass(), null);
    inputValueSerDe.initialize(null, valueTableDesc.getProperties(), null);
    valueObjectInspector = inputValueSerDe.getObjectInspector();
    ArrayList<ObjectInspector> ois = new ArrayList<ObjectInspector>();
    if (vectorized) {
      /* vectorization only works with struct object inspectors */
      valueStructInspectors = (StructObjectInspector) valueObjectInspector;
      final int totalColumns = firstValueColumnOffset + valueStructInspectors.getAllStructFieldRefs().size();
      rowObjectInspector = Utilities.constructVectorizedReduceRowOI(keyStructInspector, valueStructInspectors);
      batch = batchContext.createVectorizedRowBatch();
      // Setup vectorized deserialization for the key and value.
      BinarySortableSerDe binarySortableSerDe = (BinarySortableSerDe) inputKeySerDe;
      keyBinarySortableDeserializeToRow =
          new VectorDeserializeRow<BinarySortableDeserializeRead>(
              new BinarySortableDeserializeRead(
                  VectorizedBatchUtil.typeInfosFromStructObjectInspector(keyStructInspector),
                  (batchContext.getRowdataTypePhysicalVariations().length > firstValueColumnOffset)
                      ? Arrays.copyOfRange(batchContext.getRowdataTypePhysicalVariations(), 0, firstValueColumnOffset)
                      : batchContext.getRowdataTypePhysicalVariations(),
                  /* useExternalBuffer */ true,
                  binarySortableSerDe.getSortOrders(),
                  binarySortableSerDe.getNullMarkers(),
                  binarySortableSerDe.getNotNullMarkers()));
      keyBinarySortableDeserializeToRow.init(0);
      final int valuesSize = valueStructInspectors.getAllStructFieldRefs().size();
      if (valuesSize > 0) {
        valueLazyBinaryDeserializeToRow =
            new VectorDeserializeRow<LazyBinaryDeserializeRead>(
                new LazyBinaryDeserializeRead(
                    VectorizedBatchUtil.typeInfosFromStructObjectInspector(valueStructInspectors),
                    (batchContext.getRowdataTypePhysicalVariations().length >= totalColumns)
                        ? Arrays.copyOfRange(batchContext.getRowdataTypePhysicalVariations(), firstValueColumnOffset, totalColumns)
                        : null,
                    /* useExternalBuffer */ true));
        valueLazyBinaryDeserializeToRow.init(firstValueColumnOffset);
        // Create data buffers for value bytes column vectors.
        for (int i = firstValueColumnOffset; i < batch.numCols; i++) {
          ColumnVector colVector = batch.cols[i];
          if (colVector instanceof BytesColumnVector) {
            BytesColumnVector bytesColumnVector = (BytesColumnVector) colVector;
            bytesColumnVector.initBuffer();
          }
        }
      }
    } else {
      ois.add(keyObjectInspector);
      ois.add(valueObjectInspector);
      rowObjectInspector = ObjectInspectorFactory.getStandardStructObjectInspector(Utilities.reduceFieldNameList, ois);
    }
  } catch (Throwable e) {
    abort = true;
    if (e instanceof OutOfMemoryError) {
      // Don't create a new object if we are already out of memory
      throw (OutOfMemoryError) e;
    } else {
      throw new RuntimeException("Reduce operator initialization failed", e);
    }
  }
  perfLogger.perfLogEnd(CLASS_NAME, PerfLogger.TEZ_INIT_OPERATORS);
}
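The init method above hides the difference between Tez's single-value KeyValueReader and the grouped KeyValuesReader behind Hive's KeyValuesFromKeyValue and KeyValuesFromKeyValues wrappers, whose implementations are not shown on this page. Purely as a rough illustration of that idea, a hypothetical adapter that presents a KeyValueReader through the KeyValuesReader interface could look like this; the class name is invented and Hive's actual wrapper differs.

import java.io.IOException;
import java.util.Collections;

import org.apache.tez.runtime.library.api.KeyValueReader;
import org.apache.tez.runtime.library.api.KeyValuesReader;

// Hypothetical adapter: each key from a KeyValueReader carries exactly one value,
// so getCurrentValues() exposes that single value as a one-element Iterable.
public class SingleValueKeyValuesReader extends KeyValuesReader {

  private final KeyValueReader reader;

  public SingleValueKeyValuesReader(KeyValueReader reader) {
    this.reader = reader;
  }

  @Override
  public boolean next() throws IOException {
    return reader.next();
  }

  @Override
  public Object getCurrentKey() throws IOException {
    return reader.getCurrentKey();
  }

  @Override
  public Iterable<Object> getCurrentValues() throws IOException {
    return Collections.singletonList(reader.getCurrentValue());
  }
}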
Use of org.apache.tez.runtime.library.api.KeyValuesReader in project hive by apache.
The class ReduceRecordSource, method init() (an earlier variant of the method shown above, without the vectorizedTestingReducerBatchSize parameter).
void init(JobConf jconf, Operator<?> reducer, boolean vectorized, TableDesc keyTableDesc,
    TableDesc valueTableDesc, Reader reader, boolean handleGroupKey, byte tag,
    VectorizedRowBatchCtx batchContext, long vectorizedVertexNum) throws Exception {
  this.vectorizedVertexNum = vectorizedVertexNum;
  ObjectInspector keyObjectInspector;
  this.reducer = reducer;
  this.vectorized = vectorized;
  this.keyTableDesc = keyTableDesc;
  if (reader instanceof KeyValueReader) {
    this.reader = new KeyValuesFromKeyValue((KeyValueReader) reader);
  } else {
    this.reader = new KeyValuesFromKeyValues((KeyValuesReader) reader);
  }
  this.handleGroupKey = handleGroupKey;
  this.tag = tag;
  try {
    inputKeyDeserializer = ReflectionUtils.newInstance(keyTableDesc.getDeserializerClass(), null);
    SerDeUtils.initializeSerDe(inputKeyDeserializer, null, keyTableDesc.getProperties(), null);
    keyObjectInspector = inputKeyDeserializer.getObjectInspector();
    if (vectorized) {
      keyStructInspector = (StructObjectInspector) keyObjectInspector;
      firstValueColumnOffset = keyStructInspector.getAllStructFieldRefs().size();
    }
    // We should initialize the SerDe with the TypeInfo when available.
    this.valueTableDesc = valueTableDesc;
    inputValueDeserializer = (AbstractSerDe) ReflectionUtils.newInstance(valueTableDesc.getDeserializerClass(), null);
    SerDeUtils.initializeSerDe(inputValueDeserializer, null, valueTableDesc.getProperties(), null);
    valueObjectInspector = inputValueDeserializer.getObjectInspector();
    ArrayList<ObjectInspector> ois = new ArrayList<ObjectInspector>();
    if (vectorized) {
      /* vectorization only works with struct object inspectors */
      valueStructInspectors = (StructObjectInspector) valueObjectInspector;
      final int totalColumns = firstValueColumnOffset + valueStructInspectors.getAllStructFieldRefs().size();
      valueStringWriters = new ArrayList<VectorExpressionWriter>(totalColumns);
      valueStringWriters.addAll(Arrays.asList(
          VectorExpressionWriterFactory.genVectorStructExpressionWritables(keyStructInspector)));
      valueStringWriters.addAll(Arrays.asList(
          VectorExpressionWriterFactory.genVectorStructExpressionWritables(valueStructInspectors)));
      rowObjectInspector = Utilities.constructVectorizedReduceRowOI(keyStructInspector, valueStructInspectors);
      batch = batchContext.createVectorizedRowBatch();
      // Setup vectorized deserialization for the key and value.
      BinarySortableSerDe binarySortableSerDe = (BinarySortableSerDe) inputKeyDeserializer;
      keyBinarySortableDeserializeToRow =
          new VectorDeserializeRow<BinarySortableDeserializeRead>(
              new BinarySortableDeserializeRead(
                  VectorizedBatchUtil.typeInfosFromStructObjectInspector(keyStructInspector),
                  /* useExternalBuffer */ true,
                  binarySortableSerDe.getSortOrders()));
      keyBinarySortableDeserializeToRow.init(0);
      final int valuesSize = valueStructInspectors.getAllStructFieldRefs().size();
      if (valuesSize > 0) {
        valueLazyBinaryDeserializeToRow =
            new VectorDeserializeRow<LazyBinaryDeserializeRead>(
                new LazyBinaryDeserializeRead(
                    VectorizedBatchUtil.typeInfosFromStructObjectInspector(valueStructInspectors),
                    /* useExternalBuffer */ true));
        valueLazyBinaryDeserializeToRow.init(firstValueColumnOffset);
        // Create data buffers for value bytes column vectors.
        for (int i = firstValueColumnOffset; i < batch.numCols; i++) {
          ColumnVector colVector = batch.cols[i];
          if (colVector instanceof BytesColumnVector) {
            BytesColumnVector bytesColumnVector = (BytesColumnVector) colVector;
            bytesColumnVector.initBuffer();
          }
        }
      }
    } else {
      ois.add(keyObjectInspector);
      ois.add(valueObjectInspector);
      rowObjectInspector = ObjectInspectorFactory.getStandardStructObjectInspector(Utilities.reduceFieldNameList, ois);
    }
  } catch (Throwable e) {
    abort = true;
    if (e instanceof OutOfMemoryError) {
      // Don't create a new object if we are already out of memory
      throw (OutOfMemoryError) e;
    } else {
      throw new RuntimeException("Reduce operator initialization failed", e);
    }
  }
  perfLogger.PerfLogEnd(CLASS_NAME, PerfLogger.TEZ_INIT_OPERATORS);
}
Use of org.apache.tez.runtime.library.api.KeyValuesReader in project tez by apache.
The class TestSortedGroupedMergedInput, method testSkippedKey().
@Test(timeout = 5000)
public void testSkippedKey() throws Exception {
  SortedTestKeyValuesReader kvsReader1 = new SortedTestKeyValuesReader(new int[] { 1, 2, 3 },
      new int[][] { { 1, 1 }, { 2, 2 }, { 3, 3 } });
  SortedTestKeyValuesReader kvsReader2 = new SortedTestKeyValuesReader(new int[] { 1, 2, 3 },
      new int[][] { { 1, 1 }, { 2, 2 }, { 3, 3 } });
  SortedTestKeyValuesReader kvsReader3 = new SortedTestKeyValuesReader(new int[] { 1, 2, 3 },
      new int[][] { { 1, 1 }, { 2, 2 }, { 3, 3 } });
  SortedTestInput sInput1 = new SortedTestInput(kvsReader1);
  SortedTestInput sInput2 = new SortedTestInput(kvsReader2);
  SortedTestInput sInput3 = new SortedTestInput(kvsReader3);
  List<Input> sInputs = new LinkedList<Input>();
  sInputs.add(sInput1);
  sInputs.add(sInput2);
  sInputs.add(sInput3);
  OrderedGroupedMergedKVInput input = new OrderedGroupedMergedKVInput(createMergedInputContext(), sInputs);
  KeyValuesReader kvsReader = input.getReader();
  int keyCount = 0;
  while (kvsReader.next()) {
    keyCount++;
    if (keyCount == 2) {
      // Skip the second key entirely without reading its key or values;
      // the reader must still be positioned correctly for the next key.
      continue;
    }
    Integer key = (Integer) kvsReader.getCurrentKey();
    assertEquals(Integer.valueOf(keyCount), key);
    Iterator<Object> valuesIter = kvsReader.getCurrentValues().iterator();
    int valCount = 0;
    while (valuesIter.hasNext()) {
      valCount++;
      Integer val = (Integer) valuesIter.next();
      assertEquals(Integer.valueOf(keyCount), val);
    }
    assertEquals(6, valCount);
  }
  getNextFromFinishedReader(kvsReader);
}
Use of org.apache.tez.runtime.library.api.KeyValuesReader in project tez by apache.
The class TestSortedGroupedMergedInput, method testPartialValuesSkip().
@Test(timeout = 5000)
public void testPartialValuesSkip() throws Exception {
  SortedTestKeyValuesReader kvsReader1 = new SortedTestKeyValuesReader(new int[] { 1, 2, 3 },
      new int[][] { { 1, 1 }, { 2, 2 }, { 3, 3 } });
  SortedTestKeyValuesReader kvsReader2 = new SortedTestKeyValuesReader(new int[] { 1, 2, 3 },
      new int[][] { { 1, 1 }, { 2, 2 }, { 3, 3 } });
  SortedTestKeyValuesReader kvsReader3 = new SortedTestKeyValuesReader(new int[] { 1, 2, 3 },
      new int[][] { { 1, 1 }, { 2, 2 }, { 3, 3 } });
  SortedTestInput sInput1 = new SortedTestInput(kvsReader1);
  SortedTestInput sInput2 = new SortedTestInput(kvsReader2);
  SortedTestInput sInput3 = new SortedTestInput(kvsReader3);
  List<Input> sInputs = new LinkedList<Input>();
  sInputs.add(sInput1);
  sInputs.add(sInput2);
  sInputs.add(sInput3);
  OrderedGroupedMergedKVInput input = new OrderedGroupedMergedKVInput(createMergedInputContext(), sInputs);
  KeyValuesReader kvsReader = input.getReader();
  int keyCount = 0;
  while (kvsReader.next()) {
    keyCount++;
    Integer key = (Integer) kvsReader.getCurrentKey();
    assertEquals(Integer.valueOf(keyCount), key);
    Iterator<Object> valuesIter = kvsReader.getCurrentValues().iterator();
    int valCount = 0;
    while (valuesIter.hasNext()) {
      valCount++;
      if (keyCount == 2 && valCount == 3) {
        // Stop after consuming only three of the second key's six values;
        // the reader must still advance cleanly to the next key.
        break;
      }
      Integer val = (Integer) valuesIter.next();
      assertEquals(Integer.valueOf(keyCount), val);
    }
    if (keyCount == 2) {
      assertEquals(3, valCount);
    } else {
      assertEquals(6, valCount);
    }
  }
  getNextFromFinishedReader(kvsReader);
}
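The SortedTestKeyValuesReader helper used by these tests is not reproduced on this page. As a minimal sketch of what such a test double involves, a hypothetical KeyValuesReader backed by parallel arrays could be written as follows; the class name is invented and the real Tez test helper may differ in its details.

import java.io.IOException;
import java.util.ArrayList;
import java.util.List;

import org.apache.tez.runtime.library.api.KeyValuesReader;

// Hypothetical test double: keys[i] is returned together with the value group values[i].
public class ArrayBackedKeyValuesReader extends KeyValuesReader {

  private final int[] keys;
  private final int[][] values;
  private int current = -1;

  public ArrayBackedKeyValuesReader(int[] keys, int[][] values) {
    this.keys = keys;
    this.values = values;
  }

  @Override
  public boolean next() throws IOException {
    current++;
    return current < keys.length;
  }

  @Override
  public Object getCurrentKey() throws IOException {
    return keys[current];
  }

  @Override
  public Iterable<Object> getCurrentValues() throws IOException {
    List<Object> vals = new ArrayList<>();
    for (int v : values[current]) {
      vals.add(v);
    }
    return vals;
  }
}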