Use of org.apache.hadoop.hive.ql.exec.vector.util.batchgen.VectorBatchGenerateStream in the Apache Hive project.
Method createExpectedTestRowMultiSet of the class TestMapJoinOperator.
/**
 * Simulates the join by driving the test big table data against the test small table HashMap and
 * builds the expected output as a multi-set of test rows (i.e. each row and its occurrence count).
 *
 * <p>Every row of every big table batch is extracted, its key columns are copied into a lookup key,
 * and the key is probed in {@code testData.smallTableKeyHashMap}. What gets added to the result
 * depends on {@code testDesc.vectorMapJoinVariation}:
 * <ul>
 *   <li>INNER / OUTER match: one output row per small table value row, made of the retained big
 *       table columns followed by the retained small table value columns.</li>
 *   <li>INNER_BIG_ONLY match: the retained big table columns, repeated once per small table
 *       value count.</li>
 *   <li>LEFT_SEMI match: the retained big table columns, once.</li>
 *   <li>OUTER non-match: the retained big table columns with the remaining output slots left
 *       null.</li>
 * </ul>
 *
 * @param testDesc describes the join under test (type infos, key/retain column numbers, output
 *                 column names and the join variation)
 * @param testData supplies the big table batch stream and the small table key/value structures
 * @return the multi-set of rows the join is expected to produce
 * @throws HiveException propagated from the Hive vectorization helpers invoked here
 * @throws RuntimeException if the join variation is not one of the variations handled above
 */
private RowTestObjectsMultiSet createExpectedTestRowMultiSet(MapJoinTestDescription testDesc, MapJoinTestData testData) throws HiveException {
  RowTestObjectsMultiSet expectedTestRowMultiSet = new RowTestObjectsMultiSet();

  // Whole big table rows are extracted into bigTableRowObjects below and then indexed by big
  // table column number, so the extractor has to be set up with the type infos of ALL big table
  // columns. Initializing it with only the key type infos is correct solely when every big table
  // column is a key column in key order.
  VectorExtractRow vectorExtractRow = new VectorExtractRow();
  vectorExtractRow.init(testDesc.bigTableTypeInfos);

  final int bigTableColumnCount = testDesc.bigTableTypeInfos.length;
  Object[] bigTableRowObjects = new Object[bigTableColumnCount];

  final int bigTableKeyColumnCount = testDesc.bigTableKeyTypeInfos.length;
  Object[] bigTableKeyObjects = new Object[bigTableKeyColumnCount];

  VectorBatchGenerateStream bigTableBatchStream = testData.getBigTableBatchStream();
  VectorizedRowBatch batch = testData.getBigTableBatch();
  bigTableBatchStream.reset();
  while (bigTableBatchStream.isNext()) {
    batch.reset();
    bigTableBatchStream.fillNext(batch);

    // Read from the batch that was just filled rather than going back through testData.
    final int size = batch.size;
    for (int r = 0; r < size; r++) {
      vectorExtractRow.extractRow(batch, r, bigTableRowObjects);

      // Form the key object array. Each key object is copied through its object inspector
      // (presumably because the extractor may reuse row objects between calls -- confirm).
      for (int k = 0; k < bigTableKeyColumnCount; k++) {
        final int keyColumnNum = testDesc.bigTableKeyColumnNums[k];
        bigTableKeyObjects[k] =
            ((PrimitiveObjectInspector) testDesc.bigTableObjectInspectors[keyColumnNum])
                .copyObject(bigTableRowObjects[keyColumnNum]);
      }
      RowTestObjects testKey = new RowTestObjects(bigTableKeyObjects);

      // Single lookup: a null result means the key has no small table match.
      final Integer matchedKeyIndex = testData.smallTableKeyHashMap.get(testKey);
      if (matchedKeyIndex == null) {
        if (testDesc.vectorMapJoinVariation == VectorMapJoinVariation.OUTER) {
          // Non-match row: the small table value slots stay null, which is what a freshly
          // allocated Object[] already contains.
          Object[] outputObjects = new Object[testDesc.outputColumnNames.length];
          addBigTableRetained(testDesc, bigTableRowObjects, outputObjects);
          addToOutput(testDesc, expectedTestRowMultiSet, outputObjects);
        }
        continue;
      }

      final int smallTableKeyIndex = matchedKeyIndex;
      switch (testDesc.vectorMapJoinVariation) {
      case INNER:
      case OUTER:
        {
          // One row per value.
          final int bigTableRetainColumnNumsLength = testDesc.bigTableRetainColumnNums.length;
          final int smallTableRetainValueColumnNumsLength =
              testDesc.smallTableRetainValueColumnNums.length;
          for (RowTestObjects valueRowObjects : testData.smallTableValues.get(smallTableKeyIndex)) {
            Object[] outputObjects = new Object[testDesc.outputColumnNames.length];
            addBigTableRetained(testDesc, bigTableRowObjects, outputObjects);
            Object[] valueRow = valueRowObjects.getRow();
            // Retained small table values go right after the retained big table columns.
            for (int o = 0; o < smallTableRetainValueColumnNumsLength; o++) {
              outputObjects[bigTableRetainColumnNumsLength + o] =
                  valueRow[testDesc.smallTableRetainValueColumnNums[o]];
            }
            addToOutput(testDesc, expectedTestRowMultiSet, outputObjects);
          }
        }
        break;
      case INNER_BIG_ONLY:
        {
          // Value count rows.
          final int valueCount = testData.smallTableValueCounts.get(smallTableKeyIndex);
          for (int v = 0; v < valueCount; v++) {
            Object[] outputObjects = new Object[testDesc.outputColumnNames.length];
            addBigTableRetained(testDesc, bigTableRowObjects, outputObjects);
            addToOutput(testDesc, expectedTestRowMultiSet, outputObjects);
          }
        }
        break;
      case LEFT_SEMI:
        {
          // One row (existence).
          Object[] outputObjects = new Object[testDesc.outputColumnNames.length];
          addBigTableRetained(testDesc, bigTableRowObjects, outputObjects);
          addToOutput(testDesc, expectedTestRowMultiSet, outputObjects);
        }
        break;
      default:
        throw new RuntimeException("Unknown operator variation " + testDesc.vectorMapJoinVariation);
      }
    }
  }
  return expectedTestRowMultiSet;
}
Aggregations