Use of org.apache.hadoop.hive.ql.exec.util.rowobjects.RowTestObjectsMultiSet in the Apache Hive project.
From the class TestMapJoinOperator, method executeTest.
/*
 * Runs one test configuration: builds the expected result multi-set once, then drives
 * every available MapJoin implementation variation against that same expectation.
 */
private void executeTest(MapJoinTestDescription testDesc, MapJoinTestData testData) throws Exception {

  // Simulate the join once to produce the expected output multi-set.
  RowTestObjectsMultiSet expected = createExpectedTestRowMultiSet(testDesc, testData);

  // UNDONE: Inner count
  System.out.println("*BENCHMARK* expectedTestRowMultiSet rowCount " + expected.getRowCount() +
      " totalCount " + expected.getTotalCount());

  // Execute all implementation variations.
  for (MapJoinTestImplementation impl : MapJoinTestImplementation.values()) {
    executeTestImplementation(impl, testDesc, testData, expected);
  }
}
Use of org.apache.hadoop.hive.ql.exec.util.rowobjects.RowTestObjectsMultiSet in the Apache Hive project.
From the class TestMapJoinOperator, method executeTestImplementation.
/*
 * Builds and runs one MapJoin implementation variation, collecting its output rows into a
 * multi-set and verifying them against the expected multi-set.
 *
 * FIX: the original only logged a verification mismatch and kept going, so a broken
 * implementation could silently "pass"; a mismatch now throws AssertionError (allowed by
 * the existing `throws Exception` signature) so the enclosing test actually fails.
 */
private void executeTestImplementation(MapJoinTestImplementation mapJoinImplementation,
    MapJoinTestDescription testDesc, MapJoinTestData testData,
    RowTestObjectsMultiSet expectedTestRowMultiSet) throws Exception {

  System.out.println("*BENCHMARK* Starting " + mapJoinImplementation + " test");

  // UNDONE: Parameterize for implementation variation?
  MapJoinDesc mapJoinDesc = MapJoinTestConfig.createMapJoinDesc(testDesc);

  final boolean isVectorOutput = isVectorOutput(mapJoinImplementation);

  // Collector sink that accumulates the operator's output into a multi-set for comparison;
  // row-mode and vector-mode implementations need different collector operators.
  RowTestObjectsMultiSet outputTestRowMultiSet = new RowTestObjectsMultiSet();
  Operator<? extends OperatorDesc> testCollectorOperator =
      (!isVectorOutput
          ? new TestMultiSetCollectorOperator(
              testDesc.outputObjectInspectors, outputTestRowMultiSet)
          : new TestMultiSetVectorCollectorOperator(
              testDesc.outputTypeInfos, testDesc.outputObjectInspectors, outputTestRowMultiSet));

  MapJoinOperator operator = MapJoinTestConfig.createMapJoinImplementation(
      mapJoinImplementation, testDesc, testCollectorOperator, testData, mapJoinDesc);

  // Drive the big-table side through the operator in the mode it expects.
  if (!isVectorOutput) {
    MapJoinTestData.driveBigTableData(testDesc, testData, operator);
  } else {
    MapJoinTestData.driveVectorBigTableData(testDesc, testData, operator);
  }

  System.out.println("*BENCHMARK* executeTestImplementation row count " +
      ((CountCollectorTestOperator) testCollectorOperator).getRowCount());

  // Verify the output!
  if (!expectedTestRowMultiSet.verify(outputTestRowMultiSet)) {
    System.out.println("*BENCHMARK* verify failed for " + mapJoinImplementation);
    throw new AssertionError(
        "Verify failed for MapJoin implementation " + mapJoinImplementation);
  } else {
    System.out.println("*BENCHMARK* verify succeeded for " + mapJoinImplementation);
  }
}
Use of org.apache.hadoop.hive.ql.exec.util.rowobjects.RowTestObjectsMultiSet in the Apache Hive project.
From the class TestMapJoinOperator, method createExpectedTestRowMultiSet.
/*
 * Simulate the join by driving the test big table data by our test small table HashMap and
 * create the expected output as a multi-set of TestRow (i.e. TestRow and occurrence count).
 *
 * The multi-set returned here is the reference result that each MapJoin implementation's
 * actual output is verified against.
 */
private RowTestObjectsMultiSet createExpectedTestRowMultiSet(MapJoinTestDescription testDesc, MapJoinTestData testData) throws HiveException {
RowTestObjectsMultiSet expectedTestRowMultiSet = new RowTestObjectsMultiSet();
// Used to pull individual row objects out of each vectorized batch.
VectorExtractRow vectorExtractRow = new VectorExtractRow();
vectorExtractRow.init(testDesc.bigTableKeyTypeInfos);
final int bigTableColumnCount = testDesc.bigTableTypeInfos.length;
Object[] bigTableRowObjects = new Object[bigTableColumnCount];
final int bigTableKeyColumnCount = testDesc.bigTableKeyTypeInfos.length;
Object[] bigTableKeyObjects = new Object[bigTableKeyColumnCount];
VectorBatchGenerateStream bigTableBatchStream = testData.getBigTableBatchStream();
VectorizedRowBatch batch = testData.getBigTableBatch();
// Replay the big-table batch stream from the beginning.
bigTableBatchStream.reset();
while (bigTableBatchStream.isNext()) {
batch.reset();
bigTableBatchStream.fillNext(batch);
// NOTE(review): `batch` and `testData.bigTableBatch` are presumably the same object
// (getBigTableBatch() above) — confirm; otherwise size/extract read a different batch.
final int size = testData.bigTableBatch.size;
for (int r = 0; r < size; r++) {
vectorExtractRow.extractRow(testData.bigTableBatch, r, bigTableRowObjects);
// Form key object array
for (int k = 0; k < bigTableKeyColumnCount; k++) {
int keyColumnNum = testDesc.bigTableKeyColumnNums[k];
bigTableKeyObjects[k] = bigTableRowObjects[keyColumnNum];
// Copy each key object so the lookup key is independent of the reused row buffer.
bigTableKeyObjects[k] = ((PrimitiveObjectInspector) testDesc.bigTableObjectInspectors[keyColumnNum]).copyObject(bigTableKeyObjects[k]);
}
RowTestObjects testKey = new RowTestObjects(bigTableKeyObjects);
// Matched key: emit rows per the join variation being simulated.
if (testData.smallTableKeyHashMap.containsKey(testKey)) {
int smallTableKeyIndex = testData.smallTableKeyHashMap.get(testKey);
switch(testDesc.vectorMapJoinVariation) {
case INNER:
case OUTER:
{
// One row per value.
ArrayList<RowTestObjects> valueList = testData.smallTableValues.get(smallTableKeyIndex);
final int valueCount = valueList.size();
for (int v = 0; v < valueCount; v++) {
Object[] outputObjects = new Object[testDesc.outputColumnNames.length];
// Big-table retained columns come first in the output row...
addBigTableRetained(testDesc, bigTableRowObjects, outputObjects);
Object[] valueRow = valueList.get(v).getRow();
final int bigTableRetainColumnNumsLength = testDesc.bigTableRetainColumnNums.length;
final int smallTableRetainValueColumnNumsLength = testDesc.smallTableRetainValueColumnNums.length;
// ...followed by the retained small-table value columns.
for (int o = 0; o < smallTableRetainValueColumnNumsLength; o++) {
outputObjects[bigTableRetainColumnNumsLength + o] = valueRow[testDesc.smallTableRetainValueColumnNums[o]];
}
addToOutput(testDesc, expectedTestRowMultiSet, outputObjects);
}
}
break;
case INNER_BIG_ONLY:
{
// Value count rows.
final int valueCount = testData.smallTableValueCounts.get(smallTableKeyIndex);
// Big-table-only inner join: repeat the big-table row once per small-table value.
for (int v = 0; v < valueCount; v++) {
Object[] outputObjects = new Object[testDesc.outputColumnNames.length];
addBigTableRetained(testDesc, bigTableRowObjects, outputObjects);
addToOutput(testDesc, expectedTestRowMultiSet, outputObjects);
}
}
break;
case LEFT_SEMI:
{
// One row (existence).
Object[] outputObjects = new Object[testDesc.outputColumnNames.length];
addBigTableRetained(testDesc, bigTableRowObjects, outputObjects);
addToOutput(testDesc, expectedTestRowMultiSet, outputObjects);
}
break;
default:
throw new RuntimeException("Unknown operator variation " + testDesc.vectorMapJoinVariation);
}
} else {
// Unmatched key: only OUTER joins produce a row (big-table side plus nulls).
if (testDesc.vectorMapJoinVariation == VectorMapJoinVariation.OUTER) {
// We need to add a non-match row with nulls for small table values.
Object[] outputObjects = new Object[testDesc.outputColumnNames.length];
addBigTableRetained(testDesc, bigTableRowObjects, outputObjects);
final int bigTableRetainColumnNumsLength = testDesc.bigTableRetainColumnNums.length;
final int smallTableRetainValueColumnNumsLength = testDesc.smallTableRetainValueColumnNums.length;
for (int o = 0; o < smallTableRetainValueColumnNumsLength; o++) {
outputObjects[bigTableRetainColumnNumsLength + o] = null;
}
addToOutput(testDesc, expectedTestRowMultiSet, outputObjects);
}
}
}
}
return expectedTestRowMultiSet;
}
Aggregations