Search in sources :

Example 1 with RowTestObjectsMultiSet

use of org.apache.hadoop.hive.ql.exec.util.rowobjects.RowTestObjectsMultiSet in project hive by apache.

In the class TestMapJoinOperator, the method executeTest.

/**
 * Computes the expected join output for the given test data once, then runs
 * every {@link MapJoinTestImplementation} variation against that same expectation.
 *
 * @param testDesc description of the map join under test
 * @param testData test data handed to each implementation variation
 * @throws Exception propagated from building the expectation or from any variation
 */
private void executeTest(MapJoinTestDescription testDesc, MapJoinTestData testData) throws Exception {
    final RowTestObjectsMultiSet expected = createExpectedTestRowMultiSet(testDesc, testData);

    // UNDONE: Inner count
    System.out.println(
        "*BENCHMARK* expectedTestRowMultiSet rowCount " + expected.getRowCount()
            + " totalCount " + expected.getTotalCount());

    // Walk the enum constants in declaration order and exercise each variation.
    final MapJoinTestImplementation[] variations = MapJoinTestImplementation.values();
    for (int i = 0; i < variations.length; i++) {
        executeTestImplementation(variations[i], testDesc, testData, expected);
    }
}
Also used : RowTestObjectsMultiSet(org.apache.hadoop.hive.ql.exec.util.rowobjects.RowTestObjectsMultiSet) MapJoinTestImplementation(org.apache.hadoop.hive.ql.exec.vector.mapjoin.MapJoinTestConfig.MapJoinTestImplementation)

Example 2 with RowTestObjectsMultiSet

use of org.apache.hadoop.hive.ql.exec.util.rowobjects.RowTestObjectsMultiSet in project hive by apache.

In the class TestMapJoinOperator, the method executeTestImplementation.

/**
 * Runs a single map join implementation variation over the big table test data
 * and checks the collected output against the expected multi-set.
 *
 * @param mapJoinImplementation the implementation variation to create and drive
 * @param testDesc description of the map join under test
 * @param testData test data used to create the operator and to drive the big table side
 * @param expectedTestRowMultiSet the rows (with occurrence counts) the join is expected to produce
 * @throws AssertionError if the collected output does not verify against the expected rows
 * @throws Exception propagated from operator creation or from driving the data
 */
private void executeTestImplementation(MapJoinTestImplementation mapJoinImplementation, MapJoinTestDescription testDesc, MapJoinTestData testData, RowTestObjectsMultiSet expectedTestRowMultiSet) throws Exception {
    System.out.println("*BENCHMARK* Starting " + mapJoinImplementation + " test");
    // UNDONE: Parameterize for implementation variation?
    MapJoinDesc mapJoinDesc = MapJoinTestConfig.createMapJoinDesc(testDesc);
    final boolean isVectorOutput = isVectorOutput(mapJoinImplementation);
    // Both collector flavors are handed this multi-set; it is what gets verified below.
    RowTestObjectsMultiSet outputTestRowMultiSet = new RowTestObjectsMultiSet();
    Operator<? extends OperatorDesc> testCollectorOperator = (isVectorOutput
        ? new TestMultiSetVectorCollectorOperator(testDesc.outputTypeInfos, testDesc.outputObjectInspectors, outputTestRowMultiSet)
        : new TestMultiSetCollectorOperator(testDesc.outputObjectInspectors, outputTestRowMultiSet));
    MapJoinOperator operator = MapJoinTestConfig.createMapJoinImplementation(mapJoinImplementation, testDesc, testCollectorOperator, testData, mapJoinDesc);
    // Feed the big table side using the driver that matches the output mode.
    if (isVectorOutput) {
        MapJoinTestData.driveVectorBigTableData(testDesc, testData, operator);
    } else {
        MapJoinTestData.driveBigTableData(testDesc, testData, operator);
    }
    // NOTE(review): this cast assumes both collector operator classes extend
    // CountCollectorTestOperator -- confirm against their declarations.
    System.out.println("*BENCHMARK* executeTestImplementation row count " + ((CountCollectorTestOperator) testCollectorOperator).getRowCount());
    // Verify the output! A mismatch must fail the test, not merely be logged.
    if (!expectedTestRowMultiSet.verify(outputTestRowMultiSet)) {
        System.out.println("*BENCHMARK* verify failed for " + mapJoinImplementation);
        throw new AssertionError("Map join output verify failed for " + mapJoinImplementation);
    }
    System.out.println("*BENCHMARK* verify succeeded for " + mapJoinImplementation);
}
Also used : MapJoinOperator(org.apache.hadoop.hive.ql.exec.MapJoinOperator) VectorMapJoinOperator(org.apache.hadoop.hive.ql.exec.vector.VectorMapJoinOperator) MapJoinDesc(org.apache.hadoop.hive.ql.plan.MapJoinDesc) VectorMapJoinDesc(org.apache.hadoop.hive.ql.plan.VectorMapJoinDesc) CountCollectorTestOperator(org.apache.hadoop.hive.ql.exec.util.collectoroperator.CountCollectorTestOperator) RowTestObjectsMultiSet(org.apache.hadoop.hive.ql.exec.util.rowobjects.RowTestObjectsMultiSet)

Example 3 with RowTestObjectsMultiSet

use of org.apache.hadoop.hive.ql.exec.util.rowobjects.RowTestObjectsMultiSet in project hive by apache.

In the class TestMapJoinOperator, the method createExpectedTestRowMultiSet.

/*
   * Simulate the join by driving the test big table data by our test small table HashMap and
   * create the expected output as a multi-set of TestRow (i.e. TestRow and occurrence count).
   *
   * For every big table row the key columns are looked up in testData.smallTableKeyHashMap:
   *   - INNER / OUTER match: one output row per small table value, with the retained small
   *     table value columns placed after the big table retained columns.
   *   - INNER_BIG_ONLY match: one output row per counted small table value, big table
   *     retained columns only.
   *   - LEFT_SEMI match: exactly one output row, big table retained columns only.
   *   - OUTER non-match: one output row whose small table value columns are null.
   *
   * Throws RuntimeException for a matched row when the join variation is not one of the above.
   */
private RowTestObjectsMultiSet createExpectedTestRowMultiSet(MapJoinTestDescription testDesc, MapJoinTestData testData) throws HiveException {
    RowTestObjectsMultiSet expectedTestRowMultiSet = new RowTestObjectsMultiSet();
    // Whole big table rows are extracted below (and key columns are then picked out of them by
    // column number), so the extractor is set up with the types of ALL big table columns
    // rather than only the key column types.
    VectorExtractRow vectorExtractRow = new VectorExtractRow();
    vectorExtractRow.init(testDesc.bigTableTypeInfos);
    final int bigTableColumnCount = testDesc.bigTableTypeInfos.length;
    Object[] bigTableRowObjects = new Object[bigTableColumnCount];
    final int bigTableKeyColumnCount = testDesc.bigTableKeyTypeInfos.length;
    Object[] bigTableKeyObjects = new Object[bigTableKeyColumnCount];
    VectorBatchGenerateStream bigTableBatchStream = testData.getBigTableBatchStream();
    VectorizedRowBatch batch = testData.getBigTableBatch();
    bigTableBatchStream.reset();
    while (bigTableBatchStream.isNext()) {
        batch.reset();
        bigTableBatchStream.fillNext(batch);
        // Read from the same batch reference that was just filled.
        final int size = batch.size;
        for (int r = 0; r < size; r++) {
            vectorExtractRow.extractRow(batch, r, bigTableRowObjects);
            // Form key object array (each key object is copied via its object inspector).
            for (int k = 0; k < bigTableKeyColumnCount; k++) {
                int keyColumnNum = testDesc.bigTableKeyColumnNums[k];
                bigTableKeyObjects[k] = ((PrimitiveObjectInspector) testDesc.bigTableObjectInspectors[keyColumnNum]).copyObject(bigTableRowObjects[keyColumnNum]);
            }
            // The key wrapper is only used for the lookup below; it is not retained.
            RowTestObjects testKey = new RowTestObjects(bigTableKeyObjects);
            if (testData.smallTableKeyHashMap.containsKey(testKey)) {
                int smallTableKeyIndex = testData.smallTableKeyHashMap.get(testKey);
                switch(testDesc.vectorMapJoinVariation) {
                    case INNER:
                    case OUTER:
                        {
                            // One row per value.
                            ArrayList<RowTestObjects> valueList = testData.smallTableValues.get(smallTableKeyIndex);
                            final int valueCount = valueList.size();
                            for (int v = 0; v < valueCount; v++) {
                                Object[] outputObjects = new Object[testDesc.outputColumnNames.length];
                                addBigTableRetained(testDesc, bigTableRowObjects, outputObjects);
                                Object[] valueRow = valueList.get(v).getRow();
                                final int bigTableRetainColumnNumsLength = testDesc.bigTableRetainColumnNums.length;
                                final int smallTableRetainValueColumnNumsLength = testDesc.smallTableRetainValueColumnNums.length;
                                // Small table values go right after the big table retained columns.
                                for (int o = 0; o < smallTableRetainValueColumnNumsLength; o++) {
                                    outputObjects[bigTableRetainColumnNumsLength + o] = valueRow[testDesc.smallTableRetainValueColumnNums[o]];
                                }
                                addToOutput(testDesc, expectedTestRowMultiSet, outputObjects);
                            }
                        }
                        break;
                    case INNER_BIG_ONLY:
                        {
                            // Value count rows.
                            final int valueCount = testData.smallTableValueCounts.get(smallTableKeyIndex);
                            for (int v = 0; v < valueCount; v++) {
                                Object[] outputObjects = new Object[testDesc.outputColumnNames.length];
                                addBigTableRetained(testDesc, bigTableRowObjects, outputObjects);
                                addToOutput(testDesc, expectedTestRowMultiSet, outputObjects);
                            }
                        }
                        break;
                    case LEFT_SEMI:
                        {
                            // One row (existence).
                            Object[] outputObjects = new Object[testDesc.outputColumnNames.length];
                            addBigTableRetained(testDesc, bigTableRowObjects, outputObjects);
                            addToOutput(testDesc, expectedTestRowMultiSet, outputObjects);
                        }
                        break;
                    default:
                        throw new RuntimeException("Unknown operator variation " + testDesc.vectorMapJoinVariation);
                }
            } else {
                if (testDesc.vectorMapJoinVariation == VectorMapJoinVariation.OUTER) {
                    // We need to add a non-match row with nulls for small table values.
                    Object[] outputObjects = new Object[testDesc.outputColumnNames.length];
                    addBigTableRetained(testDesc, bigTableRowObjects, outputObjects);
                    final int bigTableRetainColumnNumsLength = testDesc.bigTableRetainColumnNums.length;
                    final int smallTableRetainValueColumnNumsLength = testDesc.smallTableRetainValueColumnNums.length;
                    // Explicitly null the small table slots after the big table retained columns.
                    for (int o = 0; o < smallTableRetainValueColumnNumsLength; o++) {
                        outputObjects[bigTableRetainColumnNumsLength + o] = null;
                    }
                    addToOutput(testDesc, expectedTestRowMultiSet, outputObjects);
                }
            }
        }
    }
    return expectedTestRowMultiSet;
}
Also used : VectorizedRowBatch(org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch) VectorBatchGenerateStream(org.apache.hadoop.hive.ql.exec.vector.util.batchgen.VectorBatchGenerateStream) ArrayList(java.util.ArrayList) RowTestObjectsMultiSet(org.apache.hadoop.hive.ql.exec.util.rowobjects.RowTestObjectsMultiSet) VectorExtractRow(org.apache.hadoop.hive.ql.exec.vector.VectorExtractRow) RowTestObjects(org.apache.hadoop.hive.ql.exec.util.rowobjects.RowTestObjects)

Aggregations

RowTestObjectsMultiSet (org.apache.hadoop.hive.ql.exec.util.rowobjects.RowTestObjectsMultiSet)3 ArrayList (java.util.ArrayList)1 MapJoinOperator (org.apache.hadoop.hive.ql.exec.MapJoinOperator)1 CountCollectorTestOperator (org.apache.hadoop.hive.ql.exec.util.collectoroperator.CountCollectorTestOperator)1 RowTestObjects (org.apache.hadoop.hive.ql.exec.util.rowobjects.RowTestObjects)1 VectorExtractRow (org.apache.hadoop.hive.ql.exec.vector.VectorExtractRow)1 VectorMapJoinOperator (org.apache.hadoop.hive.ql.exec.vector.VectorMapJoinOperator)1 VectorizedRowBatch (org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch)1 MapJoinTestImplementation (org.apache.hadoop.hive.ql.exec.vector.mapjoin.MapJoinTestConfig.MapJoinTestImplementation)1 VectorBatchGenerateStream (org.apache.hadoop.hive.ql.exec.vector.util.batchgen.VectorBatchGenerateStream)1 MapJoinDesc (org.apache.hadoop.hive.ql.plan.MapJoinDesc)1 VectorMapJoinDesc (org.apache.hadoop.hive.ql.plan.VectorMapJoinDesc)1