Search in sources :

Example 1 with VectorBatchGenerateStream

use of org.apache.hadoop.hive.ql.exec.vector.util.batchgen.VectorBatchGenerateStream in project hive by apache.

In the class TestMapJoinOperator, method createExpectedTestRowMultiSet.

/**
 * Simulates the join by driving the test big table data against the test small table HashMap
 * and creates the expected output as a multi-set of TestRow (i.e. TestRow and occurrence count).
 *
 * <p>For every big table row the key columns are copied out and looked up in
 * {@code testData.smallTableKeyHashMap}. What is emitted depends on
 * {@code testDesc.vectorMapJoinVariation}:
 * <ul>
 *   <li>INNER / OUTER match: one output row per small table value row;</li>
 *   <li>INNER_BIG_ONLY match: the big table retained columns, repeated once per small table
 *       value count;</li>
 *   <li>LEFT_SEMI match: a single row (existence only);</li>
 *   <li>OUTER non-match: a single row whose small table value columns are null.</li>
 * </ul>
 *
 * @param testDesc describes the join under test (type infos, key/retain column numbers,
 *                 output column names, join variation)
 * @param testData the generated big table stream/batch and the small table key map and values
 * @return the multi-set of expected output rows
 * @throws HiveException if one of the called Hive helpers fails
 * @throws RuntimeException if the join variation is not one of the handled cases
 */
private RowTestObjectsMultiSet createExpectedTestRowMultiSet(MapJoinTestDescription testDesc, MapJoinTestData testData) throws HiveException {
    RowTestObjectsMultiSet expectedTestRowMultiSet = new RowTestObjectsMultiSet();

    // Whole big table rows are extracted below and then indexed by big table column number
    // (for the keys and for the retained columns), so the extractor has to be initialized
    // with the type infos of ALL big table columns, not only of the key columns.
    VectorExtractRow vectorExtractRow = new VectorExtractRow();
    vectorExtractRow.init(testDesc.bigTableTypeInfos);

    final int bigTableColumnCount = testDesc.bigTableTypeInfos.length;
    Object[] bigTableRowObjects = new Object[bigTableColumnCount];
    final int bigTableKeyColumnCount = testDesc.bigTableKeyTypeInfos.length;
    Object[] bigTableKeyObjects = new Object[bigTableKeyColumnCount];

    VectorBatchGenerateStream bigTableBatchStream = testData.getBigTableBatchStream();
    VectorizedRowBatch batch = testData.getBigTableBatch();
    bigTableBatchStream.reset();
    while (bigTableBatchStream.isNext()) {
        batch.reset();
        bigTableBatchStream.fillNext(batch);

        // Read from the very batch that was just filled.
        final int size = batch.size;
        for (int r = 0; r < size; r++) {
            vectorExtractRow.extractRow(batch, r, bigTableRowObjects);

            // Form key object array. The key objects are copied through the column's object
            // inspector so the lookup key does not alias the extracted row objects.
            for (int k = 0; k < bigTableKeyColumnCount; k++) {
                int keyColumnNum = testDesc.bigTableKeyColumnNums[k];
                bigTableKeyObjects[k] = ((PrimitiveObjectInspector) testDesc.bigTableObjectInspectors[keyColumnNum]).copyObject(bigTableRowObjects[keyColumnNum]);
            }
            RowTestObjects testKey = new RowTestObjects(bigTableKeyObjects);

            if (testData.smallTableKeyHashMap.containsKey(testKey)) {
                int smallTableKeyIndex = testData.smallTableKeyHashMap.get(testKey);
                switch(testDesc.vectorMapJoinVariation) {
                    case INNER:
                    case OUTER:
                        {
                            // One row per value.
                            ArrayList<RowTestObjects> valueList = testData.smallTableValues.get(smallTableKeyIndex);
                            final int bigTableRetainColumnNumsLength = testDesc.bigTableRetainColumnNums.length;
                            final int smallTableRetainValueColumnNumsLength = testDesc.smallTableRetainValueColumnNums.length;
                            for (RowTestObjects smallTableValue : valueList) {
                                Object[] outputObjects = new Object[testDesc.outputColumnNames.length];
                                addBigTableRetained(testDesc, bigTableRowObjects, outputObjects);
                                Object[] valueRow = smallTableValue.getRow();
                                // Small table retained values follow the big table retained columns.
                                for (int o = 0; o < smallTableRetainValueColumnNumsLength; o++) {
                                    outputObjects[bigTableRetainColumnNumsLength + o] = valueRow[testDesc.smallTableRetainValueColumnNums[o]];
                                }
                                addToOutput(testDesc, expectedTestRowMultiSet, outputObjects);
                            }
                        }
                        break;
                    case INNER_BIG_ONLY:
                        {
                            // Value count rows.
                            final int valueCount = testData.smallTableValueCounts.get(smallTableKeyIndex);
                            for (int v = 0; v < valueCount; v++) {
                                Object[] outputObjects = new Object[testDesc.outputColumnNames.length];
                                addBigTableRetained(testDesc, bigTableRowObjects, outputObjects);
                                addToOutput(testDesc, expectedTestRowMultiSet, outputObjects);
                            }
                        }
                        break;
                    case LEFT_SEMI:
                        {
                            // One row (existence).
                            Object[] outputObjects = new Object[testDesc.outputColumnNames.length];
                            addBigTableRetained(testDesc, bigTableRowObjects, outputObjects);
                            addToOutput(testDesc, expectedTestRowMultiSet, outputObjects);
                        }
                        break;
                    default:
                        throw new RuntimeException("Unknown operator variation " + testDesc.vectorMapJoinVariation);
                }
            } else if (testDesc.vectorMapJoinVariation == VectorMapJoinVariation.OUTER) {
                // We need to add a non-match row with nulls for small table values. A freshly
                // allocated Object[] is already all-null, so the small table value slots that
                // follow the big table retained columns need no explicit assignment.
                Object[] outputObjects = new Object[testDesc.outputColumnNames.length];
                addBigTableRetained(testDesc, bigTableRowObjects, outputObjects);
                addToOutput(testDesc, expectedTestRowMultiSet, outputObjects);
            }
        }
    }
    return expectedTestRowMultiSet;
}
Also used : VectorizedRowBatch(org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch) VectorBatchGenerateStream(org.apache.hadoop.hive.ql.exec.vector.util.batchgen.VectorBatchGenerateStream) ArrayList(java.util.ArrayList) RowTestObjectsMultiSet(org.apache.hadoop.hive.ql.exec.util.rowobjects.RowTestObjectsMultiSet) VectorExtractRow(org.apache.hadoop.hive.ql.exec.vector.VectorExtractRow) RowTestObjects(org.apache.hadoop.hive.ql.exec.util.rowobjects.RowTestObjects)

Aggregations

ArrayList (java.util.ArrayList): 1, RowTestObjects (org.apache.hadoop.hive.ql.exec.util.rowobjects.RowTestObjects): 1, RowTestObjectsMultiSet (org.apache.hadoop.hive.ql.exec.util.rowobjects.RowTestObjectsMultiSet): 1, VectorExtractRow (org.apache.hadoop.hive.ql.exec.vector.VectorExtractRow): 1, VectorizedRowBatch (org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch): 1, VectorBatchGenerateStream (org.apache.hadoop.hive.ql.exec.vector.util.batchgen.VectorBatchGenerateStream): 1