
Example 1 with VectorExtractRow

Use of org.apache.hadoop.hive.ql.exec.vector.VectorExtractRow in project hive by apache.

From the class AggregationBase, method doVectorTest:

protected static boolean doVectorTest(String aggregationName, TypeInfo typeInfo, GenericUDAFEvaluator evaluator, TypeInfo outputTypeInfo, GenericUDAFEvaluator.Mode udafEvaluatorMode, int maxKeyCount, List<String> columns, String[] columnNames, TypeInfo[] typeInfos, DataTypePhysicalVariation[] dataTypePhysicalVariations, List<ExprNodeDesc> parameterList, VectorRandomBatchSource batchSource, Object[] results) throws Exception {
    HiveConf hiveConf = new HiveConf();
    VectorizationContext vectorizationContext = new VectorizationContext("name", columns, Arrays.asList(typeInfos), Arrays.asList(dataTypePhysicalVariations), hiveConf);
    ImmutablePair<VectorAggregationDesc, String> pair = Vectorizer.getVectorAggregationDesc(aggregationName, parameterList, evaluator, outputTypeInfo, udafEvaluatorMode, vectorizationContext);
    VectorAggregationDesc vecAggrDesc = pair.left;
    if (vecAggrDesc == null) {
        Assert.fail("No vector aggregation expression found for aggregationName " + aggregationName + " udafEvaluatorMode " + udafEvaluatorMode + " parameterList " + parameterList + " outputTypeInfo " + outputTypeInfo);
    }
    Class<? extends VectorAggregateExpression> vecAggrClass = vecAggrDesc.getVecAggrClass();
    Constructor<? extends VectorAggregateExpression> ctor = null;
    try {
        ctor = vecAggrClass.getConstructor(VectorAggregationDesc.class);
    } catch (Exception e) {
        throw new HiveException("Constructor " + vecAggrClass.getSimpleName() + "(VectorAggregationDesc) not available");
    }
    VectorAggregateExpression vecAggrExpr = null;
    try {
        vecAggrExpr = ctor.newInstance(vecAggrDesc);
    } catch (Exception e) {
        throw new HiveException("Failed to create " + vecAggrClass.getSimpleName() + "(VectorAggregationDesc) object ", e);
    }
    VectorExpression.doTransientInit(vecAggrExpr.getInputExpression(), hiveConf);
    // System.out.println("*VECTOR AGGREGATION EXPRESSION* " + vecAggrExpr.getClass().getSimpleName());
    /*
    System.out.println(
        "*DEBUG* typeInfo " + typeInfo.toString() +
        " aggregationTestMode VECTOR_MODE" +
        " vecAggrExpr " + vecAggrExpr.getClass().getSimpleName());
    */
    VectorRandomRowSource rowSource = batchSource.getRowSource();
    VectorizedRowBatchCtx batchContext = new VectorizedRowBatchCtx(
        columnNames, rowSource.typeInfos(), rowSource.dataTypePhysicalVariations(),
        /* dataColumnNums */ null,
        /* partitionColumnCount */ 0,
        /* virtualColumnCount */ 0,
        /* neededVirtualColumns */ null,
        vectorizationContext.getScratchColumnTypeNames(),
        vectorizationContext.getScratchDataTypePhysicalVariations());
    VectorizedRowBatch batch = batchContext.createVectorizedRowBatch();
    // Last entry is for a NULL key.
    VectorAggregationBufferRow[] vectorAggregationBufferRows = new VectorAggregationBufferRow[maxKeyCount + 1];
    VectorAggregationBufferRow[] batchBufferRows;
    batchSource.resetBatchIteration();
    int rowIndex = 0;
    while (true) {
        if (!batchSource.fillNextBatch(batch)) {
            break;
        }
        LongColumnVector keyLongColVector = (LongColumnVector) batch.cols[0];
        batchBufferRows = new VectorAggregationBufferRow[VectorizedRowBatch.DEFAULT_SIZE];
        final int size = batch.size;
        boolean selectedInUse = batch.selectedInUse;
        int[] selected = batch.selected;
        for (int logical = 0; logical < size; logical++) {
            final int batchIndex = (selectedInUse ? selected[logical] : logical);
            final int keyAdjustedBatchIndex;
            if (keyLongColVector.isRepeating) {
                keyAdjustedBatchIndex = 0;
            } else {
                keyAdjustedBatchIndex = batchIndex;
            }
            final short key;
            if (keyLongColVector.noNulls || !keyLongColVector.isNull[keyAdjustedBatchIndex]) {
                key = (short) keyLongColVector.vector[keyAdjustedBatchIndex];
            } else {
                key = (short) maxKeyCount;
            }
            VectorAggregationBufferRow bufferRow = vectorAggregationBufferRows[key];
            if (bufferRow == null) {
                VectorAggregateExpression.AggregationBuffer aggregationBuffer = vecAggrExpr.getNewAggregationBuffer();
                aggregationBuffer.reset();
                VectorAggregateExpression.AggregationBuffer[] aggregationBuffers = new VectorAggregateExpression.AggregationBuffer[] { aggregationBuffer };
                bufferRow = new VectorAggregationBufferRow(aggregationBuffers);
                vectorAggregationBufferRows[key] = bufferRow;
            }
            batchBufferRows[logical] = bufferRow;
        }
        vecAggrExpr.aggregateInputSelection(batchBufferRows, 0, batch);
        rowIndex += batch.size;
    }
    String[] outputColumnNames = new String[] { "output" };
    TypeInfo[] outputTypeInfos = new TypeInfo[] { outputTypeInfo };
    VectorizedRowBatchCtx outputBatchContext = new VectorizedRowBatchCtx(
        outputColumnNames, outputTypeInfos,
        new DataTypePhysicalVariation[] { vecAggrExpr.getOutputDataTypePhysicalVariation() },
        /* dataColumnNums */ null,
        /* partitionColumnCount */ 0,
        /* virtualColumnCount */ 0,
        /* neededVirtualColumns */ null,
        new String[0], new DataTypePhysicalVariation[0]);
    VectorizedRowBatch outputBatch = outputBatchContext.createVectorizedRowBatch();
    short[] keys = new short[VectorizedRowBatch.DEFAULT_SIZE];
    VectorExtractRow resultVectorExtractRow = new VectorExtractRow();
    resultVectorExtractRow.init(new TypeInfo[] { outputTypeInfo }, new int[] { 0 });
    Object[] scratchRow = new Object[1];
    for (short key = 0; key < maxKeyCount + 1; key++) {
        VectorAggregationBufferRow vectorAggregationBufferRow = vectorAggregationBufferRows[key];
        if (vectorAggregationBufferRow != null) {
            if (outputBatch.size == VectorizedRowBatch.DEFAULT_SIZE) {
                extractResultObjects(outputBatch, keys, resultVectorExtractRow, outputTypeInfo, scratchRow, results);
                outputBatch.reset();
            }
            keys[outputBatch.size] = key;
            VectorAggregateExpression.AggregationBuffer aggregationBuffer = vectorAggregationBufferRow.getAggregationBuffer(0);
            vecAggrExpr.assignRowColumn(outputBatch, outputBatch.size++, 0, aggregationBuffer);
        }
    }
    if (outputBatch.size > 0) {
        extractResultObjects(outputBatch, keys, resultVectorExtractRow, outputTypeInfo, scratchRow, results);
    }
    return true;
}
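
The helper extractResultObjects called above is not part of this listing. A minimal sketch of what such a helper plausibly does, inferred only from its call sites (hypothetical body, not the actual Hive test code):

protected static void extractResultObjects(VectorizedRowBatch outputBatch, short[] keys, VectorExtractRow resultVectorExtractRow, TypeInfo outputTypeInfo, Object[] scratchRow, Object[] results) {
    // Drain the output batch: pull each aggregation result out of the
    // vectorized columns and store it in the results slot addressed by its key.
    for (int batchIndex = 0; batchIndex < outputBatch.size; batchIndex++) {
        resultVectorExtractRow.extractRow(outputBatch, batchIndex, scratchRow);
        // The real helper may also deep-copy the writable using outputTypeInfo;
        // this sketch stores the extracted object directly.
        results[keys[batchIndex]] = scratchRow[0];
    }
}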
Also used:
HiveException (org.apache.hadoop.hive.ql.metadata.HiveException)
VectorizationContext (org.apache.hadoop.hive.ql.exec.vector.VectorizationContext)
VectorizedRowBatch (org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch)
HiveConf (org.apache.hadoop.hive.conf.HiveConf)
VectorAggregationBufferRow (org.apache.hadoop.hive.ql.exec.vector.VectorAggregationBufferRow)
LongColumnVector (org.apache.hadoop.hive.ql.exec.vector.LongColumnVector)
VectorAggregateExpression (org.apache.hadoop.hive.ql.exec.vector.expressions.aggregates.VectorAggregateExpression)
PrimitiveTypeInfo (org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo)
TypeInfo (org.apache.hadoop.hive.serde2.typeinfo.TypeInfo)
VectorExtractRow (org.apache.hadoop.hive.ql.exec.vector.VectorExtractRow)
SemanticException (org.apache.hadoop.hive.ql.parse.SemanticException)
VectorAggregationDesc (org.apache.hadoop.hive.ql.exec.vector.VectorAggregationDesc)
VectorizedRowBatchCtx (org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatchCtx)
AggregationBuffer (org.apache.hadoop.hive.ql.udf.generic.GenericUDAFEvaluator.AggregationBuffer)
VectorRandomRowSource (org.apache.hadoop.hive.ql.exec.vector.VectorRandomRowSource)

Example 2 with VectorExtractRow

Use of org.apache.hadoop.hive.ql.exec.vector.VectorExtractRow in project hive by apache.

From the class TestVectorBetweenIn, method doVectorBetweenInTest:

private boolean doVectorBetweenInTest(TypeInfo typeInfo, BetweenInVariation betweenInVariation, List<Object> compareList, List<String> columns, String[] columnNames, TypeInfo[] typeInfos, DataTypePhysicalVariation[] dataTypePhysicalVariations, List<ExprNodeDesc> children, GenericUDF udf, ExprNodeGenericFuncDesc exprDesc, BetweenInTestMode betweenInTestMode, VectorRandomBatchSource batchSource, ObjectInspector objectInspector, TypeInfo outputTypeInfo, Object[] resultObjects) throws Exception {
    HiveConf hiveConf = new HiveConf();
    if (betweenInTestMode == BetweenInTestMode.ADAPTOR) {
        hiveConf.setBoolVar(HiveConf.ConfVars.HIVE_TEST_VECTOR_ADAPTOR_OVERRIDE, true);
    }
    final boolean isFilter = betweenInVariation.isFilter;
    VectorizationContext vectorizationContext = new VectorizationContext("name", columns, Arrays.asList(typeInfos), Arrays.asList(dataTypePhysicalVariations), hiveConf);
    VectorExpression vectorExpression = vectorizationContext.getVectorExpression(exprDesc, (isFilter ? VectorExpressionDescriptor.Mode.FILTER : VectorExpressionDescriptor.Mode.PROJECTION));
    vectorExpression.transientInit(hiveConf);
    if (betweenInTestMode == BetweenInTestMode.VECTOR_EXPRESSION) {
        String vecExprString = vectorExpression.toString();
        if (vectorExpression instanceof VectorUDFAdaptor) {
            System.out.println("*NO NATIVE VECTOR EXPRESSION* typeInfo " + typeInfo.toString() + " betweenInTestMode " + betweenInTestMode + " betweenInVariation " + betweenInVariation + " vectorExpression " + vecExprString);
        } else if (dataTypePhysicalVariations[0] == DataTypePhysicalVariation.DECIMAL_64) {
            final String nameToCheck = vectorExpression.getClass().getSimpleName();
            if (!nameToCheck.contains("Decimal64")) {
                System.out.println("*EXPECTED DECIMAL_64 VECTOR EXPRESSION* typeInfo " + typeInfo.toString() + " betweenInTestMode " + betweenInTestMode + " betweenInVariation " + betweenInVariation + " vectorExpression " + vecExprString);
            }
        }
    }
    // System.out.println("*VECTOR EXPRESSION* " + vectorExpression.getClass().getSimpleName());
    /*
    System.out.println(
        "*DEBUG* typeInfo " + typeInfo.toString() +
        " betweenInTestMode " + betweenInTestMode +
        " betweenInVariation " + betweenInVariation +
        " vectorExpression " + vectorExpression.toString());
    */
    VectorRandomRowSource rowSource = batchSource.getRowSource();
    VectorizedRowBatchCtx batchContext = new VectorizedRowBatchCtx(
        columnNames, rowSource.typeInfos(), rowSource.dataTypePhysicalVariations(),
        /* dataColumnNums */ null,
        /* partitionColumnCount */ 0,
        /* virtualColumnCount */ 0,
        /* neededVirtualColumns */ null,
        vectorizationContext.getScratchColumnTypeNames(),
        vectorizationContext.getScratchDataTypePhysicalVariations());
    VectorizedRowBatch batch = batchContext.createVectorizedRowBatch();
    VectorExtractRow resultVectorExtractRow = null;
    Object[] scratchRow = null;
    if (!isFilter) {
        resultVectorExtractRow = new VectorExtractRow();
        final int outputColumnNum = vectorExpression.getOutputColumnNum();
        resultVectorExtractRow.init(new TypeInfo[] { outputTypeInfo }, new int[] { outputColumnNum });
        scratchRow = new Object[1];
    }
    boolean copySelectedInUse = false;
    int[] copySelected = new int[VectorizedRowBatch.DEFAULT_SIZE];
    batchSource.resetBatchIteration();
    int rowIndex = 0;
    while (true) {
        if (!batchSource.fillNextBatch(batch)) {
            break;
        }
        final int originalBatchSize = batch.size;
        if (isFilter) {
            copySelectedInUse = batch.selectedInUse;
            if (batch.selectedInUse) {
                System.arraycopy(batch.selected, 0, copySelected, 0, originalBatchSize);
            }
        }
        // In filter mode, the batch size can be made smaller.
        vectorExpression.evaluate(batch);
        if (!isFilter) {
            extractResultObjects(batch, rowIndex, resultVectorExtractRow, scratchRow, objectInspector, resultObjects);
        } else {
            final int currentBatchSize = batch.size;
            if (copySelectedInUse && batch.selectedInUse) {
                int selectIndex = 0;
                for (int i = 0; i < originalBatchSize; i++) {
                    final int originalBatchIndex = copySelected[i];
                    final boolean booleanResult;
                    if (selectIndex < currentBatchSize && batch.selected[selectIndex] == originalBatchIndex) {
                        booleanResult = true;
                        selectIndex++;
                    } else {
                        booleanResult = false;
                    }
                    resultObjects[rowIndex + i] = new BooleanWritable(booleanResult);
                }
            } else if (batch.selectedInUse) {
                int selectIndex = 0;
                for (int i = 0; i < originalBatchSize; i++) {
                    final boolean booleanResult;
                    if (selectIndex < currentBatchSize && batch.selected[selectIndex] == i) {
                        booleanResult = true;
                        selectIndex++;
                    } else {
                        booleanResult = false;
                    }
                    resultObjects[rowIndex + i] = new BooleanWritable(booleanResult);
                }
            } else if (currentBatchSize == 0) {
                // Whole batch got zapped.
                for (int i = 0; i < originalBatchSize; i++) {
                    resultObjects[rowIndex + i] = new BooleanWritable(false);
                }
            } else {
                // Every row kept.
                for (int i = 0; i < originalBatchSize; i++) {
                    resultObjects[rowIndex + i] = new BooleanWritable(true);
                }
            }
        }
        rowIndex += originalBatchSize;
    }
    return true;
}
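
Here too, extractResultObjects is defined elsewhere. A plausible sketch of this projection-mode overload, inferred from its call site (hypothetical, not the actual Hive test helper):

private static void extractResultObjects(VectorizedRowBatch batch, int rowIndex, VectorExtractRow resultVectorExtractRow, Object[] scratchRow, ObjectInspector objectInspector, Object[] resultObjects) {
    boolean selectedInUse = batch.selectedInUse;
    int[] selected = batch.selected;
    for (int logical = 0; logical < batch.size; logical++) {
        final int batchIndex = (selectedInUse ? selected[logical] : logical);
        resultVectorExtractRow.extractRow(batch, batchIndex, scratchRow);
        // Deep-copy the extracted writable so it survives reuse of the batch
        // by the next fillNextBatch call.
        resultObjects[rowIndex++] = ObjectInspectorUtils.copyToStandardObject(
            scratchRow[0], objectInspector, ObjectInspectorUtils.ObjectInspectorCopyOption.WRITABLE);
    }
}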
Also used:
VectorizationContext (org.apache.hadoop.hive.ql.exec.vector.VectorizationContext)
VectorUDFAdaptor (org.apache.hadoop.hive.ql.exec.vector.udf.VectorUDFAdaptor)
VectorExtractRow (org.apache.hadoop.hive.ql.exec.vector.VectorExtractRow)
VectorizedRowBatchCtx (org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatchCtx)
VectorizedRowBatch (org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch)
BooleanWritable (org.apache.hadoop.io.BooleanWritable)
HiveConf (org.apache.hadoop.hive.conf.HiveConf)
VectorExpression (org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression)
VectorRandomRowSource (org.apache.hadoop.hive.ql.exec.vector.VectorRandomRowSource)

Example 3 with VectorExtractRow

Use of org.apache.hadoop.hive.ql.exec.vector.VectorExtractRow in project hive by apache.

From the class TestVectorArithmetic, method doVectorArithmeticTest:

private void doVectorArithmeticTest(TypeInfo typeInfo1, TypeInfo typeInfo2, List<String> columns, String[] columnNames, TypeInfo[] typeInfos, DataTypePhysicalVariation[] dataTypePhysicalVariations, List<ExprNodeDesc> children, ExprNodeGenericFuncDesc exprDesc, Arithmetic arithmetic, ArithmeticTestMode arithmeticTestMode, ColumnScalarMode columnScalarMode, VectorRandomBatchSource batchSource, ObjectInspector objectInspector, TypeInfo outputTypeInfo, Object[] resultObjects) throws Exception {
    HiveConf hiveConf = new HiveConf();
    if (arithmeticTestMode == ArithmeticTestMode.ADAPTOR) {
        hiveConf.setBoolVar(HiveConf.ConfVars.HIVE_TEST_VECTOR_ADAPTOR_OVERRIDE, true);
        // Don't use DECIMAL_64 with the VectorUDFAdaptor.
        dataTypePhysicalVariations = null;
    }
    VectorizationContext vectorizationContext = new VectorizationContext("name", columns, Arrays.asList(typeInfos), dataTypePhysicalVariations == null ? null : Arrays.asList(dataTypePhysicalVariations), hiveConf);
    VectorExpression vectorExpression = vectorizationContext.getVectorExpression(exprDesc);
    vectorExpression.transientInit(hiveConf);
    if (arithmeticTestMode == ArithmeticTestMode.VECTOR_EXPRESSION && vectorExpression instanceof VectorUDFAdaptor) {
        System.out.println("*NO NATIVE VECTOR EXPRESSION* typeInfo1 " + typeInfo1.toString() + " typeInfo2 " + typeInfo2.toString() + " arithmeticTestMode " + arithmeticTestMode + " columnScalarMode " + columnScalarMode + " vectorExpression " + vectorExpression.toString());
    }
    String[] outputScratchTypeNames = vectorizationContext.getScratchColumnTypeNames();
    DataTypePhysicalVariation[] outputDataTypePhysicalVariations = vectorizationContext.getScratchDataTypePhysicalVariations();
    VectorizedRowBatchCtx batchContext = new VectorizedRowBatchCtx(
        columnNames, typeInfos, dataTypePhysicalVariations,
        /* dataColumnNums */ null,
        /* partitionColumnCount */ 0,
        /* virtualColumnCount */ 0,
        /* neededVirtualColumns */ null,
        outputScratchTypeNames, outputDataTypePhysicalVariations);
    VectorizedRowBatch batch = batchContext.createVectorizedRowBatch();
    VectorExtractRow resultVectorExtractRow = new VectorExtractRow();
    resultVectorExtractRow.init(new TypeInfo[] { outputTypeInfo }, new int[] { vectorExpression.getOutputColumnNum() });
    Object[] scratchRow = new Object[1];
    // System.out.println("*VECTOR EXPRESSION* " + vectorExpression.getClass().getSimpleName());
    /*
    System.out.println(
        "*DEBUG* typeInfo1 " + typeInfo1.toString() +
        " typeInfo2 " + typeInfo2.toString() +
        " arithmeticTestMode " + arithmeticTestMode +
        " columnScalarMode " + columnScalarMode +
        " vectorExpression " + vectorExpression.toString());
    */
    batchSource.resetBatchIteration();
    int rowIndex = 0;
    while (true) {
        if (!batchSource.fillNextBatch(batch)) {
            break;
        }
        vectorExpression.evaluate(batch);
        extractResultObjects(batch, rowIndex, resultVectorExtractRow, scratchRow, objectInspector, resultObjects);
        rowIndex += batch.size;
    }
}
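
Stripped of the test harness, the VectorExtractRow pattern shared by these examples is small. A minimal sketch, assuming batch is a populated VectorizedRowBatch and outputTypeInfo describes physical batch column 2 (both illustrative):

VectorExtractRow extractRow = new VectorExtractRow();
extractRow.init(new TypeInfo[] { outputTypeInfo }, new int[] { 2 });
Object[] row = new Object[1];
for (int logical = 0; logical < batch.size; logical++) {
    // Map the logical position through the selected vector when it is in use.
    final int batchIndex = (batch.selectedInUse ? batch.selected[logical] : logical);
    extractRow.extractRow(batch, batchIndex, row);
    // row[0] now holds the writable extracted from column 2 of this row.
}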
Also used:
VectorizationContext (org.apache.hadoop.hive.ql.exec.vector.VectorizationContext)
VectorUDFAdaptor (org.apache.hadoop.hive.ql.exec.vector.udf.VectorUDFAdaptor)
VectorExtractRow (org.apache.hadoop.hive.ql.exec.vector.VectorExtractRow)
VectorizedRowBatchCtx (org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatchCtx)
VectorizedRowBatch (org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch)
DataTypePhysicalVariation (org.apache.hadoop.hive.common.type.DataTypePhysicalVariation)
HiveConf (org.apache.hadoop.hive.conf.HiveConf)
VectorExpression (org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression)

Example 4 with VectorExtractRow

Use of org.apache.hadoop.hive.ql.exec.vector.VectorExtractRow in project hive by apache.

From the class TestVectorStructField, method doVectorStructFieldTest:

private void doVectorStructFieldTest(TypeInfo typeInfo, List<String> columns, String[] columnNames, TypeInfo[] typeInfos, DataTypePhysicalVariation[] dataTypePhysicalVariations, List<ExprNodeDesc> children, ExprNodeFieldDesc exprNodeFieldDesc, StructFieldTestMode negativeTestMode, VectorRandomBatchSource batchSource, ObjectInspector objectInspector, TypeInfo outputTypeInfo, Object[] resultObjects) throws Exception {
    HiveConf hiveConf = new HiveConf();
    VectorizationContext vectorizationContext = new VectorizationContext("name", columns, Arrays.asList(typeInfos), Arrays.asList(dataTypePhysicalVariations), hiveConf);
    VectorExpression vectorExpression = vectorizationContext.getVectorExpression(exprNodeFieldDesc);
    vectorExpression.transientInit(hiveConf);
    if (negativeTestMode == StructFieldTestMode.VECTOR_EXPRESSION && vectorExpression instanceof VectorUDFAdaptor) {
        System.out.println("*NO NATIVE VECTOR EXPRESSION* typeInfo " + typeInfo.toString() + " negativeTestMode " + negativeTestMode + " vectorExpression " + vectorExpression.toString());
    }
    String[] outputScratchTypeNames = vectorizationContext.getScratchColumnTypeNames();
    VectorizedRowBatchCtx batchContext = new VectorizedRowBatchCtx(
        columnNames, typeInfos, dataTypePhysicalVariations,
        /* dataColumnNums */ null,
        /* partitionColumnCount */ 0,
        /* virtualColumnCount */ 0,
        /* neededVirtualColumns */ null,
        outputScratchTypeNames, null);
    VectorizedRowBatch batch = batchContext.createVectorizedRowBatch();
    VectorExtractRow resultVectorExtractRow = new VectorExtractRow();
    resultVectorExtractRow.init(new TypeInfo[] { outputTypeInfo }, new int[] { vectorExpression.getOutputColumnNum() });
    Object[] scratchRow = new Object[1];
    // System.out.println("*VECTOR EXPRESSION* " + vectorExpression.getClass().getSimpleName());
    /*
    System.out.println(
        "*DEBUG* typeInfo " + typeInfo.toString() +
        " negativeTestMode " + negativeTestMode +
        " vectorExpression " + vectorExpression.toString());
    */
    batchSource.resetBatchIteration();
    int rowIndex = 0;
    while (true) {
        if (!batchSource.fillNextBatch(batch)) {
            break;
        }
        vectorExpression.evaluate(batch);
        extractResultObjects(batch, rowIndex, resultVectorExtractRow, scratchRow, objectInspector, resultObjects);
        rowIndex += batch.size;
    }
}
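
For context, the exprNodeFieldDesc parameter that this test vectorizes can be built as below. The column and field names are illustrative, not taken from the test:

// Hypothetical setup: a reference to field "fld0" of a struct column "col1".
StructTypeInfo structTypeInfo = (StructTypeInfo) TypeInfoUtils.getTypeInfoFromTypeString("struct<fld0:int,fld1:string>");
ExprNodeColumnDesc structColumn = new ExprNodeColumnDesc(structTypeInfo, "col1", "table", false);
TypeInfo fieldTypeInfo = structTypeInfo.getStructFieldTypeInfo("fld0");
ExprNodeFieldDesc exprNodeFieldDesc = new ExprNodeFieldDesc(fieldTypeInfo, structColumn, "fld0", /* isList */ false);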
Also used:
VectorizedRowBatchCtx (org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatchCtx)
VectorizedRowBatch (org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch)
HiveConf (org.apache.hadoop.hive.conf.HiveConf)
VectorizationContext (org.apache.hadoop.hive.ql.exec.vector.VectorizationContext)
VectorExpression (org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression)
VectorUDFAdaptor (org.apache.hadoop.hive.ql.exec.vector.udf.VectorUDFAdaptor)
VectorExtractRow (org.apache.hadoop.hive.ql.exec.vector.VectorExtractRow)

Example 5 with VectorExtractRow

Use of org.apache.hadoop.hive.ql.exec.vector.VectorExtractRow in project hive by apache.

From the class TestMapJoinOperator, method createExpectedTestRowMultiSet:

/*
 * Simulate the join by driving the test big-table data through our test small-table
 * HashMap, and create the expected output as a multi-set of TestRow (i.e. TestRow
 * plus occurrence count).
 */
private RowTestObjectsMultiSet createExpectedTestRowMultiSet(MapJoinTestDescription testDesc, MapJoinTestData testData) throws HiveException {
    RowTestObjectsMultiSet expectedTestRowMultiSet = new RowTestObjectsMultiSet();
    VectorExtractRow vectorExtractRow = new VectorExtractRow();
    vectorExtractRow.init(testDesc.bigTableTypeInfos);
    final int bigTableColumnCount = testDesc.bigTableTypeInfos.length;
    Object[] bigTableRowObjects = new Object[bigTableColumnCount];
    final int bigTableKeyColumnCount = testDesc.bigTableKeyTypeInfos.length;
    Object[] bigTableKeyObjects = new Object[bigTableKeyColumnCount];
    VectorRandomBatchSource bigTableBatchSource = testData.getBigTableBatchSource();
    VectorizedRowBatch batch = testData.getBigTableBatch();
    bigTableBatchSource.resetBatchIteration();
    while (bigTableBatchSource.fillNextBatch(batch)) {
        final int size = testData.bigTableBatch.size;
        for (int r = 0; r < size; r++) {
            vectorExtractRow.extractRow(testData.bigTableBatch, r, bigTableRowObjects);
            // Form key object array
            // NULLs may be present in {FULL|LEFT|RIGHT} OUTER joins.
            boolean hasAnyNulls = false;
            for (int k = 0; k < bigTableKeyColumnCount; k++) {
                int keyColumnNum = testDesc.bigTableKeyColumnNums[k];
                Object keyObject = bigTableRowObjects[keyColumnNum];
                if (keyObject == null) {
                    hasAnyNulls = true;
                }
                bigTableKeyObjects[k] = keyObject;
                bigTableKeyObjects[k] = ((PrimitiveObjectInspector) testDesc.bigTableObjectInspectors[keyColumnNum]).copyObject(bigTableKeyObjects[k]);
            }
            RowTestObjects testKey = new RowTestObjects(bigTableKeyObjects);
            if (testData.smallTableKeyHashMap.containsKey(testKey) && !hasAnyNulls) {
                int smallTableKeyIndex = testData.smallTableKeyHashMap.get(testKey);
                switch(testDesc.vectorMapJoinVariation) {
                    case INNER:
                    case OUTER:
                    case FULL_OUTER:
                        {
                            // One row per value.
                            ArrayList<RowTestObjects> valueList = testData.smallTableValues.get(smallTableKeyIndex);
                            final int valueCount = valueList.size();
                            for (int v = 0; v < valueCount; v++) {
                                Object[] outputObjects = new Object[testDesc.outputColumnNames.length];
                                addBigTableRetained(testDesc, bigTableRowObjects, outputObjects);
                                int outputColumnNum = testDesc.bigTableRetainColumnNums.length;
                                final int smallTableRetainKeyColumnNumsLength = testDesc.smallTableRetainKeyColumnNums.length;
                                for (int o = 0; o < smallTableRetainKeyColumnNumsLength; o++) {
                                    outputObjects[outputColumnNum++] = bigTableKeyObjects[testDesc.smallTableRetainKeyColumnNums[o]];
                                }
                                Object[] valueRow = valueList.get(v).getRow();
                                final int smallTableRetainValueColumnNumsLength = testDesc.smallTableRetainValueColumnNums.length;
                                for (int o = 0; o < smallTableRetainValueColumnNumsLength; o++) {
                                    outputObjects[outputColumnNum++] = valueRow[testDesc.smallTableRetainValueColumnNums[o]];
                                }
                                addToOutput(testDesc, expectedTestRowMultiSet, outputObjects, RowTestObjectsMultiSet.RowFlag.REGULAR);
                            }
                        }
                        break;
                    case INNER_BIG_ONLY:
                    case LEFT_SEMI:
                    case LEFT_ANTI:
                        {
                            Object[] outputObjects = new Object[testDesc.outputColumnNames.length];
                            addBigTableRetained(testDesc, bigTableRowObjects, outputObjects);
                            int outputColumnNum = testDesc.bigTableRetainColumnNums.length;
                            final int smallTableRetainKeyColumnNumsLength = testDesc.smallTableRetainKeyColumnNums.length;
                            for (int o = 0; o < smallTableRetainKeyColumnNumsLength; o++) {
                                outputObjects[outputColumnNum++] = bigTableKeyObjects[testDesc.smallTableRetainKeyColumnNums[o]];
                            }
                            addToOutput(testDesc, expectedTestRowMultiSet, outputObjects, RowTestObjectsMultiSet.RowFlag.REGULAR);
                        }
                        break;
                    default:
                        throw new RuntimeException("Unknown operator variation " + testDesc.vectorMapJoinVariation);
                }
            } else {
                if (testDesc.vectorMapJoinVariation == VectorMapJoinVariation.OUTER || testDesc.vectorMapJoinVariation == VectorMapJoinVariation.FULL_OUTER) {
                    // We need to add a non-match row with nulls for small table values.
                    Object[] outputObjects = new Object[testDesc.outputColumnNames.length];
                    addBigTableRetained(testDesc, bigTableRowObjects, outputObjects);
                    int outputColumnNum = testDesc.bigTableRetainColumnNums.length;
                    final int smallTableRetainKeyColumnNumsLength = testDesc.smallTableRetainKeyColumnNums.length;
                    for (int o = 0; o < smallTableRetainKeyColumnNumsLength; o++) {
                        outputObjects[outputColumnNum++] = null;
                    }
                    final int smallTableRetainValueColumnNumsLength = testDesc.smallTableRetainValueColumnNums.length;
                    for (int o = 0; o < smallTableRetainValueColumnNumsLength; o++) {
                        outputObjects[outputColumnNum++] = null;
                    }
                    addToOutput(testDesc, expectedTestRowMultiSet, outputObjects, RowTestObjectsMultiSet.RowFlag.LEFT_OUTER);
                }
            }
        }
    }
    if (testDesc.vectorMapJoinVariation == VectorMapJoinVariation.FULL_OUTER) {
        System.out.println("*BENCHMARK* ----------------------------------------------------------------------");
        System.out.println("*BENCHMARK* FULL OUTER non-match key count " + testData.fullOuterAdditionalSmallTableKeys.size());
        // Fill in non-match Small Table key results.
        for (RowTestObjects smallTableKey : testData.fullOuterAdditionalSmallTableKeys) {
            // System.out.println(
            // "*BENCHMARK* fullOuterAdditionalSmallTableKey " + smallTableKey.toString());
            int smallTableKeyIndex = testData.smallTableKeyHashMap.get(smallTableKey);
            // One row per value.
            ArrayList<RowTestObjects> valueList = testData.smallTableValues.get(smallTableKeyIndex);
            final int valueCount = valueList.size();
            for (int v = 0; v < valueCount; v++) {
                Object[] outputObjects = new Object[testDesc.outputColumnNames.length];
                // Non-match Small Table keys produce NULL Big Table columns.
                final int bigTableRetainColumnNumsLength = testDesc.bigTableRetainColumnNums.length;
                for (int o = 0; o < bigTableRetainColumnNumsLength; o++) {
                    outputObjects[o] = null;
                }
                int outputColumnNum = testDesc.bigTableRetainColumnNums.length;
                // The output result may include 0, 1, or more small key columns...
                Object[] smallKeyObjects = smallTableKey.getRow();
                final int smallTableRetainKeyColumnNumsLength = testDesc.smallTableRetainKeyColumnNums.length;
                for (int o = 0; o < smallTableRetainKeyColumnNumsLength; o++) {
                    outputObjects[outputColumnNum++] = smallKeyObjects[testDesc.smallTableRetainKeyColumnNums[o]];
                }
                Object[] valueRow = valueList.get(v).getRow();
                final int smallTableRetainValueColumnNumsLength = testDesc.smallTableRetainValueColumnNums.length;
                for (int o = 0; o < smallTableRetainValueColumnNumsLength; o++) {
                    outputObjects[outputColumnNum++] = valueRow[testDesc.smallTableRetainValueColumnNums[o]];
                }
                addToOutput(testDesc, expectedTestRowMultiSet, outputObjects, RowTestObjectsMultiSet.RowFlag.FULL_OUTER);
            }
        }
    }
    return expectedTestRowMultiSet;
}
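
Note the two init overloads used on this page: Example 5 initializes VectorExtractRow for every big-table column in batch order, while Examples 1 through 4 target a single projected output column. A side-by-side sketch with illustrative types and column numbers:

// Extract all columns, in batch order (the Example 5 style).
VectorExtractRow allColumns = new VectorExtractRow();
allColumns.init(new TypeInfo[] { TypeInfoFactory.longTypeInfo, TypeInfoFactory.stringTypeInfo });

// Extract one projected column, here physical batch column 3 (the Examples 1-4 style).
VectorExtractRow oneColumn = new VectorExtractRow();
oneColumn.init(new TypeInfo[] { TypeInfoFactory.longTypeInfo }, new int[] { 3 });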
Also used:
VectorizedRowBatch (org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch)
VectorRandomBatchSource (org.apache.hadoop.hive.ql.exec.vector.VectorRandomBatchSource)
ArrayList (java.util.ArrayList)
RowTestObjectsMultiSet (org.apache.hadoop.hive.ql.exec.util.rowobjects.RowTestObjectsMultiSet)
VectorExtractRow (org.apache.hadoop.hive.ql.exec.vector.VectorExtractRow)
RowTestObjects (org.apache.hadoop.hive.ql.exec.util.rowobjects.RowTestObjects)

Aggregations

VectorExtractRow (org.apache.hadoop.hive.ql.exec.vector.VectorExtractRow): 23 uses
VectorizedRowBatch (org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch): 19 uses
VectorizationContext (org.apache.hadoop.hive.ql.exec.vector.VectorizationContext): 18 uses
HiveConf (org.apache.hadoop.hive.conf.HiveConf): 17 uses
VectorUDFAdaptor (org.apache.hadoop.hive.ql.exec.vector.udf.VectorUDFAdaptor): 16 uses
VectorExpression (org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression): 14 uses
VectorizedRowBatchCtx (org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatchCtx): 12 uses
VectorRandomRowSource (org.apache.hadoop.hive.ql.exec.vector.VectorRandomRowSource): 7 uses
DataTypePhysicalVariation (org.apache.hadoop.hive.common.type.DataTypePhysicalVariation): 6 uses
ExprNodeGenericFuncDesc (org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc): 5 uses
PrimitiveTypeInfo (org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo): 4 uses
TypeInfo (org.apache.hadoop.hive.serde2.typeinfo.TypeInfo): 4 uses
BooleanWritable (org.apache.hadoop.io.BooleanWritable): 4 uses
RowTestObjects (org.apache.hadoop.hive.ql.exec.util.rowobjects.RowTestObjects): 2 uses
VectorRandomBatchSource (org.apache.hadoop.hive.ql.exec.vector.VectorRandomBatchSource): 2 uses
HiveException (org.apache.hadoop.hive.ql.metadata.HiveException): 2 uses
GenericUDF (org.apache.hadoop.hive.ql.udf.generic.GenericUDF): 2 uses
ObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector): 2 uses
DecimalTypeInfo (org.apache.hadoop.hive.serde2.typeinfo.DecimalTypeInfo): 2 uses
ArrayList (java.util.ArrayList): 1 use