Search in sources :

Example 1 with VectorizationContextRegion

Use of org.apache.hadoop.hive.ql.exec.vector.VectorizationContextRegion in the Apache Hive project.

From the class Vectorizer, method doProcessChild.

/**
 * Vectorizes {@code child} using a {@link VectorizationContext} taken from its vector parent.
 *
 * <p>If {@code vectorParent} is a {@link VectorizationContextRegion}, its output vectorization
 * context is used; otherwise the parent is cast to {@code VectorizationOperator} and its input
 * vectorization context is used.
 *
 * @param child the operator to validate and vectorize
 * @param vectorParent the parent operator that supplies the vectorization context
 * @param isReduce passed through to {@code validateAndVectorizeOperator}
 * @param isTezOrSpark passed through to {@code validateAndVectorizeOperator}
 * @param vectorTaskColumnInfo passed through to {@code validateAndVectorizeOperator}
 * @return the operator returned by {@code validateAndVectorizeOperator}
 * @throws VectorizerCannotVectorizeException if vectorization raised a {@link HiveException};
 *         the single-line stack trace is recorded through {@code setNodeIssue} first
 */
private Operator<? extends OperatorDesc> doProcessChild(Operator<? extends OperatorDesc> child, Operator<? extends OperatorDesc> vectorParent, boolean isReduce, boolean isTezOrSpark, VectorTaskColumnInfo vectorTaskColumnInfo) throws VectorizerCannotVectorizeException {
    // Pick the context from the vector parent: output context for a region, input context otherwise.
    final VectorizationContext parentContext = (vectorParent instanceof VectorizationContextRegion)
        ? ((VectorizationContextRegion) vectorParent).getOutputVectorizationContext()
        : ((VectorizationOperator) vectorParent).getInputVectorizationContext();
    try {
        return validateAndVectorizeOperator(child, parentContext, isReduce, isTezOrSpark, vectorTaskColumnInfo);
    } catch (HiveException e) {
        // Record why this node could not be vectorized before signalling the failure.
        setNodeIssue("exception: " + VectorizationContext.getStackTraceAsSingleLine(e));
        throw new VectorizerCannotVectorizeException();
    }
}
Also used : HiveException(org.apache.hadoop.hive.ql.metadata.HiveException) VectorizationContext(org.apache.hadoop.hive.ql.exec.vector.VectorizationContext) VectorizationContextRegion(org.apache.hadoop.hive.ql.exec.vector.VectorizationContextRegion)

Example 2 with VectorizationContextRegion

Use of org.apache.hadoop.hive.ql.exec.vector.VectorizationContextRegion in the Apache Hive project.

From the class MapJoinTestConfig, method addFullOuterIntercept.

/**
 * Appends a FULL OUTER intercept stage to {@code mapJoinOperator} for testing.
 *
 * <p>The resulting chain is: root MapJoin, then a select operator (row-mode or vectorized), then a
 * second "intercept" MapJoin, then a collector. After wiring, the chain is initialized from the
 * root MapJoin operator and both MapJoin operators are given their test table containers.
 *
 * @param mapJoinImplementation the implementation under test; decides row-mode versus vector output
 * @param testDesc the test description of the root MapJoin
 * @param outputTestRowMultiSet the multi-set handed to the intercept collector
 * @param testData the test data used to create the intercept MapJoin
 * @param mapJoinOperator the root MapJoin operator
 * @param mapJoinTableContainer the table container installed on the root MapJoin
 * @param mapJoinTableContainerSerDe the SerDe installed together with {@code mapJoinTableContainer}
 * @return the collector operator connected after the intercept MapJoin
 */
public static CountCollectorTestOperator addFullOuterIntercept(MapJoinTestImplementation mapJoinImplementation, MapJoinTestDescription testDesc, RowTestObjectsMultiSet outputTestRowMultiSet, MapJoinTestData testData, MapJoinOperator mapJoinOperator, MapJoinTableContainer mapJoinTableContainer, MapJoinTableContainerSerDe mapJoinTableContainerSerDe) throws SerDeException, IOException, HiveException {
    final MapJoinDesc rootDesc = (MapJoinDesc) mapJoinOperator.getConf();

    // For FULL OUTER MapJoin, we require all Big Keys to be present in the output result.
    // The first N output columns are the Big Table key columns.
    final int bigTableKeySize = rootDesc.getKeys().get((byte) 0).size();
    final int bigTableRetainSize = rootDesc.getRetainList().get((byte) 0).size();
    final String[] rootOutputColumnNames = rootDesc.getOutputColumnNames().toArray(new String[0]);

    // Use a utility method to get the MapJoin output TypeInfo.
    final TypeInfo[] rootOutputTypeInfos = VectorMapJoinBaseOperator.getOutputTypeInfos(rootDesc);
    final boolean isVectorOutput = MapJoinTestConfig.isVectorOutput(mapJoinImplementation);

    /*
     * Always create a row-mode SelectOperator.  If we are vector-mode, its expressions are then
     * used to build the VectorSelectOperator that replaces it.
     */
    final Operator<SelectDesc> rowModeSelect = makeInterceptSelectOperator(mapJoinOperator, bigTableKeySize, bigTableRetainSize, rootOutputColumnNames, rootOutputTypeInfos);
    final String[] selectOutputColumnNames = ((SelectDesc) rowModeSelect.getConf()).getOutputColumnNames().toArray(new String[0]);
    final Operator<SelectDesc> interceptSelect = isVectorOutput
        ? vectorizeInterceptSelectOperator(mapJoinOperator, bigTableKeySize, bigTableRetainSize, rowModeSelect)
        : rowModeSelect;

    /*
     * Create a test description just for the FULL OUTER INTERCEPT: it differs from testDesc in
     * its column names (the select's output names) and type infos (only the first
     * bigTableRetainSize MapJoin output types).
     */
    final MapJoinTestDescription interceptTestDesc = new MapJoinTestDescription(testDesc.hiveConf, testDesc.vectorMapJoinVariation, selectOutputColumnNames, Arrays.copyOf(rootOutputTypeInfos, bigTableRetainSize), testDesc.bigTableKeyColumnNums, testDesc.smallTableValueTypeInfos, testDesc.smallTableRetainKeyColumnNums, testDesc.smallTableGenerationParameters, testDesc.mapJoinPlanVariation);
    final MapJoinDesc interceptDesc = createMapJoinDesc(interceptTestDesc, /* isFullOuterIntersect */ true);

    /*
     * Create the FULL OUTER INTERCEPT MapJoin operator.
     */
    final CreateMapJoinResult interceptResult = createMapJoinImplementation(mapJoinImplementation, interceptTestDesc, testData, interceptDesc);
    final MapJoinOperator interceptMapJoin = interceptResult.mapJoinOperator;
    final MapJoinTableContainer interceptTableContainer = interceptResult.mapJoinTableContainer;
    final MapJoinTableContainerSerDe interceptTableContainerSerDe = interceptResult.mapJoinTableContainerSerDe;

    // Chain: root MapJoin -> select -> intercept MapJoin.
    connectOperators(mapJoinOperator, interceptSelect);
    connectOperators(interceptSelect, interceptMapJoin);

    final CountCollectorTestOperator interceptCollector;
    if (isVectorOutput) {
        // The vector collector needs the projected columns of the intercept MapJoin's output.
        final VectorizationContext interceptOutContext = ((VectorizationContextRegion) interceptMapJoin).getOutputVectorizationContext();
        final int[] projectedColumns = ArrayUtils.toPrimitive(interceptOutContext.getProjectedColumns().toArray(new Integer[0]));
        interceptCollector = new TestMultiSetVectorCollectorOperator(projectedColumns, interceptTestDesc.outputTypeInfos, interceptTestDesc.outputObjectInspectors, outputTestRowMultiSet);
    } else {
        interceptCollector = new TestMultiSetCollectorOperator(interceptTestDesc.outputObjectInspectors, outputTestRowMultiSet);
    }
    connectOperators(interceptMapJoin, interceptCollector);

    // Setup the FULL OUTER INTERCEPT MapJoin's inputObjInspector to include the Small Table, etc.
    interceptMapJoin.setInputObjInspectors(interceptTestDesc.inputObjectInspectors);

    // Now, invoke initializeOp methods from the root MapJoin operator.
    mapJoinOperator.initialize(testDesc.hiveConf, testDesc.inputObjectInspectors);

    // Fixup the mapJoinTables container references to our test data.
    mapJoinOperator.setTestMapJoinTableContainer(1, mapJoinTableContainer, mapJoinTableContainerSerDe);
    interceptMapJoin.setTestMapJoinTableContainer(1, interceptTableContainer, interceptTableContainerSerDe);

    return interceptCollector;
}
Also used : VectorizationContext(org.apache.hadoop.hive.ql.exec.vector.VectorizationContext) MapJoinTableContainerSerDe(org.apache.hadoop.hive.ql.exec.persistence.MapJoinTableContainerSerDe) List(java.util.List) ArrayList(java.util.ArrayList) ExprNodeDesc(org.apache.hadoop.hive.ql.plan.ExprNodeDesc) SelectDesc(org.apache.hadoop.hive.ql.plan.SelectDesc) VectorSelectDesc(org.apache.hadoop.hive.ql.plan.VectorSelectDesc) MapJoinTableContainer(org.apache.hadoop.hive.ql.exec.persistence.MapJoinTableContainer) VectorizationContextRegion(org.apache.hadoop.hive.ql.exec.vector.VectorizationContextRegion) MapJoinOperator(org.apache.hadoop.hive.ql.exec.MapJoinOperator) VectorMapJoinOperator(org.apache.hadoop.hive.ql.exec.vector.VectorMapJoinOperator) MapJoinDesc(org.apache.hadoop.hive.ql.plan.MapJoinDesc) VectorMapJoinDesc(org.apache.hadoop.hive.ql.plan.VectorMapJoinDesc) CountCollectorTestOperator(org.apache.hadoop.hive.ql.exec.util.collectoroperator.CountCollectorTestOperator) PrimitiveTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo) TypeInfo(org.apache.hadoop.hive.serde2.typeinfo.TypeInfo)

Example 3 with VectorizationContextRegion

Use of org.apache.hadoop.hive.ql.exec.vector.VectorizationContextRegion in the Apache Hive project.

From the class MapJoinTestConfig, method vectorizeInterceptSelectOperator.

/**
 * Builds a vectorized replacement for the row-mode intercept select operator.
 *
 * <p>The first {@code bigTableKeySize} select expressions (the Big Table keys) are vectorized
 * through the MapJoin operator's output {@link VectorizationContext}. The remaining expressions,
 * up to {@code bigTableRetainSize}, are replaced by {@code NoOpExpression}s so the batch is not
 * modified for the non-key columns.
 *
 * @param mapJoinOperator the MapJoin operator; must also be a {@link VectorizationContextRegion}
 * @param bigTableKeySize number of leading select expressions that are Big Table keys
 * @param bigTableRetainSize total number of select expressions / projected columns to use
 * @param selectOperator the row-mode select operator whose {@link SelectDesc} and compilation
 *        context are reused
 * @return the vector operator created by {@code OperatorFactory.getVectorOperator}
 * @throws HiveException if a key expression cannot be vectorized or the operator cannot be created
 */
private static Operator<SelectDesc> vectorizeInterceptSelectOperator(MapJoinOperator mapJoinOperator, int bigTableKeySize, int bigTableRetainSize, Operator<SelectDesc> selectOperator) throws HiveException {
    VectorizationContext vOutContext = ((VectorizationContextRegion) mapJoinOperator).getOutputVectorizationContext();
    SelectDesc selectDesc = (SelectDesc) selectOperator.getConf();
    List<ExprNodeDesc> selectExprs = selectDesc.getColList();
    VectorExpression[] selectVectorExpr = new VectorExpression[bigTableRetainSize];
    for (int i = 0; i < bigTableRetainSize; i++) {
        ExprNodeDesc selectExpr = selectExprs.get(i);
        if (i < bigTableKeySize) {
            // Big Table key.
            selectVectorExpr[i] = vOutContext.getVectorExpression(selectExpr);
        } else {
            // For vector-mode, for test purposes we substitute a NO-OP (we don't want to modify
            // the batch).
            // FULL OUTER INTERCEPT does not look at non-key columns.
            // The type info is only needed here, so it is looked up in this branch alone.
            TypeInfo typeInfo = selectExpr.getTypeInfo();
            NoOpExpression noOpExpression = new NoOpExpression(i);
            noOpExpression.setInputTypeInfos(typeInfo);
            noOpExpression.setInputDataTypePhysicalVariations(DataTypePhysicalVariation.NONE);
            noOpExpression.setOutputTypeInfo(typeInfo);
            noOpExpression.setOutputDataTypePhysicalVariation(DataTypePhysicalVariation.NONE);
            selectVectorExpr[i] = noOpExpression;
        }
    }
    System.out.println("*BENCHMARK* VectorSelectOperator selectVectorExpr " + Arrays.toString(selectVectorExpr));
    // Only the first bigTableRetainSize projected columns of the MapJoin output are selected.
    int[] projectedColumns = ArrayUtils.toPrimitive(vOutContext.getProjectedColumns().subList(0, bigTableRetainSize).toArray(new Integer[0]));
    System.out.println("*BENCHMARK* VectorSelectOperator projectedColumns " + Arrays.toString(projectedColumns));
    VectorSelectDesc vectorSelectDesc = new VectorSelectDesc();
    vectorSelectDesc.setSelectExpressions(selectVectorExpr);
    vectorSelectDesc.setProjectedOutputColumns(projectedColumns);
    Operator<SelectDesc> vectorSelectOperator = OperatorFactory.getVectorOperator(selectOperator.getCompilationOpContext(), selectDesc, vOutContext, vectorSelectDesc);
    return vectorSelectOperator;
}
Also used : MapJoinDesc(org.apache.hadoop.hive.ql.plan.MapJoinDesc) VectorMapJoinDesc(org.apache.hadoop.hive.ql.plan.VectorMapJoinDesc) VectorizationContext(org.apache.hadoop.hive.ql.exec.vector.VectorizationContext) PrimitiveTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo) TypeInfo(org.apache.hadoop.hive.serde2.typeinfo.TypeInfo) VectorExpression(org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression) VectorSelectDesc(org.apache.hadoop.hive.ql.plan.VectorSelectDesc) SelectDesc(org.apache.hadoop.hive.ql.plan.SelectDesc) VectorSelectDesc(org.apache.hadoop.hive.ql.plan.VectorSelectDesc) ExprNodeDesc(org.apache.hadoop.hive.ql.plan.ExprNodeDesc) VectorizationContextRegion(org.apache.hadoop.hive.ql.exec.vector.VectorizationContextRegion)

Example 4 with VectorizationContextRegion

Use of org.apache.hadoop.hive.ql.exec.vector.VectorizationContextRegion in the Apache Hive project.

From the class FakeCaptureVectorToRowOutputOperator, method initializeOp.

/**
 * Initializes this operator's output metadata from the wrapped operator {@code op}.
 *
 * <p>After the superclass initialization, the initial type infos of {@code op}'s output
 * vectorization context become {@code outputTypeInfos}. A standard writable object inspector is
 * created for each of them, and a {@link VectorExtractRow} is initialized with the same types.
 *
 * @param conf the configuration forwarded to the superclass
 * @throws HiveException if initialization fails
 */
@Override
public void initializeOp(Configuration conf) throws HiveException {
    super.initializeOp(conf);

    // op must be a VectorizationContextRegion; its output context describes the columns.
    outputTypeInfos = ((VectorizationContextRegion) op).getOutputVectorizationContext().getInitialTypeInfos();

    // One standard writable inspector per output column, in column order.
    outputObjectInspectors = new ObjectInspector[outputTypeInfos.length];
    int column = 0;
    for (TypeInfo outputTypeInfo : outputTypeInfos) {
        outputObjectInspectors[column++] = TypeInfoUtils.getStandardWritableObjectInspectorFromTypeInfo(outputTypeInfo);
    }

    vectorExtractRow = new VectorExtractRow();
    vectorExtractRow.init(outputTypeInfos);
}
Also used : VectorizationContext(org.apache.hadoop.hive.ql.exec.vector.VectorizationContext) StructTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.StructTypeInfo) PrimitiveTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo) TypeInfo(org.apache.hadoop.hive.serde2.typeinfo.TypeInfo) VectorExtractRow(org.apache.hadoop.hive.ql.exec.vector.VectorExtractRow) VectorizationContextRegion(org.apache.hadoop.hive.ql.exec.vector.VectorizationContextRegion)

Example 5 with VectorizationContextRegion

Use of org.apache.hadoop.hive.ql.exec.vector.VectorizationContextRegion in the Apache Hive project.

From the class TestMapJoinOperator, method executeTestImplementation.

/**
 * Runs one MapJoin implementation against the test data and compares its output with the
 * expected rows.
 *
 * <p>Steps: create the MapJoin operator, attach a row-mode or vector collector, optionally wire
 * in the FULL OUTER intercept stage (otherwise initialize the operator directly), drive the Big
 * Table data through, check that the collector(s) closed without aborting, and finally verify
 * the collected rows. A verification mismatch is printed together with the differences; this
 * method itself does not fail the test on a mismatch.
 *
 * @param mapJoinImplementation the implementation to execute
 * @param testDesc the test description
 * @param testData the test data to drive through the operator
 * @param expectedTestRowMultiSet the expected output rows
 * @param title a label included in the printed messages
 * @throws Exception if creating, initializing, or driving the operators fails
 */
private void executeTestImplementation(MapJoinTestImplementation mapJoinImplementation, MapJoinTestDescription testDesc, MapJoinTestData testData, RowTestObjectsMultiSet expectedTestRowMultiSet, String title) throws Exception {
    System.out.println("*BENCHMARK* Starting implementation " + mapJoinImplementation + " variation " + testDesc.vectorMapJoinVariation + " title " + title);

    // UNDONE: Parameterize for implementation variation?
    final MapJoinDesc mapJoinDesc = MapJoinTestConfig.createMapJoinDesc(testDesc);
    final boolean isVectorOutput = MapJoinTestConfig.isVectorOutput(mapJoinImplementation);
    final RowTestObjectsMultiSet actualRows = new RowTestObjectsMultiSet();

    final CreateMapJoinResult created = MapJoinTestConfig.createMapJoinImplementation(mapJoinImplementation, testDesc, testData, mapJoinDesc);
    final MapJoinOperator mapJoinOperator = created.mapJoinOperator;
    final MapJoinTableContainer tableContainer = created.mapJoinTableContainer;
    final MapJoinTableContainerSerDe tableContainerSerDe = created.mapJoinTableContainerSerDe;

    // Attach the collector that captures the MapJoin output.
    final CountCollectorTestOperator collector;
    if (isVectorOutput) {
        final VectorizationContext outContext = ((VectorizationContextRegion) mapJoinOperator).getOutputVectorizationContext();
        final int[] projectedColumns = ArrayUtils.toPrimitive(outContext.getProjectedColumns().toArray(new Integer[0]));
        collector = new TestMultiSetVectorCollectorOperator(projectedColumns, testDesc.outputTypeInfos, testDesc.outputObjectInspectors, actualRows);
    } else {
        collector = new TestMultiSetCollectorOperator(testDesc.outputObjectInspectors, actualRows);
    }
    MapJoinTestConfig.connectOperators(mapJoinOperator, collector);

    CountCollectorTestOperator interceptCollector = null;
    if (testDesc.vectorMapJoinVariation != VectorMapJoinVariation.FULL_OUTER || mapJoinDesc.isDynamicPartitionHashJoin()) {
        // No intercept: invoke initializeOp methods, then fixup the mapJoinTables.
        mapJoinOperator.initialize(testDesc.hiveConf, testDesc.inputObjectInspectors);
        mapJoinOperator.setTestMapJoinTableContainer(1, tableContainer, tableContainerSerDe);
    } else {
        if (mapJoinImplementation == MapJoinTestImplementation.ROW_MODE_HASH_MAP) {
            // Not supported.
            return;
        }
        // Wire in FULL OUTER Intercept; that call also initializes the operators.
        interceptCollector = MapJoinTestConfig.addFullOuterIntercept(mapJoinImplementation, testDesc, actualRows, testData, mapJoinOperator, tableContainer, tableContainerSerDe);
    }

    // Push the Big Table rows through the operator tree.
    if (isVectorOutput) {
        MapJoinTestData.driveVectorBigTableData(testDesc, testData, mapJoinOperator);
    } else {
        MapJoinTestData.driveBigTableData(testDesc, testData, mapJoinOperator);
    }

    if (!collector.getIsClosed()) {
        Assert.fail("collector operator not closed");
    }
    if (collector.getIsAborted()) {
        Assert.fail("collector operator aborted");
    }
    // Same condition as the wiring step above: the intercept collector exists only in that case.
    if (testDesc.vectorMapJoinVariation == VectorMapJoinVariation.FULL_OUTER && !mapJoinDesc.isDynamicPartitionHashJoin()) {
        if (!interceptCollector.getIsClosed()) {
            Assert.fail("intercept collector operator not closed");
        }
        if (interceptCollector.getIsAborted()) {
            Assert.fail("intercept collector operator aborted");
        }
    }
    System.out.println("*BENCHMARK* executeTestImplementation row count " + collector.getRowCount());

    // Verify the output!
    final String option = (testDesc.vectorMapJoinVariation == VectorMapJoinVariation.FULL_OUTER)
        ? " mapJoinPlanVariation " + testDesc.mapJoinPlanVariation.name()
        : "";
    final boolean matched = expectedTestRowMultiSet.verify(actualRows, "expected", "actual");
    final String context = " for implementation " + mapJoinImplementation + " variation " + testDesc.vectorMapJoinVariation + option;
    if (matched) {
        System.out.println("*BENCHMARK* " + title + " verify succeeded " + context);
    } else {
        System.out.println("*BENCHMARK* " + title + " verify failed" + context);
        expectedTestRowMultiSet.displayDifferences(actualRows, "expected", "actual");
    }
}
Also used : MapJoinOperator(org.apache.hadoop.hive.ql.exec.MapJoinOperator) VectorMapJoinOperator(org.apache.hadoop.hive.ql.exec.vector.VectorMapJoinOperator) TestMultiSetVectorCollectorOperator(org.apache.hadoop.hive.ql.exec.vector.mapjoin.MapJoinTestConfig.TestMultiSetVectorCollectorOperator) MapJoinDesc(org.apache.hadoop.hive.ql.plan.MapJoinDesc) VectorMapJoinDesc(org.apache.hadoop.hive.ql.plan.VectorMapJoinDesc) CountCollectorTestOperator(org.apache.hadoop.hive.ql.exec.util.collectoroperator.CountCollectorTestOperator) CreateMapJoinResult(org.apache.hadoop.hive.ql.exec.vector.mapjoin.MapJoinTestConfig.CreateMapJoinResult) VectorizationContext(org.apache.hadoop.hive.ql.exec.vector.VectorizationContext) RowTestObjectsMultiSet(org.apache.hadoop.hive.ql.exec.util.rowobjects.RowTestObjectsMultiSet) MapJoinTableContainerSerDe(org.apache.hadoop.hive.ql.exec.persistence.MapJoinTableContainerSerDe) TestMultiSetCollectorOperator(org.apache.hadoop.hive.ql.exec.vector.mapjoin.MapJoinTestConfig.TestMultiSetCollectorOperator) MapJoinTableContainer(org.apache.hadoop.hive.ql.exec.persistence.MapJoinTableContainer) VectorizationContextRegion(org.apache.hadoop.hive.ql.exec.vector.VectorizationContextRegion)

Aggregations

VectorizationContext (org.apache.hadoop.hive.ql.exec.vector.VectorizationContext)5 VectorizationContextRegion (org.apache.hadoop.hive.ql.exec.vector.VectorizationContextRegion)5 MapJoinDesc (org.apache.hadoop.hive.ql.plan.MapJoinDesc)3 VectorMapJoinDesc (org.apache.hadoop.hive.ql.plan.VectorMapJoinDesc)3 PrimitiveTypeInfo (org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo)3 TypeInfo (org.apache.hadoop.hive.serde2.typeinfo.TypeInfo)3 MapJoinOperator (org.apache.hadoop.hive.ql.exec.MapJoinOperator)2 MapJoinTableContainer (org.apache.hadoop.hive.ql.exec.persistence.MapJoinTableContainer)2 MapJoinTableContainerSerDe (org.apache.hadoop.hive.ql.exec.persistence.MapJoinTableContainerSerDe)2 CountCollectorTestOperator (org.apache.hadoop.hive.ql.exec.util.collectoroperator.CountCollectorTestOperator)2 VectorMapJoinOperator (org.apache.hadoop.hive.ql.exec.vector.VectorMapJoinOperator)2 ExprNodeDesc (org.apache.hadoop.hive.ql.plan.ExprNodeDesc)2 SelectDesc (org.apache.hadoop.hive.ql.plan.SelectDesc)2 VectorSelectDesc (org.apache.hadoop.hive.ql.plan.VectorSelectDesc)2 ArrayList (java.util.ArrayList)1 List (java.util.List)1 RowTestObjectsMultiSet (org.apache.hadoop.hive.ql.exec.util.rowobjects.RowTestObjectsMultiSet)1 VectorExtractRow (org.apache.hadoop.hive.ql.exec.vector.VectorExtractRow)1 VectorExpression (org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression)1 CreateMapJoinResult (org.apache.hadoop.hive.ql.exec.vector.mapjoin.MapJoinTestConfig.CreateMapJoinResult)1