Search in sources :

Example 1 with SupportedFunctionType

use of org.apache.hadoop.hive.ql.plan.VectorPTFDesc.SupportedFunctionType in project hive by apache.

From the class Vectorizer, method createVectorPTFInfo.

/**
 * Create the additional vectorization PTF information needed by the VectorPTFOperator during
 * execution.
 *
 * <p>The result bundles: the output column projection map (one scratch column per evaluator
 * result, followed by the input columns looked up by internal name), the partition and order key
 * column maps / column vector types / vector expressions, the key and non-key input column maps,
 * and the input vector expression and column vector type of each evaluator.
 *
 * <p>The order of the calls into {@code vContext} is kept exactly as before (evaluator output
 * scratch columns, partition expressions, order expressions, evaluator input expressions).
 *
 * @param ptfOp the PTF operator; its schema signature defines the output columns
 * @param ptfDesc the PTF descriptor (not read by this method; retained so callers are unaffected)
 * @param vContext context used to allocate scratch columns, resolve input column indexes and
 *        build vector expressions
 * @param vectorPTFDesc supplies the partition / order expressions and the evaluator metadata
 * @return the populated VectorPTFInfo
 * @throws HiveException declared by this method; presumably propagated from the
 *         VectorizationContext calls
 */
private static VectorPTFInfo createVectorPTFInfo(Operator<? extends OperatorDesc> ptfOp, PTFDesc ptfDesc, VectorizationContext vContext, VectorPTFDesc vectorPTFDesc) throws HiveException {
    List<ColumnInfo> outputSignature = ptfOp.getSchema().getSignature();
    final int outputSize = outputSignature.size();
    boolean isPartitionOrderBy = vectorPTFDesc.getIsPartitionOrderBy();
    ExprNodeDesc[] orderExprNodeDescs = vectorPTFDesc.getOrderExprNodeDescs();
    ExprNodeDesc[] partitionExprNodeDescs = vectorPTFDesc.getPartitionExprNodeDescs();
    final int evaluatorCount = vectorPTFDesc.getEvaluatorFunctionNames().length;
    List<ExprNodeDesc>[] evaluatorInputExprNodeDescLists = vectorPTFDesc.getEvaluatorInputExprNodeDescLists();
    /*
     * Output columns.
     */
    int[] outputColumnProjectionMap = new int[outputSize];
    // Evaluator results are first; each one gets its own scratch column.
    for (int i = 0; i < evaluatorCount; i++) {
        TypeInfo typeInfo = outputSignature.get(i).getType();
        outputColumnProjectionMap[i] = vContext.allocateScratchColumn(typeInfo);
    }
    // Followed by key and non-key input columns (some may be missing).
    for (int i = evaluatorCount; i < outputSize; i++) {
        ColumnInfo colInfo = outputSignature.get(i);
        outputColumnProjectionMap[i] = vContext.getInputColumnIndex(colInfo.getInternalName());
    }
    /*
     * Partition and order by.
     */
    int[] partitionColumnMap;
    Type[] partitionColumnVectorTypes;
    VectorExpression[] partitionExpressions;
    if (!isPartitionOrderBy) {
        // No partition keys: the partition arrays are passed on as null.
        partitionColumnMap = null;
        partitionColumnVectorTypes = null;
        partitionExpressions = null;
    } else {
        final int partitionKeyCount = partitionExprNodeDescs.length;
        partitionColumnMap = new int[partitionKeyCount];
        partitionColumnVectorTypes = new Type[partitionKeyCount];
        partitionExpressions = new VectorExpression[partitionKeyCount];
        for (int i = 0; i < partitionKeyCount; i++) {
            VectorExpression partitionExpression = vContext.getVectorExpression(partitionExprNodeDescs[i]);
            partitionColumnVectorTypes[i] = VectorizationContext.getColumnVectorTypeFromTypeInfo(partitionExpression.getOutputTypeInfo());
            partitionColumnMap[i] = partitionExpression.getOutputColumnNum();
            partitionExpressions[i] = partitionExpression;
        }
    }
    // Order keys are always processed, independent of isPartitionOrderBy.
    final int orderKeyCount = orderExprNodeDescs.length;
    int[] orderColumnMap = new int[orderKeyCount];
    Type[] orderColumnVectorTypes = new Type[orderKeyCount];
    VectorExpression[] orderExpressions = new VectorExpression[orderKeyCount];
    for (int i = 0; i < orderKeyCount; i++) {
        VectorExpression orderExpression = vContext.getVectorExpression(orderExprNodeDescs[i]);
        orderColumnVectorTypes[i] = VectorizationContext.getColumnVectorTypeFromTypeInfo(orderExpression.getOutputTypeInfo());
        orderColumnMap[i] = orderExpression.getOutputColumnNum();
        orderExpressions[i] = orderExpression;
    }
    /*
     * Key and non-key input columns.
     */
    ArrayList<Integer> keyInputColumns = new ArrayList<Integer>();
    ArrayList<Integer> nonKeyInputColumns = new ArrayList<Integer>();
    determineKeyAndNonKeyInputColumnMap(outputColumnProjectionMap, isPartitionOrderBy, orderColumnMap, partitionColumnMap, evaluatorCount, keyInputColumns, nonKeyInputColumns);
    int[] keyInputColumnMap = ArrayUtils.toPrimitive(keyInputColumns.toArray(new Integer[0]));
    int[] nonKeyInputColumnMap = ArrayUtils.toPrimitive(nonKeyInputColumns.toArray(new Integer[0]));
    /*
     * Evaluator inputs.
     */
    VectorExpression[] evaluatorInputExpressions = new VectorExpression[evaluatorCount];
    Type[] evaluatorInputColumnVectorTypes = new Type[evaluatorCount];
    for (int i = 0; i < evaluatorCount; i++) {
        List<ExprNodeDesc> exprNodeDescList = evaluatorInputExprNodeDescLists[i];
        VectorExpression inputVectorExpression;
        final Type columnVectorType;
        // An empty argument list is treated like a missing one instead of failing on get(0).
        if (exprNodeDescList != null && !exprNodeDescList.isEmpty()) {
            // Validation has limited evaluatorInputExprNodeDescLists to size 1.
            ExprNodeDesc exprNodeDesc = exprNodeDescList.get(0);
            // Determine input vector expression using the VectorizationContext.
            inputVectorExpression = vContext.getVectorExpression(exprNodeDesc);
            // No PrimitiveTypeInfo cast here: the argument type is only mapped to a column
            // vector type, so a non-primitive argument must not trigger a ClassCastException.
            columnVectorType = VectorizationContext.getColumnVectorTypeFromTypeInfo(exprNodeDesc.getTypeInfo());
        } else {
            inputVectorExpression = null;
            columnVectorType = ColumnVector.Type.NONE;
        }
        evaluatorInputExpressions[i] = inputVectorExpression;
        evaluatorInputColumnVectorTypes[i] = columnVectorType;
    }
    VectorPTFInfo vectorPTFInfo = new VectorPTFInfo();
    vectorPTFInfo.setOutputColumnMap(outputColumnProjectionMap);
    vectorPTFInfo.setPartitionColumnMap(partitionColumnMap);
    vectorPTFInfo.setPartitionColumnVectorTypes(partitionColumnVectorTypes);
    vectorPTFInfo.setPartitionExpressions(partitionExpressions);
    vectorPTFInfo.setOrderColumnMap(orderColumnMap);
    vectorPTFInfo.setOrderColumnVectorTypes(orderColumnVectorTypes);
    vectorPTFInfo.setOrderExpressions(orderExpressions);
    vectorPTFInfo.setEvaluatorInputExpressions(evaluatorInputExpressions);
    vectorPTFInfo.setEvaluatorInputColumnVectorTypes(evaluatorInputColumnVectorTypes);
    vectorPTFInfo.setKeyInputColumnMap(keyInputColumnMap);
    vectorPTFInfo.setNonKeyInputColumnMap(nonKeyInputColumnMap);
    return vectorPTFInfo;
}
Also used : VectorPTFInfo(org.apache.hadoop.hive.ql.plan.VectorPTFInfo) ArrayList(java.util.ArrayList) UDFToString(org.apache.hadoop.hive.ql.udf.UDFToString) PrimitiveTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo) ArrayList(java.util.ArrayList) List(java.util.List) ExprNodeDesc(org.apache.hadoop.hive.ql.plan.ExprNodeDesc) PrimitiveCategory(org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector.PrimitiveCategory) PartitionedTableFunctionDef(org.apache.hadoop.hive.ql.plan.ptf.PartitionedTableFunctionDef) TypeInfo(org.apache.hadoop.hive.serde2.typeinfo.TypeInfo) StructTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.StructTypeInfo) DecimalTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.DecimalTypeInfo) PrimitiveTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo) UDFToInteger(org.apache.hadoop.hive.ql.udf.UDFToInteger) InConstantType(org.apache.hadoop.hive.ql.exec.vector.VectorizationContext.InConstantType) HashTableImplementationType(org.apache.hadoop.hive.ql.plan.VectorMapJoinDesc.HashTableImplementationType) HashTableKeyType(org.apache.hadoop.hive.ql.plan.VectorMapJoinDesc.HashTableKeyType) Type(org.apache.hadoop.hive.ql.exec.vector.ColumnVector.Type) VectorDeserializeType(org.apache.hadoop.hive.ql.plan.VectorPartitionDesc.VectorDeserializeType) SupportedFunctionType(org.apache.hadoop.hive.ql.plan.VectorPTFDesc.SupportedFunctionType) OperatorType(org.apache.hadoop.hive.ql.plan.api.OperatorType) WindowFrameDef(org.apache.hadoop.hive.ql.plan.ptf.WindowFrameDef) SupportedFunctionType(org.apache.hadoop.hive.ql.plan.VectorPTFDesc.SupportedFunctionType) VectorExpression(org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression)

Example 2 with SupportedFunctionType

use of org.apache.hadoop.hive.ql.plan.VectorPTFDesc.SupportedFunctionType in project hive by apache.

From the class Vectorizer, method validatePTFOperator.

/**
 * Checks whether the given PTF operator can be vectorized and, as a side effect, fills in
 * {@code vectorPTFDesc} via {@code createVectorPTFDesc}.
 *
 * <p>Each rejection records its reason through {@code setNodeIssue}, {@code setOperatorIssue} or
 * {@code setExpressionIssue} and returns {@code false}. The checks, in order: PTF vectorization
 * enabled; not map-side; parent is a ReduceSinkOperator (optionally through one SelectOperator);
 * not a NOOP; is windowing; function definition is a WindowTableFunctionDef; the VectorPTFDesc
 * can be created; all output column types are supported; and for every evaluator the function,
 * window frame and argument expression are supported.
 *
 * @param op the PTF operator to validate
 * @param vContext the vectorization context passed on to {@code createVectorPTFDesc}
 * @param vectorPTFDesc descriptor populated by {@code createVectorPTFDesc} and then inspected
 * @return {@code true} if the operator can be vectorized, {@code false} otherwise
 * @throws HiveException declared by this method; a HiveException from
 *         {@code createVectorPTFDesc} is caught and reported as an operator issue instead
 * @throws RuntimeException if a window frame has a window type other than RANGE or ROWS
 */
private boolean validatePTFOperator(PTFOperator op, VectorizationContext vContext, VectorPTFDesc vectorPTFDesc) throws HiveException {
    if (!isPtfVectorizationEnabled) {
        setNodeIssue("Vectorization of PTF is not enabled (" + HiveConf.ConfVars.HIVE_VECTORIZATION_PTF_ENABLED.varname + " IS false)");
        return false;
    }
    PTFDesc ptfDesc = (PTFDesc) op.getConf();
    boolean isMapSide = ptfDesc.isMapSide();
    if (isMapSide) {
        setOperatorIssue("PTF Mapper not supported");
        return false;
    }
    // The PTF must sit directly under a reduce-shuffle, optionally with one SELECT in between.
    List<Operator<? extends OperatorDesc>> ptfParents = op.getParentOperators();
    if (ptfParents != null && !ptfParents.isEmpty()) {
        Operator<? extends OperatorDesc> ptfParent = ptfParents.get(0);
        if (!(ptfParent instanceof ReduceSinkOperator)) {
            boolean isReduceShufflePtf = false;
            if (ptfParent instanceof SelectOperator) {
                List<Operator<? extends OperatorDesc>> selectParents = ptfParent.getParentOperators();
                if (selectParents == null || selectParents.isEmpty()) {
                    // A parentless SELECT is accepted, as in the original logic.
                    isReduceShufflePtf = true;
                } else {
                    isReduceShufflePtf = (selectParents.get(0) instanceof ReduceSinkOperator);
                }
            }
            if (!isReduceShufflePtf) {
                setOperatorIssue("Only PTF directly under reduce-shuffle is supported");
                return false;
            }
        }
    }
    boolean forNoop = ptfDesc.forNoop();
    if (forNoop) {
        setOperatorIssue("NOOP not supported");
        return false;
    }
    boolean forWindowing = ptfDesc.forWindowing();
    if (!forWindowing) {
        setOperatorIssue("Windowing required");
        return false;
    }
    PartitionedTableFunctionDef funcDef = ptfDesc.getFuncDef();
    boolean isWindowTableFunctionDef = (funcDef instanceof WindowTableFunctionDef);
    if (!isWindowTableFunctionDef) {
        setOperatorIssue("Must be a WindowTableFunctionDef");
        return false;
    }
    try {
        createVectorPTFDesc(op, ptfDesc, vContext, vectorPTFDesc, vectorizedPTFMaxMemoryBufferingBatchCount);
    } catch (HiveException e) {
        // Failure to build the descriptor means "not vectorizable", not a hard error.
        setOperatorIssue("exception: " + VectorizationContext.getStackTraceAsSingleLine(e));
        return false;
    }
    // Output columns ok?
    String[] outputColumnNames = vectorPTFDesc.getOutputColumnNames();
    TypeInfo[] outputTypeInfos = vectorPTFDesc.getOutputTypeInfos();
    final int outputCount = outputColumnNames.length;
    for (int i = 0; i < outputCount; i++) {
        String typeName = outputTypeInfos[i].getTypeName();
        boolean ret = validateDataType(typeName, VectorExpressionDescriptor.Mode.PROJECTION, /* allowComplex */
        false);
        if (!ret) {
            setExpressionIssue("PTF Output Columns", "Data type " + typeName + " of column " + outputColumnNames[i] + " not supported");
            return false;
        }
    }
    // Evaluators (window functions) ok?
    String[] evaluatorFunctionNames = vectorPTFDesc.getEvaluatorFunctionNames();
    final int count = evaluatorFunctionNames.length;
    WindowFrameDef[] evaluatorWindowFrameDefs = vectorPTFDesc.getEvaluatorWindowFrameDefs();
    List<ExprNodeDesc>[] evaluatorInputExprNodeDescLists = vectorPTFDesc.getEvaluatorInputExprNodeDescLists();
    for (int i = 0; i < count; i++) {
        String functionName = evaluatorFunctionNames[i];
        SupportedFunctionType supportedFunctionType = VectorPTFDesc.supportedFunctionsMap.get(functionName);
        if (supportedFunctionType == null) {
            setOperatorIssue(functionName + " not in supported functions " + VectorPTFDesc.supportedFunctionNames);
            return false;
        }
        WindowFrameDef windowFrameDef = evaluatorWindowFrameDefs[i];
        if (!windowFrameDef.isStartUnbounded()) {
            setOperatorIssue(functionName + " only UNBOUNDED start frame is supported");
            return false;
        }
        switch(windowFrameDef.getWindowType()) {
            case RANGE:
                if (!windowFrameDef.getEnd().isCurrentRow()) {
                    setOperatorIssue(functionName + " only CURRENT ROW end frame is supported for RANGE");
                    return false;
                }
                break;
            case ROWS:
                if (!windowFrameDef.isEndUnbounded()) {
                    // Rejected because the end is NOT unbounded; the message now says so.
                    setOperatorIssue(functionName + " only UNBOUNDED end frame is supported for ROWS window type");
                    return false;
                }
                break;
            default:
                throw new RuntimeException("Unexpected window type " + windowFrameDef.getWindowType());
        }
        List<ExprNodeDesc> exprNodeDescList = evaluatorInputExprNodeDescLists[i];
        if (exprNodeDescList != null && exprNodeDescList.size() > 1) {
            setOperatorIssue("More than 1 argument expression of aggregation function " + functionName);
            return false;
        }
        // An empty argument list has nothing to validate; skip it instead of failing on get(0).
        if (exprNodeDescList != null && !exprNodeDescList.isEmpty()) {
            ExprNodeDesc exprNodeDesc = exprNodeDescList.get(0);
            if (containsLeadLag(exprNodeDesc)) {
                setOperatorIssue("lead and lag function not supported in argument expression of aggregation function " + functionName);
                return false;
            }
            if (supportedFunctionType != SupportedFunctionType.COUNT && supportedFunctionType != SupportedFunctionType.DENSE_RANK && supportedFunctionType != SupportedFunctionType.RANK) {
                // COUNT, DENSE_RANK, and RANK do not care about column types.  The rest do.
                TypeInfo typeInfo = exprNodeDesc.getTypeInfo();
                Category category = typeInfo.getCategory();
                boolean isSupportedType;
                if (category != Category.PRIMITIVE) {
                    isSupportedType = false;
                } else {
                    ColumnVector.Type colVecType = VectorizationContext.getColumnVectorTypeFromTypeInfo(typeInfo);
                    switch(colVecType) {
                        case LONG:
                        case DOUBLE:
                        case DECIMAL:
                            isSupportedType = true;
                            break;
                        default:
                            isSupportedType = false;
                            break;
                    }
                }
                if (!isSupportedType) {
                    setOperatorIssue(typeInfo.getTypeName() + " data type not supported in argument expression of aggregation function " + functionName);
                    return false;
                }
            }
        }
    }
    return true;
}
Also used : VectorReduceSinkLongOperator(org.apache.hadoop.hive.ql.exec.vector.reducesink.VectorReduceSinkLongOperator) VectorReduceSinkStringOperator(org.apache.hadoop.hive.ql.exec.vector.reducesink.VectorReduceSinkStringOperator) VectorMapJoinInnerBigOnlyMultiKeyOperator(org.apache.hadoop.hive.ql.exec.vector.mapjoin.VectorMapJoinInnerBigOnlyMultiKeyOperator) VectorMapJoinLeftSemiMultiKeyOperator(org.apache.hadoop.hive.ql.exec.vector.mapjoin.VectorMapJoinLeftSemiMultiKeyOperator) VectorReduceSinkObjectHashOperator(org.apache.hadoop.hive.ql.exec.vector.reducesink.VectorReduceSinkObjectHashOperator) VectorMapJoinOuterFilteredOperator(org.apache.hadoop.hive.ql.exec.vector.VectorMapJoinOuterFilteredOperator) SparkPartitionPruningSinkOperator(org.apache.hadoop.hive.ql.parse.spark.SparkPartitionPruningSinkOperator) VectorizationOperator(org.apache.hadoop.hive.ql.exec.vector.VectorizationOperator) VectorMapJoinInnerMultiKeyOperator(org.apache.hadoop.hive.ql.exec.vector.mapjoin.VectorMapJoinInnerMultiKeyOperator) VectorMapJoinOperator(org.apache.hadoop.hive.ql.exec.vector.VectorMapJoinOperator) VectorPTFOperator(org.apache.hadoop.hive.ql.exec.vector.ptf.VectorPTFOperator) VectorReduceSinkEmptyKeyOperator(org.apache.hadoop.hive.ql.exec.vector.reducesink.VectorReduceSinkEmptyKeyOperator) VectorMapJoinInnerStringOperator(org.apache.hadoop.hive.ql.exec.vector.mapjoin.VectorMapJoinInnerStringOperator) VectorMapJoinOuterLongOperator(org.apache.hadoop.hive.ql.exec.vector.mapjoin.VectorMapJoinOuterLongOperator) VectorMapJoinLeftSemiStringOperator(org.apache.hadoop.hive.ql.exec.vector.mapjoin.VectorMapJoinLeftSemiStringOperator) VectorMapJoinLeftSemiLongOperator(org.apache.hadoop.hive.ql.exec.vector.mapjoin.VectorMapJoinLeftSemiLongOperator) VectorReduceSinkMultiKeyOperator(org.apache.hadoop.hive.ql.exec.vector.reducesink.VectorReduceSinkMultiKeyOperator) VectorMapJoinInnerBigOnlyLongOperator(org.apache.hadoop.hive.ql.exec.vector.mapjoin.VectorMapJoinInnerBigOnlyLongOperator) 
VectorMapJoinInnerBigOnlyStringOperator(org.apache.hadoop.hive.ql.exec.vector.mapjoin.VectorMapJoinInnerBigOnlyStringOperator) VectorMapJoinOuterStringOperator(org.apache.hadoop.hive.ql.exec.vector.mapjoin.VectorMapJoinOuterStringOperator) VectorMapJoinInnerLongOperator(org.apache.hadoop.hive.ql.exec.vector.mapjoin.VectorMapJoinInnerLongOperator) VectorMapJoinOuterMultiKeyOperator(org.apache.hadoop.hive.ql.exec.vector.mapjoin.VectorMapJoinOuterMultiKeyOperator) HiveException(org.apache.hadoop.hive.ql.metadata.HiveException) PrimitiveCategory(org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector.PrimitiveCategory) Category(org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector.Category) UDFToString(org.apache.hadoop.hive.ql.udf.UDFToString) Type(org.apache.hadoop.hive.ql.exec.vector.ColumnVector.Type) ArrayList(java.util.ArrayList) List(java.util.List) ExprNodeDesc(org.apache.hadoop.hive.ql.plan.ExprNodeDesc) WindowTableFunctionDef(org.apache.hadoop.hive.ql.plan.ptf.WindowTableFunctionDef) PartitionedTableFunctionDef(org.apache.hadoop.hive.ql.plan.ptf.PartitionedTableFunctionDef) TypeInfo(org.apache.hadoop.hive.serde2.typeinfo.TypeInfo) StructTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.StructTypeInfo) DecimalTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.DecimalTypeInfo) PrimitiveTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo) ColumnVector(org.apache.hadoop.hive.ql.exec.vector.ColumnVector) WindowFrameDef(org.apache.hadoop.hive.ql.plan.ptf.WindowFrameDef) VectorPTFDesc(org.apache.hadoop.hive.ql.plan.VectorPTFDesc) PTFDesc(org.apache.hadoop.hive.ql.plan.PTFDesc) SupportedFunctionType(org.apache.hadoop.hive.ql.plan.VectorPTFDesc.SupportedFunctionType) AbstractOperatorDesc(org.apache.hadoop.hive.ql.plan.AbstractOperatorDesc) OperatorDesc(org.apache.hadoop.hive.ql.plan.OperatorDesc)

Aggregations

ArrayList (java.util.ArrayList)2 List (java.util.List)2 Type (org.apache.hadoop.hive.ql.exec.vector.ColumnVector.Type)2 ExprNodeDesc (org.apache.hadoop.hive.ql.plan.ExprNodeDesc)2 SupportedFunctionType (org.apache.hadoop.hive.ql.plan.VectorPTFDesc.SupportedFunctionType)2 PartitionedTableFunctionDef (org.apache.hadoop.hive.ql.plan.ptf.PartitionedTableFunctionDef)2 WindowFrameDef (org.apache.hadoop.hive.ql.plan.ptf.WindowFrameDef)2 UDFToString (org.apache.hadoop.hive.ql.udf.UDFToString)2 PrimitiveCategory (org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector.PrimitiveCategory)2 DecimalTypeInfo (org.apache.hadoop.hive.serde2.typeinfo.DecimalTypeInfo)2 ColumnVector (org.apache.hadoop.hive.ql.exec.vector.ColumnVector)1 VectorMapJoinOperator (org.apache.hadoop.hive.ql.exec.vector.VectorMapJoinOperator)1 VectorMapJoinOuterFilteredOperator (org.apache.hadoop.hive.ql.exec.vector.VectorMapJoinOuterFilteredOperator)1 InConstantType (org.apache.hadoop.hive.ql.exec.vector.VectorizationContext.InConstantType)1 VectorizationOperator (org.apache.hadoop.hive.ql.exec.vector.VectorizationOperator)1 VectorExpression (org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression)1 VectorMapJoinInnerBigOnlyLongOperator (org.apache.hadoop.hive.ql.exec.vector.mapjoin.VectorMapJoinInnerBigOnlyLongOperator)1 VectorMapJoinInnerBigOnlyMultiKeyOperator (org.apache.hadoop.hive.ql.exec.vector.mapjoin.VectorMapJoinInnerBigOnlyMultiKeyOperator)1 VectorMapJoinInnerBigOnlyStringOperator (org.apache.hadoop.hive.ql.exec.vector.mapjoin.VectorMapJoinInnerBigOnlyStringOperator)1 VectorMapJoinInnerLongOperator (org.apache.hadoop.hive.ql.exec.vector.mapjoin.VectorMapJoinInnerLongOperator)1