
Example 1 with VectorAggregationDesc

Use of org.apache.hadoop.hive.ql.exec.vector.VectorAggregationDesc in project hive by apache.

From the class Vectorizer, method doVectorizeGroupByOperator.

/*
   * NOTE: The VectorGroupByDesc has already been allocated and will be updated here.
   */
private static ImmutablePair<Operator<? extends OperatorDesc>, String> doVectorizeGroupByOperator(Operator<? extends OperatorDesc> groupByOp, VectorizationContext vContext, VectorGroupByDesc vectorGroupByDesc) throws HiveException {
    GroupByDesc groupByDesc = (GroupByDesc) groupByOp.getConf();
    List<ExprNodeDesc> keysDesc = groupByDesc.getKeys();
    // For now, we don't support group by on DECIMAL_64 keys.
    VectorExpression[] vecKeyExpressions = vContext.getVectorExpressionsUpConvertDecimal64(keysDesc);
    ArrayList<AggregationDesc> aggrDesc = groupByDesc.getAggregators();
    final int size = aggrDesc.size();
    VectorAggregationDesc[] vecAggrDescs = new VectorAggregationDesc[size];
    int[] projectedOutputColumns = new int[size];
    for (int i = 0; i < size; ++i) {
        AggregationDesc aggDesc = aggrDesc.get(i);
        ImmutablePair<VectorAggregationDesc, String> pair = getVectorAggregationDesc(aggDesc, vContext);
        if (pair.left == null) {
            return new ImmutablePair<Operator<? extends OperatorDesc>, String>(null, pair.right);
        }
        vecAggrDescs[i] = pair.left;
        // GroupBy generates a new vectorized row batch...
        projectedOutputColumns[i] = i;
    }
    vectorGroupByDesc.setKeyExpressions(vecKeyExpressions);
    vectorGroupByDesc.setVecAggrDescs(vecAggrDescs);
    vectorGroupByDesc.setProjectedOutputColumns(projectedOutputColumns);
    Operator<GroupByDesc> vectorOp = OperatorFactory.getVectorOperator(groupByOp.getCompilationOpContext(), groupByDesc, vContext, vectorGroupByDesc);
    return new ImmutablePair<Operator<? extends OperatorDesc>, String>(vectorOp, null);
}
Also used : UDFToString(org.apache.hadoop.hive.ql.udf.UDFToString) VectorAggregationDesc(org.apache.hadoop.hive.ql.exec.vector.VectorAggregationDesc) ImmutablePair(org.apache.commons.lang3.tuple.ImmutablePair) VectorExpression(org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression) AggregationDesc(org.apache.hadoop.hive.ql.plan.AggregationDesc) ExprNodeDesc(org.apache.hadoop.hive.ql.plan.ExprNodeDesc) VectorGroupByDesc(org.apache.hadoop.hive.ql.plan.VectorGroupByDesc) GroupByDesc(org.apache.hadoop.hive.ql.plan.GroupByDesc)
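
The ImmutablePair returned above follows an operator-or-error convention: the vectorized operator on the left, or a "not vectorized" reason string on the right. A minimal sketch of the caller-side pattern, distilled from the GROUPBY branch of Example 2 (not an additional Hive API):

// Sketch of the caller-side pattern (mirrors the GROUPBY case in Example 2).
ImmutablePair<Operator<? extends OperatorDesc>, String> pair =
        doVectorizeGroupByOperator(groupByOp, vContext, vectorGroupByDesc);
if (pair.left == null) {
    // pair.right holds the human-readable reason vectorization was rejected.
    setOperatorIssue(pair.right);
    throw new VectorizerCannotVectorizeException();
}
Operator<? extends OperatorDesc> vectorOp = pair.left;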

Example 2 with VectorAggregationDesc

Use of org.apache.hadoop.hive.ql.exec.vector.VectorAggregationDesc in project hive by apache.

From the class Vectorizer, method validateAndVectorizeOperator.

public Operator<? extends OperatorDesc> validateAndVectorizeOperator(Operator<? extends OperatorDesc> op, VectorizationContext vContext, boolean isReduce, boolean isTezOrSpark, VectorTaskColumnInfo vectorTaskColumnInfo) throws HiveException, VectorizerCannotVectorizeException {
    Operator<? extends OperatorDesc> vectorOp = null;
    // This "global" allows various validation methods to set the "not vectorized" reason.
    currentOperator = op;
    boolean isNative;
    try {
        switch(op.getType()) {
            case MAPJOIN:
                {
                    if (op instanceof MapJoinOperator) {
                        if (!validateMapJoinOperator((MapJoinOperator) op)) {
                            throw new VectorizerCannotVectorizeException();
                        }
                    } else if (op instanceof SMBMapJoinOperator) {
                        if (!validateSMBMapJoinOperator((SMBMapJoinOperator) op)) {
                            throw new VectorizerCannotVectorizeException();
                        }
                    } else {
                        setOperatorNotSupported(op);
                        throw new VectorizerCannotVectorizeException();
                    }
                    if (op instanceof MapJoinOperator) {
                        MapJoinDesc desc = (MapJoinDesc) op.getConf();
                        VectorMapJoinDesc vectorMapJoinDesc = new VectorMapJoinDesc();
                        boolean specialize = canSpecializeMapJoin(op, desc, isTezOrSpark, vContext, vectorMapJoinDesc);
                        if (!specialize) {
                            Class<? extends Operator<?>> opClass = null;
                            // *NON-NATIVE* vector map differences for LEFT OUTER JOIN and Filtered...
                            List<ExprNodeDesc> bigTableFilters = desc.getFilters().get((byte) desc.getPosBigTable());
                            boolean isOuterAndFiltered = (!desc.isNoOuterJoin() && bigTableFilters.size() > 0);
                            if (!isOuterAndFiltered) {
                                opClass = VectorMapJoinOperator.class;
                            } else {
                                opClass = VectorMapJoinOuterFilteredOperator.class;
                            }
                            vectorOp = OperatorFactory.getVectorOperator(opClass, op.getCompilationOpContext(), desc, vContext, vectorMapJoinDesc);
                            isNative = false;
                        } else {
                            // TEMPORARY Until Native Vector Map Join with Hybrid passes tests...
                            // HiveConf.setBoolVar(physicalContext.getConf(),
                            // HiveConf.ConfVars.HIVEUSEHYBRIDGRACEHASHJOIN, false);
                            vectorOp = specializeMapJoinOperator(op, vContext, desc, vectorMapJoinDesc);
                            isNative = true;
                            if (vectorTaskColumnInfo != null) {
                                VectorMapJoinInfo vectorMapJoinInfo = vectorMapJoinDesc.getVectorMapJoinInfo();
                                if (usesVectorUDFAdaptor(vectorMapJoinDesc.getAllBigTableKeyExpressions())) {
                                    vectorTaskColumnInfo.setUsesVectorUDFAdaptor(true);
                                }
                                if (usesVectorUDFAdaptor(vectorMapJoinDesc.getAllBigTableValueExpressions())) {
                                    vectorTaskColumnInfo.setUsesVectorUDFAdaptor(true);
                                }
                            }
                        }
                    } else {
                        Preconditions.checkState(op instanceof SMBMapJoinOperator);
                        SMBJoinDesc smbJoinSinkDesc = (SMBJoinDesc) op.getConf();
                        VectorSMBJoinDesc vectorSMBJoinDesc = new VectorSMBJoinDesc();
                        vectorOp = OperatorFactory.getVectorOperator(op.getCompilationOpContext(), smbJoinSinkDesc, vContext, vectorSMBJoinDesc);
                        isNative = false;
                    }
                }
                break;
            case REDUCESINK:
                {
                    if (!validateReduceSinkOperator((ReduceSinkOperator) op)) {
                        throw new VectorizerCannotVectorizeException();
                    }
                    ReduceSinkDesc reduceDesc = (ReduceSinkDesc) op.getConf();
                    VectorReduceSinkDesc vectorReduceSinkDesc = new VectorReduceSinkDesc();
                    boolean specialize = canSpecializeReduceSink(reduceDesc, isTezOrSpark, vContext, vectorReduceSinkDesc);
                    if (!specialize) {
                        vectorOp = OperatorFactory.getVectorOperator(op.getCompilationOpContext(), reduceDesc, vContext, vectorReduceSinkDesc);
                        isNative = false;
                    } else {
                        vectorOp = specializeReduceSinkOperator(op, vContext, reduceDesc, vectorReduceSinkDesc);
                        isNative = true;
                        if (vectorTaskColumnInfo != null) {
                            VectorReduceSinkInfo vectorReduceSinkInfo = vectorReduceSinkDesc.getVectorReduceSinkInfo();
                            if (usesVectorUDFAdaptor(vectorReduceSinkInfo.getReduceSinkKeyExpressions())) {
                                vectorTaskColumnInfo.setUsesVectorUDFAdaptor(true);
                            }
                            if (usesVectorUDFAdaptor(vectorReduceSinkInfo.getReduceSinkValueExpressions())) {
                                vectorTaskColumnInfo.setUsesVectorUDFAdaptor(true);
                            }
                        }
                    }
                }
                break;
            case FILTER:
                {
                    if (!validateFilterOperator((FilterOperator) op)) {
                        throw new VectorizerCannotVectorizeException();
                    }
                    VectorFilterDesc vectorFilterDesc = new VectorFilterDesc();
                    vectorOp = vectorizeFilterOperator(op, vContext, vectorFilterDesc);
                    isNative = true;
                    if (vectorTaskColumnInfo != null) {
                        VectorExpression vectorPredicateExpr = vectorFilterDesc.getPredicateExpression();
                        if (usesVectorUDFAdaptor(vectorPredicateExpr)) {
                            vectorTaskColumnInfo.setUsesVectorUDFAdaptor(true);
                        }
                    }
                }
                break;
            case SELECT:
                {
                    if (!validateSelectOperator((SelectOperator) op)) {
                        throw new VectorizerCannotVectorizeException();
                    }
                    VectorSelectDesc vectorSelectDesc = new VectorSelectDesc();
                    vectorOp = vectorizeSelectOperator(op, vContext, vectorSelectDesc);
                    isNative = true;
                    if (vectorTaskColumnInfo != null) {
                        VectorExpression[] vectorSelectExprs = vectorSelectDesc.getSelectExpressions();
                        if (usesVectorUDFAdaptor(vectorSelectExprs)) {
                            vectorTaskColumnInfo.setUsesVectorUDFAdaptor(true);
                        }
                    }
                }
                break;
            case GROUPBY:
                {
                    // The validateGroupByOperator method will update vectorGroupByDesc.
                    VectorGroupByDesc vectorGroupByDesc = new VectorGroupByDesc();
                    if (!validateGroupByOperator((GroupByOperator) op, isReduce, isTezOrSpark, vectorGroupByDesc)) {
                        throw new VectorizerCannotVectorizeException();
                    }
                    ImmutablePair<Operator<? extends OperatorDesc>, String> pair = doVectorizeGroupByOperator(op, vContext, vectorGroupByDesc);
                    if (pair.left == null) {
                        setOperatorIssue(pair.right);
                        throw new VectorizerCannotVectorizeException();
                    }
                    vectorOp = pair.left;
                    isNative = false;
                    if (vectorTaskColumnInfo != null) {
                        VectorExpression[] vecKeyExpressions = vectorGroupByDesc.getKeyExpressions();
                        if (usesVectorUDFAdaptor(vecKeyExpressions)) {
                            vectorTaskColumnInfo.setUsesVectorUDFAdaptor(true);
                        }
                        VectorAggregationDesc[] vecAggrDescs = vectorGroupByDesc.getVecAggrDescs();
                        for (VectorAggregationDesc vecAggrDesc : vecAggrDescs) {
                            if (usesVectorUDFAdaptor(vecAggrDesc.getInputExpression())) {
                                vectorTaskColumnInfo.setUsesVectorUDFAdaptor(true);
                            }
                        }
                    }
                }
                break;
            case FILESINK:
                {
                    if (!validateFileSinkOperator((FileSinkOperator) op)) {
                        throw new VectorizerCannotVectorizeException();
                    }
                    FileSinkDesc fileSinkDesc = (FileSinkDesc) op.getConf();
                    VectorFileSinkDesc vectorFileSinkDesc = new VectorFileSinkDesc();
                    vectorOp = OperatorFactory.getVectorOperator(op.getCompilationOpContext(), fileSinkDesc, vContext, vectorFileSinkDesc);
                    isNative = false;
                }
                break;
            case LIMIT:
                {
                    // No validation.
                    LimitDesc limitDesc = (LimitDesc) op.getConf();
                    VectorLimitDesc vectorLimitDesc = new VectorLimitDesc();
                    vectorOp = OperatorFactory.getVectorOperator(op.getCompilationOpContext(), limitDesc, vContext, vectorLimitDesc);
                    isNative = true;
                }
                break;
            case EVENT:
                {
                    // No validation.
                    AppMasterEventDesc eventDesc = (AppMasterEventDesc) op.getConf();
                    VectorAppMasterEventDesc vectorEventDesc = new VectorAppMasterEventDesc();
                    vectorOp = OperatorFactory.getVectorOperator(op.getCompilationOpContext(), eventDesc, vContext, vectorEventDesc);
                    isNative = true;
                }
                break;
            case PTF:
                {
                    // The validatePTFOperator method will update vectorPTFDesc.
                    VectorPTFDesc vectorPTFDesc = new VectorPTFDesc();
                    if (!validatePTFOperator((PTFOperator) op, vContext, vectorPTFDesc)) {
                        throw new VectorizerCannotVectorizeException();
                    }
                    vectorOp = vectorizePTFOperator(op, vContext, vectorPTFDesc);
                    isNative = true;
                }
                break;
            case HASHTABLESINK:
                {
                    // No validation.
                    SparkHashTableSinkDesc sparkHashTableSinkDesc = (SparkHashTableSinkDesc) op.getConf();
                    VectorSparkHashTableSinkDesc vectorSparkHashTableSinkDesc = new VectorSparkHashTableSinkDesc();
                    vectorOp = OperatorFactory.getVectorOperator(op.getCompilationOpContext(), sparkHashTableSinkDesc, vContext, vectorSparkHashTableSinkDesc);
                    isNative = true;
                }
                break;
            case SPARKPRUNINGSINK:
                {
                    // No validation.
                    SparkPartitionPruningSinkDesc sparkPartitionPruningSinkDesc = (SparkPartitionPruningSinkDesc) op.getConf();
                    VectorSparkPartitionPruningSinkDesc vectorSparkPartitionPruningSinkDesc = new VectorSparkPartitionPruningSinkDesc();
                    vectorOp = OperatorFactory.getVectorOperator(op.getCompilationOpContext(), sparkPartitionPruningSinkDesc, vContext, vectorSparkPartitionPruningSinkDesc);
                    // need to maintain the unique ID so that target map works can
                    // read the output
                    ((SparkPartitionPruningSinkOperator) vectorOp).setUniqueId(((SparkPartitionPruningSinkOperator) op).getUniqueId());
                    isNative = true;
                }
                break;
            default:
                setOperatorNotSupported(op);
                throw new VectorizerCannotVectorizeException();
        }
    } catch (HiveException e) {
        setOperatorIssue(e.getMessage());
        throw new VectorizerCannotVectorizeException();
    }
    Preconditions.checkState(vectorOp != null);
    if (vectorTaskColumnInfo != null && !isNative) {
        vectorTaskColumnInfo.setAllNative(false);
    }
    LOG.debug("vectorizeOperator " + vectorOp.getClass().getName());
    LOG.debug("vectorizeOperator " + vectorOp.getConf().getClass().getName());
    return vectorOp;
}
Also used : VectorReduceSinkLongOperator(org.apache.hadoop.hive.ql.exec.vector.reducesink.VectorReduceSinkLongOperator) VectorReduceSinkStringOperator(org.apache.hadoop.hive.ql.exec.vector.reducesink.VectorReduceSinkStringOperator) VectorMapJoinInnerBigOnlyMultiKeyOperator(org.apache.hadoop.hive.ql.exec.vector.mapjoin.VectorMapJoinInnerBigOnlyMultiKeyOperator) VectorMapJoinLeftSemiMultiKeyOperator(org.apache.hadoop.hive.ql.exec.vector.mapjoin.VectorMapJoinLeftSemiMultiKeyOperator) VectorReduceSinkObjectHashOperator(org.apache.hadoop.hive.ql.exec.vector.reducesink.VectorReduceSinkObjectHashOperator) VectorMapJoinOuterFilteredOperator(org.apache.hadoop.hive.ql.exec.vector.VectorMapJoinOuterFilteredOperator) SparkPartitionPruningSinkOperator(org.apache.hadoop.hive.ql.parse.spark.SparkPartitionPruningSinkOperator) VectorizationOperator(org.apache.hadoop.hive.ql.exec.vector.VectorizationOperator) VectorMapJoinInnerMultiKeyOperator(org.apache.hadoop.hive.ql.exec.vector.mapjoin.VectorMapJoinInnerMultiKeyOperator) VectorMapJoinOperator(org.apache.hadoop.hive.ql.exec.vector.VectorMapJoinOperator) VectorPTFOperator(org.apache.hadoop.hive.ql.exec.vector.ptf.VectorPTFOperator) VectorReduceSinkEmptyKeyOperator(org.apache.hadoop.hive.ql.exec.vector.reducesink.VectorReduceSinkEmptyKeyOperator) VectorMapJoinInnerStringOperator(org.apache.hadoop.hive.ql.exec.vector.mapjoin.VectorMapJoinInnerStringOperator) VectorMapJoinOuterLongOperator(org.apache.hadoop.hive.ql.exec.vector.mapjoin.VectorMapJoinOuterLongOperator) VectorMapJoinLeftSemiStringOperator(org.apache.hadoop.hive.ql.exec.vector.mapjoin.VectorMapJoinLeftSemiStringOperator) VectorMapJoinLeftSemiLongOperator(org.apache.hadoop.hive.ql.exec.vector.mapjoin.VectorMapJoinLeftSemiLongOperator) VectorReduceSinkMultiKeyOperator(org.apache.hadoop.hive.ql.exec.vector.reducesink.VectorReduceSinkMultiKeyOperator) VectorMapJoinInnerBigOnlyLongOperator(org.apache.hadoop.hive.ql.exec.vector.mapjoin.VectorMapJoinInnerBigOnlyLongOperator) VectorMapJoinInnerBigOnlyStringOperator(org.apache.hadoop.hive.ql.exec.vector.mapjoin.VectorMapJoinInnerBigOnlyStringOperator) VectorMapJoinOuterStringOperator(org.apache.hadoop.hive.ql.exec.vector.mapjoin.VectorMapJoinOuterStringOperator) VectorMapJoinInnerLongOperator(org.apache.hadoop.hive.ql.exec.vector.mapjoin.VectorMapJoinInnerLongOperator) VectorMapJoinOuterMultiKeyOperator(org.apache.hadoop.hive.ql.exec.vector.mapjoin.VectorMapJoinOuterMultiKeyOperator) VectorAppMasterEventDesc(org.apache.hadoop.hive.ql.plan.VectorAppMasterEventDesc) AppMasterEventDesc(org.apache.hadoop.hive.ql.plan.AppMasterEventDesc) VectorMapJoinDesc(org.apache.hadoop.hive.ql.plan.VectorMapJoinDesc) HiveException(org.apache.hadoop.hive.ql.metadata.HiveException) SMBJoinDesc(org.apache.hadoop.hive.ql.plan.SMBJoinDesc) VectorSMBJoinDesc(org.apache.hadoop.hive.ql.plan.VectorSMBJoinDesc) VectorFileSinkDesc(org.apache.hadoop.hive.ql.plan.VectorFileSinkDesc) FileSinkDesc(org.apache.hadoop.hive.ql.plan.FileSinkDesc) VectorReduceSinkInfo(org.apache.hadoop.hive.ql.plan.VectorReduceSinkInfo) VectorSparkPartitionPruningSinkDesc(org.apache.hadoop.hive.ql.plan.VectorSparkPartitionPruningSinkDesc) SparkPartitionPruningSinkDesc(org.apache.hadoop.hive.ql.optimizer.spark.SparkPartitionPruningSinkDesc) ArrayList(java.util.ArrayList) List(java.util.List) VectorSelectDesc(org.apache.hadoop.hive.ql.plan.VectorSelectDesc) VectorReduceSinkDesc(org.apache.hadoop.hive.ql.plan.VectorReduceSinkDesc) ReduceSinkDesc(org.apache.hadoop.hive.ql.plan.ReduceSinkDesc) VectorFilterDesc(org.apache.hadoop.hive.ql.plan.VectorFilterDesc) SparkHashTableSinkDesc(org.apache.hadoop.hive.ql.plan.SparkHashTableSinkDesc) VectorSparkHashTableSinkDesc(org.apache.hadoop.hive.ql.plan.VectorSparkHashTableSinkDesc) MapJoinDesc(org.apache.hadoop.hive.ql.plan.MapJoinDesc) VectorMapJoinInfo(org.apache.hadoop.hive.ql.plan.VectorMapJoinInfo) VectorLimitDesc(org.apache.hadoop.hive.ql.plan.VectorLimitDesc) VectorAggregationDesc(org.apache.hadoop.hive.ql.exec.vector.VectorAggregationDesc) LimitDesc(org.apache.hadoop.hive.ql.plan.LimitDesc) ImmutablePair(org.apache.commons.lang3.tuple.ImmutablePair) VectorGroupByDesc(org.apache.hadoop.hive.ql.plan.VectorGroupByDesc) VectorPTFDesc(org.apache.hadoop.hive.ql.plan.VectorPTFDesc) VectorExpression(org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression)
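
validateAndVectorizeOperator either returns the vectorized replacement operator or throws VectorizerCannotVectorizeException. A hedged sketch of how a planner-side caller might drive it; only the method signature and the exception contract come from the example above, the surrounding variables are assumed:

// Illustrative only: 'vectorizer', 'op', and the context arguments are assumed
// to exist in the caller's scope.
Operator<? extends OperatorDesc> planned;
try {
    planned = vectorizer.validateAndVectorizeOperator(
            op, vContext, isReduce, isTezOrSpark, vectorTaskColumnInfo);
} catch (VectorizerCannotVectorizeException e) {
    planned = op;  // assumption: on rejection, the original row-mode operator is kept
}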

Example 3 with VectorAggregationDesc

Use of org.apache.hadoop.hive.ql.exec.vector.VectorAggregationDesc in project hive by apache.

From the class Vectorizer, method getVectorAggregationDesc.

private static ImmutablePair<VectorAggregationDesc, String> getVectorAggregationDesc(AggregationDesc aggrDesc, VectorizationContext vContext) throws HiveException {
    String aggregateName = aggrDesc.getGenericUDAFName();
    ArrayList<ExprNodeDesc> parameterList = aggrDesc.getParameters();
    final int parameterCount = parameterList.size();
    final GenericUDAFEvaluator.Mode udafEvaluatorMode = aggrDesc.getMode();
    /*
     * Look at evaluator to get output type info.
     */
    GenericUDAFEvaluator evaluator = aggrDesc.getGenericUDAFEvaluator();
    ArrayList<ExprNodeDesc> parameters = aggrDesc.getParameters();
    ObjectInspector[] parameterObjectInspectors = new ObjectInspector[parameterCount];
    for (int i = 0; i < parameterCount; i++) {
        TypeInfo typeInfo = parameters.get(i).getTypeInfo();
        parameterObjectInspectors[i] = TypeInfoUtils.getStandardWritableObjectInspectorFromTypeInfo(typeInfo);
    }
    // The only way to get the return object inspector (and its return type) is to
    // initialize it...
    ObjectInspector returnOI = evaluator.init(aggrDesc.getMode(), parameterObjectInspectors);
    VectorizedUDAFs annotation = AnnotationUtils.getAnnotation(evaluator.getClass(), VectorizedUDAFs.class);
    if (annotation == null) {
        String issue = "Evaluator " + evaluator.getClass().getSimpleName() + " does not have a " + "vectorized UDAF annotation (aggregation: \"" + aggregateName + "\"). " + "Vectorization not supported";
        return new ImmutablePair<VectorAggregationDesc, String>(null, issue);
    }
    final Class<? extends VectorAggregateExpression>[] vecAggrClasses = annotation.value();
    final TypeInfo outputTypeInfo = TypeInfoUtils.getTypeInfoFromTypeString(returnOI.getTypeName());
    // Not final since it may change later due to DECIMAL_64.
    ColumnVector.Type outputColVectorType = VectorizationContext.getColumnVectorTypeFromTypeInfo(outputTypeInfo);
    /*
     * Determine input type info.
     */
    final TypeInfo inputTypeInfo;
    // Not final since it may change later due to DECIMAL_64.
    VectorExpression inputExpression;
    ColumnVector.Type inputColVectorType;
    if (parameterCount == 0) {
        // COUNT(*)
        inputTypeInfo = null;
        inputColVectorType = null;
        inputExpression = null;
    } else if (parameterCount == 1) {
        ExprNodeDesc exprNodeDesc = parameterList.get(0);
        inputTypeInfo = exprNodeDesc.getTypeInfo();
        if (inputTypeInfo == null) {
            String issue = "Aggregations with null parameter type not supported " + aggregateName + "(" + parameterList.toString() + ")";
            return new ImmutablePair<VectorAggregationDesc, String>(null, issue);
        }
        /*
         * Determine an *initial* input vector expression.
         *
         * Note: we may have to convert it later from DECIMAL_64 to regular decimal.
         */
        inputExpression = vContext.getVectorExpression(exprNodeDesc, VectorExpressionDescriptor.Mode.PROJECTION);
        if (inputExpression == null) {
            String issue = "Parameter expression " + exprNodeDesc.toString() + " not supported " + aggregateName + "(" + parameterList.toString() + ")";
            return new ImmutablePair<VectorAggregationDesc, String>(null, issue);
        }
        if (inputExpression.getOutputTypeInfo() == null) {
            String issue = "Parameter expression " + exprNodeDesc.toString() + " with null type not supported " + aggregateName + "(" + parameterList.toString() + ")";
            return new ImmutablePair<VectorAggregationDesc, String>(null, issue);
        }
        inputColVectorType = inputExpression.getOutputColumnVectorType();
    } else {
        // No multi-parameter aggregations supported.
        String issue = "Aggregations with > 1 parameter are not supported " + aggregateName + "(" + parameterList.toString() + ")";
        return new ImmutablePair<VectorAggregationDesc, String>(null, issue);
    }
    /*
     * When we have DECIMAL_64 as the input parameter then we have to see if there is a special
     * vector UDAF for it.  If not we will need to convert the input parameter.
     */
    if (inputTypeInfo != null && inputColVectorType == ColumnVector.Type.DECIMAL_64) {
        if (outputColVectorType == ColumnVector.Type.DECIMAL) {
            DecimalTypeInfo outputDecimalTypeInfo = (DecimalTypeInfo) outputTypeInfo;
            if (HiveDecimalWritable.isPrecisionDecimal64(outputDecimalTypeInfo.getPrecision())) {
                // Try with DECIMAL_64 input and DECIMAL_64 output.
                final Class<? extends VectorAggregateExpression> vecAggrClass = findVecAggrClass(vecAggrClasses, aggregateName, inputColVectorType, ColumnVector.Type.DECIMAL_64, udafEvaluatorMode);
                if (vecAggrClass != null) {
                    final VectorAggregationDesc vecAggrDesc = new VectorAggregationDesc(aggrDesc, evaluator, inputTypeInfo, inputColVectorType, inputExpression, outputTypeInfo, ColumnVector.Type.DECIMAL_64, vecAggrClass);
                    return new ImmutablePair<VectorAggregationDesc, String>(vecAggrDesc, null);
                }
            }
            // Try with regular DECIMAL output type.
            final Class<? extends VectorAggregateExpression> vecAggrClass = findVecAggrClass(vecAggrClasses, aggregateName, inputColVectorType, outputColVectorType, udafEvaluatorMode);
            if (vecAggrClass != null) {
                final VectorAggregationDesc vecAggrDesc = new VectorAggregationDesc(aggrDesc, evaluator, inputTypeInfo, inputColVectorType, inputExpression, outputTypeInfo, outputColVectorType, vecAggrClass);
                return new ImmutablePair<VectorAggregationDesc, String>(vecAggrDesc, null);
            }
            // No support for DECIMAL_64 input.  We must convert.
            inputExpression = vContext.wrapWithDecimal64ToDecimalConversion(inputExpression);
            inputColVectorType = ColumnVector.Type.DECIMAL;
        // Fall through...
        } else {
            // Try with DECIMAL_64 input and the desired output type.
            final Class<? extends VectorAggregateExpression> vecAggrClass = findVecAggrClass(vecAggrClasses, aggregateName, inputColVectorType, outputColVectorType, udafEvaluatorMode);
            if (vecAggrClass != null) {
                final VectorAggregationDesc vecAggrDesc = new VectorAggregationDesc(aggrDesc, evaluator, inputTypeInfo, inputColVectorType, inputExpression, outputTypeInfo, outputColVectorType, vecAggrClass);
                return new ImmutablePair<VectorAggregationDesc, String>(vecAggrDesc, null);
            }
            // No support for DECIMAL_64 input.  We must convert.
            inputExpression = vContext.wrapWithDecimal64ToDecimalConversion(inputExpression);
            inputColVectorType = ColumnVector.Type.DECIMAL;
        // Fall through...
        }
    }
    /*
     * Look for normal match.
     */
    Class<? extends VectorAggregateExpression> vecAggrClass = findVecAggrClass(vecAggrClasses, aggregateName, inputColVectorType, outputColVectorType, udafEvaluatorMode);
    if (vecAggrClass != null) {
        final VectorAggregationDesc vecAggrDesc = new VectorAggregationDesc(aggrDesc, evaluator, inputTypeInfo, inputColVectorType, inputExpression, outputTypeInfo, outputColVectorType, vecAggrClass);
        return new ImmutablePair<VectorAggregationDesc, String>(vecAggrDesc, null);
    }
    // No match?
    String issue = "Vector aggregation : \"" + aggregateName + "\" " + "for input type: " + (inputColVectorType == null ? "any" : "\"" + inputColVectorType + "\"") + " " + "and output type: \"" + outputColVectorType + "\" " + "and mode: " + udafEvaluatorMode + " not supported for " + "evaluator " + evaluator.getClass().getSimpleName();
    return new ImmutablePair<VectorAggregationDesc, String>(null, issue);
}
Also used : StructObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector) ObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector) VectorAggregateExpression(org.apache.hadoop.hive.ql.exec.vector.expressions.aggregates.VectorAggregateExpression) UDFToString(org.apache.hadoop.hive.ql.udf.UDFToString) VectorizedUDAFs(org.apache.hadoop.hive.ql.exec.vector.VectorizedUDAFs) TypeInfo(org.apache.hadoop.hive.serde2.typeinfo.TypeInfo) StructTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.StructTypeInfo) DecimalTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.DecimalTypeInfo) PrimitiveTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo) ColumnVector(org.apache.hadoop.hive.ql.exec.vector.ColumnVector) VectorAggregationDesc(org.apache.hadoop.hive.ql.exec.vector.VectorAggregationDesc) ImmutablePair(org.apache.commons.lang3.tuple.ImmutablePair) Type(org.apache.hadoop.hive.ql.exec.vector.ColumnVector.Type) VectorExpression(org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression) ExprNodeDesc(org.apache.hadoop.hive.ql.plan.ExprNodeDesc)
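
The annotation lookup in this method implies that each vectorizable GenericUDAFEvaluator carries a @VectorizedUDAFs annotation listing its candidate VectorAggregateExpression implementations; findVecAggrClass then matches one of them against the input/output ColumnVector.Type and the UDAF mode. A hedged sketch of reading that annotation, using only the calls shown above:

// Sketch: reading the @VectorizedUDAFs annotation (same calls as in the method above).
VectorizedUDAFs annotation =
        AnnotationUtils.getAnnotation(evaluator.getClass(), VectorizedUDAFs.class);
if (annotation != null) {
    for (Class<? extends VectorAggregateExpression> c : annotation.value()) {
        // Each entry is a candidate vector UDAF implementation for this evaluator.
        System.out.println(c.getSimpleName());
    }
}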

Example 4 with VectorAggregationDesc

Use of org.apache.hadoop.hive.ql.exec.vector.VectorAggregationDesc in project hive by apache.

From the class TestVectorizer, method testAggregateOnUDF.

@Test
public void testAggregateOnUDF() throws HiveException, VectorizerCannotVectorizeException {
    ExprNodeColumnDesc colExprA = new ExprNodeColumnDesc(Integer.class, "col1", "T", false);
    ExprNodeColumnDesc colExprB = new ExprNodeColumnDesc(Integer.class, "col2", "T", false);
    List<ExprNodeDesc> children = new ArrayList<ExprNodeDesc>();
    children.add(colExprA);
    ExprNodeGenericFuncDesc exprNodeDesc = new ExprNodeGenericFuncDesc(TypeInfoFactory.intTypeInfo, new GenericUDFAbs(), children);
    ArrayList<ExprNodeDesc> params = new ArrayList<ExprNodeDesc>();
    params.add(exprNodeDesc);
    List<ObjectInspector> paramOIs = new ArrayList<ObjectInspector>();
    paramOIs.add(exprNodeDesc.getWritableObjectInspector());
    AggregationDesc aggDesc = new AggregationDesc("sum", FunctionRegistry.getGenericUDAFEvaluator("sum", paramOIs, false, false), params, false, GenericUDAFEvaluator.Mode.PARTIAL1);
    ArrayList<String> outputColumnNames = new ArrayList<String>();
    outputColumnNames.add("_col0");
    GroupByDesc desc = new GroupByDesc();
    VectorGroupByDesc vectorDesc = new VectorGroupByDesc();
    vectorDesc.setProcessingMode(ProcessingMode.HASH);
    vectorDesc.setVecAggrDescs(new VectorAggregationDesc[] { new VectorAggregationDesc(aggDesc, new GenericUDAFSum.GenericUDAFSumLong(), TypeInfoFactory.longTypeInfo, ColumnVector.Type.LONG, null, TypeInfoFactory.longTypeInfo, ColumnVector.Type.LONG, VectorUDAFCountStar.class) });
    desc.setOutputColumnNames(outputColumnNames);
    ArrayList<AggregationDesc> aggDescList = new ArrayList<AggregationDesc>();
    aggDescList.add(aggDesc);
    desc.setAggregators(aggDescList);
    ArrayList<ExprNodeDesc> grpByKeys = new ArrayList<ExprNodeDesc>();
    grpByKeys.add(colExprB);
    desc.setKeys(grpByKeys);
    Operator<? extends OperatorDesc> gbyOp = OperatorFactory.get(new CompilationOpContext(), desc);
    desc.setMode(GroupByDesc.Mode.HASH);
    VectorizationContext ctx = new VectorizationContext("name", Arrays.asList(new String[] { "col1", "col2" }));
    Vectorizer v = new Vectorizer();
    v.testSetCurrentBaseWork(new MapWork());
    VectorGroupByOperator vectorOp = (VectorGroupByOperator) Vectorizer.vectorizeGroupByOperator(gbyOp, ctx, vectorDesc);
    Assert.assertEquals(VectorUDAFSumLong.class, vectorDesc.getVecAggrDescs()[0].getVecAggrClass());
}
Also used : ObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector) ArrayList(java.util.ArrayList) VectorizationContext(org.apache.hadoop.hive.ql.exec.vector.VectorizationContext) VectorAggregationDesc(org.apache.hadoop.hive.ql.exec.vector.VectorAggregationDesc) CompilationOpContext(org.apache.hadoop.hive.ql.CompilationOpContext) VectorGroupByOperator(org.apache.hadoop.hive.ql.exec.vector.VectorGroupByOperator) VectorUDAFCountStar(org.apache.hadoop.hive.ql.exec.vector.expressions.aggregates.VectorUDAFCountStar) GenericUDAFSumLong(org.apache.hadoop.hive.ql.udf.generic.GenericUDAFSum.GenericUDAFSumLong) Test(org.junit.Test)
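
Worth noting: the test deliberately seeds vecAggrDescs with VectorUDAFCountStar and then asserts that vectorization replaces it with VectorUDAFSumLong for sum over a long. A small hedged sketch for inspecting what the vectorizer resolved, using only accessors that appear in the test:

// Sketch: dump the vector UDAF class chosen for each aggregate after vectorization.
for (VectorAggregationDesc d : vectorDesc.getVecAggrDescs()) {
    System.out.println(d.getVecAggrClass().getSimpleName());  // e.g. "VectorUDAFSumLong"
}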

Aggregations

VectorAggregationDesc (org.apache.hadoop.hive.ql.exec.vector.VectorAggregationDesc) 4
ImmutablePair (org.apache.commons.lang3.tuple.ImmutablePair) 3
VectorExpression (org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression) 3
ArrayList (java.util.ArrayList) 2
ExprNodeDesc (org.apache.hadoop.hive.ql.plan.ExprNodeDesc) 2
VectorGroupByDesc (org.apache.hadoop.hive.ql.plan.VectorGroupByDesc) 2
UDFToString (org.apache.hadoop.hive.ql.udf.UDFToString) 2
List (java.util.List) 1
CompilationOpContext (org.apache.hadoop.hive.ql.CompilationOpContext) 1
ColumnVector (org.apache.hadoop.hive.ql.exec.vector.ColumnVector) 1
Type (org.apache.hadoop.hive.ql.exec.vector.ColumnVector.Type) 1
VectorGroupByOperator (org.apache.hadoop.hive.ql.exec.vector.VectorGroupByOperator) 1
VectorMapJoinOperator (org.apache.hadoop.hive.ql.exec.vector.VectorMapJoinOperator) 1
VectorMapJoinOuterFilteredOperator (org.apache.hadoop.hive.ql.exec.vector.VectorMapJoinOuterFilteredOperator) 1
VectorizationContext (org.apache.hadoop.hive.ql.exec.vector.VectorizationContext) 1
VectorizationOperator (org.apache.hadoop.hive.ql.exec.vector.VectorizationOperator) 1
VectorizedUDAFs (org.apache.hadoop.hive.ql.exec.vector.VectorizedUDAFs) 1
VectorAggregateExpression (org.apache.hadoop.hive.ql.exec.vector.expressions.aggregates.VectorAggregateExpression) 1
VectorUDAFCountStar (org.apache.hadoop.hive.ql.exec.vector.expressions.aggregates.VectorUDAFCountStar) 1
VectorMapJoinInnerBigOnlyLongOperator (org.apache.hadoop.hive.ql.exec.vector.mapjoin.VectorMapJoinInnerBigOnlyLongOperator) 1