Search in sources :

Example 1 with ExprNodeDesc

use of org.apache.hadoop.hive.ql.plan.ExprNodeDesc in project hive by apache.

the class Vectorizer method canSpecializeMapJoin.

private boolean canSpecializeMapJoin(Operator<? extends OperatorDesc> op, MapJoinDesc desc, boolean isTezOrSpark, VectorizationContext vContext, VectorMapJoinInfo vectorMapJoinInfo) throws HiveException {
    Preconditions.checkState(op instanceof MapJoinOperator);
    // Allocate a VectorReduceSinkDesc initially with implementation type NONE so EXPLAIN
    // can report this operator was vectorized, but not native.  And, the conditions.
    VectorMapJoinDesc vectorDesc = new VectorMapJoinDesc();
    desc.setVectorDesc(vectorDesc);
    boolean isVectorizationMapJoinNativeEnabled = HiveConf.getBoolVar(hiveConf, HiveConf.ConfVars.HIVE_VECTORIZATION_MAPJOIN_NATIVE_ENABLED);
    String engine = HiveConf.getVar(hiveConf, HiveConf.ConfVars.HIVE_EXECUTION_ENGINE);
    boolean oneMapJoinCondition = (desc.getConds().length == 1);
    boolean hasNullSafes = onExpressionHasNullSafes(desc);
    byte posBigTable = (byte) desc.getPosBigTable();
    // Since we want to display all the met and not met conditions in EXPLAIN, we determine all
    // information first....
    List<ExprNodeDesc> keyDesc = desc.getKeys().get(posBigTable);
    VectorExpression[] allBigTableKeyExpressions = vContext.getVectorExpressions(keyDesc);
    final int allBigTableKeyExpressionsLength = allBigTableKeyExpressions.length;
    // Assume.
    boolean supportsKeyTypes = true;
    HashSet<String> notSupportedKeyTypes = new HashSet<String>();
    // Since a key expression can be a calculation and the key will go into a scratch column,
    // we need the mapping and type information.
    int[] bigTableKeyColumnMap = new int[allBigTableKeyExpressionsLength];
    String[] bigTableKeyColumnNames = new String[allBigTableKeyExpressionsLength];
    TypeInfo[] bigTableKeyTypeInfos = new TypeInfo[allBigTableKeyExpressionsLength];
    ArrayList<VectorExpression> bigTableKeyExpressionsList = new ArrayList<VectorExpression>();
    VectorExpression[] bigTableKeyExpressions;
    for (int i = 0; i < allBigTableKeyExpressionsLength; i++) {
        VectorExpression ve = allBigTableKeyExpressions[i];
        if (!IdentityExpression.isColumnOnly(ve)) {
            bigTableKeyExpressionsList.add(ve);
        }
        bigTableKeyColumnMap[i] = ve.getOutputColumn();
        ExprNodeDesc exprNode = keyDesc.get(i);
        bigTableKeyColumnNames[i] = exprNode.toString();
        TypeInfo typeInfo = exprNode.getTypeInfo();
        // same check used in HashTableLoader.
        if (!MapJoinKey.isSupportedField(typeInfo)) {
            supportsKeyTypes = false;
            Category category = typeInfo.getCategory();
            notSupportedKeyTypes.add((category != Category.PRIMITIVE ? category.toString() : ((PrimitiveTypeInfo) typeInfo).getPrimitiveCategory().toString()));
        }
        bigTableKeyTypeInfos[i] = typeInfo;
    }
    if (bigTableKeyExpressionsList.size() == 0) {
        bigTableKeyExpressions = null;
    } else {
        bigTableKeyExpressions = bigTableKeyExpressionsList.toArray(new VectorExpression[0]);
    }
    List<ExprNodeDesc> bigTableExprs = desc.getExprs().get(posBigTable);
    VectorExpression[] allBigTableValueExpressions = vContext.getVectorExpressions(bigTableExprs);
    boolean isFastHashTableEnabled = HiveConf.getBoolVar(hiveConf, HiveConf.ConfVars.HIVE_VECTORIZATION_MAPJOIN_NATIVE_FAST_HASHTABLE_ENABLED);
    // Especially since LLAP is prone to turn it off in the MapJoinDesc in later
    // physical optimizer stages...
    boolean isHybridHashJoin = desc.isHybridHashJoin();
    /*
     * Populate vectorMapJoininfo.
     */
    /*
     * Similarly, we need a mapping since a value expression can be a calculation and the value
     * will go into a scratch column.
     */
    int[] bigTableValueColumnMap = new int[allBigTableValueExpressions.length];
    String[] bigTableValueColumnNames = new String[allBigTableValueExpressions.length];
    TypeInfo[] bigTableValueTypeInfos = new TypeInfo[allBigTableValueExpressions.length];
    ArrayList<VectorExpression> bigTableValueExpressionsList = new ArrayList<VectorExpression>();
    VectorExpression[] bigTableValueExpressions;
    for (int i = 0; i < bigTableValueColumnMap.length; i++) {
        VectorExpression ve = allBigTableValueExpressions[i];
        if (!IdentityExpression.isColumnOnly(ve)) {
            bigTableValueExpressionsList.add(ve);
        }
        bigTableValueColumnMap[i] = ve.getOutputColumn();
        ExprNodeDesc exprNode = bigTableExprs.get(i);
        bigTableValueColumnNames[i] = exprNode.toString();
        bigTableValueTypeInfos[i] = exprNode.getTypeInfo();
    }
    if (bigTableValueExpressionsList.size() == 0) {
        bigTableValueExpressions = null;
    } else {
        bigTableValueExpressions = bigTableValueExpressionsList.toArray(new VectorExpression[0]);
    }
    vectorMapJoinInfo.setBigTableKeyColumnMap(bigTableKeyColumnMap);
    vectorMapJoinInfo.setBigTableKeyColumnNames(bigTableKeyColumnNames);
    vectorMapJoinInfo.setBigTableKeyTypeInfos(bigTableKeyTypeInfos);
    vectorMapJoinInfo.setBigTableKeyExpressions(bigTableKeyExpressions);
    vectorMapJoinInfo.setBigTableValueColumnMap(bigTableValueColumnMap);
    vectorMapJoinInfo.setBigTableValueColumnNames(bigTableValueColumnNames);
    vectorMapJoinInfo.setBigTableValueTypeInfos(bigTableValueTypeInfos);
    vectorMapJoinInfo.setBigTableValueExpressions(bigTableValueExpressions);
    /*
     * Small table information.
     */
    VectorColumnOutputMapping bigTableRetainedMapping = new VectorColumnOutputMapping("Big Table Retained Mapping");
    VectorColumnOutputMapping bigTableOuterKeyMapping = new VectorColumnOutputMapping("Big Table Outer Key Mapping");
    // The order of the fields in the LazyBinary small table value must be used, so
    // we use the source ordering flavor for the mapping.
    VectorColumnSourceMapping smallTableMapping = new VectorColumnSourceMapping("Small Table Mapping");
    Byte[] order = desc.getTagOrder();
    Byte posSingleVectorMapJoinSmallTable = (order[0] == posBigTable ? order[1] : order[0]);
    boolean isOuterJoin = !desc.getNoOuterJoin();
    /*
     * Gather up big and small table output result information from the MapJoinDesc.
     */
    List<Integer> bigTableRetainList = desc.getRetainList().get(posBigTable);
    int bigTableRetainSize = bigTableRetainList.size();
    int[] smallTableIndices;
    int smallTableIndicesSize;
    List<ExprNodeDesc> smallTableExprs = desc.getExprs().get(posSingleVectorMapJoinSmallTable);
    if (desc.getValueIndices() != null && desc.getValueIndices().get(posSingleVectorMapJoinSmallTable) != null) {
        smallTableIndices = desc.getValueIndices().get(posSingleVectorMapJoinSmallTable);
        smallTableIndicesSize = smallTableIndices.length;
    } else {
        smallTableIndices = null;
        smallTableIndicesSize = 0;
    }
    List<Integer> smallTableRetainList = desc.getRetainList().get(posSingleVectorMapJoinSmallTable);
    int smallTableRetainSize = smallTableRetainList.size();
    int smallTableResultSize = 0;
    if (smallTableIndicesSize > 0) {
        smallTableResultSize = smallTableIndicesSize;
    } else if (smallTableRetainSize > 0) {
        smallTableResultSize = smallTableRetainSize;
    }
    /*
     * Determine the big table retained mapping first so we can optimize out (with
     * projection) copying inner join big table keys in the subsequent small table results section.
     */
    // We use a mapping object here so we can build the projection in any order and
    // get the ordered by 0 to n-1 output columns at the end.
    //
    // Also, to avoid copying a big table key into the small table result area for inner joins,
    // we reference it with the projection so there can be duplicate output columns
    // in the projection.
    VectorColumnSourceMapping projectionMapping = new VectorColumnSourceMapping("Projection Mapping");
    int nextOutputColumn = (order[0] == posBigTable ? 0 : smallTableResultSize);
    for (int i = 0; i < bigTableRetainSize; i++) {
        // Since bigTableValueExpressions may do a calculation and produce a scratch column, we
        // need to map to the right batch column.
        int retainColumn = bigTableRetainList.get(i);
        int batchColumnIndex = bigTableValueColumnMap[retainColumn];
        TypeInfo typeInfo = bigTableValueTypeInfos[i];
        // With this map we project the big table batch to make it look like an output batch.
        projectionMapping.add(nextOutputColumn, batchColumnIndex, typeInfo);
        // Collect columns we copy from the big table batch to the overflow batch.
        if (!bigTableRetainedMapping.containsOutputColumn(batchColumnIndex)) {
            // Tolerate repeated use of a big table column.
            bigTableRetainedMapping.add(batchColumnIndex, batchColumnIndex, typeInfo);
        }
        nextOutputColumn++;
    }
    /*
     * Now determine the small table results.
     */
    boolean smallTableExprVectorizes = true;
    int firstSmallTableOutputColumn;
    firstSmallTableOutputColumn = (order[0] == posBigTable ? bigTableRetainSize : 0);
    int smallTableOutputCount = 0;
    nextOutputColumn = firstSmallTableOutputColumn;
    // Small table indices has more information (i.e. keys) than retain, so use it if it exists...
    String[] bigTableRetainedNames;
    if (smallTableIndicesSize > 0) {
        smallTableOutputCount = smallTableIndicesSize;
        bigTableRetainedNames = new String[smallTableOutputCount];
        for (int i = 0; i < smallTableIndicesSize; i++) {
            if (smallTableIndices[i] >= 0) {
                // Zero and above numbers indicate a big table key is needed for
                // small table result "area".
                int keyIndex = smallTableIndices[i];
                // Since bigTableKeyExpressions may do a calculation and produce a scratch column, we
                // need to map the right column.
                int batchKeyColumn = bigTableKeyColumnMap[keyIndex];
                bigTableRetainedNames[i] = bigTableKeyColumnNames[keyIndex];
                TypeInfo typeInfo = bigTableKeyTypeInfos[keyIndex];
                if (!isOuterJoin) {
                    // Optimize inner join keys of small table results.
                    // Project the big table key into the small table result "area".
                    projectionMapping.add(nextOutputColumn, batchKeyColumn, typeInfo);
                    if (!bigTableRetainedMapping.containsOutputColumn(batchKeyColumn)) {
                        // If necessary, copy the big table key into the overflow batch's small table
                        // result "area".
                        bigTableRetainedMapping.add(batchKeyColumn, batchKeyColumn, typeInfo);
                    }
                } else {
                    // For outer joins, since the small table key can be null when there is no match,
                    // we must have a physical (scratch) column for those keys.  We cannot use the
                    // projection optimization used by inner joins above.
                    int scratchColumn = vContext.allocateScratchColumn(typeInfo);
                    projectionMapping.add(nextOutputColumn, scratchColumn, typeInfo);
                    bigTableRetainedMapping.add(batchKeyColumn, scratchColumn, typeInfo);
                    bigTableOuterKeyMapping.add(batchKeyColumn, scratchColumn, typeInfo);
                }
            } else {
                // Negative numbers indicate a column to be (deserialize) read from the small table's
                // LazyBinary value row.
                int smallTableValueIndex = -smallTableIndices[i] - 1;
                ExprNodeDesc smallTableExprNode = smallTableExprs.get(i);
                if (!validateExprNodeDesc(smallTableExprNode, "Small Table")) {
                    clearNotVectorizedReason();
                    smallTableExprVectorizes = false;
                }
                bigTableRetainedNames[i] = smallTableExprNode.toString();
                TypeInfo typeInfo = smallTableExprNode.getTypeInfo();
                // Make a new big table scratch column for the small table value.
                int scratchColumn = vContext.allocateScratchColumn(typeInfo);
                projectionMapping.add(nextOutputColumn, scratchColumn, typeInfo);
                smallTableMapping.add(smallTableValueIndex, scratchColumn, typeInfo);
            }
            nextOutputColumn++;
        }
    } else if (smallTableRetainSize > 0) {
        smallTableOutputCount = smallTableRetainSize;
        bigTableRetainedNames = new String[smallTableOutputCount];
        for (int i = 0; i < smallTableRetainSize; i++) {
            int smallTableValueIndex = smallTableRetainList.get(i);
            ExprNodeDesc smallTableExprNode = smallTableExprs.get(i);
            if (!validateExprNodeDesc(smallTableExprNode, "Small Table")) {
                clearNotVectorizedReason();
                smallTableExprVectorizes = false;
            }
            bigTableRetainedNames[i] = smallTableExprNode.toString();
            // Make a new big table scratch column for the small table value.
            TypeInfo typeInfo = smallTableExprNode.getTypeInfo();
            int scratchColumn = vContext.allocateScratchColumn(typeInfo);
            projectionMapping.add(nextOutputColumn, scratchColumn, typeInfo);
            smallTableMapping.add(smallTableValueIndex, scratchColumn, typeInfo);
            nextOutputColumn++;
        }
    } else {
        bigTableRetainedNames = new String[0];
    }
    boolean useOptimizedTable = HiveConf.getBoolVar(hiveConf, HiveConf.ConfVars.HIVEMAPJOINUSEOPTIMIZEDTABLE);
    // Remember the condition variables for EXPLAIN regardless of whether we specialize or not.
    vectorDesc.setUseOptimizedTable(useOptimizedTable);
    vectorDesc.setIsVectorizationMapJoinNativeEnabled(isVectorizationMapJoinNativeEnabled);
    vectorDesc.setEngine(engine);
    vectorDesc.setOneMapJoinCondition(oneMapJoinCondition);
    vectorDesc.setHasNullSafes(hasNullSafes);
    vectorDesc.setSmallTableExprVectorizes(smallTableExprVectorizes);
    vectorDesc.setIsFastHashTableEnabled(isFastHashTableEnabled);
    vectorDesc.setIsHybridHashJoin(isHybridHashJoin);
    vectorDesc.setSupportsKeyTypes(supportsKeyTypes);
    if (!supportsKeyTypes) {
        vectorDesc.setNotSupportedKeyTypes(new ArrayList(notSupportedKeyTypes));
    }
    // Check common conditions for both Optimized and Fast Hash Tables.
    // Assume.
    boolean result = true;
    if (!useOptimizedTable || !isVectorizationMapJoinNativeEnabled || !isTezOrSpark || !oneMapJoinCondition || hasNullSafes || !smallTableExprVectorizes) {
        result = false;
    }
    if (!isFastHashTableEnabled) {
        // Check optimized-only hash table restrictions.
        if (!supportsKeyTypes) {
            result = false;
        }
    } else {
        if (isHybridHashJoin) {
            result = false;
        }
    }
    // Convert dynamic arrays and maps to simple arrays.
    bigTableRetainedMapping.finalize();
    bigTableOuterKeyMapping.finalize();
    smallTableMapping.finalize();
    vectorMapJoinInfo.setBigTableRetainedMapping(bigTableRetainedMapping);
    vectorMapJoinInfo.setBigTableOuterKeyMapping(bigTableOuterKeyMapping);
    vectorMapJoinInfo.setSmallTableMapping(smallTableMapping);
    projectionMapping.finalize();
    // Verify we added an entry for each output.
    assert projectionMapping.isSourceSequenceGood();
    vectorMapJoinInfo.setProjectionMapping(projectionMapping);
    return result;
}
Also used : VectorMapJoinDesc(org.apache.hadoop.hive.ql.plan.VectorMapJoinDesc) PrimitiveCategory(org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector.PrimitiveCategory) Category(org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector.Category) ArrayList(java.util.ArrayList) VectorColumnOutputMapping(org.apache.hadoop.hive.ql.exec.vector.VectorColumnOutputMapping) UDFToString(org.apache.hadoop.hive.ql.udf.UDFToString) PrimitiveTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo) VectorColumnSourceMapping(org.apache.hadoop.hive.ql.exec.vector.VectorColumnSourceMapping) ExprNodeDesc(org.apache.hadoop.hive.ql.plan.ExprNodeDesc) HashSet(java.util.HashSet) VectorMapJoinOperator(org.apache.hadoop.hive.ql.exec.vector.VectorMapJoinOperator) StructTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.StructTypeInfo) PrimitiveTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo) TypeInfo(org.apache.hadoop.hive.serde2.typeinfo.TypeInfo) UDFToInteger(org.apache.hadoop.hive.ql.udf.UDFToInteger) UDFToByte(org.apache.hadoop.hive.ql.udf.UDFToByte) VectorExpression(org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression)

Example 2 with ExprNodeDesc

use of org.apache.hadoop.hive.ql.plan.ExprNodeDesc in project hive by apache.

the class SemanticAnalyzer method genGroupByPlanGroupByOperator1.

/**
   * Generate the GroupByOperator for the Query Block (parseInfo.getXXX(dest)).
   * The new GroupByOperator will be a child of the reduceSinkOperatorInfo.
   *
   * @param parseInfo
   * @param dest
   * @param reduceSinkOperatorInfo
   * @param mode
   *          The mode of the aggregation (MERGEPARTIAL, PARTIAL2)
   * @param genericUDAFEvaluators
   *          The mapping from Aggregation StringTree to the
   *          genericUDAFEvaluator.
   * @param groupingSets
   *          list of grouping sets
   * @param groupingSetsPresent
   *          whether grouping sets are present in this query
   * @param groupingSetsNeedAdditionalMRJob
   *          whether grouping sets are consumed by this group by
   * @return the new GroupByOperator
   */
@SuppressWarnings("nls")
private Operator genGroupByPlanGroupByOperator1(QBParseInfo parseInfo, String dest, Operator reduceSinkOperatorInfo, GroupByDesc.Mode mode, Map<String, GenericUDAFEvaluator> genericUDAFEvaluators, List<Integer> groupingSets, boolean groupingSetsPresent, boolean groupingSetsNeedAdditionalMRJob) throws SemanticException {
    ArrayList<String> outputColumnNames = new ArrayList<String>();
    RowResolver groupByInputRowResolver = opParseCtx.get(reduceSinkOperatorInfo).getRowResolver();
    RowResolver groupByOutputRowResolver = new RowResolver();
    groupByOutputRowResolver.setIsExprResolver(true);
    ArrayList<ExprNodeDesc> groupByKeys = new ArrayList<ExprNodeDesc>();
    ArrayList<AggregationDesc> aggregations = new ArrayList<AggregationDesc>();
    List<ASTNode> grpByExprs = getGroupByForClause(parseInfo, dest);
    Map<String, ExprNodeDesc> colExprMap = new HashMap<String, ExprNodeDesc>();
    for (int i = 0; i < grpByExprs.size(); ++i) {
        ASTNode grpbyExpr = grpByExprs.get(i);
        ColumnInfo exprInfo = groupByInputRowResolver.getExpression(grpbyExpr);
        if (exprInfo == null) {
            throw new SemanticException(ErrorMsg.INVALID_COLUMN.getMsg(grpbyExpr));
        }
        groupByKeys.add(new ExprNodeColumnDesc(exprInfo));
        String field = getColumnInternalName(i);
        outputColumnNames.add(field);
        ColumnInfo oColInfo = new ColumnInfo(field, exprInfo.getType(), "", false);
        groupByOutputRowResolver.putExpression(grpbyExpr, oColInfo);
        addAlternateGByKeyMappings(grpbyExpr, oColInfo, reduceSinkOperatorInfo, groupByOutputRowResolver);
        colExprMap.put(field, groupByKeys.get(groupByKeys.size() - 1));
    }
    // This is only needed if a new grouping set key is being created
    int groupingSetsPosition = -1;
    // For grouping sets, add a dummy grouping key
    if (groupingSetsPresent) {
        groupingSetsPosition = groupByKeys.size();
        // This function is called for GroupBy2 to add grouping id as part of the groupby keys
        if (!groupingSetsNeedAdditionalMRJob) {
            addGroupingSetKey(groupByKeys, groupByInputRowResolver, groupByOutputRowResolver, outputColumnNames, colExprMap);
        } else {
            // The grouping set has not yet been processed. Create a new grouping key
            // Consider the query: select a,b, count(1) from T group by a,b with cube;
            // where it is being executed in 2 map-reduce jobs
            // The plan for 1st MR is TableScan -> GroupBy1 -> ReduceSink -> GroupBy2 -> FileSink
            // GroupBy1/ReduceSink worked as if grouping sets were not present
            // This function is called for GroupBy2 to create new rows for grouping sets
            // For each input row (a,b), 4 rows are created for the example above:
            // (a,b), (a,null), (null, b), (null, null)
            createNewGroupingKey(groupByKeys, outputColumnNames, groupByOutputRowResolver, colExprMap);
        }
    }
    HashMap<String, ASTNode> aggregationTrees = parseInfo.getAggregationExprsForClause(dest);
    // get the last colName for the reduce KEY
    // it represents the column name corresponding to distinct aggr, if any
    String lastKeyColName = null;
    List<ExprNodeDesc> reduceValues = null;
    if (reduceSinkOperatorInfo.getConf() instanceof ReduceSinkDesc) {
        List<String> inputKeyCols = ((ReduceSinkDesc) reduceSinkOperatorInfo.getConf()).getOutputKeyColumnNames();
        if (inputKeyCols.size() > 0) {
            lastKeyColName = inputKeyCols.get(inputKeyCols.size() - 1);
        }
        reduceValues = ((ReduceSinkDesc) reduceSinkOperatorInfo.getConf()).getValueCols();
    }
    int numDistinctUDFs = 0;
    boolean containsDistinctAggr = false;
    for (Map.Entry<String, ASTNode> entry : aggregationTrees.entrySet()) {
        ASTNode value = entry.getValue();
        String aggName = unescapeIdentifier(value.getChild(0).getText());
        ArrayList<ExprNodeDesc> aggParameters = new ArrayList<ExprNodeDesc>();
        boolean isDistinct = (value.getType() == HiveParser.TOK_FUNCTIONDI);
        containsDistinctAggr = containsDistinctAggr || isDistinct;
        // side, so always look for the parameters: d+e
        if (isDistinct) {
            // 0 is the function name
            for (int i = 1; i < value.getChildCount(); i++) {
                ASTNode paraExpr = (ASTNode) value.getChild(i);
                ColumnInfo paraExprInfo = groupByInputRowResolver.getExpression(paraExpr);
                if (paraExprInfo == null) {
                    throw new SemanticException(ErrorMsg.INVALID_COLUMN.getMsg(paraExpr));
                }
                String paraExpression = paraExprInfo.getInternalName();
                assert (paraExpression != null);
                if (isDistinct && lastKeyColName != null) {
                    // if aggr is distinct, the parameter is name is constructed as
                    // KEY.lastKeyColName:<tag>._colx
                    paraExpression = Utilities.ReduceField.KEY.name() + "." + lastKeyColName + ":" + numDistinctUDFs + "." + getColumnInternalName(i - 1);
                }
                ExprNodeDesc expr = new ExprNodeColumnDesc(paraExprInfo.getType(), paraExpression, paraExprInfo.getTabAlias(), paraExprInfo.getIsVirtualCol());
                ExprNodeDesc reduceValue = isConstantParameterInAggregationParameters(paraExprInfo.getInternalName(), reduceValues);
                if (reduceValue != null) {
                    // this parameter is a constant
                    expr = reduceValue;
                }
                aggParameters.add(expr);
            }
        } else {
            ColumnInfo paraExprInfo = groupByInputRowResolver.getExpression(value);
            if (paraExprInfo == null) {
                throw new SemanticException(ErrorMsg.INVALID_COLUMN.getMsg(value));
            }
            String paraExpression = paraExprInfo.getInternalName();
            assert (paraExpression != null);
            aggParameters.add(new ExprNodeColumnDesc(paraExprInfo.getType(), paraExpression, paraExprInfo.getTabAlias(), paraExprInfo.getIsVirtualCol()));
        }
        if (isDistinct) {
            numDistinctUDFs++;
        }
        Mode amode = groupByDescModeToUDAFMode(mode, isDistinct);
        GenericUDAFEvaluator genericUDAFEvaluator = null;
        genericUDAFEvaluator = genericUDAFEvaluators.get(entry.getKey());
        assert (genericUDAFEvaluator != null);
        GenericUDAFInfo udaf = getGenericUDAFInfo(genericUDAFEvaluator, amode, aggParameters);
        aggregations.add(new AggregationDesc(aggName.toLowerCase(), udaf.genericUDAFEvaluator, udaf.convertedParameters, (mode != GroupByDesc.Mode.FINAL && isDistinct), amode));
        String field = getColumnInternalName(groupByKeys.size() + aggregations.size() - 1);
        outputColumnNames.add(field);
        groupByOutputRowResolver.putExpression(value, new ColumnInfo(field, udaf.returnType, "", false));
    }
    float groupByMemoryUsage = HiveConf.getFloatVar(conf, HiveConf.ConfVars.HIVEMAPAGGRHASHMEMORY);
    float memoryThreshold = HiveConf.getFloatVar(conf, HiveConf.ConfVars.HIVEMAPAGGRMEMORYTHRESHOLD);
    // Nothing special needs to be done for grouping sets if
    // this is the final group by operator, and multiple rows corresponding to the
    // grouping sets have been generated upstream.
    // However, if an addition MR job has been created to handle grouping sets,
    // additional rows corresponding to grouping sets need to be created here.
    Operator op = putOpInsertMap(OperatorFactory.getAndMakeChild(new GroupByDesc(mode, outputColumnNames, groupByKeys, aggregations, groupByMemoryUsage, memoryThreshold, groupingSets, groupingSetsPresent && groupingSetsNeedAdditionalMRJob, groupingSetsPosition, containsDistinctAggr), new RowSchema(groupByOutputRowResolver.getColumnInfos()), reduceSinkOperatorInfo), groupByOutputRowResolver);
    op.setColumnExprMap(colExprMap);
    return op;
}
Also used : AbstractMapJoinOperator(org.apache.hadoop.hive.ql.exec.AbstractMapJoinOperator) SelectOperator(org.apache.hadoop.hive.ql.exec.SelectOperator) JoinOperator(org.apache.hadoop.hive.ql.exec.JoinOperator) Operator(org.apache.hadoop.hive.ql.exec.Operator) GroupByOperator(org.apache.hadoop.hive.ql.exec.GroupByOperator) FileSinkOperator(org.apache.hadoop.hive.ql.exec.FileSinkOperator) FilterOperator(org.apache.hadoop.hive.ql.exec.FilterOperator) ReduceSinkOperator(org.apache.hadoop.hive.ql.exec.ReduceSinkOperator) TableScanOperator(org.apache.hadoop.hive.ql.exec.TableScanOperator) UnionOperator(org.apache.hadoop.hive.ql.exec.UnionOperator) SMBMapJoinOperator(org.apache.hadoop.hive.ql.exec.SMBMapJoinOperator) LinkedHashMap(java.util.LinkedHashMap) HashMap(java.util.HashMap) GenericUDAFEvaluator(org.apache.hadoop.hive.ql.udf.generic.GenericUDAFEvaluator) ArrayList(java.util.ArrayList) ColumnInfo(org.apache.hadoop.hive.ql.exec.ColumnInfo) ExprNodeColumnDesc(org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc) ExprNodeDesc(org.apache.hadoop.hive.ql.plan.ExprNodeDesc) ReduceSinkDesc(org.apache.hadoop.hive.ql.plan.ReduceSinkDesc) GroupByDesc(org.apache.hadoop.hive.ql.plan.GroupByDesc) CalciteSemanticException(org.apache.hadoop.hive.ql.optimizer.calcite.CalciteSemanticException) RowSchema(org.apache.hadoop.hive.ql.exec.RowSchema) Mode(org.apache.hadoop.hive.ql.udf.generic.GenericUDAFEvaluator.Mode) AggregationDesc(org.apache.hadoop.hive.ql.plan.AggregationDesc) Map(java.util.Map) LinkedHashMap(java.util.LinkedHashMap) HashMap(java.util.HashMap)

Example 3 with ExprNodeDesc

use of org.apache.hadoop.hive.ql.plan.ExprNodeDesc in project hive by apache.

the class SemanticAnalyzer method genGroupByPlanMapGroupByOperator.

/**
   * Generate the map-side GroupByOperator for the Query Block
   * (qb.getParseInfo().getXXX(dest)). The new GroupByOperator will be a child
   * of the inputOperatorInfo.
   *
   * @param mode
   *          The mode of the aggregation (HASH)
   * @param genericUDAFEvaluators
   *          If not null, this function will store the mapping from Aggregation
   *          StringTree to the genericUDAFEvaluator in this parameter, so it
   *          can be used in the next-stage GroupBy aggregations.
   * @return the new GroupByOperator
   */
@SuppressWarnings("nls")
private Operator genGroupByPlanMapGroupByOperator(QB qb, String dest, List<ASTNode> grpByExprs, Operator inputOperatorInfo, GroupByDesc.Mode mode, Map<String, GenericUDAFEvaluator> genericUDAFEvaluators, List<Integer> groupingSetKeys, boolean groupingSetsPresent) throws SemanticException {
    RowResolver groupByInputRowResolver = opParseCtx.get(inputOperatorInfo).getRowResolver();
    QBParseInfo parseInfo = qb.getParseInfo();
    RowResolver groupByOutputRowResolver = new RowResolver();
    groupByOutputRowResolver.setIsExprResolver(true);
    ArrayList<ExprNodeDesc> groupByKeys = new ArrayList<ExprNodeDesc>();
    ArrayList<String> outputColumnNames = new ArrayList<String>();
    ArrayList<AggregationDesc> aggregations = new ArrayList<AggregationDesc>();
    Map<String, ExprNodeDesc> colExprMap = new HashMap<String, ExprNodeDesc>();
    for (int i = 0; i < grpByExprs.size(); ++i) {
        ASTNode grpbyExpr = grpByExprs.get(i);
        ExprNodeDesc grpByExprNode = genExprNodeDesc(grpbyExpr, groupByInputRowResolver);
        if ((grpByExprNode instanceof ExprNodeColumnDesc) && ExprNodeDescUtils.indexOf(grpByExprNode, groupByKeys) >= 0) {
            // Skip duplicated grouping keys, it happens when define column alias.
            grpByExprs.remove(i--);
            continue;
        }
        groupByKeys.add(grpByExprNode);
        String field = getColumnInternalName(i);
        outputColumnNames.add(field);
        groupByOutputRowResolver.putExpression(grpbyExpr, new ColumnInfo(field, grpByExprNode.getTypeInfo(), "", false));
        colExprMap.put(field, groupByKeys.get(groupByKeys.size() - 1));
    }
    // The grouping set key is present after the grouping keys, before the distinct keys
    int groupingSetsPosition = -1;
    // for the grouping set (corresponding to the rollup).
    if (groupingSetsPresent) {
        groupingSetsPosition = groupByKeys.size();
        createNewGroupingKey(groupByKeys, outputColumnNames, groupByOutputRowResolver, colExprMap);
    }
    // If there is a distinctFuncExp, add all parameters to the reduceKeys.
    if (!parseInfo.getDistinctFuncExprsForClause(dest).isEmpty()) {
        List<ASTNode> list = parseInfo.getDistinctFuncExprsForClause(dest);
        for (ASTNode value : list) {
            // 0 is function name
            for (int i = 1; i < value.getChildCount(); i++) {
                ASTNode parameter = (ASTNode) value.getChild(i);
                if (groupByOutputRowResolver.getExpression(parameter) == null) {
                    ExprNodeDesc distExprNode = genExprNodeDesc(parameter, groupByInputRowResolver);
                    groupByKeys.add(distExprNode);
                    String field = getColumnInternalName(groupByKeys.size() - 1);
                    outputColumnNames.add(field);
                    groupByOutputRowResolver.putExpression(parameter, new ColumnInfo(field, distExprNode.getTypeInfo(), "", false));
                    colExprMap.put(field, groupByKeys.get(groupByKeys.size() - 1));
                }
            }
        }
    }
    // For each aggregation
    HashMap<String, ASTNode> aggregationTrees = parseInfo.getAggregationExprsForClause(dest);
    assert (aggregationTrees != null);
    boolean containsDistinctAggr = false;
    for (Map.Entry<String, ASTNode> entry : aggregationTrees.entrySet()) {
        ASTNode value = entry.getValue();
        String aggName = unescapeIdentifier(value.getChild(0).getText());
        ArrayList<ExprNodeDesc> aggParameters = new ArrayList<ExprNodeDesc>();
        // 0 is the function name
        for (int i = 1; i < value.getChildCount(); i++) {
            ASTNode paraExpr = (ASTNode) value.getChild(i);
            ExprNodeDesc paraExprNode = genExprNodeDesc(paraExpr, groupByInputRowResolver);
            aggParameters.add(paraExprNode);
        }
        boolean isDistinct = value.getType() == HiveParser.TOK_FUNCTIONDI;
        containsDistinctAggr = containsDistinctAggr || isDistinct;
        boolean isAllColumns = value.getType() == HiveParser.TOK_FUNCTIONSTAR;
        Mode amode = groupByDescModeToUDAFMode(mode, isDistinct);
        GenericUDAFEvaluator genericUDAFEvaluator = getGenericUDAFEvaluator(aggName, aggParameters, value, isDistinct, isAllColumns);
        assert (genericUDAFEvaluator != null);
        GenericUDAFInfo udaf = getGenericUDAFInfo(genericUDAFEvaluator, amode, aggParameters);
        aggregations.add(new AggregationDesc(aggName.toLowerCase(), udaf.genericUDAFEvaluator, udaf.convertedParameters, isDistinct, amode));
        String field = getColumnInternalName(groupByKeys.size() + aggregations.size() - 1);
        outputColumnNames.add(field);
        if (groupByOutputRowResolver.getExpression(value) == null) {
            groupByOutputRowResolver.putExpression(value, new ColumnInfo(field, udaf.returnType, "", false));
        }
        // GroupByOperators
        if (genericUDAFEvaluators != null) {
            genericUDAFEvaluators.put(entry.getKey(), genericUDAFEvaluator);
        }
    }
    float groupByMemoryUsage = HiveConf.getFloatVar(conf, HiveConf.ConfVars.HIVEMAPAGGRHASHMEMORY);
    float memoryThreshold = HiveConf.getFloatVar(conf, HiveConf.ConfVars.HIVEMAPAGGRMEMORYTHRESHOLD);
    Operator op = putOpInsertMap(OperatorFactory.getAndMakeChild(new GroupByDesc(mode, outputColumnNames, groupByKeys, aggregations, false, groupByMemoryUsage, memoryThreshold, groupingSetKeys, groupingSetsPresent, groupingSetsPosition, containsDistinctAggr), new RowSchema(groupByOutputRowResolver.getColumnInfos()), inputOperatorInfo), groupByOutputRowResolver);
    op.setColumnExprMap(colExprMap);
    return op;
}
Also used : AbstractMapJoinOperator(org.apache.hadoop.hive.ql.exec.AbstractMapJoinOperator) SelectOperator(org.apache.hadoop.hive.ql.exec.SelectOperator) JoinOperator(org.apache.hadoop.hive.ql.exec.JoinOperator) Operator(org.apache.hadoop.hive.ql.exec.Operator) GroupByOperator(org.apache.hadoop.hive.ql.exec.GroupByOperator) FileSinkOperator(org.apache.hadoop.hive.ql.exec.FileSinkOperator) FilterOperator(org.apache.hadoop.hive.ql.exec.FilterOperator) ReduceSinkOperator(org.apache.hadoop.hive.ql.exec.ReduceSinkOperator) TableScanOperator(org.apache.hadoop.hive.ql.exec.TableScanOperator) UnionOperator(org.apache.hadoop.hive.ql.exec.UnionOperator) SMBMapJoinOperator(org.apache.hadoop.hive.ql.exec.SMBMapJoinOperator) LinkedHashMap(java.util.LinkedHashMap) HashMap(java.util.HashMap) GenericUDAFEvaluator(org.apache.hadoop.hive.ql.udf.generic.GenericUDAFEvaluator) ArrayList(java.util.ArrayList) ColumnInfo(org.apache.hadoop.hive.ql.exec.ColumnInfo) ExprNodeColumnDesc(org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc) ExprNodeDesc(org.apache.hadoop.hive.ql.plan.ExprNodeDesc) GroupByDesc(org.apache.hadoop.hive.ql.plan.GroupByDesc) RowSchema(org.apache.hadoop.hive.ql.exec.RowSchema) Mode(org.apache.hadoop.hive.ql.udf.generic.GenericUDAFEvaluator.Mode) AggregationDesc(org.apache.hadoop.hive.ql.plan.AggregationDesc) Map(java.util.Map) LinkedHashMap(java.util.LinkedHashMap) HashMap(java.util.HashMap)

Example 4 with ExprNodeDesc

use of org.apache.hadoop.hive.ql.plan.ExprNodeDesc in project hive by apache.

the class VectorizationContext method getAggregatorExpression.

public VectorAggregateExpression getAggregatorExpression(AggregationDesc desc) throws HiveException {
    ArrayList<ExprNodeDesc> paramDescList = desc.getParameters();
    VectorExpression[] vectorParams = new VectorExpression[paramDescList.size()];
    for (int i = 0; i < paramDescList.size(); ++i) {
        ExprNodeDesc exprDesc = paramDescList.get(i);
        vectorParams[i] = this.getVectorExpression(exprDesc, VectorExpressionDescriptor.Mode.PROJECTION);
    }
    String aggregateName = desc.getGenericUDAFName();
    VectorExpressionDescriptor.ArgumentType inputType = VectorExpressionDescriptor.ArgumentType.NONE;
    if (paramDescList.size() > 0) {
        ExprNodeDesc inputExpr = paramDescList.get(0);
        inputType = VectorExpressionDescriptor.ArgumentType.fromHiveTypeName(inputExpr.getTypeString());
        if (inputType == VectorExpressionDescriptor.ArgumentType.NONE) {
            throw new HiveException("No vector argument type for Hive type name " + inputExpr.getTypeString());
        }
    }
    GenericUDAFEvaluator.Mode udafEvaluatorMode = desc.getMode();
    for (AggregateDefinition aggDef : aggregatesDefinition) {
        if (aggregateName.equalsIgnoreCase(aggDef.getName()) && ((aggDef.getType() == VectorExpressionDescriptor.ArgumentType.NONE && inputType == VectorExpressionDescriptor.ArgumentType.NONE) || (aggDef.getType().isSameTypeOrFamily(inputType)))) {
            // A null means all modes are ok.
            GenericUDAFEvaluator.Mode aggDefUdafEvaluatorMode = aggDef.getUdafEvaluatorMode();
            if (aggDefUdafEvaluatorMode != null && aggDefUdafEvaluatorMode != udafEvaluatorMode) {
                continue;
            }
            Class<? extends VectorAggregateExpression> aggClass = aggDef.getAggClass();
            try {
                Constructor<? extends VectorAggregateExpression> ctor = aggClass.getConstructor(VectorExpression.class);
                VectorAggregateExpression aggExpr = ctor.newInstance(vectorParams.length > 0 ? vectorParams[0] : null);
                aggExpr.init(desc);
                return aggExpr;
            } catch (Exception e) {
                throw new HiveException("Internal exception for vector aggregate : \"" + aggregateName + "\" for type: \"" + inputType + "", e);
            }
        }
    }
    throw new HiveException("Vector aggregate not implemented: \"" + aggregateName + "\" for type: \"" + inputType.name() + " (UDAF evaluator mode = " + (udafEvaluatorMode == null ? "NULL" : udafEvaluatorMode.name()) + ")");
}
Also used : HiveException(org.apache.hadoop.hive.ql.metadata.HiveException) VectorAggregateExpression(org.apache.hadoop.hive.ql.exec.vector.expressions.aggregates.VectorAggregateExpression) VectorUDAFMaxString(org.apache.hadoop.hive.ql.exec.vector.expressions.aggregates.gen.VectorUDAFMaxString) VectorUDAFMinString(org.apache.hadoop.hive.ql.exec.vector.expressions.aggregates.gen.VectorUDAFMinString) SemanticException(org.apache.hadoop.hive.ql.parse.SemanticException) HiveException(org.apache.hadoop.hive.ql.metadata.HiveException) ArgumentType(org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor.ArgumentType) ExprNodeDesc(org.apache.hadoop.hive.ql.plan.ExprNodeDesc) Mode(org.apache.hadoop.hive.ql.udf.generic.GenericUDAFEvaluator.Mode)

Example 5 with ExprNodeDesc

use of org.apache.hadoop.hive.ql.plan.ExprNodeDesc in project hive by apache.

the class ConvertAstToSearchArg method findLiteral.

/**
   * Find the child that is the literal.
   * @param expr the parent node to check
   * @param type the type of the expression
   * @return the literal boxed if found or null
   */
private static Object findLiteral(Configuration conf, ExprNodeGenericFuncDesc expr, PredicateLeaf.Type type) {
    List<ExprNodeDesc> children = expr.getChildren();
    if (children.size() != 2) {
        return null;
    }
    Object result = null;
    for (ExprNodeDesc child : children) {
        Object currentResult = getLiteral(conf, child, type);
        if (currentResult != null) {
            // Both children in the expression should not be literal
            if (result != null) {
                return null;
            }
            result = currentResult;
        }
    }
    return result;
}
Also used : ExprNodeDesc(org.apache.hadoop.hive.ql.plan.ExprNodeDesc)

Aggregations

ExprNodeDesc (org.apache.hadoop.hive.ql.plan.ExprNodeDesc)585 ArrayList (java.util.ArrayList)318 ExprNodeColumnDesc (org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc)264 ExprNodeConstantDesc (org.apache.hadoop.hive.ql.plan.ExprNodeConstantDesc)181 ExprNodeGenericFuncDesc (org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc)167 HashMap (java.util.HashMap)137 ColumnInfo (org.apache.hadoop.hive.ql.exec.ColumnInfo)137 Test (org.junit.Test)109 RowSchema (org.apache.hadoop.hive.ql.exec.RowSchema)103 TypeInfo (org.apache.hadoop.hive.serde2.typeinfo.TypeInfo)97 ReduceSinkOperator (org.apache.hadoop.hive.ql.exec.ReduceSinkOperator)92 PrimitiveTypeInfo (org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo)82 List (java.util.List)79 SemanticException (org.apache.hadoop.hive.ql.parse.SemanticException)76 LinkedHashMap (java.util.LinkedHashMap)75 SelectOperator (org.apache.hadoop.hive.ql.exec.SelectOperator)72 Operator (org.apache.hadoop.hive.ql.exec.Operator)70 GroupByOperator (org.apache.hadoop.hive.ql.exec.GroupByOperator)61 TableScanOperator (org.apache.hadoop.hive.ql.exec.TableScanOperator)61 JoinOperator (org.apache.hadoop.hive.ql.exec.JoinOperator)57