
Example 36 with AggregationDesc

Use of org.apache.hadoop.hive.ql.plan.AggregationDesc in project hive by apache.

The class TestVectorGroupByOperator, method buildGroupByDescType:

private static Pair<GroupByDesc, VectorGroupByDesc> buildGroupByDescType(VectorizationContext ctx, String aggregate, GenericUDAFEvaluator.Mode mode, String column, TypeInfo dataType) {
    AggregationDesc agg = buildAggregationDesc(ctx, aggregate, mode, column, dataType);
    ArrayList<AggregationDesc> aggs = new ArrayList<AggregationDesc>();
    aggs.add(agg);
    ArrayList<String> outputColumnNames = new ArrayList<String>();
    outputColumnNames.add("_col0");
    GroupByDesc desc = new GroupByDesc();
    VectorGroupByDesc vectorDesc = new VectorGroupByDesc();
    desc.setOutputColumnNames(outputColumnNames);
    desc.setAggregators(aggs);
    vectorDesc.setProcessingMode(ProcessingMode.GLOBAL);
    return new Pair<GroupByDesc, VectorGroupByDesc>(desc, vectorDesc);
}
Also used : ArrayList(java.util.ArrayList) VectorGroupByDesc(org.apache.hadoop.hive.ql.plan.VectorGroupByDesc) AggregationDesc(org.apache.hadoop.hive.ql.plan.AggregationDesc) GroupByDesc(org.apache.hadoop.hive.ql.plan.GroupByDesc) Pair(org.apache.calcite.util.Pair)
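
For readers who want to see the pieces in isolation, here is a minimal, self-contained sketch (not taken from the Hive tests) of the kind of AggregationDesc that a helper such as buildAggregationDesc plausibly produces for a "sum" aggregate over a bigint column. The evaluator class (GenericUDAFSum.GenericUDAFSumLong), the table alias "t", and the PARTIAL1 mode are assumptions for illustration; real code resolves the evaluator through the function registry.

import java.util.ArrayList;
import java.util.List;

import org.apache.hadoop.hive.ql.plan.AggregationDesc;
import org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc;
import org.apache.hadoop.hive.ql.plan.ExprNodeDesc;
import org.apache.hadoop.hive.ql.udf.generic.GenericUDAFEvaluator;
import org.apache.hadoop.hive.ql.udf.generic.GenericUDAFSum;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;

public class AggregationDescSketch {

    // Hypothetical helper: builds a "sum(column)" AggregationDesc over a bigint column.
    public static AggregationDesc sumOverLongColumn(String column) {
        // The single parameter is a plain column reference ("t" is an invented table alias).
        ExprNodeDesc param = new ExprNodeColumnDesc(TypeInfoFactory.longTypeInfo, column, "t", false);
        List<ExprNodeDesc> params = new ArrayList<ExprNodeDesc>();
        params.add(param);
        // Evaluator chosen directly for illustration; production code looks it up via the function registry.
        GenericUDAFEvaluator eval = new GenericUDAFSum.GenericUDAFSumLong();
        // Arguments: UDAF name, evaluator, parameters, distinct flag, evaluator mode.
        return new AggregationDesc("sum", eval, params, false, GenericUDAFEvaluator.Mode.PARTIAL1);
    }
}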

Example 37 with AggregationDesc

Use of org.apache.hadoop.hive.ql.plan.AggregationDesc in project hive by apache.

The class SemanticAnalyzer, method genMapGroupByForSemijoin:

private Operator genMapGroupByForSemijoin(List<ASTNode> fields, Operator<?> input) throws SemanticException {
    RowResolver groupByInputRowResolver = opParseCtx.get(input).getRowResolver();
    RowResolver groupByOutputRowResolver = new RowResolver();
    List<ExprNodeDesc> groupByKeys = new ArrayList<ExprNodeDesc>();
    List<String> outputColumnNames = new ArrayList<String>();
    List<AggregationDesc> aggregations = new ArrayList<AggregationDesc>();
    Map<String, ExprNodeDesc> colExprMap = new HashMap<String, ExprNodeDesc>();
    for (int i = 0; i < fields.size(); ++i) {
        // get the group by keys to ColumnInfo
        ASTNode colName = fields.get(i);
        String[] nm;
        String[] nm2;
        ExprNodeDesc grpByExprNode = genExprNodeDesc(colName, groupByInputRowResolver);
        if (grpByExprNode instanceof ExprNodeColumnDesc) {
            // In most of the cases, this is a column reference
            ExprNodeColumnDesc columnExpr = (ExprNodeColumnDesc) grpByExprNode;
            nm = groupByInputRowResolver.reverseLookup(columnExpr.getColumn());
            nm2 = groupByInputRowResolver.getAlternateMappings(columnExpr.getColumn());
        } else if (grpByExprNode instanceof ExprNodeConstantDesc) {
            // However, it can be a constant too. In that case, we need to track
            // the column that it originated from in the input operator so we can
            // propagate the aliases.
            ExprNodeConstantDesc constantExpr = (ExprNodeConstantDesc) grpByExprNode;
            String inputCol = constantExpr.getFoldedFromCol();
            nm = groupByInputRowResolver.reverseLookup(inputCol);
            nm2 = groupByInputRowResolver.getAlternateMappings(inputCol);
        } else {
            // Any other kind of expression cannot be traced back to an input column, so
            // give up on this map-side group by and return the input operator
            // of the left semijoin branch unchanged
            return input;
        }
        groupByKeys.add(grpByExprNode);
        // generate output column names
        String field = getColumnInternalName(i);
        outputColumnNames.add(field);
        ColumnInfo colInfo2 = new ColumnInfo(field, grpByExprNode.getTypeInfo(), "", false);
        groupByOutputRowResolver.put(nm[0], nm[1], colInfo2);
        if (nm2 != null) {
            groupByOutputRowResolver.addMappingOnly(nm2[0], nm2[1], colInfo2);
        }
        groupByOutputRowResolver.putExpression(colName, colInfo2);
        // establish mapping from the output column to the input column
        colExprMap.put(field, grpByExprNode);
    }
    // Generate group-by operator
    float groupByMemoryUsage = HiveConf.getFloatVar(conf, HiveConf.ConfVars.HIVEMAPAGGRHASHMEMORY);
    float memoryThreshold = HiveConf.getFloatVar(conf, HiveConf.ConfVars.HIVEMAPAGGRMEMORYTHRESHOLD);
    float minReductionHashAggr = HiveConf.getFloatVar(conf, HiveConf.ConfVars.HIVEMAPAGGRHASHMINREDUCTION);
    float minReductionHashAggrLowerBound = HiveConf.getFloatVar(conf, ConfVars.HIVEMAPAGGRHASHMINREDUCTIONLOWERBOUND);
    Operator op = putOpInsertMap(OperatorFactory.getAndMakeChild(new GroupByDesc(GroupByDesc.Mode.HASH, outputColumnNames, groupByKeys, aggregations, false, groupByMemoryUsage, memoryThreshold, minReductionHashAggr, minReductionHashAggrLowerBound, null, false, -1, false), new RowSchema(groupByOutputRowResolver.getColumnInfos()), input), groupByOutputRowResolver);
    op.setColumnExprMap(colExprMap);
    return op;
}
Also used : AbstractMapJoinOperator(org.apache.hadoop.hive.ql.exec.AbstractMapJoinOperator) SelectOperator(org.apache.hadoop.hive.ql.exec.SelectOperator) JoinOperator(org.apache.hadoop.hive.ql.exec.JoinOperator) Operator(org.apache.hadoop.hive.ql.exec.Operator) GroupByOperator(org.apache.hadoop.hive.ql.exec.GroupByOperator) FileSinkOperator(org.apache.hadoop.hive.ql.exec.FileSinkOperator) FilterOperator(org.apache.hadoop.hive.ql.exec.FilterOperator) LimitOperator(org.apache.hadoop.hive.ql.exec.LimitOperator) ReduceSinkOperator(org.apache.hadoop.hive.ql.exec.ReduceSinkOperator) TableScanOperator(org.apache.hadoop.hive.ql.exec.TableScanOperator) UnionOperator(org.apache.hadoop.hive.ql.exec.UnionOperator) SMBMapJoinOperator(org.apache.hadoop.hive.ql.exec.SMBMapJoinOperator) ExprNodeConstantDesc(org.apache.hadoop.hive.ql.plan.ExprNodeConstantDesc) RowSchema(org.apache.hadoop.hive.ql.exec.RowSchema) LinkedHashMap(java.util.LinkedHashMap) HashMap(java.util.HashMap) ArrayList(java.util.ArrayList) ColumnInfo(org.apache.hadoop.hive.ql.exec.ColumnInfo) SQLUniqueConstraint(org.apache.hadoop.hive.metastore.api.SQLUniqueConstraint) SQLCheckConstraint(org.apache.hadoop.hive.metastore.api.SQLCheckConstraint) SQLDefaultConstraint(org.apache.hadoop.hive.metastore.api.SQLDefaultConstraint) DefaultConstraint(org.apache.hadoop.hive.ql.metadata.DefaultConstraint) SQLNotNullConstraint(org.apache.hadoop.hive.metastore.api.SQLNotNullConstraint) ExprNodeColumnDesc(org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc) AggregationDesc(org.apache.hadoop.hive.ql.plan.AggregationDesc) ExprNodeDesc(org.apache.hadoop.hive.ql.plan.ExprNodeDesc) GroupByDesc(org.apache.hadoop.hive.ql.plan.GroupByDesc)
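
The GroupByDesc constructor call above packs a dozen positional arguments into one line. The sketch below (the helper name buildSemijoinHashGroupByDesc is invented) restates the same construction on its own: it reads the four hash-aggregation memory settings from HiveConf and builds a map-side HASH group by over the given keys with no aggregation functions, mirroring the argument order used in genMapGroupByForSemijoin.

import java.util.ArrayList;
import java.util.List;

import org.apache.hadoop.hive.conf.HiveConf;
import org.apache.hadoop.hive.ql.plan.AggregationDesc;
import org.apache.hadoop.hive.ql.plan.ExprNodeDesc;
import org.apache.hadoop.hive.ql.plan.GroupByDesc;

public class SemijoinGroupBySketch {

    // Hypothetical helper: a map-side HASH group by over the given keys, with no aggregations.
    public static GroupByDesc buildSemijoinHashGroupByDesc(List<ExprNodeDesc> groupByKeys,
            List<String> outputColumnNames, HiveConf conf) {
        float groupByMemoryUsage = HiveConf.getFloatVar(conf, HiveConf.ConfVars.HIVEMAPAGGRHASHMEMORY);
        float memoryThreshold = HiveConf.getFloatVar(conf, HiveConf.ConfVars.HIVEMAPAGGRMEMORYTHRESHOLD);
        float minReductionHashAggr = HiveConf.getFloatVar(conf, HiveConf.ConfVars.HIVEMAPAGGRHASHMINREDUCTION);
        float minReductionHashAggrLowerBound =
                HiveConf.getFloatVar(conf, HiveConf.ConfVars.HIVEMAPAGGRHASHMINREDUCTIONLOWERBOUND);
        // Same positional arguments as in genMapGroupByForSemijoin: HASH mode, output names, keys,
        // empty aggregation list, then the memory settings, no grouping sets (-1 position), no distinct.
        return new GroupByDesc(GroupByDesc.Mode.HASH, outputColumnNames, groupByKeys,
                new ArrayList<AggregationDesc>(), false, groupByMemoryUsage, memoryThreshold,
                minReductionHashAggr, minReductionHashAggrLowerBound, null, false, -1, false);
    }
}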

Example 38 with AggregationDesc

Use of org.apache.hadoop.hive.ql.plan.AggregationDesc in project hive by apache.

The class SemanticAnalyzer, method genGroupByPlanGroupByOperator1:

/**
 * Generate the GroupByOperator for the Query Block (parseInfo.getXXX(dest)).
 * The new GroupByOperator will be a child of the reduceSinkOperatorInfo.
 *
 * @param parseInfo
 * @param dest
 * @param reduceSinkOperatorInfo
 * @param mode
 *          The mode of the aggregation (MERGEPARTIAL, PARTIAL2)
 * @param genericUDAFEvaluators
 *          The mapping from Aggregation StringTree to the
 *          genericUDAFEvaluator.
 * @param groupingSets
 *          list of grouping sets
 * @param groupingSetsPresent
 *          whether grouping sets are present in this query
 * @param groupingSetsNeedAdditionalMRJob
 *          whether grouping sets are consumed by this group by
 * @return the new GroupByOperator
 */
@SuppressWarnings("nls")
private Operator genGroupByPlanGroupByOperator1(QBParseInfo parseInfo, String dest, Operator reduceSinkOperatorInfo, GroupByDesc.Mode mode, Map<String, GenericUDAFEvaluator> genericUDAFEvaluators, List<Long> groupingSets, boolean groupingSetsPresent, boolean groupingSetsNeedAdditionalMRJob) throws SemanticException {
    List<String> outputColumnNames = new ArrayList<String>();
    RowResolver groupByInputRowResolver = opParseCtx.get(reduceSinkOperatorInfo).getRowResolver();
    RowResolver groupByOutputRowResolver = new RowResolver();
    groupByOutputRowResolver.setIsExprResolver(true);
    List<ExprNodeDesc> groupByKeys = new ArrayList<ExprNodeDesc>();
    List<AggregationDesc> aggregations = new ArrayList<AggregationDesc>();
    List<ASTNode> grpByExprs = getGroupByForClause(parseInfo, dest);
    Map<String, ExprNodeDesc> colExprMap = new HashMap<String, ExprNodeDesc>();
    for (int i = 0; i < grpByExprs.size(); ++i) {
        ASTNode grpbyExpr = grpByExprs.get(i);
        ColumnInfo exprInfo = groupByInputRowResolver.getExpression(grpbyExpr);
        if (exprInfo == null) {
            throw new SemanticException(ASTErrorUtils.getMsg(ErrorMsg.INVALID_COLUMN.getMsg(), grpbyExpr));
        }
        groupByKeys.add(new ExprNodeColumnDesc(exprInfo));
        String field = getColumnInternalName(i);
        outputColumnNames.add(field);
        ColumnInfo oColInfo = new ColumnInfo(field, exprInfo.getType(), "", false);
        groupByOutputRowResolver.putExpression(grpbyExpr, oColInfo);
        addAlternateGByKeyMappings(grpbyExpr, oColInfo, reduceSinkOperatorInfo, groupByOutputRowResolver);
        colExprMap.put(field, groupByKeys.get(groupByKeys.size() - 1));
    }
    // This is only needed if a new grouping set key is being created
    int groupingSetsPosition = -1;
    // For grouping sets, add a dummy grouping key
    if (groupingSetsPresent) {
        groupingSetsPosition = groupByKeys.size();
        // This function is called for GroupBy2 to add grouping id as part of the groupby keys
        if (!groupingSetsNeedAdditionalMRJob) {
            addGroupingSetKey(groupByKeys, groupByInputRowResolver, groupByOutputRowResolver, outputColumnNames, colExprMap);
        } else {
            // The grouping set has not yet been processed. Create a new grouping key
            // Consider the query: select a,b, count(1) from T group by a,b with cube;
            // where it is being executed in 2 map-reduce jobs
            // The plan for 1st MR is TableScan -> GroupBy1 -> ReduceSink -> GroupBy2 -> FileSink
            // GroupBy1/ReduceSink worked as if grouping sets were not present
            // This function is called for GroupBy2 to create new rows for grouping sets
            // For each input row (a,b), 4 rows are created for the example above:
            // (a,b), (a,null), (null, b), (null, null)
            createNewGroupingKey(groupByKeys, outputColumnNames, groupByOutputRowResolver, colExprMap);
        }
    }
    Map<String, ASTNode> aggregationTrees = parseInfo.getAggregationExprsForClause(dest);
    // get the last colName for the reduce KEY
    // it represents the column name corresponding to distinct aggr, if any
    String lastKeyColName = null;
    List<ExprNodeDesc> reduceValues = null;
    if (reduceSinkOperatorInfo.getConf() instanceof ReduceSinkDesc) {
        List<String> inputKeyCols = ((ReduceSinkDesc) reduceSinkOperatorInfo.getConf()).getOutputKeyColumnNames();
        if (inputKeyCols.size() > 0) {
            lastKeyColName = inputKeyCols.get(inputKeyCols.size() - 1);
        }
        reduceValues = ((ReduceSinkDesc) reduceSinkOperatorInfo.getConf()).getValueCols();
    }
    int numDistinctUDFs = 0;
    boolean containsDistinctAggr = false;
    for (Map.Entry<String, ASTNode> entry : aggregationTrees.entrySet()) {
        ASTNode value = entry.getValue();
        String aggName = unescapeIdentifier(value.getChild(0).getText());
        List<ExprNodeDesc> aggParameters = new ArrayList<ExprNodeDesc>();
        boolean isDistinct = (value.getType() == HiveParser.TOK_FUNCTIONDI);
        containsDistinctAggr = containsDistinctAggr || isDistinct;
        // For a distinct aggregate, partial aggregation has not been done on the map
        // side, so always look for the parameters: d+e
        if (isDistinct) {
            // 0 is the function name
            for (int i = 1; i < value.getChildCount(); i++) {
                ASTNode paraExpr = (ASTNode) value.getChild(i);
                ColumnInfo paraExprInfo = groupByInputRowResolver.getExpression(paraExpr);
                if (paraExprInfo == null) {
                    throw new SemanticException(ASTErrorUtils.getMsg(ErrorMsg.INVALID_COLUMN.getMsg(), paraExpr));
                }
                String paraExpression = paraExprInfo.getInternalName();
                assert (paraExpression != null);
                if (lastKeyColName != null) {
                    // if aggr is distinct, the parameter name is constructed as
                    // KEY.lastKeyColName:<tag>._colx
                    paraExpression = Utilities.ReduceField.KEY.name() + "." + lastKeyColName + ":" + numDistinctUDFs + "." + getColumnInternalName(i - 1);
                }
                ExprNodeDesc expr = new ExprNodeColumnDesc(paraExprInfo.getType(), paraExpression, paraExprInfo.getTabAlias(), paraExprInfo.getIsVirtualCol());
                ExprNodeDesc reduceValue = isConstantParameterInAggregationParameters(paraExprInfo.getInternalName(), reduceValues);
                if (reduceValue != null) {
                    // this parameter is a constant
                    expr = reduceValue;
                }
                aggParameters.add(expr);
            }
        } else {
            ColumnInfo paraExprInfo = groupByInputRowResolver.getExpression(value);
            if (paraExprInfo == null) {
                throw new SemanticException(ASTErrorUtils.getMsg(ErrorMsg.INVALID_COLUMN.getMsg(), value));
            }
            String paraExpression = paraExprInfo.getInternalName();
            assert (paraExpression != null);
            aggParameters.add(new ExprNodeColumnDesc(paraExprInfo.getType(), paraExpression, paraExprInfo.getTabAlias(), paraExprInfo.getIsVirtualCol()));
        }
        if (isDistinct) {
            numDistinctUDFs++;
        }
        Mode amode = groupByDescModeToUDAFMode(mode, isDistinct);
        GenericUDAFEvaluator genericUDAFEvaluator = genericUDAFEvaluators.get(entry.getKey());
        assert (genericUDAFEvaluator != null);
        GenericUDAFInfo udaf = getGenericUDAFInfo(genericUDAFEvaluator, amode, aggParameters);
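        // Interpretation: the distinct flag below is kept only for non-FINAL modes, since by the
        // FINAL stage duplicate parameter values have presumably already been eliminated upstream.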
        aggregations.add(new AggregationDesc(aggName.toLowerCase(), udaf.genericUDAFEvaluator, udaf.convertedParameters, (mode != GroupByDesc.Mode.FINAL && isDistinct), amode));
        String field = getColumnInternalName(groupByKeys.size() + aggregations.size() - 1);
        outputColumnNames.add(field);
        groupByOutputRowResolver.putExpression(value, new ColumnInfo(field, udaf.returnType, "", false));
    }
    float groupByMemoryUsage = HiveConf.getFloatVar(conf, HiveConf.ConfVars.HIVEMAPAGGRHASHMEMORY);
    float memoryThreshold = HiveConf.getFloatVar(conf, HiveConf.ConfVars.HIVEMAPAGGRMEMORYTHRESHOLD);
    float minReductionHashAggr = HiveConf.getFloatVar(conf, HiveConf.ConfVars.HIVEMAPAGGRHASHMINREDUCTION);
    float minReductionHashAggrLowerBound = HiveConf.getFloatVar(conf, ConfVars.HIVEMAPAGGRHASHMINREDUCTIONLOWERBOUND);
    // Nothing special needs to be done for grouping sets if
    // this is the final group by operator, and multiple rows corresponding to the
    // grouping sets have been generated upstream.
    // However, if an additional MR job has been created to handle grouping sets,
    // additional rows corresponding to grouping sets need to be created here.
    Operator op = putOpInsertMap(OperatorFactory.getAndMakeChild(new GroupByDesc(mode, outputColumnNames, groupByKeys, aggregations, groupByMemoryUsage, memoryThreshold, minReductionHashAggr, minReductionHashAggrLowerBound, groupingSets, groupingSetsPresent && groupingSetsNeedAdditionalMRJob, groupingSetsPosition, containsDistinctAggr), new RowSchema(groupByOutputRowResolver.getColumnInfos()), reduceSinkOperatorInfo), groupByOutputRowResolver);
    op.setColumnExprMap(colExprMap);
    return op;
}
Also used : AbstractMapJoinOperator(org.apache.hadoop.hive.ql.exec.AbstractMapJoinOperator) SelectOperator(org.apache.hadoop.hive.ql.exec.SelectOperator) JoinOperator(org.apache.hadoop.hive.ql.exec.JoinOperator) Operator(org.apache.hadoop.hive.ql.exec.Operator) GroupByOperator(org.apache.hadoop.hive.ql.exec.GroupByOperator) FileSinkOperator(org.apache.hadoop.hive.ql.exec.FileSinkOperator) FilterOperator(org.apache.hadoop.hive.ql.exec.FilterOperator) LimitOperator(org.apache.hadoop.hive.ql.exec.LimitOperator) ReduceSinkOperator(org.apache.hadoop.hive.ql.exec.ReduceSinkOperator) TableScanOperator(org.apache.hadoop.hive.ql.exec.TableScanOperator) UnionOperator(org.apache.hadoop.hive.ql.exec.UnionOperator) SMBMapJoinOperator(org.apache.hadoop.hive.ql.exec.SMBMapJoinOperator) LinkedHashMap(java.util.LinkedHashMap) HashMap(java.util.HashMap) GenericUDAFEvaluator(org.apache.hadoop.hive.ql.udf.generic.GenericUDAFEvaluator) ArrayList(java.util.ArrayList) ColumnInfo(org.apache.hadoop.hive.ql.exec.ColumnInfo) ExprNodeColumnDesc(org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc) ExprNodeDesc(org.apache.hadoop.hive.ql.plan.ExprNodeDesc) ReduceSinkDesc(org.apache.hadoop.hive.ql.plan.ReduceSinkDesc) GroupByDesc(org.apache.hadoop.hive.ql.plan.GroupByDesc) CalciteSemanticException(org.apache.hadoop.hive.ql.optimizer.calcite.CalciteSemanticException) RowSchema(org.apache.hadoop.hive.ql.exec.RowSchema) Mode(org.apache.hadoop.hive.ql.udf.generic.GenericUDAFEvaluator.Mode) SQLUniqueConstraint(org.apache.hadoop.hive.metastore.api.SQLUniqueConstraint) SQLCheckConstraint(org.apache.hadoop.hive.metastore.api.SQLCheckConstraint) SQLDefaultConstraint(org.apache.hadoop.hive.metastore.api.SQLDefaultConstraint) DefaultConstraint(org.apache.hadoop.hive.ql.metadata.DefaultConstraint) SQLNotNullConstraint(org.apache.hadoop.hive.metastore.api.SQLNotNullConstraint) AggregationDesc(org.apache.hadoop.hive.ql.plan.AggregationDesc) Map(java.util.Map) TreeMap(java.util.TreeMap) ImmutableMap(com.google.common.collect.ImmutableMap) SortedMap(java.util.SortedMap)
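
The parameter naming applied to distinct aggregates above, KEY.lastKeyColName:<tag>._colx, can be hard to picture from the concatenation alone. Here is a tiny standalone sketch of the same string construction; the concrete values ("_col2" as the last reduce KEY column name, tag 0 for the first distinct UDAF, first parameter) are made up for illustration.

import org.apache.hadoop.hive.conf.HiveConf;
import org.apache.hadoop.hive.ql.exec.Utilities;

public class DistinctParamNameSketch {

    public static void main(String[] args) {
        String lastKeyColName = "_col2"; // hypothetical last column of the reduce KEY
        int numDistinctUDFs = 0;         // tag of the first distinct aggregate
        int parameterIndex = 1;          // value.getChild(1), i.e. the first parameter
        // Mirrors the expression used in genGroupByPlanGroupByOperator1 above.
        String paraExpression = Utilities.ReduceField.KEY.name() + "." + lastKeyColName
                + ":" + numDistinctUDFs + "." + HiveConf.getColumnInternalName(parameterIndex - 1);
        System.out.println(paraExpression); // prints KEY._col2:0._col0
    }
}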

Example 39 with AggregationDesc

Use of org.apache.hadoop.hive.ql.plan.AggregationDesc in project hive by apache.

The class SemiJoinReductionMerge, method createGroupBy:

/**
 * Creates a group by operator with min, max, and bloomFilter aggregations for every column of the parent.
 * <p>
 * The method generates two kind of group by operators for intermediate and final aggregations respectively.
 * Intermediate aggregations require the parent operator to be a select operator while final aggregations assume that
 * the parent is a reduce sink operator.
 * </p>
 * <p>
 * Intermediate group by example.
 * <pre>
 * Input: SEL[fname, lname, age, hash(fname,lname,age)]
 * Output: GBY[min(fname),max(fname),min(lname),max(lname),min(age),max(age),bloom(hash)]
 * </pre>
 * </p>
 * <p>
 * Final group by example.
 * <pre>
 * Input: RS[fname_min,fname_max,lname_min,lname_max,age_min,age_max, hash_bloom]
 * Output: GBY[min(fname_min),max(fname_max),min(lname_min),max(lname_max),min(age_min),max(age_max),bloom(hash_bloom)]
 * </pre>
 * </p>
 */
private static GroupByOperator createGroupBy(SelectOperator selectOp, Operator<?> parentOp, GroupByDesc.Mode gbMode, long bloomEntriesHint, HiveConf hiveConf) {
    final List<ExprNodeDesc> params;
    final GenericUDAFEvaluator.Mode udafMode = SemanticAnalyzer.groupByDescModeToUDAFMode(gbMode, false);
    switch(gbMode) {
        case FINAL:
            params = createGroupByAggregationParameters((ReduceSinkOperator) parentOp);
            break;
        case HASH:
            params = createGroupByAggregationParameters(selectOp);
            break;
        default:
            throw new AssertionError(gbMode.toString() + " is not supported");
    }
    List<AggregationDesc> gbAggs = new ArrayList<>();
    Deque<ExprNodeDesc> paramsCopy = new ArrayDeque<>(params);
    while (paramsCopy.size() > 1) {
        gbAggs.add(minAggregation(udafMode, paramsCopy.poll()));
        gbAggs.add(maxAggregation(udafMode, paramsCopy.poll()));
    }
    gbAggs.add(bloomFilterAggregation(udafMode, paramsCopy.poll(), selectOp, bloomEntriesHint, hiveConf));
    assert paramsCopy.size() == 0;
    List<String> gbOutputNames = new ArrayList<>(gbAggs.size());
    List<ColumnInfo> gbColInfos = new ArrayList<>(gbAggs.size());
    for (int i = 0; i < params.size(); i++) {
        String colName = HiveConf.getColumnInternalName(i);
        gbOutputNames.add(colName);
        final TypeInfo colType;
        if (i == params.size() - 1) {
            // Bloom type
            colType = TypeInfoFactory.binaryTypeInfo;
        } else {
            // Min/Max type
            colType = params.get(i).getTypeInfo();
        }
        gbColInfos.add(new ColumnInfo(colName, colType, "", false));
    }
    float groupByMemoryUsage = HiveConf.getFloatVar(hiveConf, HiveConf.ConfVars.HIVEMAPAGGRHASHMEMORY);
    float memoryThreshold = HiveConf.getFloatVar(hiveConf, HiveConf.ConfVars.HIVEMAPAGGRMEMORYTHRESHOLD);
    float minReductionHashAggr = HiveConf.getFloatVar(hiveConf, HiveConf.ConfVars.HIVEMAPAGGRHASHMINREDUCTION);
    float minReductionHashAggrLowerBound = HiveConf.getFloatVar(hiveConf, HiveConf.ConfVars.HIVEMAPAGGRHASHMINREDUCTIONLOWERBOUND);
    GroupByDesc groupBy = new GroupByDesc(gbMode, gbOutputNames, Collections.emptyList(), gbAggs, false, groupByMemoryUsage, memoryThreshold, minReductionHashAggr, minReductionHashAggrLowerBound, null, false, -1, false);
    groupBy.setColumnExprMap(Collections.emptyMap());
    return (GroupByOperator) OperatorFactory.getAndMakeChild(groupBy, new RowSchema(gbColInfos), parentOp);
}
Also used : RowSchema(org.apache.hadoop.hive.ql.exec.RowSchema) GroupByOperator(org.apache.hadoop.hive.ql.exec.GroupByOperator) GenericUDAFEvaluator(org.apache.hadoop.hive.ql.udf.generic.GenericUDAFEvaluator) ArrayList(java.util.ArrayList) ColumnInfo(org.apache.hadoop.hive.ql.exec.ColumnInfo) TypeInfo(org.apache.hadoop.hive.serde2.typeinfo.TypeInfo) ArrayDeque(java.util.ArrayDeque) ReduceSinkOperator(org.apache.hadoop.hive.ql.exec.ReduceSinkOperator) AggregationDesc(org.apache.hadoop.hive.ql.plan.AggregationDesc) ExprNodeDesc(org.apache.hadoop.hive.ql.plan.ExprNodeDesc) GroupByDesc(org.apache.hadoop.hive.ql.plan.GroupByDesc)
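
The Deque-based loop in createGroupBy consumes the parameters in pairs, one for min and one for max, and leaves the last parameter for the bloom filter. A toy sketch with plain strings, using the column names from the final group by example in the Javadoc, shows the resulting aggregation order; for the intermediate case the parameter list presumably repeats each key column twice so that min and max both see it.

import java.util.ArrayDeque;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Deque;
import java.util.List;

public class SemiJoinAggregationOrderSketch {

    public static void main(String[] args) {
        // Parameters of the final group by: min/max pairs per key column, then the bloom hash column.
        Deque<String> params = new ArrayDeque<>(Arrays.asList(
                "fname_min", "fname_max", "lname_min", "lname_max", "age_min", "age_max", "hash_bloom"));
        List<String> aggs = new ArrayList<>();
        while (params.size() > 1) {
            aggs.add("min(" + params.poll() + ")");
            aggs.add("max(" + params.poll() + ")");
        }
        aggs.add("bloom_filter(" + params.poll() + ")");
        // [min(fname_min), max(fname_max), min(lname_min), max(lname_max),
        //  min(age_min), max(age_max), bloom_filter(hash_bloom)]
        System.out.println(aggs);
    }
}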

Example 40 with AggregationDesc

Use of org.apache.hadoop.hive.ql.plan.AggregationDesc in project hive by apache.

The class SemiJoinReductionMerge, method bloomFilterAggregation:

private static AggregationDesc bloomFilterAggregation(GenericUDAFEvaluator.Mode mode, ExprNodeDesc col, SelectOperator source, long numEntriesHint, HiveConf conf) {
    GenericUDAFBloomFilterEvaluator bloomFilterEval = new GenericUDAFBloomFilterEvaluator();
    bloomFilterEval.setSourceOperator(source);
    bloomFilterEval.setMaxEntries(conf.getLongVar(HiveConf.ConfVars.TEZ_MAX_BLOOM_FILTER_ENTRIES));
    bloomFilterEval.setMinEntries(conf.getLongVar(HiveConf.ConfVars.TEZ_MIN_BLOOM_FILTER_ENTRIES));
    bloomFilterEval.setFactor(conf.getFloatVar(HiveConf.ConfVars.TEZ_BLOOM_FILTER_FACTOR));
    bloomFilterEval.setHintEntries(numEntriesHint);
    List<ExprNodeDesc> p = Collections.singletonList(col);
    AggregationDesc bloom = new AggregationDesc("bloom_filter", bloomFilterEval, p, false, mode);
    // It is necessary to set the bloom filter evaluator, otherwise there are runtime failures; see HIVE-24018
    bloom.setGenericUDAFWritableEvaluator(bloomFilterEval);
    return bloom;
}
Also used : GenericUDAFBloomFilterEvaluator(org.apache.hadoop.hive.ql.udf.generic.GenericUDAFBloomFilter.GenericUDAFBloomFilterEvaluator) AggregationDesc(org.apache.hadoop.hive.ql.plan.AggregationDesc) ExprNodeDesc(org.apache.hadoop.hive.ql.plan.ExprNodeDesc)
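
The evaluator above is sized from three Tez-related settings read from the HiveConf. Below is a short sketch of setting them programmatically before planning; the values are illustrative, not Hive defaults.

import org.apache.hadoop.hive.conf.HiveConf;

public class BloomFilterConfSketch {

    public static void main(String[] args) {
        HiveConf conf = new HiveConf();
        // Illustrative values only; they bound and scale the bloom filter entry estimate.
        conf.setLongVar(HiveConf.ConfVars.TEZ_MAX_BLOOM_FILTER_ENTRIES, 10000000L);
        conf.setLongVar(HiveConf.ConfVars.TEZ_MIN_BLOOM_FILTER_ENTRIES, 1000000L);
        conf.setFloatVar(HiveConf.ConfVars.TEZ_BLOOM_FILTER_FACTOR, 1.5f);
        System.out.println(conf.getLongVar(HiveConf.ConfVars.TEZ_MAX_BLOOM_FILTER_ENTRIES));
    }
}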

Aggregations

AggregationDesc (org.apache.hadoop.hive.ql.plan.AggregationDesc): 40 usages
ArrayList (java.util.ArrayList): 36 usages
ExprNodeDesc (org.apache.hadoop.hive.ql.plan.ExprNodeDesc): 36 usages
GroupByDesc (org.apache.hadoop.hive.ql.plan.GroupByDesc): 33 usages
HashMap (java.util.HashMap): 26 usages
GroupByOperator (org.apache.hadoop.hive.ql.exec.GroupByOperator): 25 usages
ColumnInfo (org.apache.hadoop.hive.ql.exec.ColumnInfo): 23 usages
RowSchema (org.apache.hadoop.hive.ql.exec.RowSchema): 23 usages
ExprNodeColumnDesc (org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc): 23 usages
ReduceSinkOperator (org.apache.hadoop.hive.ql.exec.ReduceSinkOperator): 21 usages
Operator (org.apache.hadoop.hive.ql.exec.Operator): 19 usages
Mode (org.apache.hadoop.hive.ql.udf.generic.GenericUDAFEvaluator.Mode): 16 usages
SelectOperator (org.apache.hadoop.hive.ql.exec.SelectOperator): 14 usages
GenericUDAFEvaluator (org.apache.hadoop.hive.ql.udf.generic.GenericUDAFEvaluator): 14 usages
Map (java.util.Map): 12 usages
TableScanOperator (org.apache.hadoop.hive.ql.exec.TableScanOperator): 12 usages
LinkedHashMap (java.util.LinkedHashMap): 11 usages
AbstractMapJoinOperator (org.apache.hadoop.hive.ql.exec.AbstractMapJoinOperator): 11 usages
FileSinkOperator (org.apache.hadoop.hive.ql.exec.FileSinkOperator): 11 usages
FilterOperator (org.apache.hadoop.hive.ql.exec.FilterOperator): 11 usages