Use of org.apache.hadoop.hive.ql.exec.GroupByOperator in project hive by apache: the class SemanticAnalyzer, method genGroupByPlanMapAggr2MR.
/**
* Generate a Group-By plan using 2 map-reduce jobs. However, only 1
* group-by plan is generated if the query involves no grouping key and no
* distincts. In that case, the plan is the same as that generated by
* genGroupByPlanMapAggr1MR. Otherwise, the following plan is generated: First
* perform a map side partial aggregation (to reduce the amount of data). Then
* spray by the grouping key and distinct key (or a random number, if no
* distinct is present) in the hope of getting a uniform distribution, and compute
* partial aggregates grouped by the reduction key (grouping key + distinct
* key). Evaluate partial aggregates first, and spray by the grouping key to
* compute actual aggregates in the second phase.
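*
* For example (a hypothetical query), select key, count(distinct value) from T group by key
* runs in two jobs: the first sprays by (key, value) so that duplicate values meet in a
* single reducer, which computes a partial distinct count per key; the second sprays by
* key alone and merges the partials into the final count.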
*
* The aggregation evaluation functions are as follows:
*
* No grouping sets:
* STAGE 1
* Group by Operator:
* grouping keys: group by expressions if no DISTINCT
* grouping keys: group by expressions + distinct keys if DISTINCT
* Mapper: iterate/terminatePartial (mode = HASH)
* Partitioning Key: random() if no DISTINCT
* grouping + distinct key if DISTINCT
* Sorting Key: grouping key if no DISTINCT
* grouping + distinct key if DISTINCT
* Reducer: iterate/terminatePartial if DISTINCT
* merge/terminatePartial if NO DISTINCT (mode = MERGEPARTIAL)
* Group by Operator:
* grouping keys: group by expressions
*
* STAGE 2
* Partitioning Key: grouping key
* Sorting Key: grouping key
* Reducer: merge/terminate (mode = FINAL)
*
* In the presence of grouping sets, the aggregation evaluation functions are as follows:
* STAGE 1
* Group by Operator:
* grouping keys: group by expressions + grouping id. if no DISTINCT
* grouping keys: group by expressions + grouping id. + distinct keys if DISTINCT
* Mapper: iterate/terminatePartial (mode = HASH)
* Partitioning Key: random() if no DISTINCT
* grouping + grouping id. + distinct key if DISTINCT
* Sorting Key: grouping key + grouping id. if no DISTINCT
* grouping + grouping id. + distinct key if DISTINCT
* Reducer: iterate/terminatePartial if DISTINCT
* merge/terminatePartial if NO DISTINCT (mode = MERGEPARTIAL)
* Group by Operator:
* grouping keys: group by expressions + grouping id.
*
* STAGE 2
* Partitioning Key: grouping key
* Sorting Key: grouping key + grouping id.
* Reducer: merge/terminate (mode = FINAL)
*/
@SuppressWarnings("nls")
private Operator genGroupByPlanMapAggr2MR(String dest, QB qb, Operator inputOperatorInfo) throws SemanticException {
QBParseInfo parseInfo = qb.getParseInfo();
ObjectPair<List<ASTNode>, List<Long>> grpByExprsGroupingSets = getGroupByGroupingSetsForClause(parseInfo, dest);
List<ASTNode> grpByExprs = grpByExprsGroupingSets.getFirst();
List<Long> groupingSets = grpByExprsGroupingSets.getSecond();
boolean groupingSetsPresent = !groupingSets.isEmpty();
if (groupingSetsPresent) {
checkExpressionsForGroupingSet(grpByExprs, parseInfo.getDistinctFuncExprsForClause(dest), parseInfo.getAggregationExprsForClause(dest), opParseCtx.get(inputOperatorInfo).getRowResolver());
int newMRJobGroupingSetsThreshold = conf.getIntVar(HiveConf.ConfVars.HIVE_NEW_JOB_GROUPING_SET_CARDINALITY);
// An additional MR job would be required anyway for this many grouping sets, which is incompatible with skew handling, so fail fast.
if (groupingSets.size() > newMRJobGroupingSetsThreshold) {
String errorMsg = "The number of rows per input row due to grouping sets is " + groupingSets.size();
throw new SemanticException(ErrorMsg.HIVE_GROUPING_SETS_THRESHOLD_NOT_ALLOWED_WITH_SKEW.getMsg(errorMsg));
}
}
// ////// Generate GroupbyOperator for a map-side partial aggregation
Map<String, GenericUDAFEvaluator> genericUDAFEvaluators = new LinkedHashMap<String, GenericUDAFEvaluator>();
GroupByOperator groupByOperatorInfo = (GroupByOperator) genGroupByPlanMapGroupByOperator(qb, dest, grpByExprs, inputOperatorInfo, GroupByDesc.Mode.HASH, genericUDAFEvaluators, groupingSets, groupingSetsPresent);
groupOpToInputTables.put(groupByOperatorInfo, opParseCtx.get(inputOperatorInfo).getRowResolver().getTableNames());
// For eg: select count(1) from T where t.ds = ....
if (!optimizeMapAggrGroupBy(dest, qb)) {
List<ASTNode> distinctFuncExprs = parseInfo.getDistinctFuncExprsForClause(dest);
// ////// Generate ReduceSink Operator
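// numPartitionFields below: -1 partitions the rows randomly (no DISTINCT), while
// Integer.MAX_VALUE partitions on all key columns, i.e. the grouping + distinct key.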
Operator reduceSinkOperatorInfo = genGroupByPlanReduceSinkOperator(qb, dest, groupByOperatorInfo, grpByExprs, distinctFuncExprs.isEmpty() ? -1 : Integer.MAX_VALUE, false, -1, true, groupingSetsPresent);
// ////// Generate GroupbyOperator for a partial aggregation
Operator groupByOperatorInfo2 = genGroupByPlanGroupByOperator1(parseInfo, dest, reduceSinkOperatorInfo, GroupByDesc.Mode.PARTIALS, genericUDAFEvaluators, groupingSets, groupingSetsPresent, false);
int numReducers = -1;
if (grpByExprs.isEmpty()) {
numReducers = 1;
}
// ////// Generate ReduceSinkOperator2
Operator reduceSinkOperatorInfo2 = genGroupByPlanReduceSinkOperator2MR(parseInfo, dest, groupByOperatorInfo2, grpByExprs.size(), numReducers, groupingSetsPresent);
// ////// Generate GroupbyOperator3
return genGroupByPlanGroupByOperator2MR(parseInfo, dest, reduceSinkOperatorInfo2, GroupByDesc.Mode.FINAL, genericUDAFEvaluators, groupingSetsPresent);
} else {
// If there are no grouping keys, grouping sets cannot be present
assert !groupingSetsPresent;
// ////// Generate ReduceSink Operator
Operator reduceSinkOperatorInfo = genGroupByPlanReduceSinkOperator(qb, dest, groupByOperatorInfo, grpByExprs, grpByExprs.size(), false, 1, true, groupingSetsPresent);
return genGroupByPlanGroupByOperator2MR(parseInfo, dest, reduceSinkOperatorInfo, GroupByDesc.Mode.FINAL, genericUDAFEvaluators, false);
}
}
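A minimal sketch (a hypothetical standalone driver, not part of the source above; Driver and SessionState API details vary across Hive versions) of the settings under which SemanticAnalyzer dispatches to genGroupByPlanMapAggr2MR: map-side aggregation combined with skewed-data handling.
import org.apache.hadoop.hive.conf.HiveConf;
import org.apache.hadoop.hive.ql.Driver;
import org.apache.hadoop.hive.ql.session.SessionState;
public class MapAggr2MRExample {
  public static void main(String[] args) throws Exception {
    HiveConf conf = new HiveConf();
    // Both settings must be on to reach the 2-MR-job plan above.
    conf.setBoolVar(HiveConf.ConfVars.HIVEMAPSIDEAGGREGATE, true); // hive.map.aggr
    conf.setBoolVar(HiveConf.ConfVars.HIVEGROUPBYSKEW, true); // hive.groupby.skewindata
    SessionState.start(conf);
    Driver driver = new Driver(conf);
    // The DISTINCT makes (key, value) the stage-1 spray key; without it,
    // stage 1 would spray by random() to smooth out the skew.
    driver.run("SELECT key, count(DISTINCT value) FROM src GROUP BY key");
  }
}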
Use of org.apache.hadoop.hive.ql.exec.GroupByOperator in project hive by apache: the class SemanticAnalyzer, method genGroupByPlan1ReduceMultiGBY.
@SuppressWarnings({ "nls" })
private Operator genGroupByPlan1ReduceMultiGBY(List<String> dests, QB qb, Operator input, Map<String, Operator> aliasToOpInfo) throws SemanticException {
QBParseInfo parseInfo = qb.getParseInfo();
ExprNodeDesc previous = null;
Operator selectInput = input;
// In order to facilitate partition pruning, OR the where clauses together and put them at the
// top of the operator tree; this can also reduce the amount of data going to the reducer
List<ExprNodeDesc.ExprNodeDescEqualityWrapper> whereExpressions = new ArrayList<ExprNodeDesc.ExprNodeDescEqualityWrapper>();
for (String dest : dests) {
ObjectPair<List<ASTNode>, List<Long>> grpByExprsGroupingSets = getGroupByGroupingSetsForClause(parseInfo, dest);
List<Long> groupingSets = grpByExprsGroupingSets.getSecond();
if (!groupingSets.isEmpty()) {
throw new SemanticException(ErrorMsg.HIVE_GROUPING_SETS_AGGR_NOMAPAGGR_MULTIGBY.getMsg());
}
ASTNode whereExpr = parseInfo.getWhrForClause(dest);
if (whereExpr != null) {
OpParseContext inputCtx = opParseCtx.get(input);
RowResolver inputRR = inputCtx.getRowResolver();
ExprNodeDesc current = genExprNodeDesc((ASTNode) whereExpr.getChild(0), inputRR);
// Check the list of where expressions already added so they aren't duplicated
ExprNodeDesc.ExprNodeDescEqualityWrapper currentWrapped = new ExprNodeDesc.ExprNodeDescEqualityWrapper(current);
if (!whereExpressions.contains(currentWrapped)) {
whereExpressions.add(currentWrapped);
} else {
continue;
}
if (previous == null) {
// If this is the first expression
previous = current;
continue;
}
GenericUDFOPOr or = new GenericUDFOPOr();
List<ExprNodeDesc> expressions = new ArrayList<ExprNodeDesc>(2);
expressions.add(current);
expressions.add(previous);
ExprNodeDesc orExpr = new ExprNodeGenericFuncDesc(TypeInfoFactory.booleanTypeInfo, or, expressions);
previous = orExpr;
} else {
// If any branch lacks a where clause, there can be no common filter
previous = null;
break;
}
}
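// At this point 'previous' holds the OR of every branch's filter, or null if some
// branch had no WHERE clause (in which case no common filter can be pushed down).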
if (previous != null) {
OpParseContext inputCtx = opParseCtx.get(input);
RowResolver inputRR = inputCtx.getRowResolver();
FilterDesc orFilterDesc = new FilterDesc(previous, false);
orFilterDesc.setGenerated(true);
selectInput = putOpInsertMap(OperatorFactory.getAndMakeChild(orFilterDesc, new RowSchema(inputRR.getColumnInfos()), input), inputRR);
}
// insert a select operator here used by the ColumnPruner to reduce
// the data to shuffle
Operator select = genSelectAllDesc(selectInput);
// Generate ReduceSinkOperator
ReduceSinkOperator reduceSinkOperatorInfo = genCommonGroupByPlanReduceSinkOperator(qb, dests, select);
// It is assumed throughout the code that a reducer has a single child; add a
// ForwardOperator so that multiple filter/group-by operators can be added as children
RowResolver reduceSinkOperatorInfoRR = opParseCtx.get(reduceSinkOperatorInfo).getRowResolver();
Operator forwardOp = putOpInsertMap(OperatorFactory.getAndMakeChild(new ForwardDesc(), new RowSchema(reduceSinkOperatorInfoRR.getColumnInfos()), reduceSinkOperatorInfo), reduceSinkOperatorInfoRR);
Operator curr = forwardOp;
for (String dest : dests) {
curr = forwardOp;
if (parseInfo.getWhrForClause(dest) != null) {
ASTNode whereExpr = qb.getParseInfo().getWhrForClause(dest);
curr = genFilterPlan((ASTNode) whereExpr.getChild(0), qb, forwardOp, aliasToOpInfo, false, true);
}
// Generate GroupbyOperator
Operator groupByOperatorInfo = genGroupByPlanGroupByOperator(parseInfo, dest, curr, reduceSinkOperatorInfo, GroupByDesc.Mode.COMPLETE, null);
// TODO: should we pass curr instead of null?
curr = genPostGroupByBodyPlan(groupByOperatorInfo, dest, qb, aliasToOpInfo, null);
}
return curr;
}
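A minimal sketch (hypothetical query; dest1 and dest2 are assumed tables) of the multi-insert shape this method targets: several group-bys evaluated off a single shuffle.
// Hypothetical multi-group-by query.
String multiGroupBy =
    "FROM src"
  + " INSERT OVERWRITE TABLE dest1 SELECT key, count(1) WHERE key < 100 GROUP BY key"
  + " INSERT OVERWRITE TABLE dest2 SELECT key, count(1) WHERE key >= 100 GROUP BY key";
// Conceptual plan produced by the method above:
//   TableScan -> Filter(key < 100 OR key >= 100) -> Select -> ReduceSink -> Forward
//     -> Filter(key < 100)  -> GroupBy(COMPLETE) -> ... -> dest1
//     -> Filter(key >= 100) -> GroupBy(COMPLETE) -> ... -> dest2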
Use of org.apache.hadoop.hive.ql.exec.GroupByOperator in project hive by apache: the class SemanticAnalyzer, method genGroupByPlanMapAggrNoSkew.
/**
* Generate a Group-By plan using 1 map-reduce job. First perform a map-side
* partial aggregation (to reduce the amount of data); at this point, map-side
* partial aggregation may be turned off based on its observed performance. Then
* spray by the group by key, sort by the distinct key (if any), and compute the
* actual aggregates
*
* The aggregation evaluation functions are as follows:
*
* No grouping sets:
* Group By Operator:
* grouping keys: group by expressions if no DISTINCT
* grouping keys: group by expressions + distinct keys if DISTINCT
* Mapper: iterate/terminatePartial (mode = HASH)
* Partitioning Key: grouping key
* Sorting Key: grouping key if no DISTINCT
* grouping + distinct key if DISTINCT
* Reducer: iterate/terminate if DISTINCT
* merge/terminate if NO DISTINCT (mode MERGEPARTIAL)
*
* Grouping Sets:
* Group By Operator:
* grouping keys: group by expressions + grouping id. if no DISTINCT
* grouping keys: group by expressions + grouping id. + distinct keys if DISTINCT
* Mapper: iterate/terminatePartial (mode = HASH)
* Partitioning Key: grouping key + grouping id.
* Sorting Key: grouping key + grouping id. if no DISTINCT
* grouping + grouping id. + distinct key if DISTINCT
* Reducer: iterate/terminate if DISTINCT
* merge/terminate if NO DISTINCT (mode MERGEPARTIAL)
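*
* For example (hypothetical), select a, b, count(1) from T group by a, b with cube
* expands each input row into 4 grouping-set rows, well under the default cardinality
* threshold, so the whole query runs in this single MR job.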
*
* Grouping Sets with an additional MR job introduced (distincts are not allowed):
* Group By Operator:
* grouping keys: group by expressions
* Mapper: iterate/terminatePartial (mode = HASH)
* Partitioning Key: grouping key
* Sorting Key: grouping key
* Reducer: merge/terminate (mode MERGEPARTIAL)
* Group by Operator:
* grouping keys: group by expressions + a newly created grouping id. key
*
* STAGE 2
* Partitioning Key: grouping key + grouping id.
* Sorting Key: grouping key + grouping id.
* Reducer: merge/terminate (mode = FINAL)
* Group by Operator:
* grouping keys: group by expressions + grouping id.
*/
@SuppressWarnings("nls")
private Operator genGroupByPlanMapAggrNoSkew(String dest, QB qb, Operator inputOperatorInfo) throws SemanticException {
QBParseInfo parseInfo = qb.getParseInfo();
ObjectPair<List<ASTNode>, List<Long>> grpByExprsGroupingSets = getGroupByGroupingSetsForClause(parseInfo, dest);
List<ASTNode> grpByExprs = grpByExprsGroupingSets.getFirst();
List<Long> groupingSets = grpByExprsGroupingSets.getSecond();
boolean groupingSetsPresent = !groupingSets.isEmpty();
int newMRJobGroupingSetsThreshold = conf.getIntVar(HiveConf.ConfVars.HIVE_NEW_JOB_GROUPING_SET_CARDINALITY);
if (groupingSetsPresent) {
checkExpressionsForGroupingSet(grpByExprs, parseInfo.getDistinctFuncExprsForClause(dest), parseInfo.getAggregationExprsForClause(dest), opParseCtx.get(inputOperatorInfo).getRowResolver());
}
// ////// Generate GroupbyOperator for a map-side partial aggregation
Map<String, GenericUDAFEvaluator> genericUDAFEvaluators = new LinkedHashMap<String, GenericUDAFEvaluator>();
// Is the grouping sets data consumed in the current MR job, or
// does it need an additional MR job?
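// Each input row expands into groupingSets.size() rows; if that exceeds the
// hive.new.job.grouping.set.cardinality threshold, the expansion is deferred to a
// second MR job rather than performed in this one.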
boolean groupingSetsNeedAdditionalMRJob = groupingSetsPresent && groupingSets.size() > newMRJobGroupingSetsThreshold;
GroupByOperator groupByOperatorInfo = (GroupByOperator) genGroupByPlanMapGroupByOperator(qb, dest, grpByExprs, inputOperatorInfo, GroupByDesc.Mode.HASH, genericUDAFEvaluators, groupingSets, groupingSetsPresent && !groupingSetsNeedAdditionalMRJob);
groupOpToInputTables.put(groupByOperatorInfo, opParseCtx.get(inputOperatorInfo).getRowResolver().getTableNames());
int numReducers = -1;
// Only a single reducer is needed if there are no grouping keys
if (grpByExprs.isEmpty()) {
numReducers = 1;
}
// ////// Generate ReduceSink Operator
boolean isDistinct = !qb.getParseInfo().getDistinctFuncExprsForClause(dest).isEmpty();
// Distincts are not allowed with an additional MR job
if (groupingSetsNeedAdditionalMRJob && isDistinct) {
String errorMsg = "The number of rows per input row due to grouping sets is " + groupingSets.size();
throw new SemanticException(ErrorMsg.HIVE_GROUPING_SETS_THRESHOLD_NOT_ALLOWED_WITH_DISTINCTS.getMsg(errorMsg));
}
Operator reduceSinkOperatorInfo = genGroupByPlanReduceSinkOperator(qb, dest, groupByOperatorInfo, grpByExprs, grpByExprs.size(), true, numReducers, true, groupingSetsPresent && !groupingSetsNeedAdditionalMRJob);
// Does it require a new MR job for grouping sets
if (!groupingSetsPresent || !groupingSetsNeedAdditionalMRJob) {
// No additional MR job is needed: merge the map-side partial aggregates on the reducer.
return genGroupByPlanGroupByOperator1(parseInfo, dest, reduceSinkOperatorInfo, GroupByDesc.Mode.MERGEPARTIAL, genericUDAFEvaluators, groupingSets, groupingSetsPresent, groupingSetsNeedAdditionalMRJob);
} else {
// Add 'n' rows corresponding to the grouping sets. For each row, create 'n' rows,
// one for each grouping set key. Since map-side aggregation has already been performed,
// the number of rows would have been reduced. Moreover, the rows corresponding to the
// grouping keys come together, so there is a higher chance of finding the rows in the hash
// table.
Operator groupByOperatorInfo2 = genGroupByPlanGroupByOperator1(parseInfo, dest, reduceSinkOperatorInfo, GroupByDesc.Mode.PARTIALS, genericUDAFEvaluators, groupingSets, groupingSetsPresent, groupingSetsNeedAdditionalMRJob);
// ////// Generate ReduceSinkOperator2
Operator reduceSinkOperatorInfo2 = genGroupByPlanReduceSinkOperator2MR(parseInfo, dest, groupByOperatorInfo2, grpByExprs.size() + 1, numReducers, groupingSetsPresent);
// ////// Generate GroupbyOperator3
return genGroupByPlanGroupByOperator2MR(parseInfo, dest, reduceSinkOperatorInfo2, GroupByDesc.Mode.FINAL, genericUDAFEvaluators, groupingSetsPresent);
}
}
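A minimal sketch (hypothetical query) of when this path introduces the extra MR job; the default hive.new.job.grouping.set.cardinality is assumed to be 30.
HiveConf conf = new HiveConf();
// Maximum number of grouping-set rows a single input row may expand into
// before a separate MR job is introduced.
int threshold = conf.getIntVar(HiveConf.ConfVars.HIVE_NEW_JOB_GROUPING_SET_CARDINALITY);
// "select a, b, c, d, e, count(1) from T group by a, b, c, d, e with cube" produces
// 2^5 = 32 grouping sets per input row; 32 > 30, so groupingSetsNeedAdditionalMRJob
// becomes true and the expansion happens in GroupBy2 of the second job.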
Use of org.apache.hadoop.hive.ql.exec.GroupByOperator in project hive by apache: the class SemanticAnalyzer, method genGroupByPlanGroupByOperator1.
/**
* Generate the GroupByOperator for the Query Block (parseInfo.getXXX(dest)).
* The new GroupByOperator will be a child of the reduceSinkOperatorInfo.
*
* @param parseInfo
* @param dest
* @param reduceSinkOperatorInfo
* @param mode
* The mode of the aggregation (MERGEPARTIAL, PARTIALS)
* @param genericUDAFEvaluators
* The mapping from Aggregation StringTree to the
* genericUDAFEvaluator.
* @param groupingSets
* list of grouping sets
* @param groupingSetsPresent
* whether grouping sets are present in this query
* @param groupingSetsNeedAdditionalMRJob
* whether an additional MR job is needed to process the grouping sets (i.e. they are not consumed by this group by)
* @return the new GroupByOperator
*/
@SuppressWarnings("nls")
private Operator genGroupByPlanGroupByOperator1(QBParseInfo parseInfo, String dest, Operator reduceSinkOperatorInfo, GroupByDesc.Mode mode, Map<String, GenericUDAFEvaluator> genericUDAFEvaluators, List<Long> groupingSets, boolean groupingSetsPresent, boolean groupingSetsNeedAdditionalMRJob) throws SemanticException {
ArrayList<String> outputColumnNames = new ArrayList<String>();
RowResolver groupByInputRowResolver = opParseCtx.get(reduceSinkOperatorInfo).getRowResolver();
RowResolver groupByOutputRowResolver = new RowResolver();
groupByOutputRowResolver.setIsExprResolver(true);
ArrayList<ExprNodeDesc> groupByKeys = new ArrayList<ExprNodeDesc>();
ArrayList<AggregationDesc> aggregations = new ArrayList<AggregationDesc>();
List<ASTNode> grpByExprs = getGroupByForClause(parseInfo, dest);
Map<String, ExprNodeDesc> colExprMap = new HashMap<String, ExprNodeDesc>();
for (int i = 0; i < grpByExprs.size(); ++i) {
ASTNode grpbyExpr = grpByExprs.get(i);
ColumnInfo exprInfo = groupByInputRowResolver.getExpression(grpbyExpr);
if (exprInfo == null) {
throw new SemanticException(ErrorMsg.INVALID_COLUMN.getMsg(grpbyExpr));
}
groupByKeys.add(new ExprNodeColumnDesc(exprInfo));
String field = getColumnInternalName(i);
outputColumnNames.add(field);
ColumnInfo oColInfo = new ColumnInfo(field, exprInfo.getType(), "", false);
groupByOutputRowResolver.putExpression(grpbyExpr, oColInfo);
addAlternateGByKeyMappings(grpbyExpr, oColInfo, reduceSinkOperatorInfo, groupByOutputRowResolver);
colExprMap.put(field, groupByKeys.get(groupByKeys.size() - 1));
}
// This is only needed if a new grouping set key is being created
int groupingSetsPosition = -1;
// For grouping sets, add a dummy grouping key
if (groupingSetsPresent) {
groupingSetsPosition = groupByKeys.size();
// This function is called for GroupBy2 to add grouping id as part of the groupby keys
if (!groupingSetsNeedAdditionalMRJob) {
addGroupingSetKey(groupByKeys, groupByInputRowResolver, groupByOutputRowResolver, outputColumnNames, colExprMap);
} else {
// The grouping set has not yet been processed. Create a new grouping key
// Consider the query: select a,b, count(1) from T group by a,b with cube;
// where it is being executed in 2 map-reduce jobs
// The plan for 1st MR is TableScan -> GroupBy1 -> ReduceSink -> GroupBy2 -> FileSink
// GroupBy1/ReduceSink worked as if grouping sets were not present
// This function is called for GroupBy2 to create new rows for grouping sets
// For each input row (a,b), 4 rows are created for the example above:
// (a,b), (a,null), (null, b), (null, null)
createNewGroupingKey(groupByKeys, outputColumnNames, groupByOutputRowResolver, colExprMap);
}
}
HashMap<String, ASTNode> aggregationTrees = parseInfo.getAggregationExprsForClause(dest);
// get the last colName for the reduce KEY
// it represents the column name corresponding to distinct aggr, if any
String lastKeyColName = null;
List<ExprNodeDesc> reduceValues = null;
if (reduceSinkOperatorInfo.getConf() instanceof ReduceSinkDesc) {
List<String> inputKeyCols = ((ReduceSinkDesc) reduceSinkOperatorInfo.getConf()).getOutputKeyColumnNames();
if (inputKeyCols.size() > 0) {
lastKeyColName = inputKeyCols.get(inputKeyCols.size() - 1);
}
reduceValues = ((ReduceSinkDesc) reduceSinkOperatorInfo.getConf()).getValueCols();
}
int numDistinctUDFs = 0;
boolean containsDistinctAggr = false;
for (Map.Entry<String, ASTNode> entry : aggregationTrees.entrySet()) {
ASTNode value = entry.getValue();
String aggName = unescapeIdentifier(value.getChild(0).getText());
ArrayList<ExprNodeDesc> aggParameters = new ArrayList<ExprNodeDesc>();
boolean isDistinct = (value.getType() == HiveParser.TOK_FUNCTIONDI);
containsDistinctAggr = containsDistinctAggr || isDistinct;
// For a DISTINCT aggregation, partial aggregation has not been done on the map
// side, so always look for the parameters: d+e
if (isDistinct) {
// 0 is the function name
for (int i = 1; i < value.getChildCount(); i++) {
ASTNode paraExpr = (ASTNode) value.getChild(i);
ColumnInfo paraExprInfo = groupByInputRowResolver.getExpression(paraExpr);
if (paraExprInfo == null) {
throw new SemanticException(ErrorMsg.INVALID_COLUMN.getMsg(paraExpr));
}
String paraExpression = paraExprInfo.getInternalName();
assert (paraExpression != null);
if (isDistinct && lastKeyColName != null) {
// if the aggr is distinct, the parameter name is constructed as
// KEY.lastKeyColName:<tag>._colx
paraExpression = Utilities.ReduceField.KEY.name() + "." + lastKeyColName + ":" + numDistinctUDFs + "." + getColumnInternalName(i - 1);
}
ExprNodeDesc expr = new ExprNodeColumnDesc(paraExprInfo.getType(), paraExpression, paraExprInfo.getTabAlias(), paraExprInfo.getIsVirtualCol());
ExprNodeDesc reduceValue = isConstantParameterInAggregationParameters(paraExprInfo.getInternalName(), reduceValues);
if (reduceValue != null) {
// this parameter is a constant
expr = reduceValue;
}
aggParameters.add(expr);
}
} else {
ColumnInfo paraExprInfo = groupByInputRowResolver.getExpression(value);
if (paraExprInfo == null) {
throw new SemanticException(ErrorMsg.INVALID_COLUMN.getMsg(value));
}
String paraExpression = paraExprInfo.getInternalName();
assert (paraExpression != null);
aggParameters.add(new ExprNodeColumnDesc(paraExprInfo.getType(), paraExpression, paraExprInfo.getTabAlias(), paraExprInfo.getIsVirtualCol()));
}
if (isDistinct) {
numDistinctUDFs++;
}
Mode amode = groupByDescModeToUDAFMode(mode, isDistinct);
GenericUDAFEvaluator genericUDAFEvaluator = genericUDAFEvaluators.get(entry.getKey());
assert (genericUDAFEvaluator != null);
GenericUDAFInfo udaf = getGenericUDAFInfo(genericUDAFEvaluator, amode, aggParameters);
aggregations.add(new AggregationDesc(aggName.toLowerCase(), udaf.genericUDAFEvaluator, udaf.convertedParameters, (mode != GroupByDesc.Mode.FINAL && isDistinct), amode));
String field = getColumnInternalName(groupByKeys.size() + aggregations.size() - 1);
outputColumnNames.add(field);
groupByOutputRowResolver.putExpression(value, new ColumnInfo(field, udaf.returnType, "", false));
}
float groupByMemoryUsage = HiveConf.getFloatVar(conf, HiveConf.ConfVars.HIVEMAPAGGRHASHMEMORY);
float memoryThreshold = HiveConf.getFloatVar(conf, HiveConf.ConfVars.HIVEMAPAGGRMEMORYTHRESHOLD);
// Nothing special needs to be done for grouping sets if
// this is the final group by operator, and multiple rows corresponding to the
// grouping sets have been generated upstream.
// However, if an additional MR job has been created to handle grouping sets,
// additional rows corresponding to grouping sets need to be created here.
Operator op = putOpInsertMap(OperatorFactory.getAndMakeChild(new GroupByDesc(mode, outputColumnNames, groupByKeys, aggregations, groupByMemoryUsage, memoryThreshold, groupingSets, groupingSetsPresent && groupingSetsNeedAdditionalMRJob, groupingSetsPosition, containsDistinctAggr), new RowSchema(groupByOutputRowResolver.getColumnInfos()), reduceSinkOperatorInfo), groupByOutputRowResolver);
op.setColumnExprMap(colExprMap);
return op;
}
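A minimal sketch (column names assumed) of the reduce-side parameter name that the distinct branch above constructs:
// First DISTINCT aggregate (tag 0), first argument (child index 1; index 0 is the
// function name), with "_col1" as the last ReduceSink output key column:
String lastKeyColName = "_col1";
int numDistinctUDFs = 0;
int i = 1;
String paraExpression = "KEY" + "." + lastKeyColName + ":" + numDistinctUDFs + "." + "_col" + (i - 1);
// paraExpression is now "KEY._col1:0._col0", matching KEY.lastKeyColName:<tag>._colx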