use of org.apache.hadoop.hive.ql.plan.AggregationDesc in project hive by apache.
the class SemanticAnalyzer method genGroupByPlanGroupByOperator1.
/**
* Generate the GroupByOperator for the Query Block (parseInfo.getXXX(dest)).
* The new GroupByOperator will be a child of the reduceSinkOperatorInfo.
*
* @param parseInfo
* @param dest
* @param reduceSinkOperatorInfo
* @param mode
* The mode of the aggregation (MERGEPARTIAL, PARTIAL2)
* @param genericUDAFEvaluators
* The mapping from Aggregation StringTree to the
* genericUDAFEvaluator.
* @param groupingSets
* list of grouping sets
* @param groupingSetsPresent
* whether grouping sets are present in this query
* @param groupingSetsNeedAdditionalMRJob
* whether the grouping sets need an additional MR job; if so, this
* group by creates the grouping-set rows itself
* @return the new GroupByOperator
*/
@SuppressWarnings("nls")
private Operator genGroupByPlanGroupByOperator1(QBParseInfo parseInfo, String dest, Operator reduceSinkOperatorInfo, GroupByDesc.Mode mode, Map<String, GenericUDAFEvaluator> genericUDAFEvaluators, List<Integer> groupingSets, boolean groupingSetsPresent, boolean groupingSetsNeedAdditionalMRJob) throws SemanticException {
ArrayList<String> outputColumnNames = new ArrayList<String>();
RowResolver groupByInputRowResolver = opParseCtx.get(reduceSinkOperatorInfo).getRowResolver();
RowResolver groupByOutputRowResolver = new RowResolver();
groupByOutputRowResolver.setIsExprResolver(true);
ArrayList<ExprNodeDesc> groupByKeys = new ArrayList<ExprNodeDesc>();
ArrayList<AggregationDesc> aggregations = new ArrayList<AggregationDesc>();
List<ASTNode> grpByExprs = getGroupByForClause(parseInfo, dest);
Map<String, ExprNodeDesc> colExprMap = new HashMap<String, ExprNodeDesc>();
for (int i = 0; i < grpByExprs.size(); ++i) {
ASTNode grpbyExpr = grpByExprs.get(i);
ColumnInfo exprInfo = groupByInputRowResolver.getExpression(grpbyExpr);
if (exprInfo == null) {
throw new SemanticException(ErrorMsg.INVALID_COLUMN.getMsg(grpbyExpr));
}
groupByKeys.add(new ExprNodeColumnDesc(exprInfo));
String field = getColumnInternalName(i);
outputColumnNames.add(field);
ColumnInfo oColInfo = new ColumnInfo(field, exprInfo.getType(), "", false);
groupByOutputRowResolver.putExpression(grpbyExpr, oColInfo);
addAlternateGByKeyMappings(grpbyExpr, oColInfo, reduceSinkOperatorInfo, groupByOutputRowResolver);
colExprMap.put(field, groupByKeys.get(groupByKeys.size() - 1));
}
// This is only needed if a new grouping set key is being created
int groupingSetsPosition = -1;
// For grouping sets, add a dummy grouping key
if (groupingSetsPresent) {
groupingSetsPosition = groupByKeys.size();
// This function is called for GroupBy2 to add grouping id as part of the groupby keys
if (!groupingSetsNeedAdditionalMRJob) {
addGroupingSetKey(groupByKeys, groupByInputRowResolver, groupByOutputRowResolver, outputColumnNames, colExprMap);
} else {
// The grouping set has not yet been processed. Create a new grouping key
// Consider the query: select a,b, count(1) from T group by a,b with cube;
// where it is being executed in 2 map-reduce jobs
// The plan for 1st MR is TableScan -> GroupBy1 -> ReduceSink -> GroupBy2 -> FileSink
// GroupBy1/ReduceSink worked as if grouping sets were not present
// This function is called for GroupBy2 to create new rows for grouping sets
// For each input row (a,b), 4 rows are created for the example above:
// (a,b), (a,null), (null, b), (null, null)
createNewGroupingKey(groupByKeys, outputColumnNames, groupByOutputRowResolver, colExprMap);
}
}
HashMap<String, ASTNode> aggregationTrees = parseInfo.getAggregationExprsForClause(dest);
// get the last colName for the reduce KEY
// it represents the column name corresponding to distinct aggr, if any
String lastKeyColName = null;
List<ExprNodeDesc> reduceValues = null;
if (reduceSinkOperatorInfo.getConf() instanceof ReduceSinkDesc) {
List<String> inputKeyCols = ((ReduceSinkDesc) reduceSinkOperatorInfo.getConf()).getOutputKeyColumnNames();
if (inputKeyCols.size() > 0) {
lastKeyColName = inputKeyCols.get(inputKeyCols.size() - 1);
}
reduceValues = ((ReduceSinkDesc) reduceSinkOperatorInfo.getConf()).getValueCols();
}
int numDistinctUDFs = 0;
boolean containsDistinctAggr = false;
for (Map.Entry<String, ASTNode> entry : aggregationTrees.entrySet()) {
ASTNode value = entry.getValue();
String aggName = unescapeIdentifier(value.getChild(0).getText());
ArrayList<ExprNodeDesc> aggParameters = new ArrayList<ExprNodeDesc>();
boolean isDistinct = (value.getType() == HiveParser.TOK_FUNCTIONDI);
containsDistinctAggr = containsDistinctAggr || isDistinct;
// For distinct aggregates, partial aggregation is never performed on the map
// side, so always look for the original parameters (e.g. d+e in count(distinct d+e))
if (isDistinct) {
// 0 is the function name
for (int i = 1; i < value.getChildCount(); i++) {
ASTNode paraExpr = (ASTNode) value.getChild(i);
ColumnInfo paraExprInfo = groupByInputRowResolver.getExpression(paraExpr);
if (paraExprInfo == null) {
throw new SemanticException(ErrorMsg.INVALID_COLUMN.getMsg(paraExpr));
}
String paraExpression = paraExprInfo.getInternalName();
assert (paraExpression != null);
if (isDistinct && lastKeyColName != null) {
// if the aggregation is distinct, the parameter name is constructed as
// KEY.lastKeyColName:<tag>._colx
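// e.g. with the last reduce key column named _col2, the first parameter (i = 1)
// of the first distinct aggregate (tag 0) resolves to KEY._col2:0._col0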
paraExpression = Utilities.ReduceField.KEY.name() + "." + lastKeyColName + ":" + numDistinctUDFs + "." + getColumnInternalName(i - 1);
}
ExprNodeDesc expr = new ExprNodeColumnDesc(paraExprInfo.getType(), paraExpression, paraExprInfo.getTabAlias(), paraExprInfo.getIsVirtualCol());
ExprNodeDesc reduceValue = isConstantParameterInAggregationParameters(paraExprInfo.getInternalName(), reduceValues);
if (reduceValue != null) {
// this parameter is a constant
expr = reduceValue;
}
aggParameters.add(expr);
}
} else {
ColumnInfo paraExprInfo = groupByInputRowResolver.getExpression(value);
if (paraExprInfo == null) {
throw new SemanticException(ErrorMsg.INVALID_COLUMN.getMsg(value));
}
String paraExpression = paraExprInfo.getInternalName();
assert (paraExpression != null);
aggParameters.add(new ExprNodeColumnDesc(paraExprInfo.getType(), paraExpression, paraExprInfo.getTabAlias(), paraExprInfo.getIsVirtualCol()));
}
if (isDistinct) {
numDistinctUDFs++;
}
Mode amode = groupByDescModeToUDAFMode(mode, isDistinct);
GenericUDAFEvaluator genericUDAFEvaluator = genericUDAFEvaluators.get(entry.getKey());
assert (genericUDAFEvaluator != null);
GenericUDAFInfo udaf = getGenericUDAFInfo(genericUDAFEvaluator, amode, aggParameters);
aggregations.add(new AggregationDesc(aggName.toLowerCase(), udaf.genericUDAFEvaluator, udaf.convertedParameters, (mode != GroupByDesc.Mode.FINAL && isDistinct), amode));
String field = getColumnInternalName(groupByKeys.size() + aggregations.size() - 1);
outputColumnNames.add(field);
groupByOutputRowResolver.putExpression(value, new ColumnInfo(field, udaf.returnType, "", false));
}
float groupByMemoryUsage = HiveConf.getFloatVar(conf, HiveConf.ConfVars.HIVEMAPAGGRHASHMEMORY);
float memoryThreshold = HiveConf.getFloatVar(conf, HiveConf.ConfVars.HIVEMAPAGGRMEMORYTHRESHOLD);
// Nothing special needs to be done for grouping sets if
// this is the final group by operator, and multiple rows corresponding to the
// grouping sets have been generated upstream.
// However, if an additional MR job has been created to handle grouping sets,
// additional rows corresponding to grouping sets need to be created here.
Operator op = putOpInsertMap(OperatorFactory.getAndMakeChild(new GroupByDesc(mode, outputColumnNames, groupByKeys, aggregations, groupByMemoryUsage, memoryThreshold, groupingSets, groupingSetsPresent && groupingSetsNeedAdditionalMRJob, groupingSetsPosition, containsDistinctAggr), new RowSchema(groupByOutputRowResolver.getColumnInfos()), reduceSinkOperatorInfo), groupByOutputRowResolver);
op.setColumnExprMap(colExprMap);
return op;
}
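The grouping-sets comment above is easiest to see with a toy expansion. Below is a minimal standalone sketch (not Hive's implementation, and Hive's actual grouping-ID bit convention varies by version) that enumerates the rows GroupBy2 creates for one input row of select a, b, count(1) from T group by a, b with cube:

import java.util.Arrays;

public class CubeExpansionSketch {

    // For n group-by keys, CUBE produces 2^n grouping sets, one per bitmask.
    // A key is kept when its bit is set and replaced by null otherwise.
    static void expand(Object[] row) {
        int n = row.length;
        for (int mask = 0; mask < (1 << n); mask++) {
            Object[] out = new Object[n];
            for (int k = 0; k < n; k++) {
                out[k] = ((mask >> k) & 1) == 1 ? row[k] : null;
            }
            System.out.println(Arrays.toString(out) + "  (grouping id " + mask + ")");
        }
    }

    public static void main(String[] args) {
        // One input row (a=1, b=2) expands to the 4 rows from the comment:
        // (1, 2), (1, null), (null, 2), (null, null)
        expand(new Object[] { 1, 2 });
    }
}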
use of org.apache.hadoop.hive.ql.plan.AggregationDesc in project hive by apache.
the class SemanticAnalyzer method genGroupByPlanMapGroupByOperator.
/**
* Generate the map-side GroupByOperator for the Query Block
* (qb.getParseInfo().getXXX(dest)). The new GroupByOperator will be a child
* of the inputOperatorInfo.
*
* @param mode
* The mode of the aggregation (HASH)
* @param genericUDAFEvaluators
* If not null, this function will store the mapping from Aggregation
* StringTree to the genericUDAFEvaluator in this parameter, so it
* can be used in the next-stage GroupBy aggregations.
* @return the new GroupByOperator
*/
@SuppressWarnings("nls")
private Operator genGroupByPlanMapGroupByOperator(QB qb, String dest, List<ASTNode> grpByExprs, Operator inputOperatorInfo, GroupByDesc.Mode mode, Map<String, GenericUDAFEvaluator> genericUDAFEvaluators, List<Integer> groupingSetKeys, boolean groupingSetsPresent) throws SemanticException {
RowResolver groupByInputRowResolver = opParseCtx.get(inputOperatorInfo).getRowResolver();
QBParseInfo parseInfo = qb.getParseInfo();
RowResolver groupByOutputRowResolver = new RowResolver();
groupByOutputRowResolver.setIsExprResolver(true);
ArrayList<ExprNodeDesc> groupByKeys = new ArrayList<ExprNodeDesc>();
ArrayList<String> outputColumnNames = new ArrayList<String>();
ArrayList<AggregationDesc> aggregations = new ArrayList<AggregationDesc>();
Map<String, ExprNodeDesc> colExprMap = new HashMap<String, ExprNodeDesc>();
for (int i = 0; i < grpByExprs.size(); ++i) {
ASTNode grpbyExpr = grpByExprs.get(i);
ExprNodeDesc grpByExprNode = genExprNodeDesc(grpbyExpr, groupByInputRowResolver);
if ((grpByExprNode instanceof ExprNodeColumnDesc) && ExprNodeDescUtils.indexOf(grpByExprNode, groupByKeys) >= 0) {
// Skip duplicated grouping keys; this happens when a column alias is defined.
grpByExprs.remove(i--);
continue;
}
groupByKeys.add(grpByExprNode);
String field = getColumnInternalName(i);
outputColumnNames.add(field);
groupByOutputRowResolver.putExpression(grpbyExpr, new ColumnInfo(field, grpByExprNode.getTypeInfo(), "", false));
colExprMap.put(field, groupByKeys.get(groupByKeys.size() - 1));
}
// The grouping set key is present after the grouping keys, before the distinct keys
int groupingSetsPosition = -1;
// For grouping sets, this function is called for GroupBy1 to create an additional
// grouping key for the grouping set (corresponding to the rollup).
if (groupingSetsPresent) {
groupingSetsPosition = groupByKeys.size();
createNewGroupingKey(groupByKeys, outputColumnNames, groupByOutputRowResolver, colExprMap);
}
// If there is a distinctFuncExp, add all parameters to the reduceKeys.
if (!parseInfo.getDistinctFuncExprsForClause(dest).isEmpty()) {
List<ASTNode> list = parseInfo.getDistinctFuncExprsForClause(dest);
for (ASTNode value : list) {
// 0 is the function name
for (int i = 1; i < value.getChildCount(); i++) {
ASTNode parameter = (ASTNode) value.getChild(i);
if (groupByOutputRowResolver.getExpression(parameter) == null) {
ExprNodeDesc distExprNode = genExprNodeDesc(parameter, groupByInputRowResolver);
groupByKeys.add(distExprNode);
String field = getColumnInternalName(groupByKeys.size() - 1);
outputColumnNames.add(field);
groupByOutputRowResolver.putExpression(parameter, new ColumnInfo(field, distExprNode.getTypeInfo(), "", false));
colExprMap.put(field, groupByKeys.get(groupByKeys.size() - 1));
}
}
}
}
// For each aggregation
HashMap<String, ASTNode> aggregationTrees = parseInfo.getAggregationExprsForClause(dest);
assert (aggregationTrees != null);
boolean containsDistinctAggr = false;
for (Map.Entry<String, ASTNode> entry : aggregationTrees.entrySet()) {
ASTNode value = entry.getValue();
String aggName = unescapeIdentifier(value.getChild(0).getText());
ArrayList<ExprNodeDesc> aggParameters = new ArrayList<ExprNodeDesc>();
// 0 is the function name
for (int i = 1; i < value.getChildCount(); i++) {
ASTNode paraExpr = (ASTNode) value.getChild(i);
ExprNodeDesc paraExprNode = genExprNodeDesc(paraExpr, groupByInputRowResolver);
aggParameters.add(paraExprNode);
}
boolean isDistinct = value.getType() == HiveParser.TOK_FUNCTIONDI;
containsDistinctAggr = containsDistinctAggr || isDistinct;
boolean isAllColumns = value.getType() == HiveParser.TOK_FUNCTIONSTAR;
Mode amode = groupByDescModeToUDAFMode(mode, isDistinct);
GenericUDAFEvaluator genericUDAFEvaluator = getGenericUDAFEvaluator(aggName, aggParameters, value, isDistinct, isAllColumns);
assert (genericUDAFEvaluator != null);
GenericUDAFInfo udaf = getGenericUDAFInfo(genericUDAFEvaluator, amode, aggParameters);
aggregations.add(new AggregationDesc(aggName.toLowerCase(), udaf.genericUDAFEvaluator, udaf.convertedParameters, isDistinct, amode));
String field = getColumnInternalName(groupByKeys.size() + aggregations.size() - 1);
outputColumnNames.add(field);
if (groupByOutputRowResolver.getExpression(value) == null) {
groupByOutputRowResolver.putExpression(value, new ColumnInfo(field, udaf.returnType, "", false));
}
// Save the evaluator so that it can be used by the next-stage GroupByOperators
if (genericUDAFEvaluators != null) {
genericUDAFEvaluators.put(entry.getKey(), genericUDAFEvaluator);
}
}
float groupByMemoryUsage = HiveConf.getFloatVar(conf, HiveConf.ConfVars.HIVEMAPAGGRHASHMEMORY);
float memoryThreshold = HiveConf.getFloatVar(conf, HiveConf.ConfVars.HIVEMAPAGGRMEMORYTHRESHOLD);
Operator op = putOpInsertMap(OperatorFactory.getAndMakeChild(new GroupByDesc(mode, outputColumnNames, groupByKeys, aggregations, false, groupByMemoryUsage, memoryThreshold, groupingSetKeys, groupingSetsPresent, groupingSetsPosition, containsDistinctAggr), new RowSchema(groupByOutputRowResolver.getColumnInfos()), inputOperatorInfo), groupByOutputRowResolver);
op.setColumnExprMap(colExprMap);
return op;
}
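All of the _colN bookkeeping above follows one convention: getColumnInternalName(pos) yields a positional internal name, and colExprMap maps that output name back to the expression that produces it. A self-contained sketch of just that bookkeeping, with a stand-in for the Hive helper (assumed here to return "_col" + pos, which matches HiveConf.getColumnInternalName):

import java.util.ArrayList;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;

public class InternalNameSketch {

    // Stand-in for SemanticAnalyzer.getColumnInternalName(pos).
    static String getColumnInternalName(int pos) {
        return "_col" + pos;
    }

    public static void main(String[] args) {
        // Pretend these are the parsed group-by expressions.
        List<String> grpByExprs = List.of("key", "value");
        List<String> outputColumnNames = new ArrayList<>();
        Map<String, String> colExprMap = new LinkedHashMap<>();
        for (int i = 0; i < grpByExprs.size(); i++) {
            String field = getColumnInternalName(i);
            outputColumnNames.add(field);
            colExprMap.put(field, grpByExprs.get(i)); // output name -> source expression
        }
        // Aggregations continue the numbering after the keys (_col2, _col3, ...),
        // which is why the methods above use groupByKeys.size() + aggregations.size() - 1.
        System.out.println(outputColumnNames); // [_col0, _col1]
        System.out.println(colExprMap); // {_col0=key, _col1=value}
    }
}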
use of org.apache.hadoop.hive.ql.plan.AggregationDesc in project hive by apache.
the class RewriteQueryUsingAggregateIndexCtx method replaceGroupByOperatorProcess.
/**
* We need to replace the count(indexed_column_key) GenericUDAF aggregation
* function in the group-by construct with the "sum" GenericUDAF. This method
* creates a new operator tree for a sample query that builds a GroupByOperator
* with the sum aggregation function, and then uses that GroupByOperator's
* information to replace the aggregation information of the original
* GroupByOperator. It replaces the AggregationDesc (aggregation descriptor) of
* the old GroupByOperator with the new AggregationDesc of the new
* GroupByOperator.
*/
private void replaceGroupByOperatorProcess(GroupByOperator operator, int index) throws SemanticException {
RewriteQueryUsingAggregateIndexCtx rewriteQueryCtx = this;
// We need to replace the GroupByOperator which is before RS
if (index == 0) {
// the query contains the sum aggregation GenericUDAF
String selReplacementCommand = "select sum(`" + rewriteQueryCtx.getAggregateFunction() + "`)" + " from `" + rewriteQueryCtx.getIndexName() + "` group by " + rewriteQueryCtx.getIndexKey() + " ";
// retrieve the operator tree for the query, and the required GroupByOperator from it
Operator<?> newOperatorTree = RewriteParseContextGenerator.generateOperatorTree(rewriteQueryCtx.getParseContext().getQueryState(), selReplacementCommand);
// we get our new GroupByOperator here
GroupByOperator newGbyOperator = OperatorUtils.findLastOperatorUpstream(newOperatorTree, GroupByOperator.class);
if (newGbyOperator == null) {
throw new SemanticException("Error replacing GroupBy operator.");
}
// we need this information to set the correct colList, outputColumnNames
// in SelectOperator
ExprNodeColumnDesc aggrExprNode = null;
// Construct the new AggregationDesc to get rid of the current
// internal names and replace them with new internal names
// as required by the operator tree
GroupByDesc newConf = newGbyOperator.getConf();
List<AggregationDesc> newAggrList = newConf.getAggregators();
if (newAggrList != null && newAggrList.size() > 0) {
for (AggregationDesc aggregationDesc : newAggrList) {
rewriteQueryCtx.setEval(aggregationDesc.getGenericUDAFEvaluator());
aggrExprNode = (ExprNodeColumnDesc) aggregationDesc.getParameters().get(0);
rewriteQueryCtx.setAggrExprNode(aggrExprNode);
}
}
// Now the GroupByOperator has the new AggregationList;
// sum(`_count_of_indexed_key`)
// instead of count(indexed_key)
GroupByDesc oldConf = operator.getConf();
oldConf.setAggregators((ArrayList<AggregationDesc>) newAggrList);
operator.setConf(oldConf);
} else {
// we just need to reset the GenericUDAFEvaluator and its name for this
// GroupByOperator whose parent is the ReduceSinkOperator
GroupByDesc childConf = operator.getConf();
List<AggregationDesc> childAggrList = childConf.getAggregators();
if (childAggrList != null && childAggrList.size() > 0) {
for (AggregationDesc aggregationDesc : childAggrList) {
List<ExprNodeDesc> paraList = aggregationDesc.getParameters();
List<ObjectInspector> parametersOIList = new ArrayList<ObjectInspector>();
for (ExprNodeDesc expr : paraList) {
parametersOIList.add(expr.getWritableObjectInspector());
}
GenericUDAFEvaluator evaluator = FunctionRegistry.getGenericUDAFEvaluator("sum", parametersOIList, false, false);
aggregationDesc.setGenericUDAFEvaluator(evaluator);
aggregationDesc.setGenericUDAFName("sum");
}
}
}
}
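The essence of both branches above is: keep the AggregationDesc, but swap in a freshly resolved "sum" evaluator. A condensed sketch of that rewrite, using only calls that appear in the method above (requires hive-exec on the classpath):

import java.util.ArrayList;
import java.util.List;

import org.apache.hadoop.hive.ql.exec.FunctionRegistry;
import org.apache.hadoop.hive.ql.parse.SemanticException;
import org.apache.hadoop.hive.ql.plan.AggregationDesc;
import org.apache.hadoop.hive.ql.plan.ExprNodeDesc;
import org.apache.hadoop.hive.ql.udf.generic.GenericUDAFEvaluator;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;

public class CountToSumRewrite {

    // Re-resolve the evaluator for "sum" against the aggregation's existing
    // parameter object inspectors, then rename the descriptor in place.
    static void rewriteToSum(AggregationDesc agg) throws SemanticException {
        List<ObjectInspector> paramOIs = new ArrayList<>();
        for (ExprNodeDesc expr : agg.getParameters()) {
            paramOIs.add(expr.getWritableObjectInspector());
        }
        GenericUDAFEvaluator sumEval =
            FunctionRegistry.getGenericUDAFEvaluator("sum", paramOIs, false, false);
        agg.setGenericUDAFEvaluator(sumEval);
        agg.setGenericUDAFName("sum");
    }
}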
use of org.apache.hadoop.hive.ql.plan.AggregationDesc in project hive by apache.
the class HiveGBOpConvUtil method genMapSideGB.
@SuppressWarnings("unchecked")
private static OpAttr genMapSideGB(OpAttr inputOpAf, GBInfo gbAttrs) throws SemanticException {
ArrayList<String> outputColNames = new ArrayList<String>();
ArrayList<ColumnInfo> colInfoLst = new ArrayList<ColumnInfo>();
Map<String, ExprNodeDesc> colExprMap = new HashMap<String, ExprNodeDesc>();
Set<String> gbKeyColsAsNamesFrmIn = new HashSet<String>();
String colOutputName = null;
// 1. Build GB Keys, grouping set starting position
// 1.1 First Add original GB Keys
ArrayList<ExprNodeDesc> gbKeys = new ArrayList<ExprNodeDesc>();
for (int i = 0; i < gbAttrs.gbKeys.size(); i++) {
gbKeys.add(gbAttrs.gbKeys.get(i));
colOutputName = SemanticAnalyzer.getColumnInternalName(i);
colInfoLst.add(new ColumnInfo(colOutputName, gbAttrs.gbKeyTypes.get(i), "", false));
outputColNames.add(colOutputName);
gbKeyColsAsNamesFrmIn.add(gbAttrs.gbKeyColNamesInInput.get(i));
colExprMap.put(colOutputName, gbKeys.get(i));
}
// 1.2. Adjust GroupingSet position and GB keys for the GroupingSet position if
// needed. NOTE: the GroupingID is added to the map-side GB only if the
// grouping sets do not require an additional MR job
int groupingSetsPosition = -1;
boolean inclGrpID = inclGrpSetInMapSide(gbAttrs);
if (inclGrpID) {
groupingSetsPosition = gbKeys.size();
addGrpSetCol(true, null, false, gbKeys, outputColNames, colInfoLst, colExprMap);
}
// 1.3. Add distinct UDAF parameters to the GB keys if they are not already
// grouping keys (duplicate expressions are skipped; plan gen would have prevented them)
for (int i = 0; i < gbAttrs.distExprNodes.size(); i++) {
if (!gbKeyColsAsNamesFrmIn.contains(gbAttrs.distExprNames.get(i))) {
gbKeys.add(gbAttrs.distExprNodes.get(i));
colOutputName = SemanticAnalyzer.getColumnInternalName(gbKeys.size() - 1);
colInfoLst.add(new ColumnInfo(colOutputName, gbAttrs.distExprTypes.get(i), "", false));
outputColNames.add(colOutputName);
gbKeyColsAsNamesFrmIn.add(gbAttrs.distExprNames.get(i));
colExprMap.put(colOutputName, gbKeys.get(gbKeys.size() - 1));
}
}
// 2. Build Aggregations
ArrayList<AggregationDesc> aggregations = new ArrayList<AggregationDesc>();
for (UDAFAttrs udafAttr : gbAttrs.udafAttrs) {
Mode amode = SemanticAnalyzer.groupByDescModeToUDAFMode(GroupByDesc.Mode.HASH, udafAttr.isDistinctUDAF);
aggregations.add(new AggregationDesc(udafAttr.udafName.toLowerCase(), udafAttr.udafEvaluator, udafAttr.udafParams, udafAttr.isDistinctUDAF, amode));
GenericUDAFInfo udafInfo;
try {
udafInfo = SemanticAnalyzer.getGenericUDAFInfo(udafAttr.udafEvaluator, amode, udafAttr.udafParams);
} catch (SemanticException e) {
throw new RuntimeException(e);
}
colOutputName = SemanticAnalyzer.getColumnInternalName(gbKeys.size() + aggregations.size() - 1);
colInfoLst.add(new ColumnInfo(colOutputName, udafInfo.returnType, "", false));
outputColNames.add(colOutputName);
}
// 3. Create GB
@SuppressWarnings("rawtypes") Operator gbOp = OperatorFactory.getAndMakeChild(new GroupByDesc(GroupByDesc.Mode.HASH, outputColNames, gbKeys, aggregations, false, gbAttrs.groupByMemoryUsage, gbAttrs.memoryThreshold, gbAttrs.grpSets, inclGrpID, groupingSetsPosition, gbAttrs.containsDistinctAggr), new RowSchema(colInfoLst), inputOpAf.inputs.get(0));
// 4. Setup Expr Col Map
// NOTE: UDAF is not included in ExprColMap
gbOp.setColumnExprMap(colExprMap);
return new OpAttr("", new HashSet<Integer>(), gbOp);
}
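genMapSideGB hard-codes GroupByDesc.Mode.HASH, which groupByDescModeToUDAFMode turns into an evaluator-level mode. A paraphrased sketch of that mapping (reconstructed from memory of SemanticAnalyzer, not copied from it; verify against your Hive version):

import org.apache.hadoop.hive.ql.plan.GroupByDesc;
import org.apache.hadoop.hive.ql.udf.generic.GenericUDAFEvaluator;

public class ModeMappingSketch {

    // HASH group-bys run evaluators in PARTIAL1; the MERGEPARTIAL reducer
    // finishes them in FINAL, or in COMPLETE for distincts, since no partial
    // aggregation is ever done for distinct parameters.
    static GenericUDAFEvaluator.Mode toUDAFMode(GroupByDesc.Mode mode, boolean isDistinct) {
        switch (mode) {
            case COMPLETE:     return GenericUDAFEvaluator.Mode.COMPLETE;
            case PARTIAL1:     return GenericUDAFEvaluator.Mode.PARTIAL1;
            case PARTIAL2:     return GenericUDAFEvaluator.Mode.PARTIAL2;
            case PARTIALS:     return isDistinct ? GenericUDAFEvaluator.Mode.PARTIAL1
                                                 : GenericUDAFEvaluator.Mode.PARTIAL2;
            case FINAL:        return GenericUDAFEvaluator.Mode.FINAL;
            case HASH:         return GenericUDAFEvaluator.Mode.PARTIAL1;
            case MERGEPARTIAL: return isDistinct ? GenericUDAFEvaluator.Mode.COMPLETE
                                                 : GenericUDAFEvaluator.Mode.FINAL;
            default: throw new IllegalArgumentException("unexpected mode: " + mode);
        }
    }
}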
use of org.apache.hadoop.hive.ql.plan.AggregationDesc in project hive by apache.
the class SemanticAnalyzer method genGroupByPlanGroupByOperator2MR.
/**
* Generate the second GroupByOperator for the Group By Plan
* (parseInfo.getXXX(dest)). The new GroupByOperator will do the second
* aggregation based on the partial aggregation results.
*
* @param mode
* the mode of aggregation (FINAL)
* @param genericUDAFEvaluators
* The mapping from Aggregation StringTree to the
* genericUDAFEvaluator.
* @return the new GroupByOperator
* @throws SemanticException
*/
@SuppressWarnings("nls")
private Operator genGroupByPlanGroupByOperator2MR(QBParseInfo parseInfo, String dest, Operator reduceSinkOperatorInfo2, GroupByDesc.Mode mode, Map<String, GenericUDAFEvaluator> genericUDAFEvaluators, boolean groupingSetsPresent) throws SemanticException {
RowResolver groupByInputRowResolver2 = opParseCtx.get(reduceSinkOperatorInfo2).getRowResolver();
RowResolver groupByOutputRowResolver2 = new RowResolver();
groupByOutputRowResolver2.setIsExprResolver(true);
ArrayList<ExprNodeDesc> groupByKeys = new ArrayList<ExprNodeDesc>();
ArrayList<AggregationDesc> aggregations = new ArrayList<AggregationDesc>();
Map<String, ExprNodeDesc> colExprMap = new HashMap<String, ExprNodeDesc>();
List<ASTNode> grpByExprs = getGroupByForClause(parseInfo, dest);
ArrayList<String> outputColumnNames = new ArrayList<String>();
for (int i = 0; i < grpByExprs.size(); ++i) {
ASTNode grpbyExpr = grpByExprs.get(i);
ColumnInfo exprInfo = groupByInputRowResolver2.getExpression(grpbyExpr);
if (exprInfo == null) {
throw new SemanticException(ErrorMsg.INVALID_COLUMN.getMsg(grpbyExpr));
}
String expression = exprInfo.getInternalName();
groupByKeys.add(new ExprNodeColumnDesc(exprInfo.getType(), expression, exprInfo.getTabAlias(), exprInfo.getIsVirtualCol()));
String field = getColumnInternalName(i);
outputColumnNames.add(field);
ColumnInfo oColInfo = new ColumnInfo(field, exprInfo.getType(), "", false);
groupByOutputRowResolver2.putExpression(grpbyExpr, oColInfo);
addAlternateGByKeyMappings(grpbyExpr, oColInfo, reduceSinkOperatorInfo2, groupByOutputRowResolver2);
colExprMap.put(field, groupByKeys.get(groupByKeys.size() - 1));
}
int groupingSetsPosition = -1;
// For grouping sets, add a dummy grouping key
if (groupingSetsPresent) {
groupingSetsPosition = groupByKeys.size();
addGroupingSetKey(groupByKeys, groupByInputRowResolver2, groupByOutputRowResolver2, outputColumnNames, colExprMap);
}
HashMap<String, ASTNode> aggregationTrees = parseInfo.getAggregationExprsForClause(dest);
boolean containsDistinctAggr = false;
for (Map.Entry<String, ASTNode> entry : aggregationTrees.entrySet()) {
ArrayList<ExprNodeDesc> aggParameters = new ArrayList<ExprNodeDesc>();
ASTNode value = entry.getValue();
ColumnInfo paraExprInfo = groupByInputRowResolver2.getExpression(value);
if (paraExprInfo == null) {
throw new SemanticException(ErrorMsg.INVALID_COLUMN.getMsg(value));
}
String paraExpression = paraExprInfo.getInternalName();
assert (paraExpression != null);
aggParameters.add(new ExprNodeColumnDesc(paraExprInfo.getType(), paraExpression, paraExprInfo.getTabAlias(), paraExprInfo.getIsVirtualCol()));
String aggName = unescapeIdentifier(value.getChild(0).getText());
boolean isDistinct = value.getType() == HiveParser.TOK_FUNCTIONDI;
containsDistinctAggr = containsDistinctAggr || isDistinct;
boolean isStar = value.getType() == HiveParser.TOK_FUNCTIONSTAR;
Mode amode = groupByDescModeToUDAFMode(mode, isDistinct);
GenericUDAFEvaluator genericUDAFEvaluator = genericUDAFEvaluators.get(entry.getKey());
assert (genericUDAFEvaluator != null);
GenericUDAFInfo udaf = getGenericUDAFInfo(genericUDAFEvaluator, amode, aggParameters);
aggregations.add(new AggregationDesc(aggName.toLowerCase(), udaf.genericUDAFEvaluator, udaf.convertedParameters, (mode != GroupByDesc.Mode.FINAL && isDistinct), amode));
String field = getColumnInternalName(groupByKeys.size() + aggregations.size() - 1);
outputColumnNames.add(field);
groupByOutputRowResolver2.putExpression(value, new ColumnInfo(field, udaf.returnType, "", false));
}
float groupByMemoryUsage = HiveConf.getFloatVar(conf, HiveConf.ConfVars.HIVEMAPAGGRHASHMEMORY);
float memoryThreshold = HiveConf.getFloatVar(conf, HiveConf.ConfVars.HIVEMAPAGGRMEMORYTHRESHOLD);
Operator op = putOpInsertMap(OperatorFactory.getAndMakeChild(new GroupByDesc(mode, outputColumnNames, groupByKeys, aggregations, false, groupByMemoryUsage, memoryThreshold, null, false, groupingSetsPosition, containsDistinctAggr), new RowSchema(groupByOutputRowResolver2.getColumnInfos()), reduceSinkOperatorInfo2), groupByOutputRowResolver2);
op.setColumnExprMap(colExprMap);
return op;
}
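In the FINAL-mode plan above the evaluator never sees raw rows; it merges the partial aggregations produced by the first-stage group by. A minimal sketch of that contract (assumes hive-exec on the classpath and that the evaluator was already init()'d with Mode.FINAL and the partial-result object inspector):

import org.apache.hadoop.hive.ql.metadata.HiveException;
import org.apache.hadoop.hive.ql.udf.generic.GenericUDAFEvaluator;
import org.apache.hadoop.hive.ql.udf.generic.GenericUDAFEvaluator.AggregationBuffer;

public class FinalModeSketch {

    // FINAL mode: merge the terminatePartial() outputs from the first stage,
    // then emit the final value via terminate().
    static Object mergePartials(GenericUDAFEvaluator eval, Iterable<Object> partials)
            throws HiveException {
        AggregationBuffer buf = eval.getNewAggregationBuffer();
        for (Object partial : partials) {
            eval.merge(buf, partial);
        }
        return eval.terminate(buf);
    }
}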