use of org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc in project hive by apache.
the class SemanticAnalyzer, method processGroupingSetReduceSinkOperator.
// Process grouping set for the reduce sink operator
// For example, consider: select key, value, count(1) from T group by key, value with rollup.
// Assuming map-side aggregation and no skew, the plan would look like:
//
// TableScan --> Select --> GroupBy1 --> ReduceSink --> GroupBy2 --> Select --> FileSink
//
// This function is called for ReduceSink to add the additional grouping keys introduced by
// GroupBy1 into the reduce keys.
private void processGroupingSetReduceSinkOperator(RowResolver reduceSinkInputRowResolver,
    RowResolver reduceSinkOutputRowResolver, List<ExprNodeDesc> reduceKeys,
    List<String> outputKeyColumnNames, Map<String, ExprNodeDesc> colExprMap) throws SemanticException {
  // add a key for the reduce sink
  String groupingSetColumnName =
      reduceSinkInputRowResolver.get(null, VirtualColumn.GROUPINGID.getName()).getInternalName();
  ExprNodeDesc inputExpr = new ExprNodeColumnDesc(
      VirtualColumn.GROUPINGID.getTypeInfo(), groupingSetColumnName, null, false);
  reduceKeys.add(inputExpr);
  outputKeyColumnNames.add(getColumnInternalName(reduceKeys.size() - 1));
  String field = Utilities.ReduceField.KEY.toString() + "." + getColumnInternalName(reduceKeys.size() - 1);
  ColumnInfo colInfo = new ColumnInfo(field, reduceKeys.get(reduceKeys.size() - 1).getTypeInfo(), null, true);
  reduceSinkOutputRowResolver.put(null, VirtualColumn.GROUPINGID.getName(), colInfo);
  colExprMap.put(colInfo.getInternalName(), inputExpr);
}
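For context, group by key, value with rollup is shorthand for grouping sets ((key, value), (key), ()), and GroupBy1 emits the GROUPING__ID virtual column to tell the three groupings apart; the method above simply forwards that column as one more reduce key. Below is a minimal, self-contained sketch (not Hive source) of the same ExprNodeColumnDesc pattern; the internal name "_col2" is a hypothetical stand-in for what the input row resolver would return.

import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import org.apache.hadoop.hive.ql.exec.Utilities;
import org.apache.hadoop.hive.ql.metadata.VirtualColumn;
import org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc;
import org.apache.hadoop.hive.ql.plan.ExprNodeDesc;

public class GroupingSetKeySketch {
  public static void main(String[] args) {
    List<ExprNodeDesc> reduceKeys = new ArrayList<ExprNodeDesc>();
    Map<String, ExprNodeDesc> colExprMap = new HashMap<String, ExprNodeDesc>();
    // Hypothetical internal name the input row resolver would return for GROUPING__ID.
    String groupingSetColumnName = "_col2";
    ExprNodeDesc inputExpr = new ExprNodeColumnDesc(
        VirtualColumn.GROUPINGID.getTypeInfo(), groupingSetColumnName, null, false);
    reduceKeys.add(inputExpr);
    // The reduce-side name is "KEY." plus the positional internal name, as above.
    String field = Utilities.ReduceField.KEY.toString() + "._col" + (reduceKeys.size() - 1);
    colExprMap.put(field, inputExpr);
    System.out.println(field + " -> " + inputExpr.getExprString()); // KEY._col0 -> _col2
  }
}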
use of org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc in project hive by apache.
the class SemanticAnalyzer, method genSelectAllDesc.
private Operator genSelectAllDesc(Operator input) throws SemanticException {
  OpParseContext inputCtx = opParseCtx.get(input);
  RowResolver inputRR = inputCtx.getRowResolver();
  ArrayList<ColumnInfo> columns = inputRR.getColumnInfos();
  ArrayList<ExprNodeDesc> colList = new ArrayList<ExprNodeDesc>();
  ArrayList<String> columnNames = new ArrayList<String>();
  Map<String, ExprNodeDesc> columnExprMap = new HashMap<String, ExprNodeDesc>();
  for (int i = 0; i < columns.size(); i++) {
    ColumnInfo col = columns.get(i);
    colList.add(new ExprNodeColumnDesc(col, true));
    columnNames.add(col.getInternalName());
    columnExprMap.put(col.getInternalName(), new ExprNodeColumnDesc(col, true));
  }
  RowResolver outputRR = inputRR.duplicate();
  Operator output = putOpInsertMap(OperatorFactory.getAndMakeChild(
      new SelectDesc(colList, columnNames, true), outputRR.getRowSchema(), input), outputRR);
  output.setColumnExprMap(columnExprMap);
  return output;
}
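A self-contained sketch of the identity-projection loop above, assuming two hand-built ColumnInfo entries in place of a real RowResolver (the method passes an extra boolean flag to the ExprNodeColumnDesc constructor; the sketch uses the plain single-argument form). Note that the column list and the expression map each get their own ExprNodeColumnDesc instance, mirroring the method.

import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import org.apache.hadoop.hive.ql.exec.ColumnInfo;
import org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc;
import org.apache.hadoop.hive.ql.plan.ExprNodeDesc;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;

public class SelectAllSketch {
  public static void main(String[] args) {
    // Hand-built stand-ins for what inputRR.getColumnInfos() would return.
    List<ColumnInfo> columns = new ArrayList<ColumnInfo>();
    columns.add(new ColumnInfo("_col0", TypeInfoFactory.stringTypeInfo, "t", false));
    columns.add(new ColumnInfo("_col1", TypeInfoFactory.intTypeInfo, "t", false));

    List<ExprNodeDesc> colList = new ArrayList<ExprNodeDesc>();
    List<String> columnNames = new ArrayList<String>();
    Map<String, ExprNodeDesc> columnExprMap = new HashMap<String, ExprNodeDesc>();
    for (ColumnInfo col : columns) {
      colList.add(new ExprNodeColumnDesc(col));
      columnNames.add(col.getInternalName());
      // A separate instance for the map, as genSelectAllDesc does.
      columnExprMap.put(col.getInternalName(), new ExprNodeColumnDesc(col));
    }
    System.out.println(columnNames); // [_col0, _col1]
  }
}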
use of org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc in project hive by apache.
the class SemanticAnalyzer, method genGroupByPlanGroupByOperator2MR.
/**
* Generate the second GroupByOperator for the Group By Plan
* (parseInfo.getXXX(dest)). The new GroupByOperator will do the second
* aggregation based on the partial aggregation results.
*
* @param mode
* the mode of aggregation (FINAL)
* @param genericUDAFEvaluators
* The mapping from Aggregation StringTree to the
* genericUDAFEvaluator.
* @return the new GroupByOperator
* @throws SemanticException
*/
@SuppressWarnings("nls")
private Operator genGroupByPlanGroupByOperator2MR(QBParseInfo parseInfo, String dest,
    Operator reduceSinkOperatorInfo2, GroupByDesc.Mode mode,
    Map<String, GenericUDAFEvaluator> genericUDAFEvaluators,
    boolean groupingSetsPresent) throws SemanticException {
  RowResolver groupByInputRowResolver2 = opParseCtx.get(reduceSinkOperatorInfo2).getRowResolver();
  RowResolver groupByOutputRowResolver2 = new RowResolver();
  groupByOutputRowResolver2.setIsExprResolver(true);
  ArrayList<ExprNodeDesc> groupByKeys = new ArrayList<ExprNodeDesc>();
  ArrayList<AggregationDesc> aggregations = new ArrayList<AggregationDesc>();
  Map<String, ExprNodeDesc> colExprMap = new HashMap<String, ExprNodeDesc>();
  List<ASTNode> grpByExprs = getGroupByForClause(parseInfo, dest);
  ArrayList<String> outputColumnNames = new ArrayList<String>();
  for (int i = 0; i < grpByExprs.size(); ++i) {
    ASTNode grpbyExpr = grpByExprs.get(i);
    ColumnInfo exprInfo = groupByInputRowResolver2.getExpression(grpbyExpr);
    if (exprInfo == null) {
      throw new SemanticException(ErrorMsg.INVALID_COLUMN.getMsg(grpbyExpr));
    }
    String expression = exprInfo.getInternalName();
    groupByKeys.add(new ExprNodeColumnDesc(exprInfo.getType(), expression,
        exprInfo.getTabAlias(), exprInfo.getIsVirtualCol()));
    String field = getColumnInternalName(i);
    outputColumnNames.add(field);
    ColumnInfo oColInfo = new ColumnInfo(field, exprInfo.getType(), "", false);
    groupByOutputRowResolver2.putExpression(grpbyExpr, oColInfo);
    addAlternateGByKeyMappings(grpbyExpr, oColInfo, reduceSinkOperatorInfo2, groupByOutputRowResolver2);
    colExprMap.put(field, groupByKeys.get(groupByKeys.size() - 1));
  }
  int groupingSetsPosition = -1;
  // For grouping sets, add a dummy grouping key
  if (groupingSetsPresent) {
    groupingSetsPosition = groupByKeys.size();
    addGroupingSetKey(groupByKeys, groupByInputRowResolver2, groupByOutputRowResolver2,
        outputColumnNames, colExprMap);
  }
  HashMap<String, ASTNode> aggregationTrees = parseInfo.getAggregationExprsForClause(dest);
  boolean containsDistinctAggr = false;
  for (Map.Entry<String, ASTNode> entry : aggregationTrees.entrySet()) {
    ArrayList<ExprNodeDesc> aggParameters = new ArrayList<ExprNodeDesc>();
    ASTNode value = entry.getValue();
    ColumnInfo paraExprInfo = groupByInputRowResolver2.getExpression(value);
    if (paraExprInfo == null) {
      throw new SemanticException(ErrorMsg.INVALID_COLUMN.getMsg(value));
    }
    String paraExpression = paraExprInfo.getInternalName();
    assert (paraExpression != null);
    aggParameters.add(new ExprNodeColumnDesc(paraExprInfo.getType(), paraExpression,
        paraExprInfo.getTabAlias(), paraExprInfo.getIsVirtualCol()));
    String aggName = unescapeIdentifier(value.getChild(0).getText());
    boolean isDistinct = value.getType() == HiveParser.TOK_FUNCTIONDI;
    containsDistinctAggr = containsDistinctAggr || isDistinct;
    boolean isStar = value.getType() == HiveParser.TOK_FUNCTIONSTAR;
    Mode amode = groupByDescModeToUDAFMode(mode, isDistinct);
    GenericUDAFEvaluator genericUDAFEvaluator = genericUDAFEvaluators.get(entry.getKey());
    assert (genericUDAFEvaluator != null);
    GenericUDAFInfo udaf = getGenericUDAFInfo(genericUDAFEvaluator, amode, aggParameters);
    aggregations.add(new AggregationDesc(aggName.toLowerCase(), udaf.genericUDAFEvaluator,
        udaf.convertedParameters,
        (mode != GroupByDesc.Mode.FINAL && value.getToken().getType() == HiveParser.TOK_FUNCTIONDI),
        amode));
    String field = getColumnInternalName(groupByKeys.size() + aggregations.size() - 1);
    outputColumnNames.add(field);
    groupByOutputRowResolver2.putExpression(value, new ColumnInfo(field, udaf.returnType, "", false));
  }
  float groupByMemoryUsage = HiveConf.getFloatVar(conf, HiveConf.ConfVars.HIVEMAPAGGRHASHMEMORY);
  float memoryThreshold = HiveConf.getFloatVar(conf, HiveConf.ConfVars.HIVEMAPAGGRMEMORYTHRESHOLD);
  Operator op = putOpInsertMap(OperatorFactory.getAndMakeChild(
      new GroupByDesc(mode, outputColumnNames, groupByKeys, aggregations, false,
          groupByMemoryUsage, memoryThreshold, null, false, groupingSetsPosition, containsDistinctAggr),
      new RowSchema(groupByOutputRowResolver2.getColumnInfos()), reduceSinkOperatorInfo2),
      groupByOutputRowResolver2);
  op.setColumnExprMap(colExprMap);
  return op;
}
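The output-column indexing above relies on keys being numbered before aggregations: getColumnInternalName produces the synthetic names "_col0", "_col1", ..., so with two group-by keys the first aggregation lands at groupByKeys.size() + aggregations.size() - 1 = 2, i.e. "_col2". A one-line check of that arithmetic, using the static helper in HiveConf (which produces the same "_colN" names):

import org.apache.hadoop.hive.conf.HiveConf;

public class InternalNameSketch {
  public static void main(String[] args) {
    int numKeys = 2;  // hypothetical group-by key count
    int aggIndex = 1; // first aggregation: aggregations.size() == 1
    String field = HiveConf.getColumnInternalName(numKeys + aggIndex - 1);
    System.out.println(field); // _col2
  }
}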
use of org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc in project hive by apache.
the class SemanticAnalyzer, method genLateralViewPlan.
private Operator genLateralViewPlan(QB qb, Operator op, ASTNode lateralViewTree) throws SemanticException {
  RowResolver lvForwardRR = new RowResolver();
  RowResolver source = opParseCtx.get(op).getRowResolver();
  Map<String, ExprNodeDesc> lvfColExprMap = new HashMap<String, ExprNodeDesc>();
  Map<String, ExprNodeDesc> selColExprMap = new HashMap<String, ExprNodeDesc>();
  List<ExprNodeDesc> colList = new ArrayList<ExprNodeDesc>();
  List<String> colNames = new ArrayList<String>();
  for (ColumnInfo col : source.getColumnInfos()) {
    String[] tabCol = source.reverseLookup(col.getInternalName());
    lvForwardRR.put(tabCol[0], tabCol[1], col);
    ExprNodeDesc colExpr = new ExprNodeColumnDesc(col);
    colList.add(colExpr);
    colNames.add(colExpr.getName());
    lvfColExprMap.put(col.getInternalName(), colExpr);
    selColExprMap.put(col.getInternalName(), colExpr.clone());
  }
  Operator lvForward = putOpInsertMap(OperatorFactory.getAndMakeChild(
      new LateralViewForwardDesc(), new RowSchema(lvForwardRR.getColumnInfos()), op), lvForwardRR);
  lvForward.setColumnExprMap(lvfColExprMap);
  // The order in which the two paths are added is important. The
  // lateral view join operator depends on having the select operator
  // give it the row first.
  // Get the all path by making a select(*).
  RowResolver allPathRR = opParseCtx.get(lvForward).getRowResolver();
  // Operator allPath = op;
  SelectDesc sDesc = new SelectDesc(colList, colNames, false);
  sDesc.setSelStarNoCompute(true);
  Operator allPath = putOpInsertMap(OperatorFactory.getAndMakeChild(
      sDesc, new RowSchema(allPathRR.getColumnInfos()), lvForward), allPathRR);
  allPath.setColumnExprMap(selColExprMap);
  int allColumns = allPathRR.getColumnInfos().size();
  // Get the UDTF path
  QB blankQb = new QB(null, null, false);
  Operator udtfPath = genSelectPlan(null, (ASTNode) lateralViewTree.getChild(0), blankQb, lvForward,
      null, lateralViewTree.getType() == HiveParser.TOK_LATERAL_VIEW_OUTER);
  // add the UDTF aliases to the QB
  for (String udtfAlias : blankQb.getAliases()) {
    qb.addAlias(udtfAlias);
  }
  RowResolver udtfPathRR = opParseCtx.get(udtfPath).getRowResolver();
  // Merge the two paths into the lateral view join. The columns of the merged
  // result are the combination of the UDTF path's columns and the all path's
  // columns; the internal names have to be changed to avoid conflicts.
  RowResolver lateralViewRR = new RowResolver();
  ArrayList<String> outputInternalColNames = new ArrayList<String>();
  // For PPD, we need a column-to-expression map so that during the walk,
  // the processor knows how to transform the internal column names.
  // The following steps are dependent on the fact that LVmergeRowResolvers
  // is called in the order below.
  Map<String, ExprNodeDesc> colExprMap = new HashMap<String, ExprNodeDesc>();
  LVmergeRowResolvers(allPathRR, lateralViewRR, colExprMap, outputInternalColNames);
  LVmergeRowResolvers(udtfPathRR, lateralViewRR, colExprMap, outputInternalColNames);
  Operator lateralViewJoin = putOpInsertMap(OperatorFactory.getAndMakeChild(
      new LateralViewJoinDesc(allColumns, outputInternalColNames),
      new RowSchema(lateralViewRR.getColumnInfos()), allPath, udtfPath), lateralViewRR);
  lateralViewJoin.setColumnExprMap(colExprMap);
  return lateralViewJoin;
}
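One detail worth noting above: the forward path and the select path must not share ExprNodeDesc instances, which is why selColExprMap stores clones. A minimal sketch of that behavior (the column name "_col0" and alias "t" are hypothetical):

import org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc;
import org.apache.hadoop.hive.ql.plan.ExprNodeDesc;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;

public class CloneSketch {
  public static void main(String[] args) {
    ExprNodeDesc colExpr = new ExprNodeColumnDesc(
        TypeInfoFactory.stringTypeInfo, "_col0", "t", false);
    ExprNodeDesc copy = colExpr.clone();
    // Semantically equal, but distinct objects, so the two operator
    // paths can later be rewritten independently.
    System.out.println(colExpr.isSame(copy)); // true
    System.out.println(colExpr == copy);      // false
  }
}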
use of org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc in project hive by apache.
the class SemanticAnalyzer, method genGroupByPlanGroupByOperator.
/**
* Generate the GroupByOperator for the Query Block (parseInfo.getXXX(dest)).
* The new GroupByOperator will be a child of the reduceSinkOperatorInfo.
*
* @param mode
* The mode of the aggregation (PARTIAL1 or COMPLETE)
* @param genericUDAFEvaluators
* If not null, this function will store the mapping from Aggregation
* StringTree to the genericUDAFEvaluator in this parameter, so it
* can be used in the next-stage GroupBy aggregations.
* @return the new GroupByOperator
*/
@SuppressWarnings("nls")
private Operator genGroupByPlanGroupByOperator(QBParseInfo parseInfo, String dest, Operator input,
    ReduceSinkOperator rs, GroupByDesc.Mode mode,
    Map<String, GenericUDAFEvaluator> genericUDAFEvaluators) throws SemanticException {
  RowResolver groupByInputRowResolver = opParseCtx.get(input).getRowResolver();
  RowResolver groupByOutputRowResolver = new RowResolver();
  groupByOutputRowResolver.setIsExprResolver(true);
  ArrayList<ExprNodeDesc> groupByKeys = new ArrayList<ExprNodeDesc>();
  ArrayList<AggregationDesc> aggregations = new ArrayList<AggregationDesc>();
  ArrayList<String> outputColumnNames = new ArrayList<String>();
  Map<String, ExprNodeDesc> colExprMap = new HashMap<String, ExprNodeDesc>();
  List<ASTNode> grpByExprs = getGroupByForClause(parseInfo, dest);
  for (int i = 0; i < grpByExprs.size(); ++i) {
    ASTNode grpbyExpr = grpByExprs.get(i);
    ColumnInfo exprInfo = groupByInputRowResolver.getExpression(grpbyExpr);
    if (exprInfo == null) {
      throw new SemanticException(ErrorMsg.INVALID_COLUMN.getMsg(grpbyExpr));
    }
    groupByKeys.add(new ExprNodeColumnDesc(exprInfo.getType(), exprInfo.getInternalName(), "", false));
    String field = getColumnInternalName(i);
    outputColumnNames.add(field);
    ColumnInfo oColInfo = new ColumnInfo(field, exprInfo.getType(), null, false);
    groupByOutputRowResolver.putExpression(grpbyExpr, oColInfo);
    addAlternateGByKeyMappings(grpbyExpr, oColInfo, input, groupByOutputRowResolver);
    colExprMap.put(field, groupByKeys.get(groupByKeys.size() - 1));
  }
  // For each aggregation
  HashMap<String, ASTNode> aggregationTrees = parseInfo.getAggregationExprsForClause(dest);
  assert (aggregationTrees != null);
  // Get the last column name of the reduce KEY; it is the column name
  // corresponding to the distinct aggregation, if any.
  String lastKeyColName = null;
  List<String> inputKeyCols = rs.getConf().getOutputKeyColumnNames();
  if (inputKeyCols.size() > 0) {
    lastKeyColName = inputKeyCols.get(inputKeyCols.size() - 1);
  }
  List<ExprNodeDesc> reduceValues = rs.getConf().getValueCols();
  int numDistinctUDFs = 0;
  for (Map.Entry<String, ASTNode> entry : aggregationTrees.entrySet()) {
    ASTNode value = entry.getValue();
    // This is the GenericUDAF name
    String aggName = unescapeIdentifier(value.getChild(0).getText());
    boolean isDistinct = value.getType() == HiveParser.TOK_FUNCTIONDI;
    boolean isAllColumns = value.getType() == HiveParser.TOK_FUNCTIONSTAR;
    // Convert the children to aggParameters
    ArrayList<ExprNodeDesc> aggParameters = new ArrayList<ExprNodeDesc>();
    // child 0 is the function name
    for (int i = 1; i < value.getChildCount(); i++) {
      ASTNode paraExpr = (ASTNode) value.getChild(i);
      ColumnInfo paraExprInfo = groupByInputRowResolver.getExpression(paraExpr);
      if (paraExprInfo == null) {
        throw new SemanticException(ErrorMsg.INVALID_COLUMN.getMsg(paraExpr));
      }
      String paraExpression = paraExprInfo.getInternalName();
      assert (paraExpression != null);
      if (isDistinct && lastKeyColName != null) {
        // if the aggregation is distinct, the parameter name is constructed as
        // KEY.lastKeyColName:<tag>._colx
        paraExpression = Utilities.ReduceField.KEY.name() + "." + lastKeyColName + ":"
            + numDistinctUDFs + "." + getColumnInternalName(i - 1);
      }
      ExprNodeDesc expr = new ExprNodeColumnDesc(paraExprInfo.getType(), paraExpression,
          paraExprInfo.getTabAlias(), paraExprInfo.getIsVirtualCol());
      ExprNodeDesc reduceValue = isConstantParameterInAggregationParameters(
          paraExprInfo.getInternalName(), reduceValues);
      if (reduceValue != null) {
        // this parameter is a constant
        expr = reduceValue;
      }
      aggParameters.add(expr);
    }
    if (isDistinct) {
      numDistinctUDFs++;
    }
    Mode amode = groupByDescModeToUDAFMode(mode, isDistinct);
    GenericUDAFEvaluator genericUDAFEvaluator = getGenericUDAFEvaluator(aggName, aggParameters,
        value, isDistinct, isAllColumns);
    assert (genericUDAFEvaluator != null);
    GenericUDAFInfo udaf = getGenericUDAFInfo(genericUDAFEvaluator, amode, aggParameters);
    aggregations.add(new AggregationDesc(aggName.toLowerCase(), udaf.genericUDAFEvaluator,
        udaf.convertedParameters, isDistinct, amode));
    String field = getColumnInternalName(groupByKeys.size() + aggregations.size() - 1);
    outputColumnNames.add(field);
    groupByOutputRowResolver.putExpression(value, new ColumnInfo(field, udaf.returnType, "", false));
    // save the evaluator so that it can be reused by the next-stage GroupByOperators
    if (genericUDAFEvaluators != null) {
      genericUDAFEvaluators.put(entry.getKey(), genericUDAFEvaluator);
    }
  }
  float groupByMemoryUsage = HiveConf.getFloatVar(conf, HiveConf.ConfVars.HIVEMAPAGGRHASHMEMORY);
  float memoryThreshold = HiveConf.getFloatVar(conf, HiveConf.ConfVars.HIVEMAPAGGRMEMORYTHRESHOLD);
  Operator op = putOpInsertMap(OperatorFactory.getAndMakeChild(
      new GroupByDesc(mode, outputColumnNames, groupByKeys, aggregations, false,
          groupByMemoryUsage, memoryThreshold, null, false, -1, numDistinctUDFs > 0),
      new RowSchema(groupByOutputRowResolver.getColumnInfos()), input), groupByOutputRowResolver);
  op.setColumnExprMap(colExprMap);
  return op;
}
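A worked example of the distinct-parameter renaming above, with hypothetical values: if the last reduce-sink key is "_col2" and this is the first distinct aggregation (tag 0), its first parameter resolves to "KEY._col2:0._col0".

import org.apache.hadoop.hive.conf.HiveConf;
import org.apache.hadoop.hive.ql.exec.Utilities;

public class DistinctParamNameSketch {
  public static void main(String[] args) {
    String lastKeyColName = "_col2"; // hypothetical last reduce-sink key name
    int numDistinctUDFs = 0;         // tag of the first distinct aggregation
    int i = 1;                       // first parameter (child 0 is the function name)
    String paraExpression = Utilities.ReduceField.KEY.name() + "." + lastKeyColName
        + ":" + numDistinctUDFs + "." + HiveConf.getColumnInternalName(i - 1);
    System.out.println(paraExpression); // KEY._col2:0._col0
  }
}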