use of org.apache.hadoop.hive.ql.exec.ColumnInfo in project hive by apache.
the class TypeCheckProcFactory method processGByExpr.
/**
 * Function to do group-by subexpression elimination. This is called by all the
 * processors initially. As an example, consider the query
 * select a+b, count(1) from T group by a+b; a+b is already precomputed in the
 * group-by operator's key, so we substitute a+b in the select list with the
 * internal column name of the a+b expression that appears in the input row
 * resolver.
 *
 * @param nd
 *          The node that is being inspected.
 * @param procCtx
 *          The processor context.
 *
 * @return exprNodeColumnDesc.
 */
public static ExprNodeDesc processGByExpr(Node nd, Object procCtx) throws SemanticException {
  // We recursively create the exprNodeDesc. Base cases: when we encounter a
  // column ref, we convert it into an exprNodeColumnDesc; when we encounter a
  // constant, we convert it into an exprNodeConstantDesc. For everything else
  // we build an exprNodeFuncDesc with recursively built children.
  ASTNode expr = (ASTNode) nd;
  TypeCheckCtx ctx = (TypeCheckCtx) procCtx;
  if (ctx == null) {
    return null;
  }
  // having key in (select .. where a = min(b.value))
  if (!ctx.isUseCaching() && ctx.getOuterRR() == null) {
    return null;
  }
  RowResolver input = ctx.getInputRR();
  if (input == null || !ctx.getAllowGBExprElimination()) {
    return null;
  }
  // If the current subexpression is pre-calculated, as in Group-By etc.
  ColumnInfo colInfo = input.getExpression(expr);
  // Try the outer row resolver.
  RowResolver outerRR = ctx.getOuterRR();
  if (colInfo == null && outerRR != null) {
    colInfo = outerRR.getExpression(expr);
  }
  if (colInfo != null) {
    ExprNodeDesc desc = new ExprNodeColumnDesc(colInfo);
    ASTNode source = input.getExpressionSource(expr);
    if (source != null) {
      ctx.getUnparseTranslator().addCopyTranslation(expr, source);
    }
    return desc;
  }
  return null;
}
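For orientation, here is a minimal standalone sketch (not part of the Hive source) of what processGByExpr returns when the lookup succeeds: the ColumnInfo that the group-by planner registered is wrapped in an ExprNodeColumnDesc, so the select list refers to the precomputed a+b by its internal column name instead of re-evaluating it. The "_col0" name and the int type are illustrative assumptions; the real values come from the row resolver.

import org.apache.hadoop.hive.ql.exec.ColumnInfo;
import org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;

public class GByExprSubstitutionSketch {
  public static void main(String[] args) {
    // Hypothetical internal name under which the group-by operator registered
    // the a+b key; in processGByExpr this ColumnInfo comes from
    // input.getExpression(expr) or outerRR.getExpression(expr).
    ColumnInfo colInfo = new ColumnInfo("_col0", TypeInfoFactory.intTypeInfo,
        /* tabAlias */ "", /* isVirtualCol */ false);
    // The substitution: a column reference to the precomputed expression.
    ExprNodeColumnDesc desc = new ExprNodeColumnDesc(colInfo);
    System.out.println(desc.getColumn());  // prints the internal name, _col0
  }
}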
use of org.apache.hadoop.hive.ql.exec.ColumnInfo in project hive by apache.
the class SemanticAnalyzer method genGroupByPlanGroupByOperator2MR.
/**
 * Generate the second GroupByOperator for the Group By Plan
 * (parseInfo.getXXX(dest)). The new GroupByOperator will do the second
 * aggregation based on the partial aggregation results.
 *
 * @param mode
 *          the mode of aggregation (FINAL)
 * @param genericUDAFEvaluators
 *          The mapping from Aggregation StringTree to the
 *          genericUDAFEvaluator.
 * @return the new GroupByOperator
 * @throws SemanticException
 */
@SuppressWarnings("nls")
private Operator genGroupByPlanGroupByOperator2MR(QBParseInfo parseInfo, String dest,
    Operator reduceSinkOperatorInfo2, GroupByDesc.Mode mode,
    Map<String, GenericUDAFEvaluator> genericUDAFEvaluators,
    boolean groupingSetsPresent) throws SemanticException {
  RowResolver groupByInputRowResolver2 = opParseCtx.get(reduceSinkOperatorInfo2).getRowResolver();
  RowResolver groupByOutputRowResolver2 = new RowResolver();
  groupByOutputRowResolver2.setIsExprResolver(true);
  ArrayList<ExprNodeDesc> groupByKeys = new ArrayList<ExprNodeDesc>();
  ArrayList<AggregationDesc> aggregations = new ArrayList<AggregationDesc>();
  Map<String, ExprNodeDesc> colExprMap = new HashMap<String, ExprNodeDesc>();
  List<ASTNode> grpByExprs = getGroupByForClause(parseInfo, dest);
  ArrayList<String> outputColumnNames = new ArrayList<String>();
  for (int i = 0; i < grpByExprs.size(); ++i) {
    ASTNode grpbyExpr = grpByExprs.get(i);
    ColumnInfo exprInfo = groupByInputRowResolver2.getExpression(grpbyExpr);
    if (exprInfo == null) {
      throw new SemanticException(ErrorMsg.INVALID_COLUMN.getMsg(grpbyExpr));
    }
    String expression = exprInfo.getInternalName();
    groupByKeys.add(new ExprNodeColumnDesc(exprInfo.getType(), expression,
        exprInfo.getTabAlias(), exprInfo.getIsVirtualCol()));
    String field = getColumnInternalName(i);
    outputColumnNames.add(field);
    ColumnInfo oColInfo = new ColumnInfo(field, exprInfo.getType(), "", false);
    groupByOutputRowResolver2.putExpression(grpbyExpr, oColInfo);
    addAlternateGByKeyMappings(grpbyExpr, oColInfo, reduceSinkOperatorInfo2,
        groupByOutputRowResolver2);
    colExprMap.put(field, groupByKeys.get(groupByKeys.size() - 1));
  }
  int groupingSetsPosition = -1;
  // For grouping sets, add a dummy grouping key.
  if (groupingSetsPresent) {
    groupingSetsPosition = groupByKeys.size();
    addGroupingSetKey(groupByKeys, groupByInputRowResolver2, groupByOutputRowResolver2,
        outputColumnNames, colExprMap);
  }
  HashMap<String, ASTNode> aggregationTrees = parseInfo.getAggregationExprsForClause(dest);
  boolean containsDistinctAggr = false;
  for (Map.Entry<String, ASTNode> entry : aggregationTrees.entrySet()) {
    ArrayList<ExprNodeDesc> aggParameters = new ArrayList<ExprNodeDesc>();
    ASTNode value = entry.getValue();
    ColumnInfo paraExprInfo = groupByInputRowResolver2.getExpression(value);
    if (paraExprInfo == null) {
      throw new SemanticException(ErrorMsg.INVALID_COLUMN.getMsg(value));
    }
    String paraExpression = paraExprInfo.getInternalName();
    assert (paraExpression != null);
    aggParameters.add(new ExprNodeColumnDesc(paraExprInfo.getType(), paraExpression,
        paraExprInfo.getTabAlias(), paraExprInfo.getIsVirtualCol()));
    String aggName = unescapeIdentifier(value.getChild(0).getText());
    boolean isDistinct = value.getType() == HiveParser.TOK_FUNCTIONDI;
    containsDistinctAggr = containsDistinctAggr || isDistinct;
    boolean isStar = value.getType() == HiveParser.TOK_FUNCTIONSTAR;
    Mode amode = groupByDescModeToUDAFMode(mode, isDistinct);
    GenericUDAFEvaluator genericUDAFEvaluator = genericUDAFEvaluators.get(entry.getKey());
    assert (genericUDAFEvaluator != null);
    GenericUDAFInfo udaf = getGenericUDAFInfo(genericUDAFEvaluator, amode, aggParameters);
    aggregations.add(new AggregationDesc(aggName.toLowerCase(), udaf.genericUDAFEvaluator,
        udaf.convertedParameters,
        (mode != GroupByDesc.Mode.FINAL && value.getToken().getType() == HiveParser.TOK_FUNCTIONDI),
        amode));
    String field = getColumnInternalName(groupByKeys.size() + aggregations.size() - 1);
    outputColumnNames.add(field);
    groupByOutputRowResolver2.putExpression(value, new ColumnInfo(field, udaf.returnType, "", false));
  }
  float groupByMemoryUsage = HiveConf.getFloatVar(conf, HiveConf.ConfVars.HIVEMAPAGGRHASHMEMORY);
  float memoryThreshold = HiveConf.getFloatVar(conf, HiveConf.ConfVars.HIVEMAPAGGRMEMORYTHRESHOLD);
  Operator op = putOpInsertMap(OperatorFactory.getAndMakeChild(
      new GroupByDesc(mode, outputColumnNames, groupByKeys, aggregations, false,
          groupByMemoryUsage, memoryThreshold, null, false, groupingSetsPosition,
          containsDistinctAggr),
      new RowSchema(groupByOutputRowResolver2.getColumnInfos()),
      reduceSinkOperatorInfo2), groupByOutputRowResolver2);
  op.setColumnExprMap(colExprMap);
  return op;
}
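The two loops above repeat a bookkeeping pattern that shows up throughout SemanticAnalyzer: each output position i gets an internal name from getColumnInternalName(i) (which produces names like _col0, _col1, ...), a ColumnInfo under that name goes into the output row resolver, and colExprMap records which input expression produces each output column. A standalone sketch of just that bookkeeping, with assumed string-typed keys and HiveConf.getColumnInternalName standing in for the analyzer's helper:

import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import org.apache.hadoop.hive.conf.HiveConf;
import org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc;
import org.apache.hadoop.hive.ql.plan.ExprNodeDesc;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;

public class InternalNameBookkeepingSketch {
  public static void main(String[] args) {
    // Pretend these are the internal names of the first-stage group-by keys.
    List<String> inputNames = Arrays.asList("KEY._col0", "KEY._col1");
    List<ExprNodeDesc> groupByKeys = new ArrayList<ExprNodeDesc>();
    List<String> outputColumnNames = new ArrayList<String>();
    Map<String, ExprNodeDesc> colExprMap = new HashMap<String, ExprNodeDesc>();
    for (int i = 0; i < inputNames.size(); i++) {
      ExprNodeDesc key = new ExprNodeColumnDesc(TypeInfoFactory.stringTypeInfo,
          inputNames.get(i), "", false);
      groupByKeys.add(key);
      String field = HiveConf.getColumnInternalName(i);  // "_col0", "_col1", ...
      outputColumnNames.add(field);
      colExprMap.put(field, key);  // output name -> producing expression
    }
    System.out.println(outputColumnNames + " <- " + colExprMap);
  }
}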
use of org.apache.hadoop.hive.ql.exec.ColumnInfo in project hive by apache.
the class ExprNodeDescUtils method genExprNodeDesc.
/**
 * Build ExprNodeColumnDesc for the projections in the input operator from
 * startPos to endPos (both inclusive). The operator must have an associated
 * colExprMap.
 *
 * @param inputOp
 *          Input Hive Operator
 * @param startPos
 *          starting position in the input operator schema; must be >= 0 and
 *          <= endPos
 * @param endPos
 *          end position in the input operator schema; must be >= 0
 * @param addEmptyTabAlias
 *          if true, emit an empty table alias instead of the column's own
 * @param setColToNonVirtual
 *          if true, mark every generated column as non-virtual
 * @return List of ExprNodeDesc
 */
public static ArrayList<ExprNodeDesc> genExprNodeDesc(Operator inputOp, int startPos, int endPos,
    boolean addEmptyTabAlias, boolean setColToNonVirtual) {
  ArrayList<ExprNodeDesc> exprColLst = new ArrayList<ExprNodeDesc>();
  List<ColumnInfo> colInfoLst = inputOp.getSchema().getSignature();
  String tabAlias;
  boolean vc;
  ColumnInfo ci;
  for (int i = startPos; i <= endPos; i++) {
    ci = colInfoLst.get(i);
    tabAlias = ci.getTabAlias();
    if (addEmptyTabAlias) {
      tabAlias = "";
    }
    vc = ci.getIsVirtualCol();
    if (setColToNonVirtual) {
      vc = false;
    }
    exprColLst.add(new ExprNodeColumnDesc(ci.getType(), ci.getInternalName(), tabAlias, vc));
  }
  return exprColLst;
}
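Since the method needs a live Operator, a hedged standalone sketch of the same loop against a hand-built signature may be clearer; it shows how addEmptyTabAlias and setColToNonVirtual rewrite the table alias and the virtual-column flag (the column names, alias, and types here are made up):

import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import org.apache.hadoop.hive.ql.exec.ColumnInfo;
import org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc;
import org.apache.hadoop.hive.ql.plan.ExprNodeDesc;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;

public class GenExprNodeDescSketch {
  public static void main(String[] args) {
    // Stand-in for inputOp.getSchema().getSignature(): one regular column and
    // one virtual column (e.g. BLOCK__OFFSET__INSIDE__FILE).
    List<ColumnInfo> signature = Arrays.asList(
        new ColumnInfo("_col0", TypeInfoFactory.stringTypeInfo, "t", false),
        new ColumnInfo("_col1", TypeInfoFactory.longTypeInfo, "t", true));
    boolean addEmptyTabAlias = true;
    boolean setColToNonVirtual = true;
    List<ExprNodeDesc> exprColLst = new ArrayList<ExprNodeDesc>();
    for (ColumnInfo ci : signature) {
      String tabAlias = addEmptyTabAlias ? "" : ci.getTabAlias();   // alias blanked
      boolean vc = setColToNonVirtual ? false : ci.getIsVirtualCol(); // flag cleared
      exprColLst.add(new ExprNodeColumnDesc(ci.getType(), ci.getInternalName(), tabAlias, vc));
    }
    System.out.println(exprColLst);
  }
}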
use of org.apache.hadoop.hive.ql.exec.ColumnInfo in project hive by apache.
the class ColumnPrunerProcCtx method getColsFromSelectExpr.
/**
* Creates the list of internal column names (represented by field nodes)
* from select expressions in a select operator. This function is used for the
* select operator instead of the genColLists function (which is used by
* the rest of the operators).
*
* @param op The select operator.
* @return a list of field nodes representing the internal column names.
*/
public List<FieldNode> getColsFromSelectExpr(SelectOperator op) {
  List<FieldNode> cols = new ArrayList<>();
  SelectDesc conf = op.getConf();
  if (conf.isSelStarNoCompute()) {
    for (ColumnInfo colInfo : op.getSchema().getSignature()) {
      cols.add(new FieldNode(colInfo.getInternalName()));
    }
  } else {
    List<ExprNodeDesc> exprList = conf.getColList();
    for (ExprNodeDesc expr : exprList) {
      cols = mergeFieldNodesWithDesc(cols, expr);
    }
  }
  return cols;
}
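For the isSelStarNoCompute branch the mapping is one-to-one: every column in the operator's schema becomes a FieldNode keyed by its internal name. A minimal sketch of that branch against a hand-built signature (the FieldNode import path is an assumption here, and the names and types are made up):

import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import org.apache.hadoop.hive.ql.exec.ColumnInfo;
import org.apache.hadoop.hive.ql.optimizer.FieldNode;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;

public class SelectStarColsSketch {
  public static void main(String[] args) {
    // Stand-in for op.getSchema().getSignature() of a SEL * (no-compute) operator.
    List<ColumnInfo> signature = Arrays.asList(
        new ColumnInfo("_col0", TypeInfoFactory.stringTypeInfo, "t", false),
        new ColumnInfo("_col1", TypeInfoFactory.intTypeInfo, "t", false));
    List<FieldNode> cols = new ArrayList<FieldNode>();
    for (ColumnInfo colInfo : signature) {
      cols.add(new FieldNode(colInfo.getInternalName()));  // one node per column
    }
    System.out.println(cols);
  }
}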
use of org.apache.hadoop.hive.ql.exec.ColumnInfo in project hive by apache.
the class ColumnPrunerProcCtx method handleFilterUnionChildren.
/**
 * If the input filter operator has direct children that are union operators,
 * and the filter's pruned columns differ from the union's, create a select
 * operator between them. The select operator has the same number of columns
 * as the pruned child operator.
 *
 * @param curOp
 *          The filter operator whose children need to be handled.
 * @throws SemanticException
 */
public void handleFilterUnionChildren(Operator<? extends OperatorDesc> curOp)
    throws SemanticException {
  if (curOp.getChildOperators() == null || !(curOp instanceof FilterOperator)) {
    return;
  }
  List<FieldNode> parentPrunList = prunedColLists.get(curOp);
  if (parentPrunList == null || parentPrunList.size() == 0) {
    return;
  }
  List<FieldNode> prunList = null;
  for (Operator<? extends OperatorDesc> child : curOp.getChildOperators()) {
    if (child instanceof UnionOperator) {
      prunList = genColLists(child);
      if (prunList == null || prunList.size() == 0 || parentPrunList.size() == prunList.size()) {
        continue;
      }
      ArrayList<ExprNodeDesc> exprs = new ArrayList<ExprNodeDesc>();
      ArrayList<String> outputColNames = new ArrayList<String>();
      Map<String, ExprNodeDesc> colExprMap = new HashMap<String, ExprNodeDesc>();
      ArrayList<ColumnInfo> outputRS = new ArrayList<ColumnInfo>();
      for (ColumnInfo colInfo : child.getSchema().getSignature()) {
        if (lookupColumn(prunList, colInfo.getInternalName()) == null) {
          continue;
        }
        ExprNodeDesc colDesc = new ExprNodeColumnDesc(colInfo.getType(),
            colInfo.getInternalName(), colInfo.getTabAlias(), colInfo.getIsVirtualCol());
        exprs.add(colDesc);
        outputColNames.add(colInfo.getInternalName());
        ColumnInfo newCol = new ColumnInfo(colInfo.getInternalName(), colInfo.getType(),
            colInfo.getTabAlias(), colInfo.getIsVirtualCol(), colInfo.isHiddenVirtualCol());
        newCol.setAlias(colInfo.getAlias());
        outputRS.add(newCol);
        colExprMap.put(colInfo.getInternalName(), colDesc);
      }
      SelectDesc select = new SelectDesc(exprs, outputColNames, false);
      curOp.removeChild(child);
      SelectOperator sel = (SelectOperator) OperatorFactory.getAndMakeChild(
          select, new RowSchema(outputRS), curOp);
      OperatorFactory.makeChild(sel, child);
      sel.setColumnExprMap(colExprMap);
    }
  }
}
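The descriptor-building part of the loop can be sketched without a live operator DAG: collect the surviving columns, turn each into an ExprNodeColumnDesc, and feed the results to SelectDesc and RowSchema exactly as the method does before wiring the new SelectOperator in. This is a hedged sketch with made-up column names; only the SelectDesc/RowSchema construction mirrors the code above.

import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import org.apache.hadoop.hive.ql.exec.ColumnInfo;
import org.apache.hadoop.hive.ql.exec.RowSchema;
import org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc;
import org.apache.hadoop.hive.ql.plan.ExprNodeDesc;
import org.apache.hadoop.hive.ql.plan.SelectDesc;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;

public class PruningSelectSketch {
  public static void main(String[] args) {
    // Assume pruning kept only _col0 out of the union child's (_col0, _col1).
    List<ColumnInfo> kept = Arrays.asList(
        new ColumnInfo("_col0", TypeInfoFactory.stringTypeInfo, "u", false));
    ArrayList<ExprNodeDesc> exprs = new ArrayList<ExprNodeDesc>();
    ArrayList<String> outputColNames = new ArrayList<String>();
    ArrayList<ColumnInfo> outputRS = new ArrayList<ColumnInfo>();
    for (ColumnInfo colInfo : kept) {
      exprs.add(new ExprNodeColumnDesc(colInfo.getType(), colInfo.getInternalName(),
          colInfo.getTabAlias(), colInfo.getIsVirtualCol()));
      outputColNames.add(colInfo.getInternalName());
      outputRS.add(colInfo);
    }
    // The descriptors the pruning select operator would be built from.
    SelectDesc select = new SelectDesc(exprs, outputColNames, false);
    RowSchema schema = new RowSchema(outputRS);
    System.out.println(outputColNames + " -> " + schema);
  }
}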