use of org.apache.hadoop.hive.ql.exec.Operator in project hive by apache.
the class SemanticAnalyzer method genPlanForSubQueryPredicate.
private Operator genPlanForSubQueryPredicate(QB qbSQ, ISubQueryJoinInfo subQueryPredicate) throws SemanticException {
  qbSQ.setSubQueryDef(subQueryPredicate.getSubQuery());
  Phase1Ctx ctx_1 = initPhase1Ctx();
  doPhase1(subQueryPredicate.getSubQueryAST(), qbSQ, ctx_1, null);
  getMetaData(qbSQ);
  Operator op = genPlan(qbSQ);
  return op;
}
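The helper above simply reruns the standard single-query analysis pipeline (phase-1 tree walk, metadata resolution, plan generation) against the subquery's own QB. As a rough standalone sketch of the input to that pipeline, and assuming a Hive release where ParseDriver.parse(String command) returns the ASTNode directly (the parser entry point has changed across versions), the AST that doPhase1 walks can be inspected like this:

import org.apache.hadoop.hive.ql.parse.ASTNode;
import org.apache.hadoop.hive.ql.parse.ParseDriver;

public class SubQueryAstDemo {
  public static void main(String[] args) throws Exception {
    ParseDriver pd = new ParseDriver();
    // Parse a query of the shape a SubQuery predicate would carry.
    ASTNode ast = pd.parse("select key from src where value > 10");
    // toStringTree() prints the tree that doPhase1 later walks.
    System.out.println(ast.toStringTree());
  }
}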
use of org.apache.hadoop.hive.ql.exec.Operator in project hive by apache.
the class SemanticAnalyzer method genFilterPlan.
@SuppressWarnings("nls")
private Operator genFilterPlan(ASTNode searchCond, QB qb, Operator input, Map<String, Operator> aliasToOpInfo, boolean forHavingClause, boolean forGroupByClause) throws SemanticException {
  OpParseContext inputCtx = opParseCtx.get(input);
  RowResolver inputRR = inputCtx.getRowResolver();
  /*
   * Handling of SubQuery Expressions:
   * if "Where clause contains no SubQuery expressions" then
   *   -->[true] ===CONTINUE_FILTER_PROCESSING===
   * else
   *   -->[false] "extract SubQuery expressions from Where clause"
   *   if "this is a nested SubQuery or there are more than 1 SubQuery expressions" then
   *     -->[yes] "throw Unsupported Error"
   *   else
   *     --> "Rewrite Search condition to remove SubQuery predicate"
   *     --> "build QBSubQuery"
   *     --> "extract correlated predicates from Where Clause"
   *     --> "add correlated Items to Select List and Group By"
   *     --> "construct Join Predicate from correlation predicates"
   *     --> "Generate Plan for modified SubQuery"
   *     --> "Build the Join Condition for Parent Query to SubQuery join"
   *     --> "Build the QBJoinTree from the Join condition"
   *     --> "Update Parent Query Filter with any Post Join conditions"
   *     --> ===CONTINUE_FILTER_PROCESSING===
   *   endif
   * endif
   *
   * Support for Sub Queries in Having Clause:
   * - By and large this works the same way as SubQueries in the Where Clause.
   * - The one addendum is the handling of aggregation expressions from the Outer Query
   *   appearing in correlation clauses.
   * - So correlating predicates such as the following are allowed:
   *     min(OuterQuery.x) = SubQuery.y
   * - This requires special handling when converting to joins. See the QBSubQuery.rewrite
   *   method for detailed comments.
   */
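  /*
   * Illustrative example (ours, not from the original source): an IN
   * SubQuery predicate such as
   *
   *   select * from src b where b.key in (select a.key from src a where a.value = b.value)
   *
   * is conceptually carried through the steps above into a join of the
   * parent query with the SubQuery plan, roughly
   *
   *   select b.* from src b left semi join src a on b.key = a.key and b.value = a.value
   *
   * with the correlated predicate (a.value = b.value) pulled up into the
   * join condition.
   */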
  List<ASTNode> subQueriesInOriginalTree = SubQueryUtils.findSubQueries(searchCond);
  if (subQueriesInOriginalTree.size() > 0) {
    /*
     * Restriction.9.m :: disallow nested SubQuery expressions.
     */
    if (qb.getSubQueryPredicateDef() != null) {
      throw new SemanticException(ErrorMsg.UNSUPPORTED_SUBQUERY_EXPRESSION.getMsg(
          subQueriesInOriginalTree.get(0), "Nested SubQuery expressions are not supported."));
    }
    /*
     * Restriction.8.m :: We allow only 1 SubQuery expression per Query.
     */
    if (subQueriesInOriginalTree.size() > 1) {
      throw new SemanticException(ErrorMsg.UNSUPPORTED_SUBQUERY_EXPRESSION.getMsg(
          subQueriesInOriginalTree.get(1), "Only 1 SubQuery expression is supported."));
    }
    /*
     * Clone the Search AST; apply all rewrites on the clone.
     */
    ASTNode clonedSearchCond = (ASTNode) SubQueryUtils.adaptor.dupTree(searchCond);
    List<ASTNode> subQueries = SubQueryUtils.findSubQueries(clonedSearchCond);
    for (int i = 0; i < subQueries.size(); i++) {
      ASTNode subQueryAST = subQueries.get(i);
      ASTNode originalSubQueryAST = subQueriesInOriginalTree.get(i);
      int sqIdx = qb.incrNumSubQueryPredicates();
      clonedSearchCond = SubQueryUtils.rewriteParentQueryWhere(clonedSearchCond, subQueryAST);
      QBSubQuery subQuery = SubQueryUtils.buildSubQuery(qb.getId(), sqIdx, subQueryAST, originalSubQueryAST, ctx);
      if (!forHavingClause) {
        qb.setWhereClauseSubQueryPredicate(subQuery);
      } else {
        qb.setHavingClauseSubQueryPredicate(subQuery);
      }
      String havingInputAlias = null;
      if (forHavingClause) {
        havingInputAlias = "gby_sq" + sqIdx;
        aliasToOpInfo.put(havingInputAlias, input);
      }
      subQuery.validateAndRewriteAST(inputRR, forHavingClause, havingInputAlias, aliasToOpInfo.keySet());
      QB qbSQ = new QB(subQuery.getOuterQueryId(), subQuery.getAlias(), true);
      Operator sqPlanTopOp = genPlanForSubQueryPredicate(qbSQ, subQuery);
      aliasToOpInfo.put(subQuery.getAlias(), sqPlanTopOp);
      RowResolver sqRR = opParseCtx.get(sqPlanTopOp).getRowResolver();
      /*
       * Check.5.h :: For In and Not In the SubQuery must implicitly or
       * explicitly only contain one select item.
       */
      if (subQuery.getOperator().getType() != SubQueryType.EXISTS
          && subQuery.getOperator().getType() != SubQueryType.NOT_EXISTS
          && sqRR.getColumnInfos().size() - subQuery.getNumOfCorrelationExprsAddedToSQSelect() > 1) {
        throw new SemanticException(ErrorMsg.INVALID_SUBQUERY_EXPRESSION.getMsg(
            subQueryAST, "SubQuery can contain only 1 item in Select List."));
      }
      /*
       * If this is a Not In SubQuery Predicate then Join in the Null Check SubQuery.
       * See QBSubQuery.NotInCheck for details on why and how this is constructed.
       */
      if (subQuery.getNotInCheck() != null) {
        QBSubQuery.NotInCheck notInCheck = subQuery.getNotInCheck();
        notInCheck.setSQRR(sqRR);
        QB qbSQ_nic = new QB(subQuery.getOuterQueryId(), notInCheck.getAlias(), true);
        Operator sqnicPlanTopOp = genPlanForSubQueryPredicate(qbSQ_nic, notInCheck);
        aliasToOpInfo.put(notInCheck.getAlias(), sqnicPlanTopOp);
        QBJoinTree joinTree_nic = genSQJoinTree(qb, notInCheck, input, aliasToOpInfo);
        pushJoinFilters(qb, joinTree_nic, aliasToOpInfo, false);
        input = genJoinOperator(qbSQ_nic, joinTree_nic, aliasToOpInfo, input);
        inputRR = opParseCtx.get(input).getRowResolver();
        if (forHavingClause) {
          aliasToOpInfo.put(havingInputAlias, input);
        }
      }
      /*
       * Gen Join between outer Operator and SQ op
       */
      subQuery.buildJoinCondition(inputRR, sqRR, forHavingClause, havingInputAlias);
      QBJoinTree joinTree = genSQJoinTree(qb, subQuery, input, aliasToOpInfo);
      /*
       * push filters only for this QBJoinTree. Child QBJoinTrees have already been handled.
       */
      pushJoinFilters(qb, joinTree, aliasToOpInfo, false);
      input = genJoinOperator(qbSQ, joinTree, aliasToOpInfo, input);
      searchCond = subQuery.updateOuterQueryFilter(clonedSearchCond);
    }
  }
  return genFilterPlan(qb, searchCond, input, forHavingClause || forGroupByClause);
}
use of org.apache.hadoop.hive.ql.exec.Operator in project hive by apache.
the class SemanticAnalyzer method genGroupByPlanGroupByOperator.
/**
 * Generate the GroupByOperator for the Query Block (parseInfo.getXXX(dest)).
 * The new GroupByOperator will be a child of the reduceSinkOperatorInfo.
 *
 * @param mode
 *          The mode of the aggregation (PARTIAL1 or COMPLETE)
 * @param genericUDAFEvaluators
 *          If not null, this function will store the mapping from Aggregation
 *          StringTree to the genericUDAFEvaluator in this parameter, so it
 *          can be used in the next-stage GroupBy aggregations.
 * @return the new GroupByOperator
 */
@SuppressWarnings("nls")
private Operator genGroupByPlanGroupByOperator(QBParseInfo parseInfo, String dest, Operator input, ReduceSinkOperator rs, GroupByDesc.Mode mode, Map<String, GenericUDAFEvaluator> genericUDAFEvaluators) throws SemanticException {
  RowResolver groupByInputRowResolver = opParseCtx.get(input).getRowResolver();
  RowResolver groupByOutputRowResolver = new RowResolver();
  groupByOutputRowResolver.setIsExprResolver(true);
  ArrayList<ExprNodeDesc> groupByKeys = new ArrayList<ExprNodeDesc>();
  ArrayList<AggregationDesc> aggregations = new ArrayList<AggregationDesc>();
  ArrayList<String> outputColumnNames = new ArrayList<String>();
  Map<String, ExprNodeDesc> colExprMap = new HashMap<String, ExprNodeDesc>();
  List<ASTNode> grpByExprs = getGroupByForClause(parseInfo, dest);
  for (int i = 0; i < grpByExprs.size(); ++i) {
    ASTNode grpbyExpr = grpByExprs.get(i);
    ColumnInfo exprInfo = groupByInputRowResolver.getExpression(grpbyExpr);
    if (exprInfo == null) {
      throw new SemanticException(ErrorMsg.INVALID_COLUMN.getMsg(grpbyExpr));
    }
    groupByKeys.add(new ExprNodeColumnDesc(exprInfo.getType(), exprInfo.getInternalName(), "", false));
    String field = getColumnInternalName(i);
    outputColumnNames.add(field);
    ColumnInfo oColInfo = new ColumnInfo(field, exprInfo.getType(), null, false);
    groupByOutputRowResolver.putExpression(grpbyExpr, oColInfo);
    addAlternateGByKeyMappings(grpbyExpr, oColInfo, input, groupByOutputRowResolver);
    colExprMap.put(field, groupByKeys.get(groupByKeys.size() - 1));
  }
  // For each aggregation
  HashMap<String, ASTNode> aggregationTrees = parseInfo.getAggregationExprsForClause(dest);
  assert (aggregationTrees != null);
  // get the last colName for the reduce KEY;
  // it represents the column name corresponding to the distinct aggr, if any
  String lastKeyColName = null;
  List<String> inputKeyCols = rs.getConf().getOutputKeyColumnNames();
  if (inputKeyCols.size() > 0) {
    lastKeyColName = inputKeyCols.get(inputKeyCols.size() - 1);
  }
  List<ExprNodeDesc> reduceValues = rs.getConf().getValueCols();
  int numDistinctUDFs = 0;
  for (Map.Entry<String, ASTNode> entry : aggregationTrees.entrySet()) {
    ASTNode value = entry.getValue();
    // This is the GenericUDAF name
    String aggName = unescapeIdentifier(value.getChild(0).getText());
    boolean isDistinct = value.getType() == HiveParser.TOK_FUNCTIONDI;
    boolean isAllColumns = value.getType() == HiveParser.TOK_FUNCTIONSTAR;
    // Convert children to aggParameters
    ArrayList<ExprNodeDesc> aggParameters = new ArrayList<ExprNodeDesc>();
    // child 0 is the function name
    for (int i = 1; i < value.getChildCount(); i++) {
      ASTNode paraExpr = (ASTNode) value.getChild(i);
      ColumnInfo paraExprInfo = groupByInputRowResolver.getExpression(paraExpr);
      if (paraExprInfo == null) {
        throw new SemanticException(ErrorMsg.INVALID_COLUMN.getMsg(paraExpr));
      }
      String paraExpression = paraExprInfo.getInternalName();
      assert (paraExpression != null);
      if (isDistinct && lastKeyColName != null) {
        // if the aggr is distinct, the parameter name is constructed as
        // KEY.lastKeyColName:<tag>._colx
        paraExpression = Utilities.ReduceField.KEY.name() + "." + lastKeyColName + ":" + numDistinctUDFs + "." + getColumnInternalName(i - 1);
      }
      ExprNodeDesc expr = new ExprNodeColumnDesc(paraExprInfo.getType(), paraExpression, paraExprInfo.getTabAlias(), paraExprInfo.getIsVirtualCol());
      ExprNodeDesc reduceValue = isConstantParameterInAggregationParameters(paraExprInfo.getInternalName(), reduceValues);
      if (reduceValue != null) {
        // this parameter is a constant
        expr = reduceValue;
      }
      aggParameters.add(expr);
    }
    if (isDistinct) {
      numDistinctUDFs++;
    }
    Mode amode = groupByDescModeToUDAFMode(mode, isDistinct);
    GenericUDAFEvaluator genericUDAFEvaluator = getGenericUDAFEvaluator(aggName, aggParameters, value, isDistinct, isAllColumns);
    assert (genericUDAFEvaluator != null);
    GenericUDAFInfo udaf = getGenericUDAFInfo(genericUDAFEvaluator, amode, aggParameters);
    aggregations.add(new AggregationDesc(aggName.toLowerCase(), udaf.genericUDAFEvaluator, udaf.convertedParameters, isDistinct, amode));
    String field = getColumnInternalName(groupByKeys.size() + aggregations.size() - 1);
    outputColumnNames.add(field);
    groupByOutputRowResolver.putExpression(value, new ColumnInfo(field, udaf.returnType, "", false));
    // save the evaluator so that it can be used by the next-stage
    // GroupByOperators
    if (genericUDAFEvaluators != null) {
      genericUDAFEvaluators.put(entry.getKey(), genericUDAFEvaluator);
    }
  }
  float groupByMemoryUsage = HiveConf.getFloatVar(conf, HiveConf.ConfVars.HIVEMAPAGGRHASHMEMORY);
  float memoryThreshold = HiveConf.getFloatVar(conf, HiveConf.ConfVars.HIVEMAPAGGRMEMORYTHRESHOLD);
  Operator op = putOpInsertMap(OperatorFactory.getAndMakeChild(
      new GroupByDesc(mode, outputColumnNames, groupByKeys, aggregations, false, groupByMemoryUsage, memoryThreshold, null, false, -1, numDistinctUDFs > 0),
      new RowSchema(groupByOutputRowResolver.getColumnInfos()), input), groupByOutputRowResolver);
  op.setColumnExprMap(colExprMap);
  return op;
}
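To make the column naming above concrete, assume the usual positional convention where getColumnInternalName(i) yields "_col<i>" (as in BaseSemanticAnalyzer); the helper method and the example query below are ours, for illustration only:

import java.util.ArrayList;
import java.util.List;

public class GroupByNamingDemo {
  // Mirrors the positional convention we assume getColumnInternalName follows.
  static String columnInternalName(int pos) {
    return "_col" + pos;
  }

  public static void main(String[] args) {
    // For "select key, count(distinct value) from src group by key":
    // one group-by key, then one aggregation.
    List<String> outputColumnNames = new ArrayList<String>();
    outputColumnNames.add(columnInternalName(0)); // group-by key -> _col0
    outputColumnNames.add(columnInternalName(1)); // count(...)   -> _col1
    // Per the code above, a distinct parameter is addressed through the
    // reduce-side key as KEY.<lastKeyColName>:<distinctTag>.<paramName>,
    // e.g. "KEY._col1:0._col0" for the single distinct UDAF here.
    System.out.println(outputColumnNames); // prints [_col0, _col1]
  }
}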
use of org.apache.hadoop.hive.ql.exec.Operator in project hive by apache.
the class SemanticAnalyzer method genNotNullFilterForJoinSourcePlan.
/*
 * For inner joins, push an 'is not null' predicate to the join sources for
 * every non-null-safe predicate.
 */
private Operator genNotNullFilterForJoinSourcePlan(QB qb, Operator input, QBJoinTree joinTree, ExprNodeDesc[] joinKeys) throws SemanticException {
  if (qb == null || joinTree == null) {
    return input;
  }
  if (!joinTree.getNoOuterJoin()) {
    return input;
  }
  if (joinKeys == null || joinKeys.length == 0) {
    return input;
  }
  Map<Integer, ExprNodeDesc> hashes = new HashMap<Integer, ExprNodeDesc>();
  if (input instanceof FilterOperator) {
    ExprNodeDescUtils.getExprNodeColumnDesc(Arrays.asList(((FilterDesc) input.getConf()).getPredicate()), hashes);
  }
  ExprNodeDesc filterPred = null;
  List<Boolean> nullSafes = joinTree.getNullSafes();
  for (int i = 0; i < joinKeys.length; i++) {
    if (nullSafes.get(i) || (joinKeys[i] instanceof ExprNodeColumnDesc
        && ((ExprNodeColumnDesc) joinKeys[i]).getIsPartitionColOrVirtualCol())) {
      // no need to generate an is-not-null predicate for a partitioning or
      // virtual column, since those columns can never be null.
      continue;
    }
    if (null != hashes.get(joinKeys[i].hashCode())) {
      // there is already a predicate on this src.
      continue;
    }
    List<ExprNodeDesc> args = new ArrayList<ExprNodeDesc>();
    args.add(joinKeys[i]);
    ExprNodeDesc nextExpr = ExprNodeGenericFuncDesc.newInstance(FunctionRegistry.getFunctionInfo("isnotnull").getGenericUDF(), args);
    filterPred = filterPred == null ? nextExpr : ExprNodeDescUtils.mergePredicates(filterPred, nextExpr);
  }
  if (filterPred == null) {
    return input;
  }
  OpParseContext inputCtx = opParseCtx.get(input);
  RowResolver inputRR = inputCtx.getRowResolver();
  if (input instanceof FilterOperator) {
    FilterOperator f = (FilterOperator) input;
    List<ExprNodeDesc> preds = new ArrayList<ExprNodeDesc>();
    preds.add(f.getConf().getPredicate());
    preds.add(filterPred);
    f.getConf().setPredicate(ExprNodeDescUtils.mergePredicates(preds));
    return input;
  }
  FilterDesc filterDesc = new FilterDesc(filterPred, false);
  filterDesc.setGenerated(true);
  Operator output = putOpInsertMap(OperatorFactory.getAndMakeChild(filterDesc, new RowSchema(inputRR.getColumnInfos()), input), inputRR);
  if (LOG.isDebugEnabled()) {
    LOG.debug("Created Filter Plan for " + qb.getId() + " row schema: " + inputRR.toString());
  }
  return output;
}
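As a minimal, self-contained sketch of the predicate this method synthesizes for one join key (the table alias "a" and the column "key" are made up; we assume the standard Hive plan and UDF classes already referenced in the method above):

import java.util.ArrayList;
import java.util.List;

import org.apache.hadoop.hive.ql.exec.FunctionRegistry;
import org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc;
import org.apache.hadoop.hive.ql.plan.ExprNodeDesc;
import org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;

public class NotNullPredDemo {
  public static void main(String[] args) throws Exception {
    // A join key "a.key" of type string.
    List<ExprNodeDesc> children = new ArrayList<ExprNodeDesc>();
    children.add(new ExprNodeColumnDesc(
        TypeInfoFactory.stringTypeInfo, "key", "a", false));
    // Equivalent to the SQL predicate "a.key is not null".
    ExprNodeDesc isNotNull = ExprNodeGenericFuncDesc.newInstance(
        FunctionRegistry.getFunctionInfo("isnotnull").getGenericUDF(), children);
    System.out.println(isNotNull.getExprString());
  }
}

For an inner join such as "a JOIN b ON a.key = b.key", one such predicate per non-null-safe key is AND-merged (via mergePredicates) into the filter pushed onto each join source.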
use of org.apache.hadoop.hive.ql.exec.Operator in project hive by apache.
the class SemanticAnalyzer method genSelectAllDesc.
private Operator genSelectAllDesc(Operator input) throws SemanticException {
  OpParseContext inputCtx = opParseCtx.get(input);
  RowResolver inputRR = inputCtx.getRowResolver();
  ArrayList<ColumnInfo> columns = inputRR.getColumnInfos();
  ArrayList<ExprNodeDesc> colList = new ArrayList<ExprNodeDesc>();
  ArrayList<String> columnNames = new ArrayList<String>();
  Map<String, ExprNodeDesc> columnExprMap = new HashMap<String, ExprNodeDesc>();
  for (int i = 0; i < columns.size(); i++) {
    ColumnInfo col = columns.get(i);
    colList.add(new ExprNodeColumnDesc(col, true));
    columnNames.add(col.getInternalName());
    columnExprMap.put(col.getInternalName(), new ExprNodeColumnDesc(col, true));
  }
  RowResolver outputRR = inputRR.duplicate();
  Operator output = putOpInsertMap(OperatorFactory.getAndMakeChild(new SelectDesc(colList, columnNames, true), outputRR.getRowSchema(), input), outputRR);
  output.setColumnExprMap(columnExprMap);
  return output;
}
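A condensed sketch of the identity projection genSelectAllDesc builds, with a made-up two-column schema: every input column is re-emitted under its internal name, and the name-to-expression map mirrors columnExprMap above.

import java.util.ArrayList;
import java.util.HashMap;
import java.util.Map;

import org.apache.hadoop.hive.ql.exec.ColumnInfo;
import org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc;
import org.apache.hadoop.hive.ql.plan.ExprNodeDesc;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;

public class SelectAllDemo {
  public static void main(String[] args) {
    // Hypothetical input row schema: two columns from table alias "t".
    ArrayList<ColumnInfo> columns = new ArrayList<ColumnInfo>();
    columns.add(new ColumnInfo("_col0", TypeInfoFactory.stringTypeInfo, "t", false));
    columns.add(new ColumnInfo("_col1", TypeInfoFactory.intTypeInfo, "t", false));

    ArrayList<ExprNodeDesc> colList = new ArrayList<ExprNodeDesc>();
    ArrayList<String> columnNames = new ArrayList<String>();
    Map<String, ExprNodeDesc> columnExprMap = new HashMap<String, ExprNodeDesc>();
    for (ColumnInfo col : columns) {
      ExprNodeDesc expr = new ExprNodeColumnDesc(
          col.getType(), col.getInternalName(), col.getTabAlias(), false);
      colList.add(expr);                              // select expression
      columnNames.add(col.getInternalName());         // output column name
      columnExprMap.put(col.getInternalName(), expr); // name -> expression
    }
    System.out.println(columnNames); // prints [_col0, _col1]
  }
}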