use of org.apache.hadoop.hive.ql.exec.Operator in project hive by apache.
the class SemanticAnalyzer method genGroupByPlanGroupByOperator2MR.
/**
* Generate the second GroupByOperator for the Group By Plan
* (parseInfo.getXXX(dest)). The new GroupByOperator will do the second
* aggregation based on the partial aggregation results.
*
* @param mode
* the mode of aggregation (FINAL)
* @param genericUDAFEvaluators
* The mapping from Aggregation StringTree to the
* genericUDAFEvaluator.
* @return the new GroupByOperator
* @throws SemanticException
*/
@SuppressWarnings("nls")
private Operator genGroupByPlanGroupByOperator2MR(QBParseInfo parseInfo, String dest, Operator reduceSinkOperatorInfo2, GroupByDesc.Mode mode, Map<String, GenericUDAFEvaluator> genericUDAFEvaluators, boolean groupingSetsPresent) throws SemanticException {
RowResolver groupByInputRowResolver2 = opParseCtx.get(reduceSinkOperatorInfo2).getRowResolver();
RowResolver groupByOutputRowResolver2 = new RowResolver();
groupByOutputRowResolver2.setIsExprResolver(true);
ArrayList<ExprNodeDesc> groupByKeys = new ArrayList<ExprNodeDesc>();
ArrayList<AggregationDesc> aggregations = new ArrayList<AggregationDesc>();
Map<String, ExprNodeDesc> colExprMap = new HashMap<String, ExprNodeDesc>();
List<ASTNode> grpByExprs = getGroupByForClause(parseInfo, dest);
ArrayList<String> outputColumnNames = new ArrayList<String>();
for (int i = 0; i < grpByExprs.size(); ++i) {
ASTNode grpbyExpr = grpByExprs.get(i);
ColumnInfo exprInfo = groupByInputRowResolver2.getExpression(grpbyExpr);
if (exprInfo == null) {
throw new SemanticException(ErrorMsg.INVALID_COLUMN.getMsg(grpbyExpr));
}
String expression = exprInfo.getInternalName();
groupByKeys.add(new ExprNodeColumnDesc(exprInfo.getType(), expression, exprInfo.getTabAlias(), exprInfo.getIsVirtualCol()));
String field = getColumnInternalName(i);
outputColumnNames.add(field);
ColumnInfo oColInfo = new ColumnInfo(field, exprInfo.getType(), "", false);
groupByOutputRowResolver2.putExpression(grpbyExpr, oColInfo);
addAlternateGByKeyMappings(grpbyExpr, oColInfo, reduceSinkOperatorInfo2, groupByOutputRowResolver2);
colExprMap.put(field, groupByKeys.get(groupByKeys.size() - 1));
}
int groupingSetsPosition = -1;
// For grouping sets, add a dummy grouping key
if (groupingSetsPresent) {
groupingSetsPosition = groupByKeys.size();
addGroupingSetKey(groupByKeys, groupByInputRowResolver2, groupByOutputRowResolver2, outputColumnNames, colExprMap);
}
HashMap<String, ASTNode> aggregationTrees = parseInfo.getAggregationExprsForClause(dest);
boolean containsDistinctAggr = false;
for (Map.Entry<String, ASTNode> entry : aggregationTrees.entrySet()) {
ArrayList<ExprNodeDesc> aggParameters = new ArrayList<ExprNodeDesc>();
ASTNode value = entry.getValue();
ColumnInfo paraExprInfo = groupByInputRowResolver2.getExpression(value);
if (paraExprInfo == null) {
throw new SemanticException(ErrorMsg.INVALID_COLUMN.getMsg(value));
}
String paraExpression = paraExprInfo.getInternalName();
assert (paraExpression != null);
aggParameters.add(new ExprNodeColumnDesc(paraExprInfo.getType(), paraExpression, paraExprInfo.getTabAlias(), paraExprInfo.getIsVirtualCol()));
String aggName = unescapeIdentifier(value.getChild(0).getText());
boolean isDistinct = value.getType() == HiveParser.TOK_FUNCTIONDI;
containsDistinctAggr = containsDistinctAggr || isDistinct;
boolean isStar = value.getType() == HiveParser.TOK_FUNCTIONSTAR;
Mode amode = groupByDescModeToUDAFMode(mode, isDistinct);
GenericUDAFEvaluator genericUDAFEvaluator = genericUDAFEvaluators.get(entry.getKey());
assert (genericUDAFEvaluator != null);
GenericUDAFInfo udaf = getGenericUDAFInfo(genericUDAFEvaluator, amode, aggParameters);
aggregations.add(new AggregationDesc(aggName.toLowerCase(), udaf.genericUDAFEvaluator, udaf.convertedParameters, (mode != GroupByDesc.Mode.FINAL && value.getToken().getType() == HiveParser.TOK_FUNCTIONDI), amode));
String field = getColumnInternalName(groupByKeys.size() + aggregations.size() - 1);
outputColumnNames.add(field);
groupByOutputRowResolver2.putExpression(value, new ColumnInfo(field, udaf.returnType, "", false));
}
float groupByMemoryUsage = HiveConf.getFloatVar(conf, HiveConf.ConfVars.HIVEMAPAGGRHASHMEMORY);
float memoryThreshold = HiveConf.getFloatVar(conf, HiveConf.ConfVars.HIVEMAPAGGRMEMORYTHRESHOLD);
Operator op = putOpInsertMap(OperatorFactory.getAndMakeChild(new GroupByDesc(mode, outputColumnNames, groupByKeys, aggregations, false, groupByMemoryUsage, memoryThreshold, null, false, groupingSetsPosition, containsDistinctAggr), new RowSchema(groupByOutputRowResolver2.getColumnInfos()), reduceSinkOperatorInfo2), groupByOutputRowResolver2);
op.setColumnExprMap(colExprMap);
return op;
}
use of org.apache.hadoop.hive.ql.exec.Operator in project hive by apache.
the class SemanticAnalyzer method analyzeInternal.
void analyzeInternal(ASTNode ast, PlannerContextFactory pcf) throws SemanticException {
// 1. Generate Resolved Parse tree from syntax tree
LOG.info("Starting Semantic Analysis");
// change the location of position alias process here
processPositionAlias(ast);
PlannerContext plannerCtx = pcf.create();
if (!genResolvedParseTree(ast, plannerCtx)) {
return;
}
if (HiveConf.getBoolVar(conf, ConfVars.HIVE_REMOVE_ORDERBY_IN_SUBQUERY)) {
for (String alias : qb.getSubqAliases()) {
removeOBInSubQuery(qb.getSubqForAlias(alias));
}
}
// Check query results cache.
// If no masking/filtering required, then we can check the cache now, before
// generating the operator tree and going through CBO.
// Otherwise we have to wait until after the masking/filtering step.
boolean isCacheEnabled = conf.getBoolVar(HiveConf.ConfVars.HIVE_QUERY_RESULTS_CACHE_ENABLED);
QueryResultsCache.LookupInfo lookupInfo = null;
boolean needsTransform = needsTransform();
if (isCacheEnabled && !needsTransform && queryTypeCanUseCache()) {
lookupInfo = createLookupInfoForQuery(ast);
if (checkResultsCache(lookupInfo)) {
return;
}
}
// 2. Gen OP Tree from resolved Parse Tree
Operator sinkOp = genOPTree(ast, plannerCtx);
if (!unparseTranslator.isEnabled() && (tableMask.isEnabled() && analyzeRewrite == null)) {
// Here we rewrite the * and also the masking table
ASTNode tree = rewriteASTWithMaskAndFilter(tableMask, ast, ctx.getTokenRewriteStream(), ctx, db, tabNameToTabObject, ignoredTokens);
if (tree != ast) {
plannerCtx = pcf.create();
ctx.setSkipTableMasking(true);
init(true);
// change the location of position alias process here
processPositionAlias(tree);
genResolvedParseTree(tree, plannerCtx);
if (this instanceof CalcitePlanner) {
((CalcitePlanner) this).resetCalciteConfiguration();
}
sinkOp = genOPTree(tree, plannerCtx);
}
}
// here, after applying the masking/filtering rewrite rules to the AST.
if (isCacheEnabled && needsTransform && queryTypeCanUseCache()) {
lookupInfo = createLookupInfoForQuery(ast);
if (checkResultsCache(lookupInfo)) {
return;
}
}
// 3. Deduce Resultset Schema
if (createVwDesc != null && !this.ctx.isCboSucceeded()) {
resultSchema = convertRowSchemaToViewSchema(opParseCtx.get(sinkOp).getRowResolver());
} else {
// succeeds.
if (resultSchema == null) {
resultSchema = convertRowSchemaToResultSetSchema(opParseCtx.get(sinkOp).getRowResolver(), HiveConf.getBoolVar(conf, HiveConf.ConfVars.HIVE_RESULTSET_USE_UNIQUE_COLUMN_NAMES));
}
}
// 4. Generate Parse Context for Optimizer & Physical compiler
copyInfoToQueryProperties(queryProperties);
ParseContext pCtx = new ParseContext(queryState, opToPartPruner, opToPartList, topOps, new HashSet<JoinOperator>(joinContext.keySet()), new HashSet<SMBMapJoinOperator>(smbMapJoinContext.keySet()), loadTableWork, loadFileWork, columnStatsAutoGatherContexts, ctx, idToTableNameMap, destTableId, uCtx, listMapJoinOpsNoReducer, prunedPartitions, tabNameToTabObject, opToSamplePruner, globalLimitCtx, nameToSplitSample, inputs, rootTasks, opToPartToSkewedPruner, viewAliasToInput, reduceSinkOperatorsAddedByEnforceBucketingSorting, analyzeRewrite, tableDesc, createVwDesc, materializedViewUpdateDesc, queryProperties, viewProjectToTableSchema, acidFileSinks);
// Set the semijoin hints in parse context
pCtx.setSemiJoinHints(parseSemiJoinHint(getQB().getParseInfo().getHintList()));
// Set the mapjoin hint if it needs to be disabled.
pCtx.setDisableMapJoin(disableMapJoinWithHint(getQB().getParseInfo().getHintList()));
// 5. Take care of view creation
if (createVwDesc != null) {
if (ctx.getExplainAnalyze() == AnalyzeState.RUNNING) {
return;
}
if (!ctx.isCboSucceeded()) {
saveViewDefinition();
}
// validate the create view statement at this point, the createVwDesc gets
// all the information for semanticcheck
validateCreateView();
if (createVwDesc.isMaterialized()) {
createVwDesc.setTablesUsed(getTablesUsed(pCtx));
} else {
// Since we're only creating a view (not executing it), we don't need to
// optimize or translate the plan (and in fact, those procedures can
// interfere with the view creation). So skip the rest of this method.
ctx.setResDir(null);
ctx.setResFile(null);
try {
PlanUtils.addInputsForView(pCtx);
} catch (HiveException e) {
throw new SemanticException(e);
}
// Generate lineage info for create view statements
// if LineageLogger hook is configured.
// Add the transformation that computes the lineage information.
Set<String> postExecHooks = Sets.newHashSet(Splitter.on(",").trimResults().omitEmptyStrings().split(Strings.nullToEmpty(HiveConf.getVar(conf, HiveConf.ConfVars.POSTEXECHOOKS))));
if (postExecHooks.contains("org.apache.hadoop.hive.ql.hooks.PostExecutePrinter") || postExecHooks.contains("org.apache.hadoop.hive.ql.hooks.LineageLogger") || postExecHooks.contains("org.apache.atlas.hive.hook.HiveHook")) {
ArrayList<Transform> transformations = new ArrayList<Transform>();
transformations.add(new HiveOpConverterPostProc());
transformations.add(new Generator(postExecHooks));
for (Transform t : transformations) {
pCtx = t.transform(pCtx);
}
// we just use view name as location.
queryState.getLineageState().mapDirToOp(new Path(createVwDesc.getViewName()), sinkOp);
}
return;
}
}
// 6. Generate table access stats if required
if (HiveConf.getBoolVar(this.conf, HiveConf.ConfVars.HIVE_STATS_COLLECT_TABLEKEYS)) {
TableAccessAnalyzer tableAccessAnalyzer = new TableAccessAnalyzer(pCtx);
setTableAccessInfo(tableAccessAnalyzer.analyzeTableAccess());
}
// 7. Perform Logical optimization
if (LOG.isDebugEnabled()) {
LOG.debug("Before logical optimization\n" + Operator.toString(pCtx.getTopOps().values()));
}
Optimizer optm = new Optimizer();
optm.setPctx(pCtx);
optm.initialize(conf);
pCtx = optm.optimize();
if (pCtx.getColumnAccessInfo() != null) {
// set ColumnAccessInfo for view column authorization
setColumnAccessInfo(pCtx.getColumnAccessInfo());
}
if (LOG.isDebugEnabled()) {
LOG.debug("After logical optimization\n" + Operator.toString(pCtx.getTopOps().values()));
}
// 8. Generate column access stats if required - wait until column pruning
// takes place during optimization
boolean isColumnInfoNeedForAuth = SessionState.get().isAuthorizationModeV2() && HiveConf.getBoolVar(conf, HiveConf.ConfVars.HIVE_AUTHORIZATION_ENABLED);
if (isColumnInfoNeedForAuth || HiveConf.getBoolVar(this.conf, HiveConf.ConfVars.HIVE_STATS_COLLECT_SCANCOLS)) {
ColumnAccessAnalyzer columnAccessAnalyzer = new ColumnAccessAnalyzer(pCtx);
// view column access info is carried by this.getColumnAccessInfo().
setColumnAccessInfo(columnAccessAnalyzer.analyzeColumnAccess(this.getColumnAccessInfo()));
}
// TEZ..)
if (!ctx.getExplainLogical()) {
TaskCompiler compiler = TaskCompilerFactory.getCompiler(conf, pCtx);
compiler.init(queryState, console, db);
compiler.compile(pCtx, rootTasks, inputs, outputs);
fetchTask = pCtx.getFetchTask();
}
// find all Acid FileSinkOperatorS
QueryPlanPostProcessor qp = new QueryPlanPostProcessor(rootTasks, acidFileSinks, ctx.getExecutionId());
LOG.info("Completed plan generation");
// 10. put accessed columns to readEntity
if (HiveConf.getBoolVar(this.conf, HiveConf.ConfVars.HIVE_STATS_COLLECT_SCANCOLS)) {
putAccessedColumnsToReadEntity(inputs, columnAccessInfo);
}
if (isCacheEnabled && lookupInfo != null) {
if (queryCanBeCached()) {
QueryResultsCache.QueryInfo queryInfo = createCacheQueryInfoForQuery(lookupInfo);
// Specify that the results of this query can be cached.
setCacheUsage(new CacheUsage(CacheUsage.CacheStatus.CAN_CACHE_QUERY_RESULTS, queryInfo));
}
}
}
use of org.apache.hadoop.hive.ql.exec.Operator in project hive by apache.
the class SemanticAnalyzer method genPlanForSubQueryPredicate.
private Operator genPlanForSubQueryPredicate(QB qbSQ, ISubQueryJoinInfo subQueryPredicate) throws SemanticException {
qbSQ.setSubQueryDef(subQueryPredicate.getSubQuery());
Phase1Ctx ctx_1 = initPhase1Ctx();
doPhase1(subQueryPredicate.getSubQueryAST(), qbSQ, ctx_1, null);
getMetaData(qbSQ);
Operator op = genPlan(qbSQ);
return op;
}
use of org.apache.hadoop.hive.ql.exec.Operator in project hive by apache.
the class SemanticAnalyzer method genGroupByPlanMapGroupByOperator.
/**
* Generate the map-side GroupByOperator for the Query Block
* (qb.getParseInfo().getXXX(dest)). The new GroupByOperator will be a child
* of the inputOperatorInfo.
*
* @param mode
* The mode of the aggregation (HASH)
* @param genericUDAFEvaluators
* If not null, this function will store the mapping from Aggregation
* StringTree to the genericUDAFEvaluator in this parameter, so it
* can be used in the next-stage GroupBy aggregations.
* @return the new GroupByOperator
*/
@SuppressWarnings("nls")
private Operator genGroupByPlanMapGroupByOperator(QB qb, String dest, List<ASTNode> grpByExprs, Operator inputOperatorInfo, GroupByDesc.Mode mode, Map<String, GenericUDAFEvaluator> genericUDAFEvaluators, List<Long> groupingSetKeys, boolean groupingSetsPresent) throws SemanticException {
RowResolver groupByInputRowResolver = opParseCtx.get(inputOperatorInfo).getRowResolver();
QBParseInfo parseInfo = qb.getParseInfo();
RowResolver groupByOutputRowResolver = new RowResolver();
groupByOutputRowResolver.setIsExprResolver(true);
ArrayList<ExprNodeDesc> groupByKeys = new ArrayList<ExprNodeDesc>();
ArrayList<String> outputColumnNames = new ArrayList<String>();
ArrayList<AggregationDesc> aggregations = new ArrayList<AggregationDesc>();
Map<String, ExprNodeDesc> colExprMap = new HashMap<String, ExprNodeDesc>();
for (int i = 0; i < grpByExprs.size(); ++i) {
ASTNode grpbyExpr = grpByExprs.get(i);
ExprNodeDesc grpByExprNode = genExprNodeDesc(grpbyExpr, groupByInputRowResolver);
if ((grpByExprNode instanceof ExprNodeColumnDesc) && ExprNodeDescUtils.indexOf(grpByExprNode, groupByKeys) >= 0) {
// Skip duplicated grouping keys, it happens when define column alias.
grpByExprs.remove(i--);
continue;
}
groupByKeys.add(grpByExprNode);
String field = getColumnInternalName(i);
outputColumnNames.add(field);
groupByOutputRowResolver.putExpression(grpbyExpr, new ColumnInfo(field, grpByExprNode.getTypeInfo(), "", false));
colExprMap.put(field, groupByKeys.get(groupByKeys.size() - 1));
}
// The grouping set key is present after the grouping keys, before the distinct keys
int groupingSetsPosition = -1;
// for the grouping set (corresponding to the rollup).
if (groupingSetsPresent) {
groupingSetsPosition = groupByKeys.size();
createNewGroupingKey(groupByKeys, outputColumnNames, groupByOutputRowResolver, colExprMap);
}
// If there is a distinctFuncExp, add all parameters to the reduceKeys.
if (!parseInfo.getDistinctFuncExprsForClause(dest).isEmpty()) {
List<ASTNode> list = parseInfo.getDistinctFuncExprsForClause(dest);
for (ASTNode value : list) {
// 0 is function name
for (int i = 1; i < value.getChildCount(); i++) {
ASTNode parameter = (ASTNode) value.getChild(i);
if (groupByOutputRowResolver.getExpression(parameter) == null) {
ExprNodeDesc distExprNode = genExprNodeDesc(parameter, groupByInputRowResolver);
groupByKeys.add(distExprNode);
String field = getColumnInternalName(groupByKeys.size() - 1);
outputColumnNames.add(field);
groupByOutputRowResolver.putExpression(parameter, new ColumnInfo(field, distExprNode.getTypeInfo(), "", false));
colExprMap.put(field, groupByKeys.get(groupByKeys.size() - 1));
}
}
}
}
// For each aggregation
HashMap<String, ASTNode> aggregationTrees = parseInfo.getAggregationExprsForClause(dest);
assert (aggregationTrees != null);
boolean containsDistinctAggr = false;
for (Map.Entry<String, ASTNode> entry : aggregationTrees.entrySet()) {
ASTNode value = entry.getValue();
String aggName = unescapeIdentifier(value.getChild(0).getText());
ArrayList<ExprNodeDesc> aggParameters = new ArrayList<ExprNodeDesc>();
// 0 is the function name
for (int i = 1; i < value.getChildCount(); i++) {
ASTNode paraExpr = (ASTNode) value.getChild(i);
ExprNodeDesc paraExprNode = genExprNodeDesc(paraExpr, groupByInputRowResolver);
aggParameters.add(paraExprNode);
}
boolean isDistinct = value.getType() == HiveParser.TOK_FUNCTIONDI;
containsDistinctAggr = containsDistinctAggr || isDistinct;
boolean isAllColumns = value.getType() == HiveParser.TOK_FUNCTIONSTAR;
Mode amode = groupByDescModeToUDAFMode(mode, isDistinct);
GenericUDAFEvaluator genericUDAFEvaluator = getGenericUDAFEvaluator(aggName, aggParameters, value, isDistinct, isAllColumns);
assert (genericUDAFEvaluator != null);
GenericUDAFInfo udaf = getGenericUDAFInfo(genericUDAFEvaluator, amode, aggParameters);
aggregations.add(new AggregationDesc(aggName.toLowerCase(), udaf.genericUDAFEvaluator, udaf.convertedParameters, isDistinct, amode));
String field = getColumnInternalName(groupByKeys.size() + aggregations.size() - 1);
outputColumnNames.add(field);
if (groupByOutputRowResolver.getExpression(value) == null) {
groupByOutputRowResolver.putExpression(value, new ColumnInfo(field, udaf.returnType, "", false));
}
// GroupByOperators
if (genericUDAFEvaluators != null) {
genericUDAFEvaluators.put(entry.getKey(), genericUDAFEvaluator);
}
}
float groupByMemoryUsage = HiveConf.getFloatVar(conf, HiveConf.ConfVars.HIVEMAPAGGRHASHMEMORY);
float memoryThreshold = HiveConf.getFloatVar(conf, HiveConf.ConfVars.HIVEMAPAGGRMEMORYTHRESHOLD);
Operator op = putOpInsertMap(OperatorFactory.getAndMakeChild(new GroupByDesc(mode, outputColumnNames, groupByKeys, aggregations, false, groupByMemoryUsage, memoryThreshold, groupingSetKeys, groupingSetsPresent, groupingSetsPosition, containsDistinctAggr), new RowSchema(groupByOutputRowResolver.getColumnInfos()), inputOperatorInfo), groupByOutputRowResolver);
op.setColumnExprMap(colExprMap);
return op;
}
use of org.apache.hadoop.hive.ql.exec.Operator in project hive by apache.
the class TezCompiler method runDynamicPartitionPruning.
private void runDynamicPartitionPruning(OptimizeTezProcContext procCtx, Set<ReadEntity> inputs, Set<WriteEntity> outputs) throws SemanticException {
if (!procCtx.conf.getBoolVar(ConfVars.TEZ_DYNAMIC_PARTITION_PRUNING)) {
return;
}
// Sequence of TableScan operators to be walked
Deque<Operator<?>> deque = new LinkedList<Operator<?>>();
deque.addAll(procCtx.parseContext.getTopOps().values());
Map<Rule, NodeProcessor> opRules = new LinkedHashMap<Rule, NodeProcessor>();
opRules.put(new RuleRegExp(new String("Dynamic Partition Pruning"), FilterOperator.getOperatorName() + "%"), new DynamicPartitionPruningOptimization());
// The dispatcher fires the processor corresponding to the closest matching
// rule and passes the context along
Dispatcher disp = new DefaultRuleDispatcher(null, opRules, procCtx);
List<Node> topNodes = new ArrayList<Node>();
topNodes.addAll(procCtx.parseContext.getTopOps().values());
GraphWalker ogw = new ForwardWalker(disp);
ogw.startWalking(topNodes, null);
}
Aggregations