use of org.apache.hadoop.hive.ql.lib.SemanticDispatcher in project hive by apache.
the class ColumnPruner method transform.
/**
* Transform the query tree. For each table under consideration, check if all
* columns are needed. If not, only select the operators needed at the
* beginning and proceed.
*
* @param pactx
* the current parse context
*/
@Override
public ParseContext transform(ParseContext pactx) throws SemanticException {
pGraphContext = pactx;
// generate pruned column list for all relevant operators
ColumnPrunerProcCtx cppCtx = new ColumnPrunerProcCtx(pactx);
// create a walker which walks the tree in a DFS manner while maintaining
// the operator stack. The dispatcher
// generates the plan from the operator tree
Map<SemanticRule, SemanticNodeProcessor> opRules = new LinkedHashMap<SemanticRule, SemanticNodeProcessor>();
opRules.put(new RuleRegExp("R1", FilterOperator.getOperatorName() + "%"), ColumnPrunerProcFactory.getFilterProc());
opRules.put(new RuleRegExp("R2", GroupByOperator.getOperatorName() + "%"), ColumnPrunerProcFactory.getGroupByProc());
opRules.put(new RuleRegExp("R3", ReduceSinkOperator.getOperatorName() + "%"), ColumnPrunerProcFactory.getReduceSinkProc());
opRules.put(new RuleRegExp("R4", SelectOperator.getOperatorName() + "%"), ColumnPrunerProcFactory.getSelectProc());
opRules.put(new RuleRegExp("R5", CommonJoinOperator.getOperatorName() + "%"), ColumnPrunerProcFactory.getJoinProc());
opRules.put(new RuleRegExp("R6", MapJoinOperator.getOperatorName() + "%"), ColumnPrunerProcFactory.getMapJoinProc());
opRules.put(new RuleRegExp("R7", TableScanOperator.getOperatorName() + "%"), ColumnPrunerProcFactory.getTableScanProc());
opRules.put(new RuleRegExp("R8", LateralViewJoinOperator.getOperatorName() + "%"), ColumnPrunerProcFactory.getLateralViewJoinProc());
opRules.put(new RuleRegExp("R9", LateralViewForwardOperator.getOperatorName() + "%"), ColumnPrunerProcFactory.getLateralViewForwardProc());
opRules.put(new RuleRegExp("R10", PTFOperator.getOperatorName() + "%"), ColumnPrunerProcFactory.getPTFProc());
opRules.put(new RuleRegExp("R11", ScriptOperator.getOperatorName() + "%"), ColumnPrunerProcFactory.getScriptProc());
opRules.put(new RuleRegExp("R12", LimitOperator.getOperatorName() + "%"), ColumnPrunerProcFactory.getLimitProc());
opRules.put(new RuleRegExp("R13", UnionOperator.getOperatorName() + "%"), ColumnPrunerProcFactory.getUnionProc());
// The dispatcher fires the processor corresponding to the closest matching
// rule and passes the context along
SemanticDispatcher disp = new DefaultRuleDispatcher(ColumnPrunerProcFactory.getDefaultProc(), opRules, cppCtx);
SemanticGraphWalker ogw = new ColumnPrunerWalker(disp);
// Create a list of topop nodes
ArrayList<Node> topNodes = new ArrayList<Node>();
topNodes.addAll(pGraphContext.getTopOps().values());
ogw.startWalking(topNodes, null);
// set it back so that column pruner in the optimizer will not do the
// view column authorization again even if it is triggered again.
pGraphContext.setNeedViewColumnAuthorization(false);
return pGraphContext;
}
use of org.apache.hadoop.hive.ql.lib.SemanticDispatcher in project hive by apache.
the class TezCompiler method runTopNKeyOptimization.
private static void runTopNKeyOptimization(OptimizeTezProcContext procCtx) throws SemanticException {
if (!procCtx.conf.getBoolVar(ConfVars.HIVE_OPTIMIZE_TOPNKEY)) {
return;
}
Map<SemanticRule, SemanticNodeProcessor> opRules = new LinkedHashMap<SemanticRule, SemanticNodeProcessor>();
opRules.put(new RuleRegExp("Top n key optimization", ReduceSinkOperator.getOperatorName() + "%"), new TopNKeyProcessor(HiveConf.getIntVar(procCtx.conf, HiveConf.ConfVars.HIVE_MAX_TOPN_ALLOWED), HiveConf.getFloatVar(procCtx.conf, ConfVars.HIVE_TOPN_EFFICIENCY_THRESHOLD), HiveConf.getIntVar(procCtx.conf, ConfVars.HIVE_TOPN_EFFICIENCY_CHECK_BATCHES), HiveConf.getIntVar(procCtx.conf, ConfVars.HIVE_TOPN_MAX_NUMBER_OF_PARTITIONS)));
opRules.put(new RuleRegExp("Top n key pushdown", TopNKeyOperator.getOperatorName() + "%"), new TopNKeyPushdownProcessor());
// The dispatcher fires the processor corresponding to the closest matching
// rule and passes the context along
SemanticDispatcher disp = new DefaultRuleDispatcher(null, opRules, procCtx);
List<Node> topNodes = new ArrayList<Node>();
topNodes.addAll(procCtx.parseContext.getTopOps().values());
SemanticGraphWalker ogw = new DefaultGraphWalker(disp);
ogw.startWalking(topNodes, null);
}
use of org.apache.hadoop.hive.ql.lib.SemanticDispatcher in project hive by apache.
the class TezCompiler method runStatsDependentOptimizations.
private void runStatsDependentOptimizations(OptimizeTezProcContext procCtx) throws SemanticException {
// Sequence of TableScan operators to be walked
Deque<Operator<?>> deque = new LinkedList<Operator<?>>();
deque.addAll(procCtx.parseContext.getTopOps().values());
// create a walker which walks the tree in a DFS manner while maintaining
// the operator stack.
Map<SemanticRule, SemanticNodeProcessor> opRules = new LinkedHashMap<SemanticRule, SemanticNodeProcessor>();
opRules.put(new RuleRegExp("Set parallelism - ReduceSink", ReduceSinkOperator.getOperatorName() + "%"), new SetReducerParallelism());
opRules.put(new RuleRegExp("Convert Join to Map-join", JoinOperator.getOperatorName() + "%"), new ConvertJoinMapJoin());
if (procCtx.conf.getBoolVar(ConfVars.HIVEMAPAGGRHASHMINREDUCTIONSTATSADJUST)) {
opRules.put(new RuleRegExp("Set min reduction - GBy (Hash)", GroupByOperator.getOperatorName() + "%"), new SetHashGroupByMinReduction());
}
// The dispatcher fires the processor corresponding to the closest matching
// rule and passes the context along
SemanticDispatcher disp = new DefaultRuleDispatcher(null, opRules, procCtx);
List<Node> topNodes = new ArrayList<Node>();
topNodes.addAll(procCtx.parseContext.getTopOps().values());
SemanticGraphWalker ogw = new ForwardWalker(disp);
ogw.startWalking(topNodes, null);
}
use of org.apache.hadoop.hive.ql.lib.SemanticDispatcher in project hive by apache.
the class TezCompiler method generateTaskTree.
@Override
protected void generateTaskTree(List<Task<?>> rootTasks, ParseContext pCtx, List<Task<MoveWork>> mvTask, Set<ReadEntity> inputs, Set<WriteEntity> outputs) throws SemanticException {
PerfLogger perfLogger = SessionState.getPerfLogger();
perfLogger.perfLogBegin(this.getClass().getName(), PerfLogger.TEZ_COMPILER);
ParseContext tempParseContext = getParseContext(pCtx, rootTasks);
GenTezUtils utils = new GenTezUtils();
GenTezWork genTezWork = new GenTezWork(utils);
GenTezProcContext procCtx = new GenTezProcContext(conf, tempParseContext, mvTask, rootTasks, inputs, outputs);
// create a walker which walks the tree in a DFS manner while maintaining
// the operator stack.
// The dispatcher generates the plan from the operator tree
Map<SemanticRule, SemanticNodeProcessor> opRules = new LinkedHashMap<SemanticRule, SemanticNodeProcessor>();
opRules.put(new RuleRegExp("Split Work - ReduceSink", ReduceSinkOperator.getOperatorName() + "%"), genTezWork);
opRules.put(new RuleRegExp("No more walking on ReduceSink-MapJoin", MapJoinOperator.getOperatorName() + "%"), new ReduceSinkMapJoinProc());
opRules.put(new RuleRegExp("Recognize a Sorted Merge Join operator to setup the right edge and" + " stop traversing the DummyStore-MapJoin", CommonMergeJoinOperator.getOperatorName() + "%"), new MergeJoinProc());
opRules.put(new RuleRegExp("Split Work + Move/Merge - FileSink", FileSinkOperator.getOperatorName() + "%"), new CompositeProcessor(new FileSinkProcessor(), genTezWork));
opRules.put(new RuleRegExp("Split work - DummyStore", DummyStoreOperator.getOperatorName() + "%"), genTezWork);
opRules.put(new RuleRegExp("Handle Potential Analyze Command", TableScanOperator.getOperatorName() + "%"), new ProcessAnalyzeTable(utils));
opRules.put(new RuleRegExp("Remember union", UnionOperator.getOperatorName() + "%"), new UnionProcessor());
opRules.put(new RuleRegExp("AppMasterEventOperator", AppMasterEventOperator.getOperatorName() + "%"), new AppMasterEventProcessor());
// The dispatcher fires the processor corresponding to the closest matching
// rule and passes the context along
SemanticDispatcher disp = new DefaultRuleDispatcher(null, opRules, procCtx);
List<Node> topNodes = new ArrayList<Node>();
topNodes.addAll(pCtx.getTopOps().values());
SemanticGraphWalker ogw = new GenTezWorkWalker(disp, procCtx);
ogw.startWalking(topNodes, null);
// we need to specify the reserved memory for each work that contains Map Join
for (List<BaseWork> baseWorkList : procCtx.mapJoinWorkMap.values()) {
for (BaseWork w : baseWorkList) {
// work should be the smallest unit for memory allocation
w.setReservedMemoryMB((int) (conf.getLongVar(ConfVars.HIVECONVERTJOINNOCONDITIONALTASKTHRESHOLD) / (1024 * 1024)));
}
}
// we need to clone some operator plans and remove union operators still
int indexForTezUnion = 0;
for (BaseWork w : procCtx.workWithUnionOperators) {
GenTezUtils.removeUnionOperators(procCtx, w, indexForTezUnion++);
}
// then we make sure the file sink operators are set up right
for (FileSinkOperator fileSink : procCtx.fileSinkSet) {
GenTezUtils.processFileSink(procCtx, fileSink);
}
// Connect any edges required for min/max pushdown
if (pCtx.getRsToRuntimeValuesInfoMap().size() > 0) {
for (ReduceSinkOperator rs : pCtx.getRsToRuntimeValuesInfoMap().keySet()) {
// Process min/max
GenTezUtils.processDynamicSemiJoinPushDownOperator(procCtx, pCtx.getRsToRuntimeValuesInfoMap().get(rs), rs);
}
}
// and finally we hook up any events that need to be sent to the tez AM
LOG.debug("There are " + procCtx.eventOperatorSet.size() + " app master events.");
for (AppMasterEventOperator event : procCtx.eventOperatorSet) {
LOG.debug("Handling AppMasterEventOperator: " + event);
GenTezUtils.processAppMasterEvent(procCtx, event);
}
perfLogger.perfLogEnd(this.getClass().getName(), PerfLogger.TEZ_COMPILER, "generateTaskTree");
}
use of org.apache.hadoop.hive.ql.lib.SemanticDispatcher in project hive by apache.
the class TezCompiler method removeSemiJoinIfNoStats.
private void removeSemiJoinIfNoStats(OptimizeTezProcContext procCtx) throws SemanticException {
Map<SemanticRule, SemanticNodeProcessor> opRules = new LinkedHashMap<SemanticRule, SemanticNodeProcessor>();
opRules.put(new RuleRegExp("R1", GroupByOperator.getOperatorName() + "%" + ReduceSinkOperator.getOperatorName() + "%" + GroupByOperator.getOperatorName() + "%" + ReduceSinkOperator.getOperatorName() + "%"), new SemiJoinRemovalProc(true, false));
SemiJoinRemovalContext ctx = new SemiJoinRemovalContext(procCtx.parseContext);
SemanticDispatcher disp = new DefaultRuleDispatcher(null, opRules, ctx);
List<Node> topNodes = new ArrayList<Node>();
topNodes.addAll(procCtx.parseContext.getTopOps().values());
SemanticGraphWalker ogw = new PreOrderOnceWalker(disp);
ogw.startWalking(topNodes, null);
}
Aggregations