Search in sources :

Example 46 with SemanticNodeProcessor

use of org.apache.hadoop.hive.ql.lib.SemanticNodeProcessor in project hive by apache.

the class ColumnPruner method transform.

/**
 * Runs column pruning over the operator tree held by the given parse context.
 *
 * <p>One processor from {@code ColumnPrunerProcFactory} is registered per
 * operator kind (rules R1 through R13); operators matching none of the rules
 * fall back to the factory's default processor. The tree is walked from the
 * top operators with a {@code ColumnPrunerWalker}, and afterwards the
 * "need view column authorization" flag on the context is cleared.
 *
 * @param pactx
 *          the current parse context; also stored in {@code pGraphContext}
 * @return the parse context held in {@code pGraphContext} once the walk is done
 * @throws SemanticException
 *           propagated from the operator-tree walk
 */
@Override
public ParseContext transform(ParseContext pactx) throws SemanticException {
    pGraphContext = pactx;
    // context object handed to the dispatcher and shared by all processors
    ColumnPrunerProcCtx pruneCtx = new ColumnPrunerProcCtx(pactx);

    // operator-name pattern -> pruning processor, kept in registration order
    Map<SemanticRule, SemanticNodeProcessor> rules = new LinkedHashMap<>();
    rules.put(new RuleRegExp("R1", FilterOperator.getOperatorName() + "%"),
        ColumnPrunerProcFactory.getFilterProc());
    rules.put(new RuleRegExp("R2", GroupByOperator.getOperatorName() + "%"),
        ColumnPrunerProcFactory.getGroupByProc());
    rules.put(new RuleRegExp("R3", ReduceSinkOperator.getOperatorName() + "%"),
        ColumnPrunerProcFactory.getReduceSinkProc());
    rules.put(new RuleRegExp("R4", SelectOperator.getOperatorName() + "%"),
        ColumnPrunerProcFactory.getSelectProc());
    rules.put(new RuleRegExp("R5", CommonJoinOperator.getOperatorName() + "%"),
        ColumnPrunerProcFactory.getJoinProc());
    rules.put(new RuleRegExp("R6", MapJoinOperator.getOperatorName() + "%"),
        ColumnPrunerProcFactory.getMapJoinProc());
    rules.put(new RuleRegExp("R7", TableScanOperator.getOperatorName() + "%"),
        ColumnPrunerProcFactory.getTableScanProc());
    rules.put(new RuleRegExp("R8", LateralViewJoinOperator.getOperatorName() + "%"),
        ColumnPrunerProcFactory.getLateralViewJoinProc());
    rules.put(new RuleRegExp("R9", LateralViewForwardOperator.getOperatorName() + "%"),
        ColumnPrunerProcFactory.getLateralViewForwardProc());
    rules.put(new RuleRegExp("R10", PTFOperator.getOperatorName() + "%"),
        ColumnPrunerProcFactory.getPTFProc());
    rules.put(new RuleRegExp("R11", ScriptOperator.getOperatorName() + "%"),
        ColumnPrunerProcFactory.getScriptProc());
    rules.put(new RuleRegExp("R12", LimitOperator.getOperatorName() + "%"),
        ColumnPrunerProcFactory.getLimitProc());
    rules.put(new RuleRegExp("R13", UnionOperator.getOperatorName() + "%"),
        ColumnPrunerProcFactory.getUnionProc());

    // the dispatcher fires the processor of the matching rule and passes pruneCtx along
    SemanticDispatcher dispatcher =
        new DefaultRuleDispatcher(ColumnPrunerProcFactory.getDefaultProc(), rules, pruneCtx);
    SemanticGraphWalker walker = new ColumnPrunerWalker(dispatcher);

    // the walk starts from every top operator of the plan
    ArrayList<Node> roots = new ArrayList<Node>(pGraphContext.getTopOps().values());
    walker.startWalking(roots, null);

    // reset the flag so that a later run of the column pruner in the optimizer
    // does not repeat the view column authorization
    pGraphContext.setNeedViewColumnAuthorization(false);
    return pGraphContext;
}
Also used : SemanticRule(org.apache.hadoop.hive.ql.lib.SemanticRule) DefaultRuleDispatcher(org.apache.hadoop.hive.ql.lib.DefaultRuleDispatcher) Node(org.apache.hadoop.hive.ql.lib.Node) RuleRegExp(org.apache.hadoop.hive.ql.lib.RuleRegExp) ArrayList(java.util.ArrayList) SemanticGraphWalker(org.apache.hadoop.hive.ql.lib.SemanticGraphWalker) LinkedHashMap(java.util.LinkedHashMap) SemanticDispatcher(org.apache.hadoop.hive.ql.lib.SemanticDispatcher) SemanticNodeProcessor(org.apache.hadoop.hive.ql.lib.SemanticNodeProcessor)

Example 47 with SemanticNodeProcessor

use of org.apache.hadoop.hive.ql.lib.SemanticNodeProcessor in project hive by apache.

the class IdentityProjectRemover method transform.

/**
 * Walks the operator tree and fires a {@code ProjectRemover} on every operator
 * matching the select-operator rule, unless the transformation is skipped.
 *
 * <p>The transformation is skipped (the context is returned untouched) when CBO
 * is enabled, the CBO return path is enabled, and CBO succeeded for this query.
 *
 * @param pctx the current parse context
 * @return the same {@code pctx} instance that was passed in
 * @throws SemanticException propagated from the operator-tree walk
 */
@Override
public ParseContext transform(ParseContext pctx) throws SemanticException {
    // 0. Read the three conditions that decide whether this transformation applies
    final boolean cboOn = HiveConf.getBoolVar(pctx.getConf(), HiveConf.ConfVars.HIVE_CBO_ENABLED);
    final boolean returnPathOn =
        HiveConf.getBoolVar(pctx.getConf(), HiveConf.ConfVars.HIVE_CBO_RETPATH_HIVEOP);
    final boolean cboOk = pctx.getContext().isCboSucceeded();
    final boolean skipTransformation = cboOn && returnPathOn && cboOk;

    if (!skipTransformation) {
        // 1. Apply the transformation: a single rule on select operators
        Map<SemanticRule, SemanticNodeProcessor> rules = new LinkedHashMap<>();
        rules.put(new RuleRegExp("R1", "(" + SelectOperator.getOperatorName() + "%)"),
            new ProjectRemover());

        // no default processor and no shared context are supplied to the dispatcher
        SemanticGraphWalker walker =
            new DefaultGraphWalker(new DefaultRuleDispatcher(null, rules, null));
        ArrayList<Node> roots = new ArrayList<Node>(pctx.getTopOps().values());
        walker.startWalking(roots, null);
    }
    return pctx;
}
Also used : SemanticRule(org.apache.hadoop.hive.ql.lib.SemanticRule) DefaultRuleDispatcher(org.apache.hadoop.hive.ql.lib.DefaultRuleDispatcher) DefaultGraphWalker(org.apache.hadoop.hive.ql.lib.DefaultGraphWalker) Node(org.apache.hadoop.hive.ql.lib.Node) RuleRegExp(org.apache.hadoop.hive.ql.lib.RuleRegExp) ArrayList(java.util.ArrayList) SemanticGraphWalker(org.apache.hadoop.hive.ql.lib.SemanticGraphWalker) LinkedHashMap(java.util.LinkedHashMap) SemanticNodeProcessor(org.apache.hadoop.hive.ql.lib.SemanticNodeProcessor)

Example 48 with SemanticNodeProcessor

use of org.apache.hadoop.hive.ql.lib.SemanticNodeProcessor in project hive by apache.

the class TezCompiler method runTopNKeyOptimization.

/**
 * Runs the top-n-key optimization over the operator tree when
 * {@code HIVE_OPTIMIZE_TOPNKEY} is enabled; otherwise does nothing.
 *
 * <p>Two rules are registered: a {@code TopNKeyProcessor} on reduce sink
 * operators, configured from four {@code HiveConf} settings, and a
 * {@code TopNKeyPushdownProcessor} on top-n-key operators.
 *
 * @param procCtx optimization context supplying the configuration and the parse context
 * @throws SemanticException propagated from the operator-tree walk
 */
private static void runTopNKeyOptimization(OptimizeTezProcContext procCtx) throws SemanticException {
    if (!procCtx.conf.getBoolVar(ConfVars.HIVE_OPTIMIZE_TOPNKEY)) {
        return;
    }

    Map<SemanticRule, SemanticNodeProcessor> rules = new LinkedHashMap<>();

    // rule 1: introduce top-n-key handling at reduce sinks
    SemanticRule topNKeyRule =
        new RuleRegExp("Top n key optimization", ReduceSinkOperator.getOperatorName() + "%");
    int maxTopNAllowed = HiveConf.getIntVar(procCtx.conf, HiveConf.ConfVars.HIVE_MAX_TOPN_ALLOWED);
    float efficiencyThreshold =
        HiveConf.getFloatVar(procCtx.conf, ConfVars.HIVE_TOPN_EFFICIENCY_THRESHOLD);
    int efficiencyCheckBatches =
        HiveConf.getIntVar(procCtx.conf, ConfVars.HIVE_TOPN_EFFICIENCY_CHECK_BATCHES);
    int maxPartitions =
        HiveConf.getIntVar(procCtx.conf, ConfVars.HIVE_TOPN_MAX_NUMBER_OF_PARTITIONS);
    rules.put(topNKeyRule,
        new TopNKeyProcessor(maxTopNAllowed, efficiencyThreshold, efficiencyCheckBatches, maxPartitions));

    // rule 2: push existing top-n-key operators down
    rules.put(new RuleRegExp("Top n key pushdown", TopNKeyOperator.getOperatorName() + "%"),
        new TopNKeyPushdownProcessor());

    // the dispatcher fires the processor of the matching rule and passes procCtx along
    SemanticDispatcher dispatcher = new DefaultRuleDispatcher(null, rules, procCtx);
    List<Node> roots = new ArrayList<Node>(procCtx.parseContext.getTopOps().values());
    SemanticGraphWalker walker = new DefaultGraphWalker(dispatcher);
    walker.startWalking(roots, null);
}
Also used : SemanticRule(org.apache.hadoop.hive.ql.lib.SemanticRule) DefaultRuleDispatcher(org.apache.hadoop.hive.ql.lib.DefaultRuleDispatcher) Node(org.apache.hadoop.hive.ql.lib.Node) DefaultGraphWalker(org.apache.hadoop.hive.ql.lib.DefaultGraphWalker) RuleRegExp(org.apache.hadoop.hive.ql.lib.RuleRegExp) ArrayList(java.util.ArrayList) SemanticGraphWalker(org.apache.hadoop.hive.ql.lib.SemanticGraphWalker) TopNKeyProcessor(org.apache.hadoop.hive.ql.optimizer.topnkey.TopNKeyProcessor) LinkedHashMap(java.util.LinkedHashMap) TopNKeyPushdownProcessor(org.apache.hadoop.hive.ql.optimizer.topnkey.TopNKeyPushdownProcessor) SemanticDispatcher(org.apache.hadoop.hive.ql.lib.SemanticDispatcher) SemanticNodeProcessor(org.apache.hadoop.hive.ql.lib.SemanticNodeProcessor)

Example 49 with SemanticNodeProcessor

use of org.apache.hadoop.hive.ql.lib.SemanticNodeProcessor in project hive by apache.

the class TezCompiler method runStatsDependentOptimizations.

/**
 * Runs the optimizations registered here as depending on statistics over the
 * operator tree.
 *
 * <p>Registered rules, in order:
 * <ul>
 *   <li>{@code SetReducerParallelism} on reduce sink operators,</li>
 *   <li>{@code ConvertJoinMapJoin} on join operators,</li>
 *   <li>{@code SetHashGroupByMinReduction} on group-by operators, only when
 *       {@code HIVEMAPAGGRHASHMINREDUCTIONSTATSADJUST} is enabled.</li>
 * </ul>
 * The tree is walked from the top operators with a {@code ForwardWalker}.
 *
 * @param procCtx optimization context supplying the configuration and the parse context
 * @throws SemanticException propagated from the operator-tree walk
 */
private void runStatsDependentOptimizations(OptimizeTezProcContext procCtx) throws SemanticException {
    // operator-name pattern -> processor, kept in registration order
    Map<SemanticRule, SemanticNodeProcessor> opRules = new LinkedHashMap<SemanticRule, SemanticNodeProcessor>();
    opRules.put(new RuleRegExp("Set parallelism - ReduceSink", ReduceSinkOperator.getOperatorName() + "%"), new SetReducerParallelism());
    opRules.put(new RuleRegExp("Convert Join to Map-join", JoinOperator.getOperatorName() + "%"), new ConvertJoinMapJoin());
    if (procCtx.conf.getBoolVar(ConfVars.HIVEMAPAGGRHASHMINREDUCTIONSTATSADJUST)) {
        opRules.put(new RuleRegExp("Set min reduction - GBy (Hash)", GroupByOperator.getOperatorName() + "%"), new SetHashGroupByMinReduction());
    }
    // The dispatcher fires the processor corresponding to the closest matching
    // rule and passes the context along; no default processor is supplied
    SemanticDispatcher disp = new DefaultRuleDispatcher(null, opRules, procCtx);
    // the walk starts from every top operator of the plan
    List<Node> topNodes = new ArrayList<Node>();
    topNodes.addAll(procCtx.parseContext.getTopOps().values());
    SemanticGraphWalker ogw = new ForwardWalker(disp);
    ogw.startWalking(topNodes, null);
}
Also used : CommonMergeJoinOperator(org.apache.hadoop.hive.ql.exec.CommonMergeJoinOperator) ReduceSinkOperator(org.apache.hadoop.hive.ql.exec.ReduceSinkOperator) SelectOperator(org.apache.hadoop.hive.ql.exec.SelectOperator) TezDummyStoreOperator(org.apache.hadoop.hive.ql.exec.TezDummyStoreOperator) JoinOperator(org.apache.hadoop.hive.ql.exec.JoinOperator) TableScanOperator(org.apache.hadoop.hive.ql.exec.TableScanOperator) Operator(org.apache.hadoop.hive.ql.exec.Operator) MapJoinOperator(org.apache.hadoop.hive.ql.exec.MapJoinOperator) GroupByOperator(org.apache.hadoop.hive.ql.exec.GroupByOperator) UnionOperator(org.apache.hadoop.hive.ql.exec.UnionOperator) FileSinkOperator(org.apache.hadoop.hive.ql.exec.FileSinkOperator) FilterOperator(org.apache.hadoop.hive.ql.exec.FilterOperator) AppMasterEventOperator(org.apache.hadoop.hive.ql.exec.AppMasterEventOperator) TopNKeyOperator(org.apache.hadoop.hive.ql.exec.TopNKeyOperator) TerminalOperator(org.apache.hadoop.hive.ql.exec.TerminalOperator) DummyStoreOperator(org.apache.hadoop.hive.ql.exec.DummyStoreOperator) SemanticRule(org.apache.hadoop.hive.ql.lib.SemanticRule) ForwardWalker(org.apache.hadoop.hive.ql.lib.ForwardWalker) DefaultRuleDispatcher(org.apache.hadoop.hive.ql.lib.DefaultRuleDispatcher) Node(org.apache.hadoop.hive.ql.lib.Node) RuleRegExp(org.apache.hadoop.hive.ql.lib.RuleRegExp) ArrayList(java.util.ArrayList) SemanticGraphWalker(org.apache.hadoop.hive.ql.lib.SemanticGraphWalker) SetReducerParallelism(org.apache.hadoop.hive.ql.optimizer.SetReducerParallelism) LinkedList(java.util.LinkedList) LinkedHashMap(java.util.LinkedHashMap) ConvertJoinMapJoin(org.apache.hadoop.hive.ql.optimizer.ConvertJoinMapJoin) SemanticDispatcher(org.apache.hadoop.hive.ql.lib.SemanticDispatcher) SemanticNodeProcessor(org.apache.hadoop.hive.ql.lib.SemanticNodeProcessor) SetHashGroupByMinReduction(org.apache.hadoop.hive.ql.optimizer.SetHashGroupByMinReduction)

Example 50 with SemanticNodeProcessor

use of org.apache.hadoop.hive.ql.lib.SemanticNodeProcessor in project hive by apache.

the class TezCompiler method generateTaskTree.

/**
 * Generates the Tez task tree from the operator tree of the given parse context.
 *
 * <p>Steps, in order: walk the operator tree with a {@code GenTezWorkWalker}
 * whose rules dispatch to the work-generating processors; set the reserved
 * memory on every work recorded in {@code mapJoinWorkMap}; remove union
 * operators from the works that contain them; process the collected file
 * sinks; process the dynamic semi-join push-down reduce sinks; and process the
 * collected app master event operators. The whole method is bracketed by
 * {@code PerfLogger} begin/end calls for {@code TEZ_COMPILER}.
 *
 * @param rootTasks root tasks, passed to the generation context
 * @param pCtx the current parse context
 * @param mvTask move tasks, passed to the generation context
 * @param inputs read entities, passed to the generation context
 * @param outputs write entities, passed to the generation context
 * @throws SemanticException propagated from the walk or the post-processing steps
 */
@Override
protected void generateTaskTree(List<Task<?>> rootTasks, ParseContext pCtx, List<Task<MoveWork>> mvTask, Set<ReadEntity> inputs, Set<WriteEntity> outputs) throws SemanticException {
    PerfLogger perfLogger = SessionState.getPerfLogger();
    final String perfCaller = this.getClass().getName();
    perfLogger.perfLogBegin(perfCaller, PerfLogger.TEZ_COMPILER);

    ParseContext tempParseContext = getParseContext(pCtx, rootTasks);
    GenTezUtils tezUtils = new GenTezUtils();
    GenTezWork workGenerator = new GenTezWork(tezUtils);
    GenTezProcContext procCtx =
        new GenTezProcContext(conf, tempParseContext, mvTask, rootTasks, inputs, outputs);

    // operator-name pattern -> processor, kept in registration order
    Map<SemanticRule, SemanticNodeProcessor> rules = new LinkedHashMap<>();
    rules.put(new RuleRegExp("Split Work - ReduceSink",
        ReduceSinkOperator.getOperatorName() + "%"), workGenerator);
    rules.put(new RuleRegExp("No more walking on ReduceSink-MapJoin",
        MapJoinOperator.getOperatorName() + "%"), new ReduceSinkMapJoinProc());
    rules.put(new RuleRegExp("Recognize a Sorted Merge Join operator to setup the right edge and"
        + " stop traversing the DummyStore-MapJoin",
        CommonMergeJoinOperator.getOperatorName() + "%"), new MergeJoinProc());
    rules.put(new RuleRegExp("Split Work + Move/Merge - FileSink",
        FileSinkOperator.getOperatorName() + "%"),
        new CompositeProcessor(new FileSinkProcessor(), workGenerator));
    rules.put(new RuleRegExp("Split work - DummyStore",
        DummyStoreOperator.getOperatorName() + "%"), workGenerator);
    rules.put(new RuleRegExp("Handle Potential Analyze Command",
        TableScanOperator.getOperatorName() + "%"), new ProcessAnalyzeTable(tezUtils));
    rules.put(new RuleRegExp("Remember union",
        UnionOperator.getOperatorName() + "%"), new UnionProcessor());
    rules.put(new RuleRegExp("AppMasterEventOperator",
        AppMasterEventOperator.getOperatorName() + "%"), new AppMasterEventProcessor());

    // the dispatcher fires the processor of the matching rule and passes procCtx along
    SemanticDispatcher dispatcher = new DefaultRuleDispatcher(null, rules, procCtx);
    List<Node> roots = new ArrayList<Node>(pCtx.getTopOps().values());
    SemanticGraphWalker walker = new GenTezWorkWalker(dispatcher, procCtx);
    walker.startWalking(roots, null);

    // every work containing a map join gets its reserved memory set; the value
    // is the configured threshold divided by 1024 * 1024
    for (List<BaseWork> mapJoinWorks : procCtx.mapJoinWorkMap.values()) {
        for (BaseWork work : mapJoinWorks) {
            // work should be the smallest unit for memory allocation
            long threshold = conf.getLongVar(ConfVars.HIVECONVERTJOINNOCONDITIONALTASKTHRESHOLD);
            work.setReservedMemoryMB((int) (threshold / (1024 * 1024)));
        }
    }

    // clone some operator plans and remove the union operators that are still there
    int unionIndex = 0;
    for (BaseWork work : procCtx.workWithUnionOperators) {
        GenTezUtils.removeUnionOperators(procCtx, work, unionIndex);
        unionIndex++;
    }

    // make sure the file sink operators are set up right
    for (FileSinkOperator sink : procCtx.fileSinkSet) {
        GenTezUtils.processFileSink(procCtx, sink);
    }

    // connect any edges required for min/max pushdown
    if (!pCtx.getRsToRuntimeValuesInfoMap().isEmpty()) {
        for (ReduceSinkOperator reduceSink : pCtx.getRsToRuntimeValuesInfoMap().keySet()) {
            // process min/max
            GenTezUtils.processDynamicSemiJoinPushDownOperator(
                procCtx, pCtx.getRsToRuntimeValuesInfoMap().get(reduceSink), reduceSink);
        }
    }

    // finally hook up any events that need to be sent to the tez AM
    LOG.debug("There are " + procCtx.eventOperatorSet.size() + " app master events.");
    for (AppMasterEventOperator eventOp : procCtx.eventOperatorSet) {
        LOG.debug("Handling AppMasterEventOperator: " + eventOp);
        GenTezUtils.processAppMasterEvent(procCtx, eventOp);
    }

    perfLogger.perfLogEnd(perfCaller, PerfLogger.TEZ_COMPILER, "generateTaskTree");
}
Also used : Node(org.apache.hadoop.hive.ql.lib.Node) PerfLogger(org.apache.hadoop.hive.ql.log.PerfLogger) ArrayList(java.util.ArrayList) LinkedHashMap(java.util.LinkedHashMap) BaseWork(org.apache.hadoop.hive.ql.plan.BaseWork) SemanticRule(org.apache.hadoop.hive.ql.lib.SemanticRule) FileSinkOperator(org.apache.hadoop.hive.ql.exec.FileSinkOperator) DefaultRuleDispatcher(org.apache.hadoop.hive.ql.lib.DefaultRuleDispatcher) RuleRegExp(org.apache.hadoop.hive.ql.lib.RuleRegExp) SemanticGraphWalker(org.apache.hadoop.hive.ql.lib.SemanticGraphWalker) AppMasterEventOperator(org.apache.hadoop.hive.ql.exec.AppMasterEventOperator) ReduceSinkMapJoinProc(org.apache.hadoop.hive.ql.optimizer.ReduceSinkMapJoinProc) CompositeProcessor(org.apache.hadoop.hive.ql.lib.CompositeProcessor) MergeJoinProc(org.apache.hadoop.hive.ql.optimizer.MergeJoinProc) ReduceSinkOperator(org.apache.hadoop.hive.ql.exec.ReduceSinkOperator) SemanticDispatcher(org.apache.hadoop.hive.ql.lib.SemanticDispatcher) SemanticNodeProcessor(org.apache.hadoop.hive.ql.lib.SemanticNodeProcessor)

Aggregations

SemanticNodeProcessor (org.apache.hadoop.hive.ql.lib.SemanticNodeProcessor): 68, LinkedHashMap (java.util.LinkedHashMap): 66, Node (org.apache.hadoop.hive.ql.lib.Node): 66, SemanticGraphWalker (org.apache.hadoop.hive.ql.lib.SemanticGraphWalker): 66, SemanticRule (org.apache.hadoop.hive.ql.lib.SemanticRule): 66, SemanticDispatcher (org.apache.hadoop.hive.ql.lib.SemanticDispatcher): 65, DefaultRuleDispatcher (org.apache.hadoop.hive.ql.lib.DefaultRuleDispatcher): 63, ArrayList (java.util.ArrayList): 62, RuleRegExp (org.apache.hadoop.hive.ql.lib.RuleRegExp): 57, DefaultGraphWalker (org.apache.hadoop.hive.ql.lib.DefaultGraphWalker): 36, MapJoinOperator (org.apache.hadoop.hive.ql.exec.MapJoinOperator): 11, ReduceSinkOperator (org.apache.hadoop.hive.ql.exec.ReduceSinkOperator): 11, JoinOperator (org.apache.hadoop.hive.ql.exec.JoinOperator): 9, Operator (org.apache.hadoop.hive.ql.exec.Operator): 9, FileSinkOperator (org.apache.hadoop.hive.ql.exec.FileSinkOperator): 8, ForwardWalker (org.apache.hadoop.hive.ql.lib.ForwardWalker): 8, TableScanOperator (org.apache.hadoop.hive.ql.exec.TableScanOperator): 7, UnionOperator (org.apache.hadoop.hive.ql.exec.UnionOperator): 7, AppMasterEventOperator (org.apache.hadoop.hive.ql.exec.AppMasterEventOperator): 6, DummyStoreOperator (org.apache.hadoop.hive.ql.exec.DummyStoreOperator): 6