Search in sources :

Example 1 with Dispatcher

use of org.apache.hadoop.hive.ql.lib.Dispatcher in project hive by apache.

the class MapReduceCompiler method generateTaskTree.

@Override
protected void generateTaskTree(List<Task<? extends Serializable>> rootTasks, ParseContext pCtx, List<Task<MoveWork>> mvTask, Set<ReadEntity> inputs, Set<WriteEntity> outputs) throws SemanticException {
    // generate map reduce plans
    ParseContext tempParseContext = getParseContext(pCtx, rootTasks);
    GenMRProcContext procCtx = new GenMRProcContext(conf, // Must be deterministic order map for consistent q-test output across Java versions
    new LinkedHashMap<Operator<? extends OperatorDesc>, Task<? extends Serializable>>(), tempParseContext, mvTask, rootTasks, new LinkedHashMap<Operator<? extends OperatorDesc>, GenMapRedCtx>(), inputs, outputs);
    // create a walker which walks the tree in a DFS manner while maintaining
    // the operator stack.
    // The dispatcher generates the plan from the operator tree
    Map<Rule, NodeProcessor> opRules = new LinkedHashMap<Rule, NodeProcessor>();
    opRules.put(new RuleRegExp(new String("R1"), TableScanOperator.getOperatorName() + "%"), new GenMRTableScan1());
    opRules.put(new RuleRegExp(new String("R2"), TableScanOperator.getOperatorName() + "%.*" + ReduceSinkOperator.getOperatorName() + "%"), new GenMRRedSink1());
    opRules.put(new RuleRegExp(new String("R3"), ReduceSinkOperator.getOperatorName() + "%.*" + ReduceSinkOperator.getOperatorName() + "%"), new GenMRRedSink2());
    opRules.put(new RuleRegExp(new String("R4"), FileSinkOperator.getOperatorName() + "%"), new GenMRFileSink1());
    opRules.put(new RuleRegExp(new String("R5"), UnionOperator.getOperatorName() + "%"), new GenMRUnion1());
    opRules.put(new RuleRegExp(new String("R6"), UnionOperator.getOperatorName() + "%.*" + ReduceSinkOperator.getOperatorName() + "%"), new GenMRRedSink3());
    opRules.put(new RuleRegExp(new String("R7"), MapJoinOperator.getOperatorName() + "%"), MapJoinFactory.getTableScanMapJoin());
    // The dispatcher fires the processor corresponding to the closest matching
    // rule and passes the context along
    Dispatcher disp = new DefaultRuleDispatcher(new GenMROperator(), opRules, procCtx);
    GraphWalker ogw = new GenMapRedWalker(disp);
    ArrayList<Node> topNodes = new ArrayList<Node>();
    topNodes.addAll(pCtx.getTopOps().values());
    ogw.startWalking(topNodes, null);
}
Also used : ReduceSinkOperator(org.apache.hadoop.hive.ql.exec.ReduceSinkOperator) MapJoinOperator(org.apache.hadoop.hive.ql.exec.MapJoinOperator) UnionOperator(org.apache.hadoop.hive.ql.exec.UnionOperator) FileSinkOperator(org.apache.hadoop.hive.ql.exec.FileSinkOperator) GenMROperator(org.apache.hadoop.hive.ql.optimizer.GenMROperator) TableScanOperator(org.apache.hadoop.hive.ql.exec.TableScanOperator) Operator(org.apache.hadoop.hive.ql.exec.Operator) ConditionalTask(org.apache.hadoop.hive.ql.exec.ConditionalTask) MapRedTask(org.apache.hadoop.hive.ql.exec.mr.MapRedTask) Task(org.apache.hadoop.hive.ql.exec.Task) Serializable(java.io.Serializable) Node(org.apache.hadoop.hive.ql.lib.Node) ArrayList(java.util.ArrayList) Dispatcher(org.apache.hadoop.hive.ql.lib.Dispatcher) DefaultRuleDispatcher(org.apache.hadoop.hive.ql.lib.DefaultRuleDispatcher) LinkedHashMap(java.util.LinkedHashMap) GenMRTableScan1(org.apache.hadoop.hive.ql.optimizer.GenMRTableScan1) GenMapRedCtx(org.apache.hadoop.hive.ql.optimizer.GenMRProcContext.GenMapRedCtx) GenMRProcContext(org.apache.hadoop.hive.ql.optimizer.GenMRProcContext) GraphWalker(org.apache.hadoop.hive.ql.lib.GraphWalker) GenMROperator(org.apache.hadoop.hive.ql.optimizer.GenMROperator) NodeProcessor(org.apache.hadoop.hive.ql.lib.NodeProcessor) GenMRUnion1(org.apache.hadoop.hive.ql.optimizer.GenMRUnion1) GenMRFileSink1(org.apache.hadoop.hive.ql.optimizer.GenMRFileSink1) DefaultRuleDispatcher(org.apache.hadoop.hive.ql.lib.DefaultRuleDispatcher) RuleRegExp(org.apache.hadoop.hive.ql.lib.RuleRegExp) GenMRRedSink2(org.apache.hadoop.hive.ql.optimizer.GenMRRedSink2) GenMRRedSink1(org.apache.hadoop.hive.ql.optimizer.GenMRRedSink1) GenMRRedSink3(org.apache.hadoop.hive.ql.optimizer.GenMRRedSink3) Rule(org.apache.hadoop.hive.ql.lib.Rule) OperatorDesc(org.apache.hadoop.hive.ql.plan.OperatorDesc)

Example 2 with Dispatcher

use of org.apache.hadoop.hive.ql.lib.Dispatcher in project SQLWindowing by hbutani.

the class WindowingTypeCheckProcFactory method genExprNode.

public static HashMap<Node, Object> genExprNode(ASTNode expr, TypeCheckCtx tcCtx) throws SemanticException {
    // Create the walker, the rules dispatcher and the context.
    // create a walker which walks the tree in a DFS manner while
    // maintaining
    // the operator stack. The dispatcher
    // generates the plan from the operator tree
    Map<Rule, NodeProcessor> opRules = new LinkedHashMap<Rule, NodeProcessor>();
    opRules.put(new RuleRegExp("R1", Windowing2Parser.NULL + "%"), getNullExprProcessor());
    opRules.put(new RuleRegExp("R2", Windowing2Parser.Number + "%|" + Windowing2Parser.TinyintLiteral + "%|" + Windowing2Parser.SmallintLiteral + "%|" + Windowing2Parser.BigintLiteral + "%"), getNumExprProcessor());
    opRules.put(new RuleRegExp("R3", Windowing2Parser.Identifier + "%|" + Windowing2Parser.StringLiteral + "%|" + Windowing2Parser.CHARSETLITERAL + "%|" + Windowing2Parser.STRINGLITERALSEQUENCE + "%|" + "%|" + Windowing2Parser.IF + "%|" + Windowing2Parser.CASE + "%|" + Windowing2Parser.WHEN + "%|" + Windowing2Parser.IN + "%|" + Windowing2Parser.ARRAY + "%|" + Windowing2Parser.MAP + "%|" + Windowing2Parser.BETWEEN + "%|" + Windowing2Parser.STRUCT + "%"), getStrExprProcessor());
    opRules.put(new RuleRegExp("R4", Windowing2Parser.TRUE + "%|" + Windowing2Parser.FALSE + "%"), getBoolExprProcessor());
    opRules.put(new RuleRegExp("R5", Windowing2Parser.TABLEORCOL + "%"), getColumnExprProcessor());
    // The dispatcher fires the processor corresponding to the closest
    // matching
    // rule and passes the context along
    Dispatcher disp = new DefaultRuleDispatcher(getDefaultExprProcessor(), opRules, tcCtx);
    GraphWalker ogw = new DefaultGraphWalker(disp);
    // Create a list of topop nodes
    ArrayList<Node> topNodes = new ArrayList<Node>();
    topNodes.add(expr);
    HashMap<Node, Object> nodeOutputs = new HashMap<Node, Object>();
    ogw.startWalking(topNodes, nodeOutputs);
    return nodeOutputs;
}
Also used : NodeProcessor(org.apache.hadoop.hive.ql.lib.NodeProcessor) DefaultRuleDispatcher(org.apache.hadoop.hive.ql.lib.DefaultRuleDispatcher) HashMap(java.util.HashMap) LinkedHashMap(java.util.LinkedHashMap) DefaultGraphWalker(org.apache.hadoop.hive.ql.lib.DefaultGraphWalker) ASTNode(org.apache.hadoop.hive.ql.parse.ASTNode) Node(org.apache.hadoop.hive.ql.lib.Node) RuleRegExp(org.apache.hadoop.hive.ql.lib.RuleRegExp) ArrayList(java.util.ArrayList) Dispatcher(org.apache.hadoop.hive.ql.lib.Dispatcher) DefaultRuleDispatcher(org.apache.hadoop.hive.ql.lib.DefaultRuleDispatcher) LinkedHashMap(java.util.LinkedHashMap) Rule(org.apache.hadoop.hive.ql.lib.Rule) GraphWalker(org.apache.hadoop.hive.ql.lib.GraphWalker) DefaultGraphWalker(org.apache.hadoop.hive.ql.lib.DefaultGraphWalker)

Example 3 with Dispatcher

use of org.apache.hadoop.hive.ql.lib.Dispatcher in project hive by apache.

the class TezCompiler method runDynamicPartitionPruning.

private void runDynamicPartitionPruning(OptimizeTezProcContext procCtx, Set<ReadEntity> inputs, Set<WriteEntity> outputs) throws SemanticException {
    if (!procCtx.conf.getBoolVar(ConfVars.TEZ_DYNAMIC_PARTITION_PRUNING)) {
        return;
    }
    // Sequence of TableScan operators to be walked
    Deque<Operator<?>> deque = new LinkedList<Operator<?>>();
    deque.addAll(procCtx.parseContext.getTopOps().values());
    Map<Rule, NodeProcessor> opRules = new LinkedHashMap<Rule, NodeProcessor>();
    opRules.put(new RuleRegExp(new String("Dynamic Partition Pruning"), FilterOperator.getOperatorName() + "%"), new DynamicPartitionPruningOptimization());
    // The dispatcher fires the processor corresponding to the closest matching
    // rule and passes the context along
    Dispatcher disp = new DefaultRuleDispatcher(null, opRules, procCtx);
    List<Node> topNodes = new ArrayList<Node>();
    topNodes.addAll(procCtx.parseContext.getTopOps().values());
    GraphWalker ogw = new ForwardWalker(disp);
    ogw.startWalking(topNodes, null);
}
Also used : CommonMergeJoinOperator(org.apache.hadoop.hive.ql.exec.CommonMergeJoinOperator) ReduceSinkOperator(org.apache.hadoop.hive.ql.exec.ReduceSinkOperator) MapJoinOperator(org.apache.hadoop.hive.ql.exec.MapJoinOperator) GroupByOperator(org.apache.hadoop.hive.ql.exec.GroupByOperator) UnionOperator(org.apache.hadoop.hive.ql.exec.UnionOperator) FileSinkOperator(org.apache.hadoop.hive.ql.exec.FileSinkOperator) SelectOperator(org.apache.hadoop.hive.ql.exec.SelectOperator) FilterOperator(org.apache.hadoop.hive.ql.exec.FilterOperator) TezDummyStoreOperator(org.apache.hadoop.hive.ql.exec.TezDummyStoreOperator) AppMasterEventOperator(org.apache.hadoop.hive.ql.exec.AppMasterEventOperator) JoinOperator(org.apache.hadoop.hive.ql.exec.JoinOperator) TableScanOperator(org.apache.hadoop.hive.ql.exec.TableScanOperator) Operator(org.apache.hadoop.hive.ql.exec.Operator) DummyStoreOperator(org.apache.hadoop.hive.ql.exec.DummyStoreOperator) NodeProcessor(org.apache.hadoop.hive.ql.lib.NodeProcessor) ForwardWalker(org.apache.hadoop.hive.ql.lib.ForwardWalker) DefaultRuleDispatcher(org.apache.hadoop.hive.ql.lib.DefaultRuleDispatcher) Node(org.apache.hadoop.hive.ql.lib.Node) RuleRegExp(org.apache.hadoop.hive.ql.lib.RuleRegExp) ArrayList(java.util.ArrayList) Dispatcher(org.apache.hadoop.hive.ql.lib.Dispatcher) DefaultRuleDispatcher(org.apache.hadoop.hive.ql.lib.DefaultRuleDispatcher) LinkedList(java.util.LinkedList) LinkedHashMap(java.util.LinkedHashMap) DynamicPartitionPruningOptimization(org.apache.hadoop.hive.ql.optimizer.DynamicPartitionPruningOptimization) Rule(org.apache.hadoop.hive.ql.lib.Rule) GraphWalker(org.apache.hadoop.hive.ql.lib.GraphWalker)

Example 4 with Dispatcher

use of org.apache.hadoop.hive.ql.lib.Dispatcher in project hive by apache.

the class TezCompiler method generateTaskTree.

@Override
protected void generateTaskTree(List<Task<? extends Serializable>> rootTasks, ParseContext pCtx, List<Task<MoveWork>> mvTask, Set<ReadEntity> inputs, Set<WriteEntity> outputs) throws SemanticException {
    PerfLogger perfLogger = SessionState.getPerfLogger();
    perfLogger.PerfLogBegin(this.getClass().getName(), PerfLogger.TEZ_COMPILER);
    ParseContext tempParseContext = getParseContext(pCtx, rootTasks);
    GenTezUtils utils = new GenTezUtils();
    GenTezWork genTezWork = new GenTezWork(utils);
    GenTezProcContext procCtx = new GenTezProcContext(conf, tempParseContext, mvTask, rootTasks, inputs, outputs);
    // create a walker which walks the tree in a DFS manner while maintaining
    // the operator stack.
    // The dispatcher generates the plan from the operator tree
    Map<Rule, NodeProcessor> opRules = new LinkedHashMap<Rule, NodeProcessor>();
    opRules.put(new RuleRegExp("Split Work - ReduceSink", ReduceSinkOperator.getOperatorName() + "%"), genTezWork);
    opRules.put(new RuleRegExp("No more walking on ReduceSink-MapJoin", MapJoinOperator.getOperatorName() + "%"), new ReduceSinkMapJoinProc());
    opRules.put(new RuleRegExp("Recognize a Sorted Merge Join operator to setup the right edge and" + " stop traversing the DummyStore-MapJoin", CommonMergeJoinOperator.getOperatorName() + "%"), new MergeJoinProc());
    opRules.put(new RuleRegExp("Split Work + Move/Merge - FileSink", FileSinkOperator.getOperatorName() + "%"), new CompositeProcessor(new FileSinkProcessor(), genTezWork));
    opRules.put(new RuleRegExp("Split work - DummyStore", DummyStoreOperator.getOperatorName() + "%"), genTezWork);
    opRules.put(new RuleRegExp("Handle Potential Analyze Command", TableScanOperator.getOperatorName() + "%"), new ProcessAnalyzeTable(utils));
    opRules.put(new RuleRegExp("Remember union", UnionOperator.getOperatorName() + "%"), new UnionProcessor());
    opRules.put(new RuleRegExp("AppMasterEventOperator", AppMasterEventOperator.getOperatorName() + "%"), new AppMasterEventProcessor());
    // The dispatcher fires the processor corresponding to the closest matching
    // rule and passes the context along
    Dispatcher disp = new DefaultRuleDispatcher(null, opRules, procCtx);
    List<Node> topNodes = new ArrayList<Node>();
    topNodes.addAll(pCtx.getTopOps().values());
    GraphWalker ogw = new GenTezWorkWalker(disp, procCtx);
    ogw.startWalking(topNodes, null);
    // we need to specify the reserved memory for each work that contains Map Join
    for (List<BaseWork> baseWorkList : procCtx.mapJoinWorkMap.values()) {
        for (BaseWork w : baseWorkList) {
            // work should be the smallest unit for memory allocation
            w.setReservedMemoryMB((int) (conf.getLongVar(ConfVars.HIVECONVERTJOINNOCONDITIONALTASKTHRESHOLD) / (1024 * 1024)));
        }
    }
    // we need to clone some operator plans and remove union operators still
    int indexForTezUnion = 0;
    for (BaseWork w : procCtx.workWithUnionOperators) {
        GenTezUtils.removeUnionOperators(procCtx, w, indexForTezUnion++);
    }
    // then we make sure the file sink operators are set up right
    for (FileSinkOperator fileSink : procCtx.fileSinkSet) {
        GenTezUtils.processFileSink(procCtx, fileSink);
    }
    // Connect any edges required for min/max pushdown
    if (pCtx.getRsToRuntimeValuesInfoMap().size() > 0) {
        for (ReduceSinkOperator rs : pCtx.getRsToRuntimeValuesInfoMap().keySet()) {
            // Process min/max
            GenTezUtils.processDynamicSemiJoinPushDownOperator(procCtx, pCtx.getRsToRuntimeValuesInfoMap().get(rs), rs);
        }
    }
    // and finally we hook up any events that need to be sent to the tez AM
    LOG.debug("There are " + procCtx.eventOperatorSet.size() + " app master events.");
    for (AppMasterEventOperator event : procCtx.eventOperatorSet) {
        LOG.debug("Handling AppMasterEventOperator: " + event);
        GenTezUtils.processAppMasterEvent(procCtx, event);
    }
    perfLogger.PerfLogEnd(this.getClass().getName(), PerfLogger.TEZ_COMPILER, "generateTaskTree");
}
Also used : Node(org.apache.hadoop.hive.ql.lib.Node) PerfLogger(org.apache.hadoop.hive.ql.log.PerfLogger) ArrayList(java.util.ArrayList) Dispatcher(org.apache.hadoop.hive.ql.lib.Dispatcher) DefaultRuleDispatcher(org.apache.hadoop.hive.ql.lib.DefaultRuleDispatcher) LinkedHashMap(java.util.LinkedHashMap) BaseWork(org.apache.hadoop.hive.ql.plan.BaseWork) GraphWalker(org.apache.hadoop.hive.ql.lib.GraphWalker) NodeProcessor(org.apache.hadoop.hive.ql.lib.NodeProcessor) FileSinkOperator(org.apache.hadoop.hive.ql.exec.FileSinkOperator) DefaultRuleDispatcher(org.apache.hadoop.hive.ql.lib.DefaultRuleDispatcher) RuleRegExp(org.apache.hadoop.hive.ql.lib.RuleRegExp) AppMasterEventOperator(org.apache.hadoop.hive.ql.exec.AppMasterEventOperator) ReduceSinkMapJoinProc(org.apache.hadoop.hive.ql.optimizer.ReduceSinkMapJoinProc) CompositeProcessor(org.apache.hadoop.hive.ql.lib.CompositeProcessor) MergeJoinProc(org.apache.hadoop.hive.ql.optimizer.MergeJoinProc) ReduceSinkOperator(org.apache.hadoop.hive.ql.exec.ReduceSinkOperator) Rule(org.apache.hadoop.hive.ql.lib.Rule)

Example 5 with Dispatcher

use of org.apache.hadoop.hive.ql.lib.Dispatcher in project hive by apache.

the class TezCompiler method runRemoveDynamicPruningOptimization.

private void runRemoveDynamicPruningOptimization(OptimizeTezProcContext procCtx, Set<ReadEntity> inputs, Set<WriteEntity> outputs) throws SemanticException {
    // Sequence of TableScan operators to be walked
    Deque<Operator<?>> deque = new LinkedList<Operator<?>>();
    deque.addAll(procCtx.parseContext.getTopOps().values());
    // create a walker which walks the tree in a DFS manner while maintaining
    // the operator stack.
    Map<Rule, NodeProcessor> opRules = new LinkedHashMap<Rule, NodeProcessor>();
    opRules.put(new RuleRegExp("Remove dynamic pruning by size", AppMasterEventOperator.getOperatorName() + "%"), new RemoveDynamicPruningBySize());
    // The dispatcher fires the processor corresponding to the closest matching
    // rule and passes the context along
    Dispatcher disp = new DefaultRuleDispatcher(null, opRules, procCtx);
    List<Node> topNodes = new ArrayList<Node>();
    topNodes.addAll(procCtx.parseContext.getTopOps().values());
    GraphWalker ogw = new ForwardWalker(disp);
    ogw.startWalking(topNodes, null);
}
Also used : CommonMergeJoinOperator(org.apache.hadoop.hive.ql.exec.CommonMergeJoinOperator) ReduceSinkOperator(org.apache.hadoop.hive.ql.exec.ReduceSinkOperator) MapJoinOperator(org.apache.hadoop.hive.ql.exec.MapJoinOperator) GroupByOperator(org.apache.hadoop.hive.ql.exec.GroupByOperator) UnionOperator(org.apache.hadoop.hive.ql.exec.UnionOperator) FileSinkOperator(org.apache.hadoop.hive.ql.exec.FileSinkOperator) SelectOperator(org.apache.hadoop.hive.ql.exec.SelectOperator) FilterOperator(org.apache.hadoop.hive.ql.exec.FilterOperator) TezDummyStoreOperator(org.apache.hadoop.hive.ql.exec.TezDummyStoreOperator) AppMasterEventOperator(org.apache.hadoop.hive.ql.exec.AppMasterEventOperator) JoinOperator(org.apache.hadoop.hive.ql.exec.JoinOperator) TableScanOperator(org.apache.hadoop.hive.ql.exec.TableScanOperator) Operator(org.apache.hadoop.hive.ql.exec.Operator) DummyStoreOperator(org.apache.hadoop.hive.ql.exec.DummyStoreOperator) NodeProcessor(org.apache.hadoop.hive.ql.lib.NodeProcessor) ForwardWalker(org.apache.hadoop.hive.ql.lib.ForwardWalker) DefaultRuleDispatcher(org.apache.hadoop.hive.ql.lib.DefaultRuleDispatcher) Node(org.apache.hadoop.hive.ql.lib.Node) RuleRegExp(org.apache.hadoop.hive.ql.lib.RuleRegExp) ArrayList(java.util.ArrayList) Dispatcher(org.apache.hadoop.hive.ql.lib.Dispatcher) DefaultRuleDispatcher(org.apache.hadoop.hive.ql.lib.DefaultRuleDispatcher) LinkedList(java.util.LinkedList) RemoveDynamicPruningBySize(org.apache.hadoop.hive.ql.optimizer.RemoveDynamicPruningBySize) LinkedHashMap(java.util.LinkedHashMap) Rule(org.apache.hadoop.hive.ql.lib.Rule) GraphWalker(org.apache.hadoop.hive.ql.lib.GraphWalker)

Aggregations

Dispatcher (org.apache.hadoop.hive.ql.lib.Dispatcher)22 Node (org.apache.hadoop.hive.ql.lib.Node)22 ArrayList (java.util.ArrayList)21 GraphWalker (org.apache.hadoop.hive.ql.lib.GraphWalker)21 DefaultRuleDispatcher (org.apache.hadoop.hive.ql.lib.DefaultRuleDispatcher)19 LinkedHashMap (java.util.LinkedHashMap)18 NodeProcessor (org.apache.hadoop.hive.ql.lib.NodeProcessor)18 Rule (org.apache.hadoop.hive.ql.lib.Rule)18 RuleRegExp (org.apache.hadoop.hive.ql.lib.RuleRegExp)13 DefaultGraphWalker (org.apache.hadoop.hive.ql.lib.DefaultGraphWalker)11 HashMap (java.util.HashMap)7 FileSinkOperator (org.apache.hadoop.hive.ql.exec.FileSinkOperator)6 ReduceSinkOperator (org.apache.hadoop.hive.ql.exec.ReduceSinkOperator)6 TableScanOperator (org.apache.hadoop.hive.ql.exec.TableScanOperator)5 AppMasterEventOperator (org.apache.hadoop.hive.ql.exec.AppMasterEventOperator)4 MapJoinOperator (org.apache.hadoop.hive.ql.exec.MapJoinOperator)4 SelectOperator (org.apache.hadoop.hive.ql.exec.SelectOperator)4 LinkedList (java.util.LinkedList)3 RelNode (org.apache.calcite.rel.RelNode)3 CommonMergeJoinOperator (org.apache.hadoop.hive.ql.exec.CommonMergeJoinOperator)3