Search in sources :

Example 61 with RuleRegExp

use of org.apache.hadoop.hive.ql.lib.RuleRegExp in project hive by apache.

the class SparkCompiler method generateTaskTreeHelper.

private void generateTaskTreeHelper(GenSparkProcContext procCtx, List<Node> topNodes) throws SemanticException {
    // create a walker which walks the tree in a DFS manner while maintaining
    // the operator stack. The dispatcher generates the plan from the operator tree
    Map<Rule, NodeProcessor> opRules = new LinkedHashMap<Rule, NodeProcessor>();
    GenSparkWork genSparkWork = new GenSparkWork(GenSparkUtils.getUtils());
    opRules.put(new RuleRegExp("Split Work - ReduceSink", ReduceSinkOperator.getOperatorName() + "%"), genSparkWork);
    opRules.put(new RuleRegExp("Split Work - SparkPartitionPruningSink", SparkPartitionPruningSinkOperator.getOperatorName() + "%"), genSparkWork);
    opRules.put(new TypeRule(MapJoinOperator.class), new SparkReduceSinkMapJoinProc());
    opRules.put(new RuleRegExp("Split Work + Move/Merge - FileSink", FileSinkOperator.getOperatorName() + "%"), new CompositeProcessor(new SparkFileSinkProcessor(), genSparkWork));
    opRules.put(new RuleRegExp("Handle Analyze Command", TableScanOperator.getOperatorName() + "%"), new SparkProcessAnalyzeTable(GenSparkUtils.getUtils()));
    opRules.put(new RuleRegExp("Remember union", UnionOperator.getOperatorName() + "%"), new NodeProcessor() {

        @Override
        public Object process(Node n, Stack<Node> s, NodeProcessorCtx procCtx, Object... os) throws SemanticException {
            GenSparkProcContext context = (GenSparkProcContext) procCtx;
            UnionOperator union = (UnionOperator) n;
            // simply need to remember that we've seen a union.
            context.currentUnionOperators.add(union);
            return null;
        }
    });
    /**
     *  SMB join case:   (Big)   (Small)  (Small)
     *                     TS       TS       TS
     *                      \       |       /
     *                       \      DS     DS
     *                         \   |    /
     *                         SMBJoinOP
     *
     * Some of the other processors are expecting only one traversal beyond SMBJoinOp.
     * We need to traverse from the big-table path only, and stop traversing on the
     * small-table path once we reach SMBJoinOp.
     * Also add some SMB join information to the context, so we can properly annotate
     * the MapWork later on.
     */
    opRules.put(new TypeRule(SMBMapJoinOperator.class), new NodeProcessor() {

        @Override
        public Object process(Node currNode, Stack<Node> stack, NodeProcessorCtx procCtx, Object... os) throws SemanticException {
            GenSparkProcContext context = (GenSparkProcContext) procCtx;
            SMBMapJoinOperator currSmbNode = (SMBMapJoinOperator) currNode;
            SparkSMBMapJoinInfo smbMapJoinCtx = context.smbMapJoinCtxMap.get(currSmbNode);
            if (smbMapJoinCtx == null) {
                smbMapJoinCtx = new SparkSMBMapJoinInfo();
                context.smbMapJoinCtxMap.put(currSmbNode, smbMapJoinCtx);
            }
            for (Node stackNode : stack) {
                if (stackNode instanceof DummyStoreOperator) {
                    // If coming from small-table side, do some book-keeping, and skip traversal.
                    smbMapJoinCtx.smallTableRootOps.add(context.currentRootOperator);
                    return true;
                }
            }
            // If coming from big-table side, do some book-keeping, and continue traversal
            smbMapJoinCtx.bigTableRootOp = context.currentRootOperator;
            return false;
        }
    });
    // The dispatcher fires the processor corresponding to the closest matching
    // rule and passes the context along
    Dispatcher disp = new DefaultRuleDispatcher(null, opRules, procCtx);
    GraphWalker ogw = new GenSparkWorkWalker(disp, procCtx);
    ogw.startWalking(topNodes, null);
}
Also used : Node(org.apache.hadoop.hive.ql.lib.Node) SMBMapJoinOperator(org.apache.hadoop.hive.ql.exec.SMBMapJoinOperator) Dispatcher(org.apache.hadoop.hive.ql.lib.Dispatcher) DefaultRuleDispatcher(org.apache.hadoop.hive.ql.lib.DefaultRuleDispatcher) LinkedHashMap(java.util.LinkedHashMap) NodeProcessorCtx(org.apache.hadoop.hive.ql.lib.NodeProcessorCtx) UnionOperator(org.apache.hadoop.hive.ql.exec.UnionOperator) GraphWalker(org.apache.hadoop.hive.ql.lib.GraphWalker) DefaultGraphWalker(org.apache.hadoop.hive.ql.lib.DefaultGraphWalker) SemanticException(org.apache.hadoop.hive.ql.parse.SemanticException) MapJoinOperator(org.apache.hadoop.hive.ql.exec.MapJoinOperator) SMBMapJoinOperator(org.apache.hadoop.hive.ql.exec.SMBMapJoinOperator) NodeProcessor(org.apache.hadoop.hive.ql.lib.NodeProcessor) DefaultRuleDispatcher(org.apache.hadoop.hive.ql.lib.DefaultRuleDispatcher) DummyStoreOperator(org.apache.hadoop.hive.ql.exec.DummyStoreOperator) RuleRegExp(org.apache.hadoop.hive.ql.lib.RuleRegExp) CompositeProcessor(org.apache.hadoop.hive.ql.lib.CompositeProcessor) SparkReduceSinkMapJoinProc(org.apache.hadoop.hive.ql.optimizer.spark.SparkReduceSinkMapJoinProc) Rule(org.apache.hadoop.hive.ql.lib.Rule) TypeRule(org.apache.hadoop.hive.ql.lib.TypeRule) TypeRule(org.apache.hadoop.hive.ql.lib.TypeRule)

Aggregations

NodeProcessor (org.apache.hadoop.hive.ql.lib.NodeProcessor)61 Rule (org.apache.hadoop.hive.ql.lib.Rule)61 RuleRegExp (org.apache.hadoop.hive.ql.lib.RuleRegExp)61 LinkedHashMap (java.util.LinkedHashMap)60 Node (org.apache.hadoop.hive.ql.lib.Node)60 GraphWalker (org.apache.hadoop.hive.ql.lib.GraphWalker)59 ArrayList (java.util.ArrayList)58 DefaultRuleDispatcher (org.apache.hadoop.hive.ql.lib.DefaultRuleDispatcher)58 Dispatcher (org.apache.hadoop.hive.ql.lib.Dispatcher)58 DefaultGraphWalker (org.apache.hadoop.hive.ql.lib.DefaultGraphWalker)37 ReduceSinkOperator (org.apache.hadoop.hive.ql.exec.ReduceSinkOperator)10 MapJoinOperator (org.apache.hadoop.hive.ql.exec.MapJoinOperator)8 TableScanOperator (org.apache.hadoop.hive.ql.exec.TableScanOperator)8 ForwardWalker (org.apache.hadoop.hive.ql.lib.ForwardWalker)8 FileSinkOperator (org.apache.hadoop.hive.ql.exec.FileSinkOperator)7 Operator (org.apache.hadoop.hive.ql.exec.Operator)7 TypeRule (org.apache.hadoop.hive.ql.lib.TypeRule)7 HashMap (java.util.HashMap)6 JoinOperator (org.apache.hadoop.hive.ql.exec.JoinOperator)6 UnionOperator (org.apache.hadoop.hive.ql.exec.UnionOperator)6