Search in sources:

Example 71 with RuleRegExp

use of org.apache.hadoop.hive.ql.lib.RuleRegExp in project hive by apache.

In class TezCompiler, method removeSemiJoinCyclesDueToMapsideJoins:

/**
 * Removes semijoin branches that form a cycle with a map-side join.
 *
 * <p>Returns immediately unless dynamic semijoin reduction is enabled in the configuration and
 * the parse context holds at least one semijoin branch. Otherwise the operator graph is walked
 * from the top operators with rules R1-R4, which cover the four ordered pairings of the map join
 * and common merge join operator names. The walk is expected to populate
 * {@code childParentMap} on the walk context (the processor that does so is not part of this
 * method).
 *
 * <p>For every entry of that map whose key operator has more than one child, each child that is
 * a {@code SelectOperator} is followed through its first-child chain to the last operator. If
 * that operator is a {@code ReduceSinkOperator} registered as a semijoin branch, the other
 * inputs of the mapped join are followed through their first-parent chains; when one of them
 * ends at the semijoin's target table scan, the semijoin branch is removed.
 *
 * @param procCtx optimization context supplying the configuration and the parse context
 * @throws SemanticException if the semijoin that forms the cycle was requested through a hint
 */
private static void removeSemiJoinCyclesDueToMapsideJoins(OptimizeTezProcContext procCtx) throws SemanticException {
    boolean semiJoinReductionEnabled = procCtx.conf.getBoolVar(ConfVars.TEZ_DYNAMIC_SEMIJOIN_REDUCTION);
    if (!semiJoinReductionEnabled || procCtx.parseContext.getRsToSemiJoinBranchInfo().isEmpty()) {
        return;
    }

    // One rule per ordered pairing of map join / merge join operator names.
    String mapJoinName = MapJoinOperator.getOperatorName();
    String mergeJoinName = CommonMergeJoinOperator.getOperatorName();
    Map<Rule, NodeProcessor> opRules = new LinkedHashMap<Rule, NodeProcessor>();
    opRules.put(new RuleRegExp("R1", mapJoinName + "%" + mapJoinName + "%"), new SemiJoinCycleRemovalDueToMapsideJoins());
    opRules.put(new RuleRegExp("R2", mapJoinName + "%" + mergeJoinName + "%"), new SemiJoinCycleRemovalDueToMapsideJoins());
    opRules.put(new RuleRegExp("R3", mergeJoinName + "%" + mapJoinName + "%"), new SemiJoinCycleRemovalDueToMapsideJoins());
    opRules.put(new RuleRegExp("R4", mergeJoinName + "%" + mergeJoinName + "%"), new SemiJoinCycleRemovalDueToMapsideJoins());

    // Walk the graph from the top operators; results are read back from walkCtx below.
    SemiJoinCycleRemovalDueTOMapsideJoinContext walkCtx = new SemiJoinCycleRemovalDueTOMapsideJoinContext();
    Dispatcher dispatcher = new DefaultRuleDispatcher(null, opRules, walkCtx);
    List<Node> startNodes = new ArrayList<Node>(procCtx.parseContext.getTopOps().values());
    GraphWalker walker = new PreOrderOnceWalker(dispatcher);
    walker.startWalking(startNodes, null);

    // Examine every join pair recorded during the walk.
    ParseContext pCtx = procCtx.parseContext;
    for (Operator<?> parentJoin : walkCtx.childParentMap.keySet()) {
        Operator<?> childJoin = walkCtx.childParentMap.get(parentJoin);
        // A single child leaves no side branch to inspect.
        if (parentJoin.getChildOperators().size() == 1) {
            continue;
        }
        for (Operator<?> branchHead : parentJoin.getChildOperators()) {
            if (!(branchHead instanceof SelectOperator)) {
                continue;
            }
            // Follow the first child at every step until an operator without children is reached.
            Operator<?> branchTail = branchHead;
            while (!branchTail.getChildOperators().isEmpty()) {
                branchTail = branchTail.getChildOperators().get(0);
            }
            if (!(branchTail instanceof ReduceSinkOperator)) {
                continue;
            }
            ReduceSinkOperator rs = (ReduceSinkOperator) branchTail;
            SemiJoinBranchInfo sjInfo = pCtx.getRsToSemiJoinBranchInfo().get(rs);
            if (sjInfo == null) {
                // Not a registered semijoin branch.
                continue;
            }
            TableScanOperator ts = sjInfo.getTsOp();
            // Look for a cycle with childJoin: one of its other inputs originating at ts.
            for (Operator<?> joinInput : childJoin.getParentOperators()) {
                if (joinInput == parentJoin) {
                    continue;
                }
                assert joinInput instanceof ReduceSinkOperator;
                // Follow the first parent at every step until an operator without parents is reached.
                Operator<?> inputRoot = joinInput;
                while (!inputRoot.getParentOperators().isEmpty()) {
                    inputRoot = inputRoot.getParentOperators().get(0);
                }
                if (inputRoot != ts) {
                    continue;
                }
                // We have a cycle!
                if (sjInfo.getIsHint()) {
                    throw new SemanticException("Removing hinted semijoin as it is creating cycles with mapside joins " + rs + " : " + ts);
                }
                if (LOG.isDebugEnabled()) {
                    LOG.debug("Semijoin cycle due to mapjoin. Removing semijoin " + OperatorUtils.getOpNamePretty(rs) + " - " + OperatorUtils.getOpNamePretty(ts));
                }
                GenTezUtils.removeBranch(rs);
                GenTezUtils.removeSemiJoinOperator(pCtx, rs, ts);
            }
        }
    }
}
Also used : TableScanOperator(org.apache.hadoop.hive.ql.exec.TableScanOperator) NodeProcessor(org.apache.hadoop.hive.ql.lib.NodeProcessor) DefaultRuleDispatcher(org.apache.hadoop.hive.ql.lib.DefaultRuleDispatcher) Node(org.apache.hadoop.hive.ql.lib.Node) RuleRegExp(org.apache.hadoop.hive.ql.lib.RuleRegExp) ArrayList(java.util.ArrayList) Dispatcher(org.apache.hadoop.hive.ql.lib.Dispatcher) DefaultRuleDispatcher(org.apache.hadoop.hive.ql.lib.DefaultRuleDispatcher) LinkedHashMap(java.util.LinkedHashMap) SelectOperator(org.apache.hadoop.hive.ql.exec.SelectOperator) ReduceSinkOperator(org.apache.hadoop.hive.ql.exec.ReduceSinkOperator) PreOrderOnceWalker(org.apache.hadoop.hive.ql.lib.PreOrderOnceWalker) Rule(org.apache.hadoop.hive.ql.lib.Rule) GraphWalker(org.apache.hadoop.hive.ql.lib.GraphWalker)

Example 72 with RuleRegExp

use of org.apache.hadoop.hive.ql.lib.RuleRegExp in project hive by apache.

In class TezCompiler, method runStatsDependentOptimizations:

/**
 * Walks the operator tree from the top operators and applies the statistics-dependent
 * optimizations: {@code SetReducerParallelism} on ReduceSink operators and
 * {@code ConvertJoinMapJoin} on Join operators.
 *
 * @param procCtx optimization context; supplies the top operators and is passed to the
 *                dispatcher as the processing context
 * @param inputs  read entities of the query (not read by this method)
 * @param outputs write entities of the query (not read by this method)
 * @throws SemanticException if the graph walk or one of the processors fails
 */
private void runStatsDependentOptimizations(OptimizeTezProcContext procCtx, Set<ReadEntity> inputs, Set<WriteEntity> outputs) throws SemanticException {
    // Rules are kept in insertion order; each maps an operator-name pattern to its processor.
    Map<Rule, NodeProcessor> opRules = new LinkedHashMap<Rule, NodeProcessor>();
    opRules.put(new RuleRegExp("Set parallelism - ReduceSink", ReduceSinkOperator.getOperatorName() + "%"), new SetReducerParallelism());
    opRules.put(new RuleRegExp("Convert Join to Map-join", JoinOperator.getOperatorName() + "%"), new ConvertJoinMapJoin());
    // The dispatcher fires the processor corresponding to the closest matching
    // rule and passes the context along
    Dispatcher disp = new DefaultRuleDispatcher(null, opRules, procCtx);
    // The walk starts from the top operators of the parse context. The separate deque that
    // used to be filled from the same collection was never read and has been removed.
    List<Node> topNodes = new ArrayList<Node>();
    topNodes.addAll(procCtx.parseContext.getTopOps().values());
    GraphWalker ogw = new ForwardWalker(disp);
    ogw.startWalking(topNodes, null);
}
Also used : CommonMergeJoinOperator(org.apache.hadoop.hive.ql.exec.CommonMergeJoinOperator) ReduceSinkOperator(org.apache.hadoop.hive.ql.exec.ReduceSinkOperator) MapJoinOperator(org.apache.hadoop.hive.ql.exec.MapJoinOperator) GroupByOperator(org.apache.hadoop.hive.ql.exec.GroupByOperator) UnionOperator(org.apache.hadoop.hive.ql.exec.UnionOperator) FileSinkOperator(org.apache.hadoop.hive.ql.exec.FileSinkOperator) SelectOperator(org.apache.hadoop.hive.ql.exec.SelectOperator) FilterOperator(org.apache.hadoop.hive.ql.exec.FilterOperator) TezDummyStoreOperator(org.apache.hadoop.hive.ql.exec.TezDummyStoreOperator) AppMasterEventOperator(org.apache.hadoop.hive.ql.exec.AppMasterEventOperator) JoinOperator(org.apache.hadoop.hive.ql.exec.JoinOperator) TableScanOperator(org.apache.hadoop.hive.ql.exec.TableScanOperator) Operator(org.apache.hadoop.hive.ql.exec.Operator) DummyStoreOperator(org.apache.hadoop.hive.ql.exec.DummyStoreOperator) NodeProcessor(org.apache.hadoop.hive.ql.lib.NodeProcessor) ForwardWalker(org.apache.hadoop.hive.ql.lib.ForwardWalker) DefaultRuleDispatcher(org.apache.hadoop.hive.ql.lib.DefaultRuleDispatcher) Node(org.apache.hadoop.hive.ql.lib.Node) RuleRegExp(org.apache.hadoop.hive.ql.lib.RuleRegExp) ArrayList(java.util.ArrayList) SetReducerParallelism(org.apache.hadoop.hive.ql.optimizer.SetReducerParallelism) Dispatcher(org.apache.hadoop.hive.ql.lib.Dispatcher) DefaultRuleDispatcher(org.apache.hadoop.hive.ql.lib.DefaultRuleDispatcher) LinkedList(java.util.LinkedList) LinkedHashMap(java.util.LinkedHashMap) ConvertJoinMapJoin(org.apache.hadoop.hive.ql.optimizer.ConvertJoinMapJoin) Rule(org.apache.hadoop.hive.ql.lib.Rule) GraphWalker(org.apache.hadoop.hive.ql.lib.GraphWalker)

Example 73 with RuleRegExp

use of org.apache.hadoop.hive.ql.lib.RuleRegExp in project flink by apache.

In class HiveParserTypeCheckProcFactory, method genExprNode:

/**
 * Type-checks the expression AST rooted at {@code expr}.
 *
 * <p>Registers rules R1-R8, each mapping a set of parser token types to one of the processors
 * supplied by {@code tf}, walks the tree with a {@code HiveParserExpressionWalker}, and passes
 * the collected per-node outputs to {@code convert}.
 *
 * @param expr  root of the expression AST to process
 * @param tcCtx type-check context handed to the dispatcher
 * @param tf    factory supplying the per-token processors and the default processor
 * @return the result of {@code convert} applied to the walker's node outputs
 * @throws SemanticException if the walk or one of the processors fails
 */
public static Map<HiveParserASTNode, ExprNodeDesc> genExprNode(HiveParserASTNode expr, HiveParserTypeCheckCtx tcCtx, HiveParserTypeCheckProcFactory tf) throws SemanticException {
    // Each rule pattern is an alternation of "<tokenType>%" entries; the map preserves
    // insertion order.
    Map<Rule, NodeProcessor> opRules = new LinkedHashMap<>();
    opRules.put(new RuleRegExp("R1", HiveASTParser.TOK_NULL + "%"), tf.getNullExprProcessor());
    opRules.put(
            new RuleRegExp(
                    "R2",
                    HiveASTParser.Number + "%|"
                            + HiveASTParser.IntegralLiteral + "%|"
                            + HiveASTParser.NumberLiteral + "%"),
            tf.getNumExprProcessor());
    // The previous pattern had a doubled "%|" separator after TOK_STRINGLITERALSEQUENCE, which
    // added a bare "%" alternative that names no token type; that stray alternative is dropped.
    opRules.put(
            new RuleRegExp(
                    "R3",
                    HiveASTParser.Identifier + "%|"
                            + HiveASTParser.StringLiteral + "%|"
                            + HiveASTParser.TOK_CHARSETLITERAL + "%|"
                            + HiveASTParser.TOK_STRINGLITERALSEQUENCE + "%|"
                            + HiveASTParser.KW_IF + "%|"
                            + HiveASTParser.KW_CASE + "%|"
                            + HiveASTParser.KW_WHEN + "%|"
                            + HiveASTParser.KW_IN + "%|"
                            + HiveASTParser.KW_ARRAY + "%|"
                            + HiveASTParser.KW_MAP + "%|"
                            + HiveASTParser.KW_STRUCT + "%|"
                            + HiveASTParser.KW_EXISTS + "%|"
                            + HiveASTParser.TOK_SUBQUERY_OP_NOTIN + "%"),
            tf.getStrExprProcessor());
    opRules.put(
            new RuleRegExp("R4", HiveASTParser.KW_TRUE + "%|" + HiveASTParser.KW_FALSE + "%"),
            tf.getBoolExprProcessor());
    opRules.put(
            new RuleRegExp(
                    "R5",
                    HiveASTParser.TOK_DATELITERAL + "%|" + HiveASTParser.TOK_TIMESTAMPLITERAL + "%"),
            tf.getDateTimeExprProcessor());
    opRules.put(
            new RuleRegExp(
                    "R6",
                    HiveASTParser.TOK_INTERVAL_YEAR_MONTH_LITERAL + "%|"
                            + HiveASTParser.TOK_INTERVAL_DAY_TIME_LITERAL + "%|"
                            + HiveASTParser.TOK_INTERVAL_YEAR_LITERAL + "%|"
                            + HiveASTParser.TOK_INTERVAL_MONTH_LITERAL + "%|"
                            + HiveASTParser.TOK_INTERVAL_DAY_LITERAL + "%|"
                            + HiveASTParser.TOK_INTERVAL_HOUR_LITERAL + "%|"
                            + HiveASTParser.TOK_INTERVAL_MINUTE_LITERAL + "%|"
                            + HiveASTParser.TOK_INTERVAL_SECOND_LITERAL + "%"),
            tf.getIntervalExprProcessor());
    opRules.put(new RuleRegExp("R7", HiveASTParser.TOK_TABLE_OR_COL + "%"), tf.getColumnExprProcessor());
    opRules.put(new RuleRegExp("R8", HiveASTParser.TOK_SUBQUERY_EXPR + "%"), tf.getSubQueryExprProcessor());
    // The dispatcher fires the processor corresponding to the closest matching
    // rule and passes the context along; nodes matching no rule go to the default processor.
    Dispatcher disp = new DefaultRuleDispatcher(tf.getDefaultExprProcessor(), opRules, tcCtx);
    GraphWalker ogw = new HiveParserExpressionWalker(disp);
    // The walk starts from the single expression root.
    ArrayList<Node> topNodes = new ArrayList<>(Collections.singleton(expr));
    // Receives the per-node outputs produced during the walk.
    HashMap<Node, Object> nodeOutputs = new LinkedHashMap<>();
    ogw.startWalking(topNodes, nodeOutputs);
    return convert(nodeOutputs);
}
Also used : NodeProcessor(org.apache.hadoop.hive.ql.lib.NodeProcessor) DefaultRuleDispatcher(org.apache.hadoop.hive.ql.lib.DefaultRuleDispatcher) HiveParserASTNode(org.apache.flink.table.planner.delegation.hive.copy.HiveParserASTNode) RexNode(org.apache.calcite.rex.RexNode) RelNode(org.apache.calcite.rel.RelNode) Node(org.apache.hadoop.hive.ql.lib.Node) RuleRegExp(org.apache.hadoop.hive.ql.lib.RuleRegExp) ArrayList(java.util.ArrayList) Dispatcher(org.apache.hadoop.hive.ql.lib.Dispatcher) DefaultRuleDispatcher(org.apache.hadoop.hive.ql.lib.DefaultRuleDispatcher) LinkedHashMap(java.util.LinkedHashMap) Rule(org.apache.hadoop.hive.ql.lib.Rule) GraphWalker(org.apache.hadoop.hive.ql.lib.GraphWalker) HiveParserExpressionWalker(org.apache.flink.table.planner.delegation.hive.copy.HiveParserExpressionWalker)

Example 74 with RuleRegExp

use of org.apache.hadoop.hive.ql.lib.RuleRegExp in project flink by splunk.

In class HiveParserTypeCheckProcFactory, method genExprNode:

/**
 * Type-checks the expression AST rooted at {@code expr}.
 *
 * <p>Registers rules R1-R8, each mapping a set of parser token types to one of the processors
 * supplied by {@code tf}, walks the tree with a {@code HiveParserExpressionWalker}, and passes
 * the collected per-node outputs to {@code convert}.
 *
 * @param expr  root of the expression AST to process
 * @param tcCtx type-check context handed to the dispatcher
 * @param tf    factory supplying the per-token processors and the default processor
 * @return the result of {@code convert} applied to the walker's node outputs
 * @throws SemanticException if the walk or one of the processors fails
 */
public static Map<HiveParserASTNode, ExprNodeDesc> genExprNode(HiveParserASTNode expr, HiveParserTypeCheckCtx tcCtx, HiveParserTypeCheckProcFactory tf) throws SemanticException {
    // Each rule pattern is an alternation of "<tokenType>%" entries; the map preserves
    // insertion order.
    Map<Rule, NodeProcessor> opRules = new LinkedHashMap<>();
    opRules.put(new RuleRegExp("R1", HiveASTParser.TOK_NULL + "%"), tf.getNullExprProcessor());
    opRules.put(
            new RuleRegExp(
                    "R2",
                    HiveASTParser.Number + "%|"
                            + HiveASTParser.IntegralLiteral + "%|"
                            + HiveASTParser.NumberLiteral + "%"),
            tf.getNumExprProcessor());
    // The previous pattern had a doubled "%|" separator after TOK_STRINGLITERALSEQUENCE, which
    // added a bare "%" alternative that names no token type; that stray alternative is dropped.
    opRules.put(
            new RuleRegExp(
                    "R3",
                    HiveASTParser.Identifier + "%|"
                            + HiveASTParser.StringLiteral + "%|"
                            + HiveASTParser.TOK_CHARSETLITERAL + "%|"
                            + HiveASTParser.TOK_STRINGLITERALSEQUENCE + "%|"
                            + HiveASTParser.KW_IF + "%|"
                            + HiveASTParser.KW_CASE + "%|"
                            + HiveASTParser.KW_WHEN + "%|"
                            + HiveASTParser.KW_IN + "%|"
                            + HiveASTParser.KW_ARRAY + "%|"
                            + HiveASTParser.KW_MAP + "%|"
                            + HiveASTParser.KW_STRUCT + "%|"
                            + HiveASTParser.KW_EXISTS + "%|"
                            + HiveASTParser.TOK_SUBQUERY_OP_NOTIN + "%"),
            tf.getStrExprProcessor());
    opRules.put(
            new RuleRegExp("R4", HiveASTParser.KW_TRUE + "%|" + HiveASTParser.KW_FALSE + "%"),
            tf.getBoolExprProcessor());
    opRules.put(
            new RuleRegExp(
                    "R5",
                    HiveASTParser.TOK_DATELITERAL + "%|" + HiveASTParser.TOK_TIMESTAMPLITERAL + "%"),
            tf.getDateTimeExprProcessor());
    opRules.put(
            new RuleRegExp(
                    "R6",
                    HiveASTParser.TOK_INTERVAL_YEAR_MONTH_LITERAL + "%|"
                            + HiveASTParser.TOK_INTERVAL_DAY_TIME_LITERAL + "%|"
                            + HiveASTParser.TOK_INTERVAL_YEAR_LITERAL + "%|"
                            + HiveASTParser.TOK_INTERVAL_MONTH_LITERAL + "%|"
                            + HiveASTParser.TOK_INTERVAL_DAY_LITERAL + "%|"
                            + HiveASTParser.TOK_INTERVAL_HOUR_LITERAL + "%|"
                            + HiveASTParser.TOK_INTERVAL_MINUTE_LITERAL + "%|"
                            + HiveASTParser.TOK_INTERVAL_SECOND_LITERAL + "%"),
            tf.getIntervalExprProcessor());
    opRules.put(new RuleRegExp("R7", HiveASTParser.TOK_TABLE_OR_COL + "%"), tf.getColumnExprProcessor());
    opRules.put(new RuleRegExp("R8", HiveASTParser.TOK_SUBQUERY_EXPR + "%"), tf.getSubQueryExprProcessor());
    // The dispatcher fires the processor corresponding to the closest matching
    // rule and passes the context along; nodes matching no rule go to the default processor.
    Dispatcher disp = new DefaultRuleDispatcher(tf.getDefaultExprProcessor(), opRules, tcCtx);
    GraphWalker ogw = new HiveParserExpressionWalker(disp);
    // The walk starts from the single expression root.
    ArrayList<Node> topNodes = new ArrayList<>(Collections.singleton(expr));
    // Receives the per-node outputs produced during the walk.
    HashMap<Node, Object> nodeOutputs = new LinkedHashMap<>();
    ogw.startWalking(topNodes, nodeOutputs);
    return convert(nodeOutputs);
}
Also used : NodeProcessor(org.apache.hadoop.hive.ql.lib.NodeProcessor) DefaultRuleDispatcher(org.apache.hadoop.hive.ql.lib.DefaultRuleDispatcher) HiveParserASTNode(org.apache.flink.table.planner.delegation.hive.copy.HiveParserASTNode) RexNode(org.apache.calcite.rex.RexNode) RelNode(org.apache.calcite.rel.RelNode) Node(org.apache.hadoop.hive.ql.lib.Node) RuleRegExp(org.apache.hadoop.hive.ql.lib.RuleRegExp) ArrayList(java.util.ArrayList) Dispatcher(org.apache.hadoop.hive.ql.lib.Dispatcher) DefaultRuleDispatcher(org.apache.hadoop.hive.ql.lib.DefaultRuleDispatcher) LinkedHashMap(java.util.LinkedHashMap) Rule(org.apache.hadoop.hive.ql.lib.Rule) GraphWalker(org.apache.hadoop.hive.ql.lib.GraphWalker) HiveParserExpressionWalker(org.apache.flink.table.planner.delegation.hive.copy.HiveParserExpressionWalker)

Aggregations

RuleRegExp (org.apache.hadoop.hive.ql.lib.RuleRegExp): 74; LinkedHashMap (java.util.LinkedHashMap): 73; Node (org.apache.hadoop.hive.ql.lib.Node): 72; DefaultRuleDispatcher (org.apache.hadoop.hive.ql.lib.DefaultRuleDispatcher): 71; ArrayList (java.util.ArrayList): 70; SemanticNodeProcessor (org.apache.hadoop.hive.ql.lib.SemanticNodeProcessor): 57; SemanticRule (org.apache.hadoop.hive.ql.lib.SemanticRule): 57; SemanticGraphWalker (org.apache.hadoop.hive.ql.lib.SemanticGraphWalker): 56; SemanticDispatcher (org.apache.hadoop.hive.ql.lib.SemanticDispatcher): 55; DefaultGraphWalker (org.apache.hadoop.hive.ql.lib.DefaultGraphWalker): 33; NodeProcessor (org.apache.hadoop.hive.ql.lib.NodeProcessor): 17; Rule (org.apache.hadoop.hive.ql.lib.Rule): 17; ReduceSinkOperator (org.apache.hadoop.hive.ql.exec.ReduceSinkOperator): 16; GraphWalker (org.apache.hadoop.hive.ql.lib.GraphWalker): 16; Dispatcher (org.apache.hadoop.hive.ql.lib.Dispatcher): 15; FileSinkOperator (org.apache.hadoop.hive.ql.exec.FileSinkOperator): 14; MapJoinOperator (org.apache.hadoop.hive.ql.exec.MapJoinOperator): 13; TableScanOperator (org.apache.hadoop.hive.ql.exec.TableScanOperator): 13; Operator (org.apache.hadoop.hive.ql.exec.Operator): 12; UnionOperator (org.apache.hadoop.hive.ql.exec.UnionOperator): 11