Search in sources :

Example 1 with RuleExactMatch

use of org.apache.hadoop.hive.ql.lib.RuleExactMatch in project hive by apache.

In class PrunerUtils, method walkOperatorTree:

/**
 * Walks the operator tree of a parsed query so that pruner processors can be applied.
 *
 * <p>Two exact-match rules are registered, both bound to {@code filterProc}: "R1" for a table
 * scan followed by two filters, and "R2" for a table scan followed by a single filter.
 * {@code defaultProc} is handed to the dispatcher as its default processor.
 *
 * @param pctx parse context; its top operators are the nodes the walk starts from
 * @param opWalkerCtx processor context passed to the dispatcher
 * @param filterProc processor associated with both table-scan/filter rules
 * @param defaultProc processor passed to the dispatcher as the default
 * @throws SemanticException declared by the graph walk
 */
public static void walkOperatorTree(ParseContext pctx, NodeProcessorCtx opWalkerCtx, NodeProcessor filterProc, NodeProcessor defaultProc) throws SemanticException {
    final String scanName = TableScanOperator.getOperatorName();
    final String filterName = FilterOperator.getOperatorName();

    // Operator-name sequences the exact-match rules are built from.
    final String[] scanThenTwoFilters = { scanName, filterName, filterName };
    final String[] scanThenOneFilter = { scanName, filterName };

    // LinkedHashMap keeps the registration order: R1 first, then R2.
    Map<Rule, NodeProcessor> rulesToProcs = new LinkedHashMap<Rule, NodeProcessor>();
    rulesToProcs.put(new RuleExactMatch("R1", scanThenTwoFilters), filterProc);
    rulesToProcs.put(new RuleExactMatch("R2", scanThenOneFilter), filterProc);

    // The dispatcher receives the default processor, the rule table and the walker context;
    // the walker drives it over the graph.
    GraphWalker walker = new DefaultGraphWalker(new DefaultRuleDispatcher(defaultProc, rulesToProcs, opWalkerCtx));

    // Start from every top operator known to the parse context.
    ArrayList<Node> startNodes = new ArrayList<Node>(pctx.getTopOps().values());
    walker.startWalking(startNodes, null);
}
Also used : NodeProcessor(org.apache.hadoop.hive.ql.lib.NodeProcessor) DefaultRuleDispatcher(org.apache.hadoop.hive.ql.lib.DefaultRuleDispatcher) DefaultGraphWalker(org.apache.hadoop.hive.ql.lib.DefaultGraphWalker) Node(org.apache.hadoop.hive.ql.lib.Node) ArrayList(java.util.ArrayList) Dispatcher(org.apache.hadoop.hive.ql.lib.Dispatcher) DefaultRuleDispatcher(org.apache.hadoop.hive.ql.lib.DefaultRuleDispatcher) LinkedHashMap(java.util.LinkedHashMap) RuleExactMatch(org.apache.hadoop.hive.ql.lib.RuleExactMatch) Rule(org.apache.hadoop.hive.ql.lib.Rule) TypeRule(org.apache.hadoop.hive.ql.lib.TypeRule) GraphWalker(org.apache.hadoop.hive.ql.lib.GraphWalker) DefaultGraphWalker(org.apache.hadoop.hive.ql.lib.DefaultGraphWalker)

Example 2 with RuleExactMatch

use of org.apache.hadoop.hive.ql.lib.RuleExactMatch in project hive by apache.

In class BucketingSortingInferenceOptimizer, method inferBucketingSorting:

/**
 * Infers, for every final map reduce task that has reduce work, the bucketing and sorting
 * information of the reducer's output and records it on the task's map work.
 *
 * @param mapRedTasks the map reduce tasks to inspect
 * @throws SemanticException declared by the operator-tree walk
 */
private void inferBucketingSorting(List<ExecDriver> mapRedTasks) throws SemanticException {
    for (ExecDriver mapRedTask : mapRedTasks) {
        // Only final map reduce tasks that actually have reduce work are analyzed.
        if (!mapRedTask.getWork().isFinalMapRed() || mapRedTask.getWork().getReduceWork() == null) {
            continue;
        }
        Operator<? extends OperatorDesc> reducer = mapRedTask.getWork().getReduceWork().getReducer();

        // A sampling type greater than zero switches bucketing off in the inference context.
        boolean samplingInUse = mapRedTask.getWork().getMapWork().getSamplingType() > 0;
        BucketingSortingCtx inferenceCtx = new BucketingSortingCtx(samplingInUse);

        // A fresh rule table is built for every task; the dispatcher is given the factory's
        // default processor, the rule table and the inference context.
        Map<Rule, NodeProcessor> rulesToProcs = newBucketingSortingRules();
        GraphWalker walker = new PreOrderWalker(
                new DefaultRuleDispatcher(BucketingSortingOpProcFactory.getDefaultProc(), rulesToProcs, inferenceCtx));

        // The walk starts at the reducer, so it is the first operator the rules see.
        ArrayList<Node> startNodes = new ArrayList<Node>();
        startNodes.add(reducer);
        walker.startWalking(startNodes, null);

        // Copy what the walk collected into the task's map work.
        mapRedTask.getWork().getMapWork().getBucketedColsByDirectory().putAll(inferenceCtx.getBucketedColsByDirectory());
        mapRedTask.getWork().getMapWork().getSortedColsByDirectory().putAll(inferenceCtx.getSortedColsByDirectory());
    }
}

/**
 * Builds the ordered rule-to-processor table used by {@code inferBucketingSorting}.
 *
 * <p>RuleRegExp rules match operators anywhere in the tree, while RuleExactMatch rules spell out
 * the exact operator sequence. Because the walk begins at the reducer, an exact-match rule
 * describes the tree starting from the reducer.
 *
 * @return a new insertion-ordered map from rule to the processor registered for it
 */
private static Map<Rule, NodeProcessor> newBucketingSortingRules() {
    Map<Rule, NodeProcessor> rules = new LinkedHashMap<Rule, NodeProcessor>();

    rules.put(new RuleRegExp("R1", SelectOperator.getOperatorName() + "%"),
            BucketingSortingOpProcFactory.getSelProc());

    // Exact match on a lone GroupByOperator: only group-bys that are the reducer, not map-side
    // group-bys or the operators specific to the multi group by optimization.
    rules.put(new RuleExactMatch("R2", new String[] { GroupByOperator.getOperatorName() }),
            BucketingSortingOpProcFactory.getGroupByProc());

    // Exact match on a lone JoinOperator: only joins that are the reducer, not map joins,
    // SMB map joins, etc.
    rules.put(new RuleExactMatch("R3", new String[] { JoinOperator.getOperatorName() }),
            BucketingSortingOpProcFactory.getJoinProc());

    rules.put(new RuleRegExp("R5", FileSinkOperator.getOperatorName() + "%"),
            BucketingSortingOpProcFactory.getFileSinkProc());
    rules.put(new RuleRegExp("R7", FilterOperator.getOperatorName() + "%"),
            BucketingSortingOpProcFactory.getFilterProc());
    rules.put(new RuleRegExp("R8", LimitOperator.getOperatorName() + "%"),
            BucketingSortingOpProcFactory.getLimitProc());
    rules.put(new RuleRegExp("R9", LateralViewForwardOperator.getOperatorName() + "%"),
            BucketingSortingOpProcFactory.getLateralViewForwardProc());
    rules.put(new RuleRegExp("R10", LateralViewJoinOperator.getOperatorName() + "%"),
            BucketingSortingOpProcFactory.getLateralViewJoinProc());

    // The leading ".+" requires some other operator before the ForwardOperator, so this
    // cannot match a ForwardOperator that is itself the reducer (such as those added by the
    // multi group by optimization).
    rules.put(new RuleRegExp("R11", ".+" + ForwardOperator.getOperatorName() + "%"),
            BucketingSortingOpProcFactory.getForwardProc());

    // A ForwardOperator as reducer, followed by a GroupByOperator (multi group by
    // optimization).
    rules.put(new RuleExactMatch("R12", new String[] { ForwardOperator.getOperatorName(), GroupByOperator.getOperatorName() }),
            BucketingSortingOpProcFactory.getMultiGroupByProc());

    return rules;
}
Also used : NodeProcessor(org.apache.hadoop.hive.ql.lib.NodeProcessor) DefaultRuleDispatcher(org.apache.hadoop.hive.ql.lib.DefaultRuleDispatcher) Node(org.apache.hadoop.hive.ql.lib.Node) RuleRegExp(org.apache.hadoop.hive.ql.lib.RuleRegExp) ArrayList(java.util.ArrayList) Dispatcher(org.apache.hadoop.hive.ql.lib.Dispatcher) DefaultRuleDispatcher(org.apache.hadoop.hive.ql.lib.DefaultRuleDispatcher) LinkedHashMap(java.util.LinkedHashMap) RuleExactMatch(org.apache.hadoop.hive.ql.lib.RuleExactMatch) ExecDriver(org.apache.hadoop.hive.ql.exec.mr.ExecDriver) Rule(org.apache.hadoop.hive.ql.lib.Rule) PreOrderWalker(org.apache.hadoop.hive.ql.lib.PreOrderWalker) GraphWalker(org.apache.hadoop.hive.ql.lib.GraphWalker)

Aggregations

ArrayList (java.util.ArrayList): 2, LinkedHashMap (java.util.LinkedHashMap): 2, DefaultRuleDispatcher (org.apache.hadoop.hive.ql.lib.DefaultRuleDispatcher): 2, Dispatcher (org.apache.hadoop.hive.ql.lib.Dispatcher): 2, GraphWalker (org.apache.hadoop.hive.ql.lib.GraphWalker): 2, Node (org.apache.hadoop.hive.ql.lib.Node): 2, NodeProcessor (org.apache.hadoop.hive.ql.lib.NodeProcessor): 2, Rule (org.apache.hadoop.hive.ql.lib.Rule): 2, RuleExactMatch (org.apache.hadoop.hive.ql.lib.RuleExactMatch): 2, ExecDriver (org.apache.hadoop.hive.ql.exec.mr.ExecDriver): 1, DefaultGraphWalker (org.apache.hadoop.hive.ql.lib.DefaultGraphWalker): 1, PreOrderWalker (org.apache.hadoop.hive.ql.lib.PreOrderWalker): 1, RuleRegExp (org.apache.hadoop.hive.ql.lib.RuleRegExp): 1, TypeRule (org.apache.hadoop.hive.ql.lib.TypeRule): 1