Search in sources :

Example 51 with SemanticNodeProcessor

use of org.apache.hadoop.hive.ql.lib.SemanticNodeProcessor in project hive by apache.

the class TezCompiler method removeSemiJoinIfNoStats.

private void removeSemiJoinIfNoStats(OptimizeTezProcContext procCtx) throws SemanticException {
    Map<SemanticRule, SemanticNodeProcessor> opRules = new LinkedHashMap<SemanticRule, SemanticNodeProcessor>();
    opRules.put(new RuleRegExp("R1", GroupByOperator.getOperatorName() + "%" + ReduceSinkOperator.getOperatorName() + "%" + GroupByOperator.getOperatorName() + "%" + ReduceSinkOperator.getOperatorName() + "%"), new SemiJoinRemovalProc(true, false));
    SemiJoinRemovalContext ctx = new SemiJoinRemovalContext(procCtx.parseContext);
    SemanticDispatcher disp = new DefaultRuleDispatcher(null, opRules, ctx);
    List<Node> topNodes = new ArrayList<Node>();
    topNodes.addAll(procCtx.parseContext.getTopOps().values());
    SemanticGraphWalker ogw = new PreOrderOnceWalker(disp);
    ogw.startWalking(topNodes, null);
}
Also used : SemanticRule(org.apache.hadoop.hive.ql.lib.SemanticRule) DefaultRuleDispatcher(org.apache.hadoop.hive.ql.lib.DefaultRuleDispatcher) Node(org.apache.hadoop.hive.ql.lib.Node) RuleRegExp(org.apache.hadoop.hive.ql.lib.RuleRegExp) ArrayList(java.util.ArrayList) SemanticGraphWalker(org.apache.hadoop.hive.ql.lib.SemanticGraphWalker) LinkedHashMap(java.util.LinkedHashMap) SemanticDispatcher(org.apache.hadoop.hive.ql.lib.SemanticDispatcher) SemanticNodeProcessor(org.apache.hadoop.hive.ql.lib.SemanticNodeProcessor) PreOrderOnceWalker(org.apache.hadoop.hive.ql.lib.PreOrderOnceWalker)

Example 52 with SemanticNodeProcessor

use of org.apache.hadoop.hive.ql.lib.SemanticNodeProcessor in project hive by apache.

the class TezCompiler method runDynamicPartitionPruning.

private void runDynamicPartitionPruning(OptimizeTezProcContext procCtx) throws SemanticException {
    if (!procCtx.conf.getBoolVar(ConfVars.TEZ_DYNAMIC_PARTITION_PRUNING)) {
        return;
    }
    // Sequence of TableScan operators to be walked
    Deque<Operator<?>> deque = new LinkedList<Operator<?>>();
    deque.addAll(procCtx.parseContext.getTopOps().values());
    Map<SemanticRule, SemanticNodeProcessor> opRules = new LinkedHashMap<SemanticRule, SemanticNodeProcessor>();
    opRules.put(new RuleRegExp(new String("Dynamic Partition Pruning"), FilterOperator.getOperatorName() + "%"), new DynamicPartitionPruningOptimization());
    // The dispatcher fires the processor corresponding to the closest matching
    // rule and passes the context along
    SemanticDispatcher disp = new DefaultRuleDispatcher(null, opRules, procCtx);
    List<Node> topNodes = new ArrayList<Node>();
    topNodes.addAll(procCtx.parseContext.getTopOps().values());
    SemanticGraphWalker ogw = new ForwardWalker(disp);
    ogw.startWalking(topNodes, null);
}
Also used : CommonMergeJoinOperator(org.apache.hadoop.hive.ql.exec.CommonMergeJoinOperator) ReduceSinkOperator(org.apache.hadoop.hive.ql.exec.ReduceSinkOperator) SelectOperator(org.apache.hadoop.hive.ql.exec.SelectOperator) TezDummyStoreOperator(org.apache.hadoop.hive.ql.exec.TezDummyStoreOperator) JoinOperator(org.apache.hadoop.hive.ql.exec.JoinOperator) TableScanOperator(org.apache.hadoop.hive.ql.exec.TableScanOperator) Operator(org.apache.hadoop.hive.ql.exec.Operator) MapJoinOperator(org.apache.hadoop.hive.ql.exec.MapJoinOperator) GroupByOperator(org.apache.hadoop.hive.ql.exec.GroupByOperator) UnionOperator(org.apache.hadoop.hive.ql.exec.UnionOperator) FileSinkOperator(org.apache.hadoop.hive.ql.exec.FileSinkOperator) FilterOperator(org.apache.hadoop.hive.ql.exec.FilterOperator) AppMasterEventOperator(org.apache.hadoop.hive.ql.exec.AppMasterEventOperator) TopNKeyOperator(org.apache.hadoop.hive.ql.exec.TopNKeyOperator) TerminalOperator(org.apache.hadoop.hive.ql.exec.TerminalOperator) DummyStoreOperator(org.apache.hadoop.hive.ql.exec.DummyStoreOperator) SemanticRule(org.apache.hadoop.hive.ql.lib.SemanticRule) ForwardWalker(org.apache.hadoop.hive.ql.lib.ForwardWalker) DefaultRuleDispatcher(org.apache.hadoop.hive.ql.lib.DefaultRuleDispatcher) Node(org.apache.hadoop.hive.ql.lib.Node) RuleRegExp(org.apache.hadoop.hive.ql.lib.RuleRegExp) ArrayList(java.util.ArrayList) SemanticGraphWalker(org.apache.hadoop.hive.ql.lib.SemanticGraphWalker) LinkedList(java.util.LinkedList) LinkedHashMap(java.util.LinkedHashMap) DynamicPartitionPruningOptimization(org.apache.hadoop.hive.ql.optimizer.DynamicPartitionPruningOptimization) SemanticDispatcher(org.apache.hadoop.hive.ql.lib.SemanticDispatcher) SemanticNodeProcessor(org.apache.hadoop.hive.ql.lib.SemanticNodeProcessor)

Example 53 with SemanticNodeProcessor

use of org.apache.hadoop.hive.ql.lib.SemanticNodeProcessor in project hive by apache.

the class TezCompiler method runRemoveDynamicPruningOptimization.

private void runRemoveDynamicPruningOptimization(OptimizeTezProcContext procCtx) throws SemanticException {
    // Sequence of TableScan operators to be walked
    Deque<Operator<?>> deque = new LinkedList<Operator<?>>();
    deque.addAll(procCtx.parseContext.getTopOps().values());
    // create a walker which walks the tree in a DFS manner while maintaining
    // the operator stack.
    Map<SemanticRule, SemanticNodeProcessor> opRules = new LinkedHashMap<SemanticRule, SemanticNodeProcessor>();
    opRules.put(new RuleRegExp("Remove dynamic pruning by size", AppMasterEventOperator.getOperatorName() + "%"), new RemoveDynamicPruningBySize());
    // The dispatcher fires the processor corresponding to the closest matching
    // rule and passes the context along
    SemanticDispatcher disp = new DefaultRuleDispatcher(null, opRules, procCtx);
    List<Node> topNodes = new ArrayList<Node>();
    topNodes.addAll(procCtx.parseContext.getTopOps().values());
    SemanticGraphWalker ogw = new ForwardWalker(disp);
    ogw.startWalking(topNodes, null);
}
Also used : CommonMergeJoinOperator(org.apache.hadoop.hive.ql.exec.CommonMergeJoinOperator) ReduceSinkOperator(org.apache.hadoop.hive.ql.exec.ReduceSinkOperator) SelectOperator(org.apache.hadoop.hive.ql.exec.SelectOperator) TezDummyStoreOperator(org.apache.hadoop.hive.ql.exec.TezDummyStoreOperator) JoinOperator(org.apache.hadoop.hive.ql.exec.JoinOperator) TableScanOperator(org.apache.hadoop.hive.ql.exec.TableScanOperator) Operator(org.apache.hadoop.hive.ql.exec.Operator) MapJoinOperator(org.apache.hadoop.hive.ql.exec.MapJoinOperator) GroupByOperator(org.apache.hadoop.hive.ql.exec.GroupByOperator) UnionOperator(org.apache.hadoop.hive.ql.exec.UnionOperator) FileSinkOperator(org.apache.hadoop.hive.ql.exec.FileSinkOperator) FilterOperator(org.apache.hadoop.hive.ql.exec.FilterOperator) AppMasterEventOperator(org.apache.hadoop.hive.ql.exec.AppMasterEventOperator) TopNKeyOperator(org.apache.hadoop.hive.ql.exec.TopNKeyOperator) TerminalOperator(org.apache.hadoop.hive.ql.exec.TerminalOperator) DummyStoreOperator(org.apache.hadoop.hive.ql.exec.DummyStoreOperator) SemanticRule(org.apache.hadoop.hive.ql.lib.SemanticRule) ForwardWalker(org.apache.hadoop.hive.ql.lib.ForwardWalker) DefaultRuleDispatcher(org.apache.hadoop.hive.ql.lib.DefaultRuleDispatcher) Node(org.apache.hadoop.hive.ql.lib.Node) RuleRegExp(org.apache.hadoop.hive.ql.lib.RuleRegExp) ArrayList(java.util.ArrayList) SemanticGraphWalker(org.apache.hadoop.hive.ql.lib.SemanticGraphWalker) LinkedList(java.util.LinkedList) RemoveDynamicPruningBySize(org.apache.hadoop.hive.ql.optimizer.RemoveDynamicPruningBySize) LinkedHashMap(java.util.LinkedHashMap) SemanticDispatcher(org.apache.hadoop.hive.ql.lib.SemanticDispatcher) SemanticNodeProcessor(org.apache.hadoop.hive.ql.lib.SemanticNodeProcessor)

Example 54 with SemanticNodeProcessor

use of org.apache.hadoop.hive.ql.lib.SemanticNodeProcessor in project hive by apache.

the class TableAccessAnalyzer method analyzeTableAccess.

public TableAccessInfo analyzeTableAccess() throws SemanticException {
    // Set up the rules for the graph walker for group by and join operators
    Map<SemanticRule, SemanticNodeProcessor> opRules = new LinkedHashMap<SemanticRule, SemanticNodeProcessor>();
    opRules.put(new RuleRegExp("R1", GroupByOperator.getOperatorName() + "%"), new GroupByProcessor(pGraphContext));
    opRules.put(new RuleRegExp("R2", JoinOperator.getOperatorName() + "%"), new JoinProcessor(pGraphContext));
    opRules.put(new RuleRegExp("R3", MapJoinOperator.getOperatorName() + "%"), new JoinProcessor(pGraphContext));
    TableAccessCtx tableAccessCtx = new TableAccessCtx();
    SemanticDispatcher disp = new DefaultRuleDispatcher(getDefaultProc(), opRules, tableAccessCtx);
    SemanticGraphWalker ogw = new DefaultGraphWalker(disp);
    // Create a list of topop nodes and walk!
    List<Node> topNodes = new ArrayList<Node>();
    topNodes.addAll(pGraphContext.getTopOps().values());
    ogw.startWalking(topNodes, null);
    return tableAccessCtx.getTableAccessInfo();
}
Also used : SemanticRule(org.apache.hadoop.hive.ql.lib.SemanticRule) DefaultRuleDispatcher(org.apache.hadoop.hive.ql.lib.DefaultRuleDispatcher) DefaultGraphWalker(org.apache.hadoop.hive.ql.lib.DefaultGraphWalker) Node(org.apache.hadoop.hive.ql.lib.Node) RuleRegExp(org.apache.hadoop.hive.ql.lib.RuleRegExp) ArrayList(java.util.ArrayList) SemanticGraphWalker(org.apache.hadoop.hive.ql.lib.SemanticGraphWalker) LinkedHashMap(java.util.LinkedHashMap) SemanticDispatcher(org.apache.hadoop.hive.ql.lib.SemanticDispatcher) SemanticNodeProcessor(org.apache.hadoop.hive.ql.lib.SemanticNodeProcessor)

Example 55 with SemanticNodeProcessor

use of org.apache.hadoop.hive.ql.lib.SemanticNodeProcessor in project hive by apache.

the class MapJoinProcessor method transform.

/**
 * Transform the query tree. For each join, check if it is a map-side join (user specified). If
 * yes, convert it to a map-side join.
 *
 * @param pactx
 *          current parse context
 */
@Override
public ParseContext transform(ParseContext pactx) throws SemanticException {
    List<MapJoinOperator> listMapJoinOps = new ArrayList<MapJoinOperator>();
    // traverse all the joins and convert them if necessary
    if (pactx.getJoinOps() != null) {
        Set<JoinOperator> joinMap = new HashSet<JoinOperator>();
        Set<MapJoinOperator> mapJoinMap = pactx.getMapJoinOps();
        if (mapJoinMap == null) {
            mapJoinMap = new HashSet<MapJoinOperator>();
            pactx.setMapJoinOps(mapJoinMap);
        }
        Iterator<JoinOperator> joinCtxIter = pactx.getJoinOps().iterator();
        while (joinCtxIter.hasNext()) {
            JoinOperator joinOp = joinCtxIter.next();
            int mapJoinPos = mapSideJoin(joinOp);
            if (mapJoinPos >= 0) {
                MapJoinOperator mapJoinOp = generateMapJoinOperator(pactx, joinOp, mapJoinPos);
                listMapJoinOps.add(mapJoinOp);
                mapJoinOp.getConf().setQBJoinTreeProps(joinOp.getConf());
                mapJoinMap.add(mapJoinOp);
            } else {
                joinOp.getConf().setQBJoinTreeProps(joinOp.getConf());
                joinMap.add(joinOp);
            }
        }
        // store the new joinContext
        pactx.setJoinOps(joinMap);
    }
    // Go over the list and find if a reducer is not needed
    List<AbstractMapJoinOperator<? extends MapJoinDesc>> listMapJoinOpsNoRed = new ArrayList<AbstractMapJoinOperator<? extends MapJoinDesc>>();
    // create a walker which walks the tree in a DFS manner while maintaining
    // the operator stack.
    // The dispatcher generates the plan from the operator tree
    Map<SemanticRule, SemanticNodeProcessor> opRules = new LinkedHashMap<SemanticRule, SemanticNodeProcessor>();
    opRules.put(new RuleRegExp("R0", MapJoinOperator.getOperatorName() + "%"), getCurrentMapJoin());
    opRules.put(new RuleRegExp("R1", MapJoinOperator.getOperatorName() + "%.*" + FileSinkOperator.getOperatorName() + "%"), getMapJoinFS());
    opRules.put(new RuleRegExp("R2", MapJoinOperator.getOperatorName() + "%.*" + ReduceSinkOperator.getOperatorName() + "%"), getMapJoinDefault());
    opRules.put(new RuleRegExp("R4", MapJoinOperator.getOperatorName() + "%.*" + UnionOperator.getOperatorName() + "%"), getMapJoinDefault());
    // The dispatcher fires the processor corresponding to the closest matching
    // rule and passes the context along
    SemanticDispatcher disp = new DefaultRuleDispatcher(getDefault(), opRules, new MapJoinWalkerCtx(listMapJoinOpsNoRed, pactx));
    SemanticGraphWalker ogw = new GenMapRedWalker(disp);
    ArrayList<Node> topNodes = new ArrayList<Node>();
    topNodes.addAll(listMapJoinOps);
    ogw.startWalking(topNodes, null);
    pactx.setListMapJoinOpsNoReducer(listMapJoinOpsNoRed);
    return pactx;
}
Also used : LateralViewJoinOperator(org.apache.hadoop.hive.ql.exec.LateralViewJoinOperator) MapJoinOperator(org.apache.hadoop.hive.ql.exec.MapJoinOperator) AbstractMapJoinOperator(org.apache.hadoop.hive.ql.exec.AbstractMapJoinOperator) SMBMapJoinOperator(org.apache.hadoop.hive.ql.exec.SMBMapJoinOperator) JoinOperator(org.apache.hadoop.hive.ql.exec.JoinOperator) Node(org.apache.hadoop.hive.ql.lib.Node) ArrayList(java.util.ArrayList) LinkedHashMap(java.util.LinkedHashMap) HashSet(java.util.HashSet) MapJoinOperator(org.apache.hadoop.hive.ql.exec.MapJoinOperator) AbstractMapJoinOperator(org.apache.hadoop.hive.ql.exec.AbstractMapJoinOperator) SMBMapJoinOperator(org.apache.hadoop.hive.ql.exec.SMBMapJoinOperator) MapJoinDesc(org.apache.hadoop.hive.ql.plan.MapJoinDesc) SemanticRule(org.apache.hadoop.hive.ql.lib.SemanticRule) DefaultRuleDispatcher(org.apache.hadoop.hive.ql.lib.DefaultRuleDispatcher) AbstractMapJoinOperator(org.apache.hadoop.hive.ql.exec.AbstractMapJoinOperator) RuleRegExp(org.apache.hadoop.hive.ql.lib.RuleRegExp) SemanticGraphWalker(org.apache.hadoop.hive.ql.lib.SemanticGraphWalker) GenMapRedWalker(org.apache.hadoop.hive.ql.parse.GenMapRedWalker) SemanticDispatcher(org.apache.hadoop.hive.ql.lib.SemanticDispatcher) SemanticNodeProcessor(org.apache.hadoop.hive.ql.lib.SemanticNodeProcessor) EnabledOverride(org.apache.hadoop.hive.ql.optimizer.physical.Vectorizer.EnabledOverride)

Aggregations

SemanticNodeProcessor (org.apache.hadoop.hive.ql.lib.SemanticNodeProcessor)68 LinkedHashMap (java.util.LinkedHashMap)66 Node (org.apache.hadoop.hive.ql.lib.Node)66 SemanticGraphWalker (org.apache.hadoop.hive.ql.lib.SemanticGraphWalker)66 SemanticRule (org.apache.hadoop.hive.ql.lib.SemanticRule)66 SemanticDispatcher (org.apache.hadoop.hive.ql.lib.SemanticDispatcher)65 DefaultRuleDispatcher (org.apache.hadoop.hive.ql.lib.DefaultRuleDispatcher)63 ArrayList (java.util.ArrayList)62 RuleRegExp (org.apache.hadoop.hive.ql.lib.RuleRegExp)57 DefaultGraphWalker (org.apache.hadoop.hive.ql.lib.DefaultGraphWalker)36 MapJoinOperator (org.apache.hadoop.hive.ql.exec.MapJoinOperator)11 ReduceSinkOperator (org.apache.hadoop.hive.ql.exec.ReduceSinkOperator)11 JoinOperator (org.apache.hadoop.hive.ql.exec.JoinOperator)9 Operator (org.apache.hadoop.hive.ql.exec.Operator)9 FileSinkOperator (org.apache.hadoop.hive.ql.exec.FileSinkOperator)8 ForwardWalker (org.apache.hadoop.hive.ql.lib.ForwardWalker)8 TableScanOperator (org.apache.hadoop.hive.ql.exec.TableScanOperator)7 UnionOperator (org.apache.hadoop.hive.ql.exec.UnionOperator)7 AppMasterEventOperator (org.apache.hadoop.hive.ql.exec.AppMasterEventOperator)6 DummyStoreOperator (org.apache.hadoop.hive.ql.exec.DummyStoreOperator)6