Search in sources :

Example 66 with Dispatcher

use of org.apache.hadoop.hive.ql.lib.Dispatcher in project hive by apache.

the class HiveOpConverterPostProc method transform.

@Override
public ParseContext transform(ParseContext pctx) throws SemanticException {
    // 0. We check the conditions to apply this transformation,
    // if we do not meet them we bail out
    final boolean cboEnabled = HiveConf.getBoolVar(pctx.getConf(), HiveConf.ConfVars.HIVE_CBO_ENABLED);
    final boolean returnPathEnabled = HiveConf.getBoolVar(pctx.getConf(), HiveConf.ConfVars.HIVE_CBO_RETPATH_HIVEOP);
    final boolean cboSucceeded = pctx.getContext().isCboSucceeded();
    if (!(cboEnabled && returnPathEnabled && cboSucceeded)) {
        return pctx;
    }
    // 1. Initialize aux data structures
    this.pctx = pctx;
    this.aliasToOpInfo = new HashMap<String, Operator<? extends OperatorDesc>>();
    // 2. Trigger transformation
    Map<Rule, NodeProcessor> opRules = new LinkedHashMap<Rule, NodeProcessor>();
    opRules.put(new RuleRegExp("R1", JoinOperator.getOperatorName() + "%"), new JoinAnnotate());
    opRules.put(new RuleRegExp("R2", TableScanOperator.getOperatorName() + "%"), new TableScanAnnotate());
    Dispatcher disp = new DefaultRuleDispatcher(null, opRules, null);
    GraphWalker ogw = new ForwardWalker(disp);
    List<Node> topNodes = new ArrayList<Node>();
    topNodes.addAll(pctx.getTopOps().values());
    ogw.startWalking(topNodes, null);
    return pctx;
}
Also used : ReduceSinkOperator(org.apache.hadoop.hive.ql.exec.ReduceSinkOperator) JoinOperator(org.apache.hadoop.hive.ql.exec.JoinOperator) TableScanOperator(org.apache.hadoop.hive.ql.exec.TableScanOperator) Operator(org.apache.hadoop.hive.ql.exec.Operator) NodeProcessor(org.apache.hadoop.hive.ql.lib.NodeProcessor) ForwardWalker(org.apache.hadoop.hive.ql.lib.ForwardWalker) DefaultRuleDispatcher(org.apache.hadoop.hive.ql.lib.DefaultRuleDispatcher) Node(org.apache.hadoop.hive.ql.lib.Node) RuleRegExp(org.apache.hadoop.hive.ql.lib.RuleRegExp) ArrayList(java.util.ArrayList) Dispatcher(org.apache.hadoop.hive.ql.lib.Dispatcher) DefaultRuleDispatcher(org.apache.hadoop.hive.ql.lib.DefaultRuleDispatcher) LinkedHashMap(java.util.LinkedHashMap) Rule(org.apache.hadoop.hive.ql.lib.Rule) OperatorDesc(org.apache.hadoop.hive.ql.plan.OperatorDesc) GraphWalker(org.apache.hadoop.hive.ql.lib.GraphWalker)

Example 67 with Dispatcher

use of org.apache.hadoop.hive.ql.lib.Dispatcher in project hive by apache.

the class ColumnPruner method transform.

/**
 * Transform the query tree. For each table under consideration, check if all
 * columns are needed. If not, only select the operators needed at the
 * beginning and proceed.
 *
 * @param pactx
 *          the current parse context
 */
@Override
public ParseContext transform(ParseContext pactx) throws SemanticException {
    pGraphContext = pactx;
    // generate pruned column list for all relevant operators
    ColumnPrunerProcCtx cppCtx = new ColumnPrunerProcCtx(pactx);
    // create a walker which walks the tree in a DFS manner while maintaining
    // the operator stack. The dispatcher
    // generates the plan from the operator tree
    Map<Rule, NodeProcessor> opRules = new LinkedHashMap<Rule, NodeProcessor>();
    opRules.put(new RuleRegExp("R1", FilterOperator.getOperatorName() + "%"), ColumnPrunerProcFactory.getFilterProc());
    opRules.put(new RuleRegExp("R2", GroupByOperator.getOperatorName() + "%"), ColumnPrunerProcFactory.getGroupByProc());
    opRules.put(new RuleRegExp("R3", ReduceSinkOperator.getOperatorName() + "%"), ColumnPrunerProcFactory.getReduceSinkProc());
    opRules.put(new RuleRegExp("R4", SelectOperator.getOperatorName() + "%"), ColumnPrunerProcFactory.getSelectProc());
    opRules.put(new RuleRegExp("R5", CommonJoinOperator.getOperatorName() + "%"), ColumnPrunerProcFactory.getJoinProc());
    opRules.put(new RuleRegExp("R6", MapJoinOperator.getOperatorName() + "%"), ColumnPrunerProcFactory.getMapJoinProc());
    opRules.put(new RuleRegExp("R7", TableScanOperator.getOperatorName() + "%"), ColumnPrunerProcFactory.getTableScanProc());
    opRules.put(new RuleRegExp("R8", LateralViewJoinOperator.getOperatorName() + "%"), ColumnPrunerProcFactory.getLateralViewJoinProc());
    opRules.put(new RuleRegExp("R9", LateralViewForwardOperator.getOperatorName() + "%"), ColumnPrunerProcFactory.getLateralViewForwardProc());
    opRules.put(new RuleRegExp("R10", PTFOperator.getOperatorName() + "%"), ColumnPrunerProcFactory.getPTFProc());
    opRules.put(new RuleRegExp("R11", ScriptOperator.getOperatorName() + "%"), ColumnPrunerProcFactory.getScriptProc());
    opRules.put(new RuleRegExp("R12", LimitOperator.getOperatorName() + "%"), ColumnPrunerProcFactory.getLimitProc());
    opRules.put(new RuleRegExp("R13", UnionOperator.getOperatorName() + "%"), ColumnPrunerProcFactory.getUnionProc());
    // The dispatcher fires the processor corresponding to the closest matching
    // rule and passes the context along
    Dispatcher disp = new DefaultRuleDispatcher(ColumnPrunerProcFactory.getDefaultProc(), opRules, cppCtx);
    GraphWalker ogw = new ColumnPrunerWalker(disp);
    // Create a list of topop nodes
    ArrayList<Node> topNodes = new ArrayList<Node>();
    topNodes.addAll(pGraphContext.getTopOps().values());
    ogw.startWalking(topNodes, null);
    // set it back so that column pruner in the optimizer will not do the
    // view column authorization again even if it is triggered again.
    pGraphContext.setNeedViewColumnAuthorization(false);
    return pGraphContext;
}
Also used : NodeProcessor(org.apache.hadoop.hive.ql.lib.NodeProcessor) DefaultRuleDispatcher(org.apache.hadoop.hive.ql.lib.DefaultRuleDispatcher) Node(org.apache.hadoop.hive.ql.lib.Node) RuleRegExp(org.apache.hadoop.hive.ql.lib.RuleRegExp) ArrayList(java.util.ArrayList) Dispatcher(org.apache.hadoop.hive.ql.lib.Dispatcher) DefaultRuleDispatcher(org.apache.hadoop.hive.ql.lib.DefaultRuleDispatcher) LinkedHashMap(java.util.LinkedHashMap) Rule(org.apache.hadoop.hive.ql.lib.Rule) DefaultGraphWalker(org.apache.hadoop.hive.ql.lib.DefaultGraphWalker) GraphWalker(org.apache.hadoop.hive.ql.lib.GraphWalker)

Example 68 with Dispatcher

use of org.apache.hadoop.hive.ql.lib.Dispatcher in project hive by apache.

the class MapJoinProcessor method transform.

/**
 * Transform the query tree. For each join, check if it is a map-side join (user specified). If
 * yes, convert it to a map-side join.
 *
 * @param pactx
 *          current parse context
 */
@Override
public ParseContext transform(ParseContext pactx) throws SemanticException {
    List<MapJoinOperator> listMapJoinOps = new ArrayList<MapJoinOperator>();
    // traverse all the joins and convert them if necessary
    if (pactx.getJoinOps() != null) {
        Set<JoinOperator> joinMap = new HashSet<JoinOperator>();
        Set<MapJoinOperator> mapJoinMap = pactx.getMapJoinOps();
        if (mapJoinMap == null) {
            mapJoinMap = new HashSet<MapJoinOperator>();
            pactx.setMapJoinOps(mapJoinMap);
        }
        Iterator<JoinOperator> joinCtxIter = pactx.getJoinOps().iterator();
        while (joinCtxIter.hasNext()) {
            JoinOperator joinOp = joinCtxIter.next();
            int mapJoinPos = mapSideJoin(joinOp);
            if (mapJoinPos >= 0) {
                MapJoinOperator mapJoinOp = generateMapJoinOperator(pactx, joinOp, mapJoinPos);
                listMapJoinOps.add(mapJoinOp);
                mapJoinOp.getConf().setQBJoinTreeProps(joinOp.getConf());
                mapJoinMap.add(mapJoinOp);
            } else {
                joinOp.getConf().setQBJoinTreeProps(joinOp.getConf());
                joinMap.add(joinOp);
            }
        }
        // store the new joinContext
        pactx.setJoinOps(joinMap);
    }
    // Go over the list and find if a reducer is not needed
    List<AbstractMapJoinOperator<? extends MapJoinDesc>> listMapJoinOpsNoRed = new ArrayList<AbstractMapJoinOperator<? extends MapJoinDesc>>();
    // create a walker which walks the tree in a DFS manner while maintaining
    // the operator stack.
    // The dispatcher generates the plan from the operator tree
    Map<Rule, NodeProcessor> opRules = new LinkedHashMap<Rule, NodeProcessor>();
    opRules.put(new RuleRegExp("R0", MapJoinOperator.getOperatorName() + "%"), getCurrentMapJoin());
    opRules.put(new RuleRegExp("R1", MapJoinOperator.getOperatorName() + "%.*" + FileSinkOperator.getOperatorName() + "%"), getMapJoinFS());
    opRules.put(new RuleRegExp("R2", MapJoinOperator.getOperatorName() + "%.*" + ReduceSinkOperator.getOperatorName() + "%"), getMapJoinDefault());
    opRules.put(new RuleRegExp("R4", MapJoinOperator.getOperatorName() + "%.*" + UnionOperator.getOperatorName() + "%"), getMapJoinDefault());
    // The dispatcher fires the processor corresponding to the closest matching
    // rule and passes the context along
    Dispatcher disp = new DefaultRuleDispatcher(getDefault(), opRules, new MapJoinWalkerCtx(listMapJoinOpsNoRed, pactx));
    GraphWalker ogw = new GenMapRedWalker(disp);
    ArrayList<Node> topNodes = new ArrayList<Node>();
    topNodes.addAll(listMapJoinOps);
    ogw.startWalking(topNodes, null);
    pactx.setListMapJoinOpsNoReducer(listMapJoinOpsNoRed);
    return pactx;
}
Also used : LateralViewJoinOperator(org.apache.hadoop.hive.ql.exec.LateralViewJoinOperator) MapJoinOperator(org.apache.hadoop.hive.ql.exec.MapJoinOperator) AbstractMapJoinOperator(org.apache.hadoop.hive.ql.exec.AbstractMapJoinOperator) SMBMapJoinOperator(org.apache.hadoop.hive.ql.exec.SMBMapJoinOperator) JoinOperator(org.apache.hadoop.hive.ql.exec.JoinOperator) Node(org.apache.hadoop.hive.ql.lib.Node) ArrayList(java.util.ArrayList) Dispatcher(org.apache.hadoop.hive.ql.lib.Dispatcher) DefaultRuleDispatcher(org.apache.hadoop.hive.ql.lib.DefaultRuleDispatcher) LinkedHashMap(java.util.LinkedHashMap) GraphWalker(org.apache.hadoop.hive.ql.lib.GraphWalker) HashSet(java.util.HashSet) MapJoinOperator(org.apache.hadoop.hive.ql.exec.MapJoinOperator) AbstractMapJoinOperator(org.apache.hadoop.hive.ql.exec.AbstractMapJoinOperator) SMBMapJoinOperator(org.apache.hadoop.hive.ql.exec.SMBMapJoinOperator) MapJoinDesc(org.apache.hadoop.hive.ql.plan.MapJoinDesc) NodeProcessor(org.apache.hadoop.hive.ql.lib.NodeProcessor) DefaultRuleDispatcher(org.apache.hadoop.hive.ql.lib.DefaultRuleDispatcher) AbstractMapJoinOperator(org.apache.hadoop.hive.ql.exec.AbstractMapJoinOperator) RuleRegExp(org.apache.hadoop.hive.ql.lib.RuleRegExp) GenMapRedWalker(org.apache.hadoop.hive.ql.parse.GenMapRedWalker) Rule(org.apache.hadoop.hive.ql.lib.Rule)

Example 69 with Dispatcher

use of org.apache.hadoop.hive.ql.lib.Dispatcher in project hive by apache.

the class PrunerUtils method walkOperatorTree.

/**
 * Walk operator tree for pruner generation.
 *
 * @param pctx
 * @param opWalkerCtx
 * @param filterProc
 * @param defaultProc
 * @throws SemanticException
 */
public static void walkOperatorTree(ParseContext pctx, NodeProcessorCtx opWalkerCtx, NodeProcessor filterProc, NodeProcessor defaultProc) throws SemanticException {
    Map<Rule, NodeProcessor> opRules = new LinkedHashMap<Rule, NodeProcessor>();
    // Build regular expression for operator rule.
    // "(TS%FIL%)|(TS%FIL%FIL%)"
    String tsOprName = TableScanOperator.getOperatorName();
    String filtOprName = FilterOperator.getOperatorName();
    opRules.put(new RuleExactMatch("R1", new String[] { tsOprName, filtOprName, filtOprName }), filterProc);
    opRules.put(new RuleExactMatch("R2", new String[] { tsOprName, filtOprName }), filterProc);
    // The dispatcher fires the processor corresponding to the closest matching
    // rule and passes the context along
    Dispatcher disp = new DefaultRuleDispatcher(defaultProc, opRules, opWalkerCtx);
    GraphWalker ogw = new DefaultGraphWalker(disp);
    // Create a list of topop nodes
    ArrayList<Node> topNodes = new ArrayList<Node>();
    topNodes.addAll(pctx.getTopOps().values());
    ogw.startWalking(topNodes, null);
}
Also used : NodeProcessor(org.apache.hadoop.hive.ql.lib.NodeProcessor) DefaultRuleDispatcher(org.apache.hadoop.hive.ql.lib.DefaultRuleDispatcher) DefaultGraphWalker(org.apache.hadoop.hive.ql.lib.DefaultGraphWalker) Node(org.apache.hadoop.hive.ql.lib.Node) ArrayList(java.util.ArrayList) Dispatcher(org.apache.hadoop.hive.ql.lib.Dispatcher) DefaultRuleDispatcher(org.apache.hadoop.hive.ql.lib.DefaultRuleDispatcher) LinkedHashMap(java.util.LinkedHashMap) RuleExactMatch(org.apache.hadoop.hive.ql.lib.RuleExactMatch) Rule(org.apache.hadoop.hive.ql.lib.Rule) TypeRule(org.apache.hadoop.hive.ql.lib.TypeRule) GraphWalker(org.apache.hadoop.hive.ql.lib.GraphWalker) DefaultGraphWalker(org.apache.hadoop.hive.ql.lib.DefaultGraphWalker)

Example 70 with Dispatcher

use of org.apache.hadoop.hive.ql.lib.Dispatcher in project hive by apache.

the class PrunerUtils method walkExprTree.

/**
 * Walk expression tree for pruner generation.
 *
 * @param pred
 * @param ctx
 * @param colProc
 * @param fieldProc
 * @param genFuncProc
 * @param defProc
 * @return
 * @throws SemanticException
 */
public static Map<Node, Object> walkExprTree(ExprNodeDesc pred, NodeProcessorCtx ctx, NodeProcessor colProc, NodeProcessor fieldProc, NodeProcessor genFuncProc, NodeProcessor defProc) throws SemanticException {
    // create a walker which walks the tree in a DFS manner while maintaining
    // the operator stack. The dispatcher
    // generates the plan from the operator tree
    Map<Rule, NodeProcessor> exprRules = new LinkedHashMap<Rule, NodeProcessor>();
    exprRules.put(new TypeRule(ExprNodeColumnDesc.class), colProc);
    exprRules.put(new TypeRule(ExprNodeFieldDesc.class), fieldProc);
    exprRules.put(new TypeRule(ExprNodeGenericFuncDesc.class), genFuncProc);
    // The dispatcher fires the processor corresponding to the closest matching
    // rule and passes the context along
    Dispatcher disp = new DefaultRuleDispatcher(defProc, exprRules, ctx);
    GraphWalker egw = new DefaultGraphWalker(disp);
    List<Node> startNodes = new ArrayList<Node>();
    startNodes.add(pred);
    HashMap<Node, Object> outputMap = new HashMap<Node, Object>();
    egw.startWalking(startNodes, outputMap);
    return outputMap;
}
Also used : NodeProcessor(org.apache.hadoop.hive.ql.lib.NodeProcessor) DefaultRuleDispatcher(org.apache.hadoop.hive.ql.lib.DefaultRuleDispatcher) HashMap(java.util.HashMap) LinkedHashMap(java.util.LinkedHashMap) DefaultGraphWalker(org.apache.hadoop.hive.ql.lib.DefaultGraphWalker) Node(org.apache.hadoop.hive.ql.lib.Node) ArrayList(java.util.ArrayList) ExprNodeGenericFuncDesc(org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc) Dispatcher(org.apache.hadoop.hive.ql.lib.Dispatcher) DefaultRuleDispatcher(org.apache.hadoop.hive.ql.lib.DefaultRuleDispatcher) LinkedHashMap(java.util.LinkedHashMap) ExprNodeFieldDesc(org.apache.hadoop.hive.ql.plan.ExprNodeFieldDesc) ExprNodeColumnDesc(org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc) Rule(org.apache.hadoop.hive.ql.lib.Rule) TypeRule(org.apache.hadoop.hive.ql.lib.TypeRule) GraphWalker(org.apache.hadoop.hive.ql.lib.GraphWalker) DefaultGraphWalker(org.apache.hadoop.hive.ql.lib.DefaultGraphWalker) TypeRule(org.apache.hadoop.hive.ql.lib.TypeRule)

Aggregations

Dispatcher (org.apache.hadoop.hive.ql.lib.Dispatcher)97 Node (org.apache.hadoop.hive.ql.lib.Node)97 ArrayList (java.util.ArrayList)92 GraphWalker (org.apache.hadoop.hive.ql.lib.GraphWalker)87 DefaultRuleDispatcher (org.apache.hadoop.hive.ql.lib.DefaultRuleDispatcher)86 NodeProcessor (org.apache.hadoop.hive.ql.lib.NodeProcessor)68 Rule (org.apache.hadoop.hive.ql.lib.Rule)68 LinkedHashMap (java.util.LinkedHashMap)67 DefaultGraphWalker (org.apache.hadoop.hive.ql.lib.DefaultGraphWalker)62 RuleRegExp (org.apache.hadoop.hive.ql.lib.RuleRegExp)59 HashMap (java.util.HashMap)21 SemanticException (org.apache.hadoop.hive.ql.parse.SemanticException)17 ExprNodeDesc (org.apache.hadoop.hive.ql.plan.ExprNodeDesc)16 ExprNodeColumnDesc (org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc)15 ExprNodeGenericFuncDesc (org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc)14 TypeRule (org.apache.hadoop.hive.ql.lib.TypeRule)11 List (java.util.List)10 ReduceSinkOperator (org.apache.hadoop.hive.ql.exec.ReduceSinkOperator)10 ExprNodeConstantDesc (org.apache.hadoop.hive.ql.plan.ExprNodeConstantDesc)10 Test (org.junit.Test)10