Search in sources :

Example 31 with NodeProcessor

use of org.apache.hadoop.hive.ql.lib.NodeProcessor in project hive by apache.

The method transform of the class CountDistinctRewriteProc.

/**
 * Registers a single rule for the operator chain
 * group-by, reduce-sink, group-by and walks the operator graph starting at
 * the top operators of the supplied parse context.
 *
 * @param pctx parse context whose top operators seed the walk
 * @return the same {@code pctx} instance that was passed in
 * @throws SemanticException if raised while building the rule or walking the graph
 */
@Override
public ParseContext transform(ParseContext pctx) throws SemanticException {
    // Each operator name in the pattern is terminated by '%'.
    String groupBy = GroupByOperator.getOperatorName();
    String reduceSink = ReduceSinkOperator.getOperatorName();
    Rule countDistinctRule = new RuleRegExp("R1", groupBy + "%" + reduceSink + "%" + groupBy + "%");

    // LinkedHashMap keeps the registration order of the rules.
    Map<Rule, NodeProcessor> rules = new LinkedHashMap<Rule, NodeProcessor>();
    rules.put(countDistinctRule, getCountDistinctProc(pctx));

    // No processor context is handed to the dispatcher (third argument is null);
    // getDefaultProc() supplies the dispatcher's default processor.
    Dispatcher dispatcher = new DefaultRuleDispatcher(getDefaultProc(), rules, null);
    GraphWalker walker = new DefaultGraphWalker(dispatcher);

    // Seed the walk with every top operator known to the parse context.
    List<Node> roots = new ArrayList<Node>(pctx.getTopOps().values());
    walker.startWalking(roots, null);
    return pctx;
}
Also used : NodeProcessor(org.apache.hadoop.hive.ql.lib.NodeProcessor) DefaultRuleDispatcher(org.apache.hadoop.hive.ql.lib.DefaultRuleDispatcher) DefaultGraphWalker(org.apache.hadoop.hive.ql.lib.DefaultGraphWalker) Node(org.apache.hadoop.hive.ql.lib.Node) RuleRegExp(org.apache.hadoop.hive.ql.lib.RuleRegExp) ArrayList(java.util.ArrayList) Rule(org.apache.hadoop.hive.ql.lib.Rule) Dispatcher(org.apache.hadoop.hive.ql.lib.Dispatcher) DefaultRuleDispatcher(org.apache.hadoop.hive.ql.lib.DefaultRuleDispatcher) DefaultGraphWalker(org.apache.hadoop.hive.ql.lib.DefaultGraphWalker) GraphWalker(org.apache.hadoop.hive.ql.lib.GraphWalker) LinkedHashMap(java.util.LinkedHashMap)

Example 32 with NodeProcessor

use of org.apache.hadoop.hive.ql.lib.NodeProcessor in project hive by apache.

The method transform of the class SyntheticJoinPredicate.

/**
 * Walks the operator graph with the {@code JoinSynthetic} processor, but only
 * when the execution engine is "tez" with
 * {@code TEZ_DYNAMIC_PARTITION_PRUNING} switched on, or "spark" with
 * {@code isSparkDPPAny()} reporting true. In every other configuration the
 * parse context is returned without walking the graph.
 *
 * @param pctx parse context providing the configuration and the top operators
 * @return the same {@code pctx} instance that was passed in
 * @throws SemanticException if raised while building the rule or walking the graph
 */
@Override
public ParseContext transform(ParseContext pctx) throws SemanticException {
    String engine = pctx.getConf().getVar(ConfVars.HIVE_EXECUTION_ENGINE);
    // Short-circuit evaluation: the spark check only runs when the tez check fails.
    boolean active = (engine.equals("tez") && pctx.getConf().getBoolVar(ConfVars.TEZ_DYNAMIC_PARTITION_PRUNING))
        || (engine.equals("spark") && pctx.getConf().isSparkDPPAny());
    if (!active) {
        return pctx;
    }

    // Pattern: a table scan, anything in between, then reduce-sink followed by join.
    String pattern = "("
        + TableScanOperator.getOperatorName() + "%"
        + ".*"
        + ReduceSinkOperator.getOperatorName() + "%"
        + JoinOperator.getOperatorName() + "%)";
    Map<Rule, NodeProcessor> rules = new LinkedHashMap<Rule, NodeProcessor>();
    rules.put(new RuleRegExp("R1", pattern), new JoinSynthetic());

    // The dispatcher gets no default processor (first argument is null) and
    // passes the synthetic context along to the processor it fires.
    SyntheticContext walkContext = new SyntheticContext(pctx);
    Dispatcher dispatcher = new DefaultRuleDispatcher(null, rules, walkContext);
    GraphWalker walker = new PreOrderOnceWalker(dispatcher);

    // Seed the walk with every top operator known to the parse context.
    List<Node> roots = new ArrayList<Node>(pctx.getTopOps().values());
    walker.startWalking(roots, null);
    return pctx;
}
Also used : NodeProcessor(org.apache.hadoop.hive.ql.lib.NodeProcessor) DefaultRuleDispatcher(org.apache.hadoop.hive.ql.lib.DefaultRuleDispatcher) Node(org.apache.hadoop.hive.ql.lib.Node) RuleRegExp(org.apache.hadoop.hive.ql.lib.RuleRegExp) ArrayList(java.util.ArrayList) Dispatcher(org.apache.hadoop.hive.ql.lib.Dispatcher) DefaultRuleDispatcher(org.apache.hadoop.hive.ql.lib.DefaultRuleDispatcher) LinkedHashMap(java.util.LinkedHashMap) PreOrderOnceWalker(org.apache.hadoop.hive.ql.lib.PreOrderOnceWalker) Rule(org.apache.hadoop.hive.ql.lib.Rule) GraphWalker(org.apache.hadoop.hive.ql.lib.GraphWalker)

Example 33 with NodeProcessor

use of org.apache.hadoop.hive.ql.lib.NodeProcessor in project hive by apache.

The method analyzeTableAccess of the class TableAccessAnalyzer.

/**
 * Walks the operator graph of {@code pGraphContext} with processors
 * registered for group-by, join and map-join operators, and returns the
 * table access information held by the walk context afterwards.
 *
 * @return the {@code TableAccessInfo} taken from the {@code TableAccessCtx} used for the walk
 * @throws SemanticException if raised while building the rules or walking the graph
 */
public TableAccessInfo analyzeTableAccess() throws SemanticException {
    // Rules are kept in registration order: R1 group-by, R2 join, R3 map-join.
    Map<Rule, NodeProcessor> rules = new LinkedHashMap<Rule, NodeProcessor>();
    Rule groupByRule = new RuleRegExp("R1", GroupByOperator.getOperatorName() + "%");
    rules.put(groupByRule, new GroupByProcessor(pGraphContext));
    Rule joinRule = new RuleRegExp("R2", JoinOperator.getOperatorName() + "%");
    rules.put(joinRule, new JoinProcessor(pGraphContext));
    // Map joins get their own JoinProcessor instance.
    Rule mapJoinRule = new RuleRegExp("R3", MapJoinOperator.getOperatorName() + "%");
    rules.put(mapJoinRule, new JoinProcessor(pGraphContext));

    // The context is handed to the dispatcher and read back once the walk is done.
    TableAccessCtx accessCtx = new TableAccessCtx();
    Dispatcher dispatcher = new DefaultRuleDispatcher(getDefaultProc(), rules, accessCtx);
    GraphWalker walker = new DefaultGraphWalker(dispatcher);

    // Seed the walk with every top operator of the graph context.
    List<Node> roots = new ArrayList<Node>(pGraphContext.getTopOps().values());
    walker.startWalking(roots, null);
    return accessCtx.getTableAccessInfo();
}
Also used : NodeProcessor(org.apache.hadoop.hive.ql.lib.NodeProcessor) DefaultRuleDispatcher(org.apache.hadoop.hive.ql.lib.DefaultRuleDispatcher) DefaultGraphWalker(org.apache.hadoop.hive.ql.lib.DefaultGraphWalker) Node(org.apache.hadoop.hive.ql.lib.Node) RuleRegExp(org.apache.hadoop.hive.ql.lib.RuleRegExp) ArrayList(java.util.ArrayList) Dispatcher(org.apache.hadoop.hive.ql.lib.Dispatcher) DefaultRuleDispatcher(org.apache.hadoop.hive.ql.lib.DefaultRuleDispatcher) LinkedHashMap(java.util.LinkedHashMap) Rule(org.apache.hadoop.hive.ql.lib.Rule) DefaultGraphWalker(org.apache.hadoop.hive.ql.lib.DefaultGraphWalker) GraphWalker(org.apache.hadoop.hive.ql.lib.GraphWalker)

Example 34 with NodeProcessor

use of org.apache.hadoop.hive.ql.lib.NodeProcessor in project hive by apache.

The method createOperatorRules of the class IndexWhereTaskDispatcher.

/**
 * Builds the rule map used to process table scans whose tables carry at
 * least one index handled by {@code CompactIndexHandler} or
 * {@code BitmapIndexHandler}.
 *
 * @param pctx parse context whose top operators are inspected
 * @return a map holding the single "RULEWhere" rule, or {@code null} when
 *         none of the top-level table scans has a supported index
 * @throws SemanticException if raised while looking up the indexes
 */
private Map<Rule, NodeProcessor> createOperatorRules(ParseContext pctx) throws SemanticException {
    // Only indexes backed by these handler classes are considered.
    List<String> handlerNames = new ArrayList<String>();
    handlerNames.add(CompactIndexHandler.class.getName());
    handlerNames.add(BitmapIndexHandler.class.getName());

    // Collect, per table scan, the indexes reported by IndexUtils.getIndexes
    // for the scanned table's metadata.
    Map<TableScanOperator, List<Index>> indexesByScan = new HashMap<TableScanOperator, List<Index>>();
    for (Operator<? extends OperatorDesc> topOp : pctx.getTopOps().values()) {
        if (!(topOp instanceof TableScanOperator)) {
            continue;
        }
        TableScanOperator scan = (TableScanOperator) topOp;
        List<Index> found = IndexUtils.getIndexes(scan.getConf().getTableMetadata(), handlerNames);
        if (!found.isEmpty()) {
            indexesByScan.put(scan, found);
        }
    }

    // Nothing to do when no scanned table has a supported index; callers
    // must be prepared for the null result.
    if (indexesByScan.isEmpty()) {
        return null;
    }

    // A single rule matching table scan operators (name followed by '%'),
    // handled by an IndexWhereProcessor that receives the collected indexes.
    Map<Rule, NodeProcessor> rules = new LinkedHashMap<Rule, NodeProcessor>();
    Rule whereRule = new RuleRegExp("RULEWhere", TableScanOperator.getOperatorName() + "%");
    rules.put(whereRule, new IndexWhereProcessor(indexesByScan));
    return rules;
}
Also used : TableScanOperator(org.apache.hadoop.hive.ql.exec.TableScanOperator) NodeProcessor(org.apache.hadoop.hive.ql.lib.NodeProcessor) HashMap(java.util.HashMap) LinkedHashMap(java.util.LinkedHashMap) ArrayList(java.util.ArrayList) RuleRegExp(org.apache.hadoop.hive.ql.lib.RuleRegExp) CompactIndexHandler(org.apache.hadoop.hive.ql.index.compact.CompactIndexHandler) Index(org.apache.hadoop.hive.metastore.api.Index) LinkedHashMap(java.util.LinkedHashMap) BitmapIndexHandler(org.apache.hadoop.hive.ql.index.bitmap.BitmapIndexHandler) ArrayList(java.util.ArrayList) List(java.util.List) Rule(org.apache.hadoop.hive.ql.lib.Rule)

Example 35 with NodeProcessor

use of org.apache.hadoop.hive.ql.lib.NodeProcessor in project hive by apache.

The method dispatch of the class IndexWhereTaskDispatcher.

/**
 * Treats the visited node as a task and, when that task's work is a
 * {@code MapredWork}, walks the operators returned by the map work's
 * {@code getAliasToWork()} using the rules from {@code createOperatorRules}.
 *
 * @param nd the node being visited; it is cast to a {@code Task}
 * @param stack not used by this method
 * @param nodeOutputs not used by this method
 * @return always {@code null}
 * @throws SemanticException if raised while creating the rules or walking the operators
 */
@Override
public Object dispatch(Node nd, Stack<Node> stack, Object... nodeOutputs) throws SemanticException {
    Task<? extends Serializable> task = (Task<? extends Serializable>) nd;
    ParseContext parseContext = physicalContext.getParseContext();

    // A null rule map means that no table has a usable index: nothing to do.
    Map<Rule, NodeProcessor> rules = createOperatorRules(parseContext);
    if (rules == null) {
        return null;
    }

    // The context carries the current task and the parse context through the walk.
    IndexWhereProcCtx walkContext = new IndexWhereProcCtx(task, parseContext);
    Dispatcher ruleDispatcher = new DefaultRuleDispatcher(getDefaultProcessor(), rules, walkContext);
    GraphWalker walker = new DefaultGraphWalker(ruleDispatcher);

    // Only tasks whose work is a MapredWork are walked.
    if (!(task.getWork() instanceof MapredWork)) {
        return null;
    }
    MapredWork mapredWork = (MapredWork) task.getWork();

    // Walk the operator trees (not the task tree) registered per alias.
    ArrayList<Node> roots = new ArrayList<Node>(mapredWork.getMapWork().getAliasToWork().values());
    walker.startWalking(roots, null);
    return null;
}
Also used : Task(org.apache.hadoop.hive.ql.exec.Task) Serializable(java.io.Serializable) NodeProcessor(org.apache.hadoop.hive.ql.lib.NodeProcessor) DefaultRuleDispatcher(org.apache.hadoop.hive.ql.lib.DefaultRuleDispatcher) DefaultGraphWalker(org.apache.hadoop.hive.ql.lib.DefaultGraphWalker) Node(org.apache.hadoop.hive.ql.lib.Node) ArrayList(java.util.ArrayList) Dispatcher(org.apache.hadoop.hive.ql.lib.Dispatcher) DefaultRuleDispatcher(org.apache.hadoop.hive.ql.lib.DefaultRuleDispatcher) MapredWork(org.apache.hadoop.hive.ql.plan.MapredWork) ParseContext(org.apache.hadoop.hive.ql.parse.ParseContext) Rule(org.apache.hadoop.hive.ql.lib.Rule) DefaultGraphWalker(org.apache.hadoop.hive.ql.lib.DefaultGraphWalker) GraphWalker(org.apache.hadoop.hive.ql.lib.GraphWalker)

Aggregations

NodeProcessor (org.apache.hadoop.hive.ql.lib.NodeProcessor)72 Node (org.apache.hadoop.hive.ql.lib.Node)71 Rule (org.apache.hadoop.hive.ql.lib.Rule)71 LinkedHashMap (java.util.LinkedHashMap)69 GraphWalker (org.apache.hadoop.hive.ql.lib.GraphWalker)69 ArrayList (java.util.ArrayList)68 Dispatcher (org.apache.hadoop.hive.ql.lib.Dispatcher)68 DefaultRuleDispatcher (org.apache.hadoop.hive.ql.lib.DefaultRuleDispatcher)67 RuleRegExp (org.apache.hadoop.hive.ql.lib.RuleRegExp)62 DefaultGraphWalker (org.apache.hadoop.hive.ql.lib.DefaultGraphWalker)46 ReduceSinkOperator (org.apache.hadoop.hive.ql.exec.ReduceSinkOperator)11 TypeRule (org.apache.hadoop.hive.ql.lib.TypeRule)11 MapJoinOperator (org.apache.hadoop.hive.ql.exec.MapJoinOperator)10 HashMap (java.util.HashMap)9 JoinOperator (org.apache.hadoop.hive.ql.exec.JoinOperator)8 Operator (org.apache.hadoop.hive.ql.exec.Operator)8 TableScanOperator (org.apache.hadoop.hive.ql.exec.TableScanOperator)8 ForwardWalker (org.apache.hadoop.hive.ql.lib.ForwardWalker)8 FileSinkOperator (org.apache.hadoop.hive.ql.exec.FileSinkOperator)6 UnionOperator (org.apache.hadoop.hive.ql.exec.UnionOperator)6