Search in sources :

Example 31 with RuleRegExp

use of org.apache.hadoop.hive.ql.lib.RuleRegExp in project hive by apache.

the class SyntheticJoinPredicate method transform.

/**
 * Walks the operator tree from the top operators and dispatches {@code JoinSynthetic}
 * for every operator path matching "table scan, anything, reduce sink, join".
 *
 * <p>The walk only happens when the execution engine is "tez" with
 * {@code TEZ_DYNAMIC_PARTITION_PRUNING} switched on, or the engine is "spark" and
 * {@code isSparkDPPAny()} reports true. Otherwise the context is returned untouched.
 *
 * @param pctx the parse context whose operator tree is walked
 * @return the same {@code pctx} instance that was passed in
 * @throws SemanticException declared by the graph-walking API used here
 */
@Override
public ParseContext transform(ParseContext pctx) throws SemanticException {
    final String engine = pctx.getConf().getVar(ConfVars.HIVE_EXECUTION_ENGINE);
    // Short-circuit order matters: the spark check is only consulted when the tez check fails.
    final boolean active = (engine.equals("tez") && pctx.getConf().getBoolVar(ConfVars.TEZ_DYNAMIC_PARTITION_PRUNING))
        || (engine.equals("spark") && pctx.getConf().isSparkDPPAny());
    if (!active) {
        return pctx;
    }

    // Single rule: a table scan followed (eventually) by a reduce sink feeding a join.
    final String joinPathPattern = "(" + TableScanOperator.getOperatorName() + "%" + ".*" + ReduceSinkOperator.getOperatorName() + "%" + JoinOperator.getOperatorName() + "%)";
    Map<Rule, NodeProcessor> rules = new LinkedHashMap<Rule, NodeProcessor>();
    rules.put(new RuleRegExp("R1", joinPathPattern), new JoinSynthetic());

    // No default processor: nodes that do not match the rule are not handed to any processor here.
    SyntheticContext walkCtx = new SyntheticContext(pctx);
    Dispatcher dispatcher = new DefaultRuleDispatcher(null, rules, walkCtx);
    GraphWalker walker = new PreOrderOnceWalker(dispatcher);

    // Start from every top operator of the plan.
    List<Node> roots = new ArrayList<Node>(pctx.getTopOps().values());
    walker.startWalking(roots, null);
    return pctx;
}
Also used : NodeProcessor(org.apache.hadoop.hive.ql.lib.NodeProcessor) DefaultRuleDispatcher(org.apache.hadoop.hive.ql.lib.DefaultRuleDispatcher) Node(org.apache.hadoop.hive.ql.lib.Node) RuleRegExp(org.apache.hadoop.hive.ql.lib.RuleRegExp) ArrayList(java.util.ArrayList) Dispatcher(org.apache.hadoop.hive.ql.lib.Dispatcher) DefaultRuleDispatcher(org.apache.hadoop.hive.ql.lib.DefaultRuleDispatcher) LinkedHashMap(java.util.LinkedHashMap) PreOrderOnceWalker(org.apache.hadoop.hive.ql.lib.PreOrderOnceWalker) Rule(org.apache.hadoop.hive.ql.lib.Rule) GraphWalker(org.apache.hadoop.hive.ql.lib.GraphWalker)

Example 32 with RuleRegExp

use of org.apache.hadoop.hive.ql.lib.RuleRegExp in project hive by apache.

the class TableAccessAnalyzer method analyzeTableAccess.

/**
 * Walks the operator graph starting at the top operators of {@code pGraphContext},
 * dispatching group-by, join and map-join operators to their processors, and returns
 * the table access information held by the walk context.
 *
 * @return the {@code TableAccessInfo} obtained from the {@code TableAccessCtx} after the walk
 * @throws SemanticException declared by the graph-walking API used here
 */
public TableAccessInfo analyzeTableAccess() throws SemanticException {
    // One rule per operator kind of interest; join and map-join each get their own JoinProcessor.
    Map<Rule, NodeProcessor> rules = new LinkedHashMap<Rule, NodeProcessor>();
    rules.put(new RuleRegExp("R1", GroupByOperator.getOperatorName() + "%"), new GroupByProcessor(pGraphContext));
    rules.put(new RuleRegExp("R2", JoinOperator.getOperatorName() + "%"), new JoinProcessor(pGraphContext));
    rules.put(new RuleRegExp("R3", MapJoinOperator.getOperatorName() + "%"), new JoinProcessor(pGraphContext));

    TableAccessCtx accessCtx = new TableAccessCtx();
    Dispatcher dispatcher = new DefaultRuleDispatcher(getDefaultProc(), rules, accessCtx);
    GraphWalker walker = new DefaultGraphWalker(dispatcher);

    // Seed the walk with every top operator.
    List<Node> roots = new ArrayList<Node>(pGraphContext.getTopOps().values());
    walker.startWalking(roots, null);

    return accessCtx.getTableAccessInfo();
}
Also used : NodeProcessor(org.apache.hadoop.hive.ql.lib.NodeProcessor) DefaultRuleDispatcher(org.apache.hadoop.hive.ql.lib.DefaultRuleDispatcher) DefaultGraphWalker(org.apache.hadoop.hive.ql.lib.DefaultGraphWalker) Node(org.apache.hadoop.hive.ql.lib.Node) RuleRegExp(org.apache.hadoop.hive.ql.lib.RuleRegExp) ArrayList(java.util.ArrayList) Dispatcher(org.apache.hadoop.hive.ql.lib.Dispatcher) DefaultRuleDispatcher(org.apache.hadoop.hive.ql.lib.DefaultRuleDispatcher) LinkedHashMap(java.util.LinkedHashMap) Rule(org.apache.hadoop.hive.ql.lib.Rule) DefaultGraphWalker(org.apache.hadoop.hive.ql.lib.DefaultGraphWalker) GraphWalker(org.apache.hadoop.hive.ql.lib.GraphWalker)

Example 33 with RuleRegExp

use of org.apache.hadoop.hive.ql.lib.RuleRegExp in project hive by apache.

the class IndexWhereTaskDispatcher method createOperatorRules.

/**
 * Builds the rule set used to process table scans that have at least one
 * compact or bitmap index.
 *
 * <p>Every top operator that is a {@code TableScanOperator} is checked for indexes
 * handled by {@code CompactIndexHandler} or {@code BitmapIndexHandler}. Scans with
 * at least one such index are handed to a single {@code IndexWhereProcessor}.
 *
 * @param pctx the parse context supplying the top operators
 * @return a map with one "RULEWhere" rule matching table scan operators, or
 *         {@code null} when none of the top-level table scans has a supported index
 * @throws SemanticException propagated from the calls made here (presumably the index lookup)
 */
private Map<Rule, NodeProcessor> createOperatorRules(ParseContext pctx) throws SemanticException {
    List<String> handlerClassNames = new ArrayList<String>();
    handlerClassNames.add(CompactIndexHandler.class.getName());
    handlerClassNames.add(BitmapIndexHandler.class.getName());

    // Collect, per table scan, the indexes of a supported type.
    Map<TableScanOperator, List<Index>> indexesByScan = new HashMap<TableScanOperator, List<Index>>();
    for (Operator<? extends OperatorDesc> topOp : pctx.getTopOps().values()) {
        if (!(topOp instanceof TableScanOperator)) {
            continue;
        }
        TableScanOperator scan = (TableScanOperator) topOp;
        List<Index> found = IndexUtils.getIndexes(scan.getConf().getTableMetadata(), handlerClassNames);
        if (!found.isEmpty()) {
            indexesByScan.put(scan, found);
        }
    }

    // Nothing to do when no table has an index; callers receive null in that case.
    if (indexesByScan.isEmpty()) {
        return null;
    }

    // The pushed WHERE predicate is set as the filter expression on the table scan
    // operators, so the rule matches table scan operators (TS%).
    Map<Rule, NodeProcessor> rules = new LinkedHashMap<Rule, NodeProcessor>();
    rules.put(new RuleRegExp("RULEWhere", TableScanOperator.getOperatorName() + "%"), new IndexWhereProcessor(indexesByScan));
    return rules;
}
Also used : TableScanOperator(org.apache.hadoop.hive.ql.exec.TableScanOperator) NodeProcessor(org.apache.hadoop.hive.ql.lib.NodeProcessor) HashMap(java.util.HashMap) LinkedHashMap(java.util.LinkedHashMap) ArrayList(java.util.ArrayList) RuleRegExp(org.apache.hadoop.hive.ql.lib.RuleRegExp) CompactIndexHandler(org.apache.hadoop.hive.ql.index.compact.CompactIndexHandler) Index(org.apache.hadoop.hive.metastore.api.Index) LinkedHashMap(java.util.LinkedHashMap) BitmapIndexHandler(org.apache.hadoop.hive.ql.index.bitmap.BitmapIndexHandler) ArrayList(java.util.ArrayList) List(java.util.List) Rule(org.apache.hadoop.hive.ql.lib.Rule)

Example 34 with RuleRegExp

use of org.apache.hadoop.hive.ql.lib.RuleRegExp in project hive by apache.

the class AnnotateWithStatistics method transform.

/**
 * Walks the operator tree with a {@code LevelOrderWalker}, dispatching each
 * operator kind listed below to the matching {@code StatsRulesProcFactory} processor.
 * Operators that match no rule go to the factory's default rule.
 *
 * @param pctx the parse context whose top operators seed the walk
 * @return the same {@code pctx} instance that was passed in
 * @throws SemanticException declared by the graph-walking API used here
 */
@Override
public ParseContext transform(ParseContext pctx) throws SemanticException {
    AnnotateStatsProcCtx statsCtx = new AnnotateStatsProcCtx(pctx);

    // Rule table: operator-name pattern -> statistics processor. Insertion order is kept.
    Map<Rule, NodeProcessor> rules = new LinkedHashMap<Rule, NodeProcessor>();
    rules.put(new RuleRegExp("TS", TableScanOperator.getOperatorName() + "%"), StatsRulesProcFactory.getTableScanRule());
    rules.put(new RuleRegExp("SEL", SelectOperator.getOperatorName() + "%"), StatsRulesProcFactory.getSelectRule());
    rules.put(new RuleRegExp("FIL", FilterOperator.getOperatorName() + "%"), StatsRulesProcFactory.getFilterRule());
    rules.put(new RuleRegExp("GBY", GroupByOperator.getOperatorName() + "%"), StatsRulesProcFactory.getGroupByRule());
    // Common joins and map joins share one processor.
    rules.put(new RuleRegExp("JOIN", CommonJoinOperator.getOperatorName() + "%|" + MapJoinOperator.getOperatorName() + "%"), StatsRulesProcFactory.getJoinRule());
    rules.put(new RuleRegExp("LIM", LimitOperator.getOperatorName() + "%"), StatsRulesProcFactory.getLimitRule());
    rules.put(new RuleRegExp("RS", ReduceSinkOperator.getOperatorName() + "%"), StatsRulesProcFactory.getReduceSinkRule());

    // The dispatcher picks the processor of the closest matching rule and hands it the context.
    Dispatcher dispatcher = new DefaultRuleDispatcher(StatsRulesProcFactory.getDefaultRule(), rules, statsCtx);
    GraphWalker walker = new LevelOrderWalker(dispatcher, 0);

    // Begin at every top operator of the plan.
    ArrayList<Node> roots = new ArrayList<Node>(pctx.getTopOps().values());
    walker.startWalking(roots, null);
    return pctx;
}
Also used : NodeProcessor(org.apache.hadoop.hive.ql.lib.NodeProcessor) DefaultRuleDispatcher(org.apache.hadoop.hive.ql.lib.DefaultRuleDispatcher) Node(org.apache.hadoop.hive.ql.lib.Node) RuleRegExp(org.apache.hadoop.hive.ql.lib.RuleRegExp) ArrayList(java.util.ArrayList) Dispatcher(org.apache.hadoop.hive.ql.lib.Dispatcher) DefaultRuleDispatcher(org.apache.hadoop.hive.ql.lib.DefaultRuleDispatcher) LinkedHashMap(java.util.LinkedHashMap) Rule(org.apache.hadoop.hive.ql.lib.Rule) LevelOrderWalker(org.apache.hadoop.hive.ql.lib.LevelOrderWalker) GraphWalker(org.apache.hadoop.hive.ql.lib.GraphWalker)

Example 35 with RuleRegExp

use of org.apache.hadoop.hive.ql.lib.RuleRegExp in project hive by apache.

the class ExprProcFactory method getExprDependency.

/**
 * Computes the dependency of an expression by walking it with a rule dispatcher.
 *
 * <p>Column, field and generic-function expression nodes each have a dedicated
 * processor; any other node goes to the default expression processor.
 *
 * @param lctx
 *          The lineage context containing the input operators dependencies.
 * @param inpOp
 *          The input operator to the current operator.
 * @param expr
 *          The expression that is being processed.
 * @return the value the walk recorded for {@code expr} in its output map, cast to
 *         {@code Dependency}
 * @throws SemanticException declared by the graph-walking API used here
 */
public static Dependency getExprDependency(LineageCtx lctx, Operator<? extends OperatorDesc> inpOp, ExprNodeDesc expr) throws SemanticException {
    // Context shared by all processors during this walk.
    ExprProcCtx procCtx = new ExprProcCtx(lctx, inpOp);

    // Rules are keyed on the expression node's class name.
    Map<Rule, NodeProcessor> rules = new LinkedHashMap<Rule, NodeProcessor>();
    rules.put(new RuleRegExp("R1", ExprNodeColumnDesc.class.getName() + "%"), getColumnProcessor());
    rules.put(new RuleRegExp("R2", ExprNodeFieldDesc.class.getName() + "%"), getFieldProcessor());
    rules.put(new RuleRegExp("R3", ExprNodeGenericFuncDesc.class.getName() + "%"), getGenericFuncProcessor());

    // The dispatcher picks the processor of the closest matching rule and hands it the context.
    Dispatcher dispatcher = new DefaultRuleDispatcher(getDefaultExprProcessor(), rules, procCtx);
    GraphWalker walker = new DefaultGraphWalker(dispatcher);

    // The expression itself is the only start node; results are collected per node.
    List<Node> roots = new ArrayList<Node>();
    roots.add(expr);
    HashMap<Node, Object> results = new HashMap<Node, Object>();
    walker.startWalking(roots, results);

    return (Dependency) results.get(expr);
}
Also used : NodeProcessor(org.apache.hadoop.hive.ql.lib.NodeProcessor) DefaultRuleDispatcher(org.apache.hadoop.hive.ql.lib.DefaultRuleDispatcher) HashMap(java.util.HashMap) LinkedHashMap(java.util.LinkedHashMap) DefaultGraphWalker(org.apache.hadoop.hive.ql.lib.DefaultGraphWalker) Node(org.apache.hadoop.hive.ql.lib.Node) RuleRegExp(org.apache.hadoop.hive.ql.lib.RuleRegExp) ArrayList(java.util.ArrayList) ExprNodeGenericFuncDesc(org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc) Dependency(org.apache.hadoop.hive.ql.hooks.LineageInfo.Dependency) Dispatcher(org.apache.hadoop.hive.ql.lib.Dispatcher) DefaultRuleDispatcher(org.apache.hadoop.hive.ql.lib.DefaultRuleDispatcher) LinkedHashMap(java.util.LinkedHashMap) ExprNodeFieldDesc(org.apache.hadoop.hive.ql.plan.ExprNodeFieldDesc) ExprNodeColumnDesc(org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc) Rule(org.apache.hadoop.hive.ql.lib.Rule) DefaultGraphWalker(org.apache.hadoop.hive.ql.lib.DefaultGraphWalker) GraphWalker(org.apache.hadoop.hive.ql.lib.GraphWalker)

Aggregations

NodeProcessor (org.apache.hadoop.hive.ql.lib.NodeProcessor)61 Rule (org.apache.hadoop.hive.ql.lib.Rule)61 RuleRegExp (org.apache.hadoop.hive.ql.lib.RuleRegExp)61 LinkedHashMap (java.util.LinkedHashMap)60 Node (org.apache.hadoop.hive.ql.lib.Node)60 GraphWalker (org.apache.hadoop.hive.ql.lib.GraphWalker)59 ArrayList (java.util.ArrayList)58 DefaultRuleDispatcher (org.apache.hadoop.hive.ql.lib.DefaultRuleDispatcher)58 Dispatcher (org.apache.hadoop.hive.ql.lib.Dispatcher)58 DefaultGraphWalker (org.apache.hadoop.hive.ql.lib.DefaultGraphWalker)37 ReduceSinkOperator (org.apache.hadoop.hive.ql.exec.ReduceSinkOperator)10 MapJoinOperator (org.apache.hadoop.hive.ql.exec.MapJoinOperator)8 TableScanOperator (org.apache.hadoop.hive.ql.exec.TableScanOperator)8 ForwardWalker (org.apache.hadoop.hive.ql.lib.ForwardWalker)8 FileSinkOperator (org.apache.hadoop.hive.ql.exec.FileSinkOperator)7 Operator (org.apache.hadoop.hive.ql.exec.Operator)7 TypeRule (org.apache.hadoop.hive.ql.lib.TypeRule)7 HashMap (java.util.HashMap)6 JoinOperator (org.apache.hadoop.hive.ql.exec.JoinOperator)6 UnionOperator (org.apache.hadoop.hive.ql.exec.UnionOperator)6