Search in sources :

Example 66 with GraphWalker

use of org.apache.hadoop.hive.ql.lib.GraphWalker in project hive by apache.

the class CorrelationOptimizer method transform.

/**
   * Detect correlations and transform the query tree.
   *
   * @param pactx
   *          current parse context
   * @throws SemanticException
   */
public ParseContext transform(ParseContext pctx) throws SemanticException {
    pCtx = pctx;
    if (HiveConf.getBoolVar(pCtx.getConf(), HiveConf.ConfVars.HIVECONVERTJOIN)) {
        findPossibleAutoConvertedJoinOperators();
    }
    // detect correlations
    CorrelationNodeProcCtx corrCtx = new CorrelationNodeProcCtx(pCtx);
    Map<Rule, NodeProcessor> opRules = new LinkedHashMap<Rule, NodeProcessor>();
    opRules.put(new RuleRegExp("R1", ReduceSinkOperator.getOperatorName() + "%"), new CorrelationNodeProc());
    Dispatcher disp = new DefaultRuleDispatcher(getDefaultProc(), opRules, corrCtx);
    GraphWalker ogw = new DefaultGraphWalker(disp);
    // Create a list of topOp nodes
    List<Node> topNodes = new ArrayList<Node>();
    topNodes.addAll(pCtx.getTopOps().values());
    ogw.startWalking(topNodes, null);
    // We have finished tree walking (correlation detection).
    // We will first see if we need to abort (the operator tree has not been changed).
    // If not, we will start to transform the operator tree.
    abort = corrCtx.isAbort();
    if (abort) {
        LOG.info("Abort. Reasons are ...");
        for (String reason : corrCtx.getAbortReasons()) {
            LOG.info("-- " + reason);
        }
    } else {
        // transform the operator tree
        LOG.info("Begain query plan transformation based on intra-query correlations. " + corrCtx.getCorrelations().size() + " correlation(s) to be applied");
        for (IntraQueryCorrelation correlation : corrCtx.getCorrelations()) {
            QueryPlanTreeTransformation.applyCorrelation(pCtx, corrCtx, correlation);
        }
    }
    return pCtx;
}
Also used : NodeProcessor(org.apache.hadoop.hive.ql.lib.NodeProcessor) DefaultRuleDispatcher(org.apache.hadoop.hive.ql.lib.DefaultRuleDispatcher) DefaultGraphWalker(org.apache.hadoop.hive.ql.lib.DefaultGraphWalker) Node(org.apache.hadoop.hive.ql.lib.Node) RuleRegExp(org.apache.hadoop.hive.ql.lib.RuleRegExp) ArrayList(java.util.ArrayList) Dispatcher(org.apache.hadoop.hive.ql.lib.Dispatcher) DefaultRuleDispatcher(org.apache.hadoop.hive.ql.lib.DefaultRuleDispatcher) CommonJoinTaskDispatcher(org.apache.hadoop.hive.ql.optimizer.physical.CommonJoinTaskDispatcher) LinkedHashMap(java.util.LinkedHashMap) Rule(org.apache.hadoop.hive.ql.lib.Rule) GraphWalker(org.apache.hadoop.hive.ql.lib.GraphWalker) DefaultGraphWalker(org.apache.hadoop.hive.ql.lib.DefaultGraphWalker)

Example 67 with GraphWalker

use of org.apache.hadoop.hive.ql.lib.GraphWalker in project hive by apache.

the class RewriteCanApplyCtx method populateRewriteVars.

/**
   * This method walks all the nodes starting from topOp TableScanOperator node
   * and invokes methods from {@link RewriteCanApplyProcFactory} for each of the rules
   * added to the opRules map. We use the {@link PreOrderOnceWalker} for a pre-order
   * traversal of the operator tree.
   *
   * The methods from {@link RewriteCanApplyProcFactory} set appropriate values in
   * {@link RewriteVars} enum.
   *
   * @param topOp
   * @throws SemanticException
   */
void populateRewriteVars(TableScanOperator topOp) throws SemanticException {
    Map<Rule, NodeProcessor> opRules = new LinkedHashMap<Rule, NodeProcessor>();
    //^TS%[(SEL%)|(FIL%)]*GRY%[(FIL%)]*RS%[(FIL%)]*GRY%
    opRules.put(new RuleRegExp("R1", TableScanOperator.getOperatorName() + "%[(" + SelectOperator.getOperatorName() + "%)|(" + FilterOperator.getOperatorName() + "%)]*" + GroupByOperator.getOperatorName() + "%[" + FilterOperator.getOperatorName() + "%]*" + ReduceSinkOperator.getOperatorName() + "%[" + FilterOperator.getOperatorName() + "%]*" + GroupByOperator.getOperatorName() + "%"), RewriteCanApplyProcFactory.canApplyOnTableScanOperator(topOp));
    // The dispatcher fires the processor corresponding to the closest matching
    // rule and passes the context along
    Dispatcher disp = new DefaultRuleDispatcher(getDefaultProc(), opRules, this);
    GraphWalker ogw = new PreOrderOnceWalker(disp);
    // Create a list of topop nodes
    List<Node> topNodes = new ArrayList<Node>();
    topNodes.add(topOp);
    try {
        ogw.startWalking(topNodes, null);
    } catch (SemanticException e) {
        LOG.error("Exception in walking operator tree. Rewrite variables not populated");
        LOG.error(org.apache.hadoop.util.StringUtils.stringifyException(e));
        throw new SemanticException(e.getMessage(), e);
    }
}
Also used : NodeProcessor(org.apache.hadoop.hive.ql.lib.NodeProcessor) DefaultRuleDispatcher(org.apache.hadoop.hive.ql.lib.DefaultRuleDispatcher) Node(org.apache.hadoop.hive.ql.lib.Node) RuleRegExp(org.apache.hadoop.hive.ql.lib.RuleRegExp) ArrayList(java.util.ArrayList) Dispatcher(org.apache.hadoop.hive.ql.lib.Dispatcher) DefaultRuleDispatcher(org.apache.hadoop.hive.ql.lib.DefaultRuleDispatcher) LinkedHashMap(java.util.LinkedHashMap) PreOrderOnceWalker(org.apache.hadoop.hive.ql.lib.PreOrderOnceWalker) Rule(org.apache.hadoop.hive.ql.lib.Rule) GraphWalker(org.apache.hadoop.hive.ql.lib.GraphWalker) SemanticException(org.apache.hadoop.hive.ql.parse.SemanticException)

Example 68 with GraphWalker

use of org.apache.hadoop.hive.ql.lib.GraphWalker in project hive by apache.

the class LineageInfo method getLineageInfo.

/**
   * parses given query and gets the lineage info.
   *
   * @param query
   * @throws ParseException
   */
public void getLineageInfo(String query) throws ParseException, SemanticException {
    /*
     * Get the AST tree
     */
    ASTNode tree = ParseUtils.parse(query, null);
    while ((tree.getToken() == null) && (tree.getChildCount() > 0)) {
        tree = (ASTNode) tree.getChild(0);
    }
    /*
     * initialize Event Processor and dispatcher.
     */
    inputTableList.clear();
    OutputTableList.clear();
    // create a walker which walks the tree in a DFS manner while maintaining
    // the operator stack. The dispatcher
    // generates the plan from the operator tree
    Map<Rule, NodeProcessor> rules = new LinkedHashMap<Rule, NodeProcessor>();
    // The dispatcher fires the processor corresponding to the closest matching
    // rule and passes the context along
    Dispatcher disp = new DefaultRuleDispatcher(this, rules, null);
    GraphWalker ogw = new DefaultGraphWalker(disp);
    // Create a list of topop nodes
    ArrayList<Node> topNodes = new ArrayList<Node>();
    topNodes.add(tree);
    ogw.startWalking(topNodes, null);
}
Also used : NodeProcessor(org.apache.hadoop.hive.ql.lib.NodeProcessor) DefaultRuleDispatcher(org.apache.hadoop.hive.ql.lib.DefaultRuleDispatcher) DefaultGraphWalker(org.apache.hadoop.hive.ql.lib.DefaultGraphWalker) ASTNode(org.apache.hadoop.hive.ql.parse.ASTNode) Node(org.apache.hadoop.hive.ql.lib.Node) ASTNode(org.apache.hadoop.hive.ql.parse.ASTNode) ArrayList(java.util.ArrayList) Rule(org.apache.hadoop.hive.ql.lib.Rule) Dispatcher(org.apache.hadoop.hive.ql.lib.Dispatcher) DefaultRuleDispatcher(org.apache.hadoop.hive.ql.lib.DefaultRuleDispatcher) GraphWalker(org.apache.hadoop.hive.ql.lib.GraphWalker) DefaultGraphWalker(org.apache.hadoop.hive.ql.lib.DefaultGraphWalker) LinkedHashMap(java.util.LinkedHashMap)

Example 69 with GraphWalker

use of org.apache.hadoop.hive.ql.lib.GraphWalker in project hive by apache.

the class TableAccessAnalyzer method analyzeTableAccess.

public TableAccessInfo analyzeTableAccess() throws SemanticException {
    // Set up the rules for the graph walker for group by and join operators
    Map<Rule, NodeProcessor> opRules = new LinkedHashMap<Rule, NodeProcessor>();
    opRules.put(new RuleRegExp("R1", GroupByOperator.getOperatorName() + "%"), new GroupByProcessor(pGraphContext));
    opRules.put(new RuleRegExp("R2", JoinOperator.getOperatorName() + "%"), new JoinProcessor(pGraphContext));
    opRules.put(new RuleRegExp("R3", MapJoinOperator.getOperatorName() + "%"), new JoinProcessor(pGraphContext));
    TableAccessCtx tableAccessCtx = new TableAccessCtx();
    Dispatcher disp = new DefaultRuleDispatcher(getDefaultProc(), opRules, tableAccessCtx);
    GraphWalker ogw = new DefaultGraphWalker(disp);
    // Create a list of topop nodes and walk!
    List<Node> topNodes = new ArrayList<Node>();
    topNodes.addAll(pGraphContext.getTopOps().values());
    ogw.startWalking(topNodes, null);
    return tableAccessCtx.getTableAccessInfo();
}
Also used : NodeProcessor(org.apache.hadoop.hive.ql.lib.NodeProcessor) DefaultRuleDispatcher(org.apache.hadoop.hive.ql.lib.DefaultRuleDispatcher) DefaultGraphWalker(org.apache.hadoop.hive.ql.lib.DefaultGraphWalker) Node(org.apache.hadoop.hive.ql.lib.Node) RuleRegExp(org.apache.hadoop.hive.ql.lib.RuleRegExp) ArrayList(java.util.ArrayList) Dispatcher(org.apache.hadoop.hive.ql.lib.Dispatcher) DefaultRuleDispatcher(org.apache.hadoop.hive.ql.lib.DefaultRuleDispatcher) LinkedHashMap(java.util.LinkedHashMap) Rule(org.apache.hadoop.hive.ql.lib.Rule) DefaultGraphWalker(org.apache.hadoop.hive.ql.lib.DefaultGraphWalker) GraphWalker(org.apache.hadoop.hive.ql.lib.GraphWalker)

Example 70 with GraphWalker

use of org.apache.hadoop.hive.ql.lib.GraphWalker in project phoenix by apache.

the class IndexPredicateAnalyzer method analyzePredicate.

/**
     * Analyzes a predicate.
     *
     * @param predicate        predicate to be analyzed
     * @param searchConditions receives conditions produced by analysis
     * @return residual predicate which could not be translated to
     * searchConditions
     */
public ExprNodeDesc analyzePredicate(ExprNodeDesc predicate, final List<IndexSearchCondition> searchConditions) {
    Map<Rule, NodeProcessor> opRules = new LinkedHashMap<Rule, NodeProcessor>();
    NodeProcessor nodeProcessor = new NodeProcessor() {

        @Override
        public Object process(Node nd, Stack<Node> stack, NodeProcessorCtx procCtx, Object... nodeOutputs) throws SemanticException {
            // a pure conjunction: reject OR, CASE, etc.
            for (Node ancestor : stack) {
                if (nd == ancestor) {
                    break;
                }
                if (!FunctionRegistry.isOpAnd((ExprNodeDesc) ancestor)) {
                    return nd;
                }
            }
            return analyzeExpr((ExprNodeGenericFuncDesc) nd, searchConditions, nodeOutputs);
        }
    };
    Dispatcher disp = new DefaultRuleDispatcher(nodeProcessor, opRules, null);
    GraphWalker ogw = new DefaultGraphWalker(disp);
    ArrayList<Node> topNodes = new ArrayList<Node>();
    topNodes.add(predicate);
    HashMap<Node, Object> nodeOutput = new HashMap<Node, Object>();
    try {
        ogw.startWalking(topNodes, nodeOutput);
    } catch (SemanticException ex) {
        throw new RuntimeException(ex);
    }
    ExprNodeDesc residualPredicate = (ExprNodeDesc) nodeOutput.get(predicate);
    return residualPredicate;
}
Also used : NodeProcessor(org.apache.hadoop.hive.ql.lib.NodeProcessor) DefaultRuleDispatcher(org.apache.hadoop.hive.ql.lib.DefaultRuleDispatcher) HashMap(java.util.HashMap) LinkedHashMap(java.util.LinkedHashMap) Node(org.apache.hadoop.hive.ql.lib.Node) DefaultGraphWalker(org.apache.hadoop.hive.ql.lib.DefaultGraphWalker) ArrayList(java.util.ArrayList) Dispatcher(org.apache.hadoop.hive.ql.lib.Dispatcher) DefaultRuleDispatcher(org.apache.hadoop.hive.ql.lib.DefaultRuleDispatcher) LinkedHashMap(java.util.LinkedHashMap) Stack(java.util.Stack) NodeProcessorCtx(org.apache.hadoop.hive.ql.lib.NodeProcessorCtx) Rule(org.apache.hadoop.hive.ql.lib.Rule) ExprNodeDesc(org.apache.hadoop.hive.ql.plan.ExprNodeDesc) DefaultGraphWalker(org.apache.hadoop.hive.ql.lib.DefaultGraphWalker) GraphWalker(org.apache.hadoop.hive.ql.lib.GraphWalker) SemanticException(org.apache.hadoop.hive.ql.parse.SemanticException)

Aggregations

GraphWalker (org.apache.hadoop.hive.ql.lib.GraphWalker)70 Node (org.apache.hadoop.hive.ql.lib.Node)70 Dispatcher (org.apache.hadoop.hive.ql.lib.Dispatcher)68 ArrayList (java.util.ArrayList)67 DefaultRuleDispatcher (org.apache.hadoop.hive.ql.lib.DefaultRuleDispatcher)65 NodeProcessor (org.apache.hadoop.hive.ql.lib.NodeProcessor)57 Rule (org.apache.hadoop.hive.ql.lib.Rule)57 LinkedHashMap (java.util.LinkedHashMap)56 DefaultGraphWalker (org.apache.hadoop.hive.ql.lib.DefaultGraphWalker)56 RuleRegExp (org.apache.hadoop.hive.ql.lib.RuleRegExp)49 HashMap (java.util.HashMap)16 SemanticException (org.apache.hadoop.hive.ql.parse.SemanticException)12 ExprNodeColumnDesc (org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc)11 ExprNodeDesc (org.apache.hadoop.hive.ql.plan.ExprNodeDesc)11 ExprNodeGenericFuncDesc (org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc)11 TypeRule (org.apache.hadoop.hive.ql.lib.TypeRule)10 ExprNodeConstantDesc (org.apache.hadoop.hive.ql.plan.ExprNodeConstantDesc)7 GenericUDFOPEqualOrGreaterThan (org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPEqualOrGreaterThan)7 Test (org.junit.Test)7 List (java.util.List)6