Search in sources :

Example 76 with DefaultRuleDispatcher

use of org.apache.hadoop.hive.ql.lib.DefaultRuleDispatcher in project hive by apache.

the class PcrExprProcFactory method walkExprTree.

/**
 * Remove partition conditions when necessary from the the expression tree.
 *
 * @param tabAlias
 *          the table alias
 * @param parts
 *          the list of all pruned partitions for the table
 * @param vcs
 *          virtual columns referenced
 * @param pred
 *          expression tree of the target filter operator
 * @return the node information of the root expression
 * @throws SemanticException
 */
public static NodeInfoWrapper walkExprTree(String tabAlias, ArrayList<Partition> parts, List<VirtualColumn> vcs, ExprNodeDesc pred) throws SemanticException {
    // Create the walker, the rules dispatcher and the context.
    PcrExprProcCtx pprCtx = new PcrExprProcCtx(tabAlias, parts, vcs);
    Map<SemanticRule, SemanticNodeProcessor> exprRules = new LinkedHashMap<SemanticRule, SemanticNodeProcessor>();
    exprRules.put(new RuleRegExp("R1", ExprNodeColumnDesc.class.getName() + "%"), getColumnProcessor());
    exprRules.put(new RuleRegExp("R2", ExprNodeFieldDesc.class.getName() + "%"), getFieldProcessor());
    exprRules.put(new RuleRegExp("R5", ExprNodeGenericFuncDesc.class.getName() + "%"), getGenericFuncProcessor());
    // The dispatcher fires the processor corresponding to the closest matching
    // rule and passes the context along
    SemanticDispatcher disp = new DefaultRuleDispatcher(getDefaultExprProcessor(), exprRules, pprCtx);
    SemanticGraphWalker egw = new ExpressionWalker(disp);
    List<Node> startNodes = new ArrayList<Node>();
    startNodes.add(pred);
    HashMap<Node, Object> outputMap = new HashMap<Node, Object>();
    egw.startWalking(startNodes, outputMap);
    // Return the wrapper of the root node
    return (NodeInfoWrapper) outputMap.get(pred);
}
Also used : SemanticRule(org.apache.hadoop.hive.ql.lib.SemanticRule) DefaultRuleDispatcher(org.apache.hadoop.hive.ql.lib.DefaultRuleDispatcher) HashMap(java.util.HashMap) LinkedHashMap(java.util.LinkedHashMap) Node(org.apache.hadoop.hive.ql.lib.Node) RuleRegExp(org.apache.hadoop.hive.ql.lib.RuleRegExp) ArrayList(java.util.ArrayList) ExprNodeGenericFuncDesc(org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc) SemanticGraphWalker(org.apache.hadoop.hive.ql.lib.SemanticGraphWalker) LinkedHashMap(java.util.LinkedHashMap) ExpressionWalker(org.apache.hadoop.hive.ql.lib.ExpressionWalker) ExprNodeFieldDesc(org.apache.hadoop.hive.ql.plan.ExprNodeFieldDesc) ExprNodeColumnDesc(org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc) SemanticDispatcher(org.apache.hadoop.hive.ql.lib.SemanticDispatcher) SemanticNodeProcessor(org.apache.hadoop.hive.ql.lib.SemanticNodeProcessor)

Example 77 with DefaultRuleDispatcher

use of org.apache.hadoop.hive.ql.lib.DefaultRuleDispatcher in project hive by apache.

the class BucketingSortingInferenceOptimizer method inferBucketingSorting.

/**
 * For each map reduce task, if it has a reducer, infer whether or not the final output of the
 * reducer is bucketed and/or sorted
 *
 * @param mapRedTasks
 * @throws SemanticException
 */
private void inferBucketingSorting(List<ExecDriver> mapRedTasks) throws SemanticException {
    for (ExecDriver mapRedTask : mapRedTasks) {
        // of the outputs of intermediate map reduce jobs.
        if (!mapRedTask.getWork().isFinalMapRed()) {
            continue;
        }
        if (mapRedTask.getWork().getReduceWork() == null) {
            continue;
        }
        Operator<? extends OperatorDesc> reducer = mapRedTask.getWork().getReduceWork().getReducer();
        // uses sampling, which means it's not bucketed
        boolean disableBucketing = mapRedTask.getWork().getMapWork().getSamplingType() > 0;
        BucketingSortingCtx bCtx = new BucketingSortingCtx(disableBucketing);
        // RuleRegExp rules are used to match operators anywhere in the tree
        // RuleExactMatch rules are used to specify exactly what the tree should look like
        // In particular, this guarantees that the first operator is the reducer
        // (and its parent(s) are ReduceSinkOperators) since it begins walking the tree from
        // the reducer.
        Map<SemanticRule, SemanticNodeProcessor> opRules = new LinkedHashMap<SemanticRule, SemanticNodeProcessor>();
        opRules.put(new RuleRegExp("R1", SelectOperator.getOperatorName() + "%"), BucketingSortingOpProcFactory.getSelProc());
        // Matches only GroupByOperators which are reducers, rather than map group by operators,
        // or multi group by optimization specific operators
        opRules.put(new RuleExactMatch("R2", new String[] { GroupByOperator.getOperatorName() }), BucketingSortingOpProcFactory.getGroupByProc());
        // Matches only JoinOperators which are reducers, rather than map joins, SMB map joins, etc.
        opRules.put(new RuleExactMatch("R3", new String[] { JoinOperator.getOperatorName() }), BucketingSortingOpProcFactory.getJoinProc());
        opRules.put(new RuleRegExp("R5", FileSinkOperator.getOperatorName() + "%"), BucketingSortingOpProcFactory.getFileSinkProc());
        opRules.put(new RuleRegExp("R7", FilterOperator.getOperatorName() + "%"), BucketingSortingOpProcFactory.getFilterProc());
        opRules.put(new RuleRegExp("R8", LimitOperator.getOperatorName() + "%"), BucketingSortingOpProcFactory.getLimitProc());
        opRules.put(new RuleRegExp("R9", LateralViewForwardOperator.getOperatorName() + "%"), BucketingSortingOpProcFactory.getLateralViewForwardProc());
        opRules.put(new RuleRegExp("R10", LateralViewJoinOperator.getOperatorName() + "%"), BucketingSortingOpProcFactory.getLateralViewJoinProc());
        // Matches only ForwardOperators which are preceded by some other operator in the tree,
        // in particular it can't be a reducer (and hence cannot be one of the ForwardOperators
        // added by the multi group by optimization)
        opRules.put(new RuleRegExp("R11", ".+" + ForwardOperator.getOperatorName() + "%"), BucketingSortingOpProcFactory.getForwardProc());
        // Matches only ForwardOperators which are reducers and are followed by GroupByOperators
        // (specific to the multi group by optimization)
        opRules.put(new RuleExactMatch("R12", new String[] { ForwardOperator.getOperatorName(), GroupByOperator.getOperatorName() }), BucketingSortingOpProcFactory.getMultiGroupByProc());
        // The dispatcher fires the processor corresponding to the closest matching rule and passes
        // the context along
        SemanticDispatcher disp = new DefaultRuleDispatcher(BucketingSortingOpProcFactory.getDefaultProc(), opRules, bCtx);
        SemanticGraphWalker ogw = new PreOrderWalker(disp);
        // Create a list of topop nodes
        ArrayList<Node> topNodes = new ArrayList<Node>();
        topNodes.add(reducer);
        ogw.startWalking(topNodes, null);
        mapRedTask.getWork().getMapWork().getBucketedColsByDirectory().putAll(bCtx.getBucketedColsByDirectory());
        mapRedTask.getWork().getMapWork().getSortedColsByDirectory().putAll(bCtx.getSortedColsByDirectory());
    }
}
Also used : SemanticRule(org.apache.hadoop.hive.ql.lib.SemanticRule) DefaultRuleDispatcher(org.apache.hadoop.hive.ql.lib.DefaultRuleDispatcher) Node(org.apache.hadoop.hive.ql.lib.Node) RuleRegExp(org.apache.hadoop.hive.ql.lib.RuleRegExp) ArrayList(java.util.ArrayList) SemanticGraphWalker(org.apache.hadoop.hive.ql.lib.SemanticGraphWalker) LinkedHashMap(java.util.LinkedHashMap) RuleExactMatch(org.apache.hadoop.hive.ql.lib.RuleExactMatch) SemanticDispatcher(org.apache.hadoop.hive.ql.lib.SemanticDispatcher) ExecDriver(org.apache.hadoop.hive.ql.exec.mr.ExecDriver) SemanticNodeProcessor(org.apache.hadoop.hive.ql.lib.SemanticNodeProcessor) PreOrderWalker(org.apache.hadoop.hive.ql.lib.PreOrderWalker)

Example 78 with DefaultRuleDispatcher

use of org.apache.hadoop.hive.ql.lib.DefaultRuleDispatcher in project hive by apache.

the class Generator method transform.

/* (non-Javadoc)
   * @see org.apache.hadoop.hive.ql.optimizer.Transform#transform(org.apache.hadoop.hive.ql.parse.ParseContext)
   */
@Override
public ParseContext transform(ParseContext pctx) throws SemanticException {
    if (hooks != null && hooks.contains(ATLAS_HOOK_CLASSNAME)) {
        // Atlas would be interested in lineage information for insert,load,create etc.
        if (!pctx.getQueryProperties().isCTAS() && !pctx.getQueryProperties().isMaterializedView() && pctx.getQueryProperties().isQuery() && pctx.getCreateTable() == null && pctx.getCreateViewDesc() == null && (pctx.getLoadTableWork() == null || pctx.getLoadTableWork().isEmpty())) {
            LOG.debug("Not evaluating lineage");
            return pctx;
        }
    }
    Index index = pctx.getQueryState().getLineageState().getIndex();
    if (index == null) {
        index = new Index();
    }
    long sTime = System.currentTimeMillis();
    // Create the lineage context
    LineageCtx lCtx = new LineageCtx(pctx, index);
    Map<SemanticRule, SemanticNodeProcessor> opRules = new LinkedHashMap<SemanticRule, SemanticNodeProcessor>();
    opRules.put(new RuleRegExp("R1", TableScanOperator.getOperatorName() + "%"), OpProcFactory.getTSProc());
    opRules.put(new RuleRegExp("R2", ScriptOperator.getOperatorName() + "%"), OpProcFactory.getTransformProc());
    opRules.put(new RuleRegExp("R3", UDTFOperator.getOperatorName() + "%"), OpProcFactory.getTransformProc());
    opRules.put(new RuleRegExp("R4", SelectOperator.getOperatorName() + "%"), OpProcFactory.getSelProc());
    opRules.put(new RuleRegExp("R5", GroupByOperator.getOperatorName() + "%"), OpProcFactory.getGroupByProc());
    opRules.put(new RuleRegExp("R6", UnionOperator.getOperatorName() + "%"), OpProcFactory.getUnionProc());
    opRules.put(new RuleRegExp("R7", CommonJoinOperator.getOperatorName() + "%|" + MapJoinOperator.getOperatorName() + "%"), OpProcFactory.getJoinProc());
    opRules.put(new RuleRegExp("R8", ReduceSinkOperator.getOperatorName() + "%"), OpProcFactory.getReduceSinkProc());
    opRules.put(new RuleRegExp("R9", LateralViewJoinOperator.getOperatorName() + "%"), OpProcFactory.getLateralViewJoinProc());
    opRules.put(new RuleRegExp("R10", PTFOperator.getOperatorName() + "%"), OpProcFactory.getTransformProc());
    opRules.put(new RuleRegExp("R11", FilterOperator.getOperatorName() + "%"), OpProcFactory.getFilterProc());
    // The dispatcher fires the processor corresponding to the closest matching rule and passes the context along
    SemanticDispatcher disp = new DefaultRuleDispatcher(OpProcFactory.getDefaultProc(), opRules, lCtx);
    SemanticGraphWalker ogw = new LevelOrderWalker(disp, 2);
    // Create a list of topop nodes
    ArrayList<Node> topNodes = new ArrayList<Node>();
    topNodes.addAll(pctx.getTopOps().values());
    ogw.startWalking(topNodes, null);
    LOG.debug("Time taken for lineage transform={}", (System.currentTimeMillis() - sTime));
    return pctx;
}
Also used : SemanticRule(org.apache.hadoop.hive.ql.lib.SemanticRule) DefaultRuleDispatcher(org.apache.hadoop.hive.ql.lib.DefaultRuleDispatcher) Node(org.apache.hadoop.hive.ql.lib.Node) RuleRegExp(org.apache.hadoop.hive.ql.lib.RuleRegExp) ArrayList(java.util.ArrayList) SemanticGraphWalker(org.apache.hadoop.hive.ql.lib.SemanticGraphWalker) Index(org.apache.hadoop.hive.ql.optimizer.lineage.LineageCtx.Index) LinkedHashMap(java.util.LinkedHashMap) SemanticDispatcher(org.apache.hadoop.hive.ql.lib.SemanticDispatcher) SemanticNodeProcessor(org.apache.hadoop.hive.ql.lib.SemanticNodeProcessor) LevelOrderWalker(org.apache.hadoop.hive.ql.lib.LevelOrderWalker)

Example 79 with DefaultRuleDispatcher

use of org.apache.hadoop.hive.ql.lib.DefaultRuleDispatcher in project hive by apache.

the class ColumnPruner method transform.

/**
 * Transform the query tree. For each table under consideration, check if all
 * columns are needed. If not, only select the operators needed at the
 * beginning and proceed.
 *
 * @param pactx
 *          the current parse context
 */
@Override
public ParseContext transform(ParseContext pactx) throws SemanticException {
    pGraphContext = pactx;
    // generate pruned column list for all relevant operators
    ColumnPrunerProcCtx cppCtx = new ColumnPrunerProcCtx(pactx);
    // create a walker which walks the tree in a DFS manner while maintaining
    // the operator stack. The dispatcher
    // generates the plan from the operator tree
    Map<SemanticRule, SemanticNodeProcessor> opRules = new LinkedHashMap<SemanticRule, SemanticNodeProcessor>();
    opRules.put(new RuleRegExp("R1", FilterOperator.getOperatorName() + "%"), ColumnPrunerProcFactory.getFilterProc());
    opRules.put(new RuleRegExp("R2", GroupByOperator.getOperatorName() + "%"), ColumnPrunerProcFactory.getGroupByProc());
    opRules.put(new RuleRegExp("R3", ReduceSinkOperator.getOperatorName() + "%"), ColumnPrunerProcFactory.getReduceSinkProc());
    opRules.put(new RuleRegExp("R4", SelectOperator.getOperatorName() + "%"), ColumnPrunerProcFactory.getSelectProc());
    opRules.put(new RuleRegExp("R5", CommonJoinOperator.getOperatorName() + "%"), ColumnPrunerProcFactory.getJoinProc());
    opRules.put(new RuleRegExp("R6", MapJoinOperator.getOperatorName() + "%"), ColumnPrunerProcFactory.getMapJoinProc());
    opRules.put(new RuleRegExp("R7", TableScanOperator.getOperatorName() + "%"), ColumnPrunerProcFactory.getTableScanProc());
    opRules.put(new RuleRegExp("R8", LateralViewJoinOperator.getOperatorName() + "%"), ColumnPrunerProcFactory.getLateralViewJoinProc());
    opRules.put(new RuleRegExp("R9", LateralViewForwardOperator.getOperatorName() + "%"), ColumnPrunerProcFactory.getLateralViewForwardProc());
    opRules.put(new RuleRegExp("R10", PTFOperator.getOperatorName() + "%"), ColumnPrunerProcFactory.getPTFProc());
    opRules.put(new RuleRegExp("R11", ScriptOperator.getOperatorName() + "%"), ColumnPrunerProcFactory.getScriptProc());
    opRules.put(new RuleRegExp("R12", LimitOperator.getOperatorName() + "%"), ColumnPrunerProcFactory.getLimitProc());
    opRules.put(new RuleRegExp("R13", UnionOperator.getOperatorName() + "%"), ColumnPrunerProcFactory.getUnionProc());
    // The dispatcher fires the processor corresponding to the closest matching
    // rule and passes the context along
    SemanticDispatcher disp = new DefaultRuleDispatcher(ColumnPrunerProcFactory.getDefaultProc(), opRules, cppCtx);
    SemanticGraphWalker ogw = new ColumnPrunerWalker(disp);
    // Create a list of topop nodes
    ArrayList<Node> topNodes = new ArrayList<Node>();
    topNodes.addAll(pGraphContext.getTopOps().values());
    ogw.startWalking(topNodes, null);
    // set it back so that column pruner in the optimizer will not do the
    // view column authorization again even if it is triggered again.
    pGraphContext.setNeedViewColumnAuthorization(false);
    return pGraphContext;
}
Also used : SemanticRule(org.apache.hadoop.hive.ql.lib.SemanticRule) DefaultRuleDispatcher(org.apache.hadoop.hive.ql.lib.DefaultRuleDispatcher) Node(org.apache.hadoop.hive.ql.lib.Node) RuleRegExp(org.apache.hadoop.hive.ql.lib.RuleRegExp) ArrayList(java.util.ArrayList) SemanticGraphWalker(org.apache.hadoop.hive.ql.lib.SemanticGraphWalker) LinkedHashMap(java.util.LinkedHashMap) SemanticDispatcher(org.apache.hadoop.hive.ql.lib.SemanticDispatcher) SemanticNodeProcessor(org.apache.hadoop.hive.ql.lib.SemanticNodeProcessor)

Example 80 with DefaultRuleDispatcher

use of org.apache.hadoop.hive.ql.lib.DefaultRuleDispatcher in project hive by apache.

the class GroupByOptimizer method transform.

@Override
public ParseContext transform(ParseContext pctx) throws SemanticException {
    Map<SemanticRule, SemanticNodeProcessor> opRules = new LinkedHashMap<SemanticRule, SemanticNodeProcessor>();
    HiveConf conf = pctx.getConf();
    if (!HiveConf.getBoolVar(conf, HiveConf.ConfVars.HIVEGROUPBYSKEW)) {
        // process group-by pattern
        opRules.put(new RuleRegExp("R1", GroupByOperator.getOperatorName() + "%" + ReduceSinkOperator.getOperatorName() + "%" + GroupByOperator.getOperatorName() + "%"), getMapSortedGroupbyProc(pctx));
    } else {
        // If hive.groupby.skewindata is set to true, the operator tree is as below
        opRules.put(new RuleRegExp("R2", GroupByOperator.getOperatorName() + "%" + ReduceSinkOperator.getOperatorName() + "%" + GroupByOperator.getOperatorName() + "%" + ReduceSinkOperator.getOperatorName() + "%" + GroupByOperator.getOperatorName() + "%"), getMapSortedGroupbySkewProc(pctx));
    }
    // The dispatcher fires the processor corresponding to the closest matching
    // rule and passes the context along
    SemanticDispatcher disp = new DefaultRuleDispatcher(getDefaultProc(), opRules, new GroupByOptimizerContext(conf));
    SemanticGraphWalker ogw = new DefaultGraphWalker(disp);
    // Create a list of topop nodes
    List<Node> topNodes = new ArrayList<Node>();
    topNodes.addAll(pctx.getTopOps().values());
    ogw.startWalking(topNodes, null);
    return pctx;
}
Also used : SemanticRule(org.apache.hadoop.hive.ql.lib.SemanticRule) DefaultRuleDispatcher(org.apache.hadoop.hive.ql.lib.DefaultRuleDispatcher) DefaultGraphWalker(org.apache.hadoop.hive.ql.lib.DefaultGraphWalker) Node(org.apache.hadoop.hive.ql.lib.Node) RuleRegExp(org.apache.hadoop.hive.ql.lib.RuleRegExp) ArrayList(java.util.ArrayList) SemanticGraphWalker(org.apache.hadoop.hive.ql.lib.SemanticGraphWalker) LinkedHashMap(java.util.LinkedHashMap) SemanticDispatcher(org.apache.hadoop.hive.ql.lib.SemanticDispatcher) SemanticNodeProcessor(org.apache.hadoop.hive.ql.lib.SemanticNodeProcessor) HiveConf(org.apache.hadoop.hive.conf.HiveConf)

Aggregations

DefaultRuleDispatcher (org.apache.hadoop.hive.ql.lib.DefaultRuleDispatcher)101 Node (org.apache.hadoop.hive.ql.lib.Node)98 ArrayList (java.util.ArrayList)92 LinkedHashMap (java.util.LinkedHashMap)82 SemanticGraphWalker (org.apache.hadoop.hive.ql.lib.SemanticGraphWalker)78 SemanticDispatcher (org.apache.hadoop.hive.ql.lib.SemanticDispatcher)77 RuleRegExp (org.apache.hadoop.hive.ql.lib.RuleRegExp)71 SemanticNodeProcessor (org.apache.hadoop.hive.ql.lib.SemanticNodeProcessor)63 SemanticRule (org.apache.hadoop.hive.ql.lib.SemanticRule)63 DefaultGraphWalker (org.apache.hadoop.hive.ql.lib.DefaultGraphWalker)59 HashMap (java.util.HashMap)25 GraphWalker (org.apache.hadoop.hive.ql.lib.GraphWalker)22 Dispatcher (org.apache.hadoop.hive.ql.lib.Dispatcher)21 NodeProcessor (org.apache.hadoop.hive.ql.lib.NodeProcessor)21 Rule (org.apache.hadoop.hive.ql.lib.Rule)21 SemanticException (org.apache.hadoop.hive.ql.parse.SemanticException)17 ReduceSinkOperator (org.apache.hadoop.hive.ql.exec.ReduceSinkOperator)16 ExprNodeGenericFuncDesc (org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc)16 ExprNodeColumnDesc (org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc)15 FileSinkOperator (org.apache.hadoop.hive.ql.exec.FileSinkOperator)14