Example 31 with SemanticGraphWalker

Use of org.apache.hadoop.hive.ql.lib.SemanticGraphWalker in project hive by apache.

From the class AccurateEstimatesCheckerHook, method postAnalyze.

@Override
public void postAnalyze(HiveSemanticAnalyzerHookContext context, List<Task<?>> rootTasks) throws SemanticException {
    HiveSemanticAnalyzerHookContext hookContext = context;
    HiveConf conf = (HiveConf) hookContext.getConf();
    absErr = conf.getDouble("accurate.estimate.checker.absolute.error", 3.0);
    relErr = conf.getDouble("accurate.estimate.checker.relative.error", .1);
    List<Node> rootOps = Lists.newArrayList();
    List<Task<?>> roots = rootTasks;
    for (Task<?> task0 : roots) {
        if (task0 instanceof ExplainTask) {
            ExplainTask explainTask = (ExplainTask) task0;
            ExplainWork w = explainTask.getWork();
            List<Task<?>> explainRoots = w.getRootTasks();
            for (Task<?> task : explainRoots) {
                Object work = task.getWork();
                if (work instanceof MapredWork) {
                    MapredWork mapredWork = (MapredWork) work;
                    MapWork mapWork = mapredWork.getMapWork();
                    if (mapWork != null) {
                        rootOps.addAll(mapWork.getAllRootOperators());
                    }
                    ReduceWork reduceWork = mapredWork.getReduceWork();
                    if (reduceWork != null) {
                        rootOps.addAll(reduceWork.getAllRootOperators());
                    }
                }
                if (work instanceof TezWork) {
                    for (BaseWork bw : ((TezWork) work).getAllWorkUnsorted()) {
                        rootOps.addAll(bw.getAllRootOperators());
                    }
                }
            }
        }
    }
    if (rootOps.isEmpty()) {
        return;
    }
    SemanticDispatcher disp = new DefaultRuleDispatcher(new EstimateCheckerHook(), new HashMap<>(), null);
    SemanticGraphWalker ogw = new DefaultGraphWalker(disp);
    HashMap<Node, Object> nodeOutput = new HashMap<Node, Object>();
    ogw.startWalking(rootOps, nodeOutput);
}
Also used : HiveSemanticAnalyzerHookContext(org.apache.hadoop.hive.ql.parse.HiveSemanticAnalyzerHookContext) Task(org.apache.hadoop.hive.ql.exec.Task) ExplainTask(org.apache.hadoop.hive.ql.exec.ExplainTask) DefaultRuleDispatcher(org.apache.hadoop.hive.ql.lib.DefaultRuleDispatcher) HashMap(java.util.HashMap) Node(org.apache.hadoop.hive.ql.lib.Node) DefaultGraphWalker(org.apache.hadoop.hive.ql.lib.DefaultGraphWalker) ExplainWork(org.apache.hadoop.hive.ql.plan.ExplainWork) SemanticGraphWalker(org.apache.hadoop.hive.ql.lib.SemanticGraphWalker) ReduceWork(org.apache.hadoop.hive.ql.plan.ReduceWork) MapredWork(org.apache.hadoop.hive.ql.plan.MapredWork) MapWork(org.apache.hadoop.hive.ql.plan.MapWork) SemanticDispatcher(org.apache.hadoop.hive.ql.lib.SemanticDispatcher) HiveConf(org.apache.hadoop.hive.conf.HiveConf) BaseWork(org.apache.hadoop.hive.ql.plan.BaseWork) TezWork(org.apache.hadoop.hive.ql.plan.TezWork)
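
Because the rule map passed to DefaultRuleDispatcher above is empty, the dispatcher falls back to its default processor (the EstimateCheckerHook) for every node the walker visits. Below is a minimal sketch of such a default-only walk; the processor body and the driver method are illustrative assumptions rather than the actual EstimateCheckerHook implementation, but the dispatcher/walker wiring mirrors postAnalyze() above.

import java.util.HashMap;
import java.util.List;
import java.util.Stack;

import org.apache.hadoop.hive.ql.lib.DefaultGraphWalker;
import org.apache.hadoop.hive.ql.lib.DefaultRuleDispatcher;
import org.apache.hadoop.hive.ql.lib.Node;
import org.apache.hadoop.hive.ql.lib.NodeProcessorCtx;
import org.apache.hadoop.hive.ql.lib.SemanticDispatcher;
import org.apache.hadoop.hive.ql.lib.SemanticGraphWalker;
import org.apache.hadoop.hive.ql.lib.SemanticNodeProcessor;
import org.apache.hadoop.hive.ql.parse.SemanticException;

// Hypothetical default-only processor: with an empty rule map it fires on every visited node.
public class NodeNameCollector implements SemanticNodeProcessor {

    @Override
    public Object process(Node nd, Stack<Node> stack, NodeProcessorCtx procCtx, Object... nodeOutputs) throws SemanticException {
        // Whatever is returned here ends up in the nodeOutput map passed to startWalking().
        return nd.getName();
    }

    // Hypothetical driver using the same wiring as postAnalyze() above.
    public static void walk(List<Node> rootOps) throws SemanticException {
        SemanticDispatcher disp = new DefaultRuleDispatcher(new NodeNameCollector(), new HashMap<>(), null);
        SemanticGraphWalker ogw = new DefaultGraphWalker(disp);
        HashMap<Node, Object> nodeOutput = new HashMap<Node, Object>();
        ogw.startWalking(rootOps, nodeOutput);
        // nodeOutput now maps each visited node to the value returned by process().
    }
}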

Example 32 with SemanticGraphWalker

Use of org.apache.hadoop.hive.ql.lib.SemanticGraphWalker in project hive by apache.

From the class MapReduceCompiler, method generateTaskTree.

@Override
protected void generateTaskTree(List<Task<?>> rootTasks, ParseContext pCtx, List<Task<MoveWork>> mvTask, Set<ReadEntity> inputs, Set<WriteEntity> outputs) throws SemanticException {
    // generate map reduce plans
    ParseContext tempParseContext = getParseContext(pCtx, rootTasks);
    // Must be a deterministic-order map for consistent q-test output across Java versions
    GenMRProcContext procCtx = new GenMRProcContext(conf,
        new LinkedHashMap<Operator<? extends OperatorDesc>, Task<?>>(), tempParseContext, mvTask,
        rootTasks, new LinkedHashMap<Operator<? extends OperatorDesc>, GenMapRedCtx>(), inputs, outputs);
    // create a walker which walks the tree in a DFS manner while maintaining
    // the operator stack.
    // The dispatcher generates the plan from the operator tree
    Map<SemanticRule, SemanticNodeProcessor> opRules = new LinkedHashMap<SemanticRule, SemanticNodeProcessor>();
    opRules.put(new RuleRegExp("R1", TableScanOperator.getOperatorName() + "%"), new GenMRTableScan1());
    opRules.put(new RuleRegExp("R2", TableScanOperator.getOperatorName() + "%.*" + ReduceSinkOperator.getOperatorName() + "%"), new GenMRRedSink1());
    opRules.put(new RuleRegExp("R3", ReduceSinkOperator.getOperatorName() + "%.*" + ReduceSinkOperator.getOperatorName() + "%"), new GenMRRedSink2());
    opRules.put(new RuleRegExp("R4", FileSinkOperator.getOperatorName() + "%"), new GenMRFileSink1());
    opRules.put(new RuleRegExp("R5", UnionOperator.getOperatorName() + "%"), new GenMRUnion1());
    opRules.put(new RuleRegExp("R6", UnionOperator.getOperatorName() + "%.*" + ReduceSinkOperator.getOperatorName() + "%"), new GenMRRedSink3());
    opRules.put(new RuleRegExp("R7", MapJoinOperator.getOperatorName() + "%"), MapJoinFactory.getTableScanMapJoin());
    // The dispatcher fires the processor corresponding to the closest matching
    // rule and passes the context along
    SemanticDispatcher disp = new DefaultRuleDispatcher(new GenMROperator(), opRules, procCtx);
    SemanticGraphWalker ogw = new GenMapRedWalker(disp);
    ArrayList<Node> topNodes = new ArrayList<Node>();
    topNodes.addAll(pCtx.getTopOps().values());
    ogw.startWalking(topNodes, null);
}
Also used : ReduceSinkOperator(org.apache.hadoop.hive.ql.exec.ReduceSinkOperator) MapJoinOperator(org.apache.hadoop.hive.ql.exec.MapJoinOperator) UnionOperator(org.apache.hadoop.hive.ql.exec.UnionOperator) FileSinkOperator(org.apache.hadoop.hive.ql.exec.FileSinkOperator) GenMROperator(org.apache.hadoop.hive.ql.optimizer.GenMROperator) TableScanOperator(org.apache.hadoop.hive.ql.exec.TableScanOperator) Operator(org.apache.hadoop.hive.ql.exec.Operator) ConditionalTask(org.apache.hadoop.hive.ql.exec.ConditionalTask) MapRedTask(org.apache.hadoop.hive.ql.exec.mr.MapRedTask) Task(org.apache.hadoop.hive.ql.exec.Task) Node(org.apache.hadoop.hive.ql.lib.Node) ArrayList(java.util.ArrayList) LinkedHashMap(java.util.LinkedHashMap) GenMRTableScan1(org.apache.hadoop.hive.ql.optimizer.GenMRTableScan1) GenMapRedCtx(org.apache.hadoop.hive.ql.optimizer.GenMRProcContext.GenMapRedCtx) GenMRProcContext(org.apache.hadoop.hive.ql.optimizer.GenMRProcContext) SemanticRule(org.apache.hadoop.hive.ql.lib.SemanticRule) GenMRUnion1(org.apache.hadoop.hive.ql.optimizer.GenMRUnion1) GenMRFileSink1(org.apache.hadoop.hive.ql.optimizer.GenMRFileSink1) DefaultRuleDispatcher(org.apache.hadoop.hive.ql.lib.DefaultRuleDispatcher) RuleRegExp(org.apache.hadoop.hive.ql.lib.RuleRegExp) SemanticGraphWalker(org.apache.hadoop.hive.ql.lib.SemanticGraphWalker) GenMRRedSink2(org.apache.hadoop.hive.ql.optimizer.GenMRRedSink2) GenMRRedSink1(org.apache.hadoop.hive.ql.optimizer.GenMRRedSink1) GenMRRedSink3(org.apache.hadoop.hive.ql.optimizer.GenMRRedSink3) SemanticDispatcher(org.apache.hadoop.hive.ql.lib.SemanticDispatcher) SemanticNodeProcessor(org.apache.hadoop.hive.ql.lib.SemanticNodeProcessor) OperatorDesc(org.apache.hadoop.hive.ql.plan.OperatorDesc)
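
The rule patterns above are regular expressions matched against the names of the operators on the walker's current stack, joined with "%" (TableScanOperator.getOperatorName() yields "TS", ReduceSinkOperator.getOperatorName() yields "RS", and so on); R2, for example, expands to "TS%.*RS%" and fires when a ReduceSinkOperator is reached somewhere below a TableScanOperator. The sketch below shows one such rule registered with a placeholder processor, assuming the same SemanticRule/SemanticNodeProcessor interfaces as in the examples above; it is illustrative, not part of MapReduceCompiler.

import java.util.LinkedHashMap;
import java.util.Map;
import java.util.Stack;

import org.apache.hadoop.hive.ql.exec.ReduceSinkOperator;
import org.apache.hadoop.hive.ql.exec.TableScanOperator;
import org.apache.hadoop.hive.ql.lib.Node;
import org.apache.hadoop.hive.ql.lib.NodeProcessorCtx;
import org.apache.hadoop.hive.ql.lib.RuleRegExp;
import org.apache.hadoop.hive.ql.lib.SemanticNodeProcessor;
import org.apache.hadoop.hive.ql.lib.SemanticRule;
import org.apache.hadoop.hive.ql.parse.SemanticException;

public class RuleSketch {

    // Placeholder processor: invoked when the rule below is the best match for the current stack.
    static final SemanticNodeProcessor TS_TO_RS = new SemanticNodeProcessor() {
        @Override
        public Object process(Node nd, Stack<Node> stack, NodeProcessorCtx procCtx, Object... nodeOutputs) throws SemanticException {
            // 'nd' is the operator that completed the match; 'stack' holds the path from the root operator down to it.
            return null;
        }
    };

    static Map<SemanticRule, SemanticNodeProcessor> rules() {
        Map<SemanticRule, SemanticNodeProcessor> opRules = new LinkedHashMap<>();
        // Expands to "TS%.*RS%": a TableScanOperator somewhere above a ReduceSinkOperator on the walk stack.
        opRules.put(new RuleRegExp("TS-to-RS", TableScanOperator.getOperatorName() + "%.*" + ReduceSinkOperator.getOperatorName() + "%"), TS_TO_RS);
        return opRules;
    }
}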

Example 33 with SemanticGraphWalker

Use of org.apache.hadoop.hive.ql.lib.SemanticGraphWalker in project hive by apache.

From the class ReduceSinkDeDuplication, method transform.

@Override
public ParseContext transform(ParseContext pctx) throws SemanticException {
    pGraphContext = pctx;
    // generate pruned column list for all relevant operators
    ReduceSinkDeduplicateProcCtx cppCtx = new ReduceSinkDeduplicateProcCtx(pGraphContext);
    // for auto-converted map joins it is not safe to dedup here (TODO)
    boolean mergeJoins = !pctx.getConf().getBoolVar(HIVECONVERTJOIN)
        && !pctx.getConf().getBoolVar(HIVECONVERTJOINNOCONDITIONALTASK)
        && !pctx.getConf().getBoolVar(ConfVars.HIVE_CONVERT_JOIN_BUCKET_MAPJOIN_TEZ)
        && !pctx.getConf().getBoolVar(ConfVars.HIVEDYNAMICPARTITIONHASHJOIN);
    // If multiple rules match with the same cost, the last rule is chosen as the processor;
    // see DefaultRuleDispatcher#dispatch()
    Map<SemanticRule, SemanticNodeProcessor> opRules = new LinkedHashMap<SemanticRule, SemanticNodeProcessor>();
    opRules.put(new RuleRegExp("R1", RS + "%.*%" + RS + "%"), ReduceSinkDeduplicateProcFactory.getReducerReducerProc());
    opRules.put(new RuleRegExp("R2", RS + "%" + GBY + "%.*%" + RS + "%"), ReduceSinkDeduplicateProcFactory.getGroupbyReducerProc());
    if (mergeJoins) {
        opRules.put(new RuleRegExp("R3", JOIN + "%.*%" + RS + "%"), ReduceSinkDeduplicateProcFactory.getJoinReducerProc());
    }
    // TODO RS+JOIN
    // The dispatcher fires the processor corresponding to the closest matching
    // rule and passes the context along
    SemanticDispatcher disp = new DefaultRuleDispatcher(ReduceSinkDeduplicateProcFactory.getDefaultProc(), opRules, cppCtx);
    SemanticGraphWalker ogw = new DefaultGraphWalker(disp);
    // Create a list of topop nodes
    ArrayList<Node> topNodes = new ArrayList<Node>();
    topNodes.addAll(pGraphContext.getTopOps().values());
    ogw.startWalking(topNodes, null);
    return pGraphContext;
}
Also used : SemanticRule(org.apache.hadoop.hive.ql.lib.SemanticRule) DefaultRuleDispatcher(org.apache.hadoop.hive.ql.lib.DefaultRuleDispatcher) DefaultGraphWalker(org.apache.hadoop.hive.ql.lib.DefaultGraphWalker) Node(org.apache.hadoop.hive.ql.lib.Node) RuleRegExp(org.apache.hadoop.hive.ql.lib.RuleRegExp) ArrayList(java.util.ArrayList) SemanticGraphWalker(org.apache.hadoop.hive.ql.lib.SemanticGraphWalker) LinkedHashMap(java.util.LinkedHashMap) SemanticDispatcher(org.apache.hadoop.hive.ql.lib.SemanticDispatcher) SemanticNodeProcessor(org.apache.hadoop.hive.ql.lib.SemanticNodeProcessor)
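
As the comment in transform() notes, when two rules match the current stack at the same cost, DefaultRuleDispatcher keeps the processor of the rule encountered last, which is why the insertion order of the LinkedHashMap matters. The hedged sketch below illustrates that ordering concern with two deliberately overlapping rules; the rule names, parameters, and helper class are assumptions mirroring the snippet above rather than the real ReduceSinkDeduplicateProcFactory wiring.

import java.util.LinkedHashMap;
import java.util.Map;

import org.apache.hadoop.hive.ql.exec.GroupByOperator;
import org.apache.hadoop.hive.ql.exec.ReduceSinkOperator;
import org.apache.hadoop.hive.ql.lib.RuleRegExp;
import org.apache.hadoop.hive.ql.lib.SemanticNodeProcessor;
import org.apache.hadoop.hive.ql.lib.SemanticRule;

final class OverlappingRules {

    // Both patterns can match a stack that ends in RS ... RS; when the match costs are
    // equal the dispatcher keeps the later entry, so insertion order is significant here.
    static Map<SemanticRule, SemanticNodeProcessor> build(SemanticNodeProcessor reducerReducer, SemanticNodeProcessor groupbyReducer) {
        String rs = ReduceSinkOperator.getOperatorName();
        String gby = GroupByOperator.getOperatorName();
        Map<SemanticRule, SemanticNodeProcessor> opRules = new LinkedHashMap<>();
        opRules.put(new RuleRegExp("R1", rs + "%.*%" + rs + "%"), reducerReducer);
        opRules.put(new RuleRegExp("R2", rs + "%" + gby + "%.*%" + rs + "%"), groupbyReducer);
        return opRules;
    }
}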

Example 34 with SemanticGraphWalker

Use of org.apache.hadoop.hive.ql.lib.SemanticGraphWalker in project hive by apache.

From the class LlapPreVectorizationPass, method resolve.

@Override
public PhysicalContext resolve(PhysicalContext pctx) throws SemanticException {
    HiveConf conf = pctx.getConf();
    LlapMode mode = LlapMode.valueOf(HiveConf.getVar(conf, HiveConf.ConfVars.LLAP_EXECUTION_MODE));
    if (mode == none) {
        LOG.info("LLAP disabled.");
        return pctx;
    }
    SemanticDispatcher disp = new LlapPreVectorizationPassDispatcher(pctx);
    SemanticGraphWalker ogw = new DefaultGraphWalker(disp);
    ArrayList<Node> topNodes = new ArrayList<Node>();
    topNodes.addAll(pctx.getRootTasks());
    ogw.startWalking(topNodes, null);
    return pctx;
}
Also used : DefaultGraphWalker(org.apache.hadoop.hive.ql.lib.DefaultGraphWalker) Node(org.apache.hadoop.hive.ql.lib.Node) SemanticDispatcher(org.apache.hadoop.hive.ql.lib.SemanticDispatcher) ArrayList(java.util.ArrayList) SemanticGraphWalker(org.apache.hadoop.hive.ql.lib.SemanticGraphWalker) HiveConf(org.apache.hadoop.hive.conf.HiveConf) LlapMode(org.apache.hadoop.hive.ql.optimizer.physical.LlapDecider.LlapMode)
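
Unlike the previous examples, this pass does not go through DefaultRuleDispatcher at all: LlapPreVectorizationPassDispatcher implements SemanticDispatcher itself, and the walk starts from the root tasks (Task implements Node), so the task DAG rather than an operator tree is traversed. A minimal hand-rolled dispatcher of the same shape might look like the sketch below; the logging body is an illustrative assumption, not the real LLAP dispatcher.

import java.util.Stack;

import org.apache.hadoop.hive.ql.lib.Node;
import org.apache.hadoop.hive.ql.lib.SemanticDispatcher;
import org.apache.hadoop.hive.ql.parse.SemanticException;

// Illustrative stand-in for a dispatcher such as LlapPreVectorizationPassDispatcher:
// dispatch() is called directly for every node the walker visits, with no rule map involved.
final class LoggingTaskDispatcher implements SemanticDispatcher {

    @Override
    public Object dispatch(Node nd, Stack<Node> stack, Object... nodeOutputs) throws SemanticException {
        System.out.println("visiting " + nd.getName());
        return null;
    }
}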

Example 35 with SemanticGraphWalker

Use of org.apache.hadoop.hive.ql.lib.SemanticGraphWalker in project hive by apache.

From the class MetadataOnlyOptimizer, method resolve.

@Override
public PhysicalContext resolve(PhysicalContext pctx) throws SemanticException {
    Map<SemanticRule, SemanticNodeProcessor> opRules = new LinkedHashMap<SemanticRule, SemanticNodeProcessor>();
    opRules.put(new RuleRegExp("R1", TableScanOperator.getOperatorName() + "%"), new TableScanProcessor());
    opRules.put(new RuleRegExp("R2", GroupByOperator.getOperatorName() + "%.*" + FileSinkOperator.getOperatorName() + "%"), new FileSinkProcessor());
    SemanticDispatcher disp = new NullScanTaskDispatcher(pctx, opRules);
    SemanticGraphWalker ogw = new DefaultGraphWalker(disp);
    ArrayList<Node> topNodes = new ArrayList<Node>();
    topNodes.addAll(pctx.getRootTasks());
    ogw.startWalking(topNodes, null);
    return pctx;
}
Also used : SemanticRule(org.apache.hadoop.hive.ql.lib.SemanticRule) DefaultGraphWalker(org.apache.hadoop.hive.ql.lib.DefaultGraphWalker) Node(org.apache.hadoop.hive.ql.lib.Node) RuleRegExp(org.apache.hadoop.hive.ql.lib.RuleRegExp) ArrayList(java.util.ArrayList) SemanticGraphWalker(org.apache.hadoop.hive.ql.lib.SemanticGraphWalker) LinkedHashMap(java.util.LinkedHashMap) SemanticDispatcher(org.apache.hadoop.hive.ql.lib.SemanticDispatcher) SemanticNodeProcessor(org.apache.hadoop.hive.ql.lib.SemanticNodeProcessor)

Aggregations

SemanticGraphWalker (org.apache.hadoop.hive.ql.lib.SemanticGraphWalker): 87 usages
Node (org.apache.hadoop.hive.ql.lib.Node): 84 usages
SemanticDispatcher (org.apache.hadoop.hive.ql.lib.SemanticDispatcher): 84 usages
DefaultRuleDispatcher (org.apache.hadoop.hive.ql.lib.DefaultRuleDispatcher): 78 usages
ArrayList (java.util.ArrayList): 77 usages
SemanticNodeProcessor (org.apache.hadoop.hive.ql.lib.SemanticNodeProcessor): 66 usages
LinkedHashMap (java.util.LinkedHashMap): 65 usages
SemanticRule (org.apache.hadoop.hive.ql.lib.SemanticRule): 65 usages
RuleRegExp (org.apache.hadoop.hive.ql.lib.RuleRegExp): 56 usages
DefaultGraphWalker (org.apache.hadoop.hive.ql.lib.DefaultGraphWalker): 54 usages
HashMap (java.util.HashMap): 19 usages
SemanticException (org.apache.hadoop.hive.ql.parse.SemanticException): 14 usages
ExprNodeGenericFuncDesc (org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc): 14 usages
ExprNodeColumnDesc (org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc): 13 usages
ExprNodeDesc (org.apache.hadoop.hive.ql.plan.ExprNodeDesc): 13 usages
MapJoinOperator (org.apache.hadoop.hive.ql.exec.MapJoinOperator): 10 usages
ReduceSinkOperator (org.apache.hadoop.hive.ql.exec.ReduceSinkOperator): 10 usages
ExprNodeConstantDesc (org.apache.hadoop.hive.ql.plan.ExprNodeConstantDesc): 10 usages
Test (org.junit.Test): 10 usages
List (java.util.List): 9 usages