use of org.apache.hadoop.hive.ql.lib.SemanticGraphWalker in project hive by apache.
the class AccurateEstimatesCheckerHook method postAnalyze.
@Override
public void postAnalyze(HiveSemanticAnalyzerHookContext context, List<Task<?>> rootTasks) throws SemanticException {
HiveSemanticAnalyzerHookContext hookContext = context;
HiveConf conf = (HiveConf) hookContext.getConf();
absErr = conf.getDouble("accurate.estimate.checker.absolute.error", 3.0);
relErr = conf.getDouble("accurate.estimate.checker.relative.error", .1);
List<Node> rootOps = Lists.newArrayList();
List<Task<?>> roots = rootTasks;
for (Task<?> task0 : roots) {
if (task0 instanceof ExplainTask) {
ExplainTask explainTask = (ExplainTask) task0;
ExplainWork w = explainTask.getWork();
List<Task<?>> explainRoots = w.getRootTasks();
for (Task<?> task : explainRoots) {
Object work = task.getWork();
if (work instanceof MapredWork) {
MapredWork mapredWork = (MapredWork) work;
MapWork mapWork = mapredWork.getMapWork();
if (mapWork != null) {
rootOps.addAll(mapWork.getAllRootOperators());
}
ReduceWork reduceWork = mapredWork.getReduceWork();
if (reduceWork != null) {
rootOps.addAll(reduceWork.getAllRootOperators());
}
}
if (work instanceof TezWork) {
for (BaseWork bw : ((TezWork) work).getAllWorkUnsorted()) {
rootOps.addAll(bw.getAllRootOperators());
}
}
}
}
}
if (rootOps.isEmpty()) {
return;
}
SemanticDispatcher disp = new DefaultRuleDispatcher(new EstimateCheckerHook(), new HashMap<>(), null);
SemanticGraphWalker ogw = new DefaultGraphWalker(disp);
HashMap<Node, Object> nodeOutput = new HashMap<Node, Object>();
ogw.startWalking(rootOps, nodeOutput);
}
use of org.apache.hadoop.hive.ql.lib.SemanticGraphWalker in project hive by apache.
the class MapReduceCompiler method generateTaskTree.
@Override
protected void generateTaskTree(List<Task<?>> rootTasks, ParseContext pCtx, List<Task<MoveWork>> mvTask, Set<ReadEntity> inputs, Set<WriteEntity> outputs) throws SemanticException {
// generate map reduce plans
ParseContext tempParseContext = getParseContext(pCtx, rootTasks);
GenMRProcContext procCtx = new GenMRProcContext(conf, // Must be deterministic order map for consistent q-test output across Java versions
new LinkedHashMap<Operator<? extends OperatorDesc>, Task<?>>(), tempParseContext, mvTask, rootTasks, new LinkedHashMap<Operator<? extends OperatorDesc>, GenMapRedCtx>(), inputs, outputs);
// create a walker which walks the tree in a DFS manner while maintaining
// the operator stack.
// The dispatcher generates the plan from the operator tree
Map<SemanticRule, SemanticNodeProcessor> opRules = new LinkedHashMap<SemanticRule, SemanticNodeProcessor>();
opRules.put(new RuleRegExp(new String("R1"), TableScanOperator.getOperatorName() + "%"), new GenMRTableScan1());
opRules.put(new RuleRegExp(new String("R2"), TableScanOperator.getOperatorName() + "%.*" + ReduceSinkOperator.getOperatorName() + "%"), new GenMRRedSink1());
opRules.put(new RuleRegExp(new String("R3"), ReduceSinkOperator.getOperatorName() + "%.*" + ReduceSinkOperator.getOperatorName() + "%"), new GenMRRedSink2());
opRules.put(new RuleRegExp(new String("R4"), FileSinkOperator.getOperatorName() + "%"), new GenMRFileSink1());
opRules.put(new RuleRegExp(new String("R5"), UnionOperator.getOperatorName() + "%"), new GenMRUnion1());
opRules.put(new RuleRegExp(new String("R6"), UnionOperator.getOperatorName() + "%.*" + ReduceSinkOperator.getOperatorName() + "%"), new GenMRRedSink3());
opRules.put(new RuleRegExp(new String("R7"), MapJoinOperator.getOperatorName() + "%"), MapJoinFactory.getTableScanMapJoin());
// The dispatcher fires the processor corresponding to the closest matching
// rule and passes the context along
SemanticDispatcher disp = new DefaultRuleDispatcher(new GenMROperator(), opRules, procCtx);
SemanticGraphWalker ogw = new GenMapRedWalker(disp);
ArrayList<Node> topNodes = new ArrayList<Node>();
topNodes.addAll(pCtx.getTopOps().values());
ogw.startWalking(topNodes, null);
}
use of org.apache.hadoop.hive.ql.lib.SemanticGraphWalker in project hive by apache.
the class ReduceSinkDeDuplication method transform.
@Override
public ParseContext transform(ParseContext pctx) throws SemanticException {
pGraphContext = pctx;
// generate pruned column list for all relevant operators
ReduceSinkDeduplicateProcCtx cppCtx = new ReduceSinkDeduplicateProcCtx(pGraphContext);
// for auto convert map-joins, it not safe to dedup in here (todo)
boolean mergeJoins = !pctx.getConf().getBoolVar(HIVECONVERTJOIN) && !pctx.getConf().getBoolVar(HIVECONVERTJOINNOCONDITIONALTASK) && !pctx.getConf().getBoolVar(ConfVars.HIVE_CONVERT_JOIN_BUCKET_MAPJOIN_TEZ) && !pctx.getConf().getBoolVar(ConfVars.HIVEDYNAMICPARTITIONHASHJOIN);
// If multiple rules can be matched with same cost, last rule will be choosen as a processor
// see DefaultRuleDispatcher#dispatch()
Map<SemanticRule, SemanticNodeProcessor> opRules = new LinkedHashMap<SemanticRule, SemanticNodeProcessor>();
opRules.put(new RuleRegExp("R1", RS + "%.*%" + RS + "%"), ReduceSinkDeduplicateProcFactory.getReducerReducerProc());
opRules.put(new RuleRegExp("R2", RS + "%" + GBY + "%.*%" + RS + "%"), ReduceSinkDeduplicateProcFactory.getGroupbyReducerProc());
if (mergeJoins) {
opRules.put(new RuleRegExp("R3", JOIN + "%.*%" + RS + "%"), ReduceSinkDeduplicateProcFactory.getJoinReducerProc());
}
// TODO RS+JOIN
// The dispatcher fires the processor corresponding to the closest matching
// rule and passes the context along
SemanticDispatcher disp = new DefaultRuleDispatcher(ReduceSinkDeduplicateProcFactory.getDefaultProc(), opRules, cppCtx);
SemanticGraphWalker ogw = new DefaultGraphWalker(disp);
// Create a list of topop nodes
ArrayList<Node> topNodes = new ArrayList<Node>();
topNodes.addAll(pGraphContext.getTopOps().values());
ogw.startWalking(topNodes, null);
return pGraphContext;
}
use of org.apache.hadoop.hive.ql.lib.SemanticGraphWalker in project hive by apache.
the class LlapPreVectorizationPass method resolve.
@Override
public PhysicalContext resolve(PhysicalContext pctx) throws SemanticException {
HiveConf conf = pctx.getConf();
LlapMode mode = LlapMode.valueOf(HiveConf.getVar(conf, HiveConf.ConfVars.LLAP_EXECUTION_MODE));
if (mode == none) {
LOG.info("LLAP disabled.");
return pctx;
}
SemanticDispatcher disp = new LlapPreVectorizationPassDispatcher(pctx);
SemanticGraphWalker ogw = new DefaultGraphWalker(disp);
ArrayList<Node> topNodes = new ArrayList<Node>();
topNodes.addAll(pctx.getRootTasks());
ogw.startWalking(topNodes, null);
return pctx;
}
use of org.apache.hadoop.hive.ql.lib.SemanticGraphWalker in project hive by apache.
the class MetadataOnlyOptimizer method resolve.
@Override
public PhysicalContext resolve(PhysicalContext pctx) throws SemanticException {
Map<SemanticRule, SemanticNodeProcessor> opRules = new LinkedHashMap<SemanticRule, SemanticNodeProcessor>();
opRules.put(new RuleRegExp("R1", TableScanOperator.getOperatorName() + "%"), new TableScanProcessor());
opRules.put(new RuleRegExp("R2", GroupByOperator.getOperatorName() + "%.*" + FileSinkOperator.getOperatorName() + "%"), new FileSinkProcessor());
SemanticDispatcher disp = new NullScanTaskDispatcher(pctx, opRules);
SemanticGraphWalker ogw = new DefaultGraphWalker(disp);
ArrayList<Node> topNodes = new ArrayList<Node>();
topNodes.addAll(pctx.getRootTasks());
ogw.startWalking(topNodes, null);
return pctx;
}
Aggregations