Use of org.apache.hadoop.hive.ql.lib.SemanticNodeProcessor in project hive by apache.
From class TezCompiler, method removeSemijoinOptimizationFromSMBJoins:
private static void removeSemijoinOptimizationFromSMBJoins(OptimizeTezProcContext procCtx) throws SemanticException {
  Map<SemanticRule, SemanticNodeProcessor> opRules = new LinkedHashMap<SemanticRule, SemanticNodeProcessor>();
  opRules.put(new RuleRegExp("R1",
      TableScanOperator.getOperatorName() + "%" + ".*" + TezDummyStoreOperator.getOperatorName() + "%"
          + CommonMergeJoinOperator.getOperatorName() + "%"),
      new SMBJoinOpProc());
  SMBJoinOpProcContext ctx = new SMBJoinOpProcContext();
  // The dispatcher finds an SMB join and, if a semijoin optimization feeds into it, removes it.
  SemanticDispatcher disp = new DefaultRuleDispatcher(null, opRules, ctx);
  List<Node> topNodes = new ArrayList<Node>();
  topNodes.addAll(procCtx.parseContext.getTopOps().values());
  SemanticGraphWalker ogw = new PreOrderOnceWalker(disp);
  ogw.startWalking(topNodes, null);
  List<TableScanOperator> tsOps = new ArrayList<>();
  // Iterate over the map and remove semijoin optimizations if needed.
  for (CommonMergeJoinOperator joinOp : ctx.JoinOpToTsOpMap.keySet()) {
    // Get one top-level TS op directly from the stack.
    tsOps.add(ctx.JoinOpToTsOpMap.get(joinOp));
    // Get the other one by examining the join op.
    List<Operator<?>> parents = joinOp.getParentOperators();
    for (Operator<?> parent : parents) {
      if (parent instanceof TezDummyStoreOperator) {
        // already accounted for
        continue;
      }
      while (parent != null) {
        if (parent instanceof TableScanOperator) {
          tsOps.add((TableScanOperator) parent);
          break;
        }
        parent = parent.getParentOperators().get(0);
      }
    }
  }
  // Now that the relevant TableScanOperators are known, check whether any of them
  // carries a semijoin filter; if so, remove it.
  ParseContext pctx = procCtx.parseContext;
  Set<ReduceSinkOperator> rsSet = new HashSet<>(pctx.getRsToSemiJoinBranchInfo().keySet());
  for (TableScanOperator ts : tsOps) {
    for (ReduceSinkOperator rs : rsSet) {
      SemiJoinBranchInfo sjInfo = pctx.getRsToSemiJoinBranchInfo().get(rs);
      if (sjInfo != null && ts == sjInfo.getTsOp()) {
        // match!
        if (sjInfo.getIsHint()) {
          throw new SemanticException("Removing hinted semijoin as it is with SMB join " + rs + " : " + ts);
        }
        if (LOG.isDebugEnabled()) {
          LOG.debug("Semijoin optimization found going to SMB join. Removing semijoin "
              + OperatorUtils.getOpNamePretty(rs) + " - " + OperatorUtils.getOpNamePretty(ts));
        }
        GenTezUtils.removeBranch(rs);
        GenTezUtils.removeSemiJoinOperator(pctx, rs, ts);
      }
    }
  }
}
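Each rule above is paired with a SemanticNodeProcessor whose single process method the walker invokes with the matched node and the current traversal stack. The following is a minimal sketch of what a processor like SMBJoinOpProc plausibly does here: record each matched merge join together with the table scan at the bottom of the stack. Only the JoinOpToTsOpMap field is taken from the usage above; the rest is an assumption, not the exact Hive source.

import java.util.Stack;
import org.apache.hadoop.hive.ql.exec.CommonMergeJoinOperator;
import org.apache.hadoop.hive.ql.exec.TableScanOperator;
import org.apache.hadoop.hive.ql.lib.Node;
import org.apache.hadoop.hive.ql.lib.NodeProcessorCtx;
import org.apache.hadoop.hive.ql.lib.SemanticNodeProcessor;
import org.apache.hadoop.hive.ql.parse.SemanticException;

// Sketch (assumed implementation): fires when rule R1 matches a
// TS ... TezDummyStore ... CommonMergeJoin path.
private static class SMBJoinOpProc implements SemanticNodeProcessor {
  @Override
  public Object process(Node nd, Stack<Node> stack, NodeProcessorCtx procCtx,
      Object... nodeOutputs) throws SemanticException {
    SMBJoinOpProcContext ctx = (SMBJoinOpProcContext) procCtx;
    // nd is the merge join that completed the match; stack.get(0) is the
    // TableScanOperator the walk started from.
    ctx.JoinOpToTsOpMap.put((CommonMergeJoinOperator) nd, (TableScanOperator) stack.get(0));
    return null;
  }
}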
Use of org.apache.hadoop.hive.ql.lib.SemanticNodeProcessor in project hive by apache.
From class TezCompiler, method removeRedundantSemijoinAndDpp:
private void removeRedundantSemijoinAndDpp(OptimizeTezProcContext procCtx) throws SemanticException {
  Map<SemanticRule, SemanticNodeProcessor> opRules = new LinkedHashMap<>();
  opRules.put(new RuleRegExp("R1",
      GroupByOperator.getOperatorName() + "%" + ReduceSinkOperator.getOperatorName() + "%"
          + GroupByOperator.getOperatorName() + "%" + ReduceSinkOperator.getOperatorName() + "%"),
      new SemiJoinRemovalProc(false, true));
  opRules.put(new RuleRegExp("R2", AppMasterEventOperator.getOperatorName() + "%"),
      new DynamicPruningRemovalRedundantProc());
  // Gather
  SemiJoinRemovalContext ctx = new SemiJoinRemovalContext(procCtx.parseContext);
  SemanticDispatcher disp = new DefaultRuleDispatcher(null, opRules, ctx);
  List<Node> topNodes = new ArrayList<Node>();
  topNodes.addAll(procCtx.parseContext.getTopOps().values());
  SemanticGraphWalker ogw = new PreOrderOnceWalker(disp);
  ogw.startWalking(topNodes, null);
  // Remove
  for (Map.Entry<Operator<?>, TableScanOperator> p : ctx.opsToRemove.entrySet()) {
    if (LOG.isDebugEnabled()) {
      LOG.debug("Removing redundant " + OperatorUtils.getOpNamePretty(p.getKey()) + " - "
          + OperatorUtils.getOpNamePretty(p.getValue()));
    }
    GenTezUtils.removeBranch(p.getKey());
    if (p.getKey() instanceof AppMasterEventOperator) {
      GenTezUtils.removeSemiJoinOperator(procCtx.parseContext, (AppMasterEventOperator) p.getKey(), p.getValue());
    } else if (p.getKey() instanceof ReduceSinkOperator) {
      GenTezUtils.removeSemiJoinOperator(procCtx.parseContext, (ReduceSinkOperator) p.getKey(), p.getValue());
    } else {
      throw new SemanticException("Unexpected error - type for branch could not be recognized");
    }
  }
}
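The gather-then-remove split needs a context object shared between the processors and the removal loop. Below is a plausible minimal sketch, inferred from how the constructor and ctx.opsToRemove are used above; anything beyond those two members is an assumption.

import java.util.HashMap;
import java.util.Map;
import org.apache.hadoop.hive.ql.exec.Operator;
import org.apache.hadoop.hive.ql.exec.TableScanOperator;
import org.apache.hadoop.hive.ql.lib.NodeProcessorCtx;
import org.apache.hadoop.hive.ql.parse.ParseContext;

// Sketch: processors record branch roots during the walk; the caller mutates the plan afterwards.
private static class SemiJoinRemovalContext implements NodeProcessorCtx {
  final ParseContext parseContext;
  // Branch root (semijoin GBY/RS chain or AppMasterEventOperator) -> the table scan it targets.
  final Map<Operator<?>, TableScanOperator> opsToRemove = new HashMap<>();

  SemiJoinRemovalContext(ParseContext parseContext) {
    this.parseContext = parseContext;
  }
}

Deferring the mutation until after startWalking returns keeps the walker's view of the operator graph stable during traversal.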
Use of org.apache.hadoop.hive.ql.lib.SemanticNodeProcessor in project hive by apache.
From class MapReduceCompiler, method generateTaskTree:
@Override
protected void generateTaskTree(List<Task<?>> rootTasks, ParseContext pCtx, List<Task<MoveWork>> mvTask,
    Set<ReadEntity> inputs, Set<WriteEntity> outputs) throws SemanticException {
  // generate map-reduce plans
  ParseContext tempParseContext = getParseContext(pCtx, rootTasks);
  // Must be a deterministic-order map for consistent q-test output across Java versions.
  GenMRProcContext procCtx = new GenMRProcContext(conf,
      new LinkedHashMap<Operator<? extends OperatorDesc>, Task<?>>(), tempParseContext, mvTask, rootTasks,
      new LinkedHashMap<Operator<? extends OperatorDesc>, GenMapRedCtx>(), inputs, outputs);
  // Create a walker which walks the tree in a DFS manner while maintaining the operator stack.
  // The dispatcher generates the plan from the operator tree.
  Map<SemanticRule, SemanticNodeProcessor> opRules = new LinkedHashMap<SemanticRule, SemanticNodeProcessor>();
  opRules.put(new RuleRegExp("R1", TableScanOperator.getOperatorName() + "%"), new GenMRTableScan1());
  opRules.put(new RuleRegExp("R2", TableScanOperator.getOperatorName() + "%.*" + ReduceSinkOperator.getOperatorName() + "%"), new GenMRRedSink1());
  opRules.put(new RuleRegExp("R3", ReduceSinkOperator.getOperatorName() + "%.*" + ReduceSinkOperator.getOperatorName() + "%"), new GenMRRedSink2());
  opRules.put(new RuleRegExp("R4", FileSinkOperator.getOperatorName() + "%"), new GenMRFileSink1());
  opRules.put(new RuleRegExp("R5", UnionOperator.getOperatorName() + "%"), new GenMRUnion1());
  opRules.put(new RuleRegExp("R6", UnionOperator.getOperatorName() + "%.*" + ReduceSinkOperator.getOperatorName() + "%"), new GenMRRedSink3());
  opRules.put(new RuleRegExp("R7", MapJoinOperator.getOperatorName() + "%"), MapJoinFactory.getTableScanMapJoin());
  // The dispatcher fires the processor corresponding to the closest matching
  // rule and passes the context along.
  SemanticDispatcher disp = new DefaultRuleDispatcher(new GenMROperator(), opRules, procCtx);
  SemanticGraphWalker ogw = new GenMapRedWalker(disp);
  ArrayList<Node> topNodes = new ArrayList<Node>();
  topNodes.addAll(pCtx.getTopOps().values());
  ogw.startWalking(topNodes, null);
}
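All five listings share this skeleton: populate a rule map, wrap it in a dispatcher, and drive it with a graph walker. The stripped-down, hypothetical pass below spells the pattern out once; runMyPass and its rule are illustrative names, not Hive source. The rule regex is matched against the operator names along the walker's current stack, and since SemanticNodeProcessor has a single abstract method, a lambda can serve as the processor.

import java.util.ArrayList;
import java.util.LinkedHashMap;
import java.util.Map;
import org.apache.hadoop.hive.ql.exec.FileSinkOperator;
import org.apache.hadoop.hive.ql.exec.TableScanOperator;
import org.apache.hadoop.hive.ql.lib.DefaultGraphWalker;
import org.apache.hadoop.hive.ql.lib.DefaultRuleDispatcher;
import org.apache.hadoop.hive.ql.lib.Node;
import org.apache.hadoop.hive.ql.lib.NodeProcessorCtx;
import org.apache.hadoop.hive.ql.lib.RuleRegExp;
import org.apache.hadoop.hive.ql.lib.SemanticDispatcher;
import org.apache.hadoop.hive.ql.lib.SemanticGraphWalker;
import org.apache.hadoop.hive.ql.lib.SemanticNodeProcessor;
import org.apache.hadoop.hive.ql.lib.SemanticRule;
import org.apache.hadoop.hive.ql.parse.ParseContext;
import org.apache.hadoop.hive.ql.parse.SemanticException;

// Hypothetical helper showing the shared wiring; not part of Hive.
static void runMyPass(ParseContext pCtx, NodeProcessorCtx myCtx) throws SemanticException {
  Map<SemanticRule, SemanticNodeProcessor> opRules = new LinkedHashMap<>();
  // Fires at a FileSinkOperator reached along a path that passed through a TableScanOperator.
  opRules.put(new RuleRegExp("R1",
      TableScanOperator.getOperatorName() + "%.*" + FileSinkOperator.getOperatorName() + "%"),
      (nd, stack, procCtx, nodeOutputs) -> {
        // nd is the operator that completed the match; stack is the root-to-node path.
        return null;
      });
  // A null default processor means unmatched nodes are visited but not processed.
  SemanticDispatcher disp = new DefaultRuleDispatcher(null, opRules, myCtx);
  SemanticGraphWalker ogw = new DefaultGraphWalker(disp);
  ogw.startWalking(new ArrayList<Node>(pCtx.getTopOps().values()), null);
}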
Use of org.apache.hadoop.hive.ql.lib.SemanticNodeProcessor in project hive by apache.
From class ReduceSinkDeDuplication, method transform:
@Override
public ParseContext transform(ParseContext pctx) throws SemanticException {
  pGraphContext = pctx;
  // Generate the pruned column list for all relevant operators.
  ReduceSinkDeduplicateProcCtx cppCtx = new ReduceSinkDeduplicateProcCtx(pGraphContext);
  // For auto-converted map joins, it is not safe to dedup here (TODO).
  boolean mergeJoins = !pctx.getConf().getBoolVar(HIVECONVERTJOIN)
      && !pctx.getConf().getBoolVar(HIVECONVERTJOINNOCONDITIONALTASK)
      && !pctx.getConf().getBoolVar(ConfVars.HIVE_CONVERT_JOIN_BUCKET_MAPJOIN_TEZ)
      && !pctx.getConf().getBoolVar(ConfVars.HIVEDYNAMICPARTITIONHASHJOIN);
  // If multiple rules match with the same cost, the last rule is chosen as the processor;
  // see DefaultRuleDispatcher#dispatch().
  Map<SemanticRule, SemanticNodeProcessor> opRules = new LinkedHashMap<SemanticRule, SemanticNodeProcessor>();
  opRules.put(new RuleRegExp("R1", RS + "%.*%" + RS + "%"), ReduceSinkDeduplicateProcFactory.getReducerReducerProc());
  opRules.put(new RuleRegExp("R2", RS + "%" + GBY + "%.*%" + RS + "%"), ReduceSinkDeduplicateProcFactory.getGroupbyReducerProc());
  if (mergeJoins) {
    opRules.put(new RuleRegExp("R3", JOIN + "%.*%" + RS + "%"), ReduceSinkDeduplicateProcFactory.getJoinReducerProc());
  }
  // TODO RS+JOIN
  // The dispatcher fires the processor corresponding to the closest matching
  // rule and passes the context along.
  SemanticDispatcher disp = new DefaultRuleDispatcher(ReduceSinkDeduplicateProcFactory.getDefaultProc(), opRules, cppCtx);
  SemanticGraphWalker ogw = new DefaultGraphWalker(disp);
  // Create a list of top-op nodes.
  ArrayList<Node> topNodes = new ArrayList<Node>();
  topNodes.addAll(pGraphContext.getTopOps().values());
  ogw.startWalking(topNodes, null);
  return pGraphContext;
}
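The tie-breaking comment is worth making concrete: DefaultRuleDispatcher keeps the best (lowest-cost) matching rule while iterating, and on an equal cost a later entry displaces an earlier one, so the LinkedHashMap insertion order is load-bearing. A hypothetical illustration, reusing the RS shorthand from the code above:

// Hypothetical: two rules with identical patterns, hence identical match costs.
Map<SemanticRule, SemanticNodeProcessor> rules = new LinkedHashMap<>();
rules.put(new RuleRegExp("A", RS + "%.*%" + RS + "%"),
    (nd, stack, ctx, outs) -> null); // matches, but is displaced by the tie below
rules.put(new RuleRegExp("B", RS + "%.*%" + RS + "%"),
    (nd, stack, ctx, outs) -> null); // same cost, inserted later: this one fires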
Use of org.apache.hadoop.hive.ql.lib.SemanticNodeProcessor in project hive by apache.
From class MetadataOnlyOptimizer, method resolve:
@Override
public PhysicalContext resolve(PhysicalContext pctx) throws SemanticException {
  Map<SemanticRule, SemanticNodeProcessor> opRules = new LinkedHashMap<SemanticRule, SemanticNodeProcessor>();
  opRules.put(new RuleRegExp("R1", TableScanOperator.getOperatorName() + "%"), new TableScanProcessor());
  opRules.put(new RuleRegExp("R2", GroupByOperator.getOperatorName() + "%.*" + FileSinkOperator.getOperatorName() + "%"), new FileSinkProcessor());
  SemanticDispatcher disp = new NullScanTaskDispatcher(pctx, opRules);
  SemanticGraphWalker ogw = new DefaultGraphWalker(disp);
  ArrayList<Node> topNodes = new ArrayList<Node>();
  topNodes.addAll(pctx.getRootTasks());
  ogw.startWalking(topNodes, null);
  return pctx;
}
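Unlike the earlier examples, this walk starts from pctx.getRootTasks(), so NullScanTaskDispatcher applies the operator rules within each task it visits. For completeness, here is a sketch of what a processor like TableScanProcessor above plausibly checks: a scan that needs no real columns is a candidate for the metadata-only rewrite. WalkerCtx and setMayBeMetadataOnly are assumptions modeled on this optimizer's context, not verbatim Hive source.

import java.util.Stack;
import org.apache.hadoop.hive.ql.exec.TableScanOperator;
import org.apache.hadoop.hive.ql.lib.Node;
import org.apache.hadoop.hive.ql.lib.NodeProcessorCtx;
import org.apache.hadoop.hive.ql.lib.SemanticNodeProcessor;
import org.apache.hadoop.hive.ql.parse.SemanticException;

// Sketch (assumed implementation): flag table scans that reference no real columns.
private static class TableScanProcessor implements SemanticNodeProcessor {
  @Override
  public Object process(Node nd, Stack<Node> stack, NodeProcessorCtx procCtx,
      Object... nodeOutputs) throws SemanticException {
    TableScanOperator tsOp = (TableScanOperator) nd;
    WalkerCtx walkerCtx = (WalkerCtx) procCtx; // hypothetical context type
    if (tsOp.getNeededColumnIDs() != null && tsOp.getNeededColumnIDs().isEmpty()) {
      // Only partition metadata is needed; mark the scan as a null-scan candidate.
      walkerCtx.setMayBeMetadataOnly(tsOp); // assumed method
    }
    return null;
  }
}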