Use of org.apache.hadoop.hive.ql.lib.SemanticGraphWalker in project hive by apache.
Class StatsOptimizer, method transform:
@Override
public ParseContext transform(ParseContext pctx) throws SemanticException {
  if (pctx.getFetchTask() != null || !pctx.getQueryProperties().isQuery()
      || pctx.getQueryProperties().isAnalyzeRewrite() || pctx.getQueryProperties().isCTAS()
      || pctx.getLoadFileWork().size() > 1 || !pctx.getLoadTableWork().isEmpty()
      // the table is being sampled and we cannot optimize
      || !pctx.getNameToSplitSample().isEmpty()) {
    return pctx;
  }
  String TS = TableScanOperator.getOperatorName() + "%";
  String GBY = GroupByOperator.getOperatorName() + "%";
  String RS = ReduceSinkOperator.getOperatorName() + "%";
  String SEL = SelectOperator.getOperatorName() + "%";
  String FS = FileSinkOperator.getOperatorName() + "%";
  Map<SemanticRule, SemanticNodeProcessor> opRules = new LinkedHashMap<SemanticRule, SemanticNodeProcessor>();
  opRules.put(new RuleRegExp("R1", TS + SEL + GBY + RS + GBY + SEL + FS), new MetaDataProcessor(pctx));
  opRules.put(new RuleRegExp("R2", TS + SEL + GBY + RS + GBY + FS), new MetaDataProcessor(pctx));
  NodeProcessorCtx soProcCtx = new StatsOptimizerProcContext();
  SemanticDispatcher disp = new DefaultRuleDispatcher(null, opRules, soProcCtx);
  SemanticGraphWalker ogw = new DefaultGraphWalker(disp);
  ArrayList<Node> topNodes = new ArrayList<Node>();
  topNodes.addAll(pctx.getTopOps().values());
  ogw.startWalking(topNodes, null);
  return pctx;
}
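The MetaDataProcessor wired into rules R1 and R2 above is not shown on this page. As a rough, hypothetical illustration of what the dispatcher invokes when a rule matches, here is a minimal SemanticNodeProcessor sketch, assuming the usual process(Node, Stack, NodeProcessorCtx, Object...) contract; it only logs the match and is not the actual StatsOptimizer logic.

import java.util.Stack;

import org.apache.hadoop.hive.ql.exec.FileSinkOperator;
import org.apache.hadoop.hive.ql.lib.Node;
import org.apache.hadoop.hive.ql.lib.NodeProcessorCtx;
import org.apache.hadoop.hive.ql.lib.SemanticNodeProcessor;
import org.apache.hadoop.hive.ql.parse.SemanticException;

// Hypothetical stand-in for MetaDataProcessor: fired when the walker's current path
// matches R1 or R2, i.e. when the last visited operator completes the TS..FS pattern.
public class LoggingMatchProcessor implements SemanticNodeProcessor {

  @Override
  public Object process(Node nd, Stack<Node> stack, NodeProcessorCtx procCtx,
      Object... nodeOutputs) throws SemanticException {
    // 'stack' holds the operators on the path from the TableScan down to 'nd',
    // which for R1/R2 is the FileSinkOperator that closed the pattern.
    if (nd instanceof FileSinkOperator) {
      System.out.println("Matched stats-optimizer pattern of depth " + stack.size());
    }
    return null;
  }
}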
Use of org.apache.hadoop.hive.ql.lib.SemanticGraphWalker in project hive by apache.
Class SkewJoinOptimizer, method transform:
/* (non-Javadoc)
 * @see org.apache.hadoop.hive.ql.optimizer.Transform#transform
 * (org.apache.hadoop.hive.ql.parse.ParseContext)
 */
@Override
public ParseContext transform(ParseContext pctx) throws SemanticException {
  Map<SemanticRule, SemanticNodeProcessor> opRules = new LinkedHashMap<SemanticRule, SemanticNodeProcessor>();
  opRules.put(new RuleRegExp("R1", "TS%.*RS%JOIN%"), getSkewJoinProc(pctx));
  SkewJoinOptProcCtx skewJoinOptProcCtx = new SkewJoinOptProcCtx(pctx);
  // The dispatcher fires the processor corresponding to the closest matching
  // rule and passes the context along
  SemanticDispatcher disp = new DefaultRuleDispatcher(null, opRules, skewJoinOptProcCtx);
  SemanticGraphWalker ogw = new DefaultGraphWalker(disp);
  // Create a list of topop nodes
  List<Node> topNodes = new ArrayList<Node>();
  topNodes.addAll(pctx.getTopOps().values());
  ogw.startWalking(topNodes, null);
  return pctx;
}
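The R1 pattern here is written out literally as "TS%.*RS%JOIN%"; the other snippets build the same kind of pattern from getOperatorName(). Purely as a sketch, and assuming TableScanOperator, ReduceSinkOperator and JoinOperator report the names "TS", "RS" and "JOIN", an equivalent rule could be composed like this:

import org.apache.hadoop.hive.ql.exec.JoinOperator;
import org.apache.hadoop.hive.ql.exec.ReduceSinkOperator;
import org.apache.hadoop.hive.ql.exec.TableScanOperator;
import org.apache.hadoop.hive.ql.lib.RuleRegExp;
import org.apache.hadoop.hive.ql.lib.SemanticRule;

public class SkewJoinRuleFactory {

  // Same "TS%.*RS%JOIN%" pattern, composed from the operator-name helpers instead of a literal.
  static SemanticRule skewJoinRule() {
    return new RuleRegExp("R1",
        TableScanOperator.getOperatorName() + "%.*"
            + ReduceSinkOperator.getOperatorName() + "%"
            + JoinOperator.getOperatorName() + "%");
  }
}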
Use of org.apache.hadoop.hive.ql.lib.SemanticGraphWalker in project hive by apache.
Class SortedMergeBucketMapJoinOptimizer, method getListOfRejectedJoins:
private void getListOfRejectedJoins(ParseContext pctx, SortBucketJoinProcCtx smbJoinContext) throws SemanticException {
  // Go through all joins - it should only contain selects and filters between
  // tablescan and join operators.
  Map<SemanticRule, SemanticNodeProcessor> opRules = new LinkedHashMap<SemanticRule, SemanticNodeProcessor>();
  opRules.put(new RuleRegExp("R1", JoinOperator.getOperatorName() + "%"), getCheckCandidateJoin());
  // The dispatcher fires the processor corresponding to the closest matching
  // rule and passes the context along
  SemanticDispatcher disp = new DefaultRuleDispatcher(getDefaultProc(), opRules, smbJoinContext);
  SemanticGraphWalker ogw = new DefaultGraphWalker(disp);
  // Create a list of topop nodes
  ArrayList<Node> topNodes = new ArrayList<Node>();
  topNodes.addAll(pctx.getTopOps().values());
  ogw.startWalking(topNodes, null);
}
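Unlike the previous snippets, this one passes a non-null default processor to DefaultRuleDispatcher, so nodes that match no rule still get dispatched somewhere. getDefaultProc() is not shown on this page; a no-op placeholder along these lines (a sketch, not the actual Hive implementation) would satisfy the dispatcher:

import java.util.Stack;

import org.apache.hadoop.hive.ql.lib.Node;
import org.apache.hadoop.hive.ql.lib.NodeProcessorCtx;
import org.apache.hadoop.hive.ql.lib.SemanticNodeProcessor;
import org.apache.hadoop.hive.ql.parse.SemanticException;

// Fallback processor: does nothing, so every operator other than a join is simply skipped.
public class NoOpProcessor implements SemanticNodeProcessor {

  @Override
  public Object process(Node nd, Stack<Node> stack, NodeProcessorCtx procCtx,
      Object... nodeOutputs) throws SemanticException {
    return null;
  }
}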
Use of org.apache.hadoop.hive.ql.lib.SemanticGraphWalker in project hive by apache.
Class TezCompiler, method removeSemijoinOptimizationFromSMBJoins:
private static void removeSemijoinOptimizationFromSMBJoins(OptimizeTezProcContext procCtx) throws SemanticException {
  Map<SemanticRule, SemanticNodeProcessor> opRules = new LinkedHashMap<SemanticRule, SemanticNodeProcessor>();
  opRules.put(
      new RuleRegExp("R1",
          TableScanOperator.getOperatorName() + "%" + ".*" + TezDummyStoreOperator.getOperatorName() + "%"
              + CommonMergeJoinOperator.getOperatorName() + "%"),
      new SMBJoinOpProc());
  SMBJoinOpProcContext ctx = new SMBJoinOpProcContext();
  // The dispatcher finds SMB joins and, if a semijoin optimization feeds into them, removes it.
  SemanticDispatcher disp = new DefaultRuleDispatcher(null, opRules, ctx);
  List<Node> topNodes = new ArrayList<Node>();
  topNodes.addAll(procCtx.parseContext.getTopOps().values());
  SemanticGraphWalker ogw = new PreOrderOnceWalker(disp);
  ogw.startWalking(topNodes, null);
  List<TableScanOperator> tsOps = new ArrayList<>();
  // Iterate over the map and remove semijoin optimizations if needed.
  for (CommonMergeJoinOperator joinOp : ctx.JoinOpToTsOpMap.keySet()) {
    // Get one top-level TS op directly from the stack
    tsOps.add(ctx.JoinOpToTsOpMap.get(joinOp));
    // Get the other one by walking up from the join op's parents
    List<Operator<?>> parents = joinOp.getParentOperators();
    for (Operator<?> parent : parents) {
      if (parent instanceof TezDummyStoreOperator) {
        // already accounted for
        continue;
      }
      while (parent != null) {
        if (parent instanceof TableScanOperator) {
          tsOps.add((TableScanOperator) parent);
          break;
        }
        parent = parent.getParentOperators().get(0);
      }
    }
  }
  // Now that the relevant TableScanOperators are known, check whether a semijoin
  // filter exists on any of them; if so, remove it.
  ParseContext pctx = procCtx.parseContext;
  Set<ReduceSinkOperator> rsSet = new HashSet<>(pctx.getRsToSemiJoinBranchInfo().keySet());
  for (TableScanOperator ts : tsOps) {
    for (ReduceSinkOperator rs : rsSet) {
      SemiJoinBranchInfo sjInfo = pctx.getRsToSemiJoinBranchInfo().get(rs);
      if (sjInfo != null && ts == sjInfo.getTsOp()) {
        // match!
        if (sjInfo.getIsHint()) {
          throw new SemanticException("Removing hinted semijoin as it is with SMB join " + rs + " : " + ts);
        }
        if (LOG.isDebugEnabled()) {
          LOG.debug("Semijoin optimization found going to SMB join. Removing semijoin "
              + OperatorUtils.getOpNamePretty(rs) + " - " + OperatorUtils.getOpNamePretty(ts));
        }
        GenTezUtils.removeBranch(rs);
        GenTezUtils.removeSemiJoinOperator(pctx, rs, ts);
      }
    }
  }
}
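SMBJoinOpProc and SMBJoinOpProcContext are inner classes of TezCompiler and are not shown here. As a rough reconstruction of how JoinOpToTsOpMap could be filled in during the walk (a sketch under that assumption, not the actual Hive code), the processor fired on the matched CommonMergeJoinOperator can record the TableScanOperator at the root of the matched path:

import java.util.HashMap;
import java.util.Stack;

import org.apache.hadoop.hive.ql.exec.CommonMergeJoinOperator;
import org.apache.hadoop.hive.ql.exec.TableScanOperator;
import org.apache.hadoop.hive.ql.lib.Node;
import org.apache.hadoop.hive.ql.lib.NodeProcessorCtx;
import org.apache.hadoop.hive.ql.lib.SemanticNodeProcessor;
import org.apache.hadoop.hive.ql.parse.SemanticException;

// Sketch: for each CommonMergeJoinOperator matched by R1, remember the TableScanOperator
// sitting at the bottom of the walker's stack (the start of the matched path).
class SMBJoinOpProcSketch implements SemanticNodeProcessor {

  static class Context implements NodeProcessorCtx {
    HashMap<CommonMergeJoinOperator, TableScanOperator> JoinOpToTsOpMap = new HashMap<>();
  }

  @Override
  public Object process(Node nd, Stack<Node> stack, NodeProcessorCtx procCtx,
      Object... nodeOutputs) throws SemanticException {
    Context ctx = (Context) procCtx;
    ctx.JoinOpToTsOpMap.put((CommonMergeJoinOperator) nd, (TableScanOperator) stack.get(0));
    return null;
  }
}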
Use of org.apache.hadoop.hive.ql.lib.SemanticGraphWalker in project hive by apache.
Class TezCompiler, method removeRedundantSemijoinAndDpp:
private void removeRedundantSemijoinAndDpp(OptimizeTezProcContext procCtx) throws SemanticException {
  Map<SemanticRule, SemanticNodeProcessor> opRules = new LinkedHashMap<>();
  opRules.put(
      new RuleRegExp("R1",
          GroupByOperator.getOperatorName() + "%" + ReduceSinkOperator.getOperatorName() + "%"
              + GroupByOperator.getOperatorName() + "%" + ReduceSinkOperator.getOperatorName() + "%"),
      new SemiJoinRemovalProc(false, true));
  opRules.put(
      new RuleRegExp("R2", AppMasterEventOperator.getOperatorName() + "%"),
      new DynamicPruningRemovalRedundantProc());
  // Gather
  SemiJoinRemovalContext ctx = new SemiJoinRemovalContext(procCtx.parseContext);
  SemanticDispatcher disp = new DefaultRuleDispatcher(null, opRules, ctx);
  List<Node> topNodes = new ArrayList<Node>();
  topNodes.addAll(procCtx.parseContext.getTopOps().values());
  SemanticGraphWalker ogw = new PreOrderOnceWalker(disp);
  ogw.startWalking(topNodes, null);
  // Remove
  for (Map.Entry<Operator<?>, TableScanOperator> p : ctx.opsToRemove.entrySet()) {
    if (LOG.isDebugEnabled()) {
      LOG.debug("Removing redundant " + OperatorUtils.getOpNamePretty(p.getKey()) + " - "
          + OperatorUtils.getOpNamePretty(p.getValue()));
    }
    GenTezUtils.removeBranch(p.getKey());
    if (p.getKey() instanceof AppMasterEventOperator) {
      GenTezUtils.removeSemiJoinOperator(procCtx.parseContext, (AppMasterEventOperator) p.getKey(), p.getValue());
    } else if (p.getKey() instanceof ReduceSinkOperator) {
      GenTezUtils.removeSemiJoinOperator(procCtx.parseContext, (ReduceSinkOperator) p.getKey(), p.getValue());
    } else {
      throw new SemanticException("Unexpected error - type for branch could not be recognized");
    }
  }
}
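SemiJoinRemovalProc, DynamicPruningRemovalRedundantProc and SemiJoinRemovalContext are internal to TezCompiler and not shown on this page. The removal loop above only relies on the context exposing an opsToRemove map from branch head (a ReduceSinkOperator or AppMasterEventOperator) to the TableScanOperator it targets; a sketch of that shape, assuming nothing beyond what the loop uses, looks like this:

import java.util.HashMap;
import java.util.Map;

import org.apache.hadoop.hive.ql.exec.Operator;
import org.apache.hadoop.hive.ql.exec.TableScanOperator;
import org.apache.hadoop.hive.ql.lib.NodeProcessorCtx;
import org.apache.hadoop.hive.ql.parse.ParseContext;

// Sketch of the removal context consumed above: the processors record each redundant
// branch head together with the TableScanOperator it targets; the post-walk loop prunes them.
class SemiJoinRemovalContextSketch implements NodeProcessorCtx {

  final ParseContext parseContext;
  final Map<Operator<?>, TableScanOperator> opsToRemove = new HashMap<>();

  SemiJoinRemovalContextSketch(ParseContext parseContext) {
    this.parseContext = parseContext;
  }
}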