Use of org.apache.hadoop.hive.ql.parse.ParseContext in project hive by apache.
From the class NullScanTaskDispatcher, method dispatch:
@Override
public Object dispatch(Node nd, Stack<Node> stack, Object... nodeOutputs)
    throws SemanticException {
  Task<?> task = (Task<?>) nd;
  // create the context for walking operators
  ParseContext parseContext = physicalContext.getParseContext();
  WalkerCtx walkerCtx = new WalkerCtx();
  List<MapWork> mapWorks = new ArrayList<MapWork>(task.getMapWork());
  Collections.sort(mapWorks, new Comparator<MapWork>() {
    @Override
    public int compare(MapWork o1, MapWork o2) {
      return o1.getName().compareTo(o2.getName());
    }
  });
  for (MapWork mapWork : mapWorks) {
    LOG.debug("Looking at: {}", mapWork.getName());
    Collection<Operator<? extends OperatorDesc>> topOperators = mapWork.getAliasToWork().values();
    if (topOperators.isEmpty()) {
      LOG.debug("No top operators");
      return null;
    }
    LOG.debug("Looking for table scans where optimization is applicable");
    // The dispatcher fires the processor corresponding to the closest
    // matching rule and passes the context along
    SemanticDispatcher disp = new DefaultRuleDispatcher(null, rules, walkerCtx);
    SemanticGraphWalker ogw = new PreOrderOnceWalker(disp);
    // Create a list of topOp nodes
    ArrayList<Node> topNodes = new ArrayList<>();
    // Get the top Nodes for this task
    Collection<TableScanOperator> topOps = parseContext.getTopOps().values();
    for (Operator<? extends OperatorDesc> workOperator : topOperators) {
      if (topOps.contains(workOperator)) {
        topNodes.add(workOperator);
      }
    }
    Operator<? extends OperatorDesc> reducer = task.getReducer(mapWork);
    if (reducer != null) {
      topNodes.add(reducer);
    }
    ogw.startWalking(topNodes, null);
    int scanTableSize = walkerCtx.getMetadataOnlyTableScans().size();
    LOG.debug("Found {} null table scans", scanTableSize);
    if (scanTableSize > 0) {
      processTableScans(mapWork, walkerCtx.getMetadataOnlyTableScans());
    }
  }
  return null;
}
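The rules map that dispatch() hands to DefaultRuleDispatcher is assembled elsewhere in the optimizer and is not shown above. A minimal sketch of how such a map could be put together follows; the rule name and the no-op anonymous processor are placeholders rather than Hive's real implementation (the real processors record metadata-only candidates on the WalkerCtx, as the getMetadataOnlyTableScans() call above implies), and TableScanOperator.getOperatorName() is assumed to follow the same pattern as the operator names used in the SparkCompiler snippets below.

  // Sketch only: one way a rules map for this dispatcher could be assembled.
  // The rule name and the no-op processor body are placeholders.
  Map<SemanticRule, SemanticNodeProcessor> rules =
      new LinkedHashMap<SemanticRule, SemanticNodeProcessor>();
  rules.put(new RuleRegExp("Metadata-only table scan", TableScanOperator.getOperatorName() + "%"),
      new SemanticNodeProcessor() {
        @Override
        public Object process(Node nd, Stack<Node> stack, NodeProcessorCtx procCtx,
            Object... nodeOutputs) throws SemanticException {
          // A real processor would inspect the matched TableScanOperator and
          // record it on the WalkerCtx so that dispatch() can rewrite the scan.
          return null;
        }
      });
  // The map is then passed to the dispatcher exactly as in dispatch() above:
  // SemanticDispatcher disp = new DefaultRuleDispatcher(null, rules, walkerCtx);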
Use of org.apache.hadoop.hive.ql.parse.ParseContext in project hive by apache.
From the class SparkCompiler, method runSetReducerParallelism:
private void runSetReducerParallelism(OptimizeSparkProcContext procCtx) throws SemanticException {
  ParseContext pCtx = procCtx.getParseContext();
  Map<SemanticRule, SemanticNodeProcessor> opRules = new LinkedHashMap<SemanticRule, SemanticNodeProcessor>();
  opRules.put(new RuleRegExp("Set parallelism - ReduceSink", ReduceSinkOperator.getOperatorName() + "%"),
      new SetSparkReducerParallelism(pCtx.getConf()));
  // The dispatcher fires the processor corresponding to the closest matching
  // rule and passes the context along
  SemanticDispatcher disp = new DefaultRuleDispatcher(null, opRules, procCtx);
  SemanticGraphWalker ogw = new PreOrderWalker(disp);
  // Create a list of topOp nodes
  ArrayList<Node> topNodes = new ArrayList<Node>();
  topNodes.addAll(pCtx.getTopOps().values());
  ogw.startWalking(topNodes, null);
}
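The SparkCompiler passes in this section all repeat the same rule-map/dispatcher/walker boilerplate. As an illustration only, that shared sequence could be factored into a helper like the sketch below. walkTopOps is hypothetical and not part of Hive; it reuses only the calls already visible in the snippets (DefaultRuleDispatcher, the walker constructors, ParseContext.getTopOps(), startWalking) and assumes the process contexts implement NodeProcessorCtx, as their use with DefaultRuleDispatcher suggests.

  // Hypothetical helper (not in Hive): factors out the dispatch boilerplate
  // shared by the SparkCompiler passes in this section.
  private void walkTopOps(ParseContext pCtx,
      Map<SemanticRule, SemanticNodeProcessor> opRules,
      NodeProcessorCtx procCtx,
      java.util.function.Function<SemanticDispatcher, SemanticGraphWalker> walkerFactory)
      throws SemanticException {
    // The dispatcher fires the processor for the closest matching rule.
    SemanticDispatcher disp = new DefaultRuleDispatcher(null, opRules, procCtx);
    // The caller chooses the traversal order, e.g. PreOrderWalker::new.
    SemanticGraphWalker ogw = walkerFactory.apply(disp);
    // Walk from the top operators (table scans) of the parse context.
    ArrayList<Node> topNodes = new ArrayList<Node>();
    topNodes.addAll(pCtx.getTopOps().values());
    ogw.startWalking(topNodes, null);
  }

With such a helper, runSetReducerParallelism above would reduce to building opRules and calling walkTopOps(pCtx, opRules, procCtx, PreOrderWalker::new).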
Use of org.apache.hadoop.hive.ql.parse.ParseContext in project hive by apache.
From the class SparkCompiler, method runDynamicPartitionPruning:
private void runDynamicPartitionPruning(OptimizeSparkProcContext procCtx) throws SemanticException {
  if (!conf.isSparkDPPAny()) {
    return;
  }
  ParseContext parseContext = procCtx.getParseContext();
  Map<SemanticRule, SemanticNodeProcessor> opRules = new LinkedHashMap<SemanticRule, SemanticNodeProcessor>();
  opRules.put(new RuleRegExp("Dynamic Partition Pruning", FilterOperator.getOperatorName() + "%"),
      new DynamicPartitionPruningOptimization());
  // The dispatcher fires the processor corresponding to the closest matching
  // rule and passes the context along
  SemanticDispatcher disp = new DefaultRuleDispatcher(null, opRules, procCtx);
  SemanticGraphWalker ogw = new ForwardWalker(disp);
  List<Node> topNodes = new ArrayList<Node>();
  topNodes.addAll(parseContext.getTopOps().values());
  ogw.startWalking(topNodes, null);
}
Use of org.apache.hadoop.hive.ql.parse.ParseContext in project hive by apache.
From the class SparkCompiler, method runRemoveDynamicPruning:
private void runRemoveDynamicPruning(OptimizeSparkProcContext procCtx) throws SemanticException {
  ParseContext pCtx = procCtx.getParseContext();
  Map<SemanticRule, SemanticNodeProcessor> opRules = new LinkedHashMap<SemanticRule, SemanticNodeProcessor>();
  opRules.put(new RuleRegExp("Disabling Dynamic Partition Pruning", SparkPartitionPruningSinkOperator.getOperatorName() + "%"),
      new SparkRemoveDynamicPruning());
  // The dispatcher fires the processor corresponding to the closest matching
  // rule and passes the context along
  SemanticDispatcher disp = new DefaultRuleDispatcher(null, opRules, procCtx);
  SemanticGraphWalker ogw = new DefaultGraphWalker(disp);
  // Create a list of topOp nodes
  ArrayList<Node> topNodes = new ArrayList<Node>();
  topNodes.addAll(pCtx.getTopOps().values());
  ogw.startWalking(topNodes, null);
}
Use of org.apache.hadoop.hive.ql.parse.ParseContext in project hive by apache.
From the class SparkCompiler, method runJoinOptimizations:
private void runJoinOptimizations(OptimizeSparkProcContext procCtx) throws SemanticException {
  ParseContext pCtx = procCtx.getParseContext();
  Map<SemanticRule, SemanticNodeProcessor> opRules = new LinkedHashMap<SemanticRule, SemanticNodeProcessor>();
  opRules.put(new TypeRule(JoinOperator.class), new SparkJoinOptimizer(pCtx));
  opRules.put(new TypeRule(MapJoinOperator.class), new SparkJoinHintOptimizer(pCtx));
  // The dispatcher fires the processor corresponding to the closest matching
  // rule and passes the context along
  SemanticDispatcher disp = new DefaultRuleDispatcher(null, opRules, procCtx);
  SemanticGraphWalker ogw = new DefaultGraphWalker(disp);
  // Create a list of topOp nodes
  ArrayList<Node> topNodes = new ArrayList<Node>();
  topNodes.addAll(pCtx.getTopOps().values());
  ogw.startWalking(topNodes, null);
}
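Using the hypothetical walkTopOps helper sketched after runSetReducerParallelism, this last pass would shrink to its rule map plus a single call. The rewrite below is illustrative only and is not Hive's actual code:

  // Illustrative only: runJoinOptimizations expressed with the hypothetical
  // walkTopOps helper; the behavior matches the original method above.
  private void runJoinOptimizations(OptimizeSparkProcContext procCtx) throws SemanticException {
    ParseContext pCtx = procCtx.getParseContext();
    Map<SemanticRule, SemanticNodeProcessor> opRules = new LinkedHashMap<SemanticRule, SemanticNodeProcessor>();
    opRules.put(new TypeRule(JoinOperator.class), new SparkJoinOptimizer(pCtx));
    opRules.put(new TypeRule(MapJoinOperator.class), new SparkJoinHintOptimizer(pCtx));
    walkTopOps(pCtx, opRules, procCtx, DefaultGraphWalker::new);
  }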