Example 16 with ParseContext

use of org.apache.hadoop.hive.ql.parse.ParseContext in project hive by apache.

the class NullScanTaskDispatcher method dispatch.

@Override
public Object dispatch(Node nd, Stack<Node> stack, Object... nodeOutputs) throws SemanticException {
    Task<?> task = (Task<?>) nd;
    // create the context for walking operators
    ParseContext parseContext = physicalContext.getParseContext();
    WalkerCtx walkerCtx = new WalkerCtx();
    List<MapWork> mapWorks = new ArrayList<>(task.getMapWork());
    mapWorks.sort(Comparator.comparing(MapWork::getName));
    for (MapWork mapWork : mapWorks) {
        LOG.debug("Looking at: {}", mapWork.getName());
        Collection<Operator<? extends OperatorDesc>> topOperators = mapWork.getAliasToWork().values();
        if (topOperators.isEmpty()) {
            LOG.debug("No top operators");
            return null;
        }
        LOG.debug("Looking for table scans where optimization is applicable");
        // The dispatcher fires the processor corresponding to the closest
        // matching rule and passes the context along
        SemanticDispatcher disp = new DefaultRuleDispatcher(null, rules, walkerCtx);
        SemanticGraphWalker ogw = new PreOrderOnceWalker(disp);
        // Create a list of topOp nodes
        ArrayList<Node> topNodes = new ArrayList<>();
        // Get the top Nodes for this task
        Collection<TableScanOperator> topOps = parseContext.getTopOps().values();
        for (Operator<? extends OperatorDesc> workOperator : topOperators) {
            if (topOps.contains(workOperator)) {
                topNodes.add(workOperator);
            }
        }
        Operator<? extends OperatorDesc> reducer = task.getReducer(mapWork);
        if (reducer != null) {
            topNodes.add(reducer);
        }
        ogw.startWalking(topNodes, null);
        int scanTableSize = walkerCtx.getMetadataOnlyTableScans().size();
        LOG.debug("Found {} null table scans", scanTableSize);
        if (scanTableSize > 0) {
            processTableScans(mapWork, walkerCtx.getMetadataOnlyTableScans());
        }
    }
    return null;
}
Also used : TableScanOperator(org.apache.hadoop.hive.ql.exec.TableScanOperator) Operator(org.apache.hadoop.hive.ql.exec.Operator) Task(org.apache.hadoop.hive.ql.exec.Task) DefaultRuleDispatcher(org.apache.hadoop.hive.ql.lib.DefaultRuleDispatcher) Node(org.apache.hadoop.hive.ql.lib.Node) ArrayList(java.util.ArrayList) SemanticGraphWalker(org.apache.hadoop.hive.ql.lib.SemanticGraphWalker) MapWork(org.apache.hadoop.hive.ql.plan.MapWork) ParseContext(org.apache.hadoop.hive.ql.parse.ParseContext) SemanticDispatcher(org.apache.hadoop.hive.ql.lib.SemanticDispatcher) PreOrderOnceWalker(org.apache.hadoop.hive.ql.lib.PreOrderOnceWalker) OperatorDesc(org.apache.hadoop.hive.ql.plan.OperatorDesc) WalkerCtx(org.apache.hadoop.hive.ql.optimizer.physical.MetadataOnlyOptimizer.WalkerCtx)
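The dispatch method above is one instance of Hive's generic rule/dispatcher/walker pattern: processors are registered against rules, the dispatcher fires the closest match for each visited node, and a walker decides the traversal order. As a minimal self-contained sketch of the same machinery (the TableScanCounter class below is hypothetical, not Hive code):

import java.util.ArrayList;
import java.util.LinkedHashMap;
import java.util.Map;
import java.util.Stack;

import org.apache.hadoop.hive.ql.exec.TableScanOperator;
import org.apache.hadoop.hive.ql.lib.DefaultGraphWalker;
import org.apache.hadoop.hive.ql.lib.DefaultRuleDispatcher;
import org.apache.hadoop.hive.ql.lib.Node;
import org.apache.hadoop.hive.ql.lib.NodeProcessorCtx;
import org.apache.hadoop.hive.ql.lib.RuleRegExp;
import org.apache.hadoop.hive.ql.lib.SemanticDispatcher;
import org.apache.hadoop.hive.ql.lib.SemanticGraphWalker;
import org.apache.hadoop.hive.ql.lib.SemanticNodeProcessor;
import org.apache.hadoop.hive.ql.lib.SemanticRule;
import org.apache.hadoop.hive.ql.parse.ParseContext;
import org.apache.hadoop.hive.ql.parse.SemanticException;

// Hypothetical helper, not Hive code: counts the TableScanOperators
// reachable from the top operators using the rule/dispatcher/walker machinery.
public class TableScanCounter implements SemanticNodeProcessor {

    private int count = 0;

    @Override
    public Object process(Node nd, Stack<Node> stack, NodeProcessorCtx procCtx,
            Object... nodeOutputs) throws SemanticException {
        // Only fires on nodes matched by the "TS%" rule registered below.
        count++;
        return null;
    }

    public static int countTableScans(ParseContext pCtx) throws SemanticException {
        TableScanCounter counter = new TableScanCounter();
        Map<SemanticRule, SemanticNodeProcessor> opRules = new LinkedHashMap<>();
        // TableScanOperator.getOperatorName() is "TS", so this rule is "TS%".
        opRules.put(new RuleRegExp("Count table scans",
                TableScanOperator.getOperatorName() + "%"), counter);
        // null default processor: nodes matching no rule are simply skipped.
        SemanticDispatcher disp = new DefaultRuleDispatcher(null, opRules, null);
        SemanticGraphWalker ogw = new DefaultGraphWalker(disp);
        ogw.startWalking(new ArrayList<Node>(pCtx.getTopOps().values()), null);
        return counter.count;
    }
}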

Example 17 with ParseContext

use of org.apache.hadoop.hive.ql.parse.ParseContext in project hive by apache.

the class SparkCompiler method runSetReducerParallelism.

private void runSetReducerParallelism(OptimizeSparkProcContext procCtx) throws SemanticException {
    ParseContext pCtx = procCtx.getParseContext();
    Map<SemanticRule, SemanticNodeProcessor> opRules = new LinkedHashMap<>();
    opRules.put(new RuleRegExp("Set parallelism - ReduceSink", ReduceSinkOperator.getOperatorName() + "%"), new SetSparkReducerParallelism(pCtx.getConf()));
    // The dispatcher fires the processor corresponding to the closest matching
    // rule and passes the context along
    SemanticDispatcher disp = new DefaultRuleDispatcher(null, opRules, procCtx);
    SemanticGraphWalker ogw = new PreOrderWalker(disp);
    // Create a list of topOp nodes
    List<Node> topNodes = new ArrayList<>(pCtx.getTopOps().values());
    ogw.startWalking(topNodes, null);
}
Also used : SemanticRule(org.apache.hadoop.hive.ql.lib.SemanticRule) DefaultRuleDispatcher(org.apache.hadoop.hive.ql.lib.DefaultRuleDispatcher) Node(org.apache.hadoop.hive.ql.lib.Node) RuleRegExp(org.apache.hadoop.hive.ql.lib.RuleRegExp) ArrayList(java.util.ArrayList) SemanticGraphWalker(org.apache.hadoop.hive.ql.lib.SemanticGraphWalker) SetSparkReducerParallelism(org.apache.hadoop.hive.ql.optimizer.spark.SetSparkReducerParallelism) LinkedHashMap(java.util.LinkedHashMap) ParseContext(org.apache.hadoop.hive.ql.parse.ParseContext) SemanticDispatcher(org.apache.hadoop.hive.ql.lib.SemanticDispatcher) SemanticNodeProcessor(org.apache.hadoop.hive.ql.lib.SemanticNodeProcessor) PreOrderWalker(org.apache.hadoop.hive.ql.lib.PreOrderWalker)
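For reference, SetSparkReducerParallelism here is bound to the pattern ReduceSinkOperator.getOperatorName() + "%", i.e. "RS%", so its process method receives every matched ReduceSinkOperator. A hedged sketch of that processor shape (the fixed value below is a placeholder, not Hive's statistics-based estimate):

// Sketch of the processor shape only: the real SetSparkReducerParallelism
// derives the reducer count from input statistics and configuration.
SemanticNodeProcessor fixedParallelism = new SemanticNodeProcessor() {

    @Override
    public Object process(Node nd, Stack<Node> stack, NodeProcessorCtx procCtx,
            Object... nodeOutputs) throws SemanticException {
        // nd matched the "RS%" rule, so the cast is safe.
        ReduceSinkOperator sink = (ReduceSinkOperator) nd;
        if (sink.getConf().getNumReducers() <= 0) {
            // placeholder value, not Hive's heuristic
            sink.getConf().setNumReducers(4);
        }
        return null;
    }
};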

Example 18 with ParseContext

use of org.apache.hadoop.hive.ql.parse.ParseContext in project hive by apache.

the class SparkCompiler method runDynamicPartitionPruning.

private void runDynamicPartitionPruning(OptimizeSparkProcContext procCtx) throws SemanticException {
    if (!conf.isSparkDPPAny()) {
        return;
    }
    ParseContext parseContext = procCtx.getParseContext();
    Map<SemanticRule, SemanticNodeProcessor> opRules = new LinkedHashMap<>();
    opRules.put(new RuleRegExp("Dynamic Partition Pruning", FilterOperator.getOperatorName() + "%"), new DynamicPartitionPruningOptimization());
    // The dispatcher fires the processor corresponding to the closest matching
    // rule and passes the context along
    SemanticDispatcher disp = new DefaultRuleDispatcher(null, opRules, procCtx);
    SemanticGraphWalker ogw = new ForwardWalker(disp);
    List<Node> topNodes = new ArrayList<>(parseContext.getTopOps().values());
    ogw.startWalking(topNodes, null);
}
Also used : SemanticRule(org.apache.hadoop.hive.ql.lib.SemanticRule) ForwardWalker(org.apache.hadoop.hive.ql.lib.ForwardWalker) DefaultRuleDispatcher(org.apache.hadoop.hive.ql.lib.DefaultRuleDispatcher) Node(org.apache.hadoop.hive.ql.lib.Node) RuleRegExp(org.apache.hadoop.hive.ql.lib.RuleRegExp) ArrayList(java.util.ArrayList) SemanticGraphWalker(org.apache.hadoop.hive.ql.lib.SemanticGraphWalker) LinkedHashMap(java.util.LinkedHashMap) DynamicPartitionPruningOptimization(org.apache.hadoop.hive.ql.optimizer.DynamicPartitionPruningOptimization) ParseContext(org.apache.hadoop.hive.ql.parse.ParseContext) SemanticDispatcher(org.apache.hadoop.hive.ql.lib.SemanticDispatcher) SemanticNodeProcessor(org.apache.hadoop.hive.ql.lib.SemanticNodeProcessor)
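RuleRegExp matches its pattern against the names of the operators along the walked path, so FilterOperator.getOperatorName() + "%" resolves to "FIL%" and fires on every FilterOperator reached. A short sketch of the two common pattern forms (rule names here are illustrative):

// "FIL%": fires on any FilterOperator the walker reaches.
SemanticRule anyFilter =
        new RuleRegExp("Any filter", FilterOperator.getOperatorName() + "%");

// "TS%.*FIL%": fires only when a TableScanOperator precedes the filter
// on the walked path.
SemanticRule scanThenFilter = new RuleRegExp("Scan then filter",
        TableScanOperator.getOperatorName() + "%.*" + FilterOperator.getOperatorName() + "%");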

Example 19 with ParseContext

use of org.apache.hadoop.hive.ql.parse.ParseContext in project hive by apache.

the class SparkCompiler method runRemoveDynamicPruning.

private void runRemoveDynamicPruning(OptimizeSparkProcContext procCtx) throws SemanticException {
    ParseContext pCtx = procCtx.getParseContext();
    Map<SemanticRule, SemanticNodeProcessor> opRules = new LinkedHashMap<>();
    opRules.put(new RuleRegExp("Disabling Dynamic Partition Pruning", SparkPartitionPruningSinkOperator.getOperatorName() + "%"), new SparkRemoveDynamicPruning());
    // The dispatcher fires the processor corresponding to the closest matching
    // rule and passes the context along
    SemanticDispatcher disp = new DefaultRuleDispatcher(null, opRules, procCtx);
    SemanticGraphWalker ogw = new DefaultGraphWalker(disp);
    // Create a list of topOp nodes
    List<Node> topNodes = new ArrayList<>(pCtx.getTopOps().values());
    ogw.startWalking(topNodes, null);
}
Also used : SemanticRule(org.apache.hadoop.hive.ql.lib.SemanticRule) DefaultRuleDispatcher(org.apache.hadoop.hive.ql.lib.DefaultRuleDispatcher) DefaultGraphWalker(org.apache.hadoop.hive.ql.lib.DefaultGraphWalker) Node(org.apache.hadoop.hive.ql.lib.Node) RuleRegExp(org.apache.hadoop.hive.ql.lib.RuleRegExp) ArrayList(java.util.ArrayList) SemanticGraphWalker(org.apache.hadoop.hive.ql.lib.SemanticGraphWalker) LinkedHashMap(java.util.LinkedHashMap) SparkRemoveDynamicPruning(org.apache.hadoop.hive.ql.optimizer.SparkRemoveDynamicPruning) ParseContext(org.apache.hadoop.hive.ql.parse.ParseContext) SemanticDispatcher(org.apache.hadoop.hive.ql.lib.SemanticDispatcher) SemanticNodeProcessor(org.apache.hadoop.hive.ql.lib.SemanticNodeProcessor)
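Examples 16 through 19 use the same dispatcher but different walkers; the rules and processors are walker-agnostic. A rough side-by-side of the walkers seen above (the comments summarize their traversal orders approximately, not normatively):

SemanticDispatcher disp = new DefaultRuleDispatcher(null, opRules, procCtx);

// Same rules, different traversal orders:
SemanticGraphWalker def = new DefaultGraphWalker(disp);    // dispatches a node once all its parents have been dispatched
SemanticGraphWalker pre = new PreOrderWalker(disp);        // parents first; nodes shared by several paths may be revisited
SemanticGraphWalker once = new PreOrderOnceWalker(disp);   // parents first; each node visited only once
SemanticGraphWalker fwd = new ForwardWalker(disp);         // walks from the top operators toward the leaves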

Example 20 with ParseContext

use of org.apache.hadoop.hive.ql.parse.ParseContext in project hive by apache.

the class SparkCompiler method runJoinOptimizations.

private void runJoinOptimizations(OptimizeSparkProcContext procCtx) throws SemanticException {
    ParseContext pCtx = procCtx.getParseContext();
    Map<SemanticRule, SemanticNodeProcessor> opRules = new LinkedHashMap<>();
    opRules.put(new TypeRule(JoinOperator.class), new SparkJoinOptimizer(pCtx));
    opRules.put(new TypeRule(MapJoinOperator.class), new SparkJoinHintOptimizer(pCtx));
    // The dispatcher fires the processor corresponding to the closest matching
    // rule and passes the context along
    SemanticDispatcher disp = new DefaultRuleDispatcher(null, opRules, procCtx);
    SemanticGraphWalker ogw = new DefaultGraphWalker(disp);
    // Create a list of topOp nodes
    List<Node> topNodes = new ArrayList<>(pCtx.getTopOps().values());
    ogw.startWalking(topNodes, null);
}
Also used : MapJoinOperator(org.apache.hadoop.hive.ql.exec.MapJoinOperator) SMBMapJoinOperator(org.apache.hadoop.hive.ql.exec.SMBMapJoinOperator) JoinOperator(org.apache.hadoop.hive.ql.exec.JoinOperator) SemanticRule(org.apache.hadoop.hive.ql.lib.SemanticRule) DefaultRuleDispatcher(org.apache.hadoop.hive.ql.lib.DefaultRuleDispatcher) DefaultGraphWalker(org.apache.hadoop.hive.ql.lib.DefaultGraphWalker) Node(org.apache.hadoop.hive.ql.lib.Node) ArrayList(java.util.ArrayList) SemanticGraphWalker(org.apache.hadoop.hive.ql.lib.SemanticGraphWalker) SparkJoinHintOptimizer(org.apache.hadoop.hive.ql.optimizer.spark.SparkJoinHintOptimizer) SparkJoinOptimizer(org.apache.hadoop.hive.ql.optimizer.spark.SparkJoinOptimizer) LinkedHashMap(java.util.LinkedHashMap) ParseContext(org.apache.hadoop.hive.ql.parse.ParseContext) SemanticDispatcher(org.apache.hadoop.hive.ql.lib.SemanticDispatcher) SemanticNodeProcessor(org.apache.hadoop.hive.ql.lib.SemanticNodeProcessor) TypeRule(org.apache.hadoop.hive.ql.lib.TypeRule)
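Unlike the RuleRegExp rules in Examples 17 through 19, TypeRule matches the visited node by its Java class, with no name pattern involved. A small sketch of the two rule styles expressing a similar intent (assuming the same opRules map and pCtx as above):

// Class-based match: fires when the visited operator is a JoinOperator.
opRules.put(new TypeRule(JoinOperator.class), new SparkJoinOptimizer(pCtx));

// Name-based match expressing a similar intent; JoinOperator.getOperatorName()
// returns the short operator name used in rule patterns.
opRules.put(new RuleRegExp("Join optimizer by name",
        JoinOperator.getOperatorName() + "%"), new SparkJoinOptimizer(pCtx));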

Aggregations

ParseContext (org.apache.hadoop.hive.ql.parse.ParseContext) 35
TableScanOperator (org.apache.hadoop.hive.ql.exec.TableScanOperator) 15
ArrayList (java.util.ArrayList) 14
Path (org.apache.hadoop.fs.Path) 12
Context (org.apache.hadoop.hive.ql.Context) 12
Node (org.apache.hadoop.hive.ql.lib.Node) 10
Operator (org.apache.hadoop.hive.ql.exec.Operator) 8
LinkedHashMap (java.util.LinkedHashMap) 7
FileSinkOperator (org.apache.hadoop.hive.ql.exec.FileSinkOperator) 7
Task (org.apache.hadoop.hive.ql.exec.Task) 7
DefaultRuleDispatcher (org.apache.hadoop.hive.ql.lib.DefaultRuleDispatcher) 7
MapredWork (org.apache.hadoop.hive.ql.plan.MapredWork) 7
OperatorDesc (org.apache.hadoop.hive.ql.plan.OperatorDesc) 7
TableDesc (org.apache.hadoop.hive.ql.plan.TableDesc) 7
SemanticDispatcher (org.apache.hadoop.hive.ql.lib.SemanticDispatcher) 6
SemanticGraphWalker (org.apache.hadoop.hive.ql.lib.SemanticGraphWalker) 6
Partition (org.apache.hadoop.hive.ql.metadata.Partition) 6
Table (org.apache.hadoop.hive.ql.metadata.Table) 6
PrunedPartitionList (org.apache.hadoop.hive.ql.parse.PrunedPartitionList) 6
FileSinkDesc (org.apache.hadoop.hive.ql.plan.FileSinkDesc) 6