Search in sources :

Example 1 with SparkJoinOptimizer

use of org.apache.hadoop.hive.ql.optimizer.spark.SparkJoinOptimizer in project hive by apache.

the class SparkCompiler method runJoinOptimizations.

private void runJoinOptimizations(OptimizeSparkProcContext procCtx) throws SemanticException {
    ParseContext pCtx = procCtx.getParseContext();
    Map<Rule, NodeProcessor> opRules = new LinkedHashMap<Rule, NodeProcessor>();
    opRules.put(new TypeRule(JoinOperator.class), new SparkJoinOptimizer(pCtx));
    opRules.put(new TypeRule(MapJoinOperator.class), new SparkJoinHintOptimizer(pCtx));
    opRules.put(new RuleRegExp("Disabling Dynamic Partition Pruning By Size", SparkPartitionPruningSinkOperator.getOperatorName() + "%"), new SparkRemoveDynamicPruningBySize());
    // The dispatcher fires the processor corresponding to the closest matching
    // rule and passes the context along
    Dispatcher disp = new DefaultRuleDispatcher(null, opRules, procCtx);
    GraphWalker ogw = new DefaultGraphWalker(disp);
    // Create a list of topop nodes
    ArrayList<Node> topNodes = new ArrayList<Node>();
    topNodes.addAll(pCtx.getTopOps().values());
    ogw.startWalking(topNodes, null);
}
Also used : MapJoinOperator(org.apache.hadoop.hive.ql.exec.MapJoinOperator) SMBMapJoinOperator(org.apache.hadoop.hive.ql.exec.SMBMapJoinOperator) JoinOperator(org.apache.hadoop.hive.ql.exec.JoinOperator) MapJoinOperator(org.apache.hadoop.hive.ql.exec.MapJoinOperator) SMBMapJoinOperator(org.apache.hadoop.hive.ql.exec.SMBMapJoinOperator) NodeProcessor(org.apache.hadoop.hive.ql.lib.NodeProcessor) DefaultRuleDispatcher(org.apache.hadoop.hive.ql.lib.DefaultRuleDispatcher) DefaultGraphWalker(org.apache.hadoop.hive.ql.lib.DefaultGraphWalker) Node(org.apache.hadoop.hive.ql.lib.Node) RuleRegExp(org.apache.hadoop.hive.ql.lib.RuleRegExp) ArrayList(java.util.ArrayList) SparkRemoveDynamicPruningBySize(org.apache.hadoop.hive.ql.optimizer.SparkRemoveDynamicPruningBySize) SparkJoinHintOptimizer(org.apache.hadoop.hive.ql.optimizer.spark.SparkJoinHintOptimizer) Dispatcher(org.apache.hadoop.hive.ql.lib.Dispatcher) DefaultRuleDispatcher(org.apache.hadoop.hive.ql.lib.DefaultRuleDispatcher) SparkJoinOptimizer(org.apache.hadoop.hive.ql.optimizer.spark.SparkJoinOptimizer) LinkedHashMap(java.util.LinkedHashMap) ParseContext(org.apache.hadoop.hive.ql.parse.ParseContext) Rule(org.apache.hadoop.hive.ql.lib.Rule) TypeRule(org.apache.hadoop.hive.ql.lib.TypeRule) GraphWalker(org.apache.hadoop.hive.ql.lib.GraphWalker) DefaultGraphWalker(org.apache.hadoop.hive.ql.lib.DefaultGraphWalker) TypeRule(org.apache.hadoop.hive.ql.lib.TypeRule)

Aggregations

ArrayList (java.util.ArrayList)1 LinkedHashMap (java.util.LinkedHashMap)1 JoinOperator (org.apache.hadoop.hive.ql.exec.JoinOperator)1 MapJoinOperator (org.apache.hadoop.hive.ql.exec.MapJoinOperator)1 SMBMapJoinOperator (org.apache.hadoop.hive.ql.exec.SMBMapJoinOperator)1 DefaultGraphWalker (org.apache.hadoop.hive.ql.lib.DefaultGraphWalker)1 DefaultRuleDispatcher (org.apache.hadoop.hive.ql.lib.DefaultRuleDispatcher)1 Dispatcher (org.apache.hadoop.hive.ql.lib.Dispatcher)1 GraphWalker (org.apache.hadoop.hive.ql.lib.GraphWalker)1 Node (org.apache.hadoop.hive.ql.lib.Node)1 NodeProcessor (org.apache.hadoop.hive.ql.lib.NodeProcessor)1 Rule (org.apache.hadoop.hive.ql.lib.Rule)1 RuleRegExp (org.apache.hadoop.hive.ql.lib.RuleRegExp)1 TypeRule (org.apache.hadoop.hive.ql.lib.TypeRule)1 SparkRemoveDynamicPruningBySize (org.apache.hadoop.hive.ql.optimizer.SparkRemoveDynamicPruningBySize)1 SparkJoinHintOptimizer (org.apache.hadoop.hive.ql.optimizer.spark.SparkJoinHintOptimizer)1 SparkJoinOptimizer (org.apache.hadoop.hive.ql.optimizer.spark.SparkJoinOptimizer)1 ParseContext (org.apache.hadoop.hive.ql.parse.ParseContext)1