Search in sources :

Example 31 with SemanticRule

use of org.apache.hadoop.hive.ql.lib.SemanticRule in project hive by apache.

the class SparkCompiler method runRemoveDynamicPruning.

private void runRemoveDynamicPruning(OptimizeSparkProcContext procCtx) throws SemanticException {
    ParseContext pCtx = procCtx.getParseContext();
    Map<SemanticRule, SemanticNodeProcessor> opRules = new LinkedHashMap<SemanticRule, SemanticNodeProcessor>();
    opRules.put(new RuleRegExp("Disabling Dynamic Partition Pruning", SparkPartitionPruningSinkOperator.getOperatorName() + "%"), new SparkRemoveDynamicPruning());
    // The dispatcher fires the processor corresponding to the closest matching
    // rule and passes the context along
    SemanticDispatcher disp = new DefaultRuleDispatcher(null, opRules, procCtx);
    SemanticGraphWalker ogw = new DefaultGraphWalker(disp);
    // Create a list of topop nodes
    ArrayList<Node> topNodes = new ArrayList<Node>();
    topNodes.addAll(pCtx.getTopOps().values());
    ogw.startWalking(topNodes, null);
}
Also used : SemanticRule(org.apache.hadoop.hive.ql.lib.SemanticRule) DefaultRuleDispatcher(org.apache.hadoop.hive.ql.lib.DefaultRuleDispatcher) DefaultGraphWalker(org.apache.hadoop.hive.ql.lib.DefaultGraphWalker) Node(org.apache.hadoop.hive.ql.lib.Node) RuleRegExp(org.apache.hadoop.hive.ql.lib.RuleRegExp) ArrayList(java.util.ArrayList) SemanticGraphWalker(org.apache.hadoop.hive.ql.lib.SemanticGraphWalker) LinkedHashMap(java.util.LinkedHashMap) SparkRemoveDynamicPruning(org.apache.hadoop.hive.ql.optimizer.SparkRemoveDynamicPruning) ParseContext(org.apache.hadoop.hive.ql.parse.ParseContext) SemanticDispatcher(org.apache.hadoop.hive.ql.lib.SemanticDispatcher) SemanticNodeProcessor(org.apache.hadoop.hive.ql.lib.SemanticNodeProcessor)

Example 32 with SemanticRule

use of org.apache.hadoop.hive.ql.lib.SemanticRule in project hive by apache.

the class SparkCompiler method runJoinOptimizations.

private void runJoinOptimizations(OptimizeSparkProcContext procCtx) throws SemanticException {
    ParseContext pCtx = procCtx.getParseContext();
    Map<SemanticRule, SemanticNodeProcessor> opRules = new LinkedHashMap<SemanticRule, SemanticNodeProcessor>();
    opRules.put(new TypeRule(JoinOperator.class), new SparkJoinOptimizer(pCtx));
    opRules.put(new TypeRule(MapJoinOperator.class), new SparkJoinHintOptimizer(pCtx));
    // The dispatcher fires the processor corresponding to the closest matching
    // rule and passes the context along
    SemanticDispatcher disp = new DefaultRuleDispatcher(null, opRules, procCtx);
    SemanticGraphWalker ogw = new DefaultGraphWalker(disp);
    // Create a list of topop nodes
    ArrayList<Node> topNodes = new ArrayList<Node>();
    topNodes.addAll(pCtx.getTopOps().values());
    ogw.startWalking(topNodes, null);
}
Also used : MapJoinOperator(org.apache.hadoop.hive.ql.exec.MapJoinOperator) SMBMapJoinOperator(org.apache.hadoop.hive.ql.exec.SMBMapJoinOperator) JoinOperator(org.apache.hadoop.hive.ql.exec.JoinOperator) MapJoinOperator(org.apache.hadoop.hive.ql.exec.MapJoinOperator) SMBMapJoinOperator(org.apache.hadoop.hive.ql.exec.SMBMapJoinOperator) SemanticRule(org.apache.hadoop.hive.ql.lib.SemanticRule) DefaultRuleDispatcher(org.apache.hadoop.hive.ql.lib.DefaultRuleDispatcher) DefaultGraphWalker(org.apache.hadoop.hive.ql.lib.DefaultGraphWalker) Node(org.apache.hadoop.hive.ql.lib.Node) ArrayList(java.util.ArrayList) SemanticGraphWalker(org.apache.hadoop.hive.ql.lib.SemanticGraphWalker) SparkJoinHintOptimizer(org.apache.hadoop.hive.ql.optimizer.spark.SparkJoinHintOptimizer) SparkJoinOptimizer(org.apache.hadoop.hive.ql.optimizer.spark.SparkJoinOptimizer) LinkedHashMap(java.util.LinkedHashMap) ParseContext(org.apache.hadoop.hive.ql.parse.ParseContext) SemanticDispatcher(org.apache.hadoop.hive.ql.lib.SemanticDispatcher) SemanticNodeProcessor(org.apache.hadoop.hive.ql.lib.SemanticNodeProcessor) TypeRule(org.apache.hadoop.hive.ql.lib.TypeRule)

Example 33 with SemanticRule

use of org.apache.hadoop.hive.ql.lib.SemanticRule in project hive by apache.

the class SparkCompiler method generateTaskTree.

/**
 * TODO: need to turn on rules that's commented out and add more if necessary.
 */
@Override
protected void generateTaskTree(List<Task<?>> rootTasks, ParseContext pCtx, List<Task<MoveWork>> mvTask, Set<ReadEntity> inputs, Set<WriteEntity> outputs) throws SemanticException {
    PERF_LOGGER.perfLogBegin(CLASS_NAME, PerfLogger.SPARK_GENERATE_TASK_TREE);
    GenSparkUtils utils = GenSparkUtils.getUtils();
    utils.resetSequenceNumber();
    ParseContext tempParseContext = getParseContext(pCtx, rootTasks);
    GenSparkProcContext procCtx = new GenSparkProcContext(conf, tempParseContext, mvTask, rootTasks, inputs, outputs, pCtx.getTopOps());
    // -------------------------------- First Pass ---------------------------------- //
    // Identify SparkPartitionPruningSinkOperators, and break OP tree if necessary
    Map<SemanticRule, SemanticNodeProcessor> opRules = new LinkedHashMap<SemanticRule, SemanticNodeProcessor>();
    opRules.put(new RuleRegExp("Clone OP tree for PartitionPruningSink", SparkPartitionPruningSinkOperator.getOperatorName() + "%"), new SplitOpTreeForDPP());
    SemanticDispatcher disp = new DefaultRuleDispatcher(null, opRules, procCtx);
    SemanticGraphWalker ogw = new GenSparkWorkWalker(disp, procCtx);
    List<Node> topNodes = new ArrayList<Node>();
    topNodes.addAll(pCtx.getTopOps().values());
    ogw.startWalking(topNodes, null);
    // -------------------------------- Second Pass ---------------------------------- //
    // Process operator tree in two steps: first we process the extra op trees generated
    // in the first pass. Then we process the main op tree, and the result task will depend
    // on the task generated in the first pass.
    topNodes.clear();
    topNodes.addAll(procCtx.topOps.values());
    generateTaskTreeHelper(procCtx, topNodes);
    // the partitions used.
    if (!procCtx.clonedPruningTableScanSet.isEmpty()) {
        SparkTask pruningTask = SparkUtilities.createSparkTask(conf);
        SparkTask mainTask = procCtx.currentTask;
        pruningTask.addDependentTask(procCtx.currentTask);
        procCtx.rootTasks.remove(procCtx.currentTask);
        procCtx.rootTasks.add(pruningTask);
        procCtx.currentTask = pruningTask;
        topNodes.clear();
        topNodes.addAll(procCtx.clonedPruningTableScanSet);
        generateTaskTreeHelper(procCtx, topNodes);
        procCtx.currentTask = mainTask;
    }
    // we need to clone some operator plans and remove union operators still
    for (BaseWork w : procCtx.workWithUnionOperators) {
        GenSparkUtils.getUtils().removeUnionOperators(procCtx, w);
    }
    // we need to fill MapWork with 'local' work and bucket information for SMB Join.
    GenSparkUtils.getUtils().annotateMapWork(procCtx);
    // finally make sure the file sink operators are set up right
    for (FileSinkOperator fileSink : procCtx.fileSinkSet) {
        GenSparkUtils.getUtils().processFileSink(procCtx, fileSink);
    }
    // Process partition pruning sinks
    for (Operator<?> prunerSink : procCtx.pruningSinkSet) {
        utils.processPartitionPruningSink(procCtx, (SparkPartitionPruningSinkOperator) prunerSink);
    }
    PERF_LOGGER.perfLogEnd(CLASS_NAME, PerfLogger.SPARK_GENERATE_TASK_TREE);
}
Also used : SemanticRule(org.apache.hadoop.hive.ql.lib.SemanticRule) FileSinkOperator(org.apache.hadoop.hive.ql.exec.FileSinkOperator) DefaultRuleDispatcher(org.apache.hadoop.hive.ql.lib.DefaultRuleDispatcher) SparkTask(org.apache.hadoop.hive.ql.exec.spark.SparkTask) Node(org.apache.hadoop.hive.ql.lib.Node) RuleRegExp(org.apache.hadoop.hive.ql.lib.RuleRegExp) ArrayList(java.util.ArrayList) SemanticGraphWalker(org.apache.hadoop.hive.ql.lib.SemanticGraphWalker) LinkedHashMap(java.util.LinkedHashMap) ParseContext(org.apache.hadoop.hive.ql.parse.ParseContext) SemanticDispatcher(org.apache.hadoop.hive.ql.lib.SemanticDispatcher) SemanticNodeProcessor(org.apache.hadoop.hive.ql.lib.SemanticNodeProcessor) BaseWork(org.apache.hadoop.hive.ql.plan.BaseWork)

Example 34 with SemanticRule

use of org.apache.hadoop.hive.ql.lib.SemanticRule in project hive by apache.

the class HiveOpConverterPostProc method transform.

@Override
public ParseContext transform(ParseContext pctx) throws SemanticException {
    // 0. We check the conditions to apply this transformation,
    // if we do not meet them we bail out
    final boolean cboEnabled = HiveConf.getBoolVar(pctx.getConf(), HiveConf.ConfVars.HIVE_CBO_ENABLED);
    final boolean returnPathEnabled = HiveConf.getBoolVar(pctx.getConf(), HiveConf.ConfVars.HIVE_CBO_RETPATH_HIVEOP);
    final boolean cboSucceeded = pctx.getContext().isCboSucceeded();
    if (!(cboEnabled && returnPathEnabled && cboSucceeded)) {
        return pctx;
    }
    // 1. Initialize aux data structures
    this.pctx = pctx;
    this.aliasToOpInfo = new HashMap<String, Operator<? extends OperatorDesc>>();
    // 2. Trigger transformation
    Map<SemanticRule, SemanticNodeProcessor> opRules = new LinkedHashMap<SemanticRule, SemanticNodeProcessor>();
    opRules.put(new RuleRegExp("R1", JoinOperator.getOperatorName() + "%"), new JoinAnnotate());
    opRules.put(new RuleRegExp("R2", TableScanOperator.getOperatorName() + "%"), new TableScanAnnotate());
    SemanticDispatcher disp = new DefaultRuleDispatcher(null, opRules, null);
    SemanticGraphWalker ogw = new ForwardWalker(disp);
    List<Node> topNodes = new ArrayList<Node>();
    topNodes.addAll(pctx.getTopOps().values());
    ogw.startWalking(topNodes, null);
    return pctx;
}
Also used : ReduceSinkOperator(org.apache.hadoop.hive.ql.exec.ReduceSinkOperator) JoinOperator(org.apache.hadoop.hive.ql.exec.JoinOperator) TableScanOperator(org.apache.hadoop.hive.ql.exec.TableScanOperator) Operator(org.apache.hadoop.hive.ql.exec.Operator) SemanticRule(org.apache.hadoop.hive.ql.lib.SemanticRule) ForwardWalker(org.apache.hadoop.hive.ql.lib.ForwardWalker) DefaultRuleDispatcher(org.apache.hadoop.hive.ql.lib.DefaultRuleDispatcher) Node(org.apache.hadoop.hive.ql.lib.Node) RuleRegExp(org.apache.hadoop.hive.ql.lib.RuleRegExp) ArrayList(java.util.ArrayList) SemanticGraphWalker(org.apache.hadoop.hive.ql.lib.SemanticGraphWalker) LinkedHashMap(java.util.LinkedHashMap) SemanticDispatcher(org.apache.hadoop.hive.ql.lib.SemanticDispatcher) SemanticNodeProcessor(org.apache.hadoop.hive.ql.lib.SemanticNodeProcessor) OperatorDesc(org.apache.hadoop.hive.ql.plan.OperatorDesc)

Example 35 with SemanticRule

use of org.apache.hadoop.hive.ql.lib.SemanticRule in project hive by apache.

the class AnnotateRunTimeStatsOptimizer method resolve.

@Override
public PhysicalContext resolve(PhysicalContext pctx) throws SemanticException {
    Map<SemanticRule, SemanticNodeProcessor> opRules = new LinkedHashMap<SemanticRule, SemanticNodeProcessor>();
    SemanticDispatcher disp = new AnnotateRunTimeStatsDispatcher(pctx, opRules);
    SemanticGraphWalker ogw = new DefaultGraphWalker(disp);
    ArrayList<Node> topNodes = new ArrayList<Node>();
    topNodes.addAll(pctx.getRootTasks());
    ogw.startWalking(topNodes, null);
    return pctx;
}
Also used : SemanticRule(org.apache.hadoop.hive.ql.lib.SemanticRule) DefaultGraphWalker(org.apache.hadoop.hive.ql.lib.DefaultGraphWalker) Node(org.apache.hadoop.hive.ql.lib.Node) SemanticDispatcher(org.apache.hadoop.hive.ql.lib.SemanticDispatcher) ArrayList(java.util.ArrayList) SemanticGraphWalker(org.apache.hadoop.hive.ql.lib.SemanticGraphWalker) SemanticNodeProcessor(org.apache.hadoop.hive.ql.lib.SemanticNodeProcessor) LinkedHashMap(java.util.LinkedHashMap)

Aggregations

SemanticNodeProcessor (org.apache.hadoop.hive.ql.lib.SemanticNodeProcessor)66 SemanticRule (org.apache.hadoop.hive.ql.lib.SemanticRule)66 LinkedHashMap (java.util.LinkedHashMap)65 SemanticGraphWalker (org.apache.hadoop.hive.ql.lib.SemanticGraphWalker)65 Node (org.apache.hadoop.hive.ql.lib.Node)64 SemanticDispatcher (org.apache.hadoop.hive.ql.lib.SemanticDispatcher)64 DefaultRuleDispatcher (org.apache.hadoop.hive.ql.lib.DefaultRuleDispatcher)63 ArrayList (java.util.ArrayList)62 RuleRegExp (org.apache.hadoop.hive.ql.lib.RuleRegExp)57 DefaultGraphWalker (org.apache.hadoop.hive.ql.lib.DefaultGraphWalker)36 MapJoinOperator (org.apache.hadoop.hive.ql.exec.MapJoinOperator)10 ReduceSinkOperator (org.apache.hadoop.hive.ql.exec.ReduceSinkOperator)10 FileSinkOperator (org.apache.hadoop.hive.ql.exec.FileSinkOperator)8 JoinOperator (org.apache.hadoop.hive.ql.exec.JoinOperator)8 Operator (org.apache.hadoop.hive.ql.exec.Operator)8 ForwardWalker (org.apache.hadoop.hive.ql.lib.ForwardWalker)8 TableScanOperator (org.apache.hadoop.hive.ql.exec.TableScanOperator)7 UnionOperator (org.apache.hadoop.hive.ql.exec.UnionOperator)7 AppMasterEventOperator (org.apache.hadoop.hive.ql.exec.AppMasterEventOperator)6 DummyStoreOperator (org.apache.hadoop.hive.ql.exec.DummyStoreOperator)6