Search in sources :

Example 1 with SplitSparkWorkResolver

Use of org.apache.hadoop.hive.ql.optimizer.spark.SplitSparkWorkResolver in project Hive by Apache.

The class SparkCompiler, method optimizeTaskPlan.

/**
 * Runs the chain of physical optimizations over the Spark task plan.
 *
 * <p>Each resolver is applied in a fixed order; several of them are gated by a
 * configuration flag and log a debug message when skipped. Resolvers that rebuild
 * the context return a new {@link PhysicalContext}, which is threaded through to
 * the next stage; the others mutate the plan in place.
 *
 * @param rootTasks root tasks of the compiled plan to optimize
 * @param pCtx      parse context carrying the fetch task and query context
 * @param ctx       compilation context (used to detect EXPLAIN ANALYZE mode)
 * @throws SemanticException if any resolver fails
 */
@Override
protected void optimizeTaskPlan(List<Task<? extends Serializable>> rootTasks, ParseContext pCtx, Context ctx) throws SemanticException {
    PERF_LOGGER.PerfLogBegin(CLASS_NAME, PerfLogger.SPARK_OPTIMIZE_TASK_TREE);

    // Seed the physical context from the parse context and the root-task DAG.
    PhysicalContext physicalContext =
        new PhysicalContext(conf, pCtx, pCtx.getContext(), rootTasks, pCtx.getFetchTask());

    // Always split shared Spark works first; this resolver returns an updated context.
    physicalContext = new SplitSparkWorkResolver().resolve(physicalContext);

    if (conf.getBoolVar(HiveConf.ConfVars.HIVESKEWJOIN)) {
        // In-place rewrite; no context reassignment needed.
        new SparkSkewJoinResolver().resolve(physicalContext);
    } else {
        LOG.debug("Skipping runtime skew join optimization");
    }

    // Map-join conversion always runs and yields a fresh context.
    physicalContext = new SparkMapJoinResolver().resolve(physicalContext);

    if (conf.getBoolVar(HiveConf.ConfVars.HIVENULLSCANOPTIMIZE)) {
        physicalContext = new NullScanOptimizer().resolve(physicalContext);
    } else {
        LOG.debug("Skipping null scan query optimization");
    }

    if (conf.getBoolVar(HiveConf.ConfVars.HIVEMETADATAONLYQUERIES)) {
        physicalContext = new MetadataOnlyOptimizer().resolve(physicalContext);
    } else {
        LOG.debug("Skipping metadata only query optimization");
    }

    if (conf.getBoolVar(HiveConf.ConfVars.HIVE_CHECK_CROSS_PRODUCT)) {
        physicalContext = new SparkCrossProductCheck().resolve(physicalContext);
    } else {
        LOG.debug("Skipping cross product analysis");
    }

    // Vectorization is suppressed under EXPLAIN ANALYZE (getExplainAnalyze() != null).
    if (conf.getBoolVar(HiveConf.ConfVars.HIVE_VECTORIZATION_ENABLED) && ctx.getExplainAnalyze() == null) {
        new Vectorizer().resolve(physicalContext);
    } else {
        LOG.debug("Skipping vectorization");
    }

    // Stage-id rearrangement runs unless explicitly disabled via "none".
    if (!"none".equalsIgnoreCase(conf.getVar(HiveConf.ConfVars.HIVESTAGEIDREARRANGE))) {
        new StageIDsRearranger().resolve(physicalContext);
    } else {
        LOG.debug("Skipping stage id rearranger");
    }

    // Deduplicate equivalent works unconditionally.
    new CombineEquivalentWorkResolver().resolve(physicalContext);

    // Annotate runtime stats only when running under EXPLAIN ANALYZE.
    if (physicalContext.getContext().getExplainAnalyze() != null) {
        new AnnotateRunTimeStatsOptimizer().resolve(physicalContext);
    }

    PERF_LOGGER.PerfLogEnd(CLASS_NAME, PerfLogger.SPARK_OPTIMIZE_TASK_TREE);
}
Also used : SparkMapJoinResolver(org.apache.hadoop.hive.ql.optimizer.physical.SparkMapJoinResolver) CombineEquivalentWorkResolver(org.apache.hadoop.hive.ql.optimizer.spark.CombineEquivalentWorkResolver) NullScanOptimizer(org.apache.hadoop.hive.ql.optimizer.physical.NullScanOptimizer) Vectorizer(org.apache.hadoop.hive.ql.optimizer.physical.Vectorizer) SparkSkewJoinResolver(org.apache.hadoop.hive.ql.optimizer.spark.SparkSkewJoinResolver) SplitSparkWorkResolver(org.apache.hadoop.hive.ql.optimizer.spark.SplitSparkWorkResolver) AnnotateRunTimeStatsOptimizer(org.apache.hadoop.hive.ql.optimizer.physical.AnnotateRunTimeStatsOptimizer) SparkCrossProductCheck(org.apache.hadoop.hive.ql.optimizer.physical.SparkCrossProductCheck) MetadataOnlyOptimizer(org.apache.hadoop.hive.ql.optimizer.physical.MetadataOnlyOptimizer) PhysicalContext(org.apache.hadoop.hive.ql.optimizer.physical.PhysicalContext) StageIDsRearranger(org.apache.hadoop.hive.ql.optimizer.physical.StageIDsRearranger)

Aggregations

AnnotateRunTimeStatsOptimizer (org.apache.hadoop.hive.ql.optimizer.physical.AnnotateRunTimeStatsOptimizer)1 MetadataOnlyOptimizer (org.apache.hadoop.hive.ql.optimizer.physical.MetadataOnlyOptimizer)1 NullScanOptimizer (org.apache.hadoop.hive.ql.optimizer.physical.NullScanOptimizer)1 PhysicalContext (org.apache.hadoop.hive.ql.optimizer.physical.PhysicalContext)1 SparkCrossProductCheck (org.apache.hadoop.hive.ql.optimizer.physical.SparkCrossProductCheck)1 SparkMapJoinResolver (org.apache.hadoop.hive.ql.optimizer.physical.SparkMapJoinResolver)1 StageIDsRearranger (org.apache.hadoop.hive.ql.optimizer.physical.StageIDsRearranger)1 Vectorizer (org.apache.hadoop.hive.ql.optimizer.physical.Vectorizer)1 CombineEquivalentWorkResolver (org.apache.hadoop.hive.ql.optimizer.spark.CombineEquivalentWorkResolver)1 SparkSkewJoinResolver (org.apache.hadoop.hive.ql.optimizer.spark.SparkSkewJoinResolver)1 SplitSparkWorkResolver (org.apache.hadoop.hive.ql.optimizer.spark.SplitSparkWorkResolver)1