Example 1 with SparkCrossProductCheck

Use of org.apache.hadoop.hive.ql.optimizer.physical.SparkCrossProductCheck in project hive by apache.

From the class SparkCompiler, method optimizeTaskPlan:

@Override
protected void optimizeTaskPlan(List<Task<? extends Serializable>> rootTasks, ParseContext pCtx, Context ctx) throws SemanticException {
    PERF_LOGGER.PerfLogBegin(CLASS_NAME, PerfLogger.SPARK_OPTIMIZE_TASK_TREE);
    PhysicalContext physicalCtx = new PhysicalContext(conf, pCtx, pCtx.getContext(), rootTasks, pCtx.getFetchTask());
    physicalCtx = new SplitSparkWorkResolver().resolve(physicalCtx);
    if (conf.getBoolVar(HiveConf.ConfVars.HIVESKEWJOIN)) {
        (new SparkSkewJoinResolver()).resolve(physicalCtx);
    } else {
        LOG.debug("Skipping runtime skew join optimization");
    }
    physicalCtx = new SparkMapJoinResolver().resolve(physicalCtx);
    if (conf.isSparkDPPAny()) {
        physicalCtx = new SparkDynamicPartitionPruningResolver().resolve(physicalCtx);
    }
    if (conf.getBoolVar(HiveConf.ConfVars.HIVENULLSCANOPTIMIZE)) {
        physicalCtx = new NullScanOptimizer().resolve(physicalCtx);
    } else {
        LOG.debug("Skipping null scan query optimization");
    }
    if (conf.getBoolVar(HiveConf.ConfVars.HIVEMETADATAONLYQUERIES)) {
        physicalCtx = new MetadataOnlyOptimizer().resolve(physicalCtx);
    } else {
        LOG.debug("Skipping metadata only query optimization");
    }
    if (conf.getBoolVar(HiveConf.ConfVars.HIVE_CHECK_CROSS_PRODUCT)) {
        physicalCtx = new SparkCrossProductCheck().resolve(physicalCtx);
    } else {
        LOG.debug("Skipping cross product analysis");
    }
    if (conf.getBoolVar(HiveConf.ConfVars.HIVE_VECTORIZATION_ENABLED)) {
        (new Vectorizer()).resolve(physicalCtx);
    } else {
        LOG.debug("Skipping vectorization");
    }
    if (!"none".equalsIgnoreCase(conf.getVar(HiveConf.ConfVars.HIVESTAGEIDREARRANGE))) {
        (new StageIDsRearranger()).resolve(physicalCtx);
    } else {
        LOG.debug("Skipping stage id rearranger");
    }
    if (conf.getBoolVar(HiveConf.ConfVars.HIVE_COMBINE_EQUIVALENT_WORK_OPTIMIZATION)) {
        new CombineEquivalentWorkResolver().resolve(physicalCtx);
    } else {
        LOG.debug("Skipping combine equivalent work optimization");
    }
    if (physicalCtx.getContext().getExplainAnalyze() != null) {
        new AnnotateRunTimeStatsOptimizer().resolve(physicalCtx);
    }
    PERF_LOGGER.PerfLogEnd(CLASS_NAME, PerfLogger.SPARK_OPTIMIZE_TASK_TREE);
    return;
}
Also used :
SparkMapJoinResolver (org.apache.hadoop.hive.ql.optimizer.physical.SparkMapJoinResolver)
CombineEquivalentWorkResolver (org.apache.hadoop.hive.ql.optimizer.spark.CombineEquivalentWorkResolver)
NullScanOptimizer (org.apache.hadoop.hive.ql.optimizer.physical.NullScanOptimizer)
Vectorizer (org.apache.hadoop.hive.ql.optimizer.physical.Vectorizer)
SparkSkewJoinResolver (org.apache.hadoop.hive.ql.optimizer.spark.SparkSkewJoinResolver)
SplitSparkWorkResolver (org.apache.hadoop.hive.ql.optimizer.spark.SplitSparkWorkResolver)
AnnotateRunTimeStatsOptimizer (org.apache.hadoop.hive.ql.optimizer.physical.AnnotateRunTimeStatsOptimizer)
SparkDynamicPartitionPruningResolver (org.apache.hadoop.hive.ql.optimizer.physical.SparkDynamicPartitionPruningResolver)
SparkCrossProductCheck (org.apache.hadoop.hive.ql.optimizer.physical.SparkCrossProductCheck)
MetadataOnlyOptimizer (org.apache.hadoop.hive.ql.optimizer.physical.MetadataOnlyOptimizer)
PhysicalContext (org.apache.hadoop.hive.ql.optimizer.physical.PhysicalContext)
StageIDsRearranger (org.apache.hadoop.hive.ql.optimizer.physical.StageIDsRearranger)
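Each branch in optimizeTaskPlan is gated by a HiveConf flag, so individual physical optimizations can be switched on or off per session. A minimal sketch of toggling the flag that guards SparkCrossProductCheck (CrossProductCheckToggle is a made-up class name for illustration; the flag, HiveConf.ConfVars.HIVE_CHECK_CROSS_PRODUCT, is the one tested in the snippet above):

import org.apache.hadoop.hive.conf.HiveConf;

public class CrossProductCheckToggle {
    public static void main(String[] args) {
        HiveConf conf = new HiveConf();
        // SparkCrossProductCheck runs only when this flag is true; it is the
        // guard on the corresponding if-branch in optimizeTaskPlan above.
        conf.setBoolVar(HiveConf.ConfVars.HIVE_CHECK_CROSS_PRODUCT, true);
        System.out.println("cross product check enabled: "
            + conf.getBoolVar(HiveConf.ConfVars.HIVE_CHECK_CROSS_PRODUCT));
    }
}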

Example 2 with SparkCrossProductCheck

Use of org.apache.hadoop.hive.ql.optimizer.physical.SparkCrossProductCheck in project hive by apache.

From the class SparkCompiler, method optimizeTaskPlan, as it appears in a newer revision of Hive: the rootTasks parameter has become List<Task<?>> instead of List<Task<? extends Serializable>>, and the PerfLogger calls use the renamed perfLogBegin/perfLogEnd methods. The resolver chain itself is identical:

@Override
protected void optimizeTaskPlan(List<Task<?>> rootTasks, ParseContext pCtx, Context ctx) throws SemanticException {
    PERF_LOGGER.perfLogBegin(CLASS_NAME, PerfLogger.SPARK_OPTIMIZE_TASK_TREE);
    PhysicalContext physicalCtx = new PhysicalContext(conf, pCtx, pCtx.getContext(), rootTasks, pCtx.getFetchTask());
    physicalCtx = new SplitSparkWorkResolver().resolve(physicalCtx);
    if (conf.getBoolVar(HiveConf.ConfVars.HIVESKEWJOIN)) {
        (new SparkSkewJoinResolver()).resolve(physicalCtx);
    } else {
        LOG.debug("Skipping runtime skew join optimization");
    }
    physicalCtx = new SparkMapJoinResolver().resolve(physicalCtx);
    if (conf.isSparkDPPAny()) {
        physicalCtx = new SparkDynamicPartitionPruningResolver().resolve(physicalCtx);
    }
    if (conf.getBoolVar(HiveConf.ConfVars.HIVENULLSCANOPTIMIZE)) {
        physicalCtx = new NullScanOptimizer().resolve(physicalCtx);
    } else {
        LOG.debug("Skipping null scan query optimization");
    }
    if (conf.getBoolVar(HiveConf.ConfVars.HIVEMETADATAONLYQUERIES)) {
        physicalCtx = new MetadataOnlyOptimizer().resolve(physicalCtx);
    } else {
        LOG.debug("Skipping metadata only query optimization");
    }
    if (conf.getBoolVar(HiveConf.ConfVars.HIVE_CHECK_CROSS_PRODUCT)) {
        physicalCtx = new SparkCrossProductCheck().resolve(physicalCtx);
    } else {
        LOG.debug("Skipping cross product analysis");
    }
    if (conf.getBoolVar(HiveConf.ConfVars.HIVE_VECTORIZATION_ENABLED)) {
        (new Vectorizer()).resolve(physicalCtx);
    } else {
        LOG.debug("Skipping vectorization");
    }
    if (!"none".equalsIgnoreCase(conf.getVar(HiveConf.ConfVars.HIVESTAGEIDREARRANGE))) {
        (new StageIDsRearranger()).resolve(physicalCtx);
    } else {
        LOG.debug("Skipping stage id rearranger");
    }
    if (conf.getBoolVar(HiveConf.ConfVars.HIVE_COMBINE_EQUIVALENT_WORK_OPTIMIZATION)) {
        new CombineEquivalentWorkResolver().resolve(physicalCtx);
    } else {
        LOG.debug("Skipping combine equivalent work optimization");
    }
    if (physicalCtx.getContext().getExplainAnalyze() != null) {
        new AnnotateRunTimeStatsOptimizer().resolve(physicalCtx);
    }
    PERF_LOGGER.perfLogEnd(CLASS_NAME, PerfLogger.SPARK_OPTIMIZE_TASK_TREE);
    return;
}
Also used :
SparkMapJoinResolver (org.apache.hadoop.hive.ql.optimizer.physical.SparkMapJoinResolver)
CombineEquivalentWorkResolver (org.apache.hadoop.hive.ql.optimizer.spark.CombineEquivalentWorkResolver)
NullScanOptimizer (org.apache.hadoop.hive.ql.optimizer.physical.NullScanOptimizer)
Vectorizer (org.apache.hadoop.hive.ql.optimizer.physical.Vectorizer)
SparkSkewJoinResolver (org.apache.hadoop.hive.ql.optimizer.spark.SparkSkewJoinResolver)
SplitSparkWorkResolver (org.apache.hadoop.hive.ql.optimizer.spark.SplitSparkWorkResolver)
AnnotateRunTimeStatsOptimizer (org.apache.hadoop.hive.ql.optimizer.physical.AnnotateRunTimeStatsOptimizer)
SparkDynamicPartitionPruningResolver (org.apache.hadoop.hive.ql.optimizer.physical.SparkDynamicPartitionPruningResolver)
SparkCrossProductCheck (org.apache.hadoop.hive.ql.optimizer.physical.SparkCrossProductCheck)
MetadataOnlyOptimizer (org.apache.hadoop.hive.ql.optimizer.physical.MetadataOnlyOptimizer)
PhysicalContext (org.apache.hadoop.hive.ql.optimizer.physical.PhysicalContext)
StageIDsRearranger (org.apache.hadoop.hive.ql.optimizer.physical.StageIDsRearranger)
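The resolvers chained in optimizeTaskPlan, SparkCrossProductCheck included, all follow the same PhysicalPlanResolver contract: take a PhysicalContext, transform the task plan, and return the (possibly replaced) context. A minimal sketch of a custom resolver following that contract (NoopLoggingResolver is hypothetical, for illustration only; it assumes PhysicalContext exposes a getRootTasks() accessor, as in the Hive codebase):

import org.apache.hadoop.hive.ql.optimizer.physical.PhysicalContext;
import org.apache.hadoop.hive.ql.optimizer.physical.PhysicalPlanResolver;
import org.apache.hadoop.hive.ql.parse.SemanticException;

public class NoopLoggingResolver implements PhysicalPlanResolver {
    @Override
    public PhysicalContext resolve(PhysicalContext pctx) throws SemanticException {
        // Inspect the plan, then hand the context on unchanged: the minimal
        // legal behavior for a link in the optimizeTaskPlan chain above.
        System.out.println("physical plan has " + pctx.getRootTasks().size() + " root task(s)");
        return pctx;
    }
}

Such a resolver would be invoked the same way as the others, e.g. physicalCtx = new NoopLoggingResolver().resolve(physicalCtx).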

Aggregations

AnnotateRunTimeStatsOptimizer (org.apache.hadoop.hive.ql.optimizer.physical.AnnotateRunTimeStatsOptimizer) 2
MetadataOnlyOptimizer (org.apache.hadoop.hive.ql.optimizer.physical.MetadataOnlyOptimizer) 2
NullScanOptimizer (org.apache.hadoop.hive.ql.optimizer.physical.NullScanOptimizer) 2
PhysicalContext (org.apache.hadoop.hive.ql.optimizer.physical.PhysicalContext) 2
SparkCrossProductCheck (org.apache.hadoop.hive.ql.optimizer.physical.SparkCrossProductCheck) 2
SparkDynamicPartitionPruningResolver (org.apache.hadoop.hive.ql.optimizer.physical.SparkDynamicPartitionPruningResolver) 2
SparkMapJoinResolver (org.apache.hadoop.hive.ql.optimizer.physical.SparkMapJoinResolver) 2
StageIDsRearranger (org.apache.hadoop.hive.ql.optimizer.physical.StageIDsRearranger) 2
Vectorizer (org.apache.hadoop.hive.ql.optimizer.physical.Vectorizer) 2
CombineEquivalentWorkResolver (org.apache.hadoop.hive.ql.optimizer.spark.CombineEquivalentWorkResolver) 2
SparkSkewJoinResolver (org.apache.hadoop.hive.ql.optimizer.spark.SparkSkewJoinResolver) 2
SplitSparkWorkResolver (org.apache.hadoop.hive.ql.optimizer.spark.SplitSparkWorkResolver) 2