Search in sources :

Example 1 with StageIDsRearranger

Use of org.apache.hadoop.hive.ql.optimizer.physical.StageIDsRearranger in the Apache Hive project.

The optimizeTaskPlan method of class SparkCompiler.

/**
 * Runs the Spark physical optimization passes over the task plan.
 *
 * <p>A {@link PhysicalContext} is built from the parse context and root tasks and is then
 * handed to each resolver in a fixed order. Most passes are gated by a {@code HiveConf}
 * setting; when a pass is disabled a debug message is logged instead. The whole method is
 * bracketed by perf-logger begin/end calls for {@code SPARK_OPTIMIZE_TASK_TREE}.
 *
 * @param rootTasks root tasks of the plan, passed into the physical context
 * @param pCtx parse context supplying the query context and fetch task
 * @param ctx query context; vectorization is skipped when its explain-analyze value is non-null
 * @throws SemanticException declared by this method; presumably raised by the resolvers
 */
@Override
protected void optimizeTaskPlan(List<Task<? extends Serializable>> rootTasks, ParseContext pCtx, Context ctx) throws SemanticException {
    PERF_LOGGER.PerfLogBegin(CLASS_NAME, PerfLogger.SPARK_OPTIMIZE_TASK_TREE);

    PhysicalContext pctx = new PhysicalContext(conf, pCtx, pCtx.getContext(), rootTasks, pCtx.getFetchTask());
    pctx = new SplitSparkWorkResolver().resolve(pctx);

    // Runtime skew join handling; the resolver's return value is not captured here.
    boolean skewJoinEnabled = conf.getBoolVar(HiveConf.ConfVars.HIVESKEWJOIN);
    if (skewJoinEnabled) {
        new SparkSkewJoinResolver().resolve(pctx);
    } else {
        LOG.debug("Skipping runtime skew join optimization");
    }

    // Always applied, regardless of configuration.
    pctx = new SparkMapJoinResolver().resolve(pctx);

    boolean nullScanEnabled = conf.getBoolVar(HiveConf.ConfVars.HIVENULLSCANOPTIMIZE);
    if (nullScanEnabled) {
        pctx = new NullScanOptimizer().resolve(pctx);
    } else {
        LOG.debug("Skipping null scan query optimization");
    }

    boolean metadataOnlyEnabled = conf.getBoolVar(HiveConf.ConfVars.HIVEMETADATAONLYQUERIES);
    if (metadataOnlyEnabled) {
        pctx = new MetadataOnlyOptimizer().resolve(pctx);
    } else {
        LOG.debug("Skipping metadata only query optimization");
    }

    boolean crossProductCheckEnabled = conf.getBoolVar(HiveConf.ConfVars.HIVE_CHECK_CROSS_PRODUCT);
    if (crossProductCheckEnabled) {
        pctx = new SparkCrossProductCheck().resolve(pctx);
    } else {
        LOG.debug("Skipping cross product analysis");
    }

    // Vectorization needs the config flag on and a null explain-analyze value on ctx.
    boolean vectorize = conf.getBoolVar(HiveConf.ConfVars.HIVE_VECTORIZATION_ENABLED)
        && ctx.getExplainAnalyze() == null;
    if (vectorize) {
        new Vectorizer().resolve(pctx);
    } else {
        LOG.debug("Skipping vectorization");
    }

    // Any setting other than "none" (case-insensitive) enables stage id rearrangement.
    boolean rearrangeStageIds = !"none".equalsIgnoreCase(conf.getVar(HiveConf.ConfVars.HIVESTAGEIDREARRANGE));
    if (rearrangeStageIds) {
        new StageIDsRearranger().resolve(pctx);
    } else {
        LOG.debug("Skipping stage id rearranger");
    }

    // Always applied, regardless of configuration.
    new CombineEquivalentWorkResolver().resolve(pctx);

    // Runtime stats annotation runs only when the physical context's explain-analyze value is set.
    boolean explainAnalyze = pctx.getContext().getExplainAnalyze() != null;
    if (explainAnalyze) {
        new AnnotateRunTimeStatsOptimizer().resolve(pctx);
    }

    PERF_LOGGER.PerfLogEnd(CLASS_NAME, PerfLogger.SPARK_OPTIMIZE_TASK_TREE);
}
Also used : SparkMapJoinResolver(org.apache.hadoop.hive.ql.optimizer.physical.SparkMapJoinResolver) CombineEquivalentWorkResolver(org.apache.hadoop.hive.ql.optimizer.spark.CombineEquivalentWorkResolver) NullScanOptimizer(org.apache.hadoop.hive.ql.optimizer.physical.NullScanOptimizer) Vectorizer(org.apache.hadoop.hive.ql.optimizer.physical.Vectorizer) SparkSkewJoinResolver(org.apache.hadoop.hive.ql.optimizer.spark.SparkSkewJoinResolver) SplitSparkWorkResolver(org.apache.hadoop.hive.ql.optimizer.spark.SplitSparkWorkResolver) AnnotateRunTimeStatsOptimizer(org.apache.hadoop.hive.ql.optimizer.physical.AnnotateRunTimeStatsOptimizer) SparkCrossProductCheck(org.apache.hadoop.hive.ql.optimizer.physical.SparkCrossProductCheck) MetadataOnlyOptimizer(org.apache.hadoop.hive.ql.optimizer.physical.MetadataOnlyOptimizer) PhysicalContext(org.apache.hadoop.hive.ql.optimizer.physical.PhysicalContext) StageIDsRearranger(org.apache.hadoop.hive.ql.optimizer.physical.StageIDsRearranger)

Example 2 with StageIDsRearranger

Use of org.apache.hadoop.hive.ql.optimizer.physical.StageIDsRearranger in the Apache Hive project.

The optimizeTaskPlan method of class TezCompiler.

/**
 * Runs the Tez physical optimization passes over the task plan.
 *
 * <p>A {@link PhysicalContext} is built from the parse context and root tasks and is then
 * threaded through each resolver in a fixed order, with every pass's result becoming the
 * input of the next. Most passes are gated by a {@code HiveConf} setting; when such a pass
 * is disabled a debug message is logged instead (the memory decider is skipped silently).
 * The whole method is bracketed by perf-logger begin/end calls for {@code TEZ_COMPILER}.
 *
 * @param rootTasks root tasks of the plan, passed into the physical context
 * @param pCtx parse context supplying the query context and fetch task
 * @param ctx query context; vectorization is skipped when its explain-analyze value is non-null
 * @throws SemanticException declared by this method; presumably raised by the resolvers
 */
@Override
protected void optimizeTaskPlan(List<Task<? extends Serializable>> rootTasks, ParseContext pCtx, Context ctx) throws SemanticException {
    PerfLogger perfLogger = SessionState.getPerfLogger();
    perfLogger.PerfLogBegin(this.getClass().getName(), PerfLogger.TEZ_COMPILER);

    PhysicalContext pctx = new PhysicalContext(conf, pCtx, pCtx.getContext(), rootTasks, pCtx.getFetchTask());

    boolean nullScanEnabled = conf.getBoolVar(HiveConf.ConfVars.HIVENULLSCANOPTIMIZE);
    if (nullScanEnabled) {
        pctx = new NullScanOptimizer().resolve(pctx);
    } else {
        LOG.debug("Skipping null scan query optimization");
    }

    boolean metadataOnlyEnabled = conf.getBoolVar(HiveConf.ConfVars.HIVEMETADATAONLYQUERIES);
    if (metadataOnlyEnabled) {
        pctx = new MetadataOnlyOptimizer().resolve(pctx);
    } else {
        LOG.debug("Skipping metadata only query optimization");
    }

    boolean crossProductCheckEnabled = conf.getBoolVar(HiveConf.ConfVars.HIVE_CHECK_CROSS_PRODUCT);
    if (crossProductCheckEnabled) {
        pctx = new CrossProductCheck().resolve(pctx);
    } else {
        LOG.debug("Skipping cross product analysis");
    }

    // Vectorization needs the config flag on and a null explain-analyze value on ctx.
    boolean vectorize = conf.getBoolVar(HiveConf.ConfVars.HIVE_VECTORIZATION_ENABLED)
        && ctx.getExplainAnalyze() == null;
    if (vectorize) {
        pctx = new Vectorizer().resolve(pctx);
    } else {
        LOG.debug("Skipping vectorization");
    }

    // Any setting other than "none" (case-insensitive) enables stage id rearrangement.
    boolean rearrangeStageIds = !"none".equalsIgnoreCase(conf.getVar(HiveConf.ConfVars.HIVESTAGEIDREARRANGE));
    if (rearrangeStageIds) {
        pctx = new StageIDsRearranger().resolve(pctx);
    } else {
        LOG.debug("Skipping stage id rearranger");
    }

    // The memory decider runs only when both the Tez memory manager and the hybrid grace
    // hash join settings are enabled; no message is logged when it is skipped.
    boolean useMemoryDecider = conf.getBoolVar(HiveConf.ConfVars.HIVE_TEZ_ENABLE_MEMORY_MANAGER)
        && conf.getBoolVar(HiveConf.ConfVars.HIVEUSEHYBRIDGRACEHASHJOIN);
    if (useMemoryDecider) {
        pctx = new MemoryDecider().resolve(pctx);
    }

    boolean llapMode = "llap".equalsIgnoreCase(conf.getVar(HiveConf.ConfVars.HIVE_EXECUTION_MODE));
    if (llapMode) {
        pctx = new LlapDecider().resolve(pctx);
    } else {
        LOG.debug("Skipping llap decider");
    }

    // This optimizer serializes every filter that reached the table scan
    // operator so the backend does not have to do it repeatedly. Any physical
    // optimization that changes table scans or filters must be invoked
    // before this one.
    pctx = new SerializeFilter().resolve(pctx);

    // Runtime stats annotation runs only when the physical context's explain-analyze value is set.
    boolean explainAnalyze = pctx.getContext().getExplainAnalyze() != null;
    if (explainAnalyze) {
        new AnnotateRunTimeStatsOptimizer().resolve(pctx);
    }

    perfLogger.PerfLogEnd(this.getClass().getName(), PerfLogger.TEZ_COMPILER, "optimizeTaskPlan");
}
Also used : LlapDecider(org.apache.hadoop.hive.ql.optimizer.physical.LlapDecider) NullScanOptimizer(org.apache.hadoop.hive.ql.optimizer.physical.NullScanOptimizer) Vectorizer(org.apache.hadoop.hive.ql.optimizer.physical.Vectorizer) CrossProductCheck(org.apache.hadoop.hive.ql.optimizer.physical.CrossProductCheck) PerfLogger(org.apache.hadoop.hive.ql.log.PerfLogger) SerializeFilter(org.apache.hadoop.hive.ql.optimizer.physical.SerializeFilter) AnnotateRunTimeStatsOptimizer(org.apache.hadoop.hive.ql.optimizer.physical.AnnotateRunTimeStatsOptimizer) MemoryDecider(org.apache.hadoop.hive.ql.optimizer.physical.MemoryDecider) MetadataOnlyOptimizer(org.apache.hadoop.hive.ql.optimizer.physical.MetadataOnlyOptimizer) PhysicalContext(org.apache.hadoop.hive.ql.optimizer.physical.PhysicalContext) StageIDsRearranger(org.apache.hadoop.hive.ql.optimizer.physical.StageIDsRearranger)

Aggregations

AnnotateRunTimeStatsOptimizer (org.apache.hadoop.hive.ql.optimizer.physical.AnnotateRunTimeStatsOptimizer)2 MetadataOnlyOptimizer (org.apache.hadoop.hive.ql.optimizer.physical.MetadataOnlyOptimizer)2 NullScanOptimizer (org.apache.hadoop.hive.ql.optimizer.physical.NullScanOptimizer)2 PhysicalContext (org.apache.hadoop.hive.ql.optimizer.physical.PhysicalContext)2 StageIDsRearranger (org.apache.hadoop.hive.ql.optimizer.physical.StageIDsRearranger)2 Vectorizer (org.apache.hadoop.hive.ql.optimizer.physical.Vectorizer)2 PerfLogger (org.apache.hadoop.hive.ql.log.PerfLogger)1 CrossProductCheck (org.apache.hadoop.hive.ql.optimizer.physical.CrossProductCheck)1 LlapDecider (org.apache.hadoop.hive.ql.optimizer.physical.LlapDecider)1 MemoryDecider (org.apache.hadoop.hive.ql.optimizer.physical.MemoryDecider)1 SerializeFilter (org.apache.hadoop.hive.ql.optimizer.physical.SerializeFilter)1 SparkCrossProductCheck (org.apache.hadoop.hive.ql.optimizer.physical.SparkCrossProductCheck)1 SparkMapJoinResolver (org.apache.hadoop.hive.ql.optimizer.physical.SparkMapJoinResolver)1 CombineEquivalentWorkResolver (org.apache.hadoop.hive.ql.optimizer.spark.CombineEquivalentWorkResolver)1 SparkSkewJoinResolver (org.apache.hadoop.hive.ql.optimizer.spark.SparkSkewJoinResolver)1 SplitSparkWorkResolver (org.apache.hadoop.hive.ql.optimizer.spark.SplitSparkWorkResolver)1