Search in sources :

Example 1 with MetadataOnlyOptimizer

Use of org.apache.hadoop.hive.ql.optimizer.physical.MetadataOnlyOptimizer in the Apache Hive project.

Class SparkCompiler, method optimizeTaskPlan.

/**
 * Runs the Spark physical optimization passes over the task plan.
 *
 * <p>A {@link PhysicalContext} is built from the supplied tasks and parse context and is then
 * handed to each resolver in a fixed order. Most passes are gated by a configuration flag; when
 * a gated pass is disabled a debug message is logged instead. Every pass receives the context
 * returned by the previous pass, so a resolver that hands back a different context instance is
 * honoured by the passes that follow it.
 *
 * @param rootTasks root tasks of the plan; passed to the {@link PhysicalContext}
 * @param pCtx parse context that supplies the query context and the fetch task
 * @param ctx not read by this method body
 * @throws SemanticException declared by the signature; presumably propagated from the
 *     individual resolver passes
 */
@Override
protected void optimizeTaskPlan(List<Task<? extends Serializable>> rootTasks, ParseContext pCtx, Context ctx) throws SemanticException {
    PERF_LOGGER.PerfLogBegin(CLASS_NAME, PerfLogger.SPARK_OPTIMIZE_TASK_TREE);
    PhysicalContext physicalCtx = new PhysicalContext(conf, pCtx, pCtx.getContext(), rootTasks, pCtx.getFetchTask());

    // Always run: split Spark work before any of the optional passes.
    physicalCtx = new SplitSparkWorkResolver().resolve(physicalCtx);

    if (conf.getBoolVar(HiveConf.ConfVars.HIVESKEWJOIN)) {
        physicalCtx = new SparkSkewJoinResolver().resolve(physicalCtx);
    } else {
        LOG.debug("Skipping runtime skew join optimization");
    }

    // Always run: map-join resolution.
    physicalCtx = new SparkMapJoinResolver().resolve(physicalCtx);

    if (conf.isSparkDPPAny()) {
        physicalCtx = new SparkDynamicPartitionPruningResolver().resolve(physicalCtx);
    }

    if (conf.getBoolVar(HiveConf.ConfVars.HIVENULLSCANOPTIMIZE)) {
        physicalCtx = new NullScanOptimizer().resolve(physicalCtx);
    } else {
        LOG.debug("Skipping null scan query optimization");
    }

    if (conf.getBoolVar(HiveConf.ConfVars.HIVEMETADATAONLYQUERIES)) {
        physicalCtx = new MetadataOnlyOptimizer().resolve(physicalCtx);
    } else {
        LOG.debug("Skipping metadata only query optimization");
    }

    if (conf.getBoolVar(HiveConf.ConfVars.HIVE_CHECK_CROSS_PRODUCT)) {
        physicalCtx = new SparkCrossProductCheck().resolve(physicalCtx);
    } else {
        LOG.debug("Skipping cross product analysis");
    }

    if (conf.getBoolVar(HiveConf.ConfVars.HIVE_VECTORIZATION_ENABLED)) {
        physicalCtx = new Vectorizer().resolve(physicalCtx);
    } else {
        LOG.debug("Skipping vectorization");
    }

    if (!"none".equalsIgnoreCase(conf.getVar(HiveConf.ConfVars.HIVESTAGEIDREARRANGE))) {
        physicalCtx = new StageIDsRearranger().resolve(physicalCtx);
    } else {
        LOG.debug("Skipping stage id rearranger");
    }

    if (conf.getBoolVar(HiveConf.ConfVars.HIVE_COMBINE_EQUIVALENT_WORK_OPTIMIZATION)) {
        physicalCtx = new CombineEquivalentWorkResolver().resolve(physicalCtx);
    } else {
        LOG.debug("Skipping combine equivalent work optimization");
    }

    // Final pass, only when EXPLAIN ANALYZE information is present on the context.
    // Its result is not needed because nothing runs after it.
    if (physicalCtx.getContext().getExplainAnalyze() != null) {
        new AnnotateRunTimeStatsOptimizer().resolve(physicalCtx);
    }

    PERF_LOGGER.PerfLogEnd(CLASS_NAME, PerfLogger.SPARK_OPTIMIZE_TASK_TREE);
}
Also used : SparkMapJoinResolver(org.apache.hadoop.hive.ql.optimizer.physical.SparkMapJoinResolver) CombineEquivalentWorkResolver(org.apache.hadoop.hive.ql.optimizer.spark.CombineEquivalentWorkResolver) NullScanOptimizer(org.apache.hadoop.hive.ql.optimizer.physical.NullScanOptimizer) Vectorizer(org.apache.hadoop.hive.ql.optimizer.physical.Vectorizer) SparkSkewJoinResolver(org.apache.hadoop.hive.ql.optimizer.spark.SparkSkewJoinResolver) SplitSparkWorkResolver(org.apache.hadoop.hive.ql.optimizer.spark.SplitSparkWorkResolver) AnnotateRunTimeStatsOptimizer(org.apache.hadoop.hive.ql.optimizer.physical.AnnotateRunTimeStatsOptimizer) SparkDynamicPartitionPruningResolver(org.apache.hadoop.hive.ql.optimizer.physical.SparkDynamicPartitionPruningResolver) SparkCrossProductCheck(org.apache.hadoop.hive.ql.optimizer.physical.SparkCrossProductCheck) MetadataOnlyOptimizer(org.apache.hadoop.hive.ql.optimizer.physical.MetadataOnlyOptimizer) PhysicalContext(org.apache.hadoop.hive.ql.optimizer.physical.PhysicalContext) StageIDsRearranger(org.apache.hadoop.hive.ql.optimizer.physical.StageIDsRearranger)

Example 2 with MetadataOnlyOptimizer

Use of org.apache.hadoop.hive.ql.optimizer.physical.MetadataOnlyOptimizer in the Apache Hive project.

Class TezCompiler, method optimizeTaskPlan.

/**
 * Applies the Tez physical optimization passes to the task plan.
 *
 * <p>A {@link PhysicalContext} is created from the given tasks and parse context, then passed
 * through each resolver in sequence, with every pass consuming the context returned by the one
 * before it. Most passes are guarded by a configuration setting; a disabled pass usually logs a
 * debug message instead of running.
 *
 * @param rootTasks root tasks of the plan; passed to the {@link PhysicalContext}
 * @param pCtx parse context that supplies the query context and the fetch task
 * @param ctx not read by this method body
 * @throws SemanticException declared by the signature; presumably propagated from the
 *     individual resolver passes
 */
@Override
protected void optimizeTaskPlan(List<Task<? extends Serializable>> rootTasks, ParseContext pCtx, Context ctx) throws SemanticException {
    final PerfLogger perfLogger = SessionState.getPerfLogger();
    final String callerName = getClass().getName();
    perfLogger.PerfLogBegin(callerName, PerfLogger.TEZ_COMPILER);

    PhysicalContext pctx =
        new PhysicalContext(conf, pCtx, pCtx.getContext(), rootTasks, pCtx.getFetchTask());

    if (conf.getBoolVar(HiveConf.ConfVars.HIVENULLSCANOPTIMIZE)) {
        pctx = new NullScanOptimizer().resolve(pctx);
    } else {
        LOG.debug("Skipping null scan query optimization");
    }

    if (conf.getBoolVar(HiveConf.ConfVars.HIVEMETADATAONLYQUERIES)) {
        pctx = new MetadataOnlyOptimizer().resolve(pctx);
    } else {
        LOG.debug("Skipping metadata only query optimization");
    }

    if (conf.getBoolVar(HiveConf.ConfVars.HIVE_CHECK_CROSS_PRODUCT)) {
        pctx = new CrossProductHandler().resolve(pctx);
    } else {
        LOG.debug("Skipping cross product analysis");
    }

    // The execution mode is read again before the LLAP decider further down; both reads are
    // kept at their own positions in the pass sequence.
    if ("llap".equalsIgnoreCase(conf.getVar(HiveConf.ConfVars.HIVE_EXECUTION_MODE))) {
        pctx = new LlapPreVectorizationPass().resolve(pctx);
    } else {
        LOG.debug("Skipping llap pre-vectorization pass");
    }

    if (conf.getBoolVar(HiveConf.ConfVars.HIVE_VECTORIZATION_ENABLED)) {
        pctx = new Vectorizer().resolve(pctx);
    } else {
        LOG.debug("Skipping vectorization");
    }

    if (!"none".equalsIgnoreCase(conf.getVar(HiveConf.ConfVars.HIVESTAGEIDREARRANGE))) {
        pctx = new StageIDsRearranger().resolve(pctx);
    } else {
        LOG.debug("Skipping stage id rearranger");
    }

    // Runs only when both the Tez memory manager and hybrid grace hash join are enabled.
    if (conf.getBoolVar(HiveConf.ConfVars.HIVE_TEZ_ENABLE_MEMORY_MANAGER)
            && conf.getBoolVar(HiveConf.ConfVars.HIVEUSEHYBRIDGRACEHASHJOIN)) {
        pctx = new MemoryDecider().resolve(pctx);
    }

    if ("llap".equalsIgnoreCase(conf.getVar(HiveConf.ConfVars.HIVE_EXECUTION_MODE))) {
        LlapClusterStateForCompile clusterState = LlapClusterStateForCompile.getClusterInfo(conf);
        pctx = new LlapDecider(clusterState).resolve(pctx);
    } else {
        LOG.debug("Skipping llap decider");
    }

    // SerializeFilter serializes every filter that reached a table scan operator so the
    // backend does not have to repeat that work. Any physical optimization that alters
    // table scans or filters must therefore be invoked before this point.
    pctx = new SerializeFilter().resolve(pctx);

    // Final pass, only when EXPLAIN ANALYZE information is present on the context.
    if (pctx.getContext().getExplainAnalyze() != null) {
        new AnnotateRunTimeStatsOptimizer().resolve(pctx);
    }

    perfLogger.PerfLogEnd(callerName, PerfLogger.TEZ_COMPILER, "optimizeTaskPlan");
}
Also used : LlapDecider(org.apache.hadoop.hive.ql.optimizer.physical.LlapDecider) LlapClusterStateForCompile(org.apache.hadoop.hive.ql.optimizer.physical.LlapClusterStateForCompile) PerfLogger(org.apache.hadoop.hive.ql.log.PerfLogger) AnnotateRunTimeStatsOptimizer(org.apache.hadoop.hive.ql.optimizer.physical.AnnotateRunTimeStatsOptimizer) MemoryDecider(org.apache.hadoop.hive.ql.optimizer.physical.MemoryDecider) MetadataOnlyOptimizer(org.apache.hadoop.hive.ql.optimizer.physical.MetadataOnlyOptimizer) PhysicalContext(org.apache.hadoop.hive.ql.optimizer.physical.PhysicalContext) LlapPreVectorizationPass(org.apache.hadoop.hive.ql.optimizer.physical.LlapPreVectorizationPass) NullScanOptimizer(org.apache.hadoop.hive.ql.optimizer.physical.NullScanOptimizer) Vectorizer(org.apache.hadoop.hive.ql.optimizer.physical.Vectorizer) SerializeFilter(org.apache.hadoop.hive.ql.optimizer.physical.SerializeFilter) CrossProductHandler(org.apache.hadoop.hive.ql.optimizer.physical.CrossProductHandler) StageIDsRearranger(org.apache.hadoop.hive.ql.optimizer.physical.StageIDsRearranger)

Aggregations

AnnotateRunTimeStatsOptimizer (org.apache.hadoop.hive.ql.optimizer.physical.AnnotateRunTimeStatsOptimizer): 2, MetadataOnlyOptimizer (org.apache.hadoop.hive.ql.optimizer.physical.MetadataOnlyOptimizer): 2, NullScanOptimizer (org.apache.hadoop.hive.ql.optimizer.physical.NullScanOptimizer): 2, PhysicalContext (org.apache.hadoop.hive.ql.optimizer.physical.PhysicalContext): 2, StageIDsRearranger (org.apache.hadoop.hive.ql.optimizer.physical.StageIDsRearranger): 2, Vectorizer (org.apache.hadoop.hive.ql.optimizer.physical.Vectorizer): 2, PerfLogger (org.apache.hadoop.hive.ql.log.PerfLogger): 1, CrossProductHandler (org.apache.hadoop.hive.ql.optimizer.physical.CrossProductHandler): 1, LlapClusterStateForCompile (org.apache.hadoop.hive.ql.optimizer.physical.LlapClusterStateForCompile): 1, LlapDecider (org.apache.hadoop.hive.ql.optimizer.physical.LlapDecider): 1, LlapPreVectorizationPass (org.apache.hadoop.hive.ql.optimizer.physical.LlapPreVectorizationPass): 1, MemoryDecider (org.apache.hadoop.hive.ql.optimizer.physical.MemoryDecider): 1, SerializeFilter (org.apache.hadoop.hive.ql.optimizer.physical.SerializeFilter): 1, SparkCrossProductCheck (org.apache.hadoop.hive.ql.optimizer.physical.SparkCrossProductCheck): 1, SparkDynamicPartitionPruningResolver (org.apache.hadoop.hive.ql.optimizer.physical.SparkDynamicPartitionPruningResolver): 1, SparkMapJoinResolver (org.apache.hadoop.hive.ql.optimizer.physical.SparkMapJoinResolver): 1, CombineEquivalentWorkResolver (org.apache.hadoop.hive.ql.optimizer.spark.CombineEquivalentWorkResolver): 1, SparkSkewJoinResolver (org.apache.hadoop.hive.ql.optimizer.spark.SparkSkewJoinResolver): 1, SplitSparkWorkResolver (org.apache.hadoop.hive.ql.optimizer.spark.SplitSparkWorkResolver): 1