Search in sources :

Example 1 with ReduceSinkDeDuplication

Use of org.apache.hadoop.hive.ql.optimizer.correlation.ReduceSinkDeDuplication in the Apache Hive project.

Class Optimizer, method initialize.

/**
 * Builds the ordered list of optimizer transformations for the given configuration.
 *
 * <p>The list is recreated on every call and transformations are appended in a fixed order.
 * Most of them are only added when the matching configuration flag is set; some additionally
 * depend on the execution engine ("tez" / "spark") or on state read from {@code pctx}.
 *
 * @param hiveConf configuration consulted to decide which transformations are added
 */
public void initialize(HiveConf hiveConf) {
    final String engine = HiveConf.getVar(hiveConf, HiveConf.ConfVars.HIVE_EXECUTION_ENGINE);
    final boolean onTez = engine.equals("tez");
    final boolean onSpark = engine.equals("spark");
    // Several transformations below are only added when the engine is neither tez nor spark.
    final boolean neitherTezNorSpark = !onTez && !onSpark;
    boolean bucketMapJoinAdded = false;
    transformations = new ArrayList<Transform>();

    // Extra post-processing needed when Calcite operators are translated into Hive operators.
    transformations.add(new HiveOpConverterPostProc());

    // The transformation that computes lineage information is added only when one of the
    // post-execution hooks listed below is configured.
    final String configuredHooks = Strings.nullToEmpty(HiveConf.getVar(hiveConf, HiveConf.ConfVars.POSTEXECHOOKS));
    final Set<String> hookNames = Sets.newHashSet(Splitter.on(",").trimResults().omitEmptyStrings().split(configuredHooks));
    final boolean lineageHookPresent = hookNames.contains("org.apache.hadoop.hive.ql.hooks.PostExecutePrinter")
            || hookNames.contains("org.apache.hadoop.hive.ql.hooks.LineageLogger")
            || hookNames.contains("org.apache.atlas.hive.hook.HiveHook");
    if (lineageHookPresent) {
        transformations.add(new Generator(hookNames));
    }

    // First try to turn OR predicates in Filter operators into simpler IN clauses.
    if (HiveConf.getBoolVar(hiveConf, HiveConf.ConfVars.HIVEPOINTLOOKUPOPTIMIZER)
            && !pctx.getContext().isCboSucceeded()) {
        transformations.add(new PointLookupOptimizer(
                HiveConf.getIntVar(hiveConf, HiveConf.ConfVars.HIVEPOINTLOOKUPOPTIMIZERMIN)));
    }

    if (HiveConf.getBoolVar(hiveConf, HiveConf.ConfVars.HIVEPARTITIONCOLUMNSEPARATOR)) {
        transformations.add(new PartitionColumnsSeparator());
    }

    // Predicate pushdown: the full set of passes when CBO did not succeed, a reduced set otherwise.
    if (HiveConf.getBoolVar(hiveConf, HiveConf.ConfVars.HIVEOPTPPD)) {
        if (!pctx.getContext().isCboSucceeded()) {
            transformations.add(new PredicateTransitivePropagate());
            if (HiveConf.getBoolVar(hiveConf, HiveConf.ConfVars.HIVEOPTCONSTANTPROPAGATION)) {
                transformations.add(new ConstantPropagate());
            }
            transformations.add(new SyntheticJoinPredicate());
            transformations.add(new PredicatePushDown());
        } else {
            transformations.add(new SyntheticJoinPredicate());
            transformations.add(new SimplePredicatePushDown());
            transformations.add(new RedundantDynamicPruningConditionsRemoval());
        }
    }

    // Constant propagation is scheduled a second time: after predicate pushdown the filter
    // expressions are combined and may become eligible for reduction (e.g. "is not null" filters).
    if (HiveConf.getBoolVar(hiveConf, HiveConf.ConfVars.HIVEOPTCONSTANTPROPAGATION)
            && !pctx.getContext().isCboSucceeded()) {
        transformations.add(new ConstantPropagate());
    }

    if (HiveConf.getBoolVar(hiveConf, HiveConf.ConfVars.DYNAMICPARTITIONING)
            && HiveConf.getVar(hiveConf, HiveConf.ConfVars.DYNAMICPARTITIONINGMODE).equals("nonstrict")
            && HiveConf.getBoolVar(hiveConf, HiveConf.ConfVars.HIVEOPTSORTDYNAMICPARTITION)
            && !HiveConf.getBoolVar(hiveConf, HiveConf.ConfVars.HIVEOPTLISTBUCKETING)) {
        transformations.add(new SortedDynPartitionOptimizer());
    }
    transformations.add(new SortedDynPartitionTimeGranularityOptimizer());

    // Partition pruning passes; these are gated by the same flag as predicate pushdown.
    if (HiveConf.getBoolVar(hiveConf, HiveConf.ConfVars.HIVEOPTPPD)) {
        transformations.add(new PartitionPruner());
        transformations.add(new PartitionConditionRemover());
        if (HiveConf.getBoolVar(hiveConf, HiveConf.ConfVars.HIVEOPTLISTBUCKETING)) {
            // List bucketing pruner.
            transformations.add(new ListBucketingPruner());
        }
        if (HiveConf.getBoolVar(hiveConf, HiveConf.ConfVars.HIVEOPTCONSTANTPROPAGATION)
                && !pctx.getContext().isCboSucceeded()) {
            // PartitionPruner may expose more folding opportunities, so ConstantPropagate runs again.
            transformations.add(new ConstantPropagate());
        }
    }

    if (HiveConf.getBoolVar(hiveConf, HiveConf.ConfVars.HIVEOPTGROUPBY)
            || HiveConf.getBoolVar(hiveConf, HiveConf.ConfVars.HIVE_MAP_GROUPBY_SORT)) {
        transformations.add(new GroupByOptimizer());
    }
    transformations.add(new ColumnPruner());

    if (HiveConf.getBoolVar(hiveConf, HiveConf.ConfVars.HIVECOUNTDISTINCTOPTIMIZER)
            && (HiveConf.getBoolVar(hiveConf, HiveConf.ConfVars.HIVE_IN_TEST) || onTez)) {
        transformations.add(new CountDistinctRewriteProc());
    }

    // Compile-time skew join handling is skipped (with a warning) when running on tez.
    if (HiveConf.getBoolVar(hiveConf, HiveConf.ConfVars.HIVE_OPTIMIZE_SKEWJOIN_COMPILETIME)) {
        if (onTez) {
            LOG.warn("Skew join is currently not supported in tez! Disabling the skew join optimization.");
        } else {
            transformations.add(new SkewJoinOptimizer());
        }
    }
    transformations.add(new SamplePruner());

    // Spark gets its own map join processor; every other engine uses the default one.
    if (onSpark) {
        transformations.add(new SparkMapJoinProcessor());
    } else {
        transformations.add(new MapJoinProcessor());
    }

    if (neitherTezNorSpark && HiveConf.getBoolVar(hiveConf, HiveConf.ConfVars.HIVEOPTBUCKETMAPJOIN)) {
        transformations.add(new BucketMapJoinOptimizer());
        bucketMapJoinAdded = true;
    }
    // SortedMergeBucketMapJoinOptimizer is always preceded by a BucketMapJoinOptimizer:
    // one is added here unless the block above has already added it.
    if (neitherTezNorSpark && HiveConf.getBoolVar(hiveConf, HiveConf.ConfVars.HIVEOPTSORTMERGEBUCKETMAPJOIN)) {
        if (!bucketMapJoinAdded) {
            transformations.add(new BucketMapJoinOptimizer());
        }
        transformations.add(new SortedMergeBucketMapJoinOptimizer());
    }

    if (HiveConf.getBoolVar(hiveConf, HiveConf.ConfVars.HIVEOPTIMIZEBUCKETINGSORTING)) {
        transformations.add(new BucketingSortingReduceSinkOptimizer());
    }
    transformations.add(new UnionProcessor());
    if (HiveConf.getBoolVar(hiveConf, HiveConf.ConfVars.NWAYJOINREORDER)) {
        transformations.add(new JoinReorder());
    }

    if (HiveConf.getBoolVar(hiveConf, HiveConf.ConfVars.TEZ_OPTIMIZE_BUCKET_PRUNING)
            && HiveConf.getBoolVar(hiveConf, HiveConf.ConfVars.HIVEOPTPPD)
            && HiveConf.getBoolVar(hiveConf, HiveConf.ConfVars.HIVEOPTINDEXFILTER)) {
        final boolean pruningCompatMode = HiveConf.getBoolVar(hiveConf, HiveConf.ConfVars.TEZ_OPTIMIZE_BUCKET_PRUNING_COMPAT);
        transformations.add(new FixedBucketPruningOptimizer(pruningCompatMode));
    }

    // Reduce sink de-duplication is added when the flag is set or the plan has an ACID write.
    if (HiveConf.getBoolVar(hiveConf, HiveConf.ConfVars.HIVEOPTREDUCEDEDUPLICATION)
            || pctx.hasAcidWrite()) {
        transformations.add(new ReduceSinkDeDuplication());
    }
    transformations.add(new NonBlockingOpDeDupProc());

    if (HiveConf.getBoolVar(hiveConf, HiveConf.ConfVars.HIVEIDENTITYPROJECTREMOVER)
            && !HiveConf.getBoolVar(hiveConf, HiveConf.ConfVars.HIVE_CBO_RETPATH_HIVEOP)) {
        transformations.add(new IdentityProjectRemover());
    }
    if (HiveConf.getBoolVar(hiveConf, HiveConf.ConfVars.HIVELIMITOPTENABLE)) {
        transformations.add(new GlobalLimitOptimizer());
    }

    // The correlation optimizer is not combined with either skew setting, nor with tez/spark.
    if (neitherTezNorSpark
            && HiveConf.getBoolVar(hiveConf, HiveConf.ConfVars.HIVEOPTCORRELATION)
            && !HiveConf.getBoolVar(hiveConf, HiveConf.ConfVars.HIVEGROUPBYSKEW)
            && !HiveConf.getBoolVar(hiveConf, HiveConf.ConfVars.HIVE_OPTIMIZE_SKEWJOIN_COMPILETIME)) {
        transformations.add(new CorrelationOptimizer());
    }

    if (HiveConf.getFloatVar(hiveConf, HiveConf.ConfVars.HIVELIMITPUSHDOWNMEMORYUSAGE) > 0) {
        transformations.add(new LimitPushdownOptimizer());
    }
    if (HiveConf.getBoolVar(hiveConf, HiveConf.ConfVars.HIVEOPTIMIZEMETADATAQUERIES)) {
        transformations.add(new StatsOptimizer());
    }

    // Annotation passes for EXPLAIN without execution, when not on tez or spark.
    if (pctx.getContext().isExplainSkipExecution() && neitherTezNorSpark) {
        transformations.add(new AnnotateWithStatistics());
        transformations.add(new AnnotateWithOpTraits());
    }

    if (!HiveConf.getVar(hiveConf, HiveConf.ConfVars.HIVEFETCHTASKCONVERSION).equals("none")) {
        // Historical note on this pass: "must be called last". Further transformations
        // are nonetheless still appended after it below.
        transformations.add(new SimpleFetchOptimizer());
    }
    if (HiveConf.getBoolVar(hiveConf, HiveConf.ConfVars.HIVEFETCHTASKAGGR)) {
        transformations.add(new SimpleFetchAggregation());
    }
    if (HiveConf.getBoolVar(hiveConf, HiveConf.ConfVars.HIVE_OPTIMIZE_TABLE_PROPERTIES_FROM_SERDE)) {
        transformations.add(new TablePropertyEnrichmentOptimizer());
    }
}
Also used: ReduceSinkDeDuplication (org.apache.hadoop.hive.ql.optimizer.correlation.ReduceSinkDeDuplication), AnnotateWithOpTraits (org.apache.hadoop.hive.ql.optimizer.metainfo.annotation.AnnotateWithOpTraits), PredicateTransitivePropagate (org.apache.hadoop.hive.ql.ppd.PredicateTransitivePropagate), PartitionConditionRemover (org.apache.hadoop.hive.ql.optimizer.pcr.PartitionConditionRemover), UnionProcessor (org.apache.hadoop.hive.ql.optimizer.unionproc.UnionProcessor), SimplePredicatePushDown (org.apache.hadoop.hive.ql.ppd.SimplePredicatePushDown), PredicatePushDown (org.apache.hadoop.hive.ql.ppd.PredicatePushDown), HiveOpConverterPostProc (org.apache.hadoop.hive.ql.optimizer.calcite.translator.HiveOpConverterPostProc), PartitionPruner (org.apache.hadoop.hive.ql.optimizer.ppr.PartitionPruner), ListBucketingPruner (org.apache.hadoop.hive.ql.optimizer.listbucketingpruner.ListBucketingPruner), SyntheticJoinPredicate (org.apache.hadoop.hive.ql.ppd.SyntheticJoinPredicate), CorrelationOptimizer (org.apache.hadoop.hive.ql.optimizer.correlation.CorrelationOptimizer), AnnotateWithStatistics (org.apache.hadoop.hive.ql.optimizer.stats.annotation.AnnotateWithStatistics), Generator (org.apache.hadoop.hive.ql.optimizer.lineage.Generator)

Aggregations

HiveOpConverterPostProc (org.apache.hadoop.hive.ql.optimizer.calcite.translator.HiveOpConverterPostProc): 1, CorrelationOptimizer (org.apache.hadoop.hive.ql.optimizer.correlation.CorrelationOptimizer): 1, ReduceSinkDeDuplication (org.apache.hadoop.hive.ql.optimizer.correlation.ReduceSinkDeDuplication): 1, Generator (org.apache.hadoop.hive.ql.optimizer.lineage.Generator): 1, ListBucketingPruner (org.apache.hadoop.hive.ql.optimizer.listbucketingpruner.ListBucketingPruner): 1, AnnotateWithOpTraits (org.apache.hadoop.hive.ql.optimizer.metainfo.annotation.AnnotateWithOpTraits): 1, PartitionConditionRemover (org.apache.hadoop.hive.ql.optimizer.pcr.PartitionConditionRemover): 1, PartitionPruner (org.apache.hadoop.hive.ql.optimizer.ppr.PartitionPruner): 1, AnnotateWithStatistics (org.apache.hadoop.hive.ql.optimizer.stats.annotation.AnnotateWithStatistics): 1, UnionProcessor (org.apache.hadoop.hive.ql.optimizer.unionproc.UnionProcessor): 1, PredicatePushDown (org.apache.hadoop.hive.ql.ppd.PredicatePushDown): 1, PredicateTransitivePropagate (org.apache.hadoop.hive.ql.ppd.PredicateTransitivePropagate): 1, SimplePredicatePushDown (org.apache.hadoop.hive.ql.ppd.SimplePredicatePushDown): 1, SyntheticJoinPredicate (org.apache.hadoop.hive.ql.ppd.SyntheticJoinPredicate): 1