Search in sources:

Example 11 with StatsProvider

use of io.prestosql.cost.StatsProvider in project hetu-core by openlookeng.

The method plan of the class LogicalPlanner.

/**
 * Plans the analyzed statement and, depending on {@code stage}, optimizes and validates it.
 *
 * <p>The freshly planned tree is always checked with the intermediate-plan sanity checker.
 * From {@code Stage.OPTIMIZED} onwards every enabled and applicable optimizer is run in
 * order; from {@code Stage.OPTIMIZED_AND_VALIDATED} onwards the final-plan sanity checker
 * is run as well.
 *
 * @param analysis analysis whose statement is planned
 * @param skipStatsWithPlan when {@code true} and {@code isSkipAttachingStatsWithPlan(session)}
 *        also holds, the returned plan carries {@code StatsAndCosts.empty()}
 * @param stage how far planning should proceed
 * @return the plan root with the symbol types and the (possibly empty) stats and costs
 * @throws NullPointerException if an optimizer returns a null plan
 */
public Plan plan(Analysis analysis, boolean skipStatsWithPlan, Stage stage) {
    PlanNode root = planStatement(analysis, analysis.getStatement());
    PlanNode.SkipOptRuleLevel optimizationLevel = APPLY_ALL_RULES;
    planSanityChecker.validateIntermediatePlan(root, session, metadata, typeAnalyzer, planSymbolAllocator.getTypes(), warningCollector);
    if (stage.ordinal() >= Stage.OPTIMIZED.ordinal()) {
        for (PlanOptimizer optimizer : planOptimizers) {
            if (OptimizerUtils.isEnabledLegacy(optimizer, session, root) && OptimizerUtils.canApplyOptimizer(optimizer, optimizationLevel)) {
                root = optimizer.optimize(root, session, planSymbolAllocator.getTypes(), planSymbolAllocator, idAllocator, warningCollector);
                // Supplier overload: the message is only formatted when the check actually fails,
                // instead of once per optimizer on the success path.
                requireNonNull(root, () -> format("%s returned a null plan", optimizer.getClass().getName()));
                // Once a plan has lowered the level from APPLY_ALL_RULES it is never changed again.
                if (optimizationLevel == APPLY_ALL_RULES) {
                    optimizationLevel = root.getSkipOptRuleLevel();
                }
            }
        }
    }
    if (stage.ordinal() >= Stage.OPTIMIZED_AND_VALIDATED.ordinal()) {
        // make sure we produce a valid plan after optimizations run. This is mainly to catch programming errors
        planSanityChecker.validateFinalPlan(root, session, metadata, typeAnalyzer, planSymbolAllocator.getTypes(), warningCollector);
    }
    TypeProvider types = planSymbolAllocator.getTypes();
    // Stats are only calculated here when they must be shown as part of EXPLAIN; otherwise they are skipped.
    if (skipStatsWithPlan && isSkipAttachingStatsWithPlan(session)) {
        return new Plan(root, types, StatsAndCosts.empty());
    }
    StatsProvider statsProvider = new CachingStatsProvider(statsCalculator, session, types);
    CostProvider costProvider = new CachingCostProvider(costCalculator, statsProvider, Optional.empty(), session, types);
    return new Plan(root, types, StatsAndCosts.create(root, statsProvider, costProvider));
}
Also used : PlanOptimizer(io.prestosql.sql.planner.optimizations.PlanOptimizer) CachingStatsProvider(io.prestosql.cost.CachingStatsProvider) PlanNode(io.prestosql.spi.plan.PlanNode) StatsProvider(io.prestosql.cost.StatsProvider) CachingStatsProvider(io.prestosql.cost.CachingStatsProvider) CachingCostProvider(io.prestosql.cost.CachingCostProvider) CostProvider(io.prestosql.cost.CostProvider) CachingCostProvider(io.prestosql.cost.CachingCostProvider) SystemSessionProperties.isSkipAttachingStatsWithPlan(io.prestosql.SystemSessionProperties.isSkipAttachingStatsWithPlan)

Example 12 with StatsProvider

use of io.prestosql.cost.StatsProvider in project hetu-core by openlookeng.

The method getJoinNodeWithCost of the class DetermineJoinDistributionType.

/**
 * Pairs a candidate join with the cost of its inputs, leaving out the cost of the join output.
 *
 * @param context rule context supplying the symbol types and the stats provider
 * @param possibleJoinNode candidate join; its distribution type is read with {@code get()},
 *        so it is expected to be set already
 * @return the candidate together with its local cost converted to a plan cost
 */
private PlanNodeWithCost getJoinNodeWithCost(Context context, JoinNode possibleJoinNode) {
    final TypeProvider symbolTypes = context.getSymbolAllocator().getTypes();
    final StatsProvider statsProvider = context.getStatsProvider();
    final boolean isReplicated = possibleJoinNode.getDistributionType().get().equals(REPLICATED);

    // HACK!
    //
    // The cost model currently always has to compute the total cost of an operation.
    // For a JOIN the total cost consists of 4 parts:
    //   - cost of the exchanges that have to be introduced to execute the JOIN
    //   - cost of building a hash table
    //   - cost of probing a hash table
    //   - cost of building an output for matched rows
    //
    // When the output size of a JOIN cannot be estimated, the cost model returns an
    // UNKNOWN cost for the join.
    //
    // However, assuming the cost of the JOIN output is always the same, cost based
    // decisions can still be made from the input cost of the different types of JOINs.
    //
    // Side flipping can be decided purely from stats (the smaller side always goes to
    // the right), but determining the JOIN type is not that simple: when choosing a
    // REPLICATED over a REPARTITIONED join, the cost of exchanging and building the hash
    // table scales with the number of nodes where the build side is replicated.
    //
    // TODO Decision about the distribution should be based on LocalCostEstimate only when
    // PlanCostEstimate cannot be calculated. Otherwise cost comparator cannot take
    // query.max-memory into account.
    final int sourceTaskCount = taskCountEstimator.estimateSourceDistributedTaskCount();
    final LocalCostEstimate inputCost = calculateJoinCostWithoutOutput(
            possibleJoinNode.getLeft(),
            possibleJoinNode.getRight(),
            statsProvider,
            symbolTypes,
            isReplicated,
            sourceTaskCount);
    return new PlanNodeWithCost(inputCost.toPlanCost(), possibleJoinNode);
}
Also used : StatsProvider(io.prestosql.cost.StatsProvider) TypeProvider(io.prestosql.sql.planner.TypeProvider) LocalCostEstimate(io.prestosql.cost.LocalCostEstimate)

Example 13 with StatsProvider

use of io.prestosql.cost.StatsProvider in project hetu-core by openlookeng.

The method getSemiJoinNodeWithCost of the class DetermineSemiJoinDistributionType.

/**
 * Pairs a candidate semi-join with the cost of its inputs, leaving out the cost of the output.
 *
 * @param possibleJoinNode candidate semi-join; its distribution type is read with
 *        {@code get()}, so it is expected to be set already
 * @param context rule context supplying the symbol types and the stats provider
 * @return the candidate together with its local cost converted to a plan cost
 */
private PlanNodeWithCost getSemiJoinNodeWithCost(SemiJoinNode possibleJoinNode, Context context) {
    final TypeProvider symbolTypes = context.getSymbolAllocator().getTypes();
    final StatsProvider statsProvider = context.getStatsProvider();
    final boolean isReplicated = possibleJoinNode.getDistributionType().get().equals(REPLICATED);

    // HACK!
    //
    // The cost model currently always has to compute the total cost of an operation.
    // For a SEMI-JOIN the total cost consists of 4 parts:
    //   - cost of the exchanges that have to be introduced to execute the JOIN
    //   - cost of building a hash table
    //   - cost of probing a hash table
    //   - cost of building an output for matched rows
    //
    // When the output size of a SEMI-JOIN cannot be estimated, the cost model returns an
    // UNKNOWN cost for the join.
    //
    // However, assuming the cost of the SEMI-JOIN output is always the same, cost based
    // decisions can still be made from the input cost of the different types of SEMI-JOINs.
    //
    // TODO Decision about the distribution should be based on LocalCostEstimate only when
    // PlanCostEstimate cannot be calculated. Otherwise cost comparator cannot take
    // query.max-memory into account.
    final int sourceTaskCount = taskCountEstimator.estimateSourceDistributedTaskCount();
    final LocalCostEstimate inputCost = calculateJoinCostWithoutOutput(
            possibleJoinNode.getSource(),
            possibleJoinNode.getFilteringSource(),
            statsProvider,
            symbolTypes,
            isReplicated,
            sourceTaskCount);
    return new PlanNodeWithCost(inputCost.toPlanCost(), possibleJoinNode);
}
Also used : StatsProvider(io.prestosql.cost.StatsProvider) TypeProvider(io.prestosql.sql.planner.TypeProvider) LocalCostEstimate(io.prestosql.cost.LocalCostEstimate)

Example 14 with StatsProvider

use of io.prestosql.cost.StatsProvider in project hetu-core by openlookeng.

The method ruleContext of the class IterativeOptimizer.

/**
 * Wraps the optimizer's own {@code Context} in the {@code Rule.Context} view handed to rules.
 *
 * <p>A caching stats provider and a caching cost provider are created once per call, both
 * bound to the context's memo, session and current symbol types. Every other accessor
 * simply forwards to the corresponding field or method of {@code context}.
 *
 * @param context the optimizer context to expose
 * @return a rule context backed by {@code context}
 */
private Rule.Context ruleContext(Context context) {
    StatsProvider cachedStats = new CachingStatsProvider(
            statsCalculator,
            Optional.of(context.memo),
            context.lookup,
            context.session,
            context.planSymbolAllocator.getTypes());
    CostProvider cachedCosts = new CachingCostProvider(
            costCalculator,
            cachedStats,
            Optional.of(context.memo),
            context.session,
            context.planSymbolAllocator.getTypes());

    return new Rule.Context() {
        // --- providers created above ---

        @Override
        public StatsProvider getStatsProvider() {
            return cachedStats;
        }

        @Override
        public CostProvider getCostProvider() {
            return cachedCosts;
        }

        // --- straight delegation to the wrapped optimizer context ---

        @Override
        public Session getSession() {
            return context.session;
        }

        @Override
        public Lookup getLookup() {
            return context.lookup;
        }

        @Override
        public PlanNodeIdAllocator getIdAllocator() {
            return context.idAllocator;
        }

        @Override
        public PlanSymbolAllocator getSymbolAllocator() {
            return context.planSymbolAllocator;
        }

        @Override
        public WarningCollector getWarningCollector() {
            return context.warningCollector;
        }

        @Override
        public void checkTimeoutNotExhausted() {
            context.checkTimeoutNotExhausted();
        }
    };
}
Also used : CachingStatsProvider(io.prestosql.cost.CachingStatsProvider) CachingStatsProvider(io.prestosql.cost.CachingStatsProvider) StatsProvider(io.prestosql.cost.StatsProvider) CachingCostProvider(io.prestosql.cost.CachingCostProvider) CostProvider(io.prestosql.cost.CostProvider) CachingCostProvider(io.prestosql.cost.CachingCostProvider)

Example 15 with StatsProvider

use of io.prestosql.cost.StatsProvider in project hetu-core by openlookeng.

The method optimize of the class PredicatePushDown.

/**
 * Rewrites {@code plan} with the predicate push-down {@code Rewriter}.
 *
 * <p>The rewriter is given a cost provider backed by a fresh {@code Memo} of the plan, via a
 * {@code FilterPushdownForCTEHandler}. The plan is rewritten once, and a second time over
 * the first result when the rewriter reports that a second traversal is required.
 *
 * @param plan plan to rewrite
 * @param session current session
 * @param types symbol types; only checked for null here, the types actually used are
 *        taken from {@code planSymbolAllocator}
 * @param planSymbolAllocator symbol allocator for the plan
 * @param idAllocator plan node id allocator
 * @param warningCollector not used by this method
 * @return the rewritten plan
 * @throws NullPointerException if {@code plan}, {@code session}, {@code types},
 *         {@code planSymbolAllocator} or {@code idAllocator} is null
 */
@Override
public PlanNode optimize(PlanNode plan, Session session, TypeProvider types, PlanSymbolAllocator planSymbolAllocator, PlanNodeIdAllocator idAllocator, WarningCollector warningCollector) {
    requireNonNull(plan, "plan is null");
    requireNonNull(session, "session is null");
    requireNonNull(types, "types is null");
    // planSymbolAllocator is dereferenced below, so validate it up front like the other arguments.
    requireNonNull(planSymbolAllocator, "planSymbolAllocator is null");
    requireNonNull(idAllocator, "idAllocator is null");
    RowExpressionPredicateExtractor predicateExtractor = new RowExpressionPredicateExtractor(new RowExpressionDomainTranslator(metadata), metadata, planSymbolAllocator, useTableProperties);
    Memo memo = new Memo(idAllocator, plan);
    Lookup lookup = Lookup.from(planNode -> Stream.of(memo.resolve(planNode)));
    // The stats provider exists only to feed the cost provider used by the CTE filter push-down handler.
    StatsProvider statsProvider = new CachingStatsProvider(costCalculationHandle.getStatsCalculator(), Optional.of(memo), lookup, session, planSymbolAllocator.getTypes(), true);
    CostProvider costProvider = new CachingCostProvider(costCalculationHandle.getCostCalculator(), statsProvider, Optional.of(memo), session, planSymbolAllocator.getTypes());
    FilterPushdownForCTEHandler filterPushdownForCTEHandler = new FilterPushdownForCTEHandler(pushdownForCTE, costProvider, costCalculationHandle.getCostComparator());
    Rewriter rewriter = new Rewriter(planSymbolAllocator, idAllocator, metadata, predicateExtractor, typeAnalyzer, session, dynamicFiltering, filterPushdownForCTEHandler);
    PlanNode rewrittenNode = SimplePlanRewriter.rewriteWith(rewriter, plan, TRUE_CONSTANT);
    if (rewriter.isSecondTraverseRequired()) {
        // The same rewriter instance is reused for the second pass over the already rewritten plan.
        return SimplePlanRewriter.rewriteWith(rewriter, rewrittenNode, TRUE_CONSTANT);
    }
    return rewrittenNode;
}
Also used : CachingStatsProvider(io.prestosql.cost.CachingStatsProvider) PlanNode(io.prestosql.spi.plan.PlanNode) StatsProvider(io.prestosql.cost.StatsProvider) CachingStatsProvider(io.prestosql.cost.CachingStatsProvider) CachingCostProvider(io.prestosql.cost.CachingCostProvider) RowExpressionDomainTranslator(io.prestosql.sql.relational.RowExpressionDomainTranslator) CostProvider(io.prestosql.cost.CostProvider) CachingCostProvider(io.prestosql.cost.CachingCostProvider) SimplePlanRewriter(io.prestosql.sql.planner.plan.SimplePlanRewriter) Lookup(io.prestosql.sql.planner.iterative.Lookup) Memo(io.prestosql.sql.planner.iterative.Memo) RowExpressionPredicateExtractor(io.prestosql.sql.planner.RowExpressionPredicateExtractor)

Aggregations

StatsProvider (io.prestosql.cost.StatsProvider) 17; PlanNode (io.prestosql.spi.plan.PlanNode) 11; MoreObjects.toStringHelper (com.google.common.base.MoreObjects.toStringHelper) 8; Preconditions.checkState (com.google.common.base.Preconditions.checkState) 8; Session (io.prestosql.Session) 8; Metadata (io.prestosql.metadata.Metadata) 8; NO_MATCH (io.prestosql.sql.planner.assertions.MatchResult.NO_MATCH) 8; List (java.util.List) 8; CachingStatsProvider (io.prestosql.cost.CachingStatsProvider) 7; MatchResult.match (io.prestosql.sql.planner.assertions.MatchResult.match) 7; Optional (java.util.Optional) 7; CachingCostProvider (io.prestosql.cost.CachingCostProvider) 6; CostProvider (io.prestosql.cost.CostProvider) 6; Objects.requireNonNull (java.util.Objects.requireNonNull) 6; ImmutableList.toImmutableList (com.google.common.collect.ImmutableList.toImmutableList) 5; ImmutableSet.toImmutableSet (com.google.common.collect.ImmutableSet.toImmutableSet) 3; Symbol (io.prestosql.spi.plan.Symbol) 3; SymbolUtils.toSymbolReference (io.prestosql.sql.planner.SymbolUtils.toSymbolReference) 3; TypeProvider (io.prestosql.sql.planner.TypeProvider) 3; Map (java.util.Map) 3