Search in sources :

Example 1 with StoreCostController

Use of org.apache.derby.iapi.store.access.StoreCostController in the Apache Derby project.

Class FromBaseTable, method estimateCost.

/**
 * <p>
 * Estimate the cost of scanning this {@code FromBaseTable} using the
 * given predicate list with the given conglomerate.
 * </p>
 *
 * <p>
 * If the table contains little data, the cost estimate might be adjusted
 * to make it more likely that an index scan will be preferred to a table
 * scan, and a unique index will be preferred to a non-unique index. Even
 * though such a plan may be slightly suboptimal when seen in isolation,
 * using indexes, unique indexes in particular, needs fewer locks and
 * allows more concurrency.
 * </p>
 *
 * @see org.apache.derby.iapi.sql.compile.Optimizable#estimateCost
 *
 * @exception StandardException		Thrown on error
 */
@Override
public CostEstimate estimateCost(OptimizablePredicateList predList, ConglomerateDescriptor cd, CostEstimate outerCost, Optimizer optimizer, RowOrdering rowOrdering) throws StandardException {
    double cost;
    boolean statisticsForTable = false;
    boolean statisticsForConglomerate = false;
    /* unknownPredicateList contains all predicates whose effect on
		 * cost/selectivity can't be calculated by the store.
		 */
    PredicateList unknownPredicateList = null;
    if (optimizer.useStatistics() && predList != null) {
        /* if user has specified that we don't use statistics,
			   pretend that statistics don't exist.
			*/
        statisticsForConglomerate = tableDescriptor.statisticsExist(cd);
        statisticsForTable = tableDescriptor.statisticsExist(null);
        unknownPredicateList = new PredicateList(getContextManager());
        predList.copyPredicatesToOtherList(unknownPredicateList);
        // Check once whether the indexes' statistics need to get updated.
        if (!hasCheckedIndexStats) {
            hasCheckedIndexStats = true;
            // daemon if that's the only index on the table.
            if (qualifiesForStatisticsUpdateCheck(tableDescriptor)) {
                tableDescriptor.markForIndexStatsUpdate(baseRowCount());
            }
        }
    }
    AccessPath currAccessPath = getCurrentAccessPath();
    JoinStrategy currentJoinStrategy = currAccessPath.getJoinStrategy();
    if (optimizerTracingIsOn()) {
        getOptimizerTracer().traceEstimatingCostOfConglomerate(cd, tableNumber);
    }
    /* Get the uniqueness factor for later use (see below) */
    double tableUniquenessFactor = optimizer.uniqueJoinWithOuterTable(predList);
    boolean oneRowResultSetForSomeConglom = isOneRowResultSet(predList);
    /* Get the predicates that can be used for scanning the base table */
    baseTableRestrictionList.removeAllElements();
    currentJoinStrategy.getBasePredicates(predList, baseTableRestrictionList, this);
    /* RESOLVE: Need to figure out how to cache the StoreCostController */
    StoreCostController scc = getStoreCostController(cd);
    CostEstimate costEst = getScratchCostEstimate(optimizer);
    /* Does the conglomerate match at most one row? */
    if (isOneRowResultSet(cd, baseTableRestrictionList)) {
        /*
			** Tell the RowOrdering that this optimizable is always ordered.
			** It will figure out whether it is really always ordered in the
			** context of the outer tables and their orderings.
			*/
        rowOrdering.optimizableAlwaysOrdered(this);
        singleScanRowCount = 1.0;
        /* Yes, the cost is to fetch exactly one row */
        // RESOLVE: NEED TO FIGURE OUT HOW TO GET REFERENCED COLUMN LIST,
        // FIELD STATES, AND ACCESS TYPE
        cost = scc.getFetchFromFullKeyCost((FormatableBitSet) null, 0);
        if (optimizerTracingIsOn()) {
            getOptimizerTracer().traceSingleMatchedRowCost(cost, tableNumber);
        }
        costEst.setCost(cost, 1.0d, 1.0d);
        /*
			** Let the join strategy decide whether the cost of the base
			** scan is a single scan, or a scan per outer row.
			** NOTE: The multiplication should only be done against the
			** total row count, not the singleScanRowCount.
			*/
        double newCost = costEst.getEstimatedCost();
        if (currentJoinStrategy.multiplyBaseCostByOuterRows()) {
            newCost *= outerCost.rowCount();
        }
        costEst.setCost(newCost, costEst.rowCount() * outerCost.rowCount(), costEst.singleScanRowCount());
        /*
			** Choose the lock mode.  If the start/stop conditions are
			** constant, choose row locking, because we will always match
			** the same row.  If they are not constant (i.e. they include
			** a join), we decide whether to do row locking based on
			** the total number of rows for the life of the query.
			*/
        boolean constantStartStop = true;
        for (int i = 0; i < predList.size(); i++) {
            OptimizablePredicate pred = predList.getOptPredicate(i);
            /*
				** The predicates are in index order, so the start and
				** stop keys should be first.
				*/
            if (!(pred.isStartKey() || pred.isStopKey())) {
                break;
            }
            /* Stop when we've found a join */
            if (!pred.getReferencedMap().hasSingleBitSet()) {
                constantStartStop = false;
                break;
            }
        }
        if (constantStartStop) {
            currAccessPath.setLockMode(TransactionController.MODE_RECORD);
            if (optimizerTracingIsOn()) {
                getOptimizerTracer().traceConstantStartStopPositions();
            }
        } else {
            setLockingBasedOnThreshold(optimizer, costEst.rowCount());
        }
        if (optimizerTracingIsOn()) {
            getOptimizerTracer().traceCostOfNScans(tableNumber, outerCost.rowCount(), costEst);
        }
        /* Add in cost of fetching base row for non-covering index */
        if (cd.isIndex() && (!isCoveringIndex(cd))) {
            double singleFetchCost = getBaseCostController().getFetchFromRowLocationCost((FormatableBitSet) null, 0);
            // The estimated row count is always 1 here, although the
            // index scan may actually return 0 rows, depending on whether
            // or not the predicates match a key. It is assumed that a
            // match is more likely than a miss, hence the row count is 1.
            // Note (DERBY-6011): Alternative (non-unique) indexes may come
            // up with row counts lower than 1 because they multiply with
            // the selectivity, especially if the table is almost empty.
            // This makes the optimizer prefer non-unique indexes if there
            // are not so many rows in the table. We still want to use the
            // unique index in that case, as the performance difference
            // between the different scans on a small table is small, and
            // the unique index is likely to lock fewer rows and reduce
            // the chance of deadlocks. Therefore, we compensate by
            // making the row count at least 1 for the non-unique index.
            // See reference to DERBY-6011 further down in this method.
            cost = singleFetchCost * costEst.rowCount();
            costEst.setEstimatedCost(costEst.getEstimatedCost() + cost);
            if (optimizerTracingIsOn()) {
                getOptimizerTracer().traceNonCoveringIndexCost(cost, tableNumber);
            }
        }
    } else {
        /* Conglomerate might match more than one row */
        /*
			** Some predicates are good for start/stop, but we don't know
			** the values they are being compared to at this time, so we
			** estimate their selectivity in language rather than ask the
			** store about them.  The predicates on the first column of
			** the conglomerate reduce the number of pages and rows scanned.
			** The predicates on columns after the first reduce the number
			** of rows scanned, but have a much smaller effect on the number
			** of pages scanned, so we keep track of these selectivities in
			** two separate variables: extraFirstColumnSelectivity and
			** extraStartStopSelectivity. (Theoretically, we could try to
			** figure out the effect of predicates after the first column
			** on the number of pages scanned, but it's too hard, so we
			** use these predicates only to reduce the estimated number of
			** rows.  For comparisons with known values, though, the store
			** can figure out exactly how many rows and pages are scanned.)
			**
			** Other predicates are not good for start/stop.  We keep track
			** of their selectivities separately, because these limit the
			** number of rows, but not the number of pages, and so need to
			** be factored into the row count but not into the cost.
			** These selectivities are factored into extraQualifierSelectivity.
			**
			** statStartStopSelectivity (using statistics) represents the 
			** selectivity of start/stop predicates that can be used to scan 
			** the index. If no statistics exist for the conglomerate then 
			** the value of this variable remains at 1.0
			** 
			** statCompositeSelectivity (using statistics) represents the 
			** selectivity of all the predicates (including NonBaseTable 
			** predicates). This represents the most educated guess [among 
			** all the wild surmises in this routine] as to the number
			** of rows that will be returned from this joinNode.
			** If no statistics exist on the table or no statistics at all
			** can be found to satisfy the predicates at this join operator,
			** then statCompositeSelectivity is left initialized at 1.0
			*/
        double extraFirstColumnSelectivity = 1.0d;
        double extraStartStopSelectivity = 1.0d;
        double extraQualifierSelectivity = 1.0d;
        double extraNonQualifierSelectivity = 1.0d;
        double statStartStopSelectivity = 1.0d;
        double statCompositeSelectivity = 1.0d;
        int numExtraFirstColumnPreds = 0;
        int numExtraStartStopPreds = 0;
        int numExtraQualifiers = 0;
        int numExtraNonQualifiers = 0;
        /*
			** It is possible for something to be a start or stop predicate
			** without it being possible to use it as a key for cost estimation.
			** For example, with an index on (c1, c2), and the predicate
			** c1 = othertable.c3 and c2 = 1, the comparison on c1 is with
			** an unknown value, so we can't pass it to the store.  This means
			** we can't pass the comparison on c2 to the store, either.
			**
			** The following booleans keep track of whether we have seen
			** gaps in the keys we can pass to the store.
			*/
        boolean startGap = false;
        boolean stopGap = false;
        boolean seenFirstColumn = false;
        /*
			** We need to figure out the number of rows touched to decide
			** whether to use row locking or table locking.  If the start/stop
			** conditions are constant (i.e. no joins), the number of rows
			** touched is the number of rows per scan.  But if the start/stop
			** conditions contain a join, the number of rows touched must
			** take the number of outer rows into account.
			*/
        boolean constantStartStop = true;
        boolean startStopFound = false;
        /* Count the number of start and stop keys */
        int startKeyNum = 0;
        int stopKeyNum = 0;
        OptimizablePredicate pred;
        int predListSize;
        if (predList != null)
            predListSize = baseTableRestrictionList.size();
        else
            predListSize = 0;
        int startStopPredCount = 0;
        ColumnReference firstColumn = null;
        for (int i = 0; i < predListSize; i++) {
            pred = baseTableRestrictionList.getOptPredicate(i);
            boolean startKey = pred.isStartKey();
            boolean stopKey = pred.isStopKey();
            if (startKey || stopKey) {
                startStopFound = true;
                if (!pred.getReferencedMap().hasSingleBitSet()) {
                    constantStartStop = false;
                }
                boolean knownConstant = pred.compareWithKnownConstant(this, true);
                if (startKey) {
                    if (knownConstant && (!startGap)) {
                        startKeyNum++;
                        if (unknownPredicateList != null)
                            unknownPredicateList.removeOptPredicate(pred);
                    } else {
                        startGap = true;
                    }
                }
                if (stopKey) {
                    if (knownConstant && (!stopGap)) {
                        stopKeyNum++;
                        if (unknownPredicateList != null)
                            unknownPredicateList.removeOptPredicate(pred);
                    } else {
                        stopGap = true;
                    }
                }
                /* If either we are seeing startGap or stopGap because start/stop key is
					 * comparison with non-constant, we should multiply the selectivity to
					 * extraFirstColumnSelectivity.  Beetle 4787.
					 */
                if (startGap || stopGap) {
                    // Don't include redundant join predicates in selectivity calculations
                    if (baseTableRestrictionList.isRedundantPredicate(i))
                        continue;
                    if (startKey && stopKey)
                        startStopPredCount++;
                    if (pred.getIndexPosition() == 0) {
                        extraFirstColumnSelectivity *= pred.selectivity(this);
                        if (!seenFirstColumn) {
                            ValueNode relNode = ((Predicate) pred).getAndNode().getLeftOperand();
                            if (relNode instanceof BinaryRelationalOperatorNode)
                                firstColumn = ((BinaryRelationalOperatorNode) relNode).getColumnOperand(this);
                            seenFirstColumn = true;
                        }
                    } else {
                        extraStartStopSelectivity *= pred.selectivity(this);
                        numExtraStartStopPreds++;
                    }
                }
            } else {
                // Don't include redundant join predicates in selectivity calculations
                if (baseTableRestrictionList.isRedundantPredicate(i)) {
                    continue;
                }
                /* If we have "like" predicate on the first index column, it is more likely
					 * to have a smaller range than "between", so we apply extra selectivity 0.2
					 * here.  beetle 4387, 4787.
					 */
                if (pred instanceof Predicate) {
                    ValueNode leftOpnd = ((Predicate) pred).getAndNode().getLeftOperand();
                    if (firstColumn != null && leftOpnd instanceof LikeEscapeOperatorNode) {
                        LikeEscapeOperatorNode likeNode = (LikeEscapeOperatorNode) leftOpnd;
                        if (likeNode.getLeftOperand().requiresTypeFromContext()) {
                            ValueNode receiver = ((TernaryOperatorNode) likeNode).getReceiver();
                            if (receiver instanceof ColumnReference) {
                                ColumnReference cr = (ColumnReference) receiver;
                                if (cr.getTableNumber() == firstColumn.getTableNumber() && cr.getColumnNumber() == firstColumn.getColumnNumber())
                                    extraFirstColumnSelectivity *= 0.2;
                            }
                        }
                    }
                }
                if (pred.isQualifier()) {
                    extraQualifierSelectivity *= pred.selectivity(this);
                    numExtraQualifiers++;
                } else {
                    extraNonQualifierSelectivity *= pred.selectivity(this);
                    numExtraNonQualifiers++;
                }
                /*
					** Strictly speaking, it shouldn't be necessary to
					** indicate a gap here, since there should be no more
					** start/stop predicates, but let's do it, anyway.
					*/
                startGap = true;
                stopGap = true;
            }
        }
        if (unknownPredicateList != null) {
            statCompositeSelectivity = unknownPredicateList.selectivity(this);
            if (statCompositeSelectivity == -1.0d)
                statCompositeSelectivity = 1.0d;
        }
        if (seenFirstColumn && (startStopPredCount > 0)) {
            if (statisticsForConglomerate) {
                statStartStopSelectivity = tableDescriptor.selectivityForConglomerate(cd, startStopPredCount);
            } else if (cd.isIndex()) {
                // DERBY-3790 (Investigate if request for update
                // statistics can be skipped for certain kind of
                // indexes, one instance may be unique indexes based
                // on one column.) But as found in DERBY-6045 (in list
                // multi-probe by primary key not chosen on tables with
                // >256 rows), even though we do not keep the
                // statistics for single-column unique indexes, we
                // should improve the selectivity of such an index
                // when the index is being considered by the optimizer.
                IndexRowGenerator irg = cd.getIndexDescriptor();
                if (irg.isUnique() && irg.numberOfOrderedColumns() == 1 && startStopPredCount == 1) {
                    statStartStopSelectivity = (1 / (double) baseRowCount());
                }
            }
        }
        /*
			** Factor the non-base-table predicates into the extra
			** non-qualifier selectivity, since these will restrict the
			** number of rows, but not the cost.
			*/
        extraNonQualifierSelectivity *= currentJoinStrategy.nonBasePredicateSelectivity(this, predList);
        /* Create the start and stop key arrays, and fill them in */
        DataValueDescriptor[] startKeys;
        DataValueDescriptor[] stopKeys;
        if (startKeyNum > 0)
            startKeys = new DataValueDescriptor[startKeyNum];
        else
            startKeys = null;
        if (stopKeyNum > 0)
            stopKeys = new DataValueDescriptor[stopKeyNum];
        else
            stopKeys = null;
        startKeyNum = 0;
        stopKeyNum = 0;
        startGap = false;
        stopGap = false;
        /* If we have a probe predicate that is being used as a start/stop
			 * key then ssKeySourceInList will hold the InListOperatorNode
			 * from which the probe predicate was built.
			 */
        InListOperatorNode ssKeySourceInList = null;
        for (int i = 0; i < predListSize; i++) {
            pred = baseTableRestrictionList.getOptPredicate(i);
            boolean startKey = pred.isStartKey();
            boolean stopKey = pred.isStopKey();
            if (startKey || stopKey) {
                /* A probe predicate is only useful if it can be used as
					 * a start/stop key for _first_ column in an index
					 * (i.e. if the column position is 0).  That said, we only
					 * allow a single start/stop key per column position in
					 * the index (see PredicateList.orderUsefulPredicates()).
					 * Those two facts combined mean that we should never have
					 * more than one probe predicate start/stop key for a given
					 * conglomerate.
					 */
                if (SanityManager.DEBUG) {
                    if ((ssKeySourceInList != null) && ((Predicate) pred).isInListProbePredicate()) {
                        SanityManager.THROWASSERT("Found multiple probe predicate start/stop keys" + " for conglomerate '" + cd.getConglomerateName() + "' when at most one was expected.");
                    }
                }
                /* By passing "true" in the next line we indicate that we
					 * should only retrieve the underlying InListOpNode *if*
					 * the predicate is a "probe predicate".
					 */
                ssKeySourceInList = ((Predicate) pred).getSourceInList(true);
                boolean knownConstant = pred.compareWithKnownConstant(this, true);
                if (startKey) {
                    if (knownConstant && (!startGap)) {
                        startKeys[startKeyNum] = pred.getCompareValue(this);
                        startKeyNum++;
                    } else {
                        startGap = true;
                    }
                }
                if (stopKey) {
                    if (knownConstant && (!stopGap)) {
                        stopKeys[stopKeyNum] = pred.getCompareValue(this);
                        stopKeyNum++;
                    } else {
                        stopGap = true;
                    }
                }
            } else {
                startGap = true;
                stopGap = true;
            }
        }
        int startOperator;
        int stopOperator;
        if (baseTableRestrictionList != null) {
            startOperator = baseTableRestrictionList.startOperator(this);
            stopOperator = baseTableRestrictionList.stopOperator(this);
        } else {
            /*
				** If we're doing a full scan, it doesn't matter what the
				** start and stop operators are.
				*/
            startOperator = ScanController.NA;
            stopOperator = ScanController.NA;
        }
        /*
			** Get a row template for this conglomerate.  For now, just tell
			** it we are using all the columns in the row.
			*/
        DataValueDescriptor[] rowTemplate = getRowTemplate(cd, getBaseCostController());
        /* We prefer an index scan to a table scan for concurrency reasons, via a small
			 * adjustment on estimated row count.  This affects optimizer's decision
			 * especially when few rows are in table. beetle 5006. This makes sense
			 * since the plan may stay long before we actually check and invalidate it.
			 * And new rows may be inserted before we check and invalidate the plan.
			 * Here we only prefer index that has start/stop key from predicates. Non-
			 * constant start/stop key case is taken care of by selectivity later.
			 */
        long baseRC = (startKeys != null || stopKeys != null) ? baseRowCount() : baseRowCount() + 5;
        scc.getScanCost(currentJoinStrategy.scanCostType(), baseRC, 1, forUpdate(), (FormatableBitSet) null, rowTemplate, startKeys, startOperator, stopKeys, stopOperator, false, 0, costEst);
        /* initialPositionCost is the first part of the index scan cost we get above.
			 * It's the cost of initial positioning/fetch of key.  So it's unrelated to
			 * row count of how many rows we fetch from index.  We extract it here so that
			 * we only multiply selectivity to the other part of index scan cost, which is
			 * nearly linear, to make cost calculation more accurate and fair, especially
			 * compared to the plan of "one row result set" (unique index). beetle 4787.
			 */
        double initialPositionCost = 0.0;
        if (cd.isIndex()) {
            initialPositionCost = scc.getFetchFromFullKeyCost((FormatableBitSet) null, 0);
            /* oneRowResultSetForSomeConglom means there's a unique index, but certainly
				 * not this one since we are here.  If store knows this non-unique index
				 * won't return any row or just returns one row (eg., the predicate is a
				 * comparison with constant or almost empty table), we do minor adjustment
				 * on cost (affecting decision for covering index) and rc (decision for
				 * non-covering). The purpose is favoring unique index. beetle 5006.
				 */
            if (oneRowResultSetForSomeConglom && costEst.rowCount() <= 1) {
                costEst.setCost(costEst.getEstimatedCost() * 2, costEst.rowCount() + 2, costEst.singleScanRowCount() + 2);
            }
        }
        if (optimizerTracingIsOn()) {
            getOptimizerTracer().traceCostOfConglomerateScan(tableNumber, cd, costEst, numExtraFirstColumnPreds, extraFirstColumnSelectivity, numExtraStartStopPreds, extraStartStopSelectivity, startStopPredCount, statStartStopSelectivity, numExtraQualifiers, extraQualifierSelectivity, numExtraNonQualifiers, extraNonQualifierSelectivity);
        }
        /* initial row count is the row count without applying
			   any predicates-- we use this at the end of the routine
			   when we use statistics to recompute the row count.
			*/
        double initialRowCount = costEst.rowCount();
        if (statStartStopSelectivity != 1.0d) {
            /*
				** If statistics exist use the selectivity computed 
				** from the statistics to calculate the cost. 
				** NOTE: we apply this selectivity to the cost as well
				** as both the row counts. In the absence of statistics
				** we only applied the FirstColumnSelectivity to the 
				** cost.
				*/
            costEst.setCost(scanCostAfterSelectivity(costEst.getEstimatedCost(), initialPositionCost, statStartStopSelectivity, oneRowResultSetForSomeConglom), costEst.rowCount() * statStartStopSelectivity, costEst.singleScanRowCount() * statStartStopSelectivity);
            if (optimizerTracingIsOn()) {
                getOptimizerTracer().traceCostIncludingStatsForIndex(costEst, tableNumber);
            }
        } else {
            /*
				** Factor in the extra selectivity on the first column
				** of the conglomerate (see comment above).
				** NOTE: In this case we want to apply the selectivity to both
				** the total row count and singleScanRowCount.
				*/
            if (extraFirstColumnSelectivity != 1.0d) {
                costEst.setCost(scanCostAfterSelectivity(costEst.getEstimatedCost(), initialPositionCost, extraFirstColumnSelectivity, oneRowResultSetForSomeConglom), costEst.rowCount() * extraFirstColumnSelectivity, costEst.singleScanRowCount() * extraFirstColumnSelectivity);
                if (optimizerTracingIsOn()) {
                    getOptimizerTracer().traceCostIncludingExtra1stColumnSelectivity(costEst, tableNumber);
                }
            }
            /* Factor in the extra start/stop selectivity (see comment above).
				 * NOTE: In this case we want to apply the selectivity to both
				 * the row count and singleScanRowCount.
				 */
            if (extraStartStopSelectivity != 1.0d) {
                costEst.setCost(costEst.getEstimatedCost(), costEst.rowCount() * extraStartStopSelectivity, costEst.singleScanRowCount() * extraStartStopSelectivity);
                if (optimizerTracingIsOn()) {
                    getOptimizerTracer().traceCostIncludingExtraStartStop(costEst, tableNumber);
                }
            }
        }
        /* If the start and stop key came from an IN-list "probe predicate"
			 * then we need to adjust the cost estimate.  The probe predicate
			 * is of the form "col = ?" and we currently have the estimated
			 * cost of probing the index a single time for "?".  But with an
			 * IN-list we don't just probe the index once; we're going to
			 * probe it once for every value in the IN-list.  And we are going
			 * to potentially return an additional row (or set of rows) for
			 * each probe.  To account for this "multi-probing" we take the
			 * costEstimate and multiply each of its fields by the size of
			 * the IN-list.
			 *
			 * Note: If the IN-list has duplicate values then this simple
			 * multiplication could give us an elevated cost (because we
			 * only probe the index for each *non-duplicate* value in the
			 * IN-list).  But for now, we're saying that's okay.
			 */
        if (ssKeySourceInList != null) {
            int listSize = ssKeySourceInList.getRightOperandList().size();
            double rc = costEst.rowCount() * listSize;
            double ssrc = costEst.singleScanRowCount() * listSize;
            /* If multiplication by listSize returns more rows than are
				 * in the scan then just use the number of rows in the scan.
				 */
            costEst.setCost(costEst.getEstimatedCost() * listSize, rc > initialRowCount ? initialRowCount : rc, ssrc > initialRowCount ? initialRowCount : ssrc);
        }
        /*
			** Figure out whether to do row locking or table locking.
			**
			** If there are no start/stop predicates, we're doing full
			** conglomerate scans, so do table locking.
			*/
        if (!startStopFound) {
            currAccessPath.setLockMode(TransactionController.MODE_TABLE);
            if (optimizerTracingIsOn()) {
                getOptimizerTracer().traceNoStartStopPosition();
            }
        } else {
            /*
				** Figure out the number of rows touched.  If all the
				** start/stop predicates are constant, the number of
				** rows touched is the number of rows per scan.
				** This is also true for join strategies that scan the
				** inner table only once (like hash join) - we can
				** tell if we have one of those, because
				** multiplyBaseCostByOuterRows() will return false.
				*/
            double rowsTouched = costEst.rowCount();
            if ((!constantStartStop) && currentJoinStrategy.multiplyBaseCostByOuterRows()) {
                /*
					** This is a join where the inner table is scanned
					** more than once, so we have to take the number
					** of outer rows into account.  The formula for this
					** works out as follows:
					**
					**	total rows in table = r
					**  number of rows touched per scan = s
					**  number of outer rows = o
					**  proportion of rows touched per scan = s / r
					**  proportion of rows not touched per scan =
					**										1 - (s / r)
					**  proportion of rows not touched for all scans =
					**									(1 - (s / r)) ** o
					**  proportion of rows touched for all scans =
					**									1 - ((1 - (s / r)) ** o)
					**  total rows touched for all scans =
					**							r * (1 - ((1 - (s / r)) ** o))
					**
					** In doing these calculations, we must be careful not
					** to divide by zero.  This could happen if there are
					** no rows in the table.  In this case, let's do table
					** locking.
					*/
                double r = baseRowCount();
                if (r > 0.0) {
                    double s = costEst.rowCount();
                    double o = outerCost.rowCount();
                    double pRowsNotTouchedPerScan = 1.0 - (s / r);
                    double pRowsNotTouchedAllScans = Math.pow(pRowsNotTouchedPerScan, o);
                    double pRowsTouchedAllScans = 1.0 - pRowsNotTouchedAllScans;
                    double rowsTouchedAllScans = r * pRowsTouchedAllScans;
                    rowsTouched = rowsTouchedAllScans;
                } else {
                    /* See comments in setLockingBasedOnThreshold */
                    rowsTouched = optimizer.tableLockThreshold() + 1;
                }
            }
            setLockingBasedOnThreshold(optimizer, rowsTouched);
        }
        /*
			** If the index isn't covering, add the cost of getting the
			** base row.  Only apply extraFirstColumnSelectivity and extraStartStopSelectivity
			** before we do this, don't apply extraQualifierSelectivity etc.  The
			** reason is that the row count here should be the number of index rows
			** (and hence heap rows) we get, and we need to fetch all those rows, even
			** though later on some of them may be filtered out by other predicates.
			** beetle 4787.
			*/
        if (cd.isIndex() && (!isCoveringIndex(cd))) {
            double singleFetchCost = getBaseCostController().getFetchFromRowLocationCost((FormatableBitSet) null, 0);
            // The number of rows we expect to fetch from the base table.
            double rowsToFetch = costEst.rowCount();
            if (oneRowResultSetForSomeConglom) {
                // DERBY-6011: We know that there is a unique index, and
                // that there are predicates that guarantee that at most
                // one row will be fetched from the unique index. The
                // unique alternative always has 1 as estimated row count
                // (see reference to DERBY-6011 further up in this method),
                // even though it could actually return 0 rows.
                // 
                // If the alternative that's being considered here has
                // expected row count less than 1, it is going to have
                // lower estimated cost for fetching base rows. We prefer
                // unique indexes, as they lock fewer rows and allow more
                // concurrency. Therefore, make sure the cost estimate for
                // this alternative includes at least fetching one row from
                // the base table.
                rowsToFetch = Math.max(1.0d, rowsToFetch);
            }
            cost = singleFetchCost * rowsToFetch;
            costEst.setEstimatedCost(costEst.getEstimatedCost() + cost);
            if (optimizerTracingIsOn()) {
                getOptimizerTracer().traceCostOfNoncoveringIndex(costEst, tableNumber);
            }
        }
        /* Factor in the extra qualifier selectivity (see comment above).
			 * NOTE: In this case we want to apply the selectivity to both
			 * the row count and singleScanRowCount.
			 */
        if (extraQualifierSelectivity != 1.0d) {
            costEst.setCost(costEst.getEstimatedCost(), costEst.rowCount() * extraQualifierSelectivity, costEst.singleScanRowCount() * extraQualifierSelectivity);
            if (optimizerTracingIsOn()) {
                getOptimizerTracer().traceCostIncludingExtraQualifierSelectivity(costEst, tableNumber);
            }
        }
        singleScanRowCount = costEst.singleScanRowCount();
        /*
			** Let the join strategy decide whether the cost of the base
			** scan is a single scan, or a scan per outer row.
			** NOTE: In this case we only want to multiply against the
			** total row count, not the singleScanRowCount.
			** NOTE: Do not multiply row count if we determined that
			** conglomerate is a 1 row result set when costing nested
			** loop.  (eg, we will find at most 1 match when probing
			** the hash table.)
			*/
        double newCost = costEst.getEstimatedCost();
        double rowCnt = costEst.rowCount();
        /*
			** RESOLVE - If there is a unique index on the joining
			** columns, the number of matching rows will equal the
			** number of outer rows, even if we're not considering the
			** unique index for this access path. To figure that out,
			** however, would require an analysis phase at the beginning
			** of optimization. So, we'll always multiply the number
			** of outer rows by the number of rows per scan. This will
			** give us a higher than actual row count when there is
			** such a unique index, which will bias the optimizer toward
			** using the unique index. This is probably OK most of the
			** time, since the optimizer would probably choose the
			** unique index, anyway. But it would be better if the
			** optimizer set the row count properly in this case.
			*/
        if (currentJoinStrategy.multiplyBaseCostByOuterRows()) {
            newCost *= outerCost.rowCount();
        }
        rowCnt *= outerCost.rowCount();
        initialRowCount *= outerCost.rowCount();
        /*
			** If this table can generate at most one row per scan,
			** the maximum row count is the number of outer rows.
			** NOTE: This does not completely take care of the RESOLVE
			** in the above comment, since it will only notice
			** one-row result sets for the current join order.
			*/
        if (oneRowResultSetForSomeConglom) {
            if (outerCost.rowCount() < rowCnt) {
                rowCnt = outerCost.rowCount();
            }
        }
        /*
			** The estimated cost may be too high for indexes, if the
			** estimated row count exceeds the maximum. Only do this
			** if we're not doing a full scan, and the start/stop position
			** is not constant (i.e. we're doing a join on the first column
			** of the index) - the reason being that this is when the
			** cost may be inaccurate.
			*/
        if (cd.isIndex() && startStopFound && (!constantStartStop)) {
            /*
				** Does any table outer to this one have a unique key on
				** a subset of the joining columns? If so, the maximum number
				** of rows that this table can return is the number of rows
				** in this table times the number of times the maximum number
				** of times each key can be repeated.
				*/
            double scanUniquenessFactor = optimizer.uniqueJoinWithOuterTable(baseTableRestrictionList);
            if (scanUniquenessFactor > 0.0) {
                /*
					** A positive uniqueness factor means there is a unique
					** outer join key. The value is the reciprocal of the
					** maximum number of duplicates for each unique key
					** (the duplicates can be caused by other joining tables).
					*/
                double maxRows = ((double) baseRowCount()) / scanUniquenessFactor;
                if (rowCnt > maxRows) {
                    /*
						** The estimated row count is too high. Adjust the
						** estimated cost downwards proportionately to
						** match the maximum number of rows.
						*/
                    newCost *= (maxRows / rowCnt);
                }
            }
        }
        /* The estimated total row count may be too high */
        if (tableUniquenessFactor > 0.0) {
            /*
				** A positive uniqueness factor means there is a unique outer
				** join key. The value is the reciprocal of the maximum number
				** of duplicates for each unique key (the duplicates can be
				** caused by other joining tables).
				*/
            double maxRows = ((double) baseRowCount()) / tableUniquenessFactor;
            if (rowCnt > maxRows) {
                /*
					** The estimated row count is too high. Set it to the
					** maximum row count.
					*/
                rowCnt = maxRows;
            }
        }
        costEst.setCost(newCost, rowCnt, costEst.singleScanRowCount());
        if (optimizerTracingIsOn()) {
            getOptimizerTracer().traceCostOfNScans(tableNumber, outerCost.rowCount(), costEst);
        }
        /*
			** Now figure in the cost of the non-qualifier predicates.
			** existsBaseTables have a row count of 1
			*/
        double rc = -1, src = -1;
        if (existsBaseTable)
            rc = src = 1;
        else // beetle 4787
        if (extraNonQualifierSelectivity != 1.0d) {
            rc = oneRowResultSetForSomeConglom ? costEst.rowCount() : costEst.rowCount() * extraNonQualifierSelectivity;
            src = costEst.singleScanRowCount() * extraNonQualifierSelectivity;
        }
        if (// changed
        rc != -1) {
            costEst.setCost(costEst.getEstimatedCost(), rc, src);
            if (optimizerTracingIsOn()) {
                getOptimizerTracer().traceCostIncludingExtraNonQualifierSelectivity(costEst, tableNumber);
            }
        }
        recomputeRowCount: if (statisticsForTable && !oneRowResultSetForSomeConglom && (statCompositeSelectivity != 1.0d)) {
            /* if we have statistics we should use statistics to calculate 
				   row  count-- if it has been determined that this table 
				   returns one row for some conglomerate then there is no need 
				   to do this recalculation
				*/
            double compositeStatRC = initialRowCount * statCompositeSelectivity;
            if (optimizerTracingIsOn()) {
                getOptimizerTracer().traceCompositeSelectivityFromStatistics(statCompositeSelectivity);
            }
            if (tableUniquenessFactor > 0.0) {
                /* If the row count from the composite statistics
					   comes up more than what the table uniqueness 
					   factor indicates then lets stick with the current
					   row count.
					*/
                if (compositeStatRC > (baseRowCount() * tableUniquenessFactor)) {
                    break recomputeRowCount;
                }
            }
            /* set the row count and the single scan row count
				   to the initialRowCount. initialRowCount is the product
				   of the RC from store * RC of the outerCost.
				   Thus RC = initialRowCount * the selectivity from stats.
				   SingleRC = RC / outerCost.rowCount().
				*/
            costEst.setCost(costEst.getEstimatedCost(), compositeStatRC, (existsBaseTable) ? 1 : compositeStatRC / outerCost.rowCount());
            if (optimizerTracingIsOn()) {
                getOptimizerTracer().traceCostIncludingCompositeSelectivityFromStats(costEst, tableNumber);
            }
        }
    }
    /* Put the base predicates back in the predicate list */
    currentJoinStrategy.putBasePredicates(predList, baseTableRestrictionList);
    return costEst;
}
Also used : CostEstimate(org.apache.derby.iapi.sql.compile.CostEstimate) JoinStrategy(org.apache.derby.iapi.sql.compile.JoinStrategy) OptimizablePredicate(org.apache.derby.iapi.sql.compile.OptimizablePredicate) IndexRowGenerator(org.apache.derby.iapi.sql.dictionary.IndexRowGenerator) OptimizablePredicateList(org.apache.derby.iapi.sql.compile.OptimizablePredicateList) StoreCostController(org.apache.derby.iapi.store.access.StoreCostController) OptimizablePredicate(org.apache.derby.iapi.sql.compile.OptimizablePredicate) AccessPath(org.apache.derby.iapi.sql.compile.AccessPath) FormatableBitSet(org.apache.derby.iapi.services.io.FormatableBitSet) DataValueDescriptor(org.apache.derby.iapi.types.DataValueDescriptor)

Example 2 with StoreCostController

use of org.apache.derby.iapi.store.access.StoreCostController in project derby by apache.

the class CompilerContextImpl method closeStoreCostControllers.

/**
 * Close every StoreCostController currently held in
 * {@code storeCostControllers} and then empty that map.
 * <p>
 * Closing is best effort: a {@code StandardException} raised while
 * closing one controller is ignored so that the remaining controllers
 * are still closed and the map is still cleared.
 */
private void closeStoreCostControllers() {
    for (StoreCostController controller : storeCostControllers.values()) {
        try {
            controller.close();
        } catch (StandardException ignored) {
            // Deliberately ignored: keep closing the remaining controllers.
        }
    }
    storeCostControllers.clear();
}
Also used : StandardException(org.apache.derby.shared.common.error.StandardException) StoreCostController(org.apache.derby.iapi.store.access.StoreCostController)

Example 3 with StoreCostController

use of org.apache.derby.iapi.store.access.StoreCostController in project derby by apache.

the class CompilerContextImpl method getStoreCostController.

/**
 * Return the StoreCostController for the given conglomerate, opening
 * one through the compile-time transaction and remembering it in
 * {@code storeCostControllers} the first time it is requested.
 *
 * @see CompilerContext#getStoreCostController
 *
 * @param conglomerateNumber number of the conglomerate to look up
 *
 * @return the remembered or newly opened StoreCostController
 *
 * @exception StandardException		Thrown on error
 */
public StoreCostController getStoreCostController(long conglomerateNumber) throws StandardException {
    final Long key = Long.valueOf(conglomerateNumber);
    // Reuse a controller that was already opened for this conglomerate.
    StoreCostController cached = storeCostControllers.get(key);
    if (cached != null) {
        return cached;
    }
    // Nothing remembered yet: ask the store for a controller and keep it
    // so later requests for the same conglomerate reuse it.
    StoreCostController opened = lcc.getTransactionCompile().openStoreCost(conglomerateNumber);
    storeCostControllers.put(key, opened);
    return opened;
}
Also used : StoreCostController(org.apache.derby.iapi.store.access.StoreCostController)

Example 4 with StoreCostController

use of org.apache.derby.iapi.store.access.StoreCostController in project derby by apache.

the class RAMTransaction method openStoreCost.

/**
 * Open a StoreCostController for the conglomerate with the given id.
 * <p>
 * The returned controller can be used to ask about the estimated row
 * counts and costs of ScanController and ConglomerateController
 * operations on that conglomerate.
 * <p>
 * The conglomerate is located with {@code findExistingConglomerate} and
 * is then asked to open the controller on behalf of this transaction.
 *
 * @param conglomId The identifier of the conglomerate to open.
 *
 * @return The open StoreCostController.
 *
 * @exception  StandardException  Standard exception policy.
 *
 * @see StoreCostController
 */
public StoreCostController openStoreCost(long conglomId) throws StandardException {
    // Look up the conglomerate, then let it create the cost controller.
    return findExistingConglomerate(conglomId).openStoreCost(this, rawtran);
}
Also used : StoreCostController(org.apache.derby.iapi.store.access.StoreCostController) Conglomerate(org.apache.derby.iapi.store.access.conglomerate.Conglomerate)

Example 5 with StoreCostController

use of org.apache.derby.iapi.store.access.StoreCostController in project derby by apache.

the class FromBaseTable method baseRowCount.

/**
 * Return the estimated row count reported by the base cost controller.
 * <p>
 * The estimate is fetched the first time this method is called and is
 * remembered in {@code rowCount}; later calls return the remembered
 * value without asking the cost controller again.
 *
 * @return the estimated row count
 *
 * @exception StandardException		Thrown on error
 */
private long baseRowCount() throws StandardException {
    if (gotRowCount) {
        // Already fetched once; reuse the remembered estimate.
        return rowCount;
    }
    rowCount = getBaseCostController().getEstimatedRowCount();
    gotRowCount = true;
    return rowCount;
}
Also used : StoreCostController(org.apache.derby.iapi.store.access.StoreCostController)

Aggregations

StoreCostController (org.apache.derby.iapi.store.access.StoreCostController): 5, FormatableBitSet (org.apache.derby.iapi.services.io.FormatableBitSet): 1, AccessPath (org.apache.derby.iapi.sql.compile.AccessPath): 1, CostEstimate (org.apache.derby.iapi.sql.compile.CostEstimate): 1, JoinStrategy (org.apache.derby.iapi.sql.compile.JoinStrategy): 1, OptimizablePredicate (org.apache.derby.iapi.sql.compile.OptimizablePredicate): 1, OptimizablePredicateList (org.apache.derby.iapi.sql.compile.OptimizablePredicateList): 1, IndexRowGenerator (org.apache.derby.iapi.sql.dictionary.IndexRowGenerator): 1, Conglomerate (org.apache.derby.iapi.store.access.conglomerate.Conglomerate): 1, DataValueDescriptor (org.apache.derby.iapi.types.DataValueDescriptor): 1, StandardException (org.apache.derby.shared.common.error.StandardException): 1