use of org.apache.derby.iapi.sql.compile.JoinStrategy in project derby by apache.
the class HashJoinStrategy method getScanArgs.
/**
 * Pushes the arguments for a hash scan of the inner table onto the given
 * method builder: the first group of common scan arguments, the qualifiers
 * generated from the non-store restriction list, the hash table sizing
 * values (initial capacity, load factor, maximum capacity), the saved-object
 * item number of the hash key columns, and finally the second group of
 * common scan arguments.
 *
 * @see JoinStrategy#getScanArgs
 *
 * @return always 28. NOTE(review): presumably the total number of arguments
 *         pushed for the hash scan result set -- confirm against the
 *         result set factory method this feeds.
 *
 * @exception StandardException Thrown on error
 */
public int getScanArgs(TransactionController tc, MethodBuilder mb, Optimizable innerTable, OptimizablePredicateList storeRestrictionList, OptimizablePredicateList nonStoreRestrictionList, ExpressionClassBuilderInterface acbi, int bulkFetch, int resultRowTemplate, int colRefItem, int indexColItem, int lockMode, boolean tableLocked, int isolationLevel, int maxMemoryPerTable, boolean genInListVals) throws StandardException {
    /* Hash scans have no IN-list "multi-probing" support (yet), so the
     * store restriction list must not contain any IN-list probe predicate
     * when we get here. A probe predicate has the form "col = <val>",
     * where <val> is only the first value of the IN-list; without real
     * multi-probing it would be enforced as a plain restriction on the
     * rows read from disk and we would wrongly drop rows matching the
     * other IN-list values. Other parts of the code are expected to keep
     * probe predicates away from this point; in SANE mode we double-check.
     */
    if (SanityManager.DEBUG) {
        // Walk the list from its last entry down to its first.
        int idx = storeRestrictionList.size();
        while (--idx >= 0) {
            Predicate candidate = (Predicate) storeRestrictionList.getOptPredicate(idx);
            if (!candidate.isInListProbePredicate()) {
                continue;
            }
            SanityManager.THROWASSERT("Found IN-list probing " + "(" + candidate.binaryRelOpColRefsToString() + ") while generating HASH join, which should " + "not happen.");
        }
    }

    ExpressionClassBuilder classBuilder = (ExpressionClassBuilder) acbi;

    /* First group of scan arguments, then the qualifiers built from the
     * non-store restrictions.
     */
    fillInScanArgs1(tc, mb, innerTable, storeRestrictionList, classBuilder, resultRowTemplate);
    nonStoreRestrictionList.generateQualifiers(classBuilder, mb, innerTable, true);

    /* Hash table sizing, as reported by the inner table */
    mb.push(innerTable.initialCapacity());
    mb.push(innerTable.loadFactor());
    mb.push(innerTable.maxCapacity((JoinStrategy) this, maxMemoryPerTable));

    /* Wrap the hash key columns in a formatable holder, register it as an
     * item with the class builder and push the resulting item number.
     */
    int[] keyColumns = innerTable.hashKeyColumns();
    FormatableIntHolder[] wrappedKeyColumns = FormatableIntHolder.getFormatableIntHolders(keyColumns);
    FormatableArrayHolder keyColumnHolder = new FormatableArrayHolder(wrappedKeyColumns);
    int keyColumnItem = classBuilder.addItem(keyColumnHolder);
    mb.push(keyColumnItem);

    /* Second group of scan arguments */
    fillInScanArgs2(mb, innerTable, bulkFetch, colRefItem, indexColItem, lockMode, tableLocked, isolationLevel);

    return 28;
}
use of org.apache.derby.iapi.sql.compile.JoinStrategy in project derby by apache.
the class OptimizerImpl method getJoinStrategy.
/**
 * Looks up one of this optimizer's join strategies by name. The requested
 * name is converted with {@code StringUtil.SQLToUpperCase} before it is
 * compared against each strategy's {@code getName()}.
 *
 * @see Optimizer#getJoinStrategy
 *
 * @param whichStrategy the name of the wanted join strategy
 * @return the matching join strategy, or {@code null} when no strategy
 *         has that name
 */
public JoinStrategy getJoinStrategy(String whichStrategy) {
    final String wantedName = StringUtil.SQLToUpperCase(whichStrategy);
    JoinStrategy match = null;
    // Deliberately no early exit: every strategy is inspected, so if more
    // than one carried the same name the last one in the array would win.
    for (JoinStrategy candidate : joinStrategies) {
        if (wantedName.equals(candidate.getName())) {
            match = candidate;
        }
    }
    return match;
}
use of org.apache.derby.iapi.sql.compile.JoinStrategy in project derby by apache.
the class FromBaseTable method generateResultSet.
/**
 * Generates the result set for this FromBaseTable when it is scanned for a
 * SELECT. The logic lives in its own method so that it can be shared with
 * PREPARE SELECT FILTER.
 * <p>
 * Special max scans, distinct scans and referential-action dependent scans
 * are delegated to their dedicated generators. Every other scan is
 * generated as a call on the result set factory, using the method named by
 * the join strategy of the best access path.
 *
 * @param acb The ExpressionClassBuilder for the class being built
 * @param mb The execute() method to be built
 *
 * @exception StandardException Thrown on error
 */
@Override
void generateResultSet(ExpressionClassBuilder acb, MethodBuilder mb) throws StandardException {
    /* By now the best access path must have a conglomerate descriptor */
    if (SanityManager.DEBUG) {
        SanityManager.ASSERT(getTrulyTheBestAccessPath().getConglomerateDescriptor() != null);
    }

    /* Take the next ResultSet number; it is used to number this
     * ResultSetNode, its ResultColumnList and its ResultSet.
     */
    assignResultSetNumber();

    if (specialMaxScan) {
        /* Special scan fetching the last row of an index */
        generateMaxSpecialResultSet(acb, mb);
    } else if (distinctScan) {
        /* Special distinct scan */
        generateDistinctScan(acb, mb);
    } else if (raDependentScan) {
        /* Scan of a referential-action dependent table */
        generateRefActionDependentTableScan(acb, mb);
    } else {
        JoinStrategy chosenStrategy = getTrulyTheBestAccessPath().getJoinStrategy();

        /* What we hand back is the table scan produced by the factory:
         * push the factory, then the scan arguments, then invoke the
         * factory method the join strategy asks for.
         */
        acb.pushGetResultSetFactoryExpression(mb);
        int argCount = getScanArguments(acb, mb);
        boolean bulkFetchRequested = (bulkFetch != UNSET);
        String factoryMethod = chosenStrategy.resultSetMethodName(bulkFetchRequested, multiProbing, validatingCheckConstraint);
        mb.callMethod(VMOpcode.INVOKEINTERFACE, (String) null, factoryMethod, ClassName.NoPutResultSet, argCount);

        /* When this table is the target of an UPDATE or a DELETE, the
         * generated factory call is additionally stored in the
         * row-location scan result set field. The value on the stack is a
         * NoPutResultSet, so it is cast to CursorResultSet for the field
         * store and then cast back to NoPutResultSet, so that the
         * appropriate method is invoked on what we leave behind.
         */
        boolean updateOrDeleteTarget = (updateOrDelete == UPDATE) || (updateOrDelete == DELETE);
        if (updateOrDeleteTarget) {
            mb.cast(ClassName.CursorResultSet);
            mb.putField(acb.getRowLocationScanResultSetName(), ClassName.CursorResultSet);
            mb.cast(ClassName.NoPutResultSet);
        }
    }
}
use of org.apache.derby.iapi.sql.compile.JoinStrategy in project derby by apache.
the class FromBaseTable method estimateCost.
/**
* <p>
* Estimate the cost of scanning this {@code FromBaseTable} using the
* given predicate list with the given conglomerate.
* </p>
*
* <p>
* If the table contains little data, the cost estimate might be adjusted
* to make it more likely that an index scan will be preferred to a table
* scan, and a unique index will be preferred to a non-unique index. Even
* though such a plan may be slightly suboptimal when seen in isolation,
* using indexes, unique indexes in particular, needs fewer locks and
* allows more concurrency.
* </p>
*
* @see org.apache.derby.iapi.sql.compile.Optimizable#estimateCost
*
* @exception StandardException Thrown on error
*/
@Override
public CostEstimate estimateCost(OptimizablePredicateList predList, ConglomerateDescriptor cd, CostEstimate outerCost, Optimizer optimizer, RowOrdering rowOrdering) throws StandardException {
double cost;
boolean statisticsForTable = false;
boolean statisticsForConglomerate = false;
/* unknownPredicateList contains all predicates whose effect on
* cost/selectivity can't be calculated by the store.
*/
PredicateList unknownPredicateList = null;
if (optimizer.useStatistics() && predList != null) {
/* if user has specified that we don't use statistics,
pretend that statistics don't exist.
*/
statisticsForConglomerate = tableDescriptor.statisticsExist(cd);
statisticsForTable = tableDescriptor.statisticsExist(null);
unknownPredicateList = new PredicateList(getContextManager());
predList.copyPredicatesToOtherList(unknownPredicateList);
// their statistics need to get updated.
if (!hasCheckedIndexStats) {
hasCheckedIndexStats = true;
// daemon if that's the only index on the table.
if (qualifiesForStatisticsUpdateCheck(tableDescriptor)) {
tableDescriptor.markForIndexStatsUpdate(baseRowCount());
}
}
}
AccessPath currAccessPath = getCurrentAccessPath();
JoinStrategy currentJoinStrategy = currAccessPath.getJoinStrategy();
if (optimizerTracingIsOn()) {
getOptimizerTracer().traceEstimatingCostOfConglomerate(cd, tableNumber);
}
/* Get the uniqueness factory for later use (see below) */
double tableUniquenessFactor = optimizer.uniqueJoinWithOuterTable(predList);
boolean oneRowResultSetForSomeConglom = isOneRowResultSet(predList);
/* Get the predicates that can be used for scanning the base table */
baseTableRestrictionList.removeAllElements();
currentJoinStrategy.getBasePredicates(predList, baseTableRestrictionList, this);
/* RESOLVE: Need to figure out how to cache the StoreCostController */
StoreCostController scc = getStoreCostController(cd);
CostEstimate costEst = getScratchCostEstimate(optimizer);
/* Does the conglomerate match at most one row? */
if (isOneRowResultSet(cd, baseTableRestrictionList)) {
/*
** Tell the RowOrdering that this optimizable is always ordered.
** It will figure out whether it is really always ordered in the
** context of the outer tables and their orderings.
*/
rowOrdering.optimizableAlwaysOrdered(this);
singleScanRowCount = 1.0;
/* Yes, the cost is to fetch exactly one row */
// RESOLVE: NEED TO FIGURE OUT HOW TO GET REFERENCED COLUMN LIST,
// FIELD STATES, AND ACCESS TYPE
cost = scc.getFetchFromFullKeyCost((FormatableBitSet) null, 0);
if (optimizerTracingIsOn()) {
getOptimizerTracer().traceSingleMatchedRowCost(cost, tableNumber);
}
costEst.setCost(cost, 1.0d, 1.0d);
/*
** Let the join strategy decide whether the cost of the base
** scan is a single scan, or a scan per outer row.
** NOTE: The multiplication should only be done against the
** total row count, not the singleScanRowCount.
*/
double newCost = costEst.getEstimatedCost();
if (currentJoinStrategy.multiplyBaseCostByOuterRows()) {
newCost *= outerCost.rowCount();
}
costEst.setCost(newCost, costEst.rowCount() * outerCost.rowCount(), costEst.singleScanRowCount());
/*
** Choose the lock mode. If the start/stop conditions are
** constant, choose row locking, because we will always match
** the same row. If they are not constant (i.e. they include
** a join), we decide whether to do row locking based on
** the total number of rows for the life of the query.
*/
boolean constantStartStop = true;
for (int i = 0; i < predList.size(); i++) {
OptimizablePredicate pred = predList.getOptPredicate(i);
/*
** The predicates are in index order, so the start and
** stop keys should be first.
*/
if (!(pred.isStartKey() || pred.isStopKey())) {
break;
}
/* Stop when we've found a join */
if (!pred.getReferencedMap().hasSingleBitSet()) {
constantStartStop = false;
break;
}
}
if (constantStartStop) {
currAccessPath.setLockMode(TransactionController.MODE_RECORD);
if (optimizerTracingIsOn()) {
getOptimizerTracer().traceConstantStartStopPositions();
}
} else {
setLockingBasedOnThreshold(optimizer, costEst.rowCount());
}
if (optimizerTracingIsOn()) {
getOptimizerTracer().traceCostOfNScans(tableNumber, outerCost.rowCount(), costEst);
}
/* Add in cost of fetching base row for non-covering index */
if (cd.isIndex() && (!isCoveringIndex(cd))) {
double singleFetchCost = getBaseCostController().getFetchFromRowLocationCost((FormatableBitSet) null, 0);
// The estimated row count is always 1 here, although the
// index scan may actually return 0 rows, depending on whether
// or not the predicates match a key. It is assumed that a
// match is more likely than a miss, hence the row count is 1.
// Note (DERBY-6011): Alternative (non-unique) indexes may come
// up with row counts lower than 1 because they multiply with
// the selectivity, especially if the table is almost empty.
// This makes the optimizer prefer non-unique indexes if there
// are not so many rows in the table. We still want to use the
// unique index in that case, as the performance difference
// between the different scans on a small table is small, and
// the unique index is likely to lock fewer rows and reduce
// the chance of deadlocks. Therefore, we compensate by
// making the row count at least 1 for the non-unique index.
// See reference to DERBY-6011 further down in this method.
cost = singleFetchCost * costEst.rowCount();
costEst.setEstimatedCost(costEst.getEstimatedCost() + cost);
if (optimizerTracingIsOn()) {
getOptimizerTracer().traceNonCoveringIndexCost(cost, tableNumber);
}
}
} else {
/* Conglomerate might match more than one row */
/*
** Some predicates are good for start/stop, but we don't know
** the values they are being compared to at this time, so we
** estimate their selectivity in language rather than ask the
** store about them . The predicates on the first column of
** the conglomerate reduce the number of pages and rows scanned.
** The predicates on columns after the first reduce the number
** of rows scanned, but have a much smaller effect on the number
** of pages scanned, so we keep track of these selectivities in
** two separate variables: extraFirstColumnSelectivity and
** extraStartStopSelectivity. (Theoretically, we could try to
** figure out the effect of predicates after the first column
** on the number of pages scanned, but it's too hard, so we
** use these predicates only to reduce the estimated number of
** rows. For comparisons with known values, though, the store
** can figure out exactly how many rows and pages are scanned.)
**
** Other predicates are not good for start/stop. We keep track
** of their selectvities separately, because these limit the
** number of rows, but not the number of pages, and so need to
** be factored into the row count but not into the cost.
** These selectivities are factored into extraQualifierSelectivity.
**
** statStartStopSelectivity (using statistics) represents the
** selectivity of start/stop predicates that can be used to scan
** the index. If no statistics exist for the conglomerate then
** the value of this variable remains at 1.0
**
** statCompositeSelectivity (using statistics) represents the
** selectivity of all the predicates (including NonBaseTable
** predicates). This represents the most educated guess [among
** all the wild surmises in this routine] as to the number
** of rows that will be returned from this joinNode.
** If no statistics exist on the table or no statistics at all
** can be found to satisfy the predicates at this join opertor,
** then statCompositeSelectivity is left initialized at 1.0
*/
double extraFirstColumnSelectivity = 1.0d;
double extraStartStopSelectivity = 1.0d;
double extraQualifierSelectivity = 1.0d;
double extraNonQualifierSelectivity = 1.0d;
double statStartStopSelectivity = 1.0d;
double statCompositeSelectivity = 1.0d;
int numExtraFirstColumnPreds = 0;
int numExtraStartStopPreds = 0;
int numExtraQualifiers = 0;
int numExtraNonQualifiers = 0;
/*
** It is possible for something to be a start or stop predicate
** without it being possible to use it as a key for cost estimation.
** For example, with an index on (c1, c2), and the predicate
** c1 = othertable.c3 and c2 = 1, the comparison on c1 is with
** an unknown value, so we can't pass it to the store. This means
** we can't pass the comparison on c2 to the store, either.
**
** The following booleans keep track of whether we have seen
** gaps in the keys we can pass to the store.
*/
boolean startGap = false;
boolean stopGap = false;
boolean seenFirstColumn = false;
/*
** We need to figure out the number of rows touched to decide
** whether to use row locking or table locking. If the start/stop
** conditions are constant (i.e. no joins), the number of rows
** touched is the number of rows per scan. But if the start/stop
** conditions contain a join, the number of rows touched must
** take the number of outer rows into account.
*/
boolean constantStartStop = true;
boolean startStopFound = false;
/* Count the number of start and stop keys */
int startKeyNum = 0;
int stopKeyNum = 0;
OptimizablePredicate pred;
int predListSize;
if (predList != null)
predListSize = baseTableRestrictionList.size();
else
predListSize = 0;
int startStopPredCount = 0;
ColumnReference firstColumn = null;
for (int i = 0; i < predListSize; i++) {
pred = baseTableRestrictionList.getOptPredicate(i);
boolean startKey = pred.isStartKey();
boolean stopKey = pred.isStopKey();
if (startKey || stopKey) {
startStopFound = true;
if (!pred.getReferencedMap().hasSingleBitSet()) {
constantStartStop = false;
}
boolean knownConstant = pred.compareWithKnownConstant(this, true);
if (startKey) {
if (knownConstant && (!startGap)) {
startKeyNum++;
if (unknownPredicateList != null)
unknownPredicateList.removeOptPredicate(pred);
} else {
startGap = true;
}
}
if (stopKey) {
if (knownConstant && (!stopGap)) {
stopKeyNum++;
if (unknownPredicateList != null)
unknownPredicateList.removeOptPredicate(pred);
} else {
stopGap = true;
}
}
/* If either we are seeing startGap or stopGap because start/stop key is
* comparison with non-constant, we should multiply the selectivity to
* extraFirstColumnSelectivity. Beetle 4787.
*/
if (startGap || stopGap) {
// Don't include redundant join predicates in selectivity calculations
if (baseTableRestrictionList.isRedundantPredicate(i))
continue;
if (startKey && stopKey)
startStopPredCount++;
if (pred.getIndexPosition() == 0) {
extraFirstColumnSelectivity *= pred.selectivity(this);
if (!seenFirstColumn) {
ValueNode relNode = ((Predicate) pred).getAndNode().getLeftOperand();
if (relNode instanceof BinaryRelationalOperatorNode)
firstColumn = ((BinaryRelationalOperatorNode) relNode).getColumnOperand(this);
seenFirstColumn = true;
}
} else {
extraStartStopSelectivity *= pred.selectivity(this);
numExtraStartStopPreds++;
}
}
} else {
// Don't include redundant join predicates in selectivity calculations
if (baseTableRestrictionList.isRedundantPredicate(i)) {
continue;
}
/* If we have "like" predicate on the first index column, it is more likely
* to have a smaller range than "between", so we apply extra selectivity 0.2
* here. beetle 4387, 4787.
*/
if (pred instanceof Predicate) {
ValueNode leftOpnd = ((Predicate) pred).getAndNode().getLeftOperand();
if (firstColumn != null && leftOpnd instanceof LikeEscapeOperatorNode) {
LikeEscapeOperatorNode likeNode = (LikeEscapeOperatorNode) leftOpnd;
if (likeNode.getLeftOperand().requiresTypeFromContext()) {
ValueNode receiver = ((TernaryOperatorNode) likeNode).getReceiver();
if (receiver instanceof ColumnReference) {
ColumnReference cr = (ColumnReference) receiver;
if (cr.getTableNumber() == firstColumn.getTableNumber() && cr.getColumnNumber() == firstColumn.getColumnNumber())
extraFirstColumnSelectivity *= 0.2;
}
}
}
}
if (pred.isQualifier()) {
extraQualifierSelectivity *= pred.selectivity(this);
numExtraQualifiers++;
} else {
extraNonQualifierSelectivity *= pred.selectivity(this);
numExtraNonQualifiers++;
}
/*
** Strictly speaking, it shouldn't be necessary to
** indicate a gap here, since there should be no more
** start/stop predicates, but let's do it, anyway.
*/
startGap = true;
stopGap = true;
}
}
if (unknownPredicateList != null) {
statCompositeSelectivity = unknownPredicateList.selectivity(this);
if (statCompositeSelectivity == -1.0d)
statCompositeSelectivity = 1.0d;
}
if (seenFirstColumn && (startStopPredCount > 0)) {
if (statisticsForConglomerate) {
statStartStopSelectivity = tableDescriptor.selectivityForConglomerate(cd, startStopPredCount);
} else if (cd.isIndex()) {
// DERBY-3790 (Investigate if request for update
// statistics can be skipped for certain kind of
// indexes, one instance may be unique indexes based
// on one column.) But as found in DERBY-6045 (in list
// multi-probe by primary key not chosen on tables with
// >256 rows), even though we do not keep the
// statistics for single-column unique indexes, we
// should improve the selectivity of such an index
// when the index is being considered by the optimizer.
IndexRowGenerator irg = cd.getIndexDescriptor();
if (irg.isUnique() && irg.numberOfOrderedColumns() == 1 && startStopPredCount == 1) {
statStartStopSelectivity = (1 / (double) baseRowCount());
}
}
}
/*
** Factor the non-base-table predicates into the extra
** non-qualifier selectivity, since these will restrict the
** number of rows, but not the cost.
*/
extraNonQualifierSelectivity *= currentJoinStrategy.nonBasePredicateSelectivity(this, predList);
/* Create the start and stop key arrays, and fill them in */
DataValueDescriptor[] startKeys;
DataValueDescriptor[] stopKeys;
if (startKeyNum > 0)
startKeys = new DataValueDescriptor[startKeyNum];
else
startKeys = null;
if (stopKeyNum > 0)
stopKeys = new DataValueDescriptor[stopKeyNum];
else
stopKeys = null;
startKeyNum = 0;
stopKeyNum = 0;
startGap = false;
stopGap = false;
/* If we have a probe predicate that is being used as a start/stop
* key then ssKeySourceInList will hold the InListOperatorNode
* from which the probe predicate was built.
*/
InListOperatorNode ssKeySourceInList = null;
for (int i = 0; i < predListSize; i++) {
pred = baseTableRestrictionList.getOptPredicate(i);
boolean startKey = pred.isStartKey();
boolean stopKey = pred.isStopKey();
if (startKey || stopKey) {
/* A probe predicate is only useful if it can be used as
* as a start/stop key for _first_ column in an index
* (i.e. if the column position is 0). That said, we only
* allow a single start/stop key per column position in
* the index (see PredicateList.orderUsefulPredicates()).
* Those two facts combined mean that we should never have
* more than one probe predicate start/stop key for a given
* conglomerate.
*/
if (SanityManager.DEBUG) {
if ((ssKeySourceInList != null) && ((Predicate) pred).isInListProbePredicate()) {
SanityManager.THROWASSERT("Found multiple probe predicate start/stop keys" + " for conglomerate '" + cd.getConglomerateName() + "' when at most one was expected.");
}
}
/* By passing "true" in the next line we indicate that we
* should only retrieve the underlying InListOpNode *if*
* the predicate is a "probe predicate".
*/
ssKeySourceInList = ((Predicate) pred).getSourceInList(true);
boolean knownConstant = pred.compareWithKnownConstant(this, true);
if (startKey) {
if (knownConstant && (!startGap)) {
startKeys[startKeyNum] = pred.getCompareValue(this);
startKeyNum++;
} else {
startGap = true;
}
}
if (stopKey) {
if (knownConstant && (!stopGap)) {
stopKeys[stopKeyNum] = pred.getCompareValue(this);
stopKeyNum++;
} else {
stopGap = true;
}
}
} else {
startGap = true;
stopGap = true;
}
}
int startOperator;
int stopOperator;
if (baseTableRestrictionList != null) {
startOperator = baseTableRestrictionList.startOperator(this);
stopOperator = baseTableRestrictionList.stopOperator(this);
} else {
/*
** If we're doing a full scan, it doesn't matter what the
** start and stop operators are.
*/
startOperator = ScanController.NA;
stopOperator = ScanController.NA;
}
/*
** Get a row template for this conglomerate. For now, just tell
** it we are using all the columns in the row.
*/
DataValueDescriptor[] rowTemplate = getRowTemplate(cd, getBaseCostController());
/* we prefer index than table scan for concurrency reason, by a small
* adjustment on estimated row count. This affects optimizer's decision
* especially when few rows are in table. beetle 5006. This makes sense
* since the plan may stay long before we actually check and invalidate it.
* And new rows may be inserted before we check and invalidate the plan.
* Here we only prefer index that has start/stop key from predicates. Non-
* constant start/stop key case is taken care of by selectivity later.
*/
long baseRC = (startKeys != null || stopKeys != null) ? baseRowCount() : baseRowCount() + 5;
scc.getScanCost(currentJoinStrategy.scanCostType(), baseRC, 1, forUpdate(), (FormatableBitSet) null, rowTemplate, startKeys, startOperator, stopKeys, stopOperator, false, 0, costEst);
/* initialPositionCost is the first part of the index scan cost we get above.
* It's the cost of initial positioning/fetch of key. So it's unrelated to
* row count of how many rows we fetch from index. We extract it here so that
* we only multiply selectivity to the other part of index scan cost, which is
* nearly linear, to make cost calculation more accurate and fair, especially
* compared to the plan of "one row result set" (unique index). beetle 4787.
*/
double initialPositionCost = 0.0;
if (cd.isIndex()) {
initialPositionCost = scc.getFetchFromFullKeyCost((FormatableBitSet) null, 0);
/* oneRowResultSetForSomeConglom means there's a unique index, but certainly
* not this one since we are here. If store knows this non-unique index
* won't return any row or just returns one row (eg., the predicate is a
* comparison with constant or almost empty table), we do minor adjustment
* on cost (affecting decision for covering index) and rc (decision for
* non-covering). The purpose is favoring unique index. beetle 5006.
*/
if (oneRowResultSetForSomeConglom && costEst.rowCount() <= 1) {
costEst.setCost(costEst.getEstimatedCost() * 2, costEst.rowCount() + 2, costEst.singleScanRowCount() + 2);
}
}
if (optimizerTracingIsOn()) {
getOptimizerTracer().traceCostOfConglomerateScan(tableNumber, cd, costEst, numExtraFirstColumnPreds, extraFirstColumnSelectivity, numExtraStartStopPreds, extraStartStopSelectivity, startStopPredCount, statStartStopSelectivity, numExtraQualifiers, extraQualifierSelectivity, numExtraNonQualifiers, extraNonQualifierSelectivity);
}
/* initial row count is the row count without applying
any predicates-- we use this at the end of the routine
when we use statistics to recompute the row count.
*/
double initialRowCount = costEst.rowCount();
if (statStartStopSelectivity != 1.0d) {
/*
** If statistics exist use the selectivity computed
** from the statistics to calculate the cost.
** NOTE: we apply this selectivity to the cost as well
** as both the row counts. In the absence of statistics
** we only applied the FirstColumnSelectivity to the
** cost.
*/
costEst.setCost(scanCostAfterSelectivity(costEst.getEstimatedCost(), initialPositionCost, statStartStopSelectivity, oneRowResultSetForSomeConglom), costEst.rowCount() * statStartStopSelectivity, costEst.singleScanRowCount() * statStartStopSelectivity);
if (optimizerTracingIsOn()) {
getOptimizerTracer().traceCostIncludingStatsForIndex(costEst, tableNumber);
}
} else {
/*
** Factor in the extra selectivity on the first column
** of the conglomerate (see comment above).
** NOTE: In this case we want to apply the selectivity to both
** the total row count and singleScanRowCount.
*/
if (extraFirstColumnSelectivity != 1.0d) {
costEst.setCost(scanCostAfterSelectivity(costEst.getEstimatedCost(), initialPositionCost, extraFirstColumnSelectivity, oneRowResultSetForSomeConglom), costEst.rowCount() * extraFirstColumnSelectivity, costEst.singleScanRowCount() * extraFirstColumnSelectivity);
if (optimizerTracingIsOn()) {
getOptimizerTracer().traceCostIncludingExtra1stColumnSelectivity(costEst, tableNumber);
}
}
/* Factor in the extra start/stop selectivity (see comment above).
* NOTE: In this case we want to apply the selectivity to both
* the row count and singleScanRowCount.
*/
if (extraStartStopSelectivity != 1.0d) {
costEst.setCost(costEst.getEstimatedCost(), costEst.rowCount() * extraStartStopSelectivity, costEst.singleScanRowCount() * extraStartStopSelectivity);
if (optimizerTracingIsOn()) {
getOptimizerTracer().traceCostIncludingExtraStartStop(costEst, tableNumber);
}
}
}
/* If the start and stop key came from an IN-list "probe predicate"
* then we need to adjust the cost estimate. The probe predicate
* is of the form "col = ?" and we currently have the estimated
* cost of probing the index a single time for "?". But with an
* IN-list we don't just probe the index once; we're going to
* probe it once for every value in the IN-list. And we are going
* to potentially return an additional row (or set of rows) for
* each probe. To account for this "multi-probing" we take the
* costEstimate and multiply each of its fields by the size of
* the IN-list.
*
* Note: If the IN-list has duplicate values then this simple
* multiplication could give us an elevated cost (because we
* only probe the index for each *non-duplicate* value in the
* IN-list). But for now, we're saying that's okay.
*/
if (ssKeySourceInList != null) {
int listSize = ssKeySourceInList.getRightOperandList().size();
double rc = costEst.rowCount() * listSize;
double ssrc = costEst.singleScanRowCount() * listSize;
/* If multiplication by listSize returns more rows than are
* in the scan then just use the number of rows in the scan.
*/
costEst.setCost(costEst.getEstimatedCost() * listSize, rc > initialRowCount ? initialRowCount : rc, ssrc > initialRowCount ? initialRowCount : ssrc);
}
/*
** Figure out whether to do row locking or table locking.
**
** If there are no start/stop predicates, we're doing full
** conglomerate scans, so do table locking.
*/
if (!startStopFound) {
currAccessPath.setLockMode(TransactionController.MODE_TABLE);
if (optimizerTracingIsOn()) {
getOptimizerTracer().traceNoStartStopPosition();
}
} else {
/*
** Figure out the number of rows touched. If all the
** start/stop predicates are constant, the number of
** rows touched is the number of rows per scan.
** This is also true for join strategies that scan the
** inner table only once (like hash join) - we can
** tell if we have one of those, because
** multiplyBaseCostByOuterRows() will return false.
*/
double rowsTouched = costEst.rowCount();
if ((!constantStartStop) && currentJoinStrategy.multiplyBaseCostByOuterRows()) {
/*
** This is a join where the inner table is scanned
** more than once, so we have to take the number
** of outer rows into account. The formula for this
** works out as follows:
**
** total rows in table = r
** number of rows touched per scan = s
** number of outer rows = o
** proportion of rows touched per scan = s / r
** proportion of rows not touched per scan =
** 1 - (s / r)
** proportion of rows not touched for all scans =
** (1 - (s / r)) ** o
** proportion of rows touched for all scans =
** 1 - ((1 - (s / r)) ** o)
** total rows touched for all scans =
** r * (1 - ((1 - (s / r)) ** o))
**
** In doing these calculations, we must be careful not
** to divide by zero. This could happen if there are
** no rows in the table. In this case, let's do table
** locking.
*/
double r = baseRowCount();
if (r > 0.0) {
double s = costEst.rowCount();
double o = outerCost.rowCount();
double pRowsNotTouchedPerScan = 1.0 - (s / r);
double pRowsNotTouchedAllScans = Math.pow(pRowsNotTouchedPerScan, o);
double pRowsTouchedAllScans = 1.0 - pRowsNotTouchedAllScans;
double rowsTouchedAllScans = r * pRowsTouchedAllScans;
rowsTouched = rowsTouchedAllScans;
} else {
/* See comments in setLockingBasedOnThreshold */
rowsTouched = optimizer.tableLockThreshold() + 1;
}
}
setLockingBasedOnThreshold(optimizer, rowsTouched);
}
/*
** If the index isn't covering, add the cost of getting the
** base row. Only apply extraFirstColumnSelectivity and extraStartStopSelectivity
** before we do this, don't apply extraQualifierSelectivity etc. The
** reason is that the row count here should be the number of index rows
** (and hence heap rows) we get, and we need to fetch all those rows, even
** though later on some of them may be filtered out by other predicates.
** beetle 4787.
*/
if (cd.isIndex() && (!isCoveringIndex(cd))) {
double singleFetchCost = getBaseCostController().getFetchFromRowLocationCost((FormatableBitSet) null, 0);
// The number of rows we expect to fetch from the base table.
double rowsToFetch = costEst.rowCount();
if (oneRowResultSetForSomeConglom) {
// DERBY-6011: We know that there is a unique index, and
// that there are predicates that guarantee that at most
// one row will be fetched from the unique index. The
// unique alternative always has 1 as estimated row count
// (see reference to DERBY-6011 further up in this method),
// even though it could actually return 0 rows.
//
// If the alternative that's being considered here has
// expected row count less than 1, it is going to have
// lower estimated cost for fetching base rows. We prefer
// unique indexes, as they lock fewer rows and allow more
// concurrency. Therefore, make sure the cost estimate for
// this alternative includes at least fetching one row from
// the base table.
rowsToFetch = Math.max(1.0d, rowsToFetch);
}
cost = singleFetchCost * rowsToFetch;
costEst.setEstimatedCost(costEst.getEstimatedCost() + cost);
if (optimizerTracingIsOn()) {
getOptimizerTracer().traceCostOfNoncoveringIndex(costEst, tableNumber);
}
}
/* Factor in the extra qualifier selectivity (see comment above).
* NOTE: In this case we want to apply the selectivity to both
* the row count and singleScanRowCount.
*/
if (extraQualifierSelectivity != 1.0d) {
costEst.setCost(costEst.getEstimatedCost(), costEst.rowCount() * extraQualifierSelectivity, costEst.singleScanRowCount() * extraQualifierSelectivity);
if (optimizerTracingIsOn()) {
getOptimizerTracer().traceCostIncludingExtraQualifierSelectivity(costEst, tableNumber);
}
}
singleScanRowCount = costEst.singleScanRowCount();
/*
** Let the join strategy decide whether the cost of the base
** scan is a single scan, or a scan per outer row.
** NOTE: In this case we only want to multiply against the
** total row count, not the singleScanRowCount.
** NOTE: Do not multiply row count if we determined that
** conglomerate is a 1 row result set when costing nested
** loop. (eg, we will find at most 1 match when probing
** the hash table.)
*/
double newCost = costEst.getEstimatedCost();
double rowCnt = costEst.rowCount();
/*
** RESOLVE - If there is a unique index on the joining
** columns, the number of matching rows will equal the
** number of outer rows, even if we're not considering the
** unique index for this access path. To figure that out,
** however, would require an analysis phase at the beginning
** of optimization. So, we'll always multiply the number
** of outer rows by the number of rows per scan. This will
** give us a higher than actual row count when there is
** such a unique index, which will bias the optimizer toward
** using the unique index. This is probably OK most of the
** time, since the optimizer would probably choose the
** unique index, anyway. But it would be better if the
** optimizer set the row count properly in this case.
*/
if (currentJoinStrategy.multiplyBaseCostByOuterRows()) {
newCost *= outerCost.rowCount();
}
rowCnt *= outerCost.rowCount();
initialRowCount *= outerCost.rowCount();
/*
** If this table can generate at most one row per scan,
** the maximum row count is the number of outer rows.
** NOTE: This does not completely take care of the RESOLVE
** in the above comment, since it will only notice
** one-row result sets for the current join order.
*/
if (oneRowResultSetForSomeConglom) {
if (outerCost.rowCount() < rowCnt) {
rowCnt = outerCost.rowCount();
}
}
/*
** The estimated cost may be too high for indexes, if the
** estimated row count exceeds the maximum. Only do this
** if we're not doing a full scan, and the start/stop position
** is not constant (i.e. we're doing a join on the first column
** of the index) - the reason being that this is when the
** cost may be inaccurate.
*/
if (cd.isIndex() && startStopFound && (!constantStartStop)) {
/*
** Does any table outer to this one have a unique key on
** a subset of the joining columns? If so, the maximum number
** of rows that this table can return is the number of rows
** in this table times the maximum number of times each key
** can be repeated.
*/
double scanUniquenessFactor = optimizer.uniqueJoinWithOuterTable(baseTableRestrictionList);
if (scanUniquenessFactor > 0.0) {
/*
** A positive uniqueness factor means there is a unique
** outer join key. The value is the reciprocal of the
** maximum number of duplicates for each unique key
** (the duplicates can be caused by other joining tables).
*/
double maxRows = ((double) baseRowCount()) / scanUniquenessFactor;
if (rowCnt > maxRows) {
/*
** The estimated row count is too high. Adjust the
** estimated cost downwards proportionately to
** match the maximum number of rows.
*/
newCost *= (maxRows / rowCnt);
}
}
}
/* The estimated total row count may be too high */
if (tableUniquenessFactor > 0.0) {
/*
** A positive uniqueness factor means there is a unique outer
** join key. The value is the reciprocal of the maximum number
** of duplicates for each unique key (the duplicates can be
** caused by other joining tables).
*/
double maxRows = ((double) baseRowCount()) / tableUniquenessFactor;
if (rowCnt > maxRows) {
/*
** The estimated row count is too high. Set it to the
** maximum row count.
*/
rowCnt = maxRows;
}
}
costEst.setCost(newCost, rowCnt, costEst.singleScanRowCount());
if (optimizerTracingIsOn()) {
getOptimizerTracer().traceCostOfNScans(tableNumber, outerCost.rowCount(), costEst);
}
/*
** Now figure in the cost of the non-qualifier predicates.
** existsBaseTables have a row count of 1
*/
double rc = -1, src = -1;
if (existsBaseTable)
rc = src = 1;
else // beetle 4787
if (extraNonQualifierSelectivity != 1.0d) {
rc = oneRowResultSetForSomeConglom ? costEst.rowCount() : costEst.rowCount() * extraNonQualifierSelectivity;
src = costEst.singleScanRowCount() * extraNonQualifierSelectivity;
}
if (// changed
rc != -1) {
costEst.setCost(costEst.getEstimatedCost(), rc, src);
if (optimizerTracingIsOn()) {
getOptimizerTracer().traceCostIncludingExtraNonQualifierSelectivity(costEst, tableNumber);
}
}
recomputeRowCount: if (statisticsForTable && !oneRowResultSetForSomeConglom && (statCompositeSelectivity != 1.0d)) {
/* if we have statistics we should use statistics to calculate
row count-- if it has been determined that this table
returns one row for some conglomerate then there is no need
to do this recalculation
*/
double compositeStatRC = initialRowCount * statCompositeSelectivity;
if (optimizerTracingIsOn()) {
getOptimizerTracer().traceCompositeSelectivityFromStatistics(statCompositeSelectivity);
}
if (tableUniquenessFactor > 0.0) {
/* If the row count from the composite statistics
comes up more than what the table uniqueness
factor indicates then lets stick with the current
row count.
*/
if (compositeStatRC > (baseRowCount() * tableUniquenessFactor)) {
break recomputeRowCount;
}
}
/* set the row count and the single scan row count
to the initialRowCount. initialRowCount is the product
of the RC from store * RC of the outerCost.
Thus RC = initialRowCount * the selectivity from stats.
SingleRC = RC / outerCost.rowCount().
*/
costEst.setCost(costEst.getEstimatedCost(), compositeStatRC, (existsBaseTable) ? 1 : compositeStatRC / outerCost.rowCount());
if (optimizerTracingIsOn()) {
getOptimizerTracer().traceCostIncludingCompositeSelectivityFromStats(costEst, tableNumber);
}
}
}
/* Put the base predicates back in the predicate list */
currentJoinStrategy.putBasePredicates(predList, baseTableRestrictionList);
return costEst;
}
use of org.apache.derby.iapi.sql.compile.JoinStrategy in project derby by apache.
the class FromBaseTable method getScanArguments.
/**
 * Registers the saved objects the scan needs with the class builder and
 * then hands off to the join strategy of the chosen access path, which
 * generates the actual scan arguments.
 *
 * @param acb the class builder in which the saved objects are registered
 * @param mb the method builder passed through to the join strategy
 * @return the value returned by {@link JoinStrategy#getScanArgs}
 *         (presumably the number of arguments generated -- confirm
 *         against the JoinStrategy contract)
 * @exception StandardException Thrown on error
 */
private int getScanArguments(ExpressionClassBuilder acb, MethodBuilder mb) throws StandardException {
    // The result row template is always the first saved object added here.
    int rowTemplateItem = acb.addItem(getResultColumns().buildRowTemplate(referencedCols, false));

    // The referenced columns go on the saved objects chain when there
    // are any; -1 signals that nothing was saved.
    int referencedColsItem = (referencedCols == null) ? -1 : acb.addItem(referencedCols);

    // beetle entry 3865: updateable cursor using index.
    // When the scan goes through an index, save the base column position
    // of every index column, negated for columns that are not ascending.
    int indexColsItem = -1;
    if (isCursorTargetTable() || getUpdateLocks) {
        ConglomerateDescriptor conglomerate = getTrulyTheBestAccessPath().getConglomerateDescriptor();
        if (conglomerate.isIndex()) {
            int[] basePositions = conglomerate.getIndexDescriptor().baseColumnPositions();
            boolean[] ascending = conglomerate.getIndexDescriptor().isAscending();
            int[] signedPositions = new int[basePositions.length];
            for (int col = 0; col < signedPositions.length; col++) {
                if (ascending[col]) {
                    signedPositions[col] = basePositions[col];
                } else {
                    signedPositions[col] = -basePositions[col];
                }
            }
            indexColsItem = acb.addItem(signedPositions);
        }
    }

    AccessPath bestPath = getTrulyTheBestAccessPath();
    JoinStrategy chosenStrategy = bestPath.getJoinStrategy();

    // Sanity check: bulkFetch must still be UNSET whenever the chosen
    // join strategy reports that bulk fetch is not OK.
    if (SanityManager.DEBUG) {
        boolean bulkFetchAllowed = chosenStrategy.bulkFetchOK();
        if (!bulkFetchAllowed && (bulkFetch != UNSET)) {
            SanityManager.THROWASSERT("bulkFetch should not be set " + "for the join strategy " + chosenStrategy.getName());
        }
    }

    // Arguments are listed one per line, in the order the join strategy
    // declares them; they are evaluated left to right.
    return chosenStrategy.getScanArgs(
            getLanguageConnectionContext().getTransactionCompile(),
            mb,
            this,
            storeRestrictionList,
            nonStoreRestrictionList,
            acb,
            bulkFetch,
            rowTemplateItem,
            referencedColsItem,
            indexColsItem,
            getTrulyTheBestAccessPath().getLockMode(),
            (tableDescriptor.getLockGranularity() == TableDescriptor.TABLE_LOCK_GRANULARITY),
            getCompilerContext().getScanIsolationLevel(),
            bestPath.getOptimizer().getMaxMemoryPerTable(),
            multiProbing);
}
Aggregations