use of org.apache.derby.iapi.types.DataValueDescriptor in project derby by apache.
the class BackingStoreHashtable method spillToDisk.
// end of doSpaceAccounting
/**
* Determine whether a new row should be spilled to disk and, if so, do it.
*
* @param columnValues Actual columns from source row.
* @param rowLocation Optional row location.
*
* @return true if the row was spilled to disk, false if not
*
* @exception StandardException Standard exception policy.
*/
private boolean spillToDisk(DataValueDescriptor[] columnValues, RowLocation rowLocation) throws StandardException {
// Once we have started spilling, all new rows will go to disk, even if we have freed up some
// memory by moving duplicates to disk. This simplifies handling of duplicates and accounting.
DataValueDescriptor[] diskRow = null;
if (diskHashtable == null) {
if (max_inmemory_rowcnt > 0) {
if (inmemory_rowcnt < max_inmemory_rowcnt) {
// Do not spill
return false;
}
} else if (max_inmemory_size > getEstimatedMemUsage(includeRowLocations() ? new LocatedRow(columnValues, rowLocation) : columnValues)) {
return false;
}
// Want to start spilling
diskRow = makeDiskRow(columnValues, rowLocation);
diskHashtable = new DiskHashtable(tc, diskRow, // TODO-COLLATION, set non default collation if necessary.
(int[]) null, key_column_numbers, remove_duplicates, keepAfterCommit);
}
Object key = KeyHasher.buildHashKey(columnValues, key_column_numbers);
Object duplicateValue = hash_table.get(key);
if (duplicateValue != null) {
if (remove_duplicates)
// a degenerate case of spilling
return true;
// This simplifies finding duplicates: they are either all in memory or all on disk.
if (duplicateValue instanceof List) {
List duplicateVec = (List) duplicateValue;
for (int i = duplicateVec.size() - 1; i >= 0; i--) {
diskHashtable.put(key, makeDiskRow(duplicateVec.get(i)));
}
} else {
diskHashtable.put(key, makeDiskRow(duplicateValue));
}
hash_table.remove(key);
}
if (diskRow == null) {
diskRow = makeDiskRow(columnValues, rowLocation);
}
diskHashtable.put(key, diskRow);
return true;
}
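spillToDisk() above only decides when to start spilling and how to migrate duplicates; the disk side itself is handled by DiskHashtable. The sketch below is a minimal, hypothetical miniature of that policy using plain collections (SpillingHashtable and all of its members are invented names; a real implementation would persist spilled rows through the store layer, as the Derby code does):

import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

/** Hypothetical miniature of the spill policy above; all names are invented,
 * and a plain Map stands in for the store-backed DiskHashtable. */
public class SpillingHashtable {
    private final int maxInMemoryRows;
    private final Map<Object, List<Object[]>> inMemory = new HashMap<>();
    private Map<Object, List<Object[]>> disk; // null until the first spill

    public SpillingHashtable(int maxInMemoryRows) {
        this.maxInMemoryRows = maxInMemoryRows;
    }

    public void put(Object key, Object[] row) {
        // Before the first spill, keep rows in memory while under the cap.
        if (disk == null) {
            int rows = inMemory.values().stream().mapToInt(List::size).sum();
            if (rows < maxInMemoryRows) {
                inMemory.computeIfAbsent(key, k -> new ArrayList<>()).add(row);
                return;
            }
            disk = new HashMap<>(); // start spilling: all new rows go to disk
        }
        // Keep all duplicates of a key on one side only: migrate any
        // in-memory rows for this key to disk before adding the new row.
        List<Object[]> diskRows = disk.computeIfAbsent(key, k -> new ArrayList<>());
        List<Object[]> memRows = inMemory.remove(key);
        if (memRows != null) {
            diskRows.addAll(memRows);
        }
        diskRows.add(row);
    }

    public List<Object[]> get(Object key) {
        List<Object[]> mem = inMemory.get(key);
        return (mem != null) ? mem : (disk == null ? null : disk.get(key));
    }
}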
use of org.apache.derby.iapi.types.DataValueDescriptor in project derby by apache.
the class FromBaseTable method estimateCost.
/**
* <p>
* Estimate the cost of scanning this {@code FromBaseTable} using the
* given predicate list with the given conglomerate.
* </p>
*
* <p>
* If the table contains little data, the cost estimate might be adjusted
* to make it more likely that an index scan will be preferred to a table
* scan, and a unique index will be preferred to a non-unique index. Even
* though such a plan may be slightly suboptimal when seen in isolation,
* using indexes, unique indexes in particular, needs fewer locks and
* allows more concurrency.
* </p>
*
* @see org.apache.derby.iapi.sql.compile.Optimizable#estimateCost
*
* @exception StandardException Thrown on error
*/
@Override
public CostEstimate estimateCost(OptimizablePredicateList predList, ConglomerateDescriptor cd, CostEstimate outerCost, Optimizer optimizer, RowOrdering rowOrdering) throws StandardException {
double cost;
boolean statisticsForTable = false;
boolean statisticsForConglomerate = false;
/* unknownPredicateList contains all predicates whose effect on
* cost/selectivity can't be calculated by the store.
*/
PredicateList unknownPredicateList = null;
if (optimizer.useStatistics() && predList != null) {
/* if the user has specified that we don't use statistics,
pretend that statistics don't exist.
*/
statisticsForConglomerate = tableDescriptor.statisticsExist(cd);
statisticsForTable = tableDescriptor.statisticsExist(null);
unknownPredicateList = new PredicateList(getContextManager());
predList.copyPredicatesToOtherList(unknownPredicateList);
// Check whether this table's index statistics need to get updated.
if (!hasCheckedIndexStats) {
hasCheckedIndexStats = true;
// Mark the table for an index statistics update (performed by the background daemon) if it qualifies.
if (qualifiesForStatisticsUpdateCheck(tableDescriptor)) {
tableDescriptor.markForIndexStatsUpdate(baseRowCount());
}
}
}
AccessPath currAccessPath = getCurrentAccessPath();
JoinStrategy currentJoinStrategy = currAccessPath.getJoinStrategy();
if (optimizerTracingIsOn()) {
getOptimizerTracer().traceEstimatingCostOfConglomerate(cd, tableNumber);
}
/* Get the uniqueness factory for later use (see below) */
double tableUniquenessFactor = optimizer.uniqueJoinWithOuterTable(predList);
boolean oneRowResultSetForSomeConglom = isOneRowResultSet(predList);
/* Get the predicates that can be used for scanning the base table */
baseTableRestrictionList.removeAllElements();
currentJoinStrategy.getBasePredicates(predList, baseTableRestrictionList, this);
/* RESOLVE: Need to figure out how to cache the StoreCostController */
StoreCostController scc = getStoreCostController(cd);
CostEstimate costEst = getScratchCostEstimate(optimizer);
/* Does the conglomerate match at most one row? */
if (isOneRowResultSet(cd, baseTableRestrictionList)) {
/*
** Tell the RowOrdering that this optimizable is always ordered.
** It will figure out whether it is really always ordered in the
** context of the outer tables and their orderings.
*/
rowOrdering.optimizableAlwaysOrdered(this);
singleScanRowCount = 1.0;
/* Yes, the cost is to fetch exactly one row */
// RESOLVE: NEED TO FIGURE OUT HOW TO GET REFERENCED COLUMN LIST,
// FIELD STATES, AND ACCESS TYPE
cost = scc.getFetchFromFullKeyCost((FormatableBitSet) null, 0);
if (optimizerTracingIsOn()) {
getOptimizerTracer().traceSingleMatchedRowCost(cost, tableNumber);
}
costEst.setCost(cost, 1.0d, 1.0d);
/*
** Let the join strategy decide whether the cost of the base
** scan is a single scan, or a scan per outer row.
** NOTE: The multiplication should only be done against the
** total row count, not the singleScanRowCount.
*/
double newCost = costEst.getEstimatedCost();
if (currentJoinStrategy.multiplyBaseCostByOuterRows()) {
newCost *= outerCost.rowCount();
}
costEst.setCost(newCost, costEst.rowCount() * outerCost.rowCount(), costEst.singleScanRowCount());
/*
** Choose the lock mode. If the start/stop conditions are
** constant, choose row locking, because we will always match
** the same row. If they are not constant (i.e. they include
** a join), we decide whether to do row locking based on
** the total number of rows for the life of the query.
*/
boolean constantStartStop = true;
for (int i = 0; i < predList.size(); i++) {
OptimizablePredicate pred = predList.getOptPredicate(i);
/*
** The predicates are in index order, so the start and
** stop keys should be first.
*/
if (!(pred.isStartKey() || pred.isStopKey())) {
break;
}
/* Stop when we've found a join */
if (!pred.getReferencedMap().hasSingleBitSet()) {
constantStartStop = false;
break;
}
}
if (constantStartStop) {
currAccessPath.setLockMode(TransactionController.MODE_RECORD);
if (optimizerTracingIsOn()) {
getOptimizerTracer().traceConstantStartStopPositions();
}
} else {
setLockingBasedOnThreshold(optimizer, costEst.rowCount());
}
if (optimizerTracingIsOn()) {
getOptimizerTracer().traceCostOfNScans(tableNumber, outerCost.rowCount(), costEst);
}
/* Add in cost of fetching base row for non-covering index */
if (cd.isIndex() && (!isCoveringIndex(cd))) {
double singleFetchCost = getBaseCostController().getFetchFromRowLocationCost((FormatableBitSet) null, 0);
// The estimated row count is always 1 here, although the
// index scan may actually return 0 rows, depending on whether
// or not the predicates match a key. It is assumed that a
// match is more likely than a miss, hence the row count is 1.
// Note (DERBY-6011): Alternative (non-unique) indexes may come
// up with row counts lower than 1 because they multiply with
// the selectivity, especially if the table is almost empty.
// This makes the optimizer prefer non-unique indexes if there
// are not so many rows in the table. We still want to use the
// unique index in that case, as the performance difference
// between the different scans on a small table is small, and
// the unique index is likely to lock fewer rows and reduce
// the chance of deadlocks. Therefore, we compensate by
// making the row count at least 1 for the non-unique index.
// See reference to DERBY-6011 further down in this method.
cost = singleFetchCost * costEst.rowCount();
costEst.setEstimatedCost(costEst.getEstimatedCost() + cost);
if (optimizerTracingIsOn()) {
getOptimizerTracer().traceNonCoveringIndexCost(cost, tableNumber);
}
}
} else {
/* Conglomerate might match more than one row */
/*
** Some predicates are good for start/stop, but we don't know
** the values they are being compared to at this time, so we
** estimate their selectivity in language rather than ask the
** store about them. The predicates on the first column of
** the conglomerate reduce the number of pages and rows scanned.
** The predicates on columns after the first reduce the number
** of rows scanned, but have a much smaller effect on the number
** of pages scanned, so we keep track of these selectivities in
** two separate variables: extraFirstColumnSelectivity and
** extraStartStopSelectivity. (Theoretically, we could try to
** figure out the effect of predicates after the first column
** on the number of pages scanned, but it's too hard, so we
** use these predicates only to reduce the estimated number of
** rows. For comparisons with known values, though, the store
** can figure out exactly how many rows and pages are scanned.)
**
** Other predicates are not good for start/stop. We keep track
** of their selectivities separately, because these limit the
** number of rows, but not the number of pages, and so need to
** be factored into the row count but not into the cost.
** These selectivities are factored into extraQualifierSelectivity.
**
** statStartStopSelectivity (using statistics) represents the
** selectivity of start/stop predicates that can be used to scan
** the index. If no statistics exist for the conglomerate then
** the value of this variable remains at 1.0
**
** statCompositeSelectivity (using statistics) represents the
** selectivity of all the predicates (including NonBaseTable
** predicates). This represents the most educated guess [among
** all the wild surmises in this routine] as to the number
** of rows that will be returned from this joinNode.
** If no statistics exist on the table or no statistics at all
** can be found to satisfy the predicates at this join operator,
** then statCompositeSelectivity is left initialized at 1.0
*/
double extraFirstColumnSelectivity = 1.0d;
double extraStartStopSelectivity = 1.0d;
double extraQualifierSelectivity = 1.0d;
double extraNonQualifierSelectivity = 1.0d;
double statStartStopSelectivity = 1.0d;
double statCompositeSelectivity = 1.0d;
int numExtraFirstColumnPreds = 0;
int numExtraStartStopPreds = 0;
int numExtraQualifiers = 0;
int numExtraNonQualifiers = 0;
/*
** It is possible for something to be a start or stop predicate
** without it being possible to use it as a key for cost estimation.
** For example, with an index on (c1, c2), and the predicate
** c1 = othertable.c3 and c2 = 1, the comparison on c1 is with
** an unknown value, so we can't pass it to the store. This means
** we can't pass the comparison on c2 to the store, either.
**
** The following booleans keep track of whether we have seen
** gaps in the keys we can pass to the store.
*/
boolean startGap = false;
boolean stopGap = false;
boolean seenFirstColumn = false;
/*
** We need to figure out the number of rows touched to decide
** whether to use row locking or table locking. If the start/stop
** conditions are constant (i.e. no joins), the number of rows
** touched is the number of rows per scan. But if the start/stop
** conditions contain a join, the number of rows touched must
** take the number of outer rows into account.
*/
boolean constantStartStop = true;
boolean startStopFound = false;
/* Count the number of start and stop keys */
int startKeyNum = 0;
int stopKeyNum = 0;
OptimizablePredicate pred;
int predListSize;
if (predList != null)
predListSize = baseTableRestrictionList.size();
else
predListSize = 0;
int startStopPredCount = 0;
ColumnReference firstColumn = null;
for (int i = 0; i < predListSize; i++) {
pred = baseTableRestrictionList.getOptPredicate(i);
boolean startKey = pred.isStartKey();
boolean stopKey = pred.isStopKey();
if (startKey || stopKey) {
startStopFound = true;
if (!pred.getReferencedMap().hasSingleBitSet()) {
constantStartStop = false;
}
boolean knownConstant = pred.compareWithKnownConstant(this, true);
if (startKey) {
if (knownConstant && (!startGap)) {
startKeyNum++;
if (unknownPredicateList != null)
unknownPredicateList.removeOptPredicate(pred);
} else {
startGap = true;
}
}
if (stopKey) {
if (knownConstant && (!stopGap)) {
stopKeyNum++;
if (unknownPredicateList != null)
unknownPredicateList.removeOptPredicate(pred);
} else {
stopGap = true;
}
}
/* If we are seeing either startGap or stopGap because a start/stop key is
* a comparison with a non-constant, we should multiply the selectivity into
* extraFirstColumnSelectivity. Beetle 4787.
*/
if (startGap || stopGap) {
// Don't include redundant join predicates in selectivity calculations
if (baseTableRestrictionList.isRedundantPredicate(i))
continue;
if (startKey && stopKey)
startStopPredCount++;
if (pred.getIndexPosition() == 0) {
extraFirstColumnSelectivity *= pred.selectivity(this);
if (!seenFirstColumn) {
ValueNode relNode = ((Predicate) pred).getAndNode().getLeftOperand();
if (relNode instanceof BinaryRelationalOperatorNode)
firstColumn = ((BinaryRelationalOperatorNode) relNode).getColumnOperand(this);
seenFirstColumn = true;
}
} else {
extraStartStopSelectivity *= pred.selectivity(this);
numExtraStartStopPreds++;
}
}
} else {
// Don't include redundant join predicates in selectivity calculations
if (baseTableRestrictionList.isRedundantPredicate(i)) {
continue;
}
/* If we have "like" predicate on the first index column, it is more likely
* to have a smaller range than "between", so we apply extra selectivity 0.2
* here. beetle 4387, 4787.
*/
if (pred instanceof Predicate) {
ValueNode leftOpnd = ((Predicate) pred).getAndNode().getLeftOperand();
if (firstColumn != null && leftOpnd instanceof LikeEscapeOperatorNode) {
LikeEscapeOperatorNode likeNode = (LikeEscapeOperatorNode) leftOpnd;
if (likeNode.getLeftOperand().requiresTypeFromContext()) {
ValueNode receiver = ((TernaryOperatorNode) likeNode).getReceiver();
if (receiver instanceof ColumnReference) {
ColumnReference cr = (ColumnReference) receiver;
if (cr.getTableNumber() == firstColumn.getTableNumber() && cr.getColumnNumber() == firstColumn.getColumnNumber())
extraFirstColumnSelectivity *= 0.2;
}
}
}
}
if (pred.isQualifier()) {
extraQualifierSelectivity *= pred.selectivity(this);
numExtraQualifiers++;
} else {
extraNonQualifierSelectivity *= pred.selectivity(this);
numExtraNonQualifiers++;
}
/*
** Strictly speaking, it shouldn't be necessary to
** indicate a gap here, since there should be no more
** start/stop predicates, but let's do it, anyway.
*/
startGap = true;
stopGap = true;
}
}
if (unknownPredicateList != null) {
statCompositeSelectivity = unknownPredicateList.selectivity(this);
if (statCompositeSelectivity == -1.0d)
statCompositeSelectivity = 1.0d;
}
if (seenFirstColumn && (startStopPredCount > 0)) {
if (statisticsForConglomerate) {
statStartStopSelectivity = tableDescriptor.selectivityForConglomerate(cd, startStopPredCount);
} else if (cd.isIndex()) {
// DERBY-3790 (Investigate if request for update
// statistics can be skipped for certain kind of
// indexes, one instance may be unique indexes based
// on one column.) But as found in DERBY-6045 (in list
// multi-probe by primary key not chosen on tables with
// >256 rows), even though we do not keep the
// statistics for single-column unique indexes, we
// should improve the selectivity of such an index
// when the index is being considered by the optimizer.
IndexRowGenerator irg = cd.getIndexDescriptor();
if (irg.isUnique() && irg.numberOfOrderedColumns() == 1 && startStopPredCount == 1) {
statStartStopSelectivity = (1 / (double) baseRowCount());
}
}
}
/*
** Factor the non-base-table predicates into the extra
** non-qualifier selectivity, since these will restrict the
** number of rows, but not the cost.
*/
extraNonQualifierSelectivity *= currentJoinStrategy.nonBasePredicateSelectivity(this, predList);
/* Create the start and stop key arrays, and fill them in */
DataValueDescriptor[] startKeys;
DataValueDescriptor[] stopKeys;
if (startKeyNum > 0)
startKeys = new DataValueDescriptor[startKeyNum];
else
startKeys = null;
if (stopKeyNum > 0)
stopKeys = new DataValueDescriptor[stopKeyNum];
else
stopKeys = null;
startKeyNum = 0;
stopKeyNum = 0;
startGap = false;
stopGap = false;
/* If we have a probe predicate that is being used as a start/stop
* key then ssKeySourceInList will hold the InListOperatorNode
* from which the probe predicate was built.
*/
InListOperatorNode ssKeySourceInList = null;
for (int i = 0; i < predListSize; i++) {
pred = baseTableRestrictionList.getOptPredicate(i);
boolean startKey = pred.isStartKey();
boolean stopKey = pred.isStopKey();
if (startKey || stopKey) {
/* A probe predicate is only useful if it can be used as
* a start/stop key for the _first_ column in an index
* (i.e. if the column position is 0). That said, we only
* allow a single start/stop key per column position in
* the index (see PredicateList.orderUsefulPredicates()).
* Those two facts combined mean that we should never have
* more than one probe predicate start/stop key for a given
* conglomerate.
*/
if (SanityManager.DEBUG) {
if ((ssKeySourceInList != null) && ((Predicate) pred).isInListProbePredicate()) {
SanityManager.THROWASSERT("Found multiple probe predicate start/stop keys" + " for conglomerate '" + cd.getConglomerateName() + "' when at most one was expected.");
}
}
/* By passing "true" in the next line we indicate that we
* should only retrieve the underlying InListOpNode *if*
* the predicate is a "probe predicate".
*/
ssKeySourceInList = ((Predicate) pred).getSourceInList(true);
boolean knownConstant = pred.compareWithKnownConstant(this, true);
if (startKey) {
if (knownConstant && (!startGap)) {
startKeys[startKeyNum] = pred.getCompareValue(this);
startKeyNum++;
} else {
startGap = true;
}
}
if (stopKey) {
if (knownConstant && (!stopGap)) {
stopKeys[stopKeyNum] = pred.getCompareValue(this);
stopKeyNum++;
} else {
stopGap = true;
}
}
} else {
startGap = true;
stopGap = true;
}
}
int startOperator;
int stopOperator;
if (baseTableRestrictionList != null) {
startOperator = baseTableRestrictionList.startOperator(this);
stopOperator = baseTableRestrictionList.stopOperator(this);
} else {
/*
** If we're doing a full scan, it doesn't matter what the
** start and stop operators are.
*/
startOperator = ScanController.NA;
stopOperator = ScanController.NA;
}
/*
** Get a row template for this conglomerate. For now, just tell
** it we are using all the columns in the row.
*/
DataValueDescriptor[] rowTemplate = getRowTemplate(cd, getBaseCostController());
/* We prefer an index to a table scan for concurrency reasons, via a small
* adjustment to the estimated row count. This affects the optimizer's decision
* especially when few rows are in the table. beetle 5006. This makes sense
* since the plan may stay around for a long time before we actually check and
* invalidate it, and new rows may be inserted before then.
* Here we only prefer an index that has a start/stop key from predicates. The
* non-constant start/stop key case is taken care of by selectivity later.
*/
long baseRC = (startKeys != null || stopKeys != null) ? baseRowCount() : baseRowCount() + 5;
scc.getScanCost(currentJoinStrategy.scanCostType(), baseRC, 1, forUpdate(), (FormatableBitSet) null, rowTemplate, startKeys, startOperator, stopKeys, stopOperator, false, 0, costEst);
/* initialPositionCost is the first part of the index scan cost we get above.
* It's the cost of the initial positioning/fetch of the key, so it's unrelated
* to how many rows we fetch from the index. We extract it here so that we
* apply selectivity only to the other part of the index scan cost, which is
* nearly linear, to make the cost calculation more accurate and fair, especially
* compared to the plan of a "one row result set" (unique index). beetle 4787.
*/
double initialPositionCost = 0.0;
if (cd.isIndex()) {
initialPositionCost = scc.getFetchFromFullKeyCost((FormatableBitSet) null, 0);
/* oneRowResultSetForSomeConglom means there's a unique index, but certainly
* not this one, since we are here. If the store knows this non-unique index
* won't return any rows, or returns just one row (e.g., the predicate is a
* comparison with a constant, or the table is almost empty), we make a minor
* adjustment to the cost (affecting the decision for a covering index) and to
* the row count (for a non-covering one). The purpose is to favor the unique
* index. beetle 5006.
*/
if (oneRowResultSetForSomeConglom && costEst.rowCount() <= 1) {
costEst.setCost(costEst.getEstimatedCost() * 2, costEst.rowCount() + 2, costEst.singleScanRowCount() + 2);
}
}
if (optimizerTracingIsOn()) {
getOptimizerTracer().traceCostOfConglomerateScan(tableNumber, cd, costEst, numExtraFirstColumnPreds, extraFirstColumnSelectivity, numExtraStartStopPreds, extraStartStopSelectivity, startStopPredCount, statStartStopSelectivity, numExtraQualifiers, extraQualifierSelectivity, numExtraNonQualifiers, extraNonQualifierSelectivity);
}
/* initial row count is the row count without applying
any predicates-- we use this at the end of the routine
when we use statistics to recompute the row count.
*/
double initialRowCount = costEst.rowCount();
if (statStartStopSelectivity != 1.0d) {
/*
** If statistics exist use the selectivity computed
** from the statistics to calculate the cost.
** NOTE: we apply this selectivity to the cost as well
** as both the row counts. In the absence of statistics
** we only applied the FirstColumnSelectivity to the
** cost.
*/
costEst.setCost(scanCostAfterSelectivity(costEst.getEstimatedCost(), initialPositionCost, statStartStopSelectivity, oneRowResultSetForSomeConglom), costEst.rowCount() * statStartStopSelectivity, costEst.singleScanRowCount() * statStartStopSelectivity);
if (optimizerTracingIsOn()) {
getOptimizerTracer().traceCostIncludingStatsForIndex(costEst, tableNumber);
}
} else {
/*
** Factor in the extra selectivity on the first column
** of the conglomerate (see comment above).
** NOTE: In this case we want to apply the selectivity to both
** the total row count and singleScanRowCount.
*/
if (extraFirstColumnSelectivity != 1.0d) {
costEst.setCost(scanCostAfterSelectivity(costEst.getEstimatedCost(), initialPositionCost, extraFirstColumnSelectivity, oneRowResultSetForSomeConglom), costEst.rowCount() * extraFirstColumnSelectivity, costEst.singleScanRowCount() * extraFirstColumnSelectivity);
if (optimizerTracingIsOn()) {
getOptimizerTracer().traceCostIncludingExtra1stColumnSelectivity(costEst, tableNumber);
}
}
/* Factor in the extra start/stop selectivity (see comment above).
* NOTE: In this case we want to apply the selectivity to both
* the row count and singleScanRowCount.
*/
if (extraStartStopSelectivity != 1.0d) {
costEst.setCost(costEst.getEstimatedCost(), costEst.rowCount() * extraStartStopSelectivity, costEst.singleScanRowCount() * extraStartStopSelectivity);
if (optimizerTracingIsOn()) {
getOptimizerTracer().traceCostIncludingExtraStartStop(costEst, tableNumber);
}
}
}
/* If the start and stop key came from an IN-list "probe predicate"
* then we need to adjust the cost estimate. The probe predicate
* is of the form "col = ?" and we currently have the estimated
* cost of probing the index a single time for "?". But with an
* IN-list we don't just probe the index once; we're going to
* probe it once for every value in the IN-list. And we are going
* to potentially return an additional row (or set of rows) for
* each probe. To account for this "multi-probing" we take the
* costEstimate and multiply each of its fields by the size of
* the IN-list.
*
* Note: If the IN-list has duplicate values then this simple
* multiplication could give us an elevated cost (because we
* only probe the index for each *non-duplicate* value in the
* IN-list). But for now, we're saying that's okay.
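*
* For example, with "col IN (10, 20, 30)" the probe predicate is costed
* once as "col = ?"; if that single probe is estimated at cost c and row
* count n, multi-probing yields cost 3 * c and row count min(3 * n,
* initialRowCount), as computed by the code below.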
*/
if (ssKeySourceInList != null) {
int listSize = ssKeySourceInList.getRightOperandList().size();
double rc = costEst.rowCount() * listSize;
double ssrc = costEst.singleScanRowCount() * listSize;
/* If multiplication by listSize returns more rows than are
* in the scan then just use the number of rows in the scan.
*/
costEst.setCost(costEst.getEstimatedCost() * listSize, rc > initialRowCount ? initialRowCount : rc, ssrc > initialRowCount ? initialRowCount : ssrc);
}
/*
** Figure out whether to do row locking or table locking.
**
** If there are no start/stop predicates, we're doing full
** conglomerate scans, so do table locking.
*/
if (!startStopFound) {
currAccessPath.setLockMode(TransactionController.MODE_TABLE);
if (optimizerTracingIsOn()) {
getOptimizerTracer().traceNoStartStopPosition();
}
} else {
/*
** Figure out the number of rows touched. If all the
** start/stop predicates are constant, the number of
** rows touched is the number of rows per scan.
** This is also true for join strategies that scan the
** inner table only once (like hash join) - we can
** tell if we have one of those, because
** multiplyBaseCostByOuterRows() will return false.
*/
double rowsTouched = costEst.rowCount();
if ((!constantStartStop) && currentJoinStrategy.multiplyBaseCostByOuterRows()) {
/*
** This is a join where the inner table is scanned
** more than once, so we have to take the number
** of outer rows into account. The formula for this
** works out as follows:
**
** total rows in table = r
** number of rows touched per scan = s
** number of outer rows = o
** proportion of rows touched per scan = s / r
** proportion of rows not touched per scan =
** 1 - (s / r)
** proportion of rows not touched for all scans =
** (1 - (s / r)) ** o
** proportion of rows touched for all scans =
** 1 - ((1 - (s / r)) ** o)
** total rows touched for all scans =
** r * (1 - ((1 - (s / r)) ** o))
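**
** For example, with r = 1000 total rows, s = 10 rows touched per scan,
** and o = 50 outer rows: 1 - ((1 - 10/1000) ** 50) = 1 - 0.99 ** 50,
** which is about 0.395, so roughly 395 of the 1000 rows are expected
** to be touched across all scans.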
**
** In doing these calculations, we must be careful not
** to divide by zero. This could happen if there are
** no rows in the table. In this case, let's do table
** locking.
*/
double r = baseRowCount();
if (r > 0.0) {
double s = costEst.rowCount();
double o = outerCost.rowCount();
double pRowsNotTouchedPerScan = 1.0 - (s / r);
double pRowsNotTouchedAllScans = Math.pow(pRowsNotTouchedPerScan, o);
double pRowsTouchedAllScans = 1.0 - pRowsNotTouchedAllScans;
double rowsTouchedAllScans = r * pRowsTouchedAllScans;
rowsTouched = rowsTouchedAllScans;
} else {
/* See comments in setLockingBasedOnThreshold */
rowsTouched = optimizer.tableLockThreshold() + 1;
}
}
setLockingBasedOnThreshold(optimizer, rowsTouched);
}
/*
** If the index isn't covering, add the cost of getting the
** base row. Only apply extraFirstColumnSelectivity and extraStartStopSelectivity
** before we do this, don't apply extraQualifierSelectivity etc. The
** reason is that the row count here should be the number of index rows
** (and hence heap rows) we get, and we need to fetch all those rows, even
** though later on some of them may be filtered out by other predicates.
** beetle 4787.
*/
if (cd.isIndex() && (!isCoveringIndex(cd))) {
double singleFetchCost = getBaseCostController().getFetchFromRowLocationCost((FormatableBitSet) null, 0);
// The number of rows we expect to fetch from the base table.
double rowsToFetch = costEst.rowCount();
if (oneRowResultSetForSomeConglom) {
// DERBY-6011: We know that there is a unique index, and
// that there are predicates that guarantee that at most
// one row will be fetched from the unique index. The
// unique alternative always has 1 as estimated row count
// (see reference to DERBY-6011 further up in this method),
// even though it could actually return 0 rows.
//
// If the alternative that's being considered here has
// expected row count less than 1, it is going to have
// lower estimated cost for fetching base rows. We prefer
// unique indexes, as they lock fewer rows and allow more
// concurrency. Therefore, make sure the cost estimate for
// this alternative includes at least fetching one row from
// the base table.
rowsToFetch = Math.max(1.0d, rowsToFetch);
}
cost = singleFetchCost * rowsToFetch;
costEst.setEstimatedCost(costEst.getEstimatedCost() + cost);
if (optimizerTracingIsOn()) {
getOptimizerTracer().traceCostOfNoncoveringIndex(costEst, tableNumber);
}
}
/* Factor in the extra qualifier selectivity (see comment above).
* NOTE: In this case we want to apply the selectivity to both
* the row count and singleScanRowCount.
*/
if (extraQualifierSelectivity != 1.0d) {
costEst.setCost(costEst.getEstimatedCost(), costEst.rowCount() * extraQualifierSelectivity, costEst.singleScanRowCount() * extraQualifierSelectivity);
if (optimizerTracingIsOn()) {
getOptimizerTracer().traceCostIncludingExtraQualifierSelectivity(costEst, tableNumber);
}
}
singleScanRowCount = costEst.singleScanRowCount();
/*
** Let the join strategy decide whether the cost of the base
** scan is a single scan, or a scan per outer row.
** NOTE: In this case we only want to multiply against the
** total row count, not the singleScanRowCount.
** NOTE: Do not multiply row count if we determined that
** conglomerate is a 1 row result set when costing nested
** loop. (eg, we will find at most 1 match when probing
** the hash table.)
*/
double newCost = costEst.getEstimatedCost();
double rowCnt = costEst.rowCount();
/*
** RESOLVE - If there is a unique index on the joining
** columns, the number of matching rows will equal the
** number of outer rows, even if we're not considering the
** unique index for this access path. To figure that out,
** however, would require an analysis phase at the beginning
** of optimization. So, we'll always multiply the number
** of outer rows by the number of rows per scan. This will
** give us a higher than actual row count when there is
** such a unique index, which will bias the optimizer toward
** using the unique index. This is probably OK most of the
** time, since the optimizer would probably choose the
** unique index, anyway. But it would be better if the
** optimizer set the row count properly in this case.
*/
if (currentJoinStrategy.multiplyBaseCostByOuterRows()) {
newCost *= outerCost.rowCount();
}
rowCnt *= outerCost.rowCount();
initialRowCount *= outerCost.rowCount();
/*
** If this table can generate at most one row per scan,
** the maximum row count is the number of outer rows.
** NOTE: This does not completely take care of the RESOLVE
** in the above comment, since it will only notice
** one-row result sets for the current join order.
*/
if (oneRowResultSetForSomeConglom) {
if (outerCost.rowCount() < rowCnt) {
rowCnt = outerCost.rowCount();
}
}
/*
** The estimated cost may be too high for indexes, if the
** estimated row count exceeds the maximum. Only do this
** if we're not doing a full scan, and the start/stop position
** is not constant (i.e. we're doing a join on the first column
** of the index) - the reason being that this is when the
** cost may be inaccurate.
*/
if (cd.isIndex() && startStopFound && (!constantStartStop)) {
/*
** Does any table outer to this one have a unique key on
** a subset of the joining columns? If so, the maximum number
** of rows that this table can return is the number of rows
** in this table times the number of times the maximum number
** of times each key can be repeated.
*/
double scanUniquenessFactor = optimizer.uniqueJoinWithOuterTable(baseTableRestrictionList);
if (scanUniquenessFactor > 0.0) {
/*
** A positive uniqueness factor means there is a unique
** outer join key. The value is the reciprocal of the
** maximum number of duplicates for each unique key
** (the duplicates can be caused by other joining tables).
*/
double maxRows = ((double) baseRowCount()) / scanUniquenessFactor;
if (rowCnt > maxRows) {
/*
** The estimated row count is too high. Adjust the
** estimated cost downwards proportionately to
** match the maximum number of rows.
*/
newCost *= (maxRows / rowCnt);
}
}
}
/* The estimated total row count may be too high */
if (tableUniquenessFactor > 0.0) {
/*
** A positive uniqueness factor means there is a unique outer
** join key. The value is the reciprocal of the maximum number
** of duplicates for each unique key (the duplicates can be
** caused by other joining tables).
*/
double maxRows = ((double) baseRowCount()) / tableUniquenessFactor;
if (rowCnt > maxRows) {
/*
** The estimated row count is too high. Set it to the
** maximum row count.
*/
rowCnt = maxRows;
}
}
costEst.setCost(newCost, rowCnt, costEst.singleScanRowCount());
if (optimizerTracingIsOn()) {
getOptimizerTracer().traceCostOfNScans(tableNumber, outerCost.rowCount(), costEst);
}
/*
** Now figure in the cost of the non-qualifier predicates.
** existsBaseTables have a row count of 1
*/
double rc = -1, src = -1;
if (existsBaseTable)
rc = src = 1;
else if (extraNonQualifierSelectivity != 1.0d) { // beetle 4787
rc = oneRowResultSetForSomeConglom ? costEst.rowCount() : costEst.rowCount() * extraNonQualifierSelectivity;
src = costEst.singleScanRowCount() * extraNonQualifierSelectivity;
}
if (rc != -1) { // changed
costEst.setCost(costEst.getEstimatedCost(), rc, src);
if (optimizerTracingIsOn()) {
getOptimizerTracer().traceCostIncludingExtraNonQualifierSelectivity(costEst, tableNumber);
}
}
recomputeRowCount: if (statisticsForTable && !oneRowResultSetForSomeConglom && (statCompositeSelectivity != 1.0d)) {
/* if we have statistics we should use statistics to calculate
row count-- if it has been determined that this table
returns one row for some conglomerate then there is no need
to do this recalculation
*/
double compositeStatRC = initialRowCount * statCompositeSelectivity;
if (optimizerTracingIsOn()) {
getOptimizerTracer().traceCompositeSelectivityFromStatistics(statCompositeSelectivity);
}
if (tableUniquenessFactor > 0.0) {
/* If the row count from the composite statistics
comes up more than what the table uniqueness
factor indicates then lets stick with the current
row count.
*/
if (compositeStatRC > (baseRowCount() * tableUniquenessFactor)) {
break recomputeRowCount;
}
}
/* set the row count and the single scan row count
to the initialRowCount. initialRowCount is the product
of the RC from store * RC of the outerCost.
Thus RC = initialRowCount * the selectivity from stats.
SingleRC = RC / outerCost.rowCount().
*/
costEst.setCost(costEst.getEstimatedCost(), compositeStatRC, (existsBaseTable) ? 1 : compositeStatRC / outerCost.rowCount());
if (optimizerTracingIsOn()) {
getOptimizerTracer().traceCostIncludingCompositeSelectivityFromStats(costEst, tableNumber);
}
}
}
/* Put the base predicates back in the predicate list */
currentJoinStrategy.putBasePredicates(predList, baseTableRestrictionList);
return costEst;
}
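As a rough intuition for the selectivity bookkeeping above: in the non-statistics path the extra selectivities are multiplied into the row counts, and join strategies that rescan the inner table also multiply by the outer row count. A minimal, hypothetical sketch (the class name and all numbers are invented; the cost side, the initial-position adjustment, and the uniqueness caps are deliberately ignored):

/** Simplified, hypothetical illustration of how estimateCost() composes
 * selectivities in the non-statistics path. */
public class SelectivitySketch {
    public static void main(String[] args) {
        double baseRows = 10_000.0; // rows returned by the raw conglomerate scan
        double extraFirstColumnSelectivity = 0.05;
        double extraStartStopSelectivity = 0.5;
        double extraQualifierSelectivity = 0.1;
        double outerRows = 20.0; // rows coming from the outer side of a join

        // Selectivities multiply: each predicate independently keeps only
        // its fraction of the rows that survived the previous ones.
        double perScanRows = baseRows * extraFirstColumnSelectivity
                * extraStartStopSelectivity * extraQualifierSelectivity;

        // Join strategies that rescan the inner table once per outer row
        // (multiplyBaseCostByOuterRows() == true) scale the total row count
        // by the outer row count; the single-scan row count is left alone.
        double totalRows = perScanRows * outerRows;

        System.out.printf("rows per scan: %.1f, total rows: %.1f%n",
                perScanRows, totalRows);
    }
}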
use of org.apache.derby.iapi.types.DataValueDescriptor in project derby by apache.
the class AlterTableConstantAction method compressTable.
/**
* Routine to process COMPRESS TABLE or ALTER TABLE <t> DROP COLUMN <c>;
* <p>
* Uses the class-level variable "compressTable" to determine whether it is
* processing a compress table or a drop column:
* if (!compressTable)
* it must be a drop column.
* <p>
* Handles rebuilding of base conglomerate and all necessary indexes.
*/
private void compressTable() throws StandardException {
long newHeapConglom;
Properties properties = new Properties();
RowLocation rl;
if (SanityManager.DEBUG) {
if (lockGranularity != '\0') {
SanityManager.THROWASSERT("lockGranularity expected to be '\0', not " + lockGranularity);
}
SanityManager.ASSERT(!compressTable || columnInfo == null, "columnInfo expected to be null");
SanityManager.ASSERT(constraintActions == null, "constraintActions expected to be null");
}
ExecRow emptyHeapRow = td.getEmptyExecRow();
int[] collation_ids = td.getColumnCollationIds();
compressHeapCC = tc.openConglomerate(td.getHeapConglomerateId(), false, TransactionController.OPENMODE_FORUPDATE, TransactionController.MODE_TABLE, TransactionController.ISOLATION_SERIALIZABLE);
rl = compressHeapCC.newRowLocationTemplate();
// Get the properties on the old heap
compressHeapCC.getInternalTablePropertySet(properties);
compressHeapCC.close();
compressHeapCC = null;
// Create an array to put base row template
baseRow = new ExecRow[bulkFetchSize];
baseRowArray = new DataValueDescriptor[bulkFetchSize][];
validRow = new boolean[bulkFetchSize];
/* Set up index info */
getAffectedIndexes();
// Get an array of RowLocation template
compressRL = new RowLocation[bulkFetchSize];
indexRows = new ExecIndexRow[numIndexes];
if (!compressTable) {
// must be a drop column, thus the number of columns in the
// new template row and the collation template is one less.
ExecRow newRow = activation.getExecutionFactory().getValueRow(emptyHeapRow.nColumns() - 1);
int[] new_collation_ids = new int[collation_ids.length - 1];
for (int i = 0; i < newRow.nColumns(); i++) {
newRow.setColumn(i + 1, i < droppedColumnPosition - 1 ? emptyHeapRow.getColumn(i + 1) : emptyHeapRow.getColumn(i + 1 + 1));
new_collation_ids[i] = collation_ids[(i < droppedColumnPosition - 1) ? i : (i + 1)];
}
emptyHeapRow = newRow;
collation_ids = new_collation_ids;
}
setUpAllSorts(emptyHeapRow, rl);
// Start by opening a full scan on the base table.
openBulkFetchScan(td.getHeapConglomerateId());
// Get the estimated row count for the sorters
estimatedRowCount = compressHeapGSC.getEstimatedRowCount();
// Create the array of base row template
for (int i = 0; i < bulkFetchSize; i++) {
// create a base row template
baseRow[i] = td.getEmptyExecRow();
baseRowArray[i] = baseRow[i].getRowArray();
compressRL[i] = compressHeapGSC.newRowLocationTemplate();
}
newHeapConglom = tc.createAndLoadConglomerate("heap", emptyHeapRow.getRowArray(), // column sort order - not required for heap
null, collation_ids, properties, TransactionController.IS_DEFAULT, this, (long[]) null);
closeBulkFetchScan();
// Set the "estimated" row count
ScanController compressHeapSC = tc.openScan(newHeapConglom, false, TransactionController.OPENMODE_FORUPDATE, TransactionController.MODE_TABLE, TransactionController.ISOLATION_SERIALIZABLE, (FormatableBitSet) null, (DataValueDescriptor[]) null, 0, (Qualifier[][]) null, (DataValueDescriptor[]) null, 0);
compressHeapSC.setEstimatedRowCount(rowCount);
compressHeapSC.close();
// RESOLVE DJD CLEANUP
compressHeapSC = null;
/*
** Inform the data dictionary that we are about to write to it.
** There are several calls to data dictionary "get" methods here
** that might be done in "read" mode in the data dictionary, but
** it seemed safer to do this whole operation in "write" mode.
**
** We tell the data dictionary we're done writing at the end of
** the transaction.
*/
dd.startWriting(lcc);
// Update all indexes
if (compressIRGs.length > 0) {
updateAllIndexes(newHeapConglom, dd);
}
/* Update the DataDictionary
* RESOLVE - this will change in 1.4 because we will get
* back the same conglomerate number
*/
// Get the ConglomerateDescriptor for the heap
long oldHeapConglom = td.getHeapConglomerateId();
ConglomerateDescriptor cd = td.getConglomerateDescriptor(oldHeapConglom);
// Update sys.sysconglomerates with new conglomerate #
dd.updateConglomerateDescriptor(cd, newHeapConglom, tc);
// Now that the updated information is available in the system tables,
// we should invalidate all statements that use the old conglomerates
dm.invalidateFor(td, DependencyManager.COMPRESS_TABLE, lcc);
// Drop the old conglomerate
tc.dropConglomerate(oldHeapConglom);
cleanUp();
}
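The column remapping for the drop-column case (the loop that builds newRow above) shifts every column after the dropped one left by one position while earlier columns keep their index. A stand-alone rendering of that index arithmetic, with invented names and plain strings in place of DataValueDescriptors:

/** Stand-alone illustration of the DROP COLUMN remapping done above. */
public class DropColumnRemap {
    public static void main(String[] args) {
        String[] oldRow = { "c1", "c2", "c3", "c4" };
        int droppedColumnPosition = 2; // 1-based, as in the constant action

        String[] newRow = new String[oldRow.length - 1];
        for (int i = 0; i < newRow.length; i++) {
            // Columns before the dropped one keep their index; columns
            // after it shift left by one.
            newRow[i] = (i < droppedColumnPosition - 1)
                    ? oldRow[i]
                    : oldRow[i + 1];
        }
        // Prints [c1, c3, c4]
        System.out.println(java.util.Arrays.toString(newRow));
    }
}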
use of org.apache.derby.iapi.types.DataValueDescriptor in project derby by apache.
the class AlterTableConstantAction method getColumnMax.
/**
* Computes the maximum (or, for a negative increment, the minimum) value in a column of a table.
*/
private long getColumnMax(TableDescriptor td, String columnName, long increment) throws StandardException {
String maxStr = (increment > 0) ? "MAX" : "MIN";
String maxStmt = "SELECT " + maxStr + "(" + IdUtil.normalToDelimited(columnName) + ") FROM " + IdUtil.mkQualifiedName(td.getSchemaName(), td.getName());
PreparedStatement ps = lcc.prepareInternalStatement(maxStmt);
// This is a substatement, for now we do not set any timeout for it
// We might change this later by linking timeout to parent statement
ResultSet rs = ps.executeSubStatement(lcc, false, 0L);
DataValueDescriptor[] rowArray = rs.getNextRow().getRowArray();
rs.close();
rs.finish();
return rowArray[0].getLong();
}
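For a concrete sense of the statement this builds: with a positive increment, a column named ID, and a table APP.T (names invented for illustration), the generated text would look like the sketch below. IdUtil.normalToDelimited double-quotes the column identifier and IdUtil.mkQualifiedName produces the quoted schema-qualified table name.

// Hypothetical rendering of the statement getColumnMax() builds.
long increment = 1;
String maxStr = (increment > 0) ? "MAX" : "MIN";
String maxStmt = "SELECT " + maxStr + "(\"ID\") FROM \"APP\".\"T\"";
// maxStmt is now: SELECT MAX("ID") FROM "APP"."T"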
use of org.apache.derby.iapi.types.DataValueDescriptor in project derby by apache.
the class UnaryDateTimestampOperatorNode method bindExpression.
/**
* Called by UnaryOperatorNode.bindExpression.
*
* If the operand is a constant then evaluate the function at compile time. Otherwise,
* if the operand input type is the same as the output type then discard this node altogether.
* If the function is "date" and the input is a timestamp then change this node to a cast.
*
* @param fromList The FROM list for the query this
* expression is in, for binding columns.
* @param subqueryList The subquery list being built as we find SubqueryNodes
* @param aggregates The aggregate list being built as we find AggregateNodes
*
* @return The new top of the expression tree.
*
* @exception StandardException Thrown on error
*/
@Override
ValueNode bindExpression(FromList fromList, SubqueryList subqueryList, List<AggregateNode> aggregates) throws StandardException {
// Is this function the identity operator?
boolean isIdentity = false;
boolean operandIsNumber = false;
bindOperand(fromList, subqueryList, aggregates);
DataTypeDescriptor operandType = operand.getTypeServices();
switch(operandType.getJDBCTypeId()) {
case Types.BIGINT:
case Types.INTEGER:
case Types.SMALLINT:
case Types.TINYINT:
case Types.DECIMAL:
case Types.NUMERIC:
case Types.DOUBLE:
case Types.FLOAT:
if (TIMESTAMP_METHOD_NAME.equals(methodName))
invalidOperandType();
operandIsNumber = true;
break;
case Types.CHAR:
case Types.VARCHAR:
break;
case Types.DATE:
if (TIMESTAMP_METHOD_NAME.equals(methodName))
invalidOperandType();
isIdentity = true;
break;
case Types.NULL:
break;
case Types.TIMESTAMP:
if (TIMESTAMP_METHOD_NAME.equals(methodName))
isIdentity = true;
break;
default:
invalidOperandType();
}
if (operand instanceof ConstantNode) {
DataValueFactory dvf = getLanguageConnectionContext().getDataValueFactory();
DataValueDescriptor sourceValue = ((ConstantNode) operand).getValue();
DataValueDescriptor destValue;
if (sourceValue.isNull()) {
destValue = (TIMESTAMP_METHOD_NAME.equals(methodName)) ? dvf.getNullTimestamp((DateTimeDataValue) null) : dvf.getNullDate((DateTimeDataValue) null);
} else {
destValue = (TIMESTAMP_METHOD_NAME.equals(methodName)) ? dvf.getTimestamp(sourceValue) : dvf.getDate(sourceValue);
}
return new UserTypeConstantNode(destValue, getContextManager());
}
if (isIdentity)
return operand;
return this;
}
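The binding logic above has three possible outcomes, checked in this order: fold a constant operand at compile time, return the operand unchanged when the function is the identity, or keep the node for run-time evaluation. A hypothetical condensation of that decision (names invented):

enum BindOutcome { FOLD_TO_CONSTANT, RETURN_OPERAND, KEEP_NODE }

// Mirrors the order of checks in bindExpression(): the constant test runs
// first, so DATE('2001-01-01') folds even though DATE over a DATE-typed
// operand would also qualify as the identity.
static BindOutcome bindOutcome(boolean operandIsConstant, boolean isIdentity) {
    if (operandIsConstant) return BindOutcome.FOLD_TO_CONSTANT; // becomes UserTypeConstantNode
    if (isIdentity) return BindOutcome.RETURN_OPERAND;          // e.g. DATE(date-valued operand)
    return BindOutcome.KEEP_NODE;                               // evaluated at run time
}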