use of org.apache.derby.iapi.sql.dictionary.IndexRowGenerator in project derby by apache.
the class DataDictionaryImpl method bootstrapOneIndex.
/**
 * Creates the BTREE conglomerate backing one index of a system catalog
 * and registers the resulting descriptor on the catalog's TabInfoImpl.
 * <p>
 * The method first (re)builds the IndexRowGenerator for the index via
 * {@code initSystemIndexVariables}, derives an index row template from an
 * empty base row plus a RowLocation template obtained from the heap, and
 * then asks the store to create the index conglomerate.
 *
 * @param sd schema whose UUID is recorded in the new descriptor
 * @param tc transaction controller used to open the heap and to create
 *           the index conglomerate
 * @param ddg generator used to build the ConglomerateDescriptor
 * @param ti catalog information for the table that owns the index
 * @param indexNumber number of the index within {@code ti}
 * @param heapConglomerateNumber conglomerate number of the base heap
 * @return the descriptor of the newly created index conglomerate (also
 *         stored on {@code ti} through {@code setIndexConglomerate})
 * @throws StandardException if the store or dictionary calls fail
 */
private ConglomerateDescriptor bootstrapOneIndex(SchemaDescriptor sd, TransactionController tc, DataDescriptorGenerator ddg, TabInfoImpl ti, int indexNumber, long heapConglomerateNumber) throws StandardException {
    CatalogRowFactory rf = ti.getCatalogRowFactory();

    initSystemIndexVariables(ti, indexNumber);
    IndexRowGenerator irg = ti.getIndexRowGenerator(indexNumber);
    int numColumns = ti.getIndexColumnCount(indexNumber);
    /* Is the index unique */
    boolean isUnique = ti.isIndexUnique(indexNumber);

    // create an index row template
    ExecIndexRow indexableRow = irg.getIndexRowTemplate();
    ExecRow baseRow = rf.makeEmptyRowForCurrentVersion();

    // Get a RowLocation template. The controller is closed in a finally
    // block so that it is released even if the template cannot be built.
    RowLocation rl;
    ConglomerateController cc = tc.openConglomerate(heapConglomerateNumber, false, 0, TransactionController.MODE_RECORD, TransactionController.ISOLATION_REPEATABLE_READ);
    try {
        rl = cc.newRowLocationTemplate();
    } finally {
        cc.close();
    }

    // Get an index row based on the base row
    irg.getIndexRow(baseRow, rl, indexableRow, (FormatableBitSet) null);

    // Describe the properties of the index to the store using Properties
    // RESOLVE: The following properties assume a BTREE index.
    Properties indexProperties = ti.getCreateIndexProperties(indexNumber);
    // Tell it the conglomerate id of the base table
    indexProperties.put("baseConglomerateId", Long.toString(heapConglomerateNumber));
    // All indexes are unique because they contain the RowLocation.
    // The number of uniqueness columns must include the RowLocation
    // if the user did not specify a unique index.
    indexProperties.put("nUniqueColumns", Integer.toString(isUnique ? numColumns : numColumns + 1));
    // By convention, the row location column is the last column
    indexProperties.put("rowLocationColumn", Integer.toString(numColumns));
    // For now, all columns are key fields, including the RowLocation
    indexProperties.put("nKeyFields", Integer.toString(numColumns + 1));

    /* Create and add the conglomerate (index) */
    long conglomId = tc.createConglomerate(
            "BTREE", // we're requesting an index conglomerate
            indexableRow.getRowArray(),
            null, // default sort order
            null, // default collation id's for columns in all system congloms
            indexProperties, // default properties
            TransactionController.IS_DEFAULT); // not temporary

    // NOTE(review): the two boolean literals are presumably "is an index"
    // (true) and "is a constraint" (false) - confirm against
    // DataDescriptorGenerator.newConglomerateDescriptor.
    ConglomerateDescriptor conglomerateDescriptor = ddg.newConglomerateDescriptor(conglomId, rf.getIndexName(indexNumber), true, irg, false, rf.getCanonicalIndexUUID(indexNumber), rf.getCanonicalTableUUID(), sd.getUUID());
    ti.setIndexConglomerate(conglomerateDescriptor);
    return conglomerateDescriptor;
}
use of org.apache.derby.iapi.sql.dictionary.IndexRowGenerator in project derby by apache.
the class DataDictionaryImpl method initSystemIndexVariables.
/**
 * Builds the IndexRowGenerator for one index of a system catalog and
 * stores it on the given TabInfoImpl.
 * <p>
 * The generator describes a "BTREE" index over the base column positions
 * reported by {@code ti}, with every column marked ascending and every
 * column counted as an ordered column.
 *
 * @param ti catalog information that supplies the index layout and
 *           receives the generator
 * @param indexNumber number of the index within {@code ti}
 * @throws StandardException declared for callers; propagated from the
 *         dictionary calls
 */
private void initSystemIndexVariables(TabInfoImpl ti, int indexNumber) throws StandardException {
    final int columnCount = ti.getIndexColumnCount(indexNumber);
    final int[] basePositions = new int[columnCount];
    final boolean[] ascending = new boolean[columnCount];

    // Collect the base-table position of each index column; all columns
    // are sorted in ascending order.
    for (int col = 0; col < columnCount; col++) {
        basePositions[col] = ti.getBaseColumnPosition(indexNumber, col);
        ascending[col] = true;
    }

    // For now, assume that all index columns are ordered columns
    final boolean unique = ti.isIndexUnique(indexNumber);
    IndexRowGenerator generator = new IndexRowGenerator("BTREE", unique, false, false, false, basePositions, ascending, columnCount);
    ti.setIndexRowGenerator(indexNumber, generator);
}
use of org.apache.derby.iapi.sql.dictionary.IndexRowGenerator in project derby by apache.
the class IndexStatisticsDaemonImpl method updateIndexStatsMinion.
/**
 * Updates the index statistics for the given table and the specified
 * indexes.
 * <p>
 * The work is done in three phases:
 * <ol>
 * <li>Decide which conglomerates to scan. Non-index conglomerates, and
 * (when {@code skipDisposableStats} is set) unique single-column indexes,
 * are marked with conglomerate number {@code -1} and skipped later.</li>
 * <li>If all indexes of the table are being processed ({@code cds} was
 * {@code null}), drop every existing statistics entry whose referenced
 * index is not in the list built in phase one.</li>
 * <li>Scan each remaining index, count the distinct key prefixes, and
 * write the new statistics, retrying a limited number of times on lock
 * timeouts.</li>
 * </ol>
 * <p>
 * <strong>API note</strong>: Using {@code null} to update the statistics
 * for all conglomerates is preferred over explicitly passing an array with
 * all the conglomerates for the table. Doing so allows for some
 * optimizations, and will cause a disposable statistics check to be
 * performed.
 *
 * @param lcc language connection context used to perform the work
 * @param td the table to update index stats for
 * @param cds the conglomerates to update statistics for (non-index
 * conglomerates will be ignored), {@code null} means all indexes
 * @param asBackgroundTask whether the updates are done automatically as
 * part of a background task or if explicitly invoked by the user
 * @throws StandardException if something goes wrong
 */
private void updateIndexStatsMinion(LanguageConnectionContext lcc, TableDescriptor td, ConglomerateDescriptor[] cds, boolean asBackgroundTask) throws StandardException {
// can only properly identify disposable stats if cds == null,
// which means we are processing all indexes on the conglomerate.
final boolean identifyDisposableStats = (cds == null);
// Fetch descriptors if we're updating statistics for all indexes.
if (cds == null) {
cds = td.getConglomerateDescriptors();
}
// Extract/derive information from the table descriptor
long[] conglomerateNumber = new long[cds.length];
ExecIndexRow[] indexRow = new ExecIndexRow[cds.length];
TransactionController tc = lcc.getTransactionExecute();
// The heap is opened only to obtain RowLocation templates; a background
// task opens it with read-uncommitted isolation, an explicit invocation
// with repeatable read.
ConglomerateController heapCC = tc.openConglomerate(td.getHeapConglomerateId(), false, 0, TransactionController.MODE_RECORD, asBackgroundTask ? TransactionController.ISOLATION_READ_UNCOMMITTED : TransactionController.ISOLATION_REPEATABLE_READ);
// create a list of indexes that should have statistics, by looking
// at all indexes on the conglomerate, and conditionally skipping
// unique single column indexes. This set is the "non disposable
// stat list".
UUID[] non_disposable_objectUUID = new UUID[cds.length];
try {
for (int i = 0; i < cds.length; i++) {
// Skip non-index conglomerates
if (!cds[i].isIndex()) {
conglomerateNumber[i] = -1;
continue;
}
IndexRowGenerator irg = cds[i].getIndexDescriptor();
// Skip unique single-column indexes when skipDisposableStats is set.
// NOTE(review): the original comment (truncated in this copy) adds
// "or we are running in soft-upgrade-mode on a pre 10.9 db" - confirm
// how skipDisposableStats is derived.
if (skipDisposableStats) {
if (irg.isUnique() && irg.numberOfOrderedColumns() == 1) {
conglomerateNumber[i] = -1;
continue;
}
}
// at this point have found a stat for an existing
// index which is not a single column unique index, add it
// to the list of "non disposable stats"
conglomerateNumber[i] = cds[i].getConglomerateNumber();
non_disposable_objectUUID[i] = cds[i].getUUID();
indexRow[i] = irg.getNullIndexRow(td.getColumnDescriptorList(), heapCC.newRowLocationTemplate());
}
} finally {
// Always release the heap controller, even if building a row failed.
heapCC.close();
}
if (identifyDisposableStats) {
// Note this loop is not controlled by the skipDisposableStats
// flag. The above loop controls if we drop single column unique
// index stats or not. In all cases we are going to drop
// stats with no associated index (orphaned stats).
List<StatisticsDescriptor> existingStats = td.getStatistics();
StatisticsDescriptor[] stats = (StatisticsDescriptor[]) existingStats.toArray(new StatisticsDescriptor[existingStats.size()]);
// Look at every existing statistics entry and drop those entries that
// don't have a matching conglomerate in the non-disposable list.
for (int si = 0; si < stats.length; si++) {
UUID referencedIndex = stats[si].getReferenceID();
boolean isValid = false;
for (int ci = 0; ci < conglomerateNumber.length; ci++) {
if (referencedIndex.equals(non_disposable_objectUUID[ci])) {
isValid = true;
break;
}
}
// No matching index was found, so drop the statistics entry.
// NOTE(review): the original comment (truncated in this copy) describes
// this as a mechanism in case of another bug like DERBY-5681 in Derby.
if (!isValid) {
String msg = "dropping disposable statistics entry " + stats[si].getUUID() + " for index " + stats[si].getReferenceID() + " (cols=" + stats[si].getColumnCount() + ")";
logAlways(td, null, msg);
trace(1, msg + " on table " + stats[si].getTableUUID());
DataDictionary dd = lcc.getDataDictionary();
// Put the data dictionary into write mode before modifying it.
if (!lcc.dataDictionaryInWriteMode()) {
dd.startWriting(lcc);
}
dd.dropStatisticsDescriptors(td.getUUID(), stats[si].getReferenceID(), tc);
// A background task commits after each drop.
if (asBackgroundTask) {
lcc.internalCommit(true);
}
}
}
}
// [x][0] = conglomerate number, [x][1] = start time, [x][2] = stop time
long[][] scanTimes = new long[conglomerateNumber.length][3];
// Next free slot in scanTimes; only scanned indexes get an entry.
int sci = 0;
for (int indexNumber = 0; indexNumber < conglomerateNumber.length; indexNumber++) {
// -1 marks conglomerates that were excluded in the first loop.
if (conglomerateNumber[indexNumber] == -1)
continue;
// Check if daemon has been disabled.
if (asBackgroundTask) {
if (isShuttingDown()) {
break;
}
}
scanTimes[sci][0] = conglomerateNumber[indexNumber];
scanTimes[sci][1] = System.currentTimeMillis();
// Subtract one for the RowLocation added for indexes.
int numCols = indexRow[indexNumber].nColumns() - 1;
// cardinality[j] is incremented for each row whose key differs from the
// previous row's key at position j or at an earlier position.
long[] cardinality = new long[numCols];
KeyComparator cmp = new KeyComparator(indexRow[indexNumber]);
/* Read uncommitted, with record locking. Actually CS store may
not hold record locks */
// NOTE(review): each trailing comment in the call below appears to label
// the first argument on the FOLLOWING line (formatter artifact).
GroupFetchScanController gsc = tc.openGroupFetchScan(conglomerateNumber[indexNumber], // hold
false, 0, // locking
TransactionController.MODE_RECORD, TransactionController.ISOLATION_READ_UNCOMMITTED, // scancolumnlist-- want everything.
null, // startkeyvalue-- start from the beginning.
null, 0, // qualifiers, none!
null, // stopkeyvalue,
null, 0);
try {
int rowsFetched = 0;
boolean giving_up_on_shutdown = false;
while ((rowsFetched = cmp.fetchRows(gsc)) > 0) {
// Check for shutdown after each fetched batch of rows, so that a
// background task can stop at a convenient point.
if (asBackgroundTask) {
if (isShuttingDown()) {
giving_up_on_shutdown = true;
break;
}
}
for (int i = 0; i < rowsFetched; i++) {
// NOTE(review): presumably a negative result means the key equals the
// previous key - confirm against KeyComparator.compareWithPrevKey.
int whichPositionChanged = cmp.compareWithPrevKey(i);
if (whichPositionChanged >= 0) {
for (int j = whichPositionChanged; j < numCols; j++) cardinality[j]++;
}
}
}
// Leaves the loop over the indexes (after the finally block has closed
// the scan); no statistics are written for the interrupted index.
if (giving_up_on_shutdown)
break;
gsc.setEstimatedRowCount(cmp.getRowCount());
} finally // try
{
gsc.close();
gsc = null;
}
scanTimes[sci++][2] = System.currentTimeMillis();
// We have scanned the indexes, so let's give this a few attempts
// before giving up.
int retries = 0;
while (true) {
try {
writeUpdatedStats(lcc, td, non_disposable_objectUUID[indexNumber], cmp.getRowCount(), cardinality, asBackgroundTask);
break;
} catch (StandardException se) {
retries++;
// Retry only lock timeouts, at most three attempts in total, waiting
// longer before each new attempt.
if (se.isLockTimeout() && retries < 3) {
trace(2, "lock timeout when writing stats, retrying");
sleep(100 * retries);
} else {
// Rethrow if this is not a lock timeout, or if there have been
// too many lock timeouts.
throw se;
}
}
}
}
log(asBackgroundTask, td, fmtScanTimes(scanTimes));
}
use of org.apache.derby.iapi.sql.dictionary.IndexRowGenerator in project derby by apache.
the class FromBaseTable method newResultColumns.
/**
 * Builds the ResultColumnList for an index from the ResultColumnList of
 * its base table.
 * <p>
 * One column is added per base column position of the index, in index
 * order. The resulting list is then flagged as an index row so that a
 * RowLocation is generated as its last column.
 *
 * @param oldColumns    The column list of the base table.
 * @param idxCD         Descriptor of the index the new list describes.
 * @param heapCD        Descriptor of the base heap conglomerate.
 * @param cloneRCs      If true, each base column is cloned into the new
 *                      list and the base column is re-pointed at its
 *                      clone through a VirtualColumnNode; if false the
 *                      base columns themselves are shared.
 *
 * @return The ResultColumnList describing the index columns.
 *
 * @exception StandardException Thrown on error
 */
private ResultColumnList newResultColumns(ResultColumnList oldColumns, ConglomerateDescriptor idxCD, ConglomerateDescriptor heapCD, boolean cloneRCs) throws StandardException {
    int[] indexedPositions = idxCD.getIndexDescriptor().baseColumnPositions();
    ResultColumnList indexColumns = new ResultColumnList(getContextManager());

    for (int position : indexedPositions) {
        ResultColumn baseColumn = oldColumns.getResultColumn(position);
        if (SanityManager.DEBUG) {
            SanityManager.ASSERT(baseColumn != null, "Couldn't find base column " + position + "\n. RCL is\n" + oldColumns);
        }

        // Without cloning, the index list simply shares the base column.
        if (!cloneRCs) {
            indexColumns.addResultColumn(baseColumn);
            continue;
        }

        /* Cloning happens when an RCL is built for the index of a
         * non-covering index scan: the base column's expression becomes
         * a VCN that points at the freshly cloned index column.
         */
        ResultColumn copy = baseColumn.cloneMe();
        baseColumn.setExpression(new VirtualColumnNode(this, copy, baseColumn.getVirtualColumnId(), getContextManager()));
        indexColumns.addResultColumn(copy);
    }

    /*
    ** An index row ends with a RowLocation, so tell the list to generate
    ** one. The list is also told whether this table is the target of an
    ** update, which it passes on to the conglomerate controller when
    ** fetching the RowLocation template.
    */
    indexColumns.setIndexRow(heapCD.getConglomerateNumber(), forUpdate());
    return indexColumns;
}
use of org.apache.derby.iapi.sql.dictionary.IndexRowGenerator in project derby by apache.
the class FromBaseTable method estimateCost.
/**
* <p>
* Estimate the cost of scanning this {@code FromBaseTable} using the
* given predicate list with the given conglomerate.
* </p>
*
* <p>
* If the table contains little data, the cost estimate might be adjusted
* to make it more likely that an index scan will be preferred to a table
* scan, and a unique index will be preferred to a non-unique index. Even
* though such a plan may be slightly suboptimal when seen in isolation,
* using indexes, unique indexes in particular, needs fewer locks and
* allows more concurrency.
* </p>
*
* @see org.apache.derby.iapi.sql.compile.Optimizable#estimateCost
*
* @exception StandardException Thrown on error
*/
@Override
public CostEstimate estimateCost(OptimizablePredicateList predList, ConglomerateDescriptor cd, CostEstimate outerCost, Optimizer optimizer, RowOrdering rowOrdering) throws StandardException {
double cost;
boolean statisticsForTable = false;
boolean statisticsForConglomerate = false;
/* unknownPredicateList contains all predicates whose effect on
* cost/selectivity can't be calculated by the store.
*/
PredicateList unknownPredicateList = null;
if (optimizer.useStatistics() && predList != null) {
/* if user has specified that we don't use statistics,
pretend that statistics don't exist.
*/
statisticsForConglomerate = tableDescriptor.statisticsExist(cd);
statisticsForTable = tableDescriptor.statisticsExist(null);
unknownPredicateList = new PredicateList(getContextManager());
predList.copyPredicatesToOtherList(unknownPredicateList);
// their statistics need to get updated.
if (!hasCheckedIndexStats) {
hasCheckedIndexStats = true;
// daemon if that's the only index on the table.
if (qualifiesForStatisticsUpdateCheck(tableDescriptor)) {
tableDescriptor.markForIndexStatsUpdate(baseRowCount());
}
}
}
AccessPath currAccessPath = getCurrentAccessPath();
JoinStrategy currentJoinStrategy = currAccessPath.getJoinStrategy();
if (optimizerTracingIsOn()) {
getOptimizerTracer().traceEstimatingCostOfConglomerate(cd, tableNumber);
}
/* Get the uniqueness factor for later use (see below) */
double tableUniquenessFactor = optimizer.uniqueJoinWithOuterTable(predList);
boolean oneRowResultSetForSomeConglom = isOneRowResultSet(predList);
/* Get the predicates that can be used for scanning the base table */
baseTableRestrictionList.removeAllElements();
currentJoinStrategy.getBasePredicates(predList, baseTableRestrictionList, this);
/* RESOLVE: Need to figure out how to cache the StoreCostController */
StoreCostController scc = getStoreCostController(cd);
CostEstimate costEst = getScratchCostEstimate(optimizer);
/* Does the conglomerate match at most one row? */
if (isOneRowResultSet(cd, baseTableRestrictionList)) {
/*
** Tell the RowOrdering that this optimizable is always ordered.
** It will figure out whether it is really always ordered in the
** context of the outer tables and their orderings.
*/
rowOrdering.optimizableAlwaysOrdered(this);
singleScanRowCount = 1.0;
/* Yes, the cost is to fetch exactly one row */
// RESOLVE: NEED TO FIGURE OUT HOW TO GET REFERENCED COLUMN LIST,
// FIELD STATES, AND ACCESS TYPE
cost = scc.getFetchFromFullKeyCost((FormatableBitSet) null, 0);
if (optimizerTracingIsOn()) {
getOptimizerTracer().traceSingleMatchedRowCost(cost, tableNumber);
}
costEst.setCost(cost, 1.0d, 1.0d);
/*
** Let the join strategy decide whether the cost of the base
** scan is a single scan, or a scan per outer row.
** NOTE: The multiplication should only be done against the
** total row count, not the singleScanRowCount.
*/
double newCost = costEst.getEstimatedCost();
if (currentJoinStrategy.multiplyBaseCostByOuterRows()) {
newCost *= outerCost.rowCount();
}
costEst.setCost(newCost, costEst.rowCount() * outerCost.rowCount(), costEst.singleScanRowCount());
/*
** Choose the lock mode. If the start/stop conditions are
** constant, choose row locking, because we will always match
** the same row. If they are not constant (i.e. they include
** a join), we decide whether to do row locking based on
** the total number of rows for the life of the query.
*/
boolean constantStartStop = true;
for (int i = 0; i < predList.size(); i++) {
OptimizablePredicate pred = predList.getOptPredicate(i);
/*
** The predicates are in index order, so the start and
** stop keys should be first.
*/
if (!(pred.isStartKey() || pred.isStopKey())) {
break;
}
/* Stop when we've found a join */
if (!pred.getReferencedMap().hasSingleBitSet()) {
constantStartStop = false;
break;
}
}
if (constantStartStop) {
currAccessPath.setLockMode(TransactionController.MODE_RECORD);
if (optimizerTracingIsOn()) {
getOptimizerTracer().traceConstantStartStopPositions();
}
} else {
setLockingBasedOnThreshold(optimizer, costEst.rowCount());
}
if (optimizerTracingIsOn()) {
getOptimizerTracer().traceCostOfNScans(tableNumber, outerCost.rowCount(), costEst);
}
/* Add in cost of fetching base row for non-covering index */
if (cd.isIndex() && (!isCoveringIndex(cd))) {
double singleFetchCost = getBaseCostController().getFetchFromRowLocationCost((FormatableBitSet) null, 0);
// The estimated row count is always 1 here, although the
// index scan may actually return 0 rows, depending on whether
// or not the predicates match a key. It is assumed that a
// match is more likely than a miss, hence the row count is 1.
// Note (DERBY-6011): Alternative (non-unique) indexes may come
// up with row counts lower than 1 because they multiply with
// the selectivity, especially if the table is almost empty.
// This makes the optimizer prefer non-unique indexes if there
// are not so many rows in the table. We still want to use the
// unique index in that case, as the performance difference
// between the different scans on a small table is small, and
// the unique index is likely to lock fewer rows and reduce
// the chance of deadlocks. Therefore, we compensate by
// making the row count at least 1 for the non-unique index.
// See reference to DERBY-6011 further down in this method.
cost = singleFetchCost * costEst.rowCount();
costEst.setEstimatedCost(costEst.getEstimatedCost() + cost);
if (optimizerTracingIsOn()) {
getOptimizerTracer().traceNonCoveringIndexCost(cost, tableNumber);
}
}
} else {
/* Conglomerate might match more than one row */
/*
** Some predicates are good for start/stop, but we don't know
** the values they are being compared to at this time, so we
** estimate their selectivity in language rather than ask the
** store about them . The predicates on the first column of
** the conglomerate reduce the number of pages and rows scanned.
** The predicates on columns after the first reduce the number
** of rows scanned, but have a much smaller effect on the number
** of pages scanned, so we keep track of these selectivities in
** two separate variables: extraFirstColumnSelectivity and
** extraStartStopSelectivity. (Theoretically, we could try to
** figure out the effect of predicates after the first column
** on the number of pages scanned, but it's too hard, so we
** use these predicates only to reduce the estimated number of
** rows. For comparisons with known values, though, the store
** can figure out exactly how many rows and pages are scanned.)
**
** Other predicates are not good for start/stop. We keep track
** of their selectivities separately, because these limit the
** number of rows, but not the number of pages, and so need to
** be factored into the row count but not into the cost.
** These selectivities are factored into extraQualifierSelectivity.
**
** statStartStopSelectivity (using statistics) represents the
** selectivity of start/stop predicates that can be used to scan
** the index. If no statistics exist for the conglomerate then
** the value of this variable remains at 1.0
**
** statCompositeSelectivity (using statistics) represents the
** selectivity of all the predicates (including NonBaseTable
** predicates). This represents the most educated guess [among
** all the wild surmises in this routine] as to the number
** of rows that will be returned from this joinNode.
** If no statistics exist on the table or no statistics at all
** can be found to satisfy the predicates at this join operator,
** then statCompositeSelectivity is left initialized at 1.0
*/
double extraFirstColumnSelectivity = 1.0d;
double extraStartStopSelectivity = 1.0d;
double extraQualifierSelectivity = 1.0d;
double extraNonQualifierSelectivity = 1.0d;
double statStartStopSelectivity = 1.0d;
double statCompositeSelectivity = 1.0d;
int numExtraFirstColumnPreds = 0;
int numExtraStartStopPreds = 0;
int numExtraQualifiers = 0;
int numExtraNonQualifiers = 0;
/*
** It is possible for something to be a start or stop predicate
** without it being possible to use it as a key for cost estimation.
** For example, with an index on (c1, c2), and the predicate
** c1 = othertable.c3 and c2 = 1, the comparison on c1 is with
** an unknown value, so we can't pass it to the store. This means
** we can't pass the comparison on c2 to the store, either.
**
** The following booleans keep track of whether we have seen
** gaps in the keys we can pass to the store.
*/
boolean startGap = false;
boolean stopGap = false;
boolean seenFirstColumn = false;
/*
** We need to figure out the number of rows touched to decide
** whether to use row locking or table locking. If the start/stop
** conditions are constant (i.e. no joins), the number of rows
** touched is the number of rows per scan. But if the start/stop
** conditions contain a join, the number of rows touched must
** take the number of outer rows into account.
*/
boolean constantStartStop = true;
boolean startStopFound = false;
/* Count the number of start and stop keys */
int startKeyNum = 0;
int stopKeyNum = 0;
OptimizablePredicate pred;
int predListSize;
if (predList != null)
predListSize = baseTableRestrictionList.size();
else
predListSize = 0;
int startStopPredCount = 0;
ColumnReference firstColumn = null;
for (int i = 0; i < predListSize; i++) {
pred = baseTableRestrictionList.getOptPredicate(i);
boolean startKey = pred.isStartKey();
boolean stopKey = pred.isStopKey();
if (startKey || stopKey) {
startStopFound = true;
if (!pred.getReferencedMap().hasSingleBitSet()) {
constantStartStop = false;
}
boolean knownConstant = pred.compareWithKnownConstant(this, true);
if (startKey) {
if (knownConstant && (!startGap)) {
startKeyNum++;
if (unknownPredicateList != null)
unknownPredicateList.removeOptPredicate(pred);
} else {
startGap = true;
}
}
if (stopKey) {
if (knownConstant && (!stopGap)) {
stopKeyNum++;
if (unknownPredicateList != null)
unknownPredicateList.removeOptPredicate(pred);
} else {
stopGap = true;
}
}
/* If either we are seeing startGap or stopGap because start/stop key is
* comparison with non-constant, we should multiply the selectivity to
* extraFirstColumnSelectivity. Beetle 4787.
*/
if (startGap || stopGap) {
// Don't include redundant join predicates in selectivity calculations
if (baseTableRestrictionList.isRedundantPredicate(i))
continue;
if (startKey && stopKey)
startStopPredCount++;
if (pred.getIndexPosition() == 0) {
extraFirstColumnSelectivity *= pred.selectivity(this);
if (!seenFirstColumn) {
ValueNode relNode = ((Predicate) pred).getAndNode().getLeftOperand();
if (relNode instanceof BinaryRelationalOperatorNode)
firstColumn = ((BinaryRelationalOperatorNode) relNode).getColumnOperand(this);
seenFirstColumn = true;
}
} else {
extraStartStopSelectivity *= pred.selectivity(this);
numExtraStartStopPreds++;
}
}
} else {
// Don't include redundant join predicates in selectivity calculations
if (baseTableRestrictionList.isRedundantPredicate(i)) {
continue;
}
/* If we have "like" predicate on the first index column, it is more likely
* to have a smaller range than "between", so we apply extra selectivity 0.2
* here. beetle 4387, 4787.
*/
if (pred instanceof Predicate) {
ValueNode leftOpnd = ((Predicate) pred).getAndNode().getLeftOperand();
if (firstColumn != null && leftOpnd instanceof LikeEscapeOperatorNode) {
LikeEscapeOperatorNode likeNode = (LikeEscapeOperatorNode) leftOpnd;
if (likeNode.getLeftOperand().requiresTypeFromContext()) {
ValueNode receiver = ((TernaryOperatorNode) likeNode).getReceiver();
if (receiver instanceof ColumnReference) {
ColumnReference cr = (ColumnReference) receiver;
if (cr.getTableNumber() == firstColumn.getTableNumber() && cr.getColumnNumber() == firstColumn.getColumnNumber())
extraFirstColumnSelectivity *= 0.2;
}
}
}
}
if (pred.isQualifier()) {
extraQualifierSelectivity *= pred.selectivity(this);
numExtraQualifiers++;
} else {
extraNonQualifierSelectivity *= pred.selectivity(this);
numExtraNonQualifiers++;
}
/*
** Strictly speaking, it shouldn't be necessary to
** indicate a gap here, since there should be no more
** start/stop predicates, but let's do it, anyway.
*/
startGap = true;
stopGap = true;
}
}
if (unknownPredicateList != null) {
statCompositeSelectivity = unknownPredicateList.selectivity(this);
if (statCompositeSelectivity == -1.0d)
statCompositeSelectivity = 1.0d;
}
if (seenFirstColumn && (startStopPredCount > 0)) {
if (statisticsForConglomerate) {
statStartStopSelectivity = tableDescriptor.selectivityForConglomerate(cd, startStopPredCount);
} else if (cd.isIndex()) {
// DERBY-3790 (Investigate if request for update
// statistics can be skipped for certain kind of
// indexes, one instance may be unique indexes based
// on one column.) But as found in DERBY-6045 (in list
// multi-probe by primary key not chosen on tables with
// >256 rows), even though we do not keep the
// statistics for single-column unique indexes, we
// should improve the selectivity of such an index
// when the index is being considered by the optimizer.
IndexRowGenerator irg = cd.getIndexDescriptor();
if (irg.isUnique() && irg.numberOfOrderedColumns() == 1 && startStopPredCount == 1) {
statStartStopSelectivity = (1 / (double) baseRowCount());
}
}
}
/*
** Factor the non-base-table predicates into the extra
** non-qualifier selectivity, since these will restrict the
** number of rows, but not the cost.
*/
extraNonQualifierSelectivity *= currentJoinStrategy.nonBasePredicateSelectivity(this, predList);
/* Create the start and stop key arrays, and fill them in */
DataValueDescriptor[] startKeys;
DataValueDescriptor[] stopKeys;
if (startKeyNum > 0)
startKeys = new DataValueDescriptor[startKeyNum];
else
startKeys = null;
if (stopKeyNum > 0)
stopKeys = new DataValueDescriptor[stopKeyNum];
else
stopKeys = null;
startKeyNum = 0;
stopKeyNum = 0;
startGap = false;
stopGap = false;
/* If we have a probe predicate that is being used as a start/stop
* key then ssKeySourceInList will hold the InListOperatorNode
* from which the probe predicate was built.
*/
InListOperatorNode ssKeySourceInList = null;
for (int i = 0; i < predListSize; i++) {
pred = baseTableRestrictionList.getOptPredicate(i);
boolean startKey = pred.isStartKey();
boolean stopKey = pred.isStopKey();
if (startKey || stopKey) {
/* A probe predicate is only useful if it can be used as
* a start/stop key for _first_ column in an index
* (i.e. if the column position is 0). That said, we only
* allow a single start/stop key per column position in
* the index (see PredicateList.orderUsefulPredicates()).
* Those two facts combined mean that we should never have
* more than one probe predicate start/stop key for a given
* conglomerate.
*/
if (SanityManager.DEBUG) {
if ((ssKeySourceInList != null) && ((Predicate) pred).isInListProbePredicate()) {
SanityManager.THROWASSERT("Found multiple probe predicate start/stop keys" + " for conglomerate '" + cd.getConglomerateName() + "' when at most one was expected.");
}
}
/* By passing "true" in the next line we indicate that we
* should only retrieve the underlying InListOpNode *if*
* the predicate is a "probe predicate".
*/
ssKeySourceInList = ((Predicate) pred).getSourceInList(true);
boolean knownConstant = pred.compareWithKnownConstant(this, true);
if (startKey) {
if (knownConstant && (!startGap)) {
startKeys[startKeyNum] = pred.getCompareValue(this);
startKeyNum++;
} else {
startGap = true;
}
}
if (stopKey) {
if (knownConstant && (!stopGap)) {
stopKeys[stopKeyNum] = pred.getCompareValue(this);
stopKeyNum++;
} else {
stopGap = true;
}
}
} else {
startGap = true;
stopGap = true;
}
}
int startOperator;
int stopOperator;
if (baseTableRestrictionList != null) {
startOperator = baseTableRestrictionList.startOperator(this);
stopOperator = baseTableRestrictionList.stopOperator(this);
} else {
/*
** If we're doing a full scan, it doesn't matter what the
** start and stop operators are.
*/
startOperator = ScanController.NA;
stopOperator = ScanController.NA;
}
/*
** Get a row template for this conglomerate. For now, just tell
** it we are using all the columns in the row.
*/
DataValueDescriptor[] rowTemplate = getRowTemplate(cd, getBaseCostController());
/* we prefer index than table scan for concurrency reason, by a small
* adjustment on estimated row count. This affects optimizer's decision
* especially when few rows are in table. beetle 5006. This makes sense
* since the plan may stay long before we actually check and invalidate it.
* And new rows may be inserted before we check and invalidate the plan.
* Here we only prefer index that has start/stop key from predicates. Non-
* constant start/stop key case is taken care of by selectivity later.
*/
long baseRC = (startKeys != null || stopKeys != null) ? baseRowCount() : baseRowCount() + 5;
scc.getScanCost(currentJoinStrategy.scanCostType(), baseRC, 1, forUpdate(), (FormatableBitSet) null, rowTemplate, startKeys, startOperator, stopKeys, stopOperator, false, 0, costEst);
/* initialPositionCost is the first part of the index scan cost we get above.
* It's the cost of initial positioning/fetch of key. So it's unrelated to
* row count of how many rows we fetch from index. We extract it here so that
* we only multiply selectivity to the other part of index scan cost, which is
* nearly linear, to make cost calculation more accurate and fair, especially
* compared to the plan of "one row result set" (unique index). beetle 4787.
*/
double initialPositionCost = 0.0;
if (cd.isIndex()) {
initialPositionCost = scc.getFetchFromFullKeyCost((FormatableBitSet) null, 0);
/* oneRowResultSetForSomeConglom means there's a unique index, but certainly
* not this one since we are here. If store knows this non-unique index
* won't return any row or just returns one row (eg., the predicate is a
* comparison with constant or almost empty table), we do minor adjustment
* on cost (affecting decision for covering index) and rc (decision for
* non-covering). The purpose is favoring unique index. beetle 5006.
*/
if (oneRowResultSetForSomeConglom && costEst.rowCount() <= 1) {
costEst.setCost(costEst.getEstimatedCost() * 2, costEst.rowCount() + 2, costEst.singleScanRowCount() + 2);
}
}
if (optimizerTracingIsOn()) {
getOptimizerTracer().traceCostOfConglomerateScan(tableNumber, cd, costEst, numExtraFirstColumnPreds, extraFirstColumnSelectivity, numExtraStartStopPreds, extraStartStopSelectivity, startStopPredCount, statStartStopSelectivity, numExtraQualifiers, extraQualifierSelectivity, numExtraNonQualifiers, extraNonQualifierSelectivity);
}
/* initial row count is the row count without applying
any predicates-- we use this at the end of the routine
when we use statistics to recompute the row count.
*/
double initialRowCount = costEst.rowCount();
if (statStartStopSelectivity != 1.0d) {
/*
** If statistics exist use the selectivity computed
** from the statistics to calculate the cost.
** NOTE: we apply this selectivity to the cost as well
** as both the row counts. In the absence of statistics
** we only applied the FirstColumnSelectivity to the
** cost.
*/
costEst.setCost(scanCostAfterSelectivity(costEst.getEstimatedCost(), initialPositionCost, statStartStopSelectivity, oneRowResultSetForSomeConglom), costEst.rowCount() * statStartStopSelectivity, costEst.singleScanRowCount() * statStartStopSelectivity);
if (optimizerTracingIsOn()) {
getOptimizerTracer().traceCostIncludingStatsForIndex(costEst, tableNumber);
}
} else {
/*
** Factor in the extra selectivity on the first column
** of the conglomerate (see comment above).
** NOTE: In this case we want to apply the selectivity to both
** the total row count and singleScanRowCount.
*/
if (extraFirstColumnSelectivity != 1.0d) {
costEst.setCost(scanCostAfterSelectivity(costEst.getEstimatedCost(), initialPositionCost, extraFirstColumnSelectivity, oneRowResultSetForSomeConglom), costEst.rowCount() * extraFirstColumnSelectivity, costEst.singleScanRowCount() * extraFirstColumnSelectivity);
if (optimizerTracingIsOn()) {
getOptimizerTracer().traceCostIncludingExtra1stColumnSelectivity(costEst, tableNumber);
}
}
/* Factor in the extra start/stop selectivity (see comment above).
* NOTE: In this case we want to apply the selectivity to both
* the row count and singleScanRowCount.
*/
if (extraStartStopSelectivity != 1.0d) {
costEst.setCost(costEst.getEstimatedCost(), costEst.rowCount() * extraStartStopSelectivity, costEst.singleScanRowCount() * extraStartStopSelectivity);
if (optimizerTracingIsOn()) {
getOptimizerTracer().traceCostIncludingExtraStartStop(costEst, tableNumber);
}
}
}
/* If the start and stop key came from an IN-list "probe predicate"
* then we need to adjust the cost estimate. The probe predicate
* is of the form "col = ?" and we currently have the estimated
* cost of probing the index a single time for "?". But with an
* IN-list we don't just probe the index once; we're going to
* probe it once for every value in the IN-list. And we are going
* to potentially return an additional row (or set of rows) for
* each probe. To account for this "multi-probing" we take the
* costEstimate and multiply each of its fields by the size of
* the IN-list.
*
* Note: If the IN-list has duplicate values then this simple
* multiplication could give us an elevated cost (because we
* only probe the index for each *non-duplicate* value in the
* IN-list). But for now, we're saying that's okay.
*/
if (ssKeySourceInList != null) {
int listSize = ssKeySourceInList.getRightOperandList().size();
double rc = costEst.rowCount() * listSize;
double ssrc = costEst.singleScanRowCount() * listSize;
/* If multiplication by listSize returns more rows than are
* in the scan then just use the number of rows in the scan.
*/
costEst.setCost(costEst.getEstimatedCost() * listSize, rc > initialRowCount ? initialRowCount : rc, ssrc > initialRowCount ? initialRowCount : ssrc);
}
/*
** Figure out whether to do row locking or table locking.
**
** If there are no start/stop predicates, we're doing full
** conglomerate scans, so do table locking.
*/
if (!startStopFound) {
currAccessPath.setLockMode(TransactionController.MODE_TABLE);
if (optimizerTracingIsOn()) {
getOptimizerTracer().traceNoStartStopPosition();
}
} else {
/*
** Figure out the number of rows touched. If all the
** start/stop predicates are constant, the number of
** rows touched is the number of rows per scan.
** This is also true for join strategies that scan the
** inner table only once (like hash join) - we can
** tell if we have one of those, because
** multiplyBaseCostByOuterRows() will return false.
*/
double rowsTouched = costEst.rowCount();
if ((!constantStartStop) && currentJoinStrategy.multiplyBaseCostByOuterRows()) {
/*
** This is a join where the inner table is scanned
** more than once, so we have to take the number
** of outer rows into account. The formula for this
** works out as follows:
**
** total rows in table = r
** number of rows touched per scan = s
** number of outer rows = o
** proportion of rows touched per scan = s / r
** proportion of rows not touched per scan =
** 1 - (s / r)
** proportion of rows not touched for all scans =
** (1 - (s / r)) ** o
** proportion of rows touched for all scans =
** 1 - ((1 - (s / r)) ** o)
** total rows touched for all scans =
** r * (1 - ((1 - (s / r)) ** o))
**
** In doing these calculations, we must be careful not
** to divide by zero. This could happen if there are
** no rows in the table. In this case, let's do table
** locking.
*/
double r = baseRowCount();
if (r > 0.0) {
double s = costEst.rowCount();
double o = outerCost.rowCount();
double pRowsNotTouchedPerScan = 1.0 - (s / r);
double pRowsNotTouchedAllScans = Math.pow(pRowsNotTouchedPerScan, o);
double pRowsTouchedAllScans = 1.0 - pRowsNotTouchedAllScans;
double rowsTouchedAllScans = r * pRowsTouchedAllScans;
rowsTouched = rowsTouchedAllScans;
} else {
/* See comments in setLockingBasedOnThreshold */
rowsTouched = optimizer.tableLockThreshold() + 1;
}
}
setLockingBasedOnThreshold(optimizer, rowsTouched);
}
/*
** If the index isn't covering, add the cost of getting the
** base row. Only apply extraFirstColumnSelectivity and extraStartStopSelectivity
** before we do this, don't apply extraQualifierSelectivity etc. The
** reason is that the row count here should be the number of index rows
** (and hence heap rows) we get, and we need to fetch all those rows, even
** though later on some of them may be filtered out by other predicates.
** beetle 4787.
*/
if (cd.isIndex() && (!isCoveringIndex(cd))) {
double singleFetchCost = getBaseCostController().getFetchFromRowLocationCost((FormatableBitSet) null, 0);
// The number of rows we expect to fetch from the base table.
double rowsToFetch = costEst.rowCount();
if (oneRowResultSetForSomeConglom) {
// DERBY-6011: We know that there is a unique index, and
// that there are predicates that guarantee that at most
// one row will be fetched from the unique index. The
// unique alternative always has 1 as estimated row count
// (see reference to DERBY-6011 further up in this method),
// even though it could actually return 0 rows.
//
// If the alternative that's being considered here has
// expected row count less than 1, it is going to have
// lower estimated cost for fetching base rows. We prefer
// unique indexes, as they lock fewer rows and allow more
// concurrency. Therefore, make sure the cost estimate for
// this alternative includes at least fetching one row from
// the base table.
rowsToFetch = Math.max(1.0d, rowsToFetch);
}
cost = singleFetchCost * rowsToFetch;
costEst.setEstimatedCost(costEst.getEstimatedCost() + cost);
if (optimizerTracingIsOn()) {
getOptimizerTracer().traceCostOfNoncoveringIndex(costEst, tableNumber);
}
}
/* Factor in the extra qualifier selectivity (see comment above).
* NOTE: In this case we want to apply the selectivity to both
* the row count and singleScanRowCount.
*/
if (extraQualifierSelectivity != 1.0d) {
costEst.setCost(costEst.getEstimatedCost(), costEst.rowCount() * extraQualifierSelectivity, costEst.singleScanRowCount() * extraQualifierSelectivity);
if (optimizerTracingIsOn()) {
getOptimizerTracer().traceCostIncludingExtraQualifierSelectivity(costEst, tableNumber);
}
}
singleScanRowCount = costEst.singleScanRowCount();
/*
** Let the join strategy decide whether the cost of the base
** scan is a single scan, or a scan per outer row.
** NOTE: In this case we only want to multiply against the
** total row count, not the singleScanRowCount.
** NOTE: Do not multiply row count if we determined that
** conglomerate is a 1 row result set when costing nested
** loop. (eg, we will find at most 1 match when probing
** the hash table.)
*/
double newCost = costEst.getEstimatedCost();
double rowCnt = costEst.rowCount();
/*
** RESOLVE - If there is a unique index on the joining
** columns, the number of matching rows will equal the
** number of outer rows, even if we're not considering the
** unique index for this access path. To figure that out,
** however, would require an analysis phase at the beginning
** of optimization. So, we'll always multiply the number
** of outer rows by the number of rows per scan. This will
** give us a higher than actual row count when there is
** such a unique index, which will bias the optimizer toward
** using the unique index. This is probably OK most of the
** time, since the optimizer would probably choose the
** unique index, anyway. But it would be better if the
** optimizer set the row count properly in this case.
*/
if (currentJoinStrategy.multiplyBaseCostByOuterRows()) {
newCost *= outerCost.rowCount();
}
rowCnt *= outerCost.rowCount();
initialRowCount *= outerCost.rowCount();
/*
** If this table can generate at most one row per scan,
** the maximum row count is the number of outer rows.
** NOTE: This does not completely take care of the RESOLVE
** in the above comment, since it will only notice
** one-row result sets for the current join order.
*/
if (oneRowResultSetForSomeConglom) {
if (outerCost.rowCount() < rowCnt) {
rowCnt = outerCost.rowCount();
}
}
/*
** The estimated cost may be too high for indexes, if the
** estimated row count exceeds the maximum. Only do this
** if we're not doing a full scan, and the start/stop position
** is not constant (i.e. we're doing a join on the first column
** of the index) - the reason being that this is when the
** cost may be inaccurate.
*/
if (cd.isIndex() && startStopFound && (!constantStartStop)) {
/*
** Does any table outer to this one have a unique key on
** a subset of the joining columns? If so, the maximum number
** of rows that this table can return is the number of rows
** in this table times the maximum number of times each key
** can be repeated.
*/
double scanUniquenessFactor = optimizer.uniqueJoinWithOuterTable(baseTableRestrictionList);
if (scanUniquenessFactor > 0.0) {
/*
** A positive uniqueness factor means there is a unique
** outer join key. The value is the reciprocal of the
** maximum number of duplicates for each unique key
** (the duplicates can be caused by other joining tables).
*/
double maxRows = ((double) baseRowCount()) / scanUniquenessFactor;
if (rowCnt > maxRows) {
/*
** The estimated row count is too high. Adjust the
** estimated cost downwards proportionately to
** match the maximum number of rows.
*/
newCost *= (maxRows / rowCnt);
}
}
}
/* The estimated total row count may be too high */
if (tableUniquenessFactor > 0.0) {
/*
** A positive uniqueness factor means there is a unique outer
** join key. The value is the reciprocal of the maximum number
** of duplicates for each unique key (the duplicates can be
** caused by other joining tables).
*/
double maxRows = ((double) baseRowCount()) / tableUniquenessFactor;
if (rowCnt > maxRows) {
/*
** The estimated row count is too high. Set it to the
** maximum row count.
*/
rowCnt = maxRows;
}
}
costEst.setCost(newCost, rowCnt, costEst.singleScanRowCount());
if (optimizerTracingIsOn()) {
getOptimizerTracer().traceCostOfNScans(tableNumber, outerCost.rowCount(), costEst);
}
/*
** Now figure in the cost of the non-qualifier predicates.
** existsBaseTables have a row count of 1
*/
double rc = -1, src = -1;
if (existsBaseTable)
rc = src = 1;
else // beetle 4787
if (extraNonQualifierSelectivity != 1.0d) {
rc = oneRowResultSetForSomeConglom ? costEst.rowCount() : costEst.rowCount() * extraNonQualifierSelectivity;
src = costEst.singleScanRowCount() * extraNonQualifierSelectivity;
}
if (// changed
rc != -1) {
costEst.setCost(costEst.getEstimatedCost(), rc, src);
if (optimizerTracingIsOn()) {
getOptimizerTracer().traceCostIncludingExtraNonQualifierSelectivity(costEst, tableNumber);
}
}
recomputeRowCount: if (statisticsForTable && !oneRowResultSetForSomeConglom && (statCompositeSelectivity != 1.0d)) {
/* if we have statistics we should use statistics to calculate
row count-- if it has been determined that this table
returns one row for some conglomerate then there is no need
to do this recalculation
*/
double compositeStatRC = initialRowCount * statCompositeSelectivity;
if (optimizerTracingIsOn()) {
getOptimizerTracer().traceCompositeSelectivityFromStatistics(statCompositeSelectivity);
}
if (tableUniquenessFactor > 0.0) {
/* If the row count from the composite statistics
comes out higher than what the table uniqueness
factor indicates, then let's stick with the current
row count.
*/
if (compositeStatRC > (baseRowCount() * tableUniquenessFactor)) {
break recomputeRowCount;
}
}
/* Set the row count and the single scan row count
from initialRowCount. By now initialRowCount is the product
of the RC from store * RC of the outerCost.
Thus RC = initialRowCount * the selectivity from stats.
SingleRC = RC / outerCost.rowCount(), or 1 when existsBaseTable is set.
*/
costEst.setCost(costEst.getEstimatedCost(), compositeStatRC, (existsBaseTable) ? 1 : compositeStatRC / outerCost.rowCount());
if (optimizerTracingIsOn()) {
getOptimizerTracer().traceCostIncludingCompositeSelectivityFromStats(costEst, tableNumber);
}
}
}
/* Put the base predicates back in the predicate list */
currentJoinStrategy.putBasePredicates(predList, baseTableRestrictionList);
return costEst;
}
Aggregations