use of org.apache.phoenix.schema.PColumn in project phoenix by apache.
the class QueryOptimizer method orderPlansBestToWorst.
/**
* Order the plans among all the possible ones from best to worst.
* If option COST_BASED_OPTIMIZER_ENABLED is on and stats are available, we order the plans based on
* their costs; otherwise we use the following simple algorithm:
* 1) If the query is a point lookup (i.e. we have a set of exact row keys), choose that one immediately.
* 2) If the query has an ORDER BY and a LIMIT, choose the plan that has all the ORDER BY expressions
* in the same order as the row key columns.
* 3) If more than one plan meets (1) and (2), choose the plan with:
* a) the most row key columns that may be used to form the start/stop scan key (i.e. bound slots),
* b) an order-preserving GROUP BY,
* c) a non-local-index table.
* @param select the original statement, used here to check query hints
* @param plans the list of candidate plans
* @param stopAtBestPlan if true, return only the best plan rather than the full ordering
* @return list of plans ordered from best to worst.
*/
private List<QueryPlan> orderPlansBestToWorst(SelectStatement select, List<QueryPlan> plans, boolean stopAtBestPlan) {
final QueryPlan dataPlan = plans.get(0);
if (plans.size() == 1) {
return plans;
}
if (this.costBased) {
Collections.sort(plans, new Comparator<QueryPlan>() {
@Override
public int compare(QueryPlan plan1, QueryPlan plan2) {
return plan1.getCost().compareTo(plan2.getCost());
}
});
// If the best plan's cost is known, return the cost-based ordering; otherwise fall
// back to static ordering.
if (!plans.get(0).getCost().isUnknown()) {
return stopAtBestPlan ? plans.subList(0, 1) : plans;
}
}
/**
* If we have one or more plans that are just point lookups (i.e. fully qualified row
* keys), then favor those first.
*/
List<QueryPlan> candidates = Lists.newArrayListWithExpectedSize(plans.size());
if (stopAtBestPlan) {
// If we're stopping at the best plan, only consider point lookups if there are any
for (QueryPlan plan : plans) {
if (plan.getContext().getScanRanges().isPointLookup()) {
candidates.add(plan);
}
}
} else {
candidates.addAll(plans);
}
/**
* If we have one or more plans that remove the ORDER BY, choose from among these,
* as the sort is typically the most expensive operation. Once we have stats, if
* there's a limit on the query, we might choose a different plan. For example,
* if the limit were a very large number and the combination of applying other
* filters on the row key were estimated to select fewer rows, we'd choose that
* one.
*/
List<QueryPlan> stillCandidates = plans;
List<QueryPlan> bestCandidates = candidates;
if (!candidates.isEmpty()) {
stillCandidates = candidates;
bestCandidates = Lists.<QueryPlan>newArrayListWithExpectedSize(candidates.size());
}
for (QueryPlan plan : stillCandidates) {
// If ORDER BY optimized out (or not present at all)
if (plan.getOrderBy().getOrderByExpressions().isEmpty()) {
bestCandidates.add(plan);
}
}
if (bestCandidates.isEmpty()) {
bestCandidates.addAll(stillCandidates);
}
int nViewConstants = 0;
PTable dataTable = dataPlan.getTableRef().getTable();
if (dataTable.getType() == PTableType.VIEW) {
for (PColumn column : dataTable.getColumns()) {
if (column.getViewConstant() != null) {
nViewConstants++;
}
}
}
final int boundRanges = nViewConstants;
final boolean useDataOverIndexHint = select.getHint().hasHint(Hint.USE_DATA_OVER_INDEX_TABLE);
final int comparisonOfDataVersusIndexTable = useDataOverIndexHint ? -1 : 1;
Collections.sort(bestCandidates, new Comparator<QueryPlan>() {
@Override
public int compare(QueryPlan plan1, QueryPlan plan2) {
PTable table1 = plan1.getTableRef().getTable();
PTable table2 = plan2.getTableRef().getTable();
int boundCount1 = plan1.getContext().getScanRanges().getBoundPkColumnCount();
int boundCount2 = plan2.getContext().getScanRanges().getBoundPkColumnCount();
// For shared indexes (i.e. indexes on views and local indexes),
// a) add back any view constants as these won't be in the index, and
// b) ignore the viewIndexId which will be part of the row key columns.
int c = (boundCount2 + (table2.getViewIndexId() == null ? 0 : (boundRanges - 1)))
        - (boundCount1 + (table1.getViewIndexId() == null ? 0 : (boundRanges - 1)));
if (c != 0)
return c;
if (plan1.getGroupBy() != null && plan2.getGroupBy() != null) {
if (plan1.getGroupBy().isOrderPreserving() != plan2.getGroupBy().isOrderPreserving()) {
return plan1.getGroupBy().isOrderPreserving() ? -1 : 1;
}
}
// Use smaller table (table with fewest kv columns)
if (!useDataOverIndexHint || (table1.getType() == PTableType.INDEX && table2.getType() == PTableType.INDEX)) {
c = (table1.getColumns().size() - table1.getPKColumns().size()) - (table2.getColumns().size() - table2.getPKColumns().size());
if (c != 0)
return c;
}
if (table1.getIndexType() == IndexType.LOCAL && table2.getIndexType() != IndexType.LOCAL) {
return plan1.getContext().getScanRanges().getRanges().isEmpty() ? -1 : 1;
}
if (table2.getIndexType() == IndexType.LOCAL && table1.getIndexType() != IndexType.LOCAL) {
return plan2.getContext().getScanRanges().getRanges().isEmpty() ? 1 : -1;
}
if (table1.getType() == PTableType.INDEX && table2.getType() != PTableType.INDEX) {
return -comparisonOfDataVersusIndexTable;
}
if (table2.getType() == PTableType.INDEX && table1.getType() != PTableType.INDEX) {
return comparisonOfDataVersusIndexTable;
}
return 0;
}
});
return stopAtBestPlan ? bestCandidates.subList(0, 1) : bestCandidates;
}
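To see the static tie-breaking order at a glance, here is a minimal, self-contained sketch that mirrors the comparator's first three criteria over a hypothetical PlanSummary holder. The class and its fields are illustrative stand-ins, not Phoenix APIs, and the local-index and data-versus-index tie-breakers are omitted for brevity.
import java.util.Comparator;

// Illustrative only: not a Phoenix class.
final class PlanSummary {
    final int boundPkColumns;             // row key columns bound by the start/stop scan key
    final boolean orderPreservingGroupBy; // GROUP BY that preserves row key order
    final int nonPkColumns;               // proxy for table width: fewer KV columns wins

    PlanSummary(int boundPkColumns, boolean orderPreservingGroupBy, int nonPkColumns) {
        this.boundPkColumns = boundPkColumns;
        this.orderPreservingGroupBy = orderPreservingGroupBy;
        this.nonPkColumns = nonPkColumns;
    }

    // More bound PK columns first (negated so larger counts sort earlier), then an
    // order-preserving GROUP BY (false sorts before true), then the narrower table.
    static final Comparator<PlanSummary> STATIC_ORDER =
            Comparator.comparingInt((PlanSummary p) -> -p.boundPkColumns)
                      .thenComparing(p -> !p.orderPreservingGroupBy)
                      .thenComparingInt(p -> p.nonPkColumns);
}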
use of org.apache.phoenix.schema.PColumn in project phoenix by apache.
the class QueryOptimizer method addPlan.
private QueryPlan addPlan(PhoenixStatement statement, SelectStatement select, PTable index, List<? extends PDatum> targetColumns, ParallelIteratorFactory parallelIteratorFactory, QueryPlan dataPlan, boolean isHinted) throws SQLException {
int nColumns = dataPlan.getProjector().getColumnCount();
String tableAlias = dataPlan.getTableRef().getTableAlias();
// double quote in case it's case sensitive
String alias = tableAlias == null ? null : '"' + tableAlias + '"';
String schemaName = index.getParentSchemaName().getString();
schemaName = schemaName.length() == 0 ? null : '"' + schemaName + '"';
String tableName = '"' + index.getTableName().getString() + '"';
TableNode table = FACTORY.namedTable(alias, FACTORY.table(schemaName, tableName), select.getTableSamplingRate());
SelectStatement indexSelect = FACTORY.select(select, table);
ColumnResolver resolver = FromCompiler.getResolverForQuery(indexSelect, statement.getConnection());
// Do tuple projection only if the data plan's underlying table is a projected table.
boolean isProjected = dataPlan.getContext().getResolver().getTables().get(0).getTable().getType() == PTableType.PROJECTED;
// Check index state of now potentially updated index table to make sure it's active
TableRef indexTableRef = resolver.getTables().get(0);
PTable indexTable = indexTableRef.getTable();
PIndexState indexState = indexTable.getIndexState();
Map<TableRef, QueryPlan> dataPlans = Collections.singletonMap(indexTableRef, dataPlan);
if (indexState == PIndexState.ACTIVE || indexState == PIndexState.PENDING_ACTIVE || (indexState == PIndexState.PENDING_DISABLE && isUnderPendingDisableThreshold(indexTableRef.getCurrentTime(), indexTable.getIndexDisableTimestamp()))) {
try {
// translate nodes that match expressions that are indexed to the associated column parse node
indexSelect = ParseNodeRewriter.rewrite(indexSelect, new IndexExpressionParseNodeRewriter(index, null, statement.getConnection(), indexSelect.getUdfParseNodes()));
QueryCompiler compiler = new QueryCompiler(statement, indexSelect, resolver, targetColumns, parallelIteratorFactory, dataPlan.getContext().getSequenceManager(), isProjected, true, dataPlans);
QueryPlan plan = compiler.compile();
// For a local index and a query with no WHERE clause, bail out if the plan still
// needs columns from the data table: fetching them for every index row would cost
// as much as a full scan. If the index covers all projected columns, it can be
// used even though the query has no WHERE clause.
if (index.getIndexType() == IndexType.LOCAL && indexSelect.getWhere() == null && !plan.getContext().getDataColumns().isEmpty()) {
return null;
}
indexTableRef = plan.getTableRef();
indexTable = indexTableRef.getTable();
indexState = indexTable.getIndexState();
// Re-check the index state of the now potentially updated index table, and verify
// that the projected column counts match; this handles wildcard projections, where
// the index must contain all columns from the data table to be usable.
if (indexState == PIndexState.ACTIVE || indexState == PIndexState.PENDING_ACTIVE || (indexState == PIndexState.PENDING_DISABLE && isUnderPendingDisableThreshold(indexTableRef.getCurrentTime(), indexTable.getIndexDisableTimestamp()))) {
if (plan.getProjector().getColumnCount() == nColumns) {
return plan;
} else if (index.getIndexType() == IndexType.GLOBAL) {
String schemaNameStr = index.getSchemaName() == null ? null : index.getSchemaName().getString();
String tableNameStr = index.getTableName() == null ? null : index.getTableName().getString();
throw new ColumnNotFoundException(schemaNameStr, tableNameStr, null, "*");
}
}
} catch (ColumnNotFoundException e) {
/* Means that a column is being used that's not in our index.
* Since we currently don't keep stats, we don't know the selectivity of the index.
* For now, if this is a hinted plan, we will try rewriting the query as a subquery;
* otherwise we just don't use this index (as opposed to trying to join back from
* the index table to the data table).
*/
SelectStatement dataSelect = (SelectStatement) dataPlan.getStatement();
ParseNode where = dataSelect.getWhere();
if (isHinted && where != null) {
StatementContext context = new StatementContext(statement, resolver);
WhereConditionRewriter whereRewriter = new WhereConditionRewriter(FromCompiler.getResolver(dataPlan.getTableRef()), context);
where = where.accept(whereRewriter);
if (where != null) {
PTable dataTable = dataPlan.getTableRef().getTable();
List<PColumn> pkColumns = dataTable.getPKColumns();
List<AliasedNode> aliasedNodes = Lists.<AliasedNode>newArrayListWithExpectedSize(pkColumns.size());
List<ParseNode> nodes = Lists.<ParseNode>newArrayListWithExpectedSize(pkColumns.size());
boolean isSalted = dataTable.getBucketNum() != null;
boolean isTenantSpecific = dataTable.isMultiTenant() && statement.getConnection().getTenantId() != null;
int posOffset = (isSalted ? 1 : 0) + (isTenantSpecific ? 1 : 0);
for (int i = posOffset; i < pkColumns.size(); i++) {
PColumn column = pkColumns.get(i);
String indexColName = IndexUtil.getIndexColumnName(column);
ParseNode indexColNode = new ColumnParseNode(null, '"' + indexColName + '"', indexColName);
PDataType indexColType = IndexUtil.getIndexColumnDataType(column);
PDataType dataColType = column.getDataType();
if (indexColType != dataColType) {
indexColNode = FACTORY.cast(indexColNode, dataColType, null, null);
}
aliasedNodes.add(FACTORY.aliasedNode(null, indexColNode));
nodes.add(new ColumnParseNode(null, '"' + column.getName().getString() + '"'));
}
SelectStatement innerSelect = FACTORY.select(indexSelect.getFrom(), indexSelect.getHint(), false, aliasedNodes, where, null, null, null, null, null, indexSelect.getBindCount(), false, indexSelect.hasSequence(), Collections.<SelectStatement>emptyList(), indexSelect.getUdfParseNodes());
ParseNode outerWhere = FACTORY.in(nodes.size() == 1 ? nodes.get(0) : FACTORY.rowValueConstructor(nodes), FACTORY.subquery(innerSelect, false), false, true);
ParseNode extractedCondition = whereRewriter.getExtractedCondition();
if (extractedCondition != null) {
outerWhere = FACTORY.and(Lists.newArrayList(outerWhere, extractedCondition));
}
HintNode hint = HintNode.combine(HintNode.subtract(indexSelect.getHint(), new Hint[] { Hint.INDEX, Hint.NO_CHILD_PARENT_JOIN_OPTIMIZATION }), FACTORY.hint("NO_INDEX"));
SelectStatement query = FACTORY.select(dataSelect, hint, outerWhere);
ColumnResolver queryResolver = FromCompiler.getResolverForQuery(query, statement.getConnection());
query = SubqueryRewriter.transform(query, queryResolver, statement.getConnection());
queryResolver = FromCompiler.getResolverForQuery(query, statement.getConnection());
query = StatementNormalizer.normalize(query, queryResolver);
QueryPlan plan = new QueryCompiler(statement, query, queryResolver, targetColumns, parallelIteratorFactory, dataPlan.getContext().getSequenceManager(), isProjected, true, dataPlans).compile();
return plan;
}
}
}
}
return null;
}
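To make the hinted rewrite concrete, suppose a hypothetical data table T with primary key (K1, K2) and a hinted index IDX that lacks a column W referenced by the query. All names here are illustrative, and the placeholders gloss over IndexUtil's actual quoting and index column naming conventions.
// Hinted query referencing W, which is missing from IDX:
//   SELECT /*+ INDEX(T IDX) */ K1, K2, W FROM T WHERE V = 'x'
// Shape of the statement built in the catch block above: the index answers the
// filter, and the data table PK flows back through an IN over a row value constructor:
//   SELECT /*+ NO_INDEX */ K1, K2, W FROM T
//   WHERE (K1, K2) IN (SELECT <index column for K1>, <index column for K2>
//                      FROM IDX WHERE <rewritten filter on V>)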
use of org.apache.phoenix.schema.PColumn in project phoenix by apache.
the class PArrayDataTypeEncoder method getEstimatedByteSize.
/**
* @param colValueMap map from column to value
* @return estimated encoded size
*/
public static int getEstimatedByteSize(PTable table, int rowLength, Map<PColumn, byte[]> colValueMap) {
// iterate over column families
int rowSize = 0;
for (PColumnFamily family : table.getColumnFamilies()) {
Collection<PColumn> columns = family.getColumns();
// we add a non null value to the start so that we can represent absent values in the array with negative offsets
int numColumns = columns.size() + 1;
int cellSize = 1;
int nulls = 0;
int maxOffset = 0;
// iterate over columns
for (PColumn column : columns) {
if (colValueMap.containsKey(column)) {
byte[] colValue = colValueMap.get(column);
// the column value is null
if (colValue == null || colValue.length == 0) {
++nulls;
maxOffset = cellSize;
} else {
// count the bytes written to serialize nulls
if (nulls > 0) {
cellSize += (1 + Math.ceil(nulls / 255d));
nulls = 0;
}
maxOffset = cellSize;
cellSize += colValue.length;
}
} else {
// the column value is absent
++nulls;
maxOffset = cellSize;
}
}
// count the bytes used for the offset array
cellSize += PArrayDataType.useShortForOffsetArray(maxOffset, PArrayDataType.IMMUTABLE_SERIALIZATION_VERSION)
        ? numColumns * Bytes.SIZEOF_SHORT
        : numColumns * Bytes.SIZEOF_INT;
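// presumably the 4-byte int that records the position of the offset array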
cellSize += 4;
// count the bytes used for header information
cellSize += 5;
// add the size of the single cell containing all column values
rowSize += KeyValue.getKeyValueDataStructureSize(rowLength, family.getName().getBytes().length, QueryConstants.SINGLE_KEYVALUE_COLUMN_QUALIFIER_BYTES.length, cellSize);
}
return rowSize;
}
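As a worked example of the arithmetic above (all figures assumed), the hypothetical helper below replays the per-family loop over plain value lengths rather than PColumns; a null entry stands for a null or absent column value.
// Illustrative only: replays the cell-size estimate for a list of value lengths.
static int estimateCellSize(Integer[] lengths) {
    // +1 for the leading non-null value that anchors negative offsets for absent values
    int numColumns = lengths.length + 1;
    int cellSize = 1;
    int nulls = 0;
    int maxOffset = 0;
    for (Integer len : lengths) {
        if (len == null || len == 0) {
            ++nulls;
            maxOffset = cellSize;
        } else {
            if (nulls > 0) {
                cellSize += (1 + (int) Math.ceil(nulls / 255d));
                nulls = 0;
            }
            maxOffset = cellSize;
            cellSize += len;
        }
    }
    // Stand-in for PArrayDataType.useShortForOffsetArray
    boolean useShort = maxOffset <= Short.MAX_VALUE;
    cellSize += numColumns * (useShort ? 2 : 4) + 4 + 5;
    // For { 10, null, null }: maxOffset == 11 and numColumns == 4, so the result is
    // 11 + 4 * 2 + 4 + 5 == 28 bytes, before the per-cell KeyValue overhead.
    return cellSize;
}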
use of org.apache.phoenix.schema.PColumn in project phoenix by apache.
the class SchemaUtil method estimateRowSize.
/**
* Imperfect estimate of row size given a PTable
* TODO: keep row count in stats table and use total size / row count instead
* @param table the table to estimate row size for
* @return estimate of size in bytes of a row
*/
public static long estimateRowSize(PTable table) {
int keyLength = estimateKeyLength(table);
long rowSize = 0;
for (PColumn column : table.getColumns()) {
if (!SchemaUtil.isPKColumn(column)) {
PDataType type = column.getDataType();
Integer maxLength = column.getMaxLength();
int valueLength = !type.isFixedWidth()
        ? VAR_KV_LENGTH_ESTIMATE
        : (maxLength == null ? type.getByteSize() : maxLength);
rowSize += KeyValue.getKeyValueDataStructureSize(keyLength, column.getFamilyName().getBytes().length, column.getName().getBytes().length, valueLength);
}
}
byte[] emptyKeyValueKV = EncodedColumnsUtil.getEmptyKeyValueInfo(table).getFirst();
// Empty key value
rowSize += KeyValue.getKeyValueDataStructureSize(keyLength, getEmptyColumnFamily(table).length, emptyKeyValueKV.length, 0);
return rowSize;
}
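For intuition, a hedged worked example with assumed figures: a table whose estimated key length is 20 bytes and which has two non-PK columns, a BIGINT (8 bytes from getByteSize()) and a VARCHAR (estimated at VAR_KV_LENGTH_ESTIMATE since it is not fixed width), contributes one KeyValue per column plus the empty KeyValue. Each of those three entries carries the full 20-byte row key and the fixed HBase KeyValue overhead, so the estimate is typically dominated by per-column repetition of the key rather than by the values themselves.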
use of org.apache.phoenix.schema.PColumn in project phoenix by apache.
the class PhoenixRuntime method getPkColsDataTypesForSql.
/**
* @param columns - Initialized empty list to be filled with the pairs of column family name and column name for columns that are used
* as the row key for the query plan. Column family names are optional and hence the first part of the pair is nullable.
* Column names and family names are enclosed in double quotes to allow for case sensitivity and for presence of
* special characters. The salting column and view index id column are not included. If the connection is tenant specific
* and the table used by the query plan is multi-tenant, then the tenant id column is not included either.
* @param dataTypes - Initialized empty list to be filled with the corresponding data type for the columns in @param columns.
* @param plan - query plan to get info for
* @param conn - phoenix connection used to generate the query plan. Caller should take care of closing the connection appropriately.
* @param forDataTable - if true, then column names and data types correspond to the data table even if the query plan uses
* the secondary index table. If false, and if the query plan uses the secondary index table, then the column names and data
* types correspond to the index table.
* @throws SQLException
*/
@Deprecated
public static void getPkColsDataTypesForSql(List<Pair<String, String>> columns, List<String> dataTypes, QueryPlan plan, Connection conn, boolean forDataTable) throws SQLException {
checkNotNull(columns);
checkNotNull(dataTypes);
checkNotNull(plan);
checkNotNull(conn);
List<PColumn> pkColumns = getPkColumns(plan.getTableRef().getTable(), conn, forDataTable);
String columnName;
String familyName;
for (PColumn pCol : pkColumns) {
String sqlTypeName = getSqlTypeName(pCol);
dataTypes.add(sqlTypeName);
columnName = addQuotes(pCol.getName().getString());
familyName = pCol.getFamilyName() != null ? addQuotes(pCol.getFamilyName().getString()) : null;
columns.add(new Pair<String, String>(familyName, columnName));
}
}
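Although the method is deprecated, a minimal usage sketch looks like the following; the JDBC URL and table name are assumptions, PhoenixRuntime.getOptimizedQueryPlan is one way to obtain the plan, and closing the connection is the caller's responsibility as the javadoc notes.
import java.sql.Connection;
import java.sql.DriverManager;
import java.sql.PreparedStatement;
import java.util.ArrayList;
import java.util.List;
import org.apache.hadoop.hbase.util.Pair;
import org.apache.phoenix.compile.QueryPlan;
import org.apache.phoenix.util.PhoenixRuntime;

public class PkColsExample {
    public static void main(String[] args) throws Exception {
        try (Connection conn = DriverManager.getConnection("jdbc:phoenix:localhost")) {
            PreparedStatement stmt = conn.prepareStatement("SELECT * FROM MY_TABLE");
            QueryPlan plan = PhoenixRuntime.getOptimizedQueryPlan(stmt);
            List<Pair<String, String>> columns = new ArrayList<>();
            List<String> dataTypes = new ArrayList<>();
            PhoenixRuntime.getPkColsDataTypesForSql(columns, dataTypes, plan, conn, true);
            for (int i = 0; i < columns.size(); i++) {
                System.out.println(columns.get(i).getSecond() + " " + dataTypes.get(i));
            }
        }
    }
}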