Search in sources :

Example 91 with PColumn

use of org.apache.phoenix.schema.PColumn in project phoenix by apache.

the class QueryOptimizer method orderPlansBestToWorst.

 * Order the plans among all the possible ones from best to worst.
 * If option COST_BASED_OPTIMIZER_ENABLED is on and stats are available, we order the plans based on
 * their costs; otherwise we use the following simple algorithm:
 * 1) If the query is a point lookup (i.e. we have a set of exact row keys), choose that one immediately.
 * 2) If the query has an ORDER BY and a LIMIT, choose the plan that has all the ORDER BY expressions
 * in the same order as the row key columns.
 * 3) If more than one plan meets (1 & 2), choose the plan with:
 *    a) the most row key columns that may be used to form the start/stop scan key (i.e. bound slots).
 *    b) the plan that preserves ordering for a group by.
 *    c) the non-local index table plan.
 * @param select the statement being optimized; its hints are consulted when breaking ties
 * @param plans the list of candidate plans
 * @param stopAtBestPlan if true, only the single best plan is returned
 * @return list of plans ordered from best to worst.
private List<QueryPlan> orderPlansBestToWorst(SelectStatement select, List<QueryPlan> plans, boolean stopAtBestPlan) {
    // NOTE(review): this listing appears to have lost lines during extraction (closing braces
    // and some statements are not visible); confirm details against the upstream source.
    // The first candidate is used as the data plan (its table is read further below).
    final QueryPlan dataPlan = plans.get(0);
    // A single candidate needs no ordering.
    if (plans.size() == 1) {
        return plans;
    if (this.costBased) {
        // Cost-based mode: sort the plans by the natural ordering of their costs.
        Collections.sort(plans, new Comparator<QueryPlan>() {

            public int compare(QueryPlan plan1, QueryPlan plan2) {
                return plan1.getCost().compareTo(plan2.getCost());
        // Use the cost-based order only when the first (cheapest) plan has a known cost;
        // otherwise fall back to static ordering.
        if (!plans.get(0).getCost().isUnknown()) {
            return stopAtBestPlan ? plans.subList(0, 1) : plans;
     * If we have a plan(s) that are just point lookups (i.e. fully qualified row
     * keys), then favor those first.
    List<QueryPlan> candidates = Lists.newArrayListWithExpectedSize(plans.size());
    if (stopAtBestPlan) {
        // If we're stopping at the best plan, only consider point lookups if there are any
        for (QueryPlan plan : plans) {
            // NOTE(review): the body of this branch is not visible in this listing.
            if (plan.getContext().getScanRanges().isPointLookup()) {
    // NOTE(review): the body of this else branch is not visible in this listing.
    } else {
     * If we have a plan(s) that removes the order by, choose from among these,
     * as this is typically the most expensive operation. Once we have stats, if
     * there's a limit on the query, we might choose a different plan. For example
     * if the limit was a very large number and the combination of applying other
     * filters on the row key are estimated to choose fewer rows, we'd choose that
     * one.
    // By default iterate over all plans and reuse `candidates` as the result list.
    List<QueryPlan> stillCandidates = plans;
    List<QueryPlan> bestCandidates = candidates;
    // When `candidates` is non-empty, iterate over it instead and collect results in a fresh list.
    if (!candidates.isEmpty()) {
        stillCandidates = candidates;
        bestCandidates = Lists.<QueryPlan>newArrayListWithExpectedSize(candidates.size());
    for (QueryPlan plan : stillCandidates) {
        // If ORDER BY optimized out (or not present at all)
        if (plan.getOrderBy().getOrderByExpressions().isEmpty()) {
    // NOTE(review): the handling of an empty bestCandidates list is not visible in this listing.
    if (bestCandidates.isEmpty()) {
    int nViewConstants = 0;
    PTable dataTable = dataPlan.getTableRef().getTable();
    // For a view, each column with a non-null view constant is examined
    // (presumably counted into nViewConstants; the increment is not visible here).
    if (dataTable.getType() == PTableType.VIEW) {
        for (PColumn column : dataTable.getColumns()) {
            if (column.getViewConstant() != null) {
    // Final copies so the anonymous comparator below can capture them.
    final int boundRanges = nViewConstants;
    final boolean useDataOverIndexHint = select.getHint().hasHint(Hint.USE_DATA_OVER_INDEX_TABLE);
    // -1 when the USE_DATA_OVER_INDEX_TABLE hint is present, 1 otherwise; used (negated or not)
    // as the comparator result when exactly one of the two tables is an index.
    final int comparisonOfDataVersusIndexTable = useDataOverIndexHint ? -1 : 1;
    Collections.sort(bestCandidates, new Comparator<QueryPlan>() {

        public int compare(QueryPlan plan1, QueryPlan plan2) {
            PTable table1 = plan1.getTableRef().getTable();
            PTable table2 = plan2.getTableRef().getTable();
            int boundCount1 = plan1.getContext().getScanRanges().getBoundPkColumnCount();
            int boundCount2 = plan2.getContext().getScanRanges().getBoundPkColumnCount();
            // For shared indexes (i.e. indexes on views and local indexes),
            // a) add back any view constants as these won't be in the index, and
            // b) ignore the viewIndexId which will be part of the row key columns.
            // The difference is (plan2 - plan1), so the plan with more bound columns sorts first.
            int c = (boundCount2 + (table2.getViewIndexId() == null ? 0 : (boundRanges - 1))) - (boundCount1 + (table1.getViewIndexId() == null ? 0 : (boundRanges - 1)));
            if (c != 0)
                return c;
            // Tie-breaker: when both plans have a GROUP BY and only one is order preserving,
            // the order-preserving plan sorts first.
            if (plan1.getGroupBy() != null && plan2.getGroupBy() != null) {
                if (plan1.getGroupBy().isOrderPreserving() != plan2.getGroupBy().isOrderPreserving()) {
                    return plan1.getGroupBy().isOrderPreserving() ? -1 : 1;
            // Use smaller table (table with fewest kv columns)
            // Skipped when the data-over-index hint is set, unless both tables are indexes.
            if (!useDataOverIndexHint || (table1.getType() == PTableType.INDEX && table2.getType() == PTableType.INDEX)) {
                c = (table1.getColumns().size() - table1.getPKColumns().size()) - (table2.getColumns().size() - table2.getPKColumns().size());
                if (c != 0)
                    return c;
            // Local vs. non-local index: the local index plan sorts first only when its
            // scan ranges are empty; otherwise the non-local plan sorts first.
            if (table1.getIndexType() == IndexType.LOCAL && table2.getIndexType() != IndexType.LOCAL) {
                return plan1.getContext().getScanRanges().getRanges().isEmpty() ? -1 : 1;
            if (table2.getIndexType() == IndexType.LOCAL && table1.getIndexType() != IndexType.LOCAL) {
                return plan2.getContext().getScanRanges().getRanges().isEmpty() ? 1 : -1;
            // Index vs. non-index table: the index sorts first unless the
            // USE_DATA_OVER_INDEX_TABLE hint flips the sign.
            if (table1.getType() == PTableType.INDEX && table2.getType() != PTableType.INDEX) {
                return -comparisonOfDataVersusIndexTable;
            if (table2.getType() == PTableType.INDEX && table1.getType() != PTableType.INDEX) {
                return comparisonOfDataVersusIndexTable;
            return 0;
    // Return only the head of the sorted list when the caller asked for the best plan.
    return stopAtBestPlan ? bestCandidates.subList(0, 1) : bestCandidates;
Also used : PColumn(org.apache.phoenix.schema.PColumn) BaseQueryPlan(org.apache.phoenix.execute.BaseQueryPlan) QueryPlan(org.apache.phoenix.compile.QueryPlan) Hint(org.apache.phoenix.parse.HintNode.Hint) PTable(org.apache.phoenix.schema.PTable)

Example 92 with PColumn

use of org.apache.phoenix.schema.PColumn in project phoenix by apache.

the class QueryOptimizer method addPlan.

private QueryPlan addPlan(PhoenixStatement statement, SelectStatement select, PTable index, List<? extends PDatum> targetColumns, ParallelIteratorFactory parallelIteratorFactory, QueryPlan dataPlan, boolean isHinted) throws SQLException {
    // Attempts to compile a plan for the query against the given index table. Returns the
    // compiled plan, or null when the index is not used.
    // NOTE(review): several statements in this listing are truncated (e.g. the right-hand sides
    // of indexSelect, innerSelect, outerWhere and query) and closing braces are missing;
    // confirm details against the upstream source.
    // Column count of the data plan's projection; an index plan must match it to be accepted.
    int nColumns = dataPlan.getProjector().getColumnCount();
    String tableAlias = dataPlan.getTableRef().getTableAlias();
    // double quote in case it's case sensitive
    String alias = tableAlias == null ? null : '"' + tableAlias + '"';
    String schemaName = index.getParentSchemaName().getString();
    // An empty schema name is passed on as null; otherwise it is double quoted like the alias.
    schemaName = schemaName.length() == 0 ? null : '"' + schemaName + '"';
    String tableName = '"' + index.getTableName().getString() + '"';
    TableNode table = FACTORY.namedTable(alias, FACTORY.table(schemaName, tableName), select.getTableSamplingRate());
    SelectStatement indexSelect =, table);
    ColumnResolver resolver = FromCompiler.getResolverForQuery(indexSelect, statement.getConnection());
    // We will or will not do tuple projection according to the data plan.
    boolean isProjected = dataPlan.getContext().getResolver().getTables().get(0).getTable().getType() == PTableType.PROJECTED;
    // Check index state of now potentially updated index table to make sure it's active
    TableRef indexTableRef = resolver.getTables().get(0);
    PTable indexTable = indexTableRef.getTable();
    PIndexState indexState = indexTable.getIndexState();
    // Single-entry map from the index table ref to the data plan, handed to the QueryCompiler.
    Map<TableRef, QueryPlan> dataPlans = Collections.singletonMap(indexTableRef, dataPlan);
    // Proceed only for ACTIVE or PENDING_ACTIVE indexes, or PENDING_DISABLE ones that are
    // still under the pending-disable threshold.
    if (indexState == PIndexState.ACTIVE || indexState == PIndexState.PENDING_ACTIVE || (indexState == PIndexState.PENDING_DISABLE && isUnderPendingDisableThreshold(indexTableRef.getCurrentTime(), indexTable.getIndexDisableTimestamp()))) {
        try {
            // translate nodes that match expressions that are indexed to the associated column parse node
            indexSelect = ParseNodeRewriter.rewrite(indexSelect, new IndexExpressionParseNodeRewriter(index, null, statement.getConnection(), indexSelect.getUdfParseNodes()));
            QueryCompiler compiler = new QueryCompiler(statement, indexSelect, resolver, targetColumns, parallelIteratorFactory, dataPlan.getContext().getSequenceManager(), isProjected, true, dataPlans);
            QueryPlan plan = compiler.compile();
            // Do not use a local index when the query has no WHERE clause and the compiled plan
            // still reports data columns (getDataColumns() is non-empty).
            if (index.getIndexType() == IndexType.LOCAL && indexSelect.getWhere() == null && !plan.getContext().getDataColumns().isEmpty()) {
                return null;
            // Re-read the index table and its state from the compiled plan.
            indexTableRef = plan.getTableRef();
            indexTable = indexTableRef.getTable();
            indexState = indexTable.getIndexState();
            // Accept the plan only if the index is still usable and the plan projects the same
            // number of columns as the data plan.
            if (indexState == PIndexState.ACTIVE || indexState == PIndexState.PENDING_ACTIVE || (indexState == PIndexState.PENDING_DISABLE && isUnderPendingDisableThreshold(indexTableRef.getCurrentTime(), indexTable.getIndexDisableTimestamp()))) {
                if (plan.getProjector().getColumnCount() == nColumns) {
                    return plan;
                } else if (index.getIndexType() == IndexType.GLOBAL) {
                    // Column count mismatch on a global index: raise ColumnNotFoundException,
                    // which is the exception type handled by the catch clause below.
                    String schemaNameStr = index.getSchemaName() == null ? null : index.getSchemaName().getString();
                    String tableNameStr = index.getTableName() == null ? null : index.getTableName().getString();
                    throw new ColumnNotFoundException(schemaNameStr, tableNameStr, null, "*");
        } catch (ColumnNotFoundException e) {
            /* Means that a column is being used that's not in our index.
                 * Since we currently don't keep stats, we don't know the selectivity of the index.
                 * For now, if this is a hinted plan, we will try rewriting the query as a subquery;
                 * otherwise we just don't use this index (as opposed to trying to join back from
                 * the index table to the data table.
            SelectStatement dataSelect = (SelectStatement) dataPlan.getStatement();
            ParseNode where = dataSelect.getWhere();
            // The subquery rewrite is attempted only for hinted plans that have a WHERE clause.
            if (isHinted && where != null) {
                StatementContext context = new StatementContext(statement, resolver);
                WhereConditionRewriter whereRewriter = new WhereConditionRewriter(FromCompiler.getResolver(dataPlan.getTableRef()), context);
                where = where.accept(whereRewriter);
                if (where != null) {
                    PTable dataTable = dataPlan.getTableRef().getTable();
                    List<PColumn> pkColumns = dataTable.getPKColumns();
                    List<AliasedNode> aliasedNodes = Lists.<AliasedNode>newArrayListWithExpectedSize(pkColumns.size());
                    List<ParseNode> nodes = Lists.<ParseNode>newArrayListWithExpectedSize(pkColumns.size());
                    // Skip one leading PK column for a salted table and one more for a
                    // multi-tenant table accessed through a tenant-specific connection.
                    boolean isSalted = dataTable.getBucketNum() != null;
                    boolean isTenantSpecific = dataTable.isMultiTenant() && statement.getConnection().getTenantId() != null;
                    int posOffset = (isSalted ? 1 : 0) + (isTenantSpecific ? 1 : 0);
                    for (int i = posOffset; i < pkColumns.size(); i++) {
                        PColumn column = pkColumns.get(i);
                        String indexColName = IndexUtil.getIndexColumnName(column);
                        ParseNode indexColNode = new ColumnParseNode(null, '"' + indexColName + '"', indexColName);
                        PDataType indexColType = IndexUtil.getIndexColumnDataType(column);
                        PDataType dataColType = column.getDataType();
                        // Cast the index column back to the data column type when the two differ.
                        if (indexColType != dataColType) {
                            indexColNode = FACTORY.cast(indexColNode, dataColType, null, null);
                        aliasedNodes.add(FACTORY.aliasedNode(null, indexColNode));
                        nodes.add(new ColumnParseNode(null, '"' + column.getName().getString() + '"'));
                    SelectStatement innerSelect =, indexSelect.getHint(), false, aliasedNodes, where, null, null, null, null, null, indexSelect.getBindCount(), false, indexSelect.hasSequence(), Collections.<SelectStatement>emptyList(), indexSelect.getUdfParseNodes());
                    ParseNode outerWhere = == 1 ? nodes.get(0) : FACTORY.rowValueConstructor(nodes), FACTORY.subquery(innerSelect, false), false, true);
                    // AND any condition the where-rewriter extracted onto the outer WHERE.
                    ParseNode extractedCondition = whereRewriter.getExtractedCondition();
                    if (extractedCondition != null) {
                        outerWhere = FACTORY.and(Lists.newArrayList(outerWhere, extractedCondition));
                    // Outer query hint: the index select's hint with INDEX and
                    // NO_CHILD_PARENT_JOIN_OPTIMIZATION subtracted, combined with NO_INDEX.
                    HintNode hint = HintNode.combine(HintNode.subtract(indexSelect.getHint(), new Hint[] { Hint.INDEX, Hint.NO_CHILD_PARENT_JOIN_OPTIMIZATION }), FACTORY.hint("NO_INDEX"));
                    SelectStatement query =, hint, outerWhere);
                    // Resolve, rewrite the subquery, re-resolve, normalize, then compile the result.
                    ColumnResolver queryResolver = FromCompiler.getResolverForQuery(query, statement.getConnection());
                    query = SubqueryRewriter.transform(query, queryResolver, statement.getConnection());
                    queryResolver = FromCompiler.getResolverForQuery(query, statement.getConnection());
                    query = StatementNormalizer.normalize(query, queryResolver);
                    QueryPlan plan = new QueryCompiler(statement, query, queryResolver, targetColumns, parallelIteratorFactory, dataPlan.getContext().getSequenceManager(), isProjected, true, dataPlans).compile();
                    return plan;
    // No usable plan could be produced for this index.
    return null;
Also used : PIndexState(org.apache.phoenix.schema.PIndexState) BaseQueryPlan(org.apache.phoenix.execute.BaseQueryPlan) QueryPlan(org.apache.phoenix.compile.QueryPlan) QueryCompiler(org.apache.phoenix.compile.QueryCompiler) Hint(org.apache.phoenix.parse.HintNode.Hint) PTable(org.apache.phoenix.schema.PTable) StatementContext(org.apache.phoenix.compile.StatementContext) PColumn(org.apache.phoenix.schema.PColumn) SelectStatement(org.apache.phoenix.parse.SelectStatement) ColumnNotFoundException(org.apache.phoenix.schema.ColumnNotFoundException) PDataType(org.apache.phoenix.schema.types.PDataType) ColumnParseNode(org.apache.phoenix.parse.ColumnParseNode) HintNode(org.apache.phoenix.parse.HintNode) TableNode(org.apache.phoenix.parse.TableNode) JoinTableNode(org.apache.phoenix.parse.JoinTableNode) NamedTableNode(org.apache.phoenix.parse.NamedTableNode) BindTableNode(org.apache.phoenix.parse.BindTableNode) DerivedTableNode(org.apache.phoenix.parse.DerivedTableNode) ColumnParseNode(org.apache.phoenix.parse.ColumnParseNode) AndParseNode(org.apache.phoenix.parse.AndParseNode) ParseNode(org.apache.phoenix.parse.ParseNode) List(java.util.List) IndexExpressionParseNodeRewriter(org.apache.phoenix.parse.IndexExpressionParseNodeRewriter) ColumnResolver(org.apache.phoenix.compile.ColumnResolver) TableRef(org.apache.phoenix.schema.TableRef)

Example 93 with PColumn

use of org.apache.phoenix.schema.PColumn in project phoenix by apache.

the class PArrayDataTypeEncoder method getEstimatedByteSize.

 * @param colValueMap map from column to value
 * @return estimated encoded size
public static int getEstimatedByteSize(PTable table, int rowLength, Map<PColumn, byte[]> colValueMap) {
    // Sums, over all column families of the table, the KeyValue size of one cell that holds
    // all of that family's column values.
    // NOTE(review): this listing appears to have lost lines during extraction (closing braces are
    // missing and no visible statement increments `nulls`); confirm against the upstream source.
    // iterate over column families
    int rowSize = 0;
    for (PColumnFamily family : table.getColumnFamilies()) {
        Collection<PColumn> columns = family.getColumns();
        // we add a non null value to the start so that we can represent absent values in the array with negative offsets
        int numColumns = columns.size() + 1;
        // Starts at 1 (presumably for the leading non-null value mentioned above - confirm).
        int cellSize = 1;
        int nulls = 0;
        int maxOffset = 0;
        // iterate over columns
        for (PColumn column : columns) {
            if (colValueMap.containsKey(column)) {
                byte[] colValue = colValueMap.get(column);
                // the column value is null
                if (colValue == null || colValue.length == 0) {
                    maxOffset = cellSize;
                } else {
                    // count the bytes written to serialize nulls
                    if (nulls > 0) {
                        // NOTE(review): `nulls / 255` is int division, so it is truncated before
                        // Math.ceil sees it and ceil has no effect; if rounding up was intended,
                        // this under-counts whenever nulls is not a multiple of 255.
                        cellSize += (1 + Math.ceil(nulls / 255));
                        nulls = 0;
                    // The value starts at the current cell size; then account for its bytes.
                    maxOffset = cellSize;
                    cellSize += colValue.length;
            } else // the column value is absent
                maxOffset = cellSize;
        // count the bytes used for the offset array
        // (short-sized or int-sized entries, chosen by useShortForOffsetArray for maxOffset)
        cellSize += PArrayDataType.useShortForOffsetArray(maxOffset, PArrayDataType.IMMUTABLE_SERIALIZATION_VERSION) ? numColumns * Bytes.SIZEOF_SHORT : numColumns * Bytes.SIZEOF_INT;
        // NOTE(review): what these 4 bytes represent is not shown here - confirm against the encoder.
        cellSize += 4;
        // count the bytes used for header information
        cellSize += 5;
        // add the size of the single cell containing all column values
        rowSize += KeyValue.getKeyValueDataStructureSize(rowLength, family.getName().getBytes().length, QueryConstants.SINGLE_KEYVALUE_COLUMN_QUALIFIER_BYTES.length, cellSize);
    return rowSize;
Also used : PColumn(org.apache.phoenix.schema.PColumn) PColumnFamily(org.apache.phoenix.schema.PColumnFamily)

Example 94 with PColumn

use of org.apache.phoenix.schema.PColumn in project phoenix by apache.

the class SchemaUtil method estimateRowSize.

 * Imperfect estimate of row size given a PTable
 * TODO: keep row count in stats table and use total size / row count instead
 * @param table the table whose row size is estimated
 * @return estimate of size in bytes of a row
public static long estimateRowSize(PTable table) {
    // Adds up one KeyValue per non-PK column plus one empty key value, all sharing the
    // same estimated key length.
    // NOTE(review): closing braces are missing from this listing; confirm against upstream.
    int keyLength = estimateKeyLength(table);
    long rowSize = 0;
    for (PColumn column : table.getColumns()) {
        // PK columns are skipped here; only non-PK columns contribute a KeyValue.
        if (!SchemaUtil.isPKColumn(column)) {
            PDataType type = column.getDataType();
            Integer maxLength = column.getMaxLength();
            // Variable-width types use VAR_KV_LENGTH_ESTIMATE; fixed-width types use the
            // column's max length when set, otherwise the type's byte size.
            int valueLength = !type.isFixedWidth() ? VAR_KV_LENGTH_ESTIMATE : maxLength == null ? type.getByteSize() : maxLength;
            rowSize += KeyValue.getKeyValueDataStructureSize(keyLength, column.getFamilyName().getBytes().length, column.getName().getBytes().length, valueLength);
    byte[] emptyKeyValueKV = EncodedColumnsUtil.getEmptyKeyValueInfo(table).getFirst();
    // Empty key value
    // (zero-length value in the table's empty column family)
    rowSize += KeyValue.getKeyValueDataStructureSize(keyLength, getEmptyColumnFamily(table).length, emptyKeyValueKV.length, 0);
    return rowSize;
Also used : PColumn(org.apache.phoenix.schema.PColumn) PDataType(org.apache.phoenix.schema.types.PDataType)

Example 95 with PColumn

use of org.apache.phoenix.schema.PColumn in project phoenix by apache.

the class PhoenixRuntime method getPkColsDataTypesForSql.

 * @param columns - Initialized empty list to be filled with the pairs of column family name and column name for columns that are used
 * as row key for the query plan. Column family names are optional and hence the first part of the pair is nullable.
 * Column names and family names are enclosed in double quotes to allow for case sensitivity and for presence of
 * special characters. Salting column and view index id column are not included. If the connection is tenant specific
 * and the table used by the query plan is multi-tenant, then the tenant id column is not included as well.
 * @param dataTypes - Initialized empty list to be filled with the corresponding data type for the columns in @param columns.
 * @param plan - query plan to get info for
 * @param conn - phoenix connection used to generate the query plan. Caller should take care of closing the connection appropriately.
 * @param forDataTable - if true, then column names and data types correspond to the data table even if the query plan uses
 * the secondary index table. If false, and if the query plan uses the secondary index table, then the column names and data
 * types correspond to the index table.
 * @throws SQLException
public static void getPkColsDataTypesForSql(List<Pair<String, String>> columns, List<String> dataTypes, QueryPlan plan, Connection conn, boolean forDataTable) throws SQLException {
    // Resolve the PK columns of the plan's table via getPkColumns (forDataTable is passed through).
    List<PColumn> pkColumns = getPkColumns(plan.getTableRef().getTable(), conn, forDataTable);
    String columnName;
    String familyName;
    for (PColumn pCol : pkColumns) {
        // NOTE(review): sqlTypeName is computed but never used in the visible lines, and
        // `dataTypes` is never filled; a statement (and the closing braces) appear to have been
        // lost from this listing - confirm against the upstream source.
        String sqlTypeName = getSqlTypeName(pCol);
        // Names are passed through addQuotes; the family name stays null when the column has none.
        columnName = addQuotes(pCol.getName().getString());
        familyName = pCol.getFamilyName() != null ? addQuotes(pCol.getFamilyName().getString()) : null;
        // Pair order is (family name, column name).
        columns.add(new Pair<String, String>(familyName, columnName));
Also used : PColumn(org.apache.phoenix.schema.PColumn)


PColumn (org.apache.phoenix.schema.PColumn)101 PTable (org.apache.phoenix.schema.PTable)59 PhoenixConnection (org.apache.phoenix.jdbc.PhoenixConnection)26 Expression (org.apache.phoenix.expression.Expression)21 TableRef (org.apache.phoenix.schema.TableRef)20 ArrayList (java.util.ArrayList)19 PName (org.apache.phoenix.schema.PName)18 ImmutableBytesWritable ( LiteralExpression (org.apache.phoenix.expression.LiteralExpression)17 ImmutableBytesPtr (org.apache.phoenix.hbase.index.util.ImmutableBytesPtr)17 ColumnRef (org.apache.phoenix.schema.ColumnRef)17 Hint (org.apache.phoenix.parse.HintNode.Hint)14 PTableKey (org.apache.phoenix.schema.PTableKey)14 ColumnNotFoundException (org.apache.phoenix.schema.ColumnNotFoundException)13 PColumnFamily (org.apache.phoenix.schema.PColumnFamily)13 PSmallint (org.apache.phoenix.schema.types.PSmallint)13 SQLException (java.sql.SQLException)12 ProjectedColumnExpression (org.apache.phoenix.expression.ProjectedColumnExpression)12 PColumnImpl (org.apache.phoenix.schema.PColumnImpl)12 Map (java.util.Map)11