
Example 1 with ImmutableBytesPtr

Use of org.apache.phoenix.hbase.index.util.ImmutableBytesPtr in project phoenix by apache.

The class QueryCompiler, method compileJoinQuery.

/*
     * Call compileJoinQuery() for join queries recursively down to the leaf JoinTable nodes.
     * This matches the input JoinTable node against patterns in the following order:
     * 1. A (a leaf JoinTable node, which can be a named table reference or a subquery of any kind).
     *    Returns the compilation result of a single table scan or of an independent subquery.
     * 2. Matching either of (when hint USE_SORT_MERGE_JOIN not specified):
     *        1) A LEFT/INNER JOIN B
     *        2) A LEFT/INNER JOIN B (LEFT/INNER JOIN C)+, if hint NO_STAR_JOIN not specified
     *        where A can be a named table reference or a flat subquery, and B, C, ... can be a named
     *        table reference, a sub-join or a subquery of any kind.
     *    Returns a HashJoinPlan{scan: A, hash: B, C, ...}.
     * 3. Matching pattern:
     *        A RIGHT/INNER JOIN B (when hint USE_SORT_MERGE_JOIN not specified)
     *        where B can be a named table reference or a flat subquery, and A can be a named table
     *        reference, a sub-join or a subquery of any kind.
     *    Returns a HashJoinPlan{scan: B, hash: A}.
     *    NOTE that "A LEFT/RIGHT/INNER/FULL JOIN B RIGHT/INNER JOIN C" is viewed as
     *    "(A LEFT/RIGHT/INNER/FULL JOIN B) RIGHT/INNER JOIN C" here, which means the left-hand
     *    part in parentheses is considered a sub-join.
     * 4. All the rest that do not qualify for previous patterns or conditions, including FULL joins.
     *    Returns a SortMergeJoinPlan, the sorting part of which is pushed down to the JoinTable nodes
     *    of both sides as order-by clauses.
     * NOTE that SEMI or ANTI joins are treated the same way as LEFT joins in JoinTable pattern matching.
     *    
     * If no join algorithm hint is provided then, per the compilation process above, a join query
     * plan may consist of both HashJoinPlan and SortMergeJoinPlan nodes, which can enclose each other.
     * TODO 1) Use table statistics to guide the choice of join plans.
     *      2) Make it possible to hint a certain join algorithm for a specific join step.
     */
@SuppressWarnings("unchecked")
protected QueryPlan compileJoinQuery(StatementContext context, List<Object> binds, JoinTable joinTable, boolean asSubquery, boolean projectPKColumns, List<OrderByNode> orderBy) throws SQLException {
    byte[] emptyByteArray = new byte[0];
    List<JoinSpec> joinSpecs = joinTable.getJoinSpecs();
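    // Pattern 1 from the comment above: a leaf JoinTable node; compile as a single flat query or wrap an independent subquery.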
    if (joinSpecs.isEmpty()) {
        Table table = joinTable.getTable();
        SelectStatement subquery = table.getAsSubquery(orderBy);
        if (!table.isSubselect()) {
            context.setCurrentTable(table.getTableRef());
            PTable projectedTable = table.createProjectedTable(!projectPKColumns, context);
            TupleProjector projector = new TupleProjector(projectedTable);
            TupleProjector.serializeProjectorIntoScan(context.getScan(), projector);
            context.setResolver(FromCompiler.getResolverForProjectedTable(projectedTable, context.getConnection(), subquery.getUdfParseNodes()));
            table.projectColumns(context.getScan());
            return compileSingleFlatQuery(context, subquery, binds, asSubquery, !asSubquery, null, projectPKColumns ? projector : null, true);
        }
        QueryPlan plan = compileSubquery(subquery, false);
        PTable projectedTable = table.createProjectedTable(plan.getProjector());
        context.setResolver(FromCompiler.getResolverForProjectedTable(projectedTable, context.getConnection(), subquery.getUdfParseNodes()));
        return new TupleProjectionPlan(plan, new TupleProjector(plan.getProjector()), table.compilePostFilterExpression(context));
    }
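    // Pattern 2 from the comment above: star join as a HashJoinPlan; scan the LHS table and build each RHS into a server-side hash cache.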
    boolean[] starJoinVector;
    if (!this.useSortMergeJoin && (starJoinVector = joinTable.getStarJoinVector()) != null) {
        Table table = joinTable.getTable();
        PTable initialProjectedTable;
        TableRef tableRef;
        SelectStatement query;
        TupleProjector tupleProjector;
        if (!table.isSubselect()) {
            context.setCurrentTable(table.getTableRef());
            initialProjectedTable = table.createProjectedTable(!projectPKColumns, context);
            tableRef = table.getTableRef();
            table.projectColumns(context.getScan());
            query = joinTable.getAsSingleSubquery(table.getAsSubquery(orderBy), asSubquery);
            tupleProjector = new TupleProjector(initialProjectedTable);
        } else {
            SelectStatement subquery = table.getAsSubquery(orderBy);
            QueryPlan plan = compileSubquery(subquery, false);
            initialProjectedTable = table.createProjectedTable(plan.getProjector());
            tableRef = plan.getTableRef();
            context.getScan().setFamilyMap(plan.getContext().getScan().getFamilyMap());
            query = joinTable.getAsSingleSubquery((SelectStatement) plan.getStatement(), asSubquery);
            tupleProjector = new TupleProjector(plan.getProjector());
        }
        context.setCurrentTable(tableRef);
        PTable projectedTable = initialProjectedTable;
        int count = joinSpecs.size();
        ImmutableBytesPtr[] joinIds = new ImmutableBytesPtr[count];
        List<Expression>[] joinExpressions = new List[count];
        JoinType[] joinTypes = new JoinType[count];
        PTable[] tables = new PTable[count];
        int[] fieldPositions = new int[count];
        StatementContext[] subContexts = new StatementContext[count];
        QueryPlan[] subPlans = new QueryPlan[count];
        HashSubPlan[] hashPlans = new HashSubPlan[count];
        fieldPositions[0] = projectedTable.getColumns().size() - projectedTable.getPKColumns().size();
        for (int i = 0; i < count; i++) {
            JoinSpec joinSpec = joinSpecs.get(i);
            Scan subScan = ScanUtil.newScan(originalScan);
            subContexts[i] = new StatementContext(statement, context.getResolver(), subScan, new SequenceManager(statement));
            subPlans[i] = compileJoinQuery(subContexts[i], binds, joinSpec.getJoinTable(), true, true, null);
            boolean hasPostReference = joinSpec.getJoinTable().hasPostReference();
            if (hasPostReference) {
                tables[i] = subContexts[i].getResolver().getTables().get(0).getTable();
                projectedTable = JoinCompiler.joinProjectedTables(projectedTable, tables[i], joinSpec.getType());
            } else {
                tables[i] = null;
            }
        }
        for (int i = 0; i < count; i++) {
            JoinSpec joinSpec = joinSpecs.get(i);
            context.setResolver(FromCompiler.getResolverForProjectedTable(projectedTable, context.getConnection(), query.getUdfParseNodes()));
            // place-holder
            joinIds[i] = new ImmutableBytesPtr(emptyByteArray);
            Pair<List<Expression>, List<Expression>> joinConditions = joinSpec.compileJoinConditions(context, subContexts[i], true);
            joinExpressions[i] = joinConditions.getFirst();
            List<Expression> hashExpressions = joinConditions.getSecond();
            Pair<Expression, Expression> keyRangeExpressions = new Pair<Expression, Expression>(null, null);
            boolean optimized = getKeyExpressionCombinations(keyRangeExpressions, context, joinTable.getStatement(), tableRef, joinSpec.getType(), joinExpressions[i], hashExpressions);
            Expression keyRangeLhsExpression = keyRangeExpressions.getFirst();
            Expression keyRangeRhsExpression = keyRangeExpressions.getSecond();
            joinTypes[i] = joinSpec.getType();
            if (i < count - 1) {
                fieldPositions[i + 1] = fieldPositions[i] + (tables[i] == null ? 0 : (tables[i].getColumns().size() - tables[i].getPKColumns().size()));
            }
            hashPlans[i] = new HashSubPlan(i, subPlans[i], optimized ? null : hashExpressions, joinSpec.isSingleValueOnly(), keyRangeLhsExpression, keyRangeRhsExpression);
        }
        TupleProjector.serializeProjectorIntoScan(context.getScan(), tupleProjector);
        QueryPlan plan = compileSingleFlatQuery(context, query, binds, asSubquery, !asSubquery && joinTable.isAllLeftJoin(), null, !table.isSubselect() && projectPKColumns ? tupleProjector : null, true);
        Expression postJoinFilterExpression = joinTable.compilePostFilterExpression(context, table);
        Integer limit = null;
        Integer offset = null;
        if (!query.isAggregate() && !query.isDistinct() && query.getOrderBy().isEmpty()) {
            limit = plan.getLimit();
            offset = plan.getOffset();
        }
        HashJoinInfo joinInfo = new HashJoinInfo(projectedTable, joinIds, joinExpressions, joinTypes, starJoinVector, tables, fieldPositions, postJoinFilterExpression, QueryUtil.getOffsetLimit(limit, offset));
        return HashJoinPlan.create(joinTable.getStatement(), plan, joinInfo, hashPlans);
    }
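    // Pattern 3 from the comment above: A RIGHT/INNER JOIN B with a flat RHS; scan B and build the LHS sub-join into a hash cache.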
    JoinSpec lastJoinSpec = joinSpecs.get(joinSpecs.size() - 1);
    JoinType type = lastJoinSpec.getType();
    if (!this.useSortMergeJoin && (type == JoinType.Right || type == JoinType.Inner) && lastJoinSpec.getJoinTable().getJoinSpecs().isEmpty() && lastJoinSpec.getJoinTable().getTable().isFlat()) {
        JoinTable rhsJoinTable = lastJoinSpec.getJoinTable();
        Table rhsTable = rhsJoinTable.getTable();
        JoinTable lhsJoin = joinTable.getSubJoinTableWithoutPostFilters();
        Scan subScan = ScanUtil.newScan(originalScan);
        StatementContext lhsCtx = new StatementContext(statement, context.getResolver(), subScan, new SequenceManager(statement));
        QueryPlan lhsPlan = compileJoinQuery(lhsCtx, binds, lhsJoin, true, true, null);
        PTable rhsProjTable;
        TableRef rhsTableRef;
        SelectStatement rhs;
        TupleProjector tupleProjector;
        if (!rhsTable.isSubselect()) {
            context.setCurrentTable(rhsTable.getTableRef());
            rhsProjTable = rhsTable.createProjectedTable(!projectPKColumns, context);
            rhsTableRef = rhsTable.getTableRef();
            rhsTable.projectColumns(context.getScan());
            rhs = rhsJoinTable.getAsSingleSubquery(rhsTable.getAsSubquery(orderBy), asSubquery);
            tupleProjector = new TupleProjector(rhsProjTable);
        } else {
            SelectStatement subquery = rhsTable.getAsSubquery(orderBy);
            QueryPlan plan = compileSubquery(subquery, false);
            rhsProjTable = rhsTable.createProjectedTable(plan.getProjector());
            rhsTableRef = plan.getTableRef();
            context.getScan().setFamilyMap(plan.getContext().getScan().getFamilyMap());
            rhs = rhsJoinTable.getAsSingleSubquery((SelectStatement) plan.getStatement(), asSubquery);
            tupleProjector = new TupleProjector(plan.getProjector());
        }
        context.setCurrentTable(rhsTableRef);
        context.setResolver(FromCompiler.getResolverForProjectedTable(rhsProjTable, context.getConnection(), rhs.getUdfParseNodes()));
        ImmutableBytesPtr[] joinIds = new ImmutableBytesPtr[] { new ImmutableBytesPtr(emptyByteArray) };
        Pair<List<Expression>, List<Expression>> joinConditions = lastJoinSpec.compileJoinConditions(lhsCtx, context, true);
        List<Expression> joinExpressions = joinConditions.getSecond();
        List<Expression> hashExpressions = joinConditions.getFirst();
        boolean needsMerge = lhsJoin.hasPostReference();
        PTable lhsTable = needsMerge ? lhsCtx.getResolver().getTables().get(0).getTable() : null;
        int fieldPosition = needsMerge ? rhsProjTable.getColumns().size() - rhsProjTable.getPKColumns().size() : 0;
        PTable projectedTable = needsMerge ? JoinCompiler.joinProjectedTables(rhsProjTable, lhsTable, type == JoinType.Right ? JoinType.Left : type) : rhsProjTable;
        TupleProjector.serializeProjectorIntoScan(context.getScan(), tupleProjector);
        context.setResolver(FromCompiler.getResolverForProjectedTable(projectedTable, context.getConnection(), rhs.getUdfParseNodes()));
        QueryPlan rhsPlan = compileSingleFlatQuery(context, rhs, binds, asSubquery, !asSubquery && type == JoinType.Right, null, !rhsTable.isSubselect() && projectPKColumns ? tupleProjector : null, true);
        Expression postJoinFilterExpression = joinTable.compilePostFilterExpression(context, rhsTable);
        Integer limit = null;
        Integer offset = null;
        if (!rhs.isAggregate() && !rhs.isDistinct() && rhs.getOrderBy().isEmpty()) {
            limit = rhsPlan.getLimit();
            offset = rhsPlan.getOffset();
        }
        HashJoinInfo joinInfo = new HashJoinInfo(projectedTable, joinIds, new List[] { joinExpressions }, new JoinType[] { type == JoinType.Right ? JoinType.Left : type }, new boolean[] { true }, new PTable[] { lhsTable }, new int[] { fieldPosition }, postJoinFilterExpression, QueryUtil.getOffsetLimit(limit, offset));
        Pair<Expression, Expression> keyRangeExpressions = new Pair<Expression, Expression>(null, null);
        getKeyExpressionCombinations(keyRangeExpressions, context, joinTable.getStatement(), rhsTableRef, type, joinExpressions, hashExpressions);
        return HashJoinPlan.create(joinTable.getStatement(), rhsPlan, joinInfo, new HashSubPlan[] { new HashSubPlan(0, lhsPlan, hashExpressions, false, keyRangeExpressions.getFirst(), keyRangeExpressions.getSecond()) });
    }
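    // Pattern 4 from the comment above: fall back to a SortMergeJoinPlan, pushing the sort down to both sides as order-by.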
    JoinTable lhsJoin = joinTable.getSubJoinTableWithoutPostFilters();
    JoinTable rhsJoin = lastJoinSpec.getJoinTable();
    if (type == JoinType.Right) {
        JoinTable temp = lhsJoin;
        lhsJoin = rhsJoin;
        rhsJoin = temp;
    }
    List<EqualParseNode> joinConditionNodes = lastJoinSpec.getOnConditions();
    List<OrderByNode> lhsOrderBy = Lists.<OrderByNode>newArrayListWithExpectedSize(joinConditionNodes.size());
    List<OrderByNode> rhsOrderBy = Lists.<OrderByNode>newArrayListWithExpectedSize(joinConditionNodes.size());
    for (EqualParseNode condition : joinConditionNodes) {
        lhsOrderBy.add(NODE_FACTORY.orderBy(type == JoinType.Right ? condition.getRHS() : condition.getLHS(), false, true));
        rhsOrderBy.add(NODE_FACTORY.orderBy(type == JoinType.Right ? condition.getLHS() : condition.getRHS(), false, true));
    }
    Scan lhsScan = ScanUtil.newScan(originalScan);
    StatementContext lhsCtx = new StatementContext(statement, context.getResolver(), lhsScan, new SequenceManager(statement));
    boolean preserveRowkey = !projectPKColumns && type != JoinType.Full;
    QueryPlan lhsPlan = compileJoinQuery(lhsCtx, binds, lhsJoin, true, !preserveRowkey, lhsOrderBy);
    PTable lhsProjTable = lhsCtx.getResolver().getTables().get(0).getTable();
    boolean isInRowKeyOrder = preserveRowkey && lhsPlan.getOrderBy().getOrderByExpressions().isEmpty();
    Scan rhsScan = ScanUtil.newScan(originalScan);
    StatementContext rhsCtx = new StatementContext(statement, context.getResolver(), rhsScan, new SequenceManager(statement));
    QueryPlan rhsPlan = compileJoinQuery(rhsCtx, binds, rhsJoin, true, true, rhsOrderBy);
    PTable rhsProjTable = rhsCtx.getResolver().getTables().get(0).getTable();
    Pair<List<Expression>, List<Expression>> joinConditions = lastJoinSpec.compileJoinConditions(type == JoinType.Right ? rhsCtx : lhsCtx, type == JoinType.Right ? lhsCtx : rhsCtx, false);
    List<Expression> lhsKeyExpressions = type == JoinType.Right ? joinConditions.getSecond() : joinConditions.getFirst();
    List<Expression> rhsKeyExpressions = type == JoinType.Right ? joinConditions.getFirst() : joinConditions.getSecond();
    boolean needsMerge = rhsJoin.hasPostReference();
    int fieldPosition = needsMerge ? lhsProjTable.getColumns().size() - lhsProjTable.getPKColumns().size() : 0;
    PTable projectedTable = needsMerge ? JoinCompiler.joinProjectedTables(lhsProjTable, rhsProjTable, type == JoinType.Right ? JoinType.Left : type) : lhsProjTable;
    ColumnResolver resolver = FromCompiler.getResolverForProjectedTable(projectedTable, context.getConnection(), joinTable.getStatement().getUdfParseNodes());
    TableRef tableRef = resolver.getTables().get(0);
    StatementContext subCtx = new StatementContext(statement, resolver, ScanUtil.newScan(originalScan), new SequenceManager(statement));
    subCtx.setCurrentTable(tableRef);
    QueryPlan innerPlan = new SortMergeJoinPlan(subCtx, joinTable.getStatement(), tableRef, type == JoinType.Right ? JoinType.Left : type, lhsPlan, rhsPlan, lhsKeyExpressions, rhsKeyExpressions, projectedTable, lhsProjTable, needsMerge ? rhsProjTable : null, fieldPosition, lastJoinSpec.isSingleValueOnly());
    context.setCurrentTable(tableRef);
    context.setResolver(resolver);
    TableNode from = NODE_FACTORY.namedTable(tableRef.getTableAlias(), NODE_FACTORY.table(tableRef.getTable().getSchemaName().getString(), tableRef.getTable().getTableName().getString()));
    ParseNode where = joinTable.getPostFiltersCombined();
    SelectStatement select = asSubquery ? NODE_FACTORY.select(from, joinTable.getStatement().getHint(), false, Collections.<AliasedNode>emptyList(), where, null, null, orderBy, null, null, 0, false, joinTable.getStatement().hasSequence(), Collections.<SelectStatement>emptyList(), joinTable.getStatement().getUdfParseNodes()) : NODE_FACTORY.select(joinTable.getStatement(), from, where);
    return compileSingleFlatQuery(context, select, binds, asSubquery, false, innerPlan, null, isInRowKeyOrder);
}
Also used: TupleProjector (org.apache.phoenix.execute.TupleProjector), TupleProjectionPlan (org.apache.phoenix.execute.TupleProjectionPlan), OrderByNode (org.apache.phoenix.parse.OrderByNode), PTable (org.apache.phoenix.schema.PTable), HashSubPlan (org.apache.phoenix.execute.HashJoinPlan.HashSubPlan), SelectStatement (org.apache.phoenix.parse.SelectStatement), SortMergeJoinPlan (org.apache.phoenix.execute.SortMergeJoinPlan), SubqueryParseNode (org.apache.phoenix.parse.SubqueryParseNode), EqualParseNode (org.apache.phoenix.parse.EqualParseNode), ParseNode (org.apache.phoenix.parse.ParseNode), List (java.util.List), ArrayList (java.util.ArrayList), Pair (org.apache.hadoop.hbase.util.Pair), Table (org.apache.phoenix.compile.JoinCompiler.Table), JoinTable (org.apache.phoenix.compile.JoinCompiler.JoinTable), ImmutableBytesPtr (org.apache.phoenix.hbase.index.util.ImmutableBytesPtr), JoinSpec (org.apache.phoenix.compile.JoinCompiler.JoinSpec), JoinType (org.apache.phoenix.parse.JoinTableNode.JoinType), AliasedNode (org.apache.phoenix.parse.AliasedNode), Hint (org.apache.phoenix.parse.HintNode.Hint), Expression (org.apache.phoenix.expression.Expression), LiteralExpression (org.apache.phoenix.expression.LiteralExpression), RowValueConstructorExpression (org.apache.phoenix.expression.RowValueConstructorExpression), HashJoinInfo (org.apache.phoenix.join.HashJoinInfo), TableNode (org.apache.phoenix.parse.TableNode), Scan (org.apache.hadoop.hbase.client.Scan), TableRef (org.apache.phoenix.schema.TableRef)
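
The join IDs above are allocated as place-holders over an empty byte array and only pointed at a real hash cache ID when the plan executes. A minimal sketch, separate from Phoenix's own code, of how such a mutable ImmutableBytesPtr place-holder can be repointed later (JoinIdPlaceholderSketch and the cache ID value are made up):

import org.apache.hadoop.hbase.util.Bytes;
import org.apache.phoenix.hbase.index.util.ImmutableBytesPtr;

public class JoinIdPlaceholderSketch {
    public static void main(String[] args) {
        // Place-holder join ID, as allocated in compileJoinQuery() above
        ImmutableBytesPtr joinId = new ImmutableBytesPtr(new byte[0]);
        // ... plan construction happens here, capturing this same instance ...
        byte[] serverCacheId = Bytes.toBytes(42L); // hypothetical ID assigned at execution time
        // Repointing the ptr updates every structure that holds the reference
        joinId.set(serverCacheId);
        System.out.println(joinId.getLength()); // 8
    }
}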

Example 2 with ImmutableBytesPtr

Use of org.apache.phoenix.hbase.index.util.ImmutableBytesPtr in project phoenix by apache.

The class DeleteCompiler, method deleteRows.

private static MutationState deleteRows(StatementContext childContext, TableRef targetTableRef, List<TableRef> indexTableRefs, ResultIterator iterator, RowProjector projector, TableRef sourceTableRef) throws SQLException {
    PTable table = targetTableRef.getTable();
    PhoenixStatement statement = childContext.getStatement();
    PhoenixConnection connection = statement.getConnection();
    PName tenantId = connection.getTenantId();
    byte[] tenantIdBytes = null;
    if (tenantId != null) {
        tenantIdBytes = ScanUtil.getTenantIdBytes(table.getRowKeySchema(), table.getBucketNum() != null, tenantId, table.getViewIndexId() != null);
    }
    final boolean isAutoCommit = connection.getAutoCommit();
    ConnectionQueryServices services = connection.getQueryServices();
    final int maxSize = services.getProps().getInt(QueryServices.MAX_MUTATION_SIZE_ATTRIB, QueryServicesOptions.DEFAULT_MAX_MUTATION_SIZE);
    final int maxSizeBytes = services.getProps().getInt(QueryServices.MAX_MUTATION_SIZE_BYTES_ATTRIB, QueryServicesOptions.DEFAULT_MAX_MUTATION_SIZE_BYTES);
    final int batchSize = Math.min(connection.getMutateBatchSize(), maxSize);
    Map<ImmutableBytesPtr, RowMutationState> mutations = Maps.newHashMapWithExpectedSize(batchSize);
    List<Map<ImmutableBytesPtr, RowMutationState>> indexMutations = null;
    // If there are index tables, we delete rows from both the index tables and
    // the data table through a single query to save executing an additional one.
    if (!indexTableRefs.isEmpty()) {
        indexMutations = Lists.newArrayListWithExpectedSize(indexTableRefs.size());
        for (int i = 0; i < indexTableRefs.size(); i++) {
            indexMutations.add(Maps.<ImmutableBytesPtr, RowMutationState>newHashMapWithExpectedSize(batchSize));
        }
    }
    List<PColumn> pkColumns = table.getPKColumns();
    boolean isMultiTenant = table.isMultiTenant() && tenantIdBytes != null;
    boolean isSharedViewIndex = table.getViewIndexId() != null;
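    // Salted tables reserve the first row key slot for the salt byte, so PK values start at offset 1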
    int offset = (table.getBucketNum() == null ? 0 : 1);
    byte[][] values = new byte[pkColumns.size()][];
    if (isSharedViewIndex) {
        values[offset++] = MetaDataUtil.getViewIndexIdDataType().toBytes(table.getViewIndexId());
    }
    if (isMultiTenant) {
        values[offset++] = tenantIdBytes;
    }
    try (PhoenixResultSet rs = new PhoenixResultSet(iterator, projector, childContext)) {
        int rowCount = 0;
        while (rs.next()) {
            // allocate new as this is a key in a Map
            ImmutableBytesPtr ptr = new ImmutableBytesPtr();
            // If the source table is the target table, the row key can be used as-is;
            // no translation is required.
            if (sourceTableRef.equals(targetTableRef)) {
                rs.getCurrentRow().getKey(ptr);
            } else {
                for (int i = offset; i < values.length; i++) {
                    byte[] byteValue = rs.getBytes(i + 1 - offset);
                    // TODO: consider going under the hood and just getting the bytes
                    if (pkColumns.get(i).getSortOrder() == SortOrder.DESC) {
                        byte[] tempByteValue = Arrays.copyOf(byteValue, byteValue.length);
                        byteValue = SortOrder.invert(byteValue, 0, tempByteValue, 0, byteValue.length);
                    }
                    values[i] = byteValue;
                }
                table.newKey(ptr, values);
            }
            // When issuing deletes, we do not care about the row time ranges. Also, if the table had a row timestamp column, then the
            // row key will already have its value. 
            mutations.put(ptr, new RowMutationState(PRow.DELETE_MARKER, statement.getConnection().getStatementExecutionCounter(), NULL_ROWTIMESTAMP_INFO, null));
            for (int i = 0; i < indexTableRefs.size(); i++) {
                // allocate new as this is a key in a Map
                ImmutableBytesPtr indexPtr = new ImmutableBytesPtr();
                rs.getCurrentRow().getKey(indexPtr);
                indexMutations.get(i).put(indexPtr, new RowMutationState(PRow.DELETE_MARKER, statement.getConnection().getStatementExecutionCounter(), NULL_ROWTIMESTAMP_INFO, null));
            }
            if (mutations.size() > maxSize) {
                throw new IllegalArgumentException("MutationState size of " + mutations.size() + " is bigger than max allowed size of " + maxSize);
            }
            rowCount++;
            // Commit a batch if auto commit is true and we're at our batch size
            if (isAutoCommit && rowCount % batchSize == 0) {
                MutationState state = new MutationState(targetTableRef, mutations, 0, maxSize, maxSizeBytes, connection);
                connection.getMutationState().join(state);
                for (int i = 0; i < indexTableRefs.size(); i++) {
                    MutationState indexState = new MutationState(indexTableRefs.get(i), indexMutations.get(i), 0, maxSize, maxSizeBytes, connection);
                    connection.getMutationState().join(indexState);
                }
                connection.getMutationState().send();
                mutations.clear();
                if (indexMutations != null) {
                    indexMutations.clear();
                }
            }
        }
        // If auto commit is true, this last batch will be committed upon return
        int nCommittedRows = isAutoCommit ? (rowCount / batchSize * batchSize) : 0;
        MutationState state = new MutationState(targetTableRef, mutations, nCommittedRows, maxSize, maxSizeBytes, connection);
        for (int i = 0; i < indexTableRefs.size(); i++) {
            // Pass 0 as the size offset so these index rows are not counted as committed rows.
            MutationState indexState = new MutationState(indexTableRefs.get(i), indexMutations.get(i), 0, maxSize, maxSizeBytes, connection);
            state.join(indexState);
        }
        return state;
    }
}
Also used: PhoenixConnection (org.apache.phoenix.jdbc.PhoenixConnection), ImmutableBytesPtr (org.apache.phoenix.hbase.index.util.ImmutableBytesPtr), PhoenixStatement (org.apache.phoenix.jdbc.PhoenixStatement), PTable (org.apache.phoenix.schema.PTable), Hint (org.apache.phoenix.parse.HintNode.Hint), PColumn (org.apache.phoenix.schema.PColumn), MutationState (org.apache.phoenix.execute.MutationState), RowMutationState (org.apache.phoenix.execute.MutationState.RowMutationState), PName (org.apache.phoenix.schema.PName), PhoenixResultSet (org.apache.phoenix.jdbc.PhoenixResultSet), Map (java.util.Map), HashMap (java.util.HashMap), ConnectionQueryServices (org.apache.phoenix.query.ConnectionQueryServices)
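
The "allocate new as this is a key in a Map" comments above matter because ImmutableBytesPtr hashes and compares by the bytes it currently points at. A minimal sketch, separate from Phoenix's code, of what goes wrong when a single mutable pointer is reused as a map key (MapKeySketch is a made-up class name):

import java.util.HashMap;
import java.util.Map;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.phoenix.hbase.index.util.ImmutableBytesPtr;

public class MapKeySketch {
    public static void main(String[] args) {
        // Correct: a fresh pointer per row, as deleteRows() does
        Map<ImmutableBytesPtr, String> good = new HashMap<>();
        for (String row : new String[] { "a", "b" }) {
            good.put(new ImmutableBytesPtr(Bytes.toBytes(row)), row);
        }
        System.out.println(good.containsKey(new ImmutableBytesPtr(Bytes.toBytes("a")))); // true

        // Broken: one reused pointer mutates the key already stored in the map
        Map<ImmutableBytesPtr, String> bad = new HashMap<>();
        ImmutableBytesPtr reused = new ImmutableBytesPtr();
        for (String row : new String[] { "a", "b" }) {
            reused.set(Bytes.toBytes(row));
            bad.put(reused, row);
        }
        System.out.println(bad.containsKey(new ImmutableBytesPtr(Bytes.toBytes("a")))); // false: entry corrupted
    }
}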

Example 3 with ImmutableBytesPtr

Use of org.apache.phoenix.hbase.index.util.ImmutableBytesPtr in project phoenix by apache.

The class SpillManager, method getKey.

/**
     * Helper method to deserialize the key part from a serialized byte array.
     * @param data the serialized key/value byte array
     * @return an ImmutableBytesPtr pointing at the key bytes within {@code data}
     * @throws IOException if the vint-encoded key length cannot be read
     */
static ImmutableBytesPtr getKey(byte[] data) throws IOException {
    DataInputStream input = null;
    try {
        input = new DataInputStream(new ByteArrayInputStream(data));
        // key length
        int keyLength = WritableUtils.readVInt(input);
        int offset = WritableUtils.getVIntSize(keyLength);
        // key
        return new ImmutableBytesPtr(data, offset, keyLength);
    } finally {
        if (input != null) {
            input.close();
            input = null;
        }
    }
}
Also used: ByteArrayInputStream (java.io.ByteArrayInputStream), ImmutableBytesPtr (org.apache.phoenix.hbase.index.util.ImmutableBytesPtr), DataInputStream (java.io.DataInputStream)
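
For context, a minimal sketch of the framing getKey() decodes: a vint-encoded key length followed by the key bytes, with everything after the key opaque to this method. The serialize() helper below is illustrative, not SpillManager's actual writer, and the sketch assumes it lives in SpillManager's package, since getKey() is package-private:

import java.io.ByteArrayOutputStream;
import java.io.DataOutputStream;
import java.io.IOException;
import org.apache.hadoop.io.WritableUtils;
import org.apache.phoenix.hbase.index.util.ImmutableBytesPtr;

public class SpillFramingSketch {
    // Illustrative writer producing the layout getKey() reads back
    static byte[] serialize(byte[] key, byte[] payload) throws IOException {
        ByteArrayOutputStream bos = new ByteArrayOutputStream();
        DataOutputStream out = new DataOutputStream(bos);
        WritableUtils.writeVInt(out, key.length); // key length as a vint
        out.write(key);                           // key bytes
        out.write(payload);                       // opaque to getKey()
        out.flush();
        return bos.toByteArray();
    }

    public static void main(String[] args) throws IOException {
        byte[] data = serialize(new byte[] { 1, 2, 3 }, new byte[] { 9, 9 });
        ImmutableBytesPtr key = SpillManager.getKey(data); // as defined above
        System.out.println(key.getLength()); // 3
    }
}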

Example 4 with ImmutableBytesPtr

Use of org.apache.phoenix.hbase.index.util.ImmutableBytesPtr in project phoenix by apache.

The class SpillMap, method redistribute.

// Function redistributes the elements in the current index
// to two new buckets, based on the bit at localDepth + 1 position.
// Optionally this function also doubles the directory to allow
// for bucket splits
private void redistribute(int index, ImmutableBytesPtr keyNew, byte[] valueNew) {
    // Get the respective bucket
    MappedByteBufferMap byteMap = directory[index];
    // Get the actual bucket index, that the directory index points to
    int mappedIdx = byteMap.pageIndex;
    int localDepth = byteMap.localDepth;
    ArrayList<Integer> buckets = Lists.newArrayList();
    // TODO: can be made faster!
    for (int i = 0; i < directory.length; i++) {
        if (directory[i].pageIndex == mappedIdx) {
            buckets.add(i);
        }
    }
    // Assuming no directory doubling for now
    // compute the two new bucket Ids for splitting
    // SpillFile adds new files dynamically in case the directory points to pageIDs
    // that exceed the size limit of a single file.
    // TODO verify if some sort of de-fragmentation might be helpful
    int tmpIndex = index ^ ((1 << localDepth));
    int b1Index = Math.min(index, tmpIndex);
    int b2Index = Math.max(index, tmpIndex);
    // Create two new split buckets
    MappedByteBufferMap b1 = new MappedByteBufferMap(b1Index, thresholdBytes, pageInserts, spillFile);
    MappedByteBufferMap b2 = new MappedByteBufferMap(b2Index, thresholdBytes, pageInserts, spillFile);
    // redistribute old elements into b1 and b2
    for (Entry<ImmutableBytesPtr, byte[]> element : byteMap.pageMap.entrySet()) {
        ImmutableBytesPtr key = element.getKey();
        byte[] value = element.getValue();
        // Only re-add keys that are not held in the in-memory cache;
        // this is a good point to reduce the number of spilled elements
        if (!cache.isKeyContained(key)) {
            // Re-distribute element onto the new 2 split buckets
            if ((key.hashCode() & ((1 << localDepth))) != 0) {
                b2.addElement(null, key, value);
            } else {
                b1.addElement(null, key, value);
            }
        }
    }
    // Clear and GC the old now redistributed bucket
    byteMap.pageMap.clear();
    byteMap = null;
    // Increase local bucket depths
    b1.localDepth = localDepth + 1;
    b2.localDepth = localDepth + 1;
    boolean doubleDir = false;
    if (globalDepth < (localDepth + 1)) {
        // Double directory structure and re-adjust pointers
        doubleDir = true;
        b2Index = doubleDirectory(b2Index, keyNew);
    }
    if (!doubleDir) {
        // No directory doubling needed: re-point every directory entry that
        // referenced the old bucket to one of the two new buckets
        for (int i = 0; i < buckets.size(); i++) {
            if ((buckets.get(i) & (1 << (localDepth))) != 0) {
                directory[buckets.get(i)] = b2;
            } else {
                directory[buckets.get(i)] = b1;
            }
        }
    } else {
        // Update the directory indexes in case of directory doubling
        directory[b1Index] = b1;
        directory[b2Index] = b2;
    }
}
Also used: ImmutableBytesPtr (org.apache.phoenix.hbase.index.util.ImmutableBytesPtr)
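
The split arithmetic above is standard extendible hashing. A minimal, self-contained sketch of the two bit operations redistribute() relies on, with made-up numbers (ExtendibleHashSketch is not Phoenix code):

public class ExtendibleHashSketch {
    public static void main(String[] args) {
        int localDepth = 2;
        int index = 5;                          // binary 101
        int buddy = index ^ (1 << localDepth);  // flips bit 2: binary 001, so buckets 1 and 5 split
        System.out.println(Math.min(index, buddy) + " / " + Math.max(index, buddy)); // 1 / 5

        // Keys are routed to the upper buddy iff bit `localDepth` of their hash is set
        int hash = 0b110;
        System.out.println((hash & (1 << localDepth)) != 0); // true: goes to b2
    }
}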

Example 5 with ImmutableBytesPtr

Use of org.apache.phoenix.hbase.index.util.ImmutableBytesPtr in project phoenix by apache.

The class SpillableGroupByCache, method cache.

/**
     * Extract an element from the cache. If the element is present neither in the in-memory cache
     * nor in the spill files, the cache performs an implicit put() of a new key/value tuple and
     * loads it into the cache.
     */
@Override
public Aggregator[] cache(ImmutableBytesPtr cacheKey) {
    ImmutableBytesPtr key = new ImmutableBytesPtr(cacheKey);
    Aggregator[] rowAggregators = cache.get(key);
    if (rowAggregators == null) {
        // No aggregators in the in-memory cache for this distinct value yet (we need one set per distinct value)
        if (spillManager != null) {
            // A spill manager exists, so check whether this key was spilled before
            try {
                rowAggregators = spillManager.loadEntry(key);
            } catch (IOException ioe) {
                // Ensure that we always close and delete the temp files
                try {
                    throw new RuntimeException(ioe);
                } finally {
                    Closeables.closeQuietly(SpillableGroupByCache.this);
                }
            }
        }
        if (rowAggregators == null) {
            // No, key never spilled before, create a new tuple
            rowAggregators = aggregators.newAggregators(env.getConfiguration());
            if (logger.isDebugEnabled()) {
                logger.debug("Adding new aggregate bucket for row key " + Bytes.toStringBinary(key.get(), key.getOffset(), key.getLength()));
            }
        }
        if (cache.put(key, rowAggregators) == null) {
            totalNumElements++;
        }
    }
    return rowAggregators;
}
Also used: ImmutableBytesPtr (org.apache.phoenix.hbase.index.util.ImmutableBytesPtr), Aggregator (org.apache.phoenix.expression.aggregator.Aggregator), IOException (java.io.IOException)
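
Stepping back, the method implements a three-level lookup: in-memory map, then spill files, then a newly created aggregate bucket. A minimal, Phoenix-free sketch of that ordering, where GroupByLookupSketch, SpillStore, its load() method, and the long[] aggregate arrays are hypothetical stand-ins for the cache, SpillManager.loadEntry(), and Aggregator[]:

import java.util.HashMap;
import java.util.Map;

public class GroupByLookupSketch {
    interface SpillStore { long[] load(String key); } // hypothetical stand-in for SpillManager

    private final Map<String, long[]> inMemory = new HashMap<>();
    private final SpillStore spill;

    GroupByLookupSketch(SpillStore spill) { this.spill = spill; }

    long[] cache(String key) {
        long[] aggs = inMemory.get(key);
        if (aggs == null) {
            // Not in memory: was this key spilled earlier?
            aggs = (spill != null) ? spill.load(key) : null;
            if (aggs == null) {
                aggs = new long[1]; // never seen: start a new aggregate bucket
            }
            inMemory.put(key, aggs); // the implicit put the Javadoc above describes
        }
        return aggs;
    }
}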

Aggregations

ImmutableBytesPtr (org.apache.phoenix.hbase.index.util.ImmutableBytesPtr): 120 usages
Mutation (org.apache.hadoop.hbase.client.Mutation): 31 usages
PTable (org.apache.phoenix.schema.PTable): 28 usages
ArrayList (java.util.ArrayList): 27 usages
Region (org.apache.hadoop.hbase.regionserver.Region): 22 usages
PMetaDataEntity (org.apache.phoenix.schema.PMetaDataEntity): 22 usages
Test (org.junit.Test): 21 usages
Cell (org.apache.hadoop.hbase.Cell): 20 usages
Put (org.apache.hadoop.hbase.client.Put): 18 usages
List (java.util.List): 15 usages
Scan (org.apache.hadoop.hbase.client.Scan): 15 usages
Pair (org.apache.hadoop.hbase.util.Pair): 15 usages
IOException (java.io.IOException): 14 usages
Expression (org.apache.phoenix.expression.Expression): 14 usages
PColumn (org.apache.phoenix.schema.PColumn): 14 usages
RowLock (org.apache.hadoop.hbase.regionserver.Region.RowLock): 13 usages
PSmallint (org.apache.phoenix.schema.types.PSmallint): 12 usages
HashMap (java.util.HashMap): 11 usages
ImmutableBytesWritable (org.apache.hadoop.hbase.io.ImmutableBytesWritable): 11 usages
LiteralExpression (org.apache.phoenix.expression.LiteralExpression): 11 usages