Use of org.apache.phoenix.hbase.index.util.ImmutableBytesPtr in project phoenix by apache.
The class QueryCompiler, method compileJoinQuery.
/*
* Call compileJoinQuery() for join queries recursively down to the leaf JoinTable nodes.
* This matches the input JoinTable node against patterns in the following order:
* 1. A (leaf JoinTable node, which can be a named table reference or a subquery of any kind.)
* Returns the compilation result of a single table scan or of an independent subquery.
* 2. Matching either of (when hint USE_SORT_MERGE_JOIN not specified):
* 1) A LEFT/INNER JOIN B
* 2) A LEFT/INNER JOIN B (LEFT/INNER JOIN C)+, if hint NO_STAR_JOIN not specified
* where A can be a named table reference or a flat subquery, and B, C, ... can be a named
* table reference, a sub-join or a subquery of any kind.
* Returns a HashJoinPlan{scan: A, hash: B, C, ...}.
* 3. Matching pattern:
* A RIGHT/INNER JOIN B (when hint USE_SORT_MERGE_JOIN not specified)
* where B can be a named table reference or a flat subquery, and A can be a named table
* reference, a sub-join or a subquery of any kind.
* Returns a HashJoinPlan{scan: B, hash: A}.
* NOTE that "A LEFT/RIGHT/INNER/FULL JOIN B RIGHT/INNER JOIN C" is viewed as
* "(A LEFT/RIGHT/INNER/FULL JOIN B) RIGHT/INNER JOIN C" here, which means the left part in the
* parenthesis is considered a sub-join.
* 4. All the rest that do not qualify for previous patterns or conditions, including FULL joins.
* Returns a SortMergeJoinPlan, the sorting part of which is pushed down to the JoinTable nodes
* of both sides as order-by clauses.
* NOTE that SEMI or ANTI joins are treated the same way as LEFT joins in JoinTable pattern matching.
*
* If no join algorithm hint is provided, then according to the above compilation process, a join
* query plan may consist of both HashJoinPlans and SortMergeJoinPlans, which can enclose each other.
* TODO 1) Use table statistics to guide the choice of join plans.
* 2) Make it possible to hint a certain join algorithm for a specific join step.
*/
@SuppressWarnings("unchecked")
protected QueryPlan compileJoinQuery(StatementContext context, List<Object> binds, JoinTable joinTable, boolean asSubquery, boolean projectPKColumns, List<OrderByNode> orderBy) throws SQLException {
byte[] emptyByteArray = new byte[0];
List<JoinSpec> joinSpecs = joinTable.getJoinSpecs();
if (joinSpecs.isEmpty()) {
Table table = joinTable.getTable();
SelectStatement subquery = table.getAsSubquery(orderBy);
if (!table.isSubselect()) {
context.setCurrentTable(table.getTableRef());
PTable projectedTable = table.createProjectedTable(!projectPKColumns, context);
TupleProjector projector = new TupleProjector(projectedTable);
TupleProjector.serializeProjectorIntoScan(context.getScan(), projector);
context.setResolver(FromCompiler.getResolverForProjectedTable(projectedTable, context.getConnection(), subquery.getUdfParseNodes()));
table.projectColumns(context.getScan());
return compileSingleFlatQuery(context, subquery, binds, asSubquery, !asSubquery, null, projectPKColumns ? projector : null, true);
}
QueryPlan plan = compileSubquery(subquery, false);
PTable projectedTable = table.createProjectedTable(plan.getProjector());
context.setResolver(FromCompiler.getResolverForProjectedTable(projectedTable, context.getConnection(), subquery.getUdfParseNodes()));
return new TupleProjectionPlan(plan, new TupleProjector(plan.getProjector()), table.compilePostFilterExpression(context));
}
boolean[] starJoinVector;
if (!this.useSortMergeJoin && (starJoinVector = joinTable.getStarJoinVector()) != null) {
Table table = joinTable.getTable();
PTable initialProjectedTable;
TableRef tableRef;
SelectStatement query;
TupleProjector tupleProjector;
if (!table.isSubselect()) {
context.setCurrentTable(table.getTableRef());
initialProjectedTable = table.createProjectedTable(!projectPKColumns, context);
tableRef = table.getTableRef();
table.projectColumns(context.getScan());
query = joinTable.getAsSingleSubquery(table.getAsSubquery(orderBy), asSubquery);
tupleProjector = new TupleProjector(initialProjectedTable);
} else {
SelectStatement subquery = table.getAsSubquery(orderBy);
QueryPlan plan = compileSubquery(subquery, false);
initialProjectedTable = table.createProjectedTable(plan.getProjector());
tableRef = plan.getTableRef();
context.getScan().setFamilyMap(plan.getContext().getScan().getFamilyMap());
query = joinTable.getAsSingleSubquery((SelectStatement) plan.getStatement(), asSubquery);
tupleProjector = new TupleProjector(plan.getProjector());
}
context.setCurrentTable(tableRef);
PTable projectedTable = initialProjectedTable;
int count = joinSpecs.size();
ImmutableBytesPtr[] joinIds = new ImmutableBytesPtr[count];
List<Expression>[] joinExpressions = new List[count];
JoinType[] joinTypes = new JoinType[count];
PTable[] tables = new PTable[count];
int[] fieldPositions = new int[count];
StatementContext[] subContexts = new StatementContext[count];
QueryPlan[] subPlans = new QueryPlan[count];
HashSubPlan[] hashPlans = new HashSubPlan[count];
fieldPositions[0] = projectedTable.getColumns().size() - projectedTable.getPKColumns().size();
for (int i = 0; i < count; i++) {
JoinSpec joinSpec = joinSpecs.get(i);
Scan subScan = ScanUtil.newScan(originalScan);
subContexts[i] = new StatementContext(statement, context.getResolver(), subScan, new SequenceManager(statement));
subPlans[i] = compileJoinQuery(subContexts[i], binds, joinSpec.getJoinTable(), true, true, null);
boolean hasPostReference = joinSpec.getJoinTable().hasPostReference();
if (hasPostReference) {
tables[i] = subContexts[i].getResolver().getTables().get(0).getTable();
projectedTable = JoinCompiler.joinProjectedTables(projectedTable, tables[i], joinSpec.getType());
} else {
tables[i] = null;
}
}
for (int i = 0; i < count; i++) {
JoinSpec joinSpec = joinSpecs.get(i);
context.setResolver(FromCompiler.getResolverForProjectedTable(projectedTable, context.getConnection(), query.getUdfParseNodes()));
// place-holder
joinIds[i] = new ImmutableBytesPtr(emptyByteArray);
Pair<List<Expression>, List<Expression>> joinConditions = joinSpec.compileJoinConditions(context, subContexts[i], true);
joinExpressions[i] = joinConditions.getFirst();
List<Expression> hashExpressions = joinConditions.getSecond();
Pair<Expression, Expression> keyRangeExpressions = new Pair<Expression, Expression>(null, null);
boolean optimized = getKeyExpressionCombinations(keyRangeExpressions, context, joinTable.getStatement(), tableRef, joinSpec.getType(), joinExpressions[i], hashExpressions);
Expression keyRangeLhsExpression = keyRangeExpressions.getFirst();
Expression keyRangeRhsExpression = keyRangeExpressions.getSecond();
joinTypes[i] = joinSpec.getType();
if (i < count - 1) {
fieldPositions[i + 1] = fieldPositions[i] + (tables[i] == null ? 0 : (tables[i].getColumns().size() - tables[i].getPKColumns().size()));
}
hashPlans[i] = new HashSubPlan(i, subPlans[i], optimized ? null : hashExpressions, joinSpec.isSingleValueOnly(), keyRangeLhsExpression, keyRangeRhsExpression);
}
TupleProjector.serializeProjectorIntoScan(context.getScan(), tupleProjector);
QueryPlan plan = compileSingleFlatQuery(context, query, binds, asSubquery, !asSubquery && joinTable.isAllLeftJoin(), null, !table.isSubselect() && projectPKColumns ? tupleProjector : null, true);
Expression postJoinFilterExpression = joinTable.compilePostFilterExpression(context, table);
Integer limit = null;
Integer offset = null;
if (!query.isAggregate() && !query.isDistinct() && query.getOrderBy().isEmpty()) {
limit = plan.getLimit();
offset = plan.getOffset();
}
HashJoinInfo joinInfo = new HashJoinInfo(projectedTable, joinIds, joinExpressions, joinTypes, starJoinVector, tables, fieldPositions, postJoinFilterExpression, QueryUtil.getOffsetLimit(limit, offset));
return HashJoinPlan.create(joinTable.getStatement(), plan, joinInfo, hashPlans);
}
JoinSpec lastJoinSpec = joinSpecs.get(joinSpecs.size() - 1);
JoinType type = lastJoinSpec.getType();
if (!this.useSortMergeJoin && (type == JoinType.Right || type == JoinType.Inner) && lastJoinSpec.getJoinTable().getJoinSpecs().isEmpty() && lastJoinSpec.getJoinTable().getTable().isFlat()) {
JoinTable rhsJoinTable = lastJoinSpec.getJoinTable();
Table rhsTable = rhsJoinTable.getTable();
JoinTable lhsJoin = joinTable.getSubJoinTableWithoutPostFilters();
Scan subScan = ScanUtil.newScan(originalScan);
StatementContext lhsCtx = new StatementContext(statement, context.getResolver(), subScan, new SequenceManager(statement));
QueryPlan lhsPlan = compileJoinQuery(lhsCtx, binds, lhsJoin, true, true, null);
PTable rhsProjTable;
TableRef rhsTableRef;
SelectStatement rhs;
TupleProjector tupleProjector;
if (!rhsTable.isSubselect()) {
context.setCurrentTable(rhsTable.getTableRef());
rhsProjTable = rhsTable.createProjectedTable(!projectPKColumns, context);
rhsTableRef = rhsTable.getTableRef();
rhsTable.projectColumns(context.getScan());
rhs = rhsJoinTable.getAsSingleSubquery(rhsTable.getAsSubquery(orderBy), asSubquery);
tupleProjector = new TupleProjector(rhsProjTable);
} else {
SelectStatement subquery = rhsTable.getAsSubquery(orderBy);
QueryPlan plan = compileSubquery(subquery, false);
rhsProjTable = rhsTable.createProjectedTable(plan.getProjector());
rhsTableRef = plan.getTableRef();
context.getScan().setFamilyMap(plan.getContext().getScan().getFamilyMap());
rhs = rhsJoinTable.getAsSingleSubquery((SelectStatement) plan.getStatement(), asSubquery);
tupleProjector = new TupleProjector(plan.getProjector());
}
context.setCurrentTable(rhsTableRef);
context.setResolver(FromCompiler.getResolverForProjectedTable(rhsProjTable, context.getConnection(), rhs.getUdfParseNodes()));
ImmutableBytesPtr[] joinIds = new ImmutableBytesPtr[] { new ImmutableBytesPtr(emptyByteArray) };
Pair<List<Expression>, List<Expression>> joinConditions = lastJoinSpec.compileJoinConditions(lhsCtx, context, true);
List<Expression> joinExpressions = joinConditions.getSecond();
List<Expression> hashExpressions = joinConditions.getFirst();
boolean needsMerge = lhsJoin.hasPostReference();
PTable lhsTable = needsMerge ? lhsCtx.getResolver().getTables().get(0).getTable() : null;
int fieldPosition = needsMerge ? rhsProjTable.getColumns().size() - rhsProjTable.getPKColumns().size() : 0;
PTable projectedTable = needsMerge ? JoinCompiler.joinProjectedTables(rhsProjTable, lhsTable, type == JoinType.Right ? JoinType.Left : type) : rhsProjTable;
TupleProjector.serializeProjectorIntoScan(context.getScan(), tupleProjector);
context.setResolver(FromCompiler.getResolverForProjectedTable(projectedTable, context.getConnection(), rhs.getUdfParseNodes()));
QueryPlan rhsPlan = compileSingleFlatQuery(context, rhs, binds, asSubquery, !asSubquery && type == JoinType.Right, null, !rhsTable.isSubselect() && projectPKColumns ? tupleProjector : null, true);
Expression postJoinFilterExpression = joinTable.compilePostFilterExpression(context, rhsTable);
Integer limit = null;
Integer offset = null;
if (!rhs.isAggregate() && !rhs.isDistinct() && rhs.getOrderBy().isEmpty()) {
limit = rhsPlan.getLimit();
offset = rhsPlan.getOffset();
}
HashJoinInfo joinInfo = new HashJoinInfo(projectedTable, joinIds, new List[] { joinExpressions }, new JoinType[] { type == JoinType.Right ? JoinType.Left : type }, new boolean[] { true }, new PTable[] { lhsTable }, new int[] { fieldPosition }, postJoinFilterExpression, QueryUtil.getOffsetLimit(limit, offset));
Pair<Expression, Expression> keyRangeExpressions = new Pair<Expression, Expression>(null, null);
getKeyExpressionCombinations(keyRangeExpressions, context, joinTable.getStatement(), rhsTableRef, type, joinExpressions, hashExpressions);
return HashJoinPlan.create(joinTable.getStatement(), rhsPlan, joinInfo, new HashSubPlan[] { new HashSubPlan(0, lhsPlan, hashExpressions, false, keyRangeExpressions.getFirst(), keyRangeExpressions.getSecond()) });
}
JoinTable lhsJoin = joinTable.getSubJoinTableWithoutPostFilters();
JoinTable rhsJoin = lastJoinSpec.getJoinTable();
if (type == JoinType.Right) {
JoinTable temp = lhsJoin;
lhsJoin = rhsJoin;
rhsJoin = temp;
}
List<EqualParseNode> joinConditionNodes = lastJoinSpec.getOnConditions();
List<OrderByNode> lhsOrderBy = Lists.<OrderByNode>newArrayListWithExpectedSize(joinConditionNodes.size());
List<OrderByNode> rhsOrderBy = Lists.<OrderByNode>newArrayListWithExpectedSize(joinConditionNodes.size());
for (EqualParseNode condition : joinConditionNodes) {
lhsOrderBy.add(NODE_FACTORY.orderBy(type == JoinType.Right ? condition.getRHS() : condition.getLHS(), false, true));
rhsOrderBy.add(NODE_FACTORY.orderBy(type == JoinType.Right ? condition.getLHS() : condition.getRHS(), false, true));
}
Scan lhsScan = ScanUtil.newScan(originalScan);
StatementContext lhsCtx = new StatementContext(statement, context.getResolver(), lhsScan, new SequenceManager(statement));
boolean preserveRowkey = !projectPKColumns && type != JoinType.Full;
QueryPlan lhsPlan = compileJoinQuery(lhsCtx, binds, lhsJoin, true, !preserveRowkey, lhsOrderBy);
PTable lhsProjTable = lhsCtx.getResolver().getTables().get(0).getTable();
boolean isInRowKeyOrder = preserveRowkey && lhsPlan.getOrderBy().getOrderByExpressions().isEmpty();
Scan rhsScan = ScanUtil.newScan(originalScan);
StatementContext rhsCtx = new StatementContext(statement, context.getResolver(), rhsScan, new SequenceManager(statement));
QueryPlan rhsPlan = compileJoinQuery(rhsCtx, binds, rhsJoin, true, true, rhsOrderBy);
PTable rhsProjTable = rhsCtx.getResolver().getTables().get(0).getTable();
Pair<List<Expression>, List<Expression>> joinConditions = lastJoinSpec.compileJoinConditions(type == JoinType.Right ? rhsCtx : lhsCtx, type == JoinType.Right ? lhsCtx : rhsCtx, false);
List<Expression> lhsKeyExpressions = type == JoinType.Right ? joinConditions.getSecond() : joinConditions.getFirst();
List<Expression> rhsKeyExpressions = type == JoinType.Right ? joinConditions.getFirst() : joinConditions.getSecond();
boolean needsMerge = rhsJoin.hasPostReference();
int fieldPosition = needsMerge ? lhsProjTable.getColumns().size() - lhsProjTable.getPKColumns().size() : 0;
PTable projectedTable = needsMerge ? JoinCompiler.joinProjectedTables(lhsProjTable, rhsProjTable, type == JoinType.Right ? JoinType.Left : type) : lhsProjTable;
ColumnResolver resolver = FromCompiler.getResolverForProjectedTable(projectedTable, context.getConnection(), joinTable.getStatement().getUdfParseNodes());
TableRef tableRef = resolver.getTables().get(0);
StatementContext subCtx = new StatementContext(statement, resolver, ScanUtil.newScan(originalScan), new SequenceManager(statement));
subCtx.setCurrentTable(tableRef);
QueryPlan innerPlan = new SortMergeJoinPlan(subCtx, joinTable.getStatement(), tableRef, type == JoinType.Right ? JoinType.Left : type, lhsPlan, rhsPlan, lhsKeyExpressions, rhsKeyExpressions, projectedTable, lhsProjTable, needsMerge ? rhsProjTable : null, fieldPosition, lastJoinSpec.isSingleValueOnly());
context.setCurrentTable(tableRef);
context.setResolver(resolver);
TableNode from = NODE_FACTORY.namedTable(tableRef.getTableAlias(), NODE_FACTORY.table(tableRef.getTable().getSchemaName().getString(), tableRef.getTable().getTableName().getString()));
ParseNode where = joinTable.getPostFiltersCombined();
SelectStatement select = asSubquery ? NODE_FACTORY.select(from, joinTable.getStatement().getHint(), false, Collections.<AliasedNode>emptyList(), where, null, null, orderBy, null, null, 0, false, joinTable.getStatement().hasSequence(), Collections.<SelectStatement>emptyList(), joinTable.getStatement().getUdfParseNodes()) : NODE_FACTORY.select(joinTable.getStatement(), from, where);
return compileSingleFlatQuery(context, select, binds, asSubquery, false, innerPlan, null, isInRowKeyOrder);
}
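The joinIds above are created as empty place-holders and serialized into the HashJoinInfo before the hash side has run. A minimal sketch of that pattern, assuming the ids are filled in at execution time once each hash cache exists; the cache id bytes below are hypothetical, not produced by Phoenix:

import org.apache.phoenix.hbase.index.util.ImmutableBytesPtr;

public class JoinIdPlaceholderSketch {
    public static void main(String[] args) {
        // Created empty, exactly like the "place-holder" above.
        ImmutableBytesPtr joinId = new ImmutableBytesPtr(new byte[0]);
        // Later, the same pointer object can be re-pointed at the real
        // cache id; its identity stays stable, only its contents change.
        byte[] hypotheticalCacheId = new byte[] { 0, 0, 0, 42 };
        joinId.set(hypotheticalCacheId);
        System.out.println("join id length = " + joinId.getLength()); // 4
    }
}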
Use of org.apache.phoenix.hbase.index.util.ImmutableBytesPtr in project phoenix by apache.
The class DeleteCompiler, method deleteRows.
private static MutationState deleteRows(StatementContext childContext, TableRef targetTableRef, List<TableRef> indexTableRefs, ResultIterator iterator, RowProjector projector, TableRef sourceTableRef) throws SQLException {
PTable table = targetTableRef.getTable();
PhoenixStatement statement = childContext.getStatement();
PhoenixConnection connection = statement.getConnection();
PName tenantId = connection.getTenantId();
byte[] tenantIdBytes = null;
if (tenantId != null) {
tenantIdBytes = ScanUtil.getTenantIdBytes(table.getRowKeySchema(), table.getBucketNum() != null, tenantId, table.getViewIndexId() != null);
}
final boolean isAutoCommit = connection.getAutoCommit();
ConnectionQueryServices services = connection.getQueryServices();
final int maxSize = services.getProps().getInt(QueryServices.MAX_MUTATION_SIZE_ATTRIB, QueryServicesOptions.DEFAULT_MAX_MUTATION_SIZE);
final int maxSizeBytes = services.getProps().getInt(QueryServices.MAX_MUTATION_SIZE_BYTES_ATTRIB, QueryServicesOptions.DEFAULT_MAX_MUTATION_SIZE_BYTES);
final int batchSize = Math.min(connection.getMutateBatchSize(), maxSize);
Map<ImmutableBytesPtr, RowMutationState> mutations = Maps.newHashMapWithExpectedSize(batchSize);
List<Map<ImmutableBytesPtr, RowMutationState>> indexMutations = null;
// When the table has index tables, we delete rows from both the index tables and
// the data table through a single query to save executing an additional one.
if (!indexTableRefs.isEmpty()) {
indexMutations = Lists.newArrayListWithExpectedSize(indexTableRefs.size());
for (int i = 0; i < indexTableRefs.size(); i++) {
indexMutations.add(Maps.<ImmutableBytesPtr, RowMutationState>newHashMapWithExpectedSize(batchSize));
}
}
List<PColumn> pkColumns = table.getPKColumns();
boolean isMultiTenant = table.isMultiTenant() && tenantIdBytes != null;
boolean isSharedViewIndex = table.getViewIndexId() != null;
int offset = (table.getBucketNum() == null ? 0 : 1);
byte[][] values = new byte[pkColumns.size()][];
if (isSharedViewIndex) {
values[offset++] = MetaDataUtil.getViewIndexIdDataType().toBytes(table.getViewIndexId());
}
if (isMultiTenant) {
values[offset++] = tenantIdBytes;
}
try (PhoenixResultSet rs = new PhoenixResultSet(iterator, projector, childContext)) {
int rowCount = 0;
while (rs.next()) {
// allocate new as this is a key in a Map
ImmutableBytesPtr ptr = new ImmutableBytesPtr();
// If the source and target tables match, the row key can be used as-is; no translation required.
if (sourceTableRef.equals(targetTableRef)) {
rs.getCurrentRow().getKey(ptr);
} else {
for (int i = offset; i < values.length; i++) {
byte[] byteValue = rs.getBytes(i + 1 - offset);
// TODO: consider going under the hood and just getting the bytes
if (pkColumns.get(i).getSortOrder() == SortOrder.DESC) {
byte[] tempByteValue = Arrays.copyOf(byteValue, byteValue.length);
byteValue = SortOrder.invert(byteValue, 0, tempByteValue, 0, byteValue.length);
}
values[i] = byteValue;
}
table.newKey(ptr, values);
}
// When issuing deletes, we do not care about the row time ranges. Also, if the table had a row timestamp column, then the
// row key will already have its value.
mutations.put(ptr, new RowMutationState(PRow.DELETE_MARKER, statement.getConnection().getStatementExecutionCounter(), NULL_ROWTIMESTAMP_INFO, null));
for (int i = 0; i < indexTableRefs.size(); i++) {
// allocate new as this is a key in a Map
ImmutableBytesPtr indexPtr = new ImmutableBytesPtr();
rs.getCurrentRow().getKey(indexPtr);
indexMutations.get(i).put(indexPtr, new RowMutationState(PRow.DELETE_MARKER, statement.getConnection().getStatementExecutionCounter(), NULL_ROWTIMESTAMP_INFO, null));
}
if (mutations.size() > maxSize) {
throw new IllegalArgumentException("MutationState size of " + mutations.size() + " is bigger than max allowed size of " + maxSize);
}
rowCount++;
// Commit a batch if auto commit is true and we're at our batch size
if (isAutoCommit && rowCount % batchSize == 0) {
MutationState state = new MutationState(targetTableRef, mutations, 0, maxSize, maxSizeBytes, connection);
connection.getMutationState().join(state);
for (int i = 0; i < indexTableRefs.size(); i++) {
MutationState indexState = new MutationState(indexTableRefs.get(i), indexMutations.get(i), 0, maxSize, maxSizeBytes, connection);
connection.getMutationState().join(indexState);
}
connection.getMutationState().send();
mutations.clear();
if (indexMutations != null) {
indexMutations.clear();
}
}
}
// If auto commit is true, this last batch will be committed upon return
int nCommittedRows = isAutoCommit ? (rowCount / batchSize * batchSize) : 0;
MutationState state = new MutationState(targetTableRef, mutations, nCommittedRows, maxSize, maxSizeBytes, connection);
for (int i = 0; i < indexTableRefs.size(); i++) {
// To prevent the counting of these index rows, we have a negative for remainingRows.
MutationState indexState = new MutationState(indexTableRefs.get(i), indexMutations.get(i), 0, maxSize, maxSizeBytes, connection);
state.join(indexState);
}
return state;
}
}
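The two "allocate new as this is a key in a Map" comments above matter because ImmutableBytesPtr is a mutable pointer with a cached hash code. A minimal sketch of what goes wrong if a single pointer is reused as the map key; the String values stand in for RowMutationState:

import java.util.HashMap;
import java.util.Map;
import org.apache.phoenix.hbase.index.util.ImmutableBytesPtr;

public class MapKeySketch {
    public static void main(String[] args) {
        byte[] rowKey1 = { 1 };
        byte[] rowKey2 = { 2 };

        // Correct: one pointer per entry, as in deleteRows() above.
        Map<ImmutableBytesPtr, String> mutations = new HashMap<>();
        mutations.put(new ImmutableBytesPtr(rowKey1), "delete row 1");
        mutations.put(new ImmutableBytesPtr(rowKey2), "delete row 2");
        System.out.println(mutations.size()); // 2

        // Broken: reusing one pointer mutates the key already in the map,
        // so the first entry can no longer be found.
        Map<ImmutableBytesPtr, String> broken = new HashMap<>();
        ImmutableBytesPtr reused = new ImmutableBytesPtr();
        reused.set(rowKey1);
        broken.put(reused, "delete row 1");
        reused.set(rowKey2); // changes bytes and cached hash of the stored key
        System.out.println(broken.containsKey(new ImmutableBytesPtr(rowKey1))); // false
    }
}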
Use of org.apache.phoenix.hbase.index.util.ImmutableBytesPtr in project phoenix by apache.
The class SpillManager, method getKey.
/**
* Helper method to deserialize the key part from a serialized byte array
* @param data serialized bytes: a VInt key length followed by the key bytes
* @return an ImmutableBytesPtr wrapping the key bytes within {@code data}
* @throws IOException if the length prefix cannot be read
*/
static ImmutableBytesPtr getKey(byte[] data) throws IOException {
DataInputStream input = null;
try {
input = new DataInputStream(new ByteArrayInputStream(data));
// key length
int keyLength = WritableUtils.readVInt(input);
int offset = WritableUtils.getVIntSize(keyLength);
// key
return new ImmutableBytesPtr(data, offset, keyLength);
} finally {
if (input != null) {
input.close();
input = null;
}
}
}
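A minimal round-trip sketch of the layout getKey() expects: a Hadoop VInt key length followed immediately by the key bytes. The writer side here is a hypothetical stand-in for illustration, not SpillManager's actual serializer:

import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.DataInputStream;
import java.io.DataOutputStream;
import java.io.IOException;
import org.apache.hadoop.io.WritableUtils;
import org.apache.phoenix.hbase.index.util.ImmutableBytesPtr;

public class SpillKeyLayoutSketch {
    public static void main(String[] args) throws IOException {
        byte[] key = { 1, 2, 3 };

        // Hypothetical writer producing the layout getKey() parses.
        ByteArrayOutputStream baos = new ByteArrayOutputStream();
        DataOutputStream out = new DataOutputStream(baos);
        WritableUtils.writeVInt(out, key.length); // key length prefix
        out.write(key);                           // key bytes
        byte[] data = baos.toByteArray();

        // The same parse getKey() performs: read the VInt, then wrap the
        // key bytes in place without copying them.
        DataInputStream in = new DataInputStream(new ByteArrayInputStream(data));
        int keyLength = WritableUtils.readVInt(in);
        int offset = WritableUtils.getVIntSize(keyLength);
        ImmutableBytesPtr ptr = new ImmutableBytesPtr(data, offset, keyLength);
        System.out.println(ptr.getLength()); // 3
    }
}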
Use of org.apache.phoenix.hbase.index.util.ImmutableBytesPtr in project phoenix by apache.
The class SpillMap, method redistribute.
// Function redistributes the elements in the current index
// to two new buckets, based on the bit at localDepth + 1 position.
// Optionally this function also doubles the directory to allow
// for bucket splits
private void redistribute(int index, ImmutableBytesPtr keyNew, byte[] valueNew) {
// Get the respective bucket
MappedByteBufferMap byteMap = directory[index];
// Get the actual bucket index, that the directory index points to
int mappedIdx = byteMap.pageIndex;
int localDepth = byteMap.localDepth;
ArrayList<Integer> buckets = Lists.newArrayList();
// TODO: can be made faster!
for (int i = 0; i < directory.length; i++) {
if (directory[i].pageIndex == mappedIdx) {
buckets.add(i);
}
}
// Assuming no directory doubling for now
// compute the two new bucket Ids for splitting
// SpillFile adds new files dynamically in case the directory points to pageIDs
// that exceed the size limit of a single file.
// TODO verify if some sort of de-fragmentation might be helpful
int tmpIndex = index ^ ((1 << localDepth));
int b1Index = Math.min(index, tmpIndex);
int b2Index = Math.max(index, tmpIndex);
// Create two new split buckets
MappedByteBufferMap b1 = new MappedByteBufferMap(b1Index, thresholdBytes, pageInserts, spillFile);
MappedByteBufferMap b2 = new MappedByteBufferMap(b2Index, thresholdBytes, pageInserts, spillFile);
// redistribute old elements into b1 and b2
for (Entry<ImmutableBytesPtr, byte[]> element : byteMap.pageMap.entrySet()) {
ImmutableBytesPtr key = element.getKey();
byte[] value = element.getValue();
// Only re-add the key if it is not contained in the in-memory cache;
// otherwise this is a good point to reduce the number of spilled elements
if (!cache.isKeyContained(key)) {
// Re-distribute element onto the new 2 split buckets
if ((key.hashCode() & ((1 << localDepth))) != 0) {
b2.addElement(null, key, value);
} else {
b1.addElement(null, key, value);
}
}
}
// Clear and GC the old now redistributed bucket
byteMap.pageMap.clear();
byteMap = null;
// Increase local bucket depths
b1.localDepth = localDepth + 1;
b2.localDepth = localDepth + 1;
boolean doubleDir = false;
if (globalDepth < (localDepth + 1)) {
// Double directory structure and re-adjust pointers
doubleDir = true;
b2Index = doubleDirectory(b2Index, keyNew);
}
if (!doubleDir) {
// Re-point every directory index that referenced the old page to one of
// the new buckets
for (int i = 0; i < buckets.size(); i++) {
if ((buckets.get(i) & (1 << (localDepth))) != 0) {
directory[buckets.get(i)] = b2;
} else {
directory[buckets.get(i)] = b1;
}
}
} else {
// Update the directory indexes in case of directory doubling
directory[b1Index] = b1;
directory[b2Index] = b2;
}
}
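A minimal, Phoenix-free sketch of the extendible-hashing bit arithmetic above: the sibling slot of a bucket with local depth d differs from it only in bit d (index ^ (1 << d)), and each key is routed to the upper or lower split bucket by testing bit d of its hash. The concrete numbers are illustrative:

public class ExtendibleHashSketch {
    public static void main(String[] args) {
        int index = 1;      // directory slot of the overflowing bucket
        int localDepth = 1; // bucket currently distinguishes 1 hash bit

        int sibling = index ^ (1 << localDepth); // 1 ^ 2 = 3
        int b1Index = Math.min(index, sibling);  // 1: keys with bit d == 0
        int b2Index = Math.max(index, sibling);  // 3: keys with bit d == 1

        int hash = 0b0110; // hypothetical key hash
        boolean goesToB2 = (hash & (1 << localDepth)) != 0; // bit 1 set -> true
        System.out.println(b1Index + " " + b2Index + " " + goesToB2); // 1 3 true
    }
}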
Use of org.apache.phoenix.hbase.index.util.ImmutableBytesPtr in project phoenix by apache.
The class SpillableGroupByCache, method cache.
/**
* Extract an element from the cache. If the element is not present in the in-memory cache
* or in the spill files, the cache performs an implicit put() of a new key/value tuple and
* loads it into the cache.
*/
@Override
public Aggregator[] cache(ImmutableBytesPtr cacheKey) {
ImmutableBytesPtr key = new ImmutableBytesPtr(cacheKey);
Aggregator[] rowAggregators = cache.get(key);
if (rowAggregators == null) {
// Not in the in-memory cache (aggregators are kept one set per distinct value)
if (spillManager != null) {
// A spill manager exists; check whether the key was spilled before
try {
rowAggregators = spillManager.loadEntry(key);
} catch (IOException ioe) {
// Ensure that we always close and delete the temp files
try {
throw new RuntimeException(ioe);
} finally {
Closeables.closeQuietly(SpillableGroupByCache.this);
}
}
}
if (rowAggregators == null) {
// No, key never spilled before, create a new tuple
rowAggregators = aggregators.newAggregators(env.getConfiguration());
if (logger.isDebugEnabled()) {
logger.debug("Adding new aggregate bucket for row key " + Bytes.toStringBinary(key.get(), key.getOffset(), key.getLength()));
}
}
if (cache.put(key, rowAggregators) == null) {
totalNumElements++;
}
}
return rowAggregators;
}
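A minimal sketch of the two-level miss path implemented above: consult the in-memory map, then the spill store, and only then create fresh aggregator state, re-inserting the result either way. SpillStore and the long[] aggregator state are hypothetical stand-ins, not Phoenix types:

import java.io.IOException;
import java.util.HashMap;
import java.util.Map;

public class TwoLevelLookupSketch {
    interface SpillStore { long[] load(String key) throws IOException; }

    private final Map<String, long[]> memory = new HashMap<>();
    private final SpillStore spill; // may be null if nothing ever spilled

    TwoLevelLookupSketch(SpillStore spill) { this.spill = spill; }

    long[] cache(String key) throws IOException {
        long[] aggs = memory.get(key);
        if (aggs == null && spill != null) {
            aggs = spill.load(key); // null if the key was never spilled
        }
        if (aggs == null) {
            aggs = new long[1]; // hypothetical fresh aggregator state
        }
        memory.put(key, aggs); // the implicit put loads it back into the cache
        return aggs;
    }
}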