use of org.apache.phoenix.join.HashJoinInfo in project phoenix by apache.
the class QueryCompiler method compileJoinQuery.
/*
* Call compileJoinQuery() for join queries recursively down to the leaf JoinTable nodes.
* This matches the input JoinTable node against patterns in the following order:
* 1. A (leaf JoinTable node, which can be a named table reference or a subquery of any kind.)
* Returns the compilation result of a single table scan or of an independent subquery.
* 2. Matching either of (when hint USE_SORT_MERGE_JOIN not specified):
* 1) A LEFT/INNER JOIN B
* 2) A LEFT/INNER JOIN B (LEFT/INNER JOIN C)+, if hint NO_STAR_JOIN not specified
* where A can be a named table reference or a flat subquery, and B, C, ... can be a named
* table reference, a sub-join or a subquery of any kind.
* Returns a HashJoinPlan{scan: A, hash: B, C, ...}.
* 3. Matching pattern:
* A RIGHT/INNER JOIN B (when hint USE_SORT_MERGE_JOIN not specified)
* where B can be a named table reference or a flat subquery, and A can be a named table
* reference, a sub-join or a subquery of any kind.
* Returns a HashJoinPlan{scan: B, hash: A}.
* NOTE that "A LEFT/RIGHT/INNER/FULL JOIN B RIGHT/INNER JOIN C" is viewed as
* "(A LEFT/RIGHT/INNER/FULL JOIN B) RIGHT/INNER JOIN C" here, which means the left part in the
* parenthesis is considered a sub-join.
* viewed as a sub-join.
* 4. All the rest that do not qualify for previous patterns or conditions, including FULL joins.
* Returns a SortMergeJoinPlan, the sorting part of which is pushed down to the JoinTable nodes
* of both sides as order-by clauses.
* NOTE that SEMI or ANTI joins are treated the same way as LEFT joins in JoinTable pattern matching.
*
* If no join algorithm hint is provided, according to the above compilation process, a join query
* plan can probably consist of both HashJoinPlan and SortMergeJoinPlan which may enclose each other.
* TODO 1) Use table statistics to guide the choice of join plans.
* 2) Make it possible to hint a certain join algorithm for a specific join step.
*/
@SuppressWarnings("unchecked")
protected QueryPlan compileJoinQuery(StatementContext context, List<Object> binds, JoinTable joinTable, boolean asSubquery, boolean projectPKColumns, List<OrderByNode> orderBy) throws SQLException {
byte[] emptyByteArray = new byte[0];
List<JoinSpec> joinSpecs = joinTable.getJoinSpecs();
if (joinSpecs.isEmpty()) {
Table table = joinTable.getTable();
SelectStatement subquery = table.getAsSubquery(orderBy);
if (!table.isSubselect()) {
context.setCurrentTable(table.getTableRef());
PTable projectedTable = table.createProjectedTable(!projectPKColumns, context);
TupleProjector projector = new TupleProjector(projectedTable);
TupleProjector.serializeProjectorIntoScan(context.getScan(), projector);
context.setResolver(FromCompiler.getResolverForProjectedTable(projectedTable, context.getConnection(), subquery.getUdfParseNodes()));
table.projectColumns(context.getScan());
return compileSingleFlatQuery(context, subquery, binds, asSubquery, !asSubquery, null, projectPKColumns ? projector : null, true);
}
QueryPlan plan = compileSubquery(subquery, false);
PTable projectedTable = table.createProjectedTable(plan.getProjector());
context.setResolver(FromCompiler.getResolverForProjectedTable(projectedTable, context.getConnection(), subquery.getUdfParseNodes()));
return new TupleProjectionPlan(plan, new TupleProjector(plan.getProjector()), table.compilePostFilterExpression(context));
}
boolean[] starJoinVector;
if (!this.useSortMergeJoin && (starJoinVector = joinTable.getStarJoinVector()) != null) {
Table table = joinTable.getTable();
PTable initialProjectedTable;
TableRef tableRef;
SelectStatement query;
TupleProjector tupleProjector;
if (!table.isSubselect()) {
context.setCurrentTable(table.getTableRef());
initialProjectedTable = table.createProjectedTable(!projectPKColumns, context);
tableRef = table.getTableRef();
table.projectColumns(context.getScan());
query = joinTable.getAsSingleSubquery(table.getAsSubquery(orderBy), asSubquery);
tupleProjector = new TupleProjector(initialProjectedTable);
} else {
SelectStatement subquery = table.getAsSubquery(orderBy);
QueryPlan plan = compileSubquery(subquery, false);
initialProjectedTable = table.createProjectedTable(plan.getProjector());
tableRef = plan.getTableRef();
context.getScan().setFamilyMap(plan.getContext().getScan().getFamilyMap());
query = joinTable.getAsSingleSubquery((SelectStatement) plan.getStatement(), asSubquery);
tupleProjector = new TupleProjector(plan.getProjector());
}
context.setCurrentTable(tableRef);
PTable projectedTable = initialProjectedTable;
int count = joinSpecs.size();
ImmutableBytesPtr[] joinIds = new ImmutableBytesPtr[count];
List<Expression>[] joinExpressions = new List[count];
JoinType[] joinTypes = new JoinType[count];
PTable[] tables = new PTable[count];
int[] fieldPositions = new int[count];
StatementContext[] subContexts = new StatementContext[count];
QueryPlan[] subPlans = new QueryPlan[count];
HashSubPlan[] hashPlans = new HashSubPlan[count];
fieldPositions[0] = projectedTable.getColumns().size() - projectedTable.getPKColumns().size();
for (int i = 0; i < count; i++) {
JoinSpec joinSpec = joinSpecs.get(i);
Scan subScan = ScanUtil.newScan(originalScan);
subContexts[i] = new StatementContext(statement, context.getResolver(), subScan, new SequenceManager(statement));
subPlans[i] = compileJoinQuery(subContexts[i], binds, joinSpec.getJoinTable(), true, true, null);
boolean hasPostReference = joinSpec.getJoinTable().hasPostReference();
if (hasPostReference) {
tables[i] = subContexts[i].getResolver().getTables().get(0).getTable();
projectedTable = JoinCompiler.joinProjectedTables(projectedTable, tables[i], joinSpec.getType());
} else {
tables[i] = null;
}
}
for (int i = 0; i < count; i++) {
JoinSpec joinSpec = joinSpecs.get(i);
context.setResolver(FromCompiler.getResolverForProjectedTable(projectedTable, context.getConnection(), query.getUdfParseNodes()));
// place-holder
joinIds[i] = new ImmutableBytesPtr(emptyByteArray);
Pair<List<Expression>, List<Expression>> joinConditions = joinSpec.compileJoinConditions(context, subContexts[i], true);
joinExpressions[i] = joinConditions.getFirst();
List<Expression> hashExpressions = joinConditions.getSecond();
Pair<Expression, Expression> keyRangeExpressions = new Pair<Expression, Expression>(null, null);
boolean optimized = getKeyExpressionCombinations(keyRangeExpressions, context, joinTable.getStatement(), tableRef, joinSpec.getType(), joinExpressions[i], hashExpressions);
Expression keyRangeLhsExpression = keyRangeExpressions.getFirst();
Expression keyRangeRhsExpression = keyRangeExpressions.getSecond();
joinTypes[i] = joinSpec.getType();
if (i < count - 1) {
fieldPositions[i + 1] = fieldPositions[i] + (tables[i] == null ? 0 : (tables[i].getColumns().size() - tables[i].getPKColumns().size()));
}
hashPlans[i] = new HashSubPlan(i, subPlans[i], optimized ? null : hashExpressions, joinSpec.isSingleValueOnly(), keyRangeLhsExpression, keyRangeRhsExpression);
}
TupleProjector.serializeProjectorIntoScan(context.getScan(), tupleProjector);
QueryPlan plan = compileSingleFlatQuery(context, query, binds, asSubquery, !asSubquery && joinTable.isAllLeftJoin(), null, !table.isSubselect() && projectPKColumns ? tupleProjector : null, true);
Expression postJoinFilterExpression = joinTable.compilePostFilterExpression(context, table);
Integer limit = null;
Integer offset = null;
if (!query.isAggregate() && !query.isDistinct() && query.getOrderBy().isEmpty()) {
limit = plan.getLimit();
offset = plan.getOffset();
}
HashJoinInfo joinInfo = new HashJoinInfo(projectedTable, joinIds, joinExpressions, joinTypes, starJoinVector, tables, fieldPositions, postJoinFilterExpression, QueryUtil.getOffsetLimit(limit, offset));
return HashJoinPlan.create(joinTable.getStatement(), plan, joinInfo, hashPlans);
}
JoinSpec lastJoinSpec = joinSpecs.get(joinSpecs.size() - 1);
JoinType type = lastJoinSpec.getType();
if (!this.useSortMergeJoin && (type == JoinType.Right || type == JoinType.Inner) && lastJoinSpec.getJoinTable().getJoinSpecs().isEmpty() && lastJoinSpec.getJoinTable().getTable().isFlat()) {
JoinTable rhsJoinTable = lastJoinSpec.getJoinTable();
Table rhsTable = rhsJoinTable.getTable();
JoinTable lhsJoin = joinTable.getSubJoinTableWithoutPostFilters();
Scan subScan = ScanUtil.newScan(originalScan);
StatementContext lhsCtx = new StatementContext(statement, context.getResolver(), subScan, new SequenceManager(statement));
QueryPlan lhsPlan = compileJoinQuery(lhsCtx, binds, lhsJoin, true, true, null);
PTable rhsProjTable;
TableRef rhsTableRef;
SelectStatement rhs;
TupleProjector tupleProjector;
if (!rhsTable.isSubselect()) {
context.setCurrentTable(rhsTable.getTableRef());
rhsProjTable = rhsTable.createProjectedTable(!projectPKColumns, context);
rhsTableRef = rhsTable.getTableRef();
rhsTable.projectColumns(context.getScan());
rhs = rhsJoinTable.getAsSingleSubquery(rhsTable.getAsSubquery(orderBy), asSubquery);
tupleProjector = new TupleProjector(rhsProjTable);
} else {
SelectStatement subquery = rhsTable.getAsSubquery(orderBy);
QueryPlan plan = compileSubquery(subquery, false);
rhsProjTable = rhsTable.createProjectedTable(plan.getProjector());
rhsTableRef = plan.getTableRef();
context.getScan().setFamilyMap(plan.getContext().getScan().getFamilyMap());
rhs = rhsJoinTable.getAsSingleSubquery((SelectStatement) plan.getStatement(), asSubquery);
tupleProjector = new TupleProjector(plan.getProjector());
}
context.setCurrentTable(rhsTableRef);
context.setResolver(FromCompiler.getResolverForProjectedTable(rhsProjTable, context.getConnection(), rhs.getUdfParseNodes()));
ImmutableBytesPtr[] joinIds = new ImmutableBytesPtr[] { new ImmutableBytesPtr(emptyByteArray) };
Pair<List<Expression>, List<Expression>> joinConditions = lastJoinSpec.compileJoinConditions(lhsCtx, context, true);
List<Expression> joinExpressions = joinConditions.getSecond();
List<Expression> hashExpressions = joinConditions.getFirst();
boolean needsMerge = lhsJoin.hasPostReference();
PTable lhsTable = needsMerge ? lhsCtx.getResolver().getTables().get(0).getTable() : null;
int fieldPosition = needsMerge ? rhsProjTable.getColumns().size() - rhsProjTable.getPKColumns().size() : 0;
PTable projectedTable = needsMerge ? JoinCompiler.joinProjectedTables(rhsProjTable, lhsTable, type == JoinType.Right ? JoinType.Left : type) : rhsProjTable;
TupleProjector.serializeProjectorIntoScan(context.getScan(), tupleProjector);
context.setResolver(FromCompiler.getResolverForProjectedTable(projectedTable, context.getConnection(), rhs.getUdfParseNodes()));
QueryPlan rhsPlan = compileSingleFlatQuery(context, rhs, binds, asSubquery, !asSubquery && type == JoinType.Right, null, !rhsTable.isSubselect() && projectPKColumns ? tupleProjector : null, true);
Expression postJoinFilterExpression = joinTable.compilePostFilterExpression(context, rhsTable);
Integer limit = null;
Integer offset = null;
if (!rhs.isAggregate() && !rhs.isDistinct() && rhs.getOrderBy().isEmpty()) {
limit = rhsPlan.getLimit();
offset = rhsPlan.getOffset();
}
HashJoinInfo joinInfo = new HashJoinInfo(projectedTable, joinIds, new List[] { joinExpressions }, new JoinType[] { type == JoinType.Right ? JoinType.Left : type }, new boolean[] { true }, new PTable[] { lhsTable }, new int[] { fieldPosition }, postJoinFilterExpression, QueryUtil.getOffsetLimit(limit, offset));
Pair<Expression, Expression> keyRangeExpressions = new Pair<Expression, Expression>(null, null);
getKeyExpressionCombinations(keyRangeExpressions, context, joinTable.getStatement(), rhsTableRef, type, joinExpressions, hashExpressions);
return HashJoinPlan.create(joinTable.getStatement(), rhsPlan, joinInfo, new HashSubPlan[] { new HashSubPlan(0, lhsPlan, hashExpressions, false, keyRangeExpressions.getFirst(), keyRangeExpressions.getSecond()) });
}
JoinTable lhsJoin = joinTable.getSubJoinTableWithoutPostFilters();
JoinTable rhsJoin = lastJoinSpec.getJoinTable();
if (type == JoinType.Right) {
JoinTable temp = lhsJoin;
lhsJoin = rhsJoin;
rhsJoin = temp;
}
List<EqualParseNode> joinConditionNodes = lastJoinSpec.getOnConditions();
List<OrderByNode> lhsOrderBy = Lists.<OrderByNode>newArrayListWithExpectedSize(joinConditionNodes.size());
List<OrderByNode> rhsOrderBy = Lists.<OrderByNode>newArrayListWithExpectedSize(joinConditionNodes.size());
for (EqualParseNode condition : joinConditionNodes) {
lhsOrderBy.add(NODE_FACTORY.orderBy(type == JoinType.Right ? condition.getRHS() : condition.getLHS(), false, true));
rhsOrderBy.add(NODE_FACTORY.orderBy(type == JoinType.Right ? condition.getLHS() : condition.getRHS(), false, true));
}
Scan lhsScan = ScanUtil.newScan(originalScan);
StatementContext lhsCtx = new StatementContext(statement, context.getResolver(), lhsScan, new SequenceManager(statement));
boolean preserveRowkey = !projectPKColumns && type != JoinType.Full;
QueryPlan lhsPlan = compileJoinQuery(lhsCtx, binds, lhsJoin, true, !preserveRowkey, lhsOrderBy);
PTable lhsProjTable = lhsCtx.getResolver().getTables().get(0).getTable();
boolean isInRowKeyOrder = preserveRowkey && lhsPlan.getOrderBy().getOrderByExpressions().isEmpty();
Scan rhsScan = ScanUtil.newScan(originalScan);
StatementContext rhsCtx = new StatementContext(statement, context.getResolver(), rhsScan, new SequenceManager(statement));
QueryPlan rhsPlan = compileJoinQuery(rhsCtx, binds, rhsJoin, true, true, rhsOrderBy);
PTable rhsProjTable = rhsCtx.getResolver().getTables().get(0).getTable();
Pair<List<Expression>, List<Expression>> joinConditions = lastJoinSpec.compileJoinConditions(type == JoinType.Right ? rhsCtx : lhsCtx, type == JoinType.Right ? lhsCtx : rhsCtx, false);
List<Expression> lhsKeyExpressions = type == JoinType.Right ? joinConditions.getSecond() : joinConditions.getFirst();
List<Expression> rhsKeyExpressions = type == JoinType.Right ? joinConditions.getFirst() : joinConditions.getSecond();
boolean needsMerge = rhsJoin.hasPostReference();
int fieldPosition = needsMerge ? lhsProjTable.getColumns().size() - lhsProjTable.getPKColumns().size() : 0;
PTable projectedTable = needsMerge ? JoinCompiler.joinProjectedTables(lhsProjTable, rhsProjTable, type == JoinType.Right ? JoinType.Left : type) : lhsProjTable;
ColumnResolver resolver = FromCompiler.getResolverForProjectedTable(projectedTable, context.getConnection(), joinTable.getStatement().getUdfParseNodes());
TableRef tableRef = resolver.getTables().get(0);
StatementContext subCtx = new StatementContext(statement, resolver, ScanUtil.newScan(originalScan), new SequenceManager(statement));
subCtx.setCurrentTable(tableRef);
QueryPlan innerPlan = new SortMergeJoinPlan(subCtx, joinTable.getStatement(), tableRef, type == JoinType.Right ? JoinType.Left : type, lhsPlan, rhsPlan, lhsKeyExpressions, rhsKeyExpressions, projectedTable, lhsProjTable, needsMerge ? rhsProjTable : null, fieldPosition, lastJoinSpec.isSingleValueOnly());
context.setCurrentTable(tableRef);
context.setResolver(resolver);
TableNode from = NODE_FACTORY.namedTable(tableRef.getTableAlias(), NODE_FACTORY.table(tableRef.getTable().getSchemaName().getString(), tableRef.getTable().getTableName().getString()));
ParseNode where = joinTable.getPostFiltersCombined();
SelectStatement select = asSubquery ? NODE_FACTORY.select(from, joinTable.getStatement().getHint(), false, Collections.<AliasedNode>emptyList(), where, null, null, orderBy, null, null, 0, false, joinTable.getStatement().hasSequence(), Collections.<SelectStatement>emptyList(), joinTable.getStatement().getUdfParseNodes()) : NODE_FACTORY.select(joinTable.getStatement(), from, where);
return compileSingleFlatQuery(context, select, binds, asSubquery, false, innerPlan, null, isInRowKeyOrder);
}
use of org.apache.phoenix.join.HashJoinInfo in project phoenix by apache.
the class GroupedAggregateRegionObserver method doPostScannerOpen.
/**
* Replaces the RegionScanner s with a RegionScanner that groups by the key formed by the list
* of expressions from the scan and returns the aggregated rows of each group. For example,
* given the following original rows in the RegionScanner: KEY COL1 row1 a row2 b row3 a row4 a
* the following rows will be returned for COUNT(*): KEY COUNT a 3 b 1 The client is required to
* do a sort and a final aggregation, since multiple rows with the same key may be returned from
* different regions.
*/
@Override
protected RegionScanner doPostScannerOpen(ObserverContext<RegionCoprocessorEnvironment> c, Scan scan, RegionScanner s) throws IOException {
boolean keyOrdered = false;
byte[] expressionBytes = scan.getAttribute(BaseScannerRegionObserver.UNORDERED_GROUP_BY_EXPRESSIONS);
if (expressionBytes == null) {
expressionBytes = scan.getAttribute(BaseScannerRegionObserver.KEY_ORDERED_GROUP_BY_EXPRESSIONS);
keyOrdered = true;
}
int offset = 0;
if (ScanUtil.isLocalIndex(scan)) {
/*
* For local indexes, we need to set an offset on row key expressions to skip
* the region start key.
*/
Region region = c.getEnvironment().getRegion();
offset = region.getRegionInfo().getStartKey().length != 0 ? region.getRegionInfo().getStartKey().length : region.getRegionInfo().getEndKey().length;
ScanUtil.setRowKeyOffset(scan, offset);
}
List<Expression> expressions = deserializeGroupByExpressions(expressionBytes, 0);
ServerAggregators aggregators = ServerAggregators.deserialize(scan.getAttribute(BaseScannerRegionObserver.AGGREGATORS), c.getEnvironment().getConfiguration());
RegionScanner innerScanner = s;
boolean useProto = false;
byte[] localIndexBytes = scan.getAttribute(LOCAL_INDEX_BUILD_PROTO);
useProto = localIndexBytes != null;
if (localIndexBytes == null) {
localIndexBytes = scan.getAttribute(LOCAL_INDEX_BUILD);
}
List<IndexMaintainer> indexMaintainers = localIndexBytes == null ? null : IndexMaintainer.deserialize(localIndexBytes, useProto);
TupleProjector tupleProjector = null;
byte[][] viewConstants = null;
ColumnReference[] dataColumns = IndexUtil.deserializeDataTableColumnsToJoin(scan);
final TupleProjector p = TupleProjector.deserializeProjectorFromScan(scan);
final HashJoinInfo j = HashJoinInfo.deserializeHashJoinFromScan(scan);
boolean useQualifierAsIndex = EncodedColumnsUtil.useQualifierAsIndex(EncodedColumnsUtil.getMinMaxQualifiersFromScan(scan));
if (ScanUtil.isLocalIndex(scan) || (j == null && p != null)) {
if (dataColumns != null) {
tupleProjector = IndexUtil.getTupleProjector(scan, dataColumns);
viewConstants = IndexUtil.deserializeViewConstantsFromScan(scan);
}
ImmutableBytesPtr tempPtr = new ImmutableBytesPtr();
innerScanner = getWrappedScanner(c, innerScanner, offset, scan, dataColumns, tupleProjector, c.getEnvironment().getRegion(), indexMaintainers == null ? null : indexMaintainers.get(0), viewConstants, p, tempPtr, useQualifierAsIndex);
}
if (j != null) {
innerScanner = new HashJoinRegionScanner(innerScanner, p, j, ScanUtil.getTenantId(scan), c.getEnvironment(), useQualifierAsIndex, useNewValueColumnQualifier);
}
long limit = Long.MAX_VALUE;
byte[] limitBytes = scan.getAttribute(GROUP_BY_LIMIT);
if (limitBytes != null) {
limit = PInteger.INSTANCE.getCodec().decodeInt(limitBytes, 0, SortOrder.getDefault());
}
if (keyOrdered) {
// already in the required group by key order
return scanOrdered(c, scan, innerScanner, expressions, aggregators, limit);
} else {
// Otherwse, collect them all up in an in memory map
return scanUnordered(c, scan, innerScanner, expressions, aggregators, limit);
}
}
use of org.apache.phoenix.join.HashJoinInfo in project phoenix by apache.
the class UngroupedAggregateRegionObserver method doPostScannerOpen.
@Override
protected RegionScanner doPostScannerOpen(final ObserverContext<RegionCoprocessorEnvironment> c, final Scan scan, final RegionScanner s) throws IOException, SQLException {
RegionCoprocessorEnvironment env = c.getEnvironment();
Region region = env.getRegion();
long ts = scan.getTimeRange().getMax();
boolean localIndexScan = ScanUtil.isLocalIndex(scan);
if (ScanUtil.isAnalyzeTable(scan)) {
byte[] gp_width_bytes = scan.getAttribute(BaseScannerRegionObserver.GUIDEPOST_WIDTH_BYTES);
byte[] gp_per_region_bytes = scan.getAttribute(BaseScannerRegionObserver.GUIDEPOST_PER_REGION);
// Let this throw, as this scan is being done for the sole purpose of collecting stats
StatisticsCollector statsCollector = StatisticsCollectorFactory.createStatisticsCollector(env, region.getRegionInfo().getTable().getNameAsString(), ts, gp_width_bytes, gp_per_region_bytes);
return collectStats(s, statsCollector, region, scan, env.getConfiguration());
} else if (ScanUtil.isIndexRebuild(scan)) {
return rebuildIndices(s, region, scan, env.getConfiguration());
}
int offsetToBe = 0;
if (localIndexScan) {
/*
* For local indexes, we need to set an offset on row key expressions to skip
* the region start key.
*/
offsetToBe = region.getRegionInfo().getStartKey().length != 0 ? region.getRegionInfo().getStartKey().length : region.getRegionInfo().getEndKey().length;
ScanUtil.setRowKeyOffset(scan, offsetToBe);
}
final int offset = offsetToBe;
PTable projectedTable = null;
PTable writeToTable = null;
byte[][] values = null;
byte[] descRowKeyTableBytes = scan.getAttribute(UPGRADE_DESC_ROW_KEY);
boolean isDescRowKeyOrderUpgrade = descRowKeyTableBytes != null;
if (isDescRowKeyOrderUpgrade) {
logger.debug("Upgrading row key for " + region.getRegionInfo().getTable().getNameAsString());
projectedTable = deserializeTable(descRowKeyTableBytes);
try {
writeToTable = PTableImpl.makePTable(projectedTable, true);
} catch (SQLException e) {
// Impossible
ServerUtil.throwIOException("Upgrade failed", e);
}
values = new byte[projectedTable.getPKColumns().size()][];
}
boolean useProto = false;
byte[] localIndexBytes = scan.getAttribute(LOCAL_INDEX_BUILD_PROTO);
useProto = localIndexBytes != null;
if (localIndexBytes == null) {
localIndexBytes = scan.getAttribute(LOCAL_INDEX_BUILD);
}
List<IndexMaintainer> indexMaintainers = localIndexBytes == null ? null : IndexMaintainer.deserialize(localIndexBytes, useProto);
MutationList indexMutations = localIndexBytes == null ? new MutationList() : new MutationList(1024);
RegionScanner theScanner = s;
boolean replayMutations = scan.getAttribute(BaseScannerRegionObserver.IGNORE_NEWER_MUTATIONS) != null;
byte[] indexUUID = scan.getAttribute(PhoenixIndexCodec.INDEX_UUID);
byte[] txState = scan.getAttribute(BaseScannerRegionObserver.TX_STATE);
List<Expression> selectExpressions = null;
byte[] upsertSelectTable = scan.getAttribute(BaseScannerRegionObserver.UPSERT_SELECT_TABLE);
boolean isUpsert = false;
boolean isDelete = false;
byte[] deleteCQ = null;
byte[] deleteCF = null;
byte[] emptyCF = null;
HTable targetHTable = null;
boolean areMutationInSameRegion = true;
ImmutableBytesWritable ptr = new ImmutableBytesWritable();
if (upsertSelectTable != null) {
isUpsert = true;
projectedTable = deserializeTable(upsertSelectTable);
targetHTable = new HTable(env.getConfiguration(), projectedTable.getPhysicalName().getBytes());
selectExpressions = deserializeExpressions(scan.getAttribute(BaseScannerRegionObserver.UPSERT_SELECT_EXPRS));
values = new byte[projectedTable.getPKColumns().size()][];
areMutationInSameRegion = Bytes.compareTo(targetHTable.getTableName(), region.getTableDesc().getTableName().getName()) == 0 && !ExpressionUtil.isPkPositionChanging(new TableRef(projectedTable), selectExpressions);
} else {
byte[] isDeleteAgg = scan.getAttribute(BaseScannerRegionObserver.DELETE_AGG);
isDelete = isDeleteAgg != null && Bytes.compareTo(PDataType.TRUE_BYTES, isDeleteAgg) == 0;
if (!isDelete) {
deleteCF = scan.getAttribute(BaseScannerRegionObserver.DELETE_CF);
deleteCQ = scan.getAttribute(BaseScannerRegionObserver.DELETE_CQ);
}
emptyCF = scan.getAttribute(BaseScannerRegionObserver.EMPTY_CF);
}
TupleProjector tupleProjector = null;
byte[][] viewConstants = null;
ColumnReference[] dataColumns = IndexUtil.deserializeDataTableColumnsToJoin(scan);
final TupleProjector p = TupleProjector.deserializeProjectorFromScan(scan);
final HashJoinInfo j = HashJoinInfo.deserializeHashJoinFromScan(scan);
boolean useQualifierAsIndex = EncodedColumnsUtil.useQualifierAsIndex(EncodedColumnsUtil.getMinMaxQualifiersFromScan(scan));
if ((localIndexScan && !isDelete && !isDescRowKeyOrderUpgrade) || (j == null && p != null)) {
if (dataColumns != null) {
tupleProjector = IndexUtil.getTupleProjector(scan, dataColumns);
viewConstants = IndexUtil.deserializeViewConstantsFromScan(scan);
}
ImmutableBytesWritable tempPtr = new ImmutableBytesWritable();
theScanner = getWrappedScanner(c, theScanner, offset, scan, dataColumns, tupleProjector, region, indexMaintainers == null ? null : indexMaintainers.get(0), viewConstants, p, tempPtr, useQualifierAsIndex);
}
if (j != null) {
theScanner = new HashJoinRegionScanner(theScanner, p, j, ScanUtil.getTenantId(scan), env, useQualifierAsIndex, useNewValueColumnQualifier);
}
int maxBatchSize = 0;
long maxBatchSizeBytes = 0L;
MutationList mutations = new MutationList();
boolean needToWrite = false;
Configuration conf = c.getEnvironment().getConfiguration();
long flushSize = region.getTableDesc().getMemStoreFlushSize();
if (flushSize <= 0) {
flushSize = conf.getLong(HConstants.HREGION_MEMSTORE_FLUSH_SIZE, HTableDescriptor.DEFAULT_MEMSTORE_FLUSH_SIZE);
}
/**
* Slow down the writes if the memstore size more than
* (hbase.hregion.memstore.block.multiplier - 1) times hbase.hregion.memstore.flush.size
* bytes. This avoids flush storm to hdfs for cases like index building where reads and
* write happen to all the table regions in the server.
*/
final long blockingMemStoreSize = flushSize * (conf.getLong(HConstants.HREGION_MEMSTORE_BLOCK_MULTIPLIER, HConstants.DEFAULT_HREGION_MEMSTORE_BLOCK_MULTIPLIER) - 1);
boolean buildLocalIndex = indexMaintainers != null && dataColumns == null && !localIndexScan;
if (isDescRowKeyOrderUpgrade || isDelete || isUpsert || (deleteCQ != null && deleteCF != null) || emptyCF != null || buildLocalIndex) {
needToWrite = true;
maxBatchSize = env.getConfiguration().getInt(MUTATE_BATCH_SIZE_ATTRIB, QueryServicesOptions.DEFAULT_MUTATE_BATCH_SIZE);
mutations = new MutationList(Ints.saturatedCast(maxBatchSize + maxBatchSize / 10));
maxBatchSizeBytes = env.getConfiguration().getLong(MUTATE_BATCH_SIZE_BYTES_ATTRIB, QueryServicesOptions.DEFAULT_MUTATE_BATCH_SIZE_BYTES);
}
Aggregators aggregators = ServerAggregators.deserialize(scan.getAttribute(BaseScannerRegionObserver.AGGREGATORS), env.getConfiguration());
Aggregator[] rowAggregators = aggregators.getAggregators();
boolean hasMore;
boolean hasAny = false;
Pair<Integer, Integer> minMaxQualifiers = EncodedColumnsUtil.getMinMaxQualifiersFromScan(scan);
Tuple result = useQualifierAsIndex ? new PositionBasedMultiKeyValueTuple() : new MultiKeyValueTuple();
if (logger.isDebugEnabled()) {
logger.debug(LogUtil.addCustomAnnotations("Starting ungrouped coprocessor scan " + scan + " " + region.getRegionInfo(), ScanUtil.getCustomAnnotations(scan)));
}
int rowCount = 0;
final RegionScanner innerScanner = theScanner;
boolean useIndexProto = true;
byte[] indexMaintainersPtr = scan.getAttribute(PhoenixIndexCodec.INDEX_PROTO_MD);
// for backward compatiblity fall back to look by the old attribute
if (indexMaintainersPtr == null) {
indexMaintainersPtr = scan.getAttribute(PhoenixIndexCodec.INDEX_MD);
useIndexProto = false;
}
boolean acquiredLock = false;
try {
if (needToWrite) {
synchronized (lock) {
scansReferenceCount++;
}
}
region.startRegionOperation();
acquiredLock = true;
synchronized (innerScanner) {
do {
List<Cell> results = useQualifierAsIndex ? new EncodedColumnQualiferCellsList(minMaxQualifiers.getFirst(), minMaxQualifiers.getSecond(), encodingScheme) : new ArrayList<Cell>();
// Results are potentially returned even when the return value of s.next is false
// since this is an indication of whether or not there are more values after the
// ones returned
hasMore = innerScanner.nextRaw(results);
if (!results.isEmpty()) {
rowCount++;
result.setKeyValues(results);
if (isDescRowKeyOrderUpgrade) {
Arrays.fill(values, null);
Cell firstKV = results.get(0);
RowKeySchema schema = projectedTable.getRowKeySchema();
int maxOffset = schema.iterator(firstKV.getRowArray(), firstKV.getRowOffset() + offset, firstKV.getRowLength(), ptr);
for (int i = 0; i < schema.getFieldCount(); i++) {
Boolean hasValue = schema.next(ptr, i, maxOffset);
if (hasValue == null) {
break;
}
Field field = schema.getField(i);
if (field.getSortOrder() == SortOrder.DESC) {
// Special case for re-writing DESC ARRAY, as the actual byte value needs to change in this case
if (field.getDataType().isArrayType()) {
field.getDataType().coerceBytes(ptr, null, field.getDataType(), field.getMaxLength(), field.getScale(), field.getSortOrder(), field.getMaxLength(), field.getScale(), field.getSortOrder(), // force to use correct separator byte
true);
} else // Special case for re-writing DESC CHAR or DESC BINARY, to force the re-writing of trailing space characters
if (field.getDataType() == PChar.INSTANCE || field.getDataType() == PBinary.INSTANCE) {
int len = ptr.getLength();
while (len > 0 && ptr.get()[ptr.getOffset() + len - 1] == StringUtil.SPACE_UTF8) {
len--;
}
ptr.set(ptr.get(), ptr.getOffset(), len);
// Special case for re-writing DESC FLOAT and DOUBLE, as they're not inverted like they should be (PHOENIX-2171)
} else if (field.getDataType() == PFloat.INSTANCE || field.getDataType() == PDouble.INSTANCE) {
byte[] invertedBytes = SortOrder.invert(ptr.get(), ptr.getOffset(), ptr.getLength());
ptr.set(invertedBytes);
}
} else if (field.getDataType() == PBinary.INSTANCE) {
// Remove trailing space characters so that the setValues call below will replace them
// with the correct zero byte character. Note this is somewhat dangerous as these
// could be legit, but I don't know what the alternative is.
int len = ptr.getLength();
while (len > 0 && ptr.get()[ptr.getOffset() + len - 1] == StringUtil.SPACE_UTF8) {
len--;
}
ptr.set(ptr.get(), ptr.getOffset(), len);
}
values[i] = ptr.copyBytes();
}
writeToTable.newKey(ptr, values);
if (Bytes.compareTo(firstKV.getRowArray(), firstKV.getRowOffset() + offset, firstKV.getRowLength(), ptr.get(), ptr.getOffset() + offset, ptr.getLength()) == 0) {
continue;
}
byte[] newRow = ByteUtil.copyKeyBytesIfNecessary(ptr);
if (offset > 0) {
// for local indexes (prepend region start key)
byte[] newRowWithOffset = new byte[offset + newRow.length];
System.arraycopy(firstKV.getRowArray(), firstKV.getRowOffset(), newRowWithOffset, 0, offset);
;
System.arraycopy(newRow, 0, newRowWithOffset, offset, newRow.length);
newRow = newRowWithOffset;
}
byte[] oldRow = Bytes.copy(firstKV.getRowArray(), firstKV.getRowOffset(), firstKV.getRowLength());
for (Cell cell : results) {
// Copy existing cell but with new row key
Cell newCell = new KeyValue(newRow, 0, newRow.length, cell.getFamilyArray(), cell.getFamilyOffset(), cell.getFamilyLength(), cell.getQualifierArray(), cell.getQualifierOffset(), cell.getQualifierLength(), cell.getTimestamp(), KeyValue.Type.codeToType(cell.getTypeByte()), cell.getValueArray(), cell.getValueOffset(), cell.getValueLength());
switch(KeyValue.Type.codeToType(cell.getTypeByte())) {
case Put:
// If Put, point delete old Put
Delete del = new Delete(oldRow);
del.addDeleteMarker(new KeyValue(cell.getRowArray(), cell.getRowOffset(), cell.getRowLength(), cell.getFamilyArray(), cell.getFamilyOffset(), cell.getFamilyLength(), cell.getQualifierArray(), cell.getQualifierOffset(), cell.getQualifierLength(), cell.getTimestamp(), KeyValue.Type.Delete, ByteUtil.EMPTY_BYTE_ARRAY, 0, 0));
mutations.add(del);
Put put = new Put(newRow);
put.add(newCell);
mutations.add(put);
break;
case Delete:
case DeleteColumn:
case DeleteFamily:
case DeleteFamilyVersion:
Delete delete = new Delete(newRow);
delete.addDeleteMarker(newCell);
mutations.add(delete);
break;
}
}
} else if (buildLocalIndex) {
for (IndexMaintainer maintainer : indexMaintainers) {
if (!results.isEmpty()) {
result.getKey(ptr);
ValueGetter valueGetter = maintainer.createGetterFromKeyValues(ImmutableBytesPtr.copyBytesIfNecessary(ptr), results);
Put put = maintainer.buildUpdateMutation(kvBuilder, valueGetter, ptr, results.get(0).getTimestamp(), env.getRegion().getRegionInfo().getStartKey(), env.getRegion().getRegionInfo().getEndKey());
indexMutations.add(put);
}
}
result.setKeyValues(results);
} else if (isDelete) {
// FIXME: the version of the Delete constructor without the lock
// args was introduced in 0.94.4, thus if we try to use it here
// we can no longer use the 0.94.2 version of the client.
Cell firstKV = results.get(0);
Delete delete = new Delete(firstKV.getRowArray(), firstKV.getRowOffset(), firstKV.getRowLength(), ts);
if (replayMutations) {
delete.setAttribute(IGNORE_NEWER_MUTATIONS, PDataType.TRUE_BYTES);
}
mutations.add(delete);
// force tephra to ignore this deletes
delete.setAttribute(TxConstants.TX_ROLLBACK_ATTRIBUTE_KEY, new byte[0]);
} else if (isUpsert) {
Arrays.fill(values, null);
int bucketNumOffset = 0;
if (projectedTable.getBucketNum() != null) {
values[0] = new byte[] { 0 };
bucketNumOffset = 1;
}
int i = bucketNumOffset;
List<PColumn> projectedColumns = projectedTable.getColumns();
for (; i < projectedTable.getPKColumns().size(); i++) {
Expression expression = selectExpressions.get(i - bucketNumOffset);
if (expression.evaluate(result, ptr)) {
values[i] = ptr.copyBytes();
// column being projected into then invert the bits.
if (expression.getSortOrder() != projectedColumns.get(i).getSortOrder()) {
SortOrder.invert(values[i], 0, values[i], 0, values[i].length);
}
} else {
values[i] = ByteUtil.EMPTY_BYTE_ARRAY;
}
}
projectedTable.newKey(ptr, values);
PRow row = projectedTable.newRow(kvBuilder, ts, ptr, false);
for (; i < projectedColumns.size(); i++) {
Expression expression = selectExpressions.get(i - bucketNumOffset);
if (expression.evaluate(result, ptr)) {
PColumn column = projectedColumns.get(i);
if (!column.getDataType().isSizeCompatible(ptr, null, expression.getDataType(), expression.getSortOrder(), expression.getMaxLength(), expression.getScale(), column.getMaxLength(), column.getScale())) {
throw new DataExceedsCapacityException(column.getDataType(), column.getMaxLength(), column.getScale(), column.getName().getString(), ptr);
}
column.getDataType().coerceBytes(ptr, null, expression.getDataType(), expression.getMaxLength(), expression.getScale(), expression.getSortOrder(), column.getMaxLength(), column.getScale(), column.getSortOrder(), projectedTable.rowKeyOrderOptimizable());
byte[] bytes = ByteUtil.copyKeyBytesIfNecessary(ptr);
row.setValue(column, bytes);
}
}
for (Mutation mutation : row.toRowMutations()) {
if (replayMutations) {
mutation.setAttribute(IGNORE_NEWER_MUTATIONS, PDataType.TRUE_BYTES);
}
mutations.add(mutation);
}
for (i = 0; i < selectExpressions.size(); i++) {
selectExpressions.get(i).reset();
}
} else if (deleteCF != null && deleteCQ != null) {
// if no empty key value is being set
if (emptyCF == null || result.getValue(deleteCF, deleteCQ) != null) {
Delete delete = new Delete(results.get(0).getRowArray(), results.get(0).getRowOffset(), results.get(0).getRowLength());
delete.deleteColumns(deleteCF, deleteCQ, ts);
// force tephra to ignore this deletes
delete.setAttribute(TxConstants.TX_ROLLBACK_ATTRIBUTE_KEY, new byte[0]);
mutations.add(delete);
}
}
if (emptyCF != null) {
/*
* If we've specified an emptyCF, then we need to insert an empty
* key value "retroactively" for any key value that is visible at
* the timestamp that the DDL was issued. Key values that are not
* visible at this timestamp will not ever be projected up to
* scans past this timestamp, so don't need to be considered.
* We insert one empty key value per row per timestamp.
*/
Set<Long> timeStamps = Sets.newHashSetWithExpectedSize(results.size());
for (Cell kv : results) {
long kvts = kv.getTimestamp();
if (!timeStamps.contains(kvts)) {
Put put = new Put(kv.getRowArray(), kv.getRowOffset(), kv.getRowLength());
put.add(emptyCF, QueryConstants.EMPTY_COLUMN_BYTES, kvts, ByteUtil.EMPTY_BYTE_ARRAY);
mutations.add(put);
}
}
}
if (readyToCommit(rowCount, mutations.byteSize(), maxBatchSize, maxBatchSizeBytes)) {
commit(region, mutations, indexUUID, blockingMemStoreSize, indexMaintainersPtr, txState, areMutationInSameRegion, targetHTable, useIndexProto);
mutations.clear();
}
if (readyToCommit(rowCount, indexMutations.byteSize(), maxBatchSize, maxBatchSizeBytes)) {
commitBatch(region, indexMutations, null, blockingMemStoreSize, null, txState, useIndexProto);
indexMutations.clear();
}
aggregators.aggregate(rowAggregators, result);
hasAny = true;
}
} while (hasMore);
if (!mutations.isEmpty()) {
commit(region, mutations, indexUUID, blockingMemStoreSize, indexMaintainersPtr, txState, areMutationInSameRegion, targetHTable, useIndexProto);
mutations.clear();
}
if (!indexMutations.isEmpty()) {
commitBatch(region, indexMutations, null, blockingMemStoreSize, indexMaintainersPtr, txState, useIndexProto);
indexMutations.clear();
}
}
} finally {
if (needToWrite) {
synchronized (lock) {
scansReferenceCount--;
}
}
if (targetHTable != null) {
targetHTable.close();
}
try {
innerScanner.close();
} finally {
if (acquiredLock)
region.closeRegionOperation();
}
}
if (logger.isDebugEnabled()) {
logger.debug(LogUtil.addCustomAnnotations("Finished scanning " + rowCount + " rows for ungrouped coprocessor scan " + scan, ScanUtil.getCustomAnnotations(scan)));
}
final boolean hadAny = hasAny;
KeyValue keyValue = null;
if (hadAny) {
byte[] value = aggregators.toBytes(rowAggregators);
keyValue = KeyValueUtil.newKeyValue(UNGROUPED_AGG_ROW_KEY, SINGLE_COLUMN_FAMILY, SINGLE_COLUMN, AGG_TIMESTAMP, value, 0, value.length);
}
final KeyValue aggKeyValue = keyValue;
RegionScanner scanner = new BaseRegionScanner(innerScanner) {
private boolean done = !hadAny;
@Override
public boolean isFilterDone() {
return done;
}
@Override
public boolean next(List<Cell> results) throws IOException {
if (done)
return false;
done = true;
results.add(aggKeyValue);
return false;
}
@Override
public long getMaxResultSize() {
return scan.getMaxResultSize();
}
};
return scanner;
}
use of org.apache.phoenix.join.HashJoinInfo in project phoenix by apache.
the class NonAggregateRegionScannerFactory method getRegionScanner.
@Override
public RegionScanner getRegionScanner(final Scan scan, final RegionScanner s) throws Throwable {
int offset = 0;
if (ScanUtil.isLocalIndex(scan)) {
/*
* For local indexes, we need to set an offset on row key expressions to skip
* the region start key.
*/
Region region = getRegion();
offset = region.getRegionInfo().getStartKey().length != 0 ? region.getRegionInfo().getStartKey().length : region.getRegionInfo().getEndKey().length;
ScanUtil.setRowKeyOffset(scan, offset);
}
byte[] scanOffsetBytes = scan.getAttribute(BaseScannerRegionObserver.SCAN_OFFSET);
Integer scanOffset = null;
if (scanOffsetBytes != null) {
scanOffset = (Integer) PInteger.INSTANCE.toObject(scanOffsetBytes);
}
RegionScanner innerScanner = s;
Set<KeyValueColumnExpression> arrayKVRefs = Sets.newHashSet();
Expression[] arrayFuncRefs = deserializeArrayPostionalExpressionInfoFromScan(scan, innerScanner, arrayKVRefs);
TupleProjector tupleProjector = null;
Region dataRegion = null;
IndexMaintainer indexMaintainer = null;
byte[][] viewConstants = null;
Transaction tx = null;
ColumnReference[] dataColumns = IndexUtil.deserializeDataTableColumnsToJoin(scan);
if (dataColumns != null) {
tupleProjector = IndexUtil.getTupleProjector(scan, dataColumns);
dataRegion = env.getRegion();
boolean useProto = false;
byte[] localIndexBytes = scan.getAttribute(BaseScannerRegionObserver.LOCAL_INDEX_BUILD_PROTO);
useProto = localIndexBytes != null;
if (localIndexBytes == null) {
localIndexBytes = scan.getAttribute(BaseScannerRegionObserver.LOCAL_INDEX_BUILD);
}
List<IndexMaintainer> indexMaintainers = localIndexBytes == null ? null : IndexMaintainer.deserialize(localIndexBytes, useProto);
indexMaintainer = indexMaintainers.get(0);
viewConstants = IndexUtil.deserializeViewConstantsFromScan(scan);
byte[] txState = scan.getAttribute(BaseScannerRegionObserver.TX_STATE);
tx = MutationState.decodeTransaction(txState);
}
final TupleProjector p = TupleProjector.deserializeProjectorFromScan(scan);
final HashJoinInfo j = HashJoinInfo.deserializeHashJoinFromScan(scan);
boolean useQualifierAsIndex = EncodedColumnsUtil.useQualifierAsIndex(getMinMaxQualifiersFromScan(scan)) && scan.getAttribute(BaseScannerRegionObserver.TOPN) != null;
// setting dataRegion in case of a non-coprocessor environment
if (dataRegion == null && env.getConfiguration().get(PhoenixConfigurationUtil.SNAPSHOT_NAME_KEY) != null) {
dataRegion = env.getRegion();
}
innerScanner = getWrappedScanner(env, innerScanner, arrayKVRefs, arrayFuncRefs, offset, scan, dataColumns, tupleProjector, dataRegion, indexMaintainer, tx, viewConstants, kvSchema, kvSchemaBitSet, j == null ? p : null, ptr, useQualifierAsIndex);
final ImmutableBytesPtr tenantId = ScanUtil.getTenantId(scan);
if (j != null) {
innerScanner = new HashJoinRegionScanner(innerScanner, p, j, tenantId, env, useQualifierAsIndex, useNewValueColumnQualifier);
}
if (scanOffset != null) {
innerScanner = getOffsetScanner(innerScanner, new OffsetResultIterator(new RegionScannerResultIterator(innerScanner, getMinMaxQualifiersFromScan(scan), encodingScheme), scanOffset), scan.getAttribute(QueryConstants.LAST_SCAN) != null);
}
final OrderedResultIterator iterator = deserializeFromScan(scan, innerScanner);
if (iterator == null) {
return innerScanner;
}
// TODO:the above wrapped scanner should be used here also
return getTopNScanner(env, innerScanner, iterator, tenantId);
}
Aggregations