use of org.apache.phoenix.schema.tuple.MultiKeyValueTuple in project phoenix by apache.
the class RegionScannerResultIterator method next.
@Override
public Tuple next() throws SQLException {
// stopRegionOperation
synchronized (scanner) {
try {
// TODO: size
List<Cell> results = useQualifierAsIndex ? new EncodedColumnQualiferCellsList(minMaxQualifiers.getFirst(), minMaxQualifiers.getSecond(), encodingScheme) : new ArrayList<Cell>();
// Results are potentially returned even when the return value of s.next is false
// since this is an indication of whether or not there are more values after the
// ones returned
boolean hasMore = scanner.nextRaw(results);
if (!hasMore && results.isEmpty()) {
return null;
}
// We instantiate a new tuple because in all cases currently we hang on to it
// (i.e. to compute and hold onto the TopN).
Tuple tuple = useQualifierAsIndex ? new PositionBasedMultiKeyValueTuple() : new MultiKeyValueTuple();
tuple.setKeyValues(results);
return tuple;
} catch (IOException e) {
throw ServerUtil.parseServerException(e);
}
}
}
use of org.apache.phoenix.schema.tuple.MultiKeyValueTuple in project phoenix by apache.
the class PhoenixIndexBuilder method executeAtomicOp.
@Override
public List<Mutation> executeAtomicOp(Increment inc) throws IOException {
byte[] opBytes = inc.getAttribute(ATOMIC_OP_ATTRIB);
if (opBytes == null) {
// Unexpected
return null;
}
inc.setAttribute(ATOMIC_OP_ATTRIB, null);
Put put = null;
Delete delete = null;
// We cannot neither use the time stamp in the Increment to set the Get time range
// nor set the Put/Delete time stamp and have this be atomic as HBase does not
// handle that. Though we disallow using ON DUPLICATE KEY clause when the
// CURRENT_SCN is set, we still may have a time stamp set as of when the table
// was resolved on the client side. We need to ignore this as well due to limitations
// in HBase, but this isn't too bad as the time will be very close the the current
// time anyway.
long ts = HConstants.LATEST_TIMESTAMP;
byte[] rowKey = inc.getRow();
final Get get = new Get(rowKey);
if (isDupKeyIgnore(opBytes)) {
get.setFilter(new FirstKeyOnlyFilter());
Result result = this.env.getRegion().get(get);
return result.isEmpty() ? convertIncrementToPutInSingletonList(inc) : Collections.<Mutation>emptyList();
}
ByteArrayInputStream stream = new ByteArrayInputStream(opBytes);
DataInputStream input = new DataInputStream(stream);
boolean skipFirstOp = input.readBoolean();
short repeat = input.readShort();
final int[] estimatedSizeHolder = { 0 };
List<Pair<PTable, List<Expression>>> operations = Lists.newArrayListWithExpectedSize(3);
while (true) {
ExpressionVisitor<Void> visitor = new StatelessTraverseAllExpressionVisitor<Void>() {
@Override
public Void visit(KeyValueColumnExpression expression) {
get.addColumn(expression.getColumnFamily(), expression.getColumnQualifier());
estimatedSizeHolder[0]++;
return null;
}
};
try {
int nExpressions = WritableUtils.readVInt(input);
List<Expression> expressions = Lists.newArrayListWithExpectedSize(nExpressions);
for (int i = 0; i < nExpressions; i++) {
Expression expression = ExpressionType.values()[WritableUtils.readVInt(input)].newInstance();
expression.readFields(input);
expressions.add(expression);
expression.accept(visitor);
}
PTableProtos.PTable tableProto = PTableProtos.PTable.parseDelimitedFrom(input);
PTable table = PTableImpl.createFromProto(tableProto);
operations.add(new Pair<>(table, expressions));
} catch (EOFException e) {
break;
}
}
int estimatedSize = estimatedSizeHolder[0];
if (get.getFamilyMap().isEmpty()) {
get.setFilter(new FirstKeyOnlyFilter());
}
MultiKeyValueTuple tuple;
List<Cell> flattenedCells = null;
List<Cell> cells = ((HRegion) this.env.getRegion()).get(get, false);
if (cells.isEmpty()) {
if (skipFirstOp) {
if (operations.size() <= 1 && repeat <= 1) {
return convertIncrementToPutInSingletonList(inc);
}
// Skip first operation (if first wasn't ON DUPLICATE KEY IGNORE)
repeat--;
}
// Base current state off of new row
flattenedCells = flattenCells(inc, estimatedSize);
tuple = new MultiKeyValueTuple(flattenedCells);
} else {
// Base current state off of existing row
tuple = new MultiKeyValueTuple(cells);
}
ImmutableBytesWritable ptr = new ImmutableBytesWritable();
for (int opIndex = 0; opIndex < operations.size(); opIndex++) {
Pair<PTable, List<Expression>> operation = operations.get(opIndex);
PTable table = operation.getFirst();
List<Expression> expressions = operation.getSecond();
for (int j = 0; j < repeat; j++) {
// repeater loop
ptr.set(rowKey);
// executed, not when the outer loop is exited. Hence we do it here, at the top of the loop.
if (flattenedCells != null) {
Collections.sort(flattenedCells, KeyValue.COMPARATOR);
}
PRow row = table.newRow(GenericKeyValueBuilder.INSTANCE, ts, ptr, false);
int adjust = table.getBucketNum() == null ? 1 : 2;
for (int i = 0; i < expressions.size(); i++) {
Expression expression = expressions.get(i);
ptr.set(ByteUtil.EMPTY_BYTE_ARRAY);
expression.evaluate(tuple, ptr);
PColumn column = table.getColumns().get(i + adjust);
Object value = expression.getDataType().toObject(ptr, column.getSortOrder());
// same type.
if (!column.getDataType().isSizeCompatible(ptr, value, column.getDataType(), expression.getSortOrder(), expression.getMaxLength(), expression.getScale(), column.getMaxLength(), column.getScale())) {
throw new DataExceedsCapacityException(column.getDataType(), column.getMaxLength(), column.getScale());
}
column.getDataType().coerceBytes(ptr, value, expression.getDataType(), expression.getMaxLength(), expression.getScale(), expression.getSortOrder(), column.getMaxLength(), column.getScale(), column.getSortOrder(), table.rowKeyOrderOptimizable());
byte[] bytes = ByteUtil.copyKeyBytesIfNecessary(ptr);
row.setValue(column, bytes);
}
flattenedCells = Lists.newArrayListWithExpectedSize(estimatedSize);
List<Mutation> mutations = row.toRowMutations();
for (Mutation source : mutations) {
flattenCells(source, flattenedCells);
}
tuple.setKeyValues(flattenedCells);
}
// Repeat only applies to first statement
repeat = 1;
}
List<Mutation> mutations = Lists.newArrayListWithExpectedSize(2);
for (int i = 0; i < tuple.size(); i++) {
Cell cell = tuple.getValue(i);
if (Type.codeToType(cell.getTypeByte()) == Type.Put) {
if (put == null) {
put = new Put(rowKey);
transferAttributes(inc, put);
mutations.add(put);
}
put.add(cell);
} else {
if (delete == null) {
delete = new Delete(rowKey);
transferAttributes(inc, delete);
mutations.add(delete);
}
delete.addDeleteMarker(cell);
}
}
return mutations;
}
use of org.apache.phoenix.schema.tuple.MultiKeyValueTuple in project phoenix by apache.
the class GroupedAggregateRegionObserver method scanOrdered.
/**
* Used for an aggregate query in which the key order match the group by key order. In this
* case, we can do the aggregation as we scan, by detecting when the group by key changes.
* @param limit TODO
* @throws IOException
*/
private RegionScanner scanOrdered(final ObserverContext<RegionCoprocessorEnvironment> c, final Scan scan, final RegionScanner scanner, final List<Expression> expressions, final ServerAggregators aggregators, final long limit) throws IOException {
if (logger.isDebugEnabled()) {
logger.debug(LogUtil.addCustomAnnotations("Grouped aggregation over ordered rows with scan " + scan + ", group by " + expressions + ", aggregators " + aggregators, ScanUtil.getCustomAnnotations(scan)));
}
final Pair<Integer, Integer> minMaxQualifiers = EncodedColumnsUtil.getMinMaxQualifiersFromScan(scan);
final boolean useQualifierAsIndex = EncodedColumnsUtil.useQualifierAsIndex(minMaxQualifiers);
return new BaseRegionScanner(scanner) {
private long rowCount = 0;
private ImmutableBytesPtr currentKey = null;
@Override
public boolean next(List<Cell> results) throws IOException {
boolean hasMore;
boolean atLimit;
boolean aggBoundary = false;
Tuple result = useQualifierAsIndex ? new PositionBasedMultiKeyValueTuple() : new MultiKeyValueTuple();
ImmutableBytesPtr key = null;
Aggregator[] rowAggregators = aggregators.getAggregators();
// If we're calculating no aggregate functions, we can exit at the
// start of a new row. Otherwise, we have to wait until an agg
int countOffset = rowAggregators.length == 0 ? 1 : 0;
Region region = c.getEnvironment().getRegion();
boolean acquiredLock = false;
try {
region.startRegionOperation();
acquiredLock = true;
synchronized (scanner) {
do {
List<Cell> kvs = useQualifierAsIndex ? new EncodedColumnQualiferCellsList(minMaxQualifiers.getFirst(), minMaxQualifiers.getSecond(), encodingScheme) : new ArrayList<Cell>();
// Results are potentially returned even when the return
// value of s.next is false
// since this is an indication of whether or not there
// are more values after the
// ones returned
hasMore = scanner.nextRaw(kvs);
if (!kvs.isEmpty()) {
result.setKeyValues(kvs);
key = TupleUtil.getConcatenatedValue(result, expressions);
aggBoundary = currentKey != null && currentKey.compareTo(key) != 0;
if (!aggBoundary) {
aggregators.aggregate(rowAggregators, result);
if (logger.isDebugEnabled()) {
logger.debug(LogUtil.addCustomAnnotations("Row passed filters: " + kvs + ", aggregated values: " + Arrays.asList(rowAggregators), ScanUtil.getCustomAnnotations(scan)));
}
currentKey = key;
}
}
atLimit = rowCount + countOffset >= limit;
// Do rowCount + 1 b/c we don't have to wait for a complete
// row in the case of a DISTINCT with a LIMIT
} while (hasMore && !aggBoundary && !atLimit);
}
} finally {
if (acquiredLock)
region.closeRegionOperation();
}
if (currentKey != null) {
byte[] value = aggregators.toBytes(rowAggregators);
KeyValue keyValue = KeyValueUtil.newKeyValue(currentKey.get(), currentKey.getOffset(), currentKey.getLength(), SINGLE_COLUMN_FAMILY, SINGLE_COLUMN, AGG_TIMESTAMP, value, 0, value.length);
results.add(keyValue);
if (logger.isDebugEnabled()) {
logger.debug(LogUtil.addCustomAnnotations("Adding new aggregate row: " + keyValue + ",for current key " + Bytes.toStringBinary(currentKey.get(), currentKey.getOffset(), currentKey.getLength()) + ", aggregated values: " + Arrays.asList(rowAggregators), ScanUtil.getCustomAnnotations(scan)));
}
// the returned result).
if (aggBoundary) {
aggregators.reset(rowAggregators);
aggregators.aggregate(rowAggregators, result);
currentKey = key;
rowCount++;
atLimit |= rowCount >= limit;
}
}
// Continue if there are more
if (!atLimit && (hasMore || aggBoundary)) {
return true;
}
currentKey = null;
return false;
}
};
}
use of org.apache.phoenix.schema.tuple.MultiKeyValueTuple in project phoenix by apache.
the class UngroupedAggregateRegionObserver method doPostScannerOpen.
@Override
protected RegionScanner doPostScannerOpen(final ObserverContext<RegionCoprocessorEnvironment> c, final Scan scan, final RegionScanner s) throws IOException, SQLException {
RegionCoprocessorEnvironment env = c.getEnvironment();
Region region = env.getRegion();
long ts = scan.getTimeRange().getMax();
boolean localIndexScan = ScanUtil.isLocalIndex(scan);
if (ScanUtil.isAnalyzeTable(scan)) {
byte[] gp_width_bytes = scan.getAttribute(BaseScannerRegionObserver.GUIDEPOST_WIDTH_BYTES);
byte[] gp_per_region_bytes = scan.getAttribute(BaseScannerRegionObserver.GUIDEPOST_PER_REGION);
// Let this throw, as this scan is being done for the sole purpose of collecting stats
StatisticsCollector statsCollector = StatisticsCollectorFactory.createStatisticsCollector(env, region.getRegionInfo().getTable().getNameAsString(), ts, gp_width_bytes, gp_per_region_bytes);
return collectStats(s, statsCollector, region, scan, env.getConfiguration());
} else if (ScanUtil.isIndexRebuild(scan)) {
return rebuildIndices(s, region, scan, env.getConfiguration());
}
int offsetToBe = 0;
if (localIndexScan) {
/*
* For local indexes, we need to set an offset on row key expressions to skip
* the region start key.
*/
offsetToBe = region.getRegionInfo().getStartKey().length != 0 ? region.getRegionInfo().getStartKey().length : region.getRegionInfo().getEndKey().length;
ScanUtil.setRowKeyOffset(scan, offsetToBe);
}
final int offset = offsetToBe;
PTable projectedTable = null;
PTable writeToTable = null;
byte[][] values = null;
byte[] descRowKeyTableBytes = scan.getAttribute(UPGRADE_DESC_ROW_KEY);
boolean isDescRowKeyOrderUpgrade = descRowKeyTableBytes != null;
if (isDescRowKeyOrderUpgrade) {
logger.debug("Upgrading row key for " + region.getRegionInfo().getTable().getNameAsString());
projectedTable = deserializeTable(descRowKeyTableBytes);
try {
writeToTable = PTableImpl.makePTable(projectedTable, true);
} catch (SQLException e) {
// Impossible
ServerUtil.throwIOException("Upgrade failed", e);
}
values = new byte[projectedTable.getPKColumns().size()][];
}
boolean useProto = false;
byte[] localIndexBytes = scan.getAttribute(LOCAL_INDEX_BUILD_PROTO);
useProto = localIndexBytes != null;
if (localIndexBytes == null) {
localIndexBytes = scan.getAttribute(LOCAL_INDEX_BUILD);
}
List<IndexMaintainer> indexMaintainers = localIndexBytes == null ? null : IndexMaintainer.deserialize(localIndexBytes, useProto);
MutationList indexMutations = localIndexBytes == null ? new MutationList() : new MutationList(1024);
RegionScanner theScanner = s;
byte[] replayMutations = scan.getAttribute(BaseScannerRegionObserver.REPLAY_WRITES);
byte[] indexUUID = scan.getAttribute(PhoenixIndexCodec.INDEX_UUID);
byte[] txState = scan.getAttribute(BaseScannerRegionObserver.TX_STATE);
List<Expression> selectExpressions = null;
byte[] upsertSelectTable = scan.getAttribute(BaseScannerRegionObserver.UPSERT_SELECT_TABLE);
boolean isUpsert = false;
boolean isDelete = false;
byte[] deleteCQ = null;
byte[] deleteCF = null;
byte[] emptyCF = null;
HTable targetHTable = null;
boolean isPKChanging = false;
ImmutableBytesWritable ptr = new ImmutableBytesWritable();
if (upsertSelectTable != null) {
isUpsert = true;
projectedTable = deserializeTable(upsertSelectTable);
targetHTable = new HTable(upsertSelectConfig, projectedTable.getPhysicalName().getBytes());
selectExpressions = deserializeExpressions(scan.getAttribute(BaseScannerRegionObserver.UPSERT_SELECT_EXPRS));
values = new byte[projectedTable.getPKColumns().size()][];
isPKChanging = ExpressionUtil.isPkPositionChanging(new TableRef(projectedTable), selectExpressions);
} else {
byte[] isDeleteAgg = scan.getAttribute(BaseScannerRegionObserver.DELETE_AGG);
isDelete = isDeleteAgg != null && Bytes.compareTo(PDataType.TRUE_BYTES, isDeleteAgg) == 0;
if (!isDelete) {
deleteCF = scan.getAttribute(BaseScannerRegionObserver.DELETE_CF);
deleteCQ = scan.getAttribute(BaseScannerRegionObserver.DELETE_CQ);
}
emptyCF = scan.getAttribute(BaseScannerRegionObserver.EMPTY_CF);
}
TupleProjector tupleProjector = null;
byte[][] viewConstants = null;
ColumnReference[] dataColumns = IndexUtil.deserializeDataTableColumnsToJoin(scan);
final TupleProjector p = TupleProjector.deserializeProjectorFromScan(scan);
final HashJoinInfo j = HashJoinInfo.deserializeHashJoinFromScan(scan);
boolean useQualifierAsIndex = EncodedColumnsUtil.useQualifierAsIndex(EncodedColumnsUtil.getMinMaxQualifiersFromScan(scan));
if ((localIndexScan && !isDelete && !isDescRowKeyOrderUpgrade) || (j == null && p != null)) {
if (dataColumns != null) {
tupleProjector = IndexUtil.getTupleProjector(scan, dataColumns);
viewConstants = IndexUtil.deserializeViewConstantsFromScan(scan);
}
ImmutableBytesWritable tempPtr = new ImmutableBytesWritable();
theScanner = getWrappedScanner(c, theScanner, offset, scan, dataColumns, tupleProjector, region, indexMaintainers == null ? null : indexMaintainers.get(0), viewConstants, p, tempPtr, useQualifierAsIndex);
}
if (j != null) {
theScanner = new HashJoinRegionScanner(theScanner, p, j, ScanUtil.getTenantId(scan), env, useQualifierAsIndex, useNewValueColumnQualifier);
}
int maxBatchSize = 0;
long maxBatchSizeBytes = 0L;
MutationList mutations = new MutationList();
boolean needToWrite = false;
Configuration conf = env.getConfiguration();
long flushSize = region.getTableDesc().getMemStoreFlushSize();
if (flushSize <= 0) {
flushSize = conf.getLong(HConstants.HREGION_MEMSTORE_FLUSH_SIZE, HTableDescriptor.DEFAULT_MEMSTORE_FLUSH_SIZE);
}
/**
* Slow down the writes if the memstore size more than
* (hbase.hregion.memstore.block.multiplier - 1) times hbase.hregion.memstore.flush.size
* bytes. This avoids flush storm to hdfs for cases like index building where reads and
* write happen to all the table regions in the server.
*/
final long blockingMemStoreSize = flushSize * (conf.getLong(HConstants.HREGION_MEMSTORE_BLOCK_MULTIPLIER, HConstants.DEFAULT_HREGION_MEMSTORE_BLOCK_MULTIPLIER) - 1);
boolean buildLocalIndex = indexMaintainers != null && dataColumns == null && !localIndexScan;
if (buildLocalIndex) {
checkForLocalIndexColumnFamilies(region, indexMaintainers);
}
if (isDescRowKeyOrderUpgrade || isDelete || isUpsert || (deleteCQ != null && deleteCF != null) || emptyCF != null || buildLocalIndex) {
needToWrite = true;
maxBatchSize = conf.getInt(MUTATE_BATCH_SIZE_ATTRIB, QueryServicesOptions.DEFAULT_MUTATE_BATCH_SIZE);
mutations = new MutationList(Ints.saturatedCast(maxBatchSize + maxBatchSize / 10));
maxBatchSizeBytes = conf.getLong(MUTATE_BATCH_SIZE_BYTES_ATTRIB, QueryServicesOptions.DEFAULT_MUTATE_BATCH_SIZE_BYTES);
}
Aggregators aggregators = ServerAggregators.deserialize(scan.getAttribute(BaseScannerRegionObserver.AGGREGATORS), conf);
Aggregator[] rowAggregators = aggregators.getAggregators();
boolean hasMore;
boolean hasAny = false;
Pair<Integer, Integer> minMaxQualifiers = EncodedColumnsUtil.getMinMaxQualifiersFromScan(scan);
Tuple result = useQualifierAsIndex ? new PositionBasedMultiKeyValueTuple() : new MultiKeyValueTuple();
if (logger.isDebugEnabled()) {
logger.debug(LogUtil.addCustomAnnotations("Starting ungrouped coprocessor scan " + scan + " " + region.getRegionInfo(), ScanUtil.getCustomAnnotations(scan)));
}
int rowCount = 0;
final RegionScanner innerScanner = theScanner;
boolean useIndexProto = true;
byte[] indexMaintainersPtr = scan.getAttribute(PhoenixIndexCodec.INDEX_PROTO_MD);
// for backward compatiblity fall back to look by the old attribute
if (indexMaintainersPtr == null) {
indexMaintainersPtr = scan.getAttribute(PhoenixIndexCodec.INDEX_MD);
useIndexProto = false;
}
byte[] clientVersionBytes = scan.getAttribute(PhoenixIndexCodec.CLIENT_VERSION);
boolean acquiredLock = false;
boolean incrScanRefCount = false;
final TenantCache tenantCache = GlobalCache.getTenantCache(env, ScanUtil.getTenantId(scan));
try (MemoryChunk em = tenantCache.getMemoryManager().allocate(0)) {
if (needToWrite) {
synchronized (lock) {
if (isRegionClosingOrSplitting) {
throw new IOException("Temporarily unable to write from scan because region is closing or splitting");
}
scansReferenceCount++;
incrScanRefCount = true;
lock.notifyAll();
}
}
region.startRegionOperation();
acquiredLock = true;
long size = 0;
synchronized (innerScanner) {
do {
List<Cell> results = useQualifierAsIndex ? new EncodedColumnQualiferCellsList(minMaxQualifiers.getFirst(), minMaxQualifiers.getSecond(), encodingScheme) : new ArrayList<Cell>();
// Results are potentially returned even when the return value of s.next is false
// since this is an indication of whether or not there are more values after the
// ones returned
hasMore = innerScanner.nextRaw(results);
if (!results.isEmpty()) {
rowCount++;
result.setKeyValues(results);
if (isDescRowKeyOrderUpgrade) {
Arrays.fill(values, null);
Cell firstKV = results.get(0);
RowKeySchema schema = projectedTable.getRowKeySchema();
int maxOffset = schema.iterator(firstKV.getRowArray(), firstKV.getRowOffset() + offset, firstKV.getRowLength(), ptr);
for (int i = 0; i < schema.getFieldCount(); i++) {
Boolean hasValue = schema.next(ptr, i, maxOffset);
if (hasValue == null) {
break;
}
Field field = schema.getField(i);
if (field.getSortOrder() == SortOrder.DESC) {
// Special case for re-writing DESC ARRAY, as the actual byte value needs to change in this case
if (field.getDataType().isArrayType()) {
field.getDataType().coerceBytes(ptr, null, field.getDataType(), field.getMaxLength(), field.getScale(), field.getSortOrder(), field.getMaxLength(), field.getScale(), field.getSortOrder(), // force to use correct separator byte
true);
} else // Special case for re-writing DESC CHAR or DESC BINARY, to force the re-writing of trailing space characters
if (field.getDataType() == PChar.INSTANCE || field.getDataType() == PBinary.INSTANCE) {
int len = ptr.getLength();
while (len > 0 && ptr.get()[ptr.getOffset() + len - 1] == StringUtil.SPACE_UTF8) {
len--;
}
ptr.set(ptr.get(), ptr.getOffset(), len);
// Special case for re-writing DESC FLOAT and DOUBLE, as they're not inverted like they should be (PHOENIX-2171)
} else if (field.getDataType() == PFloat.INSTANCE || field.getDataType() == PDouble.INSTANCE) {
byte[] invertedBytes = SortOrder.invert(ptr.get(), ptr.getOffset(), ptr.getLength());
ptr.set(invertedBytes);
}
} else if (field.getDataType() == PBinary.INSTANCE) {
// Remove trailing space characters so that the setValues call below will replace them
// with the correct zero byte character. Note this is somewhat dangerous as these
// could be legit, but I don't know what the alternative is.
int len = ptr.getLength();
while (len > 0 && ptr.get()[ptr.getOffset() + len - 1] == StringUtil.SPACE_UTF8) {
len--;
}
ptr.set(ptr.get(), ptr.getOffset(), len);
}
values[i] = ptr.copyBytes();
}
writeToTable.newKey(ptr, values);
if (Bytes.compareTo(firstKV.getRowArray(), firstKV.getRowOffset() + offset, firstKV.getRowLength(), ptr.get(), ptr.getOffset() + offset, ptr.getLength()) == 0) {
continue;
}
byte[] newRow = ByteUtil.copyKeyBytesIfNecessary(ptr);
if (offset > 0) {
// for local indexes (prepend region start key)
byte[] newRowWithOffset = new byte[offset + newRow.length];
System.arraycopy(firstKV.getRowArray(), firstKV.getRowOffset(), newRowWithOffset, 0, offset);
;
System.arraycopy(newRow, 0, newRowWithOffset, offset, newRow.length);
newRow = newRowWithOffset;
}
byte[] oldRow = Bytes.copy(firstKV.getRowArray(), firstKV.getRowOffset(), firstKV.getRowLength());
for (Cell cell : results) {
// Copy existing cell but with new row key
Cell newCell = new KeyValue(newRow, 0, newRow.length, cell.getFamilyArray(), cell.getFamilyOffset(), cell.getFamilyLength(), cell.getQualifierArray(), cell.getQualifierOffset(), cell.getQualifierLength(), cell.getTimestamp(), KeyValue.Type.codeToType(cell.getTypeByte()), cell.getValueArray(), cell.getValueOffset(), cell.getValueLength());
switch(KeyValue.Type.codeToType(cell.getTypeByte())) {
case Put:
// If Put, point delete old Put
Delete del = new Delete(oldRow);
del.addDeleteMarker(new KeyValue(cell.getRowArray(), cell.getRowOffset(), cell.getRowLength(), cell.getFamilyArray(), cell.getFamilyOffset(), cell.getFamilyLength(), cell.getQualifierArray(), cell.getQualifierOffset(), cell.getQualifierLength(), cell.getTimestamp(), KeyValue.Type.Delete, ByteUtil.EMPTY_BYTE_ARRAY, 0, 0));
mutations.add(del);
Put put = new Put(newRow);
put.add(newCell);
mutations.add(put);
break;
case Delete:
case DeleteColumn:
case DeleteFamily:
case DeleteFamilyVersion:
Delete delete = new Delete(newRow);
delete.addDeleteMarker(newCell);
mutations.add(delete);
break;
}
}
} else if (buildLocalIndex) {
for (IndexMaintainer maintainer : indexMaintainers) {
if (!results.isEmpty()) {
result.getKey(ptr);
ValueGetter valueGetter = maintainer.createGetterFromKeyValues(ImmutableBytesPtr.copyBytesIfNecessary(ptr), results);
Put put = maintainer.buildUpdateMutation(kvBuilder, valueGetter, ptr, results.get(0).getTimestamp(), env.getRegion().getRegionInfo().getStartKey(), env.getRegion().getRegionInfo().getEndKey());
indexMutations.add(put);
}
}
result.setKeyValues(results);
} else if (isDelete) {
// FIXME: the version of the Delete constructor without the lock
// args was introduced in 0.94.4, thus if we try to use it here
// we can no longer use the 0.94.2 version of the client.
Cell firstKV = results.get(0);
Delete delete = new Delete(firstKV.getRowArray(), firstKV.getRowOffset(), firstKV.getRowLength(), ts);
if (replayMutations != null) {
delete.setAttribute(REPLAY_WRITES, replayMutations);
}
mutations.add(delete);
// force tephra to ignore this deletes
delete.setAttribute(PhoenixTransactionContext.TX_ROLLBACK_ATTRIBUTE_KEY, new byte[0]);
} else if (isUpsert) {
Arrays.fill(values, null);
int bucketNumOffset = 0;
if (projectedTable.getBucketNum() != null) {
values[0] = new byte[] { 0 };
bucketNumOffset = 1;
}
int i = bucketNumOffset;
List<PColumn> projectedColumns = projectedTable.getColumns();
for (; i < projectedTable.getPKColumns().size(); i++) {
Expression expression = selectExpressions.get(i - bucketNumOffset);
if (expression.evaluate(result, ptr)) {
values[i] = ptr.copyBytes();
// column being projected into then invert the bits.
if (expression.getSortOrder() != projectedColumns.get(i).getSortOrder()) {
SortOrder.invert(values[i], 0, values[i], 0, values[i].length);
}
} else {
values[i] = ByteUtil.EMPTY_BYTE_ARRAY;
}
}
projectedTable.newKey(ptr, values);
PRow row = projectedTable.newRow(kvBuilder, ts, ptr, false);
for (; i < projectedColumns.size(); i++) {
Expression expression = selectExpressions.get(i - bucketNumOffset);
if (expression.evaluate(result, ptr)) {
PColumn column = projectedColumns.get(i);
if (!column.getDataType().isSizeCompatible(ptr, null, expression.getDataType(), expression.getSortOrder(), expression.getMaxLength(), expression.getScale(), column.getMaxLength(), column.getScale())) {
throw new DataExceedsCapacityException(column.getDataType(), column.getMaxLength(), column.getScale(), column.getName().getString(), ptr);
}
column.getDataType().coerceBytes(ptr, null, expression.getDataType(), expression.getMaxLength(), expression.getScale(), expression.getSortOrder(), column.getMaxLength(), column.getScale(), column.getSortOrder(), projectedTable.rowKeyOrderOptimizable());
byte[] bytes = ByteUtil.copyKeyBytesIfNecessary(ptr);
row.setValue(column, bytes);
}
}
for (Mutation mutation : row.toRowMutations()) {
if (replayMutations != null) {
mutation.setAttribute(REPLAY_WRITES, replayMutations);
}
mutations.add(mutation);
}
for (i = 0; i < selectExpressions.size(); i++) {
selectExpressions.get(i).reset();
}
} else if (deleteCF != null && deleteCQ != null) {
// if no empty key value is being set
if (emptyCF == null || result.getValue(deleteCF, deleteCQ) != null) {
Delete delete = new Delete(results.get(0).getRowArray(), results.get(0).getRowOffset(), results.get(0).getRowLength());
delete.deleteColumns(deleteCF, deleteCQ, ts);
// force tephra to ignore this deletes
delete.setAttribute(PhoenixTransactionContext.TX_ROLLBACK_ATTRIBUTE_KEY, new byte[0]);
mutations.add(delete);
}
}
if (emptyCF != null) {
/*
* If we've specified an emptyCF, then we need to insert an empty
* key value "retroactively" for any key value that is visible at
* the timestamp that the DDL was issued. Key values that are not
* visible at this timestamp will not ever be projected up to
* scans past this timestamp, so don't need to be considered.
* We insert one empty key value per row per timestamp.
*/
Set<Long> timeStamps = Sets.newHashSetWithExpectedSize(results.size());
for (Cell kv : results) {
long kvts = kv.getTimestamp();
if (!timeStamps.contains(kvts)) {
Put put = new Put(kv.getRowArray(), kv.getRowOffset(), kv.getRowLength());
put.add(emptyCF, QueryConstants.EMPTY_COLUMN_BYTES, kvts, ByteUtil.EMPTY_BYTE_ARRAY);
mutations.add(put);
}
}
}
if (ServerUtil.readyToCommit(mutations.size(), mutations.byteSize(), maxBatchSize, maxBatchSizeBytes)) {
commit(region, mutations, indexUUID, blockingMemStoreSize, indexMaintainersPtr, txState, targetHTable, useIndexProto, isPKChanging, clientVersionBytes);
mutations.clear();
}
if (ServerUtil.readyToCommit(indexMutations.size(), indexMutations.byteSize(), maxBatchSize, maxBatchSizeBytes)) {
setIndexAndTransactionProperties(indexMutations, indexUUID, indexMaintainersPtr, txState, clientVersionBytes, useIndexProto);
commitBatch(region, indexMutations, blockingMemStoreSize);
indexMutations.clear();
}
size += aggregators.aggregate(rowAggregators, result);
while (size > em.getSize()) {
logger.info("Request: {}, resizing {} by 1024*1024", size, em.getSize());
em.resize(em.getSize() + 1024 * 1024);
}
hasAny = true;
}
} while (hasMore);
if (!mutations.isEmpty()) {
commit(region, mutations, indexUUID, blockingMemStoreSize, indexMaintainersPtr, txState, targetHTable, useIndexProto, isPKChanging, clientVersionBytes);
mutations.clear();
}
if (!indexMutations.isEmpty()) {
commitBatch(region, indexMutations, blockingMemStoreSize);
indexMutations.clear();
}
}
} finally {
if (needToWrite && incrScanRefCount) {
synchronized (lock) {
scansReferenceCount--;
if (scansReferenceCount < 0) {
logger.warn("Scan reference count went below zero. Something isn't correct. Resetting it back to zero");
scansReferenceCount = 0;
}
lock.notifyAll();
}
}
try {
if (targetHTable != null) {
targetHTable.close();
}
} finally {
try {
innerScanner.close();
} finally {
if (acquiredLock)
region.closeRegionOperation();
}
}
}
if (logger.isDebugEnabled()) {
logger.debug(LogUtil.addCustomAnnotations("Finished scanning " + rowCount + " rows for ungrouped coprocessor scan " + scan, ScanUtil.getCustomAnnotations(scan)));
}
final boolean hadAny = hasAny;
KeyValue keyValue = null;
if (hadAny) {
byte[] value = aggregators.toBytes(rowAggregators);
keyValue = KeyValueUtil.newKeyValue(UNGROUPED_AGG_ROW_KEY, SINGLE_COLUMN_FAMILY, SINGLE_COLUMN, AGG_TIMESTAMP, value, 0, value.length);
}
final KeyValue aggKeyValue = keyValue;
RegionScanner scanner = new BaseRegionScanner(innerScanner) {
private boolean done = !hadAny;
@Override
public boolean isFilterDone() {
return done;
}
@Override
public boolean next(List<Cell> results) throws IOException {
if (done)
return false;
done = true;
results.add(aggKeyValue);
return false;
}
@Override
public long getMaxResultSize() {
return scan.getMaxResultSize();
}
};
return scanner;
}
use of org.apache.phoenix.schema.tuple.MultiKeyValueTuple in project phoenix by apache.
the class GroupedAggregateRegionObserver method scanUnordered.
/**
* Used for an aggregate query in which the key order does not necessarily match the group by
* key order. In this case, we must collect all distinct groups within a region into a map,
* aggregating as we go.
* @param limit TODO
*/
private RegionScanner scanUnordered(ObserverContext<RegionCoprocessorEnvironment> c, Scan scan, final RegionScanner scanner, final List<Expression> expressions, final ServerAggregators aggregators, long limit) throws IOException {
if (logger.isDebugEnabled()) {
logger.debug(LogUtil.addCustomAnnotations("Grouped aggregation over unordered rows with scan " + scan + ", group by " + expressions + ", aggregators " + aggregators, ScanUtil.getCustomAnnotations(scan)));
}
RegionCoprocessorEnvironment env = c.getEnvironment();
Configuration conf = env.getConfiguration();
int estDistVals = conf.getInt(GROUPBY_ESTIMATED_DISTINCT_VALUES_ATTRIB, DEFAULT_GROUPBY_ESTIMATED_DISTINCT_VALUES);
byte[] estDistValsBytes = scan.getAttribute(BaseScannerRegionObserver.ESTIMATED_DISTINCT_VALUES);
if (estDistValsBytes != null) {
// Allocate 1.5x estimation
estDistVals = Math.max(MIN_DISTINCT_VALUES, (int) (Bytes.toInt(estDistValsBytes) * 1.5f));
}
Pair<Integer, Integer> minMaxQualifiers = EncodedColumnsUtil.getMinMaxQualifiersFromScan(scan);
boolean useQualifierAsIndex = EncodedColumnsUtil.useQualifierAsIndex(EncodedColumnsUtil.getMinMaxQualifiersFromScan(scan));
final boolean spillableEnabled = conf.getBoolean(GROUPBY_SPILLABLE_ATTRIB, DEFAULT_GROUPBY_SPILLABLE);
GroupByCache groupByCache = GroupByCacheFactory.INSTANCE.newCache(env, ScanUtil.getTenantId(scan), ScanUtil.getCustomAnnotations(scan), aggregators, estDistVals);
boolean success = false;
try {
boolean hasMore;
Tuple result = useQualifierAsIndex ? new PositionBasedMultiKeyValueTuple() : new MultiKeyValueTuple();
if (logger.isDebugEnabled()) {
logger.debug(LogUtil.addCustomAnnotations("Spillable groupby enabled: " + spillableEnabled, ScanUtil.getCustomAnnotations(scan)));
}
Region region = c.getEnvironment().getRegion();
boolean acquiredLock = false;
try {
region.startRegionOperation();
acquiredLock = true;
synchronized (scanner) {
do {
List<Cell> results = useQualifierAsIndex ? new EncodedColumnQualiferCellsList(minMaxQualifiers.getFirst(), minMaxQualifiers.getSecond(), encodingScheme) : new ArrayList<Cell>();
// Results are potentially returned even when the return
// value of s.next is false
// since this is an indication of whether or not there are
// more values after the
// ones returned
hasMore = scanner.nextRaw(results);
if (!results.isEmpty()) {
result.setKeyValues(results);
ImmutableBytesPtr key = TupleUtil.getConcatenatedValue(result, expressions);
Aggregator[] rowAggregators = groupByCache.cache(key);
// Aggregate values here
aggregators.aggregate(rowAggregators, result);
}
} while (hasMore && groupByCache.size() < limit);
}
} finally {
if (acquiredLock)
region.closeRegionOperation();
}
RegionScanner regionScanner = groupByCache.getScanner(scanner);
// Do not sort here, but sort back on the client instead
// The reason is that if the scan ever extends beyond a region
// (which can happen if we're basing our parallelization split
// points on old metadata), we'll get incorrect query results.
success = true;
return regionScanner;
} finally {
if (!success) {
Closeables.closeQuietly(groupByCache);
}
}
}
Aggregations