use of org.apache.hadoop.hbase.regionserver.RegionScanner in project cdap by caskdata.
the class IncrementSummingScannerTest method testMultiColumnFlushAndCompact.
@Test
public void testMultiColumnFlushAndCompact() throws Exception {
TableId tableId = TableId.from(NamespaceId.DEFAULT.getNamespace(), "testMultiColumnFlushAndCompact");
byte[] familyBytes = Bytes.toBytes("f");
byte[] columnBytes = Bytes.toBytes("c");
byte[] columnBytes2 = Bytes.toBytes("c2");
HRegion region = createRegion(tableId, familyBytes);
try {
region.initialize();
long now = 1;
byte[] row1 = Bytes.toBytes("row1");
byte[] row2 = Bytes.toBytes("row2");
// Initial put to row1,c2
Put row1P = new Put(row1);
row1P.add(familyBytes, columnBytes2, now - 1, Bytes.toBytes(5L));
region.put(row1P);
// Initial put to row2,c
Put row2P = new Put(row2);
row2P.add(familyBytes, columnBytes, now - 1, Bytes.toBytes(10L));
region.put(row2P);
// Generate some increments
long ts = now;
for (int i = 0; i < 50; i++) {
region.put(generateIncrementPut(familyBytes, columnBytes, row1, ts));
region.put(generateIncrementPut(familyBytes, columnBytes, row2, ts));
region.put(generateIncrementPut(familyBytes, columnBytes2, row1, ts));
ts++;
}
// First scanner represents the flush scanner
RegionScanner scanner = new IncrementSummingScanner(region, -1, region.getScanner(new Scan().setMaxVersions()), ScanType.COMPACT_RETAIN_DELETES, now + 15, -1);
// Second scanner is a user scan; this makes the asserts easier to follow
scanner = new IncrementSummingScanner(region, -1, scanner, ScanType.USER_SCAN);
List<Cell> results = Lists.newArrayList();
assertTrue(scanner.next(results, ScannerContext.newBuilder().setBatchLimit(10).build()));
assertEquals(2, results.size());
Cell cell = results.get(0);
assertNotNull(cell);
assertEquals("row1", Bytes.toString(cell.getRow()));
assertEquals("c", Bytes.toString(cell.getQualifier()));
assertEquals(50, Bytes.toLong(cell.getValue()));
cell = results.get(1);
assertNotNull(cell);
assertEquals("row1", Bytes.toString(cell.getRow()));
assertEquals("c2", Bytes.toString(cell.getQualifier()));
assertEquals(55, Bytes.toLong(cell.getValue()));
results.clear();
assertFalse(scanner.next(results, ScannerContext.newBuilder().setBatchLimit(10).build()));
assertEquals(1, results.size());
cell = results.get(0);
assertNotNull(cell);
assertEquals("row2", Bytes.toString(cell.getRow()));
assertEquals(60, Bytes.toLong(cell.getValue()));
} finally {
region.close();
}
}
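The assertTrue/assertFalse pattern above leans on the RegionScanner.next contract: the boolean return value only says whether more rows remain after this batch, and cells may still be added to the list on the final call (the assertFalse call above still yields one cell). Below is a minimal sketch of draining a region scanner under that contract, assuming an already-open HRegion; the class and helper names are illustrative and not part of the CDAP test.

import java.io.IOException;
import java.util.ArrayList;
import java.util.List;

import org.apache.hadoop.hbase.Cell;
import org.apache.hadoop.hbase.client.Scan;
import org.apache.hadoop.hbase.regionserver.HRegion;
import org.apache.hadoop.hbase.regionserver.RegionScanner;
import org.apache.hadoop.hbase.regionserver.ScannerContext;

final class ScannerDrainSketch {
  // Reads every cell in the region, asking for at most 10 cells per next() call.
  static List<Cell> drain(HRegion region) throws IOException {
    List<Cell> all = new ArrayList<>();
    try (RegionScanner scanner = region.getScanner(new Scan().setMaxVersions())) {
      List<Cell> batch = new ArrayList<>();
      boolean more;
      do {
        ScannerContext ctx = ScannerContext.newBuilder().setBatchLimit(10).build();
        // true means more rows remain after this batch; cells may be added either way
        more = scanner.next(batch, ctx);
        all.addAll(batch);
        batch.clear();
      } while (more);
    }
    return all;
  }
}

With a batch limit in place a wide row can be split across several next() calls, which is why the loop keys off the boolean rather than off an empty result list.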
use of org.apache.hadoop.hbase.regionserver.RegionScanner in project phoenix by apache.
the class MetaDataEndpointImpl method doDropTable.
private MetaDataMutationResult doDropTable(byte[] key, byte[] tenantId, byte[] schemaName, byte[] tableName, byte[] parentTableName, PTableType tableType, List<Mutation> rowsToDelete, List<ImmutableBytesPtr> invalidateList, List<RowLock> locks, List<byte[]> tableNamesToDelete, List<SharedTableState> sharedTablesToDelete, boolean isCascade, int clientVersion) throws IOException, SQLException {
long clientTimeStamp = MetaDataUtil.getClientTimeStamp(rowsToDelete);
Region region = env.getRegion();
ImmutableBytesPtr cacheKey = new ImmutableBytesPtr(key);
Cache<ImmutableBytesPtr, PMetaDataEntity> metaDataCache = GlobalCache.getInstance(this.env).getMetaDataCache();
PTable table = (PTable) metaDataCache.getIfPresent(cacheKey);
// We always cache the latest version - fault in if not in cache
if (table != null || (table = buildTable(key, cacheKey, region, HConstants.LATEST_TIMESTAMP, clientVersion)) != null) {
if (table.getTimeStamp() < clientTimeStamp) {
if (isTableDeleted(table) || tableType != table.getType()) {
return new MetaDataMutationResult(MutationCode.TABLE_NOT_FOUND, EnvironmentEdgeManager.currentTimeMillis(), null);
}
} else {
return new MetaDataMutationResult(MutationCode.NEWER_TABLE_FOUND, EnvironmentEdgeManager.currentTimeMillis(), null);
}
}
// We didn't find a table at the latest timestamp, so either there is no table or there was a table, but it's been deleted. In either case we want to return.
if (table == null) {
if (buildDeletedTable(key, cacheKey, region, clientTimeStamp) != null) {
return new MetaDataMutationResult(MutationCode.NEWER_TABLE_FOUND, EnvironmentEdgeManager.currentTimeMillis(), null);
}
return new MetaDataMutationResult(MutationCode.TABLE_NOT_FOUND, EnvironmentEdgeManager.currentTimeMillis(), null);
}
// Make sure we're not deleting the "wrong" child
if (parentTableName != null && table.getParentTableName() != null && !Arrays.equals(parentTableName, table.getParentTableName().getBytes())) {
return new MetaDataMutationResult(MutationCode.TABLE_NOT_FOUND, EnvironmentEdgeManager.currentTimeMillis(), null);
}
// Since we don't allow back in time DDL, we know if we have a table it's the one
// we want to delete. FIXME: we shouldn't need a scan here, but should be able to
// use the table to generate the Delete markers.
Scan scan = MetaDataUtil.newTableRowsScan(key, MIN_TABLE_TIMESTAMP, clientTimeStamp);
List<byte[]> indexNames = Lists.newArrayList();
List<Cell> results = Lists.newArrayList();
try (RegionScanner scanner = region.getScanner(scan)) {
scanner.next(results);
if (results.isEmpty()) {
// Should not be possible
return new MetaDataMutationResult(MutationCode.TABLE_NOT_FOUND, EnvironmentEdgeManager.currentTimeMillis(), null);
}
if (tableType == PTableType.TABLE || tableType == PTableType.SYSTEM) {
// Handle any child views that exist
TableViewFinder tableViewFinderResult = findChildViews(region, tenantId, table, clientVersion);
if (tableViewFinderResult.hasViews()) {
if (isCascade) {
if (tableViewFinderResult.allViewsInMultipleRegions()) {
// view metadata spans multiple regions
return new MetaDataMutationResult(MutationCode.UNALLOWED_TABLE_MUTATION, EnvironmentEdgeManager.currentTimeMillis(), null);
} else if (tableViewFinderResult.allViewsInSingleRegion()) {
// Recursively delete views - safe as all the views are in the same region
for (ViewInfo viewInfo : tableViewFinderResult.getViewInfoList()) {
byte[] viewTenantId = viewInfo.getTenantId();
byte[] viewSchemaName = viewInfo.getSchemaName();
byte[] viewName = viewInfo.getViewName();
byte[] viewKey = SchemaUtil.getTableKey(viewTenantId, viewSchemaName, viewName);
Delete delete = new Delete(viewKey, clientTimeStamp);
rowsToDelete.add(delete);
acquireLock(region, viewKey, locks);
MetaDataMutationResult result = doDropTable(viewKey, viewTenantId, viewSchemaName, viewName, null, PTableType.VIEW, rowsToDelete, invalidateList, locks, tableNamesToDelete, sharedTablesToDelete, false, clientVersion);
if (result.getMutationCode() != MutationCode.TABLE_ALREADY_EXISTS) {
return result;
}
}
}
} else {
// DROP without CASCADE on tables with child views is not permitted
return new MetaDataMutationResult(MutationCode.UNALLOWED_TABLE_MUTATION, EnvironmentEdgeManager.currentTimeMillis(), null);
}
}
}
// Add to the list of HTables to delete, unless it's a view or a shared index
if (tableType != PTableType.VIEW && table.getViewIndexId() == null) {
tableNamesToDelete.add(table.getPhysicalName().getBytes());
} else {
sharedTablesToDelete.add(new SharedTableState(table));
}
invalidateList.add(cacheKey);
byte[][] rowKeyMetaData = new byte[5][];
do {
Cell kv = results.get(LINK_TYPE_INDEX);
int nColumns = getVarChars(kv.getRowArray(), kv.getRowOffset(), kv.getRowLength(), 0, rowKeyMetaData);
if (nColumns == 5 && rowKeyMetaData[PhoenixDatabaseMetaData.FAMILY_NAME_INDEX].length > 0 && Bytes.compareTo(kv.getQualifierArray(), kv.getQualifierOffset(), kv.getQualifierLength(), LINK_TYPE_BYTES, 0, LINK_TYPE_BYTES.length) == 0) {
LinkType linkType = LinkType.fromSerializedValue(kv.getValueArray()[kv.getValueOffset()]);
if (rowKeyMetaData[PhoenixDatabaseMetaData.COLUMN_NAME_INDEX].length == 0 && linkType == LinkType.INDEX_TABLE) {
indexNames.add(rowKeyMetaData[PhoenixDatabaseMetaData.FAMILY_NAME_INDEX]);
} else if (linkType == LinkType.PARENT_TABLE || linkType == LinkType.PHYSICAL_TABLE) {
// delete parent->child link for views
Cell parentTenantIdCell = MetaDataUtil.getCell(results, PhoenixDatabaseMetaData.PARENT_TENANT_ID_BYTES);
PName parentTenantId = parentTenantIdCell != null ? PNameFactory.newName(parentTenantIdCell.getValueArray(), parentTenantIdCell.getValueOffset(), parentTenantIdCell.getValueLength()) : null;
byte[] linkKey = MetaDataUtil.getChildLinkKey(parentTenantId, table.getParentSchemaName(), table.getParentTableName(), table.getTenantId(), table.getName());
Delete linkDelete = new Delete(linkKey, clientTimeStamp);
rowsToDelete.add(linkDelete);
}
}
// FIXME: Remove when unintentionally deprecated method is fixed (HBASE-7870).
// FIXME: the version of the Delete constructor without the lock args was introduced
// in 0.94.4, thus if we try to use it here we can no longer use the 0.94.2 version
// of the client.
Delete delete = new Delete(kv.getRowArray(), kv.getRowOffset(), kv.getRowLength(), clientTimeStamp);
rowsToDelete.add(delete);
results.clear();
scanner.next(results);
} while (!results.isEmpty());
}
// Recursively delete indexes
for (byte[] indexName : indexNames) {
byte[] indexKey = SchemaUtil.getTableKey(tenantId, schemaName, indexName);
// FIXME: Remove when unintentionally deprecated method is fixed (HBASE-7870).
// FIXME: the version of the Delete constructor without the lock args was introduced
// in 0.94.4, thus if we try to use it here we can no longer use the 0.94.2 version
// of the client.
Delete delete = new Delete(indexKey, clientTimeStamp);
rowsToDelete.add(delete);
acquireLock(region, indexKey, locks);
MetaDataMutationResult result = doDropTable(indexKey, tenantId, schemaName, indexName, tableName, PTableType.INDEX, rowsToDelete, invalidateList, locks, tableNamesToDelete, sharedTablesToDelete, false, clientVersion);
if (result.getMutationCode() != MutationCode.TABLE_ALREADY_EXISTS) {
return result;
}
}
return new MetaDataMutationResult(MutationCode.TABLE_ALREADY_EXISTS, EnvironmentEdgeManager.currentTimeMillis(), table, tableNamesToDelete);
}
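The core RegionScanner usage in doDropTable is a drain loop that turns every metadata row in the table's key range into a Delete at the client timestamp; the link-type inspection, locking, and recursion are layered on top of that loop. Below is a condensed sketch of just the scan-and-delete skeleton, assuming a Region, a prepared Scan, and a mutation list; the class and method names are illustrative, not Phoenix API.

import java.io.IOException;
import java.util.ArrayList;
import java.util.List;

import org.apache.hadoop.hbase.Cell;
import org.apache.hadoop.hbase.client.Delete;
import org.apache.hadoop.hbase.client.Mutation;
import org.apache.hadoop.hbase.client.Scan;
import org.apache.hadoop.hbase.regionserver.Region;
import org.apache.hadoop.hbase.regionserver.RegionScanner;

final class DropTableScanSketch {
  // Converts every row returned by the scan into a Delete at clientTimeStamp.
  static void collectDeletes(Region region, Scan scan, long clientTimeStamp,
                             List<Mutation> rowsToDelete) throws IOException {
    try (RegionScanner scanner = region.getScanner(scan)) {
      List<Cell> results = new ArrayList<>();
      scanner.next(results);
      while (!results.isEmpty()) {
        Cell kv = results.get(0);
        // One Delete per row key; with default batching each next() returns one full row
        rowsToDelete.add(new Delete(kv.getRowArray(), kv.getRowOffset(),
            kv.getRowLength(), clientTimeStamp));
        results.clear();
        scanner.next(results);
      }
    }
  }
}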
use of org.apache.hadoop.hbase.regionserver.RegionScanner in project phoenix by apache.
the class GroupedAggregateRegionObserver method scanUnordered.
/**
* Used for an aggregate query in which the key order does not necessarily match the group by
* key order. In this case, we must collect all distinct groups within a region into a map,
* aggregating as we go.
* @param limit TODO
*/
private RegionScanner scanUnordered(ObserverContext<RegionCoprocessorEnvironment> c, Scan scan, final RegionScanner scanner, final List<Expression> expressions, final ServerAggregators aggregators, long limit) throws IOException {
if (logger.isDebugEnabled()) {
logger.debug(LogUtil.addCustomAnnotations("Grouped aggregation over unordered rows with scan " + scan + ", group by " + expressions + ", aggregators " + aggregators, ScanUtil.getCustomAnnotations(scan)));
}
RegionCoprocessorEnvironment env = c.getEnvironment();
Configuration conf = env.getConfiguration();
int estDistVals = conf.getInt(GROUPBY_ESTIMATED_DISTINCT_VALUES_ATTRIB, DEFAULT_GROUPBY_ESTIMATED_DISTINCT_VALUES);
byte[] estDistValsBytes = scan.getAttribute(BaseScannerRegionObserver.ESTIMATED_DISTINCT_VALUES);
if (estDistValsBytes != null) {
// Allocate 1.5x estimation
estDistVals = Math.max(MIN_DISTINCT_VALUES, (int) (Bytes.toInt(estDistValsBytes) * 1.5f));
}
Pair<Integer, Integer> minMaxQualifiers = EncodedColumnsUtil.getMinMaxQualifiersFromScan(scan);
boolean useQualifierAsIndex = EncodedColumnsUtil.useQualifierAsIndex(EncodedColumnsUtil.getMinMaxQualifiersFromScan(scan));
final boolean spillableEnabled = conf.getBoolean(GROUPBY_SPILLABLE_ATTRIB, DEFAULT_GROUPBY_SPILLABLE);
GroupByCache groupByCache = GroupByCacheFactory.INSTANCE.newCache(env, ScanUtil.getTenantId(scan), ScanUtil.getCustomAnnotations(scan), aggregators, estDistVals);
boolean success = false;
try {
boolean hasMore;
Tuple result = useQualifierAsIndex ? new PositionBasedMultiKeyValueTuple() : new MultiKeyValueTuple();
if (logger.isDebugEnabled()) {
logger.debug(LogUtil.addCustomAnnotations("Spillable groupby enabled: " + spillableEnabled, ScanUtil.getCustomAnnotations(scan)));
}
Region region = c.getEnvironment().getRegion();
boolean acquiredLock = false;
try {
region.startRegionOperation();
acquiredLock = true;
synchronized (scanner) {
do {
List<Cell> results = useQualifierAsIndex ? new EncodedColumnQualiferCellsList(minMaxQualifiers.getFirst(), minMaxQualifiers.getSecond(), encodingScheme) : new ArrayList<Cell>();
// Results are potentially returned even when the return value of scanner.nextRaw is false,
// since that value only indicates whether or not there are more values after the ones returned
hasMore = scanner.nextRaw(results);
if (!results.isEmpty()) {
result.setKeyValues(results);
ImmutableBytesPtr key = TupleUtil.getConcatenatedValue(result, expressions);
Aggregator[] rowAggregators = groupByCache.cache(key);
// Aggregate values here
aggregators.aggregate(rowAggregators, result);
}
} while (hasMore && groupByCache.size() < limit);
}
} finally {
if (acquiredLock)
region.closeRegionOperation();
}
RegionScanner regionScanner = groupByCache.getScanner(scanner);
// Do not sort here, but sort back on the client instead
// The reason is that if the scan ever extends beyond a region
// (which can happen if we're basing our parallelization split
// points on old metadata), we'll get incorrect query results.
success = true;
return regionScanner;
} finally {
if (!success) {
Closeables.closeQuietly(groupByCache);
}
}
}
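For RegionScanner purposes, the essential skeleton in scanUnordered is: start a region operation, drain the inner scanner with nextRaw under synchronization, and release the operation in a finally block so the region cannot be closed or split mid-scan. Below is a stripped-down sketch of that skeleton with the Phoenix group-by aggregation replaced by a generic per-row callback; the names are assumptions for illustration, not Phoenix types.

import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import java.util.function.Consumer;

import org.apache.hadoop.hbase.Cell;
import org.apache.hadoop.hbase.regionserver.Region;
import org.apache.hadoop.hbase.regionserver.RegionScanner;

final class GroupByScanSketch {
  // Drains an inner RegionScanner with nextRaw, handing each non-empty row to rowConsumer.
  static void drainRaw(Region region, RegionScanner scanner,
                       Consumer<List<Cell>> rowConsumer) throws IOException {
    boolean acquiredLock = false;
    try {
      region.startRegionOperation();      // guard against region close/split during the scan
      acquiredLock = true;
      synchronized (scanner) {
        boolean hasMore;
        do {
          List<Cell> results = new ArrayList<>();
          // nextRaw may add cells even when it returns false (false = no rows after these)
          hasMore = scanner.nextRaw(results);
          if (!results.isEmpty()) {
            rowConsumer.accept(results);  // e.g. compute the group key and aggregate into a cache
          }
        } while (hasMore);
      }
    } finally {
      if (acquiredLock) {
        region.closeRegionOperation();
      }
    }
  }
}

In the Phoenix method the callback body is where TupleUtil.getConcatenatedValue builds the group key and the aggregators fold the row into the GroupByCache.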
use of org.apache.hadoop.hbase.regionserver.RegionScanner in project phoenix by apache.
the class OrderedResultIteratorTest method testSpoolingBackwardCompatibility.
@Test
public void testSpoolingBackwardCompatibility() {
RegionScanner s = Mockito.mock(RegionScanner.class);
Scan scan = new Scan();
Expression exp = LiteralExpression.newConstant(Boolean.TRUE);
OrderByExpression ex = OrderByExpression.createByCheckIfOrderByReverse(exp, false, false, false);
ScanRegionObserver.serializeIntoScan(scan, 0, Arrays.asList(ex), 100);
// Check 5.1.0 & Check > 5.1.0
ScanUtil.setClientVersion(scan, VersionUtil.encodeVersion("5.1.0"));
NonAggregateRegionScannerFactory.deserializeFromScan(scan, s, false, 100);
ScanUtil.setClientVersion(scan, VersionUtil.encodeVersion("5.2.0"));
NonAggregateRegionScannerFactory.deserializeFromScan(scan, s, false, 100);
// Check 4.15.0 & Check > 4.15.0
ScanUtil.setClientVersion(scan, VersionUtil.encodeVersion("4.15.0"));
NonAggregateRegionScannerFactory.deserializeFromScan(scan, s, false, 100);
ScanUtil.setClientVersion(scan, VersionUtil.encodeVersion("4.15.1"));
NonAggregateRegionScannerFactory.deserializeFromScan(scan, s, false, 100);
// Check < 5.1
ScanUtil.setClientVersion(scan, VersionUtil.encodeVersion("5.0.0"));
try {
NonAggregateRegionScannerFactory.deserializeFromScan(scan, s, false, 100);
fail("Deserialize should fail for 5.0.0 since we didn't serialize thresholdBytes");
} catch (IllegalArgumentException e) {
}
// Check < 4.15
ScanUtil.setClientVersion(scan, VersionUtil.encodeVersion("4.14.0"));
try {
NonAggregateRegionScannerFactory.deserializeFromScan(scan, s, false, 100);
fail("Deserialize should fail for 4.14.0 since we didn't serialize thresholdBytes");
} catch (IllegalArgumentException e) {
}
}
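The test gets by with a bare Mockito.mock(RegionScanner.class), which suggests deserializeFromScan never reads rows from the scanner here. When the code under test does consume rows, the mock needs a stubbed next as well. Below is a small sketch of that, assuming Mockito 2+ (ArgumentMatchers and InvocationOnMock.getArgument); the class and row contents are made up for illustration.

import static org.mockito.ArgumentMatchers.anyList;
import static org.mockito.Mockito.mock;
import static org.mockito.Mockito.when;

import java.util.List;

import org.apache.hadoop.hbase.Cell;
import org.apache.hadoop.hbase.KeyValue;
import org.apache.hadoop.hbase.regionserver.RegionScanner;
import org.apache.hadoop.hbase.util.Bytes;

final class MockScannerSketch {
  // Builds a RegionScanner mock that returns one canned row and then signals end of data.
  static RegionScanner singleRowScanner() throws Exception {
    RegionScanner scanner = mock(RegionScanner.class);
    Cell cell = new KeyValue(Bytes.toBytes("row1"), Bytes.toBytes("f"),
        Bytes.toBytes("q"), Bytes.toBytes(1L));
    when(scanner.next(anyList())).thenAnswer(invocation -> {
      List<Cell> out = invocation.getArgument(0);
      out.add(cell);
      return false;                       // false = no rows after this one
    });
    return scanner;
  }
}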
use of org.apache.hadoop.hbase.regionserver.RegionScanner in project phoenix by apache.
the class CachedLocalTable method doScan.
private List<Cell> doScan(Mutation mutation, Collection<? extends ColumnReference> columnReferences) throws IOException {
byte[] rowKey = mutation.getRow();
// need to use a scan here so we can get raw state, which Get doesn't provide.
Scan scan = IndexManagementUtil.newLocalStateScan(Collections.singletonList(columnReferences));
scan.setStartRow(rowKey);
scan.setStopRow(rowKey);
// Provides a means of client indicating that newer cells should not be considered,
// enabling mutations to be replayed to partially rebuild the index when a write fails.
// When replaying mutations we want the oldest timestamp (as anything newer will be replayed)
// long ts = getOldestTimestamp(m.getFamilyCellMap().values());
long ts = getMutationTimestampWhenAllCellTimestampIsSame(mutation);
scan.setTimeRange(0, ts);
try (RegionScanner regionScanner = region.getScanner(scan)) {
List<Cell> cells = new ArrayList<Cell>(1);
boolean more = regionScanner.next(cells);
assert !more : "Got more than one result when scanning" + " a single row in the primary table!";
return cells;
}
}
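doScan bounds the Scan to a single row (equal start and stop rows behave as a get-style point scan), caps the time range below the mutation timestamp, and expects the whole row back from one next() call. Below is a generic sketch of the same region-local single-row read, assuming an HBase 1.x Region and the Scan setters used above; the class and method names are illustrative.

import java.io.IOException;
import java.util.ArrayList;
import java.util.List;

import org.apache.hadoop.hbase.Cell;
import org.apache.hadoop.hbase.client.Scan;
import org.apache.hadoop.hbase.regionserver.Region;
import org.apache.hadoop.hbase.regionserver.RegionScanner;

final class SingleRowScanSketch {
  // Reads the cells of one row, ignoring anything written at or after maxTimestamp.
  static List<Cell> readRow(Region region, byte[] rowKey, long maxTimestamp)
      throws IOException {
    Scan scan = new Scan();
    scan.setStartRow(rowKey);
    scan.setStopRow(rowKey);              // start == stop is treated as a point (get-style) scan
    scan.setTimeRange(0, maxTimestamp);   // upper bound is exclusive
    try (RegionScanner scanner = region.getScanner(scan)) {
      List<Cell> cells = new ArrayList<>(1);
      boolean more = scanner.next(cells);
      assert !more : "expected the single-row scan to finish in one next() call";
      return cells;
    }
  }
}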