Use of io.cdap.cdap.api.dataset.table.Scanner in project cdap by caskdata.
In class HBaseTableTest, method testScannerCache:
private void testScannerCache(int rowsExpected, String tableName, @Nullable String property,
                              @Nullable String argument, @Nullable String scanArgument) throws Exception {
  Transaction tx = txClient.startShort();
  DatasetProperties props = property == null ? DatasetProperties.EMPTY :
    DatasetProperties.of(ImmutableMap.of(HConstants.HBASE_CLIENT_SCANNER_CACHING, property));
  Map<String, String> arguments = argument == null ? Collections.<String, String>emptyMap() :
    ImmutableMap.of(HConstants.HBASE_CLIENT_SCANNER_CACHING, argument);
  Scan scan = new Scan(null, null);
  if (scanArgument != null) {
    scan.setProperty(HConstants.HBASE_CLIENT_SCANNER_CACHING, scanArgument);
  }
  try (Table table = getTable(CONTEXT1, tableName, props, arguments)) {
    ((TransactionAware) table).startTx(tx);
    // Now scan and sleep for a while after each result
    Scanner scanner = table.scan(scan);
    int scanCount = 0;
    try {
      while (scanner.next() != null) {
        scanCount++;
        TimeUnit.MILLISECONDS.sleep(10);
      }
      scanner.close();
    } finally {
      LOG.info("Scanned {} rows.", scanCount);
      txClient.abort(tx);
    }
    Assert.assertEquals(rowsExpected, scanCount);
  }
}
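For illustration, the helper might be driven with different combinations of dataset property, runtime argument, and per-scan setting. The table name and row count below are hypothetical, and the comments assume the usual precedence (per-scan property over runtime argument over dataset property), which is what a test like this would verify:

// Hypothetical invocations; values are illustrative, not from the CDAP test suite
testScannerCache(1000, "cacheTestTable", "100", null, null);   // caching from the dataset property only
testScannerCache(1000, "cacheTestTable", "100", "500", null);  // runtime argument overrides the property
testScannerCache(1000, "cacheTestTable", "100", "500", "50");  // per-scan property overrides both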
Use of io.cdap.cdap.api.dataset.table.Scanner in project cdap by caskdata.
In class PartitionedFileSetDataset, method consumePartitions:
// PartitionConsumerState consists of two things:
// 1) A list of transaction IDs representing the transactions that were in progress during the previous call.
//    Each of these transaction IDs needs to be checked for new partitions, because partitions may have been
//    created by those transactions since the previous call.
// 2) A transaction ID from which to start scanning for new partitions. This is the exclusive upper bound at
//    which the previous call stopped scanning.
// Note that each of the transaction IDs in (1) will be smaller than the transaction ID in (2).
@ReadWrite
@Override
public PartitionConsumerResult consumePartitions(PartitionConsumerState partitionConsumerState, int limit,
                                                 Predicate<PartitionDetail> predicate) {
  List<Long> previousInProgress = partitionConsumerState.getVersionsToCheck();
  Set<Long> noLongerInProgress = setDiff(previousInProgress, tx.getInProgress());
  List<PartitionDetail> partitions = Lists.newArrayList();
  Iterator<Long> iter = noLongerInProgress.iterator();
  while (iter.hasNext()) {
    Long txId = iter.next();
    if (partitions.size() >= limit) {
      break;
    }
    try (Scanner scanner = partitionsTable.readByIndex(WRITE_PTR_COL, Bytes.toBytes(txId))) {
      scannerToPartitions(scanner, partitions, limit, predicate);
    }
    // remove the txId, since its partitions have just been added to the list;
    // if it were not removed, it would be persisted in the state for the next scan
    iter.remove();
  }
  // exclusive scan end, to be used as the start for the next call to consumePartitions
  long scanUpTo;
  if (partitions.size() < limit) {
    // no read-your-own-writes (partitions)
    scanUpTo = Math.min(tx.getWritePointer(), tx.getReadPointer() + 1);
    Long endTxId;
    try (Scanner scanner = partitionsTable.scanByIndex(WRITE_PTR_COL,
                                                       Bytes.toBytes(partitionConsumerState.getStartVersion()),
                                                       Bytes.toBytes(scanUpTo))) {
      endTxId = scannerToPartitions(scanner, partitions, limit, predicate);
    }
    if (endTxId != null) {
      // non-null means that the scanner was not exhausted
      scanUpTo = endTxId;
    }
  } else {
    // if we have already hit the limit, don't scan; instead, reuse the current startVersion
    // as the start version for the next call to consumePartitions
    scanUpTo = partitionConsumerState.getStartVersion();
  }
  List<Long> inProgressBeforeScanEnd = Lists.newArrayList(noLongerInProgress);
  for (long txId : tx.getInProgress()) {
    if (txId >= scanUpTo) {
      break;
    }
    inProgressBeforeScanEnd.add(txId);
  }
  return new PartitionConsumerResult(new PartitionConsumerState(scanUpTo, inProgressBeforeScanEnd), partitions);
}
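A minimal consumer loop over this state machine might look like the following sketch. PartitionConsumerState.FROM_BEGINNING and the accessor names on PartitionConsumerResult are assumed from the CDAP partition API, and process() is a hypothetical handler:

// Sketch: incrementally consume partitions, carrying the state between calls
PartitionConsumerState state = PartitionConsumerState.FROM_BEGINNING;
while (true) {
  PartitionConsumerResult result = dataset.consumePartitions(state, 100, partition -> true);
  for (PartitionDetail partition : result.getPartitions()) {
    process(partition); // hypothetical handler for newly visible partitions
  }
  // carry the returned state forward so the next call sees only new partitions
  state = result.getPartitionConsumerState();
  if (result.getPartitions().isEmpty()) {
    break; // nothing new this round; a real consumer would retry later
  }
}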
Use of io.cdap.cdap.api.dataset.table.Scanner in project cdap by caskdata.
In class PartitionedFileSetDataset, method getPartitions:
private void getPartitions(@Nullable PartitionFilter filter, PartitionConsumer consumer, boolean decodeMetadata,
                           @Nullable byte[] startKey, @Nullable byte[] endKey, long limit) {
  long count = 0L;
  try (Scanner scanner = partitionsTable.scan(startKey, endKey)) {
    while (count < limit) {
      Row row = scanner.next();
      if (row == null) {
        break;
      }
      PartitionKey key;
      try {
        key = parseRowKey(row.getRow(), partitioning);
      } catch (IllegalArgumentException e) {
        LOG.debug(String.format("Failed to parse row key for partitioned file set '%s': %s",
                                getName(), Bytes.toStringBinary(row.getRow())));
        continue;
      }
      if (filter != null && !filter.match(key)) {
        continue;
      }
      byte[] pathBytes = row.get(RELATIVE_PATH);
      if (pathBytes != null) {
        consumer.consume(key, Bytes.toString(pathBytes), decodeMetadata ? metadataFromRow(row) : null);
      }
      count++;
    }
    if (count == 0) {
      warnIfInvalidPartitionFilter(filter, partitioning);
    }
  }
}
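Callers reach this private scan through the public PartitionedFileSet API. A hedged sketch, assuming the PartitionFilter builder and getPartitions(filter) from the CDAP partition API, with a hypothetical "year" partition field:

// Hypothetical: fetch partitions whose "year" field equals 2017
PartitionFilter filter = PartitionFilter.builder().addValueCondition("year", 2017).build();
Set<PartitionDetail> matched = pfs.getPartitions(filter); // pfs is a PartitionedFileSet instance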
Use of io.cdap.cdap.api.dataset.table.Scanner in project cdap by caskdata.
In class TableTest, method countRows:
private static int countRows(Table table) throws Exception {
  // scan the full table; try-with-resources ensures the scanner is closed
  try (Scanner scanner = table.scan(null, null)) {
    int count = 0;
    while (scanner.next() != null) {
      count++;
    }
    return count;
  }
}
Use of io.cdap.cdap.api.dataset.table.Scanner in project cdap by caskdata.
In class MetricsTableTest, method countRange:
private static int countRange(MetricsTable table, Integer start, Integer stop) throws Exception {
  // null bounds mean an unbounded scan on that side; close the scanner when done
  try (Scanner scanner = table.scan(start == null ? null : Bytes.toBytes(start),
                                    stop == null ? null : Bytes.toBytes(stop), null)) {
    int count = 0;
    while (scanner.next() != null) {
      count++;
    }
    return count;
  }
}
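Note that this helper works because Bytes.toBytes(int) produces big-endian encodings, so lexicographic byte order matches numeric order for non-negative integers. A usage sketch (table and values hypothetical, and assuming the stop key is exclusive, as in HBase scans):

int inRange = countRange(metricsTable, 10, 20);   // rows with integer keys in [10, 20)
int total = countRange(metricsTable, null, null); // null bounds scan the whole table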