Example 21 with Scanner

Use of io.cdap.cdap.api.dataset.table.Scanner in project cdap by caskdata.

The class HBaseTableTest, method testScannerCache:

private void testScannerCache(int rowsExpected, String tableName, @Nullable String property, @Nullable String argument, @Nullable String scanArgument) throws Exception {
    // Configure the scanner cache at up to three levels: dataset property, runtime argument, and per-scan setting
    Transaction tx = txClient.startShort();
    DatasetProperties props = property == null ? DatasetProperties.EMPTY : DatasetProperties.of(ImmutableMap.of(HConstants.HBASE_CLIENT_SCANNER_CACHING, property));
    Map<String, String> arguments = argument == null ? Collections.<String, String>emptyMap() : ImmutableMap.of(HConstants.HBASE_CLIENT_SCANNER_CACHING, argument);
    Scan scan = new Scan(null, null);
    if (scanArgument != null) {
        scan.setProperty(HConstants.HBASE_CLIENT_SCANNER_CACHING, scanArgument);
    }
    try (Table table = getTable(CONTEXT1, tableName, props, arguments)) {
        ((TransactionAware) table).startTx(tx);
        int scanCount = 0;
        // scan, sleeping briefly after each result; try-with-resources ensures the scanner
        // is closed even if next() throws
        try (Scanner scanner = table.scan(scan)) {
            while (scanner.next() != null) {
                scanCount++;
                TimeUnit.MILLISECONDS.sleep(10);
            }
        } finally {
            LOG.info("Scanned {} rows.", scanCount);
            txClient.abort(tx);
        }
        Assert.assertEquals(rowsExpected, scanCount);
    }
}
Also used : RegionScanner(org.apache.hadoop.hbase.regionserver.RegionScanner) Scanner(io.cdap.cdap.api.dataset.table.Scanner) ResultScanner(org.apache.hadoop.hbase.client.ResultScanner) BufferingTable(io.cdap.cdap.data2.dataset2.lib.table.BufferingTable) Table(io.cdap.cdap.api.dataset.table.Table) DelegatingTable(io.cdap.cdap.data2.util.hbase.DelegatingTable) Transaction(org.apache.tephra.Transaction) TransactionAware(org.apache.tephra.TransactionAware) DatasetProperties(io.cdap.cdap.api.dataset.DatasetProperties) Scan(io.cdap.cdap.api.dataset.table.Scan)
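
The three @Nullable parameters correspond to the three levels at which HConstants.HBASE_CLIENT_SCANNER_CACHING can be set: as a dataset property, as a runtime argument, and as a per-scan setting. A minimal sketch of how the helper might be driven; the table name and row count are hypothetical:

// Hypothetical invocations; each call exercises one more configuration level.
testScannerCache(1000, "cacheTest", null, null, null);    // default cache size
testScannerCache(1000, "cacheTest", "100", null, null);   // dataset property only
testScannerCache(1000, "cacheTest", "100", "500", null);  // runtime argument overrides the property
testScannerCache(1000, "cacheTest", "100", "500", "50");  // per-scan setting overrides both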

Example 22 with Scanner

Use of io.cdap.cdap.api.dataset.table.Scanner in project cdap by caskdata.

The class PartitionedFileSetDataset, method consumePartitions:

// PartitionConsumerState consists of two things:
// 1) A list of transaction IDs representing the transactions that were in progress during the previous call.
// Each of these transaction IDs needs to be checked for new partitions, because partitions may have been
// created by those transactions since the previous call.
// 2) A transaction ID from which to start scanning for new partitions. This is the exclusive end of the range
// that the previous call stopped scanning at.
// Note that each of the transaction IDs in (1) will be smaller than the transaction ID in (2).
@ReadWrite
@Override
public PartitionConsumerResult consumePartitions(PartitionConsumerState partitionConsumerState, int limit, Predicate<PartitionDetail> predicate) {
    List<Long> previousInProgress = partitionConsumerState.getVersionsToCheck();
    Set<Long> noLongerInProgress = setDiff(previousInProgress, tx.getInProgress());
    List<PartitionDetail> partitions = Lists.newArrayList();
    Iterator<Long> iter = noLongerInProgress.iterator();
    while (iter.hasNext()) {
        Long txId = iter.next();
        if (partitions.size() >= limit) {
            break;
        }
        try (Scanner scanner = partitionsTable.readByIndex(WRITE_PTR_COL, Bytes.toBytes(txId))) {
            scannerToPartitions(scanner, partitions, limit, predicate);
        }
        // remove the txIds as they are added to the partitions list already
        // if they're not removed, they will be persisted in the state for the next scan
        iter.remove();
    }
    // exclusive scan end, to be used as the start for a next call to consumePartitions
    long scanUpTo;
    if (partitions.size() < limit) {
        // no read your own writes (partitions)
        scanUpTo = Math.min(tx.getWritePointer(), tx.getReadPointer() + 1);
        Long endTxId;
        try (Scanner scanner = partitionsTable.scanByIndex(WRITE_PTR_COL, Bytes.toBytes(partitionConsumerState.getStartVersion()), Bytes.toBytes(scanUpTo))) {
            endTxId = scannerToPartitions(scanner, partitions, limit, predicate);
        }
        if (endTxId != null) {
            // nonnull means that the scanner was not exhausted
            scanUpTo = endTxId;
        }
    } else {
        // if we have already hit the limit, don't scan; instead, use the startVersion as the startVersion to the next
        // call to consumePartitions
        scanUpTo = partitionConsumerState.getStartVersion();
    }
    List<Long> inProgressBeforeScanEnd = Lists.newArrayList(noLongerInProgress);
    for (long txId : tx.getInProgress()) {
        if (txId >= scanUpTo) {
            break;
        }
        inProgressBeforeScanEnd.add(txId);
    }
    return new PartitionConsumerResult(new PartitionConsumerState(scanUpTo, inProgressBeforeScanEnd), partitions);
}
Also used : Scanner(io.cdap.cdap.api.dataset.table.Scanner) PartitionConsumerResult(io.cdap.cdap.api.dataset.lib.PartitionConsumerResult) PartitionConsumerState(io.cdap.cdap.api.dataset.lib.PartitionConsumerState) AtomicLong(java.util.concurrent.atomic.AtomicLong) PartitionDetail(io.cdap.cdap.api.dataset.lib.PartitionDetail) ReadWrite(io.cdap.cdap.api.annotation.ReadWrite)
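
The comment above describes the contract: callers make progress by threading the returned state into the next call. A minimal consumption loop, sketched assuming the public PartitionedFileSet API (PartitionConsumerState.FROM_BEGINNING, PartitionConsumerResult.getPartitionConsumerState()); the dataset handle and the process() callback are hypothetical:

// Start from the beginning, then always resume from the state returned by the previous call.
PartitionConsumerState state = PartitionConsumerState.FROM_BEGINNING;
PartitionConsumerResult result;
do {
    result = dataset.consumePartitions(state, 100, partition -> true);
    for (PartitionDetail partition : result.getPartitions()) {
        process(partition.getRelativePath());  // process() is a hypothetical consumer
    }
    // the returned state carries both the next scan start and the in-progress transaction IDs to recheck
    state = result.getPartitionConsumerState();
} while (!result.getPartitions().isEmpty());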

Example 23 with Scanner

Use of io.cdap.cdap.api.dataset.table.Scanner in project cdap by caskdata.

The class PartitionedFileSetDataset, method getPartitions:

private void getPartitions(@Nullable PartitionFilter filter, PartitionConsumer consumer, boolean decodeMetadata, @Nullable byte[] startKey, @Nullable byte[] endKey, long limit) {
    long count = 0L;
    try (Scanner scanner = partitionsTable.scan(startKey, endKey)) {
        while (count < limit) {
            Row row = scanner.next();
            if (row == null) {
                break;
            }
            PartitionKey key;
            try {
                key = parseRowKey(row.getRow(), partitioning);
            } catch (IllegalArgumentException e) {
                LOG.debug(String.format("Failed to parse row key for partitioned file set '%s': %s", getName(), Bytes.toStringBinary(row.getRow())), e);
                continue;
            }
            if (filter != null && !filter.match(key)) {
                continue;
            }
            byte[] pathBytes = row.get(RELATIVE_PATH);
            if (pathBytes != null) {
                consumer.consume(key, Bytes.toString(pathBytes), decodeMetadata ? metadataFromRow(row) : null);
            }
            count++;
        }
        if (count == 0) {
            warnIfInvalidPartitionFilter(filter, partitioning);
        }
    }
}
Also used : Scanner(io.cdap.cdap.api.dataset.table.Scanner) PartitionKey(io.cdap.cdap.api.dataset.lib.PartitionKey) Row(io.cdap.cdap.api.dataset.table.Row)
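
Since the PartitionConsumer callback receives one (key, path, metadata) triple per matching row, collecting results is plain accumulation. A hedged sketch, assuming the callback interface matches the consume() call above; the filter field name is hypothetical:

// Collect the relative paths of all partitions where "year" equals 2021.
List<String> paths = new ArrayList<>();
PartitionFilter filter = PartitionFilter.builder().addValueCondition("year", 2021).build();
getPartitions(filter, (key, path, metadata) -> paths.add(path), false, null, null, Long.MAX_VALUE);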

Example 24 with Scanner

Use of io.cdap.cdap.api.dataset.table.Scanner in project cdap by caskdata.

The class TableTest, method countRows:

private static int countRows(Table table) throws Exception {
    // null bounds mean a full-table scan; try-with-resources ensures the scanner is closed
    try (Scanner scanner = table.scan(null, null)) {
        int count = 0;
        while (scanner.next() != null) {
            count++;
        }
        return count;
    }
}
Also used : Scanner(io.cdap.cdap.api.dataset.table.Scanner)
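
Passing null for both bounds makes this a full-table scan. Restricting the range is a one-argument change; a hypothetical bounded variant:

// Hypothetical variant: count only rows whose keys fall in [startRow, stopRow).
private static int countRows(Table table, byte[] startRow, byte[] stopRow) throws Exception {
    try (Scanner scanner = table.scan(startRow, stopRow)) {
        int count = 0;
        while (scanner.next() != null) {
            count++;
        }
        return count;
    }
}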

Example 25 with Scanner

Use of io.cdap.cdap.api.dataset.table.Scanner in project cdap by caskdata.

The class MetricsTableTest, method countRange:

private static int countRange(MetricsTable table, Integer start, Integer stop) throws Exception {
    // bounds are encoded as 4-byte integers; a null bound is unbounded on that side
    try (Scanner scanner = table.scan(start == null ? null : Bytes.toBytes(start),
                                      stop == null ? null : Bytes.toBytes(stop), null)) {
        int count = 0;
        while (scanner.next() != null) {
            count++;
        }
        return count;
    }
}
Also used : Scanner(io.cdap.cdap.api.dataset.table.Scanner)
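
Because the bounds are encoded with Bytes.toBytes(int), the helper assumes row keys written with the same 4-byte integer encoding. A hypothetical usage, with the table contents and expected counts invented for illustration:

// Assuming rows keyed by Bytes.toBytes(i) for i in [0, 100):
Assert.assertEquals(10, countRange(metricsTable, 10, 20));      // counts keys in [10, 20)
Assert.assertEquals(100, countRange(metricsTable, null, null)); // null bounds scan everything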

Aggregations

Scanner (io.cdap.cdap.api.dataset.table.Scanner) 104
Row (io.cdap.cdap.api.dataset.table.Row) 77
Test (org.junit.Test) 26
Table (io.cdap.cdap.api.dataset.table.Table) 14
ArrayList (java.util.ArrayList) 14
Scan (io.cdap.cdap.api.dataset.table.Scan) 12
MDSKey (io.cdap.cdap.data2.dataset2.lib.table.MDSKey) 12
HashMap (java.util.HashMap) 11
FuzzyRowFilter (io.cdap.cdap.data2.dataset2.lib.table.FuzzyRowFilter) 10
DatasetId (io.cdap.cdap.proto.id.DatasetId) 10
TransactionExecutor (org.apache.tephra.TransactionExecutor) 10
Schema (io.cdap.cdap.api.data.schema.Schema) 9
DatasetProperties (io.cdap.cdap.api.dataset.DatasetProperties) 8
TableId (io.cdap.cdap.data2.util.TableId) 8
IOException (java.io.IOException) 8
List (java.util.List) 8
Transaction (org.apache.tephra.Transaction) 8
ReadOnly (io.cdap.cdap.api.annotation.ReadOnly) 6
StructuredRecord (io.cdap.cdap.api.data.format.StructuredRecord) 6
Delete (io.cdap.cdap.api.dataset.table.Delete) 6