Search in sources:

Example 1 with ScanBuilder

use of co.cask.cdap.data2.util.hbase.ScanBuilder in project cdap by caskdata.

the class HBaseTable method setFilterIfNeeded.

/**
 * Installs the HBase equivalent of the given filter on the scan.
 *
 * <p>A {@code null} filter leaves the scan untouched. The only supported filter type is
 * {@code FuzzyRowFilter}, whose fuzzy key pairs are copied one by one into HBase {@code Pair}s
 * and wrapped in an {@code org.apache.hadoop.hbase.filter.FuzzyRowFilter}.
 *
 * @param scan the scan builder that receives the translated filter
 * @param filter the filter to translate, or {@code null} when no filtering is requested
 * @throws IllegalArgumentException if {@code filter} is non-null and not a {@code FuzzyRowFilter}
 */
private void setFilterIfNeeded(ScanBuilder scan, @Nullable Filter filter) {
    // Nothing to do when the caller did not ask for filtering.
    if (filter == null) {
        return;
    }
    // Reject anything that cannot be translated before doing any work.
    if (!(filter instanceof FuzzyRowFilter)) {
        throw new IllegalArgumentException("Unsupported filter: " + filter);
    }
    FuzzyRowFilter source = (FuzzyRowFilter) filter;
    int expectedPairs = source.getFuzzyKeysData().size();
    List<Pair<byte[], byte[]>> hbasePairs = Lists.newArrayListWithExpectedSize(expectedPairs);
    for (ImmutablePair<byte[], byte[]> keyData : source.getFuzzyKeysData()) {
        Pair<byte[], byte[]> converted = Pair.newPair(keyData.getFirst(), keyData.getSecond());
        hbasePairs.add(converted);
    }
    scan.setFilter(new org.apache.hadoop.hbase.filter.FuzzyRowFilter(hbasePairs));
}
Also used : FuzzyRowFilter(co.cask.cdap.data2.dataset2.lib.table.FuzzyRowFilter) ImmutablePair(co.cask.cdap.common.utils.ImmutablePair) Pair(org.apache.hadoop.hbase.util.Pair)

Example 2 with ScanBuilder

use of co.cask.cdap.data2.util.hbase.ScanBuilder in project cdap by caskdata.

the class HBaseQueueDebugger method scanQueue.

/**
 * Scans one section of a queue's HBase table and feeds every row into {@code outStats}.
 *
 * <p>The section starts at {@code start}'s start row and ends at {@code end}'s start row, or at
 * the row key built from {@code Long.MAX_VALUE}/{@code Integer.MAX_VALUE} when {@code end} is
 * null. One scanner is created per consumer instance (only instance 0 for the FIFO dequeue
 * strategy) and each scanner is drained inside a transaction run by {@code txExecutor}.
 *
 * <p>The HBase table opened for the scan is always closed before this method returns.
 *
 * @param txExecutor executor used to run each per-instance scan
 * @param stateStore consumer state store handed to the transactional procedure
 * @param queueName name of the queue being inspected
 * @param start barrier that supplies the group config and the scan start row
 * @param end barrier whose start row is the scan stop row, or {@code null} to scan to the end
 * @param outStats accumulator updated for every visited row
 * @throws Exception if the scan fails, the table cannot be closed, or a transaction fails for a
 *         reason other than {@code TransactionNotInProgressException}
 */
private void scanQueue(TransactionExecutor txExecutor, HBaseConsumerStateStore stateStore, QueueName queueName, QueueBarrier start, @Nullable QueueBarrier end, final QueueStatistics outStats) throws Exception {
    final byte[] queueRowPrefix = QueueEntryRow.getQueueRowPrefix(queueName);
    ConsumerGroupConfig groupConfig = start.getGroupConfig();
    printProgress("Got consumer group config: %s\n", groupConfig);
    HBaseQueueAdmin admin = queueClientFactory.getQueueAdmin();
    TableId tableId = admin.getDataTableId(queueName, QueueConstants.QueueType.SHARDED_QUEUE);
    HTable hTable = queueClientFactory.createHTable(tableId);
    // The table is owned by this method, so release it on every exit path.
    try {
        printProgress("Looking at HBase table: %s\n", Bytes.toString(hTable.getTableName()));
        final byte[] stateColumnName = Bytes.add(QueueEntryRow.STATE_COLUMN_PREFIX, Bytes.toBytes(groupConfig.getGroupId()));
        int distributorBuckets = queueClientFactory.getDistributorBuckets(hTable.getTableDescriptor());
        ShardedHBaseQueueStrategy queueStrategy = new ShardedHBaseQueueStrategy(tableUtil, distributorBuckets);
        ScanBuilder scan = tableUtil.buildScan();
        scan.setStartRow(start.getStartRow());
        if (end != null) {
            scan.setStopRow(end.getStartRow());
        } else {
            scan.setStopRow(QueueEntryRow.getQueueEntryRowKey(queueName, Long.MAX_VALUE, Integer.MAX_VALUE));
        }
        // Needs to include meta column for row that doesn't have state yet.
        scan.addColumn(QueueEntryRow.COLUMN_FAMILY, QueueEntryRow.META_COLUMN);
        scan.addColumn(QueueEntryRow.COLUMN_FAMILY, stateColumnName);
        // Don't use the block cache for the debug tool; we don't want old blocks to get cached.
        scan.setCacheBlocks(false);
        scan.setMaxVersions(1);
        printProgress("Scanning section with scan: %s\n", scan.toString());
        // FIFO groups are scanned once as instance 0; other strategies are scanned per instance.
        List<Integer> instanceIds = Lists.newArrayList();
        if (groupConfig.getDequeueStrategy() == DequeueStrategy.FIFO) {
            instanceIds.add(0);
        } else {
            for (int instanceId = 0; instanceId < groupConfig.getGroupSize(); instanceId++) {
                instanceIds.add(instanceId);
            }
        }
        // Row cache size is also used as the interval between progress reports.
        final int rowsCache = Integer.parseInt(System.getProperty(PROP_ROWS_CACHE, "100000"));
        for (final int instanceId : instanceIds) {
            printProgress("Processing instance %d", instanceId);
            ConsumerConfig consConfig = new ConsumerConfig(groupConfig, instanceId);
            // NOTE(review): the scanner is never explicitly released here; if QueueScanner
            // exposes a close method it should probably be invoked after the scan - confirm.
            final QueueScanner scanner = queueStrategy.createScanner(consConfig, hTable, scan.build(), rowsCache);
            try {
                txExecutor.execute(new TransactionExecutor.Procedure<HBaseConsumerStateStore>() {

                    @Override
                    public void apply(HBaseConsumerStateStore input) throws Exception {
                        ImmutablePair<byte[], Map<byte[], byte[]>> result;
                        while ((result = scanner.next()) != null) {
                            byte[] rowKey = result.getFirst();
                            Map<byte[], byte[]> columns = result.getSecond();
                            visitRow(outStats, input.getTransaction(), rowKey, columns.get(stateColumnName), queueRowPrefix.length);
                            if (showProgress() && outStats.getTotal() % rowsCache == 0) {
                                System.out.printf("\rProcessing instance %d: %s", instanceId, outStats.getReport(showTxTimestampOnly()));
                            }
                        }
                    }
                }, stateStore);
            } catch (TransactionFailureException e) {
                // Ignore transaction not in progress exception as it's caused by short TX timeout on commit
                if (!(Throwables.getRootCause(e) instanceof TransactionNotInProgressException)) {
                    throw Throwables.propagate(e);
                }
            }
            printProgress("\rProcessing instance %d: %s\n", instanceId, outStats.getReport(showTxTimestampOnly()));
        }
    } finally {
        hTable.close();
    }
}
Also used : TableId(co.cask.cdap.data2.util.TableId) ShardedHBaseQueueStrategy(co.cask.cdap.data2.transaction.queue.hbase.ShardedHBaseQueueStrategy) ScanBuilder(co.cask.cdap.data2.util.hbase.ScanBuilder) TransactionExecutor(org.apache.tephra.TransactionExecutor) TransactionNotInProgressException(org.apache.tephra.TransactionNotInProgressException) HTable(org.apache.hadoop.hbase.client.HTable) TransactionNotInProgressException(org.apache.tephra.TransactionNotInProgressException) TransactionFailureException(org.apache.tephra.TransactionFailureException) NotFoundException(co.cask.cdap.common.NotFoundException) HBaseConsumerStateStore(co.cask.cdap.data2.transaction.queue.hbase.HBaseConsumerStateStore) TransactionFailureException(org.apache.tephra.TransactionFailureException) ImmutablePair(co.cask.cdap.common.utils.ImmutablePair) HBaseQueueAdmin(co.cask.cdap.data2.transaction.queue.hbase.HBaseQueueAdmin) ConsumerConfig(co.cask.cdap.data2.queue.ConsumerConfig) QueueScanner(co.cask.cdap.data2.transaction.queue.QueueScanner) ConsumerGroupConfig(co.cask.cdap.data2.queue.ConsumerGroupConfig) Map(java.util.Map)

Example 3 with ScanBuilder

use of co.cask.cdap.data2.util.hbase.ScanBuilder in project cdap by caskdata.

the class ReplicationStatusTool method getMapFromTable.

/**
 * Scans the replication state table and collects, per region name, the largest timestamp
 * found for the given row type.
 *
 * @param rowType row type passed to the scan builder and to the timestamp extraction
 * @return map from region name to the greatest timestamp seen for that region
 * @throws IOException declared by the signature; failures raised while reading are logged and
 *         rethrown through {@code Throwables.propagate}
 */
private static Map<String, Long> getMapFromTable(String rowType) throws IOException {
    HBaseTableUtil tableUtil = new HBaseTableUtilFactory(cConf).get();
    HTable hTable = tableUtil.createHTable(hConf, getReplicationStateTableId(tableUtil));
    // Scan the table to scan for all regions.
    ScanBuilder scan = getScanBuilder(tableUtil, rowType);
    Map<String, Long> latestByRegion = new HashMap<>();
    try (ResultScanner rows = hTable.getScanner(scan.build())) {
        for (Result row = rows.next(); row != null; row = rows.next()) {
            String region = new ReplicationStatusKey(row.getRow()).getRegionName();
            Long timestamp = getTimeFromResult(row, rowType);
            // Keep only the newest timestamp observed for each region.
            Long known = latestByRegion.get(region);
            if (known == null || timestamp > known) {
                latestByRegion.put(region, timestamp);
            }
        }
    } catch (Exception e) {
        LOG.error("Error while reading table.", e);
        throw Throwables.propagate(e);
    } finally {
        hTable.close();
    }
    return latestByRegion;
}
Also used : ResultScanner(org.apache.hadoop.hbase.client.ResultScanner) HashMap(java.util.HashMap) ReplicationStatusKey(co.cask.cdap.replication.ReplicationStatusKey) ScanBuilder(co.cask.cdap.data2.util.hbase.ScanBuilder) HBaseTableUtilFactory(co.cask.cdap.data2.util.hbase.HBaseTableUtilFactory) HTable(org.apache.hadoop.hbase.client.HTable) HBaseTableUtil(co.cask.cdap.data2.util.hbase.HBaseTableUtil) IOException(java.io.IOException) FileNotFoundException(java.io.FileNotFoundException) ParseException(org.apache.commons.cli.ParseException) Result(org.apache.hadoop.hbase.client.Result)

Example 4 with ScanBuilder

use of co.cask.cdap.data2.util.hbase.ScanBuilder in project cdap by caskdata.

the class SaltedHBaseQueueStrategy method createScanner.

/**
 * Creates a queue scanner that runs the given scan through a {@code DistributedScanner}.
 *
 * @param consumerConfig consumer configuration (not consulted by this implementation)
 * @param hTable table to scan
 * @param scan base scan that is copied into a new scan builder before caching is applied
 * @param numRows row count used to size the scan caching and passed to the returned scanner
 * @return an {@code HBaseQueueScanner} wrapping the distributed result scanner
 * @throws IOException if the underlying scanner cannot be created
 */
@Override
public QueueScanner createScanner(ConsumerConfig consumerConfig, HTable hTable, Scan scan, int numRows) throws IOException {
    // we should roughly divide by number of buckets, but don't want another RPC for the case we are not exactly right
    int rowsPerBucket = (int) (1.1 * numRows / distributorBuckets);
    ScanBuilder builder = tableUtil.buildScan(scan);
    builder.setCaching(rowsPerBucket);
    Scan bucketedScan = builder.build();
    ResultScanner results = DistributedScanner.create(hTable, bucketedScan, rowKeyDistributor, scansExecutor);
    return new HBaseQueueScanner(results, numRows, rowKeyConverter);
}
Also used : ResultScanner(org.apache.hadoop.hbase.client.ResultScanner) ScanBuilder(co.cask.cdap.data2.util.hbase.ScanBuilder)

Example 5 with ScanBuilder

use of co.cask.cdap.data2.util.hbase.ScanBuilder in project cdap by caskdata.

the class ShardedHBaseQueueStrategy method createHBaseScanner.

/**
 * Builds a distributed HBase scanner whose start and stop rows are rewritten with the sharded
 * key for the given consumer instance.
 *
 * <p>Empty start or stop rows on the incoming scan are left as they are.
 *
 * @param consumerConfig consumer whose instance id is used to derive the sharded keys
 * @param hTable table to scan
 * @param scan base scan providing the original start and stop rows
 * @param numRows row count used to size the scan caching
 * @return the result scanner created by {@code DistributedScanner}
 * @throws IOException if the scanner cannot be created
 */
private ResultScanner createHBaseScanner(ConsumerConfig consumerConfig, HTable hTable, Scan scan, int numRows) throws IOException {
    // Modify the scan with sharded key prefix
    ScanBuilder builder = tableUtil.buildScan(scan);

    byte[] originalStart = scan.getStartRow();
    if (originalStart.length > 0) {
        builder.setStartRow(getShardedKey(consumerConfig, consumerConfig.getInstanceId(), originalStart));
    }

    byte[] originalStop = scan.getStopRow();
    if (originalStop.length > 0) {
        builder.setStopRow(getShardedKey(consumerConfig, consumerConfig.getInstanceId(), originalStop));
    }

    // we should roughly divide by number of buckets, but don't want another RPC for the case we are not exactly right
    int rowsPerBucket = (int) (1.1 * numRows / distributorBuckets);
    builder.setCaching(rowsPerBucket);

    return DistributedScanner.create(hTable, builder.build(), rowKeyDistributor, scansExecutor);
}
Also used : ScanBuilder(co.cask.cdap.data2.util.hbase.ScanBuilder)

Aggregations

ScanBuilder (co.cask.cdap.data2.util.hbase.ScanBuilder)9 ResultScanner (org.apache.hadoop.hbase.client.ResultScanner)5 HTable (org.apache.hadoop.hbase.client.HTable)3 ImmutablePair (co.cask.cdap.common.utils.ImmutablePair)2 HBaseTableUtil (co.cask.cdap.data2.util.hbase.HBaseTableUtil)2 HBaseTableUtilFactory (co.cask.cdap.data2.util.hbase.HBaseTableUtilFactory)2 ReplicationStatusKey (co.cask.cdap.replication.ReplicationStatusKey)2 IOException (java.io.IOException)2 Result (org.apache.hadoop.hbase.client.Result)2 ReadOnly (co.cask.cdap.api.annotation.ReadOnly)1 DataSetException (co.cask.cdap.api.dataset.DataSetException)1 NotFoundException (co.cask.cdap.common.NotFoundException)1 FuzzyRowFilter (co.cask.cdap.data2.dataset2.lib.table.FuzzyRowFilter)1 ConsumerConfig (co.cask.cdap.data2.queue.ConsumerConfig)1 ConsumerGroupConfig (co.cask.cdap.data2.queue.ConsumerGroupConfig)1 QueueScanner (co.cask.cdap.data2.transaction.queue.QueueScanner)1 HBaseConsumerStateStore (co.cask.cdap.data2.transaction.queue.hbase.HBaseConsumerStateStore)1 HBaseQueueAdmin (co.cask.cdap.data2.transaction.queue.hbase.HBaseQueueAdmin)1 ShardedHBaseQueueStrategy (co.cask.cdap.data2.transaction.queue.hbase.ShardedHBaseQueueStrategy)1 TableId (co.cask.cdap.data2.util.TableId)1