Examples with RowKeyDistributorByHashPrefix - co.cask.cdap.hbase.wd.RowKeyDistributorByHashPrefix

Example 1 with RowKeyDistributorByHashPrefix

use of co.cask.cdap.hbase.wd.RowKeyDistributorByHashPrefix in project cdap by caskdata.

the class HBaseStreamFileConsumerFactory method create.

/**
 * Builds a {@link StreamConsumer} backed by an HBase table.
 *
 * <p>The DDL executor is asked to create the table (if it does not exist yet) with a number of
 * split keys derived from {@code Constants.Stream.CONSUMER_TABLE_PRESPLITS}; the same number is
 * recorded on the table descriptor under {@code QueueConstants.DISTRIBUTOR_BUCKETS}. The returned
 * consumer writes through an {@link HTable} with auto-flush turned off.
 *
 * @throws IOException propagated from the table creation / table access calls
 */
@Override
protected StreamConsumer create(TableId tableId, StreamConfig streamConfig, ConsumerConfig consumerConfig, StreamConsumerStateStore stateStore, StreamConsumerState beginConsumerState, FileReader<StreamEventOffset, Iterable<StreamFileOffset>> reader, @Nullable ReadFilter extraFilter) throws IOException {
    // The configured pre-split count doubles as the number of hash buckets.
    int bucketCount = cConf.getInt(Constants.Stream.CONSUMER_TABLE_PRESPLITS);
    AbstractRowKeyDistributor presplitDistributor =
        new RowKeyDistributorByHashPrefix(new RowKeyDistributorByHashPrefix.OneByteSimpleHash(bucketCount));
    byte[][] regionSplitKeys = HBaseTableUtil.getSplitKeys(bucketCount, bucketCount, presplitDistributor);

    // Describe the table: a single column family plus the bucket count as a table property.
    TableId hBaseTableId =
        tableUtil.createHTableId(new NamespaceId(tableId.getNamespace()), tableId.getTableName());
    TableDescriptorBuilder descriptorBuilder = HBaseTableUtil.getTableDescriptorBuilder(hBaseTableId, cConf);
    ColumnFamilyDescriptorBuilder familyBuilder =
        HBaseTableUtil.getColumnFamilyDescriptorBuilder(Bytes.toString(QueueEntryRow.COLUMN_FAMILY), hConf);
    descriptorBuilder.addColumnFamily(familyBuilder.build());
    descriptorBuilder.addProperty(QueueConstants.DISTRIBUTOR_BUCKETS, Integer.toString(bucketCount));

    try (HBaseDDLExecutor executor = ddlExecutorFactory.get()) {
        executor.createTableIfNotExists(descriptorBuilder.build(), regionSplitKeys);
    }

    // Open the table for buffered writes.
    HTable consumerTable = tableUtil.createHTable(hConf, hBaseTableId);
    consumerTable.setWriteBufferSize(Constants.Stream.HBASE_WRITE_BUFFER_SIZE);
    consumerTable.setAutoFlushTo(false);

    // The consumer's distributor is derived from the descriptor of the table as it exists in HBase.
    AbstractRowKeyDistributor consumerDistributor = createKeyDistributor(consumerTable.getTableDescriptor());
    return new HBaseStreamFileConsumer(cConf, streamConfig, consumerConfig, tableUtil, consumerTable, reader,
                                       stateStore, beginConsumerState, extraFilter, consumerDistributor);
}

Also used : TableId(co.cask.cdap.data2.util.TableId) HBaseDDLExecutor(co.cask.cdap.spi.hbase.HBaseDDLExecutor) RowKeyDistributorByHashPrefix(co.cask.cdap.hbase.wd.RowKeyDistributorByHashPrefix) AbstractRowKeyDistributor(co.cask.cdap.hbase.wd.AbstractRowKeyDistributor) ColumnFamilyDescriptorBuilder(co.cask.cdap.data2.util.hbase.ColumnFamilyDescriptorBuilder) TableDescriptorBuilder(co.cask.cdap.data2.util.hbase.TableDescriptorBuilder) NamespaceId(co.cask.cdap.proto.id.NamespaceId) HTable(org.apache.hadoop.hbase.client.HTable)

Example 2 with RowKeyDistributorByHashPrefix

use of co.cask.cdap.hbase.wd.RowKeyDistributorByHashPrefix in project cdap by caskdata.

the class HBaseTableFactory method createTable.

/**
 * Returns an {@link HTable} for the given {@link TableId}, wrapped together with a hash-prefix
 * row key distributor. When no descriptor is cached for the table and the table is not found in
 * HBase, it is created with the given number of splits and the given coprocessor attached.
 *
 * @param tableId the table to open
 * @param splits number of splits (and key distributor buckets) used if the table gets created
 * @param coprocessor coprocessor class registered on a newly created table
 * @throws IOException propagated from the HBase admin / DDL / table calls
 */
private HTableWithRowKeyDistributor createTable(TableId tableId, int splits, Class<? extends Coprocessor> coprocessor) throws IOException {
    // A cached descriptor is taken as proof that the HBase table already exists.
    HTable openedTable = null;
    HTableDescriptor descriptor = tableDescriptors.get(tableId);
    if (descriptor == null) {
        synchronized (this) {
            // Look again under the lock before doing any work.
            descriptor = tableDescriptors.get(tableId);
            if (descriptor == null) {
                boolean alreadyExists;
                try (HBaseAdmin admin = new HBaseAdmin(hConf)) {
                    alreadyExists = tableUtil.tableExists(admin, tableId);
                }
                try (HBaseDDLExecutor ddlExecutor = ddlExecutorFactory.get()) {
                    // Coprocessor and table properties are only set up for a table that is missing.
                    if (!alreadyExists) {
                        TableId metadataTableId = tableUtil.createHTableId(
                            NamespaceId.SYSTEM, cConf.get(Constants.MessagingSystem.METADATA_TABLE_NAME));
                        ColumnFamilyDescriptorBuilder familyBuilder =
                            HBaseTableUtil.getColumnFamilyDescriptorBuilder(Bytes.toString(COLUMN_FAMILY), hConf);
                        TableDescriptorBuilder tableBuilder = HBaseTableUtil.getTableDescriptorBuilder(tableId, cConf)
                            .addColumnFamily(familyBuilder.build())
                            .addProperty(Constants.MessagingSystem.HBASE_MESSAGING_TABLE_PREFIX_NUM_BYTES, Integer.toString(1))
                            .addProperty(Constants.MessagingSystem.KEY_DISTRIBUTOR_BUCKETS_ATTR, Integer.toString(splits))
                            .addProperty(Constants.MessagingSystem.HBASE_METADATA_TABLE_NAMESPACE, metadataTableId.getNamespace())
                            .addProperty(HTableDescriptor.SPLIT_POLICY, cConf.get(Constants.MessagingSystem.TABLE_HBASE_SPLIT_POLICY))
                            .addCoprocessor(coprocessorManager.getCoprocessorDescriptor(coprocessor, Coprocessor.PRIORITY_USER));
                        // One key distributor bucket per initial split.
                        byte[][] initialSplitKeys = HBaseTableUtil.getSplitKeys(
                            splits, splits, new RowKeyDistributorByHashPrefix(new OneByteSimpleHash(splits)));
                        ddlExecutor.createTableIfNotExists(tableBuilder.build(), initialSplitKeys);
                    }
                    // Whether freshly created or pre-existing: open the table and cache its descriptor.
                    openedTable = tableUtil.createHTable(hConf, tableId);
                    descriptor = openedTable.getTableDescriptor();
                    tableDescriptors.put(tableId, descriptor);
                }
            }
        }
    }
    // On a cache hit no table has been opened yet.
    HTable table = (openedTable != null) ? openedTable : tableUtil.createHTable(hConf, tableId);
    table.setAutoFlushTo(false);
    int distributorBuckets = getKeyDistributorBuckets(tableId, descriptor);
    return new HTableWithRowKeyDistributor(
        table, new RowKeyDistributorByHashPrefix(new OneByteSimpleHash(distributorBuckets)));
}

Also used : HBaseDDLExecutor(co.cask.cdap.spi.hbase.HBaseDDLExecutor) TableId(co.cask.cdap.data2.util.TableId) HBaseAdmin(org.apache.hadoop.hbase.client.HBaseAdmin) RowKeyDistributorByHashPrefix(co.cask.cdap.hbase.wd.RowKeyDistributorByHashPrefix) ColumnFamilyDescriptorBuilder(co.cask.cdap.data2.util.hbase.ColumnFamilyDescriptorBuilder) OneByteSimpleHash(co.cask.cdap.hbase.wd.RowKeyDistributorByHashPrefix.OneByteSimpleHash) HTableDescriptorBuilder(co.cask.cdap.data2.util.hbase.HTableDescriptorBuilder) TableDescriptorBuilder(co.cask.cdap.data2.util.hbase.TableDescriptorBuilder) HTable(org.apache.hadoop.hbase.client.HTable) HTableDescriptor(org.apache.hadoop.hbase.HTableDescriptor)

Example 3 with RowKeyDistributorByHashPrefix

use of co.cask.cdap.hbase.wd.RowKeyDistributorByHashPrefix in project cdap by caskdata.

the class HBaseTableUtilTest method testGetSplitKeys.

/**
 * Checks how many split keys {@code HBaseTableUtil.getSplitKeys} produces for various requested
 * split counts with a 16-bucket hash-prefix distributor, and that out-of-range requests are
 * rejected with {@link IllegalArgumentException}.
 */
@Test
public void testGetSplitKeys() {
    int bucketCount = 16;
    AbstractRowKeyDistributor keyDistributor =
        new RowKeyDistributorByHashPrefix(new RowKeyDistributorByHashPrefix.OneByteSimpleHash(bucketCount));

    // The number of splits is never less than what the caller asked for; it is rounded up to a
    // multiple of the bucket count. The result holds one key fewer than the number of splits,
    // because HBase takes care of the first split automatically.
    Assert.assertEquals(getSplitSize(bucketCount, 12) - 1,
                        HBaseTableUtil.getSplitKeys(12, bucketCount, keyDistributor).length);
    Assert.assertEquals(getSplitSize(bucketCount, 16) - 1,
                        HBaseTableUtil.getSplitKeys(16, bucketCount, keyDistributor).length);

    // Requests below the bucket count still yield (bucket count - 1) keys.
    Assert.assertEquals(bucketCount - 1, HBaseTableUtil.getSplitKeys(6, bucketCount, keyDistributor).length);
    Assert.assertEquals(bucketCount - 1, HBaseTableUtil.getSplitKeys(2, bucketCount, keyDistributor).length);

    // A request of "1" is for queue tables known not to be "hot": no pre-splitting at all.
    Assert.assertEquals(0, HBaseTableUtil.getSplitKeys(1, bucketCount, keyDistributor).length);

    // Upper bound: 255 * (bucket count) splits are accepted, giving 255 * (bucket count) - 1 keys.
    Assert.assertEquals(255 * bucketCount - 1,
                        HBaseTableUtil.getSplitKeys(255 * bucketCount, bucketCount, keyDistributor).length);

    // One step past the upper bound must be rejected.
    try {
        HBaseTableUtil.getSplitKeys(256 * bucketCount, bucketCount, keyDistributor);
        Assert.fail("getSplitKeys(256) should have thrown IllegalArgumentException");
    } catch (IllegalArgumentException expected) {
        // expected
    }

    // Zero splits must be rejected as well.
    try {
        HBaseTableUtil.getSplitKeys(0, bucketCount, keyDistributor);
        Assert.fail("getSplitKeys(0) should have thrown IllegalArgumentException");
    } catch (IllegalArgumentException expected) {
        // expected
    }
}

Also used : RowKeyDistributorByHashPrefix(co.cask.cdap.hbase.wd.RowKeyDistributorByHashPrefix) AbstractRowKeyDistributor(co.cask.cdap.hbase.wd.AbstractRowKeyDistributor) Test(org.junit.Test)

Example 4 with RowKeyDistributorByHashPrefix

use of co.cask.cdap.hbase.wd.RowKeyDistributorByHashPrefix in project cdap by caskdata.

the class HBaseMetricsTable method initializeV3Vars.

/**
 * Sets up the scan executor and row key distributor fields. Both are left {@code null} unless the
 * dataset name contains "v3"; for such tables a daemon-thread scan pool and a hash-prefix row key
 * distributor are created.
 *
 * @param cConf configuration providing the maximum number of scan threads
 * @param spec dataset specification providing the table name and the number of table splits
 */
private void initializeV3Vars(CConfiguration cConf, DatasetSpecification spec) {
    boolean v3 = spec.getName().contains("v3");
    this.scanExecutor = null;
    this.rowKeyDistributor = null;
    if (!v3) {
        // Nothing more to set up; both fields remain null.
        return;
    }

    // When the pool cannot accept a task, log it and run the task on the submitting thread,
    // unless the pool has been shut down.
    RejectedExecutionHandler runInCaller = new RejectedExecutionHandler() {

        @Override
        public void rejectedExecution(Runnable task, ThreadPoolExecutor pool) {
            REJECTION_LOG.info("No more threads in the HBase scan thread pool. Consider increase {}. Performing scan in caller thread {}", Constants.Metrics.METRICS_HBASE_MAX_SCAN_THREADS, Thread.currentThread().getName());
            if (pool.isShutdown()) {
                return;
            }
            task.run();
        }
    };

    int scanThreadLimit = cConf.getInt(Constants.Metrics.METRICS_HBASE_MAX_SCAN_THREADS);
    // Core size 0 with a 60 second keep-alive lets the pool shrink to no threads when idle.
    // The SynchronousQueue hands tasks straight to a thread instead of queueing them, so once
    // scanThreadLimit threads are busy the rejection handler above takes over.
    this.scanExecutor = new ThreadPoolExecutor(
        0, scanThreadLimit, 60L, TimeUnit.SECONDS,
        new SynchronousQueue<Runnable>(),
        Threads.createDaemonThreadFactory("metrics-hbase-scanner-%d"),
        runInCaller);

    // Bucket count comes from the dataset spec, falling back to 16 when the property is absent.
    int tableSplits = spec.getIntProperty(Constants.Metrics.METRICS_HBASE_TABLE_SPLITS, 16);
    this.rowKeyDistributor =
        new RowKeyDistributorByHashPrefix(new RowKeyDistributorByHashPrefix.OneByteSimpleHash(tableSplits));
}

Also used : RowKeyDistributorByHashPrefix(co.cask.cdap.hbase.wd.RowKeyDistributorByHashPrefix) RejectedExecutionHandler(java.util.concurrent.RejectedExecutionHandler) SynchronousQueue(java.util.concurrent.SynchronousQueue) ThreadPoolExecutor(java.util.concurrent.ThreadPoolExecutor)

Aggregations

RowKeyDistributorByHashPrefix (co.cask.cdap.hbase.wd.RowKeyDistributorByHashPrefix)4 TableId (co.cask.cdap.data2.util.TableId)2 ColumnFamilyDescriptorBuilder (co.cask.cdap.data2.util.hbase.ColumnFamilyDescriptorBuilder)2 TableDescriptorBuilder (co.cask.cdap.data2.util.hbase.TableDescriptorBuilder)2 AbstractRowKeyDistributor (co.cask.cdap.hbase.wd.AbstractRowKeyDistributor)2 HBaseDDLExecutor (co.cask.cdap.spi.hbase.HBaseDDLExecutor)2 HTable (org.apache.hadoop.hbase.client.HTable)2 HTableDescriptorBuilder (co.cask.cdap.data2.util.hbase.HTableDescriptorBuilder)1 OneByteSimpleHash (co.cask.cdap.hbase.wd.RowKeyDistributorByHashPrefix.OneByteSimpleHash)1 NamespaceId (co.cask.cdap.proto.id.NamespaceId)1 RejectedExecutionHandler (java.util.concurrent.RejectedExecutionHandler)1 SynchronousQueue (java.util.concurrent.SynchronousQueue)1 ThreadPoolExecutor (java.util.concurrent.ThreadPoolExecutor)1 HTableDescriptor (org.apache.hadoop.hbase.HTableDescriptor)1 HBaseAdmin (org.apache.hadoop.hbase.client.HBaseAdmin)1 Test (org.junit.Test)1