Example 1 with RowKeyDistributorByHashPrefix

Use of io.cdap.cdap.hbase.wd.RowKeyDistributorByHashPrefix in project cdap by caskdata.

The initializeVars method of the HBaseMetricsTable class.

private void initializeVars(CConfiguration cConf, DatasetSpecification spec) {
    RejectedExecutionHandler callerRunsPolicy = (r, executor) -> {
        REJECTION_LOG.info("No more threads in the HBase scan thread pool. Consider increasing {}. " +
                           "Performing scan in caller thread {}",
                           Constants.Metrics.METRICS_HBASE_MAX_SCAN_THREADS, Thread.currentThread().getName());
        // Run the scan from the caller thread instead
        if (!executor.isShutdown()) {
            r.run();
        }
    };
    int maxScanThread = cConf.getInt(Constants.Metrics.METRICS_HBASE_MAX_SCAN_THREADS);
    // Creates an executor that shrinks to 0 threads when left idle.
    // Uses daemon threads, hence no need to worry about shutdown.
    // When all threads are busy, the caller thread executes the scan (see callerRunsPolicy above).
    this.scanExecutor = new ThreadPoolExecutor(0, maxScanThread, 60L, TimeUnit.SECONDS,
                                               new SynchronousQueue<Runnable>(),
                                               Threads.createDaemonThreadFactory("metrics-hbase-scanner-%d"),
                                               callerRunsPolicy);
    // One-byte hash prefix over the configured number of buckets (default 16)
    this.rowKeyDistributor = new RowKeyDistributorByHashPrefix(
        new RowKeyDistributorByHashPrefix.OneByteSimpleHash(
            spec.getIntProperty(Constants.Metrics.METRICS_HBASE_TABLE_SPLITS, 16)));
}
Also used: Arrays(java.util.Arrays) ImmutablePair(io.cdap.cdap.common.utils.ImmutablePair) TableProperties(io.cdap.cdap.api.dataset.table.TableProperties) NamespaceId(io.cdap.cdap.proto.id.NamespaceId) Result(org.apache.hadoop.hbase.client.Result) ThreadPoolExecutor(java.util.concurrent.ThreadPoolExecutor) Increment(org.apache.hadoop.hbase.client.Increment) LoggerFactory(org.slf4j.LoggerFactory) TableId(io.cdap.cdap.data2.util.TableId) Bytes(io.cdap.cdap.api.common.Bytes) Loggers(io.cdap.cdap.common.logging.Loggers) FuzzyRowFilter(io.cdap.cdap.data2.dataset2.lib.table.FuzzyRowFilter) DatasetSpecification(io.cdap.cdap.api.dataset.DatasetSpecification) MetricsTable(io.cdap.cdap.data2.dataset2.lib.table.MetricsTable) AbstractRowKeyDistributor(io.cdap.cdap.hbase.wd.AbstractRowKeyDistributor) DataSetException(io.cdap.cdap.api.dataset.DataSetException) PutBuilder(io.cdap.cdap.data2.util.hbase.PutBuilder) Lists(com.google.common.collect.Lists) Delete(org.apache.hadoop.hbase.client.Delete) RejectedExecutionHandler(java.util.concurrent.RejectedExecutionHandler) Configuration(org.apache.hadoop.conf.Configuration) Map(java.util.Map) Scanner(io.cdap.cdap.api.dataset.table.Scanner) BufferedMutator(org.apache.hadoop.hbase.client.BufferedMutator) DistributedScanner(io.cdap.cdap.hbase.wd.DistributedScanner) HBaseTableUtil(io.cdap.cdap.data2.util.hbase.HBaseTableUtil) ExecutorService(java.util.concurrent.ExecutorService) Nullable(javax.annotation.Nullable) Pair(org.apache.hadoop.hbase.util.Pair) Threads(org.apache.twill.common.Threads) Logger(org.slf4j.Logger) ScanBuilder(io.cdap.cdap.data2.util.hbase.ScanBuilder) SynchronousQueue(java.util.concurrent.SynchronousQueue) Put(org.apache.hadoop.hbase.client.Put) Get(org.apache.hadoop.hbase.client.Get) IOException(java.io.IOException) NavigableMap(java.util.NavigableMap) TimeUnit(java.util.concurrent.TimeUnit) List(java.util.List) CConfiguration(io.cdap.cdap.common.conf.CConfiguration) DeleteBuilder(io.cdap.cdap.data2.util.hbase.DeleteBuilder) ResultScanner(org.apache.hadoop.hbase.client.ResultScanner) Table(org.apache.hadoop.hbase.client.Table) Constants(io.cdap.cdap.common.conf.Constants) LogSamplers(io.cdap.cdap.common.logging.LogSamplers) DatasetContext(io.cdap.cdap.api.dataset.DatasetContext) RowKeyDistributorByHashPrefix(io.cdap.cdap.hbase.wd.RowKeyDistributorByHashPrefix) SortedMap(java.util.SortedMap)
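For context, here is a minimal sketch of the round trip the distributor performs, using the getDistributedKey/getOriginalKey/getAllDistributedKeys methods from the HBaseWD-style contract that io.cdap.cdap.hbase.wd is based on. The class name and sample key are illustrative only:

import java.nio.charset.StandardCharsets;
import java.util.Arrays;

import io.cdap.cdap.hbase.wd.RowKeyDistributorByHashPrefix;
import io.cdap.cdap.hbase.wd.RowKeyDistributorByHashPrefix.OneByteSimpleHash;

public final class RowKeyDistributorRoundTrip {
    public static void main(String[] args) {
        // Same construction as initializeVars above: a one-byte hash prefix over 16 buckets
        RowKeyDistributorByHashPrefix distributor =
            new RowKeyDistributorByHashPrefix(new OneByteSimpleHash(16));

        byte[] original = "metric.row.key".getBytes(StandardCharsets.UTF_8);

        // Writes go through getDistributedKey, which prepends the bucket byte,
        // spreading otherwise-sequential keys across region servers
        byte[] salted = distributor.getDistributedKey(original);

        // Reads strip the prefix again, so callers only ever see the logical key
        byte[] recovered = distributor.getOriginalKey(salted);
        System.out.println("round trip ok: " + Arrays.equals(original, recovered));

        // A scan over logical keys must fan out over every bucket prefix;
        // that fan-out is what DistributedScanner runs on the scanExecutor above
        System.out.println("buckets to scan: " + distributor.getAllDistributedKeys(original).length);
    }
}

This is why the snippet pairs the distributor with a dedicated scan thread pool: each scan becomes up to 16 parallel per-bucket scans.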

Example 2 with RowKeyDistributorByHashPrefix

Use of io.cdap.cdap.hbase.wd.RowKeyDistributorByHashPrefix in project cdap by caskdata.

The createTable method of the HBaseTableFactory class.

/**
 * Creates a new instance of {@link Table} for the given {@link TableId}, wrapped together with its
 * row key distributor. If the HBase table doesn't exist, a new one is created with the given number
 * of splits.
 */
private HTableWithRowKeyDistributor createTable(TableId tableId, int splits,
                                                Class<? extends Coprocessor> coprocessor) throws IOException {
    // Look up the table descriptor in the cache first. If it is there, we assume the HBase table exists.
    // Otherwise, attempt to create it.
    Table table = null;
    HTableDescriptor htd = tableDescriptors.get(tableId);
    if (htd == null) {
        synchronized (this) {
            htd = tableDescriptors.get(tableId);
            if (htd == null) {
                boolean tableExists;
                try (HBaseAdmin admin = new HBaseAdmin(hConf)) {
                    tableExists = tableUtil.tableExists(admin, tableId);
                }
                try (HBaseDDLExecutor ddlExecutor = ddlExecutorFactory.get()) {
                    // If the table already exists, skip creating it and its coprocessors
                    if (!tableExists) {
                        TableId metadataTableId = tableUtil.createHTableId(
                            NamespaceId.SYSTEM, cConf.get(Constants.MessagingSystem.METADATA_TABLE_NAME));
                        ColumnFamilyDescriptorBuilder cfdBuilder =
                            HBaseTableUtil.getColumnFamilyDescriptorBuilder(Bytes.toString(COLUMN_FAMILY), hConf);
                        TableDescriptorBuilder tdBuilder = HBaseTableUtil.getTableDescriptorBuilder(tableId, cConf)
                            .addColumnFamily(cfdBuilder.build())
                            .addProperty(Constants.MessagingSystem.HBASE_MESSAGING_TABLE_PREFIX_NUM_BYTES,
                                         Integer.toString(1))
                            .addProperty(Constants.MessagingSystem.KEY_DISTRIBUTOR_BUCKETS_ATTR,
                                         Integer.toString(splits))
                            .addProperty(Constants.MessagingSystem.HBASE_METADATA_TABLE_NAMESPACE,
                                         metadataTableId.getNamespace())
                            .addProperty(HTableDescriptor.SPLIT_POLICY,
                                         cConf.get(Constants.MessagingSystem.TABLE_HBASE_SPLIT_POLICY))
                            .addCoprocessor(coprocessorManager.getCoprocessorDescriptor(coprocessor,
                                                                                        Coprocessor.PRIORITY_USER));
                        // Set the key distributor size the same as the initial number of splits,
                        // essentially one bucket per split.
                        byte[][] splitKeys = HBaseTableUtil.getSplitKeys(
                            splits, splits, new RowKeyDistributorByHashPrefix(new OneByteSimpleHash(splits)));
                        ddlExecutor.createTableIfNotExists(tdBuilder.build(), splitKeys);
                    }
                    table = tableUtil.createTable(hConf, tableId);
                    htd = table.getTableDescriptor();
                    tableDescriptors.put(tableId, htd);
                }
            }
        }
    }
    if (table == null) {
        table = tableUtil.createTable(hConf, tableId);
    }
    return new HTableWithRowKeyDistributor(
        table, new RowKeyDistributorByHashPrefix(new OneByteSimpleHash(getKeyDistributorBuckets(tableId, htd))));
}
Also used: HBaseDDLExecutor(io.cdap.cdap.spi.hbase.HBaseDDLExecutor) TableId(io.cdap.cdap.data2.util.TableId) HBaseAdmin(org.apache.hadoop.hbase.client.HBaseAdmin) MetadataTable(io.cdap.cdap.messaging.store.MetadataTable) MessageTable(io.cdap.cdap.messaging.store.MessageTable) PayloadTable(io.cdap.cdap.messaging.store.PayloadTable) Table(org.apache.hadoop.hbase.client.Table) RowKeyDistributorByHashPrefix(io.cdap.cdap.hbase.wd.RowKeyDistributorByHashPrefix) ColumnFamilyDescriptorBuilder(io.cdap.cdap.data2.util.hbase.ColumnFamilyDescriptorBuilder) OneByteSimpleHash(io.cdap.cdap.hbase.wd.RowKeyDistributorByHashPrefix.OneByteSimpleHash) HTableDescriptorBuilder(io.cdap.cdap.data2.util.hbase.HTableDescriptorBuilder) TableDescriptorBuilder(io.cdap.cdap.data2.util.hbase.TableDescriptorBuilder) HTableDescriptor(org.apache.hadoop.hbase.HTableDescriptor)
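The getKeyDistributorBuckets helper called on the method's final line isn't shown on this page. A plausible sketch, assuming it simply reads back the KEY_DISTRIBUTOR_BUCKETS_ATTR property that createTable stores on the descriptor; the fallback value here is illustrative, not the real default resolution:

private int getKeyDistributorBuckets(TableId tableId, HTableDescriptor htd) {
    // The bucket count was written onto the descriptor at creation time (see above),
    // so readers and writers always agree on the same key distribution
    String buckets = htd.getValue(Constants.MessagingSystem.KEY_DISTRIBUTOR_BUCKETS_ATTR);
    // Illustrative fallback only; the real code would resolve a configured default instead
    return buckets == null ? 1 : Integer.parseInt(buckets);
}

Persisting the bucket count on the table itself means the distributor can be reconstructed for any existing table, even if the configured split count has since changed.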

Example 3 with RowKeyDistributorByHashPrefix

Use of io.cdap.cdap.hbase.wd.RowKeyDistributorByHashPrefix in project cdap by caskdata.

The testGetSplitKeys method of the HBaseTableUtilTest class.

@Test
public void testGetSplitKeys() {
    int buckets = 16;
    AbstractRowKeyDistributor distributor =
        new RowKeyDistributorByHashPrefix(new RowKeyDistributorByHashPrefix.OneByteSimpleHash(buckets));
    // The number of splits will be no less than the user asked for. If splits > buckets, the split
    // count is bumped to the next multiple of buckets that is no less than the requested count.
    // getSplitKeys returns one key fewer than the split count, because HBase creates the first
    // region boundary automatically.
    Assert.assertEquals(getSplitSize(buckets, 12) - 1, HBaseTableUtil.getSplitKeys(12, buckets, distributor).length);
    Assert.assertEquals(getSplitSize(buckets, 16) - 1, HBaseTableUtil.getSplitKeys(16, buckets, distributor).length);
    // At least buckets - 1 keys are returned, even when fewer splits are requested
    Assert.assertEquals(buckets - 1, HBaseTableUtil.getSplitKeys(6, buckets, distributor).length);
    Assert.assertEquals(buckets - 1, HBaseTableUtil.getSplitKeys(2, buckets, distributor).length);
    // "1" can be used for queue tables that we know are not "hot", so we do not pre-split in this case
    Assert.assertEquals(0, HBaseTableUtil.getSplitKeys(1, buckets, distributor).length);
    // Allows up to 255 * buckets splits, i.e. 255 * buckets - 1 split keys
    Assert.assertEquals(255 * buckets - 1, HBaseTableUtil.getSplitKeys(255 * buckets, buckets, distributor).length);
    try {
        HBaseTableUtil.getSplitKeys(256 * buckets, buckets, distributor);
        Assert.fail("getSplitKeys(256 * buckets) should have thrown IllegalArgumentException");
    } catch (IllegalArgumentException e) {
        // expected
    }
    try {
        HBaseTableUtil.getSplitKeys(0, buckets, distributor);
        Assert.fail("getSplitKeys(0) should have thrown IllegalArgumentException");
    } catch (IllegalArgumentException e) {
        // expected
    }
}
Also used: RowKeyDistributorByHashPrefix(io.cdap.cdap.hbase.wd.RowKeyDistributorByHashPrefix) AbstractRowKeyDistributor(io.cdap.cdap.hbase.wd.AbstractRowKeyDistributor) Test(org.junit.Test)
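The getSplitSize helper used in the first two assertions isn't shown on this page. A hypothetical reconstruction consistent with the expected values above, rounding the requested split count up to the next multiple of the bucket count:

private static int getSplitSize(int buckets, int splits) {
    // e.g. buckets = 16: splits = 12 -> 16, splits = 16 -> 16, splits = 17 -> 32
    return ((splits + buckets - 1) / buckets) * buckets;
}

With buckets = 16, this yields getSplitSize(16, 12) - 1 = 15 and getSplitSize(16, 16) - 1 = 15 split keys, matching the first two assertions.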

Aggregations

RowKeyDistributorByHashPrefix (io.cdap.cdap.hbase.wd.RowKeyDistributorByHashPrefix): 3
TableId (io.cdap.cdap.data2.util.TableId): 2
AbstractRowKeyDistributor (io.cdap.cdap.hbase.wd.AbstractRowKeyDistributor): 2
Table (org.apache.hadoop.hbase.client.Table): 2
Lists (com.google.common.collect.Lists): 1
Bytes (io.cdap.cdap.api.common.Bytes): 1
DataSetException (io.cdap.cdap.api.dataset.DataSetException): 1
DatasetContext (io.cdap.cdap.api.dataset.DatasetContext): 1
DatasetSpecification (io.cdap.cdap.api.dataset.DatasetSpecification): 1
Scanner (io.cdap.cdap.api.dataset.table.Scanner): 1
TableProperties (io.cdap.cdap.api.dataset.table.TableProperties): 1
CConfiguration (io.cdap.cdap.common.conf.CConfiguration): 1
Constants (io.cdap.cdap.common.conf.Constants): 1
LogSamplers (io.cdap.cdap.common.logging.LogSamplers): 1
Loggers (io.cdap.cdap.common.logging.Loggers): 1
ImmutablePair (io.cdap.cdap.common.utils.ImmutablePair): 1
FuzzyRowFilter (io.cdap.cdap.data2.dataset2.lib.table.FuzzyRowFilter): 1
MetricsTable (io.cdap.cdap.data2.dataset2.lib.table.MetricsTable): 1
ColumnFamilyDescriptorBuilder (io.cdap.cdap.data2.util.hbase.ColumnFamilyDescriptorBuilder): 1
DeleteBuilder (io.cdap.cdap.data2.util.hbase.DeleteBuilder): 1