
Example 1 with DatasetSpecification

Use of io.cdap.cdap.api.dataset.DatasetSpecification in project cdap by caskdata.

The class HBaseMetricsTable, method initializeVars.

private void initializeVars(CConfiguration cConf, DatasetSpecification spec) {
    this.scanExecutor = null;
    this.rowKeyDistributor = null;
    RejectedExecutionHandler callerRunsPolicy = (r, executor) -> {
        REJECTION_LOG.info("No more threads in the HBase scan thread pool. Consider increasing {}. Performing scan in caller thread {}", Constants.Metrics.METRICS_HBASE_MAX_SCAN_THREADS, Thread.currentThread().getName());
        // Runs it from the caller thread
        if (!executor.isShutdown()) {
            r.run();
        }
    };
    int maxScanThread = cConf.getInt(Constants.Metrics.METRICS_HBASE_MAX_SCAN_THREADS);
    // Creates an executor that will shrink to 0 threads if left idle
    // Uses daemon threads, hence no need to worry about shutdown
    // When all threads are busy, use the caller thread to execute
    this.scanExecutor = new ThreadPoolExecutor(0, maxScanThread, 60L, TimeUnit.SECONDS, new SynchronousQueue<Runnable>(), Threads.createDaemonThreadFactory("metrics-hbase-scanner-%d"), callerRunsPolicy);
    this.rowKeyDistributor = new RowKeyDistributorByHashPrefix(new RowKeyDistributorByHashPrefix.OneByteSimpleHash(spec.getIntProperty(Constants.Metrics.METRICS_HBASE_TABLE_SPLITS, 16)));
}
Also used : Arrays(java.util.Arrays) ImmutablePair(io.cdap.cdap.common.utils.ImmutablePair) TableProperties(io.cdap.cdap.api.dataset.table.TableProperties) NamespaceId(io.cdap.cdap.proto.id.NamespaceId) Result(org.apache.hadoop.hbase.client.Result) ThreadPoolExecutor(java.util.concurrent.ThreadPoolExecutor) Increment(org.apache.hadoop.hbase.client.Increment) LoggerFactory(org.slf4j.LoggerFactory) TableId(io.cdap.cdap.data2.util.TableId) Bytes(io.cdap.cdap.api.common.Bytes) Loggers(io.cdap.cdap.common.logging.Loggers) FuzzyRowFilter(io.cdap.cdap.data2.dataset2.lib.table.FuzzyRowFilter) DatasetSpecification(io.cdap.cdap.api.dataset.DatasetSpecification) MetricsTable(io.cdap.cdap.data2.dataset2.lib.table.MetricsTable) AbstractRowKeyDistributor(io.cdap.cdap.hbase.wd.AbstractRowKeyDistributor) DataSetException(io.cdap.cdap.api.dataset.DataSetException) PutBuilder(io.cdap.cdap.data2.util.hbase.PutBuilder) Lists(com.google.common.collect.Lists) Delete(org.apache.hadoop.hbase.client.Delete) RejectedExecutionHandler(java.util.concurrent.RejectedExecutionHandler) Configuration(org.apache.hadoop.conf.Configuration) Map(java.util.Map) Scanner(io.cdap.cdap.api.dataset.table.Scanner) BufferedMutator(org.apache.hadoop.hbase.client.BufferedMutator) DistributedScanner(io.cdap.cdap.hbase.wd.DistributedScanner) HBaseTableUtil(io.cdap.cdap.data2.util.hbase.HBaseTableUtil) ExecutorService(java.util.concurrent.ExecutorService) Nullable(javax.annotation.Nullable) Pair(org.apache.hadoop.hbase.util.Pair) Threads(org.apache.twill.common.Threads) Logger(org.slf4j.Logger) ScanBuilder(io.cdap.cdap.data2.util.hbase.ScanBuilder) SynchronousQueue(java.util.concurrent.SynchronousQueue) Put(org.apache.hadoop.hbase.client.Put) Get(org.apache.hadoop.hbase.client.Get) IOException(java.io.IOException) NavigableMap(java.util.NavigableMap) TimeUnit(java.util.concurrent.TimeUnit) List(java.util.List) CConfiguration(io.cdap.cdap.common.conf.CConfiguration) DeleteBuilder(io.cdap.cdap.data2.util.hbase.DeleteBuilder) ResultScanner(org.apache.hadoop.hbase.client.ResultScanner) Table(org.apache.hadoop.hbase.client.Table) Constants(io.cdap.cdap.common.conf.Constants) LogSamplers(io.cdap.cdap.common.logging.LogSamplers) DatasetContext(io.cdap.cdap.api.dataset.DatasetContext) RowKeyDistributorByHashPrefix(io.cdap.cdap.hbase.wd.RowKeyDistributorByHashPrefix) SortedMap(java.util.SortedMap)
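
The initializeVars method above combines three ThreadPoolExecutor choices: a core size of 0 so the pool shrinks to no threads when idle, a SynchronousQueue so tasks are handed directly to a worker with no buffering, and a rejection handler that runs the task on the caller's thread once all workers are busy. The standalone sketch below reproduces that behavior with plain JDK classes only; the pool size, thread names, and task bodies are illustrative and not taken from CDAP.

import java.util.concurrent.RejectedExecutionHandler;
import java.util.concurrent.SynchronousQueue;
import java.util.concurrent.ThreadFactory;
import java.util.concurrent.ThreadPoolExecutor;
import java.util.concurrent.TimeUnit;

public class CallerRunsScanPoolSketch {
    public static void main(String[] args) throws InterruptedException {
        // Hypothetical limit; CDAP reads the real value from CConfiguration.
        int maxScanThreads = 4;
        // Daemon threads, so an idle pool never prevents JVM shutdown.
        ThreadFactory daemonFactory = r -> {
            Thread t = new Thread(r, "scan-sketch");
            t.setDaemon(true);
            return t;
        };
        // With a SynchronousQueue there is no task queueing: once all threads are busy,
        // submission is rejected and this handler runs the task on the caller thread.
        RejectedExecutionHandler callerRuns = (r, executor) -> {
            if (!executor.isShutdown()) {
                r.run();
            }
        };
        // Core size 0 lets the pool drop back to zero threads after 60 seconds of idleness.
        ThreadPoolExecutor scanExecutor = new ThreadPoolExecutor(0, maxScanThreads, 60L, TimeUnit.SECONDS,
                new SynchronousQueue<Runnable>(), daemonFactory, callerRuns);
        for (int i = 0; i < 10; i++) {
            int taskId = i;
            scanExecutor.execute(() -> System.out.println("task " + taskId + " on " + Thread.currentThread().getName()));
        }
        scanExecutor.shutdown();
        scanExecutor.awaitTermination(5, TimeUnit.SECONDS);
    }
}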

Example 2 with DatasetSpecification

Use of io.cdap.cdap.api.dataset.DatasetSpecification in project cdap by caskdata.

The class ObjectStoreDefinition, method getDataset.

@Override
public ObjectStoreDataset<?> getDataset(DatasetContext datasetContext, DatasetSpecification spec, Map<String, String> arguments, ClassLoader classLoader) throws IOException {
    DatasetSpecification kvTableSpec = spec.getSpecification("objects");
    KeyValueTable table = tableDef.getDataset(datasetContext, kvTableSpec, arguments, classLoader);
    TypeRepresentation typeRep = GSON.fromJson(spec.getProperty("type"), TypeRepresentation.class);
    Schema schema = GSON.fromJson(spec.getProperty("schema"), Schema.class);
    return new ObjectStoreDataset(spec.getName(), table, typeRep, schema, classLoader);
}
Also used : KeyValueTable(io.cdap.cdap.api.dataset.lib.KeyValueTable) TypeRepresentation(io.cdap.cdap.internal.io.TypeRepresentation) Schema(io.cdap.cdap.api.data.schema.Schema) DatasetSpecification(io.cdap.cdap.api.dataset.DatasetSpecification)
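
getDataset above expects the object store's spec to embed the spec of its underlying "objects" KeyValueTable and to carry the serialized type and schema as string properties. The following is a minimal sketch of how such a composite spec could be assembled and read back, assuming the DatasetSpecification.builder(...) API that CDAP dataset definitions typically use in configure(); the type names and JSON property values are placeholders, not CDAP's real serialization format.

import io.cdap.cdap.api.dataset.DatasetSpecification;

// Assumed builder usage; property values here are placeholders.
static DatasetSpecification buildAndReadObjectStoreSpecSketch() {
    DatasetSpecification objectsSpec = DatasetSpecification.builder("objects", "keyValueTable").build();
    DatasetSpecification storeSpec = DatasetSpecification.builder("myObjects", "objectStore")
        // JSON-serialized TypeRepresentation and Schema would go here (placeholders).
        .property("type", "{...}")
        .property("schema", "{...}")
        .datasets(objectsSpec)
        .build();
    // The definition then navigates the embedded spec and the string properties,
    // just as getDataset does above.
    DatasetSpecification kvTableSpec = storeSpec.getSpecification("objects");
    String typeJson = storeSpec.getProperty("type");
    String schemaJson = storeSpec.getProperty("schema");
    return storeSpec;
}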

Example 3 with DatasetSpecification

Use of io.cdap.cdap.api.dataset.DatasetSpecification in project cdap by caskdata.

The class HiveExploreStructuredRecordTestRun, method testMissingSchemaFails.

@Test(expected = IllegalArgumentException.class)
public void testMissingSchemaFails() throws Exception {
    DatasetId instanceId = NAMESPACE_ID.dataset("badtable");
    datasetFramework.addInstance("TableWrapper", instanceId, DatasetProperties.EMPTY);
    DatasetSpecification spec = datasetFramework.getDatasetSpec(instanceId);
    try {
        exploreTableManager.enableDataset(instanceId, spec, false);
    } finally {
        datasetFramework.deleteInstance(instanceId);
    }
}
Also used : DatasetSpecification(io.cdap.cdap.api.dataset.DatasetSpecification) DatasetId(io.cdap.cdap.proto.id.DatasetId) Test(org.junit.Test)

Example 4 with DatasetSpecification

Use of io.cdap.cdap.api.dataset.DatasetSpecification in project cdap by caskdata.

The class HiveExploreStructuredRecordTestRun, method testRecordScannableAndWritableIsOK.

@Test
public void testRecordScannableAndWritableIsOK() throws Exception {
    DatasetId instanceId = NAMESPACE_ID.dataset("tabul");
    datasetFramework.addInstance("TableWrapper", instanceId, DatasetProperties.builder().add(DatasetProperties.SCHEMA, Schema.recordOf("intRecord", Schema.Field.of("x", Schema.of(Schema.Type.STRING))).toString()).build());
    DatasetSpecification spec = datasetFramework.getDatasetSpec(instanceId);
    try {
        exploreTableManager.enableDataset(instanceId, spec, false);
        runCommand(NAMESPACE_ID, "describe dataset_tabul", true, Lists.newArrayList(new ColumnDesc("col_name", "STRING", 1, "from deserializer"), new ColumnDesc("data_type", "STRING", 2, "from deserializer"), new ColumnDesc("comment", "STRING", 3, "from deserializer")), Lists.newArrayList(new QueryResult(Lists.<Object>newArrayList("x", "string", "from deserializer"))));
    } finally {
        datasetFramework.deleteInstance(instanceId);
    }
}
Also used : QueryResult(io.cdap.cdap.proto.QueryResult) DatasetSpecification(io.cdap.cdap.api.dataset.DatasetSpecification) ColumnDesc(io.cdap.cdap.proto.ColumnDesc) DatasetId(io.cdap.cdap.proto.id.DatasetId) Test(org.junit.Test)

Example 5 with DatasetSpecification

Use of io.cdap.cdap.api.dataset.DatasetSpecification in project cdap by caskdata.

The class HiveExploreTableTestRun, method testNoOpOnMissingSchema.

@Test
public void testNoOpOnMissingSchema() throws Exception {
    DatasetId datasetId = NAMESPACE_ID.dataset("noschema");
    datasetFramework.addInstance(Table.class.getName(), datasetId, DatasetProperties.EMPTY);
    try {
        DatasetSpecification spec = datasetFramework.getDatasetSpec(datasetId);
        Assert.assertEquals(QueryHandle.NO_OP, exploreTableManager.enableDataset(datasetId, spec, false));
    } finally {
        datasetFramework.deleteInstance(datasetId);
    }
}
Also used : Table(io.cdap.cdap.api.dataset.table.Table) DatasetSpecification(io.cdap.cdap.api.dataset.DatasetSpecification) DatasetId(io.cdap.cdap.proto.id.DatasetId) Test(org.junit.Test)
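
Examples 3 through 5 all turn on whether the dataset's properties carry a schema: enableDataset either throws IllegalArgumentException, creates the Hive table, or returns QueryHandle.NO_OP. The sketch below isolates the two property variants those tests exercise, using only calls that already appear in the examples above (DatasetProperties.EMPTY, DatasetProperties.builder(), DatasetProperties.SCHEMA, Schema.recordOf); the record name and field are illustrative.

import io.cdap.cdap.api.data.schema.Schema;
import io.cdap.cdap.api.dataset.DatasetProperties;

static DatasetProperties noSchemaProperties() {
    // No schema: enableDataset() rejects the dataset (Example 3) or is a no-op (Example 5).
    return DatasetProperties.EMPTY;
}

static DatasetProperties withSchemaProperties() {
    // Explicit schema: enableDataset() can derive the Hive columns ("x string" in Example 4).
    return DatasetProperties.builder()
        .add(DatasetProperties.SCHEMA,
             Schema.recordOf("intRecord", Schema.Field.of("x", Schema.of(Schema.Type.STRING))).toString())
        .build();
}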

Aggregations

DatasetSpecification (io.cdap.cdap.api.dataset.DatasetSpecification): 62
DatasetId (io.cdap.cdap.proto.id.DatasetId): 15
DatasetProperties (io.cdap.cdap.api.dataset.DatasetProperties): 14
Test (org.junit.Test): 14
DatasetDefinition (io.cdap.cdap.api.dataset.DatasetDefinition): 12
IncompatibleUpdateException (io.cdap.cdap.api.dataset.IncompatibleUpdateException): 11
DatasetAdmin (io.cdap.cdap.api.dataset.DatasetAdmin): 10
DatasetManagementException (io.cdap.cdap.api.dataset.DatasetManagementException): 9
IOException (java.io.IOException): 9
AbstractDatasetDefinition (io.cdap.cdap.api.dataset.lib.AbstractDatasetDefinition): 7
DatasetContext (io.cdap.cdap.api.dataset.DatasetContext): 6
DatasetTypeMeta (io.cdap.cdap.proto.DatasetTypeMeta): 6
TableId (io.cdap.cdap.data2.util.TableId): 5
Map (java.util.Map): 5
POST (javax.ws.rs.POST): 5
Path (javax.ws.rs.Path): 5
Reconfigurable (io.cdap.cdap.api.dataset.Reconfigurable): 4
Updatable (io.cdap.cdap.api.dataset.Updatable): 4
KeyValueTable (io.cdap.cdap.api.dataset.lib.KeyValueTable): 4
DatasetModuleMeta (io.cdap.cdap.proto.DatasetModuleMeta): 4