Example 21 with DatasetProperties

use of co.cask.cdap.api.dataset.DatasetProperties in project cdap by caskdata.

the class HiveExploreServiceFileSetTestRun method testPartitionedExisting.

private void testPartitionedExisting(String reuseProperty, boolean possessed) throws Exception {
    final DatasetId dummyInstanceId = NAMESPACE_ID.dataset("dummy");
    final DatasetId datasetInstanceId = NAMESPACE_ID.dataset("tpExisting");
    File path = new File(tmpFolder.newFolder(), "base");
    String tableName = "reuse";
    // create a partitioned file set in order to create a table in Hive and add a partition
    DatasetProperties props = PartitionedFileSetProperties.builder()
        .setPartitioning(Partitioning.builder().addIntField("number").build())
        .setBasePath(path.toString())
        .setEnableExploreOnCreate(true)
        .setExploreTableName(tableName)
        .setExploreSchema("key STRING, value INT")
        .setExploreFormat("csv")
        .build();
    datasetFramework.addInstance(PartitionedFileSet.class.getName(), dummyInstanceId, props);
    PartitionedFileSet dummy = datasetFramework.getDataset(dummyInstanceId, DatasetDefinition.NO_ARGUMENTS, null);
    Assert.assertNotNull(dummy);
    Location location = dummy.getEmbeddedFileSet().getLocation("number1").append("file1");
    PartitionKey key = PartitionKey.builder().addIntField("number", 1).build();
    FileWriterHelper.generateTextFile(location.getOutputStream(), ",", "x", 1, 2);
    addPartition(dummy, key, "number1");
    // validate data
    List<ColumnDesc> expectedColumns = Lists.newArrayList(
        new ColumnDesc(tableName + ".key", "STRING", 1, null),
        new ColumnDesc(tableName + ".value", "INT", 2, null),
        new ColumnDesc(tableName + ".number", "INT", 3, null));
    runCommand(NAMESPACE_ID, "SELECT * FROM " + tableName, true, expectedColumns,
        Lists.newArrayList(new QueryResult(Lists.<Object>newArrayList("x1", 1, 1))));
    // create a second file set with the same base path and explore table, opting in to reuse
    props = PartitionedFileSetProperties.builder()
        .setPartitioning(Partitioning.builder().addIntField("number").build())
        .setBasePath(path.toString())
        .setEnableExploreOnCreate(true)
        .setExploreTableName(tableName)
        .setExploreSchema("key STRING, value INT")
        .setExploreFormat("csv")
        .add(reuseProperty, "true")
        .build();
    datasetFramework.addInstance(PartitionedFileSet.class.getName(), datasetInstanceId, props);
    PartitionedFileSet partitioned = datasetFramework.getDataset(datasetInstanceId, DatasetDefinition.NO_ARGUMENTS, null);
    Assert.assertNotNull(partitioned);
    // update the second file set with a different explore schema
    props = PartitionedFileSetProperties.builder()
        .setPartitioning(Partitioning.builder().addIntField("number").build())
        .setBasePath(path.toString())
        .setEnableExploreOnCreate(true)
        .setExploreTableName(tableName)
        .setExploreSchema("k STRING, v INT")
        .setExploreFormat("csv")
        .add(reuseProperty, "true")
        .build();
    datasetFramework.updateInstance(datasetInstanceId, props);
    // validate data
    if (!possessed) {
        runCommand(NAMESPACE_ID, "SELECT * FROM " + tableName, true, expectedColumns,
            Lists.newArrayList(new QueryResult(Lists.<Object>newArrayList("x1", 1, 1))));
    } else {
        List<ColumnDesc> newExpectedColumns = Lists.newArrayList(
            new ColumnDesc(tableName + ".k", "STRING", 1, null),
            new ColumnDesc(tableName + ".v", "INT", 2, null),
            new ColumnDesc(tableName + ".number", "INT", 3, null));
        runCommand(NAMESPACE_ID, "SELECT * FROM " + tableName, true, newExpectedColumns, null);
    }
    datasetFramework.deleteInstance(datasetInstanceId);
    if (!possessed) {
        runCommand(NAMESPACE_ID, "SELECT * FROM " + tableName, true, expectedColumns,
            Lists.newArrayList(new QueryResult(Lists.<Object>newArrayList("x1", 1, 1))));
    } else {
        runCommand(NAMESPACE_ID, "SHOW tables", false, null, Collections.<QueryResult>emptyList());
    }
    datasetFramework.deleteInstance(dummyInstanceId);
}
Also used : QueryResult(co.cask.cdap.proto.QueryResult) DatasetProperties(co.cask.cdap.api.dataset.DatasetProperties) PartitionKey(co.cask.cdap.api.dataset.lib.PartitionKey) TimePartitionedFileSet(co.cask.cdap.api.dataset.lib.TimePartitionedFileSet) PartitionedFileSet(co.cask.cdap.api.dataset.lib.PartitionedFileSet) ColumnDesc(co.cask.cdap.proto.ColumnDesc) File(java.io.File) DatasetId(co.cask.cdap.proto.id.DatasetId) Location(org.apache.twill.filesystem.Location)
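
One detail worth noting in this example: the PartitionKey handed to addPartition must match, field for field, the Partitioning declared in the dataset properties. A minimal sketch of that pairing, using only the builder calls that appear in the test:

import co.cask.cdap.api.dataset.lib.PartitionKey;
import co.cask.cdap.api.dataset.lib.Partitioning;

public class PartitioningSketch {

    // the dataset declares its partition columns once, at creation time
    static final Partitioning PARTITIONING = Partitioning.builder().addIntField("number").build();

    // every key added later must supply a value for each declared field,
    // with matching name and type
    static PartitionKey keyFor(int number) {
        return PartitionKey.builder().addIntField("number", number).build();
    }
}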

Example 22 with DatasetProperties

use of co.cask.cdap.api.dataset.DatasetProperties in project cdap by caskdata.

the class TableTest method testMetrics.

private void testMetrics(boolean readless) throws Exception {
    final String tableName = "survive";
    DatasetProperties props = TableProperties.builder().setReadlessIncrementSupport(readless).build();
    DatasetAdmin admin = getTableAdmin(CONTEXT1, tableName, props);
    admin.create();
    Table table = getTable(CONTEXT1, tableName, props);
    final Map<String, Long> metrics = Maps.newHashMap();
    ((MeteredDataset) table).setMetricsCollector(new MetricsCollector() {

        @Override
        public void increment(String metricName, long value) {
            Long old = metrics.get(metricName);
            metrics.put(metricName, old == null ? value : old + value);
        }

        @Override
        public void gauge(String metricName, long value) {
            metrics.put(metricName, value);
        }
    });
    // Note that we don't need to finish tx for metrics to be reported
    Transaction tx0 = txClient.startShort();
    ((TransactionAware) table).startTx(tx0);
    int writes = 0;
    int reads = 0;
    table.put(new Put(R1, C1, V1));
    verifyDatasetMetrics(metrics, ++writes, reads);
    table.compareAndSwap(R1, C1, V1, V2);
    verifyDatasetMetrics(metrics, ++writes, ++reads);
    // note: will not write anything as expected value will not match
    table.compareAndSwap(R1, C1, V1, V2);
    verifyDatasetMetrics(metrics, writes, ++reads);
    table.increment(new Increment(R2, C2, 1L));
    if (readless) {
        verifyDatasetMetrics(metrics, ++writes, reads);
    } else {
        verifyDatasetMetrics(metrics, ++writes, ++reads);
    }
    table.incrementAndGet(new Increment(R2, C2, 1L));
    verifyDatasetMetrics(metrics, ++writes, ++reads);
    table.get(new Get(R1, C1, V1));
    verifyDatasetMetrics(metrics, writes, ++reads);
    Scanner scanner = table.scan(new Scan(null, null));
    while (scanner.next() != null) {
        verifyDatasetMetrics(metrics, writes, ++reads);
    }
    table.delete(new Delete(R1, C1, V1));
    verifyDatasetMetrics(metrics, ++writes, reads);
    // drop table
    admin.drop();
}
Also used : MetricsCollector(co.cask.cdap.api.metrics.MetricsCollector) Delete(co.cask.cdap.api.dataset.table.Delete) Scanner(co.cask.cdap.api.dataset.table.Scanner) Table(co.cask.cdap.api.dataset.table.Table) HBaseTable(co.cask.cdap.data2.dataset2.lib.table.hbase.HBaseTable) DatasetProperties(co.cask.cdap.api.dataset.DatasetProperties) DatasetAdmin(co.cask.cdap.api.dataset.DatasetAdmin) Put(co.cask.cdap.api.dataset.table.Put) Transaction(org.apache.tephra.Transaction) TransactionAware(org.apache.tephra.TransactionAware) Increment(co.cask.cdap.api.dataset.table.Increment) Get(co.cask.cdap.api.dataset.table.Get) MeteredDataset(co.cask.cdap.api.dataset.metrics.MeteredDataset) Scan(co.cask.cdap.api.dataset.table.Scan)
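
The anonymous MetricsCollector above simply accumulates values in a map. Extracted into a named class it becomes reusable across tests; this is a minimal sketch implementing only the two callbacks shown in the test (the snapshot helper is an illustrative addition, not part of the MetricsCollector API):

import co.cask.cdap.api.metrics.MetricsCollector;
import java.util.HashMap;
import java.util.Map;

public class MapMetricsCollector implements MetricsCollector {

    private final Map<String, Long> metrics = new HashMap<>();

    @Override
    public void increment(String metricName, long value) {
        // counters accumulate across calls
        Long old = metrics.get(metricName);
        metrics.put(metricName, old == null ? value : old + value);
    }

    @Override
    public void gauge(String metricName, long value) {
        // gauges overwrite the previous value
        metrics.put(metricName, value);
    }

    // hypothetical convenience accessor for assertions, not part of the API
    public Map<String, Long> snapshot() {
        return new HashMap<>(metrics);
    }
}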

Example 23 with DatasetProperties

use of co.cask.cdap.api.dataset.DatasetProperties in project cdap by caskdata.

the class TableTest method testBasicIncrementWriteWithTxSmall.

private void testBasicIncrementWriteWithTxSmall(boolean readless) throws Exception {
    DatasetProperties props = TableProperties.builder().setReadlessIncrementSupport(readless).build();
    DatasetAdmin admin = getTableAdmin(CONTEXT1, MY_TABLE, props);
    admin.create();
    Table myTable = getTable(CONTEXT1, MY_TABLE, props);
    // start 1st tx
    Transaction tx = txClient.startShort();
    ((TransactionAware) myTable).startTx(tx);
    myTable.increment(R1, a(C1), la(-3L));
    // we'll use this one to test that we can delete an incremented value and then increment again
    myTable.increment(R2, a(C2), la(5L));
    commitAndAssertSuccess(tx, (TransactionAware) myTable);
    // start 2nd tx
    tx = txClient.startShort();
    ((TransactionAware) myTable).startTx(tx);
    Assert.assertArrayEquals(Bytes.toBytes(-3L), myTable.get(R1, C1));
    myTable.increment(R1, a(C1), la(-3L));
    Assert.assertArrayEquals(Bytes.toBytes(-6L), myTable.get(R1, C1));
    Assert.assertArrayEquals(Bytes.toBytes(5L), myTable.get(R2, C2));
    myTable.delete(R2, C2);
    Assert.assertArrayEquals(null, myTable.get(R2, C2));
    commitAndAssertSuccess(tx, (TransactionAware) myTable);
    // start 3rd tx
    tx = txClient.startShort();
    ((TransactionAware) myTable).startTx(tx);
    Assert.assertArrayEquals(Bytes.toBytes(-6L), myTable.get(R1, C1));
    Assert.assertArrayEquals(null, myTable.get(R2, C2));
    myTable.increment(R2, a(C2), la(7L));
    Assert.assertArrayEquals(Bytes.toBytes(7L), myTable.get(R2, C2));
    commitAndAssertSuccess(tx, (TransactionAware) myTable);
    // start 4th tx
    tx = txClient.startShort();
    ((TransactionAware) myTable).startTx(tx);
    Assert.assertArrayEquals(Bytes.toBytes(7L), myTable.get(R2, C2));
    commitAndAssertSuccess(tx, (TransactionAware) myTable);
    admin.drop();
}
Also used : Table(co.cask.cdap.api.dataset.table.Table) HBaseTable(co.cask.cdap.data2.dataset2.lib.table.hbase.HBaseTable) Transaction(org.apache.tephra.Transaction) TransactionAware(org.apache.tephra.TransactionAware) DatasetProperties(co.cask.cdap.api.dataset.DatasetProperties) DatasetAdmin(co.cask.cdap.api.dataset.DatasetAdmin)
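
Both increment tests hinge on a single creation-time property. A minimal sketch of the two configurations, assuming TableProperties lives in the same co.cask.cdap.api.dataset.table package as the other table classes imported above: in readless mode increment() can be a pure write, while the default mode performs a read-modify-write (exactly the difference the metrics assertions in Example 22 count):

import co.cask.cdap.api.dataset.DatasetProperties;
import co.cask.cdap.api.dataset.table.TableProperties;

public class IncrementModeSketch {

    // readless: increment() needs no read of the current value before writing
    static final DatasetProperties READLESS =
            TableProperties.builder().setReadlessIncrementSupport(true).build();

    // default: increment() performs a read-modify-write
    static final DatasetProperties READ_MODIFY_WRITE =
            TableProperties.builder().setReadlessIncrementSupport(false).build();
}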

Example 24 with DatasetProperties

use of co.cask.cdap.api.dataset.DatasetProperties in project cdap by caskdata.

the class DatasetSystemMetadataWriterTest method testFilesetSchema.

@Test
public void testFilesetSchema() throws Exception {
    DatasetProperties filesetAvroTableProps = FileSetProperties.builder()
        .setTableProperty(DatasetSystemMetadataWriter.FILESET_AVRO_SCHEMA_PROPERTY, "avro-table-schema")
        .build();
    assertDatasetSchema("avro-table-schema", filesetAvroTableProps);
    // When SCHEMA property is present, it should override
    filesetAvroTableProps = FileSetProperties.builder()
        .setTableProperty(DatasetSystemMetadataWriter.FILESET_AVRO_SCHEMA_PROPERTY, "avro-table-schema")
        .add(DatasetProperties.SCHEMA, "avro-schema")
        .build();
    assertDatasetSchema("avro-schema", filesetAvroTableProps);
    DatasetProperties filesetAvroOutputProps = FileSetProperties.builder()
        .setOutputProperty(DatasetSystemMetadataWriter.FILESET_AVRO_SCHEMA_OUTPUT_KEY, "avro-output-schema")
        .build();
    assertDatasetSchema("avro-output-schema", filesetAvroOutputProps);
    // When SCHEMA property is present, it should override
    filesetAvroOutputProps = FileSetProperties.builder()
        .setOutputProperty(DatasetSystemMetadataWriter.FILESET_AVRO_SCHEMA_OUTPUT_KEY, "avro-output-schema")
        .add(DatasetProperties.SCHEMA, "avro-schema")
        .build();
    assertDatasetSchema("avro-schema", filesetAvroOutputProps);
    DatasetProperties filesetParquetProps = FileSetProperties.builder()
        .setOutputProperty(DatasetSystemMetadataWriter.FILESET_PARQUET_SCHEMA_OUTPUT_KEY, "parquet-output-schema")
        .build();
    assertDatasetSchema("parquet-output-schema", filesetParquetProps);
    // When SCHEMA property is present, it should override
    filesetParquetProps = FileSetProperties.builder()
        .setOutputProperty(DatasetSystemMetadataWriter.FILESET_PARQUET_SCHEMA_OUTPUT_KEY, "parquet-output-schema")
        .add(DatasetProperties.SCHEMA, "parquet-schema")
        .build();
    assertDatasetSchema("parquet-schema", filesetParquetProps);
}
Also used : DatasetProperties(co.cask.cdap.api.dataset.DatasetProperties) Test(org.junit.Test)
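
The rule the test pins down: a generic DatasetProperties.SCHEMA entry overrides any format-specific schema key. A minimal sketch of the override case, with the format-specific key passed in as a parameter so no internal constant needs to be referenced (the FileSetProperties import is assumed from the standard lib package):

import co.cask.cdap.api.dataset.DatasetProperties;
import co.cask.cdap.api.dataset.lib.FileSetProperties;

public class SchemaPrecedenceSketch {

    // builds properties where both a format-specific schema and the generic
    // SCHEMA property are set; the metadata writer should report "generic-schema"
    static DatasetProperties withOverride(String formatSchemaKey) {
        return FileSetProperties.builder()
                .setTableProperty(formatSchemaKey, "format-specific-schema")
                .add(DatasetProperties.SCHEMA, "generic-schema")
                .build();
    }
}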

Example 25 with DatasetProperties

use of co.cask.cdap.api.dataset.DatasetProperties in project cdap by caskdata.

the class DatasetInstanceHandlerTest method testUpdateInstance.

@Test
public void testUpdateInstance() throws Exception {
    // nothing has been created yet, so the instance list is empty
    List<DatasetSpecificationSummary> instances = getInstances().getResponseObject();
    Assert.assertEquals(0, instances.size());
    try {
        DatasetProperties props = DatasetProperties.builder()
            .add("prop1", "val1")
            .add(TestModule2.NOT_RECONFIGURABLE, "this")
            .build();
        // deploy modules
        deployModule("module1", TestModule1.class);
        deployModule("module2", TestModule2.class);
        // create dataset instance
        Assert.assertEquals(HttpStatus.SC_OK, createInstance("dataset1", "datasetType2", props).getResponseCode());
        // verify instance was created
        instances = getInstances().getResponseObject();
        Assert.assertEquals(1, instances.size());
        DatasetMeta meta = getInstanceObject("dataset1").getResponseObject();
        Map<String, String> storedOriginalProps = meta.getSpec().getOriginalProperties();
        Assert.assertEquals(props.getProperties(), storedOriginalProps);
        Map<String, String> retrievedProps = getInstanceProperties("dataset1").getResponseObject();
        Assert.assertEquals(props.getProperties(), retrievedProps);
        // these properties are incompatible because TestModule2.NOT_RECONFIGURABLE may not change
        DatasetProperties newProps = DatasetProperties.builder()
            .add("prop2", "val2")
            .add(TestModule2.NOT_RECONFIGURABLE, "that")
            .build();
        Assert.assertEquals(HttpStatus.SC_CONFLICT, updateInstance("dataset1", newProps).getResponseCode());
        // update dataset instance with valid properties
        newProps = DatasetProperties.builder()
            .add("prop2", "val2")
            .add(TestModule2.NOT_RECONFIGURABLE, "this")
            .build();
        Assert.assertEquals(HttpStatus.SC_OK, updateInstance("dataset1", newProps).getResponseCode());
        meta = getInstanceObject("dataset1").getResponseObject();
        Assert.assertEquals(newProps.getProperties(), meta.getSpec().getOriginalProperties());
        Assert.assertEquals("val2", meta.getSpec().getProperty("prop2"));
        Assert.assertNull(meta.getSpec().getProperty("prop1"));
        retrievedProps = getInstanceProperties("dataset1").getResponseObject();
        Assert.assertEquals(newProps.getProperties(), retrievedProps);
    } finally {
        // delete dataset instance
        Assert.assertEquals(HttpStatus.SC_OK, deleteInstance("dataset1").getResponseCode());
        Assert.assertEquals(0, getInstances().getResponseObject().size());
        // delete dataset modules
        Assert.assertEquals(HttpStatus.SC_OK, deleteModule("module2").getResponseCode());
        Assert.assertEquals(HttpStatus.SC_OK, deleteModule("module1").getResponseCode());
    }
}
Also used : DatasetProperties(co.cask.cdap.api.dataset.DatasetProperties) DatasetMeta(co.cask.cdap.proto.DatasetMeta) DatasetSpecificationSummary(co.cask.cdap.proto.DatasetSpecificationSummary) Test(org.junit.Test)
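
For reference, the three property sets driving the update flow above, pulled out side by side. This is a sketch built from the same builder calls as the test; the NOT_RECONFIGURABLE key belongs to the test module, so it is passed in as a parameter here:

import co.cask.cdap.api.dataset.DatasetProperties;

public class UpdateCompatibilitySketch {

    static DatasetProperties original(String notReconfigurableKey) {
        return DatasetProperties.builder()
                .add("prop1", "val1")
                .add(notReconfigurableKey, "this")
                .build();
    }

    // rejected with 409 CONFLICT: the non-reconfigurable value changes
    static DatasetProperties incompatible(String notReconfigurableKey) {
        return DatasetProperties.builder()
                .add("prop2", "val2")
                .add(notReconfigurableKey, "that")
                .build();
    }

    // accepted: same non-reconfigurable value; other properties may change freely
    static DatasetProperties compatible(String notReconfigurableKey) {
        return DatasetProperties.builder()
                .add("prop2", "val2")
                .add(notReconfigurableKey, "this")
                .build();
    }
}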

Aggregations

DatasetProperties (co.cask.cdap.api.dataset.DatasetProperties): 38
DatasetSpecification (co.cask.cdap.api.dataset.DatasetSpecification): 17
Test (org.junit.Test): 12
Transaction (org.apache.tephra.Transaction): 11
DatasetId (co.cask.cdap.proto.id.DatasetId): 10
IncompatibleUpdateException (co.cask.cdap.api.dataset.IncompatibleUpdateException): 9
DatasetAdmin (co.cask.cdap.api.dataset.DatasetAdmin): 8
Table (co.cask.cdap.api.dataset.table.Table): 7
BufferingTableTest (co.cask.cdap.data2.dataset2.lib.table.BufferingTableTest): 6
TransactionAware (org.apache.tephra.TransactionAware): 6
HBaseTable (co.cask.cdap.data2.dataset2.lib.table.hbase.HBaseTable): 5
DatasetTypeMeta (co.cask.cdap.proto.DatasetTypeMeta): 5
IOException (java.io.IOException): 5
DatasetManagementException (co.cask.cdap.api.dataset.DatasetManagementException): 4
Reconfigurable (co.cask.cdap.api.dataset.Reconfigurable): 4
Get (co.cask.cdap.api.dataset.table.Get): 4
Scan (co.cask.cdap.api.dataset.table.Scan): 4
Scanner (co.cask.cdap.api.dataset.table.Scanner): 4
DetachedTxSystemClient (org.apache.tephra.inmemory.DetachedTxSystemClient): 4
BadRequestException (co.cask.cdap.common.BadRequestException): 3