Search in sources:

Example 1 with DatasetProperties

Use of io.cdap.cdap.api.dataset.DatasetProperties in project cdap by caskdata.

From class HiveExploreServiceFileSetTestRun, method testPartitionedExisting.

private void testPartitionedExisting(String reuseProperty, boolean possessed) throws Exception {
    final DatasetId dummyInstanceId = NAMESPACE_ID.dataset("dummy");
    final DatasetId datasetInstanceId = NAMESPACE_ID.dataset("tpExisting");
    File path = new File(tmpFolder.newFolder(), "base");
    String tableName = "reuse";
    // create a dummy PFS in order to create a table in Hive and add a partition
    DatasetProperties props = PartitionedFileSetProperties.builder()
        .setPartitioning(Partitioning.builder().addIntField("number").build())
        .setBasePath(path.toString())
        .setEnableExploreOnCreate(true)
        .setExploreTableName(tableName)
        .setExploreSchema("key STRING, value INT")
        .setExploreFormat("csv")
        .build();
    datasetFramework.addInstance(PartitionedFileSet.class.getName(), dummyInstanceId, props);
    PartitionedFileSet dummy = datasetFramework.getDataset(dummyInstanceId, DatasetDefinition.NO_ARGUMENTS, null);
    Assert.assertNotNull(dummy);
    Location location = dummy.getEmbeddedFileSet().getLocation("number1").append("file1");
    PartitionKey key = PartitionKey.builder().addIntField("number", 1).build();
    FileWriterHelper.generateTextFile(location.getOutputStream(), ",", "x", 1, 2);
    addPartition(dummy, key, "number1");
    // validate data
    List<ColumnDesc> expectedColumns = Lists.newArrayList(
        new ColumnDesc(tableName + ".key", "STRING", 1, null),
        new ColumnDesc(tableName + ".value", "INT", 2, null),
        new ColumnDesc(tableName + ".number", "INT", 3, null));
    runCommand(NAMESPACE_ID, "SELECT * FROM " + tableName, true, expectedColumns,
        Lists.newArrayList(new QueryResult(Lists.<Object>newArrayList("x1", 1, 1))));
    // create a second PFS at the same base path that reuses the existing explore table
    props = PartitionedFileSetProperties.builder()
        .setPartitioning(Partitioning.builder().addIntField("number").build())
        .setBasePath(path.toString())
        .setEnableExploreOnCreate(true)
        .setExploreTableName(tableName)
        .setExploreSchema("key STRING, value INT")
        .setExploreFormat("csv")
        .add(reuseProperty, "true")
        .build();
    datasetFramework.addInstance(PartitionedFileSet.class.getName(), datasetInstanceId, props);
    PartitionedFileSet partitioned = datasetFramework.getDataset(datasetInstanceId, DatasetDefinition.NO_ARGUMENTS, null);
    Assert.assertNotNull(partitioned);
    // update the instance with a different explore schema ("k", "v" instead of "key", "value")
    props = PartitionedFileSetProperties.builder()
        .setPartitioning(Partitioning.builder().addIntField("number").build())
        .setBasePath(path.toString())
        .setEnableExploreOnCreate(true)
        .setExploreTableName(tableName)
        .setExploreSchema("k STRING, v INT")
        .setExploreFormat("csv")
        .add(reuseProperty, "true")
        .build();
    datasetFramework.updateInstance(datasetInstanceId, props);
    // validate data
    if (!possessed) {
        runCommand(NAMESPACE_ID, "SELECT * FROM " + tableName, true, expectedColumns,
            Lists.newArrayList(new QueryResult(Lists.<Object>newArrayList("x1", 1, 1))));
    } else {
        List<ColumnDesc> newExpectedColumns = Lists.newArrayList(
            new ColumnDesc(tableName + ".k", "STRING", 1, null),
            new ColumnDesc(tableName + ".v", "INT", 2, null),
            new ColumnDesc(tableName + ".number", "INT", 3, null));
        runCommand(NAMESPACE_ID, "SELECT * FROM " + tableName, true, newExpectedColumns, null);
    }
    datasetFramework.deleteInstance(datasetInstanceId);
    if (!possessed) {
        runCommand(NAMESPACE_ID, "SELECT * FROM " + tableName, true, expectedColumns,
            Lists.newArrayList(new QueryResult(Lists.<Object>newArrayList("x1", 1, 1))));
    } else {
        runCommand(NAMESPACE_ID, "SHOW tables", false, null, Collections.<QueryResult>emptyList());
    }
    datasetFramework.deleteInstance(dummyInstanceId);
}
Also used : QueryResult(io.cdap.cdap.proto.QueryResult) DatasetProperties(io.cdap.cdap.api.dataset.DatasetProperties) PartitionKey(io.cdap.cdap.api.dataset.lib.PartitionKey) PartitionedFileSet(io.cdap.cdap.api.dataset.lib.PartitionedFileSet) TimePartitionedFileSet(io.cdap.cdap.api.dataset.lib.TimePartitionedFileSet) ColumnDesc(io.cdap.cdap.proto.ColumnDesc) File(java.io.File) DatasetId(io.cdap.cdap.proto.id.DatasetId) Location(org.apache.twill.filesystem.Location)
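
The builders at the heart of this example are small and worth seeing in isolation. A minimal sketch, assuming the example's imports; the field name and values are illustrative:

    // Partitioning declares the shape of the partition columns at dataset creation
    Partitioning partitioning = Partitioning.builder()
        .addIntField("number")
        .build();
    // PartitionKey supplies a concrete value for each declared field when adding a partition
    PartitionKey key = PartitionKey.builder()
        .addIntField("number", 1)
        .build();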

Example 2 with DatasetProperties

Use of io.cdap.cdap.api.dataset.DatasetProperties in project cdap by caskdata.

From class DatasetInstanceHandlerTest, method testUpdateInstance.

@Test
public void testUpdateInstance() throws Exception {
    // nothing has been created yet, so the list of instances is empty
    List<DatasetSpecificationSummary> instances = getInstances().getResponseObject();
    Assert.assertEquals(0, instances.size());
    try {
        DatasetProperties props = DatasetProperties.builder().add("prop1", "val1").add(TestModule2.NOT_RECONFIGURABLE, "this").build();
        // deploy modules
        deployModule("module1", TestModule1.class);
        deployModule("module2", TestModule2.class);
        // create dataset instance
        Assert.assertEquals(HttpStatus.SC_OK, createInstance("dataset1", "datasetType2", props).getResponseCode());
        // verify instance was created
        instances = getInstances().getResponseObject();
        Assert.assertEquals(1, instances.size());
        DatasetMeta meta = getInstanceObject("dataset1").getResponseObject();
        Map<String, String> storedOriginalProps = meta.getSpec().getOriginalProperties();
        Assert.assertEquals(props.getProperties(), storedOriginalProps);
        Map<String, String> retrievedProps = getInstanceProperties("dataset1").getResponseObject();
        Assert.assertEquals(props.getProperties(), retrievedProps);
        // these properties are incompatible because TestModule2.NOT_RECONFIGURABLE may not change
        DatasetProperties newProps = DatasetProperties.builder().add("prop2", "val2").add(TestModule2.NOT_RECONFIGURABLE, "that").build();
        Assert.assertEquals(HttpStatus.SC_CONFLICT, updateInstance("dataset1", newProps).getResponseCode());
        // update dataset instance with valid properties
        newProps = DatasetProperties.builder().add("prop2", "val2").add(TestModule2.NOT_RECONFIGURABLE, "this").build();
        Assert.assertEquals(HttpStatus.SC_OK, updateInstance("dataset1", newProps).getResponseCode());
        meta = getInstanceObject("dataset1").getResponseObject();
        Assert.assertEquals(newProps.getProperties(), meta.getSpec().getOriginalProperties());
        Assert.assertEquals("val2", meta.getSpec().getProperty("prop2"));
        Assert.assertNull(meta.getSpec().getProperty("prop1"));
        retrievedProps = getInstanceProperties("dataset1").getResponseObject();
        Assert.assertEquals(newProps.getProperties(), retrievedProps);
    } finally {
        // delete dataset instance
        Assert.assertEquals(HttpStatus.SC_OK, deleteInstance("dataset1").getResponseCode());
        Assert.assertEquals(0, getInstances().getResponseObject().size());
        // delete dataset modules
        Assert.assertEquals(HttpStatus.SC_OK, deleteModule("module2").getResponseCode());
        Assert.assertEquals(HttpStatus.SC_OK, deleteModule("module1").getResponseCode());
    }
}
Also used : DatasetProperties(io.cdap.cdap.api.dataset.DatasetProperties) DatasetMeta(io.cdap.cdap.proto.DatasetMeta) DatasetSpecificationSummary(io.cdap.cdap.proto.DatasetSpecificationSummary) Test(org.junit.Test)
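
For reference, a minimal sketch of the DatasetProperties round-trip this test exercises, assuming the example's imports; the property names are illustrative:

    DatasetProperties props = DatasetProperties.builder()
        .add("prop1", "val1")
        .add("prop2", "val2")
        .build();
    // getProperties() returns the plain key/value map, which is what the handler
    // stores and what getInstanceProperties(...) hands back
    Map<String, String> asMap = props.getProperties();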

Example 3 with DatasetProperties

Use of io.cdap.cdap.api.dataset.DatasetProperties in project cdap by caskdata.

From class DatasetDefinitionRegistryWithDefaultModules, method testTimeSeriesReconfigure.

private void testTimeSeriesReconfigure(DatasetDefinition def) throws IncompatibleUpdateException {
    DatasetProperties props = DatasetProperties.builder()
        .add(TimeseriesDataset.ATTR_TIME_INTERVAL_TO_STORE_PER_ROW, String.valueOf(TimeUnit.HOURS.toMillis(1)))
        .build();
    DatasetProperties compatProps = TableProperties.builder()
        .setTTL(TimeUnit.DAYS.toSeconds(1))
        .add(TimeseriesDataset.ATTR_TIME_INTERVAL_TO_STORE_PER_ROW, String.valueOf(TimeUnit.HOURS.toMillis(1)))
        .build();
    DatasetProperties incompatProps = TableProperties.builder()
        .setTTL(TimeUnit.DAYS.toSeconds(1))
        .add(TimeseriesDataset.ATTR_TIME_INTERVAL_TO_STORE_PER_ROW, String.valueOf(TimeUnit.HOURS.toMillis(2)))
        .build();
    DatasetSpecification spec = def.configure("tt", props);
    Assert.assertTrue(def instanceof Reconfigurable);
    ((Reconfigurable) def).reconfigure("tt", compatProps, spec);
    try {
        ((Reconfigurable) def).reconfigure("tt", incompatProps, spec);
        Assert.fail("reconfigure should have thrown exception");
    } catch (IncompatibleUpdateException e) {
        // expected
    }
}
Also used : DatasetProperties(io.cdap.cdap.api.dataset.DatasetProperties) DatasetSpecification(io.cdap.cdap.api.dataset.DatasetSpecification) Reconfigurable(io.cdap.cdap.api.dataset.Reconfigurable) IncompatibleUpdateException(io.cdap.cdap.api.dataset.IncompatibleUpdateException)
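
The IncompatibleUpdateException comes out of the definition's reconfigure check. A hedged sketch of the kind of guard such a definition performs, assuming the example's imports; checkRowIntervalUnchanged is a hypothetical helper, not CDAP code:

    static void checkRowIntervalUnchanged(DatasetSpecification existing, DatasetProperties newProps)
        throws IncompatibleUpdateException {
        String oldValue = existing.getProperty(TimeseriesDataset.ATTR_TIME_INTERVAL_TO_STORE_PER_ROW);
        String newValue = newProps.getProperties().get(TimeseriesDataset.ATTR_TIME_INTERVAL_TO_STORE_PER_ROW);
        if (oldValue != null && !oldValue.equals(newValue)) {
            // the per-row interval fixes the physical row layout, so it may not change
            throw new IncompatibleUpdateException("time interval to store per row cannot be changed");
        }
    }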

Example 4 with DatasetProperties

Use of io.cdap.cdap.api.dataset.DatasetProperties in project cdap by caskdata.

From class DatasetDefinitionRegistryWithDefaultModules, method testIndexedTableReconfigure.

@Test
public void testIndexedTableReconfigure() throws IncompatibleUpdateException {
    DatasetDefinition indexedTableDef = registry.get(IndexedTable.class.getName());
    Assert.assertTrue(indexedTableDef instanceof Reconfigurable);
    DatasetProperties props = TableProperties.builder()
        .setReadlessIncrementSupport(false)
        .add(IndexedTable.INDEX_COLUMNS_CONF_KEY, "a,b,c")
        .build();
    DatasetSpecification spec = indexedTableDef.configure("idxtb", props);
    DatasetProperties compat = TableProperties.builder()
        .setReadlessIncrementSupport(true) // turning on is ok
        .add(IndexedTable.INDEX_COLUMNS_CONF_KEY, "c,b,a")
        .build();
    spec = ((Reconfigurable) indexedTableDef).reconfigure("idxtb", compat, spec);
    DatasetProperties incompat = TableProperties.builder()
        .setReadlessIncrementSupport(true)
        .add(IndexedTable.INDEX_COLUMNS_CONF_KEY, "a,d")
        .build();
    try {
        ((Reconfigurable) indexedTableDef).reconfigure("idxtb", incompat, spec);
        Assert.fail("reconfigure should have thrown exception");
    } catch (IncompatibleUpdateException e) {
        // expected
    }
    incompat = TableProperties.builder()
        .setReadlessIncrementSupport(false) // turning off is not ok
        .add(IndexedTable.INDEX_COLUMNS_CONF_KEY, "a,b,c")
        .build();
    try {
        ((Reconfigurable) indexedTableDef).reconfigure("idxtb", incompat, spec);
        Assert.fail("reconfigure should have thrown exception");
    } catch (IncompatibleUpdateException e) {
        // expected
    }
}
Also used : DatasetProperties(io.cdap.cdap.api.dataset.DatasetProperties) DatasetSpecification(io.cdap.cdap.api.dataset.DatasetSpecification) DatasetDefinition(io.cdap.cdap.api.dataset.DatasetDefinition) Reconfigurable(io.cdap.cdap.api.dataset.Reconfigurable) IncompatibleUpdateException(io.cdap.cdap.api.dataset.IncompatibleUpdateException) Test(org.junit.Test)
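
The try/fail/catch blocks above are the classic JUnit 4 idiom for expected exceptions. On JUnit 4.13+ the same expectation can be written more compactly; a sketch, assuming the captured locals are effectively final:

    Assert.assertThrows(IncompatibleUpdateException.class,
        () -> ((Reconfigurable) indexedTableDef).reconfigure("idxtb", incompat, spec));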

Example 5 with DatasetProperties

Use of io.cdap.cdap.api.dataset.DatasetProperties in project cdap by caskdata.

From class HBaseTableTest, method testCachedEncodedTransaction.

@Test
public void testCachedEncodedTransaction() throws Exception {
    String tableName = "testEncodedTxTable";
    DatasetProperties props = DatasetProperties.EMPTY;
    getTableAdmin(CONTEXT1, tableName, props).create();
    DatasetSpecification tableSpec = DatasetSpecification.builder(tableName, HBaseTable.class.getName()).build();
    // use a transaction codec that counts the number of times encode() is called
    final AtomicInteger encodeCount = new AtomicInteger();
    final TransactionCodec codec = new TransactionCodec() {

        @Override
        public byte[] encode(Transaction tx) throws IOException {
            encodeCount.incrementAndGet();
            return super.encode(tx);
        }
    };
    // use a table util that creates an HTable that validates the encoded tx on each get
    final AtomicReference<Transaction> txRef = new AtomicReference<>();
    HBaseTableUtil util = new DelegatingHBaseTableUtil(hBaseTableUtil) {

        @Override
        public org.apache.hadoop.hbase.client.Table createTable(Configuration conf, TableId tableId) throws IOException {
            org.apache.hadoop.hbase.client.Table table = super.createTable(conf, tableId);
            return new DelegatingTable(table) {

                @Override
                public Result get(org.apache.hadoop.hbase.client.Get get) throws IOException {
                    Assert.assertEquals(txRef.get().getTransactionId(),
                        codec.decode(get.getAttribute(TxConstants.TX_OPERATION_ATTRIBUTE_KEY)).getTransactionId());
                    return super.get(get);
                }

                @Override
                public Result[] get(List<org.apache.hadoop.hbase.client.Get> gets) throws IOException {
                    for (org.apache.hadoop.hbase.client.Get get : gets) {
                        Assert.assertEquals(txRef.get().getTransactionId(),
                            codec.decode(get.getAttribute(TxConstants.TX_OPERATION_ATTRIBUTE_KEY)).getTransactionId());
                    }
                    return super.get(gets);
                }

                @Override
                public ResultScanner getScanner(org.apache.hadoop.hbase.client.Scan scan) throws IOException {
                    Assert.assertEquals(txRef.get().getTransactionId(),
                        codec.decode(scan.getAttribute(TxConstants.TX_OPERATION_ATTRIBUTE_KEY)).getTransactionId());
                    return super.getScanner(scan);
                }
            };
        }
    };
    HBaseTable table = new HBaseTable(CONTEXT1, tableSpec, Collections.<String, String>emptyMap(),
        cConf, TEST_HBASE.getConfiguration(), util, codec);
    DetachedTxSystemClient txSystemClient = new DetachedTxSystemClient();
    // test all operations: only the first one encodes
    Transaction tx = txSystemClient.startShort();
    txRef.set(tx);
    table.startTx(tx);
    table.put(b("row1"), b("col1"), b("val1"));
    Assert.assertEquals(0, encodeCount.get());
    table.get(b("row"));
    Assert.assertEquals(1, encodeCount.get());
    table.get(ImmutableList.of(new Get("a"), new Get("b")));
    Assert.assertEquals(1, encodeCount.get());
    Scanner scanner = table.scan(new Scan(null, null));
    Assert.assertEquals(1, encodeCount.get());
    scanner.close();
    table.increment(b("z"), b("z"), 0L);
    Assert.assertEquals(1, encodeCount.get());
    table.commitTx();
    table.postTxCommit();
    // test that for the next tx, we encode again
    tx = txSystemClient.startShort();
    txRef.set(tx);
    table.startTx(tx);
    table.get(b("row"));
    Assert.assertEquals(2, encodeCount.get());
    table.commitTx();
    // test that we encode again, even if postTxCommit was not called
    tx = txSystemClient.startShort();
    txRef.set(tx);
    table.startTx(tx);
    table.get(b("row"));
    Assert.assertEquals(3, encodeCount.get());
    table.commitTx();
    table.rollbackTx();
    // test that rollback does not encode the tx
    Assert.assertEquals(3, encodeCount.get());
    // test that we encode again if the previous tx rolled back
    tx = txSystemClient.startShort();
    txRef.set(tx);
    table.startTx(tx);
    table.get(b("row"));
    Assert.assertEquals(4, encodeCount.get());
    table.commitTx();
    table.close();
    Assert.assertEquals(4, encodeCount.get());
}
Also used : TableId(io.cdap.cdap.data2.util.TableId) RegionScanner(org.apache.hadoop.hbase.regionserver.RegionScanner) Scanner(io.cdap.cdap.api.dataset.table.Scanner) ResultScanner(org.apache.hadoop.hbase.client.ResultScanner) Configuration(org.apache.hadoop.conf.Configuration) CConfiguration(io.cdap.cdap.common.conf.CConfiguration) Result(org.apache.hadoop.hbase.client.Result) DelegatingTable(io.cdap.cdap.data2.util.hbase.DelegatingTable) DetachedTxSystemClient(org.apache.tephra.inmemory.DetachedTxSystemClient) List(java.util.List) ImmutableList(com.google.common.collect.ImmutableList) DatasetProperties(io.cdap.cdap.api.dataset.DatasetProperties) DatasetSpecification(io.cdap.cdap.api.dataset.DatasetSpecification) AtomicReference(java.util.concurrent.atomic.AtomicReference) HBaseTableUtil(io.cdap.cdap.data2.util.hbase.HBaseTableUtil) Transaction(org.apache.tephra.Transaction) AtomicInteger(java.util.concurrent.atomic.AtomicInteger) TransactionCodec(org.apache.tephra.TransactionCodec) Get(io.cdap.cdap.api.dataset.table.Get) Scan(io.cdap.cdap.api.dataset.table.Scan) BufferingTableTest(io.cdap.cdap.data2.dataset2.lib.table.BufferingTableTest) Test(org.junit.Test)
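
What this test pins down is a per-transaction cache of the encoded bytes: encode() runs once per transaction, and subsequent operations in the same transaction reuse the cached value. A hedged sketch of that caching shape, assuming the example's imports; CachingTransactionCodec is a hypothetical illustration, not the class HBaseTable actually uses:

    class CachingTransactionCodec extends TransactionCodec {
        private Transaction lastTx;
        private byte[] lastEncoded;

        @Override
        public byte[] encode(Transaction tx) throws IOException {
            // identity check: each startShort() yields a fresh Transaction object
            if (tx != lastTx) {
                lastEncoded = super.encode(tx);
                lastTx = tx;
            }
            return lastEncoded;
        }
    }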

Aggregations

DatasetProperties (io.cdap.cdap.api.dataset.DatasetProperties): 35 usages
DatasetSpecification (io.cdap.cdap.api.dataset.DatasetSpecification): 14 usages
Test (org.junit.Test): 12 usages
Transaction (org.apache.tephra.Transaction): 11 usages
DatasetAdmin (io.cdap.cdap.api.dataset.DatasetAdmin): 8 usages
Table (io.cdap.cdap.api.dataset.table.Table): 7 usages
DatasetId (io.cdap.cdap.proto.id.DatasetId): 7 usages
IncompatibleUpdateException (io.cdap.cdap.api.dataset.IncompatibleUpdateException): 6 usages
BufferingTableTest (io.cdap.cdap.data2.dataset2.lib.table.BufferingTableTest): 6 usages
TransactionAware (org.apache.tephra.TransactionAware): 6 usages
HBaseTable (io.cdap.cdap.data2.dataset2.lib.table.hbase.HBaseTable): 5 usages
Reconfigurable (io.cdap.cdap.api.dataset.Reconfigurable): 4 usages
Get (io.cdap.cdap.api.dataset.table.Get): 4 usages
Scan (io.cdap.cdap.api.dataset.table.Scan): 4 usages
Scanner (io.cdap.cdap.api.dataset.table.Scanner): 4 usages
TableId (io.cdap.cdap.data2.util.TableId): 4 usages
DetachedTxSystemClient (org.apache.tephra.inmemory.DetachedTxSystemClient): 4 usages
BufferingTable (io.cdap.cdap.data2.dataset2.lib.table.BufferingTable): 3 usages
DatasetTypeMeta (io.cdap.cdap.proto.DatasetTypeMeta): 3 usages
IOException (java.io.IOException): 3 usages