Example 11 with DatasetProperties

use of io.cdap.cdap.api.dataset.DatasetProperties in project cdap by caskdata.

the class TableTest method testConflictDetection.

private void testConflictDetection(ConflictDetection level) throws Exception {
    // variable names follow the pattern tableXY, meaning "tableX as used in tx Y" (e.g. table13 is table1 used in tx3)
    String table1 = "table1";
    String table2 = "table2";
    DatasetProperties props = TableProperties.builder().setConflictDetection(level).build();
    DatasetAdmin admin1 = getTableAdmin(CONTEXT1, table1, props);
    DatasetAdmin admin2 = getTableAdmin(CONTEXT1, table2, props);
    admin1.create();
    admin2.create();
    try (Table table11 = getTable(CONTEXT1, table1, props);
        Table table22 = getTable(CONTEXT1, table2, props);
        Table table13 = getTable(CONTEXT1, table1, props);
        Table table14 = getTable(CONTEXT1, table1, props);
        Table table15 = getTable(CONTEXT1, table1, props);
        Table table16 = getTable(CONTEXT1, table1, props);
        Table table17 = getTable(CONTEXT1, table1, props);
        Table table18 = getTable(CONTEXT1, table1, props);
        Table table19 = getTable(CONTEXT1, table1, props)) {
        // 1) Test conflicts when using different tables
        Transaction tx1 = txClient.startShort();
        ((TransactionAware) table11).startTx(tx1);
        // write r1->c1,v1 but do not commit yet
        table11.put(R1, a(C1), a(V1));
        // start new tx
        Transaction tx2 = txClient.startShort();
        ((TransactionAware) table22).startTx(tx2);
        // tx2 changes the same data but in a different table
        table22.put(R1, a(C1), a(V2));
        // start new tx
        Transaction tx3 = txClient.startShort();
        ((TransactionAware) table13).startTx(tx3);
        // tx3 changes the same data in the same table as tx1
        table13.put(R1, a(C1), a(V2));
        // committing tx1
        txClient.canCommitOrThrow(tx1, ((TransactionAware) table11).getTxChanges());
        Assert.assertTrue(((TransactionAware) table11).commitTx());
        txClient.commitOrThrow(tx1);
        // no conflict is expected when committing tx2
        txClient.canCommitOrThrow(tx2, ((TransactionAware) table22).getTxChanges());
        // but a conflict is expected when committing tx3
        if (level != ConflictDetection.NONE) {
            try {
                txClient.canCommitOrThrow(tx3, ((TransactionAware) table13).getTxChanges());
                Assert.fail("Conflict not detected!");
            } catch (TransactionConflictException e) {
                // expected
            }
            ((TransactionAware) table13).rollbackTx();
            txClient.abort(tx3);
        } else {
            txClient.canCommitOrThrow(tx3, ((TransactionAware) table13).getTxChanges());
        }
        // 2) Test conflicts when using different rows
        Transaction tx4 = txClient.startShort();
        ((TransactionAware) table14).startTx(tx4);
        // write r1->c1,v1 but do not commit yet
        table14.put(R1, a(C1), a(V1));
        // start new tx
        Transaction tx5 = txClient.startShort();
        ((TransactionAware) table15).startTx(tx5);
        // tx5 changes the same data but in a different row
        table15.put(R2, a(C1), a(V2));
        // start new tx
        Transaction tx6 = txClient.startShort();
        ((TransactionAware) table16).startTx(tx6);
        // tx6 changes a different column in the same row as tx4
        table16.put(R1, a(C2), a(V2));
        // committing tx4
        txClient.canCommitOrThrow(tx4, ((TransactionAware) table14).getTxChanges());
        Assert.assertTrue(((TransactionAware) table14).commitTx());
        txClient.commitOrThrow(tx4);
        // no conflict is expected when committing tx5
        txClient.canCommitOrThrow(tx5, ((TransactionAware) table15).getTxChanges());
        // but a conflict is expected when committing tx6 iff we resolve conflicts at row level
        if (level == ConflictDetection.ROW) {
            try {
                txClient.canCommitOrThrow(tx6, ((TransactionAware) table16).getTxChanges());
                Assert.fail("Conflict not detected!");
            } catch (TransactionConflictException e) {
                // expected
            }
            ((TransactionAware) table16).rollbackTx();
            txClient.abort(tx6);
        } else {
            txClient.canCommitOrThrow(tx6, ((TransactionAware) table16).getTxChanges());
        }
        // 3) Test conflicts when using different columns
        Transaction tx7 = txClient.startShort();
        ((TransactionAware) table17).startTx(tx7);
        // write r1->c1,v1 but do not commit yet
        table17.put(R1, a(C1), a(V1));
        // start new tx
        Transaction tx8 = txClient.startShort();
        ((TransactionAware) table18).startTx(tx8);
        // tx8 changes the same data but in a different column
        table18.put(R1, a(C2), a(V2));
        // start new tx
        Transaction tx9 = txClient.startShort();
        ((TransactionAware) table19).startTx(tx9);
        // tx9 changes the same column in the same row as tx7
        table19.put(R1, a(C1), a(V2));
        // committing tx7
        txClient.canCommitOrThrow(tx7, ((TransactionAware) table17).getTxChanges());
        Assert.assertTrue(((TransactionAware) table17).commitTx());
        txClient.commitOrThrow(tx7);
        // no conflict is expected when committing tx8 iff we resolve conflicts at column level or not at all
        if (level == ConflictDetection.COLUMN || level == ConflictDetection.NONE) {
            txClient.canCommitOrThrow(tx8, ((TransactionAware) table18).getTxChanges());
        } else {
            try {
                txClient.canCommitOrThrow(tx8, ((TransactionAware) table18).getTxChanges());
                Assert.fail("Conflict not detected!");
            } catch (TransactionConflictException e) {
                // expected
            }
            ((TransactionAware) table18).rollbackTx();
            txClient.abort(tx8);
        }
        // but a conflict is expected when committing tx9
        if (level != ConflictDetection.NONE) {
            try {
                txClient.canCommitOrThrow(tx9, ((TransactionAware) table19).getTxChanges());
                Assert.fail("Conflict not detected!");
            } catch (TransactionConflictException e) {
                // expected
            }
            ((TransactionAware) table19).rollbackTx();
            txClient.abort(tx9);
        } else {
            txClient.canCommitOrThrow(tx9, ((TransactionAware) table19).getTxChanges());
        }
    } finally {
        // NOTE: we do our best to clean up between tests to isolate failures,
        // but we do not aim to be exhaustive about it
        admin1.drop();
        admin2.drop();
    }
}
Also used: Table (io.cdap.cdap.api.dataset.table.Table), HBaseTable (io.cdap.cdap.data2.dataset2.lib.table.hbase.HBaseTable), Transaction (org.apache.tephra.Transaction), TransactionAware (org.apache.tephra.TransactionAware), DatasetProperties (io.cdap.cdap.api.dataset.DatasetProperties), DatasetAdmin (io.cdap.cdap.api.dataset.DatasetAdmin), TransactionConflictException (org.apache.tephra.TransactionConflictException)
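
The three conflict detection levels exercised above differ only in the granularity of the change set a table reports to the transaction service. As a minimal sketch, the properties for each level can be built with the same TableProperties API the test uses:

// A minimal sketch of the three settings exercised above, built with the
// same TableProperties builder that appears in the test:
DatasetProperties rowLevel = TableProperties.builder()
    .setConflictDetection(ConflictDetection.ROW)    // conflict iff two txs touch the same row
    .build();
DatasetProperties columnLevel = TableProperties.builder()
    .setConflictDetection(ConflictDetection.COLUMN) // conflict iff two txs touch the same row and column
    .build();
DatasetProperties noDetection = TableProperties.builder()
    .setConflictDetection(ConflictDetection.NONE)   // never report conflicts
    .build();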

Example 12 with DatasetProperties

use of io.cdap.cdap.api.dataset.DatasetProperties in project cdap by caskdata.

the class HiveExploreServiceFileSetTestRun method testPartitionedExisting.

private void testPartitionedExisting(String reuseProperty, boolean possessed) throws Exception {
    final DatasetId dummyInstanceId = NAMESPACE_ID.dataset("dummy");
    final DatasetId datasetInstanceId = NAMESPACE_ID.dataset("tpExisting");
    File path = new File(tmpFolder.newFolder(), "base");
    String tableName = "reuse";
    // create a partitioned file set in order to create a table in Hive and add a partition to it
    DatasetProperties props = PartitionedFileSetProperties.builder()
        .setPartitioning(Partitioning.builder().addIntField("number").build())
        .setBasePath(path.toString())
        .setEnableExploreOnCreate(true)
        .setExploreTableName(tableName)
        .setExploreSchema("key STRING, value INT")
        .setExploreFormat("csv")
        .build();
    datasetFramework.addInstance(PartitionedFileSet.class.getName(), dummyInstanceId, props);
    PartitionedFileSet dummy = datasetFramework.getDataset(dummyInstanceId, DatasetDefinition.NO_ARGUMENTS, null);
    Assert.assertNotNull(dummy);
    Location location = dummy.getEmbeddedFileSet().getLocation("number1").append("file1");
    PartitionKey key = PartitionKey.builder().addIntField("number", 1).build();
    FileWriterHelper.generateTextFile(location.getOutputStream(), ",", "x", 1, 2);
    addPartition(dummy, key, "number1");
    // validate data
    List<ColumnDesc> expectedColumns = Lists.newArrayList(new ColumnDesc(tableName + ".key", "STRING", 1, null), new ColumnDesc(tableName + ".value", "INT", 2, null), new ColumnDesc(tableName + ".number", "INT", 3, null));
    runCommand(NAMESPACE_ID, "SELECT * FROM " + tableName, true, expectedColumns, Lists.newArrayList(new QueryResult(Lists.<Object>newArrayList("x1", 1, 1))));
    props = PartitionedFileSetProperties.builder()
        .setPartitioning(Partitioning.builder().addIntField("number").build())
        .setBasePath(path.toString())
        .setEnableExploreOnCreate(true)
        .setExploreTableName(tableName)
        .setExploreSchema("key STRING, value INT")
        .setExploreFormat("csv")
        .add(reuseProperty, "true")
        .build();
    datasetFramework.addInstance(PartitionedFileSet.class.getName(), datasetInstanceId, props);
    PartitionedFileSet partitioned = datasetFramework.getDataset(datasetInstanceId, DatasetDefinition.NO_ARGUMENTS, null);
    Assert.assertNotNull(partitioned);
    props = PartitionedFileSetProperties.builder()
        .setPartitioning(Partitioning.builder().addIntField("number").build())
        .setBasePath(path.toString())
        .setEnableExploreOnCreate(true)
        .setExploreTableName(tableName)
        .setExploreSchema("k STRING, v INT")
        .setExploreFormat("csv")
        .add(reuseProperty, "true")
        .build();
    datasetFramework.updateInstance(datasetInstanceId, props);
    // validate data
    if (!possessed) {
        runCommand(NAMESPACE_ID, "SELECT * FROM " + tableName, true, expectedColumns, Lists.newArrayList(new QueryResult(Lists.<Object>newArrayList("x1", 1, 1))));
    } else {
        List<ColumnDesc> newExpectedColumns = Lists.newArrayList(new ColumnDesc(tableName + ".k", "STRING", 1, null), new ColumnDesc(tableName + ".v", "INT", 2, null), new ColumnDesc(tableName + ".number", "INT", 3, null));
        runCommand(NAMESPACE_ID, "SELECT * FROM " + tableName, true, newExpectedColumns, null);
    }
    datasetFramework.deleteInstance(datasetInstanceId);
    if (!possessed) {
        runCommand(NAMESPACE_ID, "SELECT * FROM " + tableName, true, expectedColumns, Lists.newArrayList(new QueryResult(Lists.<Object>newArrayList("x1", 1, 1))));
    } else {
        runCommand(NAMESPACE_ID, "SHOW tables", false, null, Collections.<QueryResult>emptyList());
    }
    datasetFramework.deleteInstance(dummyInstanceId);
}
Also used: QueryResult (io.cdap.cdap.proto.QueryResult), DatasetProperties (io.cdap.cdap.api.dataset.DatasetProperties), PartitionKey (io.cdap.cdap.api.dataset.lib.PartitionKey), PartitionedFileSet (io.cdap.cdap.api.dataset.lib.PartitionedFileSet), TimePartitionedFileSet (io.cdap.cdap.api.dataset.lib.TimePartitionedFileSet), ColumnDesc (io.cdap.cdap.proto.ColumnDesc), File (java.io.File), DatasetId (io.cdap.cdap.proto.id.DatasetId), Location (org.apache.twill.filesystem.Location)
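
The test registers its partition through a local addPartition helper that is not shown in this excerpt. A hedged sketch of the equivalent flow through the public PartitionedFileSet API, assuming a dataset instance pfs like the one obtained from datasetFramework above and execution inside a transaction:

// Sketch only: the test's own addPartition helper is not shown here.
PartitionKey key2 = PartitionKey.builder().addIntField("number", 2).build();
PartitionOutput output = pfs.getPartitionOutput(key2); // reserves a relative path for this key
// ... write one or more files under output.getLocation() ...
output.addPartition(); // registers the partition; with explore enabled, the Hive partition is added as well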

Example 13 with DatasetProperties

use of io.cdap.cdap.api.dataset.DatasetProperties in project cdap by caskdata.

the class WorkflowDriver method createLocalDatasets.

private void createLocalDatasets() throws IOException, DatasetManagementException {
    final KerberosPrincipalId principalId = ProgramRunners.getApplicationPrincipal(programOptions);
    for (final Map.Entry<String, String> entry : datasetFramework.getDatasetNameMapping().entrySet()) {
        final String localInstanceName = entry.getValue();
        final DatasetId instanceId = new DatasetId(workflowRunId.getNamespace(), localInstanceName);
        final DatasetCreationSpec instanceSpec = workflowSpec.getLocalDatasetSpecs().get(entry.getKey());
        LOG.debug("Adding Workflow local dataset instance: {}", localInstanceName);
        try {
            Retries.callWithRetries(new Retries.Callable<Void, Exception>() {
                @Override
                public Void call() throws Exception {
                    DatasetProperties properties = addLocalDatasetProperty(instanceSpec.getProperties(), keepLocal(entry.getKey()));
                    // this null check is needed because the addInstance overload taking a principal
                    // can only be used when app impersonation is enabled
                    if (principalId != null) {
                        datasetFramework.addInstance(instanceSpec.getTypeName(), instanceId, properties, principalId);
                    } else {
                        datasetFramework.addInstance(instanceSpec.getTypeName(), instanceId, properties);
                    }
                    return null;
                }
            }, RetryStrategies.fixDelay(Constants.Retry.LOCAL_DATASET_OPERATION_RETRY_DELAY_SECONDS, TimeUnit.SECONDS));
        } catch (IOException | DatasetManagementException e) {
            throw e;
        } catch (Exception e) {
            // this should never happen
            throw new IllegalStateException(e);
        }
    }
}
Also used: DatasetProperties (io.cdap.cdap.api.dataset.DatasetProperties), IOException (java.io.IOException), DatasetManagementException (io.cdap.cdap.api.dataset.DatasetManagementException), ExecutionException (java.util.concurrent.ExecutionException), DatasetId (io.cdap.cdap.proto.id.DatasetId), Retries (io.cdap.cdap.common.service.Retries), DatasetCreationSpec (io.cdap.cdap.internal.dataset.DatasetCreationSpec), KerberosPrincipalId (io.cdap.cdap.proto.id.KerberosPrincipalId), Map (java.util.Map), ConcurrentHashMap (java.util.concurrent.ConcurrentHashMap)
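
The anonymous Retries.Callable above is the general retry idiom this code relies on: wrap an operation that may execute more than once and pair it with a RetryStrategy. A stripped-down sketch of the same pattern; typeName, instanceId, and properties stand in for values like those in the example, and the delay value is illustrative:

// Sketch of the retry idiom used above. The wrapped operation may run
// more than once, so it should tolerate being retried.
Retries.callWithRetries(new Retries.Callable<Void, Exception>() {
    @Override
    public Void call() throws Exception {
        datasetFramework.addInstance(typeName, instanceId, properties);
        return null;
    }
}, RetryStrategies.fixDelay(2, TimeUnit.SECONDS)); // fixed 2s between attempts (value is illustrative)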

Example 14 with DatasetProperties

use of io.cdap.cdap.api.dataset.DatasetProperties in project cdap by caskdata.

the class DatasetAdminOpHTTPHandler method update.

@POST
@Path("/data/datasets/{name}/admin/update")
public void update(FullHttpRequest request, HttpResponder responder, @PathParam("namespace-id") String namespaceId, @PathParam("name") String name) throws Exception {
    propagateUserId(request);
    InternalDatasetUpdateParams params = GSON.fromJson(request.content().toString(StandardCharsets.UTF_8), InternalDatasetUpdateParams.class);
    Preconditions.checkArgument(params.getProperties() != null, "Missing required 'instanceProps' parameter.");
    Preconditions.checkArgument(params.getTypeMeta() != null, "Missing required 'typeMeta' parameter.");
    Preconditions.checkArgument(params.getExistingSpec() != null, "Missing required 'existingSpec' parameter.");
    DatasetProperties props = params.getProperties();
    DatasetSpecification existing = params.getExistingSpec();
    DatasetTypeMeta typeMeta = params.getTypeMeta();
    try {
        DatasetId instanceId = new DatasetId(namespaceId, name);
        DatasetCreationResponse response = datasetAdminService.createOrUpdate(instanceId, typeMeta, props, existing);
        responder.sendJson(HttpResponseStatus.OK, GSON.toJson(response));
    } catch (IncompatibleUpdateException e) {
        throw new ConflictException(e.getMessage());
    }
}
Also used: ConflictException (io.cdap.cdap.common.ConflictException), DatasetProperties (io.cdap.cdap.api.dataset.DatasetProperties), DatasetSpecification (io.cdap.cdap.api.dataset.DatasetSpecification), DatasetTypeMeta (io.cdap.cdap.proto.DatasetTypeMeta), DatasetId (io.cdap.cdap.proto.id.DatasetId), IncompatibleUpdateException (io.cdap.cdap.api.dataset.IncompatibleUpdateException), Path (javax.ws.rs.Path), POST (javax.ws.rs.POST)
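
For reference, the handler deserializes a Gson-encoded InternalDatasetUpdateParams from the request body. Going only by the Preconditions messages above, the top-level JSON fields are sketched below; the nested layouts (the Gson forms of DatasetProperties, DatasetTypeMeta, and DatasetSpecification) are not visible in this excerpt and are left elided:

// Hypothetical body for POST .../data/datasets/{name}/admin/update:
// {
//   "instanceProps": { ... },  // DatasetProperties
//   "typeMeta":      { ... },  // DatasetTypeMeta
//   "existingSpec":  { ... }   // DatasetSpecification
// }
// On an incompatible update, the IncompatibleUpdateException from
// createOrUpdate is rethrown as a ConflictException rather than an OK response.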

Example 15 with DatasetProperties

use of io.cdap.cdap.api.dataset.DatasetProperties in project cdap by caskdata.

the class CubeDatasetDefinition method configure.

@Override
public DatasetSpecification configure(String instanceName, DatasetProperties properties) {
    DatasetProperties factTableProperties = computeFactTableProperties(properties);
    List<DatasetSpecification> datasetSpecs = Lists.newArrayList();
    // configure the table that holds mappings of tag names, values, and the like
    datasetSpecs.add(metricsTableDef.configure("entity", properties));
    // NOTE: we create a table per resolution; this later allows e.g. configuring TTL separately for each one
    for (int resolution : getResolutions(properties.getProperties())) {
        datasetSpecs.add(tableDef.configure(String.valueOf(resolution), factTableProperties));
    }
    return DatasetSpecification.builder(instanceName, getName()).properties(properties.getProperties()).datasets(datasetSpecs).build();
}
Also used: DatasetProperties (io.cdap.cdap.api.dataset.DatasetProperties), DatasetSpecification (io.cdap.cdap.api.dataset.DatasetSpecification)
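
For context, configure(...) derives one fact table per resolution from the dataset properties. A minimal sketch of supplying such properties; the "dataset.cube.resolutions" key is an assumption for illustration (the real constant is defined by CubeDatasetDefinition and is not shown in this excerpt):

// Sketch: request three resolutions, in seconds (property key assumed, see above).
DatasetProperties cubeProps = DatasetProperties.builder()
    .add("dataset.cube.resolutions", "1,60,3600") // 1s raw, per-minute, per-hour
    .build();
// configure("myCube", cubeProps) would then embed fact tables named
// "1", "60" and "3600", plus the shared "entity" mappings table.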

Aggregations

DatasetProperties (io.cdap.cdap.api.dataset.DatasetProperties): 79
DatasetSpecification (io.cdap.cdap.api.dataset.DatasetSpecification): 28
Test (org.junit.Test): 24
Transaction (org.apache.tephra.Transaction): 22
DatasetAdmin (io.cdap.cdap.api.dataset.DatasetAdmin): 16
DatasetId (io.cdap.cdap.proto.id.DatasetId): 16
Table (io.cdap.cdap.api.dataset.table.Table): 15
HashMap (java.util.HashMap): 13
IncompatibleUpdateException (io.cdap.cdap.api.dataset.IncompatibleUpdateException): 12
BufferingTableTest (io.cdap.cdap.data2.dataset2.lib.table.BufferingTableTest): 12
TransactionAware (org.apache.tephra.TransactionAware): 12
HBaseTable (io.cdap.cdap.data2.dataset2.lib.table.hbase.HBaseTable): 10
DetachedTxSystemClient (org.apache.tephra.inmemory.DetachedTxSystemClient): 10
IOException (java.io.IOException): 9
Reconfigurable (io.cdap.cdap.api.dataset.Reconfigurable): 8
Get (io.cdap.cdap.api.dataset.table.Get): 8
Scan (io.cdap.cdap.api.dataset.table.Scan): 8
Scanner (io.cdap.cdap.api.dataset.table.Scanner): 8
TableId (io.cdap.cdap.data2.util.TableId): 8
DatasetTypeMeta (io.cdap.cdap.proto.DatasetTypeMeta): 8