
Example 71 with TransactionAware

Use of org.apache.tephra.TransactionAware in project cdap by caskdata.

Class SingleThreadDatasetCache, method getDataset:

@Override
public <T extends Dataset> T getDataset(DatasetCacheKey key, boolean bypass) throws DatasetInstantiationException {
    Dataset dataset;
    try {
        if (bypass) {
            dataset = datasetLoader.load(key);
        } else {
            try {
                dataset = datasetCache.get(key);
            } catch (ExecutionException | UncheckedExecutionException e) {
                throw e.getCause();
            }
        }
    } catch (DatasetInstantiationException | ServiceUnavailableException e) {
        throw e;
    } catch (Throwable t) {
        throw new DatasetInstantiationException(String.format("Could not instantiate dataset '%s:%s'", key.getNamespace(), key.getName()), t);
    }
    // make sure the dataset exists and is of the right type
    if (dataset == null) {
        throw new DatasetInstantiationException(String.format("Dataset '%s' does not exist", key.getName()));
    }
    T typedDataset;
    try {
        @SuppressWarnings("unchecked") T t = (T) dataset;
        typedDataset = t;
    } catch (Throwable t) {
        // must be ClassCastException
        throw new DatasetInstantiationException(String.format("Could not cast dataset '%s' to requested type. Actual type is %s.", key.getName(), dataset.getClass().getName()), t);
    }
    // any transaction aware that is not in the active tx-awares is added to the current tx context (if there is one).
    if (!bypass && dataset instanceof TransactionAware) {
        TransactionAware txAware = (TransactionAware) dataset;
        TransactionAware existing = activeTxAwares.get(key);
        if (existing == null) {
            activeTxAwares.put(key, txAware);
            if (txContext != null) {
                txContext.addTransactionAware(txAware);
            }
        } else if (existing != dataset) {
            // this better be the same dataset, otherwise the cache did not work
            throw new IllegalStateException(String.format("Unexpected state: Cache returned %s for %s, which is different from the " + "active transaction aware %s for the same key. This should never happen.", dataset, key, existing));
        }
    }
    return typedDataset;
}
Also used: UncheckedExecutionException(com.google.common.util.concurrent.UncheckedExecutionException), MeteredDataset(co.cask.cdap.api.dataset.metrics.MeteredDataset), Dataset(co.cask.cdap.api.dataset.Dataset), TransactionAware(org.apache.tephra.TransactionAware), ServiceUnavailableException(co.cask.cdap.common.ServiceUnavailableException), ExecutionException(java.util.concurrent.ExecutionException), DatasetInstantiationException(co.cask.cdap.api.data.DatasetInstantiationException)
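
The last block of getDataset is what ties datasets into transactions: a TransactionAware dataset that is not yet tracked is registered with the active TransactionContext. Below is a minimal caller-side sketch of that pattern, not part of SingleThreadDatasetCache itself; it assumes any TransactionSystemClient and any Table that implements TransactionAware (the names txClient and table are illustrative).

import co.cask.cdap.api.common.Bytes;
import co.cask.cdap.api.dataset.table.Table;
import org.apache.tephra.TransactionAware;
import org.apache.tephra.TransactionContext;
import org.apache.tephra.TransactionFailureException;
import org.apache.tephra.TransactionSystemClient;

public final class TransactionAwarePutExample {

    private TransactionAwarePutExample() {
    }

    // Illustrative sketch: runs a single put against a TransactionAware table inside a short transaction.
    public static void putInTx(TransactionSystemClient txClient, Table table)
        throws TransactionFailureException {
        TransactionContext txContext = new TransactionContext(txClient, (TransactionAware) table);
        // start a transaction; the table buffers its changes against it
        txContext.start();
        try {
            table.put(Bytes.toBytes("row"), Bytes.toBytes("col"), Bytes.toBytes("val"));
            // flush the table's buffered changes and commit
            txContext.finish();
        } catch (Throwable t) {
            // roll back both the transaction and the table's buffered changes
            txContext.abort();
            throw t;
        }
    }
}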

Example 72 with TransactionAware

Use of org.apache.tephra.TransactionAware in project cdap by caskdata.

Class SingleThreadDatasetCache, method close:

@Override
public void close() {
    for (TransactionAware txAware : extraTxAwares) {
        if (txAware instanceof Closeable) {
            Closeables.closeQuietly((Closeable) txAware);
        }
    }
    invalidate();
    super.close();
}
Also used: TransactionAware(org.apache.tephra.TransactionAware), Closeable(java.io.Closeable)

Example 73 with TransactionAware

Use of org.apache.tephra.TransactionAware in project cdap by caskdata.

Class AbstractDatasetFrameworkTest, method testSimpleDataset:

@Test
public void testSimpleDataset() throws Exception {
    // Configuring Dataset types
    DatasetFramework framework = getFramework();
    // system namespace has a module orderedTable-inMemory
    Assert.assertTrue(framework.hasSystemType("table"));
    // myspace namespace has no modules
    Assert.assertFalse(framework.hasType(IN_MEMORY_TYPE));
    Assert.assertFalse(framework.hasType(SIMPLE_KV_TYPE));
    // add module to namespace 'myspace'
    framework.addModule(KEY_VALUE, new SingleTypeModule(SimpleKVTable.class));
    // make sure it got added to 'myspace'
    Assert.assertTrue(framework.hasType(SIMPLE_KV_TYPE));
    // but not to 'system'
    Assert.assertFalse(framework.hasSystemType(SimpleKVTable.class.getName()));
    Assert.assertFalse(framework.hasInstance(MY_TABLE));
    // Creating instance using a type from own namespace
    framework.addInstance(SimpleKVTable.class.getName(), MY_TABLE, DatasetProperties.EMPTY);
    // verify it got added to the right namespace
    Assert.assertTrue(framework.hasInstance(MY_TABLE));
    // and not to the system namespace
    Assert.assertFalse(framework.hasInstance(NamespaceId.SYSTEM.dataset("my_table")));
    // Doing some admin and data ops
    DatasetAdmin admin = framework.getAdmin(MY_TABLE, null);
    Assert.assertNotNull(admin);
    final SimpleKVTable kvTable = framework.getDataset(MY_TABLE, DatasetDefinition.NO_ARGUMENTS, null);
    Assert.assertNotNull(kvTable);
    TransactionExecutor txnl = new DefaultTransactionExecutor(new MinimalTxSystemClient(), (TransactionAware) kvTable);
    txnl.execute(new TransactionExecutor.Subroutine() {

        @Override
        public void apply() throws Exception {
            kvTable.put("key1", "value1");
        }
    });
    txnl.execute(new TransactionExecutor.Subroutine() {

        @Override
        public void apply() throws Exception {
            Assert.assertEquals("value1", kvTable.get("key1"));
        }
    });
    admin.truncate();
    txnl.execute(new TransactionExecutor.Subroutine() {

        @Override
        public void apply() throws Exception {
            Assert.assertTrue(kvTable.get("key1") == null);
        }
    });
    // cleanup
    framework.deleteInstance(MY_TABLE);
    framework.deleteModule(KEY_VALUE);
    // recreate instance without adding a module in 'myspace'. This should use types from default namespace
    framework.addInstance("table", MY_TABLE, DatasetProperties.EMPTY);
    // verify it got added to the right namespace
    Assert.assertTrue(framework.hasInstance(MY_TABLE));
    admin = framework.getAdmin(MY_TABLE, null);
    Assert.assertNotNull(admin);
    final Table table = framework.getDataset(MY_TABLE, DatasetDefinition.NO_ARGUMENTS, null);
    Assert.assertNotNull(table);
    txnl = new DefaultTransactionExecutor(new MinimalTxSystemClient(), (TransactionAware) table);
    txnl.execute(new TransactionExecutor.Subroutine() {

        @Override
        public void apply() throws Exception {
            table.put(Bytes.toBytes("key1"), Bytes.toBytes("column1"), Bytes.toBytes("value1"));
        }
    });
    txnl.execute(new TransactionExecutor.Subroutine() {

        @Override
        public void apply() throws Exception {
            Assert.assertEquals("value1", Bytes.toString(table.get(Bytes.toBytes("key1"), Bytes.toBytes("column1"))));
        }
    });
    // cleanup
    framework.deleteInstance(MY_TABLE);
}
Also used: LineageWriterDatasetFramework(co.cask.cdap.data2.metadata.writer.LineageWriterDatasetFramework), Table(co.cask.cdap.api.dataset.table.Table), TransactionAware(org.apache.tephra.TransactionAware), DefaultTransactionExecutor(org.apache.tephra.DefaultTransactionExecutor), DatasetAdmin(co.cask.cdap.api.dataset.DatasetAdmin), TransactionExecutor(org.apache.tephra.TransactionExecutor), InstanceConflictException(co.cask.cdap.api.dataset.InstanceConflictException), DatasetManagementException(co.cask.cdap.api.dataset.DatasetManagementException), IOException(java.io.IOException), MinimalTxSystemClient(org.apache.tephra.inmemory.MinimalTxSystemClient), Test(org.junit.Test)
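
TransactionExecutor.Subroutine has a single apply() method, so on Java 8 and later the anonymous classes in this test can be written as lambdas. A condensed sketch of the first two transactions, assuming the same kvTable and MinimalTxSystemClient as above:

TransactionExecutor txnl =
    new DefaultTransactionExecutor(new MinimalTxSystemClient(), (TransactionAware) kvTable);
// same two transactions as above, expressed as lambdas
txnl.execute(() -> kvTable.put("key1", "value1"));
txnl.execute(() -> Assert.assertEquals("value1", kvTable.get("key1")));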

Example 74 with TransactionAware

Use of org.apache.tephra.TransactionAware in project cdap by caskdata.

Class PartitionConsumerTest, method testPartitionConsumingWithFilterAndLimit:

@Test
public void testPartitionConsumingWithFilterAndLimit() throws Exception {
    final PartitionedFileSet dataset = dsFrameworkUtil.getInstance(pfsInstance);
    final TransactionAware txAwareDataset = (TransactionAware) dataset;
    final Set<PartitionKey> partitionKeys1 = new HashSet<>();
    for (int i = 0; i < 10; i++) {
        partitionKeys1.add(generateUniqueKey());
    }
    final Set<PartitionKey> partitionKeys2 = new HashSet<>();
    for (int i = 0; i < 15; i++) {
        partitionKeys2.add(generateUniqueKey());
    }
    final PartitionConsumer partitionConsumer = new ConcurrentPartitionConsumer(dataset, new InMemoryStatePersistor());
    // (consumption only happens at transaction borders)
    for (final PartitionKey partitionKey : partitionKeys1) {
        dsFrameworkUtil.newInMemoryTransactionExecutor(txAwareDataset).execute(new TransactionExecutor.Subroutine() {

            @Override
            public void apply() throws Exception {
                dataset.getPartitionOutput(partitionKey).addPartition();
            }
        });
    }
    dsFrameworkUtil.newInMemoryTransactionExecutor(txAwareDataset).execute(new TransactionExecutor.Subroutine() {

        @Override
        public void apply() throws Exception {
            // Initial consumption results in the partitions corresponding to partitionKeys1 being consumed, because only
            // those partitions are added to the dataset at this point
            List<Partition> consumedPartitions = new ArrayList<>();
            // with limit = 1, the returned iterator is only size 1, even though there are more unconsumed partitions
            Iterables.addAll(consumedPartitions, partitionConsumer.consumePartitions(1).getPartitions());
            Assert.assertEquals(1, consumedPartitions.size());
            // ask for 5 more
            Iterables.addAll(consumedPartitions, partitionConsumer.consumePartitions(5).getPartitions());
            Assert.assertEquals(6, consumedPartitions.size());
            // ask for 5 more, but there are only 4 more unconsumed partitions (size of partitionKeys1 is 10).
            Iterables.addAll(consumedPartitions, partitionConsumer.consumePartitions(5).getPartitions());
            Assert.assertEquals(10, consumedPartitions.size());
            Assert.assertEquals(partitionKeys1, toKeys(consumedPartitions));
        }
    });
    dsFrameworkUtil.newInMemoryTransactionExecutor(txAwareDataset).execute(new TransactionExecutor.Subroutine() {

        @Override
        public void apply() throws Exception {
            for (PartitionKey partitionKey : partitionKeys2) {
                dataset.getPartitionOutput(partitionKey).addPartition();
            }
        }
    });
    dsFrameworkUtil.newInMemoryTransactionExecutor(txAwareDataset).execute(new TransactionExecutor.Subroutine() {

        @Override
        public void apply() throws Exception {
            // using the same PartitionConsumer (which remembers the PartitionConsumerState) to consume additional
            // partitions results in only the newly added partitions (corresponding to partitionKeys2) being returned
            Assert.assertEquals(partitionKeys2, toKeys(partitionConsumer.consumePartitions().getPartitions()));
        }
    });
    dsFrameworkUtil.newInMemoryTransactionExecutor(txAwareDataset).execute(new TransactionExecutor.Subroutine() {

        @Override
        public void apply() throws Exception {
            // consuming the partitions again, without adding any new partitions returns an empty iterator
            Assert.assertTrue(partitionConsumer.consumePartitions().getPartitions().isEmpty());
        }
    });
    dsFrameworkUtil.newInMemoryTransactionExecutor(txAwareDataset).execute(new TransactionExecutor.Subroutine() {

        @Override
        public void apply() throws Exception {
            // creating a new PartitionConsumer resets the consumption state.
            // test combination of filter and limit
            // the partitionFilter will match partitionKeys [1, 7), of which there are 6
            final PartitionFilter partitionFilter = PartitionFilter.builder().addRangeCondition("i", 1, 7).build();
            final Predicate<PartitionDetail> predicate = new Predicate<PartitionDetail>() {

                @Override
                public boolean apply(PartitionDetail partitionDetail) {
                    return partitionFilter.match(partitionDetail.getPartitionKey());
                }
            };
            ConsumerConfiguration configuration = ConsumerConfiguration.builder().setPartitionPredicate(predicate).build();
            PartitionConsumer newPartitionConsumer = new ConcurrentPartitionConsumer(dataset, new InMemoryStatePersistor(), configuration);
            List<Partition> consumedPartitions = new ArrayList<>();
            // applying the filter (which narrows it down to 6 elements) with a limit of 4 results in 4 consumed partitions
            Iterables.addAll(consumedPartitions, newPartitionConsumer.consumePartitions(4).getPartitions());
            Assert.assertEquals(4, consumedPartitions.size());
            // applying a limit of 3 with the same filter returns the remaining 2 elements that match the filter
            Iterables.addAll(consumedPartitions, newPartitionConsumer.consumePartitions(3).getPartitions());
            Assert.assertEquals(6, consumedPartitions.size());
            // assert that the partitions returned have partition keys whose i values range over [1, 7)
            Set<Integer> expectedIFields = new HashSet<>();
            for (int i = 1; i < 7; i++) {
                expectedIFields.add(i);
            }
            Set<Integer> actualIFields = new HashSet<>();
            for (Partition consumedPartition : consumedPartitions) {
                actualIFields.add((Integer) consumedPartition.getPartitionKey().getField("i"));
            }
            Assert.assertEquals(expectedIFields, actualIFields);
        }
    });
}
Also used: ConcurrentPartitionConsumer(co.cask.cdap.api.dataset.lib.partitioned.ConcurrentPartitionConsumer), ConsumablePartition(co.cask.cdap.api.dataset.lib.partitioned.ConsumablePartition), Partition(co.cask.cdap.api.dataset.lib.Partition), HashSet(java.util.HashSet), PartitionedFileSet(co.cask.cdap.api.dataset.lib.PartitionedFileSet), Set(java.util.Set), ConsumerWorkingSet(co.cask.cdap.api.dataset.lib.partitioned.ConsumerWorkingSet), TransactionExecutor(org.apache.tephra.TransactionExecutor), PartitionDetail(co.cask.cdap.api.dataset.lib.PartitionDetail), Predicate(co.cask.cdap.api.Predicate), PartitionFilter(co.cask.cdap.api.dataset.lib.PartitionFilter), TransactionAware(org.apache.tephra.TransactionAware), ConsumerConfiguration(co.cask.cdap.api.dataset.lib.partitioned.ConsumerConfiguration), PartitionKey(co.cask.cdap.api.dataset.lib.PartitionKey), ArrayList(java.util.ArrayList), ImmutableList(com.google.common.collect.ImmutableList), List(java.util.List), PartitionConsumer(co.cask.cdap.api.dataset.lib.partitioned.PartitionConsumer), Test(org.junit.Test)
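
co.cask.cdap.api.Predicate declares the single apply method shown above, so on Java 8 and later the filter-backed predicate can be written more compactly. A sketch, assuming the same partitionFilter as in the test:

final PartitionFilter partitionFilter = PartitionFilter.builder().addRangeCondition("i", 1, 7).build();
// equivalent to the anonymous Predicate above: accept partitions whose key matches the filter (i in [1, 7))
Predicate<PartitionDetail> predicate =
    partitionDetail -> partitionFilter.match(partitionDetail.getPartitionKey());
ConsumerConfiguration configuration =
    ConsumerConfiguration.builder().setPartitionPredicate(predicate).build();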

Example 75 with TransactionAware

Use of org.apache.tephra.TransactionAware in project cdap by caskdata.

Class PartitionConsumerTest, method testPartitionConsumingWithPartitionAcceptor:

@Test
public void testPartitionConsumingWithPartitionAcceptor() throws Exception {
    final PartitionedFileSet dataset = dsFrameworkUtil.getInstance(pfsInstance);
    final TransactionAware txAwareDataset = (TransactionAware) dataset;
    // i will range from [0,10), s will always be 'partitionKeys1'
    final Set<PartitionKey> partitionKeys1 = new HashSet<>();
    for (int i = 0; i < 10; i++) {
        PartitionKey key = PartitionKey.builder().addIntField("i", i).addLongField("l", 17L).addStringField("s", "partitionKeys1").build();
        partitionKeys1.add(key);
    }
    // i will range from [0,15), s will always be 'partitionKeys2'
    final Set<PartitionKey> partitionKeys2 = new HashSet<>();
    for (int i = 0; i < 15; i++) {
        PartitionKey key = PartitionKey.builder().addIntField("i", i).addLongField("l", 17L).addStringField("s", "partitionKeys2").build();
        partitionKeys2.add(key);
    }
    dsFrameworkUtil.newInMemoryTransactionExecutor(txAwareDataset).execute(new TransactionExecutor.Subroutine() {

        @Override
        public void apply() throws Exception {
            for (final PartitionKey partitionKey : partitionKeys1) {
                dataset.getPartitionOutput(partitionKey).addPartition();
            }
            for (final PartitionKey partitionKey : partitionKeys2) {
                dataset.getPartitionOutput(partitionKey).addPartition();
            }
        }
    });
    final PartitionConsumer partitionConsumer = new ConcurrentPartitionConsumer(dataset, new InMemoryStatePersistor());
    dsFrameworkUtil.newInMemoryTransactionExecutor(txAwareDataset).execute(new TransactionExecutor.Subroutine() {

        @Override
        public void apply() throws Exception {
            List<Partition> consumedPartitions = new ArrayList<>();
            // specify a PartitionAcceptor that limits consumption to partitions whose 's' field equals 'partitionKeys1',
            // so it will get all the partitions in partitionKeys1
            Iterables.addAll(consumedPartitions, partitionConsumer.consumePartitions(new CustomAcceptor("partitionKeys1")).getPartitions());
            // assert that we consumed all the partitions represented by partitionKeys1
            Assert.assertEquals(partitionKeys1, toKeys(consumedPartitions));
            consumedPartitions.clear();
            // ask for partitions where 's' field is equal to 'partitionKeys2', but stop iterating upon 'i' field == 8
            Iterables.addAll(consumedPartitions, partitionConsumer.consumePartitions(new CustomAcceptor("partitionKeys2", 8)).getPartitions());
            // this will give us 8 of partitionKeys2
            Assert.assertEquals(8, consumedPartitions.size());
            // ask for the remainder of the partitions - i ranging from [8,15). Then, we will have all of 'partitionKeys2'
            Iterables.addAll(consumedPartitions, partitionConsumer.consumePartitions().getPartitions());
            Assert.assertEquals(partitionKeys2, toKeys(consumedPartitions));
        }
    });
}
Also used: ConcurrentPartitionConsumer(co.cask.cdap.api.dataset.lib.partitioned.ConcurrentPartitionConsumer), PartitionedFileSet(co.cask.cdap.api.dataset.lib.PartitionedFileSet), TransactionExecutor(org.apache.tephra.TransactionExecutor), TransactionAware(org.apache.tephra.TransactionAware), PartitionKey(co.cask.cdap.api.dataset.lib.PartitionKey), ArrayList(java.util.ArrayList), ImmutableList(com.google.common.collect.ImmutableList), List(java.util.List), PartitionConsumer(co.cask.cdap.api.dataset.lib.partitioned.PartitionConsumer), HashSet(java.util.HashSet), Test(org.junit.Test)
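
CustomAcceptor is a helper defined elsewhere in PartitionConsumerTest, so its body is not shown here. Purely as a rough illustration of the behavior the comments describe (this assumes PartitionAcceptor exposes a single accept(PartitionDetail) method returning ACCEPT, SKIP, or STOP; verify against the actual cdap interface before relying on it), such an acceptor could look roughly like this:

// Hypothetical sketch, not the actual CustomAcceptor from the test.
public class CustomAcceptor implements PartitionAcceptor {
    private final String sValue;
    private final Integer stopOnI;   // stop iterating once the 'i' field reaches this value (may be null)

    public CustomAcceptor(String sValue) {
        this(sValue, null);
    }

    public CustomAcceptor(String sValue, Integer stopOnI) {
        this.sValue = sValue;
        this.stopOnI = stopOnI;
    }

    @Override
    public Return accept(PartitionDetail partition) {
        PartitionKey key = partition.getPartitionKey();
        if (stopOnI != null && stopOnI.equals(key.getField("i"))) {
            // stop consuming as soon as the configured 'i' value is seen
            return Return.STOP;
        }
        if (!sValue.equals(key.getField("s"))) {
            // skip partitions whose 's' field does not match
            return Return.SKIP;
        }
        return Return.ACCEPT;
    }
}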

Aggregations

TransactionAware (org.apache.tephra.TransactionAware): 91
Test (org.junit.Test): 64
TransactionExecutor (org.apache.tephra.TransactionExecutor): 47
Table (co.cask.cdap.api.dataset.table.Table): 37
Transaction (org.apache.tephra.Transaction): 30
IOException (java.io.IOException): 27
DatasetAdmin (co.cask.cdap.api.dataset.DatasetAdmin): 22
PartitionedFileSet (co.cask.cdap.api.dataset.lib.PartitionedFileSet): 22
HBaseTable (co.cask.cdap.data2.dataset2.lib.table.hbase.HBaseTable): 21
PartitionKey (co.cask.cdap.api.dataset.lib.PartitionKey): 17
DataSetException (co.cask.cdap.api.dataset.DataSetException): 16
TransactionFailureException (org.apache.tephra.TransactionFailureException): 15
List (java.util.List): 13
ArrayList (java.util.ArrayList): 11
HashSet (java.util.HashSet): 11
DatasetManagementException (co.cask.cdap.api.dataset.DatasetManagementException): 10
PartitionNotFoundException (co.cask.cdap.api.dataset.PartitionNotFoundException): 10
TimePartitionedFileSet (co.cask.cdap.api.dataset.lib.TimePartitionedFileSet): 10
ConcurrentPartitionConsumer (co.cask.cdap.api.dataset.lib.partitioned.ConcurrentPartitionConsumer): 10
PartitionConsumer (co.cask.cdap.api.dataset.lib.partitioned.PartitionConsumer): 10