Search in sources :

Example 6 with ConsumerConfiguration

use of co.cask.cdap.api.dataset.lib.partitioned.ConsumerConfiguration in project cdap by caskdata.

the class PartitionConsumerTest method testOnFinishWithInvalidPartition.

/**
 * Exercises {@code onFinish} with partitions that are no longer being processed:
 * <ul>
 *   <li>aborting a partition that was already aborted is expected to raise
 *       {@link IllegalStateException};</li>
 *   <li>committing a partition that was already committed is expected to raise
 *       {@link IllegalArgumentException}.</li>
 * </ul>
 */
@Test
public void testOnFinishWithInvalidPartition() throws Exception {
    final PartitionedFileSet dataset = dsFrameworkUtil.getInstance(pfsInstance);
    final TransactionAware txAwareDataset = (TransactionAware) dataset;
    ConsumerConfiguration threeRetries = ConsumerConfiguration.builder().setMaxRetries(3).build();
    final PartitionConsumer partitionConsumer =
        new ConcurrentPartitionConsumer(dataset, new InMemoryStatePersistor(), threeRetries);
    final PartitionKey partitionKey = generateUniqueKey();

    // first transaction: register the single partition under test
    TransactionExecutor.Subroutine registerPartition = new TransactionExecutor.Subroutine() {

        @Override
        public void apply() throws Exception {
            dataset.getPartitionOutput(partitionKey).addPartition();
        }
    };
    dsFrameworkUtil.newInMemoryTransactionExecutor(txAwareDataset).execute(registerPartition);

    // second transaction: consume the partition and call onFinish on it redundantly
    TransactionExecutor.Subroutine finishTwice = new TransactionExecutor.Subroutine() {

        @Override
        public void apply() throws Exception {
            List<PartitionDetail> firstAttempt = partitionConsumer.consumePartitions(1).getPartitions();
            Assert.assertEquals(1, firstAttempt.size());

            // abort the processing of the partition (false == not successful)
            partitionConsumer.onFinish(firstAttempt, false);

            // a second abort of the same partition must be rejected
            boolean secondAbortRejected = false;
            try {
                partitionConsumer.onFinish(firstAttempt, false);
            } catch (IllegalStateException expected) {
                secondAbortRejected = true;
            }
            if (!secondAbortRejected) {
                Assert.fail("Expected not to be able to abort a partition that is not IN_PROGRESS");
            }

            // consume the partition once more, this time marking it as complete (true == successful)
            List<PartitionDetail> secondAttempt = partitionConsumer.consumePartitions(1).getPartitions();
            Assert.assertEquals(1, secondAttempt.size());
            partitionConsumer.onFinish(secondAttempt, true);

            // a second commit of the same partition must be rejected as well
            boolean secondCommitRejected = false;
            try {
                partitionConsumer.onFinish(secondAttempt, true);
            } catch (IllegalArgumentException expected) {
                secondCommitRejected = true;
            }
            if (!secondCommitRejected) {
                Assert.fail("Expected not to be able to call onFinish on a partition is not IN_PROGRESS");
            }
        }
    };
    dsFrameworkUtil.newInMemoryTransactionExecutor(txAwareDataset).execute(finishTwice);
}
Also used : ConcurrentPartitionConsumer(co.cask.cdap.api.dataset.lib.partitioned.ConcurrentPartitionConsumer) PartitionedFileSet(co.cask.cdap.api.dataset.lib.PartitionedFileSet) TransactionExecutor(org.apache.tephra.TransactionExecutor) TransactionAware(org.apache.tephra.TransactionAware) ConsumerConfiguration(co.cask.cdap.api.dataset.lib.partitioned.ConsumerConfiguration) PartitionKey(co.cask.cdap.api.dataset.lib.PartitionKey) ArrayList(java.util.ArrayList) ImmutableList(com.google.common.collect.ImmutableList) List(java.util.List) ConcurrentPartitionConsumer(co.cask.cdap.api.dataset.lib.partitioned.ConcurrentPartitionConsumer) PartitionConsumer(co.cask.cdap.api.dataset.lib.partitioned.PartitionConsumer) Test(org.junit.Test)

Example 7 with ConsumerConfiguration

use of co.cask.cdap.api.dataset.lib.partitioned.ConsumerConfiguration in project cdap by caskdata.

the class PartitionConsumerTest method testSimpleConcurrency.

/**
 * Runs three {@code ConcurrentPartitionConsumer}s against the same dataset and the same state
 * persistor, and checks how ten partitions are handed out between them as partitions are
 * consumed, aborted and committed.
 */
@Test
public void testSimpleConcurrency() throws Exception {
    final PartitionedFileSet dataset = dsFrameworkUtil.getInstance(pfsInstance);
    final TransactionAware txAwareDataset = (TransactionAware) dataset;

    // ten distinct keys, one per partition that will be created
    final Set<PartitionKey> partitionKeys = new HashSet<>();
    for (int created = 0; created != 10; ++created) {
        partitionKeys.add(generateUniqueKey());
    }

    // all three consumers are built on one persistor and one configuration, so they share state
    InMemoryStatePersistor sharedPersistor = new InMemoryStatePersistor();
    ConsumerConfiguration threeRetries = ConsumerConfiguration.builder().setMaxRetries(3).build();
    final PartitionConsumer partitionConsumer1 = new ConcurrentPartitionConsumer(dataset, sharedPersistor, threeRetries);
    final PartitionConsumer partitionConsumer2 = new ConcurrentPartitionConsumer(dataset, sharedPersistor, threeRetries);
    final PartitionConsumer partitionConsumer3 = new ConcurrentPartitionConsumer(dataset, sharedPersistor, threeRetries);

    // first transaction: create a partition for every key
    TransactionExecutor.Subroutine registerPartitions = new TransactionExecutor.Subroutine() {

        @Override
        public void apply() throws Exception {
            for (final PartitionKey key : partitionKeys) {
                dataset.getPartitionOutput(key).addPartition();
            }
        }
    };
    dsFrameworkUtil.newInMemoryTransactionExecutor(txAwareDataset).execute(registerPartitions);

    // second transaction: interleave the three consumers
    TransactionExecutor.Subroutine interleaveConsumers = new TransactionExecutor.Subroutine() {

        @Override
        public void apply() throws Exception {
            // consumer 1 asks for one partition and gets exactly one, although more are unconsumed
            List<PartitionDetail> takenBy1 = partitionConsumer1.consumePartitions(1).getPartitions();
            Assert.assertEquals(1, takenBy1.size());

            // consumer 2 asks for ten; one is held by consumer 1, so nine are returned
            List<PartitionDetail> takenBy2 = partitionConsumer2.consumePartitions(10).getPartitions();
            Assert.assertEquals(9, takenBy2.size());

            // nothing is left for consumer 3 while consumers 1 and 2 hold every partition
            Assert.assertEquals(0, partitionConsumer3.consumePartitions().getPartitions().size());

            // consumer 1 aborts, which makes its partition available again
            partitionConsumer1.onFinish(takenBy1, false);
            takenBy1.clear();

            // consumer 3 asks for two, but only the one released by consumer 1 is available
            List<PartitionDetail> takenBy3 = partitionConsumer3.consumePartitions(2).getPartitions();
            Assert.assertEquals(1, takenBy3.size());

            // consumer 3 reports success through the PartitionDetail-based API
            partitionConsumer3.onFinish(takenBy3, true);

            // consumer 2 reports success through the key-based onFinishWithKeys API
            Function<PartitionDetail, PartitionKey> extractKey = new Function<PartitionDetail, PartitionKey>() {

                @Override
                public PartitionKey apply(PartitionDetail input) {
                    return input.getPartitionKey();
                }
            };
            List<PartitionKey> keysTakenBy2 = Lists.transform(takenBy2, extractKey);
            partitionConsumer2.onFinishWithKeys(keysTakenBy2, true);

            // every partition has been processed, so there is nothing more to consume
            Assert.assertEquals(0, partitionConsumer3.consumePartitions().getPartitions().size());

            // takenBy1 was cleared after the abort, so it contributes nothing here
            List<PartitionDetail> everythingProcessed = new ArrayList<>(takenBy1);
            everythingProcessed.addAll(takenBy2);
            everythingProcessed.addAll(takenBy3);

            // compare as key sets: all partitions were added in one transaction, so order may vary
            Assert.assertEquals(partitionKeys, toKeys(everythingProcessed));
        }
    };
    dsFrameworkUtil.newInMemoryTransactionExecutor(txAwareDataset).execute(interleaveConsumers);
}
Also used : ConcurrentPartitionConsumer(co.cask.cdap.api.dataset.lib.partitioned.ConcurrentPartitionConsumer) PartitionedFileSet(co.cask.cdap.api.dataset.lib.PartitionedFileSet) TransactionExecutor(org.apache.tephra.TransactionExecutor) PartitionDetail(co.cask.cdap.api.dataset.lib.PartitionDetail) Function(com.google.common.base.Function) TransactionAware(org.apache.tephra.TransactionAware) ConsumerConfiguration(co.cask.cdap.api.dataset.lib.partitioned.ConsumerConfiguration) PartitionKey(co.cask.cdap.api.dataset.lib.PartitionKey) ArrayList(java.util.ArrayList) ImmutableList(com.google.common.collect.ImmutableList) List(java.util.List) ConcurrentPartitionConsumer(co.cask.cdap.api.dataset.lib.partitioned.ConcurrentPartitionConsumer) PartitionConsumer(co.cask.cdap.api.dataset.lib.partitioned.PartitionConsumer) HashSet(java.util.HashSet) Test(org.junit.Test)

Aggregations

PartitionKey (co.cask.cdap.api.dataset.lib.PartitionKey)7 PartitionedFileSet (co.cask.cdap.api.dataset.lib.PartitionedFileSet)7 ConcurrentPartitionConsumer (co.cask.cdap.api.dataset.lib.partitioned.ConcurrentPartitionConsumer)7 ConsumerConfiguration (co.cask.cdap.api.dataset.lib.partitioned.ConsumerConfiguration)7 PartitionConsumer (co.cask.cdap.api.dataset.lib.partitioned.PartitionConsumer)7 TransactionAware (org.apache.tephra.TransactionAware)7 TransactionExecutor (org.apache.tephra.TransactionExecutor)7 Test (org.junit.Test)7 ImmutableList (com.google.common.collect.ImmutableList)6 ArrayList (java.util.ArrayList)6 List (java.util.List)6 PartitionDetail (co.cask.cdap.api.dataset.lib.PartitionDetail)3 HashSet (java.util.HashSet)3 PartitionConsumerResult (co.cask.cdap.api.dataset.lib.partitioned.PartitionConsumerResult)2 Predicate (co.cask.cdap.api.Predicate)1 Partition (co.cask.cdap.api.dataset.lib.Partition)1 PartitionFilter (co.cask.cdap.api.dataset.lib.PartitionFilter)1 ConsumablePartition (co.cask.cdap.api.dataset.lib.partitioned.ConsumablePartition)1 ConsumerWorkingSet (co.cask.cdap.api.dataset.lib.partitioned.ConsumerWorkingSet)1 Function (com.google.common.base.Function)1