Search in sources:

Example 1 with PartitionConsumer

use of co.cask.cdap.api.dataset.lib.partitioned.PartitionConsumer in project cdap by caskdata.

the class PartitionConsumerTest method testSimplePartitionConsuming.

/**
 * Walks a single {@link PartitionConsumer} through two rounds of partition additions and checks that each
 * consumption returns only the partitions added since the previous one, that a further consumption returns
 * nothing, and that a newly constructed consumer sees every partition again.
 *
 * @throws Exception if any of the transactional steps fails
 */
@Test
public void testSimplePartitionConsuming() throws Exception {
    final PartitionedFileSet dataset = dsFrameworkUtil.getInstance(pfsInstance);
    final TransactionAware txAwareDataset = (TransactionAware) dataset;

    // two batches of keys: 10 for the first round of additions, 15 for the second
    final Set<PartitionKey> firstBatch = new HashSet<>();
    for (int remaining = 10; remaining > 0; remaining--) {
        firstBatch.add(generateUniqueKey());
    }
    final Set<PartitionKey> secondBatch = new HashSet<>();
    for (int remaining = 15; remaining > 0; remaining--) {
        secondBatch.add(generateUniqueKey());
    }
    final PartitionConsumer partitionConsumer = new ConcurrentPartitionConsumer(dataset, new InMemoryStatePersistor());

    // every step below runs through its own freshly created transaction executor
    TransactionExecutor.Subroutine addFirstBatch = new TransactionExecutor.Subroutine() {
        @Override
        public void apply() throws Exception {
            for (PartitionKey key : firstBatch) {
                dataset.getPartitionOutput(key).addPartition();
            }
        }
    };
    dsFrameworkUtil.newInMemoryTransactionExecutor(txAwareDataset).execute(addFirstBatch);

    TransactionExecutor.Subroutine consumeFirstBatch = new TransactionExecutor.Subroutine() {
        @Override
        public void apply() throws Exception {
            // only the first batch has been added so far, so that is exactly what the first consumption yields
            List<? extends Partition> firstConsumed = partitionConsumer.consumePartitions().getPartitions();
            Assert.assertEquals(firstBatch, toKeys(firstConsumed));
        }
    };
    dsFrameworkUtil.newInMemoryTransactionExecutor(txAwareDataset).execute(consumeFirstBatch);

    TransactionExecutor.Subroutine addSecondBatch = new TransactionExecutor.Subroutine() {
        @Override
        public void apply() throws Exception {
            for (PartitionKey key : secondBatch) {
                dataset.getPartitionOutput(key).addPartition();
            }
        }
    };
    dsFrameworkUtil.newInMemoryTransactionExecutor(txAwareDataset).execute(addSecondBatch);

    TransactionExecutor.Subroutine consumeSecondBatch = new TransactionExecutor.Subroutine() {
        @Override
        public void apply() throws Exception {
            // the same consumer is reused, so only the partitions added since its last consumption come back
            List<? extends Partition> newlyConsumed = partitionConsumer.consumePartitions().getPartitions();
            Assert.assertEquals(secondBatch, toKeys(newlyConsumed));
        }
    };
    dsFrameworkUtil.newInMemoryTransactionExecutor(txAwareDataset).execute(consumeSecondBatch);

    TransactionExecutor.Subroutine consumeNothing = new TransactionExecutor.Subroutine() {
        @Override
        public void apply() throws Exception {
            // nothing was added in between, so this consumption must return no partitions
            List<? extends Partition> nothingLeft = partitionConsumer.consumePartitions().getPartitions();
            Assert.assertTrue(nothingLeft.isEmpty());
        }
    };
    dsFrameworkUtil.newInMemoryTransactionExecutor(txAwareDataset).execute(consumeNothing);

    TransactionExecutor.Subroutine consumeWithFreshConsumer = new TransactionExecutor.Subroutine() {
        @Override
        public void apply() throws Exception {
            // a new consumer with its own in-memory state persistor is expected to return both batches
            PartitionConsumer freshConsumer = new ConcurrentPartitionConsumer(dataset, new InMemoryStatePersistor());
            List<? extends Partition> everything = freshConsumer.consumePartitions().getPartitions();
            Set<PartitionKey> expectedKeys = new HashSet<>(firstBatch);
            expectedKeys.addAll(secondBatch);
            Assert.assertEquals(expectedKeys, toKeys(everything));
        }
    };
    dsFrameworkUtil.newInMemoryTransactionExecutor(txAwareDataset).execute(consumeWithFreshConsumer);
}
Also used : ConcurrentPartitionConsumer(co.cask.cdap.api.dataset.lib.partitioned.ConcurrentPartitionConsumer) ConsumablePartition(co.cask.cdap.api.dataset.lib.partitioned.ConsumablePartition) Partition(co.cask.cdap.api.dataset.lib.Partition) HashSet(java.util.HashSet) PartitionedFileSet(co.cask.cdap.api.dataset.lib.PartitionedFileSet) Set(java.util.Set) ConsumerWorkingSet(co.cask.cdap.api.dataset.lib.partitioned.ConsumerWorkingSet) PartitionedFileSet(co.cask.cdap.api.dataset.lib.PartitionedFileSet) TransactionExecutor(org.apache.tephra.TransactionExecutor) TransactionAware(org.apache.tephra.TransactionAware) PartitionKey(co.cask.cdap.api.dataset.lib.PartitionKey) ArrayList(java.util.ArrayList) ImmutableList(com.google.common.collect.ImmutableList) List(java.util.List) ConcurrentPartitionConsumer(co.cask.cdap.api.dataset.lib.partitioned.ConcurrentPartitionConsumer) PartitionConsumer(co.cask.cdap.api.dataset.lib.partitioned.PartitionConsumer) HashSet(java.util.HashSet) Test(org.junit.Test)

Example 2 with PartitionConsumer

use of co.cask.cdap.api.dataset.lib.partitioned.PartitionConsumer in project cdap by caskdata.

the class PartitionConsumerTest method testNumRetries.

/**
 * Checks the retry limit of a {@link PartitionConsumer} configured with {@code setMaxRetries(1)}: a partition
 * whose processing is aborted {@code numRetries + 1} times is no longer offered for consumption and is instead
 * reported through {@code getFailedPartitions()}.
 *
 * @throws Exception if any of the transactional steps fails
 */
@Test
public void testNumRetries() throws Exception {
    final PartitionedFileSet dataset = dsFrameworkUtil.getInstance(pfsInstance);
    final TransactionAware txAwareDataset = (TransactionAware) dataset;
    final int numRetries = 1;
    ConsumerConfiguration retryLimitedConfig = ConsumerConfiguration.builder().setMaxRetries(numRetries).build();
    final PartitionConsumer partitionConsumer = new ConcurrentPartitionConsumer(dataset, new InMemoryStatePersistor(), retryLimitedConfig);
    final PartitionKey partitionKey = generateUniqueKey();

    TransactionExecutor.Subroutine addPartition = new TransactionExecutor.Subroutine() {
        @Override
        public void apply() throws Exception {
            dataset.getPartitionOutput(partitionKey).addPartition();
        }
    };
    dsFrameworkUtil.newInMemoryTransactionExecutor(txAwareDataset).execute(addPartition);

    TransactionExecutor.Subroutine exhaustRetries = new TransactionExecutor.Subroutine() {
        @Override
        public void apply() throws Exception {
            // consume and abort the partition once for the initial attempt plus once per allowed retry;
            // each of these rounds must still hand out the partition
            for (int attempt = 0; attempt <= numRetries; attempt++) {
                List<PartitionDetail> taken = partitionConsumer.consumePartitions(1).getPartitions();
                Assert.assertEquals(1, taken.size());
                Assert.assertEquals(partitionKey, taken.get(0).getPartitionKey());
                // report the processing as unsuccessful
                partitionConsumer.onFinish(taken, false);
            }
            // with the attempts used up, nothing is consumable any more and the partition shows up as failed
            PartitionConsumerResult afterRetries = partitionConsumer.consumePartitions(1);
            Assert.assertEquals(0, afterRetries.getPartitions().size());
            Assert.assertEquals(1, afterRetries.getFailedPartitions().size());
            Assert.assertEquals(partitionKey, afterRetries.getFailedPartitions().get(0).getPartitionKey());
        }
    };
    dsFrameworkUtil.newInMemoryTransactionExecutor(txAwareDataset).execute(exhaustRetries);
}
Also used : ConcurrentPartitionConsumer(co.cask.cdap.api.dataset.lib.partitioned.ConcurrentPartitionConsumer) PartitionConsumerResult(co.cask.cdap.api.dataset.lib.partitioned.PartitionConsumerResult) PartitionedFileSet(co.cask.cdap.api.dataset.lib.PartitionedFileSet) TransactionExecutor(org.apache.tephra.TransactionExecutor) TransactionAware(org.apache.tephra.TransactionAware) ConsumerConfiguration(co.cask.cdap.api.dataset.lib.partitioned.ConsumerConfiguration) PartitionKey(co.cask.cdap.api.dataset.lib.PartitionKey) ArrayList(java.util.ArrayList) ImmutableList(com.google.common.collect.ImmutableList) List(java.util.List) ConcurrentPartitionConsumer(co.cask.cdap.api.dataset.lib.partitioned.ConcurrentPartitionConsumer) PartitionConsumer(co.cask.cdap.api.dataset.lib.partitioned.PartitionConsumer) Test(org.junit.Test)

Example 3 with PartitionConsumer

use of co.cask.cdap.api.dataset.lib.partitioned.PartitionConsumer in project cdap by caskdata.

the class PartitionConsumerTest method testPartitionPutback.

/**
 * Checks that partitions handed back with {@code untake} (or {@code untakeWithKeys}) become consumable again
 * and are not counted as failed, even when they are put back more often than the configured maximum number of
 * retries (1).
 *
 * @throws Exception if any of the transactional steps fails
 */
@Test
public void testPartitionPutback() throws Exception {
    final PartitionedFileSet dataset = dsFrameworkUtil.getInstance(pfsInstance);
    final TransactionAware txAwareDataset = (TransactionAware) dataset;
    final Set<PartitionKey> partitionKeys = new HashSet<>();
    for (int remaining = 10; remaining > 0; remaining--) {
        partitionKeys.add(generateUniqueKey());
    }
    ConsumerConfiguration singleRetryConfig = ConsumerConfiguration.builder().setMaxRetries(1).build();
    final PartitionConsumer partitionConsumer = new ConcurrentPartitionConsumer(dataset, new InMemoryStatePersistor(), singleRetryConfig);

    TransactionExecutor.Subroutine addPartitions = new TransactionExecutor.Subroutine() {
        @Override
        public void apply() throws Exception {
            for (PartitionKey key : partitionKeys) {
                dataset.getPartitionOutput(key).addPartition();
            }
        }
    };
    dsFrameworkUtil.newInMemoryTransactionExecutor(txAwareDataset).execute(addPartitions);

    TransactionExecutor.Subroutine putBackAndReconsume = new TransactionExecutor.Subroutine() {
        @Override
        public void apply() throws Exception {
            // the first consumption takes every partition
            List<? extends Partition> taken = partitionConsumer.consumePartitions().getPartitions();
            Assert.assertEquals(partitionKeys, toKeys(taken));
            // with everything taken and nothing new added, a second consumption returns no partitions
            Assert.assertTrue(partitionConsumer.consumePartitions().getPartitions().isEmpty());

            // put all taken partitions back five times in a row; every time they must be fully consumable again
            // and none of them may be reported as failed
            for (int round = 0; round < 5; round++) {
                partitionConsumer.untake(taken);
                PartitionConsumerResult retaken = partitionConsumer.consumePartitions();
                taken = retaken.getPartitions();
                Assert.assertEquals(partitionKeys, toKeys(taken));
                Assert.assertEquals(0, retaken.getFailedPartitions().size());
            }
            // everything is taken once more, so again no partitions are returned
            Assert.assertTrue(partitionConsumer.consumePartitions().getPartitions().isEmpty());

            // putting back just one partition, identified by its key, makes only that partition consumable
            Partition putBack = taken.get(0);
            partitionConsumer.untakeWithKeys(ImmutableList.of(putBack.getPartitionKey()));
            taken = partitionConsumer.consumePartitions().getPartitions();
            Assert.assertEquals(1, taken.size());
            Assert.assertEquals(putBack, taken.get(0));
        }
    };
    dsFrameworkUtil.newInMemoryTransactionExecutor(txAwareDataset).execute(putBackAndReconsume);
}
Also used : ConcurrentPartitionConsumer(co.cask.cdap.api.dataset.lib.partitioned.ConcurrentPartitionConsumer) ConsumablePartition(co.cask.cdap.api.dataset.lib.partitioned.ConsumablePartition) Partition(co.cask.cdap.api.dataset.lib.Partition) PartitionConsumerResult(co.cask.cdap.api.dataset.lib.partitioned.PartitionConsumerResult) PartitionedFileSet(co.cask.cdap.api.dataset.lib.PartitionedFileSet) TransactionExecutor(org.apache.tephra.TransactionExecutor) TransactionAware(org.apache.tephra.TransactionAware) PartitionKey(co.cask.cdap.api.dataset.lib.PartitionKey) ArrayList(java.util.ArrayList) ImmutableList(com.google.common.collect.ImmutableList) List(java.util.List) ConcurrentPartitionConsumer(co.cask.cdap.api.dataset.lib.partitioned.ConcurrentPartitionConsumer) PartitionConsumer(co.cask.cdap.api.dataset.lib.partitioned.PartitionConsumer) HashSet(java.util.HashSet) Test(org.junit.Test)

Example 4 with PartitionConsumer

use of co.cask.cdap.api.dataset.lib.partitioned.PartitionConsumer in project cdap by caskdata.

the class PartitionConsumerTest method testDroppedPartitions.

/**
 * Covers the case where a partition that a {@link PartitionConsumer} has already handed out once is dropped
 * from the PartitionedFileSet before it is consumed again (see CDAP-6215). The consumer is configured with a
 * maximum working set size of 1 and a maximum of 2 retries.
 *
 * @throws Exception if any of the transactional steps fails
 */
@Test
public void testDroppedPartitions() throws Exception {
    final PartitionedFileSet dataset = dsFrameworkUtil.getInstance(pfsInstance);
    final TransactionAware txAwareDataset = (TransactionAware) dataset;
    ConsumerConfiguration smallWorkingSetConfig = ConsumerConfiguration.builder().setMaxWorkingSetSize(1).setMaxRetries(2).build();
    final PartitionConsumer partitionConsumer = new ConcurrentPartitionConsumer(dataset, new InMemoryStatePersistor(), smallWorkingSetConfig);
    final PartitionKey partitionKey1 = generateUniqueKey();
    final PartitionKey partitionKey2 = generateUniqueKey();

    // The two partitions are deliberately added in separate transactions so that the first can be in the working
    // set without the second; partitions added in one transaction cannot be split up (their index is the same).
    TransactionExecutor.Subroutine addFirstPartition = new TransactionExecutor.Subroutine() {
        @Override
        public void apply() throws Exception {
            dataset.getPartitionOutput(partitionKey1).addPartition();
        }
    };
    dsFrameworkUtil.newInMemoryTransactionExecutor(txAwareDataset).execute(addFirstPartition);

    TransactionExecutor.Subroutine addSecondPartition = new TransactionExecutor.Subroutine() {
        @Override
        public void apply() throws Exception {
            dataset.getPartitionOutput(partitionKey2).addPartition();
        }
    };
    dsFrameworkUtil.newInMemoryTransactionExecutor(txAwareDataset).execute(addSecondPartition);

    TransactionExecutor.Subroutine consumeAndAbortFirst = new TransactionExecutor.Subroutine() {
        @Override
        public void apply() throws Exception {
            // take a single partition (expected to be the first one added) ...
            List<PartitionDetail> taken = partitionConsumer.consumePartitions(1).getPartitions();
            Assert.assertEquals(1, taken.size());
            Assert.assertEquals(partitionKey1, taken.get(0).getPartitionKey());
            // ... and abort its processing, which puts it back in the working set
            partitionConsumer.onFinish(taken, false);
        }
    };
    dsFrameworkUtil.newInMemoryTransactionExecutor(txAwareDataset).execute(consumeAndAbortFirst);

    // once dropped from the dataset, partitionKey1 is no longer available for consuming
    TransactionExecutor.Subroutine dropFirstPartition = new TransactionExecutor.Subroutine() {
        @Override
        public void apply() throws Exception {
            dataset.dropPartition(partitionKey1);
        }
    };
    dsFrameworkUtil.newInMemoryTransactionExecutor(txAwareDataset).execute(dropFirstPartition);

    TransactionExecutor.Subroutine consumeAfterDrop = new TransactionExecutor.Subroutine() {
        @Override
        public void apply() throws Exception {
            // the first consumption removes the dropped partition from the working set; as it was the only entry,
            // nothing is returned, and the dropped partition is not reported as failed either
            PartitionConsumerResult afterDrop = partitionConsumer.consumePartitions(1);
            Assert.assertEquals(0, afterDrop.getPartitions().size());
            Assert.assertEquals(0, afterDrop.getFailedPartitions().size());
            // the next consumption repopulates the working set and returns the remaining partition
            PartitionConsumerResult afterRefill = partitionConsumer.consumePartitions(1);
            Assert.assertEquals(1, afterRefill.getPartitions().size());
            Assert.assertEquals(partitionKey2, afterRefill.getPartitions().get(0).getPartitionKey());
        }
    };
    dsFrameworkUtil.newInMemoryTransactionExecutor(txAwareDataset).execute(consumeAfterDrop);
}
Also used : ConcurrentPartitionConsumer(co.cask.cdap.api.dataset.lib.partitioned.ConcurrentPartitionConsumer) PartitionConsumerResult(co.cask.cdap.api.dataset.lib.partitioned.PartitionConsumerResult) PartitionedFileSet(co.cask.cdap.api.dataset.lib.PartitionedFileSet) TransactionExecutor(org.apache.tephra.TransactionExecutor) TransactionAware(org.apache.tephra.TransactionAware) ConsumerConfiguration(co.cask.cdap.api.dataset.lib.partitioned.ConsumerConfiguration) PartitionKey(co.cask.cdap.api.dataset.lib.PartitionKey) ArrayList(java.util.ArrayList) ImmutableList(com.google.common.collect.ImmutableList) List(java.util.List) ConcurrentPartitionConsumer(co.cask.cdap.api.dataset.lib.partitioned.ConcurrentPartitionConsumer) PartitionConsumer(co.cask.cdap.api.dataset.lib.partitioned.PartitionConsumer) Test(org.junit.Test)

Example 5 with PartitionConsumer

use of co.cask.cdap.api.dataset.lib.partitioned.PartitionConsumer in project cdap by caskdata.

the class PartitionConsumerTest method testCustomOperations.

/**
 * Exercises a {@code CustomConsumer} configured with a maximum of 3 retries: the first two consumptions each
 * return all partitions and are reported as failed, after which the partitions are returned one at a time and
 * each is reported as successfully processed.
 *
 * @throws Exception if any of the transactional steps fails
 */
@Test
public void testCustomOperations() throws Exception {
    final PartitionedFileSet dataset = dsFrameworkUtil.getInstance(pfsInstance);
    final TransactionAware txAwareDataset = (TransactionAware) dataset;
    ConsumerConfiguration threeRetriesConfig = ConsumerConfiguration.builder().setMaxRetries(3).build();
    final PartitionConsumer partitionConsumer = new CustomConsumer(dataset, new InMemoryStatePersistor(), threeRetriesConfig);
    final int numPartitions = 3;
    final List<PartitionKey> partitionKeys = new ArrayList<>(numPartitions);
    for (int remaining = numPartitions; remaining > 0; remaining--) {
        partitionKeys.add(generateUniqueKey());
    }

    TransactionExecutor.Subroutine addPartitions = new TransactionExecutor.Subroutine() {
        @Override
        public void apply() throws Exception {
            for (PartitionKey key : partitionKeys) {
                dataset.getPartitionOutput(key).addPartition();
            }
        }
    };
    dsFrameworkUtil.newInMemoryTransactionExecutor(txAwareDataset).execute(addPartitions);

    TransactionExecutor.Subroutine failTwiceThenSucceed = new TransactionExecutor.Subroutine() {
        @Override
        public void apply() throws Exception {
            // two rounds in which all partitions are consumed together and the processing is reported as failed
            for (int failedRound = 0; failedRound < 2; failedRound++) {
                List<PartitionDetail> wholeBatch = partitionConsumer.consumePartitions().getPartitions();
                Assert.assertEquals(numPartitions, wholeBatch.size());
                partitionConsumer.onFinish(wholeBatch, false);
            }
            // after the two failed rounds each consumption returns a single partition (presumably the custom
            // behavior of CustomConsumer, which is not visible here); each one is finished successfully
            for (int processed = 0; processed < numPartitions; processed++) {
                List<PartitionDetail> single = partitionConsumer.consumePartitions().getPartitions();
                Assert.assertEquals(1, single.size());
                partitionConsumer.onFinish(single, true);
            }
        }
    };
    dsFrameworkUtil.newInMemoryTransactionExecutor(txAwareDataset).execute(failTwiceThenSucceed);
}
Also used : ArrayList(java.util.ArrayList) PartitionedFileSet(co.cask.cdap.api.dataset.lib.PartitionedFileSet) TransactionExecutor(org.apache.tephra.TransactionExecutor) TransactionAware(org.apache.tephra.TransactionAware) ConsumerConfiguration(co.cask.cdap.api.dataset.lib.partitioned.ConsumerConfiguration) PartitionKey(co.cask.cdap.api.dataset.lib.PartitionKey) ArrayList(java.util.ArrayList) ImmutableList(com.google.common.collect.ImmutableList) List(java.util.List) ConcurrentPartitionConsumer(co.cask.cdap.api.dataset.lib.partitioned.ConcurrentPartitionConsumer) PartitionConsumer(co.cask.cdap.api.dataset.lib.partitioned.PartitionConsumer) Test(org.junit.Test)

Aggregations

PartitionKey (co.cask.cdap.api.dataset.lib.PartitionKey)11 PartitionedFileSet (co.cask.cdap.api.dataset.lib.PartitionedFileSet)11 ConcurrentPartitionConsumer (co.cask.cdap.api.dataset.lib.partitioned.ConcurrentPartitionConsumer)11 PartitionConsumer (co.cask.cdap.api.dataset.lib.partitioned.PartitionConsumer)11 Test (org.junit.Test)11 TransactionAware (org.apache.tephra.TransactionAware)10 TransactionExecutor (org.apache.tephra.TransactionExecutor)10 ImmutableList (com.google.common.collect.ImmutableList)9 ArrayList (java.util.ArrayList)9 List (java.util.List)9 ConsumerConfiguration (co.cask.cdap.api.dataset.lib.partitioned.ConsumerConfiguration)7 HashSet (java.util.HashSet)6 Partition (co.cask.cdap.api.dataset.lib.Partition)3 PartitionDetail (co.cask.cdap.api.dataset.lib.PartitionDetail)3 ConsumablePartition (co.cask.cdap.api.dataset.lib.partitioned.ConsumablePartition)3 PartitionConsumerResult (co.cask.cdap.api.dataset.lib.partitioned.PartitionConsumerResult)3 ConsumerWorkingSet (co.cask.cdap.api.dataset.lib.partitioned.ConsumerWorkingSet)2 Set (java.util.Set)2 Predicate (co.cask.cdap.api.Predicate)1 PartitionFilter (co.cask.cdap.api.dataset.lib.PartitionFilter)1