use of io.cdap.cdap.api.dataset.lib.PartitionedFileSet in project cdap by cdapio.
the class PartitionConsumerTest method testDroppedPartitions.
@Test
public void testDroppedPartitions() throws Exception {
// Tests the case of a partition in the partition consumer working set being dropped from the Partitioned
// FileSet (See CDAP-6215)
final PartitionedFileSet dataset = dsFrameworkUtil.getInstance(pfsInstance);
final TransactionAware txAwareDataset = (TransactionAware) dataset;
ConsumerConfiguration configuration = ConsumerConfiguration.builder().setMaxWorkingSetSize(1).setMaxRetries(2).build();
final PartitionConsumer partitionConsumer = new ConcurrentPartitionConsumer(dataset, new InMemoryStatePersistor(), configuration);
final PartitionKey partitionKey1 = generateUniqueKey();
final PartitionKey partitionKey2 = generateUniqueKey();
// Note: These two partitions are added in separate transactions, so that the first can exist in the working set
// without the second. Partitions in the same transaction can not be split up (due to their index being the same)
dsFrameworkUtil.newInMemoryTransactionExecutor(txAwareDataset).execute(new TransactionExecutor.Subroutine() {
@Override
public void apply() throws Exception {
dataset.getPartitionOutput(partitionKey1).addPartition();
}
});
dsFrameworkUtil.newInMemoryTransactionExecutor(txAwareDataset).execute(new TransactionExecutor.Subroutine() {
@Override
public void apply() throws Exception {
dataset.getPartitionOutput(partitionKey2).addPartition();
}
});
dsFrameworkUtil.newInMemoryTransactionExecutor(txAwareDataset).execute(new TransactionExecutor.Subroutine() {
@Override
public void apply() throws Exception {
// consuming and aborting the partition numRetries times plus one (for the first attempt) makes it get removed
// from the working set
List<PartitionDetail> partitionDetails = partitionConsumer.consumePartitions(1).getPartitions();
Assert.assertEquals(1, partitionDetails.size());
Assert.assertEquals(partitionKey1, partitionDetails.get(0).getPartitionKey());
// aborting the processing of the partition, to put it back in the working set
partitionConsumer.onFinish(partitionDetails, false);
}
});
// dropping partitionKey1 from the dataset makes it no longer available for consuming
dsFrameworkUtil.newInMemoryTransactionExecutor(txAwareDataset).execute(new TransactionExecutor.Subroutine() {
@Override
public void apply() throws Exception {
dataset.dropPartition(partitionKey1);
}
});
dsFrameworkUtil.newInMemoryTransactionExecutor(txAwareDataset).execute(new TransactionExecutor.Subroutine() {
@Override
public void apply() throws Exception {
// first call to consume will drop the partition from the working set, and return nothing, since it was
// the only partition in the working set
PartitionConsumerResult result = partitionConsumer.consumePartitions(1);
Assert.assertEquals(0, result.getPartitions().size());
Assert.assertEquals(0, result.getFailedPartitions().size());
// following calls to consumePartitions will repopulate the working set and return additional partition(s)
result = partitionConsumer.consumePartitions(1);
Assert.assertEquals(1, result.getPartitions().size());
Assert.assertEquals(partitionKey2, result.getPartitions().get(0).getPartitionKey());
}
});
}
use of io.cdap.cdap.api.dataset.lib.PartitionedFileSet in project cdap by cdapio.
the class PartitionConsumerTest method testSimplePartitionConsuming.
@Test
public void testSimplePartitionConsuming() throws Exception {
final PartitionedFileSet dataset = dsFrameworkUtil.getInstance(pfsInstance);
final TransactionAware txAwareDataset = (TransactionAware) dataset;
final Set<PartitionKey> partitionKeys1 = new HashSet<>();
for (int i = 0; i < 10; i++) {
partitionKeys1.add(generateUniqueKey());
}
final Set<PartitionKey> partitionKeys2 = new HashSet<>();
for (int i = 0; i < 15; i++) {
partitionKeys2.add(generateUniqueKey());
}
final PartitionConsumer partitionConsumer = new ConcurrentPartitionConsumer(dataset, new InMemoryStatePersistor());
dsFrameworkUtil.newInMemoryTransactionExecutor(txAwareDataset).execute(new TransactionExecutor.Subroutine() {
@Override
public void apply() throws Exception {
for (PartitionKey partitionKey : partitionKeys1) {
dataset.getPartitionOutput(partitionKey).addPartition();
}
}
});
dsFrameworkUtil.newInMemoryTransactionExecutor(txAwareDataset).execute(new TransactionExecutor.Subroutine() {
@Override
public void apply() throws Exception {
// Initial consumption results in the partitions corresponding to partitionKeys1 to be consumed because only
// those partitions are added to the dataset at this point
List<? extends Partition> consumedPartitions = partitionConsumer.consumePartitions().getPartitions();
Assert.assertEquals(partitionKeys1, toKeys(consumedPartitions));
}
});
dsFrameworkUtil.newInMemoryTransactionExecutor(txAwareDataset).execute(new TransactionExecutor.Subroutine() {
@Override
public void apply() throws Exception {
for (PartitionKey partitionKey : partitionKeys2) {
dataset.getPartitionOutput(partitionKey).addPartition();
}
}
});
dsFrameworkUtil.newInMemoryTransactionExecutor(txAwareDataset).execute(new TransactionExecutor.Subroutine() {
@Override
public void apply() throws Exception {
// using the same PartitionConsumer (which remembers the PartitionConsumerState) to consume additional
// partitions results in only the newly added partitions (corresponding to partitionKeys2) to be returned
Assert.assertEquals(partitionKeys2, toKeys(partitionConsumer.consumePartitions().getPartitions()));
}
});
dsFrameworkUtil.newInMemoryTransactionExecutor(txAwareDataset).execute(new TransactionExecutor.Subroutine() {
@Override
public void apply() throws Exception {
// consuming the partitions again, without adding any new partitions returns an empty iterator
Assert.assertTrue(partitionConsumer.consumePartitions().getPartitions().isEmpty());
}
});
dsFrameworkUtil.newInMemoryTransactionExecutor(txAwareDataset).execute(new TransactionExecutor.Subroutine() {
@Override
public void apply() throws Exception {
// creating a new PartitionConsumer resets the consumption state. Consuming from it then returns an iterator
// with all the partition keys
List<? extends Partition> consumedPartitions = new ConcurrentPartitionConsumer(dataset, new InMemoryStatePersistor()).consumePartitions().getPartitions();
Set<PartitionKey> allKeys = new HashSet<>();
allKeys.addAll(partitionKeys1);
allKeys.addAll(partitionKeys2);
Assert.assertEquals(allKeys, toKeys(consumedPartitions));
}
});
}
use of io.cdap.cdap.api.dataset.lib.PartitionedFileSet in project cdap by cdapio.
the class PartitionConsumerTest method testPartitionPutback.
@Test
public void testPartitionPutback() throws Exception {
final PartitionedFileSet dataset = dsFrameworkUtil.getInstance(pfsInstance);
final TransactionAware txAwareDataset = (TransactionAware) dataset;
final Set<PartitionKey> partitionKeys = new HashSet<>();
for (int i = 0; i < 10; i++) {
partitionKeys.add(generateUniqueKey());
}
final PartitionConsumer partitionConsumer = new ConcurrentPartitionConsumer(dataset, new InMemoryStatePersistor(), ConsumerConfiguration.builder().setMaxRetries(1).build());
dsFrameworkUtil.newInMemoryTransactionExecutor(txAwareDataset).execute(new TransactionExecutor.Subroutine() {
@Override
public void apply() throws Exception {
for (PartitionKey partitionKey : partitionKeys) {
dataset.getPartitionOutput(partitionKey).addPartition();
}
}
});
dsFrameworkUtil.newInMemoryTransactionExecutor(txAwareDataset).execute(new TransactionExecutor.Subroutine() {
@Override
public void apply() throws Exception {
// consume all the partitions
List<? extends Partition> consumedPartitions = partitionConsumer.consumePartitions().getPartitions();
Assert.assertEquals(partitionKeys, toKeys(consumedPartitions));
// consuming the partitions again, without adding any new partitions returns an empty iterator
Assert.assertTrue(partitionConsumer.consumePartitions().getPartitions().isEmpty());
// and testing that they are still available for processing, and that there are no failed partitions
for (int i = 0; i < 5; i++) {
partitionConsumer.untake(consumedPartitions);
PartitionConsumerResult result = partitionConsumer.consumePartitions();
consumedPartitions = result.getPartitions();
Assert.assertEquals(partitionKeys, toKeys(consumedPartitions));
Assert.assertEquals(0, result.getFailedPartitions().size());
}
// consuming the partitions again, without adding any new partitions returns an empty iterator
Assert.assertTrue(partitionConsumer.consumePartitions().getPartitions().isEmpty());
// test functionality to put back a partial subset of the retrieved the partitions
Partition firstConsumedPartition = consumedPartitions.get(0);
// test the untakeWithKeys method
partitionConsumer.untakeWithKeys(ImmutableList.of(firstConsumedPartition.getPartitionKey()));
consumedPartitions = partitionConsumer.consumePartitions().getPartitions();
Assert.assertEquals(1, consumedPartitions.size());
Assert.assertEquals(firstConsumedPartition, consumedPartitions.get(0));
}
});
}
use of io.cdap.cdap.api.dataset.lib.PartitionedFileSet in project cdap by cdapio.
the class PartitionConsumerTest method testPartitionConsumer.
@Test
public void testPartitionConsumer() throws Exception {
// exercises the edge case of partition consumption, when partitions are being consumed, while another in-progress
// transaction has added a partition, but it has not yet committed, so the partition is not available for the
// consumer
PartitionedFileSet dataset1 = dsFrameworkUtil.getInstance(pfsInstance);
PartitionedFileSet dataset2 = dsFrameworkUtil.getInstance(pfsInstance);
TransactionManager txManager = dsFrameworkUtil.getTxManager();
InMemoryTxSystemClient txClient = new InMemoryTxSystemClient(txManager);
// producer simply adds initial partition
TransactionContext txContext1 = new TransactionContext(txClient, (TransactionAware) dataset1);
txContext1.start();
PartitionKey partitionKey1 = generateUniqueKey();
dataset1.getPartitionOutput(partitionKey1).addPartition();
txContext1.finish();
// consumer simply consumes initial partition
TransactionContext txContext2 = new TransactionContext(txClient, (TransactionAware) dataset2);
txContext2.start();
PartitionConsumer partitionConsumer = new ConcurrentPartitionConsumer(dataset2, new InMemoryStatePersistor());
List<? extends PartitionDetail> partitionIterator = partitionConsumer.consumePartitions().getPartitions();
Assert.assertEquals(1, partitionIterator.size());
Assert.assertEquals(partitionKey1, partitionIterator.get(0).getPartitionKey());
txContext2.finish();
// producer adds a second partition, but does not yet commit the transaction
txContext1.start();
PartitionKey partitionKey2 = generateUniqueKey();
dataset1.getPartitionOutput(partitionKey2).addPartition();
// consumer attempts to consume at a time after the partition was added, but before it committed. Because of this,
// the partition is not visible and will not be consumed
txContext2.start();
Assert.assertTrue(partitionConsumer.consumePartitions().getPartitions().isEmpty());
txContext2.finish();
// producer commits the transaction in which the second partition was added
txContext1.finish();
// the next time the consumer runs, it processes the second partition
txContext2.start();
partitionIterator = partitionConsumer.consumePartitions().getPartitions();
Assert.assertEquals(1, partitionIterator.size());
Assert.assertEquals(partitionKey2, partitionIterator.get(0).getPartitionKey());
txContext2.finish();
}
use of io.cdap.cdap.api.dataset.lib.PartitionedFileSet in project cdap by cdapio.
the class PartitionConsumerTest method testConsumeAfterDelete.
@Test
public void testConsumeAfterDelete() throws Exception {
final PartitionedFileSet dataset = dsFrameworkUtil.getInstance(pfsInstance);
final TransactionAware txAwareDataset = (TransactionAware) dataset;
final Set<PartitionKey> partitionKeys1 = new HashSet<>();
for (int i = 0; i < 3; i++) {
partitionKeys1.add(generateUniqueKey());
}
// need to ensure that our consumerConfiguration is larger than the amount we consume initially, so that
// additional partitions (which will be deleted afterwards) are brought into the working set
ConsumerConfiguration consumerConfiguration = ConsumerConfiguration.builder().setMaxWorkingSetSize(100).build();
final PartitionConsumer partitionConsumer = new ConcurrentPartitionConsumer(dataset, new InMemoryStatePersistor(), consumerConfiguration);
dsFrameworkUtil.newInMemoryTransactionExecutor(txAwareDataset).execute(new TransactionExecutor.Subroutine() {
@Override
public void apply() throws Exception {
for (PartitionKey partitionKey : partitionKeys1) {
dataset.getPartitionOutput(partitionKey).addPartition();
}
}
});
dsFrameworkUtil.newInMemoryTransactionExecutor(txAwareDataset).execute(new TransactionExecutor.Subroutine() {
@Override
public void apply() throws Exception {
// and not consumed
for (int i = 0; i < 2; i++) {
dataset.getPartitionOutput(generateUniqueKey()).addPartition();
}
}
});
dsFrameworkUtil.newInMemoryTransactionExecutor(txAwareDataset).execute(new TransactionExecutor.Subroutine() {
@Override
public void apply() throws Exception {
// consume 3 of the 5 initial partitions
Assert.assertEquals(partitionKeys1, toKeys(partitionConsumer.consumePartitions(3).getPartitions()));
}
});
final Set<PartitionKey> partitionKeys2 = new HashSet<>();
for (int i = 0; i < 5; i++) {
partitionKeys2.add(generateUniqueKey());
}
dsFrameworkUtil.newInMemoryTransactionExecutor(txAwareDataset).execute(new TransactionExecutor.Subroutine() {
@Override
public void apply() throws Exception {
// drop all existing partitions (2 of which are not consumed)
for (PartitionDetail partitionDetail : dataset.getPartitions(PartitionFilter.ALWAYS_MATCH)) {
dataset.dropPartition(partitionDetail.getPartitionKey());
}
// add 5 new ones
for (PartitionKey partitionKey : partitionKeys2) {
dataset.getPartitionOutput(partitionKey).addPartition();
}
}
});
dsFrameworkUtil.newInMemoryTransactionExecutor(txAwareDataset).execute(new TransactionExecutor.Subroutine() {
@Override
public void apply() throws Exception {
// the consumed partition keys should correspond to partitionKeys2, and not include the dropped, but unconsumed
// partitions added before them
Assert.assertEquals(partitionKeys2, toKeys(partitionConsumer.consumePartitions().getPartitions()));
}
});
dsFrameworkUtil.newInMemoryTransactionExecutor(txAwareDataset).execute(new TransactionExecutor.Subroutine() {
@Override
public void apply() throws Exception {
// consuming the partitions again, without adding any new partitions returns an empty iterator
Assert.assertTrue(partitionConsumer.consumePartitions().getPartitions().isEmpty());
}
});
dsFrameworkUtil.newInMemoryTransactionExecutor(txAwareDataset).execute(new TransactionExecutor.Subroutine() {
@Override
public void apply() throws Exception {
// creating a new PartitionConsumer resets the consumption state. Consuming from it then returns an iterator
// with all the partition keys added after the deletions
ConcurrentPartitionConsumer partitionConsumer2 = new ConcurrentPartitionConsumer(dataset, new InMemoryStatePersistor());
Assert.assertEquals(partitionKeys2, toKeys(partitionConsumer2.consumePartitions().getPartitions()));
}
});
}
Aggregations