use of co.cask.cdap.api.dataset.lib.partitioned.ConsumerConfiguration in project cdap by caskdata.
the class PartitionConsumerTest method testNumRetries.
/**
 * Checks the retry limit of a {@link ConcurrentPartitionConsumer}: with the maximum number of retries set to
 * {@code numRetries}, a partition that is consumed and aborted {@code numRetries + 1} times is expected to stop
 * being returned as a consumable partition and to be reported as a failed partition instead.
 */
@Test
public void testNumRetries() throws Exception {
  final PartitionedFileSet dataset = dsFrameworkUtil.getInstance(pfsInstance);
  final TransactionAware txAwareDataset = (TransactionAware) dataset;

  final int numRetries = 1;
  ConsumerConfiguration retryLimited = ConsumerConfiguration.builder().setMaxRetries(numRetries).build();
  final PartitionConsumer partitionConsumer =
    new ConcurrentPartitionConsumer(dataset, new InMemoryStatePersistor(), retryLimited);
  final PartitionKey partitionKey = generateUniqueKey();

  // first transaction: register the single partition under test
  TransactionExecutor.Subroutine addPartition = new TransactionExecutor.Subroutine() {
    @Override
    public void apply() throws Exception {
      dataset.getPartitionOutput(partitionKey).addPartition();
    }
  };
  dsFrameworkUtil.newInMemoryTransactionExecutor(txAwareDataset).execute(addPartition);

  // second transaction: use up every allowed attempt, then check how the partition is reported
  TransactionExecutor.Subroutine exhaustRetries = new TransactionExecutor.Subroutine() {
    @Override
    public void apply() throws Exception {
      // consume and abort the partition numRetries + 1 times (the first attempt plus each retry)
      for (int attempt = 0; attempt <= numRetries; attempt++) {
        PartitionConsumerResult consumed = partitionConsumer.consumePartitions(1);
        List<PartitionDetail> inProgress = consumed.getPartitions();
        Assert.assertEquals(1, inProgress.size());
        Assert.assertEquals(partitionKey, inProgress.get(0).getPartitionKey());
        // passing false marks the processing of the partition as aborted
        partitionConsumer.onFinish(inProgress, false);
      }

      // once the attempts are used up, nothing is consumable and the partition shows up as failed
      PartitionConsumerResult afterRetries = partitionConsumer.consumePartitions(1);
      Assert.assertEquals(0, afterRetries.getPartitions().size());
      Assert.assertEquals(1, afterRetries.getFailedPartitions().size());
      Assert.assertEquals(partitionKey, afterRetries.getFailedPartitions().get(0).getPartitionKey());
    }
  };
  dsFrameworkUtil.newInMemoryTransactionExecutor(txAwareDataset).execute(exhaustRetries);
}
use of co.cask.cdap.api.dataset.lib.partitioned.ConsumerConfiguration in project cdap by caskdata.
the class PartitionConsumerTest method testDroppedPartitions.
/**
 * Covers the case of a partition that is part of the partition consumer's working set being dropped from the
 * PartitionedFileSet (see CDAP-6215). The test expects the first consume call after the drop to return nothing,
 * and the following call to return the remaining partition.
 */
@Test
public void testDroppedPartitions() throws Exception {
  final PartitionedFileSet dataset = dsFrameworkUtil.getInstance(pfsInstance);
  final TransactionAware txAwareDataset = (TransactionAware) dataset;

  ConsumerConfiguration singleSlotConfig =
    ConsumerConfiguration.builder().setMaxWorkingSetSize(1).setMaxRetries(2).build();
  final PartitionConsumer partitionConsumer =
    new ConcurrentPartitionConsumer(dataset, new InMemoryStatePersistor(), singleSlotConfig);

  final PartitionKey partitionKey1 = generateUniqueKey();
  final PartitionKey partitionKey2 = generateUniqueKey();

  // The two partitions are deliberately added in separate transactions, so that the first one can be in the
  // working set without the second. Partitions added in the same transaction can not be split up, because
  // they share the same index.
  TransactionExecutor.Subroutine addFirstPartition = new TransactionExecutor.Subroutine() {
    @Override
    public void apply() throws Exception {
      dataset.getPartitionOutput(partitionKey1).addPartition();
    }
  };
  dsFrameworkUtil.newInMemoryTransactionExecutor(txAwareDataset).execute(addFirstPartition);

  TransactionExecutor.Subroutine addSecondPartition = new TransactionExecutor.Subroutine() {
    @Override
    public void apply() throws Exception {
      dataset.getPartitionOutput(partitionKey2).addPartition();
    }
  };
  dsFrameworkUtil.newInMemoryTransactionExecutor(txAwareDataset).execute(addSecondPartition);

  // consume the first partition once and abort it, so that it goes back into the working set
  TransactionExecutor.Subroutine consumeAndAbortFirst = new TransactionExecutor.Subroutine() {
    @Override
    public void apply() throws Exception {
      List<PartitionDetail> inProgress = partitionConsumer.consumePartitions(1).getPartitions();
      Assert.assertEquals(1, inProgress.size());
      Assert.assertEquals(partitionKey1, inProgress.get(0).getPartitionKey());
      partitionConsumer.onFinish(inProgress, false);
    }
  };
  dsFrameworkUtil.newInMemoryTransactionExecutor(txAwareDataset).execute(consumeAndAbortFirst);

  // drop the first partition from the dataset, so it is no longer available for consuming
  TransactionExecutor.Subroutine dropFirstPartition = new TransactionExecutor.Subroutine() {
    @Override
    public void apply() throws Exception {
      dataset.dropPartition(partitionKey1);
    }
  };
  dsFrameworkUtil.newInMemoryTransactionExecutor(txAwareDataset).execute(dropFirstPartition);

  TransactionExecutor.Subroutine consumeAfterDrop = new TransactionExecutor.Subroutine() {
    @Override
    public void apply() throws Exception {
      // the first consume call removes the dropped partition from the working set; since it was the only
      // partition in there, nothing is returned, and it is not reported as a failed partition either
      PartitionConsumerResult afterDrop = partitionConsumer.consumePartitions(1);
      Assert.assertEquals(0, afterDrop.getPartitions().size());
      Assert.assertEquals(0, afterDrop.getFailedPartitions().size());

      // the next consume call repopulates the working set and hands out the second partition
      PartitionConsumerResult repopulated = partitionConsumer.consumePartitions(1);
      Assert.assertEquals(1, repopulated.getPartitions().size());
      Assert.assertEquals(partitionKey2, repopulated.getPartitions().get(0).getPartitionKey());
    }
  };
  dsFrameworkUtil.newInMemoryTransactionExecutor(txAwareDataset).execute(consumeAfterDrop);
}
use of co.cask.cdap.api.dataset.lib.partitioned.ConsumerConfiguration in project cdap by caskdata.
the class PartitionConsumerTest method testCustomOperations.
/**
 * Exercises a {@code CustomConsumer}: all partitions are consumed together and aborted twice, after which
 * the test expects them to be handed out one at a time, each of which is then marked as successfully processed.
 */
@Test
public void testCustomOperations() throws Exception {
  final PartitionedFileSet dataset = dsFrameworkUtil.getInstance(pfsInstance);
  final TransactionAware txAwareDataset = (TransactionAware) dataset;

  ConsumerConfiguration threeRetries = ConsumerConfiguration.builder().setMaxRetries(3).build();
  final PartitionConsumer partitionConsumer =
    new CustomConsumer(dataset, new InMemoryStatePersistor(), threeRetries);

  final int numPartitions = 3;
  final List<PartitionKey> partitionKeys = new ArrayList<>(numPartitions);
  for (int created = 0; created < numPartitions; created++) {
    partitionKeys.add(generateUniqueKey());
  }

  // add every partition within one transaction
  TransactionExecutor.Subroutine addPartitions = new TransactionExecutor.Subroutine() {
    @Override
    public void apply() throws Exception {
      for (PartitionKey key : partitionKeys) {
        dataset.getPartitionOutput(key).addPartition();
      }
    }
  };
  dsFrameworkUtil.newInMemoryTransactionExecutor(txAwareDataset).execute(addPartitions);

  TransactionExecutor.Subroutine consumeInRounds = new TransactionExecutor.Subroutine() {
    @Override
    public void apply() throws Exception {
      // two rounds in which all partitions are returned together and their processing is aborted
      for (int failedRound = 0; failedRound < 2; failedRound++) {
        List<PartitionDetail> batch = partitionConsumer.consumePartitions().getPartitions();
        Assert.assertEquals(numPartitions, batch.size());
        partitionConsumer.onFinish(batch, false);
      }

      // after two failed attempts, the partitions are returned individually; commit each of them
      for (int committed = 0; committed < numPartitions; committed++) {
        List<PartitionDetail> single = partitionConsumer.consumePartitions().getPartitions();
        Assert.assertEquals(1, single.size());
        partitionConsumer.onFinish(single, true);
      }
    }
  };
  dsFrameworkUtil.newInMemoryTransactionExecutor(txAwareDataset).execute(consumeInRounds);
}
use of co.cask.cdap.api.dataset.lib.partitioned.ConsumerConfiguration in project cdap by caskdata.
the class PartitionConsumerTest method testSimpleConcurrency.
/**
 * Runs three {@link ConcurrentPartitionConsumer}s against one shared state persistor and checks that a
 * partition which is in progress for one consumer is not handed to another, that an aborted partition becomes
 * available again, and that the partitions processed across the consumers add up to all the added partitions.
 */
@Test
public void testSimpleConcurrency() throws Exception {
  final PartitionedFileSet dataset = dsFrameworkUtil.getInstance(pfsInstance);
  final TransactionAware txAwareDataset = (TransactionAware) dataset;

  final Set<PartitionKey> partitionKeys = new HashSet<>();
  for (int created = 0; created < 10; created++) {
    partitionKeys.add(generateUniqueKey());
  }

  // the three consumers are backed by the same persistor, so that they share the same state
  InMemoryStatePersistor sharedState = new InMemoryStatePersistor();
  ConsumerConfiguration threeRetries = ConsumerConfiguration.builder().setMaxRetries(3).build();
  final PartitionConsumer partitionConsumer1 = new ConcurrentPartitionConsumer(dataset, sharedState, threeRetries);
  final PartitionConsumer partitionConsumer2 = new ConcurrentPartitionConsumer(dataset, sharedState, threeRetries);
  final PartitionConsumer partitionConsumer3 = new ConcurrentPartitionConsumer(dataset, sharedState, threeRetries);

  // add all ten keys to the partitioned fileset, in a single transaction
  TransactionExecutor.Subroutine addPartitions = new TransactionExecutor.Subroutine() {
    @Override
    public void apply() throws Exception {
      for (final PartitionKey key : partitionKeys) {
        dataset.getPartitionOutput(key).addPartition();
      }
    }
  };
  dsFrameworkUtil.newInMemoryTransactionExecutor(txAwareDataset).execute(addPartitions);

  TransactionExecutor.Subroutine consumeConcurrently = new TransactionExecutor.Subroutine() {
    @Override
    public void apply() throws Exception {
      // with a limit of 1, only one partition is returned, even though more are unconsumed
      List<PartitionDetail> consumedBy1 = partitionConsumer1.consumePartitions(1).getPartitions();
      Assert.assertEquals(1, consumedBy1.size());

      // consumer 2 asks for 10 partitions, but one is in progress for consumer 1, so it gets the other 9
      List<PartitionDetail> consumedBy2 = partitionConsumer2.consumePartitions(10).getPartitions();
      Assert.assertEquals(9, consumedBy2.size());

      // nothing is left for consumer 3: every partition is in progress for consumer 1 or consumer 2
      List<PartitionDetail> nothingLeft = partitionConsumer3.consumePartitions().getPartitions();
      Assert.assertEquals(0, nothingLeft.size());

      // consumer 1 aborts its partition, which makes it available for consumer 3
      partitionConsumer1.onFinish(consumedBy1, false);
      consumedBy1.clear();

      // consumer 3 queries with a limit of 2, but only the partition released by consumer 1 is available
      List<PartitionDetail> consumedBy3 = partitionConsumer3.consumePartitions(2).getPartitions();
      Assert.assertEquals(1, consumedBy3.size());

      // consumers 3 and 2 mark their partitions as successfully processed
      partitionConsumer3.onFinish(consumedBy3, true);
      // consumer 2 does so by partition key, to test the onFinishWithKeys API
      List<PartitionKey> keysConsumedBy2 = new ArrayList<>(consumedBy2.size());
      for (PartitionDetail detail : consumedBy2) {
        keysConsumedBy2.add(detail.getPartitionKey());
      }
      partitionConsumer2.onFinishWithKeys(keysConsumedBy2, true);

      // all partitions are processed at this point, so none are available for consumption
      List<PartitionDetail> afterCompletion = partitionConsumer3.consumePartitions().getPartitions();
      Assert.assertEquals(0, afterCompletion.size());

      List<PartitionDetail> allProcessedPartitions = new ArrayList<>();
      allProcessedPartitions.addAll(consumedBy1);
      allProcessedPartitions.addAll(consumedBy2);
      allProcessedPartitions.addAll(consumedBy3);
      // compared as key sets; the ordering may differ, since all partitions were added in the same transaction
      Assert.assertEquals(partitionKeys, toKeys(allProcessedPartitions));
    }
  };
  dsFrameworkUtil.newInMemoryTransactionExecutor(txAwareDataset).execute(consumeConcurrently);
}
use of co.cask.cdap.api.dataset.lib.partitioned.ConsumerConfiguration in project cdap by caskdata.
the class PartitionConsumerTest method testOnFinishWithInvalidPartition.
/**
 * Checks that {@code onFinish} rejects partitions that are not currently being processed:
 * <ul>
 *   <li>aborting a partition that was already aborted is expected to throw IllegalStateException</li>
 *   <li>committing a partition that was already committed is expected to throw IllegalArgumentException</li>
 * </ul>
 */
@Test
public void testOnFinishWithInvalidPartition() throws Exception {
  final PartitionedFileSet dataset = dsFrameworkUtil.getInstance(pfsInstance);
  final TransactionAware txAwareDataset = (TransactionAware) dataset;

  ConsumerConfiguration threeRetries = ConsumerConfiguration.builder().setMaxRetries(3).build();
  final PartitionConsumer partitionConsumer =
    new ConcurrentPartitionConsumer(dataset, new InMemoryStatePersistor(), threeRetries);
  final PartitionKey partitionKey = generateUniqueKey();

  // first transaction: register the single partition under test
  TransactionExecutor.Subroutine addPartition = new TransactionExecutor.Subroutine() {
    @Override
    public void apply() throws Exception {
      dataset.getPartitionOutput(partitionKey).addPartition();
    }
  };
  dsFrameworkUtil.newInMemoryTransactionExecutor(txAwareDataset).execute(addPartition);

  TransactionExecutor.Subroutine finishTwice = new TransactionExecutor.Subroutine() {
    @Override
    public void apply() throws Exception {
      List<PartitionDetail> aborted = partitionConsumer.consumePartitions(1).getPartitions();
      Assert.assertEquals(1, aborted.size());
      // passing false aborts the processing of the partition
      partitionConsumer.onFinish(aborted, false);

      // aborting the same partition again must be rejected, since it is no longer IN_PROGRESS
      boolean secondAbortRejected = false;
      try {
        partitionConsumer.onFinish(aborted, false);
      } catch (IllegalStateException expected) {
        secondAbortRejected = true;
      }
      if (!secondAbortRejected) {
        Assert.fail("Expected not to be able to abort a partition that is not IN_PROGRESS");
      }

      // process the partition again, this time marking it as complete (by passing in true)
      List<PartitionDetail> committed = partitionConsumer.consumePartitions(1).getPartitions();
      Assert.assertEquals(1, committed.size());
      partitionConsumer.onFinish(committed, true);

      // committing the same partition again must be rejected as well
      boolean secondCommitRejected = false;
      try {
        partitionConsumer.onFinish(committed, true);
      } catch (IllegalArgumentException expected) {
        secondCommitRejected = true;
      }
      if (!secondCommitRejected) {
        Assert.fail("Expected not to be able to call onFinish on a partition is not IN_PROGRESS");
      }
    }
  };
  dsFrameworkUtil.newInMemoryTransactionExecutor(txAwareDataset).execute(finishTwice);
}
Aggregations