Use of co.cask.cdap.api.dataset.lib.PartitionKey in project cdap by caskdata.
In class PartitionedFileSetArgumentsTest, the method testGetInputPartitionKeys:
@Test
public void testGetInputPartitionKeys() throws Exception {
  Map<String, String> arguments = new HashMap<>();
  Assert.assertEquals(0, PartitionedFileSetArguments.getInputPartitionKeys(arguments).size());
  List<? extends Partition> partitions = Lists.newArrayList(
    new BasicPartition(null, "path/doesn't/matter/1", generateUniqueKey()),
    new BasicPartition(null, "path/doesn't/matter/2", generateUniqueKey()),
    new BasicPartition(null, "path/doesn't/matter/3", generateUniqueKey()));
  for (Partition partition : partitions) {
    PartitionedFileSetArguments.addInputPartition(arguments, partition);
  }
  List<PartitionKey> inputPartitionKeys = Lists.transform(partitions, new Function<Partition, PartitionKey>() {
    @Nullable
    @Override
    public PartitionKey apply(Partition input) {
      return input.getPartitionKey();
    }
  });
  Assert.assertEquals(inputPartitionKeys, PartitionedFileSetArguments.getInputPartitionKeys(arguments));
  arguments.clear();
  PartitionedFileSetArguments.addInputPartitions(arguments, partitions.iterator());
  Assert.assertEquals(inputPartitionKeys, PartitionedFileSetArguments.getInputPartitionKeys(arguments));
}
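The generateUniqueKey() helper used throughout these tests is defined elsewhere in the test class and is not shown on this page. A minimal sketch of what such a helper could look like, assuming a partitioning with an int field "i", a long field "l", and a string field "s" (the counter and field names are assumptions, not the verbatim CDAP helper):

// Hypothetical sketch of generateUniqueKey(); the real implementation lives in the
// CDAP test class and may differ. Requires java.util.concurrent.atomic.AtomicLong.
private static final AtomicLong UNIQUE = new AtomicLong();

private PartitionKey generateUniqueKey() {
  // Each call draws a fresh counter value, so no two keys collide.
  long id = UNIQUE.incrementAndGet();
  return PartitionKey.builder()
    .addIntField("i", (int) id)
    .addLongField("l", id)
    .addStringField("s", Long.toString(id))
    .build();
}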
Use of co.cask.cdap.api.dataset.lib.PartitionKey in project cdap by caskdata.
In class PartitionedFileSetArgumentsTest, the method testSetGetOutputPartitionKey:
@Test
public void testSetGetOutputPartitionKey() throws Exception {
  Map<String, String> arguments = new HashMap<>();
  PartitionKey key = PartitionKey.builder()
    .addIntField("i", 42)
    .addLongField("l", 17L)
    .addStringField("s", "x")
    .build();
  PartitionedFileSetArguments.setOutputPartitionKey(arguments, key);
  Assert.assertEquals(key, PartitionedFileSetArguments.getOutputPartitionKey(arguments, PARTITIONING));
}
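getOutputPartitionKey needs the dataset's Partitioning in order to reconstruct the typed key from the string arguments. The PARTITIONING constant is defined elsewhere in the test class; given the fields used in the key above, a plausible definition looks like the following sketch (inferred from the key, not the verbatim constant):

// Inferred from the fields in the key above; the actual constant in the
// CDAP test class may declare the fields differently.
private static final Partitioning PARTITIONING = Partitioning.builder()
  .addIntField("i")
  .addLongField("l")
  .addStringField("s")
  .build();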
Use of co.cask.cdap.api.dataset.lib.PartitionKey in project cdap by caskdata.
In class PartitionedFileSetTest, the method testSimplePartitionConsuming:
@Test
public void testSimplePartitionConsuming() throws Exception {
  final PartitionedFileSet dataset = dsFrameworkUtil.getInstance(pfsInstance);
  final TransactionAware txAwareDataset = (TransactionAware) dataset;
  final Set<PartitionKey> partitionKeys1 = Sets.newHashSet();
  for (int i = 0; i < 10; i++) {
    partitionKeys1.add(generateUniqueKey());
  }
  final Set<PartitionKey> partitionKeys2 = Sets.newHashSet();
  for (int i = 0; i < 15; i++) {
    partitionKeys2.add(generateUniqueKey());
  }
  final SimplePartitionConsumer partitionConsumer = new SimplePartitionConsumer(dataset);
  dsFrameworkUtil.newInMemoryTransactionExecutor(txAwareDataset).execute(new TransactionExecutor.Subroutine() {
    @Override
    public void apply() throws Exception {
      for (PartitionKey partitionKey : partitionKeys1) {
        dataset.getPartitionOutput(partitionKey).addPartition();
      }
    }
  });
  dsFrameworkUtil.newInMemoryTransactionExecutor(txAwareDataset).execute(new TransactionExecutor.Subroutine() {
    @Override
    public void apply() throws Exception {
      // The initial consumption returns exactly the partitions for partitionKeys1, because only
      // those partitions have been added to the dataset at this point.
      List<Partition> consumedPartitions = Lists.newArrayList();
      Iterables.addAll(consumedPartitions, partitionConsumer.consumePartitions());
      Set<PartitionKey> retrievedKeys = Sets.newHashSet();
      for (Partition consumedPartition : consumedPartitions) {
        retrievedKeys.add(consumedPartition.getPartitionKey());
      }
      Assert.assertEquals(partitionKeys1, retrievedKeys);
    }
  });
  dsFrameworkUtil.newInMemoryTransactionExecutor(txAwareDataset).execute(new TransactionExecutor.Subroutine() {
    @Override
    public void apply() throws Exception {
      for (PartitionKey partitionKey : partitionKeys2) {
        dataset.getPartitionOutput(partitionKey).addPartition();
      }
    }
  });
  dsFrameworkUtil.newInMemoryTransactionExecutor(txAwareDataset).execute(new TransactionExecutor.Subroutine() {
    @Override
    public void apply() throws Exception {
      // Consuming again with the same PartitionConsumer (which remembers the PartitionConsumerState)
      // returns only the newly added partitions, i.e. those for partitionKeys2.
      List<Partition> consumedPartitions = Lists.newArrayList();
      Iterables.addAll(consumedPartitions, partitionConsumer.consumePartitions());
      Set<PartitionKey> retrievedKeys = Sets.newHashSet();
      for (Partition consumedPartition : consumedPartitions) {
        retrievedKeys.add(consumedPartition.getPartitionKey());
      }
      Assert.assertEquals(partitionKeys2, retrievedKeys);
    }
  });
  dsFrameworkUtil.newInMemoryTransactionExecutor(txAwareDataset).execute(new TransactionExecutor.Subroutine() {
    @Override
    public void apply() throws Exception {
      // Consuming again without adding any new partitions returns an empty result.
      Assert.assertTrue(partitionConsumer.consumePartitions().isEmpty());
    }
  });
  dsFrameworkUtil.newInMemoryTransactionExecutor(txAwareDataset).execute(new TransactionExecutor.Subroutine() {
    @Override
    public void apply() throws Exception {
      // Creating a new PartitionConsumer resets the consumption state; consuming from it
      // returns all partitions added so far.
      List<Partition> consumedPartitions = Lists.newArrayList();
      Iterables.addAll(consumedPartitions, new SimplePartitionConsumer(dataset).consumePartitions());
      Set<PartitionKey> retrievedKeys = Sets.newHashSet();
      for (Partition consumedPartition : consumedPartitions) {
        retrievedKeys.add(consumedPartition.getPartitionKey());
      }
      Set<PartitionKey> allKeys = Sets.newHashSet();
      allKeys.addAll(partitionKeys1);
      allKeys.addAll(partitionKeys2);
      Assert.assertEquals(allKeys, retrievedKeys);
    }
  });
}
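SimplePartitionConsumer is a thin test helper around the incremental-consumption API of PartitionedFileSet: it keeps the PartitionConsumerState returned by each call and feeds it back into the next one, which is why repeated calls above only see newly added partitions. A minimal sketch, assuming the PartitionConsumerResult accessor names used here:

// Hedged sketch of a SimplePartitionConsumer; the accessor names on
// PartitionConsumerResult are assumptions, and the real helper may differ.
public class SimplePartitionConsumer {

  private final PartitionedFileSet pfs;
  // FROM_BEGINNING makes the first call return every committed partition.
  private PartitionConsumerState state = PartitionConsumerState.FROM_BEGINNING;

  public SimplePartitionConsumer(PartitionedFileSet pfs) {
    this.pfs = pfs;
  }

  public List<PartitionDetail> consumePartitions() {
    PartitionConsumerResult result = pfs.consumePartitions(state);
    // Remember where consumption left off, so the next call only sees new partitions.
    state = result.getPartitionConsumerState();
    return result.getPartitions();
  }
}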
Use of co.cask.cdap.api.dataset.lib.PartitionKey in project cdap by caskdata.
In class PartitionConsumerTest, the method testOnFinishWithInvalidPartition:
@Test
public void testOnFinishWithInvalidPartition() throws Exception {
  // Tests that:
  // - aborting a Partition that is not IN_PROGRESS throws IllegalStateException
  // - committing a Partition that is already committed throws IllegalArgumentException
  final PartitionedFileSet dataset = dsFrameworkUtil.getInstance(pfsInstance);
  final TransactionAware txAwareDataset = (TransactionAware) dataset;
  ConsumerConfiguration configuration = ConsumerConfiguration.builder().setMaxRetries(3).build();
  final PartitionConsumer partitionConsumer = new ConcurrentPartitionConsumer(dataset, new InMemoryStatePersistor(), configuration);
  final PartitionKey partitionKey = generateUniqueKey();
  dsFrameworkUtil.newInMemoryTransactionExecutor(txAwareDataset).execute(new TransactionExecutor.Subroutine() {
    @Override
    public void apply() throws Exception {
      dataset.getPartitionOutput(partitionKey).addPartition();
    }
  });
  dsFrameworkUtil.newInMemoryTransactionExecutor(txAwareDataset).execute(new TransactionExecutor.Subroutine() {
    @Override
    public void apply() throws Exception {
      List<PartitionDetail> partitionDetails = partitionConsumer.consumePartitions(1).getPartitions();
      Assert.assertEquals(1, partitionDetails.size());
      // abort the processing of the partition
      partitionConsumer.onFinish(partitionDetails, false);
      // a second abort fails, because the partition is no longer IN_PROGRESS
      try {
        partitionConsumer.onFinish(partitionDetails, false);
        Assert.fail("Expected not to be able to abort a partition that is not IN_PROGRESS");
      } catch (IllegalStateException expected) {
      }
      // process the partition again, this time marking it as complete (by passing in true)
      partitionDetails = partitionConsumer.consumePartitions(1).getPartitions();
      Assert.assertEquals(1, partitionDetails.size());
      partitionConsumer.onFinish(partitionDetails, true);
      // a second commit fails, because the partition is no longer IN_PROGRESS
      try {
        partitionConsumer.onFinish(partitionDetails, true);
        Assert.fail("Expected not to be able to call onFinish on a partition that is not IN_PROGRESS");
      } catch (IllegalArgumentException expected) {
      }
    }
  });
}
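The usual production pattern behind onFinish is commit-on-success, abort-on-failure: an aborted partition becomes available again and can be retried up to the configured maximum (3 in this test). A sketch of such a consumer loop, using only the consumePartitions(int) and onFinish(List, boolean) calls shown above; processPartition is a hypothetical application method, not part of the CDAP API:

// Hedged sketch of a consumer loop; processPartition() is a hypothetical
// application method.
List<PartitionDetail> partitions = partitionConsumer.consumePartitions(10).getPartitions();
try {
  for (PartitionDetail partition : partitions) {
    processPartition(partition);
  }
  // Success: mark the partitions complete so they are not handed out again.
  partitionConsumer.onFinish(partitions, true);
} catch (Exception e) {
  // Failure: abort, returning the partitions to the available state for retry
  // (up to the maxRetries configured on the ConsumerConfiguration).
  partitionConsumer.onFinish(partitions, false);
}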
Use of co.cask.cdap.api.dataset.lib.PartitionKey in project cdap by caskdata.
In class PartitionConsumerTest, the method testPartitionConsumer:
@Test
public void testPartitionConsumer() throws Exception {
  // Exercises an edge case of partition consumption: partitions are consumed while another
  // in-progress transaction has added a partition but not yet committed it, so that partition
  // is not yet visible to the consumer.
  PartitionedFileSet dataset1 = dsFrameworkUtil.getInstance(pfsInstance);
  PartitionedFileSet dataset2 = dsFrameworkUtil.getInstance(pfsInstance);
  TransactionManager txManager = dsFrameworkUtil.getTxManager();
  InMemoryTxSystemClient txClient = new InMemoryTxSystemClient(txManager);
  // the producer simply adds an initial partition
  TransactionContext txContext1 = new TransactionContext(txClient, (TransactionAware) dataset1);
  txContext1.start();
  PartitionKey partitionKey1 = generateUniqueKey();
  dataset1.getPartitionOutput(partitionKey1).addPartition();
  txContext1.finish();
  // the consumer simply consumes the initial partition
  TransactionContext txContext2 = new TransactionContext(txClient, (TransactionAware) dataset2);
  txContext2.start();
  PartitionConsumer partitionConsumer = new ConcurrentPartitionConsumer(dataset2, new InMemoryStatePersistor());
  List<? extends PartitionDetail> partitionDetails = partitionConsumer.consumePartitions().getPartitions();
  Assert.assertEquals(1, partitionDetails.size());
  Assert.assertEquals(partitionKey1, partitionDetails.get(0).getPartitionKey());
  txContext2.finish();
  // the producer adds a second partition, but does not yet commit the transaction
  txContext1.start();
  PartitionKey partitionKey2 = generateUniqueKey();
  dataset1.getPartitionOutput(partitionKey2).addPartition();
  // the consumer attempts to consume after the partition was added but before it was committed;
  // because of this, the partition is not visible and will not be consumed
  txContext2.start();
  Assert.assertTrue(partitionConsumer.consumePartitions().getPartitions().isEmpty());
  txContext2.finish();
  // the producer commits the transaction in which the second partition was added
  txContext1.finish();
  // the next time the consumer runs, it processes the second partition
  txContext2.start();
  partitionDetails = partitionConsumer.consumePartitions().getPartitions();
  Assert.assertEquals(1, partitionDetails.size());
  Assert.assertEquals(partitionKey2, partitionDetails.get(0).getPartitionKey());
  txContext2.finish();
}
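Each start()/finish() pair above is Tephra's standard transaction bracket: finish() commits, which is what finally makes the second partition visible to the consumer. Production code normally adds an abort path so a failed write is never exposed; a sketch (assuming the enclosing method declares throws TransactionFailureException):

// Standard Tephra bracket around a partition write; abort() rolls the
// transaction back so a failed partition is never exposed to consumers.
TransactionContext txContext = new TransactionContext(txClient, (TransactionAware) dataset1);
txContext.start();
try {
  dataset1.getPartitionOutput(generateUniqueKey()).addPartition();
  txContext.finish(); // commit: the new partition becomes visible
} catch (TransactionFailureException e) {
  txContext.abort(); // roll back: the partition is never exposed
}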