use of io.cdap.cdap.api.dataset.lib.PartitionDetail in project cdap by cdapio.
the class PartitionConsumerTest method testSimpleConcurrency.
/**
 * Exercises three {@link ConcurrentPartitionConsumer}s that are built on one shared state persistor and
 * checks that a partition held by one consumer is not handed to another until it has been released.
 */
@Test
public void testSimpleConcurrency() throws Exception {
  final PartitionedFileSet pfs = dsFrameworkUtil.getInstance(pfsInstance);
  final TransactionAware txAwarePfs = (TransactionAware) pfs;

  // generate the ten keys whose partitions get added below
  final Set<PartitionKey> expectedKeys = new HashSet<>();
  for (int remaining = 10; remaining > 0; remaining--) {
    expectedKeys.add(generateUniqueKey());
  }

  // every consumer is constructed with the same persistor and configuration, so they share state
  InMemoryStatePersistor sharedPersistor = new InMemoryStatePersistor();
  ConsumerConfiguration sharedConfig = ConsumerConfiguration.builder().setMaxRetries(3).build();
  final PartitionConsumer first = new ConcurrentPartitionConsumer(pfs, sharedPersistor, sharedConfig);
  final PartitionConsumer second = new ConcurrentPartitionConsumer(pfs, sharedPersistor, sharedConfig);
  final PartitionConsumer third = new ConcurrentPartitionConsumer(pfs, sharedPersistor, sharedConfig);

  // first transaction: add a partition for each of the ten keys
  TransactionExecutor.Subroutine addAllPartitions = new TransactionExecutor.Subroutine() {
    @Override
    public void apply() throws Exception {
      for (PartitionKey key : expectedKeys) {
        pfs.getPartitionOutput(key).addPartition();
      }
    }
  };
  dsFrameworkUtil.newInMemoryTransactionExecutor(txAwarePfs).execute(addAllPartitions);

  // second transaction: interleave consumption by the three consumers
  TransactionExecutor.Subroutine consumeConcurrently = new TransactionExecutor.Subroutine() {
    @Override
    public void apply() throws Exception {
      // a limit of 1 yields exactly one partition, although more are still unconsumed
      List<PartitionDetail> takenByFirst = first.consumePartitions(1).getPartitions();
      Assert.assertEquals(1, takenByFirst.size());

      // the second consumer requests 10, but one is in progress with the first consumer,
      // so only the other 9 are returned
      List<PartitionDetail> takenBySecond = second.consumePartitions(10).getPartitions();
      Assert.assertEquals(9, takenBySecond.size());

      // nothing is left for the third consumer: everything is in progress with the first two
      Assert.assertEquals(0, third.consumePartitions().getPartitions().size());

      // the first consumer aborts, which frees its partition for the third consumer
      first.onFinish(takenByFirst, false);
      takenByFirst.clear();

      // the third consumer asks for 2, but only the single released partition is available
      List<PartitionDetail> takenByThird = third.consumePartitions(2).getPartitions();
      Assert.assertEquals(1, takenByThird.size());

      // the third consumer reports success through onFinish ...
      third.onFinish(takenByThird, true);

      // ... and the second consumer reports success through the onFinishWithKeys API
      Function<PartitionDetail, PartitionKey> detailToKey = new Function<PartitionDetail, PartitionKey>() {
        @Override
        public PartitionKey apply(PartitionDetail input) {
          return input.getPartitionKey();
        }
      };
      List<PartitionKey> keysOfSecond = Lists.transform(takenBySecond, detailToKey);
      second.onFinishWithKeys(keysOfSecond, true);

      // every partition has now been processed, so there is nothing more to consume
      Assert.assertEquals(0, third.consumePartitions().getPartitions().size());

      List<PartitionDetail> processed = new ArrayList<>(takenByFirst);
      processed.addAll(takenBySecond);
      processed.addAll(takenByThird);
      // compared as sets of keys: the partitions were added in one transaction, so order may vary
      Assert.assertEquals(expectedKeys, toKeys(processed));
    }
  };
  dsFrameworkUtil.newInMemoryTransactionExecutor(txAwarePfs).execute(consumeConcurrently);
}
use of io.cdap.cdap.api.dataset.lib.PartitionDetail in project cdap by cdapio.
the class PartitionConsumerTest method testConsumeAfterDelete.
/**
 * Checks that partitions which were dropped before being consumed are not returned by a later
 * consume call, and that a consumer built on a fresh persistor sees only the surviving partitions.
 */
@Test
public void testConsumeAfterDelete() throws Exception {
  final PartitionedFileSet pfs = dsFrameworkUtil.getInstance(pfsInstance);
  final TransactionAware txAwarePfs = (TransactionAware) pfs;

  final Set<PartitionKey> firstBatchKeys = new HashSet<>();
  for (int remaining = 3; remaining > 0; remaining--) {
    firstBatchKeys.add(generateUniqueKey());
  }

  // the max working set size must exceed the number of partitions consumed initially, so that the
  // extra partitions (which are dropped later on) are pulled into the working set as well
  ConsumerConfiguration largeWorkingSet = ConsumerConfiguration.builder().setMaxWorkingSetSize(100).build();
  final PartitionConsumer consumer =
    new ConcurrentPartitionConsumer(pfs, new InMemoryStatePersistor(), largeWorkingSet);

  // add the three tracked partitions
  TransactionExecutor.Subroutine addFirstBatch = new TransactionExecutor.Subroutine() {
    @Override
    public void apply() throws Exception {
      for (PartitionKey key : firstBatchKeys) {
        pfs.getPartitionOutput(key).addPartition();
      }
    }
  };
  dsFrameworkUtil.newInMemoryTransactionExecutor(txAwarePfs).execute(addFirstBatch);

  // add two more partitions in a separate transaction; their keys are not tracked because
  // they get dropped further down and are never consumed
  TransactionExecutor.Subroutine addUntrackedPartitions = new TransactionExecutor.Subroutine() {
    @Override
    public void apply() throws Exception {
      for (int remaining = 2; remaining > 0; remaining--) {
        pfs.getPartitionOutput(generateUniqueKey()).addPartition();
      }
    }
  };
  dsFrameworkUtil.newInMemoryTransactionExecutor(txAwarePfs).execute(addUntrackedPartitions);

  // consume 3 of the 5 partitions added so far; these must be the three tracked ones
  TransactionExecutor.Subroutine consumeFirstBatch = new TransactionExecutor.Subroutine() {
    @Override
    public void apply() throws Exception {
      Assert.assertEquals(firstBatchKeys, toKeys(consumer.consumePartitions(3).getPartitions()));
    }
  };
  dsFrameworkUtil.newInMemoryTransactionExecutor(txAwarePfs).execute(consumeFirstBatch);

  final Set<PartitionKey> secondBatchKeys = new HashSet<>();
  for (int remaining = 5; remaining > 0; remaining--) {
    secondBatchKeys.add(generateUniqueKey());
  }

  TransactionExecutor.Subroutine replacePartitions = new TransactionExecutor.Subroutine() {
    @Override
    public void apply() throws Exception {
      // drop everything that currently exists (two of these were never consumed)
      Set<PartitionDetail> existing = pfs.getPartitions(PartitionFilter.ALWAYS_MATCH);
      for (PartitionDetail stale : existing) {
        pfs.dropPartition(stale.getPartitionKey());
      }
      // then add five new partitions
      for (PartitionKey key : secondBatchKeys) {
        pfs.getPartitionOutput(key).addPartition();
      }
    }
  };
  dsFrameworkUtil.newInMemoryTransactionExecutor(txAwarePfs).execute(replacePartitions);

  // only the second batch is returned; the dropped-but-unconsumed partitions added earlier are not
  TransactionExecutor.Subroutine consumeSecondBatch = new TransactionExecutor.Subroutine() {
    @Override
    public void apply() throws Exception {
      Assert.assertEquals(secondBatchKeys, toKeys(consumer.consumePartitions().getPartitions()));
    }
  };
  dsFrameworkUtil.newInMemoryTransactionExecutor(txAwarePfs).execute(consumeSecondBatch);

  // with no new partitions added, consuming again yields nothing
  TransactionExecutor.Subroutine consumeNothing = new TransactionExecutor.Subroutine() {
    @Override
    public void apply() throws Exception {
      Assert.assertTrue(consumer.consumePartitions().getPartitions().isEmpty());
    }
  };
  dsFrameworkUtil.newInMemoryTransactionExecutor(txAwarePfs).execute(consumeNothing);

  // a consumer built on a new persistor starts from a clean consumption state, so it returns all
  // the partitions that were added after the deletions
  TransactionExecutor.Subroutine consumeWithFreshState = new TransactionExecutor.Subroutine() {
    @Override
    public void apply() throws Exception {
      ConcurrentPartitionConsumer freshConsumer =
        new ConcurrentPartitionConsumer(pfs, new InMemoryStatePersistor());
      Assert.assertEquals(secondBatchKeys, toKeys(freshConsumer.consumePartitions().getPartitions()));
    }
  };
  dsFrameworkUtil.newInMemoryTransactionExecutor(txAwarePfs).execute(consumeWithFreshState);
}
use of io.cdap.cdap.api.dataset.lib.PartitionDetail in project hydrator-plugins by cdapio.
the class SnapshotFileSet method getInputArguments.
/**
 * Builds the arguments for reading the latest snapshot partition.
 *
 * <p>The returned map is a new {@link HashMap} holding every entry of {@code otherProperties} plus
 * whatever {@link PartitionedFileSetArguments#addInputPartition} adds for the latest partition.
 * The {@link Location} returned by {@code lock()} is deleted before this method exits, whether it
 * returns normally or throws.
 *
 * @param otherProperties entries to copy into the returned map; the given map is not modified here
 * @return a new map with the copied entries and the input partition argument
 * @throws IllegalArgumentException if {@code getLatestPartition()} returns null
 */
public Map<String, String> getInputArguments(Map<String, String> otherProperties) throws IOException, InterruptedException {
  Location lock = lock();
  try {
    PartitionDetail latest = getLatestPartition();
    if (latest != null) {
      Map<String, String> inputArgs = new HashMap<>(otherProperties);
      PartitionedFileSetArguments.addInputPartition(inputArgs, latest);
      return inputArgs;
    }
    throw new IllegalArgumentException("Snapshot fileset does not have a latest snapshot, so cannot be read.");
  } finally {
    // release the lock location on every exit path
    lock.delete();
  }
}
use of io.cdap.cdap.api.dataset.lib.PartitionDetail in project hydrator-plugins by cdapio.
the class SnapshotFileSet method deleteMatchingPartitionsByTime.
/**
 * Drops every partition of {@code files} matched by a range condition on {@code SNAPSHOT_FIELD}
 * that has a null lower bound and {@code upperLimit} as its upper bound.
 *
 * <p>Does nothing when {@code upperLimit} is not strictly between 0 and {@link Long#MAX_VALUE}.
 *
 * @param upperLimit upper bound passed to the range condition
 */
public void deleteMatchingPartitionsByTime(long upperLimit) throws IOException {
  // out-of-range limits are ignored
  if (upperLimit <= 0 || upperLimit >= Long.MAX_VALUE) {
    return;
  }
  PartitionFilter olderThanLimit =
    PartitionFilter.builder().addRangeCondition(SNAPSHOT_FIELD, null, upperLimit).build();
  Set<PartitionDetail> matches = files.getPartitions(olderThanLimit);
  for (PartitionDetail match : matches) {
    files.dropPartition(match.getPartitionKey());
  }
}
use of io.cdap.cdap.api.dataset.lib.PartitionDetail in project cdap by caskdata.
the class DynamicPartitionerWithAvroTest method runDynamicPartitionerMR.
/**
 * Runs the Avro dynamic-partitioner MapReduce over {@code records} and verifies its outcome.
 *
 * <p>The records are written to the input key-value table, optionally three partitions are created
 * up front, and the program is run. If the run is expected to succeed, the data notifications,
 * partition metadata, pre-existing files, relative paths and written records are all checked.
 *
 * @param records records to feed to the MapReduce
 * @param allowConcurrentWriters value passed as the dynamic partitioner's allow-concurrency argument
 * @param precreatePartitions whether to create the three output partitions before running the program
 * @param partitionWriteOption passed as the "partitionWriteOption" argument when non-null
 * @param expectedStatus status the program run must return; when false, output checks are skipped
 */
private void runDynamicPartitionerMR(final List<? extends GenericRecord> records,
                                     boolean allowConcurrentWriters,
                                     final boolean precreatePartitions,
                                     @Nullable final DynamicPartitioner.PartitionWriteOption partitionWriteOption,
                                     boolean expectedStatus) throws Exception {
  ApplicationWithPrograms app = deployApp(AppWithMapReduceUsingAvroDynamicPartitioner.class);

  final long now = System.currentTimeMillis();
  final Multimap<PartitionKey, GenericRecord> keyToRecordsMap = groupByPartitionKey(records, now);

  // populate the input kvTable with the records
  final KeyValueTable kvTable = datasetCache.getDataset(INPUT_DATASET);
  TransactionExecutor.Subroutine writeInput = new TransactionExecutor.Subroutine() {
    @Override
    public void apply() {
      // the key is just the record's position; it only has to be unique
      int position = 0;
      for (GenericRecord record : records) {
        kvTable.write(Integer.toString(position), record.toString());
        position++;
      }
    }
  };
  Transactions.createTransactionExecutor(txExecutorFactory, kvTable).execute(writeInput);

  final PartitionedFileSet pfs = datasetCache.getDataset(OUTPUT_DATASET);
  if (precreatePartitions) {
    TransactionExecutor.Subroutine precreate = new TransactionExecutor.Subroutine() {
      @Override
      public void apply() throws IOException {
        for (int zip : new int[] {95111, 98123, 84125}) {
          writeFile(pfs, createKey(now, zip));
        }
      }
    };
    Transactions.createTransactionExecutor(txExecutorFactory, (TransactionAware) pfs).execute(precreate);
  }

  String allowConcurrencyKey =
    "dataset." + OUTPUT_DATASET + "." + PartitionedFileSetArguments.DYNAMIC_PARTITIONER_ALLOW_CONCURRENCY;

  // runtime arguments for the partition-writing m/r, including the output partition time
  Map<String, String> runtimeArgs = new HashMap<>();
  runtimeArgs.put(OUTPUT_PARTITION_KEY, Long.toString(now));
  runtimeArgs.put(allowConcurrencyKey, Boolean.toString(allowConcurrentWriters));
  if (partitionWriteOption != null) {
    runtimeArgs.put("partitionWriteOption", partitionWriteOption.name());
  }

  long startTime = System.currentTimeMillis();
  boolean actualStatus = runProgram(app,
                                    AppWithMapReduceUsingAvroDynamicPartitioner.DynamicPartitioningMapReduce.class,
                                    new BasicArguments(runtimeArgs));
  Assert.assertEquals(expectedStatus, actualStatus);
  if (!expectedStatus) {
    // an expected failure produces no output worth verifying
    return;
  }

  // exactly one data notification, and it must refer to the output dataset
  List<Notification> notifications = getDataNotifications(startTime);
  Assert.assertEquals(1, notifications.size());
  Assert.assertEquals(NamespaceId.DEFAULT.dataset(OUTPUT_DATASET),
                      DatasetId.fromString(notifications.get(0).getProperties().get("datasetId")));

  // the run should have produced partitions in the pfs
  final Location pfsBaseLocation = pfs.getEmbeddedFileSet().getBaseLocation();
  TransactionExecutor.Subroutine verifyOutput = new TransactionExecutor.Subroutine() {
    @Override
    public void apply() throws IOException {
      // the pre-created empty file survives only when appending to pre-created partitions;
      // otherwise (including the overwrite option) it must not exist
      boolean expectPreexistingFile =
        precreatePartitions && partitionWriteOption == DynamicPartitioner.PartitionWriteOption.CREATE_OR_APPEND;

      Map<PartitionKey, PartitionDetail> partitionsByKey = new HashMap<>();
      Set<PartitionDetail> allPartitions = pfs.getPartitions(null);
      for (PartitionDetail detail : allPartitions) {
        partitionsByKey.put(detail.getPartitionKey(), detail);
        // every output partition must carry the metadata written by the mapreduce
        Assert.assertEquals(getExpectedMetadata(precreatePartitions, partitionWriteOption),
                            detail.getMetadata().asMap());

        Location preexistingFile = detail.getLocation().append("file");
        if (!expectPreexistingFile) {
          Assert.assertFalse(preexistingFile.exists());
        } else {
          Assert.assertTrue(preexistingFile.exists());
          // the first read must already report end of stream, i.e. the file is empty
          try (InputStream contents = preexistingFile.getInputStream()) {
            Assert.assertEquals(-1, contents.read());
          }
        }
      }
      Assert.assertEquals(3, partitionsByKey.size());
      Assert.assertEquals(keyToRecordsMap.keySet(), partitionsByKey.keySet());

      // each relative path is "<now>/<zip>", and each location is the pfs base location plus that path
      String pathPrefix = Long.toString(now) + Path.SEPARATOR;
      for (Map.Entry<PartitionKey, PartitionDetail> entry : partitionsByKey.entrySet()) {
        PartitionDetail detail = entry.getValue();
        String relativePath = detail.getRelativePath();
        int zip = (int) entry.getKey().getField("zip");
        Assert.assertEquals(pathPrefix + zip, relativePath);
        Assert.assertEquals(pfsBaseLocation.append(relativePath), detail.getLocation());
      }

      // the records read back from each partition must equal the records grouped under its key
      for (Map.Entry<PartitionKey, Collection<GenericRecord>> entry : keyToRecordsMap.asMap().entrySet()) {
        Set<GenericRecord> expectedRecords = new HashSet<>(entry.getValue());
        Location partitionLocation = partitionsByKey.get(entry.getKey()).getLocation();
        Assert.assertEquals(expectedRecords, readOutput(partitionLocation));
      }
    }
  };
  Transactions.createTransactionExecutor(txExecutorFactory, (TransactionAware) pfs).execute(verifyOutput);
}
Aggregations