use of io.cdap.cdap.api.dataset.lib.PartitionedFileSet in project cdap by caskdata.
the class PartitionedFileSetTest method testPermissions.
@Test
public void testPermissions() throws Exception {
// validate that the fileset permissions and group were applied to the embedded fileset (just sanity test)
PartitionedFileSet pfs = dsFrameworkUtil.getInstance(pfsInstance);
Location loc = pfs.getEmbeddedFileSet().getLocation("some/random/path");
loc.getOutputStream().close();
Assert.assertEquals(fsPermissions, loc.getPermissions());
Assert.assertEquals(group, loc.getGroup());
Map<String, String> props = dsFrameworkUtil.getSpec(pfsInstance).getSpecification("partitions").getProperties();
Assert.assertEquals(tablePermissions, TableProperties.getTablePermissions(props));
}
use of io.cdap.cdap.api.dataset.lib.PartitionedFileSet in project cdap by caskdata.
the class PartitionedFileSetTest method testRollbackOfPartitionDelete.
@Test
public void testRollbackOfPartitionDelete() throws Exception {
PartitionedFileSet pfs = dsFrameworkUtil.getInstance(pfsInstance);
TransactionContext txContext = new TransactionContext(txClient, (TransactionAware) pfs);
txContext.start();
// write 1 to the first file
Location outputLocation = createPartition(pfs, PARTITION_KEY, "file", 1);
Assert.assertNotNull(pfs.getPartition(PARTITION_KEY));
Assert.assertTrue(pfs.getPartition(PARTITION_KEY).getLocation().exists());
txContext.finish();
txContext.start();
pfs.dropPartition(PARTITION_KEY);
Assert.assertNull(pfs.getPartition(PARTITION_KEY));
Assert.assertFalse(outputLocation.exists());
// create a new partition with the same partition key (same relative path for the partition)
// write 2 to the second file
Location outputLocation2 = createPartition(pfs, PARTITION_KEY, "file", 2);
Assert.assertTrue(outputLocation2.exists());
txContext.abort();
// since the previous transaction aborted, the partition and its files should still exist
txContext.start();
Assert.assertNotNull(pfs.getPartition(PARTITION_KEY));
Assert.assertTrue(outputLocation.exists());
try (InputStream inputStream = outputLocation.getInputStream()) {
// should be 1, written by the first partition, not 2 (which was written by the second partition)
Assert.assertEquals(1, inputStream.read());
// should be nothing else in the file
Assert.assertEquals(0, inputStream.available());
}
txContext.finish();
}
use of io.cdap.cdap.api.dataset.lib.PartitionedFileSet in project cdap by caskdata.
the class PartitionedFileSetTest method testDefaultBasePath.
@Test
public void testDefaultBasePath() throws Exception {
DatasetId id = DatasetFrameworkTestUtil.NAMESPACE_ID.dataset("testDefaultPath");
dsFrameworkUtil.createInstance("partitionedFileSet", id, PartitionedFileSetProperties.builder().setPartitioning(PARTITIONING_1).build());
PartitionedFileSet pfs = dsFrameworkUtil.getInstance(id);
Location baseLocation = pfs.getEmbeddedFileSet().getBaseLocation();
Assert.assertEquals(baseLocation.getName(), id.getDataset());
Assert.assertTrue(baseLocation.exists());
Assert.assertTrue(baseLocation.isDirectory());
DatasetId fid = DatasetFrameworkTestUtil.NAMESPACE_ID.dataset("testDefaultPathFileSet");
dsFrameworkUtil.createInstance("fileSet", fid, FileSetProperties.builder().build());
FileSet fs = dsFrameworkUtil.getInstance(fid);
Location fsBaseLocation = fs.getBaseLocation();
Assert.assertEquals(Locations.getParent(baseLocation), Locations.getParent(fsBaseLocation));
dsFrameworkUtil.deleteInstance(fid);
dsFrameworkUtil.deleteInstance(id);
Assert.assertFalse(baseLocation.exists());
}
use of io.cdap.cdap.api.dataset.lib.PartitionedFileSet in project cdap by caskdata.
the class PartitionConsumerTest method testPartitionConsumingWithFilterAndLimit.
@Test
public void testPartitionConsumingWithFilterAndLimit() throws Exception {
final PartitionedFileSet dataset = dsFrameworkUtil.getInstance(pfsInstance);
final TransactionAware txAwareDataset = (TransactionAware) dataset;
final Set<PartitionKey> partitionKeys1 = new HashSet<>();
for (int i = 0; i < 10; i++) {
partitionKeys1.add(generateUniqueKey());
}
final Set<PartitionKey> partitionKeys2 = new HashSet<>();
for (int i = 0; i < 15; i++) {
partitionKeys2.add(generateUniqueKey());
}
final PartitionConsumer partitionConsumer = new ConcurrentPartitionConsumer(dataset, new InMemoryStatePersistor());
// (consumption only happens at transaction borders)
for (final PartitionKey partitionKey : partitionKeys1) {
dsFrameworkUtil.newInMemoryTransactionExecutor(txAwareDataset).execute(new TransactionExecutor.Subroutine() {
@Override
public void apply() throws Exception {
dataset.getPartitionOutput(partitionKey).addPartition();
}
});
}
dsFrameworkUtil.newInMemoryTransactionExecutor(txAwareDataset).execute(new TransactionExecutor.Subroutine() {
@Override
public void apply() throws Exception {
// Initial consumption results in the partitions corresponding to partitionKeys1 to be consumed because only
// those partitions are added to the dataset at this point
List<Partition> consumedPartitions = new ArrayList<>();
// with limit = 1, the returned iterator is only size 1, even though there are more unconsumed partitions
Iterables.addAll(consumedPartitions, partitionConsumer.consumePartitions(1).getPartitions());
Assert.assertEquals(1, consumedPartitions.size());
// ask for 5 more
Iterables.addAll(consumedPartitions, partitionConsumer.consumePartitions(5).getPartitions());
Assert.assertEquals(6, consumedPartitions.size());
// ask for 5 more, but there are only 4 more unconsumed partitions (size of partitionKeys1 is 10).
Iterables.addAll(consumedPartitions, partitionConsumer.consumePartitions(5).getPartitions());
Assert.assertEquals(10, consumedPartitions.size());
Assert.assertEquals(partitionKeys1, toKeys(consumedPartitions));
}
});
dsFrameworkUtil.newInMemoryTransactionExecutor(txAwareDataset).execute(new TransactionExecutor.Subroutine() {
@Override
public void apply() throws Exception {
for (PartitionKey partitionKey : partitionKeys2) {
dataset.getPartitionOutput(partitionKey).addPartition();
}
}
});
dsFrameworkUtil.newInMemoryTransactionExecutor(txAwareDataset).execute(new TransactionExecutor.Subroutine() {
@Override
public void apply() throws Exception {
// using the same PartitionConsumer (which remembers the PartitionConsumerState) to consume additional
// partitions results in only the newly added partitions (corresponding to partitionKeys2) to be returned
Assert.assertEquals(partitionKeys2, toKeys(partitionConsumer.consumePartitions().getPartitions()));
}
});
dsFrameworkUtil.newInMemoryTransactionExecutor(txAwareDataset).execute(new TransactionExecutor.Subroutine() {
@Override
public void apply() throws Exception {
// consuming the partitions again, without adding any new partitions returns an empty iterator
Assert.assertTrue(partitionConsumer.consumePartitions().getPartitions().isEmpty());
}
});
dsFrameworkUtil.newInMemoryTransactionExecutor(txAwareDataset).execute(new TransactionExecutor.Subroutine() {
@Override
public void apply() throws Exception {
// creating a new PartitionConsumer resets the consumption state.
// test combination of filter and limit
// the partitionFilter will match partitionKeys [1, 7), of which there are 6
final PartitionFilter partitionFilter = PartitionFilter.builder().addRangeCondition("i", 1, 7).build();
final Predicate<PartitionDetail> predicate = new Predicate<PartitionDetail>() {
@Override
public boolean apply(PartitionDetail partitionDetail) {
return partitionFilter.match(partitionDetail.getPartitionKey());
}
};
ConsumerConfiguration configuration = ConsumerConfiguration.builder().setPartitionPredicate(predicate).build();
PartitionConsumer newPartitionConsumer = new ConcurrentPartitionConsumer(dataset, new InMemoryStatePersistor(), configuration);
List<Partition> consumedPartitions = new ArrayList<>();
// apply the filter (narrows it down to 6 elements) and apply a limit of 4 results in 4 consumed partitions
Iterables.addAll(consumedPartitions, newPartitionConsumer.consumePartitions(4).getPartitions());
Assert.assertEquals(4, consumedPartitions.size());
// apply a limit of 3, using the same filter returns the remaining 2 elements that fit that filter
Iterables.addAll(consumedPartitions, newPartitionConsumer.consumePartitions(3).getPartitions());
Assert.assertEquals(6, consumedPartitions.size());
// assert that the partitions returned have partition keys, where the i values range from [1, 7]
Set<Integer> expectedIFields = new HashSet<>();
for (int i = 1; i < 7; i++) {
expectedIFields.add(i);
}
Set<Integer> actualIFields = new HashSet<>();
for (Partition consumedPartition : consumedPartitions) {
actualIFields.add((Integer) consumedPartition.getPartitionKey().getField("i"));
}
Assert.assertEquals(expectedIFields, actualIFields);
}
});
}
use of io.cdap.cdap.api.dataset.lib.PartitionedFileSet in project cdap by caskdata.
the class PartitionConsumerTest method testSimpleConcurrency.
@Test
public void testSimpleConcurrency() throws Exception {
final PartitionedFileSet dataset = dsFrameworkUtil.getInstance(pfsInstance);
final TransactionAware txAwareDataset = (TransactionAware) dataset;
final Set<PartitionKey> partitionKeys = new HashSet<>();
for (int i = 0; i < 10; i++) {
partitionKeys.add(generateUniqueKey());
}
// have ConcurrentPartitionConsumers that share the same state.
InMemoryStatePersistor persistor = new InMemoryStatePersistor();
ConsumerConfiguration configuration = ConsumerConfiguration.builder().setMaxRetries(3).build();
final PartitionConsumer partitionConsumer1 = new ConcurrentPartitionConsumer(dataset, persistor, configuration);
final PartitionConsumer partitionConsumer2 = new ConcurrentPartitionConsumer(dataset, persistor, configuration);
final PartitionConsumer partitionConsumer3 = new ConcurrentPartitionConsumer(dataset, persistor, configuration);
// add all ten keys to the partitioned fileset
dsFrameworkUtil.newInMemoryTransactionExecutor(txAwareDataset).execute(new TransactionExecutor.Subroutine() {
@Override
public void apply() throws Exception {
for (final PartitionKey partitionKey : partitionKeys) {
dataset.getPartitionOutput(partitionKey).addPartition();
}
}
});
dsFrameworkUtil.newInMemoryTransactionExecutor(txAwareDataset).execute(new TransactionExecutor.Subroutine() {
@Override
public void apply() throws Exception {
// with limit = 1, the returned iterator is only size 1, even though there are more unconsumed partitions
List<PartitionDetail> consumedBy1 = partitionConsumer1.consumePartitions(1).getPartitions();
Assert.assertEquals(1, consumedBy1.size());
// partitionConsumer2 asks for 10 partitions, but 1 is currently in progress by partitionConsumer1, so it only
// gets the remaining 9 partitions
List<PartitionDetail> consumedBy2 = partitionConsumer2.consumePartitions(10).getPartitions();
Assert.assertEquals(9, consumedBy2.size());
// partitionConsumer3 tries to consume partitions, but all are marked in-progress by partitionConsumer 1 and 2
Assert.assertEquals(0, partitionConsumer3.consumePartitions().getPartitions().size());
// partitionConsumer1 aborts its partition, so it then becomes available for partitionConsumer3
partitionConsumer1.onFinish(consumedBy1, false);
consumedBy1.clear();
// queries with limit=2, but only the 1 is available that partitionConsumer1 released
List<PartitionDetail> consumedBy3 = partitionConsumer3.consumePartitions(2).getPartitions();
Assert.assertEquals(1, consumedBy3.size());
// partitionConsumers 2 and 3 marks that it successfully processed the partitions
partitionConsumer3.onFinish(consumedBy3, true);
// test onFinishWithKeys API
List<PartitionKey> keysConsumedBy2 = Lists.transform(consumedBy2, new Function<PartitionDetail, PartitionKey>() {
@Override
public PartitionKey apply(PartitionDetail input) {
return input.getPartitionKey();
}
});
partitionConsumer2.onFinishWithKeys(keysConsumedBy2, true);
// at this point, all partitions are processed, so no additional partitions are available for consumption
Assert.assertEquals(0, partitionConsumer3.consumePartitions().getPartitions().size());
List<PartitionDetail> allProcessedPartitions = new ArrayList<>();
allProcessedPartitions.addAll(consumedBy1);
allProcessedPartitions.addAll(consumedBy2);
allProcessedPartitions.addAll(consumedBy3);
// ordering may be different, since all the partitions were added in the same transaction
Assert.assertEquals(partitionKeys, toKeys(allProcessedPartitions));
}
});
}
Aggregations