Use of co.cask.cdap.api.dataset.lib.PartitionedFileSet in project cdap by caskdata.
In the class PartitionedFileSetTest, method testAddRemoveGetPartition.
@Test
public void testAddRemoveGetPartition() throws Exception {
  final PartitionedFileSet pfs = dsFrameworkUtil.getInstance(pfsInstance);
  final AtomicReference<Location> outputLocationRef = new AtomicReference<>();
  dsFrameworkUtil.newTransactionExecutor((TransactionAware) pfs).execute(new TransactionExecutor.Subroutine() {
    @Override
    public void apply() throws Exception {
      Location outputLocation = createPartition(pfs, PARTITION_KEY, "file");
      outputLocationRef.set(outputLocation);
      Assert.assertTrue(outputLocation.exists());
      Assert.assertNotNull(pfs.getPartition(PARTITION_KEY));
      Assert.assertTrue(pfs.getPartition(PARTITION_KEY).getLocation().exists());
      pfs.dropPartition(PARTITION_KEY);
      Assert.assertFalse(outputLocation.exists());
      Assert.assertNull(pfs.getPartition(PARTITION_KEY));
      // dropping a partition that no longer exists must not fail
      pfs.dropPartition(PARTITION_KEY);
    }
  });
  // the files of the partition are dropped upon transaction commit
  Assert.assertFalse(outputLocationRef.get().exists());
}
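The helper createPartition(pfs, PARTITION_KEY, "file") is defined elsewhere in the test class and is not shown here. A minimal sketch of what it might look like, assuming it writes a single file into the partition's output location and then registers the partition (the helper body below is hypothetical):

private Location createPartition(PartitionedFileSet pfs, PartitionKey key, String fileName) throws IOException {
  PartitionOutput output = pfs.getPartitionOutput(key);     // reserves a relative path for this key
  Location file = output.getLocation().append(fileName);    // a file inside the partition directory
  try (OutputStream out = file.getOutputStream()) {
    out.write(42);                                           // any content will do for the test
  }
  output.addPartition();                                     // records the partition in the metadata table
  return file;
}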
Use of co.cask.cdap.api.dataset.lib.PartitionedFileSet in project cdap by caskdata.
In the class PartitionedFileSetTest, method testAddRemoveGetPartitions.
@Test
@Category(SlowTests.class)
public void testAddRemoveGetPartitions() throws Exception {
  final PartitionedFileSet dataset = dsFrameworkUtil.getInstance(pfsInstance);
  final PartitionKey[][][] keys = new PartitionKey[4][4][4];
  final String[][][] paths = new String[4][4][4];
  final Set<BasicPartition> allPartitionDetails = Sets.newHashSet();
  // add a bunch of partitions
  for (int s = 0; s < 4; s++) {
    for (int i = 0; i < 4; i++) {
      for (int l = 0; l < 4; l++) {
        final PartitionKey key = PartitionKey.builder()
          .addField("s", String.format("%c-%d", 'a' + s, s))
          .addField("i", i * 100)
          .addField("l", 15L - 10 * l)
          .build();
        BasicPartition basicPartition =
          dsFrameworkUtil.newTransactionExecutor((TransactionAware) dataset).execute(new Callable<BasicPartition>() {
            @Override
            public BasicPartition call() throws Exception {
              PartitionOutput p = dataset.getPartitionOutput(key);
              p.addPartition();
              return new BasicPartition((PartitionedFileSetDataset) dataset, p.getRelativePath(), p.getPartitionKey());
            }
          });
        keys[s][i][l] = key;
        paths[s][i][l] = basicPartition.getRelativePath();
        allPartitionDetails.add(basicPartition);
      }
    }
  }
  // validate getPartition with exact partition key
  for (int s = 0; s < 4; s++) {
    for (int i = 0; i < 4; i++) {
      for (int l = 0; l < 4; l++) {
        final PartitionKey key = keys[s][i][l];
        final String path = paths[s][i][l];
        dsFrameworkUtil.newTransactionExecutor((TransactionAware) dataset).execute(new TransactionExecutor.Subroutine() {
          @Override
          public void apply() throws Exception {
            PartitionDetail partitionDetail = dataset.getPartition(key);
            Assert.assertNotNull(partitionDetail);
            Assert.assertEquals(path, partitionDetail.getRelativePath());
          }
        });
        // also test getPartitionPaths() and getPartitions() for the filter matching this partition key
        @SuppressWarnings({ "unchecked", "unused" })
        boolean success = testFilter(dataset, allPartitionDetails,
                                     PartitionFilter.builder()
                                       .addValueCondition("l", key.getField("l"))
                                       .addValueCondition("s", key.getField("s"))
                                       .addValueCondition("i", key.getField("i"))
                                       .build());
      }
    }
  }
  // test whether the query works without a filter
  testFilter(dataset, allPartitionDetails, null);
  // generate a list of partition filters with exhaustive coverage
  List<PartitionFilter> filters = generateFilters();
  // test all kinds of filters
  testAllFilters(dataset, allPartitionDetails, filters);
  // remove a few of the partitions and test again, repeatedly
  PartitionKey[] keysToRemove = { keys[1][2][3], keys[0][1][0], keys[2][3][2], keys[3][1][2] };
  for (final PartitionKey key : keysToRemove) {
    // remove in a transaction
    dsFrameworkUtil.newTransactionExecutor((TransactionAware) dataset)
      .execute(new TransactionExecutor.Procedure<PartitionKey>() {
        @Override
        public void apply(PartitionKey partitionKey) throws Exception {
          dataset.dropPartition(partitionKey);
        }
      }, key);
    // forget the corresponding partition detail, then test all filters again
    BasicPartition toRemove =
      Iterables.tryFind(allPartitionDetails, new com.google.common.base.Predicate<BasicPartition>() {
        @Override
        public boolean apply(BasicPartition partition) {
          return key.equals(partition.getPartitionKey());
        }
      }).get();
    allPartitionDetails.remove(toRemove);
    testAllFilters(dataset, allPartitionDetails, filters);
  }
}
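The helpers testFilter(...), testAllFilters(...) and generateFilters() are likewise defined elsewhere in the test class. As an illustration of the filter API they exercise, a hedged sketch of a single filter check (the method name checkFilter and the concrete conditions are hypothetical; like the code above, it would have to run inside a transaction):

private void checkFilter(PartitionedFileSet dataset, Set<BasicPartition> allPartitions) {
  // a filter may combine exact-value conditions and range conditions on different fields
  PartitionFilter filter = PartitionFilter.builder()
    .addValueCondition("s", "a-0")
    .addRangeCondition("i", 0, 300)    // lower bound inclusive, upper bound exclusive
    .build();
  // expected: the partitions whose keys match the filter
  Set<String> expectedPaths = Sets.newHashSet();
  for (BasicPartition partition : allPartitions) {
    if (filter.match(partition.getPartitionKey())) {
      expectedPaths.add(partition.getRelativePath());
    }
  }
  // actual: what the dataset returns for the same filter
  Set<String> actualPaths = Sets.newHashSet();
  for (PartitionDetail detail : dataset.getPartitions(filter)) {
    actualPaths.add(detail.getRelativePath());
  }
  Assert.assertEquals(expectedPaths, actualPaths);
}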
Use of co.cask.cdap.api.dataset.lib.PartitionedFileSet in project cdap by caskdata.
In the class PartitionedFileSetTest, method testRollbackOnTransactionAbort.
@Test
public void testRollbackOnTransactionAbort() throws Exception {
  PartitionedFileSet pfs = dsFrameworkUtil.getInstance(pfsInstance);
  TransactionContext txContext = new TransactionContext(txClient, (TransactionAware) pfs);
  txContext.start();
  Location outputLocation = createPartition(pfs, PARTITION_KEY, "file");
  Assert.assertNotNull(pfs.getPartition(PARTITION_KEY));
  Assert.assertTrue(pfs.getPartition(PARTITION_KEY).getLocation().exists());
  txContext.abort();
  // because the previous transaction aborted, neither the partition nor the file exists
  txContext.start();
  Assert.assertNull(pfs.getPartition(PARTITION_KEY));
  Assert.assertFalse(outputLocation.exists());
  txContext.finish();
}
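For contrast, a hedged sketch of the commit path with the same explicit TransactionContext, reusing the fixtures from the test above (txClient, pfs, PARTITION_KEY, createPartition): finish() commits the transaction, so both the partition entry and its file survive.

TransactionContext txContext = new TransactionContext(txClient, (TransactionAware) pfs);
txContext.start();
Location outputLocation = createPartition(pfs, PARTITION_KEY, "file");
txContext.finish();                                        // commit instead of abort()
txContext.start();
Assert.assertNotNull(pfs.getPartition(PARTITION_KEY));     // partition metadata was persisted
Assert.assertTrue(outputLocation.exists());                // the file is kept on commit
txContext.finish();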
Use of co.cask.cdap.api.dataset.lib.PartitionedFileSet in project cdap by caskdata.
In the class PartitionedFileSetTest, method testInvalidPartitionKey.
@Test
public void testInvalidPartitionKey() throws Exception {
  final PartitionedFileSet pfs = dsFrameworkUtil.getInstance(pfsInstance);
  dsFrameworkUtil.newTransactionExecutor((TransactionAware) pfs).execute(new TransactionExecutor.Subroutine() {
    @Override
    public void apply() throws Exception {
      try {
        pfs.getPartitionOutput(PartitionKey.builder().addField("i", 1).addField("l", 2L).build());
        Assert.fail("should have thrown exception due to missing field");
      } catch (IllegalArgumentException e) {
        // expected
      }
      try {
        pfs.addPartition(PartitionKey.builder().addField("i", 1).addField("l", "2").addField("s", "a").build(),
                         "some/location");
        Assert.fail("should have thrown exception due to incompatible field");
      } catch (IllegalArgumentException e) {
        // expected
      }
      try {
        pfs.addPartition(PartitionKey.builder()
                           .addField("i", 1).addField("l", 2L).addField("s", "a").addField("x", "x").build(),
                         "some/location", ImmutableMap.of("a", "b"));
        Assert.fail("should have thrown exception due to extra field");
      } catch (IllegalArgumentException e) {
        // expected
      }
      pfs.addPartition(PartitionKey.builder().addField("i", 1).addField("l", 2L).addField("s", "a").build(),
                       "some/location", ImmutableMap.of("a", "b"));
      try {
        pfs.addMetadata(PartitionKey.builder()
                          .addField("i", 1).addField("l", 2L).addField("s", "a").addField("x", "x").build(),
                        ImmutableMap.of("abc", "xyz"));
        Assert.fail("should have thrown exception due to extra field");
      } catch (IllegalArgumentException e) {
        // expected
      }
      try {
        pfs.dropPartition(PartitionKey.builder().addField("i", 1).addField("l", 2L).addField("s", 0).build());
        Assert.fail("should have thrown exception due to incompatible field");
      } catch (IllegalArgumentException e) {
        // expected
      }
    }
  });
}
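The expected failures above imply that pfsInstance was created with a partitioning of a string field "s", an int field "i" and a long field "l". A hedged sketch of how such dataset properties could be declared (the actual setup of pfsInstance is not part of this snippet and may differ):

Partitioning partitioning = Partitioning.builder()
  .addStringField("s")
  .addIntField("i")
  .addLongField("l")
  .build();
DatasetProperties properties = PartitionedFileSetProperties.builder()
  .setPartitioning(partitioning)
  .setBasePath("pfs")                // hypothetical base path
  .build();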
Use of co.cask.cdap.api.dataset.lib.PartitionedFileSet in project cdap by caskdata.
In the class PartitionedFileSetTest, method testSimplePartitionConsuming.
@Test
public void testSimplePartitionConsuming() throws Exception {
  final PartitionedFileSet dataset = dsFrameworkUtil.getInstance(pfsInstance);
  final TransactionAware txAwareDataset = (TransactionAware) dataset;
  final Set<PartitionKey> partitionKeys1 = Sets.newHashSet();
  for (int i = 0; i < 10; i++) {
    partitionKeys1.add(generateUniqueKey());
  }
  final Set<PartitionKey> partitionKeys2 = Sets.newHashSet();
  for (int i = 0; i < 15; i++) {
    partitionKeys2.add(generateUniqueKey());
  }
  final SimplePartitionConsumer partitionConsumer = new SimplePartitionConsumer(dataset);
  dsFrameworkUtil.newInMemoryTransactionExecutor(txAwareDataset).execute(new TransactionExecutor.Subroutine() {
    @Override
    public void apply() throws Exception {
      for (PartitionKey partitionKey : partitionKeys1) {
        dataset.getPartitionOutput(partitionKey).addPartition();
      }
    }
  });
  dsFrameworkUtil.newInMemoryTransactionExecutor(txAwareDataset).execute(new TransactionExecutor.Subroutine() {
    @Override
    public void apply() throws Exception {
      // The initial consumption returns only the partitions corresponding to partitionKeys1, because only
      // those partitions have been added to the dataset at this point.
      List<Partition> consumedPartitions = Lists.newArrayList();
      Iterables.addAll(consumedPartitions, partitionConsumer.consumePartitions());
      Set<PartitionKey> retrievedKeys = Sets.newHashSet();
      for (Partition consumedPartition : consumedPartitions) {
        retrievedKeys.add(consumedPartition.getPartitionKey());
      }
      Assert.assertEquals(partitionKeys1, retrievedKeys);
    }
  });
  dsFrameworkUtil.newInMemoryTransactionExecutor(txAwareDataset).execute(new TransactionExecutor.Subroutine() {
    @Override
    public void apply() throws Exception {
      for (PartitionKey partitionKey : partitionKeys2) {
        dataset.getPartitionOutput(partitionKey).addPartition();
      }
    }
  });
  dsFrameworkUtil.newInMemoryTransactionExecutor(txAwareDataset).execute(new TransactionExecutor.Subroutine() {
    @Override
    public void apply() throws Exception {
      // Using the same PartitionConsumer (which remembers the PartitionConsumerState) to consume additional
      // partitions returns only the newly added partitions (corresponding to partitionKeys2).
      List<Partition> consumedPartitions = Lists.newArrayList();
      Iterables.addAll(consumedPartitions, partitionConsumer.consumePartitions());
      Set<PartitionKey> retrievedKeys = Sets.newHashSet();
      for (Partition consumedPartition : consumedPartitions) {
        retrievedKeys.add(consumedPartition.getPartitionKey());
      }
      Assert.assertEquals(partitionKeys2, retrievedKeys);
    }
  });
  dsFrameworkUtil.newInMemoryTransactionExecutor(txAwareDataset).execute(new TransactionExecutor.Subroutine() {
    @Override
    public void apply() throws Exception {
      // consuming the partitions again, without adding any new partitions, returns an empty list
      Assert.assertTrue(partitionConsumer.consumePartitions().isEmpty());
    }
  });
  dsFrameworkUtil.newInMemoryTransactionExecutor(txAwareDataset).execute(new TransactionExecutor.Subroutine() {
    @Override
    public void apply() throws Exception {
      // Creating a new PartitionConsumer resets the consumption state, so consuming from it returns
      // all partition keys added so far.
      List<Partition> consumedPartitions = Lists.newArrayList();
      Iterables.addAll(consumedPartitions, new SimplePartitionConsumer(dataset).consumePartitions());
      Set<PartitionKey> retrievedKeys = Sets.newHashSet();
      for (Partition consumedPartition : consumedPartitions) {
        retrievedKeys.add(consumedPartition.getPartitionKey());
      }
      Set<PartitionKey> allKeys = Sets.newHashSet();
      allKeys.addAll(partitionKeys1);
      allKeys.addAll(partitionKeys2);
      Assert.assertEquals(allKeys, retrievedKeys);
    }
  });
}
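SimplePartitionConsumer is a test helper that is not shown in this snippet. A hedged sketch of what such a helper could look like, assuming the consumePartitions(PartitionConsumerState) method of PartitionedFileSet and the PartitionConsumerResult it returns; the actual helper in the CDAP test sources may differ:

public class SimplePartitionConsumer {
  private final PartitionedFileSet dataset;
  // FROM_BEGINNING means the first call sees every partition that exists so far
  private PartitionConsumerState state = PartitionConsumerState.FROM_BEGINNING;

  public SimplePartitionConsumer(PartitionedFileSet dataset) {
    this.dataset = dataset;
  }

  public List<PartitionDetail> consumePartitions() {
    PartitionConsumerResult result = dataset.consumePartitions(state);
    state = result.getNextState();   // remember where this consumer left off
    return result.getPartitions();   // only partitions added since the previous call
  }
}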