use of co.cask.cdap.api.dataset.lib.PartitionedFileSet in project cdap by caskdata.
the class PartitionedFileSetTest method testPermissions.
/**
 * Sanity test: a file created through the embedded file set must carry the expected file
 * permissions and group, and the properties of the embedded "partitions" dataset specification
 * must carry the expected table permissions.
 */
@Test
public void testPermissions() throws Exception {
  PartitionedFileSet partitioned = dsFrameworkUtil.getInstance(pfsInstance);

  // create an empty file at an arbitrary path inside the embedded file set
  Location createdFile = partitioned.getEmbeddedFileSet().getLocation("some/random/path");
  createdFile.getOutputStream().close();

  // the new file must have the expected permissions and group
  Assert.assertEquals(fsPermissions, createdFile.getPermissions());
  Assert.assertEquals(group, createdFile.getGroup());

  // the table permissions are read back from the "partitions" sub-specification's properties
  Map<String, String> partitionsProperties =
    dsFrameworkUtil.getSpec(pfsInstance).getSpecification("partitions").getProperties();
  Assert.assertEquals(tablePermissions, TableProperties.getTablePermissions(partitionsProperties));
}
use of co.cask.cdap.api.dataset.lib.PartitionedFileSet in project cdap by caskdata.
the class PartitionedFileSetTest method testInvalidPartitionFilter.
/**
 * Verifies that querying partitions with a filter on a field named "me-not-there" does not fail
 * and yields an empty set of partitions.
 */
@Test
public void testInvalidPartitionFilter() throws Exception {
  final PartitionedFileSet pfs = dsFrameworkUtil.getInstance(pfsInstance);
  dsFrameworkUtil.newTransactionExecutor((TransactionAware) pfs).execute(new TransactionExecutor.Subroutine() {
    @Override
    public void apply() throws Exception {
      PartitionFilter unknownFieldFilter =
        PartitionFilter.builder().addValueCondition("me-not-there", 42).build();
      // this should succeed without error (but log a warning);
      // Collections.emptySet() is the type-safe replacement for the raw Collections.EMPTY_SET constant
      Assert.assertEquals(Collections.emptySet(), pfs.getPartitions(unknownFieldFilter));
    }
  });
}
use of co.cask.cdap.api.dataset.lib.PartitionedFileSet in project cdap by caskdata.
the class PartitionedFileSetTest method testRollbackOnJobFailure.
/**
 * Tests the logic of the dataset's {@code onFailure()} method: after the output directory has been
 * created inside a transaction, calling {@code onFailure()} and aborting the transaction must leave
 * neither the partition nor the output directory behind.
 */
@Test
public void testRollbackOnJobFailure() throws Exception {
  // runtime arguments: an explicit output path and the key of the partition to be written
  Map<String, String> runtimeArgs = new HashMap<>();
  FileSetArguments.setOutputPath(runtimeArgs, "custom/output/path");
  PartitionedFileSetArguments.setOutputPartitionKey(runtimeArgs, PARTITION_KEY);

  PartitionedFileSet dataset = dsFrameworkUtil.getInstance(pfsInstance, runtimeArgs);
  TransactionContext tx = new TransactionContext(txClient, (TransactionAware) dataset);

  // first transaction: create the output directory, then signal failure and abort
  tx.start();
  Location outputDir = dataset.getEmbeddedFileSet().getOutputLocation();
  Assert.assertFalse(outputDir.exists());
  outputDir.mkdirs();
  Assert.assertTrue(outputDir.exists());
  PartitionedFileSetDataset failedDataset = (PartitionedFileSetDataset) dataset;
  failedDataset.onFailure();
  tx.abort();

  // second transaction: because the previous one aborted, neither the partition
  // nor its directory may exist
  tx.start();
  Assert.assertNull(dataset.getPartition(PARTITION_KEY));
  Assert.assertFalse(outputDir.exists());
  tx.finish();
}
use of co.cask.cdap.api.dataset.lib.PartitionedFileSet in project cdap by caskdata.
the class PartitionedFileSetTest method testAddRemoveGetPartitionExternal.
/**
 * Exercises add/get/drop of a partition on a partitioned file set created with
 * {@code setDataExternal(true)}: requesting a partition output must be rejected, an existing file
 * can be registered as a partition, and neither dropping the partition nor deleting the dataset
 * instance may remove the file or the directories.
 */
@Test
public void testAddRemoveGetPartitionExternal() throws Exception {
  final File externalDir = tmpFolder.newFolder();
  externalDir.mkdirs();

  dsFrameworkUtil.createInstance(
    "partitionedFileSet",
    pfsExternalInstance,
    PartitionedFileSetProperties.builder()
      .setPartitioning(PARTITIONING_1)
      .setBasePath(externalDir.getPath())
      .setDataExternal(true)
      .build());
  final PartitionedFileSet externalPfs = dsFrameworkUtil.getInstance(pfsExternalInstance);

  dsFrameworkUtil.newTransactionExecutor((TransactionAware) externalPfs).execute(
    new TransactionExecutor.Subroutine() {
      @Override
      public void apply() throws Exception {
        Assert.assertTrue(pfsBaseLocation.exists());

        // attempt to write a new partition - must be rejected with UnsupportedOperationException
        boolean writeRejected = false;
        try {
          externalPfs.getPartitionOutput(PARTITION_KEY);
        } catch (UnsupportedOperationException e) {
          // expected
          writeRejected = true;
        }
        Assert.assertTrue("External partitioned file set should not allow writing files", writeRejected);

        // create an (empty) external file and add it as a partition
        File someFile = new File(externalDir, "some.file");
        new FileOutputStream(someFile).close();
        Assert.assertTrue(someFile.exists());
        externalPfs.addPartition(PARTITION_KEY, "some.file");
        Assert.assertNotNull(externalPfs.getPartition(PARTITION_KEY));
        Assert.assertTrue(externalPfs.getPartition(PARTITION_KEY).getLocation().exists());

        // now drop the partition and validate the file is still there
        externalPfs.dropPartition(PARTITION_KEY);
        Assert.assertNull(externalPfs.getPartition(PARTITION_KEY));
        Assert.assertTrue(someFile.exists());
      }
    });

  // drop the dataset and validate that the base dir still exists
  dsFrameworkUtil.deleteInstance(pfsExternalInstance);
  Assert.assertTrue(pfsBaseLocation.exists());
  Assert.assertTrue(externalDir.isDirectory());
}
use of co.cask.cdap.api.dataset.lib.PartitionedFileSet in project cdap by caskdata.
the class DataCleansingMapReduceTest method getDataFromFile.
/**
 * Looks up the named partitioned file set and returns the result of {@code getDataFromFilter}
 * for a filter that matches partitions whose "time" field equals the given value.
 *
 * @param time value the "time" partition field must equal
 * @param dsName name of the dataset to look up
 * @return whatever {@code getDataFromFilter} returns for that dataset and filter
 */
private Set<String> getDataFromFile(Long time, String dsName) throws Exception {
  DataSetManager<PartitionedFileSet> datasetManager = getDataset(dsName);
  PartitionFilter timeFilter = PartitionFilter.builder()
    .addValueCondition("time", time)
    .build();
  PartitionedFileSet fileSet = datasetManager.get();
  return getDataFromFilter(fileSet, timeFilter);
}
Aggregations