Search in sources :

Example 56 with PartitionedFileSet

use of co.cask.cdap.api.dataset.lib.PartitionedFileSet in project cdap by caskdata.

the class PartitionedFileSetTest method testPermissions.

/**
 * Sanity test: creates a file through the embedded file set and checks that it carries the
 * expected permissions ({@code fsPermissions}) and group ({@code group}), then checks that the
 * properties of the "partitions" sub-specification carry the expected table permissions
 * ({@code tablePermissions}).
 */
@Test
public void testPermissions() throws Exception {
    PartitionedFileSet dataset = dsFrameworkUtil.getInstance(pfsInstance);

    // create an empty file at an arbitrary path so that there is something to inspect
    String probePath = "some/random/path";
    Location probe = dataset.getEmbeddedFileSet().getLocation(probePath);
    probe.getOutputStream().close();

    // the file must have picked up the permissions and group of the file set
    Assert.assertEquals(fsPermissions, probe.getPermissions());
    Assert.assertEquals(group, probe.getGroup());

    // the table permissions are read back from the properties of the "partitions" specification
    Map<String, String> partitionsProps =
        dsFrameworkUtil.getSpec(pfsInstance).getSpecification("partitions").getProperties();
    Assert.assertEquals(tablePermissions, TableProperties.getTablePermissions(partitionsProps));
}
Also used : PartitionedFileSet(co.cask.cdap.api.dataset.lib.PartitionedFileSet) Location(org.apache.twill.filesystem.Location) Test(org.junit.Test)

Example 57 with PartitionedFileSet

use of co.cask.cdap.api.dataset.lib.PartitionedFileSet in project cdap by caskdata.

the class PartitionedFileSetTest method testInvalidPartitionFilter.

/**
 * Verifies that asking for partitions with a filter on the field "me-not-there" completes
 * without throwing and yields an empty set. The query runs inside a transaction obtained from
 * {@code dsFrameworkUtil}.
 */
@Test
public void testInvalidPartitionFilter() throws Exception {
    final PartitionedFileSet pfs = dsFrameworkUtil.getInstance(pfsInstance);
    dsFrameworkUtil.newTransactionExecutor((TransactionAware) pfs).execute(new TransactionExecutor.Subroutine() {

        @Override
        public void apply() throws Exception {
            PartitionFilter filter = PartitionFilter.builder().addValueCondition("me-not-there", 42).build();
            // this should succeed without error (but log a warning);
            // Collections.emptySet() is the type-safe equivalent of the raw Collections.EMPTY_SET constant
            Assert.assertEquals(Collections.emptySet(), pfs.getPartitions(filter));
        }
    });
}
Also used : TransactionAware(org.apache.tephra.TransactionAware) PartitionedFileSet(co.cask.cdap.api.dataset.lib.PartitionedFileSet) TransactionExecutor(org.apache.tephra.TransactionExecutor) PartitionNotFoundException(co.cask.cdap.api.dataset.PartitionNotFoundException) PartitionAlreadyExistsException(co.cask.cdap.api.dataset.lib.PartitionAlreadyExistsException) IOException(java.io.IOException) DataSetException(co.cask.cdap.api.dataset.DataSetException) Test(org.junit.Test)

Example 58 with PartitionedFileSet

use of co.cask.cdap.api.dataset.lib.PartitionedFileSet in project cdap by caskdata.

the class PartitionedFileSetTest method testRollbackOnJobFailure.

/**
 * Tests the logic of the {@code onFailure} method of {@code PartitionedFileSetDataset}: an output
 * directory is created inside a transaction, {@code onFailure} is invoked and the transaction is
 * aborted; afterwards neither the partition nor the output directory may exist.
 */
@Test
public void testRollbackOnJobFailure() throws Exception {
    // runtime arguments that select the output path and the partition key to write to
    Map<String, String> runtimeArgs = new HashMap<>();
    FileSetArguments.setOutputPath(runtimeArgs, "custom/output/path");
    PartitionedFileSetArguments.setOutputPartitionKey(runtimeArgs, PARTITION_KEY);

    PartitionedFileSet dataset = dsFrameworkUtil.getInstance(pfsInstance, runtimeArgs);
    TransactionContext tx = new TransactionContext(txClient, (TransactionAware) dataset);

    // first transaction: create the output directory, signal failure, then abort
    tx.start();
    Location outputDir = dataset.getEmbeddedFileSet().getOutputLocation();
    Assert.assertFalse(outputDir.exists());
    outputDir.mkdirs();
    Assert.assertTrue(outputDir.exists());
    PartitionedFileSetDataset datasetImpl = (PartitionedFileSetDataset) dataset;
    datasetImpl.onFailure();
    tx.abort();

    // second transaction: since the first one aborted, both the partition and its directory must be gone
    tx.start();
    Assert.assertNull(dataset.getPartition(PARTITION_KEY));
    Assert.assertFalse(outputDir.exists());
    tx.finish();
}
Also used : HashMap(java.util.HashMap) TransactionContext(org.apache.tephra.TransactionContext) PartitionedFileSet(co.cask.cdap.api.dataset.lib.PartitionedFileSet) Location(org.apache.twill.filesystem.Location) Test(org.junit.Test)

Example 59 with PartitionedFileSet

use of co.cask.cdap.api.dataset.lib.PartitionedFileSet in project cdap by caskdata.

the class PartitionedFileSetTest method testAddRemoveGetPartitionExternal.

/**
 * Exercises a partitioned file set created with {@code setDataExternal(true)} over a temporary
 * folder: requesting a partition output must throw {@link UnsupportedOperationException}, an
 * existing file can be added as a partition, and neither dropping the partition nor deleting the
 * dataset instance may remove the file or the folder.
 */
@Test
public void testAddRemoveGetPartitionExternal() throws Exception {
    final File externalDir = tmpFolder.newFolder();
    externalDir.mkdirs();
    dsFrameworkUtil.createInstance(
        "partitionedFileSet",
        pfsExternalInstance,
        PartitionedFileSetProperties.builder()
            .setPartitioning(PARTITIONING_1)
            .setBasePath(externalDir.getPath())
            .setDataExternal(true)
            .build());
    final PartitionedFileSet dataset = dsFrameworkUtil.getInstance(pfsExternalInstance);
    dsFrameworkUtil.newTransactionExecutor((TransactionAware) dataset).execute(new TransactionExecutor.Subroutine() {

        @Override
        public void apply() throws Exception {
            Assert.assertTrue(pfsBaseLocation.exists());

            // writing a new partition through the dataset must be rejected
            try {
                dataset.getPartitionOutput(PARTITION_KEY);
                Assert.fail("External partitioned file set should not allow writing files");
            } catch (UnsupportedOperationException expected) {
                // expected: the failure is the behavior under test
            }

            // create an empty file outside of the dataset and register it as a partition
            File externalFile = new File(externalDir, "some.file");
            new FileOutputStream(externalFile).close();
            Assert.assertTrue(externalFile.exists());
            dataset.addPartition(PARTITION_KEY, "some.file");
            Assert.assertNotNull(dataset.getPartition(PARTITION_KEY));
            Assert.assertTrue(dataset.getPartition(PARTITION_KEY).getLocation().exists());

            // dropping the partition must leave the file itself in place
            dataset.dropPartition(PARTITION_KEY);
            Assert.assertNull(dataset.getPartition(PARTITION_KEY));
            Assert.assertTrue(externalFile.exists());
        }
    });

    // deleting the dataset instance must leave the base location and the folder in place
    dsFrameworkUtil.deleteInstance(pfsExternalInstance);
    Assert.assertTrue(pfsBaseLocation.exists());
    Assert.assertTrue(externalDir.isDirectory());
}
Also used : TransactionAware(org.apache.tephra.TransactionAware) OutputStream(java.io.OutputStream) FileOutputStream(java.io.FileOutputStream) PartitionedFileSet(co.cask.cdap.api.dataset.lib.PartitionedFileSet) TransactionExecutor(org.apache.tephra.TransactionExecutor) File(java.io.File) PartitionNotFoundException(co.cask.cdap.api.dataset.PartitionNotFoundException) PartitionAlreadyExistsException(co.cask.cdap.api.dataset.lib.PartitionAlreadyExistsException) IOException(java.io.IOException) DataSetException(co.cask.cdap.api.dataset.DataSetException) Test(org.junit.Test)

Example 60 with PartitionedFileSet

use of co.cask.cdap.api.dataset.lib.PartitionedFileSet in project cdap by caskdata.

the class DataCleansingMapReduceTest method getDataFromFile.

/**
 * Looks up the dataset named {@code dsName}, builds a partition filter with a value condition
 * on the "time" field, and returns what {@code getDataFromFilter} yields for that dataset and
 * filter.
 *
 * @param time value used for the "time" condition of the partition filter
 * @param dsName name of the dataset to look up
 * @return the result of {@code getDataFromFilter} for the dataset and the time filter
 * @throws Exception if the lookup or the delegated read fails
 */
private Set<String> getDataFromFile(Long time, String dsName) throws Exception {
    DataSetManager<PartitionedFileSet> manager = getDataset(dsName);
    PartitionFilter timeFilter = PartitionFilter.builder()
        .addValueCondition("time", time)
        .build();
    PartitionedFileSet records = manager.get();
    return getDataFromFilter(records, timeFilter);
}
Also used : PartitionFilter(co.cask.cdap.api.dataset.lib.PartitionFilter) PartitionedFileSet(co.cask.cdap.api.dataset.lib.PartitionedFileSet)

Aggregations

PartitionedFileSet (co.cask.cdap.api.dataset.lib.PartitionedFileSet): 65, Test (org.junit.Test): 39, PartitionKey (co.cask.cdap.api.dataset.lib.PartitionKey): 32, Location (org.apache.twill.filesystem.Location): 25, TransactionAware (org.apache.tephra.TransactionAware): 24, TransactionExecutor (org.apache.tephra.TransactionExecutor): 24, PartitionDetail (co.cask.cdap.api.dataset.lib.PartitionDetail): 18, IOException (java.io.IOException): 17, DataSetException (co.cask.cdap.api.dataset.DataSetException): 12, FileSet (co.cask.cdap.api.dataset.lib.FileSet): 12, HashSet (java.util.HashSet): 12, List (java.util.List): 12, PartitionNotFoundException (co.cask.cdap.api.dataset.PartitionNotFoundException): 11, PartitionAlreadyExistsException (co.cask.cdap.api.dataset.lib.PartitionAlreadyExistsException): 11, ConcurrentPartitionConsumer (co.cask.cdap.api.dataset.lib.partitioned.ConcurrentPartitionConsumer): 11, PartitionConsumer (co.cask.cdap.api.dataset.lib.partitioned.PartitionConsumer): 11, TimePartitionedFileSet (co.cask.cdap.api.dataset.lib.TimePartitionedFileSet): 9, ImmutableList (com.google.common.collect.ImmutableList): 9, ArrayList (java.util.ArrayList): 9, HashMap (java.util.HashMap): 9