Example 11 with PartitionOutput

Use of co.cask.cdap.api.dataset.lib.PartitionOutput in project cdap by caskdata.

From the class PartitionedFileSetTest, the method testAddRemoveGetPartitions:

@Test
@Category(SlowTests.class)
public void testAddRemoveGetPartitions() throws Exception {
    final PartitionedFileSet dataset = dsFrameworkUtil.getInstance(pfsInstance);
    final PartitionKey[][][] keys = new PartitionKey[4][4][4];
    final String[][][] paths = new String[4][4][4];
    final Set<BasicPartition> allPartitionDetails = Sets.newHashSet();
    // add a bunch of partitions
    for (int s = 0; s < 4; s++) {
        for (int i = 0; i < 4; i++) {
            for (int l = 0; l < 4; l++) {
                final PartitionKey key = PartitionKey.builder()
                    .addField("s", String.format("%c-%d", 'a' + s, s))
                    .addField("i", i * 100)
                    .addField("l", 15L - 10 * l)
                    .build();
                BasicPartition basicPartition = dsFrameworkUtil.newTransactionExecutor((TransactionAware) dataset).execute(new Callable<BasicPartition>() {

                    @Override
                    public BasicPartition call() throws Exception {
                        PartitionOutput p = dataset.getPartitionOutput(key);
                        p.addPartition();
                        return new BasicPartition((PartitionedFileSetDataset) dataset, p.getRelativePath(), p.getPartitionKey());
                    }
                });
                keys[s][i][l] = key;
                paths[s][i][l] = basicPartition.getRelativePath();
                allPartitionDetails.add(basicPartition);
            }
        }
    }
    // validate getPartition with exact partition key
    for (int s = 0; s < 4; s++) {
        for (int i = 0; i < 4; i++) {
            for (int l = 0; l < 4; l++) {
                final PartitionKey key = keys[s][i][l];
                final String path = paths[s][i][l];
                dsFrameworkUtil.newTransactionExecutor((TransactionAware) dataset).execute(new TransactionExecutor.Subroutine() {

                    @Override
                    public void apply() throws Exception {
                        PartitionDetail partitionDetail = dataset.getPartition(key);
                        Assert.assertNotNull(partitionDetail);
                        Assert.assertEquals(path, partitionDetail.getRelativePath());
                    }
                });
                // also test getPartitionPaths() and getPartitions() for a filter matching exactly this key
                @SuppressWarnings({ "unchecked", "unused" })
                boolean success = testFilter(dataset, allPartitionDetails,
                    PartitionFilter.builder()
                        .addValueCondition("l", key.getField("l"))
                        .addValueCondition("s", key.getField("s"))
                        .addValueCondition("i", key.getField("i"))
                        .build());
            }
        }
    }
    // test whether the query works without a filter
    testFilter(dataset, allPartitionDetails, null);
    // generate a list of partition filters with exhaustive coverage
    List<PartitionFilter> filters = generateFilters();
    // test all kinds of filters
    testAllFilters(dataset, allPartitionDetails, filters);
    // remove a few of the partitions, re-testing all filters after each removal
    PartitionKey[] keysToRemove = { keys[1][2][3], keys[0][1][0], keys[2][3][2], keys[3][1][2] };
    for (final PartitionKey key : keysToRemove) {
        // remove in a transaction
        dsFrameworkUtil.newTransactionExecutor((TransactionAware) dataset).execute(new TransactionExecutor.Procedure<PartitionKey>() {

            @Override
            public void apply(PartitionKey partitionKey) throws Exception {
                dataset.dropPartition(partitionKey);
            }
        }, key);
        // remove the dropped partition from the expected set, then re-test all filters
        BasicPartition toRemove = Iterables.tryFind(allPartitionDetails, new com.google.common.base.Predicate<BasicPartition>() {

            @Override
            public boolean apply(BasicPartition partition) {
                return key.equals(partition.getPartitionKey());
            }
        }).get();
        allPartitionDetails.remove(toRemove);
        testAllFilters(dataset, allPartitionDetails, filters);
    }
}
Also used: PartitionDetail (co.cask.cdap.api.dataset.lib.PartitionDetail), Predicate (co.cask.cdap.api.Predicate), PartitionedFileSet (co.cask.cdap.api.dataset.lib.PartitionedFileSet), TransactionExecutor (org.apache.tephra.TransactionExecutor), PartitionNotFoundException (co.cask.cdap.api.dataset.PartitionNotFoundException), IOException (java.io.IOException), DataSetException (co.cask.cdap.api.dataset.DataSetException), PartitionFilter (co.cask.cdap.api.dataset.lib.PartitionFilter), PartitionOutput (co.cask.cdap.api.dataset.lib.PartitionOutput), TransactionAware (org.apache.tephra.TransactionAware), PartitionKey (co.cask.cdap.api.dataset.lib.PartitionKey), Category (org.junit.experimental.categories.Category), Test (org.junit.Test)
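
Distilled from the test above, here is a minimal, self-contained sketch of the PartitionOutput pattern it exercises: obtain an output for a partition key, write a file under its location, call addPartition() to register the partition, then query it back with an exact-match PartitionFilter. The class name, the dataset handle pfs, and the field values are assumptions for illustration only; as the TransactionExecutor blocks above show, in a real program the calls must run inside a transaction.

import java.io.OutputStream;
import java.nio.charset.StandardCharsets;
import java.util.Set;
import co.cask.cdap.api.dataset.lib.PartitionDetail;
import co.cask.cdap.api.dataset.lib.PartitionFilter;
import co.cask.cdap.api.dataset.lib.PartitionKey;
import co.cask.cdap.api.dataset.lib.PartitionOutput;
import co.cask.cdap.api.dataset.lib.PartitionedFileSet;
import org.apache.twill.filesystem.Location;

final class PartitionOutputSketch {

    // pfs is assumed to be partitioned on (s: string, i: int, l: long),
    // matching the keys built in the test above
    static void writeAndQuery(PartitionedFileSet pfs) throws Exception {
        PartitionKey key = PartitionKey.builder()
            .addField("s", "a-0")
            .addField("i", 100)
            .addField("l", 5L)
            .build();

        // getPartitionOutput() hands out a location, but registers nothing yet
        PartitionOutput output = pfs.getPartitionOutput(key);
        Location file = output.getLocation().append("part-00000");
        try (OutputStream out = file.getOutputStream()) {
            out.write("some payload".getBytes(StandardCharsets.UTF_8));
        }
        // only after addPartition() is the key visible to getPartition()/getPartitions()
        output.addPartition();

        // an exact-match filter, analogous to the one built in the loop above
        PartitionFilter filter = PartitionFilter.builder()
            .addValueCondition("s", "a-0")
            .addValueCondition("i", 100)
            .addValueCondition("l", 5L)
            .build();
        Set<PartitionDetail> matches = pfs.getPartitions(filter);
        for (PartitionDetail detail : matches) {
            System.out.println(detail.getRelativePath());
        }
    }
}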

Example 12 with PartitionOutput

Use of co.cask.cdap.api.dataset.lib.PartitionOutput in project cdap by caskdata.

From the class PartitionedFileSetTest, the method testAddRemoveGetPartition:

@Test
public void testAddRemoveGetPartition() throws Exception {
    final PartitionedFileSet pfs = dsFrameworkUtil.getInstance(pfsInstance);
    final AtomicReference<Location> outputLocationRef = new AtomicReference<>();
    dsFrameworkUtil.newTransactionExecutor((TransactionAware) pfs).execute(new TransactionExecutor.Subroutine() {

        @Override
        public void apply() throws Exception {
            PartitionOutput output = pfs.getPartitionOutput(PARTITION_KEY);
            Location outputLocation = output.getLocation().append("file");
            outputLocationRef.set(outputLocation);
            // create an empty file under the partition's location, then register the partition
            OutputStream out = outputLocation.getOutputStream();
            out.close();
            output.addPartition();
            Assert.assertTrue(outputLocation.exists());
            Assert.assertNotNull(pfs.getPartition(PARTITION_KEY));
            Assert.assertTrue(pfs.getPartition(PARTITION_KEY).getLocation().exists());
            // drop the partition and verify that both its files and its metadata are gone
            pfs.dropPartition(PARTITION_KEY);
            Assert.assertFalse(outputLocation.exists());
            Assert.assertNull(pfs.getPartition(PARTITION_KEY));
            // dropping a partition that no longer exists must not fail
            pfs.dropPartition(PARTITION_KEY);
        }
    });
    // the files of the partition are dropped upon transaction commit
    Assert.assertFalse(outputLocationRef.get().exists());
}
Also used: PartitionOutput (co.cask.cdap.api.dataset.lib.PartitionOutput), TransactionAware (org.apache.tephra.TransactionAware), OutputStream (java.io.OutputStream), FileOutputStream (java.io.FileOutputStream), PartitionedFileSet (co.cask.cdap.api.dataset.lib.PartitionedFileSet), AtomicReference (java.util.concurrent.atomic.AtomicReference), TransactionExecutor (org.apache.tephra.TransactionExecutor), PartitionNotFoundException (co.cask.cdap.api.dataset.PartitionNotFoundException), IOException (java.io.IOException), DataSetException (co.cask.cdap.api.dataset.DataSetException), Location (org.apache.twill.filesystem.Location), Test (org.junit.Test)
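
And the inverse operation, distilled from the same test: a sketch of a guarded drop, assuming a pfs handle and a key obtained elsewhere and calls wrapped in a transaction as above. Note the test itself goes further and shows that an unguarded second dropPartition() on the already-dropped key completes without error.

import co.cask.cdap.api.dataset.lib.PartitionDetail;
import co.cask.cdap.api.dataset.lib.PartitionKey;
import co.cask.cdap.api.dataset.lib.PartitionedFileSet;

final class DropPartitionSketch {

    // drops the partition for key if it is registered; the guard is for
    // illustration only, since the test shows the unguarded call is also safe
    static void dropIfPresent(PartitionedFileSet pfs, PartitionKey key) throws Exception {
        PartitionDetail detail = pfs.getPartition(key);
        if (detail != null) {
            // removes the partition's metadata; per the comment in the test,
            // the partition's files are dropped upon transaction commit
            pfs.dropPartition(key);
        }
    }
}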

Aggregations

PartitionOutput (co.cask.cdap.api.dataset.lib.PartitionOutput): 12 uses
PartitionedFileSet (co.cask.cdap.api.dataset.lib.PartitionedFileSet): 10 uses
Test (org.junit.Test): 9 uses
Location (org.apache.twill.filesystem.Location): 7 uses
IOException (java.io.IOException): 6 uses
DataSetException (co.cask.cdap.api.dataset.DataSetException): 5 uses
PartitionNotFoundException (co.cask.cdap.api.dataset.PartitionNotFoundException): 5 uses
PartitionDetail (co.cask.cdap.api.dataset.lib.PartitionDetail): 5 uses
PartitionKey (co.cask.cdap.api.dataset.lib.PartitionKey): 5 uses
TransactionAware (org.apache.tephra.TransactionAware): 5 uses
TransactionExecutor (org.apache.tephra.TransactionExecutor): 5 uses
FileOutputStream (java.io.FileOutputStream): 3 uses
OutputStream (java.io.OutputStream): 3 uses
TransactionContext (org.apache.tephra.TransactionContext): 3 uses
TimePartitionedFileSet (co.cask.cdap.api.dataset.lib.TimePartitionedFileSet): 2 uses
ImmutableMap (com.google.common.collect.ImmutableMap): 2 uses
HashMap (java.util.HashMap): 2 uses
Predicate (co.cask.cdap.api.Predicate): 1 use
PartitionFilter (co.cask.cdap.api.dataset.lib.PartitionFilter): 1 use
Partitioning (co.cask.cdap.api.dataset.lib.Partitioning): 1 use