Search in sources :

Example 16 with PartitionedFileSet

use of co.cask.cdap.api.dataset.lib.PartitionedFileSet in project cdap by caskdata.

the class PartitionConsumerTest method testDroppedPartitions.

/**
 * Covers the case where a partition that is part of the consumer's working set gets dropped from the
 * PartitionedFileSet before it is successfully processed (see CDAP-6215).
 */
@Test
public void testDroppedPartitions() throws Exception {
    final PartitionedFileSet pfs = dsFrameworkUtil.getInstance(pfsInstance);
    final TransactionAware txAwarePfs = (TransactionAware) pfs;
    ConsumerConfiguration consumerConfig = ConsumerConfiguration.builder()
        .setMaxWorkingSetSize(1)
        .setMaxRetries(2)
        .build();
    final PartitionConsumer consumer =
        new ConcurrentPartitionConsumer(pfs, new InMemoryStatePersistor(), consumerConfig);
    final PartitionKey firstKey = generateUniqueKey();
    final PartitionKey secondKey = generateUniqueKey();

    // Each partition is added in its own transaction so that the first one can be in the working set without
    // the second one; partitions added in the same transaction share an index and can not be split up.
    dsFrameworkUtil.newInMemoryTransactionExecutor(txAwarePfs).execute(new TransactionExecutor.Subroutine() {
        @Override
        public void apply() throws Exception {
            pfs.getPartitionOutput(firstKey).addPartition();
        }
    });
    dsFrameworkUtil.newInMemoryTransactionExecutor(txAwarePfs).execute(new TransactionExecutor.Subroutine() {
        @Override
        public void apply() throws Exception {
            pfs.getPartitionOutput(secondKey).addPartition();
        }
    });

    // Consume a single partition (expected to be the first one added) and then report its processing as failed.
    dsFrameworkUtil.newInMemoryTransactionExecutor(txAwarePfs).execute(new TransactionExecutor.Subroutine() {
        @Override
        public void apply() throws Exception {
            PartitionConsumerResult firstResult = consumer.consumePartitions(1);
            List<PartitionDetail> consumed = firstResult.getPartitions();
            Assert.assertEquals(1, consumed.size());
            Assert.assertEquals(firstKey, consumed.get(0).getPartitionKey());
            // the aborted partition goes back into the working set
            consumer.onFinish(consumed, false);
        }
    });

    // Once the first partition is dropped from the dataset, it can no longer be consumed.
    dsFrameworkUtil.newInMemoryTransactionExecutor(txAwarePfs).execute(new TransactionExecutor.Subroutine() {
        @Override
        public void apply() throws Exception {
            pfs.dropPartition(firstKey);
        }
    });

    dsFrameworkUtil.newInMemoryTransactionExecutor(txAwarePfs).execute(new TransactionExecutor.Subroutine() {
        @Override
        public void apply() throws Exception {
            // The dropped partition was the only entry in the working set, so this call removes it and
            // yields neither partitions nor failed partitions.
            PartitionConsumerResult afterDrop = consumer.consumePartitions(1);
            Assert.assertEquals(0, afterDrop.getPartitions().size());
            Assert.assertEquals(0, afterDrop.getFailedPartitions().size());

            // The next call repopulates the working set and hands out the remaining partition.
            PartitionConsumerResult refilled = consumer.consumePartitions(1);
            Assert.assertEquals(1, refilled.getPartitions().size());
            Assert.assertEquals(secondKey, refilled.getPartitions().get(0).getPartitionKey());
        }
    });
}
Also used : ConcurrentPartitionConsumer(co.cask.cdap.api.dataset.lib.partitioned.ConcurrentPartitionConsumer) PartitionConsumerResult(co.cask.cdap.api.dataset.lib.partitioned.PartitionConsumerResult) PartitionedFileSet(co.cask.cdap.api.dataset.lib.PartitionedFileSet) TransactionExecutor(org.apache.tephra.TransactionExecutor) TransactionAware(org.apache.tephra.TransactionAware) ConsumerConfiguration(co.cask.cdap.api.dataset.lib.partitioned.ConsumerConfiguration) PartitionKey(co.cask.cdap.api.dataset.lib.PartitionKey) ArrayList(java.util.ArrayList) ImmutableList(com.google.common.collect.ImmutableList) List(java.util.List) ConcurrentPartitionConsumer(co.cask.cdap.api.dataset.lib.partitioned.ConcurrentPartitionConsumer) PartitionConsumer(co.cask.cdap.api.dataset.lib.partitioned.PartitionConsumer) Test(org.junit.Test)

Example 17 with PartitionedFileSet

use of co.cask.cdap.api.dataset.lib.PartitionedFileSet in project cdap by caskdata.

the class PartitionConsumerTest method testCustomOperations.

/**
 * Exercises a {@code CustomConsumer} configured with three retries: two failed bulk attempts over all
 * partitions, followed by successful consumption of the partitions one at a time.
 */
@Test
public void testCustomOperations() throws Exception {
    final PartitionedFileSet pfs = dsFrameworkUtil.getInstance(pfsInstance);
    final TransactionAware txAwarePfs = (TransactionAware) pfs;
    ConsumerConfiguration consumerConfig = ConsumerConfiguration.builder()
        .setMaxRetries(3)
        .build();
    final PartitionConsumer consumer = new CustomConsumer(pfs, new InMemoryStatePersistor(), consumerConfig);

    final int numPartitions = 3;
    final List<PartitionKey> partitionKeys = new ArrayList<>(numPartitions);
    while (partitionKeys.size() < numPartitions) {
        partitionKeys.add(generateUniqueKey());
    }

    // add all of the partitions within a single transaction
    dsFrameworkUtil.newInMemoryTransactionExecutor(txAwarePfs).execute(new TransactionExecutor.Subroutine() {
        @Override
        public void apply() throws Exception {
            for (PartitionKey key : partitionKeys) {
                pfs.getPartitionOutput(key).addPartition();
            }
        }
    });

    dsFrameworkUtil.newInMemoryTransactionExecutor(txAwarePfs).execute(new TransactionExecutor.Subroutine() {
        @Override
        public void apply() throws Exception {
            // two attempts that each receive every partition and are reported as failures
            for (int attempt = 0; attempt < 2; attempt++) {
                List<PartitionDetail> batch = consumer.consumePartitions().getPartitions();
                Assert.assertEquals(numPartitions, batch.size());
                consumer.onFinish(batch, false);
            }
            // following the two failed attempts, each call is expected to return a single partition,
            // which is then reported as successfully processed
            for (int i = 0; i < numPartitions; i++) {
                List<PartitionDetail> single = consumer.consumePartitions().getPartitions();
                Assert.assertEquals(1, single.size());
                consumer.onFinish(single, true);
            }
        }
    });
}
Also used : ArrayList(java.util.ArrayList) PartitionedFileSet(co.cask.cdap.api.dataset.lib.PartitionedFileSet) TransactionExecutor(org.apache.tephra.TransactionExecutor) TransactionAware(org.apache.tephra.TransactionAware) ConsumerConfiguration(co.cask.cdap.api.dataset.lib.partitioned.ConsumerConfiguration) PartitionKey(co.cask.cdap.api.dataset.lib.PartitionKey) ArrayList(java.util.ArrayList) ImmutableList(com.google.common.collect.ImmutableList) List(java.util.List) ConcurrentPartitionConsumer(co.cask.cdap.api.dataset.lib.partitioned.ConcurrentPartitionConsumer) PartitionConsumer(co.cask.cdap.api.dataset.lib.partitioned.PartitionConsumer) Test(org.junit.Test)

Example 18 with PartitionedFileSet

use of co.cask.cdap.api.dataset.lib.PartitionedFileSet in project cdap by caskdata.

the class PartitionedFileSetTest method before.

/**
 * Per-test setup: creates the transaction client and the partitioned file set instance, then records the
 * base location of its embedded file set and checks that the location exists.
 */
@Before
public void before() throws Exception {
    txClient = new InMemoryTxSystemClient(dsFrameworkUtil.getTxManager());

    dsFrameworkUtil.createInstance(
        "partitionedFileSet",
        pfsInstance,
        PartitionedFileSetProperties.builder()
            .setPartitioning(PARTITIONING_1)
            .setTablePermissions(tablePermissions)
            .setBasePath("testDir")
            .setFilePermissions(fsPermissions)
            .setFileGroup(group)
            .build());

    PartitionedFileSet createdDataset = dsFrameworkUtil.getInstance(pfsInstance);
    pfsBaseLocation = createdDataset.getEmbeddedFileSet().getBaseLocation();
    Assert.assertTrue(pfsBaseLocation.exists());
}
Also used : PartitionedFileSet(co.cask.cdap.api.dataset.lib.PartitionedFileSet) InMemoryTxSystemClient(org.apache.tephra.inmemory.InMemoryTxSystemClient) Before(org.junit.Before)

Example 19 with PartitionedFileSet

use of co.cask.cdap.api.dataset.lib.PartitionedFileSet in project cdap by caskdata.

the class PartitionedFileSetTest method testPartitionCreationTime.

/**
 * Checks that a freshly added partition reports a creation time that lies between the timestamps taken
 * immediately before and after the add, and that its last modification time equals its creation time.
 */
@Test
public void testPartitionCreationTime() throws Exception {
    final PartitionedFileSet pfs = dsFrameworkUtil.getInstance(pfsInstance);
    TransactionAware txAwarePfs = (TransactionAware) pfs;
    dsFrameworkUtil.newTransactionExecutor(txAwarePfs).execute(new TransactionExecutor.Subroutine() {
        @Override
        public void apply() throws Exception {
            PartitionOutput output = pfs.getPartitionOutput(PARTITION_KEY);

            // bracket the add operation with wall-clock timestamps
            long addStarted = System.currentTimeMillis();
            output.addPartition();
            long addFinished = System.currentTimeMillis();

            PartitionDetail detail = pfs.getPartition(PARTITION_KEY);
            Assert.assertNotNull(detail);

            long createdAt = detail.getMetadata().getCreationTime();
            long modifiedAt = detail.getMetadata().lastModificationTime();

            // a partition that has not been appended to must report identical creation and modification times
            Assert.assertEquals(createdAt, modifiedAt);
            // the creation time has to fall inside the bracket measured around addPartition()
            Assert.assertTrue(createdAt >= addStarted);
            Assert.assertTrue(createdAt <= addFinished);
        }
    });
}
Also used : PartitionOutput(co.cask.cdap.api.dataset.lib.PartitionOutput) TransactionAware(org.apache.tephra.TransactionAware) PartitionedFileSet(co.cask.cdap.api.dataset.lib.PartitionedFileSet) TransactionExecutor(org.apache.tephra.TransactionExecutor) PartitionDetail(co.cask.cdap.api.dataset.lib.PartitionDetail) PartitionNotFoundException(co.cask.cdap.api.dataset.PartitionNotFoundException) PartitionAlreadyExistsException(co.cask.cdap.api.dataset.lib.PartitionAlreadyExistsException) IOException(java.io.IOException) DataSetException(co.cask.cdap.api.dataset.DataSetException) Test(org.junit.Test)

Example 20 with PartitionedFileSet

use of co.cask.cdap.api.dataset.lib.PartitionedFileSet in project cdap by caskdata.

the class PartitionedFileSetTest method testRollbackOfPartitionCreateThenDelete.

/**
 * Creates a partition and drops it again within one transaction, aborts that transaction, and checks that
 * the partition's file does not exist afterwards.
 */
@Test
public void testRollbackOfPartitionCreateThenDelete() throws Exception {
    PartitionedFileSet partitionedFileSet = dsFrameworkUtil.getInstance(pfsInstance);
    TransactionAware txAwareFileSet = (TransactionAware) partitionedFileSet;
    TransactionContext transaction = new TransactionContext(txClient, txAwareFileSet);

    transaction.start();
    // the partition must not exist before it is created
    Assert.assertNull(partitionedFileSet.getPartition(PARTITION_KEY));
    Location partitionFile = createPartition(partitionedFileSet, PARTITION_KEY, "file");
    Assert.assertNotNull(partitionedFileSet.getPartition(PARTITION_KEY));
    partitionedFileSet.dropPartition(PARTITION_KEY);
    transaction.abort();

    // The file must be gone: the transaction was aborted, and the partition had also been dropped
    // at the end of that transaction.
    Assert.assertFalse(partitionFile.exists());
}
Also used : TransactionContext(org.apache.tephra.TransactionContext) PartitionedFileSet(co.cask.cdap.api.dataset.lib.PartitionedFileSet) Location(org.apache.twill.filesystem.Location) Test(org.junit.Test)

Aggregations

PartitionedFileSet (co.cask.cdap.api.dataset.lib.PartitionedFileSet)65 Test (org.junit.Test)39 PartitionKey (co.cask.cdap.api.dataset.lib.PartitionKey)32 Location (org.apache.twill.filesystem.Location)25 TransactionAware (org.apache.tephra.TransactionAware)24 TransactionExecutor (org.apache.tephra.TransactionExecutor)24 PartitionDetail (co.cask.cdap.api.dataset.lib.PartitionDetail)18 IOException (java.io.IOException)17 DataSetException (co.cask.cdap.api.dataset.DataSetException)12 FileSet (co.cask.cdap.api.dataset.lib.FileSet)12 HashSet (java.util.HashSet)12 List (java.util.List)12 PartitionNotFoundException (co.cask.cdap.api.dataset.PartitionNotFoundException)11 PartitionAlreadyExistsException (co.cask.cdap.api.dataset.lib.PartitionAlreadyExistsException)11 ConcurrentPartitionConsumer (co.cask.cdap.api.dataset.lib.partitioned.ConcurrentPartitionConsumer)11 PartitionConsumer (co.cask.cdap.api.dataset.lib.partitioned.PartitionConsumer)11 TimePartitionedFileSet (co.cask.cdap.api.dataset.lib.TimePartitionedFileSet)9 ImmutableList (com.google.common.collect.ImmutableList)9 ArrayList (java.util.ArrayList)9 HashMap (java.util.HashMap)9