Example 56 with PartitionKey

Use of io.cdap.cdap.api.dataset.lib.PartitionKey in project cdap by cdapio.

From class PartitionedFileSetArgumentsTest, method testSetGetOutputPartitionKey.

@Test
public void testSetGetOutputPartitionKey() throws Exception {
    Map<String, String> arguments = new HashMap<>();
    PartitionKey key = PartitionKey.builder()
        .addIntField("i", 42)
        .addLongField("l", 17L)
        .addStringField("s", "x")
        .build();
    // Round trip: encode the key into the runtime arguments, then decode it back
    // using the dataset's partitioning and verify it is unchanged.
    PartitionedFileSetArguments.setOutputPartitionKey(arguments, key);
    Assert.assertEquals(key, PartitionedFileSetArguments.getOutputPartitionKey(arguments, PARTITIONING));
}
Also used: HashMap (java.util.HashMap), PartitionKey (io.cdap.cdap.api.dataset.lib.PartitionKey), Test (org.junit.Test)
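
The PARTITIONING constant referenced above is defined elsewhere in PartitionedFileSetArgumentsTest and is not shown on this page. A minimal sketch of what it must look like, assuming it declares exactly the three fields the test key carries (the round trip through getOutputPartitionKey can only succeed if field names and types match):

// Sketch of the assumed fixture; the actual constant lives in the test class.
private static final Partitioning PARTITIONING = Partitioning.builder()
    .addIntField("i")
    .addLongField("l")
    .addStringField("s")
    .build();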

Example 57 with PartitionKey

Use of io.cdap.cdap.api.dataset.lib.PartitionKey in project cdap by cdapio.

From class PartitionedFileSetTest, method testEncodeDecode.

@Test
public void testEncodeDecode() {
    // Encode a complete partition key into a row key, then parse it back:
    // the decoded key must equal the original.
    byte[] rowKey = PartitionedFileSetDataset.generateRowKey(PARTITION_KEY, PARTITIONING_1);
    PartitionKey decoded = PartitionedFileSetDataset.parseRowKey(rowKey, PARTITIONING_1);
    Assert.assertEquals(PARTITION_KEY, decoded);
}
Also used: PartitionKey (io.cdap.cdap.api.dataset.lib.PartitionKey), Test (org.junit.Test)
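
PARTITION_KEY and PARTITIONING_1 are fixtures of PartitionedFileSetTest that are not shown here. A minimal sketch of definitions consistent with this test and with the incomplete-key test that follows; the exact field names are an assumption based on the keys used elsewhere on this page:

// Sketch of the assumed fixtures; the actual constants live in the test class.
private static final Partitioning PARTITIONING_1 = Partitioning.builder()
    .addIntField("i")
    .addLongField("l")
    .addStringField("s")
    .build();

private static final PartitionKey PARTITION_KEY = PartitionKey.builder()
    .addIntField("i", 42)
    .addLongField("l", 17L)
    .addStringField("s", "x")
    .build();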

Example 58 with PartitionKey

Use of io.cdap.cdap.api.dataset.lib.PartitionKey in project cdap by cdapio.

From class PartitionedFileSetTest, method testEncodeIncompleteKey.

@Test(expected = IllegalArgumentException.class)
public void testEncodeIncompleteKey() {
    // This key has no value for the long field "l" that PARTITIONING_1 presumably
    // declares, so generating a row key from it must fail.
    PartitionKey key = PartitionKey.builder()
        .addIntField("i", 42)
        .addStringField("s", "x")
        .build();
    PartitionedFileSetDataset.generateRowKey(key, PARTITIONING_1);
}
Also used: PartitionKey (io.cdap.cdap.api.dataset.lib.PartitionKey), Test (org.junit.Test)
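
The expected IllegalArgumentException comes from the key not covering every field of the partitioning: assuming PARTITIONING_1 declares the long field "l" as sketched in the previous example, generateRowKey rejects a key that carries no value for it. Completing the key makes the same call succeed:

// Assumes PARTITIONING_1 declares the fields i, l, s as sketched above.
PartitionKey complete = PartitionKey.builder()
    .addIntField("i", 42)
    .addLongField("l", 17L)
    .addStringField("s", "x")
    .build();
// With all declared fields present, a row key can be generated without error.
byte[] rowKey = PartitionedFileSetDataset.generateRowKey(complete, PARTITIONING_1);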

Example 59 with PartitionKey

Use of io.cdap.cdap.api.dataset.lib.PartitionKey in project cdap by cdapio.

From class PartitionedFileSetTest, method testSimplePartitionConsuming.

@Test
public void testSimplePartitionConsuming() throws Exception {
    final PartitionedFileSet dataset = dsFrameworkUtil.getInstance(pfsInstance);
    final TransactionAware txAwareDataset = (TransactionAware) dataset;
    final Set<PartitionKey> partitionKeys1 = Sets.newHashSet();
    for (int i = 0; i < 10; i++) {
        partitionKeys1.add(generateUniqueKey());
    }
    final Set<PartitionKey> partitionKeys2 = Sets.newHashSet();
    for (int i = 0; i < 15; i++) {
        partitionKeys2.add(generateUniqueKey());
    }
    final SimplePartitionConsumer partitionConsumer = new SimplePartitionConsumer(dataset);
    dsFrameworkUtil.newInMemoryTransactionExecutor(txAwareDataset).execute(new TransactionExecutor.Subroutine() {

        @Override
        public void apply() throws Exception {
            for (PartitionKey partitionKey : partitionKeys1) {
                dataset.getPartitionOutput(partitionKey).addPartition();
            }
        }
    });
    dsFrameworkUtil.newInMemoryTransactionExecutor(txAwareDataset).execute(new TransactionExecutor.Subroutine() {

        @Override
        public void apply() throws Exception {
            // Initial consumption returns the partitions corresponding to partitionKeys1,
            // because only those partitions have been added to the dataset at this point.
            List<Partition> consumedPartitions = Lists.newArrayList();
            Iterables.addAll(consumedPartitions, partitionConsumer.consumePartitions());
            Set<PartitionKey> retrievedKeys = Sets.newHashSet();
            for (Partition consumedPartition : consumedPartitions) {
                retrievedKeys.add(consumedPartition.getPartitionKey());
            }
            Assert.assertEquals(partitionKeys1, retrievedKeys);
        }
    });
    dsFrameworkUtil.newInMemoryTransactionExecutor(txAwareDataset).execute(new TransactionExecutor.Subroutine() {

        @Override
        public void apply() throws Exception {
            for (PartitionKey partitionKey : partitionKeys2) {
                dataset.getPartitionOutput(partitionKey).addPartition();
            }
        }
    });
    dsFrameworkUtil.newInMemoryTransactionExecutor(txAwareDataset).execute(new TransactionExecutor.Subroutine() {

        @Override
        public void apply() throws Exception {
            // Using the same PartitionConsumer (which remembers its PartitionConsumerState) to consume
            // again returns only the newly added partitions, i.e. those corresponding to partitionKeys2.
            List<Partition> consumedPartitions = Lists.newArrayList();
            Iterables.addAll(consumedPartitions, partitionConsumer.consumePartitions());
            Set<PartitionKey> retrievedKeys = Sets.newHashSet();
            for (Partition consumedPartition : consumedPartitions) {
                retrievedKeys.add(consumedPartition.getPartitionKey());
            }
            Assert.assertEquals(partitionKeys2, retrievedKeys);
        }
    });
    dsFrameworkUtil.newInMemoryTransactionExecutor(txAwareDataset).execute(new TransactionExecutor.Subroutine() {

        @Override
        public void apply() throws Exception {
            // Consuming again without adding any new partitions returns an empty result.
            Assert.assertTrue(partitionConsumer.consumePartitions().isEmpty());
        }
    });
    dsFrameworkUtil.newInMemoryTransactionExecutor(txAwareDataset).execute(new TransactionExecutor.Subroutine() {

        @Override
        public void apply() throws Exception {
            // A new PartitionConsumer starts from a fresh consumption state, so consuming
            // from it returns all partitions added so far.
            List<Partition> consumedPartitions = Lists.newArrayList();
            Iterables.addAll(consumedPartitions, new SimplePartitionConsumer(dataset).consumePartitions());
            Set<PartitionKey> retrievedKeys = Sets.newHashSet();
            for (Partition consumedPartition : consumedPartitions) {
                retrievedKeys.add(consumedPartition.getPartitionKey());
            }
            Set<PartitionKey> allKeys = Sets.newHashSet();
            allKeys.addAll(partitionKeys1);
            allKeys.addAll(partitionKeys2);
            Assert.assertEquals(allKeys, retrievedKeys);
        }
    });
}
Also used: Partition (io.cdap.cdap.api.dataset.lib.Partition), PartitionedFileSet (io.cdap.cdap.api.dataset.lib.PartitionedFileSet), ImmutableSet (com.google.common.collect.ImmutableSet), Set (java.util.Set), FileSet (io.cdap.cdap.api.dataset.lib.FileSet), HashSet (java.util.HashSet), TransactionExecutor (org.apache.tephra.TransactionExecutor), DataSetException (io.cdap.cdap.api.dataset.DataSetException), PartitionNotFoundException (io.cdap.cdap.api.dataset.PartitionNotFoundException), PartitionAlreadyExistsException (io.cdap.cdap.api.dataset.lib.PartitionAlreadyExistsException), IOException (java.io.IOException), TransactionAware (org.apache.tephra.TransactionAware), PartitionKey (io.cdap.cdap.api.dataset.lib.PartitionKey), List (java.util.List), Test (org.junit.Test)
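
generateUniqueKey() is a helper of the test class that is not shown on this page. A hypothetical reconstruction that satisfies what the test needs, namely a distinct PartitionKey on every call; the counter-based field values are an assumption:

// Hypothetical reconstruction of the helper; field names match the keys used elsewhere on this page.
private final AtomicInteger keyCounter = new AtomicInteger();  // java.util.concurrent.atomic.AtomicInteger

private PartitionKey generateUniqueKey() {
    int n = keyCounter.incrementAndGet();
    // Each call produces a key that differs from all previous ones in every field.
    return PartitionKey.builder()
        .addIntField("i", n)
        .addLongField("l", 17L * n)
        .addStringField("s", Integer.toString(n))
        .build();
}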

Example 60 with PartitionKey

Use of io.cdap.cdap.api.dataset.lib.PartitionKey in project cdap by cdapio.

From class PartitionedFileSetTest, method testPartitionMetadata.

@Test
public void testPartitionMetadata() throws Exception {
    final PartitionedFileSet dataset = dsFrameworkUtil.getInstance(pfsInstance);
    dsFrameworkUtil.newTransactionExecutor((TransactionAware) dataset).execute(new TransactionExecutor.Subroutine() {

        @Override
        public void apply() throws Exception {
            PartitionKey partitionKey = PartitionKey.builder()
                .addIntField("i", 42)
                .addLongField("l", 17L)
                .addStringField("s", "x")
                .build();
            ImmutableMap<String, String> metadata = ImmutableMap.of("key1", "value", "key2", "value2", "key3", "value2");
            // Attach the metadata to the PartitionOutput before addPartition() persists the partition.
            PartitionOutput partitionOutput = dataset.getPartitionOutput(partitionKey);
            partitionOutput.setMetadata(metadata);
            partitionOutput.addPartition();
            // Reading the partition back must return the same metadata.
            PartitionDetail partitionDetail = dataset.getPartition(partitionKey);
            Assert.assertNotNull(partitionDetail);
            Assert.assertEquals(metadata, partitionDetail.getMetadata().asMap());
        }
    });
}
Also used: PartitionOutput (io.cdap.cdap.api.dataset.lib.PartitionOutput), TransactionAware (org.apache.tephra.TransactionAware), PartitionKey (io.cdap.cdap.api.dataset.lib.PartitionKey), PartitionedFileSet (io.cdap.cdap.api.dataset.lib.PartitionedFileSet), TransactionExecutor (org.apache.tephra.TransactionExecutor), PartitionDetail (io.cdap.cdap.api.dataset.lib.PartitionDetail), DataSetException (io.cdap.cdap.api.dataset.DataSetException), PartitionNotFoundException (io.cdap.cdap.api.dataset.PartitionNotFoundException), PartitionAlreadyExistsException (io.cdap.cdap.api.dataset.lib.PartitionAlreadyExistsException), IOException (java.io.IOException), ImmutableMap (com.google.common.collect.ImmutableMap), Test (org.junit.Test)
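
Since the test verifies the metadata through asMap(), which returns a plain java.util.Map, individual entries can also be read back directly; a short usage sketch following the test above:

// Look up single metadata entries via the Map view used in the assertion above.
PartitionDetail detail = dataset.getPartition(partitionKey);
String value1 = detail.getMetadata().asMap().get("key1");  // "value"
String value3 = detail.getMetadata().asMap().get("key3");  // "value2"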

Aggregations

PartitionKey (io.cdap.cdap.api.dataset.lib.PartitionKey): 121
Test (org.junit.Test): 55
PartitionedFileSet (io.cdap.cdap.api.dataset.lib.PartitionedFileSet): 53
TransactionAware (org.apache.tephra.TransactionAware): 34
TransactionExecutor (org.apache.tephra.TransactionExecutor): 34
IOException (java.io.IOException): 26
PartitionDetail (io.cdap.cdap.api.dataset.lib.PartitionDetail): 23
ConcurrentPartitionConsumer (io.cdap.cdap.api.dataset.lib.partitioned.ConcurrentPartitionConsumer): 22
PartitionConsumer (io.cdap.cdap.api.dataset.lib.partitioned.PartitionConsumer): 22
ArrayList (java.util.ArrayList): 22
List (java.util.List): 22
HashMap (java.util.HashMap): 21
ImmutableList (com.google.common.collect.ImmutableList): 18
DataSetException (io.cdap.cdap.api.dataset.DataSetException): 18
HashSet (java.util.HashSet): 18
Partition (io.cdap.cdap.api.dataset.lib.Partition): 14
ConsumerConfiguration (io.cdap.cdap.api.dataset.lib.partitioned.ConsumerConfiguration): 14
DatasetId (io.cdap.cdap.proto.id.DatasetId): 14
Map (java.util.Map): 14
Location (org.apache.twill.filesystem.Location): 14