
Example 31 with TransactionAware

use of org.apache.tephra.TransactionAware in project cdap by caskdata.

the class PartitionedFileSetTest method testPartitionConsumingWithFilterAndLimit.

@Test
public void testPartitionConsumingWithFilterAndLimit() throws Exception {
    final PartitionedFileSet dataset = dsFrameworkUtil.getInstance(pfsInstance);
    final TransactionAware txAwareDataset = (TransactionAware) dataset;
    final Set<PartitionKey> partitionKeys1 = Sets.newHashSet();
    for (int i = 0; i < 10; i++) {
        partitionKeys1.add(generateUniqueKey());
    }
    final Set<PartitionKey> partitionKeys2 = Sets.newHashSet();
    for (int i = 0; i < 15; i++) {
        partitionKeys2.add(generateUniqueKey());
    }
    final SimplePartitionConsumer partitionConsumer = new SimplePartitionConsumer(dataset);
    // add each partition of partitionKeys1 in its own transaction, so the limit can later be applied at arbitrary points
    // (consumption only happens at transaction borders)
    for (final PartitionKey partitionKey : partitionKeys1) {
        dsFrameworkUtil.newInMemoryTransactionExecutor(txAwareDataset).execute(new TransactionExecutor.Subroutine() {

            @Override
            public void apply() throws Exception {
                dataset.getPartitionOutput(partitionKey).addPartition();
            }
        });
    }
    dsFrameworkUtil.newInMemoryTransactionExecutor(txAwareDataset).execute(new TransactionExecutor.Subroutine() {

        @Override
        public void apply() throws Exception {
            // Initial consumption results in the partitions corresponding to partitionKeys1 being consumed, because only
            // those partitions have been added to the dataset at this point
            List<Partition> consumedPartitions = Lists.newArrayList();
            // with limit = 1, the returned iterator contains only one partition, even though there are more unconsumed partitions
            Iterables.addAll(consumedPartitions, partitionConsumer.consumePartitions(1));
            Assert.assertEquals(1, consumedPartitions.size());
            // ask for 5 more
            Iterables.addAll(consumedPartitions, partitionConsumer.consumePartitions(5));
            Assert.assertEquals(6, consumedPartitions.size());
            // ask for 5 more, but there are only 4 more unconsumed partitions (size of partitionKeys1 is 10).
            Iterables.addAll(consumedPartitions, partitionConsumer.consumePartitions(5));
            Assert.assertEquals(10, consumedPartitions.size());
            Set<PartitionKey> retrievedKeys = Sets.newHashSet();
            for (Partition consumedPartition : consumedPartitions) {
                retrievedKeys.add(consumedPartition.getPartitionKey());
            }
            Assert.assertEquals(partitionKeys1, retrievedKeys);
        }
    });
    dsFrameworkUtil.newInMemoryTransactionExecutor(txAwareDataset).execute(new TransactionExecutor.Subroutine() {

        @Override
        public void apply() throws Exception {
            for (PartitionKey partitionKey : partitionKeys2) {
                dataset.getPartitionOutput(partitionKey).addPartition();
            }
        }
    });
    dsFrameworkUtil.newInMemoryTransactionExecutor(txAwareDataset).execute(new TransactionExecutor.Subroutine() {

        @Override
        public void apply() throws Exception {
            // using the same PartitionConsumer (which remembers the PartitionConsumerState) to consume additional
            // partitions results in only the newly added partitions (corresponding to partitionKeys2) being returned
            List<Partition> consumedPartitions = Lists.newArrayList();
            Iterables.addAll(consumedPartitions, partitionConsumer.consumePartitions(1));
            // even though the limit is set to 1 in the consumePartitions call above, we get all the elements of
            // partitionKeys2, because they were all added in the same transaction
            Set<PartitionKey> retrievedKeys = Sets.newHashSet();
            for (Partition consumedPartition : consumedPartitions) {
                retrievedKeys.add(consumedPartition.getPartitionKey());
            }
            Assert.assertEquals(partitionKeys2, retrievedKeys);
        }
    });
    dsFrameworkUtil.newInMemoryTransactionExecutor(txAwareDataset).execute(new TransactionExecutor.Subroutine() {

        @Override
        public void apply() throws Exception {
            // consuming the partitions again, without adding any new partitions, returns an empty result
            Assert.assertTrue(partitionConsumer.consumePartitions().isEmpty());
        }
    });
    dsFrameworkUtil.newInMemoryTransactionExecutor(txAwareDataset).execute(new TransactionExecutor.Subroutine() {

        @Override
        public void apply() throws Exception {
            // creating a new PartitionConsumer resets the consumption state.
            // test combination of filter and limit
            SimplePartitionConsumer newPartitionConsumer = new SimplePartitionConsumer(dataset);
            List<Partition> consumedPartitions = Lists.newArrayList();
            // the partitionFilter will match the partition keys whose 'i' field is in [1, 7), of which there are 6
            final PartitionFilter partitionFilter = PartitionFilter.builder().addRangeCondition("i", 1, 7).build();
            final Predicate<PartitionDetail> predicate = new Predicate<PartitionDetail>() {

                @Override
                public boolean apply(PartitionDetail partitionDetail) {
                    return partitionFilter.match(partitionDetail.getPartitionKey());
                }
            };
            // applying the filter (which narrows the set down to 6 elements) together with a limit of 4 results in 4 consumed partitions
            Iterables.addAll(consumedPartitions, newPartitionConsumer.consumePartitions(4, predicate));
            Assert.assertEquals(4, consumedPartitions.size());
            // applying a limit of 3 with the same filter returns the remaining 2 elements that match it
            Iterables.addAll(consumedPartitions, newPartitionConsumer.consumePartitions(3, predicate));
            Assert.assertEquals(6, consumedPartitions.size());
            // assert that the returned partitions have partition keys whose i values cover the range [1, 7)
            Set<Integer> expectedIFields = new HashSet<>();
            for (int i = 1; i < 7; i++) {
                expectedIFields.add(i);
            }
            Set<Integer> actualIFields = new HashSet<>();
            for (Partition consumedPartition : consumedPartitions) {
                actualIFields.add((Integer) consumedPartition.getPartitionKey().getField("i"));
            }
            Assert.assertEquals(expectedIFields, actualIFields);
        }
    });
}
Also used : Partition(co.cask.cdap.api.dataset.lib.Partition) ImmutableSet(com.google.common.collect.ImmutableSet) Set(java.util.Set) FileSet(co.cask.cdap.api.dataset.lib.FileSet) HashSet(java.util.HashSet) PartitionedFileSet(co.cask.cdap.api.dataset.lib.PartitionedFileSet) PartitionedFileSet(co.cask.cdap.api.dataset.lib.PartitionedFileSet) TransactionExecutor(org.apache.tephra.TransactionExecutor) PartitionDetail(co.cask.cdap.api.dataset.lib.PartitionDetail) PartitionNotFoundException(co.cask.cdap.api.dataset.PartitionNotFoundException) PartitionAlreadyExistsException(co.cask.cdap.api.dataset.lib.PartitionAlreadyExistsException) IOException(java.io.IOException) DataSetException(co.cask.cdap.api.dataset.DataSetException) Predicate(co.cask.cdap.api.Predicate) PartitionFilter(co.cask.cdap.api.dataset.lib.PartitionFilter) TransactionAware(org.apache.tephra.TransactionAware) PartitionKey(co.cask.cdap.api.dataset.lib.PartitionKey) List(java.util.List) Test(org.junit.Test)
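Since consumption only advances at transaction borders, each add or consume step in this test runs in its own in-memory transaction, expressed as an anonymous TransactionExecutor.Subroutine. Subroutine is a single-method interface, so on Java 8+ the same pattern can be written with lambdas. The following is a minimal sketch, not part of the original test, that reuses the dsFrameworkUtil, dataset, txAwareDataset and partitionConsumer variables from the example above:

TransactionExecutor executor = dsFrameworkUtil.newInMemoryTransactionExecutor(txAwareDataset);

// add one partition in its own transaction
executor.execute(() -> dataset.getPartitionOutput(generateUniqueKey()).addPartition());

// consume it in a later transaction; the limit caps how many partitions a single call returns
executor.execute(() -> {
    List<Partition> consumed = Lists.newArrayList();
    Iterables.addAll(consumed, partitionConsumer.consumePartitions(1));
    Assert.assertEquals(1, consumed.size());
});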

Example 32 with TransactionAware

use of org.apache.tephra.TransactionAware in project cdap by caskdata.

the class TimePartitionedFileSetTest method testInputConfigurationFailure.

private void testInputConfigurationFailure(Map<String, String> arguments, final String why) throws Exception {
    final TimePartitionedFileSet dataset = dsFrameworkUtil.getInstance(TPFS_INSTANCE, arguments);
    TransactionAware txAwareDataset = (TransactionAware) dataset;
    dsFrameworkUtil.newInMemoryTransactionExecutor(txAwareDataset).execute(new TransactionExecutor.Subroutine() {

        @Override
        public void apply() throws Exception {
            try {
                dataset.getInputFormatConfiguration();
                Assert.fail("getInputFormatConfiguration should fail " + why);
            } catch (Exception e) {
            // expected
            }
        }
    });
}
Also used : TransactionAware(org.apache.tephra.TransactionAware) TransactionExecutor(org.apache.tephra.TransactionExecutor) TimePartitionedFileSet(co.cask.cdap.api.dataset.lib.TimePartitionedFileSet) TransactionFailureException(org.apache.tephra.TransactionFailureException) DatasetManagementException(co.cask.cdap.api.dataset.DatasetManagementException) IOException(java.io.IOException) DataSetException(co.cask.cdap.api.dataset.DataSetException)
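This helper takes the dataset's runtime arguments plus a human-readable reason, and asserts that getInputFormatConfiguration() rejects them. A hedged sketch of one possible call site, assuming CDAP's TimePartitionedFileSetArguments setters for the input time range (the specific argument combination is illustrative and not taken from the original test):

Map<String, String> arguments = new HashMap<>();
// set only an input end time, no start time; input configuration is expected to reject this
TimePartitionedFileSetArguments.setInputEndTime(arguments, System.currentTimeMillis());
testInputConfigurationFailure(arguments, "when only an input end time is given");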

Example 33 with TransactionAware

use of org.apache.tephra.TransactionAware in project cdap by caskdata.

the class TimePartitionedFileSetTest method testAddGetPartitions.

@Test
public void testAddGetPartitions() throws Exception {
    final TimePartitionedFileSet fileSet = dsFrameworkUtil.getInstance(TPFS_INSTANCE);
    TransactionAware txAwareDataset = (TransactionAware) fileSet;
    dsFrameworkUtil.newInMemoryTransactionExecutor(txAwareDataset).execute(new TransactionExecutor.Subroutine() {

        @Override
        public void apply() throws Exception {
            // this is an arbitrary date to use as the test time
            long time = DATE_FORMAT.parse("12/10/14 5:10 am").getTime();
            long time2 = time + HOUR;
            String firstPath = "first/partition";
            String secondPath = "second/partition";
            // make sure the file set has no partitions initially
            validateTimePartition(fileSet, time, null);
            validateTimePartitions(fileSet, 0L, MAX, Collections.<Long, String>emptyMap());
            // add a partition, verify getPartition() works
            fileSet.addPartition(time, firstPath);
            validateTimePartition(fileSet, time, firstPath);
            Map<Long, String> expectNone = Collections.emptyMap();
            Map<Long, String> expectFirst = ImmutableMap.of(time, firstPath);
            Map<Long, String> expectSecond = ImmutableMap.of(time2, secondPath);
            Map<Long, String> expectBoth = ImmutableMap.of(time, firstPath, time2, secondPath);
            // verify various ways to list partitions with various ranges
            validateTimePartitions(fileSet, time + MINUTE, MAX, expectNone);
            validateTimePartitions(fileSet, 0L, time, expectNone);
            validateTimePartitions(fileSet, 0L, MAX, expectFirst);
            validateTimePartitions(fileSet, 0L, time + MINUTE, expectFirst);
            validateTimePartitions(fileSet, 0L, time + MINUTE, expectFirst);
            validateTimePartitions(fileSet, 0L, time + HOUR, expectFirst);
            validateTimePartitions(fileSet, time - HOUR, time + HOUR, expectFirst);
            // add and verify another partition
            fileSet.addPartition(time2, secondPath);
            validateTimePartition(fileSet, time2, secondPath);
            // verify various ways to list partitions with various ranges
            validateTimePartitions(fileSet, 0L, MAX, expectBoth);
            validateTimePartitions(fileSet, time, time + 30 * MINUTE, expectFirst);
            validateTimePartitions(fileSet, time + 30 * MINUTE, time2, expectNone);
            validateTimePartitions(fileSet, time + 30 * MINUTE, time2 + 30 * MINUTE, expectSecond);
            validateTimePartitions(fileSet, time - 30 * MINUTE, time2 + 30 * MINUTE, expectBoth);
            // try to add another partition with the same key
            try {
                fileSet.addPartition(time2, "third/partition");
                Assert.fail("Should have thrown Exception for duplicate partition");
            } catch (DataSetException e) {
            // expected
            }
            // remove first partition and validate
            fileSet.dropPartition(time);
            validateTimePartition(fileSet, time, null);
            // verify various ways to list partitions with various ranges
            validateTimePartitions(fileSet, 0L, MAX, expectSecond);
            validateTimePartitions(fileSet, time, time + 30 * MINUTE, expectNone);
            validateTimePartitions(fileSet, time + 30 * MINUTE, time2, expectNone);
            validateTimePartitions(fileSet, time + 30 * MINUTE, time2 + 30 * MINUTE, expectSecond);
            validateTimePartitions(fileSet, time - 30 * MINUTE, time2 + 30 * MINUTE, expectSecond);
            // try to drop the first partition again; dropping a non-existent partition should not throw
            try {
                fileSet.dropPartition(time);
            } catch (DataSetException e) {
                Assert.fail("Should not have have thrown Exception for removing non-existent partition");
            }
        }
    });
}
Also used : DataSetException(co.cask.cdap.api.dataset.DataSetException) TransactionAware(org.apache.tephra.TransactionAware) TransactionExecutor(org.apache.tephra.TransactionExecutor) TimePartitionedFileSet(co.cask.cdap.api.dataset.lib.TimePartitionedFileSet) Map(java.util.Map) ImmutableMap(com.google.common.collect.ImmutableMap) TransactionFailureException(org.apache.tephra.TransactionFailureException) DatasetManagementException(co.cask.cdap.api.dataset.DatasetManagementException) IOException(java.io.IOException) DataSetException(co.cask.cdap.api.dataset.DataSetException) Test(org.junit.Test)
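The range assertions above follow a start-inclusive, end-exclusive convention: a partition added at time is returned for a query over [start, end) exactly when start <= time < end, which is why the range from time + 30 * MINUTE to time2 yields nothing while extending the end by another 30 minutes picks up the second partition. A minimal sketch of the accessors the validate helpers presumably wrap, assuming TimePartitionedFileSet's getPartitionByTime and getPartitionsByTime methods:

// inside a transaction, as in the subroutine above
TimePartitionDetail single = fileSet.getPartitionByTime(time);   // null if no partition exists at exactly 'time'
Set<TimePartitionDetail> inRange = fileSet.getPartitionsByTime(0L, Long.MAX_VALUE);
for (TimePartitionDetail detail : inRange) {
    // each detail carries the partition time and the relative path that was passed to addPartition()
    System.out.println(detail.getTime() + " -> " + detail.getRelativePath());
}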

Example 34 with TransactionAware

use of org.apache.tephra.TransactionAware in project cdap by caskdata.

the class TimePartitionedFileSetTest method testInputPartitionPaths.

/**
 * Tests that the TPFS sets the file input paths correctly for the input time range.
 */
@Test
public void testInputPartitionPaths() throws Exception {
    // make sure the dataset has no partitions
    final TimePartitionedFileSet tpfs = dsFrameworkUtil.getInstance(TPFS_INSTANCE);
    TransactionAware txAwareDataset = (TransactionAware) tpfs;
    TransactionExecutor txnl = dsFrameworkUtil.newInMemoryTransactionExecutor(txAwareDataset);
    txnl.execute(new TransactionExecutor.Subroutine() {

        @Override
        public void apply() throws Exception {
            validateTimePartitions(tpfs, 0L, MAX, Collections.<Long, String>emptyMap());
        }
    });
    Date date = DATE_FORMAT.parse("6/4/12 10:00 am");
    final long time = date.getTime();
    txnl.execute(new TransactionExecutor.Subroutine() {

        @Override
        public void apply() throws Exception {
            tpfs.addPartition(time, "file");
            tpfs.addPartition(time + 5 * MINUTE, "file5");
            tpfs.addPartition(time + 10 * MINUTE, "file10");
            tpfs.addPartition(time + 12 * MINUTE, "file12");
        }
    });
    validateInputPaths(time, -10, -5);
    validateInputPaths(time, -10, 2, "file");
    validateInputPaths(time, 1, 11, "file5", "file10");
    validateInputPaths(time, 1, 15, "file5", "file10", "file12");
    validateInputPaths(time, 5, 10, "file5");
}
Also used : TransactionAware(org.apache.tephra.TransactionAware) TransactionExecutor(org.apache.tephra.TransactionExecutor) TimePartitionedFileSet(co.cask.cdap.api.dataset.lib.TimePartitionedFileSet) TransactionFailureException(org.apache.tephra.TransactionFailureException) DatasetManagementException(co.cask.cdap.api.dataset.DatasetManagementException) IOException(java.io.IOException) DataSetException(co.cask.cdap.api.dataset.DataSetException) Date(java.util.Date) Test(org.junit.Test)
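validateInputPaths(time, start, end, files...) presumably instantiates the TPFS with an input time range of [time + start minutes, time + end minutes) and asserts that the configured input paths end with the given file names, which is why the range of 1 to 11 minutes selects file5 and file10 but not file or file12. A hedged sketch of the argument setup such a check needs, using CDAP's TimePartitionedFileSetArguments (the helper itself and its exact assertions are assumptions):

Map<String, String> args = new HashMap<>();
TimePartitionedFileSetArguments.setInputStartTime(args, time + 1 * MINUTE);
TimePartitionedFileSetArguments.setInputEndTime(args, time + 11 * MINUTE);
TimePartitionedFileSet tpfsWithRange = dsFrameworkUtil.getInstance(TPFS_INSTANCE, args);
// the input format configuration of this instance should now cover only file5 and file10
Map<String, String> inputConfig = tpfsWithRange.getInputFormatConfiguration();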

Example 35 with TransactionAware

use of org.apache.tephra.TransactionAware in project cdap by caskdata.

the class CubeDatasetTest method testTxRetryOnFailure.

@Test
public void testTxRetryOnFailure() throws Exception {
    // This test ensures that no non-transactional cache is used in the cube dataset. For that, it
    // 1) simulates a transaction conflict for the first write to the cube
    // 2) attempts to write again, and writes successfully
    // 3) uses a second cube instance to read the result
    //
    // If the cube used a non-transactional cache, it would populate the entity mappings in the first tx and
    // reuse them for subsequent writes. When reading, there would then be no mapping in the entity table to
    // decode, because the first tx that wrote it is not visible (it was aborted on conflict).
    Aggregation agg1 = new DefaultAggregation(ImmutableList.of("dim1", "dim2", "dim3"));
    int resolution = 1;
    Cube cube1 = getCubeInternal("concurrCube", new int[] { resolution }, ImmutableMap.of("agg1", agg1));
    Cube cube2 = getCubeInternal("concurrCube", new int[] { resolution }, ImmutableMap.of("agg1", agg1));
    Configuration txConf = HBaseConfiguration.create();
    TransactionManager txManager = new TransactionManager(txConf);
    txManager.startAndWait();
    try {
        TransactionSystemClient txClient = new InMemoryTxSystemClient(txManager);
        // 1) write, then abort after commit to simulate a conflict
        Transaction tx = txClient.startShort();
        ((TransactionAware) cube1).startTx(tx);
        writeInc(cube1, "metric1", 1, 1, "1", "1", "1");
        ((TransactionAware) cube1).commitTx();
        txClient.abort(tx);
        ((TransactionAware) cube1).rollbackTx();
        // 2) write successfully
        tx = txClient.startShort();
        ((TransactionAware) cube1).startTx(tx);
        writeInc(cube1, "metric1", 1, 1, "1", "1", "1");
        // this time, commit the transaction successfully
        ((TransactionAware) cube1).commitTx();
        txClient.commitOrThrow(tx);
        ((TransactionAware) cube1).postTxCommit();
        // 3) read using a different cube instance
        tx = txClient.startShort();
        ((TransactionAware) cube2).startTx(tx);
        verifyCountQuery(cube2, 0, 2, resolution, "metric1", AggregationFunction.SUM, new HashMap<String, String>(), new ArrayList<String>(), ImmutableList.of(new TimeSeries("metric1", new HashMap<String, String>(), timeValues(1, 1))));
        // commit the read transaction as well
        ((TransactionAware) cube2).commitTx();
        txClient.commitOrThrow(tx);
        ((TransactionAware) cube2).postTxCommit();
    } finally {
        txManager.stopAndWait();
    }
}
Also used : TimeSeries(co.cask.cdap.api.dataset.lib.cube.TimeSeries) Configuration(org.apache.hadoop.conf.Configuration) HBaseConfiguration(org.apache.hadoop.hbase.HBaseConfiguration) InMemoryTxSystemClient(org.apache.tephra.inmemory.InMemoryTxSystemClient) TransactionSystemClient(org.apache.tephra.TransactionSystemClient) Transaction(org.apache.tephra.Transaction) Cube(co.cask.cdap.api.dataset.lib.cube.Cube) TransactionManager(org.apache.tephra.TransactionManager) TransactionAware(org.apache.tephra.TransactionAware) Test(org.junit.Test)
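Unlike the earlier examples, this test drives the TransactionAware lifecycle by hand instead of delegating to a TransactionExecutor. In outline, the protocol it exercises looks like the following sketch, assuming some TransactionAware dataset ds and a TransactionSystemClient txClient, inside a method declared throws Exception (commitTx() and rollbackTx() may throw):

Transaction tx = txClient.startShort();
((TransactionAware) ds).startTx(tx);            // bind the dataset's buffered changes to this transaction
// ... perform writes through the dataset API ...
((TransactionAware) ds).commitTx();             // persist the buffered changes under the tx's write pointer
try {
    txClient.commitOrThrow(tx);                 // commit at the transaction service; throws on conflict
    ((TransactionAware) ds).postTxCommit();     // success path: the dataset may discard its change set
} catch (TransactionFailureException e) {
    txClient.abort(tx);                         // make the transaction's writes invisible to readers
    ((TransactionAware) ds).rollbackTx();       // undo or discard the persisted changes
    throw e;
}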

Aggregations

TransactionAware (org.apache.tephra.TransactionAware): 97
Test (org.junit.Test): 65
TransactionExecutor (org.apache.tephra.TransactionExecutor): 48
Table (co.cask.cdap.api.dataset.table.Table): 39
Transaction (org.apache.tephra.Transaction): 34
IOException (java.io.IOException): 28
PartitionedFileSet (co.cask.cdap.api.dataset.lib.PartitionedFileSet): 24
DatasetAdmin (co.cask.cdap.api.dataset.DatasetAdmin): 23
HBaseTable (co.cask.cdap.data2.dataset2.lib.table.hbase.HBaseTable): 21
PartitionKey (co.cask.cdap.api.dataset.lib.PartitionKey): 19
DataSetException (co.cask.cdap.api.dataset.DataSetException): 16
List (java.util.List): 13
Put (co.cask.cdap.api.dataset.table.Put): 12
DatasetManagementException (co.cask.cdap.api.dataset.DatasetManagementException): 11
TimePartitionedFileSet (co.cask.cdap.api.dataset.lib.TimePartitionedFileSet): 11
Row (co.cask.cdap.api.dataset.table.Row): 11
ArrayList (java.util.ArrayList): 11
HashSet (java.util.HashSet): 11
TransactionFailureException (org.apache.tephra.TransactionFailureException): 11
PartitionNotFoundException (co.cask.cdap.api.dataset.PartitionNotFoundException): 10