Use of org.apache.tephra.TransactionAware in project cdap by caskdata.
From the class PartitionedFileSetTest, method testPartitionConsumingWithFilterAndLimit.
@Test
public void testPartitionConsumingWithFilterAndLimit() throws Exception {
final PartitionedFileSet dataset = dsFrameworkUtil.getInstance(pfsInstance);
final TransactionAware txAwareDataset = (TransactionAware) dataset;
final Set<PartitionKey> partitionKeys1 = Sets.newHashSet();
for (int i = 0; i < 10; i++) {
partitionKeys1.add(generateUniqueKey());
}
final Set<PartitionKey> partitionKeys2 = Sets.newHashSet();
for (int i = 0; i < 15; i++) {
partitionKeys2.add(generateUniqueKey());
}
final SimplePartitionConsumer partitionConsumer = new SimplePartitionConsumer(dataset);
// (consumption only happens at transaction borders)
for (final PartitionKey partitionKey : partitionKeys1) {
dsFrameworkUtil.newInMemoryTransactionExecutor(txAwareDataset).execute(new TransactionExecutor.Subroutine() {
@Override
public void apply() throws Exception {
dataset.getPartitionOutput(partitionKey).addPartition();
}
});
}
dsFrameworkUtil.newInMemoryTransactionExecutor(txAwareDataset).execute(new TransactionExecutor.Subroutine() {
@Override
public void apply() throws Exception {
// Initial consumption returns only the partitions corresponding to partitionKeys1, because only
// those partitions have been added to the dataset at this point
List<Partition> consumedPartitions = Lists.newArrayList();
// with limit = 1, the returned iterator is only size 1, even though there are more unconsumed partitions
Iterables.addAll(consumedPartitions, partitionConsumer.consumePartitions(1));
Assert.assertEquals(1, consumedPartitions.size());
// ask for 5 more
Iterables.addAll(consumedPartitions, partitionConsumer.consumePartitions(5));
Assert.assertEquals(6, consumedPartitions.size());
// ask for 5 more, but there are only 4 more unconsumed partitions (size of partitionKeys1 is 10).
Iterables.addAll(consumedPartitions, partitionConsumer.consumePartitions(5));
Assert.assertEquals(10, consumedPartitions.size());
Set<PartitionKey> retrievedKeys = Sets.newHashSet();
for (Partition consumedPartition : consumedPartitions) {
retrievedKeys.add(consumedPartition.getPartitionKey());
}
Assert.assertEquals(partitionKeys1, retrievedKeys);
}
});
dsFrameworkUtil.newInMemoryTransactionExecutor(txAwareDataset).execute(new TransactionExecutor.Subroutine() {
@Override
public void apply() throws Exception {
for (PartitionKey partitionKey : partitionKeys2) {
dataset.getPartitionOutput(partitionKey).addPartition();
}
}
});
dsFrameworkUtil.newInMemoryTransactionExecutor(txAwareDataset).execute(new TransactionExecutor.Subroutine() {
@Override
public void apply() throws Exception {
// using the same PartitionConsumer (which remembers the PartitionConsumerState) to consume additional
// partitions returns only the newly added partitions (corresponding to partitionKeys2)
List<Partition> consumedPartitions = Lists.newArrayList();
Iterables.addAll(consumedPartitions, partitionConsumer.consumePartitions(1));
// even though we set limit to 1 in the previous call to consumePartitions, we get all the elements of
// partitionKeys2, because they were all added in the same transaction
Set<PartitionKey> retrievedKeys = Sets.newHashSet();
for (Partition consumedPartition : consumedPartitions) {
retrievedKeys.add(consumedPartition.getPartitionKey());
}
Assert.assertEquals(partitionKeys2, retrievedKeys);
}
});
dsFrameworkUtil.newInMemoryTransactionExecutor(txAwareDataset).execute(new TransactionExecutor.Subroutine() {
@Override
public void apply() throws Exception {
// consuming the partitions again, without adding any new partitions, returns an empty list
Assert.assertTrue(partitionConsumer.consumePartitions().isEmpty());
}
});
dsFrameworkUtil.newInMemoryTransactionExecutor(txAwareDataset).execute(new TransactionExecutor.Subroutine() {
@Override
public void apply() throws Exception {
// creating a new PartitionConsumer resets the consumption state.
// test combination of filter and limit
SimplePartitionConsumer newPartitionConsumer = new SimplePartitionConsumer(dataset);
List<Partition> consumedPartitions = Lists.newArrayList();
// the partitionFilter will match partitionKeys [1, 7), of which there are 6
final PartitionFilter partitionFilter = PartitionFilter.builder().addRangeCondition("i", 1, 7).build();
final Predicate<PartitionDetail> predicate = new Predicate<PartitionDetail>() {
@Override
public boolean apply(PartitionDetail partitionDetail) {
return partitionFilter.match(partitionDetail.getPartitionKey());
}
};
// applying the filter (which narrows it down to 6 elements) together with a limit of 4 results in 4 consumed partitions
Iterables.addAll(consumedPartitions, newPartitionConsumer.consumePartitions(4, predicate));
Assert.assertEquals(4, consumedPartitions.size());
// applying a limit of 3 with the same filter returns the remaining 2 elements that match the filter
Iterables.addAll(consumedPartitions, newPartitionConsumer.consumePartitions(3, predicate));
Assert.assertEquals(6, consumedPartitions.size());
// assert that the returned partitions have partition keys whose i values lie in the range [1, 7)
Set<Integer> expectedIFields = new HashSet<>();
for (int i = 1; i < 7; i++) {
expectedIFields.add(i);
}
Set<Integer> actualIFields = new HashSet<>();
for (Partition consumedPartition : consumedPartitions) {
actualIFields.add((Integer) consumedPartition.getPartitionKey().getField("i"));
}
Assert.assertEquals(expectedIFields, actualIFields);
}
});
}
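A minimal standalone sketch of how the pieces above fit together, assuming (as the range condition on "i" suggests) that generateUniqueKey() produces keys carrying an int field named "i"; the imports reflect the co.cask.cdap API used in this project.
import co.cask.cdap.api.dataset.lib.PartitionFilter;
import co.cask.cdap.api.dataset.lib.PartitionKey;
// Hypothetical sketch: a key with an int field "i", matched against the same range
// condition the test builds (lower bound inclusive, upper bound exclusive).
PartitionKey key = PartitionKey.builder().addIntField("i", 3).build();
PartitionFilter filter = PartitionFilter.builder().addRangeCondition("i", 1, 7).build();
boolean matches = filter.match(key); // true for i = 1..6, false otherwise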
Use of org.apache.tephra.TransactionAware in project cdap by caskdata.
From the class TimePartitionedFileSetTest, method testInputConfigurationFailure.
private void testInputConfigurationFailure(Map<String, String> arguments, final String why) throws Exception {
final TimePartitionedFileSet dataset = dsFrameworkUtil.getInstance(TPFS_INSTANCE, arguments);
TransactionAware txAwareDataset = (TransactionAware) dataset;
dsFrameworkUtil.newInMemoryTransactionExecutor(txAwareDataset).execute(new TransactionExecutor.Subroutine() {
@Override
public void apply() throws Exception {
try {
dataset.getInputFormatConfiguration();
Assert.fail("getInputFormatConfiguration should fail " + why);
} catch (Exception e) {
// expected
}
}
});
}
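A hedged usage sketch for this helper: the failure cases below (an input time range with only one bound set) are assumptions based on the method's purpose, and the runtime arguments are populated through TimePartitionedFileSetArguments as in the other TPFS tests.
// Hypothetical invocations (not part of the original test class):
Map<String, String> startOnly = new HashMap<>();
TimePartitionedFileSetArguments.setInputStartTime(startOnly, System.currentTimeMillis() - HOUR);
testInputConfigurationFailure(startOnly, "if only the input start time is set");
Map<String, String> endOnly = new HashMap<>();
TimePartitionedFileSetArguments.setInputEndTime(endOnly, System.currentTimeMillis());
testInputConfigurationFailure(endOnly, "if only the input end time is set");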
Use of org.apache.tephra.TransactionAware in project cdap by caskdata.
From the class TimePartitionedFileSetTest, method testAddGetPartitions.
@Test
public void testAddGetPartitions() throws Exception {
final TimePartitionedFileSet fileSet = dsFrameworkUtil.getInstance(TPFS_INSTANCE);
TransactionAware txAwareDataset = (TransactionAware) fileSet;
dsFrameworkUtil.newInMemoryTransactionExecutor(txAwareDataset).execute(new TransactionExecutor.Subroutine() {
@Override
public void apply() throws Exception {
// this is an arbitrary date to use as the test time
long time = DATE_FORMAT.parse("12/10/14 5:10 am").getTime();
long time2 = time + HOUR;
String firstPath = "first/partition";
String secondPath = "second/partition";
// make sure the file set has no partitions initially
validateTimePartition(fileSet, time, null);
validateTimePartitions(fileSet, 0L, MAX, Collections.<Long, String>emptyMap());
// add a partition, verify getPartition() works
fileSet.addPartition(time, firstPath);
validateTimePartition(fileSet, time, firstPath);
Map<Long, String> expectNone = Collections.emptyMap();
Map<Long, String> expectFirst = ImmutableMap.of(time, firstPath);
Map<Long, String> expectSecond = ImmutableMap.of(time2, secondPath);
Map<Long, String> expectBoth = ImmutableMap.of(time, firstPath, time2, secondPath);
// verify various ways to list partitions with various ranges
validateTimePartitions(fileSet, time + MINUTE, MAX, expectNone);
validateTimePartitions(fileSet, 0L, time, expectNone);
validateTimePartitions(fileSet, 0L, MAX, expectFirst);
validateTimePartitions(fileSet, 0L, time + MINUTE, expectFirst);
validateTimePartitions(fileSet, 0L, time + HOUR, expectFirst);
validateTimePartitions(fileSet, time - HOUR, time + HOUR, expectFirst);
// add and verify another partition
fileSet.addPartition(time2, secondPath);
validateTimePartition(fileSet, time2, secondPath);
// verify various ways to list partitions with various ranges
validateTimePartitions(fileSet, 0L, MAX, expectBoth);
validateTimePartitions(fileSet, time, time + 30 * MINUTE, expectFirst);
validateTimePartitions(fileSet, time + 30 * MINUTE, time2, expectNone);
validateTimePartitions(fileSet, time + 30 * MINUTE, time2 + 30 * MINUTE, expectSecond);
validateTimePartitions(fileSet, time - 30 * MINUTE, time2 + 30 * MINUTE, expectBoth);
// try to add another partition with the same key
try {
fileSet.addPartition(time2, "third/partition");
Assert.fail("Should have thrown Exception for duplicate partition");
} catch (DataSetException e) {
// expected
}
// remove first partition and validate
fileSet.dropPartition(time);
validateTimePartition(fileSet, time, null);
// verify various ways to list partitions with various ranges
validateTimePartitions(fileSet, 0L, MAX, expectSecond);
validateTimePartitions(fileSet, time, time + 30 * MINUTE, expectNone);
validateTimePartitions(fileSet, time + 30 * MINUTE, time2, expectNone);
validateTimePartitions(fileSet, time + 30 * MINUTE, time2 + 30 * MINUTE, expectSecond);
validateTimePartitions(fileSet, time - 30 * MINUTE, time2 + 30 * MINUTE, expectSecond);
// try to drop the same partition again; this should not throw
try {
fileSet.dropPartition(time);
} catch (DataSetException e) {
Assert.fail("Should not have have thrown Exception for removing non-existent partition");
}
}
});
}
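The validateTimePartition/validateTimePartitions helpers are not included in this snippet. Below is a minimal sketch of what such a check could look like, assuming TimePartitionedFileSet.getPartitionsByTime(startTime, endTime) returns the partitions in the range as TimePartitionDetail objects:
// Hypothetical validation helper (the real validateTimePartitions is not shown above).
private void validateTimePartitionsSketch(TimePartitionedFileSet fileSet, long startTime, long endTime,
Map<Long, String> expected) {
Map<Long, String> actual = new HashMap<>();
for (TimePartitionDetail partition : fileSet.getPartitionsByTime(startTime, endTime)) {
actual.put(partition.getTime(), partition.getRelativePath());
}
Assert.assertEquals(expected, actual);
}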
Use of org.apache.tephra.TransactionAware in project cdap by caskdata.
From the class TimePartitionedFileSetTest, method testInputPartitionPaths.
/**
* Tests that the TPFS sets the file input paths correctly for the input time range.
*/
@Test
public void testInputPartitionPaths() throws Exception {
// make sure the dataset has no partitions
final TimePartitionedFileSet tpfs = dsFrameworkUtil.getInstance(TPFS_INSTANCE);
TransactionAware txAwareDataset = (TransactionAware) tpfs;
TransactionExecutor txnl = dsFrameworkUtil.newInMemoryTransactionExecutor(txAwareDataset);
txnl.execute(new TransactionExecutor.Subroutine() {
@Override
public void apply() throws Exception {
validateTimePartitions(tpfs, 0L, MAX, Collections.<Long, String>emptyMap());
}
});
Date date = DATE_FORMAT.parse("6/4/12 10:00 am");
final long time = date.getTime();
txnl.execute(new TransactionExecutor.Subroutine() {
@Override
public void apply() throws Exception {
tpfs.addPartition(time, "file");
tpfs.addPartition(time + 5 * MINUTE, "file5");
tpfs.addPartition(time + 10 * MINUTE, "file10");
tpfs.addPartition(time + 12 * MINUTE, "file12");
}
});
validateInputPaths(time, -10, -5);
validateInputPaths(time, -10, 2, "file");
validateInputPaths(time, 1, 11, "file5", "file10");
validateInputPaths(time, 1, 15, "file5", "file10", "file12");
validateInputPaths(time, 5, 10, "file5");
}
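validateInputPaths is another helper that is not included here; conceptually it sets an input time range (in minutes relative to time, judging from the calls above) on a runtime-arguments map and checks which partition paths appear in the resulting input configuration. A rough sketch of the argument setup, with the range interpretation being an assumption consistent with the assertions above:
// Hypothetical sketch of the setup behind validateInputPaths(time, 1, 11, "file5", "file10"):
Map<String, String> arguments = new HashMap<>();
TimePartitionedFileSetArguments.setInputStartTime(arguments, time + 1 * MINUTE);
TimePartitionedFileSetArguments.setInputEndTime(arguments, time + 11 * MINUTE);
// a dataset instance obtained with these arguments should select only the partitions whose
// times fall within the range, i.e. "file5" and "file10" (the end bound appears to be exclusive)
TimePartitionedFileSet rangeTpfs = dsFrameworkUtil.getInstance(TPFS_INSTANCE, arguments);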
Use of org.apache.tephra.TransactionAware in project cdap by caskdata.
From the class CubeDatasetTest, method testTxRetryOnFailure.
@Test
public void testTxRetryOnFailure() throws Exception {
// This test ensures that no non-transactional cache is used in the cube dataset. For that, it
// 1) simulates a transaction conflict for the first write to the cube
// 2) attempts to write again, and writes successfully
// 3) uses a second cube instance to read the result
//
// If the cube used a non-transactional cache, it would fill the entity mappings in the first tx and only
// use them to write data. Hence, when reading, there would be no mapping in the entity table to decode, as
// the first tx that wrote it is not visible (it was aborted on conflict).
Aggregation agg1 = new DefaultAggregation(ImmutableList.of("dim1", "dim2", "dim3"));
int resolution = 1;
Cube cube1 = getCubeInternal("concurrCube", new int[] { resolution }, ImmutableMap.of("agg1", agg1));
Cube cube2 = getCubeInternal("concurrCube", new int[] { resolution }, ImmutableMap.of("agg1", agg1));
Configuration txConf = HBaseConfiguration.create();
TransactionManager txManager = new TransactionManager(txConf);
txManager.startAndWait();
try {
TransactionSystemClient txClient = new InMemoryTxSystemClient(txManager);
// 1) write and abort after commit to simulate a conflict
Transaction tx = txClient.startShort();
((TransactionAware) cube1).startTx(tx);
writeInc(cube1, "metric1", 1, 1, "1", "1", "1");
((TransactionAware) cube1).commitTx();
txClient.abort(tx);
((TransactionAware) cube1).rollbackTx();
// 2) write successfully
tx = txClient.startShort();
((TransactionAware) cube1).startTx(tx);
writeInc(cube1, "metric1", 1, 1, "1", "1", "1");
// this time the commit succeeds
((TransactionAware) cube1).commitTx();
txClient.commitOrThrow(tx);
((TransactionAware) cube1).postTxCommit();
// 3) read using different cube instance
tx = txClient.startShort();
((TransactionAware) cube2).startTx(tx);
verifyCountQuery(cube2, 0, 2, resolution, "metric1", AggregationFunction.SUM, new HashMap<String, String>(), new ArrayList<String>(), ImmutableList.of(new TimeSeries("metric1", new HashMap<String, String>(), timeValues(1, 1))));
// commit the read transaction
((TransactionAware) cube2).commitTx();
txClient.commitOrThrow(tx);
((TransactionAware) cube2).postTxCommit();
} finally {
txManager.stopAndWait();
}
}
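The calls above follow Tephra's client-side protocol for a TransactionAware. A condensed sketch of that lifecycle, mirroring the calls used in this test (conflict detection via canCommit is omitted here, just as it is in the test):
// Condensed lifecycle sketch: start, write, flush client-side changes, then commit or roll back.
Transaction tx = txClient.startShort();
((TransactionAware) cube1).startTx(tx);
try {
writeInc(cube1, "metric1", 1, 1, "1", "1", "1");
((TransactionAware) cube1).commitTx(); // flush buffered writes to the underlying table
txClient.commitOrThrow(tx); // commit in the transaction manager
((TransactionAware) cube1).postTxCommit();
} catch (Exception e) {
txClient.abort(tx); // make the transaction's writes invisible to others
((TransactionAware) cube1).rollbackTx(); // undo any changes persisted by this tx
throw e;
}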