Use of io.cdap.cdap.api.dataset.DataSetException in project cdap by cdapio.
From class TimePartitionedFileSetTest, method testAddGetPartitions:
@Test
public void testAddGetPartitions() throws Exception {
final TimePartitionedFileSet fileSet = dsFrameworkUtil.getInstance(TPFS_INSTANCE);
TransactionAware txAwareDataset = (TransactionAware) fileSet;
dsFrameworkUtil.newInMemoryTransactionExecutor(txAwareDataset).execute(new TransactionExecutor.Subroutine() {
@Override
public void apply() throws Exception {
// this is an arbitrary date to use as the test time
long time = DATE_FORMAT.parse("12/10/14 5:10 am").getTime();
long time2 = time + HOUR;
String firstPath = "first/partition";
String secondPath = "second/partition";
// make sure the file set has no partitions initially
validateTimePartition(fileSet, time, null);
validateTimePartitions(fileSet, 0L, MAX, Collections.<Long, String>emptyMap());
// add a partition, verify getPartition() works
fileSet.addPartition(time, firstPath);
validateTimePartition(fileSet, time, firstPath);
Map<Long, String> expectNone = Collections.emptyMap();
Map<Long, String> expectFirst = ImmutableMap.of(time, firstPath);
Map<Long, String> expectSecond = ImmutableMap.of(time2, secondPath);
Map<Long, String> expectBoth = ImmutableMap.of(time, firstPath, time2, secondPath);
// verify various ways to list partitions with various ranges
validateTimePartitions(fileSet, time + MINUTE, MAX, expectNone);
validateTimePartitions(fileSet, 0L, time, expectNone);
validateTimePartitions(fileSet, 0L, MAX, expectFirst);
validateTimePartitions(fileSet, 0L, time + MINUTE, expectFirst);
validateTimePartitions(fileSet, 0L, time + HOUR, expectFirst);
validateTimePartitions(fileSet, time - HOUR, time + HOUR, expectFirst);
// add and verify another partition
fileSet.addPartition(time2, secondPath);
validateTimePartition(fileSet, time2, secondPath);
// verify various ways to list partitions with various ranges
validateTimePartitions(fileSet, 0L, MAX, expectBoth);
validateTimePartitions(fileSet, time, time + 30 * MINUTE, expectFirst);
validateTimePartitions(fileSet, time + 30 * MINUTE, time2, expectNone);
validateTimePartitions(fileSet, time + 30 * MINUTE, time2 + 30 * MINUTE, expectSecond);
validateTimePartitions(fileSet, time - 30 * MINUTE, time2 + 30 * MINUTE, expectBoth);
// try to add another partition with the same key
try {
fileSet.addPartition(time2, "third/partition");
Assert.fail("Should have thrown Exception for duplicate partition");
} catch (DataSetException e) {
// expected
}
// remove first partition and validate
fileSet.dropPartition(time);
validateTimePartition(fileSet, time, null);
// verify various ways to list partitions with various ranges
validateTimePartitions(fileSet, 0L, MAX, expectSecond);
validateTimePartitions(fileSet, time, time + 30 * MINUTE, expectNone);
validateTimePartitions(fileSet, time + 30 * MINUTE, time2, expectNone);
validateTimePartitions(fileSet, time + 30 * MINUTE, time2 + 30 * MINUTE, expectSecond);
validateTimePartitions(fileSet, time - 30 * MINUTE, time2 + 30 * MINUTE, expectSecond);
// try to drop the same partition again; dropping a non-existent partition should not throw
try {
fileSet.dropPartition(time);
} catch (DataSetException e) {
Assert.fail("Should not have have thrown Exception for removing non-existent partition");
}
}
});
}
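The validateTimePartition and validateTimePartitions helpers belong to the test class and are not shown in this excerpt. A plausible sketch of the latter, assuming TimePartitionedFileSet.getPartitionsByTime(startTime, endTime) returns the TimePartitionDetail objects in the given time range:

private void validateTimePartitions(TimePartitionedFileSet fileSet, long startTime, long endTime, Map<Long, String> expected) {
  // fetch all partitions whose time falls within the requested range
  Set<TimePartitionDetail> partitions = fileSet.getPartitionsByTime(startTime, endTime);
  Assert.assertEquals(expected.size(), partitions.size());
  for (TimePartitionDetail partition : partitions) {
    // each partition's relative path must match the expected path for its time
    Assert.assertEquals(expected.get(partition.getTime()), partition.getRelativePath());
  }
}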
Use of io.cdap.cdap.api.dataset.DataSetException in project cdap by cdapio.
From class TimePartitionedFileSetTest, method testPartitionMetadata:
@Test
public void testPartitionMetadata() throws Exception {
final TimePartitionedFileSet tpfs = dsFrameworkUtil.getInstance(TPFS_INSTANCE);
TransactionAware txAware = (TransactionAware) tpfs;
dsFrameworkUtil.newInMemoryTransactionExecutor(txAware).execute(new TransactionExecutor.Subroutine() {
@Override
public void apply() throws Exception {
// make sure the dataset has no partitions
validateTimePartitions(tpfs, 0L, MAX, Collections.<Long, String>emptyMap());
Date date = DATE_FORMAT.parse("6/4/12 10:00 am");
long time = date.getTime();
// keep track of all the metadata added
Map<String, String> allMetadata = Maps.newHashMap();
Map<String, String> metadata = ImmutableMap.of("key1", "value1", "key2", "value3", "key100", "value4");
tpfs.addPartition(time, "file", metadata);
allMetadata.putAll(metadata);
TimePartitionDetail partitionByTime = tpfs.getPartitionByTime(time);
Assert.assertNotNull(partitionByTime);
Assert.assertEquals(metadata, partitionByTime.getMetadata().asMap());
tpfs.addMetadata(time, "key3", "value4");
allMetadata.put("key3", "value4");
// with the setMetadata API, adding an entry for a key that already exists overwrites the previous value
tpfs.setMetadata(time, Collections.singletonMap("key3", "value5"));
allMetadata.put("key3", "value5");
Map<String, String> newMetadata = ImmutableMap.of("key4", "value4", "key5", "value5");
tpfs.addMetadata(time, newMetadata);
allMetadata.putAll(newMetadata);
try {
// attempting to add metadata for an existing key throws a DataSetException
tpfs.addMetadata(time, "key3", "value5");
Assert.fail("Expected not to be able to update an existing metadata entry");
} catch (DataSetException expected) {
}
partitionByTime = tpfs.getPartitionByTime(time);
Assert.assertNotNull(partitionByTime);
Assert.assertEquals(allMetadata, partitionByTime.getMetadata().asMap());
// remove metadata entries; specifying metadata key that does not exist ('key6') does not cause an error
tpfs.removeMetadata(time, ImmutableSet.of("key4", "key5", "key6"));
allMetadata.remove("key4");
allMetadata.remove("key5");
partitionByTime = tpfs.getPartitionByTime(time);
Assert.assertNotNull(partitionByTime);
Assert.assertEquals(allMetadata, partitionByTime.getMetadata().asMap());
}
});
}
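As the test demonstrates, addMetadata refuses to overwrite an existing key (it throws a DataSetException), while setMetadata overwrites. A hedged sketch of an add-or-overwrite helper built on these semantics (the name upsertMetadata is hypothetical):

static void upsertMetadata(TimePartitionedFileSet tpfs, long time, String key, String value) {
  try {
    // throws DataSetException if the key already has a value
    tpfs.addMetadata(time, key, value);
  } catch (DataSetException e) {
    // fall back to setMetadata, which overwrites the existing value
    tpfs.setMetadata(time, Collections.singletonMap(key, value));
  }
}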
Use of io.cdap.cdap.api.dataset.DataSetException in project cdap by cdapio.
From class PartitionRollbackTestRun, method testPFSRollback:
/*
* This tests all the following cases:
*
* 1. addPartition(location) fails because partition already exists
* 2. addPartition(location) fails because Hive partition already exists
* 3. addPartition(location) succeeds but transaction fails
* 4. getPartitionOutput() fails because partition already exists
* 5. partitionOutput.addPartition() fails because Hive partition already exists
* 6. partitionOutput.addPartition() succeeds but transaction fails
* 7. mapreduce writing partition fails because location already exists
* 8. mapreduce writing partition fails because partition already exists
* 9. mapreduce writing partition fails because Hive partition already exists
* 10. mapreduce writing dynamic partition fails because location already exists
* 11. mapreduce writing dynamic partition fails because partition already exists
* 12. mapreduce writing dynamic partition fails because Hive partition already exists
* 13. multi-output mapreduce writing partition fails because location already exists
* 13a. first output fails, other output must roll back partitions 0 and 5
* 13b. second output fails, first output must roll back partitions 0 and 5
* 14. multi-output mapreduce writing partition fails because partition already exists
* 14a. first output fails, other output must roll back partition 5
* 14b. second output fails, first output must roll back partition 5
* 15. multi-output mapreduce writing partition fails because Hive partition already exists
* 15a. first output fails, other output must roll back partitions 0 and 5
* 15b. second output fails, first output must roll back partitions 0 and 5
*
* For all these cases, we validate that existing files and partitions are preserved, and newly
* added files and partitions are rolled back.
*/
@Test
public void testPFSRollback() throws Exception {
ApplicationManager appManager = deployApplication(AppWritingToPartitioned.class);
MapReduceManager mrManager = appManager.getMapReduceManager(MAPREDUCE);
int numRuns = 0;
Validator pfsValidator = new Validator(PFS);
Validator otherValidator = new Validator(OTHER);
final UnitTestManager.UnitTestDatasetManager<PartitionedFileSet> pfsManager = pfsValidator.getPfsManager();
final PartitionedFileSet pfs = pfsManager.get();
final PartitionedFileSet other = otherValidator.getPfsManager().get();
final String path3 = pfsValidator.getRelativePath3();
// 1. addPartition(location) fails because partition already exists
try {
pfsManager.execute(new Runnable() {
@Override
public void run() {
pfs.addPartition(KEY_1, path3);
}
});
Assert.fail("Expected tx to fail because partition for number=1 already exists");
} catch (TransactionFailureException e) {
// expected
}
pfsValidator.validate();
// 2. addPartition(location) fails because Hive partition already exists
try {
pfsManager.execute(new Runnable() {
@Override
public void run() {
pfs.addPartition(KEY_4, path3);
}
});
Assert.fail("Expected tx to fail because hive partition for number=1 already exists");
} catch (TransactionFailureException e) {
// expected
}
pfsValidator.validate();
// 3. addPartition(location) succeeds but transaction fails
try {
pfsManager.execute(new Runnable() {
@Override
public void run() {
pfs.addPartition(KEY_3, path3);
throw new RuntimeException("fail the tx");
}
});
Assert.fail("Expected tx to fail because it threw a runtime exception");
} catch (TransactionFailureException e) {
// expected
}
pfsValidator.validate();
// 4. getPartitionOutput() fails because partition already exists
try {
pfs.getPartitionOutput(KEY_1);
Assert.fail("Expected getPartitionOutput to fail, because the partition already exists.");
} catch (DataSetException expected) {
}
pfsValidator.validate();
// 5. partitionOutput.addPartition() fails because Hive partition already exists
final PartitionOutput output4x = pfs.getPartitionOutput(KEY_4);
final Location location4x = output4x.getLocation();
try (Writer writer = new OutputStreamWriter(location4x.append("file").getOutputStream())) {
writer.write("4x,4x\n");
}
try {
pfsManager.execute(new Runnable() {
@Override
public void run() {
output4x.addPartition();
}
});
Assert.fail("Expected tx to fail because hive partition for number=4 already exists");
} catch (TransactionFailureException e) {
// expected
}
pfsValidator.validate();
Assert.assertFalse(location4x.exists());
// 6. partitionOutput.addPartition() succeeds but transaction fails
final PartitionOutput output5x = pfs.getPartitionOutput(KEY_5);
final Location location5x = output5x.getLocation();
try (Writer writer = new OutputStreamWriter(location5x.append("file").getOutputStream())) {
writer.write("5x,5x\n");
}
try {
pfsManager.execute(new Runnable() {
@Override
public void run() {
output5x.addPartition();
throw new RuntimeException("fail the tx");
}
});
Assert.fail("Expected tx to fail because it threw a runtime exception");
} catch (TransactionFailureException e) {
// expected
}
pfsValidator.validate();
Assert.assertFalse(location5x.exists());
// 7. mapreduce writing partition fails because location already exists
mrManager.start(ImmutableMap.of(PFS_OUT, "1", "input.text", "1x"));
mrManager.waitForRuns(ProgramRunStatus.FAILED, ++numRuns, 2, TimeUnit.MINUTES);
pfsValidator.validate();
// 8. mapreduce writing partition fails because partition already exists
mrManager.start(ImmutableMap.of(PFS_OUT, "2", "input.text", "2x"));
mrManager.waitForRuns(ProgramRunStatus.FAILED, ++numRuns, 2, TimeUnit.MINUTES);
pfsValidator.validate();
Assert.assertFalse(pfs.getPartitionOutput(KEY_2).getLocation().exists());
// 9. mapreduce writing partition fails because Hive partition already exists
mrManager.start(ImmutableMap.of(PFS_OUT, "4", "input.text", "4x"));
mrManager.waitForRuns(ProgramRunStatus.FAILED, ++numRuns, 2, TimeUnit.MINUTES);
pfsValidator.validate();
Assert.assertFalse(pfs.getPartitionOutput(KEY_4).getLocation().exists());
// 10. mapreduce writing dynamic partition fails because location already exists
mrManager.start(ImmutableMap.of("input.text", "3x 5x"));
mrManager.waitForRuns(ProgramRunStatus.FAILED, ++numRuns, 2, TimeUnit.MINUTES);
pfsValidator.validate();
Assert.assertFalse(pfs.getPartitionOutput(KEY_5).getLocation().exists());
// 11. mapreduce writing dynamic partition fails because partition already exists
mrManager.start(ImmutableMap.of("input.text", "2x 5x"));
mrManager.waitForRuns(ProgramRunStatus.FAILED, ++numRuns, 2, TimeUnit.MINUTES);
pfsValidator.validate();
Assert.assertFalse(pfs.getPartitionOutput(KEY_2).getLocation().exists());
Assert.assertFalse(pfs.getPartitionOutput(KEY_5).getLocation().exists());
// 12. mapreduce writing dynamic partition fails because Hive partition already exists
mrManager.start(ImmutableMap.of("input.text", "0x 4x 5x"));
mrManager.waitForRuns(ProgramRunStatus.FAILED, ++numRuns, 2, TimeUnit.MINUTES);
pfsValidator.validate();
Assert.assertFalse(pfs.getPartitionOutput(KEY_0).getLocation().exists());
Assert.assertFalse(pfs.getPartitionOutput(KEY_4).getLocation().exists());
Assert.assertFalse(pfs.getPartitionOutput(KEY_5).getLocation().exists());
// 13. multi-output mapreduce writing partition fails because location already exists
// 13a. first output fails, other output must roll back partitions 0 and 5
mrManager.start(ImmutableMap.of("output.datasets", BOTH, PFS_OUT, "1", "input.text", "0x 5x"));
mrManager.waitForRuns(ProgramRunStatus.FAILED, ++numRuns, 2, TimeUnit.MINUTES);
pfsValidator.validate();
otherValidator.validate();
Assert.assertFalse(other.getPartitionOutput(KEY_0).getLocation().exists());
Assert.assertFalse(other.getPartitionOutput(KEY_5).getLocation().exists());
// 13b. second output fails, first output must roll back partitions 0 and 5
mrManager.start(ImmutableMap.of("output.datasets", BOTH, OTHER_OUT, "1", "input.text", "0x 5x"));
mrManager.waitForRuns(ProgramRunStatus.FAILED, ++numRuns, 2, TimeUnit.MINUTES);
pfsValidator.validate();
otherValidator.validate();
Assert.assertFalse(pfs.getPartitionOutput(KEY_0).getLocation().exists());
Assert.assertFalse(pfs.getPartitionOutput(KEY_5).getLocation().exists());
// 14. multi-output mapreduce writing partition fails because partition already exists
// 14a. first output fails, other output must roll back partition 5
mrManager.start(ImmutableMap.of("output.datasets", BOTH, PFS_OUT, "2", OTHER_OUT, "5", "input.text", "2x 5x"));
mrManager.waitForRuns(ProgramRunStatus.FAILED, ++numRuns, 2, TimeUnit.MINUTES);
pfsValidator.validate();
otherValidator.validate();
Assert.assertFalse(other.getPartitionOutput(KEY_5).getLocation().exists());
// 14b. second output fails, first output must roll back partition 5
mrManager.start(ImmutableMap.of("output.datasets", BOTH, PFS_OUT, "5", OTHER_OUT, "2", "input.text", "2x 5x"));
mrManager.waitForRuns(ProgramRunStatus.FAILED, ++numRuns, 2, TimeUnit.MINUTES);
pfsValidator.validate();
otherValidator.validate();
Assert.assertFalse(pfs.getPartitionOutput(KEY_5).getLocation().exists());
// 15. multi-output mapreduce writing partition fails because Hive partition already exists
// 15a. first output fails, other output must roll back partitions 0 and 5
mrManager.start(ImmutableMap.of("output.datasets", BOTH, PFS_OUT, "4", "input.text", "0x 5x"));
mrManager.waitForRuns(ProgramRunStatus.FAILED, ++numRuns, 2, TimeUnit.MINUTES);
pfsValidator.validate();
otherValidator.validate();
Assert.assertFalse(pfs.getPartitionOutput(KEY_4).getLocation().exists());
Assert.assertFalse(other.getPartitionOutput(KEY_0).getLocation().exists());
Assert.assertFalse(other.getPartitionOutput(KEY_5).getLocation().exists());
// 15b. second output fails, first output must roll back partitions 0 and 5
mrManager.start(ImmutableMap.of("output.datasets", BOTH, OTHER_OUT, "4", "input.text", "0x 5x"));
mrManager.waitForRuns(ProgramRunStatus.FAILED, ++numRuns, 2, TimeUnit.MINUTES);
pfsValidator.validate();
otherValidator.validate();
Assert.assertFalse(other.getPartitionOutput(KEY_4).getLocation().exists());
Assert.assertFalse(pfs.getPartitionOutput(KEY_0).getLocation().exists());
Assert.assertFalse(pfs.getPartitionOutput(KEY_5).getLocation().exists());
}
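For contrast with the failure cases above, here is a minimal sketch of the happy path these rollbacks protect: write the partition's file first, then register the partition inside a transaction. KEY_6 is a hypothetical key with no existing partition; the pattern mirrors cases 5 and 6 above.

final PartitionOutput output6 = pfs.getPartitionOutput(KEY_6); // KEY_6: hypothetical unused key
Location location6 = output6.getLocation();
try (Writer writer = new OutputStreamWriter(location6.append("file").getOutputStream())) {
  writer.write("6,6\n");
}
pfsManager.execute(new Runnable() {
  @Override
  public void run() {
    // commits the partition's metadata; if this tx failed, the written file would be rolled back
    output6.addPartition();
  }
});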
Use of io.cdap.cdap.api.dataset.DataSetException in project cdap by cdapio.
From class AppWithCustomTx, method recordTransaction:
/**
 * If in a transaction, records the timeout that the current transaction was given, or "default" if no explicit
 * timeout was given. Otherwise, verifies that writing to the table without a transaction fails.
 *
 * Note: we know whether an explicit timeout was given, and what it was, because we inject a
 * {@link RevealingTxSystemClient}, which returns a {@link RevealingTransaction} for
 * {@link TransactionSystemClient#startShort(int)} only.
 */
static void recordTransaction(DatasetContext context, String row, String column) {
TransactionCapturingTable capture = context.getDataset(CAPTURE);
Transaction tx = capture.getTx();
// we cannot cast because the RevealingTransaction is not visible in the program class loader
String value = DEFAULT;
if (tx == null) {
try {
capture.getTable().put(new Put(row, column, value));
throw new RuntimeException("put to table without transaction should have failed.");
} catch (DataSetException e) {
// expected
}
return;
}
if ("RevealingTransaction".equals(tx.getClass().getSimpleName())) {
int txTimeout;
try {
txTimeout = (int) tx.getClass().getField("timeout").get(tx);
} catch (Exception e) {
throw Throwables.propagate(e);
}
value = String.valueOf(txTimeout);
}
capture.getTable().put(new Put(row, column, value));
}
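A hypothetical call site, to illustrate: a worker that runs an explicit transaction with a 30-second timeout would cause recordTransaction to store "30" for the given row and column (the worker name and row/column values below are illustrative):

// sketch: inside a worker's run() method; getContext() returns the WorkerContext
try {
  getContext().execute(30, new TxRunnable() {
    @Override
    public void run(DatasetContext context) throws Exception {
      recordTransaction(context, "myWorker", "runtime");
    }
  });
} catch (TransactionFailureException e) {
  throw new RuntimeException(e);
}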