Use of io.cdap.cdap.api.dataset.DataSetException in project cdap by caskdata.
The class PartitionRollbackTestRun, method testPFSRollback.
/*
* This tests all the following cases:
*
* 1. addPartition(location) fails because partition already exists
* 2. addPartition(location) fails because Hive partition already exists
* 3. addPartition(location) succeeds but transaction fails
* 4. getPartitionOutput() fails because partition already exists
* 5. partitionOutput.addPartition() fails because Hive partition already exists
* 6. partitionOutput.addPartition() succeeds but transaction fails
* 7. mapreduce writing partition fails because location already exists
* 8. mapreduce writing partition fails because partition already exists
* 9. mapreduce writing partition fails because Hive partition already exists
* 10. mapreduce writing dynamic partition fails because location already exists
* 11. mapreduce writing dynamic partition fails because partition already exists
* 12. mapreduce writing dynamic partition fails because Hive partition already exists
* 13. multi-output mapreduce writing partition fails because location already exists
* 13a. first output fails, other output must rollback partitions 0 and 5
* 13b. second output fails, first output must rollback partitions 0 and 5
* 14. multi-output mapreduce writing partition fails because partition already exists
* 14a. first output fails, other output must rollback partition 5
* 14b. second output fails, first output must rollback partition 5
* 15. multi-output mapreduce writing partition fails because Hive partition already exists
* 15a. first output fails, other output must rollback partitions 0 and 5
* 15b. second output fails, first output must rollback partitions 0 and 5
*
* For all these cases, we validate that existing files and partitions are preserved, and newly
* added files and partitions are rolled back.
*/
@Test
public void testPFSRollback() throws Exception {
ApplicationManager appManager = deployApplication(AppWritingToPartitioned.class);
MapReduceManager mrManager = appManager.getMapReduceManager(MAPREDUCE);
int numRuns = 0;
Validator pfsValidator = new Validator(PFS);
Validator otherValidator = new Validator(OTHER);
final UnitTestManager.UnitTestDatasetManager<PartitionedFileSet> pfsManager = pfsValidator.getPfsManager();
final PartitionedFileSet pfs = pfsManager.get();
final PartitionedFileSet other = otherValidator.getPfsManager().get();
final String path3 = pfsValidator.getRelativePath3();
// 1. addPartition(location) fails because partition already exists
try {
pfsManager.execute(new Runnable() {
@Override
public void run() {
pfs.addPartition(KEY_1, path3);
}
});
Assert.fail("Expected tx to fail because partition for number=1 already exists");
} catch (TransactionFailureException e) {
// expected
}
pfsValidator.validate();
// 2. addPartition(location) fails because Hive partition already exists
try {
pfsManager.execute(new Runnable() {
@Override
public void run() {
pfs.addPartition(KEY_4, path3);
}
});
Assert.fail("Expected tx to fail because hive partition for number=1 already exists");
} catch (TransactionFailureException e) {
// expected
}
pfsValidator.validate();
// 3. addPartition(location) succeeds but transaction fails
try {
pfsManager.execute(new Runnable() {
@Override
public void run() {
pfs.addPartition(KEY_3, path3);
throw new RuntimeException("fail the tx");
}
});
Assert.fail("Expected tx to fail because it threw a runtime exception");
} catch (TransactionFailureException e) {
// expected
}
pfsValidator.validate();
// 4. getPartitionOutput() fails because partition already exists
try {
pfs.getPartitionOutput(KEY_1);
Assert.fail("Expected getPartitionOutput to fail, because the partition already exists.");
} catch (DataSetException expected) {
}
pfsValidator.validate();
// 5. partitionOutput.addPartition() fails because Hive partition already exists
final PartitionOutput output4x = pfs.getPartitionOutput(KEY_4);
final Location location4x = output4x.getLocation();
try (Writer writer = new OutputStreamWriter(location4x.append("file").getOutputStream())) {
writer.write("4x,4x\n");
}
try {
pfsManager.execute(new Runnable() {
@Override
public void run() {
output4x.addPartition();
}
});
Assert.fail("Expected tx to fail because hive partition for number=4 already exists");
} catch (TransactionFailureException e) {
// expected
}
pfsValidator.validate();
Assert.assertFalse(location4x.exists());
// 6. partitionOutput.addPartition() succeeds but transaction fails
final PartitionOutput output5x = pfs.getPartitionOutput(KEY_5);
final Location location5x = output5x.getLocation();
try (Writer writer = new OutputStreamWriter(location5x.append("file").getOutputStream())) {
writer.write("5x,5x\n");
}
try {
pfsManager.execute(new Runnable() {
@Override
public void run() {
output5x.addPartition();
throw new RuntimeException("fail the tx");
}
});
Assert.fail("Expected tx to fail because it threw a runtime exception");
} catch (TransactionFailureException e) {
// expected
}
pfsValidator.validate();
Assert.assertFalse(location5x.exists());
// 7. mapreduce writing partition fails because location already exists
mrManager.start(ImmutableMap.of(PFS_OUT, "1", "input.text", "1x"));
mrManager.waitForRuns(ProgramRunStatus.FAILED, ++numRuns, 2, TimeUnit.MINUTES);
pfsValidator.validate();
// 8. mapreduce writing partition fails because partition already exists
mrManager.start(ImmutableMap.of(PFS_OUT, "2", "input.text", "2x"));
mrManager.waitForRuns(ProgramRunStatus.FAILED, ++numRuns, 2, TimeUnit.MINUTES);
pfsValidator.validate();
Assert.assertFalse(pfs.getPartitionOutput(KEY_2).getLocation().exists());
// 9. mapreduce writing partition fails because Hive partition already exists
mrManager.start(ImmutableMap.of(PFS_OUT, "4", "input.text", "4x"));
mrManager.waitForRuns(ProgramRunStatus.FAILED, ++numRuns, 2, TimeUnit.MINUTES);
pfsValidator.validate();
Assert.assertFalse(pfs.getPartitionOutput(KEY_4).getLocation().exists());
// 10. mapreduce writing dynamic partition fails because location already exists
mrManager.start(ImmutableMap.of("input.text", "3x 5x"));
mrManager.waitForRuns(ProgramRunStatus.FAILED, ++numRuns, 2, TimeUnit.MINUTES);
pfsValidator.validate();
Assert.assertFalse(pfs.getPartitionOutput(KEY_5).getLocation().exists());
// 11. mapreduce writing dynamic partition fails because partition already exists
mrManager.start(ImmutableMap.of("input.text", "2x 5x"));
mrManager.waitForRuns(ProgramRunStatus.FAILED, ++numRuns, 2, TimeUnit.MINUTES);
pfsValidator.validate();
Assert.assertFalse(pfs.getPartitionOutput(KEY_2).getLocation().exists());
Assert.assertFalse(pfs.getPartitionOutput(KEY_5).getLocation().exists());
// 12. mapreduce writing dynamic partition fails because Hive partition already exists
mrManager.start(ImmutableMap.of("input.text", "0x 4x 5x"));
mrManager.waitForRuns(ProgramRunStatus.FAILED, ++numRuns, 2, TimeUnit.MINUTES);
pfsValidator.validate();
Assert.assertFalse(pfs.getPartitionOutput(KEY_0).getLocation().exists());
Assert.assertFalse(pfs.getPartitionOutput(KEY_4).getLocation().exists());
Assert.assertFalse(pfs.getPartitionOutput(KEY_5).getLocation().exists());
// 13. multi-output mapreduce writing partition fails because location already exists
// 13a. first output fails, other output must rollback partitions 0 and 5
mrManager.start(ImmutableMap.of("output.datasets", BOTH, PFS_OUT, "1", "input.text", "0x 5x"));
mrManager.waitForRuns(ProgramRunStatus.FAILED, ++numRuns, 2, TimeUnit.MINUTES);
pfsValidator.validate();
otherValidator.validate();
Assert.assertFalse(other.getPartitionOutput(KEY_0).getLocation().exists());
Assert.assertFalse(other.getPartitionOutput(KEY_5).getLocation().exists());
// 13b. second output fails, first output must rollback partitions 0 and 5
mrManager.start(ImmutableMap.of("output.datasets", BOTH, OTHER_OUT, "1", "input.text", "0x 5x"));
mrManager.waitForRuns(ProgramRunStatus.FAILED, ++numRuns, 2, TimeUnit.MINUTES);
pfsValidator.validate();
otherValidator.validate();
Assert.assertFalse(pfs.getPartitionOutput(KEY_0).getLocation().exists());
Assert.assertFalse(pfs.getPartitionOutput(KEY_5).getLocation().exists());
// 14. multi-output mapreduce writing partition fails because partition already exists
// 14a. first output fails, other output must rollback partition 5
mrManager.start(ImmutableMap.of("output.datasets", BOTH, PFS_OUT, "2", OTHER_OUT, "5", "input.text", "2x 5x"));
mrManager.waitForRuns(ProgramRunStatus.FAILED, ++numRuns, 2, TimeUnit.MINUTES);
pfsValidator.validate();
otherValidator.validate();
Assert.assertFalse(other.getPartitionOutput(KEY_5).getLocation().exists());
// 14b. second output fails, first output must rollback partition 5
mrManager.start(ImmutableMap.of("output.datasets", BOTH, PFS_OUT, "5", OTHER_OUT, "2", "input.text", "2x 5x"));
mrManager.waitForRuns(ProgramRunStatus.FAILED, ++numRuns, 2, TimeUnit.MINUTES);
pfsValidator.validate();
otherValidator.validate();
Assert.assertFalse(pfs.getPartitionOutput(KEY_5).getLocation().exists());
// 15. multi-output mapreduce writing partition fails because Hive partition already exists
// 15a. first output fails, other output must rollback partitions 0 and 5
mrManager.start(ImmutableMap.of("output.datasets", BOTH, PFS_OUT, "4", "input.text", "0x 5x"));
mrManager.waitForRuns(ProgramRunStatus.FAILED, ++numRuns, 2, TimeUnit.MINUTES);
pfsValidator.validate();
otherValidator.validate();
Assert.assertFalse(pfs.getPartitionOutput(KEY_4).getLocation().exists());
Assert.assertFalse(other.getPartitionOutput(KEY_0).getLocation().exists());
Assert.assertFalse(other.getPartitionOutput(KEY_5).getLocation().exists());
// 15b. second output fails, first output must rollback partitions 0 and 5
mrManager.start(ImmutableMap.of("output.datasets", BOTH, OTHER_OUT, "4", "input.text", "0x 5x"));
mrManager.waitForRuns(ProgramRunStatus.FAILED, ++numRuns, 2, TimeUnit.MINUTES);
pfsValidator.validate();
otherValidator.validate();
Assert.assertFalse(other.getPartitionOutput(KEY_4).getLocation().exists());
Assert.assertFalse(pfs.getPartitionOutput(KEY_0).getLocation().exists());
Assert.assertFalse(pfs.getPartitionOutput(KEY_5).getLocation().exists());
}
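The pattern this test exercises repeatedly can be reduced to a short caller-side sketch: getPartitionOutput() raises DataSetException when a partition for the key already exists, and the pre-existing files and partitions are left untouched. The names pfs and existingKey below are illustrative, not taken from the test:

  try {
    pfs.getPartitionOutput(existingKey); // throws: a partition for this key already exists
    Assert.fail("Expected DataSetException because the partition already exists");
  } catch (DataSetException e) {
    // expected: the pre-existing partition and its files are preserved
  }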
Use of io.cdap.cdap.api.dataset.DataSetException in project cdap by caskdata.
The class AppWithCustomTx, method recordTransaction.
/**
* If in a transaction, records the timeout that the current transaction was given, or "default" if no explicit
* timeout was given. If no transaction is active, only verifies that a write fails with DataSetException and
* records nothing.
*
* Note: we know whether and what explicit timeout was given, because we inject a {@link RevealingTxSystemClient},
* which returns a {@link RevealingTransaction} for {@link TransactionSystemClient#startShort(int)} only.
*/
static void recordTransaction(DatasetContext context, String row, String column) {
TransactionCapturingTable capture = context.getDataset(CAPTURE);
Transaction tx = capture.getTx();
// we cannot cast because the RevealingTransaction is not visible in the program class loader
String value = DEFAULT;
if (tx == null) {
try {
capture.getTable().put(new Put(row, column, value));
throw new RuntimeException("put to table without transaction should have failed.");
} catch (DataSetException e) {
// expected
}
return;
}
if ("RevealingTransaction".equals(tx.getClass().getSimpleName())) {
int txTimeout;
try {
txTimeout = (int) tx.getClass().getField("timeout").get(tx);
} catch (Exception e) {
throw Throwables.propagate(e);
}
value = String.valueOf(txTimeout);
}
capture.getTable().put(new Put(row, column, value));
}
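The no-transaction branch above is itself a useful standalone illustration of DataSetException: Table writes issued outside a transaction are expected to fail with it. A minimal sketch, assuming a Table reference named table (an illustrative name, not from the source):

  try {
    table.put(new Put("row", "column", "value"));
    throw new RuntimeException("put to table without a transaction should have failed");
  } catch (DataSetException e) {
    // expected: the dataset rejects writes when no transaction is active
  }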
Use of io.cdap.cdap.api.dataset.DataSetException in project cdap by caskdata.
The class PartitionedFileSetDataset, method dropPartition.
@WriteOnly
@Override
public void dropPartition(PartitionKey key) {
byte[] rowKey = generateRowKey(key, partitioning);
PartitionDetail partition = getPartition(key);
if (partition == null) {
// silently ignore non-existing partitions
return;
}
// TODO: make DDL operations transactional [CDAP-1393]
dropPartitionFromExplore(key);
partitionsTable.delete(rowKey);
if (!isExternal) {
Location partitionLocation = partition.getLocation();
try {
if (partitionLocation.exists()) {
Location dstLocation = getQuarantineLocation().append(partition.getRelativePath());
Location dstParent = Locations.getParent(dstLocation);
// shouldn't be null, since dstLocation was created by appending to a location, so it must have a parent
Preconditions.checkNotNull(dstParent);
// before moving into quarantine, we need to ensure that parent location exists
if (!dstParent.exists()) {
if (!dstParent.mkdirs()) {
throw new DataSetException(String.format("Failed to create parent directory %s", dstParent));
}
}
partitionLocation.renameTo(dstLocation);
}
} catch (IOException ioe) {
throw new DataSetException(String.format("Failed to move location %s into quarantine", partitionLocation), ioe);
}
operationsInThisTx.add(new DropPartitionOperation(key, partition.getRelativePath()));
}
}
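From a caller's point of view, dropPartition() converts filesystem problems (such as a failed move into the quarantine location) into an unchecked DataSetException, with the original IOException attached as the cause. A hedged usage sketch; pfs, key, and LOG are illustrative names:

  try {
    pfs.dropPartition(key);
  } catch (DataSetException e) {
    // e.getCause() may be the IOException from the quarantine move
    LOG.warn("Failed to drop partition for key {}", key, e);
    throw e;
  }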
Use of io.cdap.cdap.api.dataset.DataSetException in project cdap by caskdata.
The class BufferingTable, method get.
@ReadOnly
@Override
public Row get(byte[] row, byte[] startColumn, byte[] stopColumn, int limit) {
ensureTransactionIsStarted();
reportRead(1);
// checking if the row was deleted inside this tx
NavigableMap<byte[], Update> buffCols = buff.get(row);
// potential improvement: do not fetch columns available in in-mem buffer (we know them at this point)
try {
Map<byte[], byte[]> persistedCols = getPersisted(row, startColumn, stopColumn, limit);
// adding server cols, and then overriding with buffered values
NavigableMap<byte[], byte[]> result = Maps.newTreeMap(Bytes.BYTES_COMPARATOR);
if (persistedCols != null) {
result.putAll(persistedCols);
}
if (buffCols != null) {
buffCols = getRange(buffCols, startColumn, stopColumn, limit);
// null-valued columns in the in-memory buffer are deletes, so they must be removed from the result
mergeToPersisted(result, buffCols, null);
}
// applying limit
return new Result(row, head(result, limit));
} catch (Exception e) {
LOG.debug("get failed for table: " + getTransactionAwareName() + ", row: " + Bytes.toStringBinary(row), e);
throw new DataSetException("get failed", e);
}
}
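Callers of this get() see a merged view of persisted columns and the current transaction's buffered updates, with buffered deletes already applied; any failure of the persisted read surfaces as a DataSetException. A brief sketch of that contract, assuming a table instance named table and illustrative arguments:

  try {
    // full-row read: no start/stop column bounds, effectively unlimited columns
    Row row = table.get(Bytes.toBytes("row1"), null, null, Integer.MAX_VALUE);
  } catch (DataSetException e) {
    // the underlying persisted read failed; the cause is preserved via e.getCause()
  }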
Use of io.cdap.cdap.api.dataset.DataSetException in project cdap by caskdata.
The class BufferingTable, method get (multi-get overload).
@ReadOnly
@Override
public List<Row> get(List<Get> gets) {
ensureTransactionIsStarted();
try {
// get persisted values, then overwrite with what's buffered
List<Map<byte[], byte[]>> persistedRows = getPersisted(gets);
// gets and rows lists are always of the same size
Preconditions.checkArgument(gets.size() == persistedRows.size(), "Invalid number of rows fetched when performing multi-get. There must be one row for each get.");
List<Row> result = Lists.newArrayListWithCapacity(persistedRows.size());
Iterator<Map<byte[], byte[]>> persistedRowsIter = persistedRows.iterator();
Iterator<Get> getIter = gets.iterator();
while (persistedRowsIter.hasNext() && getIter.hasNext()) {
Get get = getIter.next();
Map<byte[], byte[]> persistedRow = persistedRowsIter.next();
// navigable copy of the persisted data. Implementation may return immutable or unmodifiable maps,
// so we make a copy here.
NavigableMap<byte[], byte[]> rowColumns = Maps.newTreeMap(Bytes.BYTES_COMPARATOR);
rowColumns.putAll(persistedRow);
byte[] row = get.getRow();
NavigableMap<byte[], Update> buffCols = buff.get(row);
// merge what was in the buffer and what was persisted
if (buffCols != null) {
List<byte[]> getColumns = get.getColumns();
byte[][] columns = getColumns == null ? null : getColumns.toArray(new byte[getColumns.size()][]);
mergeToPersisted(rowColumns, buffCols, columns);
}
result.add(new Result(row, unwrapDeletes(rowColumns)));
}
return result;
} catch (Exception e) {
LOG.debug("multi-get failed for table: " + getTransactionAwareName(), e);
throw new DataSetException("multi-get failed", e);
}
}
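The multi-get overload applies the same merge per row and returns exactly one Row per Get, in order; if the batched persisted lookup fails, the whole call fails with a single DataSetException. A hedged usage sketch with illustrative row keys:

  List<Get> gets = Lists.newArrayList(new Get(Bytes.toBytes("row1")), new Get(Bytes.toBytes("row2")));
  try {
    List<Row> rows = table.get(gets);
    // rows.size() == gets.size(); each Row already reflects buffered writes and deletes
  } catch (DataSetException e) {
    // the batched read failed as a whole
  }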