Use of org.apache.tephra.TransactionFailureException in project cdap by caskdata.
From class PartitionRollbackTestRun, method testPFSRollback.
/*
* This tests all the following cases:
*
* 1. addPartition(location) fails because partition already exists
* 2. addPartition(location) fails because Hive partition already exists
* 3. addPartition(location) succeeds but transaction fails
* 4. getPartitionOutput() fails because partition already exists
* 5. partitionOutput.addPartition() fails because Hive partition already exists
* 6. partitionOutput.addPartition() succeeds but transaction fails
* 7. mapreduce writing partition fails because location already exists
* 8. mapreduce writing partition fails because partition already exists
* 9. mapreduce writing partition fails because Hive partition already exists
* 10. mapreduce writing dynamic partition fails because location already exists
* 11. mapreduce writing dynamic partition fails because partition already exists
* 12. mapreduce writing dynamic partition fails because Hive partition already exists
* 13. multi-output mapreduce writing partition fails because location already exists
* 13a. first output fails, other output must rollback partitions 0 and 5
* 13b. second output fails, first output must rollback partitions 0 and 5
* 14. multi-output mapreduce writing partition fails because partition already exists
* 14a. first output fails, other output must rollback partition 5
* 14b. second output fails, first output must rollback partition 5
* 15. multi-output mapreduce writing partition fails because Hive partition already exists
* 15a. first output fails, other output must rollback partitions 0 and 5
* 15b. second output fails, first output must rollback partitions 0 and 5
*
* For all these cases, we validate that existing files and partitions are preserved, and newly
* added files and partitions are rolled back.
*/
@Test
public void testPFSRollback() throws Exception {
ApplicationManager appManager = deployApplication(AppWritingToPartitioned.class);
MapReduceManager mrManager = appManager.getMapReduceManager(MAPREDUCE);
int numRuns = 0;
Validator pfsValidator = new Validator(PFS);
Validator otherValidator = new Validator(OTHER);
final UnitTestManager.UnitTestDatasetManager<PartitionedFileSet> pfsManager = pfsValidator.getPfsManager();
final PartitionedFileSet pfs = pfsManager.get();
final PartitionedFileSet other = otherValidator.getPfsManager().get();
final String path3 = pfsValidator.getRelativePath3();
// 1. addPartition(location) fails because partition already exists
try {
pfsManager.execute(new Runnable() {
@Override
public void run() {
pfs.addPartition(KEY_1, path3);
}
});
Assert.fail("Expected tx to fail because partition for number=1 already exists");
} catch (TransactionFailureException e) {
// expected
}
pfsValidator.validate();
// 2. addPartition(location) fails because Hive partition already exists
try {
pfsManager.execute(new Runnable() {
@Override
public void run() {
pfs.addPartition(KEY_4, path3);
}
});
Assert.fail("Expected tx to fail because hive partition for number=1 already exists");
} catch (TransactionFailureException e) {
// expected
}
pfsValidator.validate();
// 3. addPartition(location) succeeds but transaction fails
try {
pfsManager.execute(new Runnable() {
@Override
public void run() {
pfs.addPartition(KEY_3, path3);
throw new RuntimeException("fail the tx");
}
});
Assert.fail("Expected tx to fail because it threw a runtime exception");
} catch (TransactionFailureException e) {
// expected
}
pfsValidator.validate();
// 4. getPartitionOutput() fails because partition already exists
try {
pfs.getPartitionOutput(KEY_1);
Assert.fail("Expected getPartitionOutput to fail, because the partition already exists.");
} catch (DataSetException expected) {
}
pfsValidator.validate();
// 5. partitionOutput.addPartition() fails because Hive partition already exists
final PartitionOutput output4x = pfs.getPartitionOutput(KEY_4);
final Location location4x = output4x.getLocation();
try (Writer writer = new OutputStreamWriter(location4x.append("file").getOutputStream())) {
writer.write("4x,4x\n");
}
try {
pfsManager.execute(new Runnable() {
@Override
public void run() {
output4x.addPartition();
}
});
Assert.fail("Expected tx to fail because hive partition for number=4 already exists");
} catch (TransactionFailureException e) {
// expected
}
pfsValidator.validate();
Assert.assertFalse(location4x.exists());
// 6. partitionOutput.addPartition() succeeds but transaction fails
final PartitionOutput output5x = pfs.getPartitionOutput(KEY_5);
final Location location5x = output5x.getLocation();
try (Writer writer = new OutputStreamWriter(location5x.append("file").getOutputStream())) {
writer.write("5x,5x\n");
}
try {
pfsManager.execute(new Runnable() {
@Override
public void run() {
output5x.addPartition();
throw new RuntimeException("fail the tx");
}
});
Assert.fail("Expected tx to fail because it threw a runtime exception");
} catch (TransactionFailureException e) {
// expected
}
pfsValidator.validate();
Assert.assertFalse(location5x.exists());
// 7. mapreduce writing partition fails because location already exists
mrManager.start(ImmutableMap.of(PFS_OUT, "1", "input.text", "1x"));
mrManager.waitForRuns(ProgramRunStatus.FAILED, ++numRuns, 2, TimeUnit.MINUTES);
pfsValidator.validate();
// 8. mapreduce writing partition fails because partition already exists
mrManager.start(ImmutableMap.of(PFS_OUT, "2", "input.text", "2x"));
mrManager.waitForRuns(ProgramRunStatus.FAILED, ++numRuns, 2, TimeUnit.MINUTES);
pfsValidator.validate();
Assert.assertFalse(pfs.getPartitionOutput(KEY_2).getLocation().exists());
// 9. mapreduce writing partition fails because Hive partition already exists
mrManager.start(ImmutableMap.of(PFS_OUT, "4", "input.text", "4x"));
mrManager.waitForRuns(ProgramRunStatus.FAILED, ++numRuns, 2, TimeUnit.MINUTES);
pfsValidator.validate();
Assert.assertFalse(pfs.getPartitionOutput(KEY_4).getLocation().exists());
// 10. mapreduce writing dynamic partition fails because location already exists
mrManager.start(ImmutableMap.of("input.text", "3x 5x"));
mrManager.waitForRuns(ProgramRunStatus.FAILED, ++numRuns, 2, TimeUnit.MINUTES);
pfsValidator.validate();
Assert.assertFalse(pfs.getPartitionOutput(KEY_5).getLocation().exists());
// 11. mapreduce writing dynamic partition fails because partition already exists
mrManager.start(ImmutableMap.of("input.text", "2x 5x"));
mrManager.waitForRuns(ProgramRunStatus.FAILED, ++numRuns, 2, TimeUnit.MINUTES);
pfsValidator.validate();
Assert.assertFalse(pfs.getPartitionOutput(KEY_2).getLocation().exists());
Assert.assertFalse(pfs.getPartitionOutput(KEY_5).getLocation().exists());
// 12. mapreduce writing dynamic partition fails because Hive partition already exists
mrManager.start(ImmutableMap.of("input.text", "0x 4x 5x"));
mrManager.waitForRuns(ProgramRunStatus.FAILED, ++numRuns, 2, TimeUnit.MINUTES);
pfsValidator.validate();
Assert.assertFalse(pfs.getPartitionOutput(KEY_0).getLocation().exists());
Assert.assertFalse(pfs.getPartitionOutput(KEY_4).getLocation().exists());
Assert.assertFalse(pfs.getPartitionOutput(KEY_5).getLocation().exists());
// 13. multi-output mapreduce writing partition fails because location already exists
// 13a. first output fails, other output must rollback partitions 0 and 5
mrManager.start(ImmutableMap.of("output.datasets", BOTH, PFS_OUT, "1", "input.text", "0x 5x"));
mrManager.waitForRuns(ProgramRunStatus.FAILED, ++numRuns, 2, TimeUnit.MINUTES);
pfsValidator.validate();
otherValidator.validate();
Assert.assertFalse(other.getPartitionOutput(KEY_0).getLocation().exists());
Assert.assertFalse(other.getPartitionOutput(KEY_5).getLocation().exists());
// 13b. second output fails, first output must rollback partitions 0 and 5
mrManager.start(ImmutableMap.of("output.datasets", BOTH, OTHER_OUT, "1", "input.text", "0x 5x"));
mrManager.waitForRuns(ProgramRunStatus.FAILED, ++numRuns, 2, TimeUnit.MINUTES);
pfsValidator.validate();
otherValidator.validate();
Assert.assertFalse(pfs.getPartitionOutput(KEY_0).getLocation().exists());
Assert.assertFalse(pfs.getPartitionOutput(KEY_5).getLocation().exists());
// 14. multi-output mapreduce writing partition fails because partition already exists
// 14a. first output fails, other output must rollback partition 5
mrManager.start(ImmutableMap.of("output.datasets", BOTH, PFS_OUT, "2", OTHER_OUT, "5", "input.text", "2x 5x"));
mrManager.waitForRuns(ProgramRunStatus.FAILED, ++numRuns, 2, TimeUnit.MINUTES);
pfsValidator.validate();
otherValidator.validate();
Assert.assertFalse(other.getPartitionOutput(KEY_5).getLocation().exists());
// 14b. second output fails, first output must rollback partition 5
mrManager.start(ImmutableMap.of("output.datasets", BOTH, PFS_OUT, "5", OTHER_OUT, "2", "input.text", "2x 5x"));
mrManager.waitForRuns(ProgramRunStatus.FAILED, ++numRuns, 2, TimeUnit.MINUTES);
pfsValidator.validate();
otherValidator.validate();
Assert.assertFalse(pfs.getPartitionOutput(KEY_5).getLocation().exists());
// 15. multi-output mapreduce writing partition fails because Hive partition already exists
// 15a. first output fails, other output must rollback partitions 0 and 5
mrManager.start(ImmutableMap.of("output.datasets", BOTH, PFS_OUT, "4", "input.text", "0x 5x"));
mrManager.waitForRuns(ProgramRunStatus.FAILED, ++numRuns, 2, TimeUnit.MINUTES);
pfsValidator.validate();
otherValidator.validate();
Assert.assertFalse(pfs.getPartitionOutput(KEY_4).getLocation().exists());
Assert.assertFalse(other.getPartitionOutput(KEY_0).getLocation().exists());
Assert.assertFalse(other.getPartitionOutput(KEY_5).getLocation().exists());
// 15b. second output fails, first output must rollback partitions 0 and 5
mrManager.start(ImmutableMap.of("output.datasets", BOTH, OTHER_OUT, "4", "input.text", "0x 5x"));
mrManager.waitForRuns(ProgramRunStatus.FAILED, ++numRuns, 2, TimeUnit.MINUTES);
pfsValidator.validate();
otherValidator.validate();
Assert.assertFalse(other.getPartitionOutput(KEY_4).getLocation().exists());
Assert.assertFalse(pfs.getPartitionOutput(KEY_0).getLocation().exists());
Assert.assertFalse(pfs.getPartitionOutput(KEY_5).getLocation().exists());
}
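Every negative case above repeats the same execute / Assert.fail / catch pattern. Purely as an illustration (this helper is not part of the CDAP test; its name and its placement inside the same test class are assumptions), the pattern could be factored into one method:

// Hypothetical helper, assumed to live in the same test class as testPFSRollback:
// run the runnable in a transaction through the dataset manager and assert that the
// transaction fails with TransactionFailureException and is rolled back.
private void assertTxFails(final UnitTestManager.UnitTestDatasetManager<PartitionedFileSet> manager,
                           String failureMessage, final Runnable runnable) throws Exception {
  try {
    manager.execute(runnable);
    Assert.fail(failureMessage);
  } catch (TransactionFailureException e) {
    // expected: the transaction was rolled back
  }
}

Case 1, for example, would then collapse to a single assertTxFails(pfsManager, ...) call wrapping pfs.addPartition(KEY_1, path3), followed by pfsValidator.validate().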
Use of org.apache.tephra.TransactionFailureException in project cdap by caskdata.
From class BasicNotificationContext, method execute.
@Override
public boolean execute(TxRunnable runnable, TxRetryPolicy policy) {
int failureCount = 0;
while (true) {
try {
TransactionContext context = datasetContext.newTransactionContext();
context.start();
try {
runnable.run(datasetContext);
} catch (Throwable t) {
context.abort(new TransactionFailureException("Exception thrown from runnable. Aborting transaction.", t));
}
context.finish();
return true;
} catch (Throwable t) {
switch(policy.handleFailure(++failureCount, t)) {
case RETRY:
LOG.warn("Retrying failed transactional operation", t);
break;
case DROP:
LOG.warn("Failed to execute transactional operation", t);
return false;
}
}
}
}
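execute() delegates the retry-or-drop decision entirely to the policy object. As a minimal sketch of such a policy (the interface below is inferred from the call site above, handleFailure(int, Throwable) returning RETRY or DROP, and is deliberately declared locally rather than claiming the exact CDAP TxRetryPolicy definition), a bounded-retry implementation is just a counter check:

// Illustrative interface matching the shape used by execute() above.
interface RetryPolicy {
  enum Action { RETRY, DROP }
  Action handleFailure(int failureCount, Throwable t);
}

// Retry a failed transaction up to maxRetries times, then give up (DROP),
// which makes execute() return false.
final class MaxRetries implements RetryPolicy {
  private final int maxRetries;

  MaxRetries(int maxRetries) {
    this.maxRetries = maxRetries;
  }

  @Override
  public Action handleFailure(int failureCount, Throwable t) {
    return failureCount <= maxRetries ? Action.RETRY : Action.DROP;
  }
}

Passing a policy like new MaxRetries(3) would retry a failed transaction three times before the loop gives up and returns false.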
Use of org.apache.tephra.TransactionFailureException in project cdap by caskdata.
From class SparkTransactionHandlerTest, method testRunJob.
/**
* Simulates a single job run which contains multiple stages with an optional explicit {@link Transaction} to use.
*
* @param jobId the job id
* @param stages stages of the job
* @param jobSucceeded end result of the job
* @param explicitTransaction the job transaction to use if not {@code null}
*/
private void testRunJob(int jobId, Set<Integer> stages, boolean jobSucceeded, @Nullable final Transaction explicitTransaction) throws Exception {
// Before job start, no transaction will be associated with the stages
verifyStagesTransactions(stages, new ClientTransactionVerifier() {
@Override
public boolean verify(@Nullable Transaction transaction, @Nullable Throwable failureCause) throws Exception {
return transaction == null && failureCause instanceof TimeoutException;
}
});
// Now start the job
if (explicitTransaction == null) {
sparkTxHandler.jobStarted(jobId, stages);
} else {
sparkTxHandler.jobStarted(jobId, stages, new TransactionInfo() {
@Override
public Transaction getTransaction() {
return explicitTransaction;
}
@Override
public boolean commitOnJobEnded() {
return false;
}
@Override
public void onJobStarted() {
// no-op
}
@Override
public void onTransactionCompleted(boolean jobSucceeded, @Nullable TransactionFailureException failureCause) {
// no-op
}
});
}
// For all stages, it should get the same transaction
final Set<Transaction> transactions = Collections.newSetFromMap(new ConcurrentHashMap<Transaction, Boolean>());
verifyStagesTransactions(stages, new ClientTransactionVerifier() {
@Override
public boolean verify(@Nullable Transaction transaction, @Nullable Throwable failureCause) throws Exception {
transactions.add(new TransactionWrapper(transaction));
return transaction != null;
}
});
// Transactions returned for all stages belonging to the same job must return the same transaction
Assert.assertEquals(1, transactions.size());
// The transaction must be in progress
Transaction transaction = transactions.iterator().next();
Assert.assertTrue(txManager.getCurrentState().getInProgress().containsKey(transaction.getWritePointer()));
// If run with an explicit transaction, then all stages' transactions must be the same as the explicit transaction
if (explicitTransaction != null) {
Assert.assertEquals(new TransactionWrapper(explicitTransaction), transaction);
}
// Now finish the job
sparkTxHandler.jobEnded(jobId, jobSucceeded);
// After job finished, no transaction will be associated with the stages
verifyStagesTransactions(stages, new ClientTransactionVerifier() {
@Override
public boolean verify(@Nullable Transaction transaction, @Nullable Throwable failureCause) throws Exception {
return transaction == null && failureCause instanceof TimeoutException;
}
});
// Check the transaction state based on the job result
TransactionSnapshot txState = txManager.getCurrentState();
// If explicit transaction is used, the transaction should still be in-progress
if (explicitTransaction != null) {
Assert.assertTrue(txState.getInProgress().containsKey(transaction.getWritePointer()));
} else {
// With implicit transaction, after job completed, the tx shouldn't be in-progress
Assert.assertFalse(txState.getInProgress().containsKey(transaction.getWritePointer()));
if (jobSucceeded) {
// Transaction must not be in the invalid list
Assert.assertFalse(txState.getInvalid().contains(transaction.getWritePointer()));
} else {
// Transaction must be in the invalid list
Assert.assertTrue(txState.getInvalid().contains(transaction.getWritePointer()));
}
}
}
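testRunJob is parameterized over the job outcome and over implicit versus explicit transactions, so a driver only has to enumerate the combinations. The sketch below is such a driver, not code from the CDAP test: txManager and sparkTxHandler are the fields referenced above, while the job/stage ids, the Guava ImmutableSet usage, and the TransactionManager.startLong() call for the explicit case are assumptions.

@Test
public void testJobRuns() throws Exception {
  // Implicit transaction, job succeeds: the tx must be committed and not invalidated.
  testRunJob(1, ImmutableSet.of(10, 11), true, null);
  // Implicit transaction, job fails: the tx must end up on the invalid list.
  testRunJob(2, ImmutableSet.of(20), false, null);
  // Explicit transaction: the handler must hand the same tx to every stage and
  // leave it in progress after the job ends (commitOnJobEnded() returns false).
  Transaction explicitTx = txManager.startLong();
  testRunJob(3, ImmutableSet.of(30, 31), true, explicitTx);
  // The explicit transaction is still in progress here; its owner would commit
  // or invalidate it separately.
}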
Use of org.apache.tephra.TransactionFailureException in project cdap by caskdata.
From class SparkTransactionClient, method getTransaction.
@Nullable
private Transaction getTransaction(int stageId) throws TransactionFailureException {
try {
URL url = txServiceBaseURI.resolve("/spark/stages/" + stageId + "/transaction").toURL();
HttpURLConnection urlConn = (HttpURLConnection) url.openConnection();
try {
int responseCode = urlConn.getResponseCode();
if (responseCode == 200) {
return TX_CODEC.decode(ByteStreams.toByteArray(urlConn.getInputStream()));
}
if (responseCode == 404) {
return null;
}
throw new TransactionFailureException(String.format("No transaction for stage %d. Reason: %s", stageId, Bytes.toString(ByteStreams.toByteArray(urlConn.getErrorStream()))));
} finally {
urlConn.disconnect();
}
} catch (IOException e) {
// If we cannot talk to the tx service, treat it the same as a 404 so that the caller can retry.
return null;
}
}
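getTransaction returns null both for a 404 and for an I/O failure, precisely so that a caller can poll until the stage's transaction becomes visible. A possible polling wrapper is sketched below; the method name, the fixed poll interval, and the timeout handling are illustrative and not taken from the CDAP class, which is assumed to already have Transaction, TimeUnit, and TimeoutException available.

// Hypothetical helper living alongside getTransaction(int): poll the transaction
// service until the stage's transaction shows up or the deadline passes.
private Transaction awaitTransaction(int stageId, long timeout, TimeUnit unit)
    throws TransactionFailureException, InterruptedException, TimeoutException {
  long deadline = System.currentTimeMillis() + unit.toMillis(timeout);
  do {
    Transaction tx = getTransaction(stageId);
    if (tx != null) {
      return tx;
    }
    TimeUnit.MILLISECONDS.sleep(50);  // fixed 50 ms poll interval, illustrative only
  } while (System.currentTimeMillis() < deadline);
  throw new TimeoutException("No transaction for stage " + stageId + " within the timeout");
}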
Use of org.apache.tephra.TransactionFailureException in project cdap by caskdata.
From class SparkTransactional, method execute.
/**
* Executes the given runnable transactionally. If there is an open transaction that can be used, then
* the runnable will be executed with that existing transaction.
* Otherwise, a new long transaction will be created to execute the given runnable.
*
* @param runnable The {@link TxRunnable} to be executed inside a transaction
* @param transactionType The {@link TransactionType} of the Spark transaction.
*/
void execute(SparkTxRunnable runnable, TransactionType transactionType) throws TransactionFailureException {
TransactionalDatasetContext txDatasetContext = activeDatasetContext.get();
boolean needCommit = false;
// If there is an existing transaction
if (txDatasetContext != null) {
TransactionType currentTransactionType = txDatasetContext.getTransactionType();
// We don't support nested transaction
if (currentTransactionType == TransactionType.EXPLICIT && transactionType == TransactionType.EXPLICIT) {
throw new TransactionFailureException("Nested transaction not supported. Active transaction is " + txDatasetContext.getTransaction());
}
// If the current transaction is commit on job end, we need some special handling
if (currentTransactionType == TransactionType.IMPLICIT_COMMIT_ON_JOB_END) {
// If the job has already started, block until the job associated with the transaction is completed (asynchronously).
if (txDatasetContext.isJobStarted()) {
try {
txDatasetContext.awaitCompletion();
txDatasetContext = null;
} catch (InterruptedException e) {
// Don't execute the runnable. Reset the interrupt flag and return
Thread.currentThread().interrupt();
return;
}
} else if (transactionType != TransactionType.IMPLICIT_COMMIT_ON_JOB_END) {
// If the job hasn't been started and the requested type is not commit on job end,
// we need to "upgrade" the transaction type based on the requested type
// E.g. if the requested type is EXPLICIT, then the current transaction will become an explicit one
txDatasetContext.setTransactionType(transactionType);
needCommit = true;
}
}
}
// If there is no active transaction, start a new long transaction
if (txDatasetContext == null) {
txDatasetContext = new TransactionalDatasetContext(datasetCache, transactionType);
activeDatasetContext.set(txDatasetContext);
needCommit = transactionType != TransactionType.IMPLICIT_COMMIT_ON_JOB_END;
}
Transaction transaction = txDatasetContext.getTransaction();
try {
// Call the runnable
runnable.run(txDatasetContext);
// Persist the changes
txDatasetContext.flush();
if (needCommit) {
txClient.commitOrThrow(transaction);
activeDatasetContext.remove();
txDatasetContext.postCommit();
txDatasetContext.discardDatasets();
}
} catch (Throwable t) {
// Only need to rollback and invalidate transaction if the current call needs to commit.
if (needCommit) {
// Any exception will cause invalidation of the transaction
activeDatasetContext.remove();
txDatasetContext.rollbackWithoutFailure();
Transactions.invalidateQuietly(txClient, transaction);
}
throw Transactions.asTransactionFailure(t);
}
}
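Stripped of the CDAP-specific dataset handling, execute() is a thread-local transaction-reuse pattern: reuse the active long transaction if the current thread already has one, otherwise start one, and let only the call that started it commit or invalidate it. The sketch below shows that core pattern in isolation; commitOrThrow mirrors the snippet above, startLong, invalidate, and getTransactionId are assumed to be the standard Tephra client/transaction methods, and the class itself and its Consumer-based callback are purely illustrative.

import java.util.function.Consumer;

import org.apache.tephra.Transaction;
import org.apache.tephra.TransactionFailureException;
import org.apache.tephra.TransactionSystemClient;

// Illustrative distillation of the pattern above; not the CDAP SparkTransactional class.
final class ThreadLocalTransactional {
  private final TransactionSystemClient txClient;
  private final ThreadLocal<Transaction> active = new ThreadLocal<>();

  ThreadLocalTransactional(TransactionSystemClient txClient) {
    this.txClient = txClient;
  }

  void execute(Consumer<Transaction> work) throws TransactionFailureException {
    Transaction tx = active.get();
    // Only the call that starts the transaction is responsible for ending it.
    boolean owner = false;
    if (tx == null) {
      tx = txClient.startLong();
      active.set(tx);
      owner = true;
    }
    try {
      work.accept(tx);
      if (owner) {
        txClient.commitOrThrow(tx);
        active.remove();
      }
    } catch (Throwable t) {
      if (owner) {
        // Any failure invalidates the transaction, mirroring the rollback above.
        active.remove();
        txClient.invalidate(tx.getTransactionId());
      }
      if (t instanceof TransactionFailureException) {
        throw (TransactionFailureException) t;
      }
      throw new TransactionFailureException("Transaction execution failed", t);
    }
  }
}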