Example 6 with TransactionFailureException

use of org.apache.tephra.TransactionFailureException in project cdap by caskdata.

the class PartitionRollbackTestRun method testPFSRollback.

/*
   * This tests all the following cases:
   *
   *  1. addPartition(location) fails because partition already exists
   *  2. addPartition(location) fails because Hive partition already exists
   *  3. addPartition(location) succeeds but transaction fails
   *  4. getPartitionOutput() fails because partition already exists
   *  5. partitionOutput.addPartition() fails because Hive partition already exists
   *  6. partitionOutput.addPartition() succeeds but transaction fails
   *  7. mapreduce writing partition fails because location already exists
   *  8. mapreduce writing partition fails because partition already exists
   *  9. mapreduce writing partition fails because Hive partition already exists
   *  10. mapreduce writing dynamic partition fails because location already exists
   *  11. mapreduce writing dynamic partition fails because partition already exists
   *  12. mapreduce writing dynamic partition fails because Hive partition already exists
   *  13. multi-output mapreduce writing partition fails because location already exists
   *  13a. first output fails, other output must roll back partitions 0 and 5
   *  13b. second output fails, first output must roll back partitions 0 and 5
   *  14. multi-output mapreduce writing partition fails because partition already exists
   *  14a. first output fails, other output must roll back partition 5
   *  14b. second output fails, first output must roll back partition 5
   *  15. multi-output mapreduce writing partition fails because Hive partition already exists
   *  15a. first output fails, other output must roll back partitions 0 and 5
   *  15b. second output fails, first output must roll back partitions 0 and 5
   *
   * For all these cases, we validate that existing files and partitions are preserved, and newly
   * added files and partitions are rolled back.
   */
@Test
public void testPFSRollback() throws Exception {
    ApplicationManager appManager = deployApplication(AppWritingToPartitioned.class);
    MapReduceManager mrManager = appManager.getMapReduceManager(MAPREDUCE);
    int numRuns = 0;
    Validator pfsValidator = new Validator(PFS);
    Validator otherValidator = new Validator(OTHER);
    final UnitTestManager.UnitTestDatasetManager<PartitionedFileSet> pfsManager = pfsValidator.getPfsManager();
    final PartitionedFileSet pfs = pfsManager.get();
    final PartitionedFileSet other = otherValidator.getPfsManager().get();
    final String path3 = pfsValidator.getRelativePath3();
    // 1. addPartition(location) fails because partition already exists
    try {
        pfsManager.execute(new Runnable() {

            @Override
            public void run() {
                pfs.addPartition(KEY_1, path3);
            }
        });
        Assert.fail("Expected tx to fail because partition for number=1 already exists");
    } catch (TransactionFailureException e) {
    // expected
    }
    pfsValidator.validate();
    // 2. addPartition(location) fails because Hive partition already exists
    try {
        pfsManager.execute(new Runnable() {

            @Override
            public void run() {
                pfs.addPartition(KEY_4, path3);
            }
        });
        Assert.fail("Expected tx to fail because hive partition for number=1 already exists");
    } catch (TransactionFailureException e) {
    // expected
    }
    pfsValidator.validate();
    // 3. addPartition(location) succeeds but transaction fails
    try {
        pfsManager.execute(new Runnable() {

            @Override
            public void run() {
                pfs.addPartition(KEY_3, path3);
                throw new RuntimeException("fail the tx");
            }
        });
        Assert.fail("Expected tx to fail because it threw a runtime exception");
    } catch (TransactionFailureException e) {
    // expected
    }
    pfsValidator.validate();
    // 4. getPartitionOutput() fails because partition already exists
    try {
        pfs.getPartitionOutput(KEY_1);
        Assert.fail("Expected getPartitionOutput to fail, because the partition already exists.");
    } catch (DataSetException expected) {
    }
    pfsValidator.validate();
    // 5. partitionOutput.addPartition() fails because Hive partition already exists
    final PartitionOutput output4x = pfs.getPartitionOutput(KEY_4);
    final Location location4x = output4x.getLocation();
    try (Writer writer = new OutputStreamWriter(location4x.append("file").getOutputStream())) {
        writer.write("4x,4x\n");
    }
    try {
        pfsManager.execute(new Runnable() {

            @Override
            public void run() {
                output4x.addPartition();
            }
        });
        Assert.fail("Expected tx to fail because hive partition for number=4 already exists");
    } catch (TransactionFailureException e) {
    // expected
    }
    pfsValidator.validate();
    Assert.assertFalse(location4x.exists());
    // 6. partitionOutput.addPartition() succeeds but transaction fails
    final PartitionOutput output5x = pfs.getPartitionOutput(KEY_5);
    final Location location5x = output5x.getLocation();
    try (Writer writer = new OutputStreamWriter(location5x.append("file").getOutputStream())) {
        writer.write("5x,5x\n");
    }
    try {
        pfsManager.execute(new Runnable() {

            @Override
            public void run() {
                output5x.addPartition();
                throw new RuntimeException("fail the tx");
            }
        });
        Assert.fail("Expected tx to fail because it threw a runtime exception");
    } catch (TransactionFailureException e) {
    // expected
    }
    pfsValidator.validate();
    Assert.assertFalse(location5x.exists());
    // 7. mapreduce writing partition fails because location already exists
    mrManager.start(ImmutableMap.of(PFS_OUT, "1", "input.text", "1x"));
    mrManager.waitForRuns(ProgramRunStatus.FAILED, ++numRuns, 2, TimeUnit.MINUTES);
    pfsValidator.validate();
    // 8. mapreduce writing partition fails because partition already exists
    mrManager.start(ImmutableMap.of(PFS_OUT, "2", "input.text", "2x"));
    mrManager.waitForRuns(ProgramRunStatus.FAILED, ++numRuns, 2, TimeUnit.MINUTES);
    pfsValidator.validate();
    Assert.assertFalse(pfs.getPartitionOutput(KEY_2).getLocation().exists());
    // 9. mapreduce writing partition fails because Hive partition already exists
    mrManager.start(ImmutableMap.of(PFS_OUT, "4", "input.text", "4x"));
    mrManager.waitForRuns(ProgramRunStatus.FAILED, ++numRuns, 2, TimeUnit.MINUTES);
    pfsValidator.validate();
    Assert.assertFalse(pfs.getPartitionOutput(KEY_4).getLocation().exists());
    // 10. mapreduce writing dynamic partition fails because location already exists
    mrManager.start(ImmutableMap.of("input.text", "3x 5x"));
    mrManager.waitForRuns(ProgramRunStatus.FAILED, ++numRuns, 2, TimeUnit.MINUTES);
    pfsValidator.validate();
    Assert.assertFalse(pfs.getPartitionOutput(KEY_5).getLocation().exists());
    // 11. mapreduce writing dynamic partition fails because partition already exists
    mrManager.start(ImmutableMap.of("input.text", "2x 5x"));
    mrManager.waitForRuns(ProgramRunStatus.FAILED, ++numRuns, 2, TimeUnit.MINUTES);
    pfsValidator.validate();
    Assert.assertFalse(pfs.getPartitionOutput(KEY_2).getLocation().exists());
    Assert.assertFalse(pfs.getPartitionOutput(KEY_5).getLocation().exists());
    // 12. mapreduce writing dynamic partition fails because Hive partition already exists
    mrManager.start(ImmutableMap.of("input.text", "0x 4x 5x"));
    mrManager.waitForRuns(ProgramRunStatus.FAILED, ++numRuns, 2, TimeUnit.MINUTES);
    pfsValidator.validate();
    Assert.assertFalse(pfs.getPartitionOutput(KEY_0).getLocation().exists());
    Assert.assertFalse(pfs.getPartitionOutput(KEY_4).getLocation().exists());
    Assert.assertFalse(pfs.getPartitionOutput(KEY_5).getLocation().exists());
    // 13. multi-output mapreduce writing partition fails because location already exists
    // 13a. first output fails, other output must rollback 0 and 5
    mrManager.start(ImmutableMap.of("output.datasets", BOTH, PFS_OUT, "1", "input.text", "0x 5x"));
    mrManager.waitForRuns(ProgramRunStatus.FAILED, ++numRuns, 2, TimeUnit.MINUTES);
    pfsValidator.validate();
    otherValidator.validate();
    Assert.assertFalse(other.getPartitionOutput(KEY_0).getLocation().exists());
    Assert.assertFalse(other.getPartitionOutput(KEY_5).getLocation().exists());
    // 13b. second output fails, first output must rollback 0 and 5
    mrManager.start(ImmutableMap.of("output.datasets", BOTH, OTHER_OUT, "1", "input.text", "0x 5x"));
    mrManager.waitForRuns(ProgramRunStatus.FAILED, ++numRuns, 2, TimeUnit.MINUTES);
    pfsValidator.validate();
    otherValidator.validate();
    Assert.assertFalse(pfs.getPartitionOutput(KEY_0).getLocation().exists());
    Assert.assertFalse(pfs.getPartitionOutput(KEY_5).getLocation().exists());
    // 14. multi-output mapreduce writing partition fails because partition already exists
    // 14a. first output fails, other output must rollback partition 5
    mrManager.start(ImmutableMap.of("output.datasets", BOTH, PFS_OUT, "2", OTHER_OUT, "5", "input.text", "2x 5x"));
    mrManager.waitForRuns(ProgramRunStatus.FAILED, ++numRuns, 2, TimeUnit.MINUTES);
    pfsValidator.validate();
    otherValidator.validate();
    Assert.assertFalse(other.getPartitionOutput(KEY_5).getLocation().exists());
    // 14b. second output fails, first output must rollback partition 5
    mrManager.start(ImmutableMap.of("output.datasets", BOTH, PFS_OUT, "5", OTHER_OUT, "2", "input.text", "2x 5x"));
    mrManager.waitForRuns(ProgramRunStatus.FAILED, ++numRuns, 2, TimeUnit.MINUTES);
    pfsValidator.validate();
    otherValidator.validate();
    Assert.assertFalse(pfs.getPartitionOutput(KEY_5).getLocation().exists());
    // 15. multi-output mapreduce writing partition fails because Hive partition already exists
    // 15a. first output fails, other output must rollback partitions 0 and 5
    mrManager.start(ImmutableMap.of("output.datasets", BOTH, PFS_OUT, "4", "input.text", "0x 5x"));
    mrManager.waitForRuns(ProgramRunStatus.FAILED, ++numRuns, 2, TimeUnit.MINUTES);
    pfsValidator.validate();
    otherValidator.validate();
    Assert.assertFalse(pfs.getPartitionOutput(KEY_4).getLocation().exists());
    Assert.assertFalse(other.getPartitionOutput(KEY_0).getLocation().exists());
    Assert.assertFalse(other.getPartitionOutput(KEY_5).getLocation().exists());
    // 15b. second output fails, first output must rollback partitions 0 and 5
    mrManager.start(ImmutableMap.of("output.datasets", BOTH, OTHER_OUT, "4", "input.text", "0x 5x"));
    mrManager.waitForRuns(ProgramRunStatus.FAILED, ++numRuns, 2, TimeUnit.MINUTES);
    pfsValidator.validate();
    otherValidator.validate();
    Assert.assertFalse(other.getPartitionOutput(KEY_4).getLocation().exists());
    Assert.assertFalse(pfs.getPartitionOutput(KEY_0).getLocation().exists());
    Assert.assertFalse(pfs.getPartitionOutput(KEY_5).getLocation().exists());
}
Also used : ApplicationManager(co.cask.cdap.test.ApplicationManager) MapReduceManager(co.cask.cdap.test.MapReduceManager) PartitionedFileSet(co.cask.cdap.api.dataset.lib.PartitionedFileSet) TransactionFailureException(org.apache.tephra.TransactionFailureException) DataSetException(co.cask.cdap.api.dataset.DataSetException) PartitionOutput(co.cask.cdap.api.dataset.lib.PartitionOutput) UnitTestManager(co.cask.cdap.test.UnitTestManager) OutputStreamWriter(java.io.OutputStreamWriter) Writer(java.io.Writer) Location(org.apache.twill.filesystem.Location) Test(org.junit.Test)
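
All of the single-transaction cases above reduce to one pattern: run the failing operation inside a transaction, expect a TransactionFailureException, then validate that pre-existing files and partitions survived while the new ones were rolled back. A minimal hedged sketch of that pattern, with KEY_X and somePath as placeholders rather than the test's fixtures:

// Minimal sketch of the rollback-check pattern used above; KEY_X and somePath are placeholders.
try {
    pfsManager.execute(new Runnable() {
        @Override
        public void run() {
            // An operation that must fail inside the tx, e.g. adding a duplicate partition
            pfs.addPartition(KEY_X, somePath);
        }
    });
    Assert.fail("Expected the transaction to fail");
} catch (TransactionFailureException e) {
    // expected: Tephra rolled the transaction back
}
// Existing files and partitions must be preserved; newly added ones must be gone
pfsValidator.validate();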

Example 7 with TransactionFailureException

use of org.apache.tephra.TransactionFailureException in project cdap by caskdata.

the class BasicNotificationContext method execute.

@Override
public boolean execute(TxRunnable runnable, TxRetryPolicy policy) {
    int failureCount = 0;
    while (true) {
        try {
            TransactionContext context = datasetContext.newTransactionContext();
            context.start();
            try {
                runnable.run(datasetContext);
            } catch (Throwable t) {
                // abort() rolls back the transaction and then re-throws the given exception
                context.abort(new TransactionFailureException("Exception thrown from runnable. Aborting transaction.", t));
            }
            context.finish();
            return true;
        } catch (Throwable t) {
            switch (policy.handleFailure(++failureCount, t)) {
                case RETRY:
                    LOG.warn("Retrying failed transactional operation", t);
                    break;
                case DROP:
                    LOG.warn("Failed to execute transactional operation", t);
                    return false;
            }
        }
    }
}
Also used : TransactionFailureException(org.apache.tephra.TransactionFailureException) TransactionContext(org.apache.tephra.TransactionContext)
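
A caller supplies the transactional work as a TxRunnable plus a retry policy; execute() loops until the transaction commits (returning true) or the policy answers DROP (returning false). A hedged usage sketch, not from the source: the nested enum name Policy is an assumption, since the switch above only shows that handleFailure() returns constants named RETRY and DROP.

// Retry a failed transaction at most twice, then drop it (enum name "Policy" is assumed).
TxRetryPolicy retryPolicy = new TxRetryPolicy() {
    @Override
    public Policy handleFailure(int failureCount, Throwable t) {
        return failureCount <= 2 ? Policy.RETRY : Policy.DROP;
    }
};
boolean committed = notificationContext.execute(new TxRunnable() {
    @Override
    public void run(DatasetContext context) throws Exception {
        // transactional work against datasets obtained from the context
    }
}, retryPolicy);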

Example 8 with TransactionFailureException

use of org.apache.tephra.TransactionFailureException in project cdap by caskdata.

the class SparkTransactionHandlerTest method testRunJob.

/**
 * Simulates a single job run which contains multiple stages with an optional explicit {@link Transaction} to use.
 *
 * @param jobId the job id
 * @param stages stages of the job
 * @param jobSucceeded end result of the job
 * @param explicitTransaction the job transaction to use if not {@code null}
 */
private void testRunJob(int jobId, Set<Integer> stages, boolean jobSucceeded, @Nullable final Transaction explicitTransaction) throws Exception {
    // Before job start, no transaction will be associated with the stages
    verifyStagesTransactions(stages, new ClientTransactionVerifier() {

        @Override
        public boolean verify(@Nullable Transaction transaction, @Nullable Throwable failureCause) throws Exception {
            return transaction == null && failureCause instanceof TimeoutException;
        }
    });
    // Now start the job
    if (explicitTransaction == null) {
        sparkTxHandler.jobStarted(jobId, stages);
    } else {
        sparkTxHandler.jobStarted(jobId, stages, new TransactionInfo() {

            @Override
            public Transaction getTransaction() {
                return explicitTransaction;
            }

            @Override
            public boolean commitOnJobEnded() {
                return false;
            }

            @Override
            public void onJobStarted() {
            // no-op
            }

            @Override
            public void onTransactionCompleted(boolean jobSucceeded, @Nullable TransactionFailureException failureCause) {
            // no-op
            }
        });
    }
    // For all stages, it should get the same transaction
    final Set<Transaction> transactions = Collections.newSetFromMap(new ConcurrentHashMap<Transaction, Boolean>());
    verifyStagesTransactions(stages, new ClientTransactionVerifier() {

        @Override
        public boolean verify(@Nullable Transaction transaction, @Nullable Throwable failureCause) throws Exception {
            transactions.add(new TransactionWrapper(transaction));
            return transaction != null;
        }
    });
    // Transactions returned for all stages belonging to the same job must return the same transaction
    Assert.assertEquals(1, transactions.size());
    // The transaction must be in progress
    Transaction transaction = transactions.iterator().next();
    Assert.assertTrue(txManager.getCurrentState().getInProgress().containsKey(transaction.getWritePointer()));
    // If run with an explicit transaction, then all stages' transactions must be the same as the explicit transaction
    if (explicitTransaction != null) {
        Assert.assertEquals(new TransactionWrapper(explicitTransaction), transaction);
    }
    // Now finish the job
    sparkTxHandler.jobEnded(jobId, jobSucceeded);
    // After job finished, no transaction will be associated with the stages
    verifyStagesTransactions(stages, new ClientTransactionVerifier() {

        @Override
        public boolean verify(@Nullable Transaction transaction, @Nullable Throwable failureCause) throws Exception {
            return transaction == null && failureCause instanceof TimeoutException;
        }
    });
    // Check the transaction state based on the job result
    TransactionSnapshot txState = txManager.getCurrentState();
    // If explicit transaction is used, the transaction should still be in-progress
    if (explicitTransaction != null) {
        Assert.assertTrue(txState.getInProgress().containsKey(transaction.getWritePointer()));
    } else {
        // With implicit transaction, after job completed, the tx shouldn't be in-progress
        Assert.assertFalse(txState.getInProgress().containsKey(transaction.getWritePointer()));
        if (jobSucceeded) {
            // Transaction must not be in the invalid list
            Assert.assertFalse(txState.getInvalid().contains(transaction.getWritePointer()));
        } else {
            // Transaction must be in the invalid list
            Assert.assertTrue(txState.getInvalid().contains(transaction.getWritePointer()));
        }
    }
}
Also used : TransactionFailureException(org.apache.tephra.TransactionFailureException) TimeoutException(java.util.concurrent.TimeoutException) UnknownHostException(java.net.UnknownHostException) TransactionSnapshot(org.apache.tephra.persist.TransactionSnapshot) Transaction(org.apache.tephra.Transaction)
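
A @Test method driving this private helper might cover the implicit and explicit paths as in the hypothetical sketch below; the stage ids are illustrative, txManager is the test's Tephra TransactionManager (startLong() is standard Tephra API), and ImmutableSet comes from Guava.

// Hypothetical driver for the private helper above.
@Test
public void testJobRuns() throws Exception {
    // Implicit transaction: the handler starts and commits/invalidates the tx itself
    testRunJob(1, ImmutableSet.of(10, 11, 12), true, null);
    testRunJob(2, ImmutableSet.of(20, 21), false, null);
    // Explicit transaction: supplied by the caller, still in progress after the job
    testRunJob(3, ImmutableSet.of(30), true, txManager.startLong());
}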

Example 9 with TransactionFailureException

use of org.apache.tephra.TransactionFailureException in project cdap by caskdata.

the class SparkTransactionClient method getTransaction.

@Nullable
private Transaction getTransaction(int stageId) throws TransactionFailureException {
    try {
        URL url = txServiceBaseURI.resolve("/spark/stages/" + stageId + "/transaction").toURL();
        HttpURLConnection urlConn = (HttpURLConnection) url.openConnection();
        try {
            int responseCode = urlConn.getResponseCode();
            if (responseCode == 200) {
                return TX_CODEC.decode(ByteStreams.toByteArray(urlConn.getInputStream()));
            }
            if (responseCode == 404) {
                return null;
            }
            throw new TransactionFailureException(String.format("No transaction for stage %d. Reason: %s", stageId, Bytes.toString(ByteStreams.toByteArray(urlConn.getErrorStream()))));
        } finally {
            urlConn.disconnect();
        }
    } catch (IOException e) {
        // If we can't talk to the tx service, treat it the same as a 404 so that the caller can retry.
        return null;
    }
}
Also used : TransactionFailureException(org.apache.tephra.TransactionFailureException) HttpURLConnection(java.net.HttpURLConnection) IOException(java.io.IOException) URL(java.net.URL) Nullable(javax.annotation.Nullable)
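
Because both a 404 and an I/O failure surface as null, a caller can simply poll until the transaction becomes available or a deadline passes. A hedged sketch of such a caller, not from the source (the method name, timeout handling, and 50 ms backoff are illustrative; it needs java.util.concurrent.TimeUnit and TimeoutException):

// Hypothetical polling wrapper around getTransaction(stageId).
private Transaction awaitTransaction(int stageId, long timeoutMillis)
    throws TransactionFailureException, TimeoutException, InterruptedException {
    long deadline = System.currentTimeMillis() + timeoutMillis;
    while (System.currentTimeMillis() < deadline) {
        Transaction transaction = getTransaction(stageId);
        if (transaction != null) {
            return transaction;
        }
        // Back off briefly before asking the tx service again
        TimeUnit.MILLISECONDS.sleep(50);
    }
    throw new TimeoutException("No transaction for stage " + stageId + " after " + timeoutMillis + " ms");
}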

Example 10 with TransactionFailureException

use of org.apache.tephra.TransactionFailureException in project cdap by caskdata.

the class SparkTransactional method execute.

/**
 * Executes the given runnable transactionally. If there is an open transaction that can be used,
 * the runnable will be executed with that existing transaction.
 * Otherwise, a new long transaction will be created to execute the given runnable.
 *
 * @param runnable The {@link TxRunnable} to be executed inside a transaction
 * @param transactionType The {@link TransactionType} of the Spark transaction.
 */
void execute(SparkTxRunnable runnable, TransactionType transactionType) throws TransactionFailureException {
    TransactionalDatasetContext txDatasetContext = activeDatasetContext.get();
    boolean needCommit = false;
    // If there is an existing transaction
    if (txDatasetContext != null) {
        TransactionType currentTransactionType = txDatasetContext.getTransactionType();
        // We don't support nested transactions
        if (currentTransactionType == TransactionType.EXPLICIT && transactionType == TransactionType.EXPLICIT) {
            throw new TransactionFailureException("Nested transaction not supported. Active transaction is " + txDatasetContext.getTransaction());
        }
        // If the current transaction is commit on job end, we need some special handling
        if (currentTransactionType == TransactionType.IMPLICIT_COMMIT_ON_JOB_END) {
            // If the job has already started, wait for it to finish so that the transaction
            // associated with the job is completed (asynchronously).
            if (txDatasetContext.isJobStarted()) {
                try {
                    txDatasetContext.awaitCompletion();
                    txDatasetContext = null;
                } catch (InterruptedException e) {
                    // Don't execute the runnable. Reset the interrupt flag and return
                    Thread.currentThread().interrupt();
                    return;
                }
            } else if (transactionType != TransactionType.IMPLICIT_COMMIT_ON_JOB_END) {
                // If the job hasn't been started and the requested type is not commit on job end,
                // we need to "upgrade" the transaction type based on the requested type
                // E.g. if the requested type is EXPLICIT, then the current transaction will become an explicit one
                txDatasetContext.setTransactionType(transactionType);
                needCommit = true;
            }
        }
    }
    // If there is no active transaction, start a new long transaction
    if (txDatasetContext == null) {
        txDatasetContext = new TransactionalDatasetContext(datasetCache, transactionType);
        activeDatasetContext.set(txDatasetContext);
        needCommit = transactionType != TransactionType.IMPLICIT_COMMIT_ON_JOB_END;
    }
    Transaction transaction = txDatasetContext.getTransaction();
    try {
        // Call the runnable
        runnable.run(txDatasetContext);
        // Persist the changes
        txDatasetContext.flush();
        if (needCommit) {
            txClient.commitOrThrow(transaction);
            activeDatasetContext.remove();
            txDatasetContext.postCommit();
            txDatasetContext.discardDatasets();
        }
    } catch (Throwable t) {
        // Only need to rollback and invalidate transaction if the current call needs to commit.
        if (needCommit) {
            // Any exception will cause invalidation of the transaction
            activeDatasetContext.remove();
            txDatasetContext.rollbackWithoutFailure();
            Transactions.invalidateQuietly(txClient, transaction);
        }
        throw Transactions.asTransactionFailure(t);
    }
}
Also used : TransactionFailureException(org.apache.tephra.TransactionFailureException) Transaction(org.apache.tephra.Transaction)
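
From a caller's perspective, EXPLICIT wraps the runnable in its own committed transaction, while nested EXPLICIT calls throw. A hedged usage sketch, not from the source: it assumes SparkTxRunnable.run accepts a DatasetContext, as the call runnable.run(txDatasetContext) above suggests, and "lookup" is a placeholder dataset name.

// Run work in an explicit transaction; commits when execute() returns normally.
sparkTransactional.execute(new SparkTxRunnable() {
    @Override
    public void run(DatasetContext context) throws Exception {
        KeyValueTable table = context.getDataset("lookup");
        // ... transactional reads and writes ...
    }
}, TransactionType.EXPLICIT);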

Aggregations

TransactionFailureException (org.apache.tephra.TransactionFailureException): 55
Test (org.junit.Test): 19
TransactionContext (org.apache.tephra.TransactionContext): 17
IOException (java.io.IOException): 16
TransactionExecutor (org.apache.tephra.TransactionExecutor): 12
TransactionConflictException (org.apache.tephra.TransactionConflictException): 8
TxRunnable (co.cask.cdap.api.TxRunnable): 6
DatasetContext (co.cask.cdap.api.data.DatasetContext): 6
Location (org.apache.twill.filesystem.Location): 6
TransactionAware (org.apache.tephra.TransactionAware): 5
DataSetException (co.cask.cdap.api.dataset.DataSetException): 4
DatasetManagementException (co.cask.cdap.api.dataset.DatasetManagementException): 4
Table (co.cask.cdap.api.dataset.table.Table): 4
ConsumerConfig (co.cask.cdap.data2.queue.ConsumerConfig): 4
List (java.util.List): 4
Map (java.util.Map): 4
ArrayList (java.util.ArrayList): 3
Collection (java.util.Collection): 3
TimeoutException (java.util.concurrent.TimeoutException): 3
Transaction (org.apache.tephra.Transaction): 3