use of com.torodb.core.transaction.RollbackException in project torodb by torodb.
the class ContinuousOplogFetcher method fetch.
/**
 * Fetches the next batch of operations from the remote oplog.
 *
 * <p>The actual fetch is executed through the retrier. Inside it, reader and cursor problems
 * are translated into a {@link RollbackException} (so the retrier can try again, in some cases
 * after discarding the current reader), while a {@link StopReplicationException} or a
 * {@link RollbackReplicationException} is wrapped into a {@link RetrierAbortException} so it
 * is not retried and can be unwrapped and rethrown here.
 *
 * @return {@link FinishedOplogBatch} if the state is closed, {@link NotReadyForMoreOplogBatch}
 *         if two fetch attempts separated by one second returned nothing, or a
 *         {@link NormalOplogBatch} with the fetched operations
 * @throws StopReplicationException     if the retrier gives up, or aborts with a cause that is
 *                                      not a {@link RollbackReplicationException}
 * @throws RollbackReplicationException if the retrier aborted with that exception as cause
 */
@Override
public OplogBatch fetch() throws StopReplicationException, RollbackReplicationException {
  if (state.isClosed()) {
    return FinishedOplogBatch.getInstance();
  }
  try {
    return retrier.retry(() -> {
      try {
        //the state may have been closed between retries
        if (state.isClosed()) {
          return FinishedOplogBatch.getInstance();
        }
        state.prepareToFetch();
        MongoCursor<OplogOperation> oplogCursor = state.getLastUsedMongoCursor();
        Batch<OplogOperation> fetched = oplogCursor.tryFetchBatch();
        if (fetched == null || !fetched.hasNext()) {
          //nothing available: wait one second and ask once more before giving up this round
          Thread.sleep(1000);
          fetched = oplogCursor.tryFetchBatch();
          if (fetched == null || !fetched.hasNext()) {
            return NotReadyForMoreOplogBatch.getInstance();
          }
        }
        List<OplogOperation> ops = null;
        long fetchedAt = 0;
        /*
         * The cursor has already been advanced by fetching the batch, so the whole block cannot
         * simply be retried (the cursor would be reused and the previous batch would be
         * discarded).
         *
         * Therefore, if the following try section is left with an error, the cursor is closed
         * so the next iteration starts from the last batch that was returned. If it finishes
         * correctly, the state is updated instead.
         */
        boolean failed = true;
        try {
          ops = fetched.asList();
          fetchedAt = fetched.getFetchTime();
          postBatchChecks(oplogCursor, ops);
          OplogBatch result = new NormalOplogBatch(ops, true);
          failed = false;
          return result;
        } finally {
          if (failed) {
            oplogCursor.close();
          } else {
            assert ops != null;
            assert fetchedAt != 0;
            state.updateState(ops, fetchedAt);
          }
        }
      } catch (RestartFetchException ex) {
        //pick a new reader and let the retrier try again
        state.discardReader();
        throw new RollbackException(ex);
      } catch (DeadCursorException ex) {
        //retry the whole block keeping the same reader
        throw new RollbackException(ex);
      } catch (StopReplicationException | RollbackReplicationException ex) {
        //these must not be retried
        throw new RetrierAbortException(ex);
      } catch (MongoServerException ex) {
        //TODO: Fix this violation on the abstraction!
        LOGGER.debug("Found an unwrapped MongodbServerException");
        //pick a new reader, rollback and hopefully use another member
        state.discardReader();
        throw new RollbackException(ex);
      } catch (MongoException | MongoRuntimeException ex) {
        LOGGER.warn("Catched an error while reading the remote " + "oplog: {}", ex.getLocalizedMessage());
        //pick a new reader, rollback and hopefully use another member
        state.discardReader();
        throw new RollbackException(ex);
      }
    }, Hint.CRITICAL, Hint.TIME_SENSIBLE);
  } catch (RetrierGiveUpException ex) {
    this.close();
    throw new StopReplicationException("Stopping replication after several attepts to " + "fetch the remote oplog", ex);
  } catch (RetrierAbortException ex) {
    this.close();
    //instanceof is false for null, so a missing cause falls through to the generic stop
    Throwable abortCause = ex.getCause();
    if (abortCause instanceof StopReplicationException) {
      throw (StopReplicationException) abortCause;
    }
    if (abortCause instanceof RollbackReplicationException) {
      throw (RollbackReplicationException) abortCause;
    }
    throw new StopReplicationException("Stopping replication after a unknown abort " + "exception", ex);
  }
}
use of com.torodb.core.transaction.RollbackException in project torodb by torodb.
the class SimpleAnalyzedOplogBatchExecutorTest method testVisit_SingleOp_NotRepyingRollback.
/**
 * Helper that exercises
 * {@link SimpleAnalyzedOplogBatchExecutor#visit(com.torodb.mongodb.repl.oplogreplier.batch.SingleOpAnalyzedOplogBatch, com.torodb.mongodb.repl.oplogreplier.ApplierContext)}
 * on a single-operation batch while the spied
 * {@link SimpleAnalyzedOplogBatchExecutor#execute(com.eightkdata.mongowp.server.api.oplog.OplogOperation, com.torodb.mongodb.repl.oplogreplier.ApplierContext)}
 * keeps throwing {@link RollbackException} until the requested attempt.
 *
 * <p>The stubbed execution always fails on its first call (where it also asserts that updates
 * are <em>not</em> treated as upserts) and, on later calls, asserts that updates are treated
 * as upserts. Whatever the outcome, the helper verifies that the single-op timer was requested
 * from the metrics and started.
 *
 * @param myRetrier        the retrier given to the executor under test
 * @param atteptsToSucceed the number of the call (counting from one) at which the stubbed
 *                         execution stops throwing; the first call throws in any case
 * @return true if the visit finishes normally, false if it ends with a
 *         {@link RetrierGiveUpException}
 * @throws Exception if the visit fails in any other way
 */
private boolean testVisit_SingleOp_NotRepyingRollback(Retrier myRetrier, int atteptsToSucceed) throws Exception {
  //GIVEN
  OplogOperation operation = mock(OplogOperation.class);
  SingleOpAnalyzedOplogBatch batch = new SingleOpAnalyzedOplogBatch(operation);
  ApplierContext applierContext = new ApplierContext.Builder()
      .setReapplying(true)
      .setUpdatesAsUpserts(false)
      .build();
  executor = spy(new SimpleAnalyzedOplogBatchExecutor(metrics, applier, server, myRetrier, namespaceJobExecutor));
  Timer timer = mock(Timer.class);
  Context context = mock(Context.class);
  given(metrics.getSingleOpTimer(operation)).willReturn(timer);
  given(timer.time()).willReturn(context);
  doAnswer(new Answer() {
    /** Number of times the stubbed execute has been invoked so far. */
    int calls = 0;

    @Override
    public Object answer(InvocationOnMock invocation) throws Throwable {
      //the counter advances on every invocation, no matter how the invocation ends
      final int attempt = calls++;
      ApplierContext usedContext = invocation.getArgument(1);
      if (attempt == 0) {
        assert !usedContext.treatUpdateAsUpsert() : "on this test, first attept should be not trying updates as upserts";
        throw new RollbackException("Forcing a rollback on the first attempt");
      }
      assert usedContext.treatUpdateAsUpsert() : "on this test, only the first attept should be " + "not trying updates as upserts, but " + attempt + " is not trying updates as upserts";
      if (attempt < (atteptsToSucceed - 1)) {
        throw new RollbackException("forcing a rollback on the " + attempt + "th attempt");
      }
      return null;
    }
  }).when(executor).execute(eq(operation), any());
  boolean finished;
  try {
    //WHEN
    OplogOperation result = executor.visit(batch, applierContext);
    //THEN
    then(executor).should(times(atteptsToSucceed)).execute(eq(operation), any());
    assertEquals(operation, result);
    finished = true;
  } catch (RetrierGiveUpException ignore) {
    //the retrier gave up: that is reported to the caller through the returned value
    finished = false;
  } finally {
    //the timer must have been used even if the visit failed
    then(metrics).should().getSingleOpTimer(operation);
    then(timer).should().time();
  }
  return finished;
}
use of com.torodb.core.transaction.RollbackException in project torodb by torodb.
the class SimpleAnalyzedOplogBatchExecutorTest method testVisit_CudAnalyzedOplog_NotRepyingRollback.
/**
 * Helper that exercises
 * {@link SimpleAnalyzedOplogBatchExecutor#visit(com.torodb.mongodb.repl.oplogreplier.batch.CudAnalyzedOplogBatch, com.torodb.mongodb.repl.oplogreplier.ApplierContext)}
 * on a mocked cud batch while the spied
 * {@link SimpleAnalyzedOplogBatchExecutor#execute(com.torodb.mongodb.repl.oplogreplier.batch.CudAnalyzedOplogBatch, com.torodb.mongodb.repl.oplogreplier.ApplierContext)}
 * keeps throwing {@link RollbackException} until the requested attempt.
 *
 * <p>The stubbed execution always fails on its first call (where it also asserts that updates
 * are <em>not</em> treated as upserts) and, on later calls, asserts that updates are treated
 * as upserts. When the visit finishes normally or with a {@link RetrierGiveUpException}, the
 * helper verifies that the batch size metric was updated and that the cud batch timer was
 * requested and started.
 *
 * @param myRetrier        the retrier given to the executor under test
 * @param atteptsToSucceed the number of the call (counting from one) at which the stubbed
 *                         execution stops throwing; the first call throws in any case
 * @return true if the visit finishes normally, false if it ends with a
 *         {@link RetrierGiveUpException}
 * @throws Exception if the visit fails in any other way
 */
private boolean testVisit_CudAnalyzedOplog_NotRepyingRollback(Retrier myRetrier, int atteptsToSucceed) throws Exception {
  //GIVEN
  OplogOperation lastOp = mock(OplogOperation.class);
  CudAnalyzedOplogBatch batch = mock(CudAnalyzedOplogBatch.class);
  ApplierContext applierContext = new ApplierContext.Builder()
      .setReapplying(true)
      .setUpdatesAsUpserts(false)
      .build();
  //the original batch holds four operations, lastOp being the last one
  given(batch.getOriginalBatch()).willReturn(Lists.newArrayList(
      mock(OplogOperation.class),
      mock(OplogOperation.class),
      mock(OplogOperation.class),
      lastOp));
  executor = spy(new SimpleAnalyzedOplogBatchExecutor(metrics, applier, server, myRetrier, namespaceJobExecutor));
  Timer timer = mock(Timer.class);
  Context context = mock(Context.class);
  given(metrics.getCudBatchTimer()).willReturn(timer);
  given(timer.time()).willReturn(context);
  doAnswer(new Answer() {
    /** Number of times the stubbed execute has been invoked so far. */
    int calls = 0;

    @Override
    public Object answer(InvocationOnMock invocation) throws Throwable {
      //the counter advances on every invocation, no matter how the invocation ends
      final int attempt = calls++;
      ApplierContext usedContext = invocation.getArgument(1);
      if (attempt == 0) {
        assert !usedContext.treatUpdateAsUpsert() : "on this test, first attept should be not trying updates as upserts";
        throw new RollbackException("Forcing a rollback on the first attempt");
      }
      assert usedContext.treatUpdateAsUpsert() : "on this test, only the first attept should be " + "not trying updates as upserts, but " + attempt + " is not trying updates as upserts";
      if (attempt < (atteptsToSucceed - 1)) {
        throw new RollbackException("forcing a rollback on the " + attempt + "th attempt");
      }
      return null;
    }
  }).when(executor).execute(eq(batch), any());
  boolean success = false;
  try {
    //WHEN
    OplogOperation result = executor.visit(batch, applierContext);
    //THEN
    then(executor).should(times(atteptsToSucceed)).execute(eq(batch), any());
    assertEquals(lastOp, result);
    success = true;
  } catch (RetrierGiveUpException ignore) {
    //the retrier gave up: success stays false and the metrics are still verified below
  }
  then(metrics.getCudBatchSize()).should().update(batch.getOriginalBatch().size());
  then(metrics).should().getCudBatchTimer();
  then(metrics.getCudBatchTimer()).should().time();
  return success;
}
use of com.torodb.core.transaction.RollbackException in project torodb by torodb.
the class ReplSyncFetcher method runProtected.
/**
 * Main loop of the service: fetches oplog batches and delivers their operations to the
 * callback until the service stops running, the remote oplog finishes, a rollback is
 * requested or the replication must stop.
 *
 * <p>On each iteration it waits while the callback asks to pause, waits until everything
 * already fetched has been applied, fetches a new batch and delivers each of its operations.
 * How the loop ends decides which callback method is notified:
 * <ul>
 * <li>{@code rollback} when the fetch throws a {@link RollbackReplicationException}</li>
 * <li>{@code fetchFinished} when the last batch is found or the service is no longer
 * running</li>
 * <li>{@code fetchAborted} when a {@link StopReplicationException} is thrown, either directly
 * or by wrapping any other unexpected throwable</li>
 * </ul>
 * An {@link InterruptedException} or a {@link RollbackException} only restarts the iteration.
 */
@Override
public void runProtected() {
  runThread = Thread.currentThread();
  RollbackReplicationException pendingRollback = null;
  boolean reachedOplogEnd = false;
  try {
    while (isRunning()) {
      try {
        if (callback.shouldPause()) {
          callback.awaitUntilUnpaused();
          continue;
        }
        callback.awaitUntilAllFetchedAreApplied();
        OplogBatch fetched = fetcher.fetch();
        if (fetched.isLastOne()) {
          reachedOplogEnd = true;
          break;
        }
        fetched.getOps().forEach((op) -> {
          try {
            callback.deliver(op);
          } catch (InterruptedException interrupted) {
            //checked exceptions cannot leave the lambda, so it is translated to a rollback
            Thread.interrupted();
            throw new RollbackException(serviceName() + " interrupted while a " + "message was being to deliver.", interrupted);
          }
        });
        if (!fetched.isReadyForMore()) {
          LOGGER.warn("There is no source to sync from");
          Thread.sleep(1000);
        }
      } catch (InterruptedException interrupted) {
        Thread.interrupted();
        LOGGER.info("Restarting fetch process", interrupted);
      } catch (RollbackReplicationException rollbackRequest) {
        //remember the request and leave the loop without checking isRunning() again
        pendingRollback = rollbackRequest;
        break;
      } catch (RollbackException ignore) {
        LOGGER.info("Retrying after a rollback exception");
      } catch (StopReplicationException stop) {
        throw stop;
      } catch (Throwable unexpected) {
        throw new StopReplicationException(unexpected);
      }
    }
    if (pendingRollback != null) {
      LOGGER.debug("Requesting rollback");
      callback.rollback(pendingRollback);
    } else {
      LOGGER.info(reachedOplogEnd
          ? "Remote oplog finished"
          : serviceName() + " ending by external request");
      callback.fetchFinished();
    }
  } catch (StopReplicationException stop) {
    LOGGER.info(serviceName() + " stopped by self request");
    callback.fetchAborted(stop);
  }
  LOGGER.info(serviceName() + " stopped");
}
use of com.torodb.core.transaction.RollbackException in project torodb by torodb.
the class RecoveryService method initialSync.
/**
 * Runs an initial sync against a freshly chosen sync source.
 *
 * <p>The data is flagged as inconsistent, the local oplog is truncated, the local databases
 * are dropped, the remote databases are cloned and the remote oplog is applied twice (from
 * the optime read before cloning up to the one read after it, and then up to a third, newer
 * one). Finally indexes are rebuilt and the data is flagged as consistent again. After each
 * long step {@code isRunning()} is checked, so a stop request ends the sync early.
 *
 * @return {@code true} if the sync ran to completion, {@code false} if the service stopped
 *         running before it could finish
 * @throws TryAgainException   if no sync source is found, the remote server is unreachable,
 *                             the local databases cannot be dropped, or the oplog cannot be
 *                             read or applied (including Mongo and rollback errors)
 * @throws FatalErrorException if the oplog manager fails to persist or a user exception is
 *                             thrown; the original exception is kept as the cause
 */
private boolean initialSync() throws TryAgainException, FatalErrorException {
  /*
   * Procedure as described by the original author:
   *  1. store that data is inconsistent
   *  2. decide a sync source
   *  3. lastRemoteOptime1 = get the last optime of the sync source
   *  4. clone all databases except local
   *  5. lastRemoteOptime2 = get the last optime of the sync source
   *  6. apply remote oplog from lastRemoteOptime1 to lastRemoteOptime2
   *  7. lastRemoteOptime3 = get the last optime of the sync source
   *  8. apply remote oplog from lastRemoteOptime2 to lastRemoteOptime3
   *  9. rebuild indexes
   * 10. store lastRemoteOptime3 as the last applied operation optime
   * 11. store that data is consistent
   * 12. change replication state to SECONDARY
   *
   * NOTE(review): steps 10 and 12 are not visible in this method; presumably they are done
   * by applyOplog or by the caller -- confirm.
   */
  //TODO: Support fastsync (used to restore a node by copying the data from other up-to-date node)
  LOGGER.info("Starting initial sync");
  callback.setConsistentState(false);
  HostAndPort syncSource;
  try {
    syncSource = syncSourceProvider.newSyncSource();
    LOGGER.info("Using node " + syncSource + " to replicate from");
  } catch (NoSyncSourceFoundException ex) {
    throw new TryAgainException("No sync source");
  }
  MongoClient remoteClient;
  try {
    remoteClient = remoteClientFactory.createClient(syncSource);
  } catch (UnreachableMongoServerException ex) {
    throw new TryAgainException(ex);
  }
  //from here on the remote client is always closed, whatever the outcome
  try {
    LOGGER.debug("Remote client obtained");
    MongoConnection remoteConnection = remoteClient.openConnection();
    try (OplogReader reader = oplogReaderProvider.newReader(remoteConnection)) {
      //the last remote operation before cloning marks where the first oplog application starts
      OplogOperation lastClonedOp = reader.getLastOp();
      OpTime lastRemoteOptime1 = lastClonedOp.getOpTime();
      try (WriteOplogTransaction oplogTransaction = oplogManager.createWriteTransaction()) {
        //NOTE(review): this message is logged again right before cloneDatabases; this first
        //occurrence actually precedes the local truncate/drop phase
        LOGGER.info("Remote database cloning started");
        oplogTransaction.truncate();
        LOGGER.info("Local databases dropping started");
        Status<?> status = dropDatabases();
        if (!status.isOk()) {
          throw new TryAgainException("Error while trying to drop collections: " + status);
        }
        LOGGER.info("Local databases dropping finished");
        if (!isRunning()) {
          LOGGER.warn("Recovery stopped before it can finish");
          return false;
        }
        LOGGER.info("Remote database cloning started");
        cloneDatabases(remoteClient);
        LOGGER.info("Remote database cloning finished");
        oplogTransaction.forceNewValue(lastClonedOp.getHash(), lastClonedOp.getOpTime());
      }
      if (!isRunning()) {
        LOGGER.warn("Recovery stopped before it can finish");
        return false;
      }
      TorodServer torodServer = server.getTorodServer();
      try (TorodConnection connection = torodServer.openConnection();
          SharedWriteTorodTransaction trans = connection.openWriteTransaction(false)) {
        //first pass: operations that happened while the databases were being cloned
        OpTime lastRemoteOptime2 = reader.getLastOp().getOpTime();
        LOGGER.info("First oplog application started");
        applyOplog(reader, lastRemoteOptime1, lastRemoteOptime2);
        trans.commit();
        LOGGER.info("First oplog application finished");
        if (!isRunning()) {
          LOGGER.warn("Recovery stopped before it can finish");
          return false;
        }
        //second pass: operations that happened during the first pass
        OplogOperation lastOperation = reader.getLastOp();
        OpTime lastRemoteOptime3 = lastOperation.getOpTime();
        LOGGER.info("Second oplog application started");
        applyOplog(reader, lastRemoteOptime2, lastRemoteOptime3);
        trans.commit();
        LOGGER.info("Second oplog application finished");
        if (!isRunning()) {
          LOGGER.warn("Recovery stopped before it can finish");
          return false;
        }
        LOGGER.info("Index rebuild started");
        rebuildIndexes();
        trans.commit();
        LOGGER.info("Index rebuild finished");
        if (!isRunning()) {
          LOGGER.warn("Recovery stopped before it can finish");
          return false;
        }
        trans.commit();
      }
    } catch (OplogStartMissingException ex) {
      throw new TryAgainException(ex);
    } catch (OplogOperationUnsupported ex) {
      throw new TryAgainException(ex);
    } catch (MongoException | RollbackException ex) {
      throw new TryAgainException(ex);
    } catch (OplogManagerPersistException ex) {
      //keep the cause: otherwise the reason of the fatal error is lost to whoever handles it
      throw new FatalErrorException(ex);
    } catch (UserException ex) {
      throw new FatalErrorException(ex);
    }
    callback.setConsistentState(true);
    LOGGER.info("Initial sync finished");
  } finally {
    remoteClient.close();
  }
  return true;
}
Aggregations