Use of com.torodb.mongodb.repl.OplogManager.OplogManagerPersistException in project torodb by torodb.
The class RecoveryService, method initialSync.
/**
 * Runs an initial sync against a newly selected sync source: local data is dropped, the remote
 * databases are cloned, the remote oplog is applied twice to catch up, and indexes are rebuilt.
 *
 * <p>The consistent flag is cleared through {@code callback.setConsistentState(false)} at the
 * start and only set back to {@code true} when every step has completed.
 *
 * @return {@code true} when the sync completed; {@code false} when {@code isRunning()} reported
 *         that the service was stopped at one of the intermediate checkpoints
 * @throws TryAgainException when no sync source is found, the remote server is unreachable, the
 *         local databases cannot be dropped, or a Mongo/rollback/oplog error is raised
 * @throws FatalErrorException when the oplog manager cannot persist its state or a
 *         {@code UserException} is raised
 */
private boolean initialSync() throws TryAgainException, FatalErrorException {
  /*
   * Intended algorithm:
   *  1. store that data is inconsistent
   *  2. decide a sync source
   *  3. lastRemoteOptime1 = get the last optime of the sync source
   *  4. clone all databases except local
   *  5. lastRemoteOptime2 = get the last optime of the sync source
   *  6. apply remote oplog from lastRemoteOptime1 to lastRemoteOptime2
   *  7. lastRemoteOptime3 = get the last optime of the sync source
   *  8. apply remote oplog from lastRemoteOptime2 to lastRemoteOptime3
   *  9. rebuild indexes
   * 10. store lastRemoteOptime3 as the last applied operation optime
   * 11. store that data is consistent
   * 12. change replication state to SECONDARY
   *
   * NOTE(review): this method persists the optime of the operation read before cloning and does
   * not visibly perform steps 10 and 12 -- presumably handled elsewhere; confirm.
   */
  //TODO: Support fastsync (used to restore a node by copying the data from other up-to-date node)
  LOGGER.info("Starting initial sync");
  callback.setConsistentState(false);

  HostAndPort syncSource;
  try {
    syncSource = syncSourceProvider.newSyncSource();
    LOGGER.info("Using node " + syncSource + " to replicate from");
  } catch (NoSyncSourceFoundException ex) {
    throw new TryAgainException("No sync source");
  }

  MongoClient remoteClient;
  try {
    remoteClient = remoteClientFactory.createClient(syncSource);
  } catch (UnreachableMongoServerException ex) {
    throw new TryAgainException(ex);
  }

  try {
    LOGGER.debug("Remote client obtained");
    MongoConnection remoteConnection = remoteClient.openConnection();
    try (OplogReader reader = oplogReaderProvider.newReader(remoteConnection)) {
      // Remember where the remote oplog was before cloning; replay starts from here.
      OplogOperation lastClonedOp = reader.getLastOp();
      OpTime lastRemoteOptime1 = lastClonedOp.getOpTime();

      try (WriteOplogTransaction oplogTransaction = oplogManager.createWriteTransaction()) {
        oplogTransaction.truncate();

        LOGGER.info("Local databases dropping started");
        Status<?> status = dropDatabases();
        if (!status.isOk()) {
          throw new TryAgainException("Error while trying to drop collections: " + status);
        }
        LOGGER.info("Local databases dropping finished");
        if (!isRunning()) {
          LOGGER.warn("Recovery stopped before it can finish");
          return false;
        }

        LOGGER.info("Remote database cloning started");
        cloneDatabases(remoteClient);
        LOGGER.info("Remote database cloning finished");
        oplogTransaction.forceNewValue(lastClonedOp.getHash(), lastClonedOp.getOpTime());
      }
      if (!isRunning()) {
        LOGGER.warn("Recovery stopped before it can finish");
        return false;
      }

      TorodServer torodServer = server.getTorodServer();
      try (TorodConnection connection = torodServer.openConnection();
          SharedWriteTorodTransaction trans = connection.openWriteTransaction(false)) {
        OpTime lastRemoteOptime2 = reader.getLastOp().getOpTime();
        LOGGER.info("First oplog application started");
        applyOplog(reader, lastRemoteOptime1, lastRemoteOptime2);
        trans.commit();
        LOGGER.info("First oplog application finished");
        if (!isRunning()) {
          LOGGER.warn("Recovery stopped before it can finish");
          return false;
        }

        // Second pass: pick up whatever the remote wrote while the first pass was running.
        OpTime lastRemoteOptime3 = reader.getLastOp().getOpTime();
        LOGGER.info("Second oplog application started");
        applyOplog(reader, lastRemoteOptime2, lastRemoteOptime3);
        trans.commit();
        LOGGER.info("Second oplog application finished");
        if (!isRunning()) {
          LOGGER.warn("Recovery stopped before it can finish");
          return false;
        }

        LOGGER.info("Index rebuild started");
        rebuildIndexes();
        trans.commit();
        LOGGER.info("Index rebuild finished");
        if (!isRunning()) {
          LOGGER.warn("Recovery stopped before it can finish");
          return false;
        }
        // NOTE(review): nothing is written between the previous commit and this one; it looks
        // redundant but is kept to preserve behavior -- confirm.
        trans.commit();
      }
    } catch (OplogStartMissingException ex) {
      throw new TryAgainException(ex);
    } catch (OplogOperationUnsupported ex) {
      throw new TryAgainException(ex);
    } catch (MongoException | RollbackException ex) {
      throw new TryAgainException(ex);
    } catch (OplogManagerPersistException ex) {
      // Keep the cause so the persistence failure is visible to whoever handles the fatal error.
      throw new FatalErrorException(ex);
    } catch (UserException ex) {
      throw new FatalErrorException(ex);
    }

    callback.setConsistentState(true);
    LOGGER.info("Initial sync finished");
  } finally {
    remoteClient.close();
  }
  return true;
}
Use of com.torodb.mongodb.repl.OplogManager.OplogManagerPersistException in project torodb by torodb.
The class ReplSyncApplier, method runProtected.
/**
 * Main loop of the applier: while the service is running, takes batches of oplog operations
 * from the callback, applies and commits each one inside an exclusive write transaction, and
 * then records the last operation seen in the batch through the oplog manager.
 */
@Override
protected void runProtected() {
  runThread = Thread.currentThread();

  /*
   * TODO: In general, the replication context can be set as not reapplying. But it is
   * infrequent yet possible to stop the replication after some oplog ops have been applied but
   * not marked as executed on the oplog manager. For that reason, all oplog ops between the last
   * operation that has been marked as applied and the current last operation on the remote oplog
   * must be executed as reapplying operations. As it is not possible to do that yet, we have to
   * always apply operations as reapplying to be safe.
   */
  ApplierContext reapplyContext = new ApplierContext.Builder()
      .setReapplying(true)
      .setUpdatesAsUpserts(true)
      .build();

  while (isRunning()) {
    OplogOperation lastSeenOp = null;

    try (ExclusiveWriteMongodTransaction transaction =
        connection.openExclusiveWriteTransaction()) {
      try {
        for (OplogOperation op : callback.takeOps()) {
          lastSeenOp = op;
          LOGGER.trace("Executing {}", op);
          try {
            // Retry the same operation until it commits without a rollback.
            while (true) {
              try {
                oplogOpApplier.apply(op, transaction, reapplyContext);
                transaction.commit();
                break;
              } catch (RollbackException rollback) {
                LOGGER.debug("Recived a rollback exception while applying an oplog op", rollback);
              }
            }
          } catch (OplogApplyingException applyError) {
            // NOTE(review): the callback result is negated here but not in the handlers
            // below; the contract of failedToApply is not visible from this method -- confirm
            // which polarity is intended.
            if (!callback.failedToApply(op, applyError)) {
              LOGGER.error(serviceName() + " stopped because one operation cannot be executed",
                  applyError);
              break;
            }
          } catch (UserException userError) {
            if (callback.failedToApply(op, userError)) {
              LOGGER.error(serviceName() + " stopped because one operation cannot be executed",
                  userError);
              break;
            }
          } catch (Throwable unexpected) {
            if (callback.failedToApply(op, unexpected)) {
              LOGGER.error(serviceName() + " stopped because an unknown error", unexpected);
              break;
            }
          }
          callback.markAsApplied(op);
        }
      } catch (InterruptedException interrupted) {
        LOGGER.debug("Interrupted applier thread while applying an operator");
      }
    }

    // Nothing was taken in this round, so there is nothing to persist.
    if (lastSeenOp == null) {
      continue;
    }

    try (WriteOplogTransaction oplogTransaction = oplogManager.createWriteTransaction()) {
      oplogTransaction.addOperation(lastSeenOp);
    } catch (OplogManagerPersistException persistError) {
      if (callback.failedToApply(lastSeenOp, persistError)) {
        LOGGER.error(serviceName()
            + " stopped because the last applied operation couldn't be persisted",
            persistError);
        break;
      }
    }
  }
}
Aggregations