Use of org.neo4j.kernel.ha.store.HighAvailabilityStoreFailureException in project neo4j by neo4j.
The class HighAvailabilityModeSwitcher, method switchToSlave.
/**
 * Starts the transition of this instance into the slave role.
 * <p>
 * The actual work is wrapped in a task that is handed to {@code startModeSwitching} together with a
 * fresh {@link CancellationHandle}. The task re-runs itself when an attempt fails:
 * <ul>
 * <li>after a fixed 5 seconds on a {@code HighAvailabilityStoreFailureException},</li>
 * <li>immediately (by calling itself) on a {@code MismatchingStoreIdException},</li>
 * <li>with a growing delay of 1, 3, 7, ... seconds, capped at 300, on any other {@code Throwable}.</li>
 * </ul>
 */
private void switchToSlave() {
    /*
     * Defensive guard that is not expected to fire. Historically a race allowed the switch-to-slave task
     * to start after this instance had been elected master, after which it kept trying to become a slave
     * of itself without ever being cancelled. Cancelling the task and waiting for it to finish on a
     * single thread executor should rule that out, but the check stays in case another code path lets
     * the condition through.
     */
    if (getServerId(availableMasterId).equals(instanceId)) {
        msgLog.error("I (" + me + ") tried to switch to slave for myself as master (" + availableMasterId + ")");
        return;
    }

    // Delay in seconds used by the generic-failure retry; it grows with every such failure.
    final AtomicLong retryDelaySeconds = new AtomicLong();
    final CancellationHandle switchCancellation = new CancellationHandle();

    final Runnable switchTask = new Runnable() {
        @Override
        public void run() {
            // The target state has moved on, e.g. a second master became available while waiting.
            if (currentTargetState != HighAvailabilityMemberState.TO_SLAVE) {
                return;
            }
            if (switchCancellation.cancellationRequested()) {
                msgLog.info("Switch to slave cancelled on start.");
                return;
            }

            // Runs outside the try block below, so an exception thrown here is not handled by any of
            // the retry paths.
            componentSwitcher.switchToSlave();

            try {
                if (switchCancellation.cancellationRequested()) {
                    msgLog.info("Switch to slave cancelled before ha communication started.");
                    return;
                }

                // Replace the HA communication life cycle with a fresh one for this attempt.
                haCommunicationLife.shutdown();
                haCommunicationLife = new LifeSupport();

                // availableMasterId must be read anew on each attempt; a stale or wrong id could
                // otherwise keep the slave switch looping forever.
                final URI newSlaveUri = switchToSlave.switchToSlave(haCommunicationLife, me, availableMasterId, switchCancellation);
                if (newSlaveUri != null) {
                    slaveHaURI = newSlaveUri;
                    canAskForElections.set(true);
                } else {
                    // A null URI means the task was cancelled: terminate without side effects.
                    msgLog.info("Switch to slave is effectively cancelled");
                }
            } catch (HighAvailabilityStoreFailureException e) {
                userLog.error("UNABLE TO START UP AS SLAVE: %s", e.getMessage());
                msgLog.error("Unable to start up as slave", e);

                clusterMemberAvailability.memberIsUnavailable(SLAVE);
                final ClusterClient client = HighAvailabilityModeSwitcher.this.clusterClient;
                try {
                    // TODO I doubt this actually works
                    client.leave();
                    client.stop();
                    haCommunicationLife.shutdown();
                } catch (Throwable t) {
                    // Best effort only: the retry below is scheduled regardless.
                    msgLog.error("Unable to stop cluster client", t);
                }

                // NOTE(review): unlike the generic-failure path below, the future returned here is not
                // stored in modeSwitcherFuture - confirm whether that is intended.
                modeSwitcherExecutor.schedule(this, 5, TimeUnit.SECONDS);
            } catch (MismatchingStoreIdException e) {
                // Retry right away: whoever threw this has already treated the db as branched, so the
                // next attempt will have this slave copy a new store from master.
                // NOTE(review): this is a direct recursive call, so each consecutive mismatch adds a
                // stack frame.
                run();
            } catch (Throwable t) {
                msgLog.error("Error while trying to switch to slave", t);

                // Retry later with exponential backoff, waiting at most 5 minutes.
                final long nextDelay = Math.min(1 + retryDelaySeconds.get() * 2, 5 * 60);
                retryDelaySeconds.set(nextDelay);

                modeSwitcherFuture = modeSwitcherExecutor.schedule(this, nextDelay, TimeUnit.SECONDS);
                msgLog.info("Attempting to switch to slave in %ds", nextDelay);
            }
        }
    };

    startModeSwitching(switchTask, switchCancellation);
}
Aggregations