Use of MismatchingStoreIdException in project neo4j by neo4j.
the class HighAvailabilityModeSwitcherTest method shouldNotResetAvailableMasterURIIfElectionResultReceived.
// Test: checks that switchToSlave is invoked twice with the same master URI (uri1), i.e. that a
// masterIsElected event carrying a null URI, delivered between the first attempt and its retry,
// does not wipe the available master URI held by the HighAvailabilityModeSwitcher.
// NOTE(review): this snippet looks truncated by extraction - the three latches declared below are
// never awaited or counted down in the visible lines, the block-comment opener and the closing
// braces are missing. Confirm against the original source before relying on it.
public void shouldNotResetAvailableMasterURIIfElectionResultReceived() throws Throwable {
* It is possible that a masterIsElected nulls out the current available master URI in the HAMS. That can
* be a problem if handing the mIE event is concurrent with an ongoing switch which re-runs because
* the store was incompatible or a log was missing. In such a case it will find a null master URI on
* rerun and it will fail.
// Given
SwitchToSlaveCopyThenBranch switchToSlave = mock(SwitchToSlaveCopyThenBranch.class);
// The first run through switchToSlave
final CountDownLatch firstCallMade = new CountDownLatch(1);
// The second run through switchToSlave
final CountDownLatch secondCallMade = new CountDownLatch(1);
// The latch for waiting for the masterIsElected to come through
final CountDownLatch waitForSecondMessage = new CountDownLatch(1);
HighAvailabilityModeSwitcher toTest = new HighAvailabilityModeSwitcher(switchToSlave, mock(SwitchToMaster.class), mock(Election.class), mock(ClusterMemberAvailability.class), mock(ClusterClient.class), storeSupplierMock(), new InstanceId(1), new ComponentSwitcherContainer(), neoStoreDataSourceSupplierMock(), NullLogService.getInstance());
URI uri1 = URI.create("ha://server1");
// Stubbing: the first switchToSlave call throws MismatchingStoreIdException (forcing a re-run),
// the second call returns a slave URI.
when(switchToSlave.switchToSlave(any(LifeSupport.class), any(URI.class), any(URI.class), any(CancellationRequest.class))).thenAnswer(invocation -> {
throw new MismatchingStoreIdException(StoreId.DEFAULT, StoreId.DEFAULT);
}).thenAnswer(invocation -> {
return URI.create("ha://server3");
// When
// The first message goes through, start the first run
toTest.masterIsAvailable(new HighAvailabilityMemberChangeEvent(PENDING, TO_SLAVE, new InstanceId(1), uri1));
// Wait for it to be processed but get just before the exception
// It is just about to throw the exception, i.e. rerun. Send in the event
toTest.masterIsElected(new HighAvailabilityMemberChangeEvent(TO_SLAVE, TO_SLAVE, new InstanceId(1), null));
// Allow to continue and do the second run
// Wait for the call to finish
// Then
// Both invocations must have received uri1 as the master URI argument.
verify(switchToSlave, times(2)).switchToSlave(any(LifeSupport.class), any(URI.class), eq(uri1), any(CancellationRequest.class));
Use of MismatchingStoreIdException in project neo4j by neo4j.
the class SwitchToSlaveBranchThenCopy method checkDataConsistency.
// Checks this instance's store against the master: first the store ids
// (checkMyStoreIdAndMastersStoreId), then the data itself (checkDataConsistencyWithMaster).
//
// Visible failure handling:
//  - StoreUnableToParticipateInClusterException: logged as a warning; if the nested try raises an
//    IOException, that is logged and the original exception is rethrown.
//  - MismatchingStoreIdException: the last committed transaction id is compared with BASE_TX_ID;
//    the visible lines rethrow the exception itself or wrap its expected/encountered ids in a
//    ForeignStoreException.
//
// NOTE(review): the parameters `me` and `cancellationRequest` are not referenced in the visible
// lines. The snippet is damaged by extraction (closing braces, the body of the nested try and the
// receivers of several log-message literals are missing), so the exact branch structure below
// must be confirmed against the original source.
void checkDataConsistency(MasterClient masterClient, TransactionIdStore txIdStore, StoreId storeId, URI masterUri, URI me, CancellationRequest cancellationRequest) throws Throwable {
try {"Checking store consistency with master");
checkMyStoreIdAndMastersStoreId(storeId, masterUri, resolver);
checkDataConsistencyWithMaster(masterUri, masterClient, storeId, txIdStore);"Store is consistent");
} catch (StoreUnableToParticipateInClusterException upe) {"The store is inconsistent. Will treat it as branched and fetch a new one from the master");
msgLog.warn("Current store is unable to participate in the cluster; fetching new store from master", upe);
try {
} catch (IOException e) {
msgLog.warn("Failed while trying to handle branched data", e);
throw upe;
} catch (MismatchingStoreIdException e) {"The store does not represent the same database as master. Will remove and fetch a new one from " + "master");
// presumably the BASE_TX_ID case (an empty store) ends at `throw e;` and the two lines after it
// form the non-empty-store path - TODO confirm, the closing brace is not visible here
if (txIdStore.getLastCommittedTransactionId() == BASE_TX_ID) {
msgLog.warn("Found and deleting empty store with mismatching store id", e);
throw e;
msgLog.error("Store cannot participate in cluster due to mismatching store IDs", e);
throw new ForeignStoreException(e.getExpected(), e.getEncountered());
Use of MismatchingStoreIdException in project neo4j by neo4j.
the class BackupService method incrementalWithContext.
* Performs an incremental backup based off the given context. This means
* receiving and applying selectively (i.e. irrespective of the actual state
* of the target db) a set of transactions starting at the desired txId and
* spanning up to the latest of the master
* @param targetDb The database that contains a previous full copy
* @param context The context, containing transaction id to start streaming transaction from
* @return A backup context, ready to perform
// Runs an incremental backup against sourceHostNameOrIp:sourcePort, applying the streamed
// transactions to targetDb through a TransactionCommittingResponseUnpacker, and returns a
// BackupOutcome built from the last transaction id seen by the ProgressTxHandler.
//
// Exception mapping visible below:
//  - MismatchingStoreIdException                 -> RuntimeException(DIFFERENT_STORE)
//  - cause is MissingLogDataException            -> IncrementalBackupNotPossibleException(TOO_OLD_BACKUP)
//  - cause is ConnectException                   -> RuntimeException(original message, original cause)
//  - any other RuntimeException / IOException    -> RuntimeException("Failed to perform incremental backup.")
//  - any other Throwable                         -> RuntimeException("Unexpected error")
//
// NOTE(review): closing braces were lost in extraction, so which try each catch clause belongs to
// should be confirmed against the original source.
private BackupOutcome incrementalWithContext(String sourceHostNameOrIp, int sourcePort, GraphDatabaseAPI targetDb, long timeout, RequestContext context) throws IncrementalBackupNotPossibleException {
DependencyResolver resolver = targetDb.getDependencyResolver();
ProgressTxHandler handler = new ProgressTxHandler();
TransactionCommittingResponseUnpacker unpacker = new TransactionCommittingResponseUnpacker(resolver, DEFAULT_BATCH_SIZE, 0);
Monitors monitors = resolver.resolveDependency(Monitors.class);
LogProvider logProvider = resolver.resolveDependency(LogService.class).getInternalLogProvider();
BackupClient client = new BackupClient(sourceHostNameOrIp, sourcePort, null, logProvider, targetDb.storeId(), timeout, unpacker, monitors.newMonitor(ByteCounterMonitor.class, BackupClient.class), monitors.newMonitor(RequestMonitor.class, BackupClient.class), new VersionAwareLogEntryReader<>());
// The Lifespan is opened in try-with-resources over both the unpacker and the client, and the
// response is likewise closed automatically.
try (Lifespan lifespan = new Lifespan(unpacker, client)) {
try (Response<Void> response = client.incrementalBackup(context)) {
unpacker.unpackResponse(response, handler);
} catch (MismatchingStoreIdException e) {
throw new RuntimeException(DIFFERENT_STORE, e);
} catch (RuntimeException | IOException e) {
// The explicit null checks are redundant with instanceof (which is false for null) but harmless.
if (e.getCause() != null && e.getCause() instanceof MissingLogDataException) {
throw new IncrementalBackupNotPossibleException(TOO_OLD_BACKUP, e.getCause());
if (e.getCause() != null && e.getCause() instanceof ConnectException) {
// Rethrows with the wrapper's message but the ConnectException as the direct cause.
throw new RuntimeException(e.getMessage(), e.getCause());
throw new RuntimeException("Failed to perform incremental backup.", e);
} catch (Throwable throwable) {
throw new RuntimeException("Unexpected error", throwable);
return new BackupOutcome(handler.getLastSeenTransactionId(), true);
Use of MismatchingStoreIdException in project neo4j by neo4j.
the class HighAvailabilityModeSwitcher method switchToSlave.
// Submits, via startModeSwitching, a Runnable that tries to turn this instance into a slave of
// availableMasterId, together with a CancellationHandle for that task.
//
// Retry policy visible in the Runnable:
//  - HighAvailabilityStoreFailureException: logged to both user and message logs, then the same
//    task is rescheduled after a fixed 5 seconds.
//  - MismatchingStoreIdException: per the existing comment, retried immediately.
//  - any other Throwable: rescheduled with a delay of 1 + 2 * previous seconds, capped at
//    5 * 60 seconds.
//
// NOTE(review): this snippet is damaged by extraction - closing braces, the block-comment openers,
// the receivers of several log-message literals and some statements (e.g. whatever follows the
// early-exit checks) are missing. Confirm details against the original source.
private void switchToSlave() {
* This is purely defensive and should never trigger. There was a race where the switch to slave task would
* start after this instance was elected master and the task would constantly try to change as slave
* for itself, never cancelling. This now should not be possible, since we cancel the task and wait for it
* to complete, all in a single thread executor. However, this is a check worth doing because if this
* condition slips through via some other code path it can cause trouble.
if (getServerId(availableMasterId).equals(instanceId)) {
msgLog.error("I (" + me + ") tried to switch to slave for myself as master (" + availableMasterId + ")");
// Holds the current backoff delay in seconds; starts at 0 and is only updated in the generic
// Throwable handler below.
final AtomicLong wait = new AtomicLong();
final CancellationHandle cancellationHandle = new CancellationHandle();
startModeSwitching(new Runnable() {
public void run() {
if (currentTargetState != HighAvailabilityMemberState.TO_SLAVE) {
// Already switched - this can happen if a second master becomes available while waiting
if (cancellationHandle.cancellationRequested()) {"Switch to slave cancelled on start.");
try {
if (cancellationHandle.cancellationRequested()) {"Switch to slave cancelled before ha communication started.");
haCommunicationLife = new LifeSupport();
// it is important for availableMasterId to be re-read on every attempt so that
// slave switching would not result in an infinite loop with wrong/stale availableMasterId
URI resultingSlaveHaURI = switchToSlave.switchToSlave(haCommunicationLife, me, availableMasterId, cancellationHandle);
if (resultingSlaveHaURI == null) {
* null slave uri means the task was cancelled. The task then must simply terminate and
* have no side effects.
*/"Switch to slave is effectively cancelled");
} else {
slaveHaURI = resultingSlaveHaURI;
} catch (HighAvailabilityStoreFailureException e) {
userLog.error("UNABLE TO START UP AS SLAVE: %s", e.getMessage());
msgLog.error("Unable to start up as slave", e);
ClusterClient clusterClient = HighAvailabilityModeSwitcher.this.clusterClient;
try {
// TODO I doubt this actually works
} catch (Throwable t) {
msgLog.error("Unable to stop cluster client", t);
// Retry this same task after a fixed 5 second delay.
// NOTE(review): unlike the backoff path below, the returned future is not stored in
// modeSwitcherFuture here - confirm whether that is intentional.
modeSwitcherExecutor.schedule(this, 5, TimeUnit.SECONDS);
} catch (MismatchingStoreIdException e) {
// Try again immediately, the place that threw it have already treated the db
// as branched and so a new attempt will have this slave copy a new store from master.
} catch (Throwable t) {
msgLog.error("Error while trying to switch to slave", t);
// Try again later
// Exponential backoff
wait.set(1 + wait.get() * 2);
// Wait maximum 5 minutes
wait.set(Math.min(wait.get(), 5 * 60));
modeSwitcherFuture = modeSwitcherExecutor.schedule(this, wait.get(), TimeUnit.SECONDS);"Attempting to switch to slave in %ds", wait.get());
}, cancellationHandle);