Use of org.neo4j.cluster.client.ClusterClient in project neo4j by neo4j.
The class ClusterManager, method stateToString:
public static String stateToString(ManagedCluster cluster) {
    StringBuilder buf = new StringBuilder("\n");
    for (HighlyAvailableGraphDatabase database : cluster.getAllMembers()) {
        ClusterClient client = database.getDependencyResolver().resolveDependency(ClusterClient.class);
        buf.append("Instance ").append(client.getServerId())
           .append(":State ").append(database.getInstanceState())
           .append(" (").append(client.getClusterServer()).append("):")
           .append("\n");
        ClusterMembers members = database.getDependencyResolver().resolveDependency(ClusterMembers.class);
        buf.append(members);
    }
    return buf.toString();
}
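The builder above produces one line per cluster member, so the result reads as a compact cluster overview. A minimal sketch of how it might be used from an HA test; the clusterRule fixture and the startCluster() call are assumptions, not part of the snippet above:

// Hypothetical test fragment: dump per-instance state when a cluster assertion fails.
// clusterRule.startCluster() is an assumed fixture returning a ClusterManager.ManagedCluster.
ClusterManager.ManagedCluster cluster = clusterRule.startCluster();
try {
    // ... exercise the cluster and assert on its behaviour ...
} catch (AssertionError e) {
    // One line per member: "Instance <id>:State <state> (<cluster URI>):", followed by that
    // instance's ClusterMembers view.
    System.err.println(ClusterManager.stateToString(cluster));
    throw e;
}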
Use of org.neo4j.cluster.client.ClusterClient in project neo4j by neo4j.
The class HighAvailabilityModeSwitcher, method switchToSlave:
private void switchToSlave() {
    /*
     * This is purely defensive and should never trigger. There was a race where the switch-to-slave task would
     * start after this instance was elected master, and the task would constantly try to switch to slave
     * for itself, never cancelling. This should no longer be possible, since we cancel the task and wait for it
     * to complete, all in a single-thread executor. However, this is a check worth doing, because if this
     * condition slips through via some other code path it can cause trouble.
     */
    if (getServerId(availableMasterId).equals(instanceId)) {
        msgLog.error("I (" + me + ") tried to switch to slave for myself as master (" + availableMasterId + ")");
        return;
    }
    final AtomicLong wait = new AtomicLong();
    final CancellationHandle cancellationHandle = new CancellationHandle();
    startModeSwitching(new Runnable() {
        @Override
        public void run() {
            if (currentTargetState != HighAvailabilityMemberState.TO_SLAVE) {
                // Already switched - this can happen if a second master becomes available while waiting
                return;
            }
            if (cancellationHandle.cancellationRequested()) {
                msgLog.info("Switch to slave cancelled on start.");
                return;
            }
            componentSwitcher.switchToSlave();
            try {
                if (cancellationHandle.cancellationRequested()) {
                    msgLog.info("Switch to slave cancelled before ha communication started.");
                    return;
                }
                haCommunicationLife.shutdown();
                haCommunicationLife = new LifeSupport();
                // It is important for availableMasterId to be re-read on every attempt, so that
                // slave switching does not end up in an infinite loop with a wrong/stale availableMasterId.
                URI resultingSlaveHaURI =
                        switchToSlave.switchToSlave(haCommunicationLife, me, availableMasterId, cancellationHandle);
                if (resultingSlaveHaURI == null) {
                    /*
                     * A null slave URI means the task was cancelled. The task must then simply terminate and
                     * have no side effects.
                     */
                    msgLog.info("Switch to slave is effectively cancelled");
                } else {
                    slaveHaURI = resultingSlaveHaURI;
                    canAskForElections.set(true);
                }
            } catch (HighAvailabilityStoreFailureException e) {
                userLog.error("UNABLE TO START UP AS SLAVE: %s", e.getMessage());
                msgLog.error("Unable to start up as slave", e);
                clusterMemberAvailability.memberIsUnavailable(SLAVE);
                ClusterClient clusterClient = HighAvailabilityModeSwitcher.this.clusterClient;
                try {
                    // TODO I doubt this actually works
                    clusterClient.leave();
                    clusterClient.stop();
                    haCommunicationLife.shutdown();
                } catch (Throwable t) {
                    msgLog.error("Unable to stop cluster client", t);
                }
                modeSwitcherExecutor.schedule(this, 5, TimeUnit.SECONDS);
            } catch (MismatchingStoreIdException e) {
                // Try again immediately: the place that threw this has already treated the db
                // as branched, so a new attempt will have this slave copy a new store from the master.
                run();
            } catch (Throwable t) {
                msgLog.error("Error while trying to switch to slave", t);
                // Try again later, with exponential backoff capped at 5 minutes.
                wait.set(1 + wait.get() * 2);
                wait.set(Math.min(wait.get(), 5 * 60));
                modeSwitcherFuture = modeSwitcherExecutor.schedule(this, wait.get(), TimeUnit.SECONDS);
                msgLog.info("Attempting to switch to slave in %ds", wait.get());
            }
        }
    }, cancellationHandle);
}
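The catch-all Throwable branch retries with exponential backoff. Below is a standalone sketch of that delay progression, using the same formula as the method above; the class name and main method are illustrative only:

import java.util.concurrent.atomic.AtomicLong;

// Illustrative only: reproduces the retry-delay formula from switchToSlave() above.
public class SlaveRetryBackoffSketch {
    public static void main(String[] args) {
        AtomicLong wait = new AtomicLong();
        for (int attempt = 1; attempt <= 10; attempt++) {
            wait.set(1 + wait.get() * 2);           // double the previous delay, plus one second
            wait.set(Math.min(wait.get(), 5 * 60)); // never wait more than 5 minutes
            System.out.printf("attempt %d -> retry in %ds%n", attempt, wait.get());
        }
        // Prints delays of 1, 3, 7, 15, 31, 63, 127, 255, 300, 300 seconds.
    }
}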
Use of org.neo4j.cluster.client.ClusterClient in project neo4j by neo4j.
The class TestPullUpdates, method shouldPullUpdatesOnStartupNoMatterWhat:
@Test
public void shouldPullUpdatesOnStartupNoMatterWhat() throws Exception {
    HighlyAvailableGraphDatabase slave = null;
    HighlyAvailableGraphDatabase master = null;
    try {
        File testRootDir = clusterRule.cleanDirectory("shouldPullUpdatesOnStartupNoMatterWhat");
        File masterDir = new File(testRootDir, "master");
        master = (HighlyAvailableGraphDatabase) new TestHighlyAvailableGraphDatabaseFactory()
                .newEmbeddedDatabaseBuilder(masterDir)
                .setConfig(ClusterSettings.server_id, "1")
                .setConfig(ClusterSettings.initial_hosts, "localhost:5001")
                .newGraphDatabase();
        // Copy the store, then shut down, so that update pulling later makes sense
        File slaveDir = new File(testRootDir, "slave");
        slave = (HighlyAvailableGraphDatabase) new TestHighlyAvailableGraphDatabaseFactory()
                .newEmbeddedDatabaseBuilder(slaveDir)
                .setConfig(ClusterSettings.server_id, "2")
                .setConfig(ClusterSettings.initial_hosts, "localhost:5001")
                .newGraphDatabase();
        // Required to block until the slave has left for sure
        final CountDownLatch slaveLeftLatch = new CountDownLatch(1);
        final ClusterClient masterClusterClient =
                master.getDependencyResolver().resolveDependency(ClusterClient.class);
        masterClusterClient.addClusterListener(new ClusterListener.Adapter() {
            @Override
            public void leftCluster(InstanceId instanceId, URI member) {
                slaveLeftLatch.countDown();
                masterClusterClient.removeClusterListener(this);
            }
        });
        master.getDependencyResolver().resolveDependency(LogService.class)
                .getInternalLog(getClass()).info("SHUTTING DOWN SLAVE");
        slave.shutdown();
        slave = null;
        // Make sure that the slave has left, because shutdown() may return before the master knows
        assertTrue("Timeout waiting for slave to leave", slaveLeftLatch.await(60, TimeUnit.SECONDS));
        long nodeId;
        try (Transaction tx = master.beginTx()) {
            Node node = master.createNode();
            node.setProperty("from", "master");
            nodeId = node.getId();
            tx.success();
        }
        // Store is already in place, should pull updates
        slave = (HighlyAvailableGraphDatabase) new TestHighlyAvailableGraphDatabaseFactory()
                .newEmbeddedDatabaseBuilder(slaveDir)
                .setConfig(ClusterSettings.server_id, "2")
                .setConfig(ClusterSettings.initial_hosts, "localhost:5001")
                .setConfig(HaSettings.pull_interval, "0") // no periodic pull updates, should pull on startup
                .newGraphDatabase();
        // Make sure switch to slave completes, and so does the update pulling on startup
        slave.beginTx().close();
        try (Transaction tx = slave.beginTx()) {
            assertEquals("master", slave.getNodeById(nodeId).getProperty("from"));
            tx.success();
        }
    } finally {
        if (slave != null) {
            slave.shutdown();
        }
        if (master != null) {
            master.shutdown();
        }
    }
}
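The latch-plus-listener idiom in this test is reusable. Below is a minimal sketch of it as a helper, using only the ClusterClient and ClusterListener.Adapter calls that appear above; the method name and its placement are hypothetical, and like the test it triggers on any member leaving, not a specific one:

// Hypothetical helper: block until some member leaves the cluster, or the timeout expires.
// Register it before triggering the departure, otherwise the event can be missed
// (the test above adds its listener before calling slave.shutdown()).
private static boolean awaitAnyMemberLeft(final ClusterClient client, long timeout, TimeUnit unit)
        throws InterruptedException {
    final CountDownLatch left = new CountDownLatch(1);
    client.addClusterListener(new ClusterListener.Adapter() {
        @Override
        public void leftCluster(InstanceId instanceId, URI member) {
            left.countDown();
            client.removeClusterListener(this);
        }
    });
    return left.await(timeout, unit);
}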
Use of org.neo4j.cluster.client.ClusterClient in project neo4j by neo4j.
The class ArbiterBootstrapperIT, method before:
@Before
public void before() throws Exception {
    directory = testDirectory.directory("temp");
    life = new LifeSupport();
    // Started up front, so that the clients get started as they are added
    life.start();
    clients = new ClusterClient[2];
    for (int i = 1; i <= clients.length; i++) {
        Map<String, String> config = stringMap();
        config.put(cluster_server.name(), ":" + (5000 + i));
        config.put(server_id.name(), "" + i);
        config.put(initial_hosts.name(), ":5001");
        LifeSupport moduleLife = new LifeSupport();
        ClusterClientModule clusterClientModule = new ClusterClientModule(moduleLife, new Dependencies(),
                new Monitors(), Config.embeddedDefaults(config), NullLogService.getInstance(),
                new ServerIdElectionCredentialsProvider());
        ClusterClient client = clusterClientModule.clusterClient;
        CountDownLatch latch = new CountDownLatch(1);
        client.addClusterListener(new ClusterListener.Adapter() {
            @Override
            public void enteredCluster(ClusterConfiguration configuration) {
                latch.countDown();
                client.removeClusterListener(this);
            }
        });
        life.add(moduleLife);
        clients[i - 1] = client;
        assertTrue("Didn't join the cluster", latch.await(20, SECONDS));
    }
}
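Since life.start() is called up front, the clients created here stay live for the duration of each test. A minimal sketch of the matching teardown, assuming an @After hook fits this fixture and that shutting down the parent LifeSupport is the intended way to stop the added modules:

// Hypothetical counterpart to the @Before above: stop all cluster clients after each test.
@After
public void after() throws Exception {
    // Shutting down the parent LifeSupport stops the ClusterClientModule lifecycles added in before(),
    // which in turn stops the ClusterClient instances held in the clients array.
    life.shutdown();
}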