Use of org.neo4j.kernel.impl.ha.ClusterManager.RepairKit in project neo4j by neo4j, in class TestSlaveOnlyCluster, method testMasterElectionAfterMasterRecoversInSlaveOnlyCluster:
@Test
public void testMasterElectionAfterMasterRecoversInSlaveOnlyCluster() throws Throwable {
    ManagedCluster cluster = clusterRule.startCluster();
    // the master in this slave-only cluster is expected to be instance 3
    assertThat(cluster.getServerId(cluster.getMaster()), equalTo(new InstanceId(3)));
    HighlyAvailableGraphDatabase master = cluster.getMaster();
    CountDownLatch masterFailedLatch = createMasterFailLatch(cluster);
    // fail the master and wait until the rest of the cluster notices
    RepairKit repairKit = cluster.fail(master);
    try {
        assertTrue(masterFailedLatch.await(60, TimeUnit.SECONDS));
    } finally {
        repairKit.repair();
    }
    cluster.await(allSeesAllAsAvailable());
    // a node created on a slave after recovery must be readable on the master
    long nodeId = createNodeWithPropertyOn(cluster.getAnySlave(), PROPERTY, VALUE);
    try (Transaction ignore = master.beginTx()) {
        assertThat(master.getNodeById(nodeId).getProperty(PROPERTY), equalTo(VALUE));
    }
}
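The createMasterFailLatch helper referenced above is not part of this excerpt. Below is a minimal sketch of what it could look like, reusing only the HeartbeatListener and dependency-resolver calls that appear in the ClusterTopologyChangesIT example further down; the ClusterClient class name and the loop structure are assumptions, not necessarily the test's actual implementation:

private static CountDownLatch createMasterFailLatch(ManagedCluster cluster) {
    // Hypothetical: count down as soon as any non-master member's cluster client reports a failed instance.
    final CountDownLatch latch = new CountDownLatch(1);
    for (final HighlyAvailableGraphDatabase db : cluster.getAllMembers()) {
        if (!db.isMaster()) {
            db.getDependencyResolver().resolveDependency(ClusterClient.class)
                    .addHeartbeatListener(new HeartbeatListener.Adapter() {
                        @Override
                        public void failed(InstanceId server) {
                            latch.countDown();
                        }
                    });
        }
    }
    return latch;
}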
Use of org.neo4j.kernel.impl.ha.ClusterManager.RepairKit in project neo4j by neo4j, in class IndexOperationsIT, method index_objects_can_be_reused_after_role_switch:
@Test
public void index_objects_can_be_reused_after_role_switch() throws Throwable {
    // GIVEN
    // -- an existing index
    String key = "key", value = "value";
    HighlyAvailableGraphDatabase master = cluster.getMaster();
    long nodeId = createNode(master, key, value, true);
    cluster.sync();
    // -- get Index and IndexManager references to all dbs
    Map<HighlyAvailableGraphDatabase, IndexManager> indexManagers = new HashMap<>();
    Map<HighlyAvailableGraphDatabase, Index<Node>> indexes = new HashMap<>();
    for (HighlyAvailableGraphDatabase db : cluster.getAllMembers()) {
        try (Transaction transaction = db.beginTx()) {
            indexManagers.put(db, db.index());
            indexes.put(db, db.index().forNodes(key));
            transaction.success();
        }
    }
    // WHEN
    // -- there's a master switch
    RepairKit repair = cluster.shutdown(master);
    indexManagers.remove(master);
    indexes.remove(master);
    cluster.await(ClusterManager.masterAvailable(master));
    cluster.await(ClusterManager.masterSeesSlavesAsAvailable(1));
    // -- the index instances should still be viable to use
    for (Map.Entry<HighlyAvailableGraphDatabase, IndexManager> entry : indexManagers.entrySet()) {
        HighlyAvailableGraphDatabase db = entry.getKey();
        try (Transaction transaction = db.beginTx()) {
            IndexManager indexManager = entry.getValue();
            assertTrue(indexManager.existsForNodes(key));
            assertEquals(nodeId, indexManager.forNodes(key).get(key, value).getSingle().getId());
        }
    }
    for (Map.Entry<HighlyAvailableGraphDatabase, Index<Node>> entry : indexes.entrySet()) {
        HighlyAvailableGraphDatabase db = entry.getKey();
        try (Transaction transaction = db.beginTx()) {
            Index<Node> index = entry.getValue();
            assertEquals(nodeId, index.get(key, value).getSingle().getId());
        }
    }
    repair.repair();
}
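The createNode(master, key, value, true) call in the GIVEN section refers to a helper outside this excerpt. A plausible sketch follows, assuming only the legacy index API already exercised by the assertions above; the real helper in IndexOperationsIT may differ in detail:

private long createNode(HighlyAvailableGraphDatabase db, String key, Object value, boolean index) {
    try (Transaction tx = db.beginTx()) {
        Node node = db.createNode();
        node.setProperty(key, value);
        if (index) {
            // legacy node index keyed with the same name as the property key
            db.index().forNodes(key).add(node, key, value);
        }
        tx.success();
        return node.getId();
    }
}

Since the later assertions compare get(key, value).getSingle().getId() against the returned id, the helper has to both index the node and return its id.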
Use of org.neo4j.kernel.impl.ha.ClusterManager.RepairKit in project neo4j by neo4j, in class ClusterTopologyChangesIT, method masterRejoinsAfterFailureAndReelection:
@Test
public void masterRejoinsAfterFailureAndReelection() throws Throwable {
    // Given
    HighlyAvailableGraphDatabase initialMaster = cluster.getMaster();
    // When
    cluster.info("Fail master");
    RepairKit kit = cluster.fail(initialMaster);
    cluster.info("Wait for 2 to become master and 3 slave");
    cluster.await(masterAvailable(initialMaster));
    cluster.await(masterSeesSlavesAsAvailable(1));
    cluster.info("Repair 1");
    kit.repair();
    // Then
    cluster.info("Wait for cluster recovery");
    cluster.await(masterAvailable());
    cluster.await(allSeesAllAsAvailable());
    assertEquals(3, cluster.size());
}
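Note that kit.repair() is called outside a try/finally here, so a timed-out await would leave the failed instance down for the remainder of the run. A defensive variant of the same flow, sketched using only the calls already shown above:

RepairKit kit = cluster.fail(initialMaster);
try {
    // wait for a new master to be elected among the remaining instances
    cluster.await(masterAvailable(initialMaster));
    cluster.await(masterSeesSlavesAsAvailable(1));
} finally {
    // always bring the failed instance back, even if an await times out
    kit.repair();
}
cluster.await(masterAvailable());
cluster.await(allSeesAllAsAvailable());
assertEquals(3, cluster.size());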
Use of org.neo4j.kernel.impl.ha.ClusterManager.RepairKit in project neo4j by neo4j, in class ClusterTopologyChangesIT, method slaveShouldServeTxsAfterMasterLostQuorumWentToPendingAndThenQuorumWasRestored:
@Test
@Ignore
public void slaveShouldServeTxsAfterMasterLostQuorumWentToPendingAndThenQuorumWasRestored() throws Throwable {
    // GIVEN: cluster with 3 members
    HighlyAvailableGraphDatabase master = cluster.getMaster();
    final HighlyAvailableGraphDatabase slave1 = cluster.getAnySlave();
    final HighlyAvailableGraphDatabase slave2 = cluster.getAnySlave(slave1);
    final CountDownLatch slave1Left = new CountDownLatch(1);
    final CountDownLatch slave2Left = new CountDownLatch(1);
    clusterClientOf(master).addHeartbeatListener(new HeartbeatListener.Adapter() {
        @Override
        public void failed(InstanceId server) {
            if (instanceIdOf(slave1).equals(server)) {
                slave1Left.countDown();
            } else if (instanceIdOf(slave2).equals(server)) {
                slave2Left.countDown();
            }
        }
    });
    // fail slave1 and wait for the master to spot the failure
    RepairKit slave1RepairKit = cluster.fail(slave1);
    assertTrue(slave1Left.await(60, SECONDS));
    // fail slave2 and wait for the master to spot the failure
    RepairKit slave2RepairKit = cluster.fail(slave2);
    assertTrue(slave2Left.await(60, SECONDS));
    // the master loses quorum and goes to PENDING, so the cluster is unavailable
    cluster.await(masterAvailable().negate());
    assertEquals(HighAvailabilityMemberState.PENDING, master.getInstanceState());
    // WHEN: both slaves are repaired, the majority is restored and quorum can be achieved again
    slave1RepairKit.repair();
    slave2RepairKit.repair();
    // the whole cluster looks fine, but the slaves hold a stale epoch value if they rejoin the cluster in SLAVE state
    cluster.await(masterAvailable());
    cluster.await(masterSeesSlavesAsAvailable(2));
    HighlyAvailableGraphDatabase newMaster = cluster.getMaster();
    final HighlyAvailableGraphDatabase newSlave1 = cluster.getAnySlave();
    final HighlyAvailableGraphDatabase newSlave2 = cluster.getAnySlave(newSlave1);
    // now add a failure listener and wait for the unavailability caused by the stale epoch
    final CountDownLatch slave1Unavailable = new CountDownLatch(1);
    final CountDownLatch slave2Unavailable = new CountDownLatch(1);
    ClusterMemberEvents clusterEvents = newMaster.getDependencyResolver().resolveDependency(ClusterMemberEvents.class);
    clusterEvents.addClusterMemberListener(new ClusterMemberListener.Adapter() {
        @Override
        public void memberIsUnavailable(String role, InstanceId unavailableId) {
            if (instanceIdOf(newSlave1).equals(unavailableId)) {
                slave1Unavailable.countDown();
            } else if (instanceIdOf(newSlave2).equals(unavailableId)) {
                slave2Unavailable.countDown();
            }
        }
    });
    // attempting a transaction on each slave throws, which triggers an election
    attemptTransactions(newSlave1, newSlave2);
    // use a timeout in case an instance does not have a stale epoch
    assertTrue(slave1Unavailable.await(60, TimeUnit.SECONDS));
    assertTrue(slave2Unavailable.await(60, TimeUnit.SECONDS));
    // THEN: the election is done and the cluster is healthy and able to serve transactions
    cluster.info("Waiting for cluster to stabilize");
    cluster.await(allSeesAllAsAvailable());
    cluster.info("Assert ok");
    assertNotNull(createNodeOn(newMaster));
    assertNotNull(createNodeOn(newSlave1));
    assertNotNull(createNodeOn(newSlave2));
}
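The attemptTransactions and createNodeOn helpers are not shown in this excerpt. The sketches below are hypothetical but consistent with how they are used above (attemptTransactions must swallow the expected failures, createNodeOn must return something non-null); the actual implementations in ClusterTopologyChangesIT may differ:

private static void attemptTransactions(HighlyAvailableGraphDatabase... dbs) {
    for (HighlyAvailableGraphDatabase db : dbs) {
        try {
            createNodeOn(db);
        } catch (Exception e) {
            // expected on a slave with a stale epoch: the write fails and an election is triggered
        }
    }
}

private static Node createNodeOn(HighlyAvailableGraphDatabase db) {
    try (Transaction tx = db.beginTx()) {
        Node node = db.createNode();
        tx.success();
        return node;
    }
}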
Use of org.neo4j.kernel.impl.ha.ClusterManager.RepairKit in project neo4j by neo4j, in class TestBasicHaOperations, method testBasicFailover:
@Test
public void testBasicFailover() throws Throwable {
    // Given
    ManagedCluster cluster = clusterRule.startCluster();
    HighlyAvailableGraphDatabase master = cluster.getMaster();
    HighlyAvailableGraphDatabase slave1 = cluster.getAnySlave();
    HighlyAvailableGraphDatabase slave2 = cluster.getAnySlave(slave1);
    // When
    long start = System.nanoTime();
    RepairKit repair = cluster.shutdown(master);
    try {
        logger.getLogger().warning("Shut down master");
        cluster.await(ClusterManager.masterAvailable());
        long end = System.nanoTime();
        logger.getLogger().warning("Failover took:" + (end - start) / 1000000 + "ms");
        // Then
        boolean slave1Master = slave1.isMaster();
        boolean slave2Master = slave2.isMaster();
        if (slave1Master) {
            assertFalse(slave2Master);
        } else {
            assertTrue(slave2Master);
        }
    } finally {
        repair.repair();
    }
}
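Two small optional tightenings of the same test, sketched with only classes already used in these examples (java.util.concurrent.TimeUnit and the JUnit asserts); this is not the project's actual code:

// report the failover time without the magic 1000000 divisor
long failoverMillis = TimeUnit.NANOSECONDS.toMillis(end - start);
logger.getLogger().warning("Failover took: " + failoverMillis + "ms");

// exactly one of the two former slaves must now be the master
assertTrue(slave1.isMaster() ^ slave2.isMaster());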