Use of org.neo4j.kernel.impl.ha.ClusterManager.RepairKit in project neo4j by neo4j.
From the class FailoverWithAdditionalSlaveFailuresIT, method testFailoverWithAdditionalSlave:
private void testFailoverWithAdditionalSlave(int clusterSize, int[] slaveIndexes) throws Throwable {
    ClusterManager manager = new ClusterManager.Builder()
            .withRootDirectory(dir.cleanDirectory("testcluster"))
            .withCluster(ClusterManager.clusterOfSize(clusterSize))
            .build();
    try {
        manager.start();
        ClusterManager.ManagedCluster cluster = manager.getCluster();
        cluster.await(allSeesAllAsAvailable());
        cluster.await(masterAvailable());

        Collection<HighlyAvailableGraphDatabase> failed = new ArrayList<>();
        Collection<RepairKit> repairKits = new ArrayList<>();
        for (int slaveIndex : slaveIndexes) {
            HighlyAvailableGraphDatabase nthSlave = getNthSlave(cluster, slaveIndex);
            failed.add(nthSlave);
            RepairKit repairKit = cluster.fail(nthSlave);
            repairKits.add(repairKit);
        }

        HighlyAvailableGraphDatabase oldMaster = cluster.getMaster();
        failed.add(oldMaster);
        repairKits.add(cluster.fail(oldMaster));

        cluster.await(masterAvailable(toArray(failed)));

        for (RepairKit repairKit : repairKits) {
            repairKit.repair();
        }
        // Give the repaired instances a chance to cleanly rejoin, so that shutdown exits faster
        Thread.sleep(3000);
    } finally {
        manager.safeShutdown();
    }
}
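The pattern above generalizes to the other usages on this page: cluster.fail(...) returns a RepairKit handle for the injected failure, and calling repair() on that handle undoes exactly that failure. A minimal sketch of the lifecycle, assuming a started ManagedCluster named cluster as in the harness above (the victim variable is illustrative only):

// Sketch of the basic fail-and-repair lifecycle, using only calls seen on this page
HighlyAvailableGraphDatabase victim = cluster.getAnySlave();
RepairKit kit = cluster.fail(victim);       // inject the failure, keep the handle
cluster.await(masterAvailable(victim));     // wait for the rest of the cluster to recover without the victim
kit.repair();                               // undo exactly that failure
cluster.await(allSeesAllAsAvailable());     // wait for the instance to rejoin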
Use of org.neo4j.kernel.impl.ha.ClusterManager.RepairKit in project neo4j by neo4j.
From the class TransactionThroughMasterSwitchStressIT, method reelectTheSameMasterMakingItGoToPendingAndBack:
private void reelectTheSameMasterMakingItGoToPendingAndBack(ManagedCluster cluster) throws Throwable {
    HighlyAvailableGraphDatabase master = cluster.getMaster();

    // Fail the master and wait for it to go to PENDING, since it detects that it's partitioned away
    RepairKit masterRepair = cluster.fail(master, false, NetworkFlag.IN, NetworkFlag.OUT);
    cluster.await(memberThinksItIsRole(master, UNKNOWN));

    // Then immediately repair
    masterRepair.repair();

    // Wait for this instance to become master again, since the other instances are slave-only
    cluster.await(memberThinksItIsRole(master, MASTER));
    cluster.await(ClusterManager.masterAvailable());
    assertEquals(master, cluster.getMaster());
}
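Note the fail(...) overload used here. Unlike the plain cluster.fail(db) in the previous example, this variant cuts only the given network directions while leaving the process alive; judging from its usage here, the boolean second argument appears to control whether fail() blocks until the rest of the cluster has observed the failure, though that is an assumption. A hedged, side-by-side sketch of the two forms (only one would be used at a time):

HighlyAvailableGraphDatabase db = cluster.getMaster();
// Hard failure: the instance is taken out entirely
RepairKit hardKit = cluster.fail(db);
// Soft failure: only network traffic is cut, in the given directions; the
// boolean (assumed to mean "wait until seen as down") is false here so the
// caller can await the role change itself, as the test above does
RepairKit softKit = cluster.fail(db, false, NetworkFlag.IN, NetworkFlag.OUT);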
Use of org.neo4j.kernel.impl.ha.ClusterManager.RepairKit in project neo4j by neo4j.
From the class TestBranchedData, method shouldCopyStoreFromMasterIfBranchedInLiveScenario:
/**
 * The main difference from {@link #shouldCopyStoreFromMasterIfBranched()} is that no instances are shut down
 * during the course of the test. This exercises the restart behavior of some internal components.
 */
@SuppressWarnings("unchecked")
@Test
public void shouldCopyStoreFromMasterIfBranchedInLiveScenario() throws Throwable {
    // GIVEN a cluster of 3, all having the same data (node A)
    // thor is whoever is the master to begin with
    // odin is whoever is picked as _the_ slave, given thor as initial master
    File dir = directory.directory();
    ClusterManager clusterManager = life.add(new ClusterManager.Builder(dir)
            // Effectively disable automatic transaction propagation within the cluster
            .withSharedConfig(stringMap(
                    HaSettings.tx_push_factor.name(), "0",
                    HaSettings.pull_interval.name(), "0"))
            .build());
    ManagedCluster cluster = clusterManager.getCluster();
    cluster.await(allSeesAllAsAvailable());
    HighlyAvailableGraphDatabase thor = cluster.getMaster();
    String indexName = "valhalla";
    createNode(thor, "A", andIndexInto(indexName));
    cluster.sync();

    // WHEN creating a node B1 on thor (note the disabled cluster transaction propagation)
    createNode(thor, "B1", andIndexInto(indexName));
    // and right after that failing the master so that it falls out of the cluster
    HighlyAvailableGraphDatabase odin = cluster.getAnySlave();
    cluster.info(format("%n ==== TAMPERING WITH " + thor + "'s CABLES ====%n"));
    RepairKit thorRepairKit = cluster.fail(thor);

    // try to create a transaction on odin until it succeeds
    cluster.await(ClusterManager.masterAvailable(thor));
    cluster.await(ClusterManager.memberThinksItIsRole(odin, HighAvailabilityModeSwitcher.MASTER));
    assertTrue(odin.isMaster());
    retryOnTransactionFailure(odin, db -> createNode(db, "B2", andIndexInto(indexName)));

    // perform transactions so that the index files change under the hood
    Set<File> odinLuceneFilesBefore = Iterables.asSet(gatherLuceneFiles(odin, indexName));
    for (char prefix = 'C'; !changed(odinLuceneFilesBefore, Iterables.asSet(gatherLuceneFiles(odin, indexName))); prefix++) {
        char fixedPrefix = prefix;
        retryOnTransactionFailure(odin, db -> createNodes(odin, String.valueOf(fixedPrefix), 10_000, andIndexInto(indexName)));
        // Forcing will most likely cause the lucene legacy indexes to commit and change file structure
        cluster.force();
    }

    // so anyway, when thor comes back into the cluster
    cluster.info(format("%n ==== REPAIRING CABLES ====%n"));
    cluster.await(memberThinksItIsRole(thor, UNKNOWN));
    BranchMonitor thorHasBranched = installBranchedDataMonitor(thor);
    thorRepairKit.repair();
    cluster.await(memberThinksItIsRole(thor, SLAVE));
    cluster.await(memberThinksItIsRole(odin, MASTER));
    cluster.await(allSeesAllAsAvailable());
    assertFalse(thor.isMaster());
    assertTrue("No store-copy performed", thorHasBranched.copyCompleted);
    assertTrue("Store-copy unsuccessful", thorHasBranched.copySucessful);

    // Now do some more transactions on the current master (odin) and have thor pull those
    for (int i = 0; i < 3; i++) {
        int ii = i;
        retryOnTransactionFailure(odin, db -> createNodes(odin, String.valueOf(ii), 10, andIndexInto(indexName)));
        cluster.sync();
        cluster.force();
    }

    // THEN thor should be a slave, having copied a store from the master, and good to go
    assertFalse(hasNode(thor, "B1"));
    assertTrue(hasNode(thor, "B2"));
    assertTrue(hasNode(thor, "C-0"));
    assertTrue(hasNode(thor, "0-0"));
    assertTrue(hasNode(odin, "0-0"));
}
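The branch in this test is induced deliberately: with tx_push_factor and pull_interval both set to 0, the B1 transaction commits only on thor, so once odin takes over and commits B2 the two stores have diverged from a common ancestor. A condensed sketch of that recipe, reusing only harness calls that appear above:

// Sketch: the minimal sequence that produces branched data in this test
createNode(thor, "B1", andIndexInto(indexName));      // commits on thor only (propagation disabled)
RepairKit kit = cluster.fail(thor);                   // thor drops out before anyone pulls B1
cluster.await(ClusterManager.masterAvailable(thor));  // a new master (odin) is elected
retryOnTransactionFailure(odin, db -> createNode(db, "B2", andIndexInto(indexName)));
// thor now holds {A, B1} and odin {A, B2}; on repair, thor detects the
// divergence, sets its own store aside as branched data, and copies a
// fresh store from the new master
kit.repair();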
Use of org.neo4j.kernel.impl.ha.ClusterManager.RepairKit in project neo4j by neo4j.
From the class TestBranchedData, method shouldCopyStoreFromMasterIfBranched:
@Test
public void shouldCopyStoreFromMasterIfBranched() throws Throwable {
    // GIVEN
    File dir = directory.directory();
    ClusterManager clusterManager = life.add(new ClusterManager.Builder(dir)
            .withCluster(clusterOfSize(2))
            .build());
    ManagedCluster cluster = clusterManager.getCluster();
    cluster.await(allSeesAllAsAvailable());
    createNode(cluster.getMaster(), "A");
    cluster.sync();

    // WHEN
    HighlyAvailableGraphDatabase slave = cluster.getAnySlave();
    File storeDir = new File(slave.getStoreDir());
    RepairKit starter = cluster.shutdown(slave);
    HighlyAvailableGraphDatabase master = cluster.getMaster();
    createNode(master, "B1");
    createNode(master, "C");
    createNodeOffline(storeDir, "B2");
    slave = starter.repair();

    // THEN
    cluster.await(allSeesAllAsAvailable());
    slave.beginTx().close();
}
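One detail worth noting in this last example: cluster.shutdown(db) hands back a RepairKit just like fail(...) does, but here repair() restarts the instance and returns a new HighlyAvailableGraphDatabase handle, so the stale reference must be replaced. A minimal sketch, assuming the same harness:

// shutdown() also yields a RepairKit; its repair() restarts the instance
HighlyAvailableGraphDatabase slave = cluster.getAnySlave();
RepairKit starter = cluster.shutdown(slave);
// ... mutate the rest of the cluster, or the downed store offline ...
slave = starter.repair();                   // re-acquire a live handle to the restarted slave
cluster.await(allSeesAllAsAvailable());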