Search in sources:

Example 6 with RepairKit

use of org.neo4j.kernel.impl.ha.ClusterManager.RepairKit in project neo4j by neo4j.

From the class FailoverWithAdditionalSlaveFailuresIT, the method testFailoverWithAdditionalSlave:

/**
 * Fails the given slaves plus the master, waits for a new master to be elected
 * among the surviving instances, then repairs every failed member.
 *
 * @param clusterSize  total number of instances to start
 * @param slaveIndexes which slaves (by index) to take down before failing the master
 */
private void testFailoverWithAdditionalSlave(int clusterSize, int[] slaveIndexes) throws Throwable {
    // Spin up a fresh cluster in its own clean root directory for this run.
    ClusterManager manager = new ClusterManager.Builder()
            .withRootDirectory(dir.cleanDirectory("testcluster"))
            .withCluster(ClusterManager.clusterOfSize(clusterSize))
            .build();
    try {
        manager.start();
        ClusterManager.ManagedCluster cluster = manager.getCluster();
        cluster.await(allSeesAllAsAvailable());
        cluster.await(masterAvailable());

        // Take down the requested slaves first, remembering how to bring each one back.
        Collection<HighlyAvailableGraphDatabase> downedMembers = new ArrayList<>();
        Collection<RepairKit> repairs = new ArrayList<>();
        for (int index : slaveIndexes) {
            HighlyAvailableGraphDatabase slave = getNthSlave(cluster, index);
            downedMembers.add(slave);
            repairs.add(cluster.fail(slave));
        }

        // ...then fail the master and wait for a new one among the survivors.
        HighlyAvailableGraphDatabase formerMaster = cluster.getMaster();
        downedMembers.add(formerMaster);
        repairs.add(cluster.fail(formerMaster));
        cluster.await(masterAvailable(toArray(downedMembers)));

        // Bring every failed instance back up.
        for (RepairKit repair : repairs) {
            repair.repair();
        }
        // give repaired instances a chance to cleanly rejoin and exit faster
        Thread.sleep(3000);
    } finally {
        manager.safeShutdown();
    }
}
Also used : RepairKit(org.neo4j.kernel.impl.ha.ClusterManager.RepairKit) ArrayList(java.util.ArrayList) ClusterManager(org.neo4j.kernel.impl.ha.ClusterManager)

Example 7 with RepairKit

use of org.neo4j.kernel.impl.ha.ClusterManager.RepairKit in project neo4j by neo4j.

From the class TransactionThroughMasterSwitchStressIT, the method reelectTheSameMasterMakingItGoToPendingAndBack:

/**
 * Partitions the current master away so it drops to PENDING, then immediately
 * heals the partition and asserts the very same instance regains the MASTER role.
 */
private void reelectTheSameMasterMakingItGoToPendingAndBack(ManagedCluster cluster) throws Throwable {
    HighlyAvailableGraphDatabase initialMaster = cluster.getMaster();

    // Cut the master's network in both directions so it detects it's partitioned away...
    RepairKit repair = cluster.fail(initialMaster, false, NetworkFlag.IN, NetworkFlag.OUT);
    cluster.await(memberThinksItIsRole(initialMaster, UNKNOWN));

    // ...and restore connectivity straight away.
    repair.repair();

    // The remaining instances are slave-only, so this member must win the re-election.
    cluster.await(memberThinksItIsRole(initialMaster, MASTER));
    cluster.await(ClusterManager.masterAvailable());
    assertEquals(initialMaster, cluster.getMaster());
}
Also used : HighlyAvailableGraphDatabase(org.neo4j.kernel.ha.HighlyAvailableGraphDatabase) RepairKit(org.neo4j.kernel.impl.ha.ClusterManager.RepairKit)

Example 8 with RepairKit

use of org.neo4j.kernel.impl.ha.ClusterManager.RepairKit in project neo4j by neo4j.

From the class TestBranchedData, the method shouldCopyStoreFromMasterIfBranchedInLiveScenario:

/**
     * Main difference to {@link #shouldCopyStoreFromMasterIfBranched()} is that no instances are shut down
     * during the course of the test. This to test functionality of some internal components being restarted.
     */
@SuppressWarnings("unchecked")
@Test
public void shouldCopyStoreFromMasterIfBranchedInLiveScenario() throws Throwable {
    // GIVEN a cluster of 3, all having the same data (node A)
    // thor is whoever is the master to begin with
    // odin is whoever is picked as _the_ slave given thor as initial master
    File dir = directory.directory();
    // Effectively disable automatic transaction propagation within the cluster, so that
    // transactions committed on one instance do not reach the others on their own.
    ClusterManager clusterManager = life.add(new ClusterManager.Builder(dir)
            .withSharedConfig(stringMap(
                    HaSettings.tx_push_factor.name(), "0",
                    HaSettings.pull_interval.name(), "0"))
            .build());
    ManagedCluster cluster = clusterManager.getCluster();
    cluster.await(allSeesAllAsAvailable());
    HighlyAvailableGraphDatabase thor = cluster.getMaster();
    String indexName = "valhalla";
    createNode(thor, "A", andIndexInto(indexName));
    cluster.sync();
    // WHEN creating a node B1 on thor (note the disabled cluster transaction propagation,
    // so B1 exists only on thor)
    createNode(thor, "B1", andIndexInto(indexName));
    // and right after that failing the master so that it falls out of the cluster
    HighlyAvailableGraphDatabase odin = cluster.getAnySlave();
    cluster.info(format("%n   ==== TAMPERING WITH " + thor + "'s CABLES ====%n"));
    RepairKit thorRepairKit = cluster.fail(thor);
    // Wait until odin has taken over as master, then create a transaction on it.
    cluster.await(ClusterManager.masterAvailable(thor));
    cluster.await(ClusterManager.memberThinksItIsRole(odin, HighAvailabilityModeSwitcher.MASTER));
    assertTrue(odin.isMaster());
    retryOnTransactionFailure(odin, db -> createNode(db, "B2", andIndexInto(indexName)));
    // perform transactions so that index files changes under the hood; keep writing batches
    // of nodes with successive prefixes (C, D, ...) until the lucene file set changes
    Set<File> odinLuceneFilesBefore = Iterables.asSet(gatherLuceneFiles(odin, indexName));
    for (char prefix = 'C'; !changed(odinLuceneFilesBefore, Iterables.asSet(gatherLuceneFiles(odin, indexName))); prefix++) {
        // effectively-final copy for use inside the lambda below
        char fixedPrefix = prefix;
        retryOnTransactionFailure(odin, db -> createNodes(odin, String.valueOf(fixedPrefix), 10_000, andIndexInto(indexName)));
        // Force will most likely cause lucene legacy indexes to commit and change file structure
        cluster.force();
    }
    // so anyways, when thor comes back into the cluster
    cluster.info(format("%n   ==== REPAIRING CABLES ====%n"));
    cluster.await(memberThinksItIsRole(thor, UNKNOWN));
    BranchMonitor thorHasBranched = installBranchedDataMonitor(thor);
    thorRepairKit.repair();
    cluster.await(memberThinksItIsRole(thor, SLAVE));
    cluster.await(memberThinksItIsRole(odin, MASTER));
    cluster.await(allSeesAllAsAvailable());
    assertFalse(thor.isMaster());
    assertTrue("No store-copy performed", thorHasBranched.copyCompleted);
    assertTrue("Store-copy unsuccessful", thorHasBranched.copySucessful);
    // Now do some more transactions on current master (odin) and have thor pull those
    for (int i = 0; i < 3; i++) {
        int ii = i;
        // was String.valueOf("" + ii) — redundant String->String conversion; String.valueOf(ii)
        // produces the identical name prefix directly
        retryOnTransactionFailure(odin, db -> createNodes(odin, String.valueOf(ii), 10, andIndexInto(indexName)));
        cluster.sync();
        cluster.force();
    }
    // THEN thor should be a slave, having copied a store from master and good to go:
    // the branched node B1 is gone, and all post-branch data from odin is present.
    assertFalse(hasNode(thor, "B1"));
    assertTrue(hasNode(thor, "B2"));
    assertTrue(hasNode(thor, "C-0"));
    assertTrue(hasNode(thor, "0-0"));
    assertTrue(hasNode(odin, "0-0"));
}
Also used : HighlyAvailableGraphDatabase(org.neo4j.kernel.ha.HighlyAvailableGraphDatabase) ManagedCluster(org.neo4j.kernel.impl.ha.ClusterManager.ManagedCluster) RepairKit(org.neo4j.kernel.impl.ha.ClusterManager.RepairKit) File(java.io.File) ClusterManager(org.neo4j.kernel.impl.ha.ClusterManager) Test(org.junit.Test)

Example 9 with RepairKit

use of org.neo4j.kernel.impl.ha.ClusterManager.RepairKit in project neo4j by neo4j.

From the class TestBranchedData, the method shouldCopyStoreFromMasterIfBranched:

@Test
public void shouldCopyStoreFromMasterIfBranched() throws Throwable {
    // GIVEN a two-instance cluster where node "A" is replicated everywhere
    File root = directory.directory();
    ClusterManager clusterManager = life.add(new ClusterManager.Builder(root).withCluster(clusterOfSize(2)).build());
    ManagedCluster cluster = clusterManager.getCluster();
    cluster.await(allSeesAllAsAvailable());
    createNode(cluster.getMaster(), "A");
    cluster.sync();

    // WHEN the slave is shut down, its store is tampered with offline, and the master moves on
    HighlyAvailableGraphDatabase slave = cluster.getAnySlave();
    File slaveStoreDir = new File(slave.getStoreDir());
    RepairKit restarter = cluster.shutdown(slave);
    HighlyAvailableGraphDatabase master = cluster.getMaster();
    createNode(master, "B1");
    createNode(master, "C");
    createNodeOffline(slaveStoreDir, "B2");
    slave = restarter.repair();

    // THEN the revived slave should rejoin (branch resolved via store copy) and serve transactions
    cluster.await(allSeesAllAsAvailable());
    slave.beginTx().close();
}
Also used : HighlyAvailableGraphDatabase(org.neo4j.kernel.ha.HighlyAvailableGraphDatabase) ManagedCluster(org.neo4j.kernel.impl.ha.ClusterManager.ManagedCluster) RepairKit(org.neo4j.kernel.impl.ha.ClusterManager.RepairKit) File(java.io.File) ClusterManager(org.neo4j.kernel.impl.ha.ClusterManager) Test(org.junit.Test)

Aggregations

RepairKit (org.neo4j.kernel.impl.ha.ClusterManager.RepairKit)9 Test (org.junit.Test)7 HighlyAvailableGraphDatabase (org.neo4j.kernel.ha.HighlyAvailableGraphDatabase)5 ManagedCluster (org.neo4j.kernel.impl.ha.ClusterManager.ManagedCluster)4 ClusterManager (org.neo4j.kernel.impl.ha.ClusterManager)3 File (java.io.File)2 CountDownLatch (java.util.concurrent.CountDownLatch)2 InstanceId (org.neo4j.cluster.InstanceId)2 Transaction (org.neo4j.graphdb.Transaction)2 ArrayList (java.util.ArrayList)1 HashMap (java.util.HashMap)1 Map (java.util.Map)1 Ignore (org.junit.Ignore)1 ClusterMemberEvents (org.neo4j.cluster.member.ClusterMemberEvents)1 ClusterMemberListener (org.neo4j.cluster.member.ClusterMemberListener)1 HeartbeatListener (org.neo4j.cluster.protocol.heartbeat.HeartbeatListener)1 Node (org.neo4j.graphdb.Node)1 Index (org.neo4j.graphdb.index.Index)1 IndexManager (org.neo4j.graphdb.index.IndexManager)1