Search in sources :

Example 6 with TestRule

use of org.corfudb.runtime.clients.TestRule in project CorfuDB by CorfuDB.

the class ManagementViewTest method handleTransientFailure.

/**
 * Scenario with 3 nodes: SERVERS.PORT_0, SERVERS.PORT_1 and SERVERS.PORT_2.
 * Simulate a transient failure of a server leading to a partial seal.
 * Allow the management server to detect the partial seal and correct it.
 * <p>
 * Part 1.
 * The partial seal causes SERVERS.PORT_0 to be at epoch 2 whereas
 * SERVERS.PORT_1 and SERVERS.PORT_2 fail to receive this message and are stuck at epoch 1.
 * <p>
 * Part 2.
 * All 3 servers are now functional and receive all messages.
 * <p>
 * Part 3.
 * The PING message gets rejected by the partially sealed router (WrongEpoch),
 * the management server notices the partial seal and corrects it
 * by issuing another failure detected message.
 *
 * @throws Exception propagated from the test harness, from the failure-handler future,
 *                   or when a wait on the semaphore / a sleep is interrupted.
 */
@Test
public void handleTransientFailure() throws Exception {
    // Permit counter used as a rendezvous: permits are released when the
    // MANAGEMENT_FAILURE_DETECTED message is sent by the Management client to its
    // server, and when the expected epoch change is observed.
    final Semaphore failureDetected = new Semaphore(2, true);
    addServer(SERVERS.PORT_0);
    addServer(SERVERS.PORT_1);
    addServer(SERVERS.PORT_2);
    Layout l = new TestLayoutBuilder()
            .setEpoch(1L)
            .addLayoutServer(SERVERS.PORT_0)
            .addLayoutServer(SERVERS.PORT_1)
            .addLayoutServer(SERVERS.PORT_2)
            .addSequencer(SERVERS.PORT_0)
            .buildSegment()
            .setReplicationMode(Layout.ReplicationMode.QUORUM_REPLICATION)
            .buildStripe()
            .addLogUnit(SERVERS.PORT_0)
            .addLogUnit(SERVERS.PORT_1)
            .addLogUnit(SERVERS.PORT_2)
            .addToSegment()
            .addToLayout()
            .build();
    bootstrapAllServers(l);
    CorfuRuntime corfuRuntime = getRuntime(l).connect();
    // Initiate SERVERS.ENDPOINT_0 failureHandler
    corfuRuntime.getRouter(SERVERS.ENDPOINT_0).getClient(ManagementClient.class).initiateFailureHandler().get();
    // Set aggressive timeouts.
    setAggressiveTimeouts(l, corfuRuntime,
            getManagementServer(SERVERS.PORT_0).getCorfuRuntime(),
            getManagementServer(SERVERS.PORT_1).getCorfuRuntime(),
            getManagementServer(SERVERS.PORT_2).getCorfuRuntime());
    // Drain both initial permits so that every later tryAcquire(2) has to wait
    // for two fresh releases.
    failureDetected.acquire(2);
    // Only allow SERVERS.PORT_0 to manage failures.
    getManagementServer(SERVERS.PORT_1).shutdown();
    getManagementServer(SERVERS.PORT_2).shutdown();
    // PART 1.
    // Prevent ENDPOINT_1 from sealing.
    addClientRule(getManagementServer(SERVERS.PORT_0).getCorfuRuntime(), SERVERS.ENDPOINT_1,
            new TestRule().matches(corfuMsg -> corfuMsg.getMsgType().equals(CorfuMsgType.SET_EPOCH)).drop());
    // Simulate ENDPOINT_2 failure from ENDPOINT_0 (only Management Server)
    addClientRule(getManagementServer(SERVERS.PORT_0).getCorfuRuntime(), SERVERS.ENDPOINT_2,
            new TestRule().matches(corfuMsg -> true).drop());
    // Adding a rule on the SERVERS.PORT_0 management runtime that releases one permit
    // when it sends the MANAGEMENT_FAILURE_DETECTED message.
    addClientRule(getManagementServer(SERVERS.PORT_0).getCorfuRuntime(), new TestRule().matches(corfuMsg -> {
        if (corfuMsg.getMsgType().equals(CorfuMsgType.MANAGEMENT_FAILURE_DETECTED)) {
            failureDetected.release();
        }
        return true;
    }));
    // Go ahead when sealing of ENDPOINT_0 takes place; this contributes the second permit.
    for (int i = 0; i < PARAMETERS.NUM_ITERATIONS_LOW; i++) {
        if (getServerRouter(SERVERS.PORT_0).getServerEpoch() == 2L) {
            failureDetected.release();
            break;
        }
        Thread.sleep(PARAMETERS.TIMEOUT_VERY_SHORT.toMillis());
    }
    assertThat(failureDetected.tryAcquire(2, PARAMETERS.TIMEOUT_NORMAL.toNanos(), TimeUnit.NANOSECONDS)).isTrue();
    addClientRule(getManagementServer(SERVERS.PORT_0).getCorfuRuntime(),
            new TestRule().matches(corfuMsg -> corfuMsg.getMsgType().equals(CorfuMsgType.MANAGEMENT_FAILURE_DETECTED)).drop());
    // Assert that only a partial seal was successful.
    // ENDPOINT_0 sealed. ENDPOINT_1 & ENDPOINT_2 not sealed.
    assertThat(getServerRouter(SERVERS.PORT_0).getServerEpoch()).isEqualTo(2L);
    assertThat(getServerRouter(SERVERS.PORT_1).getServerEpoch()).isEqualTo(1L);
    assertThat(getServerRouter(SERVERS.PORT_2).getServerEpoch()).isEqualTo(1L);
    assertThat(getLayoutServer(SERVERS.PORT_0).getCurrentLayout().getEpoch()).isEqualTo(1L);
    assertThat(getLayoutServer(SERVERS.PORT_1).getCurrentLayout().getEpoch()).isEqualTo(1L);
    assertThat(getLayoutServer(SERVERS.PORT_2).getCurrentLayout().getEpoch()).isEqualTo(1L);
    // PART 2.
    // Simulate normal operations for all servers and clients.
    clearClientRules(getManagementServer(SERVERS.PORT_0).getCorfuRuntime());
    // PART 3.
    // Allow management server to detect partial seal and correct this issue.
    addClientRule(getManagementServer(SERVERS.PORT_0).getCorfuRuntime(), new TestRule().matches(corfuMsg -> {
        if (corfuMsg.getMsgType().equals(CorfuMsgType.MANAGEMENT_FAILURE_DETECTED)) {
            failureDetected.release(2);
        }
        return true;
    }));
    assertThat(failureDetected.tryAcquire(2, PARAMETERS.TIMEOUT_NORMAL.toNanos(), TimeUnit.NANOSECONDS)).isTrue();
    for (int i = 0; i < PARAMETERS.NUM_ITERATIONS_LOW; i++) {
        Thread.sleep(PARAMETERS.TIMEOUT_VERY_SHORT.toMillis());
        // Assert successful seal of all servers. Every router and every layout server
        // must have reached epoch 2: SERVERS.PORT_0's router is already at epoch 2 from
        // the partial seal, so an "any of" check would succeed without the correction.
        if (getServerRouter(SERVERS.PORT_0).getServerEpoch() == 2L
                && getServerRouter(SERVERS.PORT_1).getServerEpoch() == 2L
                && getServerRouter(SERVERS.PORT_2).getServerEpoch() == 2L
                && getLayoutServer(SERVERS.PORT_0).getCurrentLayout().getEpoch() == 2L
                && getLayoutServer(SERVERS.PORT_1).getCurrentLayout().getEpoch() == 2L
                && getLayoutServer(SERVERS.PORT_2).getCurrentLayout().getEpoch() == 2L) {
            return;
        }
    }
    fail("Partial seal was not corrected: not all servers reached epoch 2");
}
Also used : Getter(lombok.Getter) PurgeFailurePolicy(org.corfudb.infrastructure.PurgeFailurePolicy) Semaphore(java.util.concurrent.Semaphore) Assertions.assertThat(org.assertj.core.api.Assertions.assertThat) ISMRMap(org.corfudb.runtime.collections.ISMRMap) Test(org.junit.Test) TypeToken(com.google.common.reflect.TypeToken) UUID(java.util.UUID) TestRule(org.corfudb.runtime.clients.TestRule) SMRMap(org.corfudb.runtime.collections.SMRMap) TransactionAbortedException(org.corfudb.runtime.exceptions.TransactionAbortedException) CorfuMsgType(org.corfudb.protocols.wireprotocol.CorfuMsgType) ServerContext(org.corfudb.infrastructure.ServerContext) TimeUnit(java.util.concurrent.TimeUnit) ManagementClient(org.corfudb.runtime.clients.ManagementClient) IStreamView(org.corfudb.runtime.view.stream.IStreamView) TokenResponse(org.corfudb.protocols.wireprotocol.TokenResponse) Map(java.util.Map) CorfuRuntime(org.corfudb.runtime.CorfuRuntime) Assert.fail(org.junit.Assert.fail) TestServerRouter(org.corfudb.infrastructure.TestServerRouter) TestLayoutBuilder(org.corfudb.infrastructure.TestLayoutBuilder) Collections(java.util.Collections) ServerContextBuilder(org.corfudb.infrastructure.ServerContextBuilder) TestRule(org.corfudb.runtime.clients.TestRule) TestLayoutBuilder(org.corfudb.infrastructure.TestLayoutBuilder) CorfuRuntime(org.corfudb.runtime.CorfuRuntime) Semaphore(java.util.concurrent.Semaphore) Test(org.junit.Test)

Example 7 with TestRule

use of org.corfudb.runtime.clients.TestRule in project CorfuDB by CorfuDB.

the class LayoutSealTest method failingQuorumSeal.

/**
 * Scenario: 5 Servers.
 * ENDPOINT_3 failed and attempted to seal.
 * LayoutServers quorum is possible,    -   Seal passes
 * Stripe 1: 0 failed, 3 responses.     -   Seal passes
 * Stripe 2: 1 failed, 1 response.      -   Seal failed (Quorum not possible)
 * Seal failed
 */
@Test
public void failingQuorumSeal() {
    final Layout layout = getLayout(Layout.ReplicationMode.QUORUM_REPLICATION);

    // Install an always-matching drop rule for ENDPOINT_3 on the layout's runtime.
    addClientRule(layout.getRuntime(), SERVERS.ENDPOINT_3, new TestRule().drop().always());

    // Bump the epoch and attempt the seal; it is expected to fail.
    layout.setEpoch(layout.getEpoch() + 1);
    assertThatThrownBy(layout::moveServersToEpoch)
            .isInstanceOf(QuorumUnreachableException.class);

    // Expected epochs after the failed seal attempt.
    assertLayoutEpochs(2, 2, 2);
    assertServerRouterEpochs(2, 2, 2, 1, 2);
}
Also used : TestRule(org.corfudb.runtime.clients.TestRule) Test(org.junit.Test)

Example 8 with TestRule

use of org.corfudb.runtime.clients.TestRule in project CorfuDB by CorfuDB.

the class LayoutSealTest method failingChainSeal.

/**
 * Scenario: 5 Servers.
 * ENDPOINT_1, ENDPOINT_3 and ENDPOINT_4 failed and attempted to seal.
 * LayoutServers quorum is possible,    -   Seal passes
 * Stripe 1: 1 failed, 2 responses.     -   Seal passes
 * Stripe 2: 2 failed, 0 responses.     -   Seal failed
 * Seal failed
 */
@Test
public void failingChainSeal() {
    final Layout layout = getLayout(Layout.ReplicationMode.CHAIN_REPLICATION);

    // Install always-matching drop rules for the three failed endpoints
    // on the layout's runtime.
    addClientRule(layout.getRuntime(), SERVERS.ENDPOINT_1, new TestRule().drop().always());
    addClientRule(layout.getRuntime(), SERVERS.ENDPOINT_3, new TestRule().drop().always());
    addClientRule(layout.getRuntime(), SERVERS.ENDPOINT_4, new TestRule().drop().always());

    // Bump the epoch and attempt the seal; it is expected to fail.
    layout.setEpoch(layout.getEpoch() + 1);
    assertThatThrownBy(layout::moveServersToEpoch)
            .isInstanceOf(QuorumUnreachableException.class);

    // Expected epochs after the failed seal attempt.
    assertLayoutEpochs(2, 1, 2);
    assertServerRouterEpochs(2, 1, 2, 1, 1);
}
Also used : TestRule(org.corfudb.runtime.clients.TestRule) Test(org.junit.Test)

Example 9 with TestRule

use of org.corfudb.runtime.clients.TestRule in project CorfuDB by CorfuDB.

the class PeriodicPollPolicyTest method failedPolling.

/**
 * Polls 3 failed servers and expects a failed status for all 3 of them.
 * SERVERS.PORT_0 is then restored and the poll is repeated, after which
 * only SERVERS.PORT_1 and SERVERS.PORT_2 are expected to be reported.
 *
 * @throws InterruptedException declared for the polling helper.
 */
@Test
public void failedPolling() throws InterruptedException {
    // Install an always-matching drop rule on each of the three servers.
    addServerRule(SERVERS.PORT_0, new TestRule().always().drop());
    addServerRule(SERVERS.PORT_1, new TestRule().always().drop());
    addServerRule(SERVERS.PORT_2, new TestRule().always().drop());

    // Endpoints expected to be reported as failed by the first poll.
    final Set<String> failedEndpoints = new HashSet<>();
    failedEndpoints.add(getEndpoint(SERVERS.PORT_0));
    failedEndpoints.add(getEndpoint(SERVERS.PORT_1));
    failedEndpoints.add(getEndpoint(SERVERS.PORT_2));
    pollAndMatchExpectedResult(failedEndpoints);

    /*
     * Restore SERVERS.PORT_0 by clearing its rules, so pings should work
     * normally again. The second poll demonstrates that SERVERS.PORT_0 is
     * no longer part of the failed nodes in the result.
     */
    clearServerRules(SERVERS.PORT_0);

    // Only SERVERS.PORT_1 & SERVERS.PORT_2 remain.
    failedEndpoints.remove(getEndpoint(SERVERS.PORT_0));
    pollAndMatchExpectedResult(failedEndpoints);
}
Also used : TestRule(org.corfudb.runtime.clients.TestRule) HashSet(java.util.HashSet) Test(org.junit.Test) AbstractViewTest(org.corfudb.runtime.view.AbstractViewTest)

Aggregations

TestRule (org.corfudb.runtime.clients.TestRule)9 Test (org.junit.Test)8 TestLayoutBuilder (org.corfudb.infrastructure.TestLayoutBuilder)5 CorfuRuntime (org.corfudb.runtime.CorfuRuntime)5 PurgeFailurePolicy (org.corfudb.infrastructure.PurgeFailurePolicy)3 ServerContext (org.corfudb.infrastructure.ServerContext)3 ServerContextBuilder (org.corfudb.infrastructure.ServerContextBuilder)3 TestServerRouter (org.corfudb.infrastructure.TestServerRouter)3 IStreamView (org.corfudb.runtime.view.stream.IStreamView)3 TypeToken (com.google.common.reflect.TypeToken)2 Collections (java.util.Collections)2 Map (java.util.Map)2 UUID (java.util.UUID)2 Semaphore (java.util.concurrent.Semaphore)2 TimeUnit (java.util.concurrent.TimeUnit)2 Getter (lombok.Getter)2 Assertions.assertThat (org.assertj.core.api.Assertions.assertThat)2 CorfuMsgType (org.corfudb.protocols.wireprotocol.CorfuMsgType)2 TokenResponse (org.corfudb.protocols.wireprotocol.TokenResponse)2 ManagementClient (org.corfudb.runtime.clients.ManagementClient)2