use of org.corfudb.runtime.clients.TestRule in project CorfuDB by CorfuDB.
the class ManagementViewTest method handleTransientFailure.
/**
 * Scenario with 3 nodes: SERVERS.PORT_0, SERVERS.PORT_1 and SERVERS.PORT_2.
 * Simulates a transient failure of a server that leads to a partial seal,
 * then lets the management server detect the partial seal and correct it.
 * <p>
 * Part 1.
 * The partial seal causes SERVERS.PORT_0 to be at epoch 2 whereas
 * SERVERS.PORT_1 and SERVERS.PORT_2 fail to receive this message and are stuck at epoch 1.
 * <p>
 * Part 2.
 * All 3 servers are functional again and receive all messages.
 * <p>
 * Part 3.
 * The PING message gets rejected by the partially sealed router (WrongEpoch),
 * the management server notices the partial seal and corrects it
 * by issuing another failure detected message.
 *
 * @throws Exception if bootstrapping, waiting on the failure handler future,
 *                   or any of the sleeps/semaphore waits fails or is interrupted.
 */
@Test
public void handleTransientFailure() throws Exception {
    // Two-permit gate. Both permits are taken below, so every later
    // tryAcquire(2) blocks until the test (or a client rule) releases them.
    final Semaphore failureDetected = new Semaphore(2, true);

    addServer(SERVERS.PORT_0);
    addServer(SERVERS.PORT_1);
    addServer(SERVERS.PORT_2);

    Layout layout = new TestLayoutBuilder()
            .setEpoch(1L)
            .addLayoutServer(SERVERS.PORT_0)
            .addLayoutServer(SERVERS.PORT_1)
            .addLayoutServer(SERVERS.PORT_2)
            .addSequencer(SERVERS.PORT_0)
            .buildSegment()
            .setReplicationMode(Layout.ReplicationMode.QUORUM_REPLICATION)
            .buildStripe()
            .addLogUnit(SERVERS.PORT_0)
            .addLogUnit(SERVERS.PORT_1)
            .addLogUnit(SERVERS.PORT_2)
            .addToSegment()
            .addToLayout()
            .build();
    bootstrapAllServers(layout);

    CorfuRuntime corfuRuntime = getRuntime(layout).connect();

    // Initiate SERVERS.ENDPOINT_0 failureHandler.
    corfuRuntime.getRouter(SERVERS.ENDPOINT_0)
            .getClient(ManagementClient.class)
            .initiateFailureHandler()
            .get();

    // Set aggressive timeouts.
    setAggressiveTimeouts(layout, corfuRuntime,
            getManagementServer(SERVERS.PORT_0).getCorfuRuntime(),
            getManagementServer(SERVERS.PORT_1).getCorfuRuntime(),
            getManagementServer(SERVERS.PORT_2).getCorfuRuntime());

    // Take both permits so the gate starts closed.
    failureDetected.acquire(2);

    // Only allow SERVERS.PORT_0 to manage failures.
    getManagementServer(SERVERS.PORT_1).shutdown();
    getManagementServer(SERVERS.PORT_2).shutdown();

    // PART 1.
    // Prevent ENDPOINT_1 from sealing.
    addClientRule(getManagementServer(SERVERS.PORT_0).getCorfuRuntime(), SERVERS.ENDPOINT_1,
            new TestRule()
                    .matches(corfuMsg -> corfuMsg.getMsgType().equals(CorfuMsgType.SET_EPOCH))
                    .drop());
    // Simulate ENDPOINT_2 failure from ENDPOINT_0 (only Management Server).
    addClientRule(getManagementServer(SERVERS.PORT_0).getCorfuRuntime(), SERVERS.ENDPOINT_2,
            new TestRule().matches(corfuMsg -> true).drop());

    // Rule on the SERVERS.PORT_0 management runtime: release one permit each time
    // it sends a MANAGEMENT_FAILURE_DETECTED message. The rule has no drop() action,
    // so it is used purely as an observer.
    addClientRule(getManagementServer(SERVERS.PORT_0).getCorfuRuntime(),
            new TestRule().matches(corfuMsg -> {
                if (corfuMsg.getMsgType().equals(CorfuMsgType.MANAGEMENT_FAILURE_DETECTED)) {
                    failureDetected.release();
                }
                return true;
            }));

    // Go ahead when sealing of ENDPOINT_0 takes place; this supplies the second permit.
    for (int i = 0; i < PARAMETERS.NUM_ITERATIONS_LOW; i++) {
        if (getServerRouter(SERVERS.PORT_0).getServerEpoch() == 2L) {
            failureDetected.release();
            break;
        }
        Thread.sleep(PARAMETERS.TIMEOUT_VERY_SHORT.toMillis());
    }

    // Needs both the failure-detected message and the seal of ENDPOINT_0.
    assertThat(failureDetected.tryAcquire(2, PARAMETERS.TIMEOUT_NORMAL.toNanos(),
            TimeUnit.NANOSECONDS)).isTrue();

    // Stop further failure-detected messages while the partial seal is inspected.
    addClientRule(getManagementServer(SERVERS.PORT_0).getCorfuRuntime(),
            new TestRule()
                    .matches(corfuMsg ->
                            corfuMsg.getMsgType().equals(CorfuMsgType.MANAGEMENT_FAILURE_DETECTED))
                    .drop());

    // Assert that only a partial seal was successful.
    // ENDPOINT_0 sealed. ENDPOINT_1 & ENDPOINT_2 not sealed.
    assertThat(getServerRouter(SERVERS.PORT_0).getServerEpoch()).isEqualTo(2L);
    assertThat(getServerRouter(SERVERS.PORT_1).getServerEpoch()).isEqualTo(1L);
    assertThat(getServerRouter(SERVERS.PORT_2).getServerEpoch()).isEqualTo(1L);
    assertThat(getLayoutServer(SERVERS.PORT_0).getCurrentLayout().getEpoch()).isEqualTo(1L);
    assertThat(getLayoutServer(SERVERS.PORT_1).getCurrentLayout().getEpoch()).isEqualTo(1L);
    assertThat(getLayoutServer(SERVERS.PORT_2).getCurrentLayout().getEpoch()).isEqualTo(1L);

    // PART 2.
    // Simulate normal operations for all servers and clients.
    clearClientRules(getManagementServer(SERVERS.PORT_0).getCorfuRuntime());

    // PART 3.
    // Allow management server to detect partial seal and correct this issue.
    // A single failure-detected message releases both permits here.
    addClientRule(getManagementServer(SERVERS.PORT_0).getCorfuRuntime(),
            new TestRule().matches(corfuMsg -> {
                if (corfuMsg.getMsgType().equals(CorfuMsgType.MANAGEMENT_FAILURE_DETECTED)) {
                    failureDetected.release(2);
                }
                return true;
            }));
    assertThat(failureDetected.tryAcquire(2, PARAMETERS.TIMEOUT_NORMAL.toNanos(),
            TimeUnit.NANOSECONDS)).isTrue();

    for (int i = 0; i < PARAMETERS.NUM_ITERATIONS_LOW; i++) {
        Thread.sleep(PARAMETERS.TIMEOUT_VERY_SHORT.toMillis());
        // Assert successful seal of ALL servers. The checks must be joined with &&:
        // SERVERS.PORT_0 is already at epoch 2 after the partial seal, so an ||
        // would succeed immediately without the correction ever being verified.
        if (getServerRouter(SERVERS.PORT_0).getServerEpoch() == 2L
                && getServerRouter(SERVERS.PORT_1).getServerEpoch() == 2L
                && getServerRouter(SERVERS.PORT_2).getServerEpoch() == 2L
                && getLayoutServer(SERVERS.PORT_0).getCurrentLayout().getEpoch() == 2L
                && getLayoutServer(SERVERS.PORT_1).getCurrentLayout().getEpoch() == 2L
                && getLayoutServer(SERVERS.PORT_2).getCurrentLayout().getEpoch() == 2L) {
            return;
        }
    }
    fail("Not all server routers and layout servers reached epoch 2 after the partial seal");
}
use of org.corfudb.runtime.clients.TestRule in project CorfuDB by CorfuDB.
the class LayoutSealTest method failingQuorumSeal.
/**
 * Scenario: 5 Servers, quorum replication.
 * All messages to ENDPOINT_3 are dropped, then a seal to the next epoch is attempted.
 * LayoutServers quorum is possible, - Seal passes
 * Stripe 1: 0 failed, 3 responses. - Seal passes
 * Stripe 2: 1 failed, 1 response. - Seal failed (Quorum not possible)
 * Overall the seal fails with a QuorumUnreachableException.
 */
@Test
public void failingQuorumSeal() {
    Layout layout = getLayout(Layout.ReplicationMode.QUORUM_REPLICATION);

    // Make ENDPOINT_3 unreachable from the layout's runtime.
    addClientRule(layout.getRuntime(), SERVERS.ENDPOINT_3, new TestRule().drop().always());

    // Bump the epoch and try to move every server to it.
    layout.setEpoch(layout.getEpoch() + 1);
    assertThatThrownBy(layout::moveServersToEpoch)
            .isInstanceOf(QuorumUnreachableException.class);

    assertLayoutEpochs(2, 2, 2);
    assertServerRouterEpochs(2, 2, 2, 1, 2);
}
use of org.corfudb.runtime.clients.TestRule in project CorfuDB by CorfuDB.
the class LayoutSealTest method failingChainSeal.
/**
 * Scenario: 5 Servers, chain replication.
 * All messages to ENDPOINT_1, ENDPOINT_3 and ENDPOINT_4 are dropped, then a seal
 * to the next epoch is attempted.
 * LayoutServers quorum is possible, - Seal passes
 * Stripe 1: 1 failed, 2 responses. - Seal passes
 * Stripe 2: 2 failed, 0 responses. - Seal failed
 * Overall the seal fails with a QuorumUnreachableException.
 */
@Test
public void failingChainSeal() {
    Layout layout = getLayout(Layout.ReplicationMode.CHAIN_REPLICATION);

    // Make ENDPOINT_1, ENDPOINT_3 and ENDPOINT_4 unreachable from the layout's runtime.
    addClientRule(layout.getRuntime(), SERVERS.ENDPOINT_1, new TestRule().drop().always());
    addClientRule(layout.getRuntime(), SERVERS.ENDPOINT_3, new TestRule().drop().always());
    addClientRule(layout.getRuntime(), SERVERS.ENDPOINT_4, new TestRule().drop().always());

    // Bump the epoch and try to move every server to it.
    layout.setEpoch(layout.getEpoch() + 1);
    assertThatThrownBy(layout::moveServersToEpoch)
            .isInstanceOf(QuorumUnreachableException.class);

    assertLayoutEpochs(2, 1, 2);
    assertServerRouterEpochs(2, 1, 2, 1, 1);
}
use of org.corfudb.runtime.clients.TestRule in project CorfuDB by CorfuDB.
the class PeriodicPollPolicyTest method failedPolling.
/**
 * Polls 3 servers whose messages are all dropped and expects all 3 to be
 * reported as failed.
 * The drop rule on SERVERS.PORT_0 is then cleared and the poll is repeated,
 * expecting only SERVERS.PORT_1 and SERVERS.PORT_2 to be reported.
 *
 * @throws InterruptedException declared for the polling helper, which may be interrupted.
 */
@Test
public void failedPolling() throws InterruptedException {
    // Endpoints expected to be reported as failed by the first poll.
    Set<String> failedEndpoints = new HashSet<>();
    failedEndpoints.add(getEndpoint(SERVERS.PORT_0));
    failedEndpoints.add(getEndpoint(SERVERS.PORT_1));
    failedEndpoints.add(getEndpoint(SERVERS.PORT_2));

    // Drop every message on all three servers.
    addServerRule(SERVERS.PORT_0, new TestRule().always().drop());
    addServerRule(SERVERS.PORT_1, new TestRule().always().drop());
    addServerRule(SERVERS.PORT_2, new TestRule().always().drop());

    pollAndMatchExpectedResult(failedEndpoints);

    /*
     * "Restart" SERVERS.PORT_0 by removing its drop rule, so pings to it
     * should work normally again and it must no longer show up among the
     * failed nodes in the poll result.
     */
    clearServerRules(SERVERS.PORT_0);

    // Only SERVERS.PORT_1 & SERVERS.PORT_2 remain.
    failedEndpoints.remove(getEndpoint(SERVERS.PORT_0));
    pollAndMatchExpectedResult(failedEndpoints);
}
Aggregations