use of org.corfudb.infrastructure.TestLayoutBuilder in project CorfuDB by CorfuDB.
the class LayoutViewTest method canTolerateLayoutServerFailure.
/**
 * Exercises a client against a two-layout-server deployment after the link to
 * one of the layout servers (SERVERS.PORT_1) starts dropping every message.
 *
 * <p>The test carries no explicit assertion: it passes when invalidating the
 * layout and touching a stream both complete without throwing.
 *
 * @throws Exception if any of the setup or runtime calls throws
 */
@Test
public void canTolerateLayoutServerFailure() throws Exception {
    addServer(SERVERS.PORT_0);
    addServer(SERVERS.PORT_1);

    // Two layout servers; the sequencer and the only log unit live on PORT_0.
    Layout twoLayoutServers = new TestLayoutBuilder()
            .setEpoch(1L)
            .addLayoutServer(SERVERS.PORT_0)
            .addLayoutServer(SERVERS.PORT_1)
            .addSequencer(SERVERS.PORT_0)
            .buildSegment()
            .buildStripe()
            .addLogUnit(SERVERS.PORT_0)
            .addToSegment()
            .addToLayout()
            .build();
    bootstrapAllServers(twoLayoutServers);

    CorfuRuntime runtime = getRuntime().connect();

    // Fail the network link between the client and the PORT_1 test server.
    addServerRule(SERVERS.PORT_1, new TestRule().always().drop());

    // Force a layout refresh, then touch a stream.
    runtime.invalidateLayout();
    IStreamView stream = runtime.getStreamsView().get(CorfuRuntime.getStreamID("hi"));
    stream.hasNext();
}
use of org.corfudb.infrastructure.TestLayoutBuilder in project CorfuDB by CorfuDB.
the class LayoutViewTest method reconfigurationDuringDataOperations.
/**
 * Fails a server and reconfigures the layout while data operations are going on.
 *
 * <p>Details:
 * Start with a configuration of 3 servers SERVERS.PORT_0, SERVERS.PORT_1, SERVERS.PORT_2.
 * Perform data operations. Fail SERVERS.PORT_1 and reconfigure to have only
 * SERVERS.PORT_0 and SERVERS.PORT_2.
 * Perform data operations while the reconfiguration is going on. The operations should
 * be stuck till the new configuration is chosen and then complete after that.
 * FIXME: We cannot failover the server with the primary sequencer yet.
 *
 * <p>An exception raised on the reconfiguration thread is logged, recorded, and
 * rethrown as an {@link AssertionError} once the thread has been joined, so a
 * failed reconfiguration fails the test.
 *
 * @throws Exception if setup, the data operations, or joining the thread fails
 */
@Test
public void reconfigurationDuringDataOperations() throws Exception {
    addServer(SERVERS.PORT_0);
    addServer(SERVERS.PORT_1);
    addServer(SERVERS.PORT_2);
    Layout l = new TestLayoutBuilder()
            .setEpoch(1L)
            .addLayoutServer(SERVERS.PORT_0)
            .addLayoutServer(SERVERS.PORT_1)
            .addLayoutServer(SERVERS.PORT_2)
            .addSequencer(SERVERS.PORT_0)
            .addSequencer(SERVERS.PORT_1)
            .addSequencer(SERVERS.PORT_2)
            .buildSegment()
            .buildStripe()
            .addLogUnit(SERVERS.PORT_0)
            .addLogUnit(SERVERS.PORT_1)
            .addLogUnit(SERVERS.PORT_2)
            .addToSegment()
            .addToLayout()
            .build();
    bootstrapAllServers(l);
    CorfuRuntime corfuRuntime = getRuntime(l).connect();

    // Thread to reconfigure the layout
    CountDownLatch startReconfigurationLatch = new CountDownLatch(1);
    CountDownLatch layoutReconfiguredLatch = new CountDownLatch(1);
    // Carries any exception from the reconfiguration thread back to the test thread.
    // Fully qualified so that the block does not depend on an additional import.
    final java.util.concurrent.atomic.AtomicReference<Throwable> reconfigurationFailure =
            new java.util.concurrent.atomic.AtomicReference<>();
    Thread t = new Thread(() -> {
        try {
            startReconfigurationLatch.await();
            corfuRuntime.invalidateLayout();
            // Fail the network link between the client and test server
            addServerRule(SERVERS.PORT_1, new TestRule().always().drop());
            // New layout removes the failed server SERVERS.PORT_1
            Layout newLayout = new TestLayoutBuilder()
                    .setEpoch(l.getEpoch() + 1)
                    .addLayoutServer(SERVERS.PORT_0)
                    .addLayoutServer(SERVERS.PORT_2)
                    .addSequencer(SERVERS.PORT_0)
                    .addSequencer(SERVERS.PORT_2)
                    .buildSegment()
                    .buildStripe()
                    .addLogUnit(SERVERS.PORT_0)
                    .addLogUnit(SERVERS.PORT_2)
                    .addToSegment()
                    .addToLayout()
                    .build();
            newLayout.setRuntime(corfuRuntime);
            //TODO need to figure out if we can move to
            //update layout
            newLayout.moveServersToEpoch();
            corfuRuntime.getLayoutView().updateLayout(newLayout, newLayout.getEpoch());
            corfuRuntime.invalidateLayout();
            log.debug("layout updated new layout {}", corfuRuntime.getLayoutView().getLayout());
        } catch (Exception e) {
            log.error("Layout reconfiguration failed", e);
            reconfigurationFailure.set(e);
        } finally {
            // Always release the test thread. writeAndReadStream is handed this latch and
            // presumably waits on it (its body is not visible here), so skipping the
            // count-down on failure could leave the test blocked instead of failing.
            layoutReconfiguredLatch.countDown();
        }
    });
    t.start();

    // verify writes and reads happen before and after the reconfiguration
    IStreamView sv = corfuRuntime.getStreamsView().get(CorfuRuntime.getStreamID("streamA"));
    // This append will happen before the reconfiguration while the read for this append
    // will happen after reconfiguration
    writeAndReadStream(corfuRuntime, sv, startReconfigurationLatch, layoutReconfiguredLatch);
    // Write and read after reconfiguration.
    writeAndReadStream(corfuRuntime, sv, startReconfigurationLatch, layoutReconfiguredLatch);
    t.join();

    // Surface a reconfiguration failure that would otherwise only have been logged.
    Throwable failure = reconfigurationFailure.get();
    if (failure != null) {
        throw new AssertionError("Layout reconfiguration thread failed", failure);
    }
}
use of org.corfudb.infrastructure.TestLayoutBuilder in project CorfuDB by CorfuDB.
the class ManagementViewTest method removeSingleNodeFailure.
/**
 * Three-node scenario (SERVERS.PORT_0, SERVERS.PORT_1, SERVERS.PORT_2) in which
 * SERVERS.PORT_1 is failed. The test then polls until the layout epoch moves to 2
 * and checks that the resulting layout has two layout servers, none of which is
 * SERVERS.ENDPOINT_1, i.e. the failed node was removed.
 *
 * @throws Exception if setup, failure-handler initiation, or polling throws
 */
@Test
public void removeSingleNodeFailure() throws Exception {
    // One server context per node, each configured with a PurgeFailurePolicy.
    ServerContext contextPort0 = new ServerContextBuilder()
            .setSingle(false)
            .setServerRouter(new TestServerRouter(SERVERS.PORT_0))
            .setPort(SERVERS.PORT_0)
            .build();
    ServerContext contextPort1 = new ServerContextBuilder()
            .setSingle(false)
            .setServerRouter(new TestServerRouter(SERVERS.PORT_1))
            .setPort(SERVERS.PORT_1)
            .build();
    ServerContext contextPort2 = new ServerContextBuilder()
            .setSingle(false)
            .setServerRouter(new TestServerRouter(SERVERS.PORT_2))
            .setPort(SERVERS.PORT_2)
            .build();
    contextPort0.setFailureHandlerPolicy(new PurgeFailurePolicy());
    contextPort1.setFailureHandlerPolicy(new PurgeFailurePolicy());
    contextPort2.setFailureHandlerPolicy(new PurgeFailurePolicy());
    addServer(SERVERS.PORT_0, contextPort0);
    addServer(SERVERS.PORT_1, contextPort1);
    addServer(SERVERS.PORT_2, contextPort2);

    // Three layout servers; sequencer on PORT_0; log units on PORT_0 and PORT_2.
    Layout initialLayout = new TestLayoutBuilder()
            .setEpoch(1L)
            .addLayoutServer(SERVERS.PORT_0)
            .addLayoutServer(SERVERS.PORT_1)
            .addLayoutServer(SERVERS.PORT_2)
            .addSequencer(SERVERS.PORT_0)
            .buildSegment()
            .buildStripe()
            .addLogUnit(SERVERS.PORT_0)
            .addLogUnit(SERVERS.PORT_2)
            .addToSegment()
            .addToLayout()
            .build();
    bootstrapAllServers(initialLayout);

    CorfuRuntime runtime = new CorfuRuntime();
    initialLayout.getLayoutServers().forEach(endpoint -> runtime.addLayoutServer(endpoint));
    runtime.connect();

    // Start the failure handler on every server of the layout.
    for (String endpoint : initialLayout.getAllServers()) {
        runtime.getRouter(endpoint)
                .getClient(ManagementClient.class)
                .initiateFailureHandler()
                .get();
    }

    // Aggressive timeouts for the test runtime and each management server's runtime.
    setAggressiveTimeouts(
            initialLayout,
            runtime,
            getManagementServer(SERVERS.PORT_0).getCorfuRuntime(),
            getManagementServer(SERVERS.PORT_1).getCorfuRuntime(),
            getManagementServer(SERVERS.PORT_2).getCorfuRuntime());

    // Fail PORT_1: drop all of its packets and shut its management server down.
    addServerRule(SERVERS.PORT_1, new TestRule().always().drop());
    getManagementServer(SERVERS.PORT_1).shutdown();

    // Poll, refreshing the layout each round, until the epoch reaches 2
    // or the iteration budget runs out.
    int attempt = 0;
    while (attempt < PARAMETERS.NUM_ITERATIONS_LOW) {
        runtime.invalidateLayout();
        if (runtime.getLayoutView().getLayout().getEpoch() == 2L) {
            break;
        }
        Thread.sleep(PARAMETERS.TIMEOUT_VERY_SHORT.toMillis());
        attempt++;
    }

    Layout reconfiguredLayout = runtime.getLayoutView().getLayout();
    assertThat(reconfiguredLayout.getEpoch()).isEqualTo(2L);
    assertThat(reconfiguredLayout.getLayoutServers().size()).isEqualTo(2);
    assertThat(reconfiguredLayout.getLayoutServers().contains(SERVERS.ENDPOINT_1)).isFalse();
}
use of org.corfudb.infrastructure.TestLayoutBuilder in project CorfuDB by CorfuDB.
the class ManagementViewTest method invokeFailureHandler.
/**
 * Two-node scenario (SERVERS.PORT_0 and SERVERS.PORT_1).
 * SERVERS.PORT_0 is failed, and a client rule on the runtime of SERVERS.PORT_1's
 * management server watches for the MANAGEMENT_FAILURE_DETECTED message. The test
 * passes when that message is observed within PARAMETERS.TIMEOUT_NORMAL.
 *
 * @throws Exception if setup, failure-handler initiation, or waiting throws
 */
@Test
public void invokeFailureHandler() throws Exception {
    // Single-permit fair semaphore used as the signal: its permit is taken below and
    // handed back by the client rule when MANAGEMENT_FAILURE_DETECTED is seen.
    final Semaphore failureDetected = new Semaphore(1, true);

    addServer(SERVERS.PORT_0);
    addServer(SERVERS.PORT_1);

    Layout layout = new TestLayoutBuilder()
            .setEpoch(1L)
            .addLayoutServer(SERVERS.PORT_0)
            .addLayoutServer(SERVERS.PORT_1)
            .addSequencer(SERVERS.PORT_0)
            .buildSegment()
            .buildStripe()
            .addLogUnit(SERVERS.PORT_0)
            .addLogUnit(SERVERS.PORT_1)
            .addToSegment()
            .addToLayout()
            .build();
    bootstrapAllServers(layout);

    CorfuRuntime runtime = new CorfuRuntime();
    layout.getLayoutServers().forEach(endpoint -> runtime.addLayoutServer(endpoint));
    runtime.connect();

    // Only ENDPOINT_1's failure handler is started.
    runtime.getRouter(SERVERS.ENDPOINT_1)
            .getClient(ManagementClient.class)
            .initiateFailureHandler()
            .get();

    // Set aggressive timeouts.
    setAggressiveTimeouts(
            layout,
            runtime,
            getManagementServer(SERVERS.PORT_0).getCorfuRuntime(),
            getManagementServer(SERVERS.PORT_1).getCorfuRuntime());

    // Drain the permit so the final tryAcquire can only succeed after a release.
    failureDetected.acquire();

    // Fail PORT_0: drop all of its packets and shut its management server down.
    addServerRule(SERVERS.PORT_0, new TestRule().always().drop());
    getManagementServer(SERVERS.PORT_0).shutdown();

    // Rule on the runtime of PORT_1's management server: release the permit whenever
    // a MANAGEMENT_FAILURE_DETECTED message goes through; every message matches.
    CorfuRuntime managementRuntime = getManagementServer(SERVERS.PORT_1).getCorfuRuntime();
    addClientRule(managementRuntime, new TestRule().matches(msg -> {
        if (msg.getMsgType().equals(CorfuMsgType.MANAGEMENT_FAILURE_DETECTED)) {
            failureDetected.release();
        }
        return true;
    }));

    boolean detectedInTime = failureDetected.tryAcquire(
            PARAMETERS.TIMEOUT_NORMAL.toNanos(), TimeUnit.NANOSECONDS);
    assertThat(detectedInTime).isEqualTo(true);
}
use of org.corfudb.infrastructure.TestLayoutBuilder in project CorfuDB by CorfuDB.
the class ManagementViewTest method handleTransientFailure.
/**
 * Scenario with 3 nodes: SERVERS.PORT_0, SERVERS.PORT_1 and SERVERS.PORT_2.
 * Simulate transient failure of a server leading to a partial seal.
 * Allow the management server to detect the partial seal and correct this.
 * <p>
 * Part 1.
 * The partial seal causes SERVERS.PORT_0 to be at epoch 2 whereas
 * SERVERS.PORT_1 and SERVERS.PORT_2 fail to receive this message and are stuck at epoch 1.
 * <p>
 * Part 2.
 * All the 3 servers are now functional and receive all messages.
 * <p>
 * Part 3.
 * The PING message gets rejected by the partially sealed router (WrongEpoch)
 * and the management server realizes of the partial seal and corrects this
 * by issuing another failure detected message. The test then waits until
 * every server router and every layout server reports epoch 2.
 *
 * @throws Exception if setup, failure-handler initiation, or waiting throws
 */
@Test
public void handleTransientFailure() throws Exception {
    // Two-permit fair semaphore used as the signal. Both permits are drained below;
    // they are handed back when the MANAGEMENT_FAILURE_DETECTED message is sent by the
    // management client and when SERVERS.PORT_0 is observed at epoch 2.
    final Semaphore failureDetected = new Semaphore(2, true);
    addServer(SERVERS.PORT_0);
    addServer(SERVERS.PORT_1);
    addServer(SERVERS.PORT_2);
    Layout l = new TestLayoutBuilder()
            .setEpoch(1L)
            .addLayoutServer(SERVERS.PORT_0)
            .addLayoutServer(SERVERS.PORT_1)
            .addLayoutServer(SERVERS.PORT_2)
            .addSequencer(SERVERS.PORT_0)
            .buildSegment()
            .setReplicationMode(Layout.ReplicationMode.QUORUM_REPLICATION)
            .buildStripe()
            .addLogUnit(SERVERS.PORT_0)
            .addLogUnit(SERVERS.PORT_1)
            .addLogUnit(SERVERS.PORT_2)
            .addToSegment()
            .addToLayout()
            .build();
    bootstrapAllServers(l);
    CorfuRuntime corfuRuntime = getRuntime(l).connect();
    // Initiate SERVERS.ENDPOINT_0 failureHandler
    corfuRuntime.getRouter(SERVERS.ENDPOINT_0).getClient(ManagementClient.class).initiateFailureHandler().get();
    // Set aggressive timeouts.
    setAggressiveTimeouts(l, corfuRuntime,
            getManagementServer(SERVERS.PORT_0).getCorfuRuntime(),
            getManagementServer(SERVERS.PORT_1).getCorfuRuntime(),
            getManagementServer(SERVERS.PORT_2).getCorfuRuntime());
    failureDetected.acquire(2);
    // Only allow SERVERS.PORT_0 to manage failures.
    getManagementServer(SERVERS.PORT_1).shutdown();
    getManagementServer(SERVERS.PORT_2).shutdown();

    // PART 1.
    // Prevent ENDPOINT_1 from sealing.
    addClientRule(getManagementServer(SERVERS.PORT_0).getCorfuRuntime(), SERVERS.ENDPOINT_1,
            new TestRule().matches(corfuMsg -> corfuMsg.getMsgType().equals(CorfuMsgType.SET_EPOCH)).drop());
    // Simulate ENDPOINT_2 failure from ENDPOINT_0 (only Management Server)
    addClientRule(getManagementServer(SERVERS.PORT_0).getCorfuRuntime(), SERVERS.ENDPOINT_2,
            new TestRule().matches(corfuMsg -> true).drop());
    // Adding a rule on the runtime of SERVERS.PORT_0's management server to release
    // a permit when it sends the MANAGEMENT_FAILURE_DETECTED message.
    addClientRule(getManagementServer(SERVERS.PORT_0).getCorfuRuntime(), new TestRule().matches(corfuMsg -> {
        if (corfuMsg.getMsgType().equals(CorfuMsgType.MANAGEMENT_FAILURE_DETECTED)) {
            failureDetected.release();
        }
        return true;
    }));
    // Go ahead when sealing of ENDPOINT_0 takes place.
    for (int i = 0; i < PARAMETERS.NUM_ITERATIONS_LOW; i++) {
        if (getServerRouter(SERVERS.PORT_0).getServerEpoch() == 2L) {
            failureDetected.release();
            break;
        }
        Thread.sleep(PARAMETERS.TIMEOUT_VERY_SHORT.toMillis());
    }
    assertThat(failureDetected.tryAcquire(2, PARAMETERS.TIMEOUT_NORMAL.toNanos(), TimeUnit.NANOSECONDS)).isEqualTo(true);
    // Drop further MANAGEMENT_FAILURE_DETECTED messages while the partial seal is checked.
    addClientRule(getManagementServer(SERVERS.PORT_0).getCorfuRuntime(),
            new TestRule().matches(corfuMsg -> corfuMsg.getMsgType().equals(CorfuMsgType.MANAGEMENT_FAILURE_DETECTED)).drop());
    // Assert that only a partial seal was successful.
    // ENDPOINT_0 sealed. ENDPOINT_1 & ENDPOINT_2 not sealed.
    assertThat(getServerRouter(SERVERS.PORT_0).getServerEpoch()).isEqualTo(2L);
    assertThat(getServerRouter(SERVERS.PORT_1).getServerEpoch()).isEqualTo(1L);
    assertThat(getServerRouter(SERVERS.PORT_2).getServerEpoch()).isEqualTo(1L);
    assertThat(getLayoutServer(SERVERS.PORT_0).getCurrentLayout().getEpoch()).isEqualTo(1L);
    assertThat(getLayoutServer(SERVERS.PORT_1).getCurrentLayout().getEpoch()).isEqualTo(1L);
    assertThat(getLayoutServer(SERVERS.PORT_2).getCurrentLayout().getEpoch()).isEqualTo(1L);

    // PART 2.
    // Simulate normal operations for all servers and clients.
    clearClientRules(getManagementServer(SERVERS.PORT_0).getCorfuRuntime());

    // PART 3.
    // Allow management server to detect partial seal and correct this issue.
    addClientRule(getManagementServer(SERVERS.PORT_0).getCorfuRuntime(), new TestRule().matches(corfuMsg -> {
        if (corfuMsg.getMsgType().equals(CorfuMsgType.MANAGEMENT_FAILURE_DETECTED)) {
            failureDetected.release(2);
        }
        return true;
    }));
    assertThat(failureDetected.tryAcquire(2, PARAMETERS.TIMEOUT_NORMAL.toNanos(), TimeUnit.NANOSECONDS)).isEqualTo(true);
    for (int i = 0; i < PARAMETERS.NUM_ITERATIONS_LOW; i++) {
        Thread.sleep(PARAMETERS.TIMEOUT_VERY_SHORT.toMillis());
        // Assert successful seal of all servers: every router and every layout server
        // must report epoch 2. The conditions are joined with && because the PORT_0
        // router was already asserted to be at epoch 2 above, so an || chain would be
        // satisfied immediately without the partial seal ever being corrected.
        if (getServerRouter(SERVERS.PORT_0).getServerEpoch() == 2L
                && getServerRouter(SERVERS.PORT_1).getServerEpoch() == 2L
                && getServerRouter(SERVERS.PORT_2).getServerEpoch() == 2L
                && getLayoutServer(SERVERS.PORT_0).getCurrentLayout().getEpoch() == 2L
                && getLayoutServer(SERVERS.PORT_1).getCurrentLayout().getEpoch() == 2L
                && getLayoutServer(SERVERS.PORT_2).getCurrentLayout().getEpoch() == 2L) {
            return;
        }
    }
    fail("Not all server routers and layout servers reached epoch 2 after the partial seal");
}
Aggregations