use of org.apache.cassandra.distributed.api.TokenSupplier in project cassandra by apache.
the class HostReplacementTest method replaceDownedHost.
/**
* Attempt to do a host replacement on a down host
*/
@Test
public void replaceDownedHost() throws IOException {
// start with 2 nodes, stop node 2, then host-replace the downed node
TokenSupplier even = TokenSupplier.evenlyDistributedTokens(2);
try (Cluster cluster = Cluster.build(2)
                              .withConfig(c -> c.with(Feature.GOSSIP, Feature.NETWORK))
                              .withTokenSupplier(node -> even.token(node == 3 ? 2 : node))
                              .start()) {
IInvokableInstance seed = cluster.get(1);
IInvokableInstance nodeToRemove = cluster.get(2);
setupCluster(cluster);
// collect rows to detect issues later on if the state doesn't match
SimpleQueryResult expectedState = nodeToRemove.coordinator().executeWithResult("SELECT * FROM " + KEYSPACE + ".tbl", ConsistencyLevel.ALL);
stopUnchecked(nodeToRemove);
// now create a new node to replace the other node
IInvokableInstance replacingNode = replaceHostAndStart(cluster, nodeToRemove, props -> {
// since we have a downed host, an old schema version may show up in gossip but
// can never be fetched because the host is down...
props.set(BOOTSTRAP_SKIP_SCHEMA_CHECK, true);
});
// wait till the replacing node is in the ring
awaitRingJoin(seed, replacingNode);
awaitRingJoin(replacingNode, seed);
// make sure all nodes are healthy
awaitRingHealthy(seed);
assertRingIs(seed, seed, replacingNode);
logger.info("Current ring is {}", assertRingIs(replacingNode, seed, replacingNode));
validateRows(seed.coordinator(), expectedState);
validateRows(replacingNode.coordinator(), expectedState);
}
}
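A note on the token supplier used in these tests (an inference from the code above, assuming the same dtest API version): the replacement instance joins the dtest cluster as node 3, but it must claim exactly the token owned by node 2, the node it replaces. The lambda passed to withTokenSupplier therefore maps node id 3 back to 2 before asking the evenly distributed supplier for a token. A minimal sketch, reusing only the calls already shown above:
// Nodes 1 and 2 keep their evenly distributed tokens; the replacement (node 3) reuses node 2's token.
TokenSupplier even = TokenSupplier.evenlyDistributedTokens(2);
TokenSupplier remapped = node -> even.token(node == 3 ? 2 : node);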
use of org.apache.cassandra.distributed.api.TokenSupplier in project cassandra by apache.
the class HostReplacementTest method replaceAliveHost.
/**
* Attempt to do a host replacement on a live host
*/
@Test
public void replaceAliveHost() throws IOException {
// start with 2 nodes, then attempt to host-replace node 2 while it is still alive (expected to fail)
TokenSupplier even = TokenSupplier.evenlyDistributedTokens(2);
try (Cluster cluster = Cluster.build(2)
                              .withConfig(c -> c.with(Feature.GOSSIP, Feature.NETWORK)
                                                .set(Constants.KEY_DTEST_API_STARTUP_FAILURE_AS_SHUTDOWN, false))
                              .withTokenSupplier(node -> even.token(node == 3 ? 2 : node))
                              .start()) {
IInvokableInstance seed = cluster.get(1);
IInvokableInstance nodeToRemove = cluster.get(2);
setupCluster(cluster);
// collect rows to detect issues later on if the state doesn't match
SimpleQueryResult expectedState = nodeToRemove.coordinator().executeWithResult("SELECT * FROM " + KEYSPACE + ".tbl", ConsistencyLevel.ALL);
// now create a new node to replace the other node
Assertions.assertThatThrownBy(() -> replaceHostAndStart(cluster, nodeToRemove))
          .as("Startup of instance should have failed as you cannot replace a live node")
          .hasMessageContaining("Cannot replace a live node")
          .isInstanceOf(UnsupportedOperationException.class);
// make sure all nodes are healthy
awaitRingHealthy(seed);
assertRingIs(seed, seed, nodeToRemove);
logger.info("Current ring is {}", assertRingIs(nodeToRemove, seed, nodeToRemove));
validateRows(seed.coordinator(), expectedState);
validateRows(nodeToRemove.coordinator(), expectedState);
}
}
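For context on why startup is expected to fail (general Cassandra behavior, not something shown in the helper code on this page): host replacement is requested by starting the new node with the replace-address startup property pointing at the node being replaced, and the server refuses to proceed when that address still belongs to a live member. A hedged sketch of the property involved; the address is a placeholder and replaceHostAndStart may wire this up differently:
// Hypothetical illustration only, not the dtest helper's actual code. Replacement is driven by a
// startup property naming the dead node's address; if that node is still alive, startup fails
// with "Cannot replace a live node".
System.setProperty("cassandra.replace_address_first_boot", "127.0.0.2"); // placeholder address of nodeToRemove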
use of org.apache.cassandra.distributed.api.TokenSupplier in project cassandra by apache.
the class HostReplacementTest method seedGoesDownBeforeDownHost.
/**
* If the seed goes down and then another node goes down, make sure host replacement still works once the seed comes back.
*/
@Test
public void seedGoesDownBeforeDownHost() throws IOException {
// start with 3 nodes, stop the seed and then node 2, restart the seed, and host-replace the downed node
TokenSupplier even = TokenSupplier.evenlyDistributedTokens(3);
try (Cluster cluster = Cluster.build(3)
                              .withConfig(c -> c.with(Feature.GOSSIP, Feature.NETWORK))
                              .withTokenSupplier(node -> even.token(node == 4 ? 2 : node))
                              .start()) {
// call early as this can't be touched on a down node
IInvokableInstance seed = cluster.get(1);
IInvokableInstance nodeToRemove = cluster.get(2);
IInvokableInstance nodeToStayAlive = cluster.get(3);
setupCluster(cluster);
// collect rows/tokens to detect issues later on if the state doesn't match
SimpleQueryResult expectedState = nodeToRemove.coordinator().executeWithResult("SELECT * FROM " + KEYSPACE + ".tbl", ConsistencyLevel.ALL);
List<String> beforeCrashTokens = getTokenMetadataTokens(seed);
// shutdown the seed, then the node to remove
stopUnchecked(seed);
stopUnchecked(nodeToRemove);
// restart the seed
seed.startup();
// make sure the node to remove is still in the ring
assertInRing(seed, nodeToRemove);
// make sure node1 still has node2's tokens
List<String> currentTokens = getTokenMetadataTokens(seed);
Assertions.assertThat(currentTokens).as("Tokens no longer match after restarting").isEqualTo(beforeCrashTokens);
// now create a new node to replace the other node
IInvokableInstance replacingNode = replaceHostAndStart(cluster, nodeToRemove);
List<IInvokableInstance> expectedRing = Arrays.asList(seed, replacingNode, nodeToStayAlive);
// wait till the replacing node is in the ring
awaitRingJoin(seed, replacingNode);
awaitRingJoin(replacingNode, seed);
awaitRingJoin(nodeToStayAlive, replacingNode);
// make sure all nodes are healthy
logger.info("Current ring is {}", awaitRingHealthy(seed));
expectedRing.forEach(i -> assertRingIs(i, expectedRing));
validateRows(seed.coordinator(), expectedState);
validateRows(replacingNode.coordinator(), expectedState);
}
}
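The token snapshot comparison relies on a getTokenMetadataTokens helper that is not shown on this page. A plausible sketch of such a helper, assuming the usual in-JVM dtest pattern of running a callable inside the instance via IInvokableInstance.callOnInstance and reading StorageService token metadata (java.util imports omitted, matching the snippet style on this page); names and details are illustrative, not necessarily the project's implementation:
// Illustrative sketch only: read the instance's token metadata in-JVM and render each token as a
// String so before/after snapshots can be compared with simple list equality.
static List<String> getTokenMetadataTokensSketch(IInvokableInstance instance) {
    return instance.callOnInstance(() -> StorageService.instance.getTokenMetadata()
                                                                .sortedTokens()
                                                                .stream()
                                                                .map(Object::toString)
                                                                .collect(Collectors.toList()));
}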
use of org.apache.cassandra.distributed.api.TokenSupplier in project cassandra by apache.
the class HostReplacementAbruptDownedInstanceTest method hostReplaceAbruptShutdown.
/**
* Test host replacement after an abrupt shutdown, i.e. when the shutdown state is not broadcast and the node to be replaced is still in NORMAL state.
*/
@Test
public void hostReplaceAbruptShutdown() throws IOException {
int numStartNodes = 3;
TokenSupplier even = TokenSupplier.evenlyDistributedTokens(numStartNodes);
try (Cluster cluster = Cluster.build(numStartNodes)
                              .withConfig(c -> c.with(Feature.GOSSIP, Feature.NETWORK))
                              .withTokenSupplier(node -> even.token(node == (numStartNodes + 1) ? 2 : node))
                              .start()) {
IInvokableInstance seed = cluster.get(1);
IInvokableInstance nodeToRemove = cluster.get(2);
IInvokableInstance peer = cluster.get(3);
List<IInvokableInstance> peers = Arrays.asList(seed, peer);
setupCluster(cluster);
// collect rows/tokens to detect issues later on if the state doesn't match
SimpleQueryResult expectedState = nodeToRemove.coordinator().executeWithResult("SELECT * FROM " + KEYSPACE + ".tbl", ConsistencyLevel.ALL);
stopAbrupt(cluster, nodeToRemove);
// at this point node 2 should still be NORMAL on all other nodes
peers.forEach(p -> assertRingState(p, nodeToRemove, "Normal"));
// node is down, but queries should still work
// TODO failing, but shouldn't!
// peers.forEach(p -> validateRows(p.coordinator(), expectedState));
// now create a new node to replace the other node
long startNanos = nanoTime();
IInvokableInstance replacingNode = replaceHostAndStart(cluster, nodeToRemove, properties -> {
// since node2 was killed abruptly, it's possible that node2's gossip state has an old schema version;
// if this happens then bootstrap will fail waiting for a schema version it will never see; to avoid
// this, set this property so the mismatch is logged as a warning rather than failing bootstrap
properties.set(BOOTSTRAP_SKIP_SCHEMA_CHECK, true);
});
logger.info("Host replacement of {} with {} took {}", nodeToRemove, replacingNode, Duration.ofNanos(nanoTime() - startNanos));
peers.forEach(p -> awaitRingJoin(p, replacingNode));
// make sure all nodes are healthy
awaitRingHealthy(seed);
List<IInvokableInstance> expectedRing = Arrays.asList(seed, peer, replacingNode);
expectedRing.forEach(p -> assertRingIs(p, expectedRing));
expectedRing.forEach(p -> validateRows(p.coordinator(), expectedState));
}
}
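stopAbrupt is another helper that is not shown here. A plausible sketch of what it might do, assuming the dtest API's shutdown(boolean graceful) overload; this is an assumption about the helper, not its actual code:
// Illustrative sketch only: shut the instance down non-gracefully so the shutdown is never
// announced via gossip, leaving the node still marked NORMAL on its peers (as asserted above).
static void stopAbruptSketch(IInvokableInstance instance) throws Exception {
    instance.shutdown(false).get(); // graceful = false: skip the clean drain/announce path
}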
use of org.apache.cassandra.distributed.api.TokenSupplier in project cassandra by apache.
the class BaseAssassinatedCase method test.
@Test
public void test() throws IOException {
TokenSupplier even = TokenSupplier.evenlyDistributedTokens(3);
try (Cluster cluster = Cluster.build(3)
                              .withConfig(c -> c.with(Feature.GOSSIP, Feature.NETWORK))
                              .withTokenSupplier(node -> even.token(node == 4 || node == 5 ? NODE_TO_REMOVE_NUM : node))
                              .start()) {
IInvokableInstance seed = cluster.get(SEED_NUM);
IInvokableInstance nodeToRemove = cluster.get(NODE_TO_REMOVE_NUM);
IInvokableInstance peer = cluster.get(PEER_NUM);
setupCluster(cluster);
consume(cluster, nodeToRemove);
assertRingState(seed, nodeToRemove, "Normal");
// assassinate the node
peer.nodetoolResult("assassinate", getBroadcastAddressHostWithPortString(nodeToRemove)).asserts().success();
// wait until the peer sees this assassination
awaitGossipStatus(seed, nodeToRemove, "LEFT");
// any extra checks to run after the node has been marked as LEFT
afterNodeStatusIsLeft(cluster, nodeToRemove);
// attempt to replace a node that is in the LEFT state; this should fail since the token isn't in the ring
assertThatThrownBy(() -> replaceHostAndStart(cluster, nodeToRemove, properties -> {
// since there are downed nodes it's possible gossip has the downed node with an old schema, so this
// property is needed to allow startup
properties.set(BOOTSTRAP_SKIP_SCHEMA_CHECK, true);
// since the bootstrap should fail because of the token, don't wait "too long" on schema as it doesn't
// matter for this test
properties.set(BOOTSTRAP_SCHEMA_DELAY_MS, 10);
})).hasMessage(expectedMessage(nodeToRemove));
}
}
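The assassinate call needs the target's address in host:port form, obtained from a getBroadcastAddressHostWithPortString helper that is also not shown on this page. A plausible sketch, assuming the instance config exposes broadcastAddress(); illustrative only, not the project's actual helper:
// Illustrative sketch only: format the node's broadcast address as "host:port" for nodetool assassinate.
static String getBroadcastAddressHostWithPortSketch(IInvokableInstance instance) {
    java.net.InetSocketAddress address = instance.config().broadcastAddress();
    return address.getAddress().getHostAddress() + ":" + address.getPort();
}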