Use of org.opensearch.cluster.routing.RoutingTable in the OpenSearch project (by opensearch-project).
From class AwarenessAllocationTests, method testFullAwareness2.
/**
 * Exercises forced rack awareness ({@code rack_id} forced to the values {@code 1,2}) on an index
 * with one primary and one replica.
 * <p>
 * Progression asserted below: with three nodes that all carry {@code rack_id=1} only one shard is
 * started and nothing else initializes; once a node with {@code rack_id=2} joins, one shard
 * initializes on that node; afterwards neither a further reroute nor a node with
 * {@code rack_id=3} changes the cluster state.
 */
public void testFullAwareness2() {
    AllocationService strategy = createAllocationService(
        Settings.builder()
            .put("cluster.routing.allocation.node_concurrent_recoveries", 10)
            .put(ThrottlingAllocationDecider.CLUSTER_ROUTING_ALLOCATION_NODE_INITIAL_REPLICAS_RECOVERIES_SETTING.getKey(), 10)
            .put(ClusterRebalanceAllocationDecider.CLUSTER_ROUTING_ALLOCATION_ALLOW_REBALANCE_SETTING.getKey(), "always")
            .put("cluster.routing.allocation.awareness.force.rack_id.values", "1,2")
            .put("cluster.routing.allocation.awareness.attributes", "rack_id")
            .build()
    );

    logger.info("Building initial routing table for 'fullAwareness2'");
    Metadata metadata = Metadata.builder()
        .put(IndexMetadata.builder("test").settings(settings(Version.CURRENT)).numberOfShards(1).numberOfReplicas(1))
        .build();
    RoutingTable initialRoutingTable = RoutingTable.builder().addAsNew(metadata.index("test")).build();
    ClusterState clusterState = ClusterState.builder(org.opensearch.cluster.ClusterName.CLUSTER_NAME_SETTING.getDefault(Settings.EMPTY))
        .metadata(metadata)
        .routingTable(initialRoutingTable)
        .build();

    // node1, node2 and node3 all carry rack_id=1; the message states "three" to match what is added.
    logger.info("--> adding three nodes on same rack and do rerouting");
    clusterState = ClusterState.builder(clusterState)
        .nodes(
            DiscoveryNodes.builder()
                .add(newNode("node1", singletonMap("rack_id", "1")))
                .add(newNode("node2", singletonMap("rack_id", "1")))
                .add(newNode("node3", singletonMap("rack_id", "1")))
        )
        .build();
    clusterState = strategy.reroute(clusterState, "reroute");
    assertThat(clusterState.getRoutingNodes().shardsWithState(INITIALIZING).size(), equalTo(1));

    logger.info("--> start the shards (primaries)");
    clusterState = startInitializingShardsAndReroute(strategy, clusterState);

    logger.info("--> replica will not start because we have only one rack value");
    assertThat(clusterState.getRoutingNodes().shardsWithState(STARTED).size(), equalTo(1));
    assertThat(clusterState.getRoutingNodes().shardsWithState(INITIALIZING).size(), equalTo(0));

    logger.info("--> add a new node with a new rack and reroute");
    clusterState = ClusterState.builder(clusterState)
        .nodes(DiscoveryNodes.builder(clusterState.nodes()).add(newNode("node4", singletonMap("rack_id", "2"))))
        .build();
    clusterState = strategy.reroute(clusterState, "reroute");
    assertThat(clusterState.getRoutingNodes().shardsWithState(STARTED).size(), equalTo(1));
    assertThat(clusterState.getRoutingNodes().shardsWithState(INITIALIZING).size(), equalTo(1));
    assertThat(clusterState.getRoutingNodes().shardsWithState(INITIALIZING).get(0).currentNodeId(), equalTo("node4"));

    // The shard on node4 is INITIALIZING (asserted above), not relocating, so this step completes
    // its initialization.
    logger.info("--> complete initializing");
    clusterState = startInitializingShardsAndReroute(strategy, clusterState);
    assertThat(clusterState.getRoutingNodes().shardsWithState(STARTED).size(), equalTo(2));

    logger.info("--> do another reroute, make sure nothing moves");
    assertThat(strategy.reroute(clusterState, "reroute").routingTable(), sameInstance(clusterState.routingTable()));

    logger.info("--> add another node with a new rack, make sure nothing moves");
    clusterState = ClusterState.builder(clusterState)
        .nodes(DiscoveryNodes.builder(clusterState.nodes()).add(newNode("node5", singletonMap("rack_id", "3"))))
        .build();
    ClusterState newState = strategy.reroute(clusterState, "reroute");
    assertThat(newState, equalTo(clusterState));
    assertThat(clusterState.getRoutingNodes().shardsWithState(STARTED).size(), equalTo(2));
}
Use of org.opensearch.cluster.routing.RoutingTable in the OpenSearch project (by opensearch-project).
From class AwarenessAllocationTests, method testMoveShardOnceNewNodeWithAttributeAdded3.
/**
 * Checks how the shards of a five-shard, one-replica index are placed as nodes carrying new
 * {@code rack_id} values join a cluster that has {@code rack_id} configured as an awareness
 * attribute.
 * <p>
 * Steps asserted below: two nodes with {@code rack_id=1} end up with all ten shard copies
 * started; adding a node with {@code rack_id=2} puts five shards into RELOCATING towards it;
 * after those finish, a reroute leaves the routing table untouched; adding a node with
 * {@code rack_id=3} triggers at least one more relocation, after which the routing table is
 * again stable.
 */
public void testMoveShardOnceNewNodeWithAttributeAdded3() {
    Settings.Builder allocationSettings = Settings.builder();
    allocationSettings.put("cluster.routing.allocation.node_concurrent_recoveries", 10);
    allocationSettings.put("cluster.routing.allocation.node_initial_primaries_recoveries", 10);
    allocationSettings.put(ThrottlingAllocationDecider.CLUSTER_ROUTING_ALLOCATION_NODE_INITIAL_REPLICAS_RECOVERIES_SETTING.getKey(), 10);
    allocationSettings.put(ClusterRebalanceAllocationDecider.CLUSTER_ROUTING_ALLOCATION_ALLOW_REBALANCE_SETTING.getKey(), "always");
    allocationSettings.put("cluster.routing.allocation.cluster_concurrent_rebalance", -1);
    allocationSettings.put("cluster.routing.allocation.awareness.attributes", "rack_id");
    allocationSettings.put("cluster.routing.allocation.balance.index", 0.0f);
    allocationSettings.put("cluster.routing.allocation.balance.replica", 1.0f);
    allocationSettings.put("cluster.routing.allocation.balance.primary", 0.0f);
    AllocationService allocationService = createAllocationService(allocationSettings.build());

    logger.info("Building initial routing table for 'moveShardOnceNewNodeWithAttributeAdded3'");
    IndexMetadata.Builder testIndex = IndexMetadata.builder("test")
        .settings(settings(Version.CURRENT))
        .numberOfShards(5)
        .numberOfReplicas(1);
    Metadata metadata = Metadata.builder().put(testIndex).build();
    RoutingTable routingTable = RoutingTable.builder().addAsNew(metadata.index("test")).build();
    ClusterState state = ClusterState.builder(org.opensearch.cluster.ClusterName.CLUSTER_NAME_SETTING.getDefault(Settings.EMPTY))
        .metadata(metadata)
        .routingTable(routingTable)
        .build();

    logger.info("--> adding two nodes on same rack and do rerouting");
    DiscoveryNodes.Builder rackOneNodes = DiscoveryNodes.builder();
    rackOneNodes.add(newNode("node1", singletonMap("rack_id", "1")));
    rackOneNodes.add(newNode("node2", singletonMap("rack_id", "1")));
    state = ClusterState.builder(state).nodes(rackOneNodes).build();
    state = allocationService.reroute(state, "reroute");

    // Dump the shard distribution per state before asserting on it.
    logger.info("Initializing shards: {}", state.getRoutingNodes().shardsWithState(INITIALIZING));
    logger.info("Started shards: {}", state.getRoutingNodes().shardsWithState(STARTED));
    logger.info("Relocating shards: {}", state.getRoutingNodes().shardsWithState(RELOCATING));
    logger.info("Unassigned shards: {}", state.getRoutingNodes().shardsWithState(UNASSIGNED));
    assertThat(state.getRoutingNodes().shardsWithState(INITIALIZING).size(), equalTo(5));

    logger.info("--> start the shards (primaries)");
    state = startInitializingShardsAndReroute(allocationService, state);
    logger.info("--> start the shards (replicas)");
    state = startInitializingShardsAndReroute(allocationService, state);
    assertThat(state.getRoutingNodes().shardsWithState(STARTED).size(), equalTo(10));

    logger.info("--> add a new node with a new rack and reroute");
    DiscoveryNodes.Builder withRackTwo = DiscoveryNodes.builder(state.nodes());
    withRackTwo.add(newNode("node3", singletonMap("rack_id", "2")));
    state = ClusterState.builder(state).nodes(withRackTwo).build();
    state = allocationService.reroute(state, "reroute");
    assertThat(state.getRoutingNodes().shardsWithState(STARTED).size(), equalTo(5));
    assertThat(state.getRoutingNodes().shardsWithState(RELOCATING).size(), equalTo(5));
    assertThat(state.getRoutingNodes().shardsWithState(INITIALIZING).size(), equalTo(5));
    assertThat(state.getRoutingNodes().shardsWithState(RELOCATING).get(0).relocatingNodeId(), equalTo("node3"));

    logger.info("--> complete initializing");
    state = startInitializingShardsAndReroute(allocationService, state);
    logger.info("--> run it again, since we still might have relocation");
    state = startInitializingShardsAndReroute(allocationService, state);
    assertThat(state.getRoutingNodes().shardsWithState(STARTED).size(), equalTo(10));

    logger.info("--> do another reroute, make sure nothing moves");
    RoutingTable tableAfterReroute = allocationService.reroute(state, "reroute").routingTable();
    assertThat(tableAfterReroute, sameInstance(state.routingTable()));

    logger.info("--> add another node with a new rack, some more relocation should happen");
    DiscoveryNodes.Builder withRackThree = DiscoveryNodes.builder(state.nodes());
    withRackThree.add(newNode("node4", singletonMap("rack_id", "3")));
    state = ClusterState.builder(state).nodes(withRackThree).build();
    state = allocationService.reroute(state, "reroute");
    assertThat(state.getRoutingNodes().shardsWithState(RELOCATING).size(), greaterThan(0));

    logger.info("--> complete relocation");
    state = startInitializingShardsAndReroute(allocationService, state);
    assertThat(state.getRoutingNodes().shardsWithState(STARTED).size(), equalTo(10));

    logger.info("--> do another reroute, make sure nothing moves");
    tableAfterReroute = allocationService.reroute(state, "reroute").routingTable();
    assertThat(tableAfterReroute, sameInstance(state.routingTable()));
}
Use of org.opensearch.cluster.routing.RoutingTable in the OpenSearch project (by opensearch-project).
From class FailedShardsRoutingTests, method testFailAllReplicasInitializingOnPrimaryFail.
/**
 * Fails the primary of a one-shard, two-replica index while one replica is started and the
 * other is still initializing, then checks that the new primary carries the allocation id of
 * the previously started replica and that two copies are initializing again.
 */
public void testFailAllReplicasInitializingOnPrimaryFail() {
    AllocationService allocationService = createAllocationService(Settings.builder().build());

    IndexMetadata.Builder testIndex = IndexMetadata.builder("test")
        .settings(settings(Version.CURRENT))
        .numberOfShards(1)
        .numberOfReplicas(2);
    Metadata metadata = Metadata.builder().put(testIndex).build();
    RoutingTable initialTable = RoutingTable.builder().addAsNew(metadata.index("test")).build();
    ClusterState state = ClusterState.builder(CLUSTER_NAME_SETTING.getDefault(Settings.EMPTY))
        .metadata(metadata)
        .routingTable(initialTable)
        .build();
    ShardId shardId = new ShardId(metadata.index("test").getIndex(), 0);

    // Four nodes for three shard copies.
    DiscoveryNodes.Builder fourNodes = DiscoveryNodes.builder();
    fourNodes.add(newNode("node1"));
    fourNodes.add(newNode("node2"));
    fourNodes.add(newNode("node3"));
    fourNodes.add(newNode("node4"));
    state = ClusterState.builder(state).nodes(fourNodes).build();

    // Only the routing table of the rerouted state is carried over.
    RoutingTable reroutedTable = allocationService.reroute(state, "reroute").routingTable();
    state = ClusterState.builder(state).routingTable(reroutedTable).build();
    assertThat(state.getRoutingNodes().shardsWithState(INITIALIZING).size(), equalTo(1));
    assertThat(state.getRoutingNodes().shardsWithState(UNASSIGNED).size(), equalTo(2));

    // Start the primary; both replicas then begin initializing.
    state = startInitializingShardsAndReroute(allocationService, state);
    assertThat(state.getRoutingNodes().shardsWithState(STARTED).size(), equalTo(1));
    assertThat(state.getRoutingNodes().shardsWithState(INITIALIZING).size(), equalTo(2));

    // Start exactly one replica so that it is available to take over as primary.
    ShardRouting replicaToStart = state.getRoutingNodes().shardsWithState(INITIALIZING).get(0);
    state = startShardsAndReroute(allocationService, state, replicaToStart);
    assertThat(state.getRoutingNodes().shardsWithState(STARTED).size(), equalTo(2));
    assertThat(state.getRoutingNodes().shardsWithState(INITIALIZING).size(), equalTo(1));
    ShardRouting startedReplica = state.getRoutingNodes().activeReplicaWithHighestVersion(shardId);

    // Fail the primary and make sure the cluster state actually changes.
    ShardRouting primaryShardToFail = state.routingTable().index("test").shard(0).primaryShard();
    ClusterState stateAfterFailure = allocationService.applyFailedShard(state, primaryShardToFail, randomBoolean());
    assertThat(stateAfterFailure, not(equalTo(state)));
    state = stateAfterFailure;

    // One copy remains started, two are initializing, and the primary is a different routing
    // entry that reuses the started replica's allocation id.
    assertThat(state.getRoutingNodes().shardsWithState(STARTED).size(), equalTo(1));
    assertThat(state.getRoutingNodes().shardsWithState(INITIALIZING).size(), equalTo(2));
    ShardRouting newPrimaryShard = state.routingTable().index("test").shard(0).primaryShard();
    assertThat(newPrimaryShard, not(equalTo(primaryShardToFail)));
    assertThat(newPrimaryShard.allocationId(), equalTo(startedReplica.allocationId()));
}
Use of org.opensearch.cluster.routing.RoutingTable in the OpenSearch project (by opensearch-project).
From class FailedShardsRoutingTests, method testSingleShardMultipleAllocationFailures.
/**
 * Fails a random selection of copies of a single-shard index in one batch and verifies that
 * none of the failed allocation ids is still present and that no shard sits on a node whose
 * shard was failed.
 * <p>
 * The cluster has one node per shard copy ({@code numberOfReplicas + 1} nodes).
 */
public void testSingleShardMultipleAllocationFailures() {
    Settings.Builder allocationSettings = Settings.builder();
    allocationSettings.put("cluster.routing.allocation.node_concurrent_recoveries", 10);
    allocationSettings.put(ClusterRebalanceAllocationDecider.CLUSTER_ROUTING_ALLOCATION_ALLOW_REBALANCE_SETTING.getKey(), "always");
    AllocationService allocationService = createAllocationService(allocationSettings.build());

    logger.info("Building initial routing table");
    int numberOfReplicas = scaledRandomIntBetween(2, 10);
    IndexMetadata.Builder testIndex = IndexMetadata.builder("test")
        .settings(settings(Version.CURRENT))
        .numberOfShards(1)
        .numberOfReplicas(numberOfReplicas);
    Metadata metadata = Metadata.builder().put(testIndex).build();
    RoutingTable routingTable = RoutingTable.builder().addAsNew(metadata.index("test")).build();
    ClusterState state = ClusterState.builder(CLUSTER_NAME_SETTING.getDefault(Settings.EMPTY))
        .metadata(metadata)
        .routingTable(routingTable)
        .build();

    final int nodeCount = numberOfReplicas + 1;
    logger.info("Adding {} nodes and performing rerouting", nodeCount);
    DiscoveryNodes.Builder nodes = DiscoveryNodes.builder();
    for (int nodeIndex = 0; nodeIndex < nodeCount; nodeIndex++) {
        nodes.add(newNode("node" + nodeIndex));
    }
    state = ClusterState.builder(state).nodes(nodes).build();

    // Keep starting shards until nothing is left unassigned.
    while (state.routingTable().shardsWithState(UNASSIGNED).isEmpty() == false) {
        state = startInitializingShardsAndReroute(allocationService, state);
    }

    int shardsToFail = randomIntBetween(1, numberOfReplicas);
    ArrayList<FailedShard> failedShards = new ArrayList<>();
    RoutingNodes routingNodes = state.getRoutingNodes();
    Set<String> failedNodes = new HashSet<>();
    Set<ShardRouting> shardRoutingsToFail = new HashSet<>();
    for (int attempt = 0; attempt < shardsToFail; attempt++) {
        String failedNode = "node" + randomInt(numberOfReplicas);
        logger.info("failing shard on node [{}]", failedNode);
        ShardRouting shardToFail = routingNodes.node(failedNode).iterator().next();
        if (shardRoutingsToFail.contains(shardToFail)) {
            // The same node was drawn again; each shard is failed at most once.
            continue;
        }
        failedShards.add(new FailedShard(shardToFail, null, null, randomBoolean()));
        failedNodes.add(failedNode);
        shardRoutingsToFail.add(shardToFail);
    }

    state = allocationService.applyFailedShards(state, failedShards);
    routingNodes = state.getRoutingNodes();

    // None of the failed allocation ids may still be known to the routing nodes.
    for (FailedShard failedShard : failedShards) {
        ShardRouting failedRouting = failedShard.getRoutingEntry();
        if (routingNodes.getByAllocationId(failedRouting.shardId(), failedRouting.allocationId().getId()) != null) {
            fail("shard " + failedShard + " was not failed");
        }
    }

    // Nodes whose shard was failed must hold no shards afterwards.
    for (String failedNode : failedNodes) {
        if (routingNodes.node(failedNode).isEmpty() == false) {
            fail("shard was re-assigned to failed node " + failedNode);
        }
    }
}
Use of org.opensearch.cluster.routing.RoutingTable in the OpenSearch project (by opensearch-project).
From class MaxRetryAllocationDeciderTests, method testSingleRetryOnIgnore.
/**
 * Fails the single shard of index {@code idx} repeatedly and checks how its state and failure
 * counter evolve relative to the retry limit read from
 * {@link MaxRetryAllocationDecider#SETTING_ALLOCATION_MAX_RETRY} with empty settings.
 * <p>
 * Asserted sequence: after each of the first {@code retries - 1} failures the shard is
 * INITIALIZING again; the next failure leaves it UNASSIGNED with the counter at {@code retries};
 * a reroute with an empty command list then brings the counter back to 0 and the shard to
 * INITIALIZING; the same fail-until-unassigned cycle is then repeated.
 * <p>
 * All {@code assertEquals} calls use the (expected, actual) argument order so that a failure
 * message reports the two values the right way round.
 */
public void testSingleRetryOnIgnore() {
    ClusterState clusterState = createInitialClusterState();
    RoutingTable routingTable = clusterState.routingTable();
    final int retries = MaxRetryAllocationDecider.SETTING_ALLOCATION_MAX_RETRY.get(Settings.EMPTY);

    // now fail it N-1 times
    for (int i = 0; i < retries - 1; i++) {
        List<FailedShard> failedShards = Collections.singletonList(
            new FailedShard(routingTable.index("idx").shard(0).shards().get(0), "boom" + i, new UnsupportedOperationException(), randomBoolean())
        );
        ClusterState newState = strategy.applyFailedShards(clusterState, failedShards);
        assertThat(newState, not(equalTo(clusterState)));
        clusterState = newState;
        routingTable = newState.routingTable();
        assertEquals(1, routingTable.index("idx").shards().size());
        assertEquals(INITIALIZING, routingTable.index("idx").shard(0).shards().get(0).state());
        assertEquals(i + 1, routingTable.index("idx").shard(0).shards().get(0).unassignedInfo().getNumFailedAllocations());
        // Each round uses a distinct message ("boom0", "boom1", ...), so this checks the latest one.
        assertThat(routingTable.index("idx").shard(0).shards().get(0).unassignedInfo().getMessage(), containsString("boom" + i));
    }

    // now we go and check that we actually stick to unassigned on the next failure
    List<FailedShard> failedShards = Collections.singletonList(
        new FailedShard(routingTable.index("idx").shard(0).shards().get(0), "boom", new UnsupportedOperationException(), randomBoolean())
    );
    ClusterState newState = strategy.applyFailedShards(clusterState, failedShards);
    assertThat(newState, not(equalTo(clusterState)));
    clusterState = newState;
    routingTable = newState.routingTable();
    assertEquals(1, routingTable.index("idx").shards().size());
    assertEquals(retries, routingTable.index("idx").shard(0).shards().get(0).unassignedInfo().getNumFailedAllocations());
    assertEquals(UNASSIGNED, routingTable.index("idx").shard(0).shards().get(0).state());
    assertThat(routingTable.index("idx").shard(0).shards().get(0).unassignedInfo().getMessage(), containsString("boom"));

    // manual resetting of retry count
    newState = strategy.reroute(clusterState, new AllocationCommands(), false, true).getClusterState();
    assertThat(newState, not(equalTo(clusterState)));
    clusterState = newState;
    routingTable = newState.routingTable();
    // Rebuilds the state with the routing table it was just read from.
    clusterState = ClusterState.builder(clusterState).routingTable(routingTable).build();
    assertEquals(1, routingTable.index("idx").shards().size());
    assertEquals(0, routingTable.index("idx").shard(0).shards().get(0).unassignedInfo().getNumFailedAllocations());
    assertEquals(INITIALIZING, routingTable.index("idx").shard(0).shards().get(0).state());
    assertThat(routingTable.index("idx").shard(0).shards().get(0).unassignedInfo().getMessage(), containsString("boom"));

    // again fail it N-1 times
    for (int i = 0; i < retries - 1; i++) {
        failedShards = Collections.singletonList(
            new FailedShard(routingTable.index("idx").shard(0).shards().get(0), "boom", new UnsupportedOperationException(), randomBoolean())
        );
        newState = strategy.applyFailedShards(clusterState, failedShards);
        assertThat(newState, not(equalTo(clusterState)));
        clusterState = newState;
        routingTable = newState.routingTable();
        assertEquals(1, routingTable.index("idx").shards().size());
        assertEquals(i + 1, routingTable.index("idx").shard(0).shards().get(0).unassignedInfo().getNumFailedAllocations());
        assertEquals(INITIALIZING, routingTable.index("idx").shard(0).shards().get(0).state());
        assertThat(routingTable.index("idx").shard(0).shards().get(0).unassignedInfo().getMessage(), containsString("boom"));
    }

    // now we go and check that we actually stick to unassigned on the next failure
    failedShards = Collections.singletonList(
        new FailedShard(routingTable.index("idx").shard(0).shards().get(0), "boom", new UnsupportedOperationException(), randomBoolean())
    );
    newState = strategy.applyFailedShards(clusterState, failedShards);
    assertThat(newState, not(equalTo(clusterState)));
    clusterState = newState;
    routingTable = newState.routingTable();
    assertEquals(1, routingTable.index("idx").shards().size());
    assertEquals(retries, routingTable.index("idx").shard(0).shards().get(0).unassignedInfo().getNumFailedAllocations());
    assertEquals(UNASSIGNED, routingTable.index("idx").shard(0).shards().get(0).state());
    assertThat(routingTable.index("idx").shard(0).shards().get(0).unassignedInfo().getMessage(), containsString("boom"));
}
Aggregations