use of org.opensearch.cluster.routing.ShardRoutingState.UNASSIGNED in project OpenSearch by opensearch-project.
the class BalanceConfigurationTests method assertReplicaBalance.
private void assertReplicaBalance(RoutingNodes nodes, int numberOfNodes, int numberOfIndices, int numberOfReplicas, int numberOfShards, float threshold) {
final int unassigned = nodes.unassigned().size();
if (unassigned > 0) {
// Ensure that if there any unassigned shards, all of their replicas are unassigned as well
// (i.e. unassigned count is always [replicas] + 1 for each shard unassigned shardId)
nodes.shardsWithState(UNASSIGNED).stream().collect(Collectors.toMap(ShardRouting::shardId, s -> 1, (a, b) -> a + b)).values().forEach(count -> assertEquals(numberOfReplicas + 1, count.longValue()));
}
assertEquals(numberOfNodes, nodes.size());
final int numShards = numberOfIndices * numberOfShards * (numberOfReplicas + 1) - unassigned;
final float avgNumShards = (float) (numShards) / (float) (numberOfNodes);
final int minAvgNumberOfShards = Math.round(Math.round(Math.floor(avgNumShards - threshold)));
final int maxAvgNumberOfShards = Math.round(Math.round(Math.ceil(avgNumShards + threshold)));
for (RoutingNode node : nodes) {
assertThat(node.shardsWithState(STARTED).size(), Matchers.greaterThanOrEqualTo(minAvgNumberOfShards));
assertThat(node.shardsWithState(STARTED).size(), Matchers.lessThanOrEqualTo(maxAvgNumberOfShards));
}
}
use of org.opensearch.cluster.routing.ShardRoutingState.UNASSIGNED in project OpenSearch by opensearch-project.
the class BalanceConfigurationTests method testNoRebalanceOnPrimaryOverload.
public void testNoRebalanceOnPrimaryOverload() {
Settings.Builder settings = Settings.builder();
AllocationService strategy = new AllocationService(randomAllocationDeciders(settings.build(), new ClusterSettings(Settings.Builder.EMPTY_SETTINGS, ClusterSettings.BUILT_IN_CLUSTER_SETTINGS), random()), new TestGatewayAllocator(), new ShardsAllocator() {
/*
* // this allocator tries to rebuild this scenario where a rebalance is
* // triggered solely by the primary overload on node [1] where a shard
* // is rebalanced to node 0
routing_nodes:
-----node_id[0][V]
--------[test][0], node[0], [R], s[STARTED]
--------[test][4], node[0], [R], s[STARTED]
-----node_id[1][V]
--------[test][0], node[1], [P], s[STARTED]
--------[test][1], node[1], [P], s[STARTED]
--------[test][3], node[1], [R], s[STARTED]
-----node_id[2][V]
--------[test][1], node[2], [R], s[STARTED]
--------[test][2], node[2], [R], s[STARTED]
--------[test][4], node[2], [P], s[STARTED]
-----node_id[3][V]
--------[test][2], node[3], [P], s[STARTED]
--------[test][3], node[3], [P], s[STARTED]
---- unassigned
*/
public void allocate(RoutingAllocation allocation) {
RoutingNodes.UnassignedShards unassigned = allocation.routingNodes().unassigned();
ShardRouting[] drain = unassigned.drain();
// we have to allocate primaries first
ArrayUtil.timSort(drain, (a, b) -> {
return a.primary() ? -1 : 1;
});
for (ShardRouting sr : drain) {
switch(sr.id()) {
case 0:
if (sr.primary()) {
allocation.routingNodes().initializeShard(sr, "node1", null, -1, allocation.changes());
} else {
allocation.routingNodes().initializeShard(sr, "node0", null, -1, allocation.changes());
}
break;
case 1:
if (sr.primary()) {
allocation.routingNodes().initializeShard(sr, "node1", null, -1, allocation.changes());
} else {
allocation.routingNodes().initializeShard(sr, "node2", null, -1, allocation.changes());
}
break;
case 2:
if (sr.primary()) {
allocation.routingNodes().initializeShard(sr, "node3", null, -1, allocation.changes());
} else {
allocation.routingNodes().initializeShard(sr, "node2", null, -1, allocation.changes());
}
break;
case 3:
if (sr.primary()) {
allocation.routingNodes().initializeShard(sr, "node3", null, -1, allocation.changes());
} else {
allocation.routingNodes().initializeShard(sr, "node1", null, -1, allocation.changes());
}
break;
case 4:
if (sr.primary()) {
allocation.routingNodes().initializeShard(sr, "node2", null, -1, allocation.changes());
} else {
allocation.routingNodes().initializeShard(sr, "node0", null, -1, allocation.changes());
}
break;
}
}
}
@Override
public ShardAllocationDecision decideShardAllocation(ShardRouting shard, RoutingAllocation allocation) {
throw new UnsupportedOperationException("explain not supported");
}
}, EmptyClusterInfoService.INSTANCE, EmptySnapshotsInfoService.INSTANCE);
Metadata.Builder metadataBuilder = Metadata.builder();
RoutingTable.Builder routingTableBuilder = RoutingTable.builder();
IndexMetadata.Builder indexMeta = IndexMetadata.builder("test").settings(settings(Version.CURRENT)).numberOfShards(5).numberOfReplicas(1);
metadataBuilder = metadataBuilder.put(indexMeta);
Metadata metadata = metadataBuilder.build();
for (ObjectCursor<IndexMetadata> cursor : metadata.indices().values()) {
routingTableBuilder.addAsNew(cursor.value);
}
RoutingTable routingTable = routingTableBuilder.build();
DiscoveryNodes.Builder nodes = DiscoveryNodes.builder();
for (int i = 0; i < 4; i++) {
DiscoveryNode node = newNode("node" + i);
nodes.add(node);
}
ClusterState clusterState = ClusterState.builder(org.opensearch.cluster.ClusterName.CLUSTER_NAME_SETTING.getDefault(Settings.EMPTY)).nodes(nodes).metadata(metadata).routingTable(routingTable).build();
routingTable = strategy.reroute(clusterState, "reroute").routingTable();
clusterState = ClusterState.builder(clusterState).routingTable(routingTable).build();
RoutingNodes routingNodes = clusterState.getRoutingNodes();
for (RoutingNode routingNode : routingNodes) {
for (ShardRouting shardRouting : routingNode) {
assertThat(shardRouting.state(), Matchers.equalTo(ShardRoutingState.INITIALIZING));
}
}
strategy = createAllocationService(settings.build(), new TestGatewayAllocator());
logger.info("use the new allocator and check if it moves shards");
routingTable = startInitializingShardsAndReroute(strategy, clusterState).routingTable();
clusterState = ClusterState.builder(clusterState).routingTable(routingTable).build();
routingNodes = clusterState.getRoutingNodes();
for (RoutingNode routingNode : routingNodes) {
for (ShardRouting shardRouting : routingNode) {
assertThat(shardRouting.state(), Matchers.equalTo(ShardRoutingState.STARTED));
}
}
logger.info("start the replica shards");
routingTable = startInitializingShardsAndReroute(strategy, clusterState).routingTable();
clusterState = ClusterState.builder(clusterState).routingTable(routingTable).build();
routingNodes = clusterState.getRoutingNodes();
for (RoutingNode routingNode : routingNodes) {
for (ShardRouting shardRouting : routingNode) {
assertThat(shardRouting.state(), Matchers.equalTo(ShardRoutingState.STARTED));
}
}
logger.info("rebalancing");
routingTable = strategy.reroute(clusterState, "reroute").routingTable();
clusterState = ClusterState.builder(clusterState).routingTable(routingTable).build();
routingNodes = clusterState.getRoutingNodes();
for (RoutingNode routingNode : routingNodes) {
for (ShardRouting shardRouting : routingNode) {
assertThat(shardRouting.state(), Matchers.equalTo(ShardRoutingState.STARTED));
}
}
}
use of org.opensearch.cluster.routing.ShardRoutingState.UNASSIGNED in project OpenSearch by opensearch-project.
the class DiskThresholdDeciderTests method testDiskThresholdWithSnapshotShardSizes.
public void testDiskThresholdWithSnapshotShardSizes() {
final long shardSizeInBytes = randomBoolean() ? 10L : 50L;
logger.info("--> using shard size [{}]", shardSizeInBytes);
final Settings diskSettings = Settings.builder().put(DiskThresholdSettings.CLUSTER_ROUTING_ALLOCATION_DISK_THRESHOLD_ENABLED_SETTING.getKey(), true).put(DiskThresholdSettings.CLUSTER_ROUTING_ALLOCATION_LOW_DISK_WATERMARK_SETTING.getKey(), "90%").put(DiskThresholdSettings.CLUSTER_ROUTING_ALLOCATION_HIGH_DISK_WATERMARK_SETTING.getKey(), "95%").build();
final ImmutableOpenMap.Builder<String, DiskUsage> usagesBuilder = ImmutableOpenMap.builder();
// 79% used
usagesBuilder.put("node1", new DiskUsage("node1", "n1", "/dev/null", 100, 21));
// 99% used
usagesBuilder.put("node2", new DiskUsage("node2", "n2", "/dev/null", 100, 1));
final ImmutableOpenMap<String, DiskUsage> usages = usagesBuilder.build();
final ClusterInfoService clusterInfoService = () -> new DevNullClusterInfo(usages, usages, ImmutableOpenMap.of());
final AllocationDeciders deciders = new AllocationDeciders(new HashSet<>(Arrays.asList(new RestoreInProgressAllocationDecider(), new SameShardAllocationDecider(Settings.EMPTY, new ClusterSettings(Settings.EMPTY, ClusterSettings.BUILT_IN_CLUSTER_SETTINGS)), makeDecider(diskSettings))));
final Snapshot snapshot = new Snapshot("_repository", new SnapshotId("_snapshot_name", UUIDs.randomBase64UUID(random())));
final IndexId indexId = new IndexId("_indexid_name", UUIDs.randomBase64UUID(random()));
final ShardId shardId = new ShardId(new Index("test", IndexMetadata.INDEX_UUID_NA_VALUE), 0);
final Metadata metadata = Metadata.builder().put(IndexMetadata.builder("test").settings(settings(Version.CURRENT)).numberOfShards(1).numberOfReplicas(0).putInSyncAllocationIds(0, singleton(AllocationId.newInitializing().getId()))).build();
final RoutingTable routingTable = RoutingTable.builder().addAsNewRestore(metadata.index("test"), new RecoverySource.SnapshotRecoverySource("_restore_uuid", snapshot, Version.CURRENT, indexId), new IntHashSet()).build();
final ImmutableOpenMap.Builder<ShardId, RestoreInProgress.ShardRestoreStatus> shards = ImmutableOpenMap.builder();
shards.put(shardId, new RestoreInProgress.ShardRestoreStatus("node1"));
final RestoreInProgress.Builder restores = new RestoreInProgress.Builder().add(new RestoreInProgress.Entry("_restore_uuid", snapshot, RestoreInProgress.State.INIT, singletonList("test"), shards.build()));
ClusterState clusterState = ClusterState.builder(new ClusterName(getTestName())).metadata(metadata).routingTable(routingTable).putCustom(RestoreInProgress.TYPE, restores.build()).nodes(// node2 is added because DiskThresholdDecider
DiscoveryNodes.builder().add(newNode("node1")).add(newNode("node2"))).build();
assertThat(clusterState.getRoutingNodes().shardsWithState(UNASSIGNED).stream().map(ShardRouting::unassignedInfo).allMatch(unassignedInfo -> Reason.NEW_INDEX_RESTORED.equals(unassignedInfo.getReason())), is(true));
assertThat(clusterState.getRoutingNodes().shardsWithState(UNASSIGNED).stream().map(ShardRouting::unassignedInfo).allMatch(unassignedInfo -> AllocationStatus.NO_ATTEMPT.equals(unassignedInfo.getLastAllocationStatus())), is(true));
assertThat(clusterState.getRoutingNodes().shardsWithState(UNASSIGNED).size(), equalTo(1));
final AtomicReference<SnapshotShardSizeInfo> snapshotShardSizeInfoRef = new AtomicReference<>(SnapshotShardSizeInfo.EMPTY);
final AllocationService strategy = new AllocationService(deciders, new TestGatewayAllocator(), new BalancedShardsAllocator(Settings.EMPTY), clusterInfoService, snapshotShardSizeInfoRef::get);
// reroute triggers snapshot shard size fetching
clusterState = strategy.reroute(clusterState, "reroute");
logShardStates(clusterState);
// shard cannot be assigned yet as the snapshot shard size is unknown
assertThat(clusterState.getRoutingNodes().shardsWithState(UNASSIGNED).stream().map(ShardRouting::unassignedInfo).allMatch(unassignedInfo -> AllocationStatus.FETCHING_SHARD_DATA.equals(unassignedInfo.getLastAllocationStatus())), is(true));
assertThat(clusterState.getRoutingNodes().shardsWithState(UNASSIGNED).size(), equalTo(1));
final SnapshotShard snapshotShard = new SnapshotShard(snapshot, indexId, shardId);
final ImmutableOpenMap.Builder<SnapshotShard, Long> snapshotShardSizes = ImmutableOpenMap.builder();
final boolean shouldAllocate;
if (randomBoolean()) {
logger.info("--> simulating snapshot shards size retrieval success");
snapshotShardSizes.put(snapshotShard, shardSizeInBytes);
logger.info("--> shard allocation depends on its size");
shouldAllocate = shardSizeInBytes < usages.get("node1").getFreeBytes();
} else {
logger.info("--> simulating snapshot shards size retrieval failure");
snapshotShardSizes.put(snapshotShard, ShardRouting.UNAVAILABLE_EXPECTED_SHARD_SIZE);
logger.info("--> shard is always allocated when its size could not be retrieved");
shouldAllocate = true;
}
snapshotShardSizeInfoRef.set(new SnapshotShardSizeInfo(snapshotShardSizes.build()));
// reroute uses the previous snapshot shard size
clusterState = startInitializingShardsAndReroute(strategy, clusterState);
logShardStates(clusterState);
assertThat(clusterState.getRoutingNodes().shardsWithState(UNASSIGNED).size(), equalTo(shouldAllocate ? 0 : 1));
assertThat(clusterState.getRoutingNodes().shardsWithState("test", INITIALIZING, STARTED).size(), equalTo(shouldAllocate ? 1 : 0));
}
use of org.opensearch.cluster.routing.ShardRoutingState.UNASSIGNED in project OpenSearch by opensearch-project.
the class ThrottlingAllocationTests method testRecoveryCounts.
public void testRecoveryCounts() {
TestGatewayAllocator gatewayAllocator = new TestGatewayAllocator();
Settings settings = Settings.builder().put(ThrottlingAllocationDecider.CLUSTER_ROUTING_ALLOCATION_NODE_CONCURRENT_RECOVERIES_SETTING.getKey(), 1).put(ThrottlingAllocationDecider.CLUSTER_ROUTING_ALLOCATION_NODE_INITIAL_PRIMARIES_RECOVERIES_SETTING.getKey(), 20).put(ThrottlingAllocationDecider.CLUSTER_ROUTING_ALLOCATION_NODE_INITIAL_REPLICAS_RECOVERIES_SETTING.getKey(), 2).build();
AllocationService strategy = createAllocationService(settings, gatewayAllocator);
Metadata metadata = Metadata.builder().persistentSettings(settings).build();
ClusterState clusterState = ClusterState.builder(CLUSTER_NAME_SETTING.getDefault(Settings.EMPTY)).metadata(metadata).nodes(DiscoveryNodes.builder().add(newNode("node1")).add(newNode("node2"))).build();
logger.info("create a new index and start all primaries");
clusterState = createNewIndexAndStartAllPrimaries(2, 2, strategy, clusterState);
logger.info("Add a new node for all replica assignment. 4 replicas move to INIT after reroute.");
clusterState = ClusterState.builder(clusterState).nodes(DiscoveryNodes.builder(clusterState.nodes()).add(newNode("node3"))).build();
clusterState = strategy.reroute(clusterState, "reroute");
assertThat(clusterState.routingTable().index("new_index").shardsWithState(UNASSIGNED).size(), equalTo(0));
assertThat(clusterState.routingTable().index("new_index").shardsWithState(INITIALIZING).size(), equalTo(4));
assertThat(clusterState.getRoutingNodes().getIncomingRecoveries("node1"), equalTo(0));
assertThat(clusterState.getRoutingNodes().getIncomingRecoveries("node2"), equalTo(0));
assertThat(clusterState.getRoutingNodes().getIncomingRecoveries("node3"), equalTo(0));
assertThat(clusterState.getRoutingNodes().getInitialIncomingRecoveries("node1"), equalTo(1));
assertThat(clusterState.getRoutingNodes().getInitialIncomingRecoveries("node2"), equalTo(1));
assertThat(clusterState.getRoutingNodes().getInitialIncomingRecoveries("node3"), equalTo(2));
assertThat(clusterState.getRoutingNodes().getOutgoingRecoveries("node1"), equalTo(0));
assertThat(clusterState.getRoutingNodes().getOutgoingRecoveries("node2"), equalTo(0));
assertThat(clusterState.getRoutingNodes().getOutgoingRecoveries("node3"), equalTo(0));
assertThat(clusterState.getRoutingNodes().getInitialOutgoingRecoveries("node1"), equalTo(2));
assertThat(clusterState.getRoutingNodes().getInitialOutgoingRecoveries("node2"), equalTo(2));
assertThat(clusterState.getRoutingNodes().getInitialOutgoingRecoveries("node3"), equalTo(0));
logger.info("Exclude node1 and add node4. After this, primary shard on node1 moves to RELOCATING state and only " + "non initial replica recoveries are impacted");
settings = Settings.builder().put(ThrottlingAllocationDecider.CLUSTER_ROUTING_ALLOCATION_NODE_CONCURRENT_RECOVERIES_SETTING.getKey(), 1).put(ThrottlingAllocationDecider.CLUSTER_ROUTING_ALLOCATION_NODE_INITIAL_PRIMARIES_RECOVERIES_SETTING.getKey(), 20).put(ThrottlingAllocationDecider.CLUSTER_ROUTING_ALLOCATION_NODE_INITIAL_REPLICAS_RECOVERIES_SETTING.getKey(), 2).put("cluster.routing.allocation.exclude._id", "node1").build();
clusterState = ClusterState.builder(clusterState).nodes(DiscoveryNodes.builder(clusterState.nodes()).add(newNode("node4"))).build();
strategy = createAllocationService(settings, gatewayAllocator);
clusterState = strategy.reroute(clusterState, "reroute");
assertThat(clusterState.routingTable().index("new_index").shardsWithState(UNASSIGNED).size(), equalTo(0));
assertThat(clusterState.routingTable().index("new_index").shardsWithState(INITIALIZING).size(), equalTo(5));
assertThat(clusterState.routingTable().index("new_index").shardsWithState(RELOCATING).size(), equalTo(1));
assertThat(clusterState.getRoutingNodes().getIncomingRecoveries("node1"), equalTo(0));
assertThat(clusterState.getRoutingNodes().getIncomingRecoveries("node2"), equalTo(0));
assertThat(clusterState.getRoutingNodes().getIncomingRecoveries("node3"), equalTo(0));
assertThat(clusterState.getRoutingNodes().getIncomingRecoveries("node4"), equalTo(1));
assertThat(clusterState.getRoutingNodes().getInitialIncomingRecoveries("node1"), equalTo(1));
assertThat(clusterState.getRoutingNodes().getInitialIncomingRecoveries("node2"), equalTo(1));
assertThat(clusterState.getRoutingNodes().getInitialIncomingRecoveries("node3"), equalTo(2));
assertThat(clusterState.getRoutingNodes().getInitialIncomingRecoveries("node4"), equalTo(0));
assertThat(clusterState.getRoutingNodes().getOutgoingRecoveries("node1"), equalTo(1));
assertThat(clusterState.getRoutingNodes().getOutgoingRecoveries("node2"), equalTo(0));
assertThat(clusterState.getRoutingNodes().getOutgoingRecoveries("node3"), equalTo(0));
assertThat(clusterState.getRoutingNodes().getOutgoingRecoveries("node4"), equalTo(0));
assertThat(clusterState.getRoutingNodes().getInitialOutgoingRecoveries("node1"), equalTo(2));
assertThat(clusterState.getRoutingNodes().getInitialOutgoingRecoveries("node2"), equalTo(2));
assertThat(clusterState.getRoutingNodes().getInitialOutgoingRecoveries("node3"), equalTo(0));
assertThat(clusterState.getRoutingNodes().getInitialOutgoingRecoveries("node4"), equalTo(0));
logger.info("Start primary on node4. Now all replicas for that primary start getting accounted for from node4.");
clusterState = strategy.applyStartedShards(clusterState, clusterState.routingTable().index("new_index").shardsWithState(INITIALIZING).stream().filter(routing -> routing.primary() && routing.isRelocationTarget()).collect(Collectors.toList()));
assertThat(clusterState.routingTable().index("new_index").shardsWithState(UNASSIGNED).size(), equalTo(0));
assertThat(clusterState.routingTable().index("new_index").shardsWithState(INITIALIZING).size(), equalTo(4));
assertThat(clusterState.routingTable().index("new_index").shardsWithState(RELOCATING).size(), equalTo(0));
assertThat(clusterState.getRoutingNodes().getIncomingRecoveries("node1"), equalTo(0));
assertThat(clusterState.getRoutingNodes().getIncomingRecoveries("node2"), equalTo(0));
assertThat(clusterState.getRoutingNodes().getIncomingRecoveries("node3"), equalTo(0));
assertThat(clusterState.getRoutingNodes().getIncomingRecoveries("node4"), equalTo(0));
assertThat(clusterState.getRoutingNodes().getInitialIncomingRecoveries("node1"), equalTo(1));
assertThat(clusterState.getRoutingNodes().getInitialIncomingRecoveries("node2"), equalTo(1));
assertThat(clusterState.getRoutingNodes().getInitialIncomingRecoveries("node3"), equalTo(2));
assertThat(clusterState.getRoutingNodes().getInitialIncomingRecoveries("node4"), equalTo(0));
assertThat(clusterState.getRoutingNodes().getOutgoingRecoveries("node1"), equalTo(0));
assertThat(clusterState.getRoutingNodes().getOutgoingRecoveries("node2"), equalTo(0));
assertThat(clusterState.getRoutingNodes().getOutgoingRecoveries("node3"), equalTo(0));
assertThat(clusterState.getRoutingNodes().getOutgoingRecoveries("node4"), equalTo(0));
assertThat(clusterState.getRoutingNodes().getInitialOutgoingRecoveries("node1"), equalTo(0));
assertThat(clusterState.getRoutingNodes().getInitialOutgoingRecoveries("node2"), equalTo(2));
assertThat(clusterState.getRoutingNodes().getInitialOutgoingRecoveries("node3"), equalTo(0));
assertThat(clusterState.getRoutingNodes().getInitialOutgoingRecoveries("node4"), equalTo(2));
}
Aggregations