Use of org.opensearch.cluster.routing.allocation.decider.ReplicaAfterPrimaryActiveAllocationDecider in project OpenSearch by opensearch-project.
From the class ClusterModule, method createAllocationDeciders.
// TODO: this is public so the allocation benchmark can reach the default deciders... can we do that in another way?
/**
 * Builds the collection of {@link AllocationDecider}s: the builtin deciders first, followed by
 * the deciders supplied by the given plugins.
 *
 * @param settings        settings passed to the deciders that accept them and to every plugin
 * @param clusterSettings cluster settings passed to the deciders that accept them and to every plugin
 * @param clusterPlugins  plugins that are asked, in list order, for additional deciders
 * @return the values view of an insertion-ordered map keyed by decider class
 */
public static Collection<AllocationDecider> createAllocationDeciders(Settings settings, ClusterSettings clusterSettings, List<ClusterPlugin> clusterPlugins) {
    // builtin deciders, listed in the order in which they are registered
    final AllocationDecider[] builtinDeciders = {
        new MaxRetryAllocationDecider(),
        new ResizeAllocationDecider(),
        new ReplicaAfterPrimaryActiveAllocationDecider(),
        new RebalanceOnlyWhenActiveAllocationDecider(),
        new ClusterRebalanceAllocationDecider(settings, clusterSettings),
        new ConcurrentRebalanceAllocationDecider(settings, clusterSettings),
        new ConcurrentRecoveriesAllocationDecider(settings, clusterSettings),
        new EnableAllocationDecider(settings, clusterSettings),
        new NodeVersionAllocationDecider(),
        new SnapshotInProgressAllocationDecider(),
        new RestoreInProgressAllocationDecider(),
        new FilterAllocationDecider(settings, clusterSettings),
        new SameShardAllocationDecider(settings, clusterSettings),
        new DiskThresholdDecider(settings, clusterSettings),
        new ThrottlingAllocationDecider(settings, clusterSettings),
        new ShardsLimitAllocationDecider(settings, clusterSettings),
        new AwarenessAllocationDecider(settings, clusterSettings),
        new NodeLoadAwareAllocationDecider(settings, clusterSettings) };

    // deciders are collected by class so that duplicates can be detected
    final Map<Class, AllocationDecider> decidersByClass = new LinkedHashMap<>();
    for (AllocationDecider builtin : builtinDeciders) {
        addAllocationDecider(decidersByClass, builtin);
    }

    // plugin deciders go after the builtin ones, plugin by plugin
    for (ClusterPlugin plugin : clusterPlugins) {
        for (AllocationDecider pluginDecider : plugin.createAllocationDeciders(settings, clusterSettings)) {
            addAllocationDecider(decidersByClass, pluginDecider);
        }
    }
    return decidersByClass.values();
}
Use of org.opensearch.cluster.routing.allocation.decider.ReplicaAfterPrimaryActiveAllocationDecider in project OpenSearch by opensearch-project.
From the class RandomAllocationDeciderTests, method testRandomDecisions.
/*
 * This test makes random allocation decisions on a growing and shrinking
 * cluster, leading to a random distribution of the shards. After a certain
 * number of iterations the test allows allocation unless the same shard is
 * already allocated on a node, and balances the cluster to reach an optimal
 * balance.
 */
public void testRandomDecisions() {
    RandomAllocationDecider randomAllocationDecider = new RandomAllocationDecider(random());
    AllocationService strategy = new AllocationService(
        new AllocationDeciders(
            new HashSet<>(
                Arrays.asList(
                    new SameShardAllocationDecider(Settings.EMPTY, new ClusterSettings(Settings.EMPTY, ClusterSettings.BUILT_IN_CLUSTER_SETTINGS)),
                    new ReplicaAfterPrimaryActiveAllocationDecider(),
                    randomAllocationDecider
                )
            )
        ),
        new TestGatewayAllocator(),
        new BalancedShardsAllocator(Settings.EMPTY),
        EmptyClusterInfoService.INSTANCE,
        EmptySnapshotsInfoService.INSTANCE
    );

    // build a random number of indices, tracking the largest copy count (replicas + 1) and the total shard copies
    int indices = scaledRandomIntBetween(1, 20);
    Builder metaBuilder = Metadata.builder();
    int maxNumReplicas = 1;
    int totalNumShards = 0;
    for (int i = 0; i < indices; i++) {
        int replicas = scaledRandomIntBetween(0, 6);
        maxNumReplicas = Math.max(maxNumReplicas, replicas + 1);
        int numShards = scaledRandomIntBetween(1, 20);
        totalNumShards += numShards * (replicas + 1);
        metaBuilder.put(IndexMetadata.builder("INDEX_" + i).settings(settings(Version.CURRENT)).numberOfShards(numShards).numberOfReplicas(replicas));
    }
    Metadata metadata = metaBuilder.build();
    RoutingTable.Builder routingTableBuilder = RoutingTable.builder();
    for (int i = 0; i < indices; i++) {
        routingTableBuilder.addAsNew(metadata.index("INDEX_" + i));
    }
    RoutingTable initialRoutingTable = routingTableBuilder.build();
    ClusterState clusterState = ClusterState.builder(org.opensearch.cluster.ClusterName.CLUSTER_NAME_SETTING.getDefault(Settings.EMPTY)).metadata(metadata).routingTable(initialRoutingTable).build();

    int numIters = scaledRandomIntBetween(5, 15);
    int nodeIdCounter = 0;
    int atMostNodes = scaledRandomIntBetween(Math.max(1, maxNumReplicas), 15);
    final boolean frequentNodes = randomBoolean();
    for (int i = 0; i < numIters; i++) {
        logger.info("Start iteration [{}]", i);
        ClusterState.Builder stateBuilder = ClusterState.builder(clusterState);
        DiscoveryNodes.Builder newNodesBuilder = DiscoveryNodes.builder(clusterState.nodes());

        // always add nodes on the first pass (nodeIdCounter == 0); afterwards add them randomly
        if (clusterState.nodes().getSize() <= atMostNodes && (nodeIdCounter == 0 || (frequentNodes ? frequently() : rarely()))) {
            int numNodes = scaledRandomIntBetween(1, 3);
            for (int j = 0; j < numNodes; j++) {
                logger.info("adding node [{}]", nodeIdCounter);
                newNodesBuilder.add(newNode("NODE_" + (nodeIdCounter++)));
            }
        }

        boolean nodesRemoved = false;
        if (nodeIdCounter > 1 && rarely()) {
            int nodeId = scaledRandomIntBetween(0, nodeIdCounter - 2);
            final String node = "NODE_" + nodeId;
            boolean safeToRemove = true;
            // the routing node is null when the chosen node holds no routing entry in the current state
            RoutingNode routingNode = clusterState.getRoutingNodes().node(node);
            for (ShardRouting shard : routingNode != null ? routingNode : Collections.<ShardRouting>emptyList()) {
                if (shard.active() && shard.primary()) {
                    // make sure there is an active replica to prevent from going red
                    if (clusterState.routingTable().shardRoutingTable(shard.shardId()).activeShards().size() <= 1) {
                        safeToRemove = false;
                        break;
                    }
                }
            }
            if (safeToRemove) {
                logger.info("removing node [{}]", nodeId);
                newNodesBuilder.remove(node);
                nodesRemoved = true;
            } else {
                logger.debug("not removing node [{}] as it holds a primary with no replacement", nodeId);
            }
        }

        stateBuilder.nodes(newNodesBuilder.build());
        clusterState = stateBuilder.build();
        if (nodesRemoved) {
            clusterState = strategy.disassociateDeadNodes(clusterState, true, "reroute");
        } else {
            clusterState = strategy.reroute(clusterState, "reroute");
        }
        if (clusterState.getRoutingNodes().shardsWithState(INITIALIZING).size() > 0) {
            clusterState = startInitializingShardsAndReroute(strategy, clusterState);
        }
    }

    logger.info("Fill up nodes such that every shard can be allocated");
    if (clusterState.nodes().getSize() < maxNumReplicas) {
        ClusterState.Builder stateBuilder = ClusterState.builder(clusterState);
        DiscoveryNodes.Builder newNodesBuilder = DiscoveryNodes.builder(clusterState.nodes());
        // clusterState is not reassigned inside the loop, so the bound stays fixed while nodes are added to the builder
        for (int j = 0; j < (maxNumReplicas - clusterState.nodes().getSize()); j++) {
            logger.info("adding node [{}]", nodeIdCounter);
            newNodesBuilder.add(newNode("NODE_" + (nodeIdCounter++)));
        }
        stateBuilder.nodes(newNodesBuilder.build());
        clusterState = stateBuilder.build();
    }

    randomAllocationDecider.alwaysSayYes = true;
    logger.info("now say YES to everything");
    int iterations = 0;
    do {
        iterations++;
        clusterState = strategy.reroute(clusterState, "reroute");
        if (clusterState.getRoutingNodes().shardsWithState(INITIALIZING).size() > 0) {
            clusterState = startInitializingShardsAndReroute(strategy, clusterState);
        }
        // the parentheses matter: && binds tighter than ||, so without them the iteration cap
        // would only guard the UNASSIGNED check and lingering INITIALIZING shards would loop forever
    } while ((clusterState.getRoutingNodes().shardsWithState(ShardRoutingState.INITIALIZING).size() != 0
        || clusterState.getRoutingNodes().shardsWithState(ShardRoutingState.UNASSIGNED).size() != 0) && iterations < 200);
    logger.info("Done Balancing after [{}] iterations. State:\n{}", iterations, clusterState);
    // we stop after 200 iterations; if it didn't stabilize by then something is likely to be wrong
    assertThat("max num iteration exceeded", iterations, Matchers.lessThan(200));
    assertThat(clusterState.getRoutingNodes().shardsWithState(ShardRoutingState.INITIALIZING).size(), equalTo(0));
    assertThat(clusterState.getRoutingNodes().shardsWithState(ShardRoutingState.UNASSIGNED).size(), equalTo(0));
    int shards = clusterState.getRoutingNodes().shardsWithState(ShardRoutingState.STARTED).size();
    assertThat(shards, equalTo(totalNumShards));

    final int numNodes = clusterState.nodes().getSize();
    // integer division, exactly as before: the truncated per-node average drives every bound below
    final int average = shards / numNodes;
    final int upperBound = (int) Math.round((average * 1.10));
    final int lowerBound = (int) Math.round((average * 0.90));
    for (int i = 0; i < nodeIdCounter; i++) {
        final RoutingNode routingNode = clusterState.getRoutingNodes().node("NODE_" + i);
        if (routingNode == null) {
            // no routing node under this id in the final state (e.g. it was removed above)
            continue;
        }
        // each node must be within one shard of the average, or within 10% of it
        assertThat(
            routingNode.size(),
            Matchers.anyOf(
                Matchers.anyOf(equalTo(average + 1), equalTo(average - 1), equalTo(average)),
                Matchers.allOf(Matchers.greaterThanOrEqualTo(lowerBound), Matchers.lessThanOrEqualTo(upperBound))
            )
        );
    }
}
Use of org.opensearch.cluster.routing.allocation.decider.ReplicaAfterPrimaryActiveAllocationDecider in project OpenSearch by opensearch-project.
From the class NodeVersionAllocationDeciderTests, method testRestoreDoesNotAllocateSnapshotOnOlderNodes.
/**
 * Restores the index "test" from a snapshot into a cluster made of one node on the current
 * version and two nodes on the previous version, performs a single reroute, and verifies that
 * every primary shard is assigned to the current-version node.
 */
public void testRestoreDoesNotAllocateSnapshotOnOlderNodes() {
    // one node on the current version, two on the previous one
    final DiscoveryNode newNode = new DiscoveryNode("newNode", buildNewFakeTransportAddress(), emptyMap(), MASTER_DATA_ROLES, Version.CURRENT);
    final DiscoveryNode oldNode1 = new DiscoveryNode("oldNode1", buildNewFakeTransportAddress(), emptyMap(), MASTER_DATA_ROLES, VersionUtils.getPreviousVersion());
    final DiscoveryNode oldNode2 = new DiscoveryNode("oldNode2", buildNewFakeTransportAddress(), emptyMap(), MASTER_DATA_ROLES, VersionUtils.getPreviousVersion());

    final SnapshotId snapshotId = new SnapshotId("snp1", UUIDs.randomBase64UUID());
    final Snapshot snapshot = new Snapshot("rep1", snapshotId);
    final IndexId indexId = new IndexId("test", UUIDs.randomBase64UUID(random()));

    // index metadata with a random shard/replica count; every shard gets the same in-sync allocation id
    final int numberOfShards = randomIntBetween(1, 3);
    final IndexMetadata.Builder indexMetadata = IndexMetadata.builder("test")
        .settings(settings(Version.CURRENT))
        .numberOfShards(numberOfShards)
        .numberOfReplicas(randomIntBetween(0, 3));
    for (int shard = 0; shard < numberOfShards; shard++) {
        indexMetadata.putInSyncAllocationIds(shard, Collections.singleton("_test_"));
    }
    Metadata metadata = Metadata.builder().put(indexMetadata).build();
    final IndexMetadata restoredIndex = metadata.index("test");

    // a random size for each shard of the snapshot
    final ImmutableOpenMap.Builder<InternalSnapshotsInfoService.SnapshotShard, Long> snapshotShardSizes = ImmutableOpenMap.builder(numberOfShards);
    final Index index = restoredIndex.getIndex();
    for (int shard = 0; shard < numberOfShards; shard++) {
        final InternalSnapshotsInfoService.SnapshotShard snapshotShard = new InternalSnapshotsInfoService.SnapshotShard(snapshot, indexId, new ShardId(index, shard));
        snapshotShardSizes.put(snapshotShard, randomNonNegativeLong());
    }

    // cluster state whose routing table marks the index as being restored from the snapshot
    final SnapshotRecoverySource recoverySource = new SnapshotRecoverySource(UUIDs.randomBase64UUID(), snapshot, Version.CURRENT, indexId);
    final RoutingTable restoreRoutingTable = RoutingTable.builder().addAsRestore(restoredIndex, recoverySource).build();
    ClusterState state = ClusterState.builder(org.opensearch.cluster.ClusterName.CLUSTER_NAME_SETTING.getDefault(Settings.EMPTY))
        .metadata(metadata)
        .routingTable(restoreRoutingTable)
        .nodes(DiscoveryNodes.builder().add(newNode).add(oldNode1).add(oldNode2))
        .build();

    AllocationDeciders allocationDeciders = new AllocationDeciders(
        Arrays.asList(new ReplicaAfterPrimaryActiveAllocationDecider(), new NodeVersionAllocationDecider())
    );
    AllocationService strategy = new MockAllocationService(
        allocationDeciders,
        new TestGatewayAllocator(),
        new BalancedShardsAllocator(Settings.EMPTY),
        EmptyClusterInfoService.INSTANCE,
        () -> new SnapshotShardSizeInfo(snapshotShardSizes.build())
    );
    state = strategy.reroute(state, new AllocationCommands(), true, false).getClusterState();

    // Make sure that primary shards are only allocated on the new node
    for (int shard = 0; shard < numberOfShards; shard++) {
        final String primaryNodeId = state.routingTable().index("test").getShards().get(shard).primaryShard().currentNodeId();
        assertEquals("newNode", primaryNodeId);
    }
}
Use of org.opensearch.cluster.routing.allocation.decider.ReplicaAfterPrimaryActiveAllocationDecider in project OpenSearch by opensearch-project.
From the class GatewayServiceTests, method createService.
/**
 * Creates a {@link GatewayService} from the given settings, wired to a cluster service named
 * "GatewayServiceTests" and an allocation service that uses the same-shard and
 * replica-after-primary-active deciders.
 *
 * @param settings builder whose built settings are handed to the gateway service
 * @return the new gateway service; its last three constructor arguments are passed as {@code null}
 */
private GatewayService createService(final Settings.Builder settings) {
    final Settings clusterServiceSettings = Settings.builder().put("cluster.name", "GatewayServiceTests").build();
    final ClusterSettings clusterServiceClusterSettings = new ClusterSettings(Settings.EMPTY, ClusterSettings.BUILT_IN_CLUSTER_SETTINGS);
    final ClusterService clusterService = new ClusterService(clusterServiceSettings, clusterServiceClusterSettings, null);

    // the same-shard decider gets its own ClusterSettings instance, separate from the cluster service's
    final ClusterSettings deciderClusterSettings = new ClusterSettings(Settings.EMPTY, ClusterSettings.BUILT_IN_CLUSTER_SETTINGS);
    final AllocationDeciders deciders = new AllocationDeciders(
        new HashSet<>(
            Arrays.asList(
                new SameShardAllocationDecider(Settings.EMPTY, deciderClusterSettings),
                new ReplicaAfterPrimaryActiveAllocationDecider()
            )
        )
    );
    final AllocationService allocationService = new AllocationService(
        deciders,
        new TestGatewayAllocator(),
        new BalancedShardsAllocator(Settings.EMPTY),
        EmptyClusterInfoService.INSTANCE,
        EmptySnapshotsInfoService.INSTANCE
    );
    return new GatewayService(settings.build(), allocationService, clusterService, null, null, null);
}
Aggregations