use of org.opensearch.cluster.routing.allocation.decider.SameShardAllocationDecider in project OpenSearch by opensearch-project.
the class ClusterModule method createAllocationDeciders.
// TODO: this is public only so the allocation benchmark can reach the default deciders; can that be done another way?
/**
 * Assembles the allocation deciders: the built-in deciders are registered first, followed by
 * whatever deciders each supplied plugin creates.
 *
 * @param settings        settings passed to the deciders that accept them and to every plugin
 * @param clusterSettings cluster settings passed to the deciders that accept them and to every plugin
 * @param clusterPlugins  plugins asked, in list order, for additional deciders
 * @return the values view of the class-keyed, insertion-ordered map the deciders were registered in
 */
public static Collection<AllocationDecider> createAllocationDeciders(Settings settings, ClusterSettings clusterSettings, List<ClusterPlugin> clusterPlugins) {
    // keyed by decider class so that duplicates can be detected
    final Map<Class, AllocationDecider> registry = new LinkedHashMap<>();

    // built-in deciders that need no configuration
    addAllocationDecider(registry, new MaxRetryAllocationDecider());
    addAllocationDecider(registry, new ResizeAllocationDecider());
    addAllocationDecider(registry, new ReplicaAfterPrimaryActiveAllocationDecider());
    addAllocationDecider(registry, new RebalanceOnlyWhenActiveAllocationDecider());

    addAllocationDecider(registry, new ClusterRebalanceAllocationDecider(settings, clusterSettings));
    addAllocationDecider(registry, new ConcurrentRebalanceAllocationDecider(settings, clusterSettings));
    addAllocationDecider(registry, new ConcurrentRecoveriesAllocationDecider(settings, clusterSettings));
    addAllocationDecider(registry, new EnableAllocationDecider(settings, clusterSettings));

    addAllocationDecider(registry, new NodeVersionAllocationDecider());
    addAllocationDecider(registry, new SnapshotInProgressAllocationDecider());
    addAllocationDecider(registry, new RestoreInProgressAllocationDecider());

    addAllocationDecider(registry, new FilterAllocationDecider(settings, clusterSettings));
    addAllocationDecider(registry, new SameShardAllocationDecider(settings, clusterSettings));
    addAllocationDecider(registry, new DiskThresholdDecider(settings, clusterSettings));
    addAllocationDecider(registry, new ThrottlingAllocationDecider(settings, clusterSettings));
    addAllocationDecider(registry, new ShardsLimitAllocationDecider(settings, clusterSettings));
    addAllocationDecider(registry, new AwarenessAllocationDecider(settings, clusterSettings));
    addAllocationDecider(registry, new NodeLoadAwareAllocationDecider(settings, clusterSettings));

    // plugin-provided deciders come last, one plugin at a time
    for (ClusterPlugin plugin : clusterPlugins) {
        for (AllocationDecider contributed : plugin.createAllocationDeciders(settings, clusterSettings)) {
            addAllocationDecider(registry, contributed);
        }
    }
    return registry.values();
}
use of org.opensearch.cluster.routing.allocation.decider.SameShardAllocationDecider in project OpenSearch by opensearch-project.
the class SameShardRoutingTests method testForceAllocatePrimaryOnSameNodeNotAllowed.
/**
 * Checks {@code SameShardAllocationDecider#canForceAllocatePrimary}: the decision must be NO for
 * the node that already holds the primary of the shard, and YES for a node holding no shards.
 */
public void testForceAllocatePrimaryOnSameNodeNotAllowed() {
    final ClusterSettings builtInSettings = new ClusterSettings(Settings.EMPTY, ClusterSettings.BUILT_IN_CLUSTER_SETTINGS);
    final SameShardAllocationDecider sameShardDecider = new SameShardAllocationDecider(Settings.EMPTY, builtInSettings);

    final ClusterState state = ClusterStateCreationUtils.state("idx", randomIntBetween(2, 4), 1);
    final Index idx = state.getMetadata().index("idx").getIndex();
    final ShardRouting existingPrimary = state.routingTable().index(idx).shard(0).primaryShard();
    final RoutingNode hostingNode = state.getRoutingNodes().node(existingPrimary.currentNodeId());

    final AllocationDeciders noDeciders = new AllocationDeciders(Collections.emptyList());
    final RoutingAllocation allocation = new RoutingAllocation(
        noDeciders,
        new RoutingNodes(state, false),
        state,
        ClusterInfo.EMPTY,
        SnapshotShardSizeInfo.EMPTY,
        System.nanoTime()
    );

    // a second copy of the same shard must not be forced onto the node that already hosts it
    final ShardRouting candidatePrimary = TestShardRouting.newShardRouting(
        existingPrimary.shardId(),
        null,
        true,
        ShardRoutingState.UNASSIGNED
    );
    Decision outcome = sameShardDecider.canForceAllocatePrimary(candidatePrimary, hostingNode, allocation);
    assertEquals(Decision.Type.NO, outcome.type());

    // forcing it onto a node without any shards is fine; pick the first such node
    RoutingNode emptyNode = null;
    for (RoutingNode candidateNode : state.getRoutingNodes()) {
        if (!candidateNode.isEmpty()) {
            continue;
        }
        emptyNode = candidateNode;
        break;
    }
    outcome = sameShardDecider.canForceAllocatePrimary(candidatePrimary, emptyNode, allocation);
    assertEquals(Decision.Type.YES, outcome.type());
}
use of org.opensearch.cluster.routing.allocation.decider.SameShardAllocationDecider in project OpenSearch by opensearch-project.
the class AllocationServiceTests method testAssignsPrimariesInPriorityOrderThenReplicas.
/**
 * Runs three consecutive reroute-and-start rounds over four indices ("highPriority",
 * "mediumPriority", "lowPriority" and "invalid") on a three-node cluster and asserts, after each
 * round, which shards have been started.
 */
public void testAssignsPrimariesInPriorityOrderThenReplicas() {
    // incoming recoveries are throttled so the order of operations becomes observable; outgoing recoveries are
    // left unthrottled because their effect would depend on the earlier (random) allocations
    final Settings settings = Settings.builder()
        .put(CLUSTER_ROUTING_ALLOCATION_NODE_INITIAL_PRIMARIES_RECOVERIES_SETTING.getKey(), 1)
        .put(CLUSTER_ROUTING_ALLOCATION_NODE_INITIAL_REPLICAS_RECOVERIES_SETTING.getKey(), 1)
        .put(CLUSTER_ROUTING_ALLOCATION_NODE_CONCURRENT_INCOMING_RECOVERIES_SETTING.getKey(), 1)
        .put(CLUSTER_ROUTING_ALLOCATION_NODE_CONCURRENT_OUTGOING_RECOVERIES_SETTING.getKey(), Integer.MAX_VALUE)
        .build();
    final ClusterSettings clusterSettings = new ClusterSettings(settings, ClusterSettings.BUILT_IN_CLUSTER_SETTINGS);

    final ShardsAllocator noPrimariesLeftAllocator = new ShardsAllocator() {
        @Override
        public void allocate(RoutingAllocation allocation) {
            // every primary is dealt with by an existing-shards allocator in these tests; even the invalid
            // allocator keeps shards from falling through to this point
            assertThat(allocation.routingNodes().unassigned().getNumPrimaries(), equalTo(0));
        }

        @Override
        public ShardAllocationDecision decideShardAllocation(ShardRouting shard, RoutingAllocation allocation) {
            return ShardAllocationDecision.NOT_TAKEN;
        }
    };
    final AllocationDeciders deciders = new AllocationDeciders(
        Arrays.asList(new SameShardAllocationDecider(settings, clusterSettings), new ThrottlingAllocationDecider(settings, clusterSettings))
    );
    final AllocationService allocationService = new AllocationService(
        deciders,
        noPrimariesLeftAllocator,
        new EmptyClusterInfoService(),
        EmptySnapshotsInfoService.INSTANCE
    );

    // register the gateway allocator plus a second, "unrealistic" existing-shards allocator
    final String unrealisticAllocatorName = "unrealistic";
    final Map<String, ExistingShardsAllocator> existingShardsAllocators = new HashMap<>();
    final TestGatewayAllocator gatewayAllocator = new TestGatewayAllocator();
    existingShardsAllocators.put(GatewayAllocator.ALLOCATOR_NAME, gatewayAllocator);
    existingShardsAllocators.put(unrealisticAllocatorName, new UnrealisticAllocator());
    allocationService.setExistingShardsAllocators(existingShardsAllocators);

    final DiscoveryNodes.Builder nodes = DiscoveryNodes.builder();
    for (String nodeId : new String[] { "node1", "node2", "node3" }) {
        nodes.add(new DiscoveryNode(nodeId, buildNewFakeTransportAddress(), Version.CURRENT));
    }

    final Metadata.Builder metadata = Metadata.builder();
    metadata.put(indexMetadata("highPriority", Settings.builder().put(IndexMetadata.SETTING_PRIORITY, 10)));
    metadata.put(
        indexMetadata(
            "mediumPriority",
            Settings.builder()
                .put(IndexMetadata.SETTING_PRIORITY, 5)
                .put(ExistingShardsAllocator.EXISTING_SHARDS_ALLOCATOR_SETTING.getKey(), unrealisticAllocatorName)
        )
    );
    metadata.put(indexMetadata("lowPriority", Settings.builder().put(IndexMetadata.SETTING_PRIORITY, 3)));
    metadata.put(
        indexMetadata(
            "invalid",
            Settings.builder()
                .put(IndexMetadata.SETTING_PRIORITY, between(0, 15))
                .put(ExistingShardsAllocator.EXISTING_SHARDS_ALLOCATOR_SETTING.getKey(), "unknown")
        )
    );

    final RoutingTable.Builder routingTableBuilder = RoutingTable.builder();
    routingTableBuilder.addAsRecovery(metadata.get("highPriority"));
    routingTableBuilder.addAsRecovery(metadata.get("mediumPriority"));
    routingTableBuilder.addAsRecovery(metadata.get("lowPriority"));
    routingTableBuilder.addAsRecovery(metadata.get("invalid"));

    final ClusterState initialState = ClusterState.builder(ClusterName.DEFAULT)
        .nodes(nodes)
        .metadata(metadata)
        .routingTable(routingTableBuilder.build())
        .build();

    // let the gateway allocator place every primary on every node
    for (IndexRoutingTable indexTable : initialState.routingTable()) {
        for (IndexShardRoutingTable shardTable : indexTable) {
            final ShardRouting primary = shardTable.primaryShard();
            for (DiscoveryNode discoveryNode : initialState.nodes()) {
                gatewayAllocator.addKnownAllocation(primary.initialize(discoveryNode.getId(), FAKE_IN_SYNC_ALLOCATION_ID, 0L));
            }
        }
    }

    // round 1: with only one recovery permitted per node, all high-priority primaries and one
    // medium-priority primary must have been allocated
    final ClusterState stateAfterRound1 = rerouteAndStartShards(allocationService, initialState);
    final RoutingTable tableAfterRound1 = stateAfterRound1.routingTable();
    assertThat(tableAfterRound1.shardsWithState(ShardRoutingState.INITIALIZING), empty());
    assertThat(tableAfterRound1.shardsWithState(ShardRoutingState.RELOCATING), empty());
    assertTrue(tableAfterRound1.shardsWithState(ShardRoutingState.STARTED).stream().allMatch(ShardRouting::primary));
    assertThat(tableAfterRound1.index("highPriority").primaryShardsActive(), equalTo(2));
    assertThat(tableAfterRound1.index("mediumPriority").primaryShardsActive(), equalTo(1));
    assertThat(tableAfterRound1.index("lowPriority").shardsWithState(ShardRoutingState.STARTED), empty());
    assertThat(tableAfterRound1.index("invalid").shardsWithState(ShardRoutingState.STARTED), empty());

    // round 2: starts the remaining medium-priority primary, both low-priority primaries,
    // and one medium-priority replica
    final ClusterState stateAfterRound2 = rerouteAndStartShards(allocationService, stateAfterRound1);
    final RoutingTable tableAfterRound2 = stateAfterRound2.routingTable();
    assertThat(tableAfterRound2.shardsWithState(ShardRoutingState.INITIALIZING), empty());
    assertThat(tableAfterRound2.shardsWithState(ShardRoutingState.RELOCATING), empty());
    assertTrue(tableAfterRound2.index("highPriority").allPrimaryShardsActive());
    assertTrue(tableAfterRound2.index("mediumPriority").allPrimaryShardsActive());
    assertThat(tableAfterRound2.index("mediumPriority").shardsWithState(ShardRoutingState.STARTED).size(), equalTo(3));
    assertThat(tableAfterRound2.index("mediumPriority").shardsWithState(ShardRoutingState.UNASSIGNED).size(), equalTo(1));
    assertTrue(tableAfterRound2.index("lowPriority").allPrimaryShardsActive());
    assertThat(tableAfterRound2.index("invalid").shardsWithState(ShardRoutingState.STARTED), empty());

    // round 3: starts the last remaining medium-priority replica
    final ClusterState stateAfterRound3 = rerouteAndStartShards(allocationService, stateAfterRound2);
    final RoutingTable tableAfterRound3 = stateAfterRound3.routingTable();
    assertThat(tableAfterRound3.shardsWithState(ShardRoutingState.INITIALIZING), empty());
    assertThat(tableAfterRound3.shardsWithState(ShardRoutingState.RELOCATING), empty());
    assertTrue(tableAfterRound3.index("highPriority").allPrimaryShardsActive());
    assertTrue(tableAfterRound3.index("mediumPriority").allPrimaryShardsActive());
    assertThat(tableAfterRound3.index("mediumPriority").shardsWithState(ShardRoutingState.UNASSIGNED), empty());
    assertThat(tableAfterRound3.index("mediumPriority").shardsWithState(ShardRoutingState.STARTED).size(), equalTo(4));
    assertTrue(tableAfterRound3.index("lowPriority").allPrimaryShardsActive());
    assertThat(tableAfterRound3.index("invalid").shardsWithState(ShardRoutingState.STARTED), empty());
}
use of org.opensearch.cluster.routing.allocation.decider.SameShardAllocationDecider in project OpenSearch by opensearch-project.
the class RandomAllocationDeciderTests method testRandomDecisions.
/*
 * This test makes random allocation decisions on a growing and shrinking
 * cluster, leading to a random distribution of the shards. After a certain
 * number of iterations the test allows allocation unless the same shard is
 * already allocated on a node, and rebalances the cluster until every shard
 * is started; it then checks the per-node shard counts against the average.
 */
public void testRandomDecisions() {
    RandomAllocationDecider randomAllocationDecider = new RandomAllocationDecider(random());
    AllocationService strategy = new AllocationService(new AllocationDeciders(new HashSet<>(Arrays.asList(new SameShardAllocationDecider(Settings.EMPTY, new ClusterSettings(Settings.EMPTY, ClusterSettings.BUILT_IN_CLUSTER_SETTINGS)), new ReplicaAfterPrimaryActiveAllocationDecider(), randomAllocationDecider))), new TestGatewayAllocator(), new BalancedShardsAllocator(Settings.EMPTY), EmptyClusterInfoService.INSTANCE, EmptySnapshotsInfoService.INSTANCE);

    // build a random number of indices, tracking the largest copy count (replicas + 1) and the total shard count
    int indices = scaledRandomIntBetween(1, 20);
    Builder metaBuilder = Metadata.builder();
    int maxNumReplicas = 1;
    int totalNumShards = 0;
    for (int i = 0; i < indices; i++) {
        int replicas = scaledRandomIntBetween(0, 6);
        maxNumReplicas = Math.max(maxNumReplicas, replicas + 1);
        int numShards = scaledRandomIntBetween(1, 20);
        totalNumShards += numShards * (replicas + 1);
        metaBuilder.put(IndexMetadata.builder("INDEX_" + i).settings(settings(Version.CURRENT)).numberOfShards(numShards).numberOfReplicas(replicas));
    }
    Metadata metadata = metaBuilder.build();
    RoutingTable.Builder routingTableBuilder = RoutingTable.builder();
    for (int i = 0; i < indices; i++) {
        routingTableBuilder.addAsNew(metadata.index("INDEX_" + i));
    }
    RoutingTable initialRoutingTable = routingTableBuilder.build();
    ClusterState clusterState = ClusterState.builder(org.opensearch.cluster.ClusterName.CLUSTER_NAME_SETTING.getDefault(Settings.EMPTY)).metadata(metadata).routingTable(initialRoutingTable).build();

    // phase 1: randomly add and remove nodes, rerouting after every change
    int numIters = scaledRandomIntBetween(5, 15);
    int nodeIdCounter = 0;
    int atMostNodes = scaledRandomIntBetween(Math.max(1, maxNumReplicas), 15);
    final boolean frequentNodes = randomBoolean();
    for (int i = 0; i < numIters; i++) {
        logger.info("Start iteration [{}]", i);
        ClusterState.Builder stateBuilder = ClusterState.builder(clusterState);
        DiscoveryNodes.Builder newNodesBuilder = DiscoveryNodes.builder(clusterState.nodes());
        // always add nodes on the very first pass (nodeIdCounter == 0); afterwards only at random
        if (clusterState.nodes().getSize() <= atMostNodes && (nodeIdCounter == 0 || (frequentNodes ? frequently() : rarely()))) {
            int numNodes = scaledRandomIntBetween(1, 3);
            for (int j = 0; j < numNodes; j++) {
                logger.info("adding node [{}]", nodeIdCounter);
                newNodesBuilder.add(newNode("NODE_" + (nodeIdCounter++)));
            }
        }
        boolean nodesRemoved = false;
        if (nodeIdCounter > 1 && rarely()) {
            int nodeId = scaledRandomIntBetween(0, nodeIdCounter - 2);
            final String node = "NODE_" + nodeId;
            boolean safeToRemove = true;
            RoutingNode routingNode = clusterState.getRoutingNodes().node(node);
            // the chosen node may have no routing node; iterate an empty list in that case
            for (ShardRouting shard : routingNode != null ? routingNode : Collections.<ShardRouting>emptyList()) {
                if (shard.active() && shard.primary()) {
                    // make sure there is an active replica to prevent from going red
                    if (clusterState.routingTable().shardRoutingTable(shard.shardId()).activeShards().size() <= 1) {
                        safeToRemove = false;
                        break;
                    }
                }
            }
            if (safeToRemove) {
                logger.info("removing node [{}]", nodeId);
                newNodesBuilder.remove(node);
                nodesRemoved = true;
            } else {
                logger.debug("not removing node [{}] as it holds a primary with no replacement", nodeId);
            }
        }
        stateBuilder.nodes(newNodesBuilder.build());
        clusterState = stateBuilder.build();
        if (nodesRemoved) {
            clusterState = strategy.disassociateDeadNodes(clusterState, true, "reroute");
        } else {
            clusterState = strategy.reroute(clusterState, "reroute");
        }
        if (clusterState.getRoutingNodes().shardsWithState(INITIALIZING).size() > 0) {
            clusterState = startInitializingShardsAndReroute(strategy, clusterState);
        }
    }

    // phase 2: make sure there are at least maxNumReplicas nodes
    logger.info("Fill up nodes such that every shard can be allocated");
    if (clusterState.nodes().getSize() < maxNumReplicas) {
        ClusterState.Builder stateBuilder = ClusterState.builder(clusterState);
        DiscoveryNodes.Builder newNodesBuilder = DiscoveryNodes.builder(clusterState.nodes());
        for (int j = 0; j < (maxNumReplicas - clusterState.nodes().getSize()); j++) {
            logger.info("adding node [{}]", nodeIdCounter);
            newNodesBuilder.add(newNode("NODE_" + (nodeIdCounter++)));
        }
        stateBuilder.nodes(newNodesBuilder.build());
        clusterState = stateBuilder.build();
    }

    // phase 3: switch the random decider to always answer YES and reroute until nothing is left to allocate
    randomAllocationDecider.alwaysSayYes = true;
    logger.info("now say YES to everything");
    int iterations = 0;
    do {
        iterations++;
        clusterState = strategy.reroute(clusterState, "reroute");
        if (clusterState.getRoutingNodes().shardsWithState(INITIALIZING).size() > 0) {
            clusterState = startInitializingShardsAndReroute(strategy, clusterState);
        }
        // The two "work remaining" checks are grouped explicitly: && binds tighter than ||, so without the
        // parentheses the 200-iteration cap would only guard the UNASSIGNED check and the loop could spin
        // forever while shards stay INITIALIZING.
    } while ((clusterState.getRoutingNodes().shardsWithState(ShardRoutingState.INITIALIZING).size() != 0 || clusterState.getRoutingNodes().shardsWithState(ShardRoutingState.UNASSIGNED).size() != 0) && iterations < 200);
    logger.info("Done Balancing after [{}] iterations. State:\n{}", iterations, clusterState);
    // we stop after 200 iterations; if it didn't stabilize by then something is likely to be wrong
    assertThat("max num iteration exceeded", iterations, Matchers.lessThan(200));
    assertThat(clusterState.getRoutingNodes().shardsWithState(ShardRoutingState.INITIALIZING).size(), equalTo(0));
    assertThat(clusterState.getRoutingNodes().shardsWithState(ShardRoutingState.UNASSIGNED).size(), equalTo(0));
    int shards = clusterState.getRoutingNodes().shardsWithState(ShardRoutingState.STARTED).size();
    assertThat(shards, equalTo(totalNumShards));

    // each remaining node must hold the (integer) average number of shards +/- 1, or lie within +/-10% of it
    final int numNodes = clusterState.nodes().getSize();
    final int upperBound = (int) Math.round(((shards / numNodes) * 1.10));
    final int lowerBound = (int) Math.round(((shards / numNodes) * 0.90));
    for (int i = 0; i < nodeIdCounter; i++) {
        // skip ids that have no routing node in the final state
        if (clusterState.getRoutingNodes().node("NODE_" + i) == null) {
            continue;
        }
        assertThat(clusterState.getRoutingNodes().node("NODE_" + i).size(), Matchers.anyOf(Matchers.anyOf(equalTo((shards / numNodes) + 1), equalTo((shards / numNodes) - 1), equalTo((shards / numNodes))), Matchers.allOf(Matchers.greaterThanOrEqualTo(lowerBound), Matchers.lessThanOrEqualTo(upperBound))));
    }
}
use of org.opensearch.cluster.routing.allocation.decider.SameShardAllocationDecider in project OpenSearch by opensearch-project.
the class ReplicaShardAllocatorTests method testThrottleWhenAllocatingToMatchingNode.
/**
 * Verifies that when the node a replica would be allocated to because of matching store data is
 * being throttled, the shard is moved to the ignored list to wait until throttling on it is done.
 */
public void testThrottleWhenAllocatingToMatchingNode() {
    final ClusterSettings builtInSettings = new ClusterSettings(Settings.EMPTY, ClusterSettings.BUILT_IN_CLUSTER_SETTINGS);

    // answers THROTTLE for node2 and YES for every other node
    final AllocationDecider throttleOnNode2 = new AllocationDecider() {
        @Override
        public Decision canAllocate(ShardRouting shardRouting, RoutingNode node, RoutingAllocation allocation) {
            return node.node().equals(node2) ? Decision.THROTTLE : Decision.YES;
        }
    };
    final AllocationDeciders deciders = new AllocationDeciders(
        Arrays.asList(
            new TestAllocateDecision(Decision.YES),
            new SameShardAllocationDecider(Settings.EMPTY, builtInSettings),
            throttleOnNode2
        )
    );
    final RoutingAllocation allocation = onePrimaryOnNode1And1Replica(deciders);

    // both nodes report the same file for the same sync id
    testAllocator
        .addData(node1, "MATCH", new StoreFileMetadata("file1", 10, "MATCH_CHECKSUM", MIN_SUPPORTED_LUCENE_VERSION))
        .addData(node2, "MATCH", new StoreFileMetadata("file1", 10, "MATCH_CHECKSUM", MIN_SUPPORTED_LUCENE_VERSION));

    allocateAllUnassigned(allocation);

    // exactly one shard ends up ignored, and it is the shard under test
    assertThat(allocation.routingNodes().unassigned().ignored().size(), equalTo(1));
    assertThat(allocation.routingNodes().unassigned().ignored().get(0).shardId(), equalTo(shardId));
}
Aggregations