use of org.opensearch.cluster.routing.RerouteService in project OpenSearch by opensearch-project.
the class JoinTaskExecutorTests method testUpdatesNodeWithNewRoles.
/**
 * Checks that a join from a node which is already part of the cluster state, but is recorded there with a different
 * set of roles, is processed successfully and that the resulting state carries the roles announced by the join.
 */
public void testUpdatesNodeWithNewRoles() throws Exception {
    // Roles differ between versions and newer roles are hidden for BWC, so a join may arrive from a node that is already
    // in the cluster with another role set: the node itself did not change, the cluster state simply came through an older
    // master. Such a join has to be processed so that the roles end up correct.
    final AllocationService allocationService = mock(AllocationService.class);
    // Pass the cluster state through untouched.
    when(allocationService.adaptAutoExpandReplicas(any())).then(invocation -> invocation.getArguments()[0]);
    // Reroute requests complete immediately.
    final RerouteService rerouteService = (reason, priority, listener) -> listener.onResponse(null);

    final JoinTaskExecutor joinTaskExecutor = new JoinTaskExecutor(Settings.EMPTY, allocationService, logger, rerouteService, null);

    final DiscoveryNode masterNode = new DiscoveryNode(UUIDs.base64UUID(), buildNewFakeTransportAddress(), Version.CURRENT);
    final DiscoveryNode actualNode = new DiscoveryNode(UUIDs.base64UUID(), buildNewFakeTransportAddress(), Version.CURRENT);
    // Same identity as actualNode, but with only a random subset of its roles.
    final DiscoveryNode bwcNode = new DiscoveryNode(
        actualNode.getName(),
        actualNode.getId(),
        actualNode.getEphemeralId(),
        actualNode.getHostName(),
        actualNode.getHostAddress(),
        actualNode.getAddress(),
        actualNode.getAttributes(),
        new HashSet<>(randomSubsetOf(actualNode.getRoles())),
        actualNode.getVersion()
    );

    final DiscoveryNodes.Builder initialNodes = DiscoveryNodes.builder()
        .add(masterNode)
        .localNodeId(masterNode.getId())
        .masterNodeId(masterNode.getId())
        .add(bwcNode);
    final ClusterState clusterState = ClusterState.builder(ClusterName.DEFAULT).nodes(initialNodes).build();

    final JoinTaskExecutor.Task joinTask = new JoinTaskExecutor.Task(actualNode, "test");
    final ClusterStateTaskExecutor.ClusterTasksResult<JoinTaskExecutor.Task> result = joinTaskExecutor.execute(
        clusterState,
        List.of(joinTask)
    );

    assertThat(result.executionResults.entrySet(), hasSize(1));
    final ClusterStateTaskExecutor.TaskResult taskResult = result.executionResults.values().iterator().next();
    assertTrue(taskResult.isSuccess());

    // The node stored in the resulting state must expose the full role set from the join.
    final DiscoveryNode joinedNode = result.resultingState.getNodes().get(actualNode.getId());
    assertThat(joinedNode.getRoles(), equalTo(actualNode.getRoles()));
}
use of org.opensearch.cluster.routing.RerouteService in project OpenSearch by opensearch-project.
the class JoinTaskExecutorTests method testUpdatesNodeWithOpenSearchVersionForExistingAndNewNodes.
/**
 * Runs a join, a become-master and a finish-election task against a cluster state whose nodes are all recorded with
 * legacy versions, and checks which version each node ends up with (or whether it is dropped) in the resulting state.
 */
public void testUpdatesNodeWithOpenSearchVersionForExistingAndNewNodes() throws Exception {
    // While upgrading from Elasticsearch, OpenSearch nodes announce themselves as 7.10.2 to the Elasticsearch master so that
    // they are allowed to join. Once an OpenSearch node becomes master, the cluster state should report the OpenSearch nodes
    // with their 1.x version; the state inherited from the ES master still holds the stale 7.10.2 in DiscoveryNode.
    final AllocationService allocationService = mock(AllocationService.class);
    // Both allocation hooks hand back the cluster state they were given.
    when(allocationService.adaptAutoExpandReplicas(any())).then(invocation -> invocation.getArguments()[0]);
    when(allocationService.disassociateDeadNodes(any(), anyBoolean(), any())).then(invocation -> invocation.getArguments()[0]);
    final RerouteService rerouteService = (reason, priority, listener) -> listener.onResponse(null);

    final Map<String, Version> versionsByChannel = new HashMap<>();
    // OpenSearch node running BWC version (also used as the local node)
    final String localOsNodeId = UUIDs.base64UUID();
    // OpenSearch node running BWC version
    final String peerOsNodeId = UUIDs.base64UUID();
    // OpenSearch node running BWC version, sending new join request and no active channel
    final String rejoiningOsNodeId = UUIDs.base64UUID();
    // ES node 7.10.2 with a channel version
    final String esNodeId = UUIDs.base64UUID();
    // ES node 7.10.2 in cluster but missing channel version
    final String esNodeNoChannelId = UUIDs.base64UUID();
    // Legacy node listed as 7.10.1 in the cluster state whose channel reports 7.10.0
    final String olderEsNodeId = UUIDs.base64UUID();
    // Legacy node listed as 7.10.0 in the cluster state but missing channel version
    final String olderEsNodeNoChannelId = UUIDs.base64UUID();

    versionsByChannel.put(localOsNodeId, Version.CURRENT);
    versionsByChannel.put(peerOsNodeId, Version.CURRENT);
    versionsByChannel.put(esNodeId, LegacyESVersion.V_7_10_2);
    versionsByChannel.put(olderEsNodeId, LegacyESVersion.V_7_10_0);

    final TransportService transportService = mock(TransportService.class);
    when(transportService.getChannelVersion(any())).thenReturn(versionsByChannel);

    // Every node starts out with a legacy version in the cluster state.
    final DiscoveryNodes.Builder nodes = new DiscoveryNodes.Builder().localNodeId(localOsNodeId)
        .add(new DiscoveryNode(localOsNodeId, buildNewFakeTransportAddress(), LegacyESVersion.V_7_10_2))
        .add(new DiscoveryNode(peerOsNodeId, buildNewFakeTransportAddress(), LegacyESVersion.V_7_10_2))
        .add(new DiscoveryNode(rejoiningOsNodeId, buildNewFakeTransportAddress(), LegacyESVersion.V_7_10_2))
        .add(new DiscoveryNode(esNodeId, buildNewFakeTransportAddress(), LegacyESVersion.V_7_10_2))
        .add(new DiscoveryNode(esNodeNoChannelId, buildNewFakeTransportAddress(), LegacyESVersion.V_7_10_2))
        .add(new DiscoveryNode(olderEsNodeId, buildNewFakeTransportAddress(), LegacyESVersion.V_7_10_1))
        .add(new DiscoveryNode(olderEsNodeNoChannelId, buildNewFakeTransportAddress(), LegacyESVersion.V_7_10_0));
    final ClusterState clusterState = ClusterState.builder(ClusterName.DEFAULT).nodes(nodes).build();

    final JoinTaskExecutor joinTaskExecutor = new JoinTaskExecutor(
        Settings.EMPTY,
        allocationService,
        logger,
        rerouteService,
        transportService
    );

    // Build a join for the third OpenSearch node: identical identity, but announcing the current version.
    final DiscoveryNode staleNode = clusterState.nodes().get(rejoiningOsNodeId);
    final DiscoveryNode freshJoin = new DiscoveryNode(
        staleNode.getName(),
        staleNode.getId(),
        staleNode.getEphemeralId(),
        staleNode.getHostName(),
        staleNode.getHostAddress(),
        staleNode.getAddress(),
        staleNode.getAttributes(),
        staleNode.getRoles(),
        Version.CURRENT
    );

    final List<JoinTaskExecutor.Task> joinTasks = List.of(
        new JoinTaskExecutor.Task(freshJoin, "test"),
        JoinTaskExecutor.newBecomeMasterTask(),
        JoinTaskExecutor.newFinishElectionTask()
    );
    final ClusterStateTaskExecutor.ClusterTasksResult<JoinTaskExecutor.Task> result = joinTaskExecutor.execute(clusterState, joinTasks);

    // Only the first reported task result is inspected.
    final ClusterStateTaskExecutor.TaskResult taskResult = result.executionResults.values().iterator().next();
    assertTrue(taskResult.isSuccess());

    final DiscoveryNodes resultNodes = result.resultingState.getNodes();
    assertEquals(Version.CURRENT, resultNodes.get(localOsNodeId).getVersion());
    assertEquals(Version.CURRENT, resultNodes.get(peerOsNodeId).getVersion());
    // 7.10.2 in old state but sent new join and processed
    assertEquals(Version.CURRENT, resultNodes.get(rejoiningOsNodeId).getVersion());
    assertEquals(LegacyESVersion.V_7_10_2, resultNodes.get(esNodeId).getVersion());
    // 7.10.2 node without active channel will be removed and should rejoin
    assertFalse(resultNodes.nodeExists(esNodeNoChannelId));
    // Listed as 7.10.1 in the old state; the resulting state carries the 7.10.0 reported by its channel
    assertEquals(LegacyESVersion.V_7_10_0, resultNodes.get(olderEsNodeId).getVersion());
    // 7.10.0 node without active channel but shouldn't get removed
    assertEquals(LegacyESVersion.V_7_10_0, resultNodes.get(olderEsNodeNoChannelId).getVersion());
}
use of org.opensearch.cluster.routing.RerouteService in project OpenSearch by opensearch-project.
the class InternalSnapshotsInfoServiceTests method testSnapshotShardSizes.
/**
 * Adds a random number of unassigned shards, blocks the repository's shard-status lookups behind a latch, and checks
 * that the service reports every shard size as unknown while blocked and as known (with the expected value) once the
 * lookups are released and one reroute per shard has been observed.
 */
public void testSnapshotShardSizes() throws Exception {
    final int maxConcurrentFetches = randomIntBetween(1, 10);
    final int numberOfShards = randomIntBetween(1, 50);

    // Counted down once per reroute; the test waits for exactly one reroute per shard.
    final CountDownLatch rerouteLatch = new CountDownLatch(numberOfShards);
    final RerouteService rerouteService = (reason, priority, listener) -> {
        listener.onResponse(clusterService.state());
        // A CountDownLatch count is never negative, so the check has to be strictly positive to catch a reroute
        // beyond the expected one-per-shard.
        assertTrue("reroute requested more often than there are shards", rerouteLatch.getCount() > 0L);
        rerouteLatch.countDown();
    };

    final InternalSnapshotsInfoService snapshotsInfoService = new InternalSnapshotsInfoService(
        Settings.builder().put(INTERNAL_SNAPSHOT_INFO_MAX_CONCURRENT_FETCHES_SETTING.getKey(), maxConcurrentFetches).build(),
        clusterService,
        () -> repositoriesService,
        () -> rerouteService
    );

    final String indexName = randomAlphaOfLength(10).toLowerCase(Locale.ROOT);
    final long[] expectedShardSizes = new long[numberOfShards];
    for (int i = 0; i < expectedShardSizes.length; i++) {
        expectedShardSizes[i] = randomNonNegativeLong();
    }

    final AtomicInteger getShardSnapshotStatusCount = new AtomicInteger(0);
    // Holds every shard-status lookup until the test has checked the "unknown" counters.
    final CountDownLatch latch = new CountDownLatch(1);
    final Repository mockRepository = new FilterRepository(mock(Repository.class)) {
        @Override
        public IndexShardSnapshotStatus getShardSnapshotStatus(SnapshotId snapshotId, IndexId indexId, ShardId shardId) {
            try {
                assertThat(indexId.getName(), equalTo(indexName));
                assertThat(shardId.id(), allOf(greaterThanOrEqualTo(0), lessThan(numberOfShards)));
                latch.await();
                getShardSnapshotStatusCount.incrementAndGet();
                return IndexShardSnapshotStatus.newDone(0L, 0L, 0, 0, 0L, expectedShardSizes[shardId.id()], null);
            } catch (InterruptedException e) {
                throw new AssertionError(e);
            }
        }
    };
    when(repositoriesService.repository("_repo")).thenReturn(mockRepository);

    applyClusterState("add-unassigned-shards", clusterState -> addUnassignedShards(clusterState, indexName, numberOfShards));
    waitForMaxActiveGenericThreads(Math.min(numberOfShards, maxConcurrentFetches));

    if (randomBoolean()) {
        applyClusterState(
            "reapply-last-cluster-state-to-check-deduplication-works",
            state -> ClusterState.builder(state).incrementVersion().build()
        );
    }

    // Lookups are still blocked, so nothing is known yet.
    assertThat(snapshotsInfoService.numberOfUnknownSnapshotShardSizes(), equalTo(numberOfShards));
    assertThat(snapshotsInfoService.numberOfKnownSnapshotShardSizes(), equalTo(0));

    // Release the lookups and wait for one reroute per shard.
    latch.countDown();
    assertTrue(rerouteLatch.await(30L, TimeUnit.SECONDS));

    assertThat(snapshotsInfoService.numberOfKnownSnapshotShardSizes(), equalTo(numberOfShards));
    assertThat(snapshotsInfoService.numberOfUnknownSnapshotShardSizes(), equalTo(0));
    assertThat(snapshotsInfoService.numberOfFailedSnapshotShardSizes(), equalTo(0));
    assertThat(getShardSnapshotStatusCount.get(), equalTo(numberOfShards));

    final SnapshotShardSizeInfo snapshotShardSizeInfo = snapshotsInfoService.snapshotShardSizes();
    for (int i = 0; i < numberOfShards; i++) {
        final ShardRouting shardRouting = clusterService.state().routingTable().index(indexName).shard(i).primaryShard();
        assertThat(snapshotShardSizeInfo.getShardSize(shardRouting), equalTo(expectedShardSizes[i]));
        // The fallback value must be ignored when a size is known.
        assertThat(snapshotShardSizeInfo.getShardSize(shardRouting, Long.MIN_VALUE), equalTo(expectedShardSizes[i]));
    }
}
use of org.opensearch.cluster.routing.RerouteService in project OpenSearch by opensearch-project.
the class InternalSnapshotsInfoServiceTests method testErroneousSnapshotShardSizes.
/**
 * Makes the repository randomly fail or succeed when asked for a shard's snapshot status, then checks that the
 * service's known / failed / unknown counters and the sizes it reports agree with what the repository produced, and
 * that every fetch triggered exactly one reroute.
 */
public void testErroneousSnapshotShardSizes() throws Exception {
    final AtomicInteger reroutes = new AtomicInteger();
    final RerouteService rerouteService = (reason, priority, listener) -> {
        reroutes.incrementAndGet();
        listener.onResponse(clusterService.state());
    };

    final Settings serviceSettings = Settings.builder()
        .put(INTERNAL_SNAPSHOT_INFO_MAX_CONCURRENT_FETCHES_SETTING.getKey(), randomIntBetween(1, 10))
        .build();
    final InternalSnapshotsInfoService snapshotsInfoService = new InternalSnapshotsInfoService(
        serviceSettings,
        clusterService,
        () -> repositoriesService,
        () -> rerouteService
    );

    // Outcome per snapshot shard as produced by the repository stub; Long.MIN_VALUE marks a simulated failure.
    final Map<InternalSnapshotsInfoService.SnapshotShard, Long> results = new ConcurrentHashMap<>();
    final Repository mockRepository = new FilterRepository(mock(Repository.class)) {
        @Override
        public IndexShardSnapshotStatus getShardSnapshotStatus(SnapshotId snapshotId, IndexId indexId, ShardId shardId) {
            final InternalSnapshotsInfoService.SnapshotShard key = new InternalSnapshotsInfoService.SnapshotShard(
                new Snapshot("_repo", snapshotId),
                indexId,
                shardId
            );
            if (randomBoolean()) {
                results.put(key, Long.MIN_VALUE);
                throw new SnapshotException(key.snapshot(), "simulated");
            }
            final long size = randomNonNegativeLong();
            results.put(key, size);
            return IndexShardSnapshotStatus.newDone(0L, 0L, 0, 0, 0L, size, null);
        }
    };
    when(repositoriesService.repository("_repo")).thenReturn(mockRepository);

    final int maxShardsToCreate = scaledRandomIntBetween(10, 500);
    // Adds indices with random shard counts from a separate thread until the shard budget is used up.
    final Thread addSnapshotRestoreIndicesThread = new Thread(() -> {
        int shardsLeft = maxShardsToCreate;
        while (shardsLeft > 0) {
            final String indexName = randomAlphaOfLength(10).toLowerCase(Locale.ROOT);
            final int numberOfShards = randomIntBetween(1, shardsLeft);
            try {
                applyClusterState(
                    "add-more-unassigned-shards-for-" + indexName,
                    clusterState -> addUnassignedShards(clusterState, indexName, numberOfShards)
                );
            } catch (Exception e) {
                throw new AssertionError(e);
            } finally {
                shardsLeft -= numberOfShards;
            }
        }
    });
    addSnapshotRestoreIndicesThread.start();
    addSnapshotRestoreIndicesThread.join();

    final Predicate<Long> failedSnapshotShardSizeRetrieval = shardSize -> shardSize == Long.MIN_VALUE;
    // Fetches complete asynchronously, so poll until the counters line up with the recorded outcomes.
    assertBusy(() -> {
        assertThat(
            snapshotsInfoService.numberOfKnownSnapshotShardSizes(),
            equalTo((int) results.values().stream().filter(failedSnapshotShardSizeRetrieval.negate()).count())
        );
        assertThat(
            snapshotsInfoService.numberOfFailedSnapshotShardSizes(),
            equalTo((int) results.values().stream().filter(failedSnapshotShardSizeRetrieval).count())
        );
        assertThat(snapshotsInfoService.numberOfUnknownSnapshotShardSizes(), equalTo(0));
    });

    final SnapshotShardSizeInfo snapshotShardSizeInfo = snapshotsInfoService.snapshotShardSizes();
    for (Map.Entry<InternalSnapshotsInfoService.SnapshotShard, Long> outcome : results.entrySet()) {
        final ShardId shardId = outcome.getKey().shardId();
        final ShardRouting shardRouting = clusterService.state()
            .routingTable()
            .index(shardId.getIndexName())
            .shard(shardId.id())
            .primaryShard();
        assertThat(shardRouting, notNullValue());

        final boolean success = failedSnapshotShardSizeRetrieval.negate().test(outcome.getValue());
        // Without a fallback, a failed fetch is reported as the "unavailable" marker.
        assertThat(
            snapshotShardSizeInfo.getShardSize(shardRouting),
            success ? equalTo(outcome.getValue()) : equalTo(ShardRouting.UNAVAILABLE_EXPECTED_SHARD_SIZE)
        );
        // With a fallback, a failed fetch is reported as that fallback.
        final long defaultValue = randomNonNegativeLong();
        assertThat(
            snapshotShardSizeInfo.getShardSize(shardRouting, defaultValue),
            success ? equalTo(outcome.getValue()) : equalTo(defaultValue)
        );
    }

    assertThat("Expecting all snapshot shard size fetches to provide a size", results.size(), equalTo(maxShardsToCreate));
    assertThat("Expecting all snapshot shard size fetches to execute a Reroute", reroutes.get(), equalTo(maxShardsToCreate));
}
Aggregations