Use of org.opensearch.cluster.ClusterState in project OpenSearch by opensearch-project, from the class MasterDisruptionIT, method testIsolateMasterAndVerifyClusterStateConsensus:
/**
 * This test isolates the master from the rest of the cluster, waits for a new master to be elected,
 * heals the partition, and verifies that all nodes agree on the new cluster state.
 */
public void testIsolateMasterAndVerifyClusterStateConsensus() throws Exception {
    final List<String> nodes = startCluster(3);
    assertAcked(
        prepareCreate("test").setSettings(
            Settings.builder()
                .put(IndexMetadata.SETTING_NUMBER_OF_SHARDS, 1 + randomInt(2))
                .put(IndexMetadata.SETTING_NUMBER_OF_REPLICAS, randomInt(2))
        )
    );
    ensureGreen();
    String isolatedNode = internalCluster().getMasterName();
    TwoPartitions partitions = isolateNode(isolatedNode);
    NetworkDisruption networkDisruption = addRandomDisruptionType(partitions);
    networkDisruption.startDisrupting();
    String nonIsolatedNode = partitions.getMajoritySide().iterator().next();
    // make sure the cluster reforms
    ensureStableCluster(2, nonIsolatedNode);
    // make sure the isolated node picks up on things
    assertNoMaster(isolatedNode, TimeValue.timeValueSeconds(40));
    // heal the partition
    networkDisruption.stopDisrupting();
    for (String node : nodes) {
        ensureStableCluster(
            3,
            new TimeValue(DISRUPTION_HEALING_OVERHEAD.millis() + networkDisruption.expectedTimeToHeal().millis()),
            true,
            node
        );
    }
    logger.info("issue a reroute");
    // trigger a reroute now, instead of waiting for the background reroute of RerouteService
    assertAcked(client().admin().cluster().prepareReroute());
    // and wait for it to finish and for the cluster to stabilize
    ensureGreen("test");
    // verify all cluster states are the same
    // use assertBusy to wait for cluster states to be applied (as publish_timeout has a low value)
    assertBusy(() -> {
        ClusterState state = null;
        for (String node : nodes) {
            ClusterState nodeState = getNodeClusterState(node);
            if (state == null) {
                state = nodeState;
                continue;
            }
            // assert the states are identical
            try {
                assertEquals("unequal versions", state.version(), nodeState.version());
                assertEquals("unequal node count", state.nodes().getSize(), nodeState.nodes().getSize());
                assertEquals("different masters", state.nodes().getMasterNodeId(), nodeState.nodes().getMasterNodeId());
                assertEquals("different metadata version", state.metadata().version(), nodeState.metadata().version());
                assertEquals("different routing", state.routingTable().toString(), nodeState.routingTable().toString());
            } catch (AssertionError t) {
                fail(
                    "failed comparing cluster state: " + t.getMessage() + "\n"
                        + "--- cluster state of node [" + nodes.get(0) + "]: ---\n" + state
                        + "\n--- cluster state [" + node + "]: ---\n" + nodeState
                );
            }
        }
    });
}
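For context, getNodeClusterState(node) is a test-infrastructure helper rather than a public API. A minimal sketch of what such a helper looks like, assuming the InternalTestCluster-style client(String node) accessor (a hypothetical reimplementation, not necessarily the project's exact code):

// Hypothetical sketch of the getNodeClusterState(...) helper used above:
// setLocal(true) returns the state as applied on `node` itself rather than
// the elected master's view, which is what the consensus check needs.
private ClusterState getNodeClusterState(String node) {
    return client(node).admin().cluster().prepareState().setLocal(true).get().getState();
}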
Use of org.opensearch.cluster.ClusterState in project OpenSearch by opensearch-project, from the class JoinTaskExecutor, method becomeMasterAndTrimConflictingNodes:
protected ClusterState.Builder becomeMasterAndTrimConflictingNodes(ClusterState currentState, List<Task> joiningNodes) {
    assert currentState.nodes().getMasterNodeId() == null : currentState;
    DiscoveryNodes currentNodes = currentState.nodes();
    DiscoveryNodes.Builder nodesBuilder = DiscoveryNodes.builder(currentNodes);
    nodesBuilder.masterNodeId(currentState.nodes().getLocalNodeId());
    for (final Task joinTask : joiningNodes) {
        if (joinTask.isBecomeMasterTask()) {
            refreshDiscoveryNodeVersionAfterUpgrade(currentNodes, nodesBuilder);
        } else if (joinTask.isFinishElectionTask()) {
            // no-op
        } else {
            final DiscoveryNode joiningNode = joinTask.node();
            final DiscoveryNode nodeWithSameId = nodesBuilder.get(joiningNode.getId());
            if (nodeWithSameId != null && nodeWithSameId.equals(joiningNode) == false) {
                logger.debug("removing existing node [{}], which conflicts with incoming join from [{}]", nodeWithSameId, joiningNode);
                nodesBuilder.remove(nodeWithSameId.getId());
            }
            final DiscoveryNode nodeWithSameAddress = currentNodes.findByAddress(joiningNode.getAddress());
            if (nodeWithSameAddress != null && nodeWithSameAddress.equals(joiningNode) == false) {
                logger.debug("removing existing node [{}], which conflicts with incoming join from [{}]", nodeWithSameAddress, joiningNode);
                nodesBuilder.remove(nodeWithSameAddress.getId());
            }
        }
    }
    // now trim any leftover dead nodes, either left there when the previous master stepped down
    // or removed by us above
    ClusterState tmpState = ClusterState.builder(currentState)
        .nodes(nodesBuilder)
        .blocks(ClusterBlocks.builder().blocks(currentState.blocks()).removeGlobalBlock(NoMasterBlockService.NO_MASTER_BLOCK_ID))
        .build();
    logger.trace("becomeMasterAndTrimConflictingNodes: {}", tmpState.nodes());
    allocationService.cleanCaches();
    tmpState = PersistentTasksCustomMetadata.disassociateDeadNodes(tmpState);
    return ClusterState.builder(allocationService.disassociateDeadNodes(tmpState, false, "removed dead nodes on election"));
}
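Note the copy-and-rebuild idiom running through this method: ClusterState and DiscoveryNodes are immutable, so every change is expressed by deriving a new instance through a builder. A minimal sketch of that idiom in isolation (the helper name promoteLocalNodeToMaster is illustrative, not from the source):

// Illustrative helper (hypothetical name): copy the current node set, mark the
// local node as master, and derive a new immutable ClusterState from it.
static ClusterState promoteLocalNodeToMaster(ClusterState currentState) {
    DiscoveryNodes.Builder nodesBuilder = DiscoveryNodes.builder(currentState.nodes())
        .masterNodeId(currentState.nodes().getLocalNodeId());
    return ClusterState.builder(currentState).nodes(nodesBuilder).build();
}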
Use of org.opensearch.cluster.ClusterState in project OpenSearch by opensearch-project, from the class ConcurrentSnapshotsIT, method testMasterFailOverWithQueuedDeletes:
public void testMasterFailOverWithQueuedDeletes() throws Exception {
    internalCluster().startMasterOnlyNodes(3);
    final String dataNode = internalCluster().startDataOnlyNode();
    final String repoName = "test-repo";
    createRepository(repoName, "mock");
    final String firstIndex = "index-one";
    createIndexWithContent(firstIndex);
    final String firstSnapshot = "snapshot-one";
    blockDataNode(repoName, dataNode);
    final ActionFuture<CreateSnapshotResponse> firstSnapshotResponse = startFullSnapshotFromNonMasterClient(repoName, firstSnapshot);
    waitForBlock(dataNode, repoName, TimeValue.timeValueSeconds(30L));
    final String dataNode2 = internalCluster().startDataOnlyNode();
    ensureStableCluster(5);
    final String secondIndex = "index-two";
    createIndexWithContent(secondIndex, dataNode2, dataNode);
    final String secondSnapshot = "snapshot-two";
    final ActionFuture<CreateSnapshotResponse> secondSnapshotResponse = startFullSnapshot(repoName, secondSnapshot);
    logger.info("--> wait for snapshot on second data node to finish");
    awaitClusterState(state -> {
        final SnapshotsInProgress snapshotsInProgress = state.custom(SnapshotsInProgress.TYPE, SnapshotsInProgress.EMPTY);
        return snapshotsInProgress.entries().size() == 2 && snapshotHasCompletedShard(secondSnapshot, snapshotsInProgress);
    });
    final ActionFuture<AcknowledgedResponse> firstDeleteFuture = startDeleteFromNonMasterClient(repoName, firstSnapshot);
    awaitNDeletionsInProgress(1);
    blockNodeOnAnyFiles(repoName, dataNode2);
    final ActionFuture<CreateSnapshotResponse> snapshotThreeFuture = startFullSnapshotFromNonMasterClient(repoName, "snapshot-three");
    waitForBlock(dataNode2, repoName, TimeValue.timeValueSeconds(30L));
    assertThat(firstSnapshotResponse.isDone(), is(false));
    assertThat(secondSnapshotResponse.isDone(), is(false));
    logger.info("--> waiting for all three snapshots to show up as in-progress");
    assertBusy(() -> assertThat(currentSnapshots(repoName), hasSize(3)), 30L, TimeUnit.SECONDS);
    final ActionFuture<AcknowledgedResponse> deleteAllSnapshots = startDeleteFromNonMasterClient(repoName, "*");
    logger.info("--> wait for delete to be enqueued in cluster state");
    awaitClusterState(state -> {
        final SnapshotDeletionsInProgress deletionsInProgress = state.custom(SnapshotDeletionsInProgress.TYPE);
        return deletionsInProgress.getEntries().size() == 1 && deletionsInProgress.getEntries().get(0).getSnapshots().size() == 3;
    });
    logger.info("--> waiting for second snapshot to finish and the other two snapshots to become aborted");
    assertBusy(() -> {
        assertThat(currentSnapshots(repoName), hasSize(2));
        for (SnapshotsInProgress.Entry entry : clusterService().state().custom(SnapshotsInProgress.TYPE, SnapshotsInProgress.EMPTY).entries()) {
            assertThat(entry.state(), is(SnapshotsInProgress.State.ABORTED));
            assertThat(entry.snapshot().getSnapshotId().getName(), not(secondSnapshot));
        }
    }, 30L, TimeUnit.SECONDS);
    logger.info("--> stopping current master node");
    internalCluster().stopCurrentMasterNode();
    unblockNode(repoName, dataNode);
    unblockNode(repoName, dataNode2);
    for (ActionFuture<AcknowledgedResponse> deleteFuture : Arrays.asList(firstDeleteFuture, deleteAllSnapshots)) {
        try {
            assertAcked(deleteFuture.actionGet());
        } catch (RepositoryException rex) {
            // rarely, the master node fails over twice when shutting down the initial master, failing the transport listener
            assertThat(rex.repository(), is("_all"));
            assertThat(rex.getMessage(), endsWith("Failed to update cluster state during repository operation"));
        } catch (SnapshotMissingException sme) {
            // very rarely, a master fail-over happens at such a time that the client on the data node sees a disconnect
            // exception after the master has already started the delete, so the delete retry can find the snapshot
            // already deleted
            assertThat(sme.getSnapshotName(), is(firstSnapshot));
        }
    }
    expectThrows(SnapshotException.class, snapshotThreeFuture::actionGet);
    logger.info("--> verify that all snapshots are gone and no more work is left in the cluster state");
    assertBusy(() -> {
        assertThat(client().admin().cluster().prepareGetSnapshots(repoName).get().getSnapshots(), empty());
        final ClusterState state = clusterService().state();
        final SnapshotsInProgress snapshotsInProgress = state.custom(SnapshotsInProgress.TYPE);
        assertThat(snapshotsInProgress.entries(), empty());
        final SnapshotDeletionsInProgress snapshotDeletionsInProgress = state.custom(SnapshotDeletionsInProgress.TYPE);
        assertThat(snapshotDeletionsInProgress.getEntries(), empty());
    }, 30L, TimeUnit.SECONDS);
}
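Both awaitClusterState(...) predicates above follow the same read pattern: snapshot bookkeeping lives in the cluster state as named custom sections. A minimal sketch of that lookup (variable names are illustrative):

// Illustrative lookup of snapshot bookkeeping stored as ClusterState customs.
// The two-argument custom(...) overload returns the supplied default when the
// section is absent, avoiding a null check.
ClusterState state = clusterService().state();
SnapshotsInProgress snapshots = state.custom(SnapshotsInProgress.TYPE, SnapshotsInProgress.EMPTY);
boolean anyAborted = snapshots.entries()
    .stream()
    .anyMatch(entry -> entry.state() == SnapshotsInProgress.State.ABORTED);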
Use of org.opensearch.cluster.ClusterState in project OpenSearch by opensearch-project, from the class FakeThreadPoolMasterServiceTests, method testFakeMasterService:
public void testFakeMasterService() {
    List<Runnable> runnableTasks = new ArrayList<>();
    AtomicReference<ClusterState> lastClusterStateRef = new AtomicReference<>();
    DiscoveryNode discoveryNode = new DiscoveryNode(
        "node",
        OpenSearchTestCase.buildNewFakeTransportAddress(),
        Collections.emptyMap(),
        new HashSet<>(DiscoveryNodeRole.BUILT_IN_ROLES),
        Version.CURRENT
    );
    lastClusterStateRef.set(ClusterStateCreationUtils.state(discoveryNode, discoveryNode));
    long firstClusterStateVersion = lastClusterStateRef.get().version();
    AtomicReference<ActionListener<Void>> publishingCallback = new AtomicReference<>();
    final ThreadContext context = new ThreadContext(Settings.EMPTY);
    final ThreadPool mockThreadPool = mock(ThreadPool.class);
    when(mockThreadPool.getThreadContext()).thenReturn(context);
    final ExecutorService executorService = mock(ExecutorService.class);
    doAnswer(invocationOnMock -> runnableTasks.add((Runnable) invocationOnMock.getArguments()[0])).when(executorService).execute(any());
    when(mockThreadPool.generic()).thenReturn(executorService);
    FakeThreadPoolMasterService masterService = new FakeThreadPoolMasterService("test_node", "test", mockThreadPool, runnableTasks::add);
    masterService.setClusterStateSupplier(lastClusterStateRef::get);
    masterService.setClusterStatePublisher((event, publishListener, ackListener) -> {
        lastClusterStateRef.set(event.state());
        publishingCallback.set(publishListener);
    });
    masterService.start();
    AtomicBoolean firstTaskCompleted = new AtomicBoolean();
    masterService.submitStateUpdateTask("test1", new ClusterStateUpdateTask() {
        @Override
        public ClusterState execute(ClusterState currentState) {
            return ClusterState.builder(currentState).metadata(Metadata.builder(currentState.metadata()).put(indexBuilder("test1"))).build();
        }

        @Override
        public void clusterStateProcessed(String source, ClusterState oldState, ClusterState newState) {
            assertFalse(firstTaskCompleted.get());
            firstTaskCompleted.set(true);
        }

        @Override
        public void onFailure(String source, Exception e) {
            throw new AssertionError();
        }
    });
    assertThat(runnableTasks.size(), equalTo(1));
    assertThat(lastClusterStateRef.get().metadata().indices().size(), equalTo(0));
    assertThat(lastClusterStateRef.get().version(), equalTo(firstClusterStateVersion));
    assertNull(publishingCallback.get());
    assertFalse(firstTaskCompleted.get());
    final Runnable scheduleTask = runnableTasks.remove(0);
    assertThat(scheduleTask, hasToString("master service scheduling next task"));
    scheduleTask.run();
    final Runnable publishTask = runnableTasks.remove(0);
    assertThat(publishTask, hasToString(containsString("publish change of cluster state")));
    publishTask.run();
    assertThat(lastClusterStateRef.get().metadata().indices().size(), equalTo(1));
    assertThat(lastClusterStateRef.get().version(), equalTo(firstClusterStateVersion + 1));
    assertNotNull(publishingCallback.get());
    assertFalse(firstTaskCompleted.get());
    assertThat(runnableTasks.size(), equalTo(0));
    AtomicBoolean secondTaskCompleted = new AtomicBoolean();
    masterService.submitStateUpdateTask("test2", new ClusterStateUpdateTask() {
        @Override
        public ClusterState execute(ClusterState currentState) {
            return ClusterState.builder(currentState).metadata(Metadata.builder(currentState.metadata()).put(indexBuilder("test2"))).build();
        }

        @Override
        public void clusterStateProcessed(String source, ClusterState oldState, ClusterState newState) {
            assertFalse(secondTaskCompleted.get());
            secondTaskCompleted.set(true);
        }

        @Override
        public void onFailure(String source, Exception e) {
            throw new AssertionError();
        }
    });
    assertThat(runnableTasks.size(), equalTo(0));
    publishingCallback.getAndSet(null).onResponse(null);
    assertTrue(firstTaskCompleted.get());
    // check that the new task gets queued
    assertThat(runnableTasks.size(), equalTo(1));
    // schedule again
    runnableTasks.remove(0).run();
    // publish again
    runnableTasks.remove(0).run();
    assertThat(lastClusterStateRef.get().metadata().indices().size(), equalTo(2));
    assertThat(lastClusterStateRef.get().version(), equalTo(firstClusterStateVersion + 2));
    assertNotNull(publishingCallback.get());
    assertFalse(secondTaskCompleted.get());
    publishingCallback.getAndSet(null).onResponse(null);
    assertTrue(secondTaskCompleted.get());
    // check that no more tasks are queued
    assertThat(runnableTasks.size(), equalTo(0));
}
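The test drives the scheduling and publication steps by hand, but the ClusterStateUpdateTask contract it exercises is the ordinary one. A minimal sketch of that contract against a regular ClusterService (the source string and the no-op update are illustrative; clusterService is assumed to be injected):

// Minimal sketch of the ClusterStateUpdateTask lifecycle exercised above:
// execute(...) computes the next state, and clusterStateProcessed(...) runs
// only after that state has been published and applied locally.
clusterService.submitStateUpdateTask("demo-task", new ClusterStateUpdateTask() {
    @Override
    public ClusterState execute(ClusterState currentState) {
        // Derive and return the next state here; returning currentState
        // itself (same reference) would skip publication entirely.
        return ClusterState.builder(currentState).build();
    }

    @Override
    public void clusterStateProcessed(String source, ClusterState oldState, ClusterState newState) {
        // Runs on the master once the new state is committed and applied.
    }

    @Override
    public void onFailure(String source, Exception e) {
        // Execution or publication failed; the state was not committed.
    }
});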
Use of org.opensearch.cluster.ClusterState in project OpenSearch by opensearch-project, from the class ShardLimitValidatorTests, method testValidateShardLimit:
public void testValidateShardLimit() {
    int nodesInCluster = randomIntBetween(2, 90);
    ShardCounts counts = forDataNodeCount(nodesInCluster);
    ClusterState state = createClusterForShardLimitTest(
        nodesInCluster,
        counts.getFirstIndexShards(),
        counts.getFirstIndexReplicas(),
        counts.getFailingIndexShards(),
        counts.getFailingIndexReplicas()
    );
    Index[] indices = Arrays.stream(state.metadata().indices().values().toArray(IndexMetadata.class))
        .map(IndexMetadata::getIndex)
        .collect(Collectors.toList())
        .toArray(new Index[2]);
    int totalShards = counts.getFailingIndexShards() * (1 + counts.getFailingIndexReplicas());
    int currentShards = counts.getFirstIndexShards() * (1 + counts.getFirstIndexReplicas());
    int maxShards = counts.getShardsPerNode() * nodesInCluster;
    ShardLimitValidator shardLimitValidator = createTestShardLimitService(counts.getShardsPerNode());
    ValidationException exception = expectThrows(
        ValidationException.class,
        () -> shardLimitValidator.validateShardLimit(state, indices)
    );
    assertEquals(
        "Validation Failed: 1: this action would add [" + totalShards + "] total shards, but this cluster currently has ["
            + currentShards + "]/[" + maxShards + "] maximum shards open;",
        exception.getMessage()
    );
}
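The assertion encodes the shard-limit arithmetic: each index contributes primaries × (1 + replicas) shards, and the cluster-wide cap is shardsPerNode × data nodes. A back-of-envelope version with assumed example numbers:

// Worked example (assumed values, mirroring the arithmetic in the test):
int shardsPerNode = 1000;                  // cluster.max_shards_per_node
int dataNodes = 3;
int maxShards = shardsPerNode * dataNodes; // 3000
int currentShards = 5 * (1 + 1);           // existing index: 5 primaries, 1 replica -> 10
int shardsToAdd = 2000 * (1 + 1);          // new index: 2000 primaries, 1 replica -> 4000
boolean rejected = currentShards + shardsToAdd > maxShards; // 4010 > 3000 -> validation fails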