
Example 26 with ClusterState

Use of org.opensearch.cluster.ClusterState in project OpenSearch by opensearch-project.

From the class MasterDisruptionIT, the method testIsolateMasterAndVerifyClusterStateConsensus:

/**
 * This test isolates the master from the rest of the cluster, waits for a new master to be elected, heals the partition,
 * and verifies that all nodes agree on the new cluster state.
 */
public void testIsolateMasterAndVerifyClusterStateConsensus() throws Exception {
    final List<String> nodes = startCluster(3);
    assertAcked(
        prepareCreate("test").setSettings(
            Settings.builder()
                .put(IndexMetadata.SETTING_NUMBER_OF_SHARDS, 1 + randomInt(2))
                .put(IndexMetadata.SETTING_NUMBER_OF_REPLICAS, randomInt(2))
        )
    );
    ensureGreen();
    String isolatedNode = internalCluster().getMasterName();
    TwoPartitions partitions = isolateNode(isolatedNode);
    NetworkDisruption networkDisruption = addRandomDisruptionType(partitions);
    networkDisruption.startDisrupting();
    String nonIsolatedNode = partitions.getMajoritySide().iterator().next();
    // make sure the majority side re-forms a cluster
    ensureStableCluster(2, nonIsolatedNode);
    // make sure the isolated node notices that it no longer has a master
    assertNoMaster(isolatedNode, TimeValue.timeValueSeconds(40));
    // heal the partition
    networkDisruption.stopDisrupting();
    for (String node : nodes) {
        ensureStableCluster(
            3,
            new TimeValue(DISRUPTION_HEALING_OVERHEAD.millis() + networkDisruption.expectedTimeToHeal().millis()),
            true,
            node
        );
    }
    logger.info("issue a reroute");
    // trigger a reroute now, instead of waiting for the background reroute of RerouteService
    assertAcked(client().admin().cluster().prepareReroute());
    // and wait for it to finish and for the cluster to stabilize
    ensureGreen("test");
    // verify all cluster states are the same
    // use assertBusy to wait for the cluster states to be applied (as publish_timeout has a low value)
    assertBusy(() -> {
        ClusterState state = null;
        for (String node : nodes) {
            ClusterState nodeState = getNodeClusterState(node);
            if (state == null) {
                state = nodeState;
                continue;
            }
            // assert nodes are identical
            try {
                assertEquals("unequal versions", state.version(), nodeState.version());
                assertEquals("unequal node count", state.nodes().getSize(), nodeState.nodes().getSize());
                assertEquals("different masters ", state.nodes().getMasterNodeId(), nodeState.nodes().getMasterNodeId());
                assertEquals("different meta data version", state.metadata().version(), nodeState.metadata().version());
                assertEquals("different routing", state.routingTable().toString(), nodeState.routingTable().toString());
            } catch (AssertionError t) {
                fail("failed comparing cluster state: " + t.getMessage() + "\n" + "--- cluster state of node [" + nodes.get(0) + "]: ---\n" + state + "\n--- cluster state [" + node + "]: ---\n" + nodeState);
            }
        }
    });
}
Also used: ClusterState (org.opensearch.cluster.ClusterState), TwoPartitions (org.opensearch.test.disruption.NetworkDisruption.TwoPartitions), NetworkDisruption (org.opensearch.test.disruption.NetworkDisruption), TimeValue (org.opensearch.common.unit.TimeValue)
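The consensus check at the end of this test compares only the fields that must agree after an election: state version, node count, master id, metadata version, and routing. A minimal sketch of that comparison pulled out into a helper (the helper name is hypothetical; assertEquals is the usual JUnit static import):

private static void assertSameClusterState(ClusterState expected, ClusterState actual) {
    assertEquals("unequal versions", expected.version(), actual.version());
    assertEquals("unequal node count", expected.nodes().getSize(), actual.nodes().getSize());
    assertEquals("different masters", expected.nodes().getMasterNodeId(), actual.nodes().getMasterNodeId());
    assertEquals("different metadata version", expected.metadata().version(), actual.metadata().version());
    // the test compares string renderings of the routing tables rather than the objects themselves
    assertEquals("different routing", expected.routingTable().toString(), actual.routingTable().toString());
}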

Example 27 with ClusterState

Use of org.opensearch.cluster.ClusterState in project OpenSearch by opensearch-project.

From the class JoinTaskExecutor, the method becomeMasterAndTrimConflictingNodes:

protected ClusterState.Builder becomeMasterAndTrimConflictingNodes(ClusterState currentState, List<Task> joiningNodes) {
    assert currentState.nodes().getMasterNodeId() == null : currentState;
    DiscoveryNodes currentNodes = currentState.nodes();
    DiscoveryNodes.Builder nodesBuilder = DiscoveryNodes.builder(currentNodes);
    nodesBuilder.masterNodeId(currentState.nodes().getLocalNodeId());
    for (final Task joinTask : joiningNodes) {
        if (joinTask.isBecomeMasterTask()) {
            refreshDiscoveryNodeVersionAfterUpgrade(currentNodes, nodesBuilder);
        } else if (joinTask.isFinishElectionTask()) {
            // no-op
        } else {
            final DiscoveryNode joiningNode = joinTask.node();
            final DiscoveryNode nodeWithSameId = nodesBuilder.get(joiningNode.getId());
            if (nodeWithSameId != null && nodeWithSameId.equals(joiningNode) == false) {
                logger.debug("removing existing node [{}], which conflicts with incoming join from [{}]", nodeWithSameId, joiningNode);
                nodesBuilder.remove(nodeWithSameId.getId());
            }
            final DiscoveryNode nodeWithSameAddress = currentNodes.findByAddress(joiningNode.getAddress());
            if (nodeWithSameAddress != null && nodeWithSameAddress.equals(joiningNode) == false) {
                logger.debug("removing existing node [{}], which conflicts with incoming join from [{}]", nodeWithSameAddress, joiningNode);
                nodesBuilder.remove(nodeWithSameAddress.getId());
            }
        }
    }
    // now trim any leftover dead nodes - either left there when the previous master stepped down
    // or removed by us above
    ClusterState tmpState = ClusterState.builder(currentState)
        .nodes(nodesBuilder)
        .blocks(ClusterBlocks.builder().blocks(currentState.blocks()).removeGlobalBlock(NoMasterBlockService.NO_MASTER_BLOCK_ID))
        .build();
    logger.trace("becomeMasterAndTrimConflictingNodes: {}", tmpState.nodes());
    allocationService.cleanCaches();
    tmpState = PersistentTasksCustomMetadata.disassociateDeadNodes(tmpState);
    return ClusterState.builder(allocationService.disassociateDeadNodes(tmpState, false, "removed dead nodes on election"));
}
Also used: ClusterState (org.opensearch.cluster.ClusterState), DiscoveryNode (org.opensearch.cluster.node.DiscoveryNode), DiscoveryNodes (org.opensearch.cluster.node.DiscoveryNodes)
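The conflict-trimming rule above is: a joining node evicts any already-registered node that shares its id or its transport address but is not equal() to it, which covers a restarted node rejoining with a fresh ephemeral id. A sketch of just that rule, extracted from the loop (the helper name is an assumption):

static void evictConflictingNodes(DiscoveryNodes currentNodes, DiscoveryNodes.Builder nodesBuilder, DiscoveryNode joiningNode) {
    // same id but a different node instance: the old entry is stale, remove it
    final DiscoveryNode nodeWithSameId = nodesBuilder.get(joiningNode.getId());
    if (nodeWithSameId != null && nodeWithSameId.equals(joiningNode) == false) {
        nodesBuilder.remove(nodeWithSameId.getId());
    }
    // same address but a different node: an old node's address has been reused, remove that one too
    final DiscoveryNode nodeWithSameAddress = currentNodes.findByAddress(joiningNode.getAddress());
    if (nodeWithSameAddress != null && nodeWithSameAddress.equals(joiningNode) == false) {
        nodesBuilder.remove(nodeWithSameAddress.getId());
    }
}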

Example 28 with ClusterState

Use of org.opensearch.cluster.ClusterState in project OpenSearch by opensearch-project.

From the class ConcurrentSnapshotsIT, the method testMasterFailOverWithQueuedDeletes:

public void testMasterFailOverWithQueuedDeletes() throws Exception {
    internalCluster().startMasterOnlyNodes(3);
    final String dataNode = internalCluster().startDataOnlyNode();
    final String repoName = "test-repo";
    createRepository(repoName, "mock");
    final String firstIndex = "index-one";
    createIndexWithContent(firstIndex);
    final String firstSnapshot = "snapshot-one";
    blockDataNode(repoName, dataNode);
    final ActionFuture<CreateSnapshotResponse> firstSnapshotResponse = startFullSnapshotFromNonMasterClient(repoName, firstSnapshot);
    waitForBlock(dataNode, repoName, TimeValue.timeValueSeconds(30L));
    final String dataNode2 = internalCluster().startDataOnlyNode();
    ensureStableCluster(5);
    final String secondIndex = "index-two";
    createIndexWithContent(secondIndex, dataNode2, dataNode);
    final String secondSnapshot = "snapshot-two";
    final ActionFuture<CreateSnapshotResponse> secondSnapshotResponse = startFullSnapshot(repoName, secondSnapshot);
    logger.info("--> wait for snapshot on second data node to finish");
    awaitClusterState(state -> {
        final SnapshotsInProgress snapshotsInProgress = state.custom(SnapshotsInProgress.TYPE, SnapshotsInProgress.EMPTY);
        return snapshotsInProgress.entries().size() == 2 && snapshotHasCompletedShard(secondSnapshot, snapshotsInProgress);
    });
    final ActionFuture<AcknowledgedResponse> firstDeleteFuture = startDeleteFromNonMasterClient(repoName, firstSnapshot);
    awaitNDeletionsInProgress(1);
    blockNodeOnAnyFiles(repoName, dataNode2);
    final ActionFuture<CreateSnapshotResponse> snapshotThreeFuture = startFullSnapshotFromNonMasterClient(repoName, "snapshot-three");
    waitForBlock(dataNode2, repoName, TimeValue.timeValueSeconds(30L));
    assertThat(firstSnapshotResponse.isDone(), is(false));
    assertThat(secondSnapshotResponse.isDone(), is(false));
    logger.info("--> waiting for all three snapshots to show up as in-progress");
    assertBusy(() -> assertThat(currentSnapshots(repoName), hasSize(3)), 30L, TimeUnit.SECONDS);
    final ActionFuture<AcknowledgedResponse> deleteAllSnapshots = startDeleteFromNonMasterClient(repoName, "*");
    logger.info("--> wait for delete to be enqueued in cluster state");
    awaitClusterState(state -> {
        final SnapshotDeletionsInProgress deletionsInProgress = state.custom(SnapshotDeletionsInProgress.TYPE);
        return deletionsInProgress.getEntries().size() == 1 && deletionsInProgress.getEntries().get(0).getSnapshots().size() == 3;
    });
    logger.info("--> waiting for second snapshot to finish and the other two snapshots to become aborted");
    assertBusy(() -> {
        assertThat(currentSnapshots(repoName), hasSize(2));
        for (SnapshotsInProgress.Entry entry : clusterService().state().custom(SnapshotsInProgress.TYPE, SnapshotsInProgress.EMPTY).entries()) {
            assertThat(entry.state(), is(SnapshotsInProgress.State.ABORTED));
            assertThat(entry.snapshot().getSnapshotId().getName(), not(secondSnapshot));
        }
    }, 30L, TimeUnit.SECONDS);
    logger.info("--> stopping current master node");
    internalCluster().stopCurrentMasterNode();
    unblockNode(repoName, dataNode);
    unblockNode(repoName, dataNode2);
    for (ActionFuture<AcknowledgedResponse> deleteFuture : Arrays.asList(firstDeleteFuture, deleteAllSnapshots)) {
        try {
            assertAcked(deleteFuture.actionGet());
        } catch (RepositoryException rex) {
            // rarely, the master fails over twice while the initial master shuts down, which fails the transport listener
            assertThat(rex.repository(), is("_all"));
            assertThat(rex.getMessage(), endsWith("Failed to update cluster state during repository operation"));
        } catch (SnapshotMissingException sme) {
            // very rarely, a master fail-over happens at such a time that the client on the data node sees a disconnect
            // exception after the master has already started the delete, leading the delete retry to find that the
            // snapshot has potentially already been deleted
            assertThat(sme.getSnapshotName(), is(firstSnapshot));
        }
    }
    expectThrows(SnapshotException.class, snapshotThreeFuture::actionGet);
    logger.info("--> verify that all snapshots are gone and no more work is left in the cluster state");
    assertBusy(() -> {
        assertThat(client().admin().cluster().prepareGetSnapshots(repoName).get().getSnapshots(), empty());
        final ClusterState state = clusterService().state();
        final SnapshotsInProgress snapshotsInProgress = state.custom(SnapshotsInProgress.TYPE);
        assertThat(snapshotsInProgress.entries(), empty());
        final SnapshotDeletionsInProgress snapshotDeletionsInProgress = state.custom(SnapshotDeletionsInProgress.TYPE);
        assertThat(snapshotDeletionsInProgress.getEntries(), empty());
    }, 30L, TimeUnit.SECONDS);
}
Also used: ClusterState (org.opensearch.cluster.ClusterState), CreateSnapshotResponse (org.opensearch.action.admin.cluster.snapshots.create.CreateSnapshotResponse), AcknowledgedResponse (org.opensearch.action.support.master.AcknowledgedResponse), SnapshotsInProgress (org.opensearch.cluster.SnapshotsInProgress), RepositoryException (org.opensearch.repositories.RepositoryException), Matchers.containsString (org.hamcrest.Matchers.containsString), SnapshotDeletionsInProgress (org.opensearch.cluster.SnapshotDeletionsInProgress)
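Note the two cluster-state lookup styles the test mixes: state.custom(TYPE, DEFAULT) substitutes a default (such as SnapshotsInProgress.EMPTY) when the metadata section has never been written, while the one-argument state.custom(TYPE) returns null in that case. When the predicate may run before the section first appears, the null-safe form looks like this (a sketch, reusing the test's awaitClusterState helper):

awaitClusterState(state -> {
    // the one-argument custom() returns null until the deletions section exists, so guard before dereferencing
    final SnapshotDeletionsInProgress deletions = state.custom(SnapshotDeletionsInProgress.TYPE);
    return deletions != null && deletions.getEntries().size() == 1;
});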

Example 29 with ClusterState

Use of org.opensearch.cluster.ClusterState in project OpenSearch by opensearch-project.

From the class FakeThreadPoolMasterServiceTests, the method testFakeMasterService:

public void testFakeMasterService() {
    List<Runnable> runnableTasks = new ArrayList<>();
    AtomicReference<ClusterState> lastClusterStateRef = new AtomicReference<>();
    DiscoveryNode discoveryNode = new DiscoveryNode(
        "node",
        OpenSearchTestCase.buildNewFakeTransportAddress(),
        Collections.emptyMap(),
        new HashSet<>(DiscoveryNodeRole.BUILT_IN_ROLES),
        Version.CURRENT
    );
    lastClusterStateRef.set(ClusterStateCreationUtils.state(discoveryNode, discoveryNode));
    long firstClusterStateVersion = lastClusterStateRef.get().version();
    AtomicReference<ActionListener<Void>> publishingCallback = new AtomicReference<>();
    final ThreadContext context = new ThreadContext(Settings.EMPTY);
    final ThreadPool mockThreadPool = mock(ThreadPool.class);
    when(mockThreadPool.getThreadContext()).thenReturn(context);
    final ExecutorService executorService = mock(ExecutorService.class);
    doAnswer(invocationOnMock -> runnableTasks.add((Runnable) invocationOnMock.getArguments()[0])).when(executorService).execute(any());
    when(mockThreadPool.generic()).thenReturn(executorService);
    FakeThreadPoolMasterService masterService = new FakeThreadPoolMasterService("test_node", "test", mockThreadPool, runnableTasks::add);
    masterService.setClusterStateSupplier(lastClusterStateRef::get);
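    // capture the publish listener rather than completing it, so the test decides when publication "finishes"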
    masterService.setClusterStatePublisher((event, publishListener, ackListener) -> {
        lastClusterStateRef.set(event.state());
        publishingCallback.set(publishListener);
    });
    masterService.start();
    AtomicBoolean firstTaskCompleted = new AtomicBoolean();
    masterService.submitStateUpdateTask("test1", new ClusterStateUpdateTask() {

        @Override
        public ClusterState execute(ClusterState currentState) {
            return ClusterState.builder(currentState).metadata(Metadata.builder(currentState.metadata()).put(indexBuilder("test1"))).build();
        }

        @Override
        public void clusterStateProcessed(String source, ClusterState oldState, ClusterState newState) {
            assertFalse(firstTaskCompleted.get());
            firstTaskCompleted.set(true);
        }

        @Override
        public void onFailure(String source, Exception e) {
            throw new AssertionError();
        }
    });
    assertThat(runnableTasks.size(), equalTo(1));
    assertThat(lastClusterStateRef.get().metadata().indices().size(), equalTo(0));
    assertThat(lastClusterStateRef.get().version(), equalTo(firstClusterStateVersion));
    assertNull(publishingCallback.get());
    assertFalse(firstTaskCompleted.get());
    final Runnable scheduleTask = runnableTasks.remove(0);
    assertThat(scheduleTask, hasToString("master service scheduling next task"));
    scheduleTask.run();
    final Runnable publishTask = runnableTasks.remove(0);
    assertThat(publishTask, hasToString(containsString("publish change of cluster state")));
    publishTask.run();
    assertThat(lastClusterStateRef.get().metadata().indices().size(), equalTo(1));
    assertThat(lastClusterStateRef.get().version(), equalTo(firstClusterStateVersion + 1));
    assertNotNull(publishingCallback.get());
    assertFalse(firstTaskCompleted.get());
    assertThat(runnableTasks.size(), equalTo(0));
    AtomicBoolean secondTaskCompleted = new AtomicBoolean();
    masterService.submitStateUpdateTask("test2", new ClusterStateUpdateTask() {

        @Override
        public ClusterState execute(ClusterState currentState) {
            return ClusterState.builder(currentState).metadata(Metadata.builder(currentState.metadata()).put(indexBuilder("test2"))).build();
        }

        @Override
        public void clusterStateProcessed(String source, ClusterState oldState, ClusterState newState) {
            assertFalse(secondTaskCompleted.get());
            secondTaskCompleted.set(true);
        }

        @Override
        public void onFailure(String source, Exception e) {
            throw new AssertionError();
        }
    });
    assertThat(runnableTasks.size(), equalTo(0));
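    // completing the first publication lets the master service schedule the queued second task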
    publishingCallback.getAndSet(null).onResponse(null);
    assertTrue(firstTaskCompleted.get());
    // check that new task gets queued
    assertThat(runnableTasks.size(), equalTo(1));
    // schedule again
    runnableTasks.remove(0).run();
    // publish again
    runnableTasks.remove(0).run();
    assertThat(lastClusterStateRef.get().metadata().indices().size(), equalTo(2));
    assertThat(lastClusterStateRef.get().version(), equalTo(firstClusterStateVersion + 2));
    assertNotNull(publishingCallback.get());
    assertFalse(secondTaskCompleted.get());
    publishingCallback.getAndSet(null).onResponse(null);
    assertTrue(secondTaskCompleted.get());
    // check that no more tasks are queued
    assertThat(runnableTasks.size(), equalTo(0));
}
Also used: ClusterState (org.opensearch.cluster.ClusterState), DiscoveryNode (org.opensearch.cluster.node.DiscoveryNode), ArrayList (java.util.ArrayList), ThreadContext (org.opensearch.common.util.concurrent.ThreadContext), ThreadPool (org.opensearch.threadpool.ThreadPool), ClusterStateUpdateTask (org.opensearch.cluster.ClusterStateUpdateTask), AtomicReference (java.util.concurrent.atomic.AtomicReference), Matchers.hasToString (org.hamcrest.Matchers.hasToString), Matchers.containsString (org.hamcrest.Matchers.containsString), AtomicBoolean (java.util.concurrent.atomic.AtomicBoolean), ActionListener (org.opensearch.action.ActionListener), ExecutorService (java.util.concurrent.ExecutorService)
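The core trick in this test is a deterministic executor: every Runnable handed to the generic thread pool is captured in a list instead of running, so the test advances the master service one step at a time by popping and running tasks itself. The capture is plain Mockito wiring; a minimal sketch of just that part (assuming Mockito's mock/doAnswer/when/any are statically imported, as in the test):

List<Runnable> runnableTasks = new ArrayList<>();
ExecutorService executorService = mock(ExecutorService.class);
// record each submitted task instead of executing it; the test later calls
// runnableTasks.remove(0).run() to perform exactly one scheduling or publish step
doAnswer(invocation -> runnableTasks.add((Runnable) invocation.getArguments()[0]))
    .when(executorService).execute(any());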

Example 30 with ClusterState

Use of org.opensearch.cluster.ClusterState in project OpenSearch by opensearch-project.

From the class ShardLimitValidatorTests, the method testValidateShardLimit:

public void testValidateShardLimit() {
    int nodesInCluster = randomIntBetween(2, 90);
    ShardCounts counts = forDataNodeCount(nodesInCluster);
    ClusterState state = createClusterForShardLimitTest(nodesInCluster, counts.getFirstIndexShards(), counts.getFirstIndexReplicas(), counts.getFailingIndexShards(), counts.getFailingIndexReplicas());
    Index[] indices = Arrays.stream(state.metadata().indices().values().toArray(IndexMetadata.class))
        .map(IndexMetadata::getIndex)
        .collect(Collectors.toList())
        .toArray(new Index[2]);
    int totalShards = counts.getFailingIndexShards() * (1 + counts.getFailingIndexReplicas());
    int currentShards = counts.getFirstIndexShards() * (1 + counts.getFirstIndexReplicas());
    int maxShards = counts.getShardsPerNode() * nodesInCluster;
    ShardLimitValidator shardLimitValidator = createTestShardLimitService(counts.getShardsPerNode());
    ValidationException exception = expectThrows(ValidationException.class, () -> shardLimitValidator.validateShardLimit(state, indices));
    assertEquals("Validation Failed: 1: this action would add [" + totalShards + "] total shards, but this cluster currently has [" + currentShards + "]/[" + maxShards + "] maximum shards open;", exception.getMessage());
}
Also used: ClusterState (org.opensearch.cluster.ClusterState), ValidationException (org.opensearch.common.ValidationException), ShardCounts (org.opensearch.cluster.shards.ShardCounts), Index (org.opensearch.index.Index), MetadataIndexStateServiceTests.addClosedIndex (org.opensearch.cluster.metadata.MetadataIndexStateServiceTests.addClosedIndex), MetadataIndexStateServiceTests.addOpenedIndex (org.opensearch.cluster.metadata.MetadataIndexStateServiceTests.addOpenedIndex), IndexMetadata (org.opensearch.cluster.metadata.IndexMetadata)
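The expected message follows from simple shard arithmetic: an index occupies shards * (1 + replicas) total shards, and the cluster-wide cap is shardsPerNode * nodes (the cluster.max_shards_per_node setting times the data node count). A worked example with illustrative numbers:

// hypothetical values, chosen only to make the arithmetic concrete
int nodesInCluster = 3;
int shardsPerNode = 1000;                          // the cluster.max_shards_per_node setting
int maxShards = shardsPerNode * nodesInCluster;    // 3000 shards allowed cluster-wide
int totalShards = 5 * (1 + 2);                     // a new index with 5 primaries and 2 replicas needs 15 shards
// validateShardLimit fails once currentShards + totalShards would exceed maxShards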

Aggregations

ClusterState (org.opensearch.cluster.ClusterState): 1064
IndexMetadata (org.opensearch.cluster.metadata.IndexMetadata): 376
Metadata (org.opensearch.cluster.metadata.Metadata): 322
Settings (org.opensearch.common.settings.Settings): 220
RoutingTable (org.opensearch.cluster.routing.RoutingTable): 214
ClusterName (org.opensearch.cluster.ClusterName): 213
DiscoveryNode (org.opensearch.cluster.node.DiscoveryNode): 213
ShardRouting (org.opensearch.cluster.routing.ShardRouting): 182
Matchers.containsString (org.hamcrest.Matchers.containsString): 169
ClusterService (org.opensearch.cluster.service.ClusterService): 157
DiscoveryNodes (org.opensearch.cluster.node.DiscoveryNodes): 140
Index (org.opensearch.index.Index): 130
ShardId (org.opensearch.index.shard.ShardId): 125
HashSet (java.util.HashSet): 123
IOException (java.io.IOException): 120
ArrayList (java.util.ArrayList): 119
List (java.util.List): 119
ClusterSettings (org.opensearch.common.settings.ClusterSettings): 107
IndexShardRoutingTable (org.opensearch.cluster.routing.IndexShardRoutingTable): 103
HashMap (java.util.HashMap): 101