Search in sources :

Example 1 with ClusterState

use of org.opensearch.cluster.ClusterState in project OpenSearch by opensearch-project.

In the class ZenDiscoveryIT, the method testHandleNodeJoin_incompatibleClusterState:

/**
 * Verifies that a validate-join request carrying cluster state with a custom metadata
 * section is rejected when the receiving node cannot deserialize it, surfacing an
 * "Unknown NamedWriteable" failure instead of accepting the join.
 */
public void testHandleNodeJoin_incompatibleClusterState() throws InterruptedException, ExecutionException, TimeoutException {
    // One dedicated master plus a second node whose local state we will decorate.
    String masterNode = internalCluster().startMasterOnlyNode();
    String node1 = internalCluster().startNode();
    ClusterService clusterService = internalCluster().getInstance(ClusterService.class, node1);
    Coordinator coordinator = (Coordinator) internalCluster().getInstance(Discovery.class, masterNode);
    final ClusterState state = clusterService.state();
    // Inject a custom metadata section; the assertion below expects deserialization of it
    // to fail on the receiving side with an unknown-NamedWriteable error.
    Metadata.Builder mdBuilder = Metadata.builder(state.metadata());
    mdBuilder.putCustom(CustomMetadata.TYPE, new CustomMetadata("data"));
    ClusterState stateWithCustomMetadata = ClusterState.builder(state).metadata(mdBuilder).build();
    final CompletableFuture<Throwable> future = new CompletableFuture<>();
    DiscoveryNode node = state.nodes().getLocalNode();
    // Bridge the async callback to the test thread: a failure completes the future with
    // the exception; an unexpected success completes it exceptionally so get() below fails.
    coordinator.sendValidateJoinRequest(stateWithCustomMetadata, new JoinRequest(node, 0L, Optional.empty()), new JoinHelper.JoinCallback() {

        @Override
        public void onSuccess() {
            future.completeExceptionally(new AssertionError("onSuccess should not be called"));
        }

        @Override
        public void onFailure(Exception e) {
            future.complete(e);
        }
    });
    Throwable t = future.get(10, TimeUnit.SECONDS);
    // Expected failure chain: IllegalStateException -> RemoteTransportException ->
    // IllegalArgumentException mentioning the unknown NamedWriteable.
    assertTrue(t instanceof IllegalStateException);
    assertTrue(t.getCause() instanceof RemoteTransportException);
    assertTrue(t.getCause().getCause() instanceof IllegalArgumentException);
    assertThat(t.getCause().getCause().getMessage(), containsString("Unknown NamedWriteable"));
}
Also used : ClusterState(org.opensearch.cluster.ClusterState) RemoteTransportException(org.opensearch.transport.RemoteTransportException) DiscoveryNode(org.opensearch.cluster.node.DiscoveryNode) Discovery(org.opensearch.discovery.Discovery) Metadata(org.opensearch.cluster.metadata.Metadata) TestCustomMetadata(org.opensearch.test.TestCustomMetadata) Matchers.containsString(org.hamcrest.Matchers.containsString) TimeoutException(java.util.concurrent.TimeoutException) RemoteTransportException(org.opensearch.transport.RemoteTransportException) ExecutionException(java.util.concurrent.ExecutionException) CompletableFuture(java.util.concurrent.CompletableFuture) ClusterService(org.opensearch.cluster.service.ClusterService) TestCustomMetadata(org.opensearch.test.TestCustomMetadata)

Example 2 with ClusterState

use of org.opensearch.cluster.ClusterState in project OpenSearch by opensearch-project.

In the class AllocationIdIT, the method testFailedRecoveryOnAllocateStalePrimaryRequiresAnotherAllocateStalePrimary:

public void testFailedRecoveryOnAllocateStalePrimaryRequiresAnotherAllocateStalePrimary() throws Exception {
    /*
     * The allocation id is put on the shard at start, while the historyUUID is adjusted only
     * after recovery is done.
     *
     * If, during execution of AllocateStalePrimary, a proper allocation id is stored in the
     * allocation id set and recovery then fails, the shard restart skips the stage where the
     * historyUUID is changed.
     *
     * That leads to a situation where the allocated stale primary and its replica belong to
     * the same historyUUID, and the replica will receive operations after the local checkpoint
     * even though documents before the checkpoint could be significantly different.
     *
     * Therefore, on AllocateStalePrimary we put a fake allocation id (no real one could be
     * generated like that), and any failure during recovery requires an extra
     * AllocateStalePrimary command to be executed.
     */
    // initial set up: one master, one data node, a 1-shard/1-replica index with checksum
    // verification on startup so the corruption marker below fails recovery
    final String indexName = "index42";
    final String master = internalCluster().startMasterOnlyNode();
    String node1 = internalCluster().startNode();
    createIndex(indexName, Settings.builder().put(IndexMetadata.SETTING_NUMBER_OF_SHARDS, 1).put(IndexMetadata.SETTING_NUMBER_OF_REPLICAS, 1).put(IndexSettings.INDEX_CHECK_ON_STARTUP.getKey(), "checksum").build());
    final int numDocs = indexDocs(indexName, "foo", "bar");
    final IndexSettings indexSettings = getIndexSettings(indexName, node1);
    final Set<String> allocationIds = getAllocationIds(indexName);
    final ShardId shardId = new ShardId(resolveIndex(indexName), 0);
    final Path indexPath = getIndexPath(node1, shardId);
    assertThat(allocationIds, hasSize(1));
    // remember the pre-failure historyUUID so we can assert it changes after re-allocation
    final String historyUUID = historyUUID(node1, indexName);
    String node2 = internalCluster().startNode();
    ensureGreen(indexName);
    internalCluster().assertSameDocIdsOnShards();
    // initial set up is done; keep each node's data path so restarts reuse the same data
    Settings node1DataPathSettings = internalCluster().dataPathSettings(node1);
    Settings node2DataPathSettings = internalCluster().dataPathSettings(node2);
    internalCluster().stopRandomNode(InternalTestCluster.nameFilter(node1));
    // index more docs to node2 so that node1's copy becomes stale
    int numExtraDocs = indexDocs(indexName, "foo", "bar2");
    assertHitCount(client(node2).prepareSearch(indexName).setQuery(matchAllQuery()).get(), numDocs + numExtraDocs);
    internalCluster().stopRandomNode(InternalTestCluster.nameFilter(node2));
    // create a fake corruption marker on node1 so its upcoming recovery fails
    putFakeCorruptionMarker(indexSettings, shardId, indexPath);
    // according to the master, node1 is now out of sync (stale)
    node1 = internalCluster().startNode(node1DataPathSettings);
    // there is only a _stale_ primary available
    checkNoValidShardCopy(indexName, shardId);
    // allocate the stale primary anyway (accept_data_loss = true)
    client(node1).admin().cluster().prepareReroute().add(new AllocateStalePrimaryAllocationCommand(indexName, 0, node1, true)).get();
    // allocation fails due to the corruption marker; wait until the shard is unassigned
    assertBusy(() -> {
        final ClusterState state = client().admin().cluster().prepareState().get().getState();
        final ShardRouting shardRouting = state.routingTable().index(indexName).shard(shardId.id()).primaryShard();
        assertThat(shardRouting.state(), equalTo(ShardRoutingState.UNASSIGNED));
        assertThat(shardRouting.unassignedInfo().getReason(), equalTo(UnassignedInfo.Reason.ALLOCATION_FAILED));
    });
    internalCluster().stopRandomNode(InternalTestCluster.nameFilter(node1));
    // remove the corruption marker directly from the store while the node is down
    try (Store store = new Store(shardId, indexSettings, new NIOFSDirectory(indexPath), new DummyShardLock(shardId))) {
        store.removeCorruptionMarker();
    }
    node1 = internalCluster().startNode(node1DataPathSettings);
    // the index is red: no shard is allocated (the allocation id is a fake id that does not match anything)
    checkHealthStatus(indexName, ClusterHealthStatus.RED);
    checkNoValidShardCopy(indexName, shardId);
    // no valid shard copy exists; AllocateStalePrimary has to be invoked again
    client().admin().cluster().prepareReroute().add(new AllocateStalePrimaryAllocationCommand(indexName, 0, node1, true)).get();
    ensureYellow(indexName);
    // bring node2 back so the replica can recover from the newly allocated primary
    node2 = internalCluster().startNode(node2DataPathSettings);
    ensureGreen(indexName);
    // the second allocation must have produced a fresh historyUUID, shared by primary and replica
    assertThat(historyUUID(node1, indexName), not(equalTo(historyUUID)));
    assertThat(historyUUID(node1, indexName), equalTo(historyUUID(node2, indexName)));
    internalCluster().assertSameDocIdsOnShards();
}
Also used : ShardPath(org.opensearch.index.shard.ShardPath) Path(java.nio.file.Path) ClusterState(org.opensearch.cluster.ClusterState) NIOFSDirectory(org.apache.lucene.store.NIOFSDirectory) AllocateStalePrimaryAllocationCommand(org.opensearch.cluster.routing.allocation.command.AllocateStalePrimaryAllocationCommand) IndexSettings(org.opensearch.index.IndexSettings) Store(org.opensearch.index.store.Store) ShardId(org.opensearch.index.shard.ShardId) DummyShardLock(org.opensearch.test.DummyShardLock) Settings(org.opensearch.common.settings.Settings) IndexSettings(org.opensearch.index.IndexSettings)

Example 3 with ClusterState

use of org.opensearch.cluster.ClusterState in project OpenSearch by opensearch-project.

In the class ClusterServiceIT, the method testPendingUpdateTask:

/**
 * Verifies pending-task reporting on the master service: while a blocking update task
 * occupies the single cluster-state update thread, subsequently submitted tasks must be
 * visible (and correctly flagged as executing/queued) both via the local
 * MasterService.pendingTasks() API and via the pending-cluster-tasks transport action
 * from a coordinating-only node.
 */
public void testPendingUpdateTask() throws Exception {
    String node_0 = internalCluster().startNode();
    internalCluster().startCoordinatingOnlyNode(Settings.EMPTY);
    final ClusterService clusterService = internalCluster().getInstance(ClusterService.class, node_0);
    // Task "1" blocks inside execute() (on block1) so the tasks submitted after it stay queued;
    // invoked1 signals that "1" has actually started executing.
    final CountDownLatch block1 = new CountDownLatch(1);
    final CountDownLatch invoked1 = new CountDownLatch(1);
    clusterService.submitStateUpdateTask("1", new ClusterStateUpdateTask() {

        @Override
        public ClusterState execute(ClusterState currentState) {
            invoked1.countDown();
            try {
                block1.await();
            } catch (InterruptedException e) {
                fail();
            }
            return currentState;
        }

        @Override
        public void onFailure(String source, Exception e) {
            invoked1.countDown();
            fail();
        }
    });
    invoked1.await();
    // Queue nine more no-op tasks ("2".."10") behind the blocked task "1".
    final CountDownLatch invoked2 = new CountDownLatch(9);
    for (int i = 2; i <= 10; i++) {
        clusterService.submitStateUpdateTask(Integer.toString(i), new ClusterStateUpdateTask() {

            @Override
            public ClusterState execute(ClusterState currentState) {
                return currentState;
            }

            @Override
            public void onFailure(String source, Exception e) {
                fail();
            }

            @Override
            public void clusterStateProcessed(String source, ClusterState oldState, ClusterState newState) {
                invoked2.countDown();
            }
        });
    }
    // there might be other tasks in this node, make sure to only take the ones we add into account in this test
    // The tasks can be re-ordered, so we need to check out-of-order
    Set<String> controlSources = new HashSet<>(Arrays.asList("1", "2", "3", "4", "5", "6", "7", "8", "9", "10"));
    List<PendingClusterTask> pendingClusterTasks = clusterService.getMasterService().pendingTasks();
    assertThat(pendingClusterTasks.size(), greaterThanOrEqualTo(10));
    // the head of the pending list must be the blocked task "1", flagged as executing
    assertThat(pendingClusterTasks.get(0).getSource().string(), equalTo("1"));
    assertThat(pendingClusterTasks.get(0).isExecuting(), equalTo(true));
    for (PendingClusterTask task : pendingClusterTasks) {
        controlSources.remove(task.getSource().string());
    }
    assertTrue(controlSources.isEmpty());
    // repeat the same check through the transport action, issued from the coordinating-only node
    controlSources = new HashSet<>(Arrays.asList("1", "2", "3", "4", "5", "6", "7", "8", "9", "10"));
    PendingClusterTasksResponse response = internalCluster().coordOnlyNodeClient().admin().cluster().preparePendingClusterTasks().get();
    assertThat(response.pendingTasks().size(), greaterThanOrEqualTo(10));
    assertThat(response.pendingTasks().get(0).getSource().string(), equalTo("1"));
    assertThat(response.pendingTasks().get(0).isExecuting(), equalTo(true));
    for (PendingClusterTask task : response) {
        controlSources.remove(task.getSource().string());
    }
    assertTrue(controlSources.isEmpty());
    // unblock task "1" and wait for all queued tasks to be processed
    block1.countDown();
    invoked2.await();
    // whenever we test for no tasks, we need to wait since this is a live node
    assertBusy(() -> assertTrue("Pending tasks not empty", clusterService.getMasterService().pendingTasks().isEmpty()));
    waitNoPendingTasksOnAll();
    // second round: block again and check that queued tasks report a positive time-in-queue
    final CountDownLatch block2 = new CountDownLatch(1);
    final CountDownLatch invoked3 = new CountDownLatch(1);
    clusterService.submitStateUpdateTask("1", new ClusterStateUpdateTask() {

        @Override
        public ClusterState execute(ClusterState currentState) {
            invoked3.countDown();
            try {
                block2.await();
            } catch (InterruptedException e) {
                fail();
            }
            return currentState;
        }

        @Override
        public void onFailure(String source, Exception e) {
            invoked3.countDown();
            fail();
        }
    });
    invoked3.await();
    for (int i = 2; i <= 5; i++) {
        clusterService.submitStateUpdateTask(Integer.toString(i), new ClusterStateUpdateTask() {

            @Override
            public ClusterState execute(ClusterState currentState) {
                return currentState;
            }

            @Override
            public void onFailure(String source, Exception e) {
                fail();
            }
        });
    }
    // give the queued tasks a moment to accumulate measurable time-in-queue
    Thread.sleep(100);
    pendingClusterTasks = clusterService.getMasterService().pendingTasks();
    assertThat(pendingClusterTasks.size(), greaterThanOrEqualTo(5));
    controlSources = new HashSet<>(Arrays.asList("1", "2", "3", "4", "5"));
    for (PendingClusterTask task : pendingClusterTasks) {
        controlSources.remove(task.getSource().string());
    }
    assertTrue(controlSources.isEmpty());
    response = internalCluster().coordOnlyNodeClient().admin().cluster().preparePendingClusterTasks().get();
    assertThat(response.pendingTasks().size(), greaterThanOrEqualTo(5));
    controlSources = new HashSet<>(Arrays.asList("1", "2", "3", "4", "5"));
    for (PendingClusterTask task : response) {
        if (controlSources.remove(task.getSource().string())) {
            assertThat(task.getTimeInQueueInMillis(), greaterThan(0L));
        }
    }
    assertTrue(controlSources.isEmpty());
    // release the blocked task so the cluster can drain its queue before test teardown
    block2.countDown();
}
Also used : ClusterState(org.opensearch.cluster.ClusterState) ClusterStateUpdateTask(org.opensearch.cluster.ClusterStateUpdateTask) AckedClusterStateUpdateTask(org.opensearch.cluster.AckedClusterStateUpdateTask) CountDownLatch(java.util.concurrent.CountDownLatch) PendingClusterTasksResponse(org.opensearch.action.admin.cluster.tasks.PendingClusterTasksResponse) HashSet(java.util.HashSet)

Example 4 with ClusterState

use of org.opensearch.cluster.ClusterState in project OpenSearch by opensearch-project.

In the class ClusterShardLimitIT, the method testIndexCreationOverLimit:

/**
 * Verifies that creating an index which would push the cluster over the per-node shard
 * limit is rejected with an IllegalArgumentException, and that the rejected index is not
 * left behind in the cluster state.
 */
public void testIndexCreationOverLimit() {
    int dataNodes = client().admin().cluster().prepareState().get().getState().getNodes().getDataNodes().size();
    ShardCounts counts = ShardCounts.forDataNodeCount(dataNodes);
    setShardsPerNode(counts.getShardsPerNode());
    // Create an index that will bring us up to the limit
    createIndex(
        "test",
        Settings.builder()
            .put(indexSettings())
            .put(SETTING_NUMBER_OF_SHARDS, counts.getFirstIndexShards())
            .put(SETTING_NUMBER_OF_REPLICAS, counts.getFirstIndexReplicas())
            .build()
    );
    // Use expectThrows instead of try/catch + fail(): it is consistent with
    // testIndexCreationOverLimitFromTemplate, fails clearly if creation unexpectedly
    // succeeds, and does not silently swallow an exception of the wrong type.
    final IllegalArgumentException e = expectThrows(
        IllegalArgumentException.class,
        () -> prepareCreate(
            "should-fail",
            Settings.builder()
                .put(indexSettings())
                .put(SETTING_NUMBER_OF_SHARDS, counts.getFailingIndexShards())
                .put(SETTING_NUMBER_OF_REPLICAS, counts.getFailingIndexReplicas())
        ).get()
    );
    verifyException(dataNodes, counts, e);
    // The failed creation must not leave a partial index behind in the cluster state.
    ClusterState clusterState = client().admin().cluster().prepareState().get().getState();
    assertFalse(clusterState.getMetadata().hasIndex("should-fail"));
}
Also used : ClusterState(org.opensearch.cluster.ClusterState)

Example 5 with ClusterState

use of org.opensearch.cluster.ClusterState in project OpenSearch by opensearch-project.

In the class ClusterShardLimitIT, the method testIndexCreationOverLimitFromTemplate:

/**
 * Verifies that an index created through a template whose settings exceed the per-node
 * shard limit is rejected, and that the rejected index does not appear in cluster state.
 */
public void testIndexCreationOverLimitFromTemplate() {
    // Size the shard counts relative to the current number of data nodes so that the
    // template-driven creation below is guaranteed to exceed the configured limit.
    final int nodeCount = client().admin().cluster().prepareState().get().getState().getNodes().getDataNodes().size();
    final ShardCounts counts = ShardCounts.forDataNodeCount(nodeCount);
    setShardsPerNode(counts.getShardsPerNode());
    // Fill the cluster up to (but not over) the limit, unless the limit leaves no room at all.
    if (counts.getFirstIndexShards() > 0) {
        final Settings firstIndexSettings = Settings.builder()
            .put(indexSettings())
            .put(SETTING_NUMBER_OF_SHARDS, counts.getFirstIndexShards())
            .put(SETTING_NUMBER_OF_REPLICAS, counts.getFirstIndexReplicas())
            .build();
        createIndex("test", firstIndexSettings);
    }
    // Register a template whose shard settings would push the cluster over the limit.
    final Settings.Builder overLimitSettings = Settings.builder()
        .put(SETTING_NUMBER_OF_SHARDS, counts.getFailingIndexShards())
        .put(SETTING_NUMBER_OF_REPLICAS, counts.getFailingIndexReplicas());
    assertAcked(
        client().admin()
            .indices()
            .preparePutTemplate("should-fail")
            .setPatterns(Collections.singletonList("should-fail"))
            .setOrder(1)
            .setSettings(overLimitSettings)
            .get()
    );
    // Creating an index that matches the template must now be rejected.
    final IllegalArgumentException rejection = expectThrows(
        IllegalArgumentException.class,
        () -> client().admin().indices().prepareCreate("should-fail").get()
    );
    verifyException(nodeCount, counts, rejection);
    // The failed creation must not leave a partial index behind in the cluster state.
    final ClusterState state = client().admin().cluster().prepareState().get().getState();
    assertFalse(state.getMetadata().hasIndex("should-fail"));
}
Also used : ClusterState(org.opensearch.cluster.ClusterState)

Aggregations

ClusterState (org.opensearch.cluster.ClusterState)1000 IndexMetadata (org.opensearch.cluster.metadata.IndexMetadata)357 Metadata (org.opensearch.cluster.metadata.Metadata)307 RoutingTable (org.opensearch.cluster.routing.RoutingTable)214 Settings (org.opensearch.common.settings.Settings)211 ClusterName (org.opensearch.cluster.ClusterName)194 DiscoveryNode (org.opensearch.cluster.node.DiscoveryNode)192 ShardRouting (org.opensearch.cluster.routing.ShardRouting)183 Matchers.containsString (org.hamcrest.Matchers.containsString)164 ClusterService (org.opensearch.cluster.service.ClusterService)136 Index (org.opensearch.index.Index)129 DiscoveryNodes (org.opensearch.cluster.node.DiscoveryNodes)128 ShardId (org.opensearch.index.shard.ShardId)125 List (java.util.List)118 HashSet (java.util.HashSet)117 IOException (java.io.IOException)115 ArrayList (java.util.ArrayList)113 IndexShardRoutingTable (org.opensearch.cluster.routing.IndexShardRoutingTable)103 Version (org.opensearch.Version)100 ClusterSettings (org.opensearch.common.settings.ClusterSettings)99