Search in sources :

Example 1 with IndexShardNotStartedException

Usage of org.opensearch.index.shard.IndexShardNotStartedException in the OpenSearch project by opensearch-project.

From the class ReplicationOperationTests, method testReplication:

/**
 * Runs a replication operation against a randomly generated replication group and checks
 * the bookkeeping reported by the primary, the replica proxy and the resulting {@code ShardInfo}.
 *
 * <p>For a random subset of the expected replicas a failure is simulated. Only the
 * {@code CorruptIndexException} variant is recorded in the map of failures expected to show up
 * in {@code ShardInfo}; the {@code IndexShardNotStartedException} variant is simulated but not
 * recorded there. Every simulated failure is still expected in the proxy's failed replicas and
 * is subtracted from the expected successful count.
 *
 * @throws Exception if any step of the test fails unexpectedly
 */
public void testReplication() throws Exception {
    final String indexName = "test";
    final ShardId shardId = new ShardId(indexName, "_na_", 0);

    ClusterState clusterState = stateWithActivePrimary(indexName, true, randomInt(5));
    final IndexMetadata metadata = clusterState.getMetadata().index(indexName);
    final long primaryTerm = metadata.primaryTerm(0);
    final IndexShardRoutingTable routingTable = clusterState.getRoutingTable().shardRoutingTable(shardId);

    ShardRouting primaryRouting = routingTable.primaryShard();
    if (primaryRouting.relocating()) {
        if (randomBoolean()) {
            // simulate execution of the replication phase on the relocation target node
            // after relocation source was marked as relocated
            clusterState = ClusterState.builder(clusterState)
                .nodes(DiscoveryNodes.builder(clusterState.nodes()).localNodeId(primaryRouting.relocatingNodeId()))
                .build();
            primaryRouting = primaryRouting.getTargetRelocatingShard();
        }
    }

    // in-sync allocation ids that have no corresponding routing entry
    final Set<String> staleIds = Sets.newHashSet(generateRandomStringArray(4, 10, false));
    final Set<String> inSyncIds = Sets.union(metadata.inSyncAllocationIds(0), staleIds);

    final Set<String> tracked = new HashSet<>();
    final Set<String> untracked = new HashSet<>();
    addTrackingInfo(routingTable, primaryRouting, tracked, untracked);
    tracked.addAll(staleIds);

    final ReplicationGroup group = new ReplicationGroup(routingTable, inSyncIds, tracked, 0);
    final Set<ShardRouting> expectedReplicas = getExpectedReplicas(shardId, clusterState, tracked);

    // failuresToSimulate: what the replica proxy will throw per replica
    // failuresToReport: the subset expected to be listed in the ShardInfo
    final Map<ShardRouting, Exception> failuresToSimulate = new HashMap<>();
    final Map<ShardRouting, Exception> failuresToReport = new HashMap<>();
    for (ShardRouting replica : expectedReplicas) {
        if (randomBoolean() == false) {
            continue;
        }
        final Exception failure;
        if (randomBoolean()) {
            failure = new CorruptIndexException("simulated", (String) null);
            failuresToReport.put(replica, failure);
        } else {
            failure = new IndexShardNotStartedException(shardId, IndexShardState.RECOVERING);
        }
        logger.debug("--> simulating failure on {} with [{}]", replica, failure.getClass().getSimpleName());
        failuresToSimulate.put(replica, failure);
    }

    final Request request = new Request(shardId);
    final PlainActionFuture<TestPrimary.Result> future = new PlainActionFuture<>();
    final TestReplicaProxy proxy = new TestReplicaProxy(failuresToSimulate);
    final TestPrimary primary = new TestPrimary(primaryRouting, () -> group, threadPool);
    new TestReplicationOperation(request, primary, future, proxy, primaryTerm).execute();

    // the request must have reached the primary and every expected replica
    assertThat("request was not processed on primary", request.processedOnPrimary.get(), equalTo(true));
    assertThat(request.processedOnReplicas, equalTo(expectedReplicas));
    assertThat(proxy.failedReplicas, equalTo(failuresToSimulate.keySet()));
    assertThat(proxy.markedAsStaleCopies, equalTo(staleIds));
    assertThat("post replication operations not run on primary", request.runPostReplicationActionsOnPrimary.get(), equalTo(true));
    assertTrue("listener is not marked as done", future.isDone());

    // shard accounting exposed through the response
    final ShardInfo shardInfo = future.actionGet().getShardInfo();
    final int expectedFailed = failuresToReport.size();
    assertThat(shardInfo.getFailed(), equalTo(expectedFailed));
    assertThat(shardInfo.getFailures(), arrayWithSize(expectedFailed));
    final int expectedSuccessful = 1 + expectedReplicas.size() - failuresToSimulate.size();
    assertThat(shardInfo.getSuccessful(), equalTo(expectedSuccessful));
    final int unassignedCount = routingTable.shardsWithState(ShardRoutingState.UNASSIGNED).size();
    final int expectedTotal = 1 + expectedReplicas.size() + unassignedCount + untracked.size();
    assertThat(group.toString(), shardInfo.getTotal(), equalTo(expectedTotal));

    // checkpoints: drop the primary's own entry, the rest must match what the proxy generated
    assertThat(primary.knownLocalCheckpoints.remove(primaryRouting.allocationId().getId()), equalTo(primary.localCheckpoint));
    assertThat(primary.knownLocalCheckpoints, equalTo(proxy.generatedLocalCheckpoints));
    assertThat(primary.knownGlobalCheckpoints.remove(primaryRouting.allocationId().getId()), equalTo(primary.globalCheckpoint));
    assertThat(primary.knownGlobalCheckpoints, equalTo(proxy.generatedGlobalCheckpoints));
}
Also used : IndexShardRoutingTable(org.opensearch.cluster.routing.IndexShardRoutingTable) HashMap(java.util.HashMap) ReplicationGroup(org.opensearch.index.shard.ReplicationGroup) ShardId(org.opensearch.index.shard.ShardId) IndexMetadata(org.opensearch.cluster.metadata.IndexMetadata) HashSet(java.util.HashSet) ShardInfo(org.opensearch.action.support.replication.ReplicationResponse.ShardInfo) ClusterState(org.opensearch.cluster.ClusterState) IndexShardNotStartedException(org.opensearch.index.shard.IndexShardNotStartedException) CorruptIndexException(org.apache.lucene.index.CorruptIndexException) OpenSearchRejectedExecutionException(org.opensearch.common.util.concurrent.OpenSearchRejectedExecutionException) AlreadyClosedException(org.apache.lucene.store.AlreadyClosedException) CorruptIndexException(org.apache.lucene.index.CorruptIndexException) NodeClosedException(org.opensearch.node.NodeClosedException) UnavailableShardsException(org.opensearch.action.UnavailableShardsException) RemoteTransportException(org.opensearch.transport.RemoteTransportException) ConnectTransportException(org.opensearch.transport.ConnectTransportException) CircuitBreakingException(org.opensearch.common.breaker.CircuitBreakingException) IndexShardNotStartedException(org.opensearch.index.shard.IndexShardNotStartedException) SendRequestTransportException(org.opensearch.transport.SendRequestTransportException) ExecutionException(java.util.concurrent.ExecutionException) PlainActionFuture(org.opensearch.action.support.PlainActionFuture) ShardRouting(org.opensearch.cluster.routing.ShardRouting)

Aggregations

HashMap (java.util.HashMap)1 HashSet (java.util.HashSet)1 ExecutionException (java.util.concurrent.ExecutionException)1 CorruptIndexException (org.apache.lucene.index.CorruptIndexException)1 AlreadyClosedException (org.apache.lucene.store.AlreadyClosedException)1 UnavailableShardsException (org.opensearch.action.UnavailableShardsException)1 PlainActionFuture (org.opensearch.action.support.PlainActionFuture)1 ShardInfo (org.opensearch.action.support.replication.ReplicationResponse.ShardInfo)1 ClusterState (org.opensearch.cluster.ClusterState)1 IndexMetadata (org.opensearch.cluster.metadata.IndexMetadata)1 IndexShardRoutingTable (org.opensearch.cluster.routing.IndexShardRoutingTable)1 ShardRouting (org.opensearch.cluster.routing.ShardRouting)1 CircuitBreakingException (org.opensearch.common.breaker.CircuitBreakingException)1 OpenSearchRejectedExecutionException (org.opensearch.common.util.concurrent.OpenSearchRejectedExecutionException)1 IndexShardNotStartedException (org.opensearch.index.shard.IndexShardNotStartedException)1 ReplicationGroup (org.opensearch.index.shard.ReplicationGroup)1 ShardId (org.opensearch.index.shard.ShardId)1 NodeClosedException (org.opensearch.node.NodeClosedException)1 ConnectTransportException (org.opensearch.transport.ConnectTransportException)1 RemoteTransportException (org.opensearch.transport.RemoteTransportException)1