Search in sources :

Example 1 with ShardInfo

use of org.opensearch.action.support.replication.ReplicationResponse.ShardInfo in project OpenSearch by opensearch-project.

the class ReplicationOperationTests method testPrimaryFailureHandlingReplicaResponse.

/**
 * Runs a replication operation against a primary whose
 * {@code updateLocalCheckpointForShard} throws for every allocation id other than the
 * primary's own. The thrown exception is randomly either a {@link NullPointerException}
 * (the "fatal" variant) or an {@link AlreadyClosedException}.
 *
 * The test asserts that {@code failShard} was invoked on the primary exactly when the fatal
 * variant was chosen, and that the resulting {@link ShardInfo} reports no failed shards.
 */
public void testPrimaryFailureHandlingReplicaResponse() throws Exception {
    final String index = "test";
    final ShardId shardId = new ShardId(index, "_na_", 0);
    final Request request = new Request(shardId);

    // Cluster state and the replication group derived from it.
    final ClusterState clusterState = stateWithActivePrimary(index, true, 1, 0);
    final IndexMetadata metadata = clusterState.getMetadata().index(index);
    final long term = metadata.primaryTerm(0);
    final ShardRouting primaryRouting = clusterState.getRoutingTable().shardRoutingTable(shardId).primaryShard();
    final Set<String> inSyncIds = metadata.inSyncAllocationIds(0);
    final IndexShardRoutingTable routingTable = clusterState.routingTable().index(index).shard(shardId.id());
    final Set<String> trackedIds = routingTable.getAllAllocationIds();
    final ReplicationGroup group = new ReplicationGroup(routingTable, inSyncIds, trackedIds, 0);

    // Decides which exception the primary throws when handling a non-primary checkpoint update.
    final boolean fatal = randomBoolean();
    final AtomicBoolean failShardCalled = new AtomicBoolean();

    final ReplicationOperation.Primary<Request, Request, TestPrimary.Result> primary = new TestPrimary(
        primaryRouting,
        () -> group,
        threadPool
    ) {

        @Override
        public void failShard(String message, Exception exception) {
            // Only record the call; the assertion below compares it against `fatal`.
            failShardCalled.set(true);
        }

        @Override
        public void updateLocalCheckpointForShard(String allocationId, long checkpoint) {
            final boolean isPrimaryCopy = primaryRouting.allocationId().getId().equals(allocationId);
            if (isPrimaryCopy == false) {
                final RuntimeException failure = fatal
                    ? new NullPointerException()
                    : new AlreadyClosedException("already closed");
                throw failure;
            }
            super.updateLocalCheckpointForShard(allocationId, checkpoint);
        }
    };

    final PlainActionFuture<TestPrimary.Result> future = new PlainActionFuture<>();
    final ReplicationOperation.Replicas<Request> replicaProxy = new TestReplicaProxy(Collections.emptyMap());
    final TestReplicationOperation replicationOperation = new TestReplicationOperation(request, primary, future, replicaProxy, term);
    replicationOperation.execute();

    assertThat(failShardCalled.get(), equalTo(fatal));

    final ShardInfo info = future.actionGet().getShardInfo();
    assertThat(info.getFailed(), equalTo(0));
    assertThat(info.getFailures(), arrayWithSize(0));
    final int expectedSuccessful = 1 + getExpectedReplicas(shardId, clusterState, trackedIds).size();
    assertThat(info.getSuccessful(), equalTo(expectedSuccessful));
}
Also used : IndexShardRoutingTable(org.opensearch.cluster.routing.IndexShardRoutingTable) AlreadyClosedException(org.apache.lucene.store.AlreadyClosedException) ReplicationGroup(org.opensearch.index.shard.ReplicationGroup) ShardId(org.opensearch.index.shard.ShardId) IndexMetadata(org.opensearch.cluster.metadata.IndexMetadata) ShardInfo(org.opensearch.action.support.replication.ReplicationResponse.ShardInfo) ClusterState(org.opensearch.cluster.ClusterState) OpenSearchRejectedExecutionException(org.opensearch.common.util.concurrent.OpenSearchRejectedExecutionException) AlreadyClosedException(org.apache.lucene.store.AlreadyClosedException) CorruptIndexException(org.apache.lucene.index.CorruptIndexException) NodeClosedException(org.opensearch.node.NodeClosedException) UnavailableShardsException(org.opensearch.action.UnavailableShardsException) RemoteTransportException(org.opensearch.transport.RemoteTransportException) ConnectTransportException(org.opensearch.transport.ConnectTransportException) CircuitBreakingException(org.opensearch.common.breaker.CircuitBreakingException) IndexShardNotStartedException(org.opensearch.index.shard.IndexShardNotStartedException) SendRequestTransportException(org.opensearch.transport.SendRequestTransportException) ExecutionException(java.util.concurrent.ExecutionException) AtomicBoolean(java.util.concurrent.atomic.AtomicBoolean) PlainActionFuture(org.opensearch.action.support.PlainActionFuture) ShardRouting(org.opensearch.cluster.routing.ShardRouting)

Example 2 with ShardInfo

use of org.opensearch.action.support.replication.ReplicationResponse.ShardInfo in project OpenSearch by opensearch-project.

the class ReplicationOperationTests method testReplication.

/**
 * Executes a replication operation while randomly simulating failures on some of the expected
 * replicas, then verifies which copies processed the request, which replicas were failed,
 * which allocation ids were marked stale, the counts reported in {@link ShardInfo}, and the
 * local/global checkpoints recorded by the primary.
 *
 * Only the simulated {@link CorruptIndexException} failures are expected to show up in the
 * {@link ShardInfo} failure count; {@link IndexShardNotStartedException} failures are simulated
 * but not counted as reported.
 */
public void testReplication() throws Exception {
    final String index = "test";
    final ShardId shardId = new ShardId(index, "_na_", 0);

    ClusterState initialState = stateWithActivePrimary(index, true, randomInt(5));
    final IndexMetadata metadata = initialState.getMetadata().index(index);
    final long term = metadata.primaryTerm(0);
    final IndexShardRoutingTable routingTable = initialState.getRoutingTable().shardRoutingTable(shardId);
    ShardRouting primaryShard = routingTable.primaryShard();
    if (primaryShard.relocating() && randomBoolean()) {
        // simulate execution of the replication phase on the relocation target node after relocation source was marked as relocated
        initialState = ClusterState.builder(initialState)
            .nodes(DiscoveryNodes.builder(initialState.nodes()).localNodeId(primaryShard.relocatingNodeId()))
            .build();
        primaryShard = primaryShard.getTargetRelocatingShard();
    }

    // add a few in-sync allocation ids that don't have corresponding routing entries
    final Set<String> staleIds = Sets.newHashSet(generateRandomStringArray(4, 10, false));
    final Set<String> inSyncIds = Sets.union(metadata.inSyncAllocationIds(0), staleIds);
    final Set<String> trackedIds = new HashSet<>();
    final Set<String> untrackedIds = new HashSet<>();
    addTrackingInfo(routingTable, primaryShard, trackedIds, untrackedIds);
    trackedIds.addAll(staleIds);

    final ReplicationGroup replicationGroup = new ReplicationGroup(routingTable, inSyncIds, trackedIds, 0);
    final Set<ShardRouting> expectedReplicas = getExpectedReplicas(shardId, initialState, trackedIds);

    // simulatedFailures: every failure injected into the proxy.
    // reportedFailures: the subset expected to be counted in ShardInfo.
    final Map<ShardRouting, Exception> simulatedFailures = new HashMap<>();
    final Map<ShardRouting, Exception> reportedFailures = new HashMap<>();
    for (ShardRouting replica : expectedReplicas) {
        if (randomBoolean() == false) {
            continue;
        }
        final Exception failure;
        if (randomBoolean()) {
            failure = new CorruptIndexException("simulated", (String) null);
            reportedFailures.put(replica, failure);
        } else {
            failure = new IndexShardNotStartedException(shardId, IndexShardState.RECOVERING);
        }
        logger.debug("--> simulating failure on {} with [{}]", replica, failure.getClass().getSimpleName());
        simulatedFailures.put(replica, failure);
    }

    final Request request = new Request(shardId);
    final PlainActionFuture<TestPrimary.Result> future = new PlainActionFuture<>();
    final TestReplicaProxy proxy = new TestReplicaProxy(simulatedFailures);
    final TestPrimary primary = new TestPrimary(primaryShard, () -> replicationGroup, threadPool);
    final TestReplicationOperation operation = new TestReplicationOperation(request, primary, future, proxy, term);
    operation.execute();

    assertThat("request was not processed on primary", request.processedOnPrimary.get(), equalTo(true));
    assertThat(request.processedOnReplicas, equalTo(expectedReplicas));
    assertThat(proxy.failedReplicas, equalTo(simulatedFailures.keySet()));
    assertThat(proxy.markedAsStaleCopies, equalTo(staleIds));
    assertThat("post replication operations not run on primary", request.runPostReplicationActionsOnPrimary.get(), equalTo(true));
    assertTrue("listener is not marked as done", future.isDone());

    final ShardInfo info = future.actionGet().getShardInfo();
    final int reportedCount = reportedFailures.size();
    assertThat(info.getFailed(), equalTo(reportedCount));
    assertThat(info.getFailures(), arrayWithSize(reportedCount));
    assertThat(info.getSuccessful(), equalTo(1 + expectedReplicas.size() - simulatedFailures.size()));

    final List<ShardRouting> unassigned = routingTable.shardsWithState(ShardRoutingState.UNASSIGNED);
    final int expectedTotal = 1 + expectedReplicas.size() + unassigned.size() + untrackedIds.size();
    assertThat(replicationGroup.toString(), info.getTotal(), equalTo(expectedTotal));

    // Remove the primary's own entry first so the remaining maps can be compared
    // directly against the checkpoints generated by the replica proxy.
    final String primaryAllocationId = primaryShard.allocationId().getId();
    assertThat(primary.knownLocalCheckpoints.remove(primaryAllocationId), equalTo(primary.localCheckpoint));
    assertThat(primary.knownLocalCheckpoints, equalTo(proxy.generatedLocalCheckpoints));
    assertThat(primary.knownGlobalCheckpoints.remove(primaryAllocationId), equalTo(primary.globalCheckpoint));
    assertThat(primary.knownGlobalCheckpoints, equalTo(proxy.generatedGlobalCheckpoints));
}
Also used : IndexShardRoutingTable(org.opensearch.cluster.routing.IndexShardRoutingTable) HashMap(java.util.HashMap) ReplicationGroup(org.opensearch.index.shard.ReplicationGroup) ShardId(org.opensearch.index.shard.ShardId) IndexMetadata(org.opensearch.cluster.metadata.IndexMetadata) HashSet(java.util.HashSet) ShardInfo(org.opensearch.action.support.replication.ReplicationResponse.ShardInfo) ClusterState(org.opensearch.cluster.ClusterState) IndexShardNotStartedException(org.opensearch.index.shard.IndexShardNotStartedException) CorruptIndexException(org.apache.lucene.index.CorruptIndexException) OpenSearchRejectedExecutionException(org.opensearch.common.util.concurrent.OpenSearchRejectedExecutionException) AlreadyClosedException(org.apache.lucene.store.AlreadyClosedException) CorruptIndexException(org.apache.lucene.index.CorruptIndexException) NodeClosedException(org.opensearch.node.NodeClosedException) UnavailableShardsException(org.opensearch.action.UnavailableShardsException) RemoteTransportException(org.opensearch.transport.RemoteTransportException) ConnectTransportException(org.opensearch.transport.ConnectTransportException) CircuitBreakingException(org.opensearch.common.breaker.CircuitBreakingException) IndexShardNotStartedException(org.opensearch.index.shard.IndexShardNotStartedException) SendRequestTransportException(org.opensearch.transport.SendRequestTransportException) ExecutionException(java.util.concurrent.ExecutionException) PlainActionFuture(org.opensearch.action.support.PlainActionFuture) ShardRouting(org.opensearch.cluster.routing.ShardRouting)

Example 3 with ShardInfo

use of org.opensearch.action.support.replication.ReplicationResponse.ShardInfo in project OpenSearch by opensearch-project.

the class RandomObjects method randomShardInfo.

/**
 * Builds a pair of randomized {@link ShardInfo} instances: the left side is the value to
 * serialize, the right side is the value expected after it has been rendered as
 * {@link ToXContent} and parsed back with a method such as
 * {@link ShardInfo#fromXContent(XContentParser)}.
 *
 * When {@code withShardFailures} is {@code false}, both sides are created with the same
 * {@code total} used for the total and successful counts and carry no failures. Otherwise
 * at least one failure is always generated: the number of failures is
 * {@code max(1, total - successful)}.
 *
 * @param random            Random generator
 * @param withShardFailures indicates if the generated ShardInfo must contain shard failures
 * @return a tuple of (actual, expected-after-round-trip) {@link ShardInfo}
 */
public static Tuple<ShardInfo, ShardInfo> randomShardInfo(Random random, boolean withShardFailures) {
    final int total = randomIntBetween(random, 1, 10);
    if (withShardFailures == false) {
        final ShardInfo actual = new ShardInfo(total, total);
        final ShardInfo expected = new ShardInfo(total, total);
        return Tuple.tuple(actual, expected);
    }

    final int successful = randomIntBetween(random, 1, Math.max(1, total - 1));
    // Never zero, so the "with failures" variant always has something to report.
    final int failureCount = Math.max(1, total - successful);

    final Failure[] actualFailures = new Failure[failureCount];
    final Failure[] expectedFailures = new Failure[failureCount];
    int slot = 0;
    while (slot < failureCount) {
        final Tuple<Failure, Failure> pair = randomShardInfoFailure(random);
        actualFailures[slot] = pair.v1();
        expectedFailures[slot] = pair.v2();
        slot++;
    }

    final ShardInfo actual = new ShardInfo(total, successful, actualFailures);
    final ShardInfo expected = new ShardInfo(total, successful, expectedFailures);
    return Tuple.tuple(actual, expected);
}
Also used : Failure(org.opensearch.action.support.replication.ReplicationResponse.ShardInfo.Failure) ShardInfo(org.opensearch.action.support.replication.ReplicationResponse.ShardInfo)

Example 4 with ShardInfo

use of org.opensearch.action.support.replication.ReplicationResponse.ShardInfo in project OpenSearch by opensearch-project.

the class DocWriteResponseTests method testToXContentDoesntIncludeForcedRefreshUnlessForced.

/**
 * Verifies that {@link DocWriteResponse#toXContent(XContentBuilder, ToXContent.Params)} omits the
 * {@code forced_refresh} field when the flag is {@code false} and emits it as {@code true} when
 * the flag is set. Per the original note, this can't be asserted in the yaml tests because
 * "not found" is also "false" there.
 */
public void testToXContentDoesntIncludeForcedRefreshUnlessForced() throws IOException {
    final ShardId shardId = new ShardId("index", "uuid", 0);
    final DocWriteResponse response = new DocWriteResponse(shardId, "id", SequenceNumbers.UNASSIGNED_SEQ_NO, 17, 0, Result.CREATED) {
    };
    response.setShardInfo(new ShardInfo(1, 1));

    // First render with the flag off, then with it on.
    for (boolean forcedRefresh : new boolean[] { false, true }) {
        response.setForcedRefresh(forcedRefresh);
        try (XContentBuilder builder = JsonXContent.contentBuilder()) {
            response.toXContent(builder, ToXContent.EMPTY_PARAMS);
            try (XContentParser parser = createParser(JsonXContent.jsonXContent, BytesReference.bytes(builder))) {
                if (forcedRefresh) {
                    assertThat(parser.map(), hasEntry("forced_refresh", true));
                } else {
                    assertThat(parser.map(), not(hasKey("forced_refresh")));
                }
            }
        }
    }
}
Also used : ShardId(org.opensearch.index.shard.ShardId) XContentBuilder(org.opensearch.common.xcontent.XContentBuilder) XContentParser(org.opensearch.common.xcontent.XContentParser) ShardInfo(org.opensearch.action.support.replication.ReplicationResponse.ShardInfo)

Example 5 with ShardInfo

use of org.opensearch.action.support.replication.ReplicationResponse.ShardInfo in project OpenSearch by opensearch-project.

the class ReplicationOperationTests method testRetryTransientReplicationFailure.

/**
 * Executes a replication operation in which every expected replica is given a simulated
 * failure: either a {@link RemoteTransportException} wrapping a {@link CircuitBreakingException}
 * or an {@link OpenSearchRejectedExecutionException}, or a bare {@link ConnectTransportException}.
 *
 * The test then asserts that no replica ended up failed, that every expected replica processed
 * the request and counts as successful, and that the checkpoints recorded by the primary match
 * those generated by the replica proxy.
 */
public void testRetryTransientReplicationFailure() throws Exception {
    final String index = "test";
    final ShardId shardId = new ShardId(index, "_na_", 0);

    ClusterState initialState = stateWithActivePrimary(index, true, randomInt(5));
    final IndexMetadata metadata = initialState.getMetadata().index(index);
    final long term = metadata.primaryTerm(0);
    final IndexShardRoutingTable routingTable = initialState.getRoutingTable().shardRoutingTable(shardId);
    ShardRouting primaryShard = routingTable.primaryShard();
    if (primaryShard.relocating() && randomBoolean()) {
        // simulate execution of the replication phase on the relocation target node after relocation source was marked as relocated
        initialState = ClusterState.builder(initialState)
            .nodes(DiscoveryNodes.builder(initialState.nodes()).localNodeId(primaryShard.relocatingNodeId()))
            .build();
        primaryShard = primaryShard.getTargetRelocatingShard();
    }

    // add a few in-sync allocation ids that don't have corresponding routing entries
    final Set<String> staleIds = Sets.newHashSet(generateRandomStringArray(4, 10, false));
    final Set<String> inSyncIds = Sets.union(metadata.inSyncAllocationIds(0), staleIds);
    final Set<String> trackedIds = new HashSet<>();
    final Set<String> untrackedIds = new HashSet<>();
    addTrackingInfo(routingTable, primaryShard, trackedIds, untrackedIds);
    trackedIds.addAll(staleIds);

    final ReplicationGroup replicationGroup = new ReplicationGroup(routingTable, inSyncIds, trackedIds, 0);
    final Set<ShardRouting> expectedReplicas = getExpectedReplicas(shardId, initialState, trackedIds);

    final Map<ShardRouting, Exception> simulatedFailures = new HashMap<>();
    for (ShardRouting replica : expectedReplicas) {
        final Exception failure;
        if (randomBoolean()) {
            // Remote failure: wrap one of two causes in a RemoteTransportException.
            final Exception rootCause = randomBoolean()
                ? new CircuitBreakingException("broken", CircuitBreaker.Durability.PERMANENT)
                : new OpenSearchRejectedExecutionException("rejected");
            failure = new RemoteTransportException("remote", rootCause);
        } else {
            // Connection failure against a node on the loopback address.
            final TransportAddress loopback = new TransportAddress(InetAddress.getLoopbackAddress(), 9300);
            final DiscoveryNode replicaNode = new DiscoveryNode("replica", loopback, Version.CURRENT);
            failure = new ConnectTransportException(replicaNode, "broken");
        }
        logger.debug("--> simulating failure on {} with [{}]", replica, failure.getClass().getSimpleName());
        simulatedFailures.put(replica, failure);
    }

    final Request request = new Request(shardId);
    final PlainActionFuture<TestPrimary.Result> future = new PlainActionFuture<>();
    final TestReplicaProxy proxy = new TestReplicaProxy(simulatedFailures, true);
    final TestPrimary primary = new TestPrimary(primaryShard, () -> replicationGroup, threadPool);
    // NOTE(review): the two TimeValue arguments are presumably the retry delay and retry timeout -
    // confirm against the TestReplicationOperation constructor.
    final TestReplicationOperation operation = new TestReplicationOperation(
        request,
        primary,
        future,
        proxy,
        term,
        TimeValue.timeValueMillis(20),
        TimeValue.timeValueSeconds(60)
    );
    operation.execute();

    assertThat("request was not processed on primary", request.processedOnPrimary.get(), equalTo(true));
    assertThat(request.processedOnReplicas, equalTo(expectedReplicas));
    assertThat(proxy.failedReplicas.size(), equalTo(0));
    assertThat(proxy.markedAsStaleCopies, equalTo(staleIds));
    assertThat("post replication operations not run on primary", request.runPostReplicationActionsOnPrimary.get(), equalTo(true));

    final ShardInfo info = future.actionGet().getShardInfo();
    assertThat(info.getSuccessful(), equalTo(1 + expectedReplicas.size()));

    final List<ShardRouting> unassigned = routingTable.shardsWithState(ShardRoutingState.UNASSIGNED);
    final int expectedTotal = 1 + expectedReplicas.size() + unassigned.size() + untrackedIds.size();
    assertThat(replicationGroup.toString(), info.getTotal(), equalTo(expectedTotal));

    // Remove the primary's own entry first so the remaining maps can be compared
    // directly against the checkpoints generated by the replica proxy.
    final String primaryAllocationId = primaryShard.allocationId().getId();
    assertThat(primary.knownLocalCheckpoints.remove(primaryAllocationId), equalTo(primary.localCheckpoint));
    assertThat(primary.knownLocalCheckpoints, equalTo(proxy.generatedLocalCheckpoints));
    assertThat(primary.knownGlobalCheckpoints.remove(primaryAllocationId), equalTo(primary.globalCheckpoint));
    assertThat(primary.knownGlobalCheckpoints, equalTo(proxy.generatedGlobalCheckpoints));
}
Also used : IndexShardRoutingTable(org.opensearch.cluster.routing.IndexShardRoutingTable) DiscoveryNode(org.opensearch.cluster.node.DiscoveryNode) HashMap(java.util.HashMap) TransportAddress(org.opensearch.common.transport.TransportAddress) OpenSearchRejectedExecutionException(org.opensearch.common.util.concurrent.OpenSearchRejectedExecutionException) ReplicationGroup(org.opensearch.index.shard.ReplicationGroup) ShardId(org.opensearch.index.shard.ShardId) IndexMetadata(org.opensearch.cluster.metadata.IndexMetadata) HashSet(java.util.HashSet) ShardInfo(org.opensearch.action.support.replication.ReplicationResponse.ShardInfo) ClusterState(org.opensearch.cluster.ClusterState) RemoteTransportException(org.opensearch.transport.RemoteTransportException) OpenSearchRejectedExecutionException(org.opensearch.common.util.concurrent.OpenSearchRejectedExecutionException) AlreadyClosedException(org.apache.lucene.store.AlreadyClosedException) CorruptIndexException(org.apache.lucene.index.CorruptIndexException) NodeClosedException(org.opensearch.node.NodeClosedException) UnavailableShardsException(org.opensearch.action.UnavailableShardsException) RemoteTransportException(org.opensearch.transport.RemoteTransportException) ConnectTransportException(org.opensearch.transport.ConnectTransportException) CircuitBreakingException(org.opensearch.common.breaker.CircuitBreakingException) IndexShardNotStartedException(org.opensearch.index.shard.IndexShardNotStartedException) SendRequestTransportException(org.opensearch.transport.SendRequestTransportException) ExecutionException(java.util.concurrent.ExecutionException) ConnectTransportException(org.opensearch.transport.ConnectTransportException) PlainActionFuture(org.opensearch.action.support.PlainActionFuture) CircuitBreakingException(org.opensearch.common.breaker.CircuitBreakingException) ShardRouting(org.opensearch.cluster.routing.ShardRouting)

Aggregations

ShardInfo (org.opensearch.action.support.replication.ReplicationResponse.ShardInfo): 8; ShardId (org.opensearch.index.shard.ShardId): 4; ExecutionException (java.util.concurrent.ExecutionException): 3; CorruptIndexException (org.apache.lucene.index.CorruptIndexException): 3; AlreadyClosedException (org.apache.lucene.store.AlreadyClosedException): 3; UnavailableShardsException (org.opensearch.action.UnavailableShardsException): 3; PlainActionFuture (org.opensearch.action.support.PlainActionFuture): 3; ClusterState (org.opensearch.cluster.ClusterState): 3; IndexMetadata (org.opensearch.cluster.metadata.IndexMetadata): 3; IndexShardRoutingTable (org.opensearch.cluster.routing.IndexShardRoutingTable): 3; ShardRouting (org.opensearch.cluster.routing.ShardRouting): 3; CircuitBreakingException (org.opensearch.common.breaker.CircuitBreakingException): 3; OpenSearchRejectedExecutionException (org.opensearch.common.util.concurrent.OpenSearchRejectedExecutionException): 3; IndexShardNotStartedException (org.opensearch.index.shard.IndexShardNotStartedException): 3; ReplicationGroup (org.opensearch.index.shard.ReplicationGroup): 3; NodeClosedException (org.opensearch.node.NodeClosedException): 3; ConnectTransportException (org.opensearch.transport.ConnectTransportException): 3; RemoteTransportException (org.opensearch.transport.RemoteTransportException): 3; SendRequestTransportException (org.opensearch.transport.SendRequestTransportException): 3; HashMap (java.util.HashMap): 2