Use of org.opensearch.index.shard.ReplicationGroup in project OpenSearch by opensearch-project.
From the class TransportVerifyShardBeforeCloseActionTests, method testUnavailableShardsMarkedAsStale:
/**
 * Runs a verify-shard-before-close replication operation against a replication group whose routing
 * table has had a random, non-empty subset of replicas removed, while the in-sync and tracked
 * allocation-id sets are still the ones taken from the complete cluster state.
 * <p>
 * The test then checks that:
 * <ul>
 *   <li>exactly one request per replica is captured on the transport;</li>
 *   <li>each shard-failed request names the allocation id of one of the removed copies;</li>
 *   <li>each verify request targets a copy that was not removed and whose allocation id is in-sync;</li>
 *   <li>the resulting shard info reports no failures, and a successful count of one plus the number
 *       of replicas that were not removed.</li>
 * </ul>
 */
public void testUnavailableShardsMarkedAsStale() throws Exception {
    final String indexName = "test";
    final ShardId shardId = new ShardId(indexName, "_na_", 0);

    // Between 1 and 10 replicas, every one of them handed to state(...) as STARTED.
    final int replicaCount = randomIntBetween(1, 10);
    final ShardRoutingState[] replicaStates = new ShardRoutingState[replicaCount];
    int slot = 0;
    while (slot < replicaStates.length) {
        replicaStates[slot++] = ShardRoutingState.STARTED;
    }
    final ClusterState clusterState = state(indexName, true, ShardRoutingState.STARTED, replicaStates);
    setState(clusterService, clusterState);

    // Everything below is read from the complete routing table, before any copy is removed.
    final IndexShardRoutingTable fullRoutingTable = clusterState.routingTable().index(indexName).shard(shardId.id());
    final IndexMetadata indexMetadata = clusterState.getMetadata().index(indexName);
    final ShardRouting primaryRouting = fullRoutingTable.primaryShard();
    final long primaryTerm = indexMetadata.primaryTerm(0);
    final Set<String> inSyncAllocationIds = indexMetadata.inSyncAllocationIds(0);
    final Set<String> trackedShards = fullRoutingTable.getAllAllocationIds();

    // Drop a random, non-empty selection of replicas from the routing table only.
    final List<ShardRouting> unavailableShards = randomSubsetOf(
        randomIntBetween(1, replicaCount),
        fullRoutingTable.replicaShards()
    );
    final IndexShardRoutingTable.Builder reducedTableBuilder = new IndexShardRoutingTable.Builder(fullRoutingTable);
    for (ShardRouting removedCopy : unavailableShards) {
        reducedTableBuilder.removeShard(removedCopy);
    }
    final IndexShardRoutingTable reducedRoutingTable = reducedTableBuilder.build();

    final ReplicationGroup replicationGroup = new ReplicationGroup(reducedRoutingTable, inSyncAllocationIds, trackedShards, 0);
    assertThat(replicationGroup.getUnavailableInSyncShards().size(), greaterThan(0));

    final PlainActionFuture<PrimaryResult> listener = new PlainActionFuture<>();
    final TaskId taskId = new TaskId(clusterService.localNode().getId(), 0L);
    final TransportVerifyShardBeforeCloseAction.ShardRequest request = new TransportVerifyShardBeforeCloseAction.ShardRequest(
        shardId,
        clusterBlock,
        false,
        taskId
    );
    final ReplicationOperation.Replicas<TransportVerifyShardBeforeCloseAction.ShardRequest> proxy = action.newReplicasProxy();
    final ReplicationOperation<
        TransportVerifyShardBeforeCloseAction.ShardRequest,
        TransportVerifyShardBeforeCloseAction.ShardRequest,
        PrimaryResult> operation = new ReplicationOperation<>(
            request,
            createPrimary(primaryRouting, replicationGroup),
            listener,
            proxy,
            logger,
            threadPool,
            "test",
            primaryTerm,
            TimeValue.timeValueMillis(20),
            TimeValue.timeValueSeconds(60)
        );
    operation.execute();

    final CapturingTransport.CapturedRequest[] capturedRequests = transport.getCapturedRequestsAndClear();
    assertThat(capturedRequests.length, equalTo(replicaCount));

    for (CapturingTransport.CapturedRequest captured : capturedRequests) {
        final String actionName = captured.action;
        if (actionName.startsWith(ShardStateAction.SHARD_FAILED_ACTION_NAME)) {
            // A shard-failed request must refer to one of the copies removed from the routing table.
            assertThat(captured.request, instanceOf(ShardStateAction.FailedShardEntry.class));
            final ShardStateAction.FailedShardEntry failedEntry = (ShardStateAction.FailedShardEntry) captured.request;
            final String failedAllocationId = failedEntry.getAllocationId();
            assertTrue(
                unavailableShards.stream().anyMatch(removed -> removed.allocationId().getId().equals(failedAllocationId))
            );
            transport.handleResponse(captured.requestId, TransportResponse.Empty.INSTANCE);
        } else if (actionName.startsWith(TransportVerifyShardBeforeCloseAction.NAME)) {
            // A verify request must go to a copy that was kept and that is in-sync.
            assertThat(captured.request, instanceOf(ConcreteShardRequest.class));
            final String targetAllocationId = ((ConcreteShardRequest) captured.request).getTargetAllocationID();
            assertTrue(
                unavailableShards.stream().noneMatch(removed -> removed.allocationId().getId().equals(targetAllocationId))
            );
            assertTrue(inSyncAllocationIds.stream().anyMatch(inSyncId -> inSyncId.equals(targetAllocationId)));
            transport.handleResponse(captured.requestId, new TransportReplicationAction.ReplicaResponse(0L, 0L));
        } else {
            fail("Test does not support action " + captured.action);
        }
    }

    final ReplicationResponse.ShardInfo shardInfo = listener.get().getShardInfo();
    assertThat(shardInfo.getFailed(), equalTo(0));
    assertThat(shardInfo.getFailures(), arrayWithSize(0));
    assertThat(shardInfo.getSuccessful(), equalTo(1 + replicaCount - unavailableShards.size()));
}
Use of org.opensearch.index.shard.ReplicationGroup in project OpenSearch by opensearch-project.
From the class ReplicationOperationTests, method testWaitForActiveShards:
/**
 * Checks the active-shard-count gate of a replication operation.
 * <p>
 * A shard group is built with 0-2 assigned replicas (STARTED or RELOCATING) and 0-2 UNASSIGNED
 * replicas, and the request asks for a random number of active shards (using
 * {@code ActiveShardCount.ALL} when that number equals the total shard count). The check is expected
 * to pass when the requested count is at most {@code assignedReplicas + 1}: in that case
 * {@code checkActiveShardCount()} returns null and the request is processed on the primary.
 * Otherwise {@code checkActiveShardCount()} returns non-null, the request is not processed on the
 * primary, and the listener fails with {@code UnavailableShardsException}.
 */
public void testWaitForActiveShards() throws Exception {
    final String indexName = "test";
    final ShardId shardId = new ShardId(indexName, "_na_", 0);
    final int assignedReplicas = randomInt(2);
    final int unassignedReplicas = randomInt(2);
    final int totalShards = 1 + assignedReplicas + unassignedReplicas;
    final int activeShardCount = randomIntBetween(0, totalShards);

    final ActiveShardCount requiredActive;
    if (activeShardCount == totalShards) {
        requiredActive = ActiveShardCount.ALL;
    } else {
        requiredActive = ActiveShardCount.from(activeShardCount);
    }
    final Request request = new Request(shardId).waitForActiveShards(requiredActive);
    final boolean passesActiveShardCheck = activeShardCount <= assignedReplicas + 1;

    // The first `assignedReplicas` slots are randomly STARTED or RELOCATING; the remainder are UNASSIGNED.
    final ShardRoutingState[] replicaStates = new ShardRoutingState[assignedReplicas + unassignedReplicas];
    for (int slot = 0; slot < replicaStates.length; slot++) {
        if (slot < assignedReplicas) {
            replicaStates[slot] = randomFrom(ShardRoutingState.STARTED, ShardRoutingState.RELOCATING);
        } else {
            replicaStates[slot] = ShardRoutingState.UNASSIGNED;
        }
    }

    final ClusterState state = state(indexName, true, ShardRoutingState.STARTED, replicaStates);
    logger.debug(
        "using active shard count of [{}], assigned shards [{}], total shards [{}]." + " expecting op to [{}]. using state: \n{}",
        request.waitForActiveShards(),
        1 + assignedReplicas,
        totalShards,
        passesActiveShardCheck ? "succeed" : "retry",
        state
    );

    final long primaryTerm = state.metadata().index(indexName).primaryTerm(shardId.id());
    final IndexShardRoutingTable shardRoutingTable = state.routingTable().index(indexName).shard(shardId.id());
    final Set<String> inSyncAllocationIds = state.metadata().index(indexName).inSyncAllocationIds(0);
    final Set<String> trackedShards = new HashSet<>();
    addTrackingInfo(shardRoutingTable, null, trackedShards, new HashSet<>());
    final ReplicationGroup initialReplicationGroup = new ReplicationGroup(shardRoutingTable, inSyncAllocationIds, trackedShards, 0);

    final PlainActionFuture<TestPrimary.Result> listener = new PlainActionFuture<>();
    final ShardRouting primaryShard = shardRoutingTable.primaryShard();
    final TestReplicationOperation op = new TestReplicationOperation(
        request,
        new TestPrimary(primaryShard, () -> initialReplicationGroup, threadPool),
        listener,
        new TestReplicaProxy(),
        logger,
        threadPool,
        "test",
        primaryTerm
    );

    // The gate is queried directly first, then the operation is executed in both scenarios.
    if (passesActiveShardCheck) {
        assertThat(op.checkActiveShardCount(), nullValue());
    } else {
        assertThat(op.checkActiveShardCount(), notNullValue());
    }
    op.execute();

    if (passesActiveShardCheck) {
        assertTrue("operations should have been performed, active shard count is met", request.processedOnPrimary.get());
        return;
    }
    assertFalse("operations should not have been perform, active shard count is *NOT* met", request.processedOnPrimary.get());
    assertListenerThrows("should throw exception to trigger retry", listener, UnavailableShardsException.class);
}
Use of org.opensearch.index.shard.ReplicationGroup in project OpenSearch by opensearch-project.
From the class ReplicationOperationTests, method testAddedReplicaAfterPrimaryOperation:
/**
 * Checks that a replication group change which becomes visible once the primary operation completes
 * is honoured by the replication phase.
 * <p>
 * The primary initially exposes a replication group built from {@code stateWithActivePrimary(index, true, 0)}.
 * When the primary operation's result is delivered, the exposed group is swapped for one built from a
 * second cluster state (either a STARTED primary with one INITIALIZING or STARTED replica, or a
 * RELOCATING primary). The test asserts the request was processed on the primary and that the set of
 * replicas it was processed on equals what {@code getExpectedReplicas} computes for the second state.
 */
public void testAddedReplicaAfterPrimaryOperation() throws Exception {
    final String indexName = "test";
    final ShardId shardId = new ShardId(indexName, "_na_", 0);

    // Replication group as seen before the primary operation runs.
    final ClusterState initialState = stateWithActivePrimary(indexName, true, 0);
    final Set<String> initialInSyncIds = initialState.metadata().index(indexName).inSyncAllocationIds(0);
    final IndexShardRoutingTable initialRoutingTable = initialState.getRoutingTable().shardRoutingTable(shardId);
    final Set<String> initialTrackedShards = new HashSet<>();
    addTrackingInfo(initialRoutingTable, null, initialTrackedShards, new HashSet<>());
    final ReplicationGroup initialReplicationGroup = new ReplicationGroup(
        initialRoutingTable,
        initialInSyncIds,
        initialTrackedShards,
        0
    );

    // Replication group that becomes visible after the primary operation.
    final ClusterState stateWithAddedReplicas;
    if (randomBoolean()) {
        final ShardRoutingState addedReplicaState = randomBoolean() ? ShardRoutingState.INITIALIZING : ShardRoutingState.STARTED;
        stateWithAddedReplicas = state(indexName, true, ShardRoutingState.STARTED, addedReplicaState);
    } else {
        stateWithAddedReplicas = state(indexName, true, ShardRoutingState.RELOCATING);
    }
    final Set<String> updatedInSyncIds = stateWithAddedReplicas.metadata().index(indexName).inSyncAllocationIds(0);
    final IndexShardRoutingTable updatedRoutingTable = stateWithAddedReplicas.getRoutingTable().shardRoutingTable(shardId);
    final Set<String> updatedTrackedShards = new HashSet<>();
    addTrackingInfo(updatedRoutingTable, null, updatedTrackedShards, new HashSet<>());
    final ReplicationGroup updatedReplicationGroup = new ReplicationGroup(
        updatedRoutingTable,
        updatedInSyncIds,
        updatedTrackedShards,
        0
    );

    final AtomicReference<ReplicationGroup> currentGroup = new AtomicReference<>(initialReplicationGroup);
    logger.debug("--> using initial replicationGroup:\n{}", currentGroup.get());
    final long primaryTerm = initialState.getMetadata().index(shardId.getIndexName()).primaryTerm(shardId.id());
    final ShardRouting primaryShard = updatedReplicationGroup.getRoutingTable().primaryShard();

    final TestPrimary primary = new TestPrimary(primaryShard, () -> currentGroup.get(), threadPool) {
        @Override
        public void perform(Request request, ActionListener<Result> listener) {
            // Swap in the updated group at the moment the primary result is handed to the caller.
            final ActionListener<Result> swapGroupThenDelegate = ActionListener.map(listener, result -> {
                currentGroup.set(updatedReplicationGroup);
                logger.debug("--> state after primary operation:\n{}", currentGroup.get());
                return result;
            });
            super.perform(request, swapGroupThenDelegate);
        }
    };

    final Request request = new Request(shardId);
    final PlainActionFuture<TestPrimary.Result> listener = new PlainActionFuture<>();
    final TestReplicationOperation op = new TestReplicationOperation(request, primary, listener, new TestReplicaProxy(), primaryTerm);
    op.execute();

    assertThat("request was not processed on primary", request.processedOnPrimary.get(), equalTo(true));
    final Set<ShardRouting> expectedReplicas = getExpectedReplicas(shardId, stateWithAddedReplicas, updatedTrackedShards);
    assertThat(request.processedOnReplicas, equalTo(expectedReplicas));
}
Use of org.opensearch.index.shard.ReplicationGroup in project OpenSearch by opensearch-project.
From the class ReplicationOperationTests, method testPrimaryFailureHandlingReplicaResponse:
/**
 * Checks what happens when the primary throws while recording the local checkpoint of a copy other
 * than itself.
 * <p>
 * The test primary delegates {@code updateLocalCheckpointForShard} to its superclass only for its own
 * allocation id; for any other allocation id it throws either a {@code NullPointerException} (the
 * "fatal" variant) or an {@code AlreadyClosedException}, chosen at random. The test asserts that
 * {@code failShard} was called on the primary exactly in the fatal variant, and that the response
 * still reports no failed copies and a successful count of one plus the expected replicas.
 */
public void testPrimaryFailureHandlingReplicaResponse() throws Exception {
    final String indexName = "test";
    final ShardId shardId = new ShardId(indexName, "_na_", 0);
    final Request request = new Request(shardId);

    final ClusterState state = stateWithActivePrimary(indexName, true, 1, 0);
    final IndexMetadata indexMetadata = state.getMetadata().index(indexName);
    final long primaryTerm = indexMetadata.primaryTerm(0);
    final ShardRouting primaryRouting = state.getRoutingTable().shardRoutingTable(shardId).primaryShard();

    final Set<String> inSyncAllocationIds = indexMetadata.inSyncAllocationIds(0);
    final IndexShardRoutingTable shardRoutingTable = state.routingTable().index(indexName).shard(shardId.id());
    final Set<String> trackedShards = shardRoutingTable.getAllAllocationIds();
    final ReplicationGroup initialReplicationGroup = new ReplicationGroup(shardRoutingTable, inSyncAllocationIds, trackedShards, 0);

    final boolean fatal = randomBoolean();
    final AtomicBoolean primaryFailed = new AtomicBoolean();
    final ReplicationOperation.Primary<Request, Request, TestPrimary.Result> primary = new TestPrimary(
        primaryRouting,
        () -> initialReplicationGroup,
        threadPool
    ) {
        @Override
        public void failShard(String message, Exception exception) {
            // Only record that the primary was asked to fail itself.
            primaryFailed.set(true);
        }

        @Override
        public void updateLocalCheckpointForShard(String allocationId, long checkpoint) {
            final String primaryAllocationId = primaryRouting.allocationId().getId();
            if (primaryAllocationId.equals(allocationId)) {
                super.updateLocalCheckpointForShard(allocationId, checkpoint);
                return;
            }
            // Any other copy: simulate a failure on the primary while handling that copy's checkpoint.
            if (fatal) {
                throw new NullPointerException();
            }
            throw new AlreadyClosedException("already closed");
        }
    };

    final PlainActionFuture<TestPrimary.Result> listener = new PlainActionFuture<>();
    final ReplicationOperation.Replicas<Request> replicas = new TestReplicaProxy(Collections.emptyMap());
    final TestReplicationOperation operation = new TestReplicationOperation(request, primary, listener, replicas, primaryTerm);
    operation.execute();

    assertThat(primaryFailed.get(), equalTo(fatal));
    final ShardInfo shardInfo = listener.actionGet().getShardInfo();
    assertThat(shardInfo.getFailed(), equalTo(0));
    assertThat(shardInfo.getFailures(), arrayWithSize(0));
    final int expectedReplicaCount = getExpectedReplicas(shardId, state, trackedShards).size();
    assertThat(shardInfo.getSuccessful(), equalTo(1 + expectedReplicaCount));
}
Use of org.opensearch.index.shard.ReplicationGroup in project OpenSearch by opensearch-project.
From the class ReplicationOperationTests, method testNoLongerPrimary:
/**
 * Checks how a replication operation reacts when the request to fail a replica, or to mark a copy
 * as stale, is itself answered with a failure.
 * <p>
 * One expected replica is configured to fail with a {@code CorruptIndexException}, and an extra
 * random allocation id with no routing entry is added to both the in-sync and tracked sets. Depending
 * on a random flag, the injected shard-action failure is delivered either from
 * {@code markShardCopyAsStaleIfNeeded} or from {@code failShardIfNeeded}. The injected failure is
 * randomly a {@code NodeClosedException}, a {@code SendRequestTransportException} wrapping one, or a
 * {@code NoLongerPrimaryShardException}.
 * <p>
 * Assertions: the request was processed on the primary; the listener is done; the primary's
 * {@code failShard} was invoked (at most once, with a {@code NoLongerPrimaryShardException}) exactly
 * when the injected failure is a {@code NoLongerPrimaryShardException}; and the listener fails with
 * {@code ReplicationOperation.RetryOnPrimaryException}. The test returns early, asserting nothing,
 * when there are no expected replicas.
 */
public void testNoLongerPrimary() throws Exception {
    final String indexName = "test";
    final ShardId shardId = new ShardId(indexName, "_na_", 0);

    ClusterState clusterState = stateWithActivePrimary(indexName, true, 1 + randomInt(2), randomInt(2));
    final IndexMetadata indexMetadata = clusterState.getMetadata().index(indexName);
    final long primaryTerm = indexMetadata.primaryTerm(0);
    final IndexShardRoutingTable routingTable = clusterState.getRoutingTable().shardRoutingTable(shardId);
    ShardRouting primaryShard = routingTable.primaryShard();
    if (primaryShard.relocating() && randomBoolean()) {
        // simulate execution of the replication phase on the relocation target node after relocation source was marked as relocated
        clusterState = ClusterState.builder(clusterState)
            .nodes(DiscoveryNodes.builder(clusterState.nodes()).localNodeId(primaryShard.relocatingNodeId()))
            .build();
        primaryShard = primaryShard.getTargetRelocatingShard();
    }

    // add an in-sync allocation id that doesn't have a corresponding routing entry
    final Set<String> staleAllocationIds = Sets.newHashSet(randomAlphaOfLength(10));
    final Set<String> inSyncAllocationIds = Sets.union(indexMetadata.inSyncAllocationIds(0), staleAllocationIds);
    final Set<String> trackedShards = new HashSet<>();
    addTrackingInfo(routingTable, primaryShard, trackedShards, new HashSet<>());
    trackedShards.addAll(staleAllocationIds);
    final ReplicationGroup replicationGroup = new ReplicationGroup(routingTable, inSyncAllocationIds, trackedShards, 0);

    final Set<ShardRouting> expectedReplicas = getExpectedReplicas(shardId, clusterState, trackedShards);
    final Map<ShardRouting, Exception> expectedFailures = new HashMap<>();
    if (expectedReplicas.isEmpty()) {
        // Without a replica there is nothing to fail, so there is nothing to verify.
        return;
    }
    final ShardRouting failedReplica = randomFrom(new ArrayList<>(expectedReplicas));
    expectedFailures.put(failedReplica, new CorruptIndexException("simulated", (String) null));

    final Request request = new Request(shardId);
    final PlainActionFuture<TestPrimary.Result> listener = new PlainActionFuture<>();
    final boolean demoteOnStaleCopies = randomBoolean();

    // The failure returned by whichever shard-state action is chosen to fail.
    final Exception shardActionFailure;
    if (randomBoolean()) {
        shardActionFailure = new NodeClosedException(new DiscoveryNode("foo", buildNewFakeTransportAddress(), Version.CURRENT));
    } else if (randomBoolean()) {
        final DiscoveryNode node = new DiscoveryNode("foo", buildNewFakeTransportAddress(), Version.CURRENT);
        shardActionFailure = new SendRequestTransportException(
            node,
            ShardStateAction.SHARD_FAILED_ACTION_NAME,
            new NodeClosedException(node)
        );
    } else {
        shardActionFailure = new ShardStateAction.NoLongerPrimaryShardException(failedReplica.shardId(), "the king is dead");
    }

    final TestReplicaProxy replicasProxy = new TestReplicaProxy(expectedFailures) {
        @Override
        public void failShardIfNeeded(
            ShardRouting replica,
            long primaryTerm,
            String message,
            Exception exception,
            ActionListener<Void> shardActionListener
        ) {
            if (demoteOnStaleCopies) {
                super.failShardIfNeeded(replica, primaryTerm, message, exception, shardActionListener);
                return;
            }
            // In this variant the fail-shard request is the one that gets rejected.
            assertThat(replica, equalTo(failedReplica));
            shardActionListener.onFailure(shardActionFailure);
        }

        @Override
        public void markShardCopyAsStaleIfNeeded(
            ShardId shardId,
            String allocationId,
            long primaryTerm,
            ActionListener<Void> shardActionListener
        ) {
            if (demoteOnStaleCopies == false) {
                super.markShardCopyAsStaleIfNeeded(shardId, allocationId, primaryTerm, shardActionListener);
                return;
            }
            // In this variant the mark-as-stale request is the one that gets rejected.
            shardActionListener.onFailure(shardActionFailure);
        }
    };

    final AtomicBoolean primaryFailed = new AtomicBoolean();
    final TestPrimary primary = new TestPrimary(primaryShard, () -> replicationGroup, threadPool) {
        @Override
        public void failShard(String message, Exception exception) {
            assertThat(exception, instanceOf(ShardStateAction.NoLongerPrimaryShardException.class));
            // compareAndSet doubles as a check that the primary is failed no more than once.
            assertTrue(primaryFailed.compareAndSet(false, true));
        }
    };

    final TestReplicationOperation op = new TestReplicationOperation(request, primary, listener, replicasProxy, primaryTerm);
    op.execute();

    assertThat("request was not processed on primary", request.processedOnPrimary.get(), equalTo(true));
    assertTrue("listener is not marked as done", listener.isDone());
    final boolean expectPrimaryFailed = shardActionFailure instanceof ShardStateAction.NoLongerPrimaryShardException;
    if (expectPrimaryFailed) {
        assertTrue(primaryFailed.get());
    } else {
        assertFalse(primaryFailed.get());
    }
    assertListenerThrows("should throw exception to trigger retry", listener, ReplicationOperation.RetryOnPrimaryException.class);
}
Aggregations