Use of org.elasticsearch.index.shard.ReplicationGroup in project crate by crate.
The class ReplicationOperationTests, method testNoLongerPrimary.
@Test
public void testNoLongerPrimary() throws Exception {
    final String index = "test";
    final ShardId shardId = new ShardId(index, "_na_", 0);
    ClusterState initialState = stateWithActivePrimary(index, true, 1 + randomInt(2), randomInt(2));
    IndexMetadata indexMetadata = initialState.getMetadata().index(index);
    final long primaryTerm = indexMetadata.primaryTerm(0);
    final IndexShardRoutingTable indexShardRoutingTable = initialState.getRoutingTable().shardRoutingTable(shardId);
    ShardRouting primaryShard = indexShardRoutingTable.primaryShard();
    if (primaryShard.relocating() && randomBoolean()) {
        // simulate execution of the replication phase on the relocation target node
        // after the relocation source was marked as relocated
        initialState = ClusterState.builder(initialState)
            .nodes(DiscoveryNodes.builder(initialState.nodes()).localNodeId(primaryShard.relocatingNodeId()))
            .build();
        primaryShard = primaryShard.getTargetRelocatingShard();
    }
    // add an in-sync allocation id that doesn't have a corresponding routing entry
    final Set<String> staleAllocationIds = Set.of(randomAlphaOfLength(10));
    final Set<String> inSyncAllocationIds = Sets.union(indexMetadata.inSyncAllocationIds(0), staleAllocationIds);
    final Set<String> trackedShards = new HashSet<>();
    addTrackingInfo(indexShardRoutingTable, primaryShard, trackedShards, new HashSet<>());
    trackedShards.addAll(staleAllocationIds);
    final ReplicationGroup replicationGroup = new ReplicationGroup(indexShardRoutingTable, inSyncAllocationIds, trackedShards);
    final Set<ShardRouting> expectedReplicas = getExpectedReplicas(shardId, initialState, trackedShards);
    final Map<ShardRouting, Exception> expectedFailures = new HashMap<>();
    if (expectedReplicas.isEmpty()) {
        return;
    }
    final ShardRouting failedReplica = randomFrom(new ArrayList<>(expectedReplicas));
    expectedFailures.put(failedReplica, new CorruptIndexException("simulated", (String) null));

    Request request = new Request(shardId);
    PlainActionFuture<TestPrimary.Result> listener = new PlainActionFuture<>();
    final boolean testPrimaryDemotedOnStaleShardCopies = randomBoolean();
    final Exception shardActionFailure;
    if (randomBoolean()) {
        shardActionFailure = new NodeClosedException(new DiscoveryNode("foo", buildNewFakeTransportAddress(), Version.CURRENT));
    } else if (randomBoolean()) {
        DiscoveryNode node = new DiscoveryNode("foo", buildNewFakeTransportAddress(), Version.CURRENT);
        shardActionFailure = new SendRequestTransportException(node, ShardStateAction.SHARD_FAILED_ACTION_NAME, new NodeClosedException(node));
    } else {
        shardActionFailure = new ShardStateAction.NoLongerPrimaryShardException(failedReplica.shardId(), "the king is dead");
    }
    final TestReplicaProxy replicasProxy = new TestReplicaProxy(expectedFailures) {

        @Override
        public void failShardIfNeeded(ShardRouting replica, long primaryTerm, String message, Exception exception,
                                      ActionListener<Void> shardActionListener) {
            if (testPrimaryDemotedOnStaleShardCopies) {
                super.failShardIfNeeded(replica, primaryTerm, message, exception, shardActionListener);
            } else {
                assertThat(replica, equalTo(failedReplica));
                shardActionListener.onFailure(shardActionFailure);
            }
        }

        @Override
        public void markShardCopyAsStaleIfNeeded(ShardId shardId, String allocationId, long primaryTerm,
                                                 ActionListener<Void> shardActionListener) {
            if (testPrimaryDemotedOnStaleShardCopies) {
                shardActionListener.onFailure(shardActionFailure);
            } else {
                super.markShardCopyAsStaleIfNeeded(shardId, allocationId, primaryTerm, shardActionListener);
            }
        }
    };
    AtomicBoolean primaryFailed = new AtomicBoolean();
    final TestPrimary primary = new TestPrimary(primaryShard, () -> replicationGroup) {

        @Override
        public void failShard(String message, Exception exception) {
            assertTrue(primaryFailed.compareAndSet(false, true));
        }
    };
    final TestReplicationOperation op = new TestReplicationOperation(request, primary, listener, replicasProxy, primaryTerm);
    op.execute();

    assertThat("request was not processed on primary", request.processedOnPrimary.get(), equalTo(true));
    assertTrue("listener is not marked as done", listener.isDone());
    if (shardActionFailure instanceof ShardStateAction.NoLongerPrimaryShardException) {
        assertTrue(primaryFailed.get());
    } else {
        assertFalse(primaryFailed.get());
    }
    assertListenerThrows("should throw exception to trigger retry", listener, ReplicationOperation.RetryOnPrimaryException.class);
}
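The assertions at the end hinge on one rule: only a NoLongerPrimaryShardException reported back from the shard-failure action demotes the primary; node-closed or transport-level failures merely trigger a retry. A minimal standalone sketch of that dispatch policy, using simplified stand-in exception types rather than the real Elasticsearch classes:

// Standalone sketch of the failure-handling policy the test exercises.
// The exception types are illustrative stand-ins, not the actual
// ReplicationOperation implementation.
final class FailureDispatchSketch {

    static final class NoLongerPrimaryException extends RuntimeException {}

    static final class NodeClosedException extends RuntimeException {}

    /** Returns true if the primary must fail itself, false if the operation should just retry. */
    static boolean primaryMustFail(Exception shardActionFailure) {
        // Only an explicit "no longer primary" signal demotes the primary;
        // node shutdown or transport errors are transient and lead to a retry.
        return shardActionFailure instanceof NoLongerPrimaryException;
    }

    public static void main(String[] args) {
        System.out.println(primaryMustFail(new NoLongerPrimaryException())); // true -> demote primary
        System.out.println(primaryMustFail(new NodeClosedException()));      // false -> retry
    }
}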
Use of org.elasticsearch.index.shard.ReplicationGroup in project crate by crate.
The class ReplicationOperationTests, method testWaitForActiveShards.
@Test
public void testWaitForActiveShards() throws Exception {
    final String index = "test";
    final ShardId shardId = new ShardId(index, "_na_", 0);
    final int assignedReplicas = randomInt(2);
    final int unassignedReplicas = randomInt(2);
    final int totalShards = 1 + assignedReplicas + unassignedReplicas;
    final int activeShardCount = randomIntBetween(0, totalShards);
    Request request = new Request(shardId).waitForActiveShards(
        activeShardCount == totalShards ? ActiveShardCount.ALL : ActiveShardCount.from(activeShardCount));
    final boolean passesActiveShardCheck = activeShardCount <= assignedReplicas + 1;

    ShardRoutingState[] replicaStates = new ShardRoutingState[assignedReplicas + unassignedReplicas];
    for (int i = 0; i < assignedReplicas; i++) {
        replicaStates[i] = randomFrom(ShardRoutingState.STARTED, ShardRoutingState.RELOCATING);
    }
    for (int i = assignedReplicas; i < replicaStates.length; i++) {
        replicaStates[i] = ShardRoutingState.UNASSIGNED;
    }
    final ClusterState state = state(index, true, ShardRoutingState.STARTED, replicaStates);
    logger.debug("using active shard count of [{}], assigned shards [{}], total shards [{}]." +
                 " expecting op to [{}]. using state: \n{}",
                 request.waitForActiveShards(), 1 + assignedReplicas, 1 + assignedReplicas + unassignedReplicas,
                 passesActiveShardCheck ? "succeed" : "retry", state);
    final long primaryTerm = state.metadata().index(index).primaryTerm(shardId.id());
    final IndexShardRoutingTable shardRoutingTable = state.routingTable().index(index).shard(shardId.id());

    final Set<String> inSyncAllocationIds = state.metadata().index(index).inSyncAllocationIds(0);
    Set<String> trackedShards = new HashSet<>();
    addTrackingInfo(shardRoutingTable, null, trackedShards, new HashSet<>());
    final ReplicationGroup initialReplicationGroup = new ReplicationGroup(shardRoutingTable, inSyncAllocationIds, trackedShards);

    PlainActionFuture<TestPrimary.Result> listener = new PlainActionFuture<>();
    final ShardRouting primaryShard = shardRoutingTable.primaryShard();
    final TestReplicationOperation op = new TestReplicationOperation(request,
        new TestPrimary(primaryShard, () -> initialReplicationGroup),
        listener, new TestReplicaProxy(), logger, "test", primaryTerm);

    if (passesActiveShardCheck) {
        assertThat(op.checkActiveShardCount(), nullValue());
        op.execute();
        assertTrue("operations should have been performed, active shard count is met", request.processedOnPrimary.get());
    } else {
        assertThat(op.checkActiveShardCount(), notNullValue());
        op.execute();
        assertFalse("operations should not have been performed, active shard count is *NOT* met", request.processedOnPrimary.get());
        assertListenerThrows("should throw exception to trigger retry", listener, UnavailableShardsException.class);
    }
}
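The check being exercised is plain arithmetic: the operation proceeds only if the requested active-shard count is covered by the primary plus the assigned replicas, with ActiveShardCount.ALL standing for the total shard count. A standalone sketch of that arithmetic, with ALL modeled as -1 purely for illustration (the real ActiveShardCount class works differently):

// Standalone sketch of the wait_for_active_shards check the test asserts.
final class ActiveShardCheckSketch {

    static final int ALL = -1; // illustrative encoding of ActiveShardCount.ALL

    /** The check passes when the active copies (primary + assigned replicas) meet the request. */
    static boolean enoughShardsActive(int requested, int assignedReplicas, int totalShards) {
        int required = (requested == ALL) ? totalShards : requested;
        return required <= 1 + assignedReplicas;
    }

    public static void main(String[] args) {
        // 1 primary + 2 assigned replicas + 1 unassigned replica: total is 4
        System.out.println(enoughShardsActive(3, 2, 4));   // true: 3 <= 3, op proceeds
        System.out.println(enoughShardsActive(ALL, 2, 4)); // false: 4 > 3, op retries
    }
}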
Use of org.elasticsearch.index.shard.ReplicationGroup in project crate by crate.
The class ReplicationOperationTests, method testReplication.
@Test
public void testReplication() throws Exception {
    final String index = "test";
    final ShardId shardId = new ShardId(index, "_na_", 0);

    ClusterState initialState = stateWithActivePrimary(index, true, randomInt(5));
    IndexMetadata indexMetadata = initialState.getMetadata().index(index);
    final long primaryTerm = indexMetadata.primaryTerm(0);
    final IndexShardRoutingTable indexShardRoutingTable = initialState.getRoutingTable().shardRoutingTable(shardId);
    ShardRouting primaryShard = indexShardRoutingTable.primaryShard();
    if (primaryShard.relocating() && randomBoolean()) {
        // simulate execution of the replication phase on the relocation target node
        // after the relocation source was marked as relocated
        initialState = ClusterState.builder(initialState)
            .nodes(DiscoveryNodes.builder(initialState.nodes()).localNodeId(primaryShard.relocatingNodeId()))
            .build();
        primaryShard = primaryShard.getTargetRelocatingShard();
    }
    // add a few in-sync allocation ids that don't have corresponding routing entries
    final Set<String> staleAllocationIds = Set.of(generateRandomStringArray(4, 10, false));
    final Set<String> inSyncAllocationIds = Sets.union(indexMetadata.inSyncAllocationIds(0), staleAllocationIds);
    final Set<String> trackedShards = new HashSet<>();
    final Set<String> untrackedShards = new HashSet<>();
    addTrackingInfo(indexShardRoutingTable, primaryShard, trackedShards, untrackedShards);
    trackedShards.addAll(staleAllocationIds);

    final ReplicationGroup replicationGroup = new ReplicationGroup(indexShardRoutingTable, inSyncAllocationIds, trackedShards);
    final Set<ShardRouting> expectedReplicas = getExpectedReplicas(shardId, initialState, trackedShards);

    final Map<ShardRouting, Exception> simulatedFailures = new HashMap<>();
    final Map<ShardRouting, Exception> reportedFailures = new HashMap<>();
    for (ShardRouting replica : expectedReplicas) {
        if (randomBoolean()) {
            Exception t;
            boolean criticalFailure = randomBoolean();
            if (criticalFailure) {
                t = new CorruptIndexException("simulated", (String) null);
                reportedFailures.put(replica, t);
            } else {
                t = new IndexShardNotStartedException(shardId, IndexShardState.RECOVERING);
            }
            logger.debug("--> simulating failure on {} with [{}]", replica, t.getClass().getSimpleName());
            simulatedFailures.put(replica, t);
        }
    }

    Request request = new Request(shardId);
    PlainActionFuture<TestPrimary.Result> listener = new PlainActionFuture<>();
    final TestReplicaProxy replicasProxy = new TestReplicaProxy(simulatedFailures);
    final TestPrimary primary = new TestPrimary(primaryShard, () -> replicationGroup);
    final TestReplicationOperation op = new TestReplicationOperation(request, primary, listener, replicasProxy, primaryTerm);
    op.execute();

    assertThat("request was not processed on primary", request.processedOnPrimary.get(), equalTo(true));
    assertThat(request.processedOnReplicas, equalTo(expectedReplicas));
    assertThat(replicasProxy.failedReplicas, equalTo(simulatedFailures.keySet()));
    assertThat(replicasProxy.markedAsStaleCopies, equalTo(staleAllocationIds));
    assertThat("post replication operations not run on primary", request.runPostReplicationActionsOnPrimary.get(), equalTo(true));
    assertTrue("listener is not marked as done", listener.isDone());

    ShardInfo shardInfo = listener.actionGet().getShardInfo();
    assertThat(shardInfo.getFailed(), equalTo(reportedFailures.size()));
    assertThat(shardInfo.getFailures(), arrayWithSize(reportedFailures.size()));
    assertThat(shardInfo.getSuccessful(), equalTo(1 + expectedReplicas.size() - simulatedFailures.size()));

    final List<ShardRouting> unassignedShards = indexShardRoutingTable.shardsWithState(ShardRoutingState.UNASSIGNED);
    final int totalShards = 1 + expectedReplicas.size() + unassignedShards.size() + untrackedShards.size();
    assertThat(replicationGroup.toString(), shardInfo.getTotal(), equalTo(totalShards));

    assertThat(primary.knownLocalCheckpoints.remove(primaryShard.allocationId().getId()), equalTo(primary.localCheckpoint));
    assertThat(primary.knownLocalCheckpoints, equalTo(replicasProxy.generatedLocalCheckpoints));
    assertThat(primary.knownGlobalCheckpoints.remove(primaryShard.allocationId().getId()), equalTo(primary.globalCheckpoint));
    assertThat(primary.knownGlobalCheckpoints, equalTo(replicasProxy.generatedGlobalCheckpoints));
}
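The ShardInfo assertions encode a simple bookkeeping contract: the total counts the primary plus every known copy (reached, unassigned, or untracked), successful subtracts every simulated failure (critical or not) from the copies written, and only the critical failures surface in getFailed(). A standalone sketch of that accounting, with illustrative names rather than the real ReplicationOperation internals:

// Standalone sketch of the ShardInfo bookkeeping the assertions above verify.
final class ShardInfoAccountingSketch {

    static int total(int expectedReplicas, int unassigned, int untracked) {
        return 1 + expectedReplicas + unassigned + untracked; // primary + every known copy
    }

    static int successful(int expectedReplicas, int simulatedFailures) {
        return 1 + expectedReplicas - simulatedFailures;      // primary always counts as successful here
    }

    static int reported(int criticalFailures) {
        return criticalFailures;                              // only critical failures show up in getFailed()
    }

    public static void main(String[] args) {
        // 3 replicas reached, 1 simulated failure (critical), 1 unassigned copy, 0 untracked
        System.out.println(total(3, 1, 0));    // 5
        System.out.println(successful(3, 1));  // 3
        System.out.println(reported(1));       // 1
    }
}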
Use of org.elasticsearch.index.shard.ReplicationGroup in project crate by crate.
The class ReplicationOperation, method handlePrimaryResult.
private void handlePrimaryResult(final PrimaryResultT primaryResult) {
    this.primaryResult = primaryResult;
    final ReplicaRequest replicaRequest = primaryResult.replicaRequest();
    if (replicaRequest != null) {
        if (logger.isTraceEnabled()) {
            logger.trace("[{}] op [{}] completed on primary for request [{}]", primary.routingEntry().shardId(), opType, request);
        }
        // we have to get the replication group after successfully indexing into the primary in order to honour recovery semantics:
        // every operation indexed into the primary after recovery start must also be replicated to the recovery target.
        // If we used an old replication group, we might miss a recovery that has started since then.
        // we also have to sample the global checkpoint before the replication group, to ensure that the global checkpoint
        // is valid for this replication group. If we sampled in the reverse order, the global checkpoint might be based on a
        // subset of the sampled replication group and advance further than the given replication group would allow,
        // so some shards could learn about a global checkpoint that is higher than their local checkpoint.
        final long globalCheckpoint = primary.computedGlobalCheckpoint();
        // we have to capture max_seq_no_of_updates after this request was completed on the primary, to make sure that
        // its value on the replica, when this request is executed there, is at least the value on the primary at execution time.
        final long maxSeqNoOfUpdatesOrDeletes = primary.maxSeqNoOfUpdatesOrDeletes();
        assert maxSeqNoOfUpdatesOrDeletes != SequenceNumbers.UNASSIGNED_SEQ_NO : "seqno_of_updates still uninitialized";
        final ReplicationGroup replicationGroup = primary.getReplicationGroup();
        markUnavailableShardsAsStale(replicaRequest, replicationGroup);
        performOnReplicas(replicaRequest, globalCheckpoint, maxSeqNoOfUpdatesOrDeletes, replicationGroup);
    }
    primaryResult.runPostReplicationActions(new ActionListener<>() {

        @Override
        public void onResponse(Void aVoid) {
            successfulShards.incrementAndGet();
            try {
                updateCheckPoints(primary.routingEntry(), primary::localCheckpoint, primary::globalCheckpoint);
            } finally {
                decPendingAndFinishIfNeeded();
            }
        }

        @Override
        public void onFailure(Exception e) {
            logger.trace("[{}] op [{}] post replication actions failed for [{}]", primary.routingEntry().shardId(), opType, request);
            // TODO: fail shard? This will otherwise have the local / global checkpoint info lagging, or possibly have replicas
            // go out of sync with the primary
            finishAsFailed(e);
        }
    });
}
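The comment block above carries the key invariant: sample the global checkpoint before the replication group, so a concurrent group change can only make the group newer than the checkpoint, never the checkpoint newer than the group. A minimal sketch of that ordering, with simplified stand-ins (volatile fields and a plain record) for the Elasticsearch internals:

// Standalone sketch of the sampling-order invariant (Java 16+ for records).
final class SamplingOrderSketch {

    static volatile long globalCheckpoint = 0;
    static volatile int replicationGroupVersion = 0;

    record Sample(long globalCheckpoint, int groupVersion) {}

    static Sample sampleForReplication() {
        // Correct order: read the checkpoint first. A concurrent update can then
        // only make the sampled group newer than the sampled checkpoint, which is
        // safe; the reverse order could hand replicas a checkpoint ahead of the
        // group, i.e. ahead of some shard's local checkpoint.
        long gcp = globalCheckpoint;
        int group = replicationGroupVersion;
        return new Sample(gcp, group);
    }

    public static void main(String[] args) {
        Sample s = sampleForReplication();
        System.out.println("checkpoint " + s.globalCheckpoint() + " for group v" + s.groupVersion());
    }
}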
Use of org.elasticsearch.index.shard.ReplicationGroup in project crate by crate.
The class TransportVerifyShardBeforeCloseActionTests, method testUnavailableShardsMarkedAsStale.
@Test
public void testUnavailableShardsMarkedAsStale() throws Exception {
    String index = "test";
    ShardId shardId = new ShardId(index, "_na_", 0);

    int nbReplicas = randomIntBetween(1, 10);
    ShardRoutingState[] replicaStates = new ShardRoutingState[nbReplicas];
    Arrays.fill(replicaStates, ShardRoutingState.STARTED);
    final ClusterState clusterState = state(index, true, ShardRoutingState.STARTED, replicaStates);
    setState(clusterService, clusterState);

    IndexShardRoutingTable shardRoutingTable = clusterState.routingTable().index(index).shard(shardId.id());
    IndexMetadata indexMetaData = clusterState.getMetadata().index(index);
    ShardRouting primaryRouting = shardRoutingTable.primaryShard();
    long primaryTerm = indexMetaData.primaryTerm(0);

    Set<String> inSyncAllocationIds = indexMetaData.inSyncAllocationIds(0);
    Set<String> trackedShards = shardRoutingTable.getAllAllocationIds();

    List<ShardRouting> unavailableShards = randomSubsetOf(randomIntBetween(1, nbReplicas), shardRoutingTable.replicaShards());
    IndexShardRoutingTable.Builder shardRoutingTableBuilder = new IndexShardRoutingTable.Builder(shardRoutingTable);
    unavailableShards.forEach(shardRoutingTableBuilder::removeShard);
    shardRoutingTable = shardRoutingTableBuilder.build();

    ReplicationGroup replicationGroup = new ReplicationGroup(shardRoutingTable, inSyncAllocationIds, trackedShards);
    assertThat(replicationGroup.getUnavailableInSyncShards().size(), greaterThan(0));

    PlainActionFuture<PrimaryResult> listener = new PlainActionFuture<>();
    TransportVerifyShardBeforeCloseAction.ShardRequest request =
        new TransportVerifyShardBeforeCloseAction.ShardRequest(shardId, false, clusterBlock);
    ReplicationOperation.Replicas<TransportVerifyShardBeforeCloseAction.ShardRequest> proxy = action.newReplicasProxy();
    ReplicationOperation<TransportVerifyShardBeforeCloseAction.ShardRequest,
                         TransportVerifyShardBeforeCloseAction.ShardRequest,
                         PrimaryResult> operation = new ReplicationOperation<>(
        request, createPrimary(primaryRouting, replicationGroup), listener, proxy, logger, "test", primaryTerm);
    operation.execute();

    CapturingTransport.CapturedRequest[] capturedRequests = transport.getCapturedRequestsAndClear();
    assertThat(capturedRequests.length, equalTo(nbReplicas));

    for (CapturingTransport.CapturedRequest capturedRequest : capturedRequests) {
        String actionName = capturedRequest.action;
        if (actionName.startsWith(ShardStateAction.SHARD_FAILED_ACTION_NAME)) {
            assertThat(capturedRequest.request, instanceOf(ShardStateAction.FailedShardEntry.class));
            String allocationId = ((ShardStateAction.FailedShardEntry) capturedRequest.request).getAllocationId();
            assertTrue(unavailableShards.stream().anyMatch(shardRouting -> shardRouting.allocationId().getId().equals(allocationId)));
            transport.handleResponse(capturedRequest.requestId, TransportResponse.Empty.INSTANCE);
        } else if (actionName.startsWith(TransportVerifyShardBeforeCloseAction.NAME)) {
            assertThat(capturedRequest.request, instanceOf(ConcreteShardRequest.class));
            String allocationId = ((ConcreteShardRequest) capturedRequest.request).getTargetAllocationID();
            assertFalse(unavailableShards.stream().anyMatch(shardRouting -> shardRouting.allocationId().getId().equals(allocationId)));
            assertTrue(inSyncAllocationIds.stream().anyMatch(inSyncAllocationId -> inSyncAllocationId.equals(allocationId)));
            transport.handleResponse(capturedRequest.requestId, new TransportReplicationAction.ReplicaResponse(0L, 0L));
        } else {
            fail("Test does not support action " + capturedRequest.action);
        }
    }

    ReplicationResponse.ShardInfo shardInfo = listener.get().getShardInfo();
    assertThat(shardInfo.getFailed(), equalTo(0));
    assertThat(shardInfo.getFailures(), arrayWithSize(0));
    assertThat(shardInfo.getSuccessful(), equalTo(1 + nbReplicas - unavailableShards.size()));
}
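The unavailable in-sync copies that this test forces are simply the allocation ids that remain in the in-sync set after their routing entries were removed; those are the copies the operation must fail or mark stale before the close can complete. A plain-Java sketch of that set difference, not the actual ReplicationGroup implementation:

import java.util.HashSet;
import java.util.Set;

// Standalone sketch of how "unavailable in-sync" copies are derived: an
// allocation id that is in the in-sync set but has no routing entry.
final class UnavailableInSyncSketch {

    static Set<String> unavailableInSync(Set<String> inSyncIds, Set<String> routedIds) {
        Set<String> unavailable = new HashSet<>(inSyncIds);
        unavailable.removeAll(routedIds); // in-sync but absent from the routing table
        return unavailable;
    }

    public static void main(String[] args) {
        Set<String> inSync = Set.of("a1", "a2", "a3");
        Set<String> routed = Set.of("a1", "a3");               // a2's shard was removed
        System.out.println(unavailableInSync(inSync, routed)); // [a2]
    }
}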