Use of org.opensearch.action.support.replication.ReplicationResponse in project OpenSearch by opensearch-project.
Class ReplicationTrackerTests, method testPrimaryContextHandoff:
public void testPrimaryContextHandoff() throws IOException {
    final IndexSettings indexSettings = IndexSettingsModule.newIndexSettings("test", Settings.EMPTY);
    final ShardId shardId = new ShardId("test", "_na_", 0);
    FakeClusterState clusterState = initialState();
    final AllocationId aId = clusterState.routingTable.primaryShard().allocationId();
    final LongConsumer onUpdate = updatedGlobalCheckpoint -> {};
    final long primaryTerm = randomNonNegativeLong();
    final long globalCheckpoint = UNASSIGNED_SEQ_NO;
    final BiConsumer<RetentionLeases, ActionListener<ReplicationResponse>> onNewRetentionLease = (leases, listener) -> {};
    ReplicationTracker oldPrimary = new ReplicationTracker(shardId, aId.getId(), indexSettings, primaryTerm, globalCheckpoint, onUpdate, () -> 0L, onNewRetentionLease, OPS_BASED_RECOVERY_ALWAYS_REASONABLE);
    ReplicationTracker newPrimary = new ReplicationTracker(shardId, aId.getRelocationId(), indexSettings, primaryTerm, globalCheckpoint, onUpdate, () -> 0L, onNewRetentionLease, OPS_BASED_RECOVERY_ALWAYS_REASONABLE);
    Set<String> allocationIds = new HashSet<>(Arrays.asList(oldPrimary.shardAllocationId, newPrimary.shardAllocationId));
    clusterState.apply(oldPrimary);
    clusterState.apply(newPrimary);
    oldPrimary.activatePrimaryMode(randomIntBetween(Math.toIntExact(NO_OPS_PERFORMED), 10));
    addPeerRecoveryRetentionLease(oldPrimary, newPrimary.shardAllocationId);
    newPrimary.updateRetentionLeasesOnReplica(oldPrimary.getRetentionLeases());
    final int numUpdates = randomInt(10);
    for (int i = 0; i < numUpdates; i++) {
        if (rarely()) {
            clusterState = randomUpdateClusterState(allocationIds, clusterState);
            clusterState.apply(oldPrimary);
            clusterState.apply(newPrimary);
        }
        if (randomBoolean()) {
            randomLocalCheckpointUpdate(oldPrimary);
        }
        if (randomBoolean()) {
            randomMarkInSync(oldPrimary, newPrimary);
        }
    }
    // simulate transferring the global checkpoint to the new primary after finalizing recovery before the handoff
    markAsTrackingAndInSyncQuietly(oldPrimary, newPrimary.shardAllocationId, Math.max(SequenceNumbers.NO_OPS_PERFORMED, oldPrimary.getGlobalCheckpoint() + randomInt(5)));
    oldPrimary.updateGlobalCheckpointForShard(newPrimary.shardAllocationId, oldPrimary.getGlobalCheckpoint());
    ReplicationTracker.PrimaryContext primaryContext = oldPrimary.startRelocationHandoff(newPrimary.shardAllocationId);
    if (randomBoolean()) {
        // cluster state update after primary context handoff
        if (randomBoolean()) {
            clusterState = randomUpdateClusterState(allocationIds, clusterState);
            clusterState.apply(oldPrimary);
            clusterState.apply(newPrimary);
        }
        // abort handoff, check that we can continue updates and retry handoff
        oldPrimary.abortRelocationHandoff();
        if (rarely()) {
            clusterState = randomUpdateClusterState(allocationIds, clusterState);
            clusterState.apply(oldPrimary);
            clusterState.apply(newPrimary);
        }
        if (randomBoolean()) {
            randomLocalCheckpointUpdate(oldPrimary);
        }
        if (randomBoolean()) {
            randomMarkInSync(oldPrimary, newPrimary);
        }
        // do another handoff
        primaryContext = oldPrimary.startRelocationHandoff(newPrimary.shardAllocationId);
    }
    // send primary context through the wire
    BytesStreamOutput output = new BytesStreamOutput();
    primaryContext.writeTo(output);
    StreamInput streamInput = output.bytes().streamInput();
    primaryContext = new ReplicationTracker.PrimaryContext(streamInput);
    switch (randomInt(3)) {
        case 0: {
            // apply cluster state update on old primary while primary context is being transferred
            clusterState = randomUpdateClusterState(allocationIds, clusterState);
            clusterState.apply(oldPrimary);
            // activate new primary
            newPrimary.activateWithPrimaryContext(primaryContext);
            // apply cluster state update on new primary so that the states on old and new primary are comparable
            clusterState.apply(newPrimary);
            break;
        }
        case 1: {
            // apply cluster state update on new primary while primary context is being transferred
            clusterState = randomUpdateClusterState(allocationIds, clusterState);
            clusterState.apply(newPrimary);
            // activate new primary
            newPrimary.activateWithPrimaryContext(primaryContext);
            // apply cluster state update on old primary so that the states on old and new primary are comparable
            clusterState.apply(oldPrimary);
            break;
        }
        case 2: {
            // apply cluster state update on both copies while primary context is being transferred
            clusterState = randomUpdateClusterState(allocationIds, clusterState);
            clusterState.apply(oldPrimary);
            clusterState.apply(newPrimary);
            newPrimary.activateWithPrimaryContext(primaryContext);
            break;
        }
        case 3: {
            // no cluster state update
            newPrimary.activateWithPrimaryContext(primaryContext);
            break;
        }
    }
    assertTrue(oldPrimary.primaryMode);
    assertTrue(newPrimary.primaryMode);
    assertThat(newPrimary.appliedClusterStateVersion, equalTo(oldPrimary.appliedClusterStateVersion));
    /*
     * We can not assert on shared knowledge of the global checkpoint between the old primary and the new primary as the new primary
     * will update its global checkpoint state without the old primary learning of it, and the old primary could have updated its
     * global checkpoint state after the primary context was transferred.
     */
    Map<String, ReplicationTracker.CheckpointState> oldPrimaryCheckpointsCopy = new HashMap<>(oldPrimary.checkpoints);
    oldPrimaryCheckpointsCopy.remove(oldPrimary.shardAllocationId);
    oldPrimaryCheckpointsCopy.remove(newPrimary.shardAllocationId);
    Map<String, ReplicationTracker.CheckpointState> newPrimaryCheckpointsCopy = new HashMap<>(newPrimary.checkpoints);
    newPrimaryCheckpointsCopy.remove(oldPrimary.shardAllocationId);
    newPrimaryCheckpointsCopy.remove(newPrimary.shardAllocationId);
    assertThat(newPrimaryCheckpointsCopy, equalTo(oldPrimaryCheckpointsCopy));
    // we can however assert that shared knowledge of the local checkpoint and in-sync status is equal
    assertThat(oldPrimary.checkpoints.get(oldPrimary.shardAllocationId).localCheckpoint, equalTo(newPrimary.checkpoints.get(oldPrimary.shardAllocationId).localCheckpoint));
    assertThat(oldPrimary.checkpoints.get(newPrimary.shardAllocationId).localCheckpoint, equalTo(newPrimary.checkpoints.get(newPrimary.shardAllocationId).localCheckpoint));
    assertThat(oldPrimary.checkpoints.get(oldPrimary.shardAllocationId).inSync, equalTo(newPrimary.checkpoints.get(oldPrimary.shardAllocationId).inSync));
    assertThat(oldPrimary.checkpoints.get(newPrimary.shardAllocationId).inSync, equalTo(newPrimary.checkpoints.get(newPrimary.shardAllocationId).inSync));
    assertThat(newPrimary.getGlobalCheckpoint(), equalTo(oldPrimary.getGlobalCheckpoint()));
    assertThat(newPrimary.routingTable, equalTo(oldPrimary.routingTable));
    assertThat(newPrimary.replicationGroup, equalTo(oldPrimary.replicationGroup));
    assertFalse(oldPrimary.relocated);
    oldPrimary.completeRelocationHandoff();
    assertFalse(oldPrimary.primaryMode);
    assertTrue(oldPrimary.relocated);
}
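In this test the onNewRetentionLease callback is deliberately a no-op, because retention-lease syncing is not what the handoff test exercises. For context, a caller that actually syncs leases would eventually complete the supplied listener with a ReplicationResponse. A minimal illustrative sketch follows; the persistRetentionLeases helper and the overall wiring are assumptions for illustration, not the actual OpenSearch implementation:

final BiConsumer<RetentionLeases, ActionListener<ReplicationResponse>> syncingRetentionLeaseCallback = (leases, listener) -> {
    try {
        // hypothetical helper standing in for whatever actually replicates/persists the leases
        persistRetentionLeases(leases);
        // signal completion to the caller, as a production sync path would
        listener.onResponse(new ReplicationResponse());
    } catch (final Exception e) {
        listener.onFailure(e);
    }
};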
Use of org.opensearch.action.support.replication.ReplicationResponse in project OpenSearch by opensearch-project.
Class RetentionLeaseIT, method testRetentionLeaseSyncedOnRemove:
public void testRetentionLeaseSyncedOnRemove() throws Exception {
    final int numberOfReplicas = 2 - scaledRandomIntBetween(0, 2);
    internalCluster().ensureAtLeastNumDataNodes(1 + numberOfReplicas);
    final Settings settings = Settings.builder().put("index.number_of_shards", 1).put("index.number_of_replicas", numberOfReplicas).build();
    createIndex("index", settings);
    ensureGreen("index");
    final String primaryShardNodeId = clusterService().state().routingTable().index("index").shard(0).primaryShard().currentNodeId();
    final String primaryShardNodeName = clusterService().state().nodes().get(primaryShardNodeId).getName();
    final IndexShard primary = internalCluster().getInstance(IndicesService.class, primaryShardNodeName).getShardOrNull(new ShardId(resolveIndex("index"), 0));
    final int length = randomIntBetween(1, 8);
    final Map<String, RetentionLease> currentRetentionLeases = new LinkedHashMap<>();
    for (int i = 0; i < length; i++) {
        final String id = randomValueOtherThanMany(currentRetentionLeases.keySet()::contains, () -> randomAlphaOfLength(8));
        final long retainingSequenceNumber = randomLongBetween(0, Long.MAX_VALUE);
        final String source = randomAlphaOfLength(8);
        final CountDownLatch latch = new CountDownLatch(1);
        final ActionListener<ReplicationResponse> listener = countDownLatchListener(latch);
        // simulate a peer recovery which locks the soft deletes policy on the primary
        final Closeable retentionLock = randomBoolean() ? primary.acquireHistoryRetentionLock() : () -> {};
        currentRetentionLeases.put(id, primary.addRetentionLease(id, retainingSequenceNumber, source, listener));
        latch.await();
        retentionLock.close();
    }
    for (int i = 0; i < length; i++) {
        final String id = randomFrom(currentRetentionLeases.keySet());
        final CountDownLatch latch = new CountDownLatch(1);
        primary.removeRetentionLease(id, countDownLatchListener(latch));
        // simulate a peer recovery which locks the soft deletes policy on the primary
        final Closeable retentionLock = randomBoolean() ? primary.acquireHistoryRetentionLock() : () -> {};
        currentRetentionLeases.remove(id);
        latch.await();
        retentionLock.close();
        // check retention leases have been written on the primary
        assertThat(currentRetentionLeases, equalTo(RetentionLeaseUtils.toMapExcludingPeerRecoveryRetentionLeases(primary.loadRetentionLeases())));
        // check current retention leases have been synced to all replicas
        for (final ShardRouting replicaShard : clusterService().state().routingTable().index("index").shard(0).replicaShards()) {
            final String replicaShardNodeId = replicaShard.currentNodeId();
            final String replicaShardNodeName = clusterService().state().nodes().get(replicaShardNodeId).getName();
            final IndexShard replica = internalCluster().getInstance(IndicesService.class, replicaShardNodeName).getShardOrNull(new ShardId(resolveIndex("index"), 0));
            final Map<String, RetentionLease> retentionLeasesOnReplica = RetentionLeaseUtils.toMapExcludingPeerRecoveryRetentionLeases(replica.getRetentionLeases());
            assertThat(retentionLeasesOnReplica, equalTo(currentRetentionLeases));
            // check retention leases have been written on the replica
            assertThat(currentRetentionLeases, equalTo(RetentionLeaseUtils.toMapExcludingPeerRecoveryRetentionLeases(replica.loadRetentionLeases())));
        }
    }
}
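The countDownLatchListener helper used throughout these tests is not shown in this excerpt. Judging from its call sites, a minimal sketch could look like the following; this is an assumption about its shape, and the real helper's failure handling may differ:

private static ActionListener<ReplicationResponse> countDownLatchListener(final CountDownLatch latch) {
    return new ActionListener<ReplicationResponse>() {

        @Override
        public void onResponse(final ReplicationResponse replicationResponse) {
            // the lease add/remove/sync completed, so release the waiting test thread
            latch.countDown();
        }

        @Override
        public void onFailure(final Exception e) {
            fail("unexpected failure: " + e);
        }
    };
}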
Use of org.opensearch.action.support.replication.ReplicationResponse in project OpenSearch by opensearch-project.
Class RetentionLeaseIT, method runWaitForShardsTest:
private void runWaitForShardsTest(final String idForInitialRetentionLease, final long initialRetainingSequenceNumber, final BiConsumer<IndexShard, ActionListener<ReplicationResponse>> primaryConsumer, final Consumer<IndexShard> afterSync) throws InterruptedException {
    final int numDataNodes = internalCluster().numDataNodes();
    final Settings settings = Settings.builder().put("index.number_of_shards", 1).put("index.number_of_replicas", numDataNodes).put(IndexSettings.INDEX_SOFT_DELETES_SETTING.getKey(), true).put(IndexService.RETENTION_LEASE_SYNC_INTERVAL_SETTING.getKey(), TimeValue.timeValueSeconds(1)).build();
    assertAcked(prepareCreate("index").setSettings(settings));
    ensureYellowAndNoInitializingShards("index");
    assertFalse(client().admin().cluster().prepareHealth("index").setWaitForActiveShards(numDataNodes).get().isTimedOut());
    final String primaryShardNodeId = clusterService().state().routingTable().index("index").shard(0).primaryShard().currentNodeId();
    final String primaryShardNodeName = clusterService().state().nodes().get(primaryShardNodeId).getName();
    final IndexShard primary = internalCluster().getInstance(IndicesService.class, primaryShardNodeName).getShardOrNull(new ShardId(resolveIndex("index"), 0));
    final String source = randomAlphaOfLength(8);
    final CountDownLatch latch = new CountDownLatch(1);
    final ActionListener<ReplicationResponse> listener = countDownLatchListener(latch);
    primary.addRetentionLease(idForInitialRetentionLease, initialRetainingSequenceNumber, source, listener);
    latch.await();
    final String waitForActiveValue = randomBoolean() ? "all" : Integer.toString(numDataNodes + 1);
    client().admin().indices().prepareUpdateSettings("index").setSettings(Settings.builder().put("index.write.wait_for_active_shards", waitForActiveValue).build()).get();
    final CountDownLatch actionLatch = new CountDownLatch(1);
    final AtomicBoolean success = new AtomicBoolean();
    primaryConsumer.accept(primary, new ActionListener<ReplicationResponse>() {

        @Override
        public void onResponse(final ReplicationResponse replicationResponse) {
            success.set(true);
            actionLatch.countDown();
        }

        @Override
        public void onFailure(final Exception e) {
            failWithException(e);
        }
    });
    actionLatch.await();
    assertTrue(success.get());
    afterSync.accept(primary);
}
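runWaitForShardsTest is a parameterized driver: it raises index.write.wait_for_active_shards to a value that cannot be satisfied, runs the supplied primaryConsumer against the primary with a ReplicationResponse listener, and then hands the primary to afterSync for follow-up assertions. A hypothetical caller (illustrative name, body, and assertions, not necessarily a test from the suite) might add a second retention lease while writes are blocked on shard count:

public void testAddRetentionLeaseWhileWaitingForShards() throws InterruptedException {
    final String idForInitialRetentionLease = randomAlphaOfLength(8);
    runWaitForShardsTest(idForInitialRetentionLease, randomLongBetween(0, Long.MAX_VALUE), (primary, listener) -> {
        // the listener is completed by the retention lease sync, proving it is not blocked by wait_for_active_shards
        final String nextId = randomValueOtherThan(idForInitialRetentionLease, () -> randomAlphaOfLength(8));
        primary.addRetentionLease(nextId, randomLongBetween(0, Long.MAX_VALUE), randomAlphaOfLength(8), listener);
    }, primary -> {
        // further assertions on primary.getRetentionLeases() would go here
    });
}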
Use of org.opensearch.action.support.replication.ReplicationResponse in project OpenSearch by opensearch-project.
Class RetentionLeaseIT, method testCanRenewRetentionLeaseUnderBlock:
public void testCanRenewRetentionLeaseUnderBlock() throws InterruptedException {
    final String idForInitialRetentionLease = randomAlphaOfLength(8);
    final long initialRetainingSequenceNumber = randomLongBetween(0, Long.MAX_VALUE);
    final AtomicReference<RetentionLease> retentionLease = new AtomicReference<>();
    runUnderBlockTest(idForInitialRetentionLease, initialRetainingSequenceNumber, (primary, listener) -> {
        final long nextRetainingSequenceNumber = randomLongBetween(initialRetainingSequenceNumber, Long.MAX_VALUE);
        final String nextSource = randomAlphaOfLength(8);
        retentionLease.set(primary.renewRetentionLease(idForInitialRetentionLease, nextRetainingSequenceNumber, nextSource));
        listener.onResponse(new ReplicationResponse());
    }, primary -> {
        try {
            /*
             * If the background renew was able to execute, then the retention leases were persisted to disk. There is no other
             * way for the current retention leases to end up written to disk so we assume that if they are written to disk, it
             * implies that the background sync was able to execute under a block.
             */
            assertBusy(() -> assertThat(RetentionLeaseUtils.toMapExcludingPeerRecoveryRetentionLeases(primary.loadRetentionLeases()).values(), contains(retentionLease.get())));
        } catch (final Exception e) {
            failWithException(e);
        }
    });
}
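failWithException is another helper referenced but not shown here; presumably it simply fails the test with the unexpected exception, roughly along these lines (an assumption about its shape):

private static void failWithException(final Exception e) {
    // surface the unexpected exception as a test failure
    fail("unexpected exception: " + e);
}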
Use of org.opensearch.action.support.replication.ReplicationResponse in project OpenSearch by opensearch-project.
Class RetentionLeaseIT, method testRetentionLeasesSyncedOnAdd:
public void testRetentionLeasesSyncedOnAdd() throws Exception {
    final int numberOfReplicas = 2 - scaledRandomIntBetween(0, 2);
    internalCluster().ensureAtLeastNumDataNodes(1 + numberOfReplicas);
    final Settings settings = Settings.builder().put("index.number_of_shards", 1).put("index.number_of_replicas", numberOfReplicas).put(IndexSettings.INDEX_SOFT_DELETES_SETTING.getKey(), true).build();
    createIndex("index", settings);
    ensureGreen("index");
    final String primaryShardNodeId = clusterService().state().routingTable().index("index").shard(0).primaryShard().currentNodeId();
    final String primaryShardNodeName = clusterService().state().nodes().get(primaryShardNodeId).getName();
    final IndexShard primary = internalCluster().getInstance(IndicesService.class, primaryShardNodeName).getShardOrNull(new ShardId(resolveIndex("index"), 0));
    // we will add multiple retention leases and expect to see them synced to all replicas
    final int length = randomIntBetween(1, 8);
    final Map<String, RetentionLease> currentRetentionLeases = new LinkedHashMap<>();
    for (int i = 0; i < length; i++) {
        final String id = randomValueOtherThanMany(currentRetentionLeases.keySet()::contains, () -> randomAlphaOfLength(8));
        final long retainingSequenceNumber = randomLongBetween(0, Long.MAX_VALUE);
        final String source = randomAlphaOfLength(8);
        final CountDownLatch latch = new CountDownLatch(1);
        final ActionListener<ReplicationResponse> listener = countDownLatchListener(latch);
        // simulate a peer recovery which locks the soft deletes policy on the primary
        final Closeable retentionLock = randomBoolean() ? primary.acquireHistoryRetentionLock() : () -> {};
        currentRetentionLeases.put(id, primary.addRetentionLease(id, retainingSequenceNumber, source, listener));
        latch.await();
        retentionLock.close();
        // check retention leases have been written on the primary
        assertThat(currentRetentionLeases, equalTo(RetentionLeaseUtils.toMapExcludingPeerRecoveryRetentionLeases(primary.loadRetentionLeases())));
        // check current retention leases have been synced to all replicas
        for (final ShardRouting replicaShard : clusterService().state().routingTable().index("index").shard(0).replicaShards()) {
            final String replicaShardNodeId = replicaShard.currentNodeId();
            final String replicaShardNodeName = clusterService().state().nodes().get(replicaShardNodeId).getName();
            final IndexShard replica = internalCluster().getInstance(IndicesService.class, replicaShardNodeName).getShardOrNull(new ShardId(resolveIndex("index"), 0));
            final Map<String, RetentionLease> retentionLeasesOnReplica = RetentionLeaseUtils.toMapExcludingPeerRecoveryRetentionLeases(replica.getRetentionLeases());
            assertThat(retentionLeasesOnReplica, equalTo(currentRetentionLeases));
            // check retention leases have been written on the replica
            assertThat(currentRetentionLeases, equalTo(RetentionLeaseUtils.toMapExcludingPeerRecoveryRetentionLeases(replica.loadRetentionLeases())));
        }
    }
}
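The assertions in these tests repeatedly go through RetentionLeaseUtils.toMapExcludingPeerRecoveryRetentionLeases, which drops the peer-recovery retention leases that the shard maintains on its own so that only the explicitly added leases are compared. A rough sketch of such a utility, inferred from its usage here rather than copied from the OpenSearch source (it assumes the public ReplicationTracker.PEER_RECOVERY_RETENTION_LEASE_ID_PREFIX constant and would need java.util.stream.Collectors and java.util.function.Function imports):

static Map<String, RetentionLease> toMapExcludingPeerRecoveryRetentionLeases(final RetentionLeases retentionLeases) {
    return retentionLeases.leases()
        .stream()
        // keep only the leases added explicitly by the tests, not the engine-managed peer-recovery leases
        .filter(lease -> lease.id().startsWith(ReplicationTracker.PEER_RECOVERY_RETENTION_LEASE_ID_PREFIX) == false)
        .collect(Collectors.toMap(RetentionLease::id, Function.identity(), (left, right) -> {
            throw new AssertionError("unexpectedly duplicate retention lease id");
        }, LinkedHashMap::new));
}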