use of org.opensearch.cluster.routing.AllocationId in project OpenSearch by opensearch-project.
the class ReplicationTrackerRetentionLeaseTests method testRemoveRetentionLeaseCausesRetentionLeaseSync.
/**
 * Checks that adding a retention lease and then removing it each invoke the retention lease sync
 * callback, and that on every invocation the callback is handed exactly the expected set of leases.
 */
public void testRemoveRetentionLeaseCausesRetentionLeaseSync() {
    final AllocationId allocationId = AllocationId.newInitializing();
    // lease id -> retaining sequence number that the sync callback is expected to observe
    final Map<String, Long> expectedLeases = new HashMap<>();
    final AtomicBoolean syncInvoked = new AtomicBoolean();
    // lets the callback reach the tracker, which does not exist yet when the callback is defined
    final AtomicReference<ReplicationTracker> trackerRef = new AtomicReference<>();
    final ReplicationTracker tracker = new ReplicationTracker(
        new ShardId("test", "_na", 0),
        allocationId.getId(),
        IndexSettingsModule.newIndexSettings("test", Settings.EMPTY),
        randomNonNegativeLong(),
        UNASSIGNED_SEQ_NO,
        value -> {},
        () -> 0L,
        (leases, listener) -> {
            // the callback must not run while this thread holds the tracker's monitor
            assertFalse(Thread.holdsLock(trackerRef.get()));
            syncInvoked.set(true);
            final Map<String, Long> observedLeases = leases.leases()
                .stream()
                .collect(Collectors.toMap(RetentionLease::id, RetentionLease::retainingSequenceNumber));
            assertThat(observedLeases, equalTo(expectedLeases));
        },
        OPS_BASED_RECOVERY_ALWAYS_REASONABLE
    );
    trackerRef.set(tracker);

    tracker.updateFromMaster(
        randomNonNegativeLong(),
        Collections.singleton(allocationId.getId()),
        routingTable(Collections.emptySet(), allocationId)
    );
    tracker.activatePrimaryMode(SequenceNumbers.NO_OPS_PERFORMED);
    // the test expects a peer recovery retention lease for this copy, retaining from sequence number 0
    expectedLeases.put(ReplicationTracker.getPeerRecoveryRetentionLeaseId(nodeIdFromAllocationId(allocationId)), 0L);

    final int rounds = randomIntBetween(0, 8);
    for (int round = 0; round < rounds; round++) {
        final String leaseId = randomAlphaOfLength(8);
        final long retainingSeqNo = randomLongBetween(SequenceNumbers.NO_OPS_PERFORMED, Long.MAX_VALUE);

        expectedLeases.put(leaseId, retainingSeqNo);
        tracker.addRetentionLease(leaseId, retainingSeqNo, "test", ActionListener.wrap(() -> {}));
        // adding the lease must have triggered the sync callback
        assertTrue(syncInvoked.get());

        // clear the marker so that the next assertion only observes the sync caused by the removal
        syncInvoked.set(false);
        expectedLeases.remove(leaseId);
        tracker.removeRetentionLease(leaseId, ActionListener.wrap(() -> {}));
        // removing the lease must trigger the sync callback as well
        assertTrue(syncInvoked.get());
    }
}
use of org.opensearch.cluster.routing.AllocationId in project OpenSearch by opensearch-project.
the class ReplicationTrackerRetentionLeaseTests method testCloneRetentionLease.
/**
 * Checks that cloning a retention lease triggers a retention lease sync and yields a lease that
 * carries the target id, the source lease's retaining sequence number and source string, and the
 * time at which the clone was taken.
 */
public void testCloneRetentionLease() {
    final AllocationId allocationId = AllocationId.newInitializing();
    final AtomicReference<ReplicationTracker> trackerHolder = new AtomicReference<>();
    // controllable clock handed to the tracker as its time supplier
    final AtomicLong clock = new AtomicLong();
    final AtomicBoolean syncTriggered = new AtomicBoolean();
    final ReplicationTracker tracker = new ReplicationTracker(
        new ShardId("test", "_na", 0),
        allocationId.getId(),
        IndexSettingsModule.newIndexSettings("test", Settings.EMPTY),
        randomLongBetween(1, Long.MAX_VALUE),
        UNASSIGNED_SEQ_NO,
        value -> {},
        clock::get,
        (leases, listener) -> {
            // the sync callback must not run while this thread holds the tracker's monitor
            assertFalse(Thread.holdsLock(trackerHolder.get()));
            // fails if a sync fires twice without the test resetting the flag in between
            assertTrue(syncTriggered.compareAndSet(false, true));
            listener.onResponse(new ReplicationResponse());
        },
        OPS_BASED_RECOVERY_ALWAYS_REASONABLE
    );
    trackerHolder.set(tracker);

    tracker.updateFromMaster(
        randomNonNegativeLong(),
        Collections.singleton(allocationId.getId()),
        routingTable(Collections.emptySet(), allocationId)
    );
    tracker.activatePrimaryMode(SequenceNumbers.NO_OPS_PERFORMED);

    // add the source lease at a time no earlier than the current clock value
    final long addTime = randomLongBetween(clock.get(), Long.MAX_VALUE);
    clock.set(addTime);
    final long minimumRetainingSequenceNumber = randomLongBetween(SequenceNumbers.NO_OPS_PERFORMED, Long.MAX_VALUE);
    final PlainActionFuture<ReplicationResponse> addFuture = new PlainActionFuture<>();
    tracker.addRetentionLease("source", minimumRetainingSequenceNumber, "test-source", addFuture);
    addFuture.actionGet();
    assertTrue(syncTriggered.get());
    syncTriggered.set(false);

    // advance the clock (possibly by zero) and clone the source lease into "target"
    final long cloneTime = randomLongBetween(clock.get(), Long.MAX_VALUE);
    clock.set(cloneTime);
    final PlainActionFuture<ReplicationResponse> cloneFuture = new PlainActionFuture<>();
    final RetentionLease clone = tracker.cloneRetentionLease("source", "target", cloneFuture);
    cloneFuture.actionGet();
    assertTrue(syncTriggered.get());
    syncTriggered.set(false);

    // the clone takes its id and timestamp from the clone call, everything else from the source lease
    assertThat(clone.id(), equalTo("target"));
    assertThat(clone.retainingSequenceNumber(), equalTo(minimumRetainingSequenceNumber));
    assertThat(clone.timestamp(), equalTo(cloneTime));
    assertThat(clone.source(), equalTo("test-source"));
    assertThat(tracker.getRetentionLeases().get("target"), equalTo(clone));
}
use of org.opensearch.cluster.routing.AllocationId in project OpenSearch by opensearch-project.
the class ReplicationTrackerTests method testMissingInSyncIdsPreventAdvance.
/**
 * Marks a random subset of the initializing copies as tracking and in-sync, then checks that the
 * global checkpoint is still {@code NO_OPS_PERFORMED} after only the active copies have reported
 * their local checkpoints, and is no longer {@code UNASSIGNED_SEQ_NO} once the initializing
 * copies have reported theirs too.
 */
public void testMissingInSyncIdsPreventAdvance() {
    final Map<AllocationId, Long> active = randomAllocationsWithLocalCheckpoints(1, 5);
    final Map<AllocationId, Long> initializing = randomAllocationsWithLocalCheckpoints(2, 5);
    logger.info("active: {}, initializing: {}", active, initializing);

    final AllocationId primaryId = active.keySet().iterator().next();
    final ReplicationTracker tracker = newTracker(primaryId);
    final long clusterStateVersion = randomNonNegativeLong();
    tracker.updateFromMaster(clusterStateVersion, ids(active.keySet()), routingTable(initializing.keySet(), primaryId));
    tracker.activatePrimaryMode(NO_OPS_PERFORMED);

    // bring at least one, but never all, of the initializing copies in sync without a known checkpoint
    final int inSyncCount = randomIntBetween(1, initializing.size() - 1);
    for (final AllocationId aId : randomSubsetOf(inSyncCount, initializing.keySet())) {
        markAsTrackingAndInSyncQuietly(tracker, aId.getId(), NO_OPS_PERFORMED);
    }

    // only the active copies report their local checkpoints
    for (final Map.Entry<AllocationId, Long> entry : active.entrySet()) {
        updateLocalCheckpoint(tracker, entry.getKey().getId(), entry.getValue());
    }
    assertThat(tracker.getGlobalCheckpoint(), equalTo(NO_OPS_PERFORMED));
    assertThat(updatedGlobalCheckpoint.get(), equalTo(NO_OPS_PERFORMED));

    // now the initializing copies report theirs as well
    for (final Map.Entry<AllocationId, Long> entry : initializing.entrySet()) {
        updateLocalCheckpoint(tracker, entry.getKey().getId(), entry.getValue());
    }
    assertThat(tracker.getGlobalCheckpoint(), not(equalTo(UNASSIGNED_SEQ_NO)));
    assertThat(updatedGlobalCheckpoint.get(), not(equalTo(UNASSIGNED_SEQ_NO)));
}
use of org.opensearch.cluster.routing.AllocationId in project OpenSearch by opensearch-project.
the class ReplicationTrackerTests method testWaitForAllocationIdToBeInSyncCanBeInterrupted.
/**
 * Starts a thread that tries to mark a tracked copy as in-sync with a local checkpoint chosen
 * below the global checkpoint, interrupts that thread, and checks that the call surfaced the
 * interruption as an {@link InterruptedException}.
 *
 * @throws BrokenBarrierException if the barrier shared with the waiting thread is broken
 * @throws InterruptedException   if the test thread is interrupted while waiting on the barrier or joining
 */
public void testWaitForAllocationIdToBeInSyncCanBeInterrupted() throws BrokenBarrierException, InterruptedException {
    final int localCheckpoint = randomIntBetween(1, 32);
    // always strictly greater than the local checkpoint drawn above
    final int globalCheckpoint = randomIntBetween(localCheckpoint + 1, 64);
    final CyclicBarrier rendezvous = new CyclicBarrier(2);
    final AtomicBoolean sawInterrupt = new AtomicBoolean();
    final AllocationId primaryAllocationId = AllocationId.newInitializing();
    final AllocationId recoveringAllocationId = AllocationId.newInitializing();

    final ReplicationTracker tracker = newTracker(primaryAllocationId);
    tracker.updateFromMaster(
        randomNonNegativeLong(),
        Collections.singleton(primaryAllocationId.getId()),
        routingTable(Collections.singleton(recoveringAllocationId), primaryAllocationId)
    );
    tracker.activatePrimaryMode(globalCheckpoint);
    addPeerRecoveryRetentionLease(tracker, recoveringAllocationId);

    final Runnable waiter = () -> {
        // first rendezvous: start together with the test thread
        try {
            rendezvous.await();
        } catch (final BrokenBarrierException | InterruptedException e) {
            throw new RuntimeException(e);
        }

        // presumably blocks until the copy catches up; the test thread interrupts it instead
        try {
            tracker.initiateTracking(recoveringAllocationId.getId());
            tracker.markAllocationIdAsInSync(recoveringAllocationId.getId(), localCheckpoint);
        } catch (final InterruptedException e) {
            sawInterrupt.set(true);
        }

        // second rendezvous: tell the test thread that the outcome has been recorded
        try {
            rendezvous.await();
        } catch (final BrokenBarrierException | InterruptedException e) {
            throw new RuntimeException(e);
        }
    };
    final Thread waitingThread = new Thread(waiter);
    waitingThread.start();

    // first rendezvous: the waiting thread is running
    rendezvous.await();
    waitingThread.interrupt();
    // second rendezvous: the waiting thread has finished its in-sync attempt
    rendezvous.await();

    assertTrue(sawInterrupt.get());
    waitingThread.join();
}
use of org.opensearch.cluster.routing.AllocationId in project OpenSearch by opensearch-project.
the class ReplicationTrackerTests method testUpdateAllocationIdsFromMaster.
/**
 * Walks a tracker through a series of cluster state updates and checks after each one that the
 * in-sync and tracked allocation IDs, their local checkpoints, and the global checkpoint are as
 * expected: initial setup, removal of some active and initializing IDs, addition of a new
 * initializing ID, local checkpoint updates, and finally bringing a new copy in sync from a
 * separate thread.
 *
 * @throws Exception if a barrier rendezvous, {@code assertBusy}, or joining the worker thread fails
 */
public void testUpdateAllocationIdsFromMaster() throws Exception {
    final long initialClusterStateVersion = randomNonNegativeLong();
    final int numberOfActiveAllocationsIds = randomIntBetween(2, 16);
    final int numberOfInitializingIds = randomIntBetween(2, 16);
    final Tuple<Set<AllocationId>, Set<AllocationId>> activeAndInitializingAllocationIds = randomActiveAndInitializingAllocationIds(
        numberOfActiveAllocationsIds,
        numberOfInitializingIds
    );
    final Set<AllocationId> activeAllocationIds = activeAndInitializingAllocationIds.v1();
    final Set<AllocationId> initializingIds = activeAndInitializingAllocationIds.v2();
    AllocationId primaryId = activeAllocationIds.iterator().next();
    IndexShardRoutingTable routingTable = routingTable(initializingIds, primaryId);
    final ReplicationTracker tracker = newTracker(primaryId);
    tracker.updateFromMaster(initialClusterStateVersion, ids(activeAllocationIds), routingTable);
    tracker.activatePrimaryMode(NO_OPS_PERFORMED);
    assertThat(tracker.getReplicationGroup().getInSyncAllocationIds(), equalTo(ids(activeAllocationIds)));
    assertThat(tracker.getReplicationGroup().getRoutingTable(), equalTo(routingTable));

    // first we assert that the in-sync and tracking sets are set up correctly
    assertTrue(activeAllocationIds.stream().allMatch(a -> tracker.getTrackedLocalCheckpointForShard(a.getId()).inSync));
    assertTrue(
        activeAllocationIds.stream()
            .filter(a -> a.equals(primaryId) == false)
            .allMatch(a -> tracker.getTrackedLocalCheckpointForShard(a.getId()).getLocalCheckpoint() == SequenceNumbers.UNASSIGNED_SEQ_NO)
    );
    assertTrue(initializingIds.stream().noneMatch(a -> tracker.getTrackedLocalCheckpointForShard(a.getId()).inSync));
    assertTrue(
        initializingIds.stream()
            .filter(a -> a.equals(primaryId) == false)
            .allMatch(a -> tracker.getTrackedLocalCheckpointForShard(a.getId()).getLocalCheckpoint() == SequenceNumbers.UNASSIGNED_SEQ_NO)
    );

    // now we will remove some allocation IDs from these and ensure that they propagate through
    final Set<AllocationId> removingActiveAllocationIds = new HashSet<>(randomSubsetOf(activeAllocationIds));
    // the primary itself is never removed
    removingActiveAllocationIds.remove(primaryId);
    final Set<AllocationId> newActiveAllocationIds = activeAllocationIds.stream()
        .filter(a -> !removingActiveAllocationIds.contains(a))
        .collect(Collectors.toSet());
    final List<AllocationId> removingInitializingAllocationIds = randomSubsetOf(initializingIds);
    final Set<AllocationId> newInitializingAllocationIds = initializingIds.stream()
        .filter(a -> !removingInitializingAllocationIds.contains(a))
        .collect(Collectors.toSet());
    routingTable = routingTable(newInitializingAllocationIds, primaryId);
    tracker.updateFromMaster(initialClusterStateVersion + 1, ids(newActiveAllocationIds), routingTable);
    assertTrue(newActiveAllocationIds.stream().allMatch(a -> tracker.getTrackedLocalCheckpointForShard(a.getId()).inSync));
    assertTrue(removingActiveAllocationIds.stream().allMatch(a -> tracker.getTrackedLocalCheckpointForShard(a.getId()) == null));
    assertTrue(newInitializingAllocationIds.stream().noneMatch(a -> tracker.getTrackedLocalCheckpointForShard(a.getId()).inSync));
    assertTrue(removingInitializingAllocationIds.stream().allMatch(a -> tracker.getTrackedLocalCheckpointForShard(a.getId()) == null));
    assertThat(
        tracker.getReplicationGroup().getInSyncAllocationIds(),
        equalTo(ids(Sets.difference(Sets.union(activeAllocationIds, newActiveAllocationIds), removingActiveAllocationIds)))
    );
    assertThat(tracker.getReplicationGroup().getRoutingTable(), equalTo(routingTable));

    /*
     * Now we will add a newly created allocation ID to the initializing set and ensure that it propagates through.
     */
    newInitializingAllocationIds.add(AllocationId.newInitializing());
    tracker.updateFromMaster(
        initialClusterStateVersion + 2,
        ids(newActiveAllocationIds),
        routingTable(newInitializingAllocationIds, primaryId)
    );
    assertTrue(newActiveAllocationIds.stream().allMatch(a -> tracker.getTrackedLocalCheckpointForShard(a.getId()).inSync));
    assertTrue(
        newActiveAllocationIds.stream()
            .filter(a -> a.equals(primaryId) == false)
            .allMatch(a -> tracker.getTrackedLocalCheckpointForShard(a.getId()).getLocalCheckpoint() == SequenceNumbers.UNASSIGNED_SEQ_NO)
    );
    assertTrue(newInitializingAllocationIds.stream().noneMatch(a -> tracker.getTrackedLocalCheckpointForShard(a.getId()).inSync));
    assertTrue(
        newInitializingAllocationIds.stream()
            .allMatch(a -> tracker.getTrackedLocalCheckpointForShard(a.getId()).getLocalCheckpoint() == SequenceNumbers.UNASSIGNED_SEQ_NO)
    );

    // the tracking allocation IDs should play no role in determining the global checkpoint
    final Map<AllocationId, Integer> activeLocalCheckpoints = newActiveAllocationIds.stream()
        .collect(Collectors.toMap(Function.identity(), a -> randomIntBetween(1, 1024)));
    activeLocalCheckpoints.forEach((a, l) -> updateLocalCheckpoint(tracker, a.getId(), l));
    final Map<AllocationId, Integer> initializingLocalCheckpoints = newInitializingAllocationIds.stream()
        .collect(Collectors.toMap(Function.identity(), a -> randomIntBetween(1, 1024)));
    initializingLocalCheckpoints.forEach((a, l) -> updateLocalCheckpoint(tracker, a.getId(), l));
    assertTrue(
        activeLocalCheckpoints.entrySet()
            .stream()
            .allMatch(e -> tracker.getTrackedLocalCheckpointForShard(e.getKey().getId()).getLocalCheckpoint() == e.getValue())
    );
    assertTrue(
        initializingLocalCheckpoints.entrySet()
            .stream()
            .allMatch(e -> tracker.getTrackedLocalCheckpointForShard(e.getKey().getId()).getLocalCheckpoint() == e.getValue())
    );
    final long minimumActiveLocalCheckpoint = (long) activeLocalCheckpoints.values().stream().min(Integer::compareTo).get();
    assertThat(tracker.getGlobalCheckpoint(), equalTo(minimumActiveLocalCheckpoint));
    assertThat(updatedGlobalCheckpoint.get(), equalTo(minimumActiveLocalCheckpoint));
    final long minimumInitializingLocalCheckpoint = (long) initializingLocalCheckpoints.values().stream().min(Integer::compareTo).get();

    // now we are going to add a new allocation ID and bring it in sync which should move it to the in-sync allocation IDs
    // the starting local checkpoint is strictly below every checkpoint reported above
    final long localCheckpoint = randomIntBetween(
        0,
        Math.toIntExact(Math.min(minimumActiveLocalCheckpoint, minimumInitializingLocalCheckpoint) - 1)
    );
    // a newly created allocation ID that has not been handed to the tracker before
    final AllocationId newSyncingAllocationId = AllocationId.newInitializing();
    newInitializingAllocationIds.add(newSyncingAllocationId);
    tracker.updateFromMaster(
        initialClusterStateVersion + 3,
        ids(newActiveAllocationIds),
        routingTable(newInitializingAllocationIds, primaryId)
    );
    addPeerRecoveryRetentionLease(tracker, newSyncingAllocationId);
    final CyclicBarrier barrier = new CyclicBarrier(2);
    final Thread thread = new Thread(() -> {
        try {
            // start together with the test thread
            barrier.await();
            tracker.initiateTracking(newSyncingAllocationId.getId());
            tracker.markAllocationIdAsInSync(newSyncingAllocationId.getId(), localCheckpoint);
            // signal the test thread that the copy has been marked in sync
            barrier.await();
        } catch (final BrokenBarrierException | InterruptedException e) {
            throw new RuntimeException(e);
        }
    });
    thread.start();
    barrier.await();

    // while the copy lags behind it is expected to be pending in-sync but not yet in sync
    assertBusy(() -> {
        assertTrue(tracker.pendingInSync.contains(newSyncingAllocationId.getId()));
        assertFalse(tracker.getTrackedLocalCheckpointForShard(newSyncingAllocationId.getId()).inSync);
    });

    // report a local checkpoint at or above the minimum active local checkpoint so that the worker can complete
    tracker.updateLocalCheckpoint(newSyncingAllocationId.getId(), randomIntBetween(Math.toIntExact(minimumActiveLocalCheckpoint), 1024));
    barrier.await();
    // wait for the worker to terminate so that it cannot outlive this test method
    thread.join();
    assertFalse(tracker.pendingInSync.contains(newSyncingAllocationId.getId()));
    assertTrue(tracker.getTrackedLocalCheckpointForShard(newSyncingAllocationId.getId()).inSync);

    /*
     * The new in-sync allocation ID is in the in-sync set now yet the master does not know this; the allocation ID should still be in
     * the in-sync set even if we receive a cluster state update that does not reflect this.
     */
    tracker.updateFromMaster(
        initialClusterStateVersion + 4,
        ids(newActiveAllocationIds),
        routingTable(newInitializingAllocationIds, primaryId)
    );
    assertTrue(tracker.getTrackedLocalCheckpointForShard(newSyncingAllocationId.getId()).inSync);
    assertFalse(tracker.pendingInSync.contains(newSyncingAllocationId.getId()));
}
Aggregations