Search in sources :

Example 1 with AllocationId

use of org.opensearch.cluster.routing.AllocationId in project OpenSearch by opensearch-project.

the class ReplicationTrackerRetentionLeaseTests method testRemoveRetentionLeaseCausesRetentionLeaseSync.

/**
 * Checks that removing a retention lease on a primary invokes the retention lease sync callback, just as adding one does, and that
 * the leases handed to the callback match the leases expected to exist at that moment.
 */
public void testRemoveRetentionLeaseCausesRetentionLeaseSync() {
    final AllocationId primaryAllocation = AllocationId.newInitializing();
    final Map<String, Long> expectedRetainingSeqNos = new HashMap<>();
    final AtomicBoolean syncCalled = new AtomicBoolean();
    final AtomicReference<ReplicationTracker> trackerHolder = new AtomicReference<>();
    final ShardId shardId = new ShardId("test", "_na", 0);
    final ReplicationTracker tracker = new ReplicationTracker(
        shardId,
        primaryAllocation.getId(),
        IndexSettingsModule.newIndexSettings("test", Settings.EMPTY),
        randomNonNegativeLong(),
        UNASSIGNED_SEQ_NO,
        value -> {},
        () -> 0L,
        (leases, listener) -> {
            // the sync callback must never run while the replication tracker's monitor is held
            assertFalse(Thread.holdsLock(trackerHolder.get()));
            syncCalled.set(true);
            final Map<String, Long> syncedRetainingSeqNos = leases.leases()
                .stream()
                .collect(Collectors.toMap(RetentionLease::id, RetentionLease::retainingSequenceNumber));
            assertThat(syncedRetainingSeqNos, equalTo(expectedRetainingSeqNos));
        },
        OPS_BASED_RECOVERY_ALWAYS_REASONABLE
    );
    trackerHolder.set(tracker);
    tracker.updateFromMaster(
        randomNonNegativeLong(),
        Collections.singleton(primaryAllocation.getId()),
        routingTable(Collections.emptySet(), primaryAllocation)
    );
    tracker.activatePrimaryMode(SequenceNumbers.NO_OPS_PERFORMED);
    // from here on the primary's own peer recovery retention lease is expected to be part of every sync
    expectedRetainingSeqNos.put(ReplicationTracker.getPeerRecoveryRetentionLeaseId(nodeIdFromAllocationId(primaryAllocation)), 0L);
    final int rounds = randomIntBetween(0, 8);
    int round = 0;
    while (round < rounds) {
        final String leaseId = randomAlphaOfLength(8);
        final long retainingSeqNo = randomLongBetween(SequenceNumbers.NO_OPS_PERFORMED, Long.MAX_VALUE);
        expectedRetainingSeqNos.put(leaseId, retainingSeqNo);
        tracker.addRetentionLease(leaseId, retainingSeqNo, "test", ActionListener.wrap(() -> {}));
        // adding the lease must have triggered a sync; clear the marker so the removal below can be checked independently
        assertTrue(syncCalled.compareAndSet(true, false));
        expectedRetainingSeqNos.remove(leaseId);
        tracker.removeRetentionLease(leaseId, ActionListener.wrap(() -> {}));
        // removing the lease must trigger a sync as well
        assertTrue(syncCalled.get());
        round++;
    }
}
Also used : HashMap(java.util.HashMap) AllocationId(org.opensearch.cluster.routing.AllocationId) AtomicReference(java.util.concurrent.atomic.AtomicReference) Matchers.hasToString(org.hamcrest.Matchers.hasToString) Matchers.containsString(org.hamcrest.Matchers.containsString) ShardId(org.opensearch.index.shard.ShardId) AtomicBoolean(java.util.concurrent.atomic.AtomicBoolean) AtomicLong(java.util.concurrent.atomic.AtomicLong)

Example 2 with AllocationId

use of org.opensearch.cluster.routing.AllocationId in project OpenSearch by opensearch-project.

the class ReplicationTrackerRetentionLeaseTests method testCloneRetentionLease.

/**
 * Checks that cloning a retention lease produces a lease under the target id that carries the source lease's retaining sequence
 * number and source, is timestamped with the time at which the clone happened, is stored in the tracker, and triggers a sync.
 */
public void testCloneRetentionLease() {
    final AllocationId primaryAllocation = AllocationId.newInitializing();
    final AtomicReference<ReplicationTracker> trackerHolder = new AtomicReference<>();
    final AtomicLong clock = new AtomicLong();
    final AtomicBoolean syncCalled = new AtomicBoolean();
    final ShardId shardId = new ShardId("test", "_na", 0);
    final ReplicationTracker tracker = new ReplicationTracker(
        shardId,
        primaryAllocation.getId(),
        IndexSettingsModule.newIndexSettings("test", Settings.EMPTY),
        randomLongBetween(1, Long.MAX_VALUE),
        UNASSIGNED_SEQ_NO,
        value -> {},
        clock::get,
        (leases, listener) -> {
            // the sync callback must never run while the replication tracker's monitor is held
            assertFalse(Thread.holdsLock(trackerHolder.get()));
            // fails if a second sync arrives before the test has cleared the marker
            assertTrue(syncCalled.compareAndSet(false, true));
            listener.onResponse(new ReplicationResponse());
        },
        OPS_BASED_RECOVERY_ALWAYS_REASONABLE
    );
    trackerHolder.set(tracker);
    tracker.updateFromMaster(
        randomNonNegativeLong(),
        Collections.singleton(primaryAllocation.getId()),
        routingTable(Collections.emptySet(), primaryAllocation)
    );
    tracker.activatePrimaryMode(SequenceNumbers.NO_OPS_PERFORMED);

    // move the clock forward to the moment the source lease is added
    clock.set(randomLongBetween(clock.get(), Long.MAX_VALUE));
    final long retainingSeqNo = randomLongBetween(SequenceNumbers.NO_OPS_PERFORMED, Long.MAX_VALUE);
    final PlainActionFuture<ReplicationResponse> addResult = new PlainActionFuture<>();
    tracker.addRetentionLease("source", retainingSeqNo, "test-source", addResult);
    addResult.actionGet();
    assertTrue(syncCalled.compareAndSet(true, false));

    // move the clock forward again so the clone gets its own timestamp
    final long cloneTime = randomLongBetween(clock.get(), Long.MAX_VALUE);
    clock.set(cloneTime);
    final PlainActionFuture<ReplicationResponse> cloneResult = new PlainActionFuture<>();
    final RetentionLease cloned = tracker.cloneRetentionLease("source", "target", cloneResult);
    cloneResult.actionGet();
    assertTrue(syncCalled.compareAndSet(true, false));

    assertThat(cloned.id(), equalTo("target"));
    assertThat(cloned.retainingSequenceNumber(), equalTo(retainingSeqNo));
    assertThat(cloned.timestamp(), equalTo(cloneTime));
    assertThat(cloned.source(), equalTo("test-source"));
    final RetentionLease stored = tracker.getRetentionLeases().get("target");
    assertThat(stored, equalTo(cloned));
}
Also used : AllocationId(org.opensearch.cluster.routing.AllocationId) AtomicReference(java.util.concurrent.atomic.AtomicReference) ReplicationResponse(org.opensearch.action.support.replication.ReplicationResponse) ShardId(org.opensearch.index.shard.ShardId) AtomicBoolean(java.util.concurrent.atomic.AtomicBoolean) AtomicLong(java.util.concurrent.atomic.AtomicLong) PlainActionFuture(org.opensearch.action.support.PlainActionFuture)

Example 3 with AllocationId

use of org.opensearch.cluster.routing.AllocationId in project OpenSearch by opensearch-project.

the class ReplicationTrackerTests method testMissingInSyncIdsPreventAdvance.

/**
 * Checks that the global checkpoint stays at {@code NO_OPS_PERFORMED} while only the active copies have reported their local
 * checkpoints and some initializing copies that were marked in-sync have not, and that it is no longer {@code UNASSIGNED_SEQ_NO}
 * once the initializing copies have reported theirs as well.
 */
public void testMissingInSyncIdsPreventAdvance() {
    final Map<AllocationId, Long> activeCheckpoints = randomAllocationsWithLocalCheckpoints(1, 5);
    final Map<AllocationId, Long> initializingCheckpoints = randomAllocationsWithLocalCheckpoints(2, 5);
    logger.info("active: {}, initializing: {}", activeCheckpoints, initializingCheckpoints);

    final AllocationId primary = activeCheckpoints.keySet().iterator().next();
    final ReplicationTracker replicationTracker = newTracker(primary);
    replicationTracker.updateFromMaster(
        randomNonNegativeLong(),
        ids(activeCheckpoints.keySet()),
        routingTable(initializingCheckpoints.keySet(), primary)
    );
    replicationTracker.activatePrimaryMode(NO_OPS_PERFORMED);

    // mark a non-empty, strict subset of the initializing copies as in-sync without giving them a real local checkpoint
    final int inSyncCount = randomIntBetween(1, initializingCheckpoints.size() - 1);
    for (final AllocationId allocation : randomSubsetOf(inSyncCount, initializingCheckpoints.keySet())) {
        markAsTrackingAndInSyncQuietly(replicationTracker, allocation.getId(), NO_OPS_PERFORMED);
    }

    // only the active copies report their local checkpoints
    for (final Map.Entry<AllocationId, Long> entry : activeCheckpoints.entrySet()) {
        updateLocalCheckpoint(replicationTracker, entry.getKey().getId(), entry.getValue());
    }
    assertThat(replicationTracker.getGlobalCheckpoint(), equalTo(NO_OPS_PERFORMED));
    assertThat(updatedGlobalCheckpoint.get(), equalTo(NO_OPS_PERFORMED));

    // now the initializing copies report theirs too
    for (final Map.Entry<AllocationId, Long> entry : initializingCheckpoints.entrySet()) {
        updateLocalCheckpoint(replicationTracker, entry.getKey().getId(), entry.getValue());
    }
    assertThat(replicationTracker.getGlobalCheckpoint(), not(equalTo(UNASSIGNED_SEQ_NO)));
    assertThat(updatedGlobalCheckpoint.get(), not(equalTo(UNASSIGNED_SEQ_NO)));
}
Also used : AllocationId(org.opensearch.cluster.routing.AllocationId) AtomicLong(java.util.concurrent.atomic.AtomicLong)

Example 4 with AllocationId

use of org.opensearch.cluster.routing.AllocationId in project OpenSearch by opensearch-project.

the class ReplicationTrackerTests method testWaitForAllocationIdToBeInSyncCanBeInterrupted.

/**
 * Verifies that a thread calling {@code markAllocationIdAsInSync} observes an {@link InterruptedException} when it is interrupted.
 * <p>
 * The tracking copy reports a local checkpoint drawn from a range strictly below the global checkpoint the primary is activated
 * with, so the call is presumably expected to wait (NOTE(review): the waiting itself happens inside the tracker and is not visible
 * here). A two-party {@link CyclicBarrier} lines up the start and the end of the worker thread with the test thread.
 *
 * @throws BrokenBarrierException if the barrier is broken while the test thread waits on it
 * @throws InterruptedException   if the test thread is interrupted while waiting on the barrier or joining the worker
 */
public void testWaitForAllocationIdToBeInSyncCanBeInterrupted() throws BrokenBarrierException, InterruptedException {
    final int localCheckpoint = randomIntBetween(1, 32);
    // lower bound is localCheckpoint + 1, so the global checkpoint is drawn from above the local checkpoint
    final int globalCheckpoint = randomIntBetween(localCheckpoint + 1, 64);
    final CyclicBarrier barrier = new CyclicBarrier(2);
    final AtomicBoolean interrupted = new AtomicBoolean();
    final AllocationId inSyncAllocationId = AllocationId.newInitializing();
    final AllocationId trackingAllocationId = AllocationId.newInitializing();
    final ReplicationTracker tracker = newTracker(inSyncAllocationId);
    tracker.updateFromMaster(randomNonNegativeLong(), Collections.singleton(inSyncAllocationId.getId()), routingTable(Collections.singleton(trackingAllocationId), inSyncAllocationId));
    tracker.activatePrimaryMode(globalCheckpoint);
    addPeerRecoveryRetentionLease(tracker, trackingAllocationId);
    final Thread thread = new Thread(() -> {
        try {
            // synchronize starting with the test thread
            barrier.await();
        } catch (final BrokenBarrierException | InterruptedException e) {
            throw new RuntimeException(e);
        }
        try {
            tracker.initiateTracking(trackingAllocationId.getId());
            tracker.markAllocationIdAsInSync(trackingAllocationId.getId(), localCheckpoint);
        } catch (final InterruptedException e) {
            // record the interruption; the interrupt status is deliberately not restored here, because the barrier.await() below
            // would otherwise throw InterruptedException itself
            interrupted.set(true);
        }
        // synchronize with the test thread checking if we are interrupted
        try {
            barrier.await();
        } catch (final BrokenBarrierException | InterruptedException e) {
            throw new RuntimeException(e);
        }
    });
    thread.start();
    // synchronize starting with the waiting thread
    barrier.await();
    thread.interrupt();
    // synchronize with the waiting thread to mark that it is complete
    barrier.await();
    assertTrue(interrupted.get());
    thread.join();
}
Also used : AtomicBoolean(java.util.concurrent.atomic.AtomicBoolean) BrokenBarrierException(java.util.concurrent.BrokenBarrierException) AllocationId(org.opensearch.cluster.routing.AllocationId) CyclicBarrier(java.util.concurrent.CyclicBarrier)

Example 5 with AllocationId

use of org.opensearch.cluster.routing.AllocationId in project OpenSearch by opensearch-project.

the class ReplicationTrackerTests method testUpdateAllocationIdsFromMaster.

/**
 * Exercises {@code updateFromMaster} on a tracker in primary mode across several cluster state versions:
 * <ol>
 *   <li>initial set-up of the in-sync (active) and tracked (initializing) allocation IDs;</li>
 *   <li>removal of a random subset of active and initializing allocation IDs (never the primary);</li>
 *   <li>addition of a fresh initializing allocation ID;</li>
 *   <li>global checkpoint computation, which is asserted to equal the minimum local checkpoint of the active IDs;</li>
 *   <li>bringing a new allocation ID in sync on a background thread, and checking that it stays in sync even when a later
 *       cluster state update does not list it as active.</li>
 * </ol>
 *
 * @throws Exception if the barrier handshake with the background thread or {@code assertBusy} fails
 */
public void testUpdateAllocationIdsFromMaster() throws Exception {
    final long initialClusterStateVersion = randomNonNegativeLong();
    final int numberOfActiveAllocationsIds = randomIntBetween(2, 16);
    final int numberOfInitializingIds = randomIntBetween(2, 16);
    final Tuple<Set<AllocationId>, Set<AllocationId>> activeAndInitializingAllocationIds = randomActiveAndInitializingAllocationIds(numberOfActiveAllocationsIds, numberOfInitializingIds);
    final Set<AllocationId> activeAllocationIds = activeAndInitializingAllocationIds.v1();
    final Set<AllocationId> initializingIds = activeAndInitializingAllocationIds.v2();
    // the first active allocation ID returned by the iterator acts as the primary for the whole test
    AllocationId primaryId = activeAllocationIds.iterator().next();
    IndexShardRoutingTable routingTable = routingTable(initializingIds, primaryId);
    final ReplicationTracker tracker = newTracker(primaryId);
    tracker.updateFromMaster(initialClusterStateVersion, ids(activeAllocationIds), routingTable);
    tracker.activatePrimaryMode(NO_OPS_PERFORMED);
    assertThat(tracker.getReplicationGroup().getInSyncAllocationIds(), equalTo(ids(activeAllocationIds)));
    assertThat(tracker.getReplicationGroup().getRoutingTable(), equalTo(routingTable));
    // first we assert that the in-sync and tracking sets are set up correctly
    assertTrue(activeAllocationIds.stream().allMatch(a -> tracker.getTrackedLocalCheckpointForShard(a.getId()).inSync));
    assertTrue(activeAllocationIds.stream().filter(a -> a.equals(primaryId) == false).allMatch(a -> tracker.getTrackedLocalCheckpointForShard(a.getId()).getLocalCheckpoint() == SequenceNumbers.UNASSIGNED_SEQ_NO));
    assertTrue(initializingIds.stream().noneMatch(a -> tracker.getTrackedLocalCheckpointForShard(a.getId()).inSync));
    assertTrue(initializingIds.stream().filter(a -> a.equals(primaryId) == false).allMatch(a -> tracker.getTrackedLocalCheckpointForShard(a.getId()).getLocalCheckpoint() == SequenceNumbers.UNASSIGNED_SEQ_NO));
    // now we will remove some allocation IDs from these and ensure that they propagate through
    final Set<AllocationId> removingActiveAllocationIds = new HashSet<>(randomSubsetOf(activeAllocationIds));
    // the primary itself is never removed
    removingActiveAllocationIds.remove(primaryId);
    final Set<AllocationId> newActiveAllocationIds = activeAllocationIds.stream().filter(a -> !removingActiveAllocationIds.contains(a)).collect(Collectors.toSet());
    final List<AllocationId> removingInitializingAllocationIds = randomSubsetOf(initializingIds);
    final Set<AllocationId> newInitializingAllocationIds = initializingIds.stream().filter(a -> !removingInitializingAllocationIds.contains(a)).collect(Collectors.toSet());
    routingTable = routingTable(newInitializingAllocationIds, primaryId);
    tracker.updateFromMaster(initialClusterStateVersion + 1, ids(newActiveAllocationIds), routingTable);
    // remaining IDs keep their state; removed IDs are no longer tracked at all
    assertTrue(newActiveAllocationIds.stream().allMatch(a -> tracker.getTrackedLocalCheckpointForShard(a.getId()).inSync));
    assertTrue(removingActiveAllocationIds.stream().allMatch(a -> tracker.getTrackedLocalCheckpointForShard(a.getId()) == null));
    assertTrue(newInitializingAllocationIds.stream().noneMatch(a -> tracker.getTrackedLocalCheckpointForShard(a.getId()).inSync));
    assertTrue(removingInitializingAllocationIds.stream().allMatch(a -> tracker.getTrackedLocalCheckpointForShard(a.getId()) == null));
    assertThat(tracker.getReplicationGroup().getInSyncAllocationIds(), equalTo(ids(Sets.difference(Sets.union(activeAllocationIds, newActiveAllocationIds), removingActiveAllocationIds))));
    assertThat(tracker.getReplicationGroup().getRoutingTable(), equalTo(routingTable));
    /*
     * Now we add a fresh allocation ID to the initializing set (the active set is passed unchanged) and ensure it propagates through.
     * NOTE(review): this comment used to say that an ID is added to "each of active and initializing" and that "different lengths"
     * avoid collisions; the code below only calls AllocationId.newInitializing() for the initializing set, so that wording looks
     * like a leftover from an earlier version - confirm.
     */
    newInitializingAllocationIds.add(AllocationId.newInitializing());
    tracker.updateFromMaster(initialClusterStateVersion + 2, ids(newActiveAllocationIds), routingTable(newInitializingAllocationIds, primaryId));
    assertTrue(newActiveAllocationIds.stream().allMatch(a -> tracker.getTrackedLocalCheckpointForShard(a.getId()).inSync));
    assertTrue(newActiveAllocationIds.stream().filter(a -> a.equals(primaryId) == false).allMatch(a -> tracker.getTrackedLocalCheckpointForShard(a.getId()).getLocalCheckpoint() == SequenceNumbers.UNASSIGNED_SEQ_NO));
    assertTrue(newInitializingAllocationIds.stream().noneMatch(a -> tracker.getTrackedLocalCheckpointForShard(a.getId()).inSync));
    assertTrue(newInitializingAllocationIds.stream().allMatch(a -> tracker.getTrackedLocalCheckpointForShard(a.getId()).getLocalCheckpoint() == SequenceNumbers.UNASSIGNED_SEQ_NO));
    // the tracking allocation IDs should play no role in determining the global checkpoint
    final Map<AllocationId, Integer> activeLocalCheckpoints = newActiveAllocationIds.stream().collect(Collectors.toMap(Function.identity(), a -> randomIntBetween(1, 1024)));
    activeLocalCheckpoints.forEach((a, l) -> updateLocalCheckpoint(tracker, a.getId(), l));
    final Map<AllocationId, Integer> initializingLocalCheckpoints = newInitializingAllocationIds.stream().collect(Collectors.toMap(Function.identity(), a -> randomIntBetween(1, 1024)));
    initializingLocalCheckpoints.forEach((a, l) -> updateLocalCheckpoint(tracker, a.getId(), l));
    assertTrue(activeLocalCheckpoints.entrySet().stream().allMatch(e -> tracker.getTrackedLocalCheckpointForShard(e.getKey().getId()).getLocalCheckpoint() == e.getValue()));
    assertTrue(initializingLocalCheckpoints.entrySet().stream().allMatch(e -> tracker.getTrackedLocalCheckpointForShard(e.getKey().getId()).getLocalCheckpoint() == e.getValue()));
    // the global checkpoint must equal the smallest local checkpoint among the active (in-sync) IDs only
    final long minimumActiveLocalCheckpoint = (long) activeLocalCheckpoints.values().stream().min(Integer::compareTo).get();
    assertThat(tracker.getGlobalCheckpoint(), equalTo(minimumActiveLocalCheckpoint));
    assertThat(updatedGlobalCheckpoint.get(), equalTo(minimumActiveLocalCheckpoint));
    final long minimumInitailizingLocalCheckpoint = (long) initializingLocalCheckpoints.values().stream().min(Integer::compareTo).get();
    // now we are going to add a new allocation ID and bring it in sync which should move it to the in-sync allocation IDs
    // the local checkpoint it starts with is strictly below both minimums computed above (and therefore below the global checkpoint)
    final long localCheckpoint = randomIntBetween(0, Math.toIntExact(Math.min(minimumActiveLocalCheckpoint, minimumInitailizingLocalCheckpoint) - 1));
    // NOTE(review): an older comment here referred to "a different length" preventing collisions with previous allocation IDs;
    // AllocationId.newInitializing() takes no length argument, so that remark appears stale - confirm
    final AllocationId newSyncingAllocationId = AllocationId.newInitializing();
    newInitializingAllocationIds.add(newSyncingAllocationId);
    tracker.updateFromMaster(initialClusterStateVersion + 3, ids(newActiveAllocationIds), routingTable(newInitializingAllocationIds, primaryId));
    addPeerRecoveryRetentionLease(tracker, newSyncingAllocationId);
    // two-party barrier: first await lines up the start, second await signals that markAllocationIdAsInSync has returned
    final CyclicBarrier barrier = new CyclicBarrier(2);
    final Thread thread = new Thread(() -> {
        try {
            barrier.await();
            tracker.initiateTracking(newSyncingAllocationId.getId());
            tracker.markAllocationIdAsInSync(newSyncingAllocationId.getId(), localCheckpoint);
            barrier.await();
        } catch (final BrokenBarrierException | InterruptedException e) {
            throw new RuntimeException(e);
        }
    });
    thread.start();
    barrier.await();
    // wait until the tracker lists the new ID as pending in-sync while it is not yet marked in-sync
    assertBusy(() -> {
        assertTrue(tracker.pendingInSync.contains(newSyncingAllocationId.getId()));
        assertFalse(tracker.getTrackedLocalCheckpointForShard(newSyncingAllocationId.getId()).inSync);
    });
    // report a local checkpoint of at least the global checkpoint asserted above; the worker is then expected to reach the barrier
    tracker.updateLocalCheckpoint(newSyncingAllocationId.getId(), randomIntBetween(Math.toIntExact(minimumActiveLocalCheckpoint), 1024));
    barrier.await();
    assertFalse(tracker.pendingInSync.contains(newSyncingAllocationId.getId()));
    assertTrue(tracker.getTrackedLocalCheckpointForShard(newSyncingAllocationId.getId()).inSync);
    /*
     * The new in-sync allocation ID is in the in-sync set now yet the master does not know this; the allocation ID should still be in
     * the in-sync set even if we receive a cluster state update that does not reflect this.
     */
    tracker.updateFromMaster(initialClusterStateVersion + 4, ids(newActiveAllocationIds), routingTable(newInitializingAllocationIds, primaryId));
    assertTrue(tracker.getTrackedLocalCheckpointForShard(newSyncingAllocationId.getId()).inSync);
    assertFalse(tracker.pendingInSync.contains(newSyncingAllocationId.getId()));
}
Also used : IntStream(java.util.stream.IntStream) Arrays(java.util.Arrays) Matchers.not(org.hamcrest.Matchers.not) AtomicBoolean(java.util.concurrent.atomic.AtomicBoolean) HashMap(java.util.HashMap) Function(java.util.function.Function) ArrayList(java.util.ArrayList) HashSet(java.util.HashSet) UNASSIGNED_SEQ_NO(org.opensearch.index.seqno.SequenceNumbers.UNASSIGNED_SEQ_NO) Map(java.util.Map) ShardRoutingState(org.opensearch.cluster.routing.ShardRoutingState) BiConsumer(java.util.function.BiConsumer) ActionListener(org.opensearch.action.ActionListener) IndexShardRoutingTable(org.opensearch.cluster.routing.IndexShardRoutingTable) StreamInput(org.opensearch.common.io.stream.StreamInput) CyclicBarrier(java.util.concurrent.CyclicBarrier) Matchers.greaterThanOrEqualTo(org.hamcrest.Matchers.greaterThanOrEqualTo) Collections.emptySet(java.util.Collections.emptySet) NO_OPS_PERFORMED(org.opensearch.index.seqno.SequenceNumbers.NO_OPS_PERFORMED) AllocationId(org.opensearch.cluster.routing.AllocationId) Matchers.lessThanOrEqualTo(org.hamcrest.Matchers.lessThanOrEqualTo) Collection(java.util.Collection) Set(java.util.Set) Settings(org.opensearch.common.settings.Settings) IOException(java.io.IOException) BrokenBarrierException(java.util.concurrent.BrokenBarrierException) BytesStreamOutput(org.opensearch.common.io.stream.BytesStreamOutput) Collectors(java.util.stream.Collectors) Tuple(org.opensearch.common.collect.Tuple) LongConsumer(java.util.function.LongConsumer) ShardRouting(org.opensearch.cluster.routing.ShardRouting) ShardId(org.opensearch.index.shard.ShardId) Consumer(java.util.function.Consumer) TestShardRouting(org.opensearch.cluster.routing.TestShardRouting) AtomicLong(java.util.concurrent.atomic.AtomicLong) Sets(org.opensearch.common.util.set.Sets) List(java.util.List) Stream(java.util.stream.Stream) Matchers.hasItem(org.hamcrest.Matchers.hasItem) Randomness(org.opensearch.common.Randomness) Matchers.equalTo(org.hamcrest.Matchers.equalTo) 
IndexSettings(org.opensearch.index.IndexSettings) ReplicationResponse(org.opensearch.action.support.replication.ReplicationResponse) Matchers.greaterThan(org.hamcrest.Matchers.greaterThan) Collections(java.util.Collections) IndexSettingsModule(org.opensearch.test.IndexSettingsModule) IndexShardRoutingTable(org.opensearch.cluster.routing.IndexShardRoutingTable) HashSet(java.util.HashSet) Collections.emptySet(java.util.Collections.emptySet) Set(java.util.Set) BrokenBarrierException(java.util.concurrent.BrokenBarrierException) AllocationId(org.opensearch.cluster.routing.AllocationId) CyclicBarrier(java.util.concurrent.CyclicBarrier) HashSet(java.util.HashSet)

Aggregations

AllocationId (org.opensearch.cluster.routing.AllocationId): 46; ShardId (org.opensearch.index.shard.ShardId): 28; AtomicLong (java.util.concurrent.atomic.AtomicLong): 21; IndexShardRoutingTable (org.opensearch.cluster.routing.IndexShardRoutingTable): 17; HashSet (java.util.HashSet): 16; BrokenBarrierException (java.util.concurrent.BrokenBarrierException): 16; CyclicBarrier (java.util.concurrent.CyclicBarrier): 16; AtomicBoolean (java.util.concurrent.atomic.AtomicBoolean): 16; ShardRouting (org.opensearch.cluster.routing.ShardRouting): 15; TestShardRouting (org.opensearch.cluster.routing.TestShardRouting): 14; ArrayList (java.util.ArrayList): 13; Set (java.util.Set): 13; Matchers.containsString (org.hamcrest.Matchers.containsString): 13; Matchers.hasToString (org.hamcrest.Matchers.hasToString): 13; Settings (org.opensearch.common.settings.Settings): 13; IndexSettings (org.opensearch.index.IndexSettings): 13; Collections (java.util.Collections): 12; Collections.emptySet (java.util.Collections.emptySet): 12; Collectors (java.util.stream.Collectors): 12; Stream (java.util.stream.Stream): 12