Example 6 with AllocationId

Use of org.opensearch.cluster.routing.AllocationId in project OpenSearch by opensearch-project.

From the class ReplicationTrackerTests, method testUpdateAllocationIdsFromMaster.

public void testUpdateAllocationIdsFromMaster() throws Exception {
    final long initialClusterStateVersion = randomNonNegativeLong();
    final int numberOfActiveAllocationsIds = randomIntBetween(2, 16);
    final int numberOfInitializingIds = randomIntBetween(2, 16);
    final Tuple<Set<AllocationId>, Set<AllocationId>> activeAndInitializingAllocationIds = randomActiveAndInitializingAllocationIds(numberOfActiveAllocationsIds, numberOfInitializingIds);
    final Set<AllocationId> activeAllocationIds = activeAndInitializingAllocationIds.v1();
    final Set<AllocationId> initializingIds = activeAndInitializingAllocationIds.v2();
    AllocationId primaryId = activeAllocationIds.iterator().next();
    IndexShardRoutingTable routingTable = routingTable(initializingIds, primaryId);
    final ReplicationTracker tracker = newTracker(primaryId);
    tracker.updateFromMaster(initialClusterStateVersion, ids(activeAllocationIds), routingTable);
    tracker.activatePrimaryMode(NO_OPS_PERFORMED);
    assertThat(tracker.getReplicationGroup().getInSyncAllocationIds(), equalTo(ids(activeAllocationIds)));
    assertThat(tracker.getReplicationGroup().getRoutingTable(), equalTo(routingTable));
    // first we assert that the in-sync and tracking sets are set up correctly
    assertTrue(activeAllocationIds.stream().allMatch(a -> tracker.getTrackedLocalCheckpointForShard(a.getId()).inSync));
    assertTrue(activeAllocationIds.stream().filter(a -> a.equals(primaryId) == false).allMatch(a -> tracker.getTrackedLocalCheckpointForShard(a.getId()).getLocalCheckpoint() == SequenceNumbers.UNASSIGNED_SEQ_NO));
    assertTrue(initializingIds.stream().noneMatch(a -> tracker.getTrackedLocalCheckpointForShard(a.getId()).inSync));
    assertTrue(initializingIds.stream().filter(a -> a.equals(primaryId) == false).allMatch(a -> tracker.getTrackedLocalCheckpointForShard(a.getId()).getLocalCheckpoint() == SequenceNumbers.UNASSIGNED_SEQ_NO));
    // now we will remove some allocation IDs from these and ensure that they propagate through
    final Set<AllocationId> removingActiveAllocationIds = new HashSet<>(randomSubsetOf(activeAllocationIds));
    removingActiveAllocationIds.remove(primaryId);
    final Set<AllocationId> newActiveAllocationIds = activeAllocationIds.stream().filter(a -> !removingActiveAllocationIds.contains(a)).collect(Collectors.toSet());
    final List<AllocationId> removingInitializingAllocationIds = randomSubsetOf(initializingIds);
    final Set<AllocationId> newInitializingAllocationIds = initializingIds.stream().filter(a -> !removingInitializingAllocationIds.contains(a)).collect(Collectors.toSet());
    routingTable = routingTable(newInitializingAllocationIds, primaryId);
    tracker.updateFromMaster(initialClusterStateVersion + 1, ids(newActiveAllocationIds), routingTable);
    assertTrue(newActiveAllocationIds.stream().allMatch(a -> tracker.getTrackedLocalCheckpointForShard(a.getId()).inSync));
    assertTrue(removingActiveAllocationIds.stream().allMatch(a -> tracker.getTrackedLocalCheckpointForShard(a.getId()) == null));
    assertTrue(newInitializingAllocationIds.stream().noneMatch(a -> tracker.getTrackedLocalCheckpointForShard(a.getId()).inSync));
    assertTrue(removingInitializingAllocationIds.stream().allMatch(a -> tracker.getTrackedLocalCheckpointForShard(a.getId()) == null));
    assertThat(tracker.getReplicationGroup().getInSyncAllocationIds(), equalTo(ids(Sets.difference(Sets.union(activeAllocationIds, newActiveAllocationIds), removingActiveAllocationIds))));
    assertThat(tracker.getReplicationGroup().getRoutingTable(), equalTo(routingTable));
    /*
     * Now we will add an allocation ID to each of active and initializing and ensure they propagate through. Using different lengths
     * than we have been using above ensures that we can not collide with a previous allocation ID
     */
    newInitializingAllocationIds.add(AllocationId.newInitializing());
    tracker.updateFromMaster(initialClusterStateVersion + 2, ids(newActiveAllocationIds), routingTable(newInitializingAllocationIds, primaryId));
    assertTrue(newActiveAllocationIds.stream().allMatch(a -> tracker.getTrackedLocalCheckpointForShard(a.getId()).inSync));
    assertTrue(newActiveAllocationIds.stream().filter(a -> a.equals(primaryId) == false).allMatch(a -> tracker.getTrackedLocalCheckpointForShard(a.getId()).getLocalCheckpoint() == SequenceNumbers.UNASSIGNED_SEQ_NO));
    assertTrue(newInitializingAllocationIds.stream().noneMatch(a -> tracker.getTrackedLocalCheckpointForShard(a.getId()).inSync));
    assertTrue(newInitializingAllocationIds.stream().allMatch(a -> tracker.getTrackedLocalCheckpointForShard(a.getId()).getLocalCheckpoint() == SequenceNumbers.UNASSIGNED_SEQ_NO));
    // the tracking allocation IDs should play no role in determining the global checkpoint
    final Map<AllocationId, Integer> activeLocalCheckpoints = newActiveAllocationIds.stream().collect(Collectors.toMap(Function.identity(), a -> randomIntBetween(1, 1024)));
    activeLocalCheckpoints.forEach((a, l) -> updateLocalCheckpoint(tracker, a.getId(), l));
    final Map<AllocationId, Integer> initializingLocalCheckpoints = newInitializingAllocationIds.stream().collect(Collectors.toMap(Function.identity(), a -> randomIntBetween(1, 1024)));
    initializingLocalCheckpoints.forEach((a, l) -> updateLocalCheckpoint(tracker, a.getId(), l));
    assertTrue(activeLocalCheckpoints.entrySet().stream().allMatch(e -> tracker.getTrackedLocalCheckpointForShard(e.getKey().getId()).getLocalCheckpoint() == e.getValue()));
    assertTrue(initializingLocalCheckpoints.entrySet().stream().allMatch(e -> tracker.getTrackedLocalCheckpointForShard(e.getKey().getId()).getLocalCheckpoint() == e.getValue()));
    final long minimumActiveLocalCheckpoint = (long) activeLocalCheckpoints.values().stream().min(Integer::compareTo).get();
    assertThat(tracker.getGlobalCheckpoint(), equalTo(minimumActiveLocalCheckpoint));
    assertThat(updatedGlobalCheckpoint.get(), equalTo(minimumActiveLocalCheckpoint));
    final long minimumInitializingLocalCheckpoint = (long) initializingLocalCheckpoints.values().stream().min(Integer::compareTo).get();
    // now we are going to add a new allocation ID and bring it in sync which should move it to the in-sync allocation IDs
    final long localCheckpoint = randomIntBetween(0, Math.toIntExact(Math.min(minimumActiveLocalCheckpoint, minimumInitializingLocalCheckpoint) - 1));
    // using a different length than we have been using above ensures that we can not collide with a previous allocation ID
    final AllocationId newSyncingAllocationId = AllocationId.newInitializing();
    newInitializingAllocationIds.add(newSyncingAllocationId);
    tracker.updateFromMaster(initialClusterStateVersion + 3, ids(newActiveAllocationIds), routingTable(newInitializingAllocationIds, primaryId));
    addPeerRecoveryRetentionLease(tracker, newSyncingAllocationId);
    final CyclicBarrier barrier = new CyclicBarrier(2);
    final Thread thread = new Thread(() -> {
        try {
            barrier.await();
            tracker.initiateTracking(newSyncingAllocationId.getId());
            tracker.markAllocationIdAsInSync(newSyncingAllocationId.getId(), localCheckpoint);
            barrier.await();
        } catch (final BrokenBarrierException | InterruptedException e) {
            throw new RuntimeException(e);
        }
    });
    thread.start();
    barrier.await();
    assertBusy(() -> {
        assertTrue(tracker.pendingInSync.contains(newSyncingAllocationId.getId()));
        assertFalse(tracker.getTrackedLocalCheckpointForShard(newSyncingAllocationId.getId()).inSync);
    });
    tracker.updateLocalCheckpoint(newSyncingAllocationId.getId(), randomIntBetween(Math.toIntExact(minimumActiveLocalCheckpoint), 1024));
    barrier.await();
    assertFalse(tracker.pendingInSync.contains(newSyncingAllocationId.getId()));
    assertTrue(tracker.getTrackedLocalCheckpointForShard(newSyncingAllocationId.getId()).inSync);
    /*
     * The new in-sync allocation ID is in the in-sync set now yet the master does not know this; the allocation ID should still be in
     * the in-sync set even if we receive a cluster state update that does not reflect this.
     */
    tracker.updateFromMaster(initialClusterStateVersion + 4, ids(newActiveAllocationIds), routingTable(newInitializingAllocationIds, primaryId));
    assertTrue(tracker.getTrackedLocalCheckpointForShard(newSyncingAllocationId.getId()).inSync);
    assertFalse(tracker.pendingInSync.contains(newSyncingAllocationId.getId()));
}
Also used : AllocationId(org.opensearch.cluster.routing.AllocationId) IndexShardRoutingTable(org.opensearch.cluster.routing.IndexShardRoutingTable) HashSet(java.util.HashSet) Set(java.util.Set) Collections.emptySet(java.util.Collections.emptySet) CyclicBarrier(java.util.concurrent.CyclicBarrier) BrokenBarrierException(java.util.concurrent.BrokenBarrierException)
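
The test above leans on private helpers (ids, routingTable, newTracker, updateLocalCheckpoint) that the snippet does not show. As a hedged sketch of the simplest of them: ids(...) merely projects a set of AllocationId objects onto their string ids, which is the form updateFromMaster expects for the in-sync set. The class name and main method below are illustrative only.

import java.util.Arrays;
import java.util.HashSet;
import java.util.Set;
import java.util.stream.Collectors;

import org.opensearch.cluster.routing.AllocationId;

// Illustrative stand-in for the ids(...) helper used throughout these tests.
public final class AllocationIdsSketch {

    static Set<String> ids(Set<AllocationId> allocationIds) {
        // AllocationId.getId() returns the UUID string that ReplicationTracker keys its checkpoint state by.
        return allocationIds.stream().map(AllocationId::getId).collect(Collectors.toSet());
    }

    public static void main(String[] args) {
        // Each call to newInitializing() mints a fresh allocation id with a random UUID, so collisions are not a concern.
        Set<AllocationId> active = new HashSet<>(Arrays.asList(AllocationId.newInitializing(), AllocationId.newInitializing()));
        System.out.println(ids(active)); // prints two distinct UUID strings
    }
}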

Example 7 with AllocationId

Use of org.opensearch.cluster.routing.AllocationId in project OpenSearch by opensearch-project.

From the class ReplicationTrackerTests, method testUpdateGlobalCheckpointOnReplica.

public void testUpdateGlobalCheckpointOnReplica() {
    final AllocationId active = AllocationId.newInitializing();
    final ReplicationTracker tracker = newTracker(active);
    final long globalCheckpoint = randomLongBetween(NO_OPS_PERFORMED, Long.MAX_VALUE - 1);
    tracker.updateGlobalCheckpointOnReplica(globalCheckpoint, "test");
    assertThat(updatedGlobalCheckpoint.get(), equalTo(globalCheckpoint));
    final long nonUpdate = randomLongBetween(NO_OPS_PERFORMED, globalCheckpoint);
    updatedGlobalCheckpoint.set(UNASSIGNED_SEQ_NO);
    tracker.updateGlobalCheckpointOnReplica(nonUpdate, "test");
    assertThat(updatedGlobalCheckpoint.get(), equalTo(UNASSIGNED_SEQ_NO));
    final long update = randomLongBetween(globalCheckpoint, Long.MAX_VALUE);
    tracker.updateGlobalCheckpointOnReplica(update, "test");
    assertThat(updatedGlobalCheckpoint.get(), equalTo(update));
}
Also used : AllocationId(org.opensearch.cluster.routing.AllocationId)
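
What this test pins down is that updateGlobalCheckpointOnReplica only ever moves the replica's global checkpoint forward; a value at or below the current checkpoint (the nonUpdate case) is ignored. A minimal, self-contained sketch of that monotonic-update invariant, not the ReplicationTracker implementation itself:

import java.util.concurrent.atomic.AtomicLong;

// Sketch of the "advance only" rule the test asserts; the class and field names are illustrative.
final class MonotonicGlobalCheckpoint {
    private final AtomicLong globalCheckpoint = new AtomicLong(Long.MIN_VALUE);

    void updateOnReplica(long candidate) {
        // Keep the maximum seen so far: a stale candidate is a no-op, a larger one advances the checkpoint.
        globalCheckpoint.accumulateAndGet(candidate, Math::max);
    }

    long get() {
        return globalCheckpoint.get();
    }
}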

Example 8 with AllocationId

Use of org.opensearch.cluster.routing.AllocationId in project OpenSearch by opensearch-project.

From the class ReplicationTrackerTests, method testInSyncIdsAreRemovedIfNotValidatedByMaster.

public void testInSyncIdsAreRemovedIfNotValidatedByMaster() {
    final long initialClusterStateVersion = randomNonNegativeLong();
    final Map<AllocationId, Long> activeToStay = randomAllocationsWithLocalCheckpoints(1, 5);
    final Map<AllocationId, Long> initializingToStay = randomAllocationsWithLocalCheckpoints(1, 5);
    final Map<AllocationId, Long> activeToBeRemoved = randomAllocationsWithLocalCheckpoints(1, 5);
    final Map<AllocationId, Long> initializingToBeRemoved = randomAllocationsWithLocalCheckpoints(1, 5);
    final Set<AllocationId> active = Sets.union(activeToStay.keySet(), activeToBeRemoved.keySet());
    final Set<AllocationId> initializing = Sets.union(initializingToStay.keySet(), initializingToBeRemoved.keySet());
    final Map<AllocationId, Long> allocations = new HashMap<>();
    final AllocationId primaryId = active.iterator().next();
    if (activeToBeRemoved.containsKey(primaryId)) {
        activeToStay.put(primaryId, activeToBeRemoved.remove(primaryId));
    }
    allocations.putAll(activeToStay);
    if (randomBoolean()) {
        allocations.putAll(activeToBeRemoved);
    }
    allocations.putAll(initializingToStay);
    if (randomBoolean()) {
        allocations.putAll(initializingToBeRemoved);
    }
    final ReplicationTracker tracker = newTracker(primaryId);
    tracker.updateFromMaster(initialClusterStateVersion, ids(active), routingTable(initializing, primaryId));
    tracker.activatePrimaryMode(NO_OPS_PERFORMED);
    if (randomBoolean()) {
        initializingToStay.keySet().forEach(k -> markAsTrackingAndInSyncQuietly(tracker, k.getId(), NO_OPS_PERFORMED));
    } else {
        initializing.forEach(k -> markAsTrackingAndInSyncQuietly(tracker, k.getId(), NO_OPS_PERFORMED));
    }
    if (randomBoolean()) {
        allocations.forEach((aid, localCP) -> updateLocalCheckpoint(tracker, aid.getId(), localCP));
    }
    // now remove shards
    if (randomBoolean()) {
        tracker.updateFromMaster(initialClusterStateVersion + 1, ids(activeToStay.keySet()), routingTable(initializingToStay.keySet(), primaryId));
        allocations.forEach((aid, ckp) -> updateLocalCheckpoint(tracker, aid.getId(), ckp + 10L));
    } else {
        allocations.forEach((aid, ckp) -> updateLocalCheckpoint(tracker, aid.getId(), ckp + 10L));
        tracker.updateFromMaster(initialClusterStateVersion + 2, ids(activeToStay.keySet()), routingTable(initializingToStay.keySet(), primaryId));
    }
    // we added 10 to make sure it's advanced in the second time
    final long checkpoint = Stream.concat(activeToStay.values().stream(), initializingToStay.values().stream()).min(Long::compare).get() + 10;
    assertThat(tracker.getGlobalCheckpoint(), equalTo(checkpoint));
}
Also used : HashMap(java.util.HashMap) AllocationId(org.opensearch.cluster.routing.AllocationId) AtomicLong(java.util.concurrent.atomic.AtomicLong)
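
The final assertion encodes the rule this test is really about: once the cluster-manager update drops copies from the in-sync set, the primary's global checkpoint is the minimum local checkpoint over the copies that remain. A hedged sketch of that relationship (illustrative names; the -2 sentinel mirrors SequenceNumbers.UNASSIGNED_SEQ_NO):

import java.util.Map;
import java.util.OptionalLong;

// Illustrative sketch: the global checkpoint trails the slowest in-sync copy.
final class GlobalCheckpointSketch {
    static final long UNASSIGNED_SEQ_NO = -2; // stand-in for SequenceNumbers.UNASSIGNED_SEQ_NO

    static long globalCheckpoint(Map<String, Long> inSyncLocalCheckpoints) {
        // Checkpoints of copies the cluster-manager no longer validates are simply absent from the map.
        OptionalLong min = inSyncLocalCheckpoints.values().stream().mapToLong(Long::longValue).min();
        return min.orElse(UNASSIGNED_SEQ_NO);
    }
}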

Example 9 with AllocationId

Use of org.opensearch.cluster.routing.AllocationId in project OpenSearch by opensearch-project.

From the class ReplicationTrackerTests, method testPeerRecoveryRetentionLeaseCreationAndRenewal.

public void testPeerRecoveryRetentionLeaseCreationAndRenewal() {
    final int numberOfActiveAllocationsIds = randomIntBetween(1, 8);
    final int numberOfInitializingIds = randomIntBetween(0, 8);
    final Tuple<Set<AllocationId>, Set<AllocationId>> activeAndInitializingAllocationIds = randomActiveAndInitializingAllocationIds(numberOfActiveAllocationsIds, numberOfInitializingIds);
    final Set<AllocationId> activeAllocationIds = activeAndInitializingAllocationIds.v1();
    final Set<AllocationId> initializingAllocationIds = activeAndInitializingAllocationIds.v2();
    final AllocationId primaryId = activeAllocationIds.iterator().next();
    final long initialClusterStateVersion = randomNonNegativeLong();
    final AtomicLong currentTimeMillis = new AtomicLong(0L);
    final ReplicationTracker tracker = newTracker(primaryId, updatedGlobalCheckpoint::set, currentTimeMillis::get);
    final long retentionLeaseExpiryTimeMillis = tracker.indexSettings().getRetentionLeaseMillis();
    final long peerRecoveryRetentionLeaseRenewalTimeMillis = retentionLeaseExpiryTimeMillis / 2;
    final long maximumTestTimeMillis = 13 * retentionLeaseExpiryTimeMillis;
    final long testStartTimeMillis = randomLongBetween(0L, Long.MAX_VALUE - maximumTestTimeMillis);
    currentTimeMillis.set(testStartTimeMillis);
    final Function<AllocationId, RetentionLease> retentionLeaseFromAllocationId = allocationId -> new RetentionLease(ReplicationTracker.getPeerRecoveryRetentionLeaseId(nodeIdFromAllocationId(allocationId)), 0L, currentTimeMillis.get(), ReplicationTracker.PEER_RECOVERY_RETENTION_LEASE_SOURCE);
    final List<RetentionLease> initialLeases = new ArrayList<>();
    if (randomBoolean()) {
        initialLeases.add(retentionLeaseFromAllocationId.apply(primaryId));
    }
    for (final AllocationId replicaId : initializingAllocationIds) {
        if (randomBoolean()) {
            initialLeases.add(retentionLeaseFromAllocationId.apply(replicaId));
        }
    }
    for (int i = randomIntBetween(0, 5); i > 0; i--) {
        initialLeases.add(retentionLeaseFromAllocationId.apply(AllocationId.newInitializing()));
    }
    tracker.updateRetentionLeasesOnReplica(new RetentionLeases(randomNonNegativeLong(), randomNonNegativeLong(), initialLeases));
    IndexShardRoutingTable routingTable = routingTable(initializingAllocationIds, primaryId);
    tracker.updateFromMaster(initialClusterStateVersion, ids(activeAllocationIds), routingTable);
    tracker.activatePrimaryMode(NO_OPS_PERFORMED);
    assertTrue("primary's retention lease should exist", tracker.getRetentionLeases().contains(ReplicationTracker.getPeerRecoveryRetentionLeaseId(routingTable.primaryShard())));
    final Consumer<Runnable> assertAsTimePasses = assertion -> {
        final long startTime = currentTimeMillis.get();
        while (currentTimeMillis.get() < startTime + retentionLeaseExpiryTimeMillis * 2) {
            currentTimeMillis.addAndGet(randomLongBetween(0L, retentionLeaseExpiryTimeMillis * 2));
            tracker.renewPeerRecoveryRetentionLeases();
            tracker.getRetentionLeases(true);
            assertion.run();
        }
    };
    assertAsTimePasses.accept(() -> {
        // Leases for assigned replicas do not expire
        final RetentionLeases retentionLeases = tracker.getRetentionLeases();
        for (final AllocationId replicaId : initializingAllocationIds) {
            final String leaseId = retentionLeaseFromAllocationId.apply(replicaId).id();
            assertTrue("should not have removed lease for " + replicaId + " in " + retentionLeases, initialLeases.stream().noneMatch(l -> l.id().equals(leaseId)) || retentionLeases.contains(leaseId));
        }
    });
    // Leases that don't correspond to assigned replicas, however, are expired by this time.
    final Set<String> expectedLeaseIds = Stream.concat(Stream.of(primaryId), initializingAllocationIds.stream()).map(allocationId -> retentionLeaseFromAllocationId.apply(allocationId).id()).collect(Collectors.toSet());
    for (final RetentionLease retentionLease : tracker.getRetentionLeases().leases()) {
        assertThat(expectedLeaseIds, hasItem(retentionLease.id()));
    }
    for (AllocationId replicaId : initializingAllocationIds) {
        markAsTrackingAndInSyncQuietly(tracker, replicaId.getId(), NO_OPS_PERFORMED);
    }
    assertThat(tracker.getRetentionLeases().leases().stream().map(RetentionLease::id).collect(Collectors.toSet()), equalTo(expectedLeaseIds));
    assertAsTimePasses.accept(() -> {
        // Leases still don't expire
        assertThat(tracker.getRetentionLeases().leases().stream().map(RetentionLease::id).collect(Collectors.toSet()), equalTo(expectedLeaseIds));
        // Also leases are renewed before reaching half the expiry time
        // noinspection OptionalGetWithoutIsPresent
        assertThat(tracker.getRetentionLeases() + " renewed before too long", tracker.getRetentionLeases().leases().stream().mapToLong(RetentionLease::timestamp).min().getAsLong(), greaterThanOrEqualTo(currentTimeMillis.get() - peerRecoveryRetentionLeaseRenewalTimeMillis));
    });
    IndexShardRoutingTable.Builder routingTableBuilder = new IndexShardRoutingTable.Builder(routingTable);
    for (ShardRouting replicaShard : routingTable.replicaShards()) {
        routingTableBuilder.removeShard(replicaShard);
        routingTableBuilder.addShard(replicaShard.moveToStarted());
    }
    routingTable = routingTableBuilder.build();
    activeAllocationIds.addAll(initializingAllocationIds);
    tracker.updateFromMaster(initialClusterStateVersion + randomLongBetween(1, 10), ids(activeAllocationIds), routingTable);
    assertAsTimePasses.accept(() -> {
        // Leases still don't expire
        assertThat(tracker.getRetentionLeases().leases().stream().map(RetentionLease::id).collect(Collectors.toSet()), equalTo(expectedLeaseIds));
        // ... and any extra peer recovery retention leases are expired immediately since the shard is fully active
        tracker.addPeerRecoveryRetentionLease(randomAlphaOfLength(10), randomNonNegativeLong(), ActionListener.wrap(() -> {
        }));
    });
    tracker.renewPeerRecoveryRetentionLeases();
    assertTrue("expired extra lease", tracker.getRetentionLeases(true).v1());
    final AllocationId advancingAllocationId = initializingAllocationIds.isEmpty() || rarely() ? primaryId : randomFrom(initializingAllocationIds);
    final String advancingLeaseId = retentionLeaseFromAllocationId.apply(advancingAllocationId).id();
    final long initialGlobalCheckpoint = Math.max(NO_OPS_PERFORMED, tracker.getTrackedLocalCheckpointForShard(advancingAllocationId.getId()).globalCheckpoint);
    assertThat(tracker.getRetentionLeases().get(advancingLeaseId).retainingSequenceNumber(), equalTo(initialGlobalCheckpoint + 1));
    final long newGlobalCheckpoint = initialGlobalCheckpoint + randomLongBetween(1, 1000);
    tracker.updateGlobalCheckpointForShard(advancingAllocationId.getId(), newGlobalCheckpoint);
    tracker.renewPeerRecoveryRetentionLeases();
    assertThat("lease was renewed because the shard advanced its global checkpoint", tracker.getRetentionLeases().get(advancingLeaseId).retainingSequenceNumber(), equalTo(newGlobalCheckpoint + 1));
    final long initialVersion = tracker.getRetentionLeases().version();
    tracker.renewPeerRecoveryRetentionLeases();
    assertThat("immediate renewal is a no-op", tracker.getRetentionLeases().version(), equalTo(initialVersion));
    // noinspection OptionalGetWithoutIsPresent
    final long millisUntilFirstRenewal = tracker.getRetentionLeases().leases().stream().mapToLong(RetentionLease::timestamp).min().getAsLong() + peerRecoveryRetentionLeaseRenewalTimeMillis - currentTimeMillis.get();
    if (millisUntilFirstRenewal != 0) {
        final long shorterThanRenewalTime = randomLongBetween(0L, millisUntilFirstRenewal - 1);
        currentTimeMillis.addAndGet(shorterThanRenewalTime);
        tracker.renewPeerRecoveryRetentionLeases();
        assertThat("renewal is a no-op after a short time", tracker.getRetentionLeases().version(), equalTo(initialVersion));
        currentTimeMillis.addAndGet(millisUntilFirstRenewal - shorterThanRenewalTime);
    }
    tracker.renewPeerRecoveryRetentionLeases();
    assertThat("renewal happens after a sufficiently long time", tracker.getRetentionLeases().version(), greaterThan(initialVersion));
    assertTrue("all leases were renewed", tracker.getRetentionLeases().leases().stream().allMatch(l -> l.timestamp() == currentTimeMillis.get()));
    assertThat("test ran for too long, potentially leading to overflow", currentTimeMillis.get(), lessThanOrEqualTo(testStartTimeMillis + maximumTestTimeMillis));
}
Also used : AllocationId(org.opensearch.cluster.routing.AllocationId) IndexShardRoutingTable(org.opensearch.cluster.routing.IndexShardRoutingTable) ShardRouting(org.opensearch.cluster.routing.ShardRouting) TestShardRouting(org.opensearch.cluster.routing.TestShardRouting) HashSet(java.util.HashSet) Set(java.util.Set) Collections.emptySet(java.util.Collections.emptySet) ArrayList(java.util.ArrayList) AtomicLong(java.util.concurrent.atomic.AtomicLong)
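
Two timing constants drive this test: a retention lease expires after the period returned by IndexSettings.getRetentionLeaseMillis(), and the primary renews peer recovery retention leases once a lease's timestamp is older than half of that period, which is why leases for assigned copies never expire while unrelated leases do. A hedged sketch of that renewal decision (assumed policy with illustrative names; the real renewPeerRecoveryRetentionLeases also renews when the copy's global checkpoint has advanced, as the "lease was renewed" assertion above shows):

// Illustrative sketch of the time-based part of the renewal rule the test relies on.
final class PeerRecoveryLeaseRenewalSketch {
    private final long retentionLeaseExpiryTimeMillis; // e.g. IndexSettings.getRetentionLeaseMillis()

    PeerRecoveryLeaseRenewalSketch(long retentionLeaseExpiryTimeMillis) {
        this.retentionLeaseExpiryTimeMillis = retentionLeaseExpiryTimeMillis;
    }

    boolean shouldRenew(long leaseTimestampMillis, long nowMillis) {
        // Renew once the lease is at least halfway to expiry, so leases for assigned copies never lapse.
        return nowMillis - leaseTimestampMillis >= retentionLeaseExpiryTimeMillis / 2;
    }
}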

Example 10 with AllocationId

Use of org.opensearch.cluster.routing.AllocationId in project OpenSearch by opensearch-project.

From the class ReplicationTrackerTests, method testWaitForAllocationIdToBeInSync.

public void testWaitForAllocationIdToBeInSync() throws Exception {
    final int localCheckpoint = randomIntBetween(1, 32);
    final int globalCheckpoint = randomIntBetween(localCheckpoint + 1, 64);
    final CyclicBarrier barrier = new CyclicBarrier(2);
    final AtomicBoolean complete = new AtomicBoolean();
    final AllocationId inSyncAllocationId = AllocationId.newInitializing();
    final AllocationId trackingAllocationId = AllocationId.newInitializing();
    final ReplicationTracker tracker = newTracker(inSyncAllocationId);
    final long clusterStateVersion = randomNonNegativeLong();
    tracker.updateFromMaster(clusterStateVersion, Collections.singleton(inSyncAllocationId.getId()), routingTable(Collections.singleton(trackingAllocationId), inSyncAllocationId));
    tracker.activatePrimaryMode(globalCheckpoint);
    addPeerRecoveryRetentionLease(tracker, trackingAllocationId);
    final Thread thread = new Thread(() -> {
        try {
            // synchronize starting with the test thread
            barrier.await();
            tracker.initiateTracking(trackingAllocationId.getId());
            tracker.markAllocationIdAsInSync(trackingAllocationId.getId(), localCheckpoint);
            complete.set(true);
            // synchronize with the test thread checking if we are no longer waiting
            barrier.await();
        } catch (final BrokenBarrierException | InterruptedException e) {
            throw new RuntimeException(e);
        }
    });
    thread.start();
    // synchronize starting with the waiting thread
    barrier.await();
    final List<Integer> elements = IntStream.rangeClosed(0, globalCheckpoint - 1).boxed().collect(Collectors.toList());
    Randomness.shuffle(elements);
    for (int i = 0; i < elements.size(); i++) {
        updateLocalCheckpoint(tracker, trackingAllocationId.getId(), elements.get(i));
        assertFalse(complete.get());
        assertFalse(tracker.getTrackedLocalCheckpointForShard(trackingAllocationId.getId()).inSync);
        assertBusy(() -> assertTrue(tracker.pendingInSync.contains(trackingAllocationId.getId())));
    }
    if (randomBoolean()) {
        // normal path, shard catches up
        updateLocalCheckpoint(tracker, trackingAllocationId.getId(), randomIntBetween(globalCheckpoint, 64));
        // synchronize with the waiting thread to mark that it is complete
        barrier.await();
        assertTrue(complete.get());
        assertTrue(tracker.getTrackedLocalCheckpointForShard(trackingAllocationId.getId()).inSync);
    } else {
        // master changes its mind and cancels the allocation
        tracker.updateFromMaster(clusterStateVersion + 1, Collections.singleton(inSyncAllocationId.getId()), routingTable(emptySet(), inSyncAllocationId));
        barrier.await();
        assertTrue(complete.get());
        assertNull(tracker.getTrackedLocalCheckpointForShard(trackingAllocationId.getId()));
    }
    assertFalse(tracker.pendingInSync.contains(trackingAllocationId.getId()));
    thread.join();
}
Also used : BrokenBarrierException(java.util.concurrent.BrokenBarrierException) AllocationId(org.opensearch.cluster.routing.AllocationId) CyclicBarrier(java.util.concurrent.CyclicBarrier) AtomicBoolean(java.util.concurrent.atomic.AtomicBoolean)
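
Both blocking tests in this set use the same CyclicBarrier handshake: the worker thread and the test thread rendezvous once before the blocking call (markAllocationIdAsInSync) and once after it returns, so assertions can run while the call is known to be in flight. A stripped-down, runnable sketch of just that handshake, with a latch standing in for the shard catching up:

import java.util.concurrent.BrokenBarrierException;
import java.util.concurrent.CountDownLatch;
import java.util.concurrent.CyclicBarrier;

public final class BarrierHandshakeSketch {
    public static void main(String[] args) throws Exception {
        final CyclicBarrier barrier = new CyclicBarrier(2);
        final CountDownLatch caughtUp = new CountDownLatch(1); // placeholder for the shard reaching the global checkpoint
        final Thread worker = new Thread(() -> {
            try {
                barrier.await();   // rendezvous 1: both threads are ready
                caughtUp.await();  // stands in for the blocking markAllocationIdAsInSync(...) call
                barrier.await();   // rendezvous 2: the blocking call has returned
            } catch (BrokenBarrierException | InterruptedException e) {
                throw new RuntimeException(e);
            }
        });
        worker.start();
        barrier.await();           // rendezvous 1
        // ... assertions about the still-blocked worker would go here ...
        caughtUp.countDown();      // analogous to advancing the tracked local checkpoint past the global checkpoint
        barrier.await();           // rendezvous 2
        worker.join();
    }
}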

Aggregations

AllocationId (org.opensearch.cluster.routing.AllocationId): 47
ShardId (org.opensearch.index.shard.ShardId): 29
AtomicLong (java.util.concurrent.atomic.AtomicLong): 21
IndexShardRoutingTable (org.opensearch.cluster.routing.IndexShardRoutingTable): 17
HashSet (java.util.HashSet): 16
BrokenBarrierException (java.util.concurrent.BrokenBarrierException): 16
CyclicBarrier (java.util.concurrent.CyclicBarrier): 16
AtomicBoolean (java.util.concurrent.atomic.AtomicBoolean): 16
ShardRouting (org.opensearch.cluster.routing.ShardRouting): 16
TestShardRouting (org.opensearch.cluster.routing.TestShardRouting): 14
ArrayList (java.util.ArrayList): 13
Set (java.util.Set): 13
Matchers.containsString (org.hamcrest.Matchers.containsString): 13
Matchers.hasToString (org.hamcrest.Matchers.hasToString): 13
ShardRoutingState (org.opensearch.cluster.routing.ShardRoutingState): 13
Settings (org.opensearch.common.settings.Settings): 13
IndexSettings (org.opensearch.index.IndexSettings): 13
IOException (java.io.IOException): 12
Collections (java.util.Collections): 12
Collections.emptySet (java.util.Collections.emptySet): 12