Search in sources:

Example 46 with AllocationId

use of org.opensearch.cluster.routing.AllocationId in project OpenSearch by opensearch-project.

the class ReplicationTrackerTests method testRaceUpdatingGlobalCheckpoint.

/**
 * Guards against a lost global checkpoint update when marking a copy as in-sync races with local checkpoint updates.
 * <p>
 * {@link ReplicationTracker#markAllocationIdAsInSync(String, long)} blocks until the local checkpoint of the copy has advanced far
 * enough. The local checkpoint update that releases the waiting thread may run its own global checkpoint update before the waiting
 * thread has moved the allocation ID from the pending set into the in-sync set; in that interleaving nobody recomputes the global
 * checkpoint afterwards. The tracker therefore has to call {@code ReplicationTracker#updateGlobalCheckpointOnPrimary()} once more
 * after the allocation ID has left the pending set in {@link ReplicationTracker#markAllocationIdAsInSync(String, long)}. Without
 * that extra call this test fails, even if a call is added after notifying all waiters in
 * {@link ReplicationTracker#updateLocalCheckpoint(String, long)}.
 *
 * @throws InterruptedException   if the main test thread was interrupted while waiting
 * @throws BrokenBarrierException if the barrier was broken while the main test thread was waiting
 */
public void testRaceUpdatingGlobalCheckpoint() throws InterruptedException, BrokenBarrierException {
    final AllocationId primaryCopy = AllocationId.newInitializing();
    final AllocationId recoveringCopy = AllocationId.newInitializing();
    // four parties: the three racing threads created below plus the main test thread
    final CyclicBarrier startGate = new CyclicBarrier(4);

    final int initialPrimaryCheckpoint = randomIntBetween(0, Integer.MAX_VALUE - 1);
    final ReplicationTracker tracker = newTracker(primaryCopy);
    final long clusterStateVersion = randomNonNegativeLong();
    tracker.updateFromMaster(
        clusterStateVersion,
        Collections.singleton(primaryCopy.getId()),
        routingTable(Collections.singleton(recoveringCopy), primaryCopy)
    );
    tracker.activatePrimaryMode(initialPrimaryCheckpoint);
    addPeerRecoveryRetentionLease(tracker, recoveringCopy);

    // the checkpoint both copies are moved to, and the (strictly smaller) checkpoint the in-sync marking is derived from
    final int targetCheckpoint = randomIntBetween(initialPrimaryCheckpoint + 1, Integer.MAX_VALUE);
    final int recoveringStartCheckpoint = randomIntBetween(0, targetCheckpoint - 1);

    // racer 1: advances the local checkpoint of the primary copy
    final Thread primaryUpdater = new Thread(() -> {
        try {
            startGate.await();
        } catch (final BrokenBarrierException | InterruptedException failure) {
            throw new RuntimeException(failure);
        }
        tracker.updateLocalCheckpoint(primaryCopy.getId(), targetCheckpoint);
    });

    // racer 2: advances the local checkpoint of the recovering copy
    final Thread recoveringUpdater = new Thread(() -> {
        try {
            startGate.await();
        } catch (final BrokenBarrierException | InterruptedException failure) {
            throw new RuntimeException(failure);
        }
        tracker.updateLocalCheckpoint(recoveringCopy.getId(), targetCheckpoint);
    });

    // racer 3: starts tracking the recovering copy and marks it as in-sync
    final Thread inSyncMarker = new Thread(() -> {
        try {
            startGate.await();
            tracker.initiateTracking(recoveringCopy.getId());
            tracker.markAllocationIdAsInSync(recoveringCopy.getId(), recoveringStartCheckpoint - 1);
        } catch (final BrokenBarrierException | InterruptedException failure) {
            throw new RuntimeException(failure);
        }
    });

    final Thread[] racers = { primaryUpdater, recoveringUpdater, inSyncMarker };
    for (final Thread racer : racers) {
        racer.start();
    }
    // the main thread is the fourth party; its arrival releases all racers at once
    startGate.await();
    for (final Thread racer : racers) {
        racer.join();
    }

    final long expectedGlobalCheckpoint = targetCheckpoint;
    assertThat(tracker.getGlobalCheckpoint(), equalTo(expectedGlobalCheckpoint));
}
Also used : BrokenBarrierException(java.util.concurrent.BrokenBarrierException) AllocationId(org.opensearch.cluster.routing.AllocationId) CyclicBarrier(java.util.concurrent.CyclicBarrier)

Example 47 with AllocationId

use of org.opensearch.cluster.routing.AllocationId in project OpenSearch by opensearch-project.

the class ReplicationTrackerTests method testPrimaryContextHandoff.

/**
 * Exercises a primary relocation handoff between two {@link ReplicationTracker} instances under randomized interleavings.
 * <p>
 * The test builds an "old" primary tracker (keyed by the primary shard's allocation ID) and a "new" primary tracker (keyed by the
 * matching relocation ID), drives the old primary through random cluster state, local checkpoint and in-sync updates, starts the
 * relocation handoff (optionally aborting and retrying it once), round-trips the resulting primary context through a stream, and
 * activates the new primary with it while cluster state updates are applied in one of four random orders. It then asserts that both
 * trackers agree on the shared state and that the old primary only leaves primary mode once the handoff is completed.
 *
 * @throws IOException declared for the calls in this test that can raise it (presumably the stream round-trip of the primary context)
 */
public void testPrimaryContextHandoff() throws IOException {
    final IndexSettings indexSettings = IndexSettingsModule.newIndexSettings("test", Settings.EMPTY);
    final ShardId shardId = new ShardId("test", "_na_", 0);
    FakeClusterState clusterState = initialState();
    final AllocationId aId = clusterState.routingTable.primaryShard().allocationId();
    // no-op callback: this test does not observe global checkpoint update notifications
    final LongConsumer onUpdate = updatedGlobalCheckpoint -> {
    };
    final long primaryTerm = randomNonNegativeLong();
    final long globalCheckpoint = UNASSIGNED_SEQ_NO;
    // no-op callback: new retention leases are not propagated anywhere in this test
    final BiConsumer<RetentionLeases, ActionListener<ReplicationResponse>> onNewRetentionLease = (leases, listener) -> {
    };
    // both trackers share shard, settings, term and callbacks; they differ only in the allocation ID they represent
    ReplicationTracker oldPrimary = new ReplicationTracker(shardId, aId.getId(), indexSettings, primaryTerm, globalCheckpoint, onUpdate, () -> 0L, onNewRetentionLease, OPS_BASED_RECOVERY_ALWAYS_REASONABLE);
    ReplicationTracker newPrimary = new ReplicationTracker(shardId, aId.getRelocationId(), indexSettings, primaryTerm, globalCheckpoint, onUpdate, () -> 0L, onNewRetentionLease, OPS_BASED_RECOVERY_ALWAYS_REASONABLE);
    // the two allocation IDs taking part in the handoff; passed to every random cluster state update below
    Set<String> allocationIds = new HashSet<>(Arrays.asList(oldPrimary.shardAllocationId, newPrimary.shardAllocationId));
    clusterState.apply(oldPrimary);
    clusterState.apply(newPrimary);
    // only the old primary is put into primary mode; the new primary receives the old primary's retention leases as a replica
    oldPrimary.activatePrimaryMode(randomIntBetween(Math.toIntExact(NO_OPS_PERFORMED), 10));
    addPeerRecoveryRetentionLease(oldPrimary, newPrimary.shardAllocationId);
    newPrimary.updateRetentionLeasesOnReplica(oldPrimary.getRetentionLeases());
    // random warm-up: occasionally change the cluster state on both trackers, and randomly advance checkpoints / mark copies in-sync
    final int numUpdates = randomInt(10);
    for (int i = 0; i < numUpdates; i++) {
        if (rarely()) {
            clusterState = randomUpdateClusterState(allocationIds, clusterState);
            clusterState.apply(oldPrimary);
            clusterState.apply(newPrimary);
        }
        if (randomBoolean()) {
            randomLocalCheckpointUpdate(oldPrimary);
        }
        if (randomBoolean()) {
            randomMarkInSync(oldPrimary, newPrimary);
        }
    }
    // simulate transferring the global checkpoint to the new primary after finalizing recovery before the handoff
    markAsTrackingAndInSyncQuietly(oldPrimary, newPrimary.shardAllocationId, Math.max(SequenceNumbers.NO_OPS_PERFORMED, oldPrimary.getGlobalCheckpoint() + randomInt(5)));
    oldPrimary.updateGlobalCheckpointForShard(newPrimary.shardAllocationId, oldPrimary.getGlobalCheckpoint());
    ReplicationTracker.PrimaryContext primaryContext = oldPrimary.startRelocationHandoff(newPrimary.shardAllocationId);
    // in half of the runs, abort the first handoff, apply further random updates and start a second handoff
    if (randomBoolean()) {
        // cluster state update after primary context handoff
        if (randomBoolean()) {
            clusterState = randomUpdateClusterState(allocationIds, clusterState);
            clusterState.apply(oldPrimary);
            clusterState.apply(newPrimary);
        }
        // abort handoff, check that we can continue updates and retry handoff
        oldPrimary.abortRelocationHandoff();
        if (rarely()) {
            clusterState = randomUpdateClusterState(allocationIds, clusterState);
            clusterState.apply(oldPrimary);
            clusterState.apply(newPrimary);
        }
        if (randomBoolean()) {
            randomLocalCheckpointUpdate(oldPrimary);
        }
        if (randomBoolean()) {
            randomMarkInSync(oldPrimary, newPrimary);
        }
        // do another handoff
        primaryContext = oldPrimary.startRelocationHandoff(newPrimary.shardAllocationId);
    }
    // send primary context through the wire
    BytesStreamOutput output = new BytesStreamOutput();
    primaryContext.writeTo(output);
    StreamInput streamInput = output.bytes().streamInput();
    primaryContext = new ReplicationTracker.PrimaryContext(streamInput);
    // pick one of four orderings of "apply cluster state update" versus "activate new primary with the transferred context"
    switch(randomInt(3)) {
        case 0:
            {
                // apply cluster state update on old primary while primary context is being transferred
                clusterState = randomUpdateClusterState(allocationIds, clusterState);
                clusterState.apply(oldPrimary);
                // activate new primary
                newPrimary.activateWithPrimaryContext(primaryContext);
                // apply cluster state update on new primary so that the states on old and new primary are comparable
                clusterState.apply(newPrimary);
                break;
            }
        case 1:
            {
                // apply cluster state update on new primary while primary context is being transferred
                clusterState = randomUpdateClusterState(allocationIds, clusterState);
                clusterState.apply(newPrimary);
                // activate new primary
                newPrimary.activateWithPrimaryContext(primaryContext);
                // apply cluster state update on old primary so that the states on old and new primary are comparable
                clusterState.apply(oldPrimary);
                break;
            }
        case 2:
            {
                // apply cluster state update on both copies while primary context is being transferred
                clusterState = randomUpdateClusterState(allocationIds, clusterState);
                clusterState.apply(oldPrimary);
                clusterState.apply(newPrimary);
                newPrimary.activateWithPrimaryContext(primaryContext);
                break;
            }
        case 3:
            {
                // no cluster state update
                newPrimary.activateWithPrimaryContext(primaryContext);
                break;
            }
    }
    // until the handoff is completed, both trackers report primary mode and must have applied the same cluster state version
    assertTrue(oldPrimary.primaryMode);
    assertTrue(newPrimary.primaryMode);
    assertThat(newPrimary.appliedClusterStateVersion, equalTo(oldPrimary.appliedClusterStateVersion));
    /*
     * We can not assert on shared knowledge of the global checkpoint between the old primary and the new primary as the new primary
     * will update its global checkpoint state without the old primary learning of it, and the old primary could have updated its
     * global checkpoint state after the primary context was transferred.
     */
    // hence compare the full checkpoint state only for copies other than the two primaries themselves
    Map<String, ReplicationTracker.CheckpointState> oldPrimaryCheckpointsCopy = new HashMap<>(oldPrimary.checkpoints);
    oldPrimaryCheckpointsCopy.remove(oldPrimary.shardAllocationId);
    oldPrimaryCheckpointsCopy.remove(newPrimary.shardAllocationId);
    Map<String, ReplicationTracker.CheckpointState> newPrimaryCheckpointsCopy = new HashMap<>(newPrimary.checkpoints);
    newPrimaryCheckpointsCopy.remove(oldPrimary.shardAllocationId);
    newPrimaryCheckpointsCopy.remove(newPrimary.shardAllocationId);
    assertThat(newPrimaryCheckpointsCopy, equalTo(oldPrimaryCheckpointsCopy));
    // we can however assert that shared knowledge of the local checkpoint and in-sync status is equal
    assertThat(oldPrimary.checkpoints.get(oldPrimary.shardAllocationId).localCheckpoint, equalTo(newPrimary.checkpoints.get(oldPrimary.shardAllocationId).localCheckpoint));
    assertThat(oldPrimary.checkpoints.get(newPrimary.shardAllocationId).localCheckpoint, equalTo(newPrimary.checkpoints.get(newPrimary.shardAllocationId).localCheckpoint));
    assertThat(oldPrimary.checkpoints.get(oldPrimary.shardAllocationId).inSync, equalTo(newPrimary.checkpoints.get(oldPrimary.shardAllocationId).inSync));
    assertThat(oldPrimary.checkpoints.get(newPrimary.shardAllocationId).inSync, equalTo(newPrimary.checkpoints.get(newPrimary.shardAllocationId).inSync));
    assertThat(newPrimary.getGlobalCheckpoint(), equalTo(oldPrimary.getGlobalCheckpoint()));
    assertThat(newPrimary.routingTable, equalTo(oldPrimary.routingTable));
    assertThat(newPrimary.replicationGroup, equalTo(oldPrimary.replicationGroup));
    // completing the handoff is what flips the old primary out of primary mode and marks it as relocated
    assertFalse(oldPrimary.relocated);
    oldPrimary.completeRelocationHandoff();
    assertFalse(oldPrimary.primaryMode);
    assertTrue(oldPrimary.relocated);
}
Also used : IntStream(java.util.stream.IntStream) Arrays(java.util.Arrays) Matchers.not(org.hamcrest.Matchers.not) AtomicBoolean(java.util.concurrent.atomic.AtomicBoolean) HashMap(java.util.HashMap) Function(java.util.function.Function) ArrayList(java.util.ArrayList) HashSet(java.util.HashSet) UNASSIGNED_SEQ_NO(org.opensearch.index.seqno.SequenceNumbers.UNASSIGNED_SEQ_NO) Map(java.util.Map) ShardRoutingState(org.opensearch.cluster.routing.ShardRoutingState) BiConsumer(java.util.function.BiConsumer) ActionListener(org.opensearch.action.ActionListener) IndexShardRoutingTable(org.opensearch.cluster.routing.IndexShardRoutingTable) StreamInput(org.opensearch.common.io.stream.StreamInput) CyclicBarrier(java.util.concurrent.CyclicBarrier) Matchers.greaterThanOrEqualTo(org.hamcrest.Matchers.greaterThanOrEqualTo) Collections.emptySet(java.util.Collections.emptySet) NO_OPS_PERFORMED(org.opensearch.index.seqno.SequenceNumbers.NO_OPS_PERFORMED) AllocationId(org.opensearch.cluster.routing.AllocationId) Matchers.lessThanOrEqualTo(org.hamcrest.Matchers.lessThanOrEqualTo) Collection(java.util.Collection) Set(java.util.Set) Settings(org.opensearch.common.settings.Settings) IOException(java.io.IOException) BrokenBarrierException(java.util.concurrent.BrokenBarrierException) BytesStreamOutput(org.opensearch.common.io.stream.BytesStreamOutput) Collectors(java.util.stream.Collectors) Tuple(org.opensearch.common.collect.Tuple) LongConsumer(java.util.function.LongConsumer) ShardRouting(org.opensearch.cluster.routing.ShardRouting) ShardId(org.opensearch.index.shard.ShardId) Consumer(java.util.function.Consumer) TestShardRouting(org.opensearch.cluster.routing.TestShardRouting) AtomicLong(java.util.concurrent.atomic.AtomicLong) Sets(org.opensearch.common.util.set.Sets) List(java.util.List) Stream(java.util.stream.Stream) Matchers.hasItem(org.hamcrest.Matchers.hasItem) Randomness(org.opensearch.common.Randomness) Matchers.equalTo(org.hamcrest.Matchers.equalTo) 
IndexSettings(org.opensearch.index.IndexSettings) ReplicationResponse(org.opensearch.action.support.replication.ReplicationResponse) Matchers.greaterThan(org.hamcrest.Matchers.greaterThan) Collections(java.util.Collections) IndexSettingsModule(org.opensearch.test.IndexSettingsModule) HashMap(java.util.HashMap) IndexSettings(org.opensearch.index.IndexSettings) AllocationId(org.opensearch.cluster.routing.AllocationId) BytesStreamOutput(org.opensearch.common.io.stream.BytesStreamOutput) ShardId(org.opensearch.index.shard.ShardId) LongConsumer(java.util.function.LongConsumer) ActionListener(org.opensearch.action.ActionListener) StreamInput(org.opensearch.common.io.stream.StreamInput) HashSet(java.util.HashSet)

Aggregations

AllocationId (org.opensearch.cluster.routing.AllocationId) 47, ShardId (org.opensearch.index.shard.ShardId) 29, AtomicLong (java.util.concurrent.atomic.AtomicLong) 21, IndexShardRoutingTable (org.opensearch.cluster.routing.IndexShardRoutingTable) 17, HashSet (java.util.HashSet) 16, BrokenBarrierException (java.util.concurrent.BrokenBarrierException) 16, CyclicBarrier (java.util.concurrent.CyclicBarrier) 16, AtomicBoolean (java.util.concurrent.atomic.AtomicBoolean) 16, ShardRouting (org.opensearch.cluster.routing.ShardRouting) 16, TestShardRouting (org.opensearch.cluster.routing.TestShardRouting) 14, ArrayList (java.util.ArrayList) 13, Set (java.util.Set) 13, Matchers.containsString (org.hamcrest.Matchers.containsString) 13, Matchers.hasToString (org.hamcrest.Matchers.hasToString) 13, ShardRoutingState (org.opensearch.cluster.routing.ShardRoutingState) 13, Settings (org.opensearch.common.settings.Settings) 13, IndexSettings (org.opensearch.index.IndexSettings) 13, IOException (java.io.IOException) 12, Collections (java.util.Collections) 12, Collections.emptySet (java.util.Collections.emptySet) 12