
Example 6 with MockHelixParticipant

Use of com.github.ambry.clustermap.MockHelixParticipant in project ambry by LinkedIn.

From class ReplicationTest, method replicaFromInactiveToOfflineTest.

/**
 * Test the INACTIVE -> OFFLINE transition on an existing replica (both success and failure cases).
 */
@Test
public void replicaFromInactiveToOfflineTest() throws Exception {
    MockClusterMap clusterMap = new MockClusterMap();
    ClusterMapConfig clusterMapConfig = new ClusterMapConfig(verifiableProperties);
    MockHelixParticipant.metricRegistry = new MetricRegistry();
    MockHelixParticipant mockHelixParticipant = new MockHelixParticipant(clusterMapConfig);
    Pair<StorageManager, ReplicationManager> managers = createStorageManagerAndReplicationManager(clusterMap, clusterMapConfig, mockHelixParticipant);
    StorageManager storageManager = managers.getFirst();
    MockReplicationManager replicationManager = (MockReplicationManager) managers.getSecond();
    // 1. test replica not found case
    try {
        mockHelixParticipant.onPartitionBecomeOfflineFromInactive("-1");
        fail("should fail because of invalid partition");
    } catch (StateTransitionException e) {
        assertEquals("Error code doesn't match", ReplicaNotFound, e.getErrorCode());
    }
    // 2. test store not started case
    PartitionId existingPartition = replicationManager.partitionToPartitionInfo.keySet().iterator().next();
    storageManager.shutdownBlobStore(existingPartition);
    try {
        mockHelixParticipant.onPartitionBecomeOfflineFromInactive(existingPartition.toPathString());
        fail("should fail because store is not started");
    } catch (StateTransitionException e) {
        assertEquals("Error code doesn't match", StoreNotStarted, e.getErrorCode());
    }
    storageManager.startBlobStore(existingPartition);
    // before testing the success case, write a blob (size = 100) into the local store and add a delete record for it
    Store localStore = storageManager.getStore(existingPartition);
    MockId id = new MockId(TestUtils.getRandomString(10), Utils.getRandomShort(TestUtils.RANDOM), Utils.getRandomShort(TestUtils.RANDOM));
    long crc = (new Random()).nextLong();
    long blobSize = 100;
    MessageInfo info = new MessageInfo(id, blobSize, false, false, Utils.Infinite_Time, crc, id.getAccountId(), id.getContainerId(), Utils.Infinite_Time);
    List<MessageInfo> infos = new ArrayList<>();
    List<ByteBuffer> buffers = new ArrayList<>();
    ByteBuffer buffer = ByteBuffer.wrap(TestUtils.getRandomBytes((int) blobSize));
    infos.add(info);
    buffers.add(buffer);
    localStore.put(new MockMessageWriteSet(infos, buffers));
    // delete the blob
    int deleteRecordSize = (int) (new DeleteMessageFormatInputStream(id, (short) 0, (short) 0, 0).getSize());
    MessageInfo deleteInfo = new MessageInfo(id, deleteRecordSize, id.getAccountId(), id.getContainerId(), time.milliseconds());
    localStore.delete(Collections.singletonList(deleteInfo));
    int sizeOfPutAndHeader = 100 + 18;
    int sizeOfWhole = sizeOfPutAndHeader + deleteRecordSize;
    // note that end offset of last PUT = 100 + 18 = 118, end offset of the store is sizeOfWhole
    // 3. test success case (create a new thread and trigger INACTIVE -> OFFLINE transition)
    ReplicaId localReplica = storageManager.getReplica(existingPartition.toPathString());
    // put a decommission-in-progress file into local store dir
    File decommissionFile = new File(localReplica.getReplicaPath(), "decommission_in_progress");
    assertTrue("Couldn't create decommission file in local store", decommissionFile.createNewFile());
    decommissionFile.deleteOnExit();
    assertNotSame("Before the transition completes, the local store state shouldn't be OFFLINE", ReplicaState.OFFLINE, localStore.getCurrentState());
    mockHelixParticipant.registerPartitionStateChangeListener(StateModelListenerType.ReplicationManagerListener, replicationManager.replicationListener);
    CountDownLatch participantLatch = new CountDownLatch(1);
    replicationManager.listenerExecutionLatch = new CountDownLatch(1);
    Utils.newThread(() -> {
        mockHelixParticipant.onPartitionBecomeOfflineFromInactive(existingPartition.toPathString());
        participantLatch.countDown();
    }, false).start();
    assertTrue("Partition state change listener in ReplicationManager didn't get called within 1 sec", replicationManager.listenerExecutionLatch.await(1, TimeUnit.SECONDS));
    // the state of local store should be updated to OFFLINE
    assertEquals("Local store state is not expected", ReplicaState.OFFLINE, localStore.getCurrentState());
    // update replication lag between local and peer replicas
    List<RemoteReplicaInfo> remoteReplicaInfos = replicationManager.partitionToPartitionInfo.get(existingPartition).getRemoteReplicaInfos();
    ReplicaId peerReplica1 = remoteReplicaInfos.get(0).getReplicaId();
    ReplicaId peerReplica2 = remoteReplicaInfos.get(1).getReplicaId();
    // peer1 catches up with last PUT, peer2 catches up with end offset of local store. In this case, SyncUp is not complete
    replicationManager.updateTotalBytesReadByRemoteReplica(existingPartition, peerReplica1.getDataNodeId().getHostname(), peerReplica1.getReplicaPath(), sizeOfPutAndHeader);
    replicationManager.updateTotalBytesReadByRemoteReplica(existingPartition, peerReplica2.getDataNodeId().getHostname(), peerReplica2.getReplicaPath(), sizeOfWhole);
    assertFalse("Only one peer replica has fully caught up with end offset so sync-up should not complete", mockHelixParticipant.getReplicaSyncUpManager().isSyncUpComplete(localReplica));
    // make peer1 catch up with end offset
    replicationManager.updateTotalBytesReadByRemoteReplica(existingPartition, peerReplica1.getDataNodeId().getHostname(), peerReplica1.getReplicaPath(), sizeOfWhole);
    // Now, sync-up should complete and transition should be able to proceed.
    assertTrue("Inactive-To-Offline transition didn't complete within 1 sec", participantLatch.await(1, TimeUnit.SECONDS));
    assertFalse("Local store should be stopped after transition", localStore.isStarted());
    storageManager.shutdown();
}
Also used : StorageManager(com.github.ambry.store.StorageManager) ArrayList(java.util.ArrayList) Store(com.github.ambry.store.Store) MockId(com.github.ambry.store.MockId) MockHelixParticipant(com.github.ambry.clustermap.MockHelixParticipant) Random(java.util.Random) MetricRegistry(com.codahale.metrics.MetricRegistry) DeleteMessageFormatInputStream(com.github.ambry.messageformat.DeleteMessageFormatInputStream) MockPartitionId(com.github.ambry.clustermap.MockPartitionId) PartitionId(com.github.ambry.clustermap.PartitionId) CountDownLatch(java.util.concurrent.CountDownLatch) ByteBuffer(java.nio.ByteBuffer) ClusterMapConfig(com.github.ambry.config.ClusterMapConfig) MockReplicaId(com.github.ambry.clustermap.MockReplicaId) ReplicaId(com.github.ambry.clustermap.ReplicaId) MessageInfo(com.github.ambry.store.MessageInfo) MockMessageWriteSet(com.github.ambry.store.MockMessageWriteSet) File(java.io.File) MockClusterMap(com.github.ambry.clustermap.MockClusterMap) StateTransitionException(com.github.ambry.clustermap.StateTransitionException) Test(org.junit.Test)
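
The blocking behavior in the success case above hinges on a sync-up gate: the transition thread parks until enough peer replicas have caught up with the local store's end offset (two peers in this run, since sync-up is still incomplete after peer2 alone reaches sizeOfWhole). Below is a minimal sketch of that idea; SimpleSyncUpGate and its methods are hypothetical stand-ins, not ambry's actual ReplicaSyncUpManager API.

import java.util.Set;
import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.CountDownLatch;
import java.util.concurrent.TimeUnit;

// Hypothetical, simplified stand-in for the sync-up gate the test exercises.
public class SimpleSyncUpGate {
    private final long localEndOffset;
    private final Set<String> caughtUpPeers = ConcurrentHashMap.newKeySet();
    private final CountDownLatch syncedUp;

    public SimpleSyncUpGate(long localEndOffset, int requiredCaughtUpPeers) {
        this.localEndOffset = localEndOffset;
        this.syncedUp = new CountDownLatch(requiredCaughtUpPeers);
    }

    // Called as replication lag updates arrive; compare with the calls to
    // replicationManager.updateTotalBytesReadByRemoteReplica in the test.
    public void updatePeerBytesRead(String peer, long bytesRead) {
        if (bytesRead >= localEndOffset && caughtUpPeers.add(peer)) {
            syncedUp.countDown();
        }
    }

    public boolean isSyncUpComplete() {
        return syncedUp.getCount() == 0;
    }

    // The transition thread waits here, like the participant thread above
    // that only finishes after both peers reach sizeOfWhole.
    public boolean awaitSyncUp(long timeout, TimeUnit unit) throws InterruptedException {
        return syncedUp.await(timeout, unit);
    }
}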

Example 7 with MockHelixParticipant

Use of com.github.ambry.clustermap.MockHelixParticipant in project ambry by LinkedIn.

From class ReplicationTest, method replicaFromStandbyToLeaderTest.

/**
 * Test the state transition in the replication manager from STANDBY to LEADER.
 * Test setup: when creating partitions, ensure that there is exactly one replica in LEADER state in each data center.
 * Test condition: when a partition on the current node moves from standby to leader, verify that the in-memory map
 * from partition to peer leader replicas is updated correctly.
 * @throws Exception
 */
@Test
public void replicaFromStandbyToLeaderTest() throws Exception {
    MockClusterMap clusterMap = new MockClusterMap();
    ClusterMapConfig clusterMapConfig = new ClusterMapConfig(verifiableProperties);
    MockHelixParticipant.metricRegistry = new MetricRegistry();
    MockHelixParticipant mockHelixParticipant = new MockHelixParticipant(clusterMapConfig);
    ReplicationConfig initialReplicationConfig = replicationConfig;
    properties.setProperty("replication.model.across.datacenters", "LEADER_BASED");
    replicationConfig = new ReplicationConfig(new VerifiableProperties(properties));
    Pair<StorageManager, ReplicationManager> managers = createStorageManagerAndReplicationManager(clusterMap, clusterMapConfig, mockHelixParticipant);
    StorageManager storageManager = managers.getFirst();
    MockReplicationManager replicationManager = (MockReplicationManager) managers.getSecond();
    List<ReplicaId> replicaIds = clusterMap.getReplicaIds(replicationManager.dataNodeId);
    for (ReplicaId replicaId : replicaIds) {
        MockReplicaId mockReplicaId = (MockReplicaId) replicaId;
        if (mockReplicaId.getReplicaState() == ReplicaState.LEADER) {
            PartitionId existingPartition = mockReplicaId.getPartitionId();
            mockHelixParticipant.onPartitionBecomeLeaderFromStandby(existingPartition.toPathString());
            Set<ReplicaId> peerLeaderReplicasInReplicationManager = replicationManager.leaderBasedReplicationAdmin.getLeaderPartitionToPeerLeaderReplicas().get(existingPartition.toPathString());
            Set<ReplicaId> peerLeaderReplicasInClusterMap = new HashSet<>(existingPartition.getReplicaIdsByState(ReplicaState.LEADER, null));
            peerLeaderReplicasInClusterMap.remove(mockReplicaId);
            assertThat("Mismatch in list of leader peer replicas stored by partition in replication manager and cluster map", peerLeaderReplicasInReplicationManager, is(peerLeaderReplicasInClusterMap));
        }
    }
    storageManager.shutdown();
    replicationConfig = initialReplicationConfig;
}
Also used : ReplicationConfig(com.github.ambry.config.ReplicationConfig) VerifiableProperties(com.github.ambry.config.VerifiableProperties) MetricRegistry(com.codahale.metrics.MetricRegistry) StorageManager(com.github.ambry.store.StorageManager) MockPartitionId(com.github.ambry.clustermap.MockPartitionId) PartitionId(com.github.ambry.clustermap.PartitionId) ClusterMapConfig(com.github.ambry.config.ClusterMapConfig) MockReplicaId(com.github.ambry.clustermap.MockReplicaId) ReplicaId(com.github.ambry.clustermap.ReplicaId) MockHelixParticipant(com.github.ambry.clustermap.MockHelixParticipant) MockReplicaId(com.github.ambry.clustermap.MockReplicaId) MockClusterMap(com.github.ambry.clustermap.MockClusterMap) HashSet(java.util.HashSet) Test(org.junit.Test)
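
The assertion above compares the replication manager's in-memory bookkeeping against the cluster map. A minimal sketch of the bookkeeping being verified, assuming a simplified admin class (LeaderMapSketch is hypothetical; the real logic sits behind replicationManager.leaderBasedReplicationAdmin): on STANDBY -> LEADER, record the partition's peer leader replicas (every LEADER replica of the partition except the local one); on LEADER -> STANDBY, drop the entry again.

import java.util.HashSet;
import java.util.Map;
import java.util.Set;
import java.util.concurrent.ConcurrentHashMap;

// Hypothetical sketch of the partition -> peer leader replicas map.
public class LeaderMapSketch<R> {
    private final Map<String, Set<R>> leaderPartitionToPeerLeaders = new ConcurrentHashMap<>();

    // STANDBY -> LEADER: remember the peer leaders for this partition.
    public void onBecomeLeader(String partitionPath, Set<R> allLeaderReplicas, R localReplica) {
        Set<R> peers = new HashSet<>(allLeaderReplicas);
        peers.remove(localReplica);
        leaderPartitionToPeerLeaders.put(partitionPath, peers);
    }

    // LEADER -> STANDBY: the partition no longer leads locally, so forget its peers.
    public void onBecomeStandby(String partitionPath) {
        leaderPartitionToPeerLeaders.remove(partitionPath);
    }

    public Map<String, Set<R>> getLeaderPartitionToPeerLeaderReplicas() {
        return leaderPartitionToPeerLeaders;
    }
}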

Example 8 with MockHelixParticipant

Use of com.github.ambry.clustermap.MockHelixParticipant in project ambry by LinkedIn.

From class ReplicationTest, method replicaFromOfflineToBootstrapTest.

/**
 * Test the state transition in the replication manager from OFFLINE to BOOTSTRAP.
 * @throws Exception
 */
@Test
public void replicaFromOfflineToBootstrapTest() throws Exception {
    MockClusterMap clusterMap = new MockClusterMap();
    ClusterMapConfig clusterMapConfig = new ClusterMapConfig(verifiableProperties);
    MockHelixParticipant.metricRegistry = new MetricRegistry();
    MockHelixParticipant mockHelixParticipant = new MockHelixParticipant(clusterMapConfig);
    DataNodeId currentNode = clusterMap.getDataNodeIds().get(0);
    Pair<StorageManager, ReplicationManager> managers = createStorageManagerAndReplicationManager(clusterMap, clusterMapConfig, mockHelixParticipant);
    StorageManager storageManager = managers.getFirst();
    MockReplicationManager replicationManager = (MockReplicationManager) managers.getSecond();
    assertTrue("State change listener in cluster participant should contain replication manager listener", mockHelixParticipant.getPartitionStateChangeListeners().containsKey(StateModelListenerType.ReplicationManagerListener));
    // 1. test partition not found case (should throw exception)
    try {
        mockHelixParticipant.onPartitionBecomeBootstrapFromOffline("-1");
        fail("should fail because replica is not found");
    } catch (StateTransitionException e) {
        assertEquals("Transition error doesn't match", ReplicaNotFound, e.getErrorCode());
    }
    // 2. create a new partition and test replica addition success case
    ReplicaId newReplicaToAdd = getNewReplicaToAdd(clusterMap);
    PartitionId newPartition = newReplicaToAdd.getPartitionId();
    assertTrue("Adding new replica to Storage Manager should succeed", storageManager.addBlobStore(newReplicaToAdd));
    assertFalse("partitionToPartitionInfo should not contain new partition", replicationManager.partitionToPartitionInfo.containsKey(newPartition));
    mockHelixParticipant.onPartitionBecomeBootstrapFromOffline(newPartition.toPathString());
    assertTrue("partitionToPartitionInfo should contain new partition", replicationManager.partitionToPartitionInfo.containsKey(newPartition));
    // 3. test replica addition failure case
    replicationManager.partitionToPartitionInfo.remove(newPartition);
    replicationManager.addReplicaReturnVal = false;
    try {
        mockHelixParticipant.onPartitionBecomeBootstrapFromOffline(newPartition.toPathString());
        fail("should fail due to replica addition failure");
    } catch (StateTransitionException e) {
        assertEquals("Transition error doesn't match", ReplicaOperationFailure, e.getErrorCode());
    }
    replicationManager.addReplicaReturnVal = null;
    // 4. test OFFLINE -> BOOTSTRAP on existing replica (should be no-op)
    ReplicaId existingReplica = clusterMap.getReplicaIds(currentNode).get(0);
    assertTrue("partitionToPartitionInfo should contain existing partition", replicationManager.partitionToPartitionInfo.containsKey(existingReplica.getPartitionId()));
    mockHelixParticipant.onPartitionBecomeBootstrapFromOffline(existingReplica.getPartitionId().toPathString());
    storageManager.shutdown();
}
Also used : MockHelixParticipant(com.github.ambry.clustermap.MockHelixParticipant) MetricRegistry(com.codahale.metrics.MetricRegistry) StorageManager(com.github.ambry.store.StorageManager) MockPartitionId(com.github.ambry.clustermap.MockPartitionId) PartitionId(com.github.ambry.clustermap.PartitionId) DataNodeId(com.github.ambry.clustermap.DataNodeId) MockDataNodeId(com.github.ambry.clustermap.MockDataNodeId) ClusterMapConfig(com.github.ambry.config.ClusterMapConfig) MockReplicaId(com.github.ambry.clustermap.MockReplicaId) ReplicaId(com.github.ambry.clustermap.ReplicaId) MockClusterMap(com.github.ambry.clustermap.MockClusterMap) StateTransitionException(com.github.ambry.clustermap.StateTransitionException) Test(org.junit.Test)
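
The four cases above pin down the handler's contract: an unknown partition raises ReplicaNotFound, a failed replica addition raises ReplicaOperationFailure, and an already-tracked partition is a no-op. A minimal sketch of a handler with that shape; BootstrapTransitionSketch, partitionExists, and addReplica are hypothetical stand-ins for the cluster map lookup and the replication manager's replica addition, and the exception type mirrors but simplifies ambry's StateTransitionException.

import java.util.Set;
import java.util.concurrent.ConcurrentHashMap;

// Hypothetical sketch of the OFFLINE -> BOOTSTRAP handler's error contract.
public class BootstrapTransitionSketch {
    enum TransitionErrorCode { ReplicaNotFound, ReplicaOperationFailure }

    static class StateTransitionException extends RuntimeException {
        final TransitionErrorCode errorCode;
        StateTransitionException(String message, TransitionErrorCode errorCode) {
            super(message);
            this.errorCode = errorCode;
        }
    }

    private final Set<String> trackedPartitions = ConcurrentHashMap.newKeySet();

    public void onPartitionBecomeBootstrapFromOffline(String partitionName) {
        if (!partitionExists(partitionName)) {
            throw new StateTransitionException(partitionName + " not found", TransitionErrorCode.ReplicaNotFound);
        }
        if (trackedPartitions.contains(partitionName)) {
            return; // existing replica: the transition is a no-op (case 4 above)
        }
        if (!addReplica(partitionName)) {
            throw new StateTransitionException("could not add replica for " + partitionName,
                TransitionErrorCode.ReplicaOperationFailure);
        }
        trackedPartitions.add(partitionName);
    }

    // Assumed stand-ins; the real code consults the cluster map and the storage/replication managers.
    protected boolean partitionExists(String partitionName) { return !"-1".equals(partitionName); }
    protected boolean addReplica(String partitionName) { return true; }
}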

Example 9 with MockHelixParticipant

Use of com.github.ambry.clustermap.MockHelixParticipant in project ambry by LinkedIn.

From class ReplicationTest, method replicaFromLeaderToStandbyTest.

/**
 * Test the state transition in the replication manager from LEADER to STANDBY.
 * Test setup: when creating partitions, ensure that there is exactly one replica in LEADER state in each data center.
 * Test condition: when a partition on the current node moves from leader to standby, verify that the in-memory map
 * from partition to peer leader replicas is updated correctly.
 * @throws Exception
 */
@Test
public void replicaFromLeaderToStandbyTest() throws Exception {
    MockClusterMap clusterMap = new MockClusterMap();
    ClusterMapConfig clusterMapConfig = new ClusterMapConfig(verifiableProperties);
    MockHelixParticipant.metricRegistry = new MetricRegistry();
    MockHelixParticipant mockHelixParticipant = new MockHelixParticipant(clusterMapConfig);
    ReplicationConfig initialReplicationConfig = replicationConfig;
    properties.setProperty("replication.model.across.datacenters", "LEADER_BASED");
    replicationConfig = new ReplicationConfig(new VerifiableProperties(properties));
    Pair<StorageManager, ReplicationManager> managers = createStorageManagerAndReplicationManager(clusterMap, clusterMapConfig, mockHelixParticipant);
    StorageManager storageManager = managers.getFirst();
    MockReplicationManager replicationManager = (MockReplicationManager) managers.getSecond();
    PartitionId existingPartition = replicationManager.partitionToPartitionInfo.keySet().iterator().next();
    mockHelixParticipant.onPartitionBecomeLeaderFromStandby(existingPartition.toPathString());
    Map<String, Set<ReplicaId>> peerLeaderReplicasByPartition = replicationManager.leaderBasedReplicationAdmin.getLeaderPartitionToPeerLeaderReplicas();
    assertTrue("Partition is not present in the map of partition to peer leader replicas after it moved from standby to leader", peerLeaderReplicasByPartition.containsKey(existingPartition.toPathString()));
    mockHelixParticipant.onPartitionBecomeStandbyFromLeader(existingPartition.toPathString());
    assertFalse("Partition is still present in the map of partition to peer leader replicas after it moved from leader to standby", peerLeaderReplicasByPartition.containsKey(existingPartition.toPathString()));
    storageManager.shutdown();
    replicationConfig = initialReplicationConfig;
}
Also used : Set(java.util.Set) HashSet(java.util.HashSet) MockMessageWriteSet(com.github.ambry.store.MockMessageWriteSet) ReplicationConfig(com.github.ambry.config.ReplicationConfig) VerifiableProperties(com.github.ambry.config.VerifiableProperties) MetricRegistry(com.codahale.metrics.MetricRegistry) StorageManager(com.github.ambry.store.StorageManager) MockPartitionId(com.github.ambry.clustermap.MockPartitionId) PartitionId(com.github.ambry.clustermap.PartitionId) ClusterMapConfig(com.github.ambry.config.ClusterMapConfig) MockHelixParticipant(com.github.ambry.clustermap.MockHelixParticipant) MockClusterMap(com.github.ambry.clustermap.MockClusterMap) Test(org.junit.Test)
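
One subtlety shared by both leader-based tests: replicationConfig is a field shared across tests, overridden at the top of the method and restored on its last line. If an assertion fails midway, that restore never runs and the LEADER_BASED setting leaks into later tests. A defensive variant (a sketch reusing the test's own calls, not the actual test code) wraps the body in try/finally:

ReplicationConfig initialReplicationConfig = replicationConfig;
properties.setProperty("replication.model.across.datacenters", "LEADER_BASED");
replicationConfig = new ReplicationConfig(new VerifiableProperties(properties));
try {
    // ... create the managers and exercise the LEADER <-> STANDBY transitions ...
} finally {
    // runs even when an assertion throws, so later tests see the original model
    replicationConfig = initialReplicationConfig;
}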

Example 10 with MockHelixParticipant

Use of com.github.ambry.clustermap.MockHelixParticipant in project ambry by LinkedIn.

From class LeaderBasedReplicationTest, method replicaThreadLeaderBasedReplicationStandByCrossColoFetchTest.

/**
 * Test leader-based replication to verify cross-colo GETs for standby replicas after they have timed out
 * waiting for missing keys.
 * @throws Exception
 */
@Test
public void replicaThreadLeaderBasedReplicationStandByCrossColoFetchTest() throws Exception {
    Map<DataNodeId, MockHost> hosts = new HashMap<>();
    hosts.put(remoteNodeInLocalDC, remoteHostInLocalDC);
    hosts.put(remoteNodeInRemoteDC, remoteHostInRemoteDC);
    int batchSize = 5;
    int numOfMessagesOnRemoteNodeInLocalDC = 3;
    int numOfMessagesOnRemoteNodeInRemoteDC = 10;
    ConnectionPool mockConnectionPool = new MockConnectionPool(hosts, clusterMap, batchSize);
    Pair<StorageManager, ReplicationManager> managers = createStorageManagerAndReplicationManager(clusterMap, clusterMapConfig, mockHelixParticipant, mockConnectionPool);
    StorageManager storageManager = managers.getFirst();
    MockReplicationManager replicationManager = (MockReplicationManager) managers.getSecond();
    // set mock local stores on all remoteReplicaInfos, which will be used during replication.
    for (PartitionId partitionId : replicationManager.partitionToPartitionInfo.keySet()) {
        localHost.addStore(partitionId, null);
        Store localStore = localHost.getStore(partitionId);
        localStore.start();
        List<RemoteReplicaInfo> remoteReplicaInfos = replicationManager.partitionToPartitionInfo.get(partitionId).getRemoteReplicaInfos();
        remoteReplicaInfos.forEach(remoteReplicaInfo -> remoteReplicaInfo.setLocalStore(localStore));
    }
    // get remote replicas and replica thread for remote host on local datacenter
    ReplicaThread intraColoReplicaThread = replicationManager.dataNodeIdToReplicaThread.get(remoteNodeInLocalDC);
    List<RemoteReplicaInfo> remoteReplicaInfosForLocalDC = intraColoReplicaThread.getRemoteReplicaInfos().get(remoteNodeInLocalDC);
    // get remote replicas and replica thread for remote host on remote datacenter
    ReplicaThread crossColoReplicaThread = replicationManager.dataNodeIdToReplicaThread.get(remoteNodeInRemoteDC);
    List<RemoteReplicaInfo> remoteReplicaInfosForRemoteDC = crossColoReplicaThread.getRemoteReplicaInfos().get(remoteNodeInRemoteDC);
    // mock the helix state transition from standby to leader for local leader partitions
    List<? extends ReplicaId> replicaIds = clusterMap.getReplicaIds(replicationManager.dataNodeId);
    for (ReplicaId replicaId : replicaIds) {
        MockReplicaId mockReplicaId = (MockReplicaId) replicaId;
        if (mockReplicaId.getReplicaState() == ReplicaState.LEADER) {
            MockPartitionId mockPartitionId = (MockPartitionId) replicaId.getPartitionId();
            mockHelixParticipant.onPartitionBecomeLeaderFromStandby(mockPartitionId.toPathString());
        }
    }
    // Add put messages to all partitions on remoteHostInLocalDC and remoteHostInRemoteDC
    List<PartitionId> partitionIds = clusterMap.getWritablePartitionIds(null);
    for (PartitionId partitionId : partitionIds) {
        // add 3 put messages to both remoteNodeInLocalDC and remoteNodeInRemoteDC, from which the local host will replicate.
        addPutMessagesToReplicasOfPartition(partitionId, Arrays.asList(remoteHostInLocalDC, remoteHostInRemoteDC), numOfMessagesOnRemoteNodeInLocalDC);
        // add (numOfMessagesOnRemoteNodeInRemoteDC - numOfMessagesOnRemoteNodeInLocalDC) = 7 more put messages to
        // remoteNodeInRemoteDC only. Since these messages are not present in remoteNodeInLocalDC, they don't reach the
        // local node via intra-dc replication. We should see remote standby replicas time out waiting for these
        // messages and a cross-colo fetch happening.
        addPutMessagesToReplicasOfPartition(partitionId, Collections.singletonList(remoteHostInRemoteDC), numOfMessagesOnRemoteNodeInRemoteDC - numOfMessagesOnRemoteNodeInLocalDC);
    }
    // Choose partitions that are leaders on both local and remote nodes
    Set<ReplicaId> leaderReplicasOnLocalAndRemoteNodes = getRemoteLeaderReplicasWithLeaderPartitionsOnLocalNode(clusterMap, replicationManager.dataNodeId, remoteNodeInRemoteDC);
    // replicate with remote node in remote DC
    crossColoReplicaThread.replicate();
    // leader replicas' tokens advance to the batch limit; standby replicas' missing messages are not fetched yet, so their tokens stay at 0.
    for (RemoteReplicaInfo remoteReplicaInfo : remoteReplicaInfosForRemoteDC) {
        if (leaderReplicasOnLocalAndRemoteNodes.contains(remoteReplicaInfo.getReplicaId())) {
            assertEquals("remote token mismatch for leader replicas", ((MockFindToken) remoteReplicaInfo.getToken()).getIndex(), batchSize - 1);
        } else {
            assertEquals("remote token should not move forward for standby replicas until missing keys are fetched", ((MockFindToken) remoteReplicaInfo.getToken()).getIndex(), 0);
        }
    }
    // Replicate with remote node in local dc
    intraColoReplicaThread.replicate();
    // verify that remote token will be moved for all replicas as it is intra-dc replication
    for (RemoteReplicaInfo remoteReplicaInfo : remoteReplicaInfosForLocalDC) {
        assertEquals("mismatch in remote token set for intra colo replicas", ((MockFindToken) remoteReplicaInfo.getToken()).getIndex(), numOfMessagesOnRemoteNodeInLocalDC - 1);
    }
    // process missing keys for standby replicas; some of them may have arrived at the local store by now via intra-dc replication
    for (RemoteReplicaInfo remoteReplicaInfo : remoteReplicaInfosForRemoteDC) {
        crossColoReplicaThread.processMissingKeysFromPreviousMetadataResponse(remoteReplicaInfo);
    }
    // verify that the remote token will remain 0 for standby replicas as one message in its missing set is not fetched yet.
    for (RemoteReplicaInfo remoteReplicaInfo : remoteReplicaInfosForRemoteDC) {
        if (!leaderReplicasOnLocalAndRemoteNodes.contains(remoteReplicaInfo.getReplicaId())) {
            assertTrue("missing store messages should still exist for standby replicas", crossColoReplicaThread.containsMissingKeysFromPreviousMetadataExchange(remoteReplicaInfo));
            assertEquals("remote token should not move forward for standby replicas until missing keys are fetched", ((MockFindToken) remoteReplicaInfo.getToken()).getIndex(), 0);
            assertEquals("incorrect number of missing store messages found for standby replicas", remoteReplicaInfo.getExchangeMetadataResponse().missingStoreMessages.size(), batchSize - numOfMessagesOnRemoteNodeInLocalDC);
        }
    }
    // Attempt replication with remoteNodeInRemoteDC, we should not see any replication attempt for standby replicas
    // and their remote token stays as 0.
    crossColoReplicaThread.replicate();
    for (RemoteReplicaInfo remoteReplicaInfo : remoteReplicaInfosForRemoteDC) {
        if (!leaderReplicasOnLocalAndRemoteNodes.contains(remoteReplicaInfo.getReplicaId())) {
            assertEquals("remote token should not move forward for standby replicas until missing keys are fetched", ((MockFindToken) remoteReplicaInfo.getToken()).getIndex(), 0);
            assertTrue("missing store messages should still exist for standby replicas", crossColoReplicaThread.containsMissingKeysFromPreviousMetadataExchange(remoteReplicaInfo));
        }
    }
    // Move time forward by replicationStandbyWaitTimeoutToTriggerCrossColoFetchSeconds+1 seconds and attempt replication.
    // We should see cross colo fetch for standby replicas now since missing keys haven't arrived for
    // replicationConfig.replicationStandbyWaitTimeoutToTriggerCrossColoFetchSeconds.
    time.sleep((replicationConfig.replicationStandbyWaitTimeoutToTriggerCrossColoFetchSeconds + 1) * 1000);
    // verify that we get the list of standby replicas that timed out on no progress
    Set<RemoteReplicaInfo> allStandbyReplicas = remoteReplicaInfosForRemoteDC.stream().filter(info -> !leaderReplicasOnLocalAndRemoteNodes.contains(info.getReplicaId())).collect(Collectors.toSet());
    assertEquals("mismatch in list of standby replicas timed out on no progress", new HashSet<>(crossColoReplicaThread.getRemoteStandbyReplicasTimedOutOnNoProgress(remoteReplicaInfosForRemoteDC)), allStandbyReplicas);
    crossColoReplicaThread.replicate();
    // token index for all standby replicas will move forward after they fetch the missing keys themselves
    for (RemoteReplicaInfo remoteReplicaInfo : remoteReplicaInfosForRemoteDC) {
        if (!leaderReplicasOnLocalAndRemoteNodes.contains(remoteReplicaInfo.getReplicaId())) {
            assertEquals("mismatch in remote token set for standby cross colo replicas", ((MockFindToken) remoteReplicaInfo.getToken()).getIndex(), batchSize - 1);
            assertFalse("missing store messages should be empty for standby replicas now", crossColoReplicaThread.containsMissingKeysFromPreviousMetadataExchange(remoteReplicaInfo));
        }
    }
    // verify replication metrics to track number of cross colo get requests for standby replicas. If all replicas are
    // leaders, we should have 0 cross colo get requests.
    String remoteDataCenter = remoteReplicaInfosForRemoteDC.get(0).getReplicaId().getDataNodeId().getDatacenterName();
    assertEquals("mismatch in number of cross colo get requests tracked for standby replicas", crossColoReplicaThread.getReplicationMetrics().interColoReplicationGetRequestCountForStandbyReplicas.get(remoteDataCenter).getCount(), leaderReplicasOnLocalAndRemoteNodes.size() != remoteReplicaInfosForRemoteDC.size() ? 1 : 0);
    storageManager.shutdown();
}
Also used : ConnectionPool(com.github.ambry.network.ConnectionPool) CoreMatchers(org.hamcrest.CoreMatchers) Arrays(java.util.Arrays) StorageManager(com.github.ambry.store.StorageManager) ClusterMapChangeListener(com.github.ambry.clustermap.ClusterMapChangeListener) DataNodeId(com.github.ambry.clustermap.DataNodeId) RunWith(org.junit.runner.RunWith) HashMap(java.util.HashMap) ArrayList(java.util.ArrayList) HashSet(java.util.HashSet) MockReplicaId(com.github.ambry.clustermap.MockReplicaId) MockHelixParticipant(com.github.ambry.clustermap.MockHelixParticipant) Map(java.util.Map) Parameterized(org.junit.runners.Parameterized) ReplicationConfig(com.github.ambry.config.ReplicationConfig) ReplicaState(com.github.ambry.clustermap.ReplicaState) MetricRegistry(com.codahale.metrics.MetricRegistry) Pair(com.github.ambry.utils.Pair) VerifiableProperties(com.github.ambry.config.VerifiableProperties) Set(java.util.Set) IOException(java.io.IOException) Test(org.junit.Test) MockPartitionId(com.github.ambry.clustermap.MockPartitionId) Collectors(java.util.stream.Collectors) Store(com.github.ambry.store.Store) StoreKey(com.github.ambry.store.StoreKey) List(java.util.List) ReplicaId(com.github.ambry.clustermap.ReplicaId) ClusterMapConfig(com.github.ambry.config.ClusterMapConfig) Assert(org.junit.Assert) Collections(java.util.Collections) MockClusterMap(com.github.ambry.clustermap.MockClusterMap) PartitionId(com.github.ambry.clustermap.PartitionId)
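
The cross-colo fetch above is gated on a no-progress timeout (replicationConfig.replicationStandbyWaitTimeoutToTriggerCrossColoFetchSeconds). A minimal sketch of the kind of filter getRemoteStandbyReplicasTimedOutOnNoProgress performs, assuming each remote replica records when its token last advanced; ReplicaView and its fields are hypothetical, not ambry's RemoteReplicaInfo API.

import java.util.List;
import java.util.Set;
import java.util.stream.Collectors;

public class StandbyTimeoutSketch {
    // Hypothetical per-replica view for this sketch.
    static class ReplicaView {
        boolean standby;            // not part of a local-leader/remote-leader pair
        boolean hasMissingKeys;     // keys seen in a metadata exchange but not yet stored locally
        long lastProgressTimeMs;    // last time this replica's token moved forward
    }

    // A standby replica times out once it has waited longer than the configured
    // cross-colo fetch timeout without its token making progress.
    static Set<ReplicaView> timedOutStandbys(List<ReplicaView> replicas, long nowMs, long timeoutMs) {
        return replicas.stream()
            .filter(r -> r.standby && r.hasMissingKeys)
            .filter(r -> nowMs - r.lastProgressTimeMs > timeoutMs)
            .collect(Collectors.toSet());
    }
}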

Aggregations

MetricRegistry (com.codahale.metrics.MetricRegistry) 11
MockHelixParticipant (com.github.ambry.clustermap.MockHelixParticipant) 11
ClusterMapConfig (com.github.ambry.config.ClusterMapConfig) 11
MockClusterMap (com.github.ambry.clustermap.MockClusterMap) 10
Test (org.junit.Test) 10
StorageManager (com.github.ambry.store.StorageManager) 9
ReplicaId (com.github.ambry.clustermap.ReplicaId) 8
MockPartitionId (com.github.ambry.clustermap.MockPartitionId) 7
MockReplicaId (com.github.ambry.clustermap.MockReplicaId) 7
PartitionId (com.github.ambry.clustermap.PartitionId) 7
StateTransitionException (com.github.ambry.clustermap.StateTransitionException) 6
ReplicationConfig (com.github.ambry.config.ReplicationConfig) 5
VerifiableProperties (com.github.ambry.config.VerifiableProperties) 5
DataNodeId (com.github.ambry.clustermap.DataNodeId) 4
ArrayList (java.util.ArrayList) 4
CountDownLatch (java.util.concurrent.CountDownLatch) 4
MockMessageWriteSet (com.github.ambry.store.MockMessageWriteSet) 3
Store (com.github.ambry.store.Store) 3
File (java.io.File) 3
HashSet (java.util.HashSet) 3