Search in sources :

Example 16 with MockReplicaId

use of com.github.ambry.clustermap.MockReplicaId in project ambry by linkedin.

the class CloudStorageManagerTest method getStoreTest.

/**
 * Test {@code CloudStorageManager#getStore}
 * @throws IOException
 */
@Test
public void getStoreTest() throws IOException {
    CloudStorageManager cloudStorageManager = createNewCloudStorageManager();
    ReplicaId mockReplicaId = clusterMap.getReplicaIds(clusterMap.getDataNodeIds().get(0)).get(0);
    PartitionId partitionId = mockReplicaId.getPartitionId();
    // try get for a partition that doesn't exist
    Store store = cloudStorageManager.getStore(partitionId);
    Assert.assertNotNull(store);
    Assert.assertTrue(store.isStarted());
    // add and start a replica to the store
    Assert.assertTrue(cloudStorageManager.addBlobStore(mockReplicaId));
    // try get for an added replica
    store = cloudStorageManager.getStore(partitionId);
    Assert.assertNotNull(store);
    Assert.assertTrue(store.isStarted());
    // try get for a removed replica
    Assert.assertTrue(cloudStorageManager.removeBlobStore(partitionId));
    store = cloudStorageManager.getStore(partitionId);
    Assert.assertNotNull(store);
    Assert.assertTrue(store.isStarted());
}
Also used : Store(com.github.ambry.store.Store) MockPartitionId(com.github.ambry.clustermap.MockPartitionId) PartitionId(com.github.ambry.clustermap.PartitionId) MockReplicaId(com.github.ambry.clustermap.MockReplicaId) ReplicaId(com.github.ambry.clustermap.ReplicaId) Test(org.junit.Test)

Example 17 with MockReplicaId

use of com.github.ambry.clustermap.MockReplicaId in project ambry by linkedin.

the class ReplicationTest method replicaTokenTest.

/**
 * Tests that replica tokens are set correctly and go through different stages correctly.
 * @throws InterruptedException
 */
@Test
public void replicaTokenTest() throws InterruptedException {
    final long tokenPersistInterval = 100;
    Time time = new MockTime();
    MockFindToken token1 = new MockFindToken(0, 0);
    RemoteReplicaInfo remoteReplicaInfo = new RemoteReplicaInfo(new MockReplicaId(ReplicaType.DISK_BACKED), new MockReplicaId(ReplicaType.DISK_BACKED), new InMemoryStore(null, Collections.emptyList(), Collections.emptyList(), null), token1, tokenPersistInterval, time, new Port(5000, PortType.PLAINTEXT));
    // The equality check is for the reference, which is fine.
    // Initially, the current token and the token to persist are the same.
    assertEquals(token1, remoteReplicaInfo.getToken());
    assertEquals(token1, remoteReplicaInfo.getTokenToPersist());
    MockFindToken token2 = new MockFindToken(100, 100);
    remoteReplicaInfo.initializeTokens(token2);
    // Both tokens should be the newly initialized token.
    assertEquals(token2, remoteReplicaInfo.getToken());
    assertEquals(token2, remoteReplicaInfo.getTokenToPersist());
    remoteReplicaInfo.onTokenPersisted();
    MockFindToken token3 = new MockFindToken(200, 200);
    remoteReplicaInfo.setToken(token3);
    // Token to persist should still be the old token.
    assertEquals(token3, remoteReplicaInfo.getToken());
    assertEquals(token2, remoteReplicaInfo.getTokenToPersist());
    remoteReplicaInfo.onTokenPersisted();
    // Sleep for shorter than token persist interval.
    time.sleep(tokenPersistInterval - 1);
    // Token to persist should still be the old token.
    assertEquals(token3, remoteReplicaInfo.getToken());
    assertEquals(token2, remoteReplicaInfo.getTokenToPersist());
    remoteReplicaInfo.onTokenPersisted();
    MockFindToken token4 = new MockFindToken(200, 200);
    remoteReplicaInfo.setToken(token4);
    time.sleep(2);
    // Token to persist should be the most recent token as of currentTime - tokenToPersistInterval
    // which is token3 at this time.
    assertEquals(token4, remoteReplicaInfo.getToken());
    assertEquals(token3, remoteReplicaInfo.getTokenToPersist());
    remoteReplicaInfo.onTokenPersisted();
    time.sleep(tokenPersistInterval + 1);
    // The most recently set token as of currentTime - tokenToPersistInterval is token4
    assertEquals(token4, remoteReplicaInfo.getToken());
    assertEquals(token4, remoteReplicaInfo.getTokenToPersist());
    remoteReplicaInfo.onTokenPersisted();
}
Also used : Port(com.github.ambry.network.Port) MockReplicaId(com.github.ambry.clustermap.MockReplicaId) MockTime(com.github.ambry.utils.MockTime) Time(com.github.ambry.utils.Time) MockTime(com.github.ambry.utils.MockTime) Test(org.junit.Test)

Example 18 with MockReplicaId

use of com.github.ambry.clustermap.MockReplicaId in project ambry by linkedin.

the class ReplicationTest method onReplicaAddedOrRemovedCallbackTest.

/**
 * Test cluster map change callback in {@link ReplicationManager} when any remote replicas are added or removed.
 * Test setup: attempt to add 3 replicas and remove 3 replicas respectively. The three replicas are picked as follows:
 *   (1) 1st replica on current node (should skip)
 *   (2) 2nd replica on remote node sharing partition with current one (should be added or removed)
 *   (3) 3rd replica on remote node but doesn't share partition with current one (should skip)
 * @throws Exception
 */
@Test
public void onReplicaAddedOrRemovedCallbackTest() throws Exception {
    MockClusterMap clusterMap = new MockClusterMap();
    ClusterMapConfig clusterMapConfig = new ClusterMapConfig(verifiableProperties);
    StoreConfig storeConfig = new StoreConfig(verifiableProperties);
    // pick a node with no special partition as current node
    Set<DataNodeId> specialPartitionNodes = clusterMap.getSpecialPartition().getReplicaIds().stream().map(ReplicaId::getDataNodeId).collect(Collectors.toSet());
    DataNodeId currentNode = clusterMap.getDataNodes().stream().filter(d -> !specialPartitionNodes.contains(d)).findFirst().get();
    MockStoreKeyConverterFactory storeKeyConverterFactory = new MockStoreKeyConverterFactory(null, null);
    storeKeyConverterFactory.setConversionMap(new HashMap<>());
    StorageManager storageManager = new StorageManager(storeConfig, new DiskManagerConfig(verifiableProperties), Utils.newScheduler(1, true), new MetricRegistry(), null, clusterMap, currentNode, null, null, new MockTime(), null, new InMemAccountService(false, false));
    storageManager.start();
    MockReplicationManager replicationManager = new MockReplicationManager(replicationConfig, clusterMapConfig, storeConfig, storageManager, clusterMap, currentNode, storeKeyConverterFactory, null);
    ClusterMapChangeListener clusterMapChangeListener = clusterMap.getClusterMapChangeListener();
    // find the special partition (not on current node) and get an irrelevant replica from it
    PartitionId absentPartition = clusterMap.getSpecialPartition();
    ReplicaId irrelevantReplica = absentPartition.getReplicaIds().get(0);
    // find an existing replica on current node and one of its peer replicas on remote node
    ReplicaId existingReplica = clusterMap.getReplicaIds(currentNode).get(0);
    ReplicaId peerReplicaToRemove = existingReplica.getPartitionId().getReplicaIds().stream().filter(r -> r != existingReplica).findFirst().get();
    // create a new node and place a peer of existing replica on it.
    MockDataNodeId remoteNode = createDataNode(getListOfPorts(PLAIN_TEXT_PORT_START_NUMBER + 10, SSL_PORT_START_NUMBER + 10, HTTP2_PORT_START_NUMBER + 10), clusterMap.getDatacenterName((byte) 0), 3);
    ReplicaId addedReplica = new MockReplicaId(remoteNode.getPort(), (MockPartitionId) existingReplica.getPartitionId(), remoteNode, 0);
    // populate added replica and removed replica lists
    List<ReplicaId> replicasToAdd = new ArrayList<>(Arrays.asList(existingReplica, addedReplica, irrelevantReplica));
    List<ReplicaId> replicasToRemove = new ArrayList<>(Arrays.asList(existingReplica, peerReplicaToRemove, irrelevantReplica));
    PartitionInfo partitionInfo = replicationManager.getPartitionToPartitionInfoMap().get(existingReplica.getPartitionId());
    assertNotNull("PartitionInfo is not found", partitionInfo);
    RemoteReplicaInfo peerReplicaInfo = partitionInfo.getRemoteReplicaInfos().stream().filter(info -> info.getReplicaId() == peerReplicaToRemove).findFirst().get();
    // get the replica-thread for this peer replica
    ReplicaThread peerReplicaThread = peerReplicaInfo.getReplicaThread();
    // Test Case 1: replication manager encountered exception during startup (remote replica addition/removal will be skipped)
    replicationManager.startWithException();
    clusterMapChangeListener.onReplicaAddedOrRemoved(replicasToAdd, replicasToRemove);
    // verify that PartitionInfo stays unchanged
    verifyRemoteReplicaInfo(partitionInfo, addedReplica, false);
    verifyRemoteReplicaInfo(partitionInfo, peerReplicaToRemove, true);
    // Test Case 2: startup latch is interrupted
    CountDownLatch initialLatch = replicationManager.startupLatch;
    CountDownLatch mockLatch = Mockito.mock(CountDownLatch.class);
    doThrow(new InterruptedException()).when(mockLatch).await();
    replicationManager.startupLatch = mockLatch;
    try {
        clusterMapChangeListener.onReplicaAddedOrRemoved(replicasToAdd, replicasToRemove);
        fail("should fail because startup latch is interrupted");
    } catch (IllegalStateException e) {
    // expected
    }
    replicationManager.startupLatch = initialLatch;
    // Test Case 3: replication manager is successfully started
    replicationManager.start();
    clusterMapChangeListener.onReplicaAddedOrRemoved(replicasToAdd, replicasToRemove);
    // verify that PartitionInfo has latest remote replica infos
    verifyRemoteReplicaInfo(partitionInfo, addedReplica, true);
    verifyRemoteReplicaInfo(partitionInfo, peerReplicaToRemove, false);
    verifyRemoteReplicaInfo(partitionInfo, irrelevantReplica, false);
    // verify new added replica is assigned to a certain thread
    ReplicaThread replicaThread = replicationManager.getDataNodeIdToReplicaThreadMap().get(addedReplica.getDataNodeId());
    assertNotNull("There is no ReplicaThread assocated with new replica", replicaThread);
    Optional<RemoteReplicaInfo> findResult = replicaThread.getRemoteReplicaInfos().get(remoteNode).stream().filter(info -> info.getReplicaId() == addedReplica).findAny();
    assertTrue("New added remote replica info should exist in corresponding thread", findResult.isPresent());
    // verify the removed replica info's thread is null
    assertNull("Thread in removed replica info should be null", peerReplicaInfo.getReplicaThread());
    findResult = peerReplicaThread.getRemoteReplicaInfos().get(peerReplicaToRemove.getDataNodeId()).stream().filter(info -> info.getReplicaId() == peerReplicaToRemove).findAny();
    assertFalse("Previous replica thread should not contain RemoteReplicaInfo that is already removed", findResult.isPresent());
    storageManager.shutdown();
}
Also used : DiskManagerConfig(com.github.ambry.config.DiskManagerConfig) CoreMatchers(org.hamcrest.CoreMatchers) Arrays(java.util.Arrays) StorageManager(com.github.ambry.store.StorageManager) StoreKeyConverter(com.github.ambry.store.StoreKeyConverter) DataNodeId(com.github.ambry.clustermap.DataNodeId) Random(java.util.Random) ByteBuffer(java.nio.ByteBuffer) MockReplicaId(com.github.ambry.clustermap.MockReplicaId) PortType(com.github.ambry.network.PortType) AtomicInteger(java.util.concurrent.atomic.AtomicInteger) TestUtils(com.github.ambry.utils.TestUtils) Map(java.util.Map) DeleteMessageFormatInputStream(com.github.ambry.messageformat.DeleteMessageFormatInputStream) Parameterized(org.junit.runners.Parameterized) ReplicationConfig(com.github.ambry.config.ReplicationConfig) Container(com.github.ambry.account.Container) DiskManagerConfig(com.github.ambry.config.DiskManagerConfig) Predicate(java.util.function.Predicate) ValidatingTransformer(com.github.ambry.messageformat.ValidatingTransformer) Collection(java.util.Collection) StoreKeyFactory(com.github.ambry.store.StoreKeyFactory) Set(java.util.Set) Utils(com.github.ambry.utils.Utils) MockPartitionId(com.github.ambry.clustermap.MockPartitionId) Collectors(java.util.stream.Collectors) ConnectedChannel(com.github.ambry.network.ConnectedChannel) CountDownLatch(java.util.concurrent.CountDownLatch) StoreKey(com.github.ambry.store.StoreKey) List(java.util.List) ReplicaMetadataResponse(com.github.ambry.protocol.ReplicaMetadataResponse) PartitionStateChangeListener(com.github.ambry.clustermap.PartitionStateChangeListener) MockTime(com.github.ambry.utils.MockTime) Account(com.github.ambry.account.Account) Optional(java.util.Optional) TransitionErrorCode(com.github.ambry.clustermap.StateTransitionException.TransitionErrorCode) MockId(com.github.ambry.store.MockId) InMemAccountService(com.github.ambry.account.InMemAccountService) AmbryReplicaSyncUpManager(com.github.ambry.clustermap.AmbryReplicaSyncUpManager) PartitionId(com.github.ambry.clustermap.PartitionId) BlobId(com.github.ambry.commons.BlobId) ResponseHandler(com.github.ambry.commons.ResponseHandler) ClusterMapChangeListener(com.github.ambry.clustermap.ClusterMapChangeListener) RunWith(org.junit.runner.RunWith) AtomicBoolean(java.util.concurrent.atomic.AtomicBoolean) HashMap(java.util.HashMap) AtomicReference(java.util.concurrent.atomic.AtomicReference) ArrayList(java.util.ArrayList) HashSet(java.util.HashSet) Transformer(com.github.ambry.store.Transformer) MockHelixParticipant(com.github.ambry.clustermap.MockHelixParticipant) CommonTestUtils(com.github.ambry.commons.CommonTestUtils) ReplicaMetadataResponseInfo(com.github.ambry.protocol.ReplicaMetadataResponseInfo) MockStoreKeyConverterFactory(com.github.ambry.store.MockStoreKeyConverterFactory) Time(com.github.ambry.utils.Time) MockDataNodeId(com.github.ambry.clustermap.MockDataNodeId) MockMessageWriteSet(com.github.ambry.store.MockMessageWriteSet) ReplicaState(com.github.ambry.clustermap.ReplicaState) StateModelListenerType(com.github.ambry.clustermap.StateModelListenerType) StoreConfig(com.github.ambry.config.StoreConfig) MetricRegistry(com.codahale.metrics.MetricRegistry) Properties(java.util.Properties) Pair(com.github.ambry.utils.Pair) Iterator(java.util.Iterator) ReplicaType(com.github.ambry.clustermap.ReplicaType) VerifiableProperties(com.github.ambry.config.VerifiableProperties) ClusterMap(com.github.ambry.clustermap.ClusterMap) Test(org.junit.Test) BlobIdFactory(com.github.ambry.commons.BlobIdFactory) File(java.io.File) TimeUnit(java.util.concurrent.TimeUnit) Store(com.github.ambry.store.Store) Mockito(org.mockito.Mockito) MessageInfo(com.github.ambry.store.MessageInfo) StateTransitionException(com.github.ambry.clustermap.StateTransitionException) ReplicaId(com.github.ambry.clustermap.ReplicaId) ClusterMapConfig(com.github.ambry.config.ClusterMapConfig) Port(com.github.ambry.network.Port) Comparator(java.util.Comparator) Assert(org.junit.Assert) Collections(java.util.Collections) MockClusterMap(com.github.ambry.clustermap.MockClusterMap) ClusterMapChangeListener(com.github.ambry.clustermap.ClusterMapChangeListener) StorageManager(com.github.ambry.store.StorageManager) ArrayList(java.util.ArrayList) InMemAccountService(com.github.ambry.account.InMemAccountService) MockTime(com.github.ambry.utils.MockTime) MockStoreKeyConverterFactory(com.github.ambry.store.MockStoreKeyConverterFactory) MetricRegistry(com.codahale.metrics.MetricRegistry) MockPartitionId(com.github.ambry.clustermap.MockPartitionId) PartitionId(com.github.ambry.clustermap.PartitionId) CountDownLatch(java.util.concurrent.CountDownLatch) ClusterMapConfig(com.github.ambry.config.ClusterMapConfig) MockReplicaId(com.github.ambry.clustermap.MockReplicaId) ReplicaId(com.github.ambry.clustermap.ReplicaId) MockDataNodeId(com.github.ambry.clustermap.MockDataNodeId) MockReplicaId(com.github.ambry.clustermap.MockReplicaId) StoreConfig(com.github.ambry.config.StoreConfig) DataNodeId(com.github.ambry.clustermap.DataNodeId) MockDataNodeId(com.github.ambry.clustermap.MockDataNodeId) MockClusterMap(com.github.ambry.clustermap.MockClusterMap) Test(org.junit.Test)

Example 19 with MockReplicaId

use of com.github.ambry.clustermap.MockReplicaId in project ambry by linkedin.

the class LeaderBasedReplicationTest method replicaThreadLeaderBasedReplicationStandByCrossColoFetchTest.

/**
 * Test leader based replication to verify cross colo gets for standby replicas after they have have timed out
 * waiting for missing keys.
 * @throws Exception
 */
@Test
public void replicaThreadLeaderBasedReplicationStandByCrossColoFetchTest() throws Exception {
    Map<DataNodeId, MockHost> hosts = new HashMap<>();
    hosts.put(remoteNodeInLocalDC, remoteHostInLocalDC);
    hosts.put(remoteNodeInRemoteDC, remoteHostInRemoteDC);
    int batchSize = 5;
    int numOfMessagesOnRemoteNodeInLocalDC = 3;
    int numOfMessagesOnRemoteNodeInRemoteDC = 10;
    ConnectionPool mockConnectionPool = new MockConnectionPool(hosts, clusterMap, batchSize);
    Pair<StorageManager, ReplicationManager> managers = createStorageManagerAndReplicationManager(clusterMap, clusterMapConfig, mockHelixParticipant, mockConnectionPool);
    StorageManager storageManager = managers.getFirst();
    MockReplicationManager replicationManager = (MockReplicationManager) managers.getSecond();
    // set mock local stores on all remoteReplicaInfos which will used during replication.
    for (PartitionId partitionId : replicationManager.partitionToPartitionInfo.keySet()) {
        localHost.addStore(partitionId, null);
        Store localStore = localHost.getStore(partitionId);
        localStore.start();
        List<RemoteReplicaInfo> remoteReplicaInfos = replicationManager.partitionToPartitionInfo.get(partitionId).getRemoteReplicaInfos();
        remoteReplicaInfos.forEach(remoteReplicaInfo -> remoteReplicaInfo.setLocalStore(localStore));
    }
    // get remote replicas and replica thread for remote host on local datacenter
    ReplicaThread intraColoReplicaThread = replicationManager.dataNodeIdToReplicaThread.get(remoteNodeInLocalDC);
    List<RemoteReplicaInfo> remoteReplicaInfosForLocalDC = intraColoReplicaThread.getRemoteReplicaInfos().get(remoteNodeInLocalDC);
    // get remote replicas and replica thread for remote host on remote datacenter
    ReplicaThread crossColoReplicaThread = replicationManager.dataNodeIdToReplicaThread.get(remoteNodeInRemoteDC);
    List<RemoteReplicaInfo> remoteReplicaInfosForRemoteDC = crossColoReplicaThread.getRemoteReplicaInfos().get(remoteNodeInRemoteDC);
    // mock helix transition state from standby to leader for local leader partitions
    List<? extends ReplicaId> replicaIds = clusterMap.getReplicaIds(replicationManager.dataNodeId);
    for (ReplicaId replicaId : replicaIds) {
        MockReplicaId mockReplicaId = (MockReplicaId) replicaId;
        if (mockReplicaId.getReplicaState() == ReplicaState.LEADER) {
            MockPartitionId mockPartitionId = (MockPartitionId) replicaId.getPartitionId();
            mockHelixParticipant.onPartitionBecomeLeaderFromStandby(mockPartitionId.toPathString());
        }
    }
    // Add put messages to all partitions on remoteHost1 and remoteHost2
    List<PartitionId> partitionIds = clusterMap.getWritablePartitionIds(null);
    for (PartitionId partitionId : partitionIds) {
        // add 3 put messages to the remoteNodeInLocalDC and remoteNodeInRemoteDC from which local host will replicate.
        addPutMessagesToReplicasOfPartition(partitionId, Arrays.asList(remoteHostInLocalDC, remoteHostInRemoteDC), numOfMessagesOnRemoteNodeInLocalDC);
        // add 1 put message to the remoteNodeInRemoteDC only. Since this message is not present in remoteNodeInLocalDC, it
        // doesn't come to local node via intra-dc replication. We should see time out for remote standby replicas waiting for this
        // message and see a cross colo fetch happening.
        addPutMessagesToReplicasOfPartition(partitionId, Collections.singletonList(remoteHostInRemoteDC), numOfMessagesOnRemoteNodeInRemoteDC - numOfMessagesOnRemoteNodeInLocalDC);
    }
    // Choose partitions that are leaders on both local and remote nodes
    Set<ReplicaId> leaderReplicasOnLocalAndRemoteNodes = getRemoteLeaderReplicasWithLeaderPartitionsOnLocalNode(clusterMap, replicationManager.dataNodeId, remoteNodeInRemoteDC);
    // replicate with remote node in remote DC
    crossColoReplicaThread.replicate();
    // missing messages are not fetched yet.
    for (RemoteReplicaInfo remoteReplicaInfo : remoteReplicaInfosForRemoteDC) {
        if (leaderReplicasOnLocalAndRemoteNodes.contains(remoteReplicaInfo.getReplicaId())) {
            assertEquals("remote token mismatch for leader replicas", ((MockFindToken) remoteReplicaInfo.getToken()).getIndex(), batchSize - 1);
        } else {
            assertEquals("remote token should not move forward for standby replicas until missing keys are fetched", ((MockFindToken) remoteReplicaInfo.getToken()).getIndex(), 0);
        }
    }
    // Replicate with remote node in local dc
    intraColoReplicaThread.replicate();
    // verify that remote token will be moved for all replicas as it is intra-dc replication
    for (RemoteReplicaInfo remoteReplicaInfo : remoteReplicaInfosForLocalDC) {
        assertEquals("mismatch in remote token set for intra colo replicas", ((MockFindToken) remoteReplicaInfo.getToken()).getIndex(), numOfMessagesOnRemoteNodeInLocalDC - 1);
    }
    // via intra-dc replication
    for (RemoteReplicaInfo remoteReplicaInfo : remoteReplicaInfosForRemoteDC) {
        crossColoReplicaThread.processMissingKeysFromPreviousMetadataResponse(remoteReplicaInfo);
    }
    // verify that the remote token will remain 0 for standby replicas as one message in its missing set is not fetched yet.
    for (RemoteReplicaInfo remoteReplicaInfo : remoteReplicaInfosForRemoteDC) {
        if (!leaderReplicasOnLocalAndRemoteNodes.contains(remoteReplicaInfo.getReplicaId())) {
            assertTrue("missing store messages should still exist for standby replicas", crossColoReplicaThread.containsMissingKeysFromPreviousMetadataExchange(remoteReplicaInfo));
            assertEquals("remote token should not move forward for standby replicas until missing keys are fetched", ((MockFindToken) remoteReplicaInfo.getToken()).getIndex(), 0);
            assertEquals("incorrect number of missing store messages found for standby replicas", remoteReplicaInfo.getExchangeMetadataResponse().missingStoreMessages.size(), batchSize - numOfMessagesOnRemoteNodeInLocalDC);
        }
    }
    // Attempt replication with remoteNodeInRemoteDC, we should not see any replication attempt for standby replicas
    // and their remote token stays as 0.
    crossColoReplicaThread.replicate();
    for (RemoteReplicaInfo remoteReplicaInfo : remoteReplicaInfosForRemoteDC) {
        if (!leaderReplicasOnLocalAndRemoteNodes.contains(remoteReplicaInfo.getReplicaId())) {
            assertEquals("remote token should not move forward for standby replicas until missing keys are fetched", ((MockFindToken) remoteReplicaInfo.getToken()).getIndex(), 0);
            assertTrue("missing store messages should still exist for standby replicas", crossColoReplicaThread.containsMissingKeysFromPreviousMetadataExchange(remoteReplicaInfo));
        }
    }
    // Move time forward by replicationStandbyWaitTimeoutToTriggerCrossColoFetchSeconds+1 seconds and attempt replication.
    // We should see cross colo fetch for standby replicas now since missing keys haven't arrived for
    // replicationConfig.replicationStandbyWaitTimeoutToTriggerCrossColoFetchSeconds.
    time.sleep((replicationConfig.replicationStandbyWaitTimeoutToTriggerCrossColoFetchSeconds + 1) * 1000);
    // verify that we get the list of standby replicas that timed out on no progress
    Set<RemoteReplicaInfo> allStandbyReplicas = remoteReplicaInfosForRemoteDC.stream().filter(info -> !leaderReplicasOnLocalAndRemoteNodes.contains(info.getReplicaId())).collect(Collectors.toSet());
    assertEquals("mismatch in list of standby replicas timed out on no progress", new HashSet<>(crossColoReplicaThread.getRemoteStandbyReplicasTimedOutOnNoProgress(remoteReplicaInfosForRemoteDC)), allStandbyReplicas);
    crossColoReplicaThread.replicate();
    // token index for all standby replicas will move forward after fetching missing keys themselves
    for (RemoteReplicaInfo remoteReplicaInfo : remoteReplicaInfosForRemoteDC) {
        if (!leaderReplicasOnLocalAndRemoteNodes.contains(remoteReplicaInfo.getReplicaId())) {
            assertEquals("mismatch in remote token set for standby cross colo replicas", ((MockFindToken) remoteReplicaInfo.getToken()).getIndex(), batchSize - 1);
            assertFalse("missing store messages should be empty for standby replicas now", crossColoReplicaThread.containsMissingKeysFromPreviousMetadataExchange(remoteReplicaInfo));
        }
    }
    // verify replication metrics to track number of cross colo get requests for standby replicas. If all replicas are
    // leaders, we should have 0 cross colo get requests.
    String remoteDataCenter = remoteReplicaInfosForRemoteDC.get(0).getReplicaId().getDataNodeId().getDatacenterName();
    assertEquals("mismatch in number of cross colo get requests tracked for standby replicas", crossColoReplicaThread.getReplicationMetrics().interColoReplicationGetRequestCountForStandbyReplicas.get(remoteDataCenter).getCount(), leaderReplicasOnLocalAndRemoteNodes.size() != remoteReplicaInfosForRemoteDC.size() ? 1 : 0);
    storageManager.shutdown();
}
Also used : ConnectionPool(com.github.ambry.network.ConnectionPool) CoreMatchers(org.hamcrest.CoreMatchers) Arrays(java.util.Arrays) StorageManager(com.github.ambry.store.StorageManager) ClusterMapChangeListener(com.github.ambry.clustermap.ClusterMapChangeListener) DataNodeId(com.github.ambry.clustermap.DataNodeId) RunWith(org.junit.runner.RunWith) HashMap(java.util.HashMap) ArrayList(java.util.ArrayList) HashSet(java.util.HashSet) MockReplicaId(com.github.ambry.clustermap.MockReplicaId) MockHelixParticipant(com.github.ambry.clustermap.MockHelixParticipant) Map(java.util.Map) Parameterized(org.junit.runners.Parameterized) ReplicationConfig(com.github.ambry.config.ReplicationConfig) ReplicaState(com.github.ambry.clustermap.ReplicaState) MetricRegistry(com.codahale.metrics.MetricRegistry) Pair(com.github.ambry.utils.Pair) VerifiableProperties(com.github.ambry.config.VerifiableProperties) ConnectionPool(com.github.ambry.network.ConnectionPool) Set(java.util.Set) IOException(java.io.IOException) Test(org.junit.Test) MockPartitionId(com.github.ambry.clustermap.MockPartitionId) Collectors(java.util.stream.Collectors) Store(com.github.ambry.store.Store) StoreKey(com.github.ambry.store.StoreKey) List(java.util.List) ReplicaId(com.github.ambry.clustermap.ReplicaId) ClusterMapConfig(com.github.ambry.config.ClusterMapConfig) Assert(org.junit.Assert) Collections(java.util.Collections) MockClusterMap(com.github.ambry.clustermap.MockClusterMap) PartitionId(com.github.ambry.clustermap.PartitionId) HashMap(java.util.HashMap) StorageManager(com.github.ambry.store.StorageManager) Store(com.github.ambry.store.Store) MockPartitionId(com.github.ambry.clustermap.MockPartitionId) MockPartitionId(com.github.ambry.clustermap.MockPartitionId) PartitionId(com.github.ambry.clustermap.PartitionId) MockReplicaId(com.github.ambry.clustermap.MockReplicaId) ReplicaId(com.github.ambry.clustermap.ReplicaId) MockReplicaId(com.github.ambry.clustermap.MockReplicaId) DataNodeId(com.github.ambry.clustermap.DataNodeId) Test(org.junit.Test)

Example 20 with MockReplicaId

use of com.github.ambry.clustermap.MockReplicaId in project ambry by linkedin.

the class LeaderBasedReplicationTest method replicaThreadLeaderBasedReplicationFetchMissingKeysInMultipleCyclesTest.

/**
 * Test leader based replication to ensure token is advanced correctly for standby replicas when missing messages
 * are fetched via intra-dc replication in multiple cycles.
 * @throws Exception
 */
@Test
public void replicaThreadLeaderBasedReplicationFetchMissingKeysInMultipleCyclesTest() throws Exception {
    Pair<StorageManager, ReplicationManager> managers = createStorageManagerAndReplicationManager(clusterMap, clusterMapConfig, mockHelixParticipant);
    StorageManager storageManager = managers.getFirst();
    MockReplicationManager replicationManager = (MockReplicationManager) managers.getSecond();
    // set mock local stores on all remoteReplicaInfos which will used during replication.
    for (PartitionId partitionId : replicationManager.partitionToPartitionInfo.keySet()) {
        localHost.addStore(partitionId, null);
        Store localStore = localHost.getStore(partitionId);
        localStore.start();
        List<RemoteReplicaInfo> remoteReplicaInfos = replicationManager.partitionToPartitionInfo.get(partitionId).getRemoteReplicaInfos();
        remoteReplicaInfos.forEach(remoteReplicaInfo -> remoteReplicaInfo.setLocalStore(localStore));
    }
    // get remote replicas and replica thread for remote host on local datacenter
    ReplicaThread intraColoReplicaThread = replicationManager.dataNodeIdToReplicaThread.get(remoteNodeInLocalDC);
    List<RemoteReplicaInfo> remoteReplicaInfosForLocalDC = intraColoReplicaThread.getRemoteReplicaInfos().get(remoteNodeInLocalDC);
    // get remote replicas and replica thread for remote host on remote datacenter
    ReplicaThread crossColoReplicaThread = replicationManager.dataNodeIdToReplicaThread.get(remoteNodeInRemoteDC);
    List<RemoteReplicaInfo> remoteReplicaInfosForRemoteDC = crossColoReplicaThread.getRemoteReplicaInfos().get(remoteNodeInRemoteDC);
    // mock helix transition state from standby to leader for local leader partitions
    List<? extends ReplicaId> replicaIds = clusterMap.getReplicaIds(replicationManager.dataNodeId);
    for (ReplicaId replicaId : replicaIds) {
        MockReplicaId mockReplicaId = (MockReplicaId) replicaId;
        if (mockReplicaId.getReplicaState() == ReplicaState.LEADER) {
            MockPartitionId mockPartitionId = (MockPartitionId) replicaId.getPartitionId();
            mockHelixParticipant.onPartitionBecomeLeaderFromStandby(mockPartitionId.toPathString());
        }
    }
    List<PartitionId> partitionIds = clusterMap.getWritablePartitionIds(null);
    Map<PartitionId, List<StoreKey>> idsToBeIgnoredByPartition = new HashMap<>();
    Map<PartitionId, List<StoreKey>> idsToBeTtlUpdatedByPartition = new HashMap<>();
    int numOfMessages = 5;
    for (PartitionId id : partitionIds) {
        List<StoreKey> toBeTtlUpdated = new ArrayList<>();
        // Adding 5 put messages to remote hosts
        List<StoreKey> ids = addPutMessagesToReplicasOfPartition(id, Arrays.asList(remoteHostInLocalDC, remoteHostInRemoteDC), numOfMessages);
        // update the TTL of first put message
        StoreKey toTtlUpdateId = ids.get(0);
        addTtlUpdateMessagesToReplicasOfPartition(id, toTtlUpdateId, Arrays.asList(remoteHostInLocalDC, remoteHostInRemoteDC), UPDATED_EXPIRY_TIME_MS);
        toBeTtlUpdated.add(toTtlUpdateId);
        idsToBeTtlUpdatedByPartition.put(id, toBeTtlUpdated);
    }
    // Choose partitions that are leaders on both local and remote nodes
    Set<ReplicaId> remoteLeaderReplicasWithLeaderPartitionsOnLocalNode = getRemoteLeaderReplicasWithLeaderPartitionsOnLocalNode(clusterMap, replicationManager.dataNodeId, remoteNodeInRemoteDC);
    // fetch all messages in the metadata exchange from remote replica on remote datacenter by setting batchSize
    // in the connection to 6
    // There are 6 records in the remote host. 5 put records + 1 ttl update record. We will receive 5 messages in the
    // metadata exchange as ttl update will be merged with its put record and sent as single record.
    List<ReplicaThread.ExchangeMetadataResponse> responseForRemoteNodeInRemoteDC = crossColoReplicaThread.exchangeMetadata(new MockConnectionPool.MockConnection(remoteHostInRemoteDC, numOfMessages), remoteReplicaInfosForRemoteDC);
    assertEquals("Response should contain a response for each replica", remoteReplicaInfosForRemoteDC.size(), responseForRemoteNodeInRemoteDC.size());
    for (ReplicaThread.ExchangeMetadataResponse exchangeMetadataResponse : responseForRemoteNodeInRemoteDC) {
        // we should have received 5 messages bo, b1, b2, b3, b4 (ttl_update for b0 will be merged with b0 put message)
        assertEquals("mismatch in number of messages received from remote replica", numOfMessages, exchangeMetadataResponse.missingStoreMessages.size());
    }
    // Filter leader replicas to fetch missing keys
    List<RemoteReplicaInfo> leaderReplicas = new ArrayList<>();
    List<ReplicaThread.ExchangeMetadataResponse> exchangeMetadataResponseListForLeaderReplicas = new ArrayList<>();
    crossColoReplicaThread.getLeaderReplicaList(remoteReplicaInfosForRemoteDC, responseForRemoteNodeInRemoteDC, leaderReplicas, exchangeMetadataResponseListForLeaderReplicas);
    // verify that only leader replicas in remoteHost2 are chosen for fetching missing messages.
    Set<ReplicaId> remoteReplicasToFetchInReplicaThread = leaderReplicas.stream().map(RemoteReplicaInfo::getReplicaId).collect(Collectors.toSet());
    assertThat("mismatch in leader remote replicas to fetch missing keys", remoteLeaderReplicasWithLeaderPartitionsOnLocalNode, is(remoteReplicasToFetchInReplicaThread));
    // fetch missing keys for leader replicas from remoteHost2
    if (leaderReplicas.size() > 0) {
        crossColoReplicaThread.fixMissingStoreKeys(new MockConnectionPool.MockConnection(remoteHostInRemoteDC, numOfMessages), leaderReplicas, exchangeMetadataResponseListForLeaderReplicas, false);
    }
    // missing messages are not fetched yet.
    for (int i = 0; i < remoteReplicaInfosForRemoteDC.size(); i++) {
        if (remoteLeaderReplicasWithLeaderPartitionsOnLocalNode.contains(remoteReplicaInfosForRemoteDC.get(i).getReplicaId())) {
            assertThat("remote Token should be updated for leader replica", remoteReplicaInfosForRemoteDC.get(i).getToken(), is(responseForRemoteNodeInRemoteDC.get(i).remoteToken));
        } else {
            assertThat("remote Token should not be updated for standby replica", remoteReplicaInfosForRemoteDC.get(i).getToken(), not(responseForRemoteNodeInRemoteDC.get(i).remoteToken));
        }
    }
    int numOfMessagesToBeFetchedFromRemoteHost1 = numOfMessages - 1;
    // Fetch only first 4 messages from remote host in local datacenter
    List<ReplicaThread.ExchangeMetadataResponse> responseForRemoteNodeInLocalDC = intraColoReplicaThread.exchangeMetadata(new MockConnectionPool.MockConnection(remoteHostInLocalDC, numOfMessagesToBeFetchedFromRemoteHost1), remoteReplicaInfosForLocalDC);
    assertEquals("Response should contain a response for each replica", remoteReplicaInfosForLocalDC.size(), responseForRemoteNodeInLocalDC.size());
    // fetch missing keys from remoteHost1
    intraColoReplicaThread.fixMissingStoreKeys(new MockConnectionPool.MockConnection(remoteHostInLocalDC, numOfMessagesToBeFetchedFromRemoteHost1), remoteReplicaInfosForLocalDC, responseForRemoteNodeInLocalDC, false);
    // verify that remote token has moved forward for all partitions since it is intra-dc replication.
    for (int i = 0; i < responseForRemoteNodeInLocalDC.size(); i++) {
        assertEquals("token mismatch for intra-dc replication", remoteReplicaInfosForLocalDC.get(i).getToken(), responseForRemoteNodeInLocalDC.get(i).remoteToken);
    }
    // process metadata response for cross-colo replicas. Missing keys b0, b1, b2 and b3 must have come now via intra-dc replication
    for (RemoteReplicaInfo remoteReplicaInfo : remoteReplicaInfosForRemoteDC) {
        crossColoReplicaThread.processMissingKeysFromPreviousMetadataResponse(remoteReplicaInfo);
    }
    // verify that the missingStoreMessages size in standby replicas is 1 as b0, b1, b2 and b3 are fetched and b4 is pending
    for (RemoteReplicaInfo remoteReplicaInfo : remoteReplicaInfosForRemoteDC) {
        if (!remoteLeaderReplicasWithLeaderPartitionsOnLocalNode.contains(remoteReplicaInfo.getReplicaId())) {
            assertEquals("mismatch in number of missing messages for remote standby replicas after intra-dc replication", remoteReplicaInfo.getExchangeMetadataResponse().missingStoreMessages.size(), 1);
        }
    }
    // Fetch remaining one message (b4) from remote host in local datacenter
    responseForRemoteNodeInLocalDC = intraColoReplicaThread.exchangeMetadata(new MockConnectionPool.MockConnection(remoteHostInLocalDC, numOfMessagesToBeFetchedFromRemoteHost1), remoteReplicaInfosForLocalDC);
    assertEquals("Response should contain a response for each replica", remoteReplicaInfosForLocalDC.size(), responseForRemoteNodeInLocalDC.size());
    // fetch missing keys from remoteHost1
    intraColoReplicaThread.fixMissingStoreKeys(new MockConnectionPool.MockConnection(remoteHostInLocalDC, numOfMessagesToBeFetchedFromRemoteHost1), remoteReplicaInfosForLocalDC, responseForRemoteNodeInLocalDC, false);
    // verify remote token for intra-dc replicas
    for (int i = 0; i < responseForRemoteNodeInLocalDC.size(); i++) {
        assertThat("mismatch in remote token after all missing messages are received", remoteReplicaInfosForLocalDC.get(i).getToken(), is(responseForRemoteNodeInLocalDC.get(i).remoteToken));
    }
    // process metadata response for cross-colo replicas. Missing keys b0, b1, b2 and b3 must have come now via intra-dc replication
    for (RemoteReplicaInfo remoteReplicaInfo : remoteReplicaInfosForRemoteDC) {
        crossColoReplicaThread.processMissingKeysFromPreviousMetadataResponse(remoteReplicaInfo);
    }
    // remote token for all replicas (leader and standby) should move forward.
    for (int i = 0; i < responseForRemoteNodeInRemoteDC.size(); i++) {
        assertThat("mismatch in remote token after all missing messages are received", remoteReplicaInfosForRemoteDC.get(i).getToken(), is(responseForRemoteNodeInRemoteDC.get(i).remoteToken));
    }
    // compare the final state of all messages and buffers are in sync at local node and remoteNodeInLocalDC
    checkBlobMessagesAreEqualInLocalAndRemoteHosts(localHost, remoteHostInLocalDC, idsToBeIgnoredByPartition, idsToBeTtlUpdatedByPartition);
    // compare the final state of all messages and buffers are in sync at local node and remoteNodeInRemoteDC
    checkBlobMessagesAreEqualInLocalAndRemoteHosts(localHost, remoteHostInRemoteDC, idsToBeIgnoredByPartition, idsToBeTtlUpdatedByPartition);
    storageManager.shutdown();
}
Also used : HashMap(java.util.HashMap) StorageManager(com.github.ambry.store.StorageManager) ArrayList(java.util.ArrayList) Store(com.github.ambry.store.Store) ArrayList(java.util.ArrayList) List(java.util.List) MockPartitionId(com.github.ambry.clustermap.MockPartitionId) MockPartitionId(com.github.ambry.clustermap.MockPartitionId) PartitionId(com.github.ambry.clustermap.PartitionId) StoreKey(com.github.ambry.store.StoreKey) MockReplicaId(com.github.ambry.clustermap.MockReplicaId) ReplicaId(com.github.ambry.clustermap.ReplicaId) MockReplicaId(com.github.ambry.clustermap.MockReplicaId) Test(org.junit.Test)

Aggregations

MockReplicaId (com.github.ambry.clustermap.MockReplicaId)27 ReplicaId (com.github.ambry.clustermap.ReplicaId)20 MockPartitionId (com.github.ambry.clustermap.MockPartitionId)19 Test (org.junit.Test)19 PartitionId (com.github.ambry.clustermap.PartitionId)13 ArrayList (java.util.ArrayList)10 MockClusterMap (com.github.ambry.clustermap.MockClusterMap)8 StorageManager (com.github.ambry.store.StorageManager)8 HashSet (java.util.HashSet)7 Port (com.github.ambry.network.Port)6 Store (com.github.ambry.store.Store)6 HashMap (java.util.HashMap)6 MockDataNodeId (com.github.ambry.clustermap.MockDataNodeId)5 VerifiableProperties (com.github.ambry.config.VerifiableProperties)5 MetricRegistry (com.codahale.metrics.MetricRegistry)4 ClusterMapConfig (com.github.ambry.config.ClusterMapConfig)4 StoreKey (com.github.ambry.store.StoreKey)4 IOException (java.io.IOException)4 List (java.util.List)4 Counter (com.codahale.metrics.Counter)3