use of com.github.ambry.store.StorageManager in project ambry by linkedin.
the class ReplicationTest method onReplicaAddedOrRemovedCallbackTest.
/**
* Test cluster map change callback in {@link ReplicationManager} when any remote replicas are added or removed.
* Test setup: attempt to add 3 replicas and remove 3 replicas respectively. The three replicas are picked as follows:
* (1) 1st replica on current node (should skip)
* (2) 2nd replica on remote node sharing partition with current one (should be added or removed)
* (3) 3rd replica on remote node but doesn't share partition with current one (should skip)
* @throws Exception
*/
@Test
public void onReplicaAddedOrRemovedCallbackTest() throws Exception {
MockClusterMap clusterMap = new MockClusterMap();
ClusterMapConfig clusterMapConfig = new ClusterMapConfig(verifiableProperties);
StoreConfig storeConfig = new StoreConfig(verifiableProperties);
// pick a node with no special partition as current node
Set<DataNodeId> specialPartitionNodes = clusterMap.getSpecialPartition().getReplicaIds().stream().map(ReplicaId::getDataNodeId).collect(Collectors.toSet());
DataNodeId currentNode = clusterMap.getDataNodes().stream().filter(d -> !specialPartitionNodes.contains(d)).findFirst().get();
MockStoreKeyConverterFactory storeKeyConverterFactory = new MockStoreKeyConverterFactory(null, null);
storeKeyConverterFactory.setConversionMap(new HashMap<>());
StorageManager storageManager = new StorageManager(storeConfig, new DiskManagerConfig(verifiableProperties), Utils.newScheduler(1, true), new MetricRegistry(), null, clusterMap, currentNode, null, null, new MockTime(), null, new InMemAccountService(false, false));
storageManager.start();
MockReplicationManager replicationManager = new MockReplicationManager(replicationConfig, clusterMapConfig, storeConfig, storageManager, clusterMap, currentNode, storeKeyConverterFactory, null);
ClusterMapChangeListener clusterMapChangeListener = clusterMap.getClusterMapChangeListener();
// find the special partition (not on current node) and get an irrelevant replica from it
PartitionId absentPartition = clusterMap.getSpecialPartition();
ReplicaId irrelevantReplica = absentPartition.getReplicaIds().get(0);
// find an existing replica on current node and one of its peer replicas on remote node
ReplicaId existingReplica = clusterMap.getReplicaIds(currentNode).get(0);
ReplicaId peerReplicaToRemove = existingReplica.getPartitionId().getReplicaIds().stream().filter(r -> r != existingReplica).findFirst().get();
// create a new node and place a peer of existing replica on it.
MockDataNodeId remoteNode = createDataNode(getListOfPorts(PLAIN_TEXT_PORT_START_NUMBER + 10, SSL_PORT_START_NUMBER + 10, HTTP2_PORT_START_NUMBER + 10), clusterMap.getDatacenterName((byte) 0), 3);
ReplicaId addedReplica = new MockReplicaId(remoteNode.getPort(), (MockPartitionId) existingReplica.getPartitionId(), remoteNode, 0);
// populate added replica and removed replica lists
List<ReplicaId> replicasToAdd = new ArrayList<>(Arrays.asList(existingReplica, addedReplica, irrelevantReplica));
List<ReplicaId> replicasToRemove = new ArrayList<>(Arrays.asList(existingReplica, peerReplicaToRemove, irrelevantReplica));
PartitionInfo partitionInfo = replicationManager.getPartitionToPartitionInfoMap().get(existingReplica.getPartitionId());
assertNotNull("PartitionInfo is not found", partitionInfo);
RemoteReplicaInfo peerReplicaInfo = partitionInfo.getRemoteReplicaInfos().stream().filter(info -> info.getReplicaId() == peerReplicaToRemove).findFirst().get();
// get the replica-thread for this peer replica
ReplicaThread peerReplicaThread = peerReplicaInfo.getReplicaThread();
// Test Case 1: replication manager encountered exception during startup (remote replica addition/removal will be skipped)
replicationManager.startWithException();
clusterMapChangeListener.onReplicaAddedOrRemoved(replicasToAdd, replicasToRemove);
// verify that PartitionInfo stays unchanged
verifyRemoteReplicaInfo(partitionInfo, addedReplica, false);
verifyRemoteReplicaInfo(partitionInfo, peerReplicaToRemove, true);
// Test Case 2: startup latch is interrupted
CountDownLatch initialLatch = replicationManager.startupLatch;
CountDownLatch mockLatch = Mockito.mock(CountDownLatch.class);
doThrow(new InterruptedException()).when(mockLatch).await();
replicationManager.startupLatch = mockLatch;
try {
clusterMapChangeListener.onReplicaAddedOrRemoved(replicasToAdd, replicasToRemove);
fail("should fail because startup latch is interrupted");
} catch (IllegalStateException e) {
// expected
}
replicationManager.startupLatch = initialLatch;
// Test Case 3: replication manager is successfully started
replicationManager.start();
clusterMapChangeListener.onReplicaAddedOrRemoved(replicasToAdd, replicasToRemove);
// verify that PartitionInfo has latest remote replica infos
verifyRemoteReplicaInfo(partitionInfo, addedReplica, true);
verifyRemoteReplicaInfo(partitionInfo, peerReplicaToRemove, false);
verifyRemoteReplicaInfo(partitionInfo, irrelevantReplica, false);
// verify new added replica is assigned to a certain thread
ReplicaThread replicaThread = replicationManager.getDataNodeIdToReplicaThreadMap().get(addedReplica.getDataNodeId());
assertNotNull("There is no ReplicaThread assocated with new replica", replicaThread);
Optional<RemoteReplicaInfo> findResult = replicaThread.getRemoteReplicaInfos().get(remoteNode).stream().filter(info -> info.getReplicaId() == addedReplica).findAny();
assertTrue("New added remote replica info should exist in corresponding thread", findResult.isPresent());
// verify the removed replica info's thread is null
assertNull("Thread in removed replica info should be null", peerReplicaInfo.getReplicaThread());
findResult = peerReplicaThread.getRemoteReplicaInfos().get(peerReplicaToRemove.getDataNodeId()).stream().filter(info -> info.getReplicaId() == peerReplicaToRemove).findAny();
assertFalse("Previous replica thread should not contain RemoteReplicaInfo that is already removed", findResult.isPresent());
storageManager.shutdown();
}
use of com.github.ambry.store.StorageManager in project ambry by linkedin.
the class LeaderBasedReplicationTest method replicaThreadLeaderBasedReplicationStandByCrossColoFetchTest.
/**
* Test leader based replication to verify cross colo gets for standby replicas after they have have timed out
* waiting for missing keys.
* @throws Exception
*/
@Test
public void replicaThreadLeaderBasedReplicationStandByCrossColoFetchTest() throws Exception {
Map<DataNodeId, MockHost> hosts = new HashMap<>();
hosts.put(remoteNodeInLocalDC, remoteHostInLocalDC);
hosts.put(remoteNodeInRemoteDC, remoteHostInRemoteDC);
int batchSize = 5;
int numOfMessagesOnRemoteNodeInLocalDC = 3;
int numOfMessagesOnRemoteNodeInRemoteDC = 10;
ConnectionPool mockConnectionPool = new MockConnectionPool(hosts, clusterMap, batchSize);
Pair<StorageManager, ReplicationManager> managers = createStorageManagerAndReplicationManager(clusterMap, clusterMapConfig, mockHelixParticipant, mockConnectionPool);
StorageManager storageManager = managers.getFirst();
MockReplicationManager replicationManager = (MockReplicationManager) managers.getSecond();
// set mock local stores on all remoteReplicaInfos which will used during replication.
for (PartitionId partitionId : replicationManager.partitionToPartitionInfo.keySet()) {
localHost.addStore(partitionId, null);
Store localStore = localHost.getStore(partitionId);
localStore.start();
List<RemoteReplicaInfo> remoteReplicaInfos = replicationManager.partitionToPartitionInfo.get(partitionId).getRemoteReplicaInfos();
remoteReplicaInfos.forEach(remoteReplicaInfo -> remoteReplicaInfo.setLocalStore(localStore));
}
// get remote replicas and replica thread for remote host on local datacenter
ReplicaThread intraColoReplicaThread = replicationManager.dataNodeIdToReplicaThread.get(remoteNodeInLocalDC);
List<RemoteReplicaInfo> remoteReplicaInfosForLocalDC = intraColoReplicaThread.getRemoteReplicaInfos().get(remoteNodeInLocalDC);
// get remote replicas and replica thread for remote host on remote datacenter
ReplicaThread crossColoReplicaThread = replicationManager.dataNodeIdToReplicaThread.get(remoteNodeInRemoteDC);
List<RemoteReplicaInfo> remoteReplicaInfosForRemoteDC = crossColoReplicaThread.getRemoteReplicaInfos().get(remoteNodeInRemoteDC);
// mock helix transition state from standby to leader for local leader partitions
List<? extends ReplicaId> replicaIds = clusterMap.getReplicaIds(replicationManager.dataNodeId);
for (ReplicaId replicaId : replicaIds) {
MockReplicaId mockReplicaId = (MockReplicaId) replicaId;
if (mockReplicaId.getReplicaState() == ReplicaState.LEADER) {
MockPartitionId mockPartitionId = (MockPartitionId) replicaId.getPartitionId();
mockHelixParticipant.onPartitionBecomeLeaderFromStandby(mockPartitionId.toPathString());
}
}
// Add put messages to all partitions on remoteHost1 and remoteHost2
List<PartitionId> partitionIds = clusterMap.getWritablePartitionIds(null);
for (PartitionId partitionId : partitionIds) {
// add 3 put messages to the remoteNodeInLocalDC and remoteNodeInRemoteDC from which local host will replicate.
addPutMessagesToReplicasOfPartition(partitionId, Arrays.asList(remoteHostInLocalDC, remoteHostInRemoteDC), numOfMessagesOnRemoteNodeInLocalDC);
// add 1 put message to the remoteNodeInRemoteDC only. Since this message is not present in remoteNodeInLocalDC, it
// doesn't come to local node via intra-dc replication. We should see time out for remote standby replicas waiting for this
// message and see a cross colo fetch happening.
addPutMessagesToReplicasOfPartition(partitionId, Collections.singletonList(remoteHostInRemoteDC), numOfMessagesOnRemoteNodeInRemoteDC - numOfMessagesOnRemoteNodeInLocalDC);
}
// Choose partitions that are leaders on both local and remote nodes
Set<ReplicaId> leaderReplicasOnLocalAndRemoteNodes = getRemoteLeaderReplicasWithLeaderPartitionsOnLocalNode(clusterMap, replicationManager.dataNodeId, remoteNodeInRemoteDC);
// replicate with remote node in remote DC
crossColoReplicaThread.replicate();
// missing messages are not fetched yet.
for (RemoteReplicaInfo remoteReplicaInfo : remoteReplicaInfosForRemoteDC) {
if (leaderReplicasOnLocalAndRemoteNodes.contains(remoteReplicaInfo.getReplicaId())) {
assertEquals("remote token mismatch for leader replicas", ((MockFindToken) remoteReplicaInfo.getToken()).getIndex(), batchSize - 1);
} else {
assertEquals("remote token should not move forward for standby replicas until missing keys are fetched", ((MockFindToken) remoteReplicaInfo.getToken()).getIndex(), 0);
}
}
// Replicate with remote node in local dc
intraColoReplicaThread.replicate();
// verify that remote token will be moved for all replicas as it is intra-dc replication
for (RemoteReplicaInfo remoteReplicaInfo : remoteReplicaInfosForLocalDC) {
assertEquals("mismatch in remote token set for intra colo replicas", ((MockFindToken) remoteReplicaInfo.getToken()).getIndex(), numOfMessagesOnRemoteNodeInLocalDC - 1);
}
// via intra-dc replication
for (RemoteReplicaInfo remoteReplicaInfo : remoteReplicaInfosForRemoteDC) {
crossColoReplicaThread.processMissingKeysFromPreviousMetadataResponse(remoteReplicaInfo);
}
// verify that the remote token will remain 0 for standby replicas as one message in its missing set is not fetched yet.
for (RemoteReplicaInfo remoteReplicaInfo : remoteReplicaInfosForRemoteDC) {
if (!leaderReplicasOnLocalAndRemoteNodes.contains(remoteReplicaInfo.getReplicaId())) {
assertTrue("missing store messages should still exist for standby replicas", crossColoReplicaThread.containsMissingKeysFromPreviousMetadataExchange(remoteReplicaInfo));
assertEquals("remote token should not move forward for standby replicas until missing keys are fetched", ((MockFindToken) remoteReplicaInfo.getToken()).getIndex(), 0);
assertEquals("incorrect number of missing store messages found for standby replicas", remoteReplicaInfo.getExchangeMetadataResponse().missingStoreMessages.size(), batchSize - numOfMessagesOnRemoteNodeInLocalDC);
}
}
// Attempt replication with remoteNodeInRemoteDC, we should not see any replication attempt for standby replicas
// and their remote token stays as 0.
crossColoReplicaThread.replicate();
for (RemoteReplicaInfo remoteReplicaInfo : remoteReplicaInfosForRemoteDC) {
if (!leaderReplicasOnLocalAndRemoteNodes.contains(remoteReplicaInfo.getReplicaId())) {
assertEquals("remote token should not move forward for standby replicas until missing keys are fetched", ((MockFindToken) remoteReplicaInfo.getToken()).getIndex(), 0);
assertTrue("missing store messages should still exist for standby replicas", crossColoReplicaThread.containsMissingKeysFromPreviousMetadataExchange(remoteReplicaInfo));
}
}
// Move time forward by replicationStandbyWaitTimeoutToTriggerCrossColoFetchSeconds+1 seconds and attempt replication.
// We should see cross colo fetch for standby replicas now since missing keys haven't arrived for
// replicationConfig.replicationStandbyWaitTimeoutToTriggerCrossColoFetchSeconds.
time.sleep((replicationConfig.replicationStandbyWaitTimeoutToTriggerCrossColoFetchSeconds + 1) * 1000);
// verify that we get the list of standby replicas that timed out on no progress
Set<RemoteReplicaInfo> allStandbyReplicas = remoteReplicaInfosForRemoteDC.stream().filter(info -> !leaderReplicasOnLocalAndRemoteNodes.contains(info.getReplicaId())).collect(Collectors.toSet());
assertEquals("mismatch in list of standby replicas timed out on no progress", new HashSet<>(crossColoReplicaThread.getRemoteStandbyReplicasTimedOutOnNoProgress(remoteReplicaInfosForRemoteDC)), allStandbyReplicas);
crossColoReplicaThread.replicate();
// token index for all standby replicas will move forward after fetching missing keys themselves
for (RemoteReplicaInfo remoteReplicaInfo : remoteReplicaInfosForRemoteDC) {
if (!leaderReplicasOnLocalAndRemoteNodes.contains(remoteReplicaInfo.getReplicaId())) {
assertEquals("mismatch in remote token set for standby cross colo replicas", ((MockFindToken) remoteReplicaInfo.getToken()).getIndex(), batchSize - 1);
assertFalse("missing store messages should be empty for standby replicas now", crossColoReplicaThread.containsMissingKeysFromPreviousMetadataExchange(remoteReplicaInfo));
}
}
// verify replication metrics to track number of cross colo get requests for standby replicas. If all replicas are
// leaders, we should have 0 cross colo get requests.
String remoteDataCenter = remoteReplicaInfosForRemoteDC.get(0).getReplicaId().getDataNodeId().getDatacenterName();
assertEquals("mismatch in number of cross colo get requests tracked for standby replicas", crossColoReplicaThread.getReplicationMetrics().interColoReplicationGetRequestCountForStandbyReplicas.get(remoteDataCenter).getCount(), leaderReplicasOnLocalAndRemoteNodes.size() != remoteReplicaInfosForRemoteDC.size() ? 1 : 0);
storageManager.shutdown();
}
use of com.github.ambry.store.StorageManager in project ambry by linkedin.
the class LeaderBasedReplicationTest method replicaThreadLeaderBasedReplicationFetchMissingKeysInMultipleCyclesTest.
/**
* Test leader based replication to ensure token is advanced correctly for standby replicas when missing messages
* are fetched via intra-dc replication in multiple cycles.
* @throws Exception
*/
@Test
public void replicaThreadLeaderBasedReplicationFetchMissingKeysInMultipleCyclesTest() throws Exception {
Pair<StorageManager, ReplicationManager> managers = createStorageManagerAndReplicationManager(clusterMap, clusterMapConfig, mockHelixParticipant);
StorageManager storageManager = managers.getFirst();
MockReplicationManager replicationManager = (MockReplicationManager) managers.getSecond();
// set mock local stores on all remoteReplicaInfos which will used during replication.
for (PartitionId partitionId : replicationManager.partitionToPartitionInfo.keySet()) {
localHost.addStore(partitionId, null);
Store localStore = localHost.getStore(partitionId);
localStore.start();
List<RemoteReplicaInfo> remoteReplicaInfos = replicationManager.partitionToPartitionInfo.get(partitionId).getRemoteReplicaInfos();
remoteReplicaInfos.forEach(remoteReplicaInfo -> remoteReplicaInfo.setLocalStore(localStore));
}
// get remote replicas and replica thread for remote host on local datacenter
ReplicaThread intraColoReplicaThread = replicationManager.dataNodeIdToReplicaThread.get(remoteNodeInLocalDC);
List<RemoteReplicaInfo> remoteReplicaInfosForLocalDC = intraColoReplicaThread.getRemoteReplicaInfos().get(remoteNodeInLocalDC);
// get remote replicas and replica thread for remote host on remote datacenter
ReplicaThread crossColoReplicaThread = replicationManager.dataNodeIdToReplicaThread.get(remoteNodeInRemoteDC);
List<RemoteReplicaInfo> remoteReplicaInfosForRemoteDC = crossColoReplicaThread.getRemoteReplicaInfos().get(remoteNodeInRemoteDC);
// mock helix transition state from standby to leader for local leader partitions
List<? extends ReplicaId> replicaIds = clusterMap.getReplicaIds(replicationManager.dataNodeId);
for (ReplicaId replicaId : replicaIds) {
MockReplicaId mockReplicaId = (MockReplicaId) replicaId;
if (mockReplicaId.getReplicaState() == ReplicaState.LEADER) {
MockPartitionId mockPartitionId = (MockPartitionId) replicaId.getPartitionId();
mockHelixParticipant.onPartitionBecomeLeaderFromStandby(mockPartitionId.toPathString());
}
}
List<PartitionId> partitionIds = clusterMap.getWritablePartitionIds(null);
Map<PartitionId, List<StoreKey>> idsToBeIgnoredByPartition = new HashMap<>();
Map<PartitionId, List<StoreKey>> idsToBeTtlUpdatedByPartition = new HashMap<>();
int numOfMessages = 5;
for (PartitionId id : partitionIds) {
List<StoreKey> toBeTtlUpdated = new ArrayList<>();
// Adding 5 put messages to remote hosts
List<StoreKey> ids = addPutMessagesToReplicasOfPartition(id, Arrays.asList(remoteHostInLocalDC, remoteHostInRemoteDC), numOfMessages);
// update the TTL of first put message
StoreKey toTtlUpdateId = ids.get(0);
addTtlUpdateMessagesToReplicasOfPartition(id, toTtlUpdateId, Arrays.asList(remoteHostInLocalDC, remoteHostInRemoteDC), UPDATED_EXPIRY_TIME_MS);
toBeTtlUpdated.add(toTtlUpdateId);
idsToBeTtlUpdatedByPartition.put(id, toBeTtlUpdated);
}
// Choose partitions that are leaders on both local and remote nodes
Set<ReplicaId> remoteLeaderReplicasWithLeaderPartitionsOnLocalNode = getRemoteLeaderReplicasWithLeaderPartitionsOnLocalNode(clusterMap, replicationManager.dataNodeId, remoteNodeInRemoteDC);
// fetch all messages in the metadata exchange from remote replica on remote datacenter by setting batchSize
// in the connection to 6
// There are 6 records in the remote host. 5 put records + 1 ttl update record. We will receive 5 messages in the
// metadata exchange as ttl update will be merged with its put record and sent as single record.
List<ReplicaThread.ExchangeMetadataResponse> responseForRemoteNodeInRemoteDC = crossColoReplicaThread.exchangeMetadata(new MockConnectionPool.MockConnection(remoteHostInRemoteDC, numOfMessages), remoteReplicaInfosForRemoteDC);
assertEquals("Response should contain a response for each replica", remoteReplicaInfosForRemoteDC.size(), responseForRemoteNodeInRemoteDC.size());
for (ReplicaThread.ExchangeMetadataResponse exchangeMetadataResponse : responseForRemoteNodeInRemoteDC) {
// we should have received 5 messages bo, b1, b2, b3, b4 (ttl_update for b0 will be merged with b0 put message)
assertEquals("mismatch in number of messages received from remote replica", numOfMessages, exchangeMetadataResponse.missingStoreMessages.size());
}
// Filter leader replicas to fetch missing keys
List<RemoteReplicaInfo> leaderReplicas = new ArrayList<>();
List<ReplicaThread.ExchangeMetadataResponse> exchangeMetadataResponseListForLeaderReplicas = new ArrayList<>();
crossColoReplicaThread.getLeaderReplicaList(remoteReplicaInfosForRemoteDC, responseForRemoteNodeInRemoteDC, leaderReplicas, exchangeMetadataResponseListForLeaderReplicas);
// verify that only leader replicas in remoteHost2 are chosen for fetching missing messages.
Set<ReplicaId> remoteReplicasToFetchInReplicaThread = leaderReplicas.stream().map(RemoteReplicaInfo::getReplicaId).collect(Collectors.toSet());
assertThat("mismatch in leader remote replicas to fetch missing keys", remoteLeaderReplicasWithLeaderPartitionsOnLocalNode, is(remoteReplicasToFetchInReplicaThread));
// fetch missing keys for leader replicas from remoteHost2
if (leaderReplicas.size() > 0) {
crossColoReplicaThread.fixMissingStoreKeys(new MockConnectionPool.MockConnection(remoteHostInRemoteDC, numOfMessages), leaderReplicas, exchangeMetadataResponseListForLeaderReplicas, false);
}
// missing messages are not fetched yet.
for (int i = 0; i < remoteReplicaInfosForRemoteDC.size(); i++) {
if (remoteLeaderReplicasWithLeaderPartitionsOnLocalNode.contains(remoteReplicaInfosForRemoteDC.get(i).getReplicaId())) {
assertThat("remote Token should be updated for leader replica", remoteReplicaInfosForRemoteDC.get(i).getToken(), is(responseForRemoteNodeInRemoteDC.get(i).remoteToken));
} else {
assertThat("remote Token should not be updated for standby replica", remoteReplicaInfosForRemoteDC.get(i).getToken(), not(responseForRemoteNodeInRemoteDC.get(i).remoteToken));
}
}
int numOfMessagesToBeFetchedFromRemoteHost1 = numOfMessages - 1;
// Fetch only first 4 messages from remote host in local datacenter
List<ReplicaThread.ExchangeMetadataResponse> responseForRemoteNodeInLocalDC = intraColoReplicaThread.exchangeMetadata(new MockConnectionPool.MockConnection(remoteHostInLocalDC, numOfMessagesToBeFetchedFromRemoteHost1), remoteReplicaInfosForLocalDC);
assertEquals("Response should contain a response for each replica", remoteReplicaInfosForLocalDC.size(), responseForRemoteNodeInLocalDC.size());
// fetch missing keys from remoteHost1
intraColoReplicaThread.fixMissingStoreKeys(new MockConnectionPool.MockConnection(remoteHostInLocalDC, numOfMessagesToBeFetchedFromRemoteHost1), remoteReplicaInfosForLocalDC, responseForRemoteNodeInLocalDC, false);
// verify that remote token has moved forward for all partitions since it is intra-dc replication.
for (int i = 0; i < responseForRemoteNodeInLocalDC.size(); i++) {
assertEquals("token mismatch for intra-dc replication", remoteReplicaInfosForLocalDC.get(i).getToken(), responseForRemoteNodeInLocalDC.get(i).remoteToken);
}
// process metadata response for cross-colo replicas. Missing keys b0, b1, b2 and b3 must have come now via intra-dc replication
for (RemoteReplicaInfo remoteReplicaInfo : remoteReplicaInfosForRemoteDC) {
crossColoReplicaThread.processMissingKeysFromPreviousMetadataResponse(remoteReplicaInfo);
}
// verify that the missingStoreMessages size in standby replicas is 1 as b0, b1, b2 and b3 are fetched and b4 is pending
for (RemoteReplicaInfo remoteReplicaInfo : remoteReplicaInfosForRemoteDC) {
if (!remoteLeaderReplicasWithLeaderPartitionsOnLocalNode.contains(remoteReplicaInfo.getReplicaId())) {
assertEquals("mismatch in number of missing messages for remote standby replicas after intra-dc replication", remoteReplicaInfo.getExchangeMetadataResponse().missingStoreMessages.size(), 1);
}
}
// Fetch remaining one message (b4) from remote host in local datacenter
responseForRemoteNodeInLocalDC = intraColoReplicaThread.exchangeMetadata(new MockConnectionPool.MockConnection(remoteHostInLocalDC, numOfMessagesToBeFetchedFromRemoteHost1), remoteReplicaInfosForLocalDC);
assertEquals("Response should contain a response for each replica", remoteReplicaInfosForLocalDC.size(), responseForRemoteNodeInLocalDC.size());
// fetch missing keys from remoteHost1
intraColoReplicaThread.fixMissingStoreKeys(new MockConnectionPool.MockConnection(remoteHostInLocalDC, numOfMessagesToBeFetchedFromRemoteHost1), remoteReplicaInfosForLocalDC, responseForRemoteNodeInLocalDC, false);
// verify remote token for intra-dc replicas
for (int i = 0; i < responseForRemoteNodeInLocalDC.size(); i++) {
assertThat("mismatch in remote token after all missing messages are received", remoteReplicaInfosForLocalDC.get(i).getToken(), is(responseForRemoteNodeInLocalDC.get(i).remoteToken));
}
// process metadata response for cross-colo replicas. Missing keys b0, b1, b2 and b3 must have come now via intra-dc replication
for (RemoteReplicaInfo remoteReplicaInfo : remoteReplicaInfosForRemoteDC) {
crossColoReplicaThread.processMissingKeysFromPreviousMetadataResponse(remoteReplicaInfo);
}
// remote token for all replicas (leader and standby) should move forward.
for (int i = 0; i < responseForRemoteNodeInRemoteDC.size(); i++) {
assertThat("mismatch in remote token after all missing messages are received", remoteReplicaInfosForRemoteDC.get(i).getToken(), is(responseForRemoteNodeInRemoteDC.get(i).remoteToken));
}
// compare the final state of all messages and buffers are in sync at local node and remoteNodeInLocalDC
checkBlobMessagesAreEqualInLocalAndRemoteHosts(localHost, remoteHostInLocalDC, idsToBeIgnoredByPartition, idsToBeTtlUpdatedByPartition);
// compare the final state of all messages and buffers are in sync at local node and remoteNodeInRemoteDC
checkBlobMessagesAreEqualInLocalAndRemoteHosts(localHost, remoteHostInRemoteDC, idsToBeIgnoredByPartition, idsToBeTtlUpdatedByPartition);
storageManager.shutdown();
}
use of com.github.ambry.store.StorageManager in project ambry by linkedin.
the class LeaderBasedReplicationTest method replicaThreadLeaderBasedReplicationForPUTMessagesTest.
/**
* Test leader based replication to ensure token is advanced correctly for standby replicas when missing PUT messages
* are fetched via intra-dc replication.
* @throws Exception
*/
@Test
public void replicaThreadLeaderBasedReplicationForPUTMessagesTest() throws Exception {
Pair<StorageManager, ReplicationManager> managers = createStorageManagerAndReplicationManager(clusterMap, clusterMapConfig, mockHelixParticipant);
StorageManager storageManager = managers.getFirst();
MockReplicationManager replicationManager = (MockReplicationManager) managers.getSecond();
/*
Scenario:
we have 3 nodes that have replicas belonging to same partitions:
a) local node
b) remote node in local DC
c) remote node in remote DC
Each node have few of its partitions as leaders and others are standby. They are randomly assigned during creation
of replicas for mock partitions.
We have 4 PUT messages in each of the partitions in remote nodes of local DC and remote DC that needs to be
replicated at local node.
Steps:
1. Replicate (send metadata exchange and get messages) with remote node in remote DC (cross-colo replication).
Expectations:
a) We should see that metadata exchange is sent for all replicas while GET messages are only sent for leader replicas.
b) All the PUT messages should be replicated locally and remote token should be moved forward for leader partitions.
c) For non-leader replicas, metadata response should be stored locally.
2. Replicate (send metadata exchange and get messages) with remote node in local DC (intra-colo replication).
Expectations:
a) Metadata exchange and GET messages are sent for all replicas.
b) PUT messages should be replicated locally for all replicas.
c) Missing messages in stored metadata response of non-leader replicas for remoteNodeInRemoteDC should become empty
and remote token should be advanced.
*/
int batchSize = 4;
// set mock local stores on all remoteReplicaInfos which will used during replication.
for (PartitionId partitionId : replicationManager.partitionToPartitionInfo.keySet()) {
localHost.addStore(partitionId, null);
Store localStore = localHost.getStore(partitionId);
localStore.start();
List<RemoteReplicaInfo> remoteReplicaInfos = replicationManager.partitionToPartitionInfo.get(partitionId).getRemoteReplicaInfos();
remoteReplicaInfos.forEach(remoteReplicaInfo -> remoteReplicaInfo.setLocalStore(localStore));
}
// get remote replicas and replica thread for remote host on local datacenter
ReplicaThread intraColoReplicaThread = replicationManager.dataNodeIdToReplicaThread.get(remoteNodeInLocalDC);
List<RemoteReplicaInfo> remoteReplicaInfosForLocalDC = intraColoReplicaThread.getRemoteReplicaInfos().get(remoteNodeInLocalDC);
// get remote replicas and replica thread for remote host on remote datacenter
ReplicaThread crossColoReplicaThread = replicationManager.dataNodeIdToReplicaThread.get(remoteNodeInRemoteDC);
List<RemoteReplicaInfo> remoteReplicaInfosForRemoteDC = crossColoReplicaThread.getRemoteReplicaInfos().get(remoteNodeInRemoteDC);
// mock helix transition state from standby to leader for local leader partitions
List<? extends ReplicaId> replicaIds = clusterMap.getReplicaIds(replicationManager.dataNodeId);
for (ReplicaId replicaId : replicaIds) {
MockReplicaId mockReplicaId = (MockReplicaId) replicaId;
if (mockReplicaId.getReplicaState() == ReplicaState.LEADER) {
MockPartitionId mockPartitionId = (MockPartitionId) replicaId.getPartitionId();
mockHelixParticipant.onPartitionBecomeLeaderFromStandby(mockPartitionId.toPathString());
}
}
// Add put messages to all partitions on remoteHost1 and remoteHost2
List<PartitionId> partitionIds = clusterMap.getWritablePartitionIds(null);
for (PartitionId partitionId : partitionIds) {
// add batchSize messages to the remoteHost1 and remote host 2 from which local host will replicate.
addPutMessagesToReplicasOfPartition(partitionId, Arrays.asList(remoteHostInLocalDC, remoteHostInRemoteDC), batchSize);
}
// Choose partitions that are leaders on both local and remote nodes
Set<ReplicaId> leaderReplicasOnLocalAndRemoteNodes = getRemoteLeaderReplicasWithLeaderPartitionsOnLocalNode(clusterMap, replicationManager.dataNodeId, remoteNodeInRemoteDC);
// Replicate with remoteHost2 in remote data center.
List<ReplicaThread.ExchangeMetadataResponse> responseListForRemoteNodeInRemoteDC = crossColoReplicaThread.exchangeMetadata(new MockConnectionPool.MockConnection(remoteHostInRemoteDC, batchSize), remoteReplicaInfosForRemoteDC);
// Metadata requests should be sent to both leader and standby replicas.
assertEquals("Response should contain a response for each replica", remoteReplicaInfosForRemoteDC.size(), responseListForRemoteNodeInRemoteDC.size());
// verify that missing messages size equals to the min{batch size, number of PUT messages} placed on remote hosts
int expectedIndex = batchSize - 1;
for (ReplicaThread.ExchangeMetadataResponse exchangeMetadataResponse : responseListForRemoteNodeInRemoteDC) {
assertEquals("mismatch in number of missing messages", batchSize, exchangeMetadataResponse.missingStoreMessages.size());
}
// Filter leader replicas to fetch missing keys
List<RemoteReplicaInfo> leaderReplicas = new ArrayList<>();
List<ReplicaThread.ExchangeMetadataResponse> exchangeMetadataResponseListForLeaderReplicas = new ArrayList<>();
crossColoReplicaThread.getLeaderReplicaList(remoteReplicaInfosForRemoteDC, responseListForRemoteNodeInRemoteDC, leaderReplicas, exchangeMetadataResponseListForLeaderReplicas);
// verify that only leader replicas in remoteHost2 are chosen for fetching missing messages.
Set<ReplicaId> remoteReplicasToFetchInReplicaThread = leaderReplicas.stream().map(RemoteReplicaInfo::getReplicaId).collect(Collectors.toSet());
assertThat("mismatch in leader remote replicas to fetch missing keys", leaderReplicasOnLocalAndRemoteNodes, is(remoteReplicasToFetchInReplicaThread));
// fetch missing keys for leader replicas from remoteHost2
if (leaderReplicas.size() > 0) {
crossColoReplicaThread.fixMissingStoreKeys(new MockConnectionPool.MockConnection(remoteHostInRemoteDC, batchSize), leaderReplicas, exchangeMetadataResponseListForLeaderReplicas, false);
}
// missing messages are not fetched yet.
for (int i = 0; i < remoteReplicaInfosForRemoteDC.size(); i++) {
if (leaderReplicasOnLocalAndRemoteNodes.contains(remoteReplicaInfosForRemoteDC.get(i).getReplicaId())) {
assertEquals("remote token mismatch for leader replicas", remoteReplicaInfosForRemoteDC.get(i).getToken(), responseListForRemoteNodeInRemoteDC.get(i).remoteToken);
} else {
assertEquals("missing keys in metadata response should be stored for standby replicas", remoteReplicaInfosForRemoteDC.get(i).getExchangeMetadataResponse().missingStoreMessages.size(), responseListForRemoteNodeInRemoteDC.get(i).missingStoreMessages.size());
assertThat("remote token should not move forward for standby replicas until missing keys are fetched", remoteReplicaInfosForRemoteDC.get(i).getToken(), not(responseListForRemoteNodeInRemoteDC.get(i).remoteToken));
}
}
// Replication with remoteHost1 in local data center
List<ReplicaThread.ExchangeMetadataResponse> responseForRemoteNodeInLocalDC = intraColoReplicaThread.exchangeMetadata(new MockConnectionPool.MockConnection(remoteHostInLocalDC, batchSize), remoteReplicaInfosForLocalDC);
assertEquals("Response should contain a response for each replica", remoteReplicaInfosForLocalDC.size(), responseForRemoteNodeInLocalDC.size());
// fetch missing keys from remoteHost1
intraColoReplicaThread.fixMissingStoreKeys(new MockConnectionPool.MockConnection(remoteHostInLocalDC, batchSize), remoteReplicaInfosForLocalDC, responseForRemoteNodeInLocalDC, false);
for (int i = 0; i < responseForRemoteNodeInLocalDC.size(); i++) {
assertEquals("mismatch in remote token set for intra colo replicas", remoteReplicaInfosForLocalDC.get(i).getToken(), (responseForRemoteNodeInLocalDC.get(i).remoteToken));
}
// process missing keys for cross colo standby replicas from previous metadata exchange
for (RemoteReplicaInfo remoteReplicaInfo : remoteReplicaInfosForRemoteDC) {
crossColoReplicaThread.processMissingKeysFromPreviousMetadataResponse(remoteReplicaInfo);
}
// for standbys are received via intra-dc replication.
for (int i = 0; i < responseListForRemoteNodeInRemoteDC.size(); i++) {
assertEquals("mismatch in remote token set for cross colo replicas", remoteReplicaInfosForRemoteDC.get(i).getToken(), (responseListForRemoteNodeInRemoteDC.get(i).remoteToken));
}
// verify replication metrics to track number of cross colo get requests and cross colo bytes fetch rate for standby
// replicas should be 0 since all missing blobs are obtained via local leader in intra-dc replication.
String remoteDataCenter = remoteReplicaInfosForRemoteDC.get(0).getReplicaId().getDataNodeId().getDatacenterName();
assertEquals("mismatch in number of cross colo get requests tracked for standby replicas", crossColoReplicaThread.getReplicationMetrics().interColoReplicationGetRequestCountForStandbyReplicas.get(remoteDataCenter).getCount(), 0);
assertEquals("mismatch in bytes fetch rate for cross colo get requests tracked for standby replicas", crossColoReplicaThread.getReplicationMetrics().interColoReplicationFetchBytesRateForStandbyReplicas.get(remoteDataCenter).getCount(), 0);
storageManager.shutdown();
}
use of com.github.ambry.store.StorageManager in project ambry by linkedin.
the class LeaderBasedReplicationTest method replicaThreadLeaderBasedReplicationForTTLUpdatesDeleteAndUndeleteMessagesTest.
/**
* Test leader based replication to ensure token is advanced correctly and blob properties ttl
* _update, delete, undelete are applied correctly when missing messages in standby replicas
* are fetched via intra-dc replication.
* @throws Exception
*/
@Test
public void replicaThreadLeaderBasedReplicationForTTLUpdatesDeleteAndUndeleteMessagesTest() throws Exception {
Pair<StorageManager, ReplicationManager> managers = createStorageManagerAndReplicationManager(clusterMap, clusterMapConfig, mockHelixParticipant);
StorageManager storageManager = managers.getFirst();
MockReplicationManager replicationManager = (MockReplicationManager) managers.getSecond();
int batchSize = 10;
// set mock local stores on all remoteReplicaInfos which will used during replication.
for (PartitionId partitionId : replicationManager.partitionToPartitionInfo.keySet()) {
localHost.addStore(partitionId, null);
Store localStore = localHost.getStore(partitionId);
localStore.start();
List<RemoteReplicaInfo> remoteReplicaInfos = replicationManager.partitionToPartitionInfo.get(partitionId).getRemoteReplicaInfos();
remoteReplicaInfos.forEach(remoteReplicaInfo -> remoteReplicaInfo.setLocalStore(localStore));
}
// get remote replicas and replica thread for remote host on local datacenter
ReplicaThread intraColoReplicaThread = replicationManager.dataNodeIdToReplicaThread.get(remoteNodeInLocalDC);
List<RemoteReplicaInfo> remoteReplicaInfosForLocalDC = intraColoReplicaThread.getRemoteReplicaInfos().get(remoteNodeInLocalDC);
// get remote replicas and replica thread for remote host on remote datacenter
ReplicaThread crossColoReplicaThread = replicationManager.dataNodeIdToReplicaThread.get(remoteNodeInRemoteDC);
List<RemoteReplicaInfo> remoteReplicaInfosForRemoteDC = crossColoReplicaThread.getRemoteReplicaInfos().get(remoteNodeInRemoteDC);
// mock helix transition state from standby to leader for local leader partitions
List<? extends ReplicaId> replicaIds = clusterMap.getReplicaIds(replicationManager.dataNodeId);
for (ReplicaId replicaId : replicaIds) {
MockReplicaId mockReplicaId = (MockReplicaId) replicaId;
if (mockReplicaId.getReplicaState() == ReplicaState.LEADER) {
MockPartitionId mockPartitionId = (MockPartitionId) replicaId.getPartitionId();
mockHelixParticipant.onPartitionBecomeLeaderFromStandby(mockPartitionId.toPathString());
}
}
List<PartitionId> partitionIds = clusterMap.getWritablePartitionIds(null);
Map<PartitionId, List<StoreKey>> idsToBeIgnoredByPartition = new HashMap<>();
Map<PartitionId, List<StoreKey>> idsToBeTtlUpdatedByPartition = new HashMap<>();
for (PartitionId id : partitionIds) {
List<StoreKey> toBeIgnored = new ArrayList<>();
List<StoreKey> toBeUndeleted = new ArrayList<>();
// Adding 4 PUT messages b0, b1, b2, b3 to remoteNodeInLocalDC and remoteNodeInRemoteDC
List<StoreKey> ids = addPutMessagesToReplicasOfPartition(id, Arrays.asList(remoteHostInLocalDC, remoteHostInRemoteDC), 4);
// ttl update to be added to b1, b2, b3
List<StoreKey> toBeTtlUpdated = new ArrayList<>(ids);
toBeTtlUpdated.remove(ids.get(0));
// delete to be added to b2, b3
toBeIgnored.add(ids.get(2));
toBeIgnored.add(ids.get(3));
// un-delete to be added to b3
toBeUndeleted.add(ids.get(3));
toBeIgnored.remove(ids.get(3));
// Add TTLUpdate records for blobs b1,b2,b3 in remoteNodeInLocalDC and remoteNodeInRemoteDC
for (int j = 0; j < toBeTtlUpdated.size(); j++) {
addTtlUpdateMessagesToReplicasOfPartition(id, toBeTtlUpdated.get(j), Collections.singletonList(remoteHostInLocalDC), UPDATED_EXPIRY_TIME_MS);
addTtlUpdateMessagesToReplicasOfPartition(id, toBeTtlUpdated.get(toBeTtlUpdated.size() - 1 - j), Collections.singletonList(remoteHostInRemoteDC), UPDATED_EXPIRY_TIME_MS);
}
// Add delete records for blobs b2,b3 in remoteNodeInLocalDC and remoteNodeInRemoteDC
for (int j = 0; j < toBeIgnored.size(); j++) {
addDeleteMessagesToReplicasOfPartition(id, toBeIgnored.get(j), Collections.singletonList(remoteHostInLocalDC), (short) 0, EXPIRY_TIME_MS);
addDeleteMessagesToReplicasOfPartition(id, toBeIgnored.get(toBeIgnored.size() - 1 - j), Collections.singletonList(remoteHostInRemoteDC), (short) 0, EXPIRY_TIME_MS);
}
// Add un-delete records for blob b3 with life_version as 1 in remoteNodeInLocalDC and remoteNodeInRemoteDC
for (StoreKey storeKey : toBeUndeleted) {
addUndeleteMessagesToReplicasOfPartition(id, storeKey, Arrays.asList(remoteHostInLocalDC, remoteHostInRemoteDC), (short) 1);
}
// will be used later while comparing the final message records in local and remote nodes
idsToBeIgnoredByPartition.put(id, toBeIgnored);
idsToBeTtlUpdatedByPartition.put(id, toBeTtlUpdated);
}
// Inter-dc replication
// Send metadata request to remoteNodeInRemoteDC to fetch missing keys information.
List<ReplicaThread.ExchangeMetadataResponse> responseForRemoteNodeInRemoteDC = crossColoReplicaThread.exchangeMetadata(new MockConnectionPool.MockConnection(remoteHostInRemoteDC, batchSize), remoteReplicaInfosForRemoteDC);
assertEquals("Response should contain a response for each replica", remoteReplicaInfosForRemoteDC.size(), responseForRemoteNodeInRemoteDC.size());
// Filter leader replicas to fetch missing keys
List<RemoteReplicaInfo> leaderReplicas = new ArrayList<>();
List<ReplicaThread.ExchangeMetadataResponse> exchangeMetadataResponseListForLeaderReplicas = new ArrayList<>();
crossColoReplicaThread.getLeaderReplicaList(remoteReplicaInfosForRemoteDC, responseForRemoteNodeInRemoteDC, leaderReplicas, exchangeMetadataResponseListForLeaderReplicas);
// verify that only leader partitions in local and remote nodes are chosen for fetching missing messages.
Set<ReplicaId> remoteLeaderReplicasWithLeaderPartitionsOnLocalNode = getRemoteLeaderReplicasWithLeaderPartitionsOnLocalNode(clusterMap, replicationManager.dataNodeId, remoteNodeInRemoteDC);
Set<ReplicaId> leaderReplicaSetInReplicaThread = leaderReplicas.stream().map(RemoteReplicaInfo::getReplicaId).collect(Collectors.toSet());
assertThat("mismatch in leader remote replicas to fetch missing keys", remoteLeaderReplicasWithLeaderPartitionsOnLocalNode, is(leaderReplicaSetInReplicaThread));
// fetch missing keys for leader replicas from remoteNodeInRemoteDC
if (leaderReplicas.size() > 0) {
crossColoReplicaThread.fixMissingStoreKeys(new MockConnectionPool.MockConnection(remoteHostInRemoteDC, batchSize), leaderReplicas, exchangeMetadataResponseListForLeaderReplicas, false);
}
// For standby replicas, token index will remain 0 and metadata information would be stored.
for (int i = 0; i < remoteReplicaInfosForRemoteDC.size(); i++) {
if (remoteLeaderReplicasWithLeaderPartitionsOnLocalNode.contains(remoteReplicaInfosForRemoteDC.get(i).getReplicaId())) {
assertEquals("remote Token should be updated for leader replica", remoteReplicaInfosForRemoteDC.get(i).getToken(), (responseForRemoteNodeInRemoteDC.get(i).remoteToken));
} else {
assertThat("remote Token should not be updated for standby replica", remoteReplicaInfosForRemoteDC.get(i).getToken(), not(responseForRemoteNodeInRemoteDC.get(i).remoteToken));
assertEquals("missing messages in metadata exchange should be stored for standby replica", remoteReplicaInfosForRemoteDC.get(i).getExchangeMetadataResponse().missingStoreMessages.size(), responseForRemoteNodeInRemoteDC.get(i).missingStoreMessages.size());
}
}
// Intra-dc replication
List<ReplicaThread.ExchangeMetadataResponse> responseForRemoteNodeInLocalDC = intraColoReplicaThread.exchangeMetadata(new MockConnectionPool.MockConnection(remoteHostInLocalDC, batchSize), remoteReplicaInfosForLocalDC);
intraColoReplicaThread.fixMissingStoreKeys(new MockConnectionPool.MockConnection(remoteHostInLocalDC, batchSize), remoteReplicaInfosForLocalDC, responseForRemoteNodeInLocalDC, false);
// Verify that the remote token for all intra-colo replicas has been moved
for (int i = 0; i < responseForRemoteNodeInLocalDC.size(); i++) {
assertEquals(remoteReplicaInfosForLocalDC.get(i).getToken(), responseForRemoteNodeInLocalDC.get(i).remoteToken);
}
// arrived via intra-dc replication
for (RemoteReplicaInfo remoteReplicaInfo : remoteReplicaInfosForRemoteDC) {
crossColoReplicaThread.processMissingKeysFromPreviousMetadataResponse(remoteReplicaInfo);
}
// arrived via intra-dc replication
for (int i = 0; i < responseForRemoteNodeInLocalDC.size(); i++) {
assertEquals(remoteReplicaInfosForRemoteDC.get(i).getToken(), responseForRemoteNodeInRemoteDC.get(i).remoteToken);
}
// compare the messages and buffers are in sync at local host and remote host1
checkBlobMessagesAreEqualInLocalAndRemoteHosts(localHost, remoteHostInLocalDC, idsToBeIgnoredByPartition, idsToBeTtlUpdatedByPartition);
// compare the messages and buffers are in sync at local host and remote host1
checkBlobMessagesAreEqualInLocalAndRemoteHosts(localHost, remoteHostInRemoteDC, idsToBeIgnoredByPartition, idsToBeTtlUpdatedByPartition);
storageManager.shutdown();
}
Aggregations