use of com.github.ambry.clustermap.DataNodeId in project ambry by linkedin.
the class ReplicationTest method replicaThreadTestConverter.
/**
* Tests that replication works between a local and a remote server that have different
* blob IDs for the same blobs (via StoreKeyConverter)
* @throws Exception
*/
@Test
public void replicaThreadTestConverter() throws Exception {
MockClusterMap clusterMap = new MockClusterMap();
Pair<MockHost, MockHost> localAndRemoteHosts = getLocalAndRemoteHosts(clusterMap);
MockHost localHost = localAndRemoteHosts.getFirst();
MockHost remoteHost = localAndRemoteHosts.getSecond();
MockHost expectedLocalHost = new MockHost(localHost.dataNodeId, clusterMap);
MockStoreKeyConverterFactory storeKeyConverterFactory = new MockStoreKeyConverterFactory(null, null);
storeKeyConverterFactory.setConversionMap(new HashMap<>());
storeKeyConverterFactory.setReturnInputIfAbsent(true);
MockStoreKeyConverterFactory.MockStoreKeyConverter storeKeyConverter = storeKeyConverterFactory.getStoreKeyConverter();
List<PartitionId> partitionIds = clusterMap.getWritablePartitionIds(null);
Map<PartitionId, List<StoreKey>> idsToBeIgnoredByPartition = new HashMap<>();
StoreKeyFactory storeKeyFactory = new BlobIdFactory(clusterMap);
Transformer transformer = new BlobIdTransformer(storeKeyFactory, storeKeyConverter);
int batchSize = 4;
Pair<Map<DataNodeId, List<RemoteReplicaInfo>>, ReplicaThread> replicasAndThread = getRemoteReplicasAndReplicaThread(batchSize, clusterMap, localHost, remoteHost, storeKeyConverter, transformer, null, null);
Map<DataNodeId, List<RemoteReplicaInfo>> replicasToReplicate = replicasAndThread.getFirst();
ReplicaThread replicaThread = replicasAndThread.getSecond();
/*
STORE KEY CONVERTER MAPPING
Key     Value
B0      B0'
B1      B1'
B2      null

BEFORE
Local   Remote
B0'     B0
        B1
        B2

AFTER
Local   Remote
B0'     B0
B1'     B1
        B2

B0 is B0' for local, so local already has B0/B0'.
B1 is B1' for local, so B1 is transferred as B1'.
B2 is null for local, i.e. invalid, so it does not count as missing.
Missing Keys: 1
*/
Map<PartitionId, List<BlobId>> partitionIdToDeleteBlobId = new HashMap<>();
Map<StoreKey, StoreKey> conversionMap = new HashMap<>();
Map<PartitionId, BlobId> expectedPartitionIdToDeleteBlobId = new HashMap<>();
for (int i = 0; i < partitionIds.size(); i++) {
PartitionId partitionId = partitionIds.get(i);
List<BlobId> deleteBlobIds = new ArrayList<>();
partitionIdToDeleteBlobId.put(partitionId, deleteBlobIds);
BlobId b0 = generateRandomBlobId(partitionId);
deleteBlobIds.add(b0);
BlobId b0p = generateRandomBlobId(partitionId);
expectedPartitionIdToDeleteBlobId.put(partitionId, b0p);
BlobId b1 = generateRandomBlobId(partitionId);
BlobId b1p = generateRandomBlobId(partitionId);
BlobId b2 = generateRandomBlobId(partitionId);
deleteBlobIds.add(b2);
conversionMap.put(b0, b0p);
conversionMap.put(b1, b1p);
conversionMap.put(b2, null);
// Convert current conversion map so that BlobIdTransformer can
// create b1p in expectedLocalHost
storeKeyConverter.setConversionMap(conversionMap);
storeKeyConverter.convert(conversionMap.keySet());
addPutMessagesToReplicasOfPartition(Arrays.asList(b0p), Arrays.asList(localHost));
addPutMessagesToReplicasOfPartition(Arrays.asList(b0, b1, b2), Arrays.asList(remoteHost));
addPutMessagesToReplicasOfPartition(Arrays.asList(b0p, b1), Arrays.asList(null, transformer), Arrays.asList(expectedLocalHost));
// Check that expected local host contains the correct blob ids
Set<BlobId> expectedLocalHostBlobIds = new HashSet<>();
expectedLocalHostBlobIds.add(b0p);
expectedLocalHostBlobIds.add(b1p);
for (MessageInfo messageInfo : expectedLocalHost.infosByPartition.get(partitionId)) {
assertTrue("Remove should never fail", expectedLocalHostBlobIds.remove(messageInfo.getStoreKey()));
}
assertTrue("expectedLocalHostBlobIds should now be empty", expectedLocalHostBlobIds.isEmpty());
}
storeKeyConverter.setConversionMap(conversionMap);
int expectedIndex = assertMissingKeysAndFixMissingStoreKeys(0, 2, batchSize, 1, replicaThread, remoteHost, replicasToReplicate);
// Check that there are no missing buffers between expectedLocalHost and LocalHost
Map<PartitionId, List<ByteBuffer>> missingBuffers = expectedLocalHost.getMissingBuffers(localHost.buffersByPartition);
assertTrue(missingBuffers.isEmpty());
missingBuffers = localHost.getMissingBuffers(expectedLocalHost.buffersByPartition);
assertTrue(missingBuffers.isEmpty());
// delete blob
for (int i = 0; i < partitionIds.size(); i++) {
PartitionId partitionId = partitionIds.get(i);
List<BlobId> deleteBlobIds = partitionIdToDeleteBlobId.get(partitionId);
for (BlobId deleteBlobId : deleteBlobIds) {
addDeleteMessagesToReplicasOfPartition(partitionId, deleteBlobId, Arrays.asList(remoteHost));
}
addDeleteMessagesToReplicasOfPartition(partitionId, expectedPartitionIdToDeleteBlobId.get(partitionId), Arrays.asList(expectedLocalHost));
}
expectedIndex = assertMissingKeysAndFixMissingStoreKeys(expectedIndex, 2, batchSize, 0, replicaThread, remoteHost, replicasToReplicate);
// Check that there are no missing buffers between expectedLocalHost and LocalHost
missingBuffers = expectedLocalHost.getMissingBuffers(localHost.buffersByPartition);
assertTrue(missingBuffers.isEmpty());
missingBuffers = localHost.getMissingBuffers(expectedLocalHost.buffersByPartition);
assertTrue(missingBuffers.isEmpty());
// expected missing buffers: 3 unconverted puts + 2 unconverted deletes = 5
verifyNoMoreMissingKeysAndExpectedMissingBufferCount(remoteHost, localHost, replicaThread, replicasToReplicate, idsToBeIgnoredByPartition, storeKeyConverter, expectedIndex, expectedIndex, 5);
}
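For context, the conversion contract this test leans on can be sketched in isolation. The following is a minimal sketch using only the MockStoreKeyConverterFactory calls already shown above; b0, b0p, b1, b1p and b2 stand for the hypothetical blob IDs from the mapping comment.
// A key mapped to null is invalid on the local store and is never counted as missing;
// a key absent from the map converts to itself because of setReturnInputIfAbsent(true).
MockStoreKeyConverterFactory factory = new MockStoreKeyConverterFactory(null, null);
factory.setConversionMap(new HashMap<>());
factory.setReturnInputIfAbsent(true);
MockStoreKeyConverterFactory.MockStoreKeyConverter converter = factory.getStoreKeyConverter();
Map<StoreKey, StoreKey> mapping = new HashMap<>();
mapping.put(b0, b0p); // remote B0 already exists locally as B0'
mapping.put(b1, b1p); // remote B1 is fetched and written locally as B1'
mapping.put(b2, null); // B2 is invalid locally
converter.setConversionMap(mapping);
converter.convert(mapping.keySet()); // warm the converter cache before lookups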
use of com.github.ambry.clustermap.DataNodeId in project ambry by linkedin.
the class ReplicationTest method replicaThreadTest.
/**
* Tests {@link ReplicaThread#exchangeMetadata(ConnectedChannel, List)} and
* {@link ReplicaThread#fixMissingStoreKeys(ConnectedChannel, List, List, boolean)} for valid puts, deletes, expired keys and
* corrupt blobs.
* @throws Exception
*/
@Test
public void replicaThreadTest() throws Exception {
MockClusterMap clusterMap = new MockClusterMap();
Pair<MockHost, MockHost> localAndRemoteHosts = getLocalAndRemoteHosts(clusterMap);
MockHost localHost = localAndRemoteHosts.getFirst();
MockHost remoteHost = localAndRemoteHosts.getSecond();
MockStoreKeyConverterFactory storeKeyConverterFactory = new MockStoreKeyConverterFactory(null, null);
storeKeyConverterFactory.setConversionMap(new HashMap<>());
storeKeyConverterFactory.setReturnInputIfAbsent(true);
MockStoreKeyConverterFactory.MockStoreKeyConverter storeKeyConverter = storeKeyConverterFactory.getStoreKeyConverter();
short blobIdVersion = CommonTestUtils.getCurrentBlobIdVersion();
List<PartitionId> partitionIds = clusterMap.getWritablePartitionIds(null);
Map<PartitionId, List<StoreKey>> idsToBeIgnoredByPartition = new HashMap<>();
for (int i = 0; i < partitionIds.size(); i++) {
List<StoreKey> idsToBeIgnored = new ArrayList<>();
PartitionId partitionId = partitionIds.get(i);
// add 6 messages to both hosts.
StoreKey toDeleteId = addPutMessagesToReplicasOfPartition(partitionId, Arrays.asList(localHost, remoteHost), 6).get(0);
short accountId = Utils.getRandomShort(TestUtils.RANDOM);
short containerId = Utils.getRandomShort(TestUtils.RANDOM);
boolean toEncrypt = TestUtils.RANDOM.nextBoolean();
// add an expired message to the remote host only
StoreKey id = new BlobId(blobIdVersion, BlobId.BlobIdType.NATIVE, ClusterMap.UNKNOWN_DATACENTER_ID, accountId, containerId, partitionId, toEncrypt, BlobId.BlobDataType.DATACHUNK);
PutMsgInfoAndBuffer msgInfoAndBuffer = createPutMessage(id, accountId, containerId, toEncrypt);
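// the MessageInfo below uses an expiration time of 1 ms since epoch, so the message is already expired on arrival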
remoteHost.addMessage(partitionId, new MessageInfo(id, msgInfoAndBuffer.byteBuffer.remaining(), 1, accountId, containerId, msgInfoAndBuffer.messageInfo.getOperationTimeMs()), msgInfoAndBuffer.byteBuffer);
idsToBeIgnored.add(id);
// add 3 messages to the remote host only
addPutMessagesToReplicasOfPartition(partitionId, Collections.singletonList(remoteHost), 3);
accountId = Utils.getRandomShort(TestUtils.RANDOM);
containerId = Utils.getRandomShort(TestUtils.RANDOM);
toEncrypt = TestUtils.RANDOM.nextBoolean();
// add a corrupt message to the remote host only
id = new BlobId(blobIdVersion, BlobId.BlobIdType.NATIVE, ClusterMap.UNKNOWN_DATACENTER_ID, accountId, containerId, partitionId, toEncrypt, BlobId.BlobDataType.DATACHUNK);
msgInfoAndBuffer = createPutMessage(id, accountId, containerId, toEncrypt);
byte[] data = msgInfoAndBuffer.byteBuffer.array();
// flip every bit in the array
for (int j = 0; j < data.length; j++) {
data[j] ^= 0xFF;
}
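// after the flip, the payload no longer matches its checksum, so replication later detects this blob as corrupt and ignores it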
remoteHost.addMessage(partitionId, msgInfoAndBuffer.messageInfo, msgInfoAndBuffer.byteBuffer);
idsToBeIgnored.add(id);
// add 3 messages to the remote host only
addPutMessagesToReplicasOfPartition(partitionId, Collections.singletonList(remoteHost), 3);
// add delete record for the very first blob in the remote host only
addDeleteMessagesToReplicasOfPartition(partitionId, toDeleteId, Collections.singletonList(remoteHost));
// PUT and DELETE a blob in the remote host only
id = addPutMessagesToReplicasOfPartition(partitionId, Collections.singletonList(remoteHost), 1).get(0);
addDeleteMessagesToReplicasOfPartition(partitionId, id, Collections.singletonList(remoteHost));
idsToBeIgnored.add(id);
// add 2 or 3 messages (depending on whether partition is even-numbered or odd-numbered) to the remote host only
addPutMessagesToReplicasOfPartition(partitionId, Collections.singletonList(remoteHost), i % 2 == 0 ? 2 : 3);
idsToBeIgnoredByPartition.put(partitionId, idsToBeIgnored);
// ensure that the first key is not deleted in the local host
assertNull(toDeleteId + " should not be deleted in the local host", getMessageInfo(toDeleteId, localHost.infosByPartition.get(partitionId), true, false, false));
}
StoreKeyFactory storeKeyFactory = new BlobIdFactory(clusterMap);
Transformer transformer = new BlobIdTransformer(storeKeyFactory, storeKeyConverter);
int batchSize = 4;
Pair<Map<DataNodeId, List<RemoteReplicaInfo>>, ReplicaThread> replicasAndThread = getRemoteReplicasAndReplicaThread(batchSize, clusterMap, localHost, remoteHost, storeKeyConverter, transformer, null, null);
Map<DataNodeId, List<RemoteReplicaInfo>> replicasToReplicate = replicasAndThread.getFirst();
ReplicaThread replicaThread = replicasAndThread.getSecond();
Map<PartitionId, List<ByteBuffer>> missingBuffers = remoteHost.getMissingBuffers(localHost.buffersByPartition);
for (Map.Entry<PartitionId, List<ByteBuffer>> entry : missingBuffers.entrySet()) {
if (partitionIds.indexOf(entry.getKey()) % 2 == 0) {
assertEquals("Missing buffers count mismatch", 13, entry.getValue().size());
} else {
assertEquals("Missing buffers count mismatch", 14, entry.getValue().size());
}
}
// 1st and 2nd iterations - no keys missing because all data is in both hosts
// 3rd iteration - 3 missing keys (one expired)
// 4th iteration - 3 missing keys (one expired) - the corrupt key also shows up as missing but is ignored later
// 5th iteration - 1 missing key (1 key from prev cycle, 1 deleted key, 1 never present key but deleted in remote)
// 6th iteration - 2 missing keys (2 entries, i.e. the put and the delete of a never-present key)
int[] missingKeysCounts = { 0, 0, 3, 3, 1, 2 };
int[] missingBuffersCount = { 12, 12, 9, 7, 6, 4 };
int expectedIndex = 0;
int missingBuffersIndex = 0;
for (int missingKeysCount : missingKeysCounts) {
expectedIndex = assertMissingKeysAndFixMissingStoreKeys(expectedIndex, batchSize - 1, batchSize, missingKeysCount, replicaThread, remoteHost, replicasToReplicate);
missingBuffers = remoteHost.getMissingBuffers(localHost.buffersByPartition);
for (Map.Entry<PartitionId, List<ByteBuffer>> entry : missingBuffers.entrySet()) {
if (partitionIds.indexOf(entry.getKey()) % 2 == 0) {
assertEquals("Missing buffers count mismatch for iteration count " + missingBuffersIndex, missingBuffersCount[missingBuffersIndex], entry.getValue().size());
} else {
assertEquals("Missing buffers count mismatch for iteration count " + missingBuffersIndex, missingBuffersCount[missingBuffersIndex] + 1, entry.getValue().size());
}
}
missingBuffersIndex++;
}
// Test the case where some partitions have missing keys, but not all.
List<ReplicaThread.ExchangeMetadataResponse> response = replicaThread.exchangeMetadata(new MockConnectionPool.MockConnection(remoteHost, batchSize), replicasToReplicate.get(remoteHost.dataNodeId));
List<RemoteReplicaInfo> remoteReplicaInfos = replicasToReplicate.get(remoteHost.dataNodeId);
assertEquals("Response should contain a response for each replica", remoteReplicaInfos.size(), response.size());
for (int i = 0; i < response.size(); i++) {
if (i % 2 == 0) {
assertEquals(0, response.get(i).missingStoreMessages.size());
assertEquals(expectedIndex, ((MockFindToken) response.get(i).remoteToken).getIndex());
} else {
assertEquals(1, response.get(i).missingStoreMessages.size());
assertEquals(expectedIndex + 1, ((MockFindToken) response.get(i).remoteToken).getIndex());
}
}
replicaThread.fixMissingStoreKeys(new MockConnectionPool.MockConnection(remoteHost, batchSize), replicasToReplicate.get(remoteHost.dataNodeId), response, false);
for (int i = 0; i < response.size(); i++) {
assertEquals("Token should have been set correctly in fixMissingStoreKeys()", response.get(i).remoteToken, replicasToReplicate.get(remoteHost.dataNodeId).get(i).getToken());
}
// 1 expired + 1 corrupt + 1 put (never present) + 1 deleted (never present) expected missing buffers
verifyNoMoreMissingKeysAndExpectedMissingBufferCount(remoteHost, localHost, replicaThread, replicasToReplicate, idsToBeIgnoredByPartition, storeKeyConverter, expectedIndex, expectedIndex + 1, 4);
}
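The assertMissingKeysAndFixMissingStoreKeys helper used throughout wraps the same two ReplicaThread calls the test issues directly above. A minimal sketch of one replication cycle against the remote host, reusing this test's objects:
// One metadata-exchange + fetch cycle: find missing keys, then copy them over.
List<ReplicaThread.ExchangeMetadataResponse> responses =
    replicaThread.exchangeMetadata(new MockConnectionPool.MockConnection(remoteHost, batchSize),
        replicasToReplicate.get(remoteHost.dataNodeId));
// Each response carries one replica's missing keys and its advanced remote token.
replicaThread.fixMissingStoreKeys(new MockConnectionPool.MockConnection(remoteHost, batchSize),
    replicasToReplicate.get(remoteHost.dataNodeId), responses, false);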
use of com.github.ambry.clustermap.DataNodeId in project ambry by linkedin.
the class ReplicationTest method onReplicaAddedOrRemovedCallbackTest.
/**
* Test cluster map change callback in {@link ReplicationManager} when any remote replicas are added or removed.
* Test setup: attempt to add 3 replicas and remove 3 replicas respectively. The three replicas are picked as follows:
* (1) 1st replica is on the current node (should be skipped)
* (2) 2nd replica is on a remote node and shares a partition with the current node (should be added or removed)
* (3) 3rd replica is on a remote node but doesn't share a partition with the current node (should be skipped)
* @throws Exception
*/
@Test
public void onReplicaAddedOrRemovedCallbackTest() throws Exception {
MockClusterMap clusterMap = new MockClusterMap();
ClusterMapConfig clusterMapConfig = new ClusterMapConfig(verifiableProperties);
StoreConfig storeConfig = new StoreConfig(verifiableProperties);
// pick a node with no special partition as current node
Set<DataNodeId> specialPartitionNodes = clusterMap.getSpecialPartition().getReplicaIds().stream().map(ReplicaId::getDataNodeId).collect(Collectors.toSet());
DataNodeId currentNode = clusterMap.getDataNodes().stream().filter(d -> !specialPartitionNodes.contains(d)).findFirst().get();
MockStoreKeyConverterFactory storeKeyConverterFactory = new MockStoreKeyConverterFactory(null, null);
storeKeyConverterFactory.setConversionMap(new HashMap<>());
StorageManager storageManager = new StorageManager(storeConfig, new DiskManagerConfig(verifiableProperties), Utils.newScheduler(1, true), new MetricRegistry(), null, clusterMap, currentNode, null, null, new MockTime(), null, new InMemAccountService(false, false));
storageManager.start();
MockReplicationManager replicationManager = new MockReplicationManager(replicationConfig, clusterMapConfig, storeConfig, storageManager, clusterMap, currentNode, storeKeyConverterFactory, null);
ClusterMapChangeListener clusterMapChangeListener = clusterMap.getClusterMapChangeListener();
// find the special partition (not on current node) and get an irrelevant replica from it
PartitionId absentPartition = clusterMap.getSpecialPartition();
ReplicaId irrelevantReplica = absentPartition.getReplicaIds().get(0);
// find an existing replica on current node and one of its peer replicas on remote node
ReplicaId existingReplica = clusterMap.getReplicaIds(currentNode).get(0);
ReplicaId peerReplicaToRemove = existingReplica.getPartitionId().getReplicaIds().stream().filter(r -> r != existingReplica).findFirst().get();
// create a new node and place a peer of existing replica on it.
MockDataNodeId remoteNode = createDataNode(getListOfPorts(PLAIN_TEXT_PORT_START_NUMBER + 10, SSL_PORT_START_NUMBER + 10, HTTP2_PORT_START_NUMBER + 10), clusterMap.getDatacenterName((byte) 0), 3);
ReplicaId addedReplica = new MockReplicaId(remoteNode.getPort(), (MockPartitionId) existingReplica.getPartitionId(), remoteNode, 0);
// populate added replica and removed replica lists
List<ReplicaId> replicasToAdd = new ArrayList<>(Arrays.asList(existingReplica, addedReplica, irrelevantReplica));
List<ReplicaId> replicasToRemove = new ArrayList<>(Arrays.asList(existingReplica, peerReplicaToRemove, irrelevantReplica));
PartitionInfo partitionInfo = replicationManager.getPartitionToPartitionInfoMap().get(existingReplica.getPartitionId());
assertNotNull("PartitionInfo is not found", partitionInfo);
RemoteReplicaInfo peerReplicaInfo = partitionInfo.getRemoteReplicaInfos().stream().filter(info -> info.getReplicaId() == peerReplicaToRemove).findFirst().get();
// get the replica-thread for this peer replica
ReplicaThread peerReplicaThread = peerReplicaInfo.getReplicaThread();
// Test Case 1: replication manager encountered exception during startup (remote replica addition/removal will be skipped)
replicationManager.startWithException();
clusterMapChangeListener.onReplicaAddedOrRemoved(replicasToAdd, replicasToRemove);
// verify that PartitionInfo stays unchanged
verifyRemoteReplicaInfo(partitionInfo, addedReplica, false);
verifyRemoteReplicaInfo(partitionInfo, peerReplicaToRemove, true);
// Test Case 2: startup latch is interrupted
CountDownLatch initialLatch = replicationManager.startupLatch;
CountDownLatch mockLatch = Mockito.mock(CountDownLatch.class);
doThrow(new InterruptedException()).when(mockLatch).await();
replicationManager.startupLatch = mockLatch;
try {
clusterMapChangeListener.onReplicaAddedOrRemoved(replicasToAdd, replicasToRemove);
fail("should fail because startup latch is interrupted");
} catch (IllegalStateException e) {
// expected
}
replicationManager.startupLatch = initialLatch;
// Test Case 3: replication manager is successfully started
replicationManager.start();
clusterMapChangeListener.onReplicaAddedOrRemoved(replicasToAdd, replicasToRemove);
// verify that PartitionInfo has latest remote replica infos
verifyRemoteReplicaInfo(partitionInfo, addedReplica, true);
verifyRemoteReplicaInfo(partitionInfo, peerReplicaToRemove, false);
verifyRemoteReplicaInfo(partitionInfo, irrelevantReplica, false);
// verify new added replica is assigned to a certain thread
ReplicaThread replicaThread = replicationManager.getDataNodeIdToReplicaThreadMap().get(addedReplica.getDataNodeId());
assertNotNull("There is no ReplicaThread assocated with new replica", replicaThread);
Optional<RemoteReplicaInfo> findResult = replicaThread.getRemoteReplicaInfos().get(remoteNode).stream().filter(info -> info.getReplicaId() == addedReplica).findAny();
assertTrue("New added remote replica info should exist in corresponding thread", findResult.isPresent());
// verify the removed replica info's thread is null
assertNull("Thread in removed replica info should be null", peerReplicaInfo.getReplicaThread());
findResult = peerReplicaThread.getRemoteReplicaInfos().get(peerReplicaToRemove.getDataNodeId()).stream().filter(info -> info.getReplicaId() == peerReplicaToRemove).findAny();
assertFalse("Previous replica thread should not contain RemoteReplicaInfo that is already removed", findResult.isPresent());
storageManager.shutdown();
}
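The core of the callback path exercised above is small. A minimal sketch of the add path, assuming the replication manager has already started successfully (as in Test Case 3) and reusing this test's objects and helpers:
// The cluster map pushes topology changes to the replication manager via its listener.
ClusterMapChangeListener listener = clusterMap.getClusterMapChangeListener();
listener.onReplicaAddedOrRemoved(Collections.singletonList(addedReplica), Collections.emptyList());
// The PartitionInfo for the shared partition should now track the new peer,
// and the peer should be assigned to a replica thread keyed by its data node.
verifyRemoteReplicaInfo(partitionInfo, addedReplica, true);
ReplicaThread assignedThread = replicationManager.getDataNodeIdToReplicaThreadMap().get(addedReplica.getDataNodeId());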
use of com.github.ambry.clustermap.DataNodeId in project ambry by linkedin.
the class LeaderBasedReplicationTest method replicaThreadLeaderBasedReplicationStandByCrossColoFetchTest.
/**
* Test leader based replication to verify cross colo gets for standby replicas after they have timed out
* waiting for missing keys.
* @throws Exception
*/
@Test
public void replicaThreadLeaderBasedReplicationStandByCrossColoFetchTest() throws Exception {
Map<DataNodeId, MockHost> hosts = new HashMap<>();
hosts.put(remoteNodeInLocalDC, remoteHostInLocalDC);
hosts.put(remoteNodeInRemoteDC, remoteHostInRemoteDC);
int batchSize = 5;
int numOfMessagesOnRemoteNodeInLocalDC = 3;
int numOfMessagesOnRemoteNodeInRemoteDC = 10;
ConnectionPool mockConnectionPool = new MockConnectionPool(hosts, clusterMap, batchSize);
Pair<StorageManager, ReplicationManager> managers = createStorageManagerAndReplicationManager(clusterMap, clusterMapConfig, mockHelixParticipant, mockConnectionPool);
StorageManager storageManager = managers.getFirst();
MockReplicationManager replicationManager = (MockReplicationManager) managers.getSecond();
// set mock local stores on all remoteReplicaInfos which will be used during replication.
for (PartitionId partitionId : replicationManager.partitionToPartitionInfo.keySet()) {
localHost.addStore(partitionId, null);
Store localStore = localHost.getStore(partitionId);
localStore.start();
List<RemoteReplicaInfo> remoteReplicaInfos = replicationManager.partitionToPartitionInfo.get(partitionId).getRemoteReplicaInfos();
remoteReplicaInfos.forEach(remoteReplicaInfo -> remoteReplicaInfo.setLocalStore(localStore));
}
// get remote replicas and replica thread for remote host on local datacenter
ReplicaThread intraColoReplicaThread = replicationManager.dataNodeIdToReplicaThread.get(remoteNodeInLocalDC);
List<RemoteReplicaInfo> remoteReplicaInfosForLocalDC = intraColoReplicaThread.getRemoteReplicaInfos().get(remoteNodeInLocalDC);
// get remote replicas and replica thread for remote host on remote datacenter
ReplicaThread crossColoReplicaThread = replicationManager.dataNodeIdToReplicaThread.get(remoteNodeInRemoteDC);
List<RemoteReplicaInfo> remoteReplicaInfosForRemoteDC = crossColoReplicaThread.getRemoteReplicaInfos().get(remoteNodeInRemoteDC);
// mock helix transition state from standby to leader for local leader partitions
List<? extends ReplicaId> replicaIds = clusterMap.getReplicaIds(replicationManager.dataNodeId);
for (ReplicaId replicaId : replicaIds) {
MockReplicaId mockReplicaId = (MockReplicaId) replicaId;
if (mockReplicaId.getReplicaState() == ReplicaState.LEADER) {
MockPartitionId mockPartitionId = (MockPartitionId) replicaId.getPartitionId();
mockHelixParticipant.onPartitionBecomeLeaderFromStandby(mockPartitionId.toPathString());
}
}
// Add put messages to all partitions on remoteHostInLocalDC and remoteHostInRemoteDC
List<PartitionId> partitionIds = clusterMap.getWritablePartitionIds(null);
for (PartitionId partitionId : partitionIds) {
// add 3 put messages to the remoteNodeInLocalDC and remoteNodeInRemoteDC from which local host will replicate.
addPutMessagesToReplicasOfPartition(partitionId, Arrays.asList(remoteHostInLocalDC, remoteHostInRemoteDC), numOfMessagesOnRemoteNodeInLocalDC);
// add the remaining put messages to remoteNodeInRemoteDC only. Since these messages are not present in remoteNodeInLocalDC,
// they don't reach the local node via intra-dc replication. We should see remote standby replicas time out waiting for these
// messages and a cross colo fetch happening.
addPutMessagesToReplicasOfPartition(partitionId, Collections.singletonList(remoteHostInRemoteDC), numOfMessagesOnRemoteNodeInRemoteDC - numOfMessagesOnRemoteNodeInLocalDC);
}
// Choose partitions that are leaders on both local and remote nodes
Set<ReplicaId> leaderReplicasOnLocalAndRemoteNodes = getRemoteLeaderReplicasWithLeaderPartitionsOnLocalNode(clusterMap, replicationManager.dataNodeId, remoteNodeInRemoteDC);
// replicate with remote node in remote DC
crossColoReplicaThread.replicate();
// missing messages are not fetched yet.
for (RemoteReplicaInfo remoteReplicaInfo : remoteReplicaInfosForRemoteDC) {
if (leaderReplicasOnLocalAndRemoteNodes.contains(remoteReplicaInfo.getReplicaId())) {
assertEquals("remote token mismatch for leader replicas", ((MockFindToken) remoteReplicaInfo.getToken()).getIndex(), batchSize - 1);
} else {
assertEquals("remote token should not move forward for standby replicas until missing keys are fetched", ((MockFindToken) remoteReplicaInfo.getToken()).getIndex(), 0);
}
}
// Replicate with remote node in local dc
intraColoReplicaThread.replicate();
// verify that remote token will be moved for all replicas as it is intra-dc replication
for (RemoteReplicaInfo remoteReplicaInfo : remoteReplicaInfosForLocalDC) {
assertEquals("mismatch in remote token set for intra colo replicas", ((MockFindToken) remoteReplicaInfo.getToken()).getIndex(), numOfMessagesOnRemoteNodeInLocalDC - 1);
}
// process missing keys from the previous metadata exchange; some of them should have arrived at the local node by now
// via intra-dc replication
for (RemoteReplicaInfo remoteReplicaInfo : remoteReplicaInfosForRemoteDC) {
crossColoReplicaThread.processMissingKeysFromPreviousMetadataResponse(remoteReplicaInfo);
}
// verify that the remote token will remain 0 for standby replicas as one message in its missing set is not fetched yet.
for (RemoteReplicaInfo remoteReplicaInfo : remoteReplicaInfosForRemoteDC) {
if (!leaderReplicasOnLocalAndRemoteNodes.contains(remoteReplicaInfo.getReplicaId())) {
assertTrue("missing store messages should still exist for standby replicas", crossColoReplicaThread.containsMissingKeysFromPreviousMetadataExchange(remoteReplicaInfo));
assertEquals("remote token should not move forward for standby replicas until missing keys are fetched", ((MockFindToken) remoteReplicaInfo.getToken()).getIndex(), 0);
assertEquals("incorrect number of missing store messages found for standby replicas", remoteReplicaInfo.getExchangeMetadataResponse().missingStoreMessages.size(), batchSize - numOfMessagesOnRemoteNodeInLocalDC);
}
}
// Attempt replication with remoteNodeInRemoteDC, we should not see any replication attempt for standby replicas
// and their remote token stays as 0.
crossColoReplicaThread.replicate();
for (RemoteReplicaInfo remoteReplicaInfo : remoteReplicaInfosForRemoteDC) {
if (!leaderReplicasOnLocalAndRemoteNodes.contains(remoteReplicaInfo.getReplicaId())) {
assertEquals("remote token should not move forward for standby replicas until missing keys are fetched", ((MockFindToken) remoteReplicaInfo.getToken()).getIndex(), 0);
assertTrue("missing store messages should still exist for standby replicas", crossColoReplicaThread.containsMissingKeysFromPreviousMetadataExchange(remoteReplicaInfo));
}
}
// Move time forward by replicationStandbyWaitTimeoutToTriggerCrossColoFetchSeconds+1 seconds and attempt replication.
// We should see cross colo fetch for standby replicas now since missing keys haven't arrived for
// replicationConfig.replicationStandbyWaitTimeoutToTriggerCrossColoFetchSeconds.
time.sleep((replicationConfig.replicationStandbyWaitTimeoutToTriggerCrossColoFetchSeconds + 1) * 1000);
// verify that we get the list of standby replicas that timed out on no progress
Set<RemoteReplicaInfo> allStandbyReplicas = remoteReplicaInfosForRemoteDC.stream().filter(info -> !leaderReplicasOnLocalAndRemoteNodes.contains(info.getReplicaId())).collect(Collectors.toSet());
assertEquals("mismatch in list of standby replicas timed out on no progress", new HashSet<>(crossColoReplicaThread.getRemoteStandbyReplicasTimedOutOnNoProgress(remoteReplicaInfosForRemoteDC)), allStandbyReplicas);
crossColoReplicaThread.replicate();
// token index for all standby replicas will move forward after fetching missing keys themselves
for (RemoteReplicaInfo remoteReplicaInfo : remoteReplicaInfosForRemoteDC) {
if (!leaderReplicasOnLocalAndRemoteNodes.contains(remoteReplicaInfo.getReplicaId())) {
assertEquals("mismatch in remote token set for standby cross colo replicas", ((MockFindToken) remoteReplicaInfo.getToken()).getIndex(), batchSize - 1);
assertFalse("missing store messages should be empty for standby replicas now", crossColoReplicaThread.containsMissingKeysFromPreviousMetadataExchange(remoteReplicaInfo));
}
}
// verify replication metrics to track number of cross colo get requests for standby replicas. If all replicas are
// leaders, we should have 0 cross colo get requests.
String remoteDataCenter = remoteReplicaInfosForRemoteDC.get(0).getReplicaId().getDataNodeId().getDatacenterName();
assertEquals("mismatch in number of cross colo get requests tracked for standby replicas", crossColoReplicaThread.getReplicationMetrics().interColoReplicationGetRequestCountForStandbyReplicas.get(remoteDataCenter).getCount(), leaderReplicasOnLocalAndRemoteNodes.size() != remoteReplicaInfosForRemoteDC.size() ? 1 : 0);
storageManager.shutdown();
}
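The standby cross-colo fetch flow verified above reduces to a short sequence. A condensed sketch using only the ReplicaThread, config and mock-time calls exercised in this test:
// 1. After intra-dc replication, reconcile standbys against their pending missing keys.
for (RemoteReplicaInfo info : remoteReplicaInfosForRemoteDC) {
  crossColoReplicaThread.processMissingKeysFromPreviousMetadataResponse(info);
}
// 2. Standbys with no progress are flagged once the configured wait elapses.
time.sleep((replicationConfig.replicationStandbyWaitTimeoutToTriggerCrossColoFetchSeconds + 1) * 1000);
Set<RemoteReplicaInfo> timedOut = new HashSet<>(
    crossColoReplicaThread.getRemoteStandbyReplicasTimedOutOnNoProgress(remoteReplicaInfosForRemoteDC));
// 3. The next replicate() call fetches the flagged replicas' missing keys cross-colo.
crossColoReplicaThread.replicate();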
use of com.github.ambry.clustermap.DataNodeId in project ambry by linkedin.
the class MockConnectionPool method checkOutConnection.
@Override
public ConnectedChannel checkOutConnection(String host, Port port, long timeout) {
DataNodeId dataNodeId = clusterMap.getDataNodeId(host, port.getPort());
MockHost hostObj = hosts.get(dataNodeId);
return new MockConnection(hostObj, maxEntriesToReturn);
}
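A minimal usage sketch, assuming the hosts map, cluster map and batchSize built in the leader-based test above, and assuming DataNodeId's getHostname()/getPortToConnectTo() accessors:
ConnectionPool pool = new MockConnectionPool(hosts, clusterMap, batchSize);
// host/port resolve through the cluster map to a DataNodeId, which keys into the hosts map;
// the timeout argument is unused by this mock.
ConnectedChannel channel = pool.checkOutConnection(remoteNodeInLocalDC.getHostname(), remoteNodeInLocalDC.getPortToConnectTo(), 1000);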