Use of com.github.ambry.clustermap.ClusterMap in the ambry project by LinkedIn.
From class ReplicationTest, method dcLevelReplicationLagMetricsTest.
/**
 * Tests the per-datacenter gauges that track how far remote replicas lag behind the local replicas.
 * <p>
 * Every datacenter gauge is expected to start at an average lag of 18.0. Once each peer replica in the local
 * datacenter is reported to have read 18 bytes, the local average must drop to 0.0 while the averages of the other
 * datacenters stay at 18.0.
 * @throws Exception if setting up the managers or reading the metrics fails.
 */
@Test
public void dcLevelReplicationLagMetricsTest() throws Exception {
  MockClusterMap clusterMap = new MockClusterMap();
  ClusterMapConfig clusterMapConfig = new ClusterMapConfig(verifiableProperties);
  Pair<StorageManager, ReplicationManager> managers =
      createStorageManagerAndReplicationManager(clusterMap, clusterMapConfig, null);
  MockReplicationManager replicationManager = (MockReplicationManager) managers.getSecond();
  // every datacenter other than the one hosting the first data node is treated as remote
  Set<String> remoteDcNames = new HashSet<>(Arrays.asList("DC1", "DC2", "DC3"));
  String localDcName = clusterMap.getDataNodeIds().get(0).getDatacenterName();
  remoteDcNames.remove(localDcName);
  // before updating replication lag, make sure avg lag in each dc is 18.0
  // NOTE(review): 18.0 is presumably the size of the mock store set up outside this method - confirm
  MetricRegistry metricRegistry = replicationManager.getMetricRegistry();
  String prefix = ReplicaThread.class.getName() + ".";
  String avgMetricSuffix = "-avgReplicaLagFromLocalInBytes";
  String localDcGaugeName = prefix + localDcName + avgMetricSuffix;
  assertEquals("Average replication lag in local dc is not expected", 18.0,
      metricRegistry.getGauges().get(localDcGaugeName).getValue());
  for (String remoteDc : remoteDcNames) {
    assertEquals("Average replication lag in remote dc is not expected", 18.0,
        metricRegistry.getGauges().get(prefix + remoteDc + avgMetricSuffix).getValue());
  }
  // iterate over all partitions on current node and make sure all their peer replicas in local dc have fully caught up
  for (Map.Entry<PartitionId, PartitionInfo> entry : replicationManager.partitionToPartitionInfo.entrySet()) {
    PartitionId localPartition = entry.getKey();
    PartitionInfo partitionInfo = entry.getValue();
    // snapshot the peers that live in the local dc before reporting progress for them
    List<RemoteReplicaInfo> peersInLocalDc = partitionInfo.getRemoteReplicaInfos()
        .stream()
        .filter(info -> info.getReplicaId().getDataNodeId().getDatacenterName().equals(localDcName))
        .collect(Collectors.toList());
    for (RemoteReplicaInfo peerInfo : peersInLocalDc) {
      ReplicaId peerReplicaInLocalDc = peerInfo.getReplicaId();
      // report 18 bytes read by this peer, i.e. the full lag asserted above
      replicationManager.updateTotalBytesReadByRemoteReplica(localPartition,
          peerReplicaInLocalDc.getDataNodeId().getHostname(), peerReplicaInLocalDc.getReplicaPath(), 18);
    }
  }
  // verify that after updating replication lag for all peer replicas in local dc, the avg lag in local dc has updated
  assertEquals("Average replication lag in local dc is not expected", 0.0,
      metricRegistry.getGauges().get(localDcGaugeName).getValue());
  // for remote dc, the avg lag is still 18.0
  for (String remoteDc : remoteDcNames) {
    assertEquals("Average replication lag in remote dc is not expected", 18.0,
        metricRegistry.getGauges().get(prefix + remoteDc + avgMetricSuffix).getValue());
  }
}
Use of com.github.ambry.clustermap.ClusterMap in the ambry project by LinkedIn.
From class ReplicationTest, method replicaThreadLifeVersionLocalLessThanRemote_MissingPuts.
/**
 * Tests replication when the local store is missing put records that have a lifeVersion greater than 0 on the remote.
 * <p>
 * For every writable partition the remote host is populated with puts at lifeVersion 0 and 1, some of which are then
 * TTL updated, deleted, or deleted and undeleted; one extra put is written to both hosts. A single
 * exchangeMetadata/fixMissingStoreKeys round with a batch size of 100 is run and the local host is compared with the
 * remote host.
 * @throws Exception if populating the hosts or any replication step fails.
 */
@Test
public void replicaThreadLifeVersionLocalLessThanRemote_MissingPuts() throws Exception {
  MockClusterMap clusterMap = new MockClusterMap();
  Pair<MockHost, MockHost> localAndRemoteHosts = getLocalAndRemoteHosts(clusterMap);
  MockHost localHost = localAndRemoteHosts.getFirst();
  MockHost remoteHost = localAndRemoteHosts.getSecond();
  MockStoreKeyConverterFactory storeKeyConverterFactory = new MockStoreKeyConverterFactory(null, null);
  storeKeyConverterFactory.setConversionMap(new HashMap<>());
  storeKeyConverterFactory.setReturnInputIfAbsent(true);
  MockStoreKeyConverterFactory.MockStoreKeyConverter storeKeyConverter =
      storeKeyConverterFactory.getStoreKeyConverter();
  storeKeyConverter.setConversionMap(new HashMap<>());
  StoreKeyFactory storeKeyFactory = new BlobIdFactory(clusterMap);
  Transformer transformer = new BlobIdTransformer(storeKeyFactory, storeKeyConverter);

  List<PartitionId> partitionIds = clusterMap.getWritablePartitionIds(null);
  Map<PartitionId, List<StoreKey>> idsToBeIgnoredByPartition = new HashMap<>();
  Map<PartitionId, List<StoreKey>> idsToBeTtlUpdatedByPartition = new HashMap<>();
  List<MockHost> remoteOnly = Collections.singletonList(remoteHost);
  short lifeVersion = 1;
  for (PartitionId partitionId : partitionIds) {
    List<StoreKey> toBeIgnored = new ArrayList<>();
    List<StoreKey> toBeTtlUpdated = new ArrayList<>();
    // Adding 1 put to remoteHost at lifeVersion 0
    addPutMessagesToReplicasOfPartition(partitionId, remoteOnly, 1);
    // Adding 1 put to remoteHost at lifeVersion 1
    addPutMessagesToReplicasOfPartition(partitionId, remoteOnly, lifeVersion, 1);

    // Adding one put to remoteHost at lifeVersion 1, which would be ttl updated later at lifeVersion 1
    StoreKey toTtlUpdateId = addPutMessagesToReplicasOfPartition(partitionId, remoteOnly, lifeVersion, 1).get(0);
    addTtlUpdateMessagesToReplicasOfPartition(partitionId, toTtlUpdateId, remoteOnly, UPDATED_EXPIRY_TIME_MS,
        lifeVersion);
    toBeTtlUpdated.add(toTtlUpdateId);

    // Adding one put to remoteHost at lifeVersion 0, which would be ttl updated later at lifeVersion 1
    toTtlUpdateId = addPutMessagesToReplicasOfPartition(partitionId, remoteOnly, 1).get(0);
    addTtlUpdateMessagesToReplicasOfPartition(partitionId, toTtlUpdateId, remoteOnly, UPDATED_EXPIRY_TIME_MS,
        lifeVersion);
    toBeTtlUpdated.add(toTtlUpdateId);

    // Adding one put to remoteHost, which would be deleted later
    StoreKey toDeleteId = addPutMessagesToReplicasOfPartition(partitionId, remoteOnly, lifeVersion, 1).get(0);
    addDeleteMessagesToReplicasOfPartition(partitionId, toDeleteId, remoteOnly, lifeVersion, EXPIRY_TIME_MS);
    toBeIgnored.add(toDeleteId);

    // Adding one put to remoteHost, which would be ttl updated and deleted later
    StoreKey toDeleteAndTtlUpdateId =
        addPutMessagesToReplicasOfPartition(partitionId, remoteOnly, lifeVersion, 1).get(0);
    addTtlUpdateMessagesToReplicasOfPartition(partitionId, toDeleteAndTtlUpdateId, remoteOnly,
        UPDATED_EXPIRY_TIME_MS, lifeVersion);
    toBeTtlUpdated.add(toDeleteAndTtlUpdateId);
    addDeleteMessagesToReplicasOfPartition(partitionId, toDeleteAndTtlUpdateId, remoteOnly, lifeVersion,
        UPDATED_EXPIRY_TIME_MS);
    toBeIgnored.add(toDeleteAndTtlUpdateId);

    // Adding one put to remoteHost at lifeVersion 0, delete it and then add undelete at lifeVersion 1
    StoreKey deleteAndUndeleteId = addPutMessagesToReplicasOfPartition(partitionId, remoteOnly, 1).get(0);
    addDeleteMessagesToReplicasOfPartition(partitionId, deleteAndUndeleteId, remoteOnly, (short) 0, EXPIRY_TIME_MS);
    addUndeleteMessagesToReplicasOfPartition(partitionId, deleteAndUndeleteId, remoteOnly, lifeVersion);

    idsToBeIgnoredByPartition.put(partitionId, toBeIgnored);
    idsToBeTtlUpdatedByPartition.put(partitionId, toBeTtlUpdated);

    // Adding one put to both remote and local host.
    addPutMessagesToReplicasOfPartition(partitionId, Arrays.asList(localHost, remoteHost), lifeVersion, 1);
  }

  // the batch is large enough for one exchange to cover every record added above
  int batchSize = 100;
  Pair<Map<DataNodeId, List<RemoteReplicaInfo>>, ReplicaThread> replicasAndThread =
      getRemoteReplicasAndReplicaThread(batchSize, clusterMap, localHost, remoteHost, storeKeyConverter, transformer,
          null, null);
  List<RemoteReplicaInfo> remoteReplicaInfos = replicasAndThread.getFirst().get(remoteHost.dataNodeId);
  ReplicaThread replicaThread = replicasAndThread.getSecond();

  // NOTE(review): presumably the 7 remote-only puts minus the 2 whose final state is deleted - confirm
  int missingKeyCount = 5;
  List<ReplicaThread.ExchangeMetadataResponse> response =
      replicaThread.exchangeMetadata(new MockConnectionPool.MockConnection(remoteHost, batchSize),
          remoteReplicaInfos);
  assertEquals("Response should contain a response for each replica", remoteReplicaInfos.size(), response.size());
  for (int i = 0; i < response.size(); i++) {
    assertEquals(missingKeyCount, response.get(i).missingStoreMessages.size());
    remoteReplicaInfos.get(i).setToken(response.get(i).remoteToken);
  }
  replicaThread.fixMissingStoreKeys(new MockConnectionPool.MockConnection(remoteHost, batchSize),
      remoteReplicaInfos, response, false);
  for (int i = 0; i < response.size(); i++) {
    assertEquals("Token should have been set correctly in fixMissingStoreKeys()", response.get(i).remoteToken,
        remoteReplicaInfos.get(i).getToken());
  }
  // Don't compare buffers here, PutBuffer might be different since we might change the lifeVersion.
  // NOTE(review): 8 is presumably 1 put already on local + 5 replicated puts + 2 ttl updates - confirm
  for (Map.Entry<PartitionId, List<MessageInfo>> localInfoEntry : localHost.infosByPartition.entrySet()) {
    assertEquals("MessageInfo number mismatch", 8, localInfoEntry.getValue().size());
  }
  checkBlobMessagesAreEqualInLocalAndRemoteHosts(localHost, remoteHost, idsToBeIgnoredByPartition,
      idsToBeTtlUpdatedByPartition);
}
Use of com.github.ambry.clustermap.ClusterMap in the ambry project by LinkedIn.
From class ReplicationTest, method replicationAllPauseTest.
/**
 * Tests pausing all partitions and makes sure that the replica thread pauses. Also tests that it resumes when one
 * eligible partition is re-enabled and that replication completes successfully.
 * <p>
 * Every timed latch wait is asserted, so a timeout fails the test at the step that stalled instead of surfacing
 * later as an unrelated mismatch.
 * @throws Exception if setup fails or if the store callback recorded an exception.
 */
@Test
public void replicationAllPauseTest() throws Exception {
  MockClusterMap clusterMap = new MockClusterMap();
  Pair<MockHost, MockHost> localAndRemoteHosts = getLocalAndRemoteHosts(clusterMap);
  MockHost localHost = localAndRemoteHosts.getFirst();
  MockHost remoteHost = localAndRemoteHosts.getSecond();
  List<PartitionId> partitionIds = clusterMap.getAllPartitionIds(null);
  for (PartitionId partitionId : partitionIds) {
    // add 10 messages into each partition and place it on remote host only
    addPutMessagesToReplicasOfPartition(partitionId, Collections.singletonList(remoteHost), 10);
  }
  StoreKeyFactory storeKeyFactory = Utils.getObj("com.github.ambry.commons.BlobIdFactory", clusterMap);
  MockStoreKeyConverterFactory mockStoreKeyConverterFactory = new MockStoreKeyConverterFactory(null, null);
  mockStoreKeyConverterFactory.setReturnInputIfAbsent(true);
  mockStoreKeyConverterFactory.setConversionMap(new HashMap<>());
  int batchSize = 4;
  StoreKeyConverter storeKeyConverter = mockStoreKeyConverterFactory.getStoreKeyConverter();
  Transformer transformer = new ValidatingTransformer(storeKeyFactory, storeKeyConverter);
  CountDownLatch readyToPause = new CountDownLatch(1);
  CountDownLatch readyToProceed = new CountDownLatch(1);
  AtomicReference<CountDownLatch> reachedLimitLatch = new AtomicReference<>(new CountDownLatch(1));
  AtomicReference<Exception> exception = new AtomicReference<>();
  Pair<Map<DataNodeId, List<RemoteReplicaInfo>>, ReplicaThread> replicasAndThread =
      getRemoteReplicasAndReplicaThread(batchSize, clusterMap, localHost, remoteHost, storeKeyConverter, transformer,
          (store, messageInfos) -> {
            try {
              // tell the test that replication has started, then hold here until the test has paused everything
              readyToPause.countDown();
              readyToProceed.await();
              // a store holding as many infos as the remote has for that partition counts as caught up
              if (store.messageInfos.size() == remoteHost.infosByPartition.get(store.id).size()) {
                reachedLimitLatch.get().countDown();
              }
            } catch (Exception e) {
              // recorded here and rethrown by the test after shutdown
              exception.set(e);
            }
          }, null);
  ReplicaThread replicaThread = replicasAndThread.getSecond();
  Thread thread = Utils.newThread(replicaThread, false);
  thread.start();
  assertEquals("There should be no disabled partitions", 0, replicaThread.getReplicationDisabledPartitions().size());
  // wait to pause replication
  assertTrue("Replica thread did not start replicating within the timeout",
      readyToPause.await(10, TimeUnit.SECONDS));
  replicaThread.controlReplicationForPartitions(clusterMap.getAllPartitionIds(null), false);
  Set<PartitionId> expectedPaused = new HashSet<>(clusterMap.getAllPartitionIds(null));
  assertEquals("Disabled partitions sets do not match", expectedPaused,
      replicaThread.getReplicationDisabledPartitions());
  // signal the replica thread to move forward
  readyToProceed.countDown();
  // wait for the thread to go into waiting state
  assertTrue("Replica thread did not go into waiting state",
      TestUtils.waitUntilExpectedState(thread, Thread.State.WAITING, 10000));
  // unpause one partition
  replicaThread.controlReplicationForPartitions(Collections.singletonList(partitionIds.get(0)), true);
  expectedPaused.remove(partitionIds.get(0));
  assertEquals("Disabled partitions sets do not match", expectedPaused,
      replicaThread.getReplicationDisabledPartitions());
  // wait for it to catch up; the message carries any callback exception so that it is not masked by the timeout
  assertTrue("Re-enabled partition did not catch up within the timeout, callback exception: " + exception.get(),
      reachedLimitLatch.get().await(10, TimeUnit.SECONDS));
  // reset limit
  reachedLimitLatch.set(new CountDownLatch(partitionIds.size() - 1));
  // unpause all partitions
  replicaThread.controlReplicationForPartitions(clusterMap.getAllPartitionIds(null), true);
  assertEquals("There should be no disabled partitions", 0, replicaThread.getReplicationDisabledPartitions().size());
  // wait until all catch up
  assertTrue("Remaining partitions did not catch up within the timeout, callback exception: " + exception.get(),
      reachedLimitLatch.get().await(10, TimeUnit.SECONDS));
  // shutdown
  replicaThread.shutdown();
  if (exception.get() != null) {
    throw exception.get();
  }
  Map<PartitionId, List<MessageInfo>> missingInfos = remoteHost.getMissingInfos(localHost.infosByPartition);
  for (Map.Entry<PartitionId, List<MessageInfo>> entry : missingInfos.entrySet()) {
    assertEquals("No infos should be missing", 0, entry.getValue().size());
  }
  Map<PartitionId, List<ByteBuffer>> missingBuffers = remoteHost.getMissingBuffers(localHost.buffersByPartition);
  for (Map.Entry<PartitionId, List<ByteBuffer>> entry : missingBuffers.entrySet()) {
    assertEquals("No buffers should be missing", 0, entry.getValue().size());
  }
}
Use of com.github.ambry.clustermap.ClusterMap in the ambry project by LinkedIn.
From class ReplicationTest, method replicaThreadTest.
/**
 * Exercises {@link ReplicaThread#exchangeMetadata(ConnectedChannel, List)} together with
 * {@link ReplicaThread#fixMissingStoreKeys(ConnectedChannel, List, List, boolean)} against a remote host that holds
 * valid puts, deletes, an expired key and a corrupt blob.
 * @throws Exception
 */
@Test
public void replicaThreadTest() throws Exception {
  MockClusterMap clusterMap = new MockClusterMap();
  Pair<MockHost, MockHost> hostPair = getLocalAndRemoteHosts(clusterMap);
  MockHost localHost = hostPair.getFirst();
  MockHost remoteHost = hostPair.getSecond();
  MockStoreKeyConverterFactory converterFactory = new MockStoreKeyConverterFactory(null, null);
  converterFactory.setConversionMap(new HashMap<>());
  converterFactory.setReturnInputIfAbsent(true);
  MockStoreKeyConverterFactory.MockStoreKeyConverter storeKeyConverter = converterFactory.getStoreKeyConverter();
  short blobIdVersion = CommonTestUtils.getCurrentBlobIdVersion();
  List<PartitionId> partitionIds = clusterMap.getWritablePartitionIds(null);
  Map<PartitionId, List<StoreKey>> ignoredIdsByPartition = new HashMap<>();
  List<MockHost> remoteOnly = Collections.singletonList(remoteHost);
  List<MockHost> bothHosts = Arrays.asList(localHost, remoteHost);

  for (int partitionIdx = 0; partitionIdx < partitionIds.size(); partitionIdx++) {
    PartitionId partitionId = partitionIds.get(partitionIdx);
    List<StoreKey> ignoredIds = new ArrayList<>();

    // 6 messages go to both hosts; the first of them gets deleted on the remote further down
    StoreKey toDeleteId = addPutMessagesToReplicasOfPartition(partitionId, bothHosts, 6).get(0);

    // an already expired message (expiry time of 1), present on the remote host only
    short expiredAccountId = Utils.getRandomShort(TestUtils.RANDOM);
    short expiredContainerId = Utils.getRandomShort(TestUtils.RANDOM);
    boolean expiredEncrypted = TestUtils.RANDOM.nextBoolean();
    StoreKey expiredId =
        new BlobId(blobIdVersion, BlobId.BlobIdType.NATIVE, ClusterMap.UNKNOWN_DATACENTER_ID, expiredAccountId,
            expiredContainerId, partitionId, expiredEncrypted, BlobId.BlobDataType.DATACHUNK);
    PutMsgInfoAndBuffer expiredPut =
        createPutMessage(expiredId, expiredAccountId, expiredContainerId, expiredEncrypted);
    MessageInfo expiredInfo =
        new MessageInfo(expiredId, expiredPut.byteBuffer.remaining(), 1, expiredAccountId, expiredContainerId,
            expiredPut.messageInfo.getOperationTimeMs());
    remoteHost.addMessage(partitionId, expiredInfo, expiredPut.byteBuffer);
    ignoredIds.add(expiredId);

    // 3 regular messages, remote host only
    addPutMessagesToReplicasOfPartition(partitionId, remoteOnly, 3);

    // a corrupt message, remote host only: every bit of its serialized form is inverted
    short corruptAccountId = Utils.getRandomShort(TestUtils.RANDOM);
    short corruptContainerId = Utils.getRandomShort(TestUtils.RANDOM);
    boolean corruptEncrypted = TestUtils.RANDOM.nextBoolean();
    StoreKey corruptId =
        new BlobId(blobIdVersion, BlobId.BlobIdType.NATIVE, ClusterMap.UNKNOWN_DATACENTER_ID, corruptAccountId,
            corruptContainerId, partitionId, corruptEncrypted, BlobId.BlobDataType.DATACHUNK);
    PutMsgInfoAndBuffer corruptPut =
        createPutMessage(corruptId, corruptAccountId, corruptContainerId, corruptEncrypted);
    byte[] rawBytes = corruptPut.byteBuffer.array();
    for (int pos = 0; pos < rawBytes.length; pos++) {
      rawBytes[pos] = (byte) ~rawBytes[pos];
    }
    remoteHost.addMessage(partitionId, corruptPut.messageInfo, corruptPut.byteBuffer);
    ignoredIds.add(corruptId);

    // 3 more regular messages, remote host only
    addPutMessagesToReplicasOfPartition(partitionId, remoteOnly, 3);

    // delete record for the very first blob, remote host only
    addDeleteMessagesToReplicasOfPartition(partitionId, toDeleteId, remoteOnly);

    // a blob that is both PUT and DELETEd on the remote host only
    StoreKey putThenDeletedId = addPutMessagesToReplicasOfPartition(partitionId, remoteOnly, 1).get(0);
    addDeleteMessagesToReplicasOfPartition(partitionId, putThenDeletedId, remoteOnly);
    ignoredIds.add(putThenDeletedId);

    // even-numbered partitions get 2 trailing messages, odd-numbered ones get 3 (remote host only)
    int trailingPuts = partitionIdx % 2 == 0 ? 2 : 3;
    addPutMessagesToReplicasOfPartition(partitionId, remoteOnly, trailingPuts);

    ignoredIdsByPartition.put(partitionId, ignoredIds);

    // the first key must not be deleted on the local host yet
    assertNull(toDeleteId + " should not be deleted in the local host",
        getMessageInfo(toDeleteId, localHost.infosByPartition.get(partitionId), true, false, false));
  }

  StoreKeyFactory storeKeyFactory = new BlobIdFactory(clusterMap);
  Transformer transformer = new BlobIdTransformer(storeKeyFactory, storeKeyConverter);
  int batchSize = 4;
  Pair<Map<DataNodeId, List<RemoteReplicaInfo>>, ReplicaThread> replicasAndThread =
      getRemoteReplicasAndReplicaThread(batchSize, clusterMap, localHost, remoteHost, storeKeyConverter, transformer,
          null, null);
  Map<DataNodeId, List<RemoteReplicaInfo>> replicasToReplicate = replicasAndThread.getFirst();
  ReplicaThread replicaThread = replicasAndThread.getSecond();

  // before any replication: 13 buffers missing for even-numbered partitions, 14 for odd-numbered ones
  Map<PartitionId, List<ByteBuffer>> missingBuffers = remoteHost.getMissingBuffers(localHost.buffersByPartition);
  for (Map.Entry<PartitionId, List<ByteBuffer>> entry : missingBuffers.entrySet()) {
    int expectedMissing = partitionIds.indexOf(entry.getKey()) % 2 == 0 ? 13 : 14;
    assertEquals("Missing buffers count mismatch", expectedMissing, entry.getValue().size());
  }

  // 1st and 2nd iterations - no keys missing because all data is in both hosts
  // 3rd iteration - 3 missing keys (one expired)
  // 4th iteration - 3 missing keys (one expired) - the corrupt key also shows up as missing but is ignored later
  // 5th iteration - 1 missing key (1 key from prev cycle, 1 deleted key, 1 never present key but deleted in remote)
  // 6th iteration - 2 missing keys (2 entries i.e put,delete of never present key)
  int[] missingKeysCounts = {0, 0, 3, 3, 1, 2};
  int[] missingBuffersCount = {12, 12, 9, 7, 6, 4};
  int expectedIndex = 0;
  for (int iteration = 0; iteration < missingKeysCounts.length; iteration++) {
    expectedIndex =
        assertMissingKeysAndFixMissingStoreKeys(expectedIndex, batchSize - 1, batchSize, missingKeysCounts[iteration],
            replicaThread, remoteHost, replicasToReplicate);
    missingBuffers = remoteHost.getMissingBuffers(localHost.buffersByPartition);
    for (Map.Entry<PartitionId, List<ByteBuffer>> entry : missingBuffers.entrySet()) {
      // odd-numbered partitions carry one extra remote-only message
      int oddExtra = partitionIds.indexOf(entry.getKey()) % 2 == 0 ? 0 : 1;
      assertEquals("Missing buffers count mismatch for iteration count " + iteration,
          missingBuffersCount[iteration] + oddExtra, entry.getValue().size());
    }
  }

  // Test the case where some partitions have missing keys, but not all.
  List<RemoteReplicaInfo> remoteReplicaInfos = replicasToReplicate.get(remoteHost.dataNodeId);
  List<ReplicaThread.ExchangeMetadataResponse> response =
      replicaThread.exchangeMetadata(new MockConnectionPool.MockConnection(remoteHost, batchSize),
          remoteReplicaInfos);
  assertEquals("Response should contain a response for each replica", remoteReplicaInfos.size(), response.size());
  for (int i = 0; i < response.size(); i++) {
    // even positions expect nothing missing; odd positions expect one more key and a token one step further
    int oddExtra = i % 2 == 0 ? 0 : 1;
    assertEquals(oddExtra, response.get(i).missingStoreMessages.size());
    assertEquals(expectedIndex + oddExtra, ((MockFindToken) response.get(i).remoteToken).getIndex());
  }
  replicaThread.fixMissingStoreKeys(new MockConnectionPool.MockConnection(remoteHost, batchSize),
      remoteReplicaInfos, response, false);
  for (int i = 0; i < response.size(); i++) {
    assertEquals("Token should have been set correctly in fixMissingStoreKeys()", response.get(i).remoteToken,
        remoteReplicaInfos.get(i).getToken());
  }

  // 1 expired + 1 corrupt + 1 put (never present) + 1 deleted (never present) expected missing buffers
  verifyNoMoreMissingKeysAndExpectedMissingBufferCount(remoteHost, localHost, replicaThread, replicasToReplicate,
      ignoredIdsByPartition, storeKeyConverter, expectedIndex, expectedIndex + 1, 4);
}
Use of com.github.ambry.clustermap.ClusterMap in the ambry project by LinkedIn.
From class ReplicationTest, method ttlUpdateReplicationTest.
/**
* Tests replication of TTL updates.
* <p>
* Three mock hosts are populated for every writable partition: the local host, the remote host and an
* "expected local" host that holds the records the local host is expected to contain once replication is done.
* The test then runs exchangeMetadata/fixMissingStoreKeys rounds with a batch size of 4 and checks, after each
* round, the missing key count, the token index and how many buffers of expected local are still missing on local.
* Finally the merged per-key state (deleted / TTL updated) of local is validated against remote.
* @throws Exception
*/
@Test
public void ttlUpdateReplicationTest() throws Exception {
MockClusterMap clusterMap = new MockClusterMap();
Pair<MockHost, MockHost> localAndRemoteHosts = getLocalAndRemoteHosts(clusterMap);
MockHost localHost = localAndRemoteHosts.getFirst();
MockHost remoteHost = localAndRemoteHosts.getSecond();
// reference host built on the local data node: it receives the records local is expected to end up with
MockHost expectedLocalHost = new MockHost(localHost.dataNodeId, clusterMap);
MockStoreKeyConverterFactory storeKeyConverterFactory = new MockStoreKeyConverterFactory(null, null);
storeKeyConverterFactory.setConversionMap(new HashMap<>());
storeKeyConverterFactory.setReturnInputIfAbsent(true);
MockStoreKeyConverterFactory.MockStoreKeyConverter storeKeyConverter = storeKeyConverterFactory.getStoreKeyConverter();
// this map is handed to the converter here and filled in per partition below (b0->b0p, b1->b1p, b2->b2p, b3->null)
Map<StoreKey, StoreKey> conversionMap = new HashMap<>();
storeKeyConverter.setConversionMap(conversionMap);
StoreKeyFactory storeKeyFactory = new BlobIdFactory(clusterMap);
Transformer transformer = new BlobIdTransformer(storeKeyFactory, storeKeyConverter);
List<PartitionId> partitionIds = clusterMap.getWritablePartitionIds(null);
int numMessagesInEachPart = 0;
Map<PartitionId, StoreKey> idsDeletedLocallyByPartition = new HashMap<>();
// host combinations used to route each record to the right set of hosts
List<MockHost> remoteHostOnly = Collections.singletonList(remoteHost);
List<MockHost> expectedLocalHostOnly = Collections.singletonList(expectedLocalHost);
List<MockHost> localHostAndExpectedLocalHost = Arrays.asList(localHost, expectedLocalHost);
List<MockHost> remoteHostAndExpectedLocalHost = Arrays.asList(remoteHost, expectedLocalHost);
List<MockHost> allHosts = Arrays.asList(localHost, expectedLocalHost, remoteHost);
for (PartitionId pid : partitionIds) {
// add 3 put messages to both hosts (also add to expectedLocal)
List<StoreKey> ids = addPutMessagesToReplicasOfPartition(pid, allHosts, 3);
// delete 1 of the messages in the local host only
addDeleteMessagesToReplicasOfPartition(pid, ids.get(0), localHostAndExpectedLocalHost);
idsDeletedLocallyByPartition.put(pid, ids.get(0));
// ttl update 1 of the messages in the local host only
addTtlUpdateMessagesToReplicasOfPartition(pid, ids.get(1), localHostAndExpectedLocalHost, UPDATED_EXPIRY_TIME_MS);
// add 2 put messages that local does not have: the first (id3) goes to remote only, the second (id4) goes to
// remote and expected local
ids.addAll(addPutMessagesToReplicasOfPartition(pid, remoteHostOnly, 1));
ids.addAll(addPutMessagesToReplicasOfPartition(pid, remoteHostAndExpectedLocalHost, 1));
// ttl update all 5 put messages on remote; the updates of ids[2] and ids[4] are also added to expected local
for (int i = ids.size() - 1; i >= 0; i--) {
List<MockHost> hostList = remoteHostOnly;
if (i == 2 || i == 4) {
hostList = remoteHostAndExpectedLocalHost;
}
// doing it in reverse order so that a put and ttl update arrive in the same batch
addTtlUpdateMessagesToReplicasOfPartition(pid, ids.get(i), hostList, UPDATED_EXPIRY_TIME_MS);
}
// delete one of the keys that has put and ttl update on local host
addDeleteMessagesToReplicasOfPartition(pid, ids.get(1), remoteHostAndExpectedLocalHost);
// delete one of the keys that has put and ttl update on remote only
addDeleteMessagesToReplicasOfPartition(pid, ids.get(3), remoteHostOnly);
// add a TTL update and delete message without a put msg (compaction can create such a situation)
BlobId id = generateRandomBlobId(pid);
addTtlUpdateMessagesToReplicasOfPartition(pid, id, remoteHostOnly, UPDATED_EXPIRY_TIME_MS);
addDeleteMessagesToReplicasOfPartition(pid, id, remoteHostOnly);
// message transformation test cases
// a blob ID with PUT and TTL update in both remote and local
BlobId b0 = generateRandomBlobId(pid);
BlobId b0p = generateRandomBlobId(pid);
// a blob ID with a PUT in the local and PUT and TTL update in remote (with mapping)
BlobId b1 = generateRandomBlobId(pid);
BlobId b1p = generateRandomBlobId(pid);
// a blob ID with PUT and TTL update in remote only (with mapping)
BlobId b2 = generateRandomBlobId(pid);
BlobId b2p = generateRandomBlobId(pid);
// a blob ID with PUT and TTL update in remote (no mapping)
BlobId b3 = generateRandomBlobId(pid);
conversionMap.put(b0, b0p);
conversionMap.put(b1, b1p);
conversionMap.put(b2, b2p);
conversionMap.put(b3, null);
storeKeyConverter.convert(conversionMap.keySet());
// add as required on local, remote and expected local
// only PUT of b0p and b1p on local
addPutMessagesToReplicasOfPartition(Arrays.asList(b0p, b1p), localHostAndExpectedLocalHost);
// PUT of b0,b1,b2,b3 on remote
addPutMessagesToReplicasOfPartition(Arrays.asList(b0, b1, b2, b3), remoteHostOnly);
// PUT of b2, passed through the transformer, on expected local only (b0p and b1p were already added to it above),
// so that expected local ends up with the PUTs for b0, b1 and b2
addPutMessagesToReplicasOfPartition(Collections.singletonList(b2), Collections.singletonList(transformer), expectedLocalHostOnly);
// TTL update of b0 on all hosts
addTtlUpdateMessagesToReplicasOfPartition(pid, b0p, localHostAndExpectedLocalHost, UPDATED_EXPIRY_TIME_MS);
addTtlUpdateMessagesToReplicasOfPartition(pid, b0, remoteHostOnly, UPDATED_EXPIRY_TIME_MS);
// TTL update on b1, b2 and b3 on remote; the mapped IDs b1p and b2p get theirs on expected local
addTtlUpdateMessagesToReplicasOfPartition(pid, b1, remoteHostOnly, UPDATED_EXPIRY_TIME_MS);
addTtlUpdateMessagesToReplicasOfPartition(pid, b1p, expectedLocalHostOnly, UPDATED_EXPIRY_TIME_MS);
addTtlUpdateMessagesToReplicasOfPartition(pid, b2, remoteHostOnly, UPDATED_EXPIRY_TIME_MS);
addTtlUpdateMessagesToReplicasOfPartition(pid, b2p, expectedLocalHostOnly, UPDATED_EXPIRY_TIME_MS);
addTtlUpdateMessagesToReplicasOfPartition(pid, b3, remoteHostOnly, UPDATED_EXPIRY_TIME_MS);
// number of message infos the remote holds for this partition; overwritten on every pass, so the value of the
// last partition is the one used to cap the expected token index below
numMessagesInEachPart = remoteHost.infosByPartition.get(pid).size();
}
// After the for loop above, we have records in hosts just like below
// L|id0|id1|id2|id0D|id1T| | | | | | | | | | | |b0p|b1p| | |b0pT| | | |
// R|id0|id1|id2| | |id3|id4|id4T|id3T|id2T|id1T|id0T|id1D|id3D|idT|idD|b0 |b1 |b2 |b3|b0T |b1T |b2T | b3T|
// E|id0|id1|id2|id0D|id1T| |id4|id4T| |id2T| | |id1D| | | |b0p|b1p|b2p| |b0pT|b1pT|b2pT| |
//
// converter map: b0->b0p, b1->b1p, b2->b2p, b3->null
int batchSize = 4;
Pair<Map<DataNodeId, List<RemoteReplicaInfo>>, ReplicaThread> replicasAndThread = getRemoteReplicasAndReplicaThread(batchSize, clusterMap, localHost, remoteHost, storeKeyConverter, transformer, null, null);
List<RemoteReplicaInfo> remoteReplicaInfos = replicasAndThread.getFirst().get(remoteHost.dataNodeId);
ReplicaThread replicaThread = replicasAndThread.getSecond();
Map<PartitionId, List<ByteBuffer>> missingBuffers = expectedLocalHost.getMissingBuffers(localHost.buffersByPartition);
// We can see from the table in the comments above, Local has 7 records less than expected local.
for (Map.Entry<PartitionId, List<ByteBuffer>> entry : missingBuffers.entrySet()) {
assertEquals("Missing buffers count mismatch", 7, entry.getValue().size());
}
// 1st iteration - 0 missing keys (3 puts already present, one put missing but del in remote, 1 ttl update will be
// applied, 1 delete will be applied): Remote returns: id0T, id1TD, id2T, id3TD. id3 put missing, but it's deleted.
// id1 apply delete, id2 apply ttl update. Token index is pointing to id3.
// 2nd iteration - 1 missing key, 1 of which will also be ttl updated (one key with put + ttl update missing but
// del in remote, one put and ttl update replicated): Remote returns: id3TD, id4T. id4 put missing, id3 deleted.
// Token index is pointing to id3T.
// 3rd iteration - 0 missing keys (1 ttl update missing but del in remote, 1 already ttl updated in iter 1, 1 key
// already ttl updated in local, 1 key del local): Remote returns: id3TD, id2T, id1TD, id0T. Token index is pointing
// to id0T.
// 4th iteration - 0 missing keys (1 key del local, 1 key already deleted, 1 key missing but del in remote, 1 key
// with ttl update missing but del remote): Remote returns: id0T, id1D, id3TD, idTD. Token index is pointing to idT.
// 5th iteration - 0 missing keys (1 key - two records - missing but del remote, 2 puts already present but TTL
// update of one of them is applied): Remote returns: idTD, b0T, b1T. b1 apply ttl update. Token index is pointing to
// b1.
// 6th iteration - 1 missing key (put + ttl update for a key, 1 deprecated id ignored, 1 TTL update already applied):
// Remote returns: b1T, b2T, b3T, b0T. b2 missing, and ttl updated. b3 has no local key.
// 7th iteration - 0 missing keys (2 TTL updates already applied, 1 TTL update of a deprecated ID ignored)
// |1st iter |2nd iter|3rd iter|4th iter|5th iter|6th iter|7th iter|
// L|id0|id1|id2|id0D|id1T| | | | | | | | | | | |b0p|b1p| | |b0pT| | | |id1D|id2T|id4|id4T| | |b1pT |b2p|b2pT|
// R|id0|id1|id2| | |id3|id4|id4T|id3T|id2T|id1T|id0T|id1D|id3D|idT|idD|b0 |b1 |b2 |b3|b0T |b1T |b2T | b3T|
// E|id0|id1|id2|id0D|id1T| |id4|id4T| |id2T| | |id1D| | | |b0p|b1p|b2p| |b0pT|b1pT|b2pT| |
// one entry per iteration: the expected missing key count, and the expected number of buffers that local still
// lacks versus expected local once that iteration has been applied
int[] missingKeysCounts = { 0, 1, 0, 0, 0, 1, 0 };
int[] missingBuffersCount = { 5, 3, 3, 3, 2, 0, 0 };
int expectedIndex = 0;
int missingBuffersIndex = 0;
for (int missingKeysCount : missingKeysCounts) {
// expected token index after this exchange: previous index plus batchSize, capped at the number of remote
// messages, minus 1
expectedIndex = Math.min(expectedIndex + batchSize, numMessagesInEachPart) - 1;
List<ReplicaThread.ExchangeMetadataResponse> response = replicaThread.exchangeMetadata(new MockConnectionPool.MockConnection(remoteHost, batchSize), remoteReplicaInfos);
assertEquals("Response should contain a response for each replica", remoteReplicaInfos.size(), response.size());
for (int i = 0; i < response.size(); i++) {
assertEquals(missingKeysCount, response.get(i).missingStoreMessages.size());
assertEquals(expectedIndex, ((MockFindToken) response.get(i).remoteToken).getIndex());
// record the returned token on the replica info before fixing the missing keys
remoteReplicaInfos.get(i).setToken(response.get(i).remoteToken);
}
replicaThread.fixMissingStoreKeys(new MockConnectionPool.MockConnection(remoteHost, batchSize), remoteReplicaInfos, response, false);
for (int i = 0; i < response.size(); i++) {
assertEquals("Token should have been set correctly in fixMissingStoreKeys()", response.get(i).remoteToken, remoteReplicaInfos.get(i).getToken());
}
missingBuffers = expectedLocalHost.getMissingBuffers(localHost.buffersByPartition);
for (Map.Entry<PartitionId, List<ByteBuffer>> entry : missingBuffers.entrySet()) {
assertEquals("Missing buffers count mismatch for iteration count " + missingBuffersIndex, missingBuffersCount[missingBuffersIndex], entry.getValue().size());
}
missingBuffersIndex++;
}
// no more missing keys
List<ReplicaThread.ExchangeMetadataResponse> response = replicaThread.exchangeMetadata(new MockConnectionPool.MockConnection(remoteHost, batchSize), remoteReplicaInfos);
assertEquals("Response should contain a response for each replica", remoteReplicaInfos.size(), response.size());
for (ReplicaThread.ExchangeMetadataResponse metadata : response) {
assertEquals(0, metadata.missingStoreMessages.size());
assertEquals(expectedIndex, ((MockFindToken) metadata.remoteToken).getIndex());
}
missingBuffers = expectedLocalHost.getMissingBuffers(localHost.buffersByPartition);
assertEquals("There should be no missing buffers", 0, missingBuffers.size());
// validate everything: for each distinct key on the remote, compare its merged state with the converted local key
for (Map.Entry<PartitionId, List<MessageInfo>> remoteInfoEntry : remoteHost.infosByPartition.entrySet()) {
List<MessageInfo> remoteInfos = remoteInfoEntry.getValue();
List<MessageInfo> localInfos = localHost.infosByPartition.get(remoteInfoEntry.getKey());
// a remote key can have several records (put, ttl update, delete); each key is checked once
Set<StoreKey> seen = new HashSet<>();
for (MessageInfo remoteInfo : remoteInfos) {
StoreKey remoteId = remoteInfo.getStoreKey();
if (seen.add(remoteId)) {
// look up the local counterpart of the remote key through the converter (null for b3, mapped to null above)
StoreKey localId = storeKeyConverter.convert(Collections.singleton(remoteId)).get(remoteId);
MessageInfo localInfo = getMessageInfo(localId, localInfos, false, false, false);
if (localId == null) {
// this is a deprecated ID. There should be no messages locally
assertNull(remoteId + " is deprecated and should have no entries", localInfo);
} else {
MessageInfo mergedRemoteInfo = getMergedMessageInfo(remoteId, remoteInfos);
if (localInfo == null) {
// local has no put, must be deleted on remote
assertTrue(localId + ":" + remoteId + " not replicated", mergedRemoteInfo.isDeleted());
} else {
// local has a put and must be either at or beyond the state of the remote (based on ops above)
MessageInfo mergedLocalInfo = getMergedMessageInfo(localId, localInfos);
if (mergedRemoteInfo.isDeleted()) {
// delete on remote, should be deleted locally too
assertTrue(localId + ":" + remoteId + " is deleted on remote but not locally", mergedLocalInfo.isDeleted());
} else if (mergedRemoteInfo.isTtlUpdated() && !idsDeletedLocallyByPartition.get(remoteInfoEntry.getKey()).equals(localId)) {
// ttl updated on remote, should be ttl updated locally too (the key deleted only on local is exempt)
assertTrue(localId + ":" + remoteId + " is updated on remote but not locally", mergedLocalInfo.isTtlUpdated());
} else if (!idsDeletedLocallyByPartition.get(remoteInfoEntry.getKey()).equals(localId)) {
// should not be updated or deleted locally
assertFalse(localId + ":" + remoteId + " has been updated", mergedLocalInfo.isTtlUpdated());
assertFalse(localId + ":" + remoteId + " has been deleted", mergedLocalInfo.isDeleted());
}
}
}
}
}
}
}
Aggregations