use of com.github.ambry.store.StoreKeyConverter in project ambry by linkedin.
the class ReplicaThread method replicate.
/**
* Do replication for replicas grouped by {@link DataNodeId}.
* A replication cycle between two replicas involves the following steps:
* 1. Exchange metadata: fetch the metadata of blobs added to the remote replica since the last synchronization point
* and filter the ones missing in the local store.
* 2. Fetch missing blobs: fetch the missing blobs by issuing a GET request to the remote replica and write them to
* the local store.
*
* During cross-colo replication, depending on the {@link ReplicationModelType}, the missing blobs are either fetched
* from all remote replicas (if modelType == ALL_TO_ALL) or only fetched for local leader replicas from their remote
* leader replicas (if modelType == LEADER_BASED). In the latter case, non-leader replica pairs (leader <-> standby,
* standby <-> leader, standby <-> standby) will get their missing blobs from their corresponding leader<->leader
* exchanges and intra-dc replication.
*
* Here is a table listing what is exchanged between local and remote replicas based on their roles
* (leader/standby) when the {@link ReplicationModelType} is LEADER_BASED.
*
*           |   Local Leader    |    Local Standby    |   Remote Leader   |  Remote Standby
* -------------------------------------------------------------------------------------------
* Leader:   |        ---        |  metadata and data  | metadata and data |  metadata only
* Standby:  | metadata and data |  metadata and data  |  metadata only    |  metadata only
*/
public void replicate() {
boolean allCaughtUp = true;
Map<DataNodeId, List<RemoteReplicaInfo>> dataNodeToRemoteReplicaInfo = getRemoteReplicaInfos();
logger.trace("Replicating from {} DataNodes.", replicasToReplicateGroupedByNode.size());
for (Map.Entry<DataNodeId, List<RemoteReplicaInfo>> entry : dataNodeToRemoteReplicaInfo.entrySet()) {
DataNodeId remoteNode = entry.getKey();
if (!running) {
break;
}
List<RemoteReplicaInfo> replicasToReplicatePerNode = entry.getValue();
Timer.Context context = null;
Timer.Context portTypeBasedContext = null;
if (replicatingFromRemoteColo) {
context = replicationMetrics.interColoReplicationLatency.get(remoteNode.getDatacenterName()).time();
if (replicatingOverSsl) {
portTypeBasedContext = replicationMetrics.sslInterColoReplicationLatency.get(remoteNode.getDatacenterName()).time();
} else {
portTypeBasedContext = replicationMetrics.plainTextInterColoReplicationLatency.get(remoteNode.getDatacenterName()).time();
}
} else {
context = replicationMetrics.intraColoReplicationLatency.time();
if (replicatingOverSsl) {
portTypeBasedContext = replicationMetrics.sslIntraColoReplicationLatency.time();
} else {
portTypeBasedContext = replicationMetrics.plainTextIntraColoReplicationLatency.time();
}
}
ConnectedChannel connectedChannel = null;
long checkoutConnectionTimeInMs = -1;
long exchangeMetadataTimeInMs = -1;
long fixMissingStoreKeysTimeInMs = -1;
long replicationStartTimeInMs = time.milliseconds();
long startTimeInMs = replicationStartTimeInMs;
// Get a list of active replicas that need to be included in this replication cycle
List<RemoteReplicaInfo> activeReplicasPerNode = new ArrayList<>();
List<RemoteReplicaInfo> standbyReplicasWithNoProgress = new ArrayList<>();
for (RemoteReplicaInfo remoteReplicaInfo : replicasToReplicatePerNode) {
ReplicaId replicaId = remoteReplicaInfo.getReplicaId();
boolean inBackoff = time.milliseconds() < remoteReplicaInfo.getReEnableReplicationTime();
if (replicaId.isDown() || inBackoff || remoteReplicaInfo.getLocalStore().getCurrentState() == ReplicaState.OFFLINE || replicationDisabledPartitions.contains(replicaId.getPartitionId())) {
logger.debug("Skipping replication on replica {} because one of following conditions is true: remote replica is down " + "= {}; in backoff = {}; local store is offline = {}; replication is disabled = {}.", replicaId.getPartitionId().toPathString(), replicaId.isDown(), inBackoff, remoteReplicaInfo.getLocalStore().getCurrentState() == ReplicaState.OFFLINE, replicationDisabledPartitions.contains(replicaId.getPartitionId()));
continue;
}
if (replicatingFromRemoteColo && leaderBasedReplicationAdmin != null) {
// check if all missing keys for standby replicas from the previous replication cycle have now been obtained
// via the leader replica. If we still have missing keys, don't include them in the current replication cycle
// to avoid sending duplicate metadata requests, since their token wouldn't have advanced.
processMissingKeysFromPreviousMetadataResponse(remoteReplicaInfo);
if (containsMissingKeysFromPreviousMetadataExchange(remoteReplicaInfo)) {
standbyReplicasWithNoProgress.add(remoteReplicaInfo);
continue;
}
}
activeReplicasPerNode.add(remoteReplicaInfo);
}
logger.trace("Replicating from {} RemoteReplicaInfos.", activeReplicasPerNode.size());
// use a variable to track the current replica list to replicate (for logging purposes)
List<RemoteReplicaInfo> currentReplicaList = activeReplicasPerNode;
try {
if (activeReplicasPerNode.size() > 0) {
allCaughtUp = false;
// if maxReplicaCountPerRequest > 0, split remote replicas on same node into multiple lists; otherwise there is
// no limit.
List<List<RemoteReplicaInfo>> activeReplicaSubLists = maxReplicaCountPerRequest > 0 ? Utils.partitionList(activeReplicasPerNode, maxReplicaCountPerRequest) : Collections.singletonList(activeReplicasPerNode);
startTimeInMs = time.milliseconds();
connectedChannel = connectionPool.checkOutConnection(remoteNode.getHostname(), activeReplicasPerNode.get(0).getPort(), replicationConfig.replicationConnectionPoolCheckoutTimeoutMs);
checkoutConnectionTimeInMs = time.milliseconds() - startTimeInMs;
// we check out the ConnectedChannel once and replicate remote replicas in batches via the same ConnectedChannel
for (List<RemoteReplicaInfo> replicaSubList : activeReplicaSubLists) {
exchangeMetadataTimeInMs = -1;
fixMissingStoreKeysTimeInMs = -1;
currentReplicaList = replicaSubList;
logger.debug("Exchanging metadata with {} remote replicas on {}", currentReplicaList.size(), remoteNode);
startTimeInMs = time.milliseconds();
List<ExchangeMetadataResponse> exchangeMetadataResponseList = exchangeMetadata(connectedChannel, replicaSubList);
exchangeMetadataTimeInMs = time.milliseconds() - startTimeInMs;
if (replicatingFromRemoteColo && leaderBasedReplicationAdmin != null) {
// If leader based replication is enabled and we are replicating from remote colo, fetch the missing blobs
// only for local leader replicas from their corresponding peer leader replicas (Leader <-> Leader).
// Non-leader replica pairs (standby <-> leader, leader <-> standby, standby <-> standby) will get their
// missing blobs from their leader pair exchanges and intra-dc replication.
List<RemoteReplicaInfo> leaderReplicaList = new ArrayList<>();
List<ExchangeMetadataResponse> exchangeMetadataResponseListForLeaderReplicas = new ArrayList<>();
getLeaderReplicaList(replicaSubList, exchangeMetadataResponseList, leaderReplicaList, exchangeMetadataResponseListForLeaderReplicas);
replicaSubList = leaderReplicaList;
exchangeMetadataResponseList = exchangeMetadataResponseListForLeaderReplicas;
}
if (replicaSubList.size() > 0) {
startTimeInMs = time.milliseconds();
fixMissingStoreKeys(connectedChannel, replicaSubList, exchangeMetadataResponseList, false);
fixMissingStoreKeysTimeInMs = time.milliseconds() - startTimeInMs;
}
}
}
if (replicatingFromRemoteColo && leaderBasedReplicationAdmin != null) {
// Get a list of blocked standby replicas whose missing keys haven't arrived for a long time.
// Use case: in leader-based cross-colo replication, standby replicas don't send GET requests for missing keys
// found in the metadata exchange and expect them to arrive via leader <-> leader replication.
// As a safety measure, if the keys haven't arrived within the configured time, the standby replicas fetch the
// missing keys themselves instead of staying stuck waiting for them to come from the leader.
// TODO: As an improvement, we can first try to fetch missing blobs from the local leader/other replicas within the local dc.
// TODO: If the attempt to fetch a blob from the local dc returns Blob_Not_Found, we can then fetch it from replicas in the remote datacenter.
// This will involve coordination between replica threads containing replicas of the same partition.
List<RemoteReplicaInfo> standbyReplicasTimedOutOnNoProgress = getRemoteStandbyReplicasTimedOutOnNoProgress(standbyReplicasWithNoProgress);
if (standbyReplicasTimedOutOnNoProgress.size() > 0) {
allCaughtUp = false;
currentReplicaList = standbyReplicasTimedOutOnNoProgress;
if (connectedChannel == null) {
checkoutConnectionTimeInMs = -1;
startTimeInMs = time.milliseconds();
connectedChannel = connectionPool.checkOutConnection(remoteNode.getHostname(), standbyReplicasTimedOutOnNoProgress.get(0).getPort(), replicationConfig.replicationConnectionPoolCheckoutTimeoutMs);
checkoutConnectionTimeInMs = time.milliseconds() - startTimeInMs;
}
List<ExchangeMetadataResponse> exchangeMetadataResponseListForBlockedReplicas = standbyReplicasTimedOutOnNoProgress.stream().map(remoteReplicaInfo -> new ExchangeMetadataResponse(remoteReplicaInfo.getExchangeMetadataResponse())).collect(Collectors.toList());
// Convert (and cache) the remote keys being fetched, since the StoreKeyConverter would have cleared
// these keys from its cache while replicating with other replicas before the timeout occurred for these standby replicas.
List<StoreKey> storeKeysToConvert = exchangeMetadataResponseListForBlockedReplicas.stream().map(ExchangeMetadataResponse::getMissingStoreKeys).flatMap(Collection::stream).collect(Collectors.toList());
convertStoreKeys(storeKeysToConvert);
exchangeMetadataTimeInMs = 0;
fixMissingStoreKeysTimeInMs = -1;
logger.debug("Sending GET request to fetch missing keys for standby remote replicas {} timed out on no progress", currentReplicaList);
startTimeInMs = time.milliseconds();
fixMissingStoreKeys(connectedChannel, standbyReplicasTimedOutOnNoProgress, exchangeMetadataResponseListForBlockedReplicas, true);
fixMissingStoreKeysTimeInMs = time.milliseconds() - startTimeInMs;
}
}
} catch (Throwable e) {
if (checkoutConnectionTimeInMs == -1) {
// throwable happened in checkout connection phase
checkoutConnectionTimeInMs = time.milliseconds() - startTimeInMs;
responseHandler.onEvent(currentReplicaList.get(0).getReplicaId(), e);
} else if (exchangeMetadataTimeInMs == -1) {
// throwable happened in exchange metadata phase
exchangeMetadataTimeInMs = time.milliseconds() - startTimeInMs;
} else if (fixMissingStoreKeysTimeInMs == -1) {
// throwable happened in fix missing store keys phase
fixMissingStoreKeysTimeInMs = time.milliseconds() - startTimeInMs;
}
logger.error("Error while talking to peer: Remote node: {}, Thread name: {}, Remote replicas: {}, Current active " + "remote replica list: {}, Checkout connection time: {}, Exchange metadata time: {}, Fix missing " + "store key time {}", remoteNode, threadName, replicasToReplicatePerNode, currentReplicaList, checkoutConnectionTimeInMs, exchangeMetadataTimeInMs, fixMissingStoreKeysTimeInMs, e);
replicationMetrics.incrementReplicationErrors(replicatingOverSsl);
if (connectedChannel != null) {
connectionPool.destroyConnection(connectedChannel);
connectedChannel = null;
}
} finally {
long totalReplicationTime = time.milliseconds() - replicationStartTimeInMs;
replicationMetrics.updateTotalReplicationTime(totalReplicationTime, replicatingFromRemoteColo, replicatingOverSsl, datacenterName);
if (connectedChannel != null) {
connectionPool.checkInConnection(connectedChannel);
}
context.stop();
portTypeBasedContext.stop();
}
}
long sleepDurationMs = 0;
if (allCaughtUp && replicationConfig.replicationReplicaThreadIdleSleepDurationMs > 0) {
sleepDurationMs = replicationConfig.replicationReplicaThreadIdleSleepDurationMs;
idleCount.inc();
} else if (threadThrottleDurationMs > 0) {
sleepDurationMs = threadThrottleDurationMs;
throttleCount.inc();
}
if (sleepDurationMs > 0) {
try {
long currentTime = time.milliseconds();
time.sleep(sleepDurationMs);
logger.trace("Replica thread: {} slept for {} ms", threadName, time.milliseconds() - currentTime);
} catch (InterruptedException e) {
logger.error("Received interrupted exception during throttling", e);
}
}
}
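The javadoc table above summarizes when blob data (as opposed to metadata only) is fetched in a replication cycle. Below is a minimal, hypothetical sketch of that decision, assuming the table's rows denote the role of the local (replicating) replica and its columns the role and location of the remote peer it replicates from; ReplicaRole and fetchesBlobData are illustrative names, not Ambry APIs.
// Illustrative sketch only, not Ambry code: under LEADER_BASED cross-colo replication, blob data is
// fetched whenever the peer is in the local datacenter, and across datacenters only when both the local
// and the remote replica are leaders; every other pairing exchanges metadata only and relies on
// leader <-> leader exchanges plus intra-dc replication to obtain the data.
enum ReplicaRole { LEADER, STANDBY }

static boolean fetchesBlobData(ReplicaRole localRole, ReplicaRole remoteRole, boolean peerIsInLocalDc) {
  return peerIsInLocalDc || (localRole == ReplicaRole.LEADER && remoteRole == ReplicaRole.LEADER);
}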
use of com.github.ambry.store.StoreKeyConverter in project ambry by linkedin.
the class ReplicationEngine method createThreadPool.
/**
* Create thread pool for a datacenter.
* @param datacenter The datacenter String.
* @param numberOfThreads Number of threads to create for the thread pool.
* @param startThread Whether the threads should be started upon creation.
* @return the list of {@link ReplicaThread}s created for this datacenter.
*/
private List<ReplicaThread> createThreadPool(String datacenter, int numberOfThreads, boolean startThread) {
nextReplicaThreadIndexByDc.put(datacenter, new AtomicInteger(0));
List<ReplicaThread> replicaThreads = new ArrayList<>();
logger.info("Number of replica threads to replicate from {}: {}", datacenter, numberOfThreads);
ResponseHandler responseHandler = new ResponseHandler(clusterMap);
for (int i = 0; i < numberOfThreads; i++) {
boolean replicatingOverSsl = sslEnabledDatacenters.contains(datacenter);
String threadIdentity = getReplicaThreadName(datacenter, i);
try {
StoreKeyConverter threadSpecificKeyConverter = storeKeyConverterFactory.getStoreKeyConverter();
Transformer threadSpecificTransformer = Utils.getObj(transformerClassName, storeKeyFactory, threadSpecificKeyConverter);
ReplicaThread replicaThread = new ReplicaThread(threadIdentity, tokenHelper, clusterMap, correlationIdGenerator, dataNodeId, connectionPool, replicationConfig, replicationMetrics, notification, threadSpecificKeyConverter, threadSpecificTransformer, metricRegistry, replicatingOverSsl, datacenter, responseHandler, time, replicaSyncUpManager, skipPredicate, leaderBasedReplicationAdmin);
replicaThreads.add(replicaThread);
if (startThread) {
Thread thread = Utils.newThread(replicaThread.getName(), replicaThread, false);
thread.start();
logger.info("Started replica thread {}", thread.getName());
}
} catch (Exception e) {
throw new RuntimeException("Encountered exception instantiating ReplicaThread", e);
}
}
replicationMetrics.trackLiveThreadsCount(replicaThreads, datacenter);
replicationMetrics.populateSingleColoMetrics(datacenter);
return replicaThreads;
}
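The nextReplicaThreadIndexByDc counter initialized at the top of this method suggests a round-robin assignment of remote replicas to the threads created for each datacenter. A hypothetical sketch of such an assignment under that assumption (pickReplicaThread and its surroundings are illustrative names, not the actual ReplicationEngine code):
// Hypothetical sketch, not the actual ReplicationEngine implementation: pick the next ReplicaThread
// for a datacenter in round-robin order using the per-datacenter AtomicInteger counter.
private ReplicaThread pickReplicaThread(String datacenter, List<ReplicaThread> threadsForDc) {
  int index = nextReplicaThreadIndexByDc.get(datacenter).getAndIncrement() % threadsForDc.size();
  return threadsForDc.get(index);
}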
use of com.github.ambry.store.StoreKeyConverter in project ambry by linkedin.
the class ReplicationTest method remoteReplicaInfoAddRemoveTest.
/**
* Tests add/remove replicaInfo to {@link ReplicaThread}
* @throws Exception
*/
@Test
public void remoteReplicaInfoAddRemoveTest() throws Exception {
MockClusterMap clusterMap = new MockClusterMap();
Pair<MockHost, MockHost> localAndRemoteHosts = getLocalAndRemoteHosts(clusterMap);
MockHost localHost = localAndRemoteHosts.getFirst();
MockHost remoteHost = localAndRemoteHosts.getSecond();
StoreKeyFactory storeKeyFactory = Utils.getObj("com.github.ambry.commons.BlobIdFactory", clusterMap);
MockStoreKeyConverterFactory mockStoreKeyConverterFactory = new MockStoreKeyConverterFactory(null, null);
mockStoreKeyConverterFactory.setReturnInputIfAbsent(true);
mockStoreKeyConverterFactory.setConversionMap(new HashMap<>());
StoreKeyConverter storeKeyConverter = mockStoreKeyConverterFactory.getStoreKeyConverter();
Transformer transformer = new ValidatingTransformer(storeKeyFactory, storeKeyConverter);
ReplicationMetrics replicationMetrics = new ReplicationMetrics(new MetricRegistry(), clusterMap.getReplicaIds(localHost.dataNodeId));
replicationMetrics.populateSingleColoMetrics(remoteHost.dataNodeId.getDatacenterName());
List<RemoteReplicaInfo> remoteReplicaInfoList = localHost.getRemoteReplicaInfos(remoteHost, null);
Map<DataNodeId, MockHost> hosts = new HashMap<>();
hosts.put(remoteHost.dataNodeId, remoteHost);
MockConnectionPool connectionPool = new MockConnectionPool(hosts, clusterMap, 4);
ReplicaThread replicaThread = new ReplicaThread("threadtest", new MockFindTokenHelper(storeKeyFactory, replicationConfig), clusterMap, new AtomicInteger(0), localHost.dataNodeId, connectionPool, replicationConfig, replicationMetrics, null, mockStoreKeyConverterFactory.getStoreKeyConverter(), transformer, clusterMap.getMetricRegistry(), false, localHost.dataNodeId.getDatacenterName(), new ResponseHandler(clusterMap), time, null, null, null);
for (RemoteReplicaInfo remoteReplicaInfo : remoteReplicaInfoList) {
replicaThread.addRemoteReplicaInfo(remoteReplicaInfo);
}
List<RemoteReplicaInfo> actualRemoteReplicaInfoList = replicaThread.getRemoteReplicaInfos().get(remoteHost.dataNodeId);
Comparator<RemoteReplicaInfo> remoteReplicaInfoComparator = Comparator.comparing(info -> info.getReplicaId().getPartitionId().toPathString());
Collections.sort(remoteReplicaInfoList, remoteReplicaInfoComparator);
Collections.sort(actualRemoteReplicaInfoList, remoteReplicaInfoComparator);
assertEquals("getRemoteReplicaInfos not correct", remoteReplicaInfoList, actualRemoteReplicaInfoList);
// Test remove remoteReplicaInfo.
replicaThread.removeRemoteReplicaInfo(remoteReplicaInfoList.get(remoteReplicaInfoList.size() - 1));
actualRemoteReplicaInfoList = replicaThread.getRemoteReplicaInfos().get(remoteHost.dataNodeId);
Collections.sort(actualRemoteReplicaInfoList, remoteReplicaInfoComparator);
remoteReplicaInfoList.remove(remoteReplicaInfoList.size() - 1);
assertEquals("getRemoteReplicaInfos not correct", remoteReplicaInfoList, actualRemoteReplicaInfoList);
}
use of com.github.ambry.store.StoreKeyConverter in project ambry by linkedin.
the class ReplicationTest method replicationPauseTest.
/**
* Tests pausing replication for all and for individual partitions. Also tests that replication pauses on a store that is
* not started and resumes when the store is restarted.
* @throws Exception
*/
@Test
public void replicationPauseTest() throws Exception {
MockClusterMap clusterMap = new MockClusterMap();
Pair<MockHost, MockHost> localAndRemoteHosts = getLocalAndRemoteHosts(clusterMap);
MockHost localHost = localAndRemoteHosts.getFirst();
MockHost remoteHost = localAndRemoteHosts.getSecond();
List<PartitionId> partitionIds = clusterMap.getAllPartitionIds(null);
for (PartitionId partitionId : partitionIds) {
// add 10 messages to the remote host only
addPutMessagesToReplicasOfPartition(partitionId, Collections.singletonList(remoteHost), 10);
}
StoreKeyFactory storeKeyFactory = new BlobIdFactory(clusterMap);
MockStoreKeyConverterFactory storeKeyConverterFactory = new MockStoreKeyConverterFactory(null, null);
storeKeyConverterFactory.setConversionMap(new HashMap<>());
storeKeyConverterFactory.setReturnInputIfAbsent(true);
StoreKeyConverter storeKeyConverter = storeKeyConverterFactory.getStoreKeyConverter();
Transformer transformer = new ValidatingTransformer(storeKeyFactory, storeKeyConverter);
int batchSize = 4;
Pair<Map<DataNodeId, List<RemoteReplicaInfo>>, ReplicaThread> replicasAndThread = getRemoteReplicasAndReplicaThread(batchSize, clusterMap, localHost, remoteHost, storeKeyConverter, transformer, null, null);
Map<DataNodeId, List<RemoteReplicaInfo>> replicasToReplicate = replicasAndThread.getFirst();
ReplicaThread replicaThread = replicasAndThread.getSecond();
Map<PartitionId, Integer> progressTracker = new HashMap<>();
PartitionId partitionToResumeFirst = clusterMap.getAllPartitionIds(null).get(0);
PartitionId partitionToShutdownLocally = clusterMap.getAllPartitionIds(null).get(1);
boolean allStopped = false;
boolean onlyOneResumed = false;
boolean allReenabled = false;
boolean shutdownStoreRestarted = false;
Set<PartitionId> expectedPaused = new HashSet<>();
assertEquals("There should be no disabled partitions", expectedPaused, replicaThread.getReplicationDisabledPartitions());
while (true) {
replicaThread.replicate();
boolean replicationDone = true;
for (RemoteReplicaInfo replicaInfo : replicasToReplicate.get(remoteHost.dataNodeId)) {
PartitionId id = replicaInfo.getReplicaId().getPartitionId();
MockFindToken token = (MockFindToken) replicaInfo.getToken();
int lastProgress = progressTracker.computeIfAbsent(id, id1 -> 0);
int currentProgress = token.getIndex();
boolean partDone = currentProgress + 1 == remoteHost.infosByPartition.get(id).size();
if (allStopped || (onlyOneResumed && !id.equals(partitionToResumeFirst)) || (allReenabled && !shutdownStoreRestarted && id.equals(partitionToShutdownLocally))) {
assertEquals("There should have been no progress", lastProgress, currentProgress);
} else if (!partDone) {
assertTrue("There has been no progress", currentProgress > lastProgress);
progressTracker.put(id, currentProgress);
}
replicationDone = replicationDone && partDone;
}
if (!allStopped && !onlyOneResumed && !allReenabled && !shutdownStoreRestarted) {
replicaThread.controlReplicationForPartitions(clusterMap.getAllPartitionIds(null), false);
expectedPaused.addAll(clusterMap.getAllPartitionIds(null));
assertEquals("Disabled partitions sets do not match", expectedPaused, replicaThread.getReplicationDisabledPartitions());
allStopped = true;
} else if (!onlyOneResumed && !allReenabled && !shutdownStoreRestarted) {
// resume replication for first partition
replicaThread.controlReplicationForPartitions(Collections.singletonList(partitionIds.get(0)), true);
expectedPaused.remove(partitionIds.get(0));
assertEquals("Disabled partitions sets do not match", expectedPaused, replicaThread.getReplicationDisabledPartitions());
allStopped = false;
onlyOneResumed = true;
} else if (!allReenabled && !shutdownStoreRestarted) {
// not removing the first partition
replicaThread.controlReplicationForPartitions(clusterMap.getAllPartitionIds(null), true);
// shutdown one local store to pause replication against that store
localHost.storesByPartition.get(partitionToShutdownLocally).shutdown();
onlyOneResumed = false;
allReenabled = true;
expectedPaused.clear();
assertEquals("Disabled partitions sets do not match", expectedPaused, replicaThread.getReplicationDisabledPartitions());
} else if (!shutdownStoreRestarted) {
localHost.storesByPartition.get(partitionToShutdownLocally).start();
shutdownStoreRestarted = true;
}
if (replicationDone) {
break;
}
}
Map<PartitionId, List<MessageInfo>> missingInfos = remoteHost.getMissingInfos(localHost.infosByPartition);
for (Map.Entry<PartitionId, List<MessageInfo>> entry : missingInfos.entrySet()) {
assertEquals("No infos should be missing", 0, entry.getValue().size());
}
Map<PartitionId, List<ByteBuffer>> missingBuffers = remoteHost.getMissingBuffers(localHost.buffersByPartition);
for (Map.Entry<PartitionId, List<ByteBuffer>> entry : missingBuffers.entrySet()) {
assertEquals("No buffers should be missing", 0, entry.getValue().size());
}
}
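For reference, the pause/resume control flow exercised by this test reduces to a few calls on the replica thread; a minimal usage sketch, assuming a replicaThread and clusterMap configured as in the test above:
// Minimal usage sketch based on the test above (assumes replicaThread and clusterMap are set up as in the test).
List<PartitionId> allPartitions = clusterMap.getAllPartitionIds(null);
// disable (pause) replication for every partition
replicaThread.controlReplicationForPartitions(allPartitions, false);
assertEquals(new HashSet<>(allPartitions), replicaThread.getReplicationDisabledPartitions());
// re-enable (resume) replication for a single partition
replicaThread.controlReplicationForPartitions(Collections.singletonList(allPartitions.get(0)), true);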
use of com.github.ambry.store.StoreKeyConverter in project ambry by linkedin.
the class ReplicationTest method limitMaxPartitionCountPerRequestTest.
/**
* Test that max partition count per request is honored in {@link ReplicaThread} if there are too many partitions to
* replicate from the remote node.
* @throws Exception
*/
@Test
public void limitMaxPartitionCountPerRequestTest() throws Exception {
MockClusterMap clusterMap = new MockClusterMap();
Pair<MockHost, MockHost> localAndRemoteHosts = getLocalAndRemoteHosts(clusterMap);
MockHost localHost = localAndRemoteHosts.getFirst();
MockHost remoteHost = localAndRemoteHosts.getSecond();
List<PartitionId> partitionIds = clusterMap.getAllPartitionIds(null);
for (PartitionId partitionId : partitionIds) {
// add 5 messages to each partition and place them on the remote host only
addPutMessagesToReplicasOfPartition(partitionId, Collections.singletonList(remoteHost), 5);
}
StoreKeyFactory storeKeyFactory = Utils.getObj("com.github.ambry.commons.BlobIdFactory", clusterMap);
MockStoreKeyConverterFactory mockStoreKeyConverterFactory = new MockStoreKeyConverterFactory(null, null);
mockStoreKeyConverterFactory.setReturnInputIfAbsent(true);
mockStoreKeyConverterFactory.setConversionMap(new HashMap<>());
// we set batchSize to 10 in order to get all messages from one partition within a single replication cycle
int batchSize = 10;
StoreKeyConverter storeKeyConverter = mockStoreKeyConverterFactory.getStoreKeyConverter();
Transformer transformer = new ValidatingTransformer(storeKeyFactory, storeKeyConverter);
// we set the max partition count per request to 5, which forces the thread to replicate replicas in two cycles. (Note that
// the number of partitions to replicate is 10, so they will be replicated in two batches.)
ReplicationConfig initialReplicationConfig = replicationConfig;
properties.setProperty("replication.max.partition.count.per.request", String.valueOf(5));
replicationConfig = new ReplicationConfig(new VerifiableProperties(properties));
CountDownLatch replicationCompleted = new CountDownLatch(partitionIds.size());
AtomicReference<Exception> exception = new AtomicReference<>();
Pair<Map<DataNodeId, List<RemoteReplicaInfo>>, ReplicaThread> replicasAndThread = getRemoteReplicasAndReplicaThread(batchSize, clusterMap, localHost, remoteHost, storeKeyConverter, transformer, (store, messageInfos) -> {
try {
replicationCompleted.countDown();
// for each partition, replication should complete within a single cycle (fetching once should suffice), so
// we shut down the local store once blobs are written. This avoids unnecessary metadata requests being sent to
// the remote host.
store.shutdown();
} catch (Exception e) {
exception.set(e);
}
}, null);
ReplicaThread replicaThread = replicasAndThread.getSecond();
Thread thread = Utils.newThread(replicaThread, false);
thread.start();
assertTrue("Replication didn't complete within 10 secs", replicationCompleted.await(10, TimeUnit.SECONDS));
// verify the # of replicas per metadata request is limited to 5 (note that there are 10 replicas to replicate; they
// are split into 2 small batches and get replicated in separate requests)
assertEquals("There should be 2 metadata requests and each has 5 replicas to replicate", Arrays.asList(5, 5), remoteHost.replicaCountPerRequestTracker);
// shutdown
replicaThread.shutdown();
if (exception.get() != null) {
throw exception.get();
}
replicationConfig = initialReplicationConfig;
}
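The two-batch behavior asserted here comes from the replication.max.partition.count.per.request config and the Utils.partitionList call in ReplicaThread.replicate() shown earlier. A minimal sketch of the expected splitting, assuming partitionList is generic and simply chops a list into sub-lists of at most the given size (as the comments and the assertion above imply):
// Illustrative sketch of the batching the test asserts on: 10 replicas with a limit of 5 per request
// yield two sub-lists, hence two metadata requests with 5 replicas each.
List<Integer> replicas = Arrays.asList(0, 1, 2, 3, 4, 5, 6, 7, 8, 9);
List<List<Integer>> batches = Utils.partitionList(replicas, 5);
// batches -> [[0, 1, 2, 3, 4], [5, 6, 7, 8, 9]]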