Use of com.github.ambry.clustermap.ReplicaId in project ambry by linkedin.
The class SocketNetworkClient, method prepareSends.
/**
* Process the requests in the pendingRequestsQueue. Create {@link ResponseInfo} for those requests that have timed
* out while waiting in the queue. Then, attempt to prepare {@link NetworkSend}s by checking out connections for
* the rest of the requests in the queue.
* @param requestsToDrop the list of correlation IDs representing the requests that can be dropped. If any of these
* correlation IDs match pending requests, those pending requests will not be sent out.
* @param responseInfoList the list to populate with responseInfos for requests that timed out waiting for
* connections.
* @return the list of {@link NetworkSend} objects to hand over to the Selector.
*/
private List<NetworkSend> prepareSends(Set<Integer> requestsToDrop, List<ResponseInfo> responseInfoList) {
List<NetworkSend> sends = new ArrayList<>();
ListIterator<RequestMetadata> iter = pendingRequests.listIterator();
/* Drop requests that have waited too long */
while (iter.hasNext()) {
RequestMetadata requestMetadata = iter.next();
if (time.milliseconds() - requestMetadata.requestQueuedAtMs > checkoutTimeoutMs) {
responseInfoList.add(new ResponseInfo(requestMetadata.requestInfo, NetworkClientErrorCode.ConnectionUnavailable, null));
requestMetadata.requestInfo.getRequest().release();
logger.trace("Failing request to host {} port {} due to connection unavailability", requestMetadata.requestInfo.getHost(), requestMetadata.requestInfo.getPort());
iter.remove();
if (requestMetadata.pendingConnectionId != null) {
pendingConnectionsToAssociatedRequests.remove(requestMetadata.pendingConnectionId);
requestMetadata.pendingConnectionId = null;
}
String connId = correlationIdInFlightToConnectionId.get(requestMetadata.requestInfo.getRequest().getCorrelationId());
if (connId != null) {
connectionTracker.checkInConnection(connId);
connectionIdToRequestInFlight.remove(connId);
}
networkMetrics.connectionCheckoutTimeoutError.inc();
} else {
// Since requests are ordered by time, once the first request that cannot be dropped is found,
// we let that and the rest be iterated over in the next while loop. Just move the cursor backwards as this
// element needs to be processed.
iter.previous();
break;
}
}
while (iter.hasNext()) {
RequestMetadata requestMetadata = iter.next();
try {
String host = requestMetadata.requestInfo.getHost();
Port port = requestMetadata.requestInfo.getPort();
ReplicaId replicaId = requestMetadata.requestInfo.getReplicaId();
if (replicaId == null) {
throw new IllegalStateException("ReplicaId in request is null.");
}
if (requestsToDrop.contains(requestMetadata.requestInfo.getRequest().getCorrelationId())) {
responseInfoList.add(new ResponseInfo(requestMetadata.requestInfo, NetworkClientErrorCode.ConnectionUnavailable, null));
requestMetadata.requestInfo.getRequest().release();
if (requestMetadata.pendingConnectionId != null) {
pendingConnectionsToAssociatedRequests.remove(requestMetadata.pendingConnectionId);
requestMetadata.pendingConnectionId = null;
}
iter.remove();
} else {
String connId = connectionTracker.checkOutConnection(host, port, replicaId.getDataNodeId());
if (connId == null) {
networkMetrics.connectionNotAvailable.inc();
if (requestMetadata.pendingConnectionId == null) {
if (connectionTracker.mayCreateNewConnection(host, port, replicaId.getDataNodeId())) {
connId = connectionTracker.connectAndTrack(this::connect, host, port, replicaId.getDataNodeId());
requestMetadata.pendingConnectionId = connId;
pendingConnectionsToAssociatedRequests.put(connId, requestMetadata);
logger.trace("Initiated a connection to host {} port {} ", host, port);
} else {
networkMetrics.connectionReachLimit.inc();
}
}
} else {
if (requestMetadata.pendingConnectionId != null) {
pendingConnectionsToAssociatedRequests.remove(requestMetadata.pendingConnectionId);
requestMetadata.pendingConnectionId = null;
}
logger.trace("Connection checkout succeeded for {}:{} with connectionId {} ", host, port, connId);
sends.add(new NetworkSend(connId, requestMetadata.requestInfo.getRequest(), null, time));
connectionIdToRequestInFlight.put(connId, requestMetadata);
correlationIdInFlightToConnectionId.put(requestMetadata.requestInfo.getRequest().getCorrelationId(), connId);
iter.remove();
requestMetadata.onRequestDequeue();
}
}
} catch (IOException e) {
networkMetrics.networkClientIOError.inc();
logger.error("Received exception while checking out a connection", e);
}
}
return sends;
}
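
The two-phase structure above — first fail the requests that have exceeded checkoutTimeoutMs, then attempt a connection checkout for the remainder — can be shown in isolation. The following is a minimal sketch of that pattern under assumed, hypothetical PendingRequest and ConnectionPool types; it is not the Ambry API.

import java.util.ArrayDeque;
import java.util.ArrayList;
import java.util.Deque;
import java.util.Iterator;
import java.util.List;

// Minimal sketch of the two-phase scan in prepareSends(): phase 1 drops requests
// that waited longer than checkoutTimeoutMs, phase 2 tries to check out a
// connection for each remaining request. All types here are hypothetical
// stand-ins, not Ambry classes.
final class PendingRequestScanner {
  static final class PendingRequest {
    final int correlationId;
    final long queuedAtMs;
    PendingRequest(int correlationId, long queuedAtMs) {
      this.correlationId = correlationId;
      this.queuedAtMs = queuedAtMs;
    }
  }

  interface ConnectionPool {
    /** Returns a connection id, or null if no connection is currently available. */
    String checkOut(int correlationId);
  }

  /** Requests are queued in arrival order, so the oldest sit at the head. */
  private final Deque<PendingRequest> pending = new ArrayDeque<>();

  void enqueue(PendingRequest request) {
    pending.addLast(request);
  }

  List<String> scan(long nowMs, long checkoutTimeoutMs, ConnectionPool pool,
                    List<Integer> timedOutCorrelationIds) {
    // Phase 1: because the queue is time-ordered, we can stop at the first
    // request that has not yet timed out.
    while (!pending.isEmpty()
        && nowMs - pending.peekFirst().queuedAtMs > checkoutTimeoutMs) {
      timedOutCorrelationIds.add(pending.pollFirst().correlationId);
    }
    // Phase 2: attempt a connection checkout for the survivors; requests that
    // cannot get a connection simply stay queued for the next scan.
    List<String> sends = new ArrayList<>();
    Iterator<PendingRequest> iter = pending.iterator();
    while (iter.hasNext()) {
      PendingRequest request = iter.next();
      String connId = pool.checkOut(request.correlationId);
      if (connId != null) {
        sends.add(connId);
        iter.remove();
      }
    }
    return sends;
  }
}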
Use of com.github.ambry.clustermap.ReplicaId in project ambry by linkedin.
The class ReplicaThread, method exchangeMetadata.
/**
* Gets all the metadata about messages from the remote replicas since the last token. Checks the messages against the
* local store and finds all the messages that are missing. For the messages that are not missing, updates their delete
* and TTL state.
* @param connectedChannel The connected channel that represents a connection to the remote replica
* @param replicasToReplicatePerNode The information about the replicas that are being replicated
* @return the list of ExchangeMetadataResponse that contains the set of store keys that are missing from the local
* store but present in the remote replicas, along with the new token from the remote replicas
* @throws IOException
* @throws ReplicationException
*/
List<ExchangeMetadataResponse> exchangeMetadata(ConnectedChannel connectedChannel, List<RemoteReplicaInfo> replicasToReplicatePerNode) throws IOException, ReplicationException {
long exchangeMetadataStartTimeInMs = time.milliseconds();
List<ExchangeMetadataResponse> exchangeMetadataResponseList = new ArrayList<>();
if (replicasToReplicatePerNode.size() > 0) {
try {
DataNodeId remoteNode = replicasToReplicatePerNode.get(0).getReplicaId().getDataNodeId();
ReplicaMetadataResponse response = getReplicaMetadataResponse(replicasToReplicatePerNode, connectedChannel, remoteNode);
long startTimeInMs = time.milliseconds();
Map<StoreKey, StoreKey> remoteKeyToLocalKeyMap = batchConvertReplicaMetadataResponseKeys(response);
for (int i = 0; i < response.getReplicaMetadataResponseInfoList().size(); i++) {
RemoteReplicaInfo remoteReplicaInfo = replicasToReplicatePerNode.get(i);
ReplicaMetadataResponseInfo replicaMetadataResponseInfo = response.getReplicaMetadataResponseInfoList().get(i);
responseHandler.onEvent(remoteReplicaInfo.getReplicaId(), replicaMetadataResponseInfo.getError());
if (replicaMetadataResponseInfo.getError() == ServerErrorCode.No_Error) {
// Skip stores that were stopped during call to getReplicaMetadataResponse
if (!remoteReplicaInfo.getLocalStore().isStarted()) {
exchangeMetadataResponseList.add(new ExchangeMetadataResponse(ServerErrorCode.Temporarily_Disabled));
} else {
try {
logger.trace("Remote node: {} Thread name: {} Remote replica: {} Token from remote: {} Replica lag: {} ", remoteNode, threadName, remoteReplicaInfo.getReplicaId(), replicaMetadataResponseInfo.getFindToken(), replicaMetadataResponseInfo.getRemoteReplicaLagInBytes());
Set<MessageInfo> remoteMissingStoreMessages = getMissingStoreMessages(replicaMetadataResponseInfo, remoteNode, remoteReplicaInfo);
processReplicaMetadataResponse(remoteMissingStoreMessages, replicaMetadataResponseInfo, remoteReplicaInfo, remoteNode, remoteKeyToLocalKeyMap);
// Get the converted keys for the missing keys of this replica (to store them along with missing keys in
// the exchange metadata response). For leader based replication, these are used during processing
// of missing keys for non-leader replica pairs which will come later via leader<->leader replication.
Map<StoreKey, StoreKey> remoteKeyToLocalKeySubMap = new HashMap<>();
remoteMissingStoreMessages.forEach(remoteMissingStoreMessage -> {
StoreKey remoteKey = remoteMissingStoreMessage.getStoreKey();
remoteKeyToLocalKeySubMap.put(remoteKey, remoteKeyToLocalKeyMap.get(remoteKey));
});
ExchangeMetadataResponse exchangeMetadataResponse = new ExchangeMetadataResponse(remoteMissingStoreMessages, replicaMetadataResponseInfo.getFindToken(), replicaMetadataResponseInfo.getRemoteReplicaLagInBytes(), remoteKeyToLocalKeySubMap, time);
// update replication lag in ReplicaSyncUpManager
if (replicaSyncUpManager != null && remoteReplicaInfo.getLocalStore().getCurrentState() == ReplicaState.BOOTSTRAP) {
ReplicaId localReplica = remoteReplicaInfo.getLocalReplicaId();
ReplicaId remoteReplica = remoteReplicaInfo.getReplicaId();
boolean isSyncCompleted = replicaSyncUpManager.updateReplicaLagAndCheckSyncStatus(localReplica, remoteReplica, exchangeMetadataResponse.localLagFromRemoteInBytes, ReplicaState.STANDBY);
// if catchup is completed by this update call, we can complete bootstrap in local store
if (isSyncCompleted) {
// complete BOOTSTRAP -> STANDBY transition
remoteReplicaInfo.getLocalStore().setCurrentState(ReplicaState.STANDBY);
remoteReplicaInfo.getLocalStore().completeBootstrap();
}
}
// If remote token has not moved forward, wait for back off time before resending next metadata request
if (remoteReplicaInfo.getToken().equals(exchangeMetadataResponse.remoteToken)) {
remoteReplicaInfo.setReEnableReplicationTime(time.milliseconds() + replicationConfig.replicationSyncedReplicaBackoffDurationMs);
syncedBackOffCount.inc();
}
// There are no missing keys. We just advance the token
if (exchangeMetadataResponse.missingStoreMessages.size() == 0) {
remoteReplicaInfo.setToken(exchangeMetadataResponse.remoteToken);
remoteReplicaInfo.setLocalLagFromRemoteInBytes(exchangeMetadataResponse.localLagFromRemoteInBytes);
logger.trace("Remote node: {} Thread name: {} Remote replica: {} Token after speaking to remote node: {}", remoteNode, threadName, remoteReplicaInfo.getReplicaId(), exchangeMetadataResponse.remoteToken);
}
replicationMetrics.updateLagMetricForRemoteReplica(remoteReplicaInfo, exchangeMetadataResponse.localLagFromRemoteInBytes);
if (replicaMetadataResponseInfo.getMessageInfoList().size() > 0) {
replicationMetrics.updateCatchupPointMetricForCloudReplica(remoteReplicaInfo, replicaMetadataResponseInfo.getMessageInfoList().get(replicaMetadataResponseInfo.getMessageInfoList().size() - 1).getOperationTimeMs());
}
// Add exchangeMetadataResponse to the list only at the end, after operations such as the replicaSyncUpManager
// update (if not null) have completed. Otherwise, an exception thrown in between (for example, by
// replicaSyncUpManager) could result in adding a second exchangeMetadataResponse for the same RemoteReplicaInfo.
exchangeMetadataResponseList.add(exchangeMetadataResponse);
} catch (Exception e) {
if (e instanceof StoreException && ((StoreException) e).getErrorCode() == StoreErrorCodes.Store_Not_Started) {
// Must have just been stopped, just skip it and move on.
logger.info("Local store not started for remote replica: {}", remoteReplicaInfo.getReplicaId());
exchangeMetadataResponseList.add(new ExchangeMetadataResponse(ServerErrorCode.Temporarily_Disabled));
} else {
logger.error("Remote node: {} Thread name: {} Remote replica: {}", remoteNode, threadName, remoteReplicaInfo.getReplicaId(), e);
replicationMetrics.updateLocalStoreError(remoteReplicaInfo.getReplicaId());
responseHandler.onEvent(remoteReplicaInfo.getReplicaId(), e);
exchangeMetadataResponseList.add(new ExchangeMetadataResponse(ServerErrorCode.Unknown_Error));
}
}
}
} else {
replicationMetrics.updateMetadataRequestError(remoteReplicaInfo.getReplicaId());
logger.error("Remote node: {} Thread name: {} Remote replica: {} Server error: {}", remoteNode, threadName, remoteReplicaInfo.getReplicaId(), replicaMetadataResponseInfo.getError());
exchangeMetadataResponseList.add(new ExchangeMetadataResponse(replicaMetadataResponseInfo.getError()));
}
if (replicatingFromRemoteColo && leaderBasedReplicationAdmin != null) {
ExchangeMetadataResponse exchangeMetadataResponse = exchangeMetadataResponseList.get(i);
if (exchangeMetadataResponse.serverErrorCode.equals(ServerErrorCode.No_Error)) {
// If leader-based replication is enabled, store the metadata exchange received for the remote replica, since
// standby replicas will not send GET requests for the missing store keys and instead track them via leader <->
// leader exchanges and intra-dc replication.
remoteReplicaInfo.setExchangeMetadataResponse(new ExchangeMetadataResponse(exchangeMetadataResponse));
// It is possible that some of the missing keys found in exchange metadata response are written in parallel
// by other replica threads since the time we calculated it. Go through the local store once more and
// update missing keys set stored in the exchangeMetadataResponse for the remote replica.
refreshMissingStoreMessagesForStandbyReplica(remoteReplicaInfo);
}
}
}
long processMetadataResponseTimeInMs = time.milliseconds() - startTimeInMs;
logger.trace("Remote node: {} Thread name: {} processMetadataResponseTime: {}", remoteNode, threadName, processMetadataResponseTimeInMs);
} finally {
long exchangeMetadataTime = time.milliseconds() - exchangeMetadataStartTimeInMs;
replicationMetrics.updateExchangeMetadataTime(exchangeMetadataTime, replicatingFromRemoteColo, replicatingOverSsl, datacenterName);
}
}
return exchangeMetadataResponseList;
}
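
Callers typically inspect each response's error code and act on the reported missing keys: in ALL_TO_ALL mode by issuing a GET request, or in LEADER_BASED mode by caching them for later, as the leader-based branch above does. The sketch below shows one plausible way to aggregate the missing keys from successful responses, using hypothetical MetadataResult and String-key stand-ins rather than the real ExchangeMetadataResponse and StoreKey classes.

import java.util.ArrayList;
import java.util.LinkedHashSet;
import java.util.List;
import java.util.Set;

// Hypothetical stand-in for ExchangeMetadataResponse; the real Ambry class
// carries more state (tokens, lag, key-conversion maps).
final class MetadataResult {
  enum Error { NO_ERROR, TEMPORARILY_DISABLED, UNKNOWN_ERROR }
  final Error error;
  final Set<String> missingKeys;
  MetadataResult(Error error, Set<String> missingKeys) {
    this.error = error;
    this.missingKeys = missingKeys;
  }
}

final class MissingKeyAggregator {
  /**
   * Collects the missing keys from all successful responses so a single GET
   * request can be issued for them; failed responses are skipped here and
   * would be retried in a later replication cycle.
   */
  static List<String> collectMissingKeys(List<MetadataResult> results) {
    Set<String> missing = new LinkedHashSet<>();
    for (MetadataResult result : results) {
      if (result.error == MetadataResult.Error.NO_ERROR) {
        missing.addAll(result.missingKeys);
      }
    }
    return new ArrayList<>(missing);
  }
}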
Use of com.github.ambry.clustermap.ReplicaId in project ambry by linkedin.
The class ReplicaThread, method replicate.
/**
* Do replication for replicas grouped by {@link DataNodeId}.
* A replication cycle between two replicas involves the following steps:
* 1. Exchange metadata: fetch the metadata of blobs added to the remote replica since the last synchronization point
* and identify the ones missing in the local store.
* 2. Fetch missing blobs: fetch the missing blobs by issuing a GET request to the remote replica and write them to
* the local store.
*
* During cross-colo replication, depending on the {@link ReplicationModelType}, the missing blobs are either fetched
* from all remote replicas (if modelType == ALL_TO_ALL) or only fetched for local leader replicas from their remote
* leader replicas (if modelType == LEADER_BASED). In the latter case, non-leader replica pairs (leader <-> standby,
* standby <-> leader, standby <-> standby) will get their missing blobs from their corresponding leader<->leader
* exchanges and intra-dc replication.
*
* Here is a table listing what is exchanged between local and remote replicas based on their roles
* (leader/standby) when {@link ReplicationModelType} is LEADER_BASED.
*
*           |    Local Leader    |    Local Standby    |   Remote Leader    |  Remote Standby
* --------------------------------------------------------------------------------------------
* Leader:   |        ---         |  metadata and data  | metadata and data  |  metadata only
* Standby:  | metadata and data  |  metadata and data  |   metadata only    |  metadata only
*/
public void replicate() {
boolean allCaughtUp = true;
Map<DataNodeId, List<RemoteReplicaInfo>> dataNodeToRemoteReplicaInfo = getRemoteReplicaInfos();
logger.trace("Replicating from {} DataNodes.", replicasToReplicateGroupedByNode.size());
for (Map.Entry<DataNodeId, List<RemoteReplicaInfo>> entry : dataNodeToRemoteReplicaInfo.entrySet()) {
DataNodeId remoteNode = entry.getKey();
if (!running) {
break;
}
List<RemoteReplicaInfo> replicasToReplicatePerNode = entry.getValue();
Timer.Context context = null;
Timer.Context portTypeBasedContext = null;
if (replicatingFromRemoteColo) {
context = replicationMetrics.interColoReplicationLatency.get(remoteNode.getDatacenterName()).time();
if (replicatingOverSsl) {
portTypeBasedContext = replicationMetrics.sslInterColoReplicationLatency.get(remoteNode.getDatacenterName()).time();
} else {
portTypeBasedContext = replicationMetrics.plainTextInterColoReplicationLatency.get(remoteNode.getDatacenterName()).time();
}
} else {
context = replicationMetrics.intraColoReplicationLatency.time();
if (replicatingOverSsl) {
portTypeBasedContext = replicationMetrics.sslIntraColoReplicationLatency.time();
} else {
portTypeBasedContext = replicationMetrics.plainTextIntraColoReplicationLatency.time();
}
}
ConnectedChannel connectedChannel = null;
long checkoutConnectionTimeInMs = -1;
long exchangeMetadataTimeInMs = -1;
long fixMissingStoreKeysTimeInMs = -1;
long replicationStartTimeInMs = time.milliseconds();
long startTimeInMs = replicationStartTimeInMs;
// Get a list of active replicas that need to be included in this replication cycle
List<RemoteReplicaInfo> activeReplicasPerNode = new ArrayList<>();
List<RemoteReplicaInfo> standbyReplicasWithNoProgress = new ArrayList<>();
for (RemoteReplicaInfo remoteReplicaInfo : replicasToReplicatePerNode) {
ReplicaId replicaId = remoteReplicaInfo.getReplicaId();
boolean inBackoff = time.milliseconds() < remoteReplicaInfo.getReEnableReplicationTime();
if (replicaId.isDown() || inBackoff || remoteReplicaInfo.getLocalStore().getCurrentState() == ReplicaState.OFFLINE || replicationDisabledPartitions.contains(replicaId.getPartitionId())) {
logger.debug("Skipping replication on replica {} because one of following conditions is true: remote replica is down " + "= {}; in backoff = {}; local store is offline = {}; replication is disabled = {}.", replicaId.getPartitionId().toPathString(), replicaId.isDown(), inBackoff, remoteReplicaInfo.getLocalStore().getCurrentState() == ReplicaState.OFFLINE, replicationDisabledPartitions.contains(replicaId.getPartitionId()));
continue;
}
if (replicatingFromRemoteColo && leaderBasedReplicationAdmin != null) {
// check if all missing keys for standby replicas from previous replication cycle are now obtained
// via leader replica. If we still have missing keys, don't include them in current replication cycle
// to avoid sending duplicate metadata requests since their token wouldn't have advanced.
processMissingKeysFromPreviousMetadataResponse(remoteReplicaInfo);
if (containsMissingKeysFromPreviousMetadataExchange(remoteReplicaInfo)) {
standbyReplicasWithNoProgress.add(remoteReplicaInfo);
continue;
}
}
activeReplicasPerNode.add(remoteReplicaInfo);
}
logger.trace("Replicating from {} RemoteReplicaInfos.", activeReplicasPerNode.size());
// use a variable to track the current replica list to replicate (for logging purposes)
List<RemoteReplicaInfo> currentReplicaList = activeReplicasPerNode;
try {
if (activeReplicasPerNode.size() > 0) {
allCaughtUp = false;
// if maxReplicaCountPerRequest > 0, split remote replicas on same node into multiple lists; otherwise there is
// no limit.
List<List<RemoteReplicaInfo>> activeReplicaSubLists = maxReplicaCountPerRequest > 0 ? Utils.partitionList(activeReplicasPerNode, maxReplicaCountPerRequest) : Collections.singletonList(activeReplicasPerNode);
startTimeInMs = time.milliseconds();
connectedChannel = connectionPool.checkOutConnection(remoteNode.getHostname(), activeReplicasPerNode.get(0).getPort(), replicationConfig.replicationConnectionPoolCheckoutTimeoutMs);
checkoutConnectionTimeInMs = time.milliseconds() - startTimeInMs;
// we checkout ConnectedChannel once and replicate remote replicas in batch via same ConnectedChannel
for (List<RemoteReplicaInfo> replicaSubList : activeReplicaSubLists) {
exchangeMetadataTimeInMs = -1;
fixMissingStoreKeysTimeInMs = -1;
currentReplicaList = replicaSubList;
logger.debug("Exchanging metadata with {} remote replicas on {}", currentReplicaList.size(), remoteNode);
startTimeInMs = time.milliseconds();
List<ExchangeMetadataResponse> exchangeMetadataResponseList = exchangeMetadata(connectedChannel, replicaSubList);
exchangeMetadataTimeInMs = time.milliseconds() - startTimeInMs;
if (replicatingFromRemoteColo && leaderBasedReplicationAdmin != null) {
// If leader based replication is enabled and we are replicating from remote colo, fetch the missing blobs
// only for local leader replicas from their corresponding peer leader replicas (Leader <-> Leader).
// Non-leader replica pairs (standby <-> leaders, leader <-> standby, standby <-> standby) will get their
// missing blobs from their leader pair exchanges and intra-dc replication.
List<RemoteReplicaInfo> leaderReplicaList = new ArrayList<>();
List<ExchangeMetadataResponse> exchangeMetadataResponseListForLeaderReplicas = new ArrayList<>();
getLeaderReplicaList(replicaSubList, exchangeMetadataResponseList, leaderReplicaList, exchangeMetadataResponseListForLeaderReplicas);
replicaSubList = leaderReplicaList;
exchangeMetadataResponseList = exchangeMetadataResponseListForLeaderReplicas;
}
if (replicaSubList.size() > 0) {
startTimeInMs = time.milliseconds();
fixMissingStoreKeys(connectedChannel, replicaSubList, exchangeMetadataResponseList, false);
fixMissingStoreKeysTimeInMs = time.milliseconds() - startTimeInMs;
}
}
}
if (replicatingFromRemoteColo && leaderBasedReplicationAdmin != null) {
// Get a list of blocked standby replicas whose missing keys haven't arrived for a long time.
// Use case: in leader-based cross-colo replication, standby replicas don't send GET requests for missing keys
// found in metadata exchange and expect them to come via leader <-> leader replication.
// As a safety measure, such standby replicas fetch the missing keys themselves so that they do not stay stuck
// waiting for the keys to arrive from the leader.
// TODO: As an improvement to this, we can first fetch missing blobs from local leader/other replicas in intra-dc first.
// TODO: If the result to fetch a blob from local dc is Blob_Not_Found, then we can fetch it from replicas in remote datacenter.
// This will involve co-ordination between replica threads containing replicas of same partition.
List<RemoteReplicaInfo> standbyReplicasTimedOutOnNoProgress = getRemoteStandbyReplicasTimedOutOnNoProgress(standbyReplicasWithNoProgress);
if (standbyReplicasTimedOutOnNoProgress.size() > 0) {
allCaughtUp = false;
currentReplicaList = standbyReplicasTimedOutOnNoProgress;
if (connectedChannel == null) {
checkoutConnectionTimeInMs = -1;
startTimeInMs = time.milliseconds();
connectedChannel = connectionPool.checkOutConnection(remoteNode.getHostname(), standbyReplicasTimedOutOnNoProgress.get(0).getPort(), replicationConfig.replicationConnectionPoolCheckoutTimeoutMs);
checkoutConnectionTimeInMs = time.milliseconds() - startTimeInMs;
}
List<ExchangeMetadataResponse> exchangeMetadataResponseListForBlockedReplicas = standbyReplicasTimedOutOnNoProgress.stream().map(remoteReplicaInfo -> new ExchangeMetadataResponse(remoteReplicaInfo.getExchangeMetadataResponse())).collect(Collectors.toList());
// Convert (and cache) the remote keys that are being fetched, since the StoreKeyConverter may have cleared
// these keys from its cache while replicating with other replicas before the timeout occurred for these standby replicas.
List<StoreKey> storeKeysToConvert = exchangeMetadataResponseListForBlockedReplicas.stream().map(ExchangeMetadataResponse::getMissingStoreKeys).flatMap(Collection::stream).collect(Collectors.toList());
convertStoreKeys(storeKeysToConvert);
exchangeMetadataTimeInMs = 0;
fixMissingStoreKeysTimeInMs = -1;
logger.debug("Sending GET request to fetch missing keys for standby remote replicas {} timed out on no progress", currentReplicaList);
startTimeInMs = time.milliseconds();
fixMissingStoreKeys(connectedChannel, standbyReplicasTimedOutOnNoProgress, exchangeMetadataResponseListForBlockedReplicas, true);
fixMissingStoreKeysTimeInMs = time.milliseconds() - startTimeInMs;
}
}
} catch (Throwable e) {
if (checkoutConnectionTimeInMs == -1) {
// throwable happened in checkout connection phase
checkoutConnectionTimeInMs = time.milliseconds() - startTimeInMs;
responseHandler.onEvent(currentReplicaList.get(0).getReplicaId(), e);
} else if (exchangeMetadataTimeInMs == -1) {
// throwable happened in exchange metadata phase
exchangeMetadataTimeInMs = time.milliseconds() - startTimeInMs;
} else if (fixMissingStoreKeysTimeInMs == -1) {
// throwable happened in fix missing store phase
fixMissingStoreKeysTimeInMs = time.milliseconds() - startTimeInMs;
}
logger.error("Error while talking to peer: Remote node: {}, Thread name: {}, Remote replicas: {}, Current active " + "remote replica list: {}, Checkout connection time: {}, Exchange metadata time: {}, Fix missing " + "store key time {}", remoteNode, threadName, replicasToReplicatePerNode, currentReplicaList, checkoutConnectionTimeInMs, exchangeMetadataTimeInMs, fixMissingStoreKeysTimeInMs, e);
replicationMetrics.incrementReplicationErrors(replicatingOverSsl);
if (connectedChannel != null) {
connectionPool.destroyConnection(connectedChannel);
connectedChannel = null;
}
} finally {
long totalReplicationTime = time.milliseconds() - replicationStartTimeInMs;
replicationMetrics.updateTotalReplicationTime(totalReplicationTime, replicatingFromRemoteColo, replicatingOverSsl, datacenterName);
if (connectedChannel != null) {
connectionPool.checkInConnection(connectedChannel);
}
context.stop();
portTypeBasedContext.stop();
}
}
long sleepDurationMs = 0;
if (allCaughtUp && replicationConfig.replicationReplicaThreadIdleSleepDurationMs > 0) {
sleepDurationMs = replicationConfig.replicationReplicaThreadIdleSleepDurationMs;
idleCount.inc();
} else if (threadThrottleDurationMs > 0) {
sleepDurationMs = threadThrottleDurationMs;
throttleCount.inc();
}
if (sleepDurationMs > 0) {
try {
long currentTime = time.milliseconds();
time.sleep(sleepDurationMs);
logger.trace("Replica thread: {} slept for {} ms", threadName, time.milliseconds() - currentTime);
} catch (InterruptedException e) {
logger.error("Received interrupted exception during throttling", e);
}
}
}
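
replicate() performs a single cycle and is meant to be called repeatedly by the replica thread's run loop. The following is a minimal, hypothetical driver sketch showing that shape (a running flag checked each iteration, with shutdown flipping the flag and waiting for the loop to exit); it is not the actual ReplicaThread.run() implementation.

import java.util.concurrent.CountDownLatch;
import java.util.concurrent.atomic.AtomicBoolean;

// Minimal sketch of a replication driver loop: run one replication cycle at a
// time until shutdown is requested. ReplicationCycle is a hypothetical
// interface standing in for ReplicaThread.replicate().
final class ReplicationLoop implements Runnable {
  interface ReplicationCycle {
    /** Runs one cycle: exchange metadata, fetch missing blobs, throttle/sleep. */
    void replicate();
  }

  private final ReplicationCycle cycle;
  private final AtomicBoolean running = new AtomicBoolean(true);
  private final CountDownLatch shutdownLatch = new CountDownLatch(1);

  ReplicationLoop(ReplicationCycle cycle) {
    this.cycle = cycle;
  }

  @Override
  public void run() {
    try {
      while (running.get()) {
        cycle.replicate();
      }
    } finally {
      shutdownLatch.countDown();
    }
  }

  /** Signals the loop to stop after the current cycle and waits for it to exit. */
  void shutdown() throws InterruptedException {
    running.set(false);
    shutdownLatch.await();
  }
}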
Use of com.github.ambry.clustermap.ReplicaId in project ambry by linkedin.
The class ReplicationEngine, method updateTotalBytesReadByRemoteReplica.
@Override
public void updateTotalBytesReadByRemoteReplica(PartitionId partitionId, String hostName, String replicaPath, long totalBytesRead) throws StoreException {
RemoteReplicaInfo remoteReplicaInfo = getRemoteReplicaInfo(partitionId, hostName, replicaPath);
if (remoteReplicaInfo != null) {
ReplicaId localReplica = remoteReplicaInfo.getLocalReplicaId();
remoteReplicaInfo.setTotalBytesReadFromLocalStore(totalBytesRead);
// update replication lag in ReplicaSyncUpManager
if (replicaSyncUpManager != null) {
Store localStore = storeManager.getStore(partitionId);
if (localStore.getCurrentState() == ReplicaState.INACTIVE) {
// If the local store is in INACTIVE state, the deactivation process has been initiated and is in progress on this
// replica. We update the SyncUpManager with the peer's lag from the last PUT offset in the local store.
// It's ok if deactivation has completed and a concurrent metadata request attempts to update the lag of the same
// replica again, because the SyncUpManager has a lock to ensure only one request will call the
// onDeactivationComplete() method. The local replica should already have been removed when another request acquires the lock.
replicaSyncUpManager.updateReplicaLagAndCheckSyncStatus(localReplica, remoteReplicaInfo.getReplicaId(), localStore.getEndPositionOfLastPut() - totalBytesRead, ReplicaState.INACTIVE);
} else if (localStore.getCurrentState() == ReplicaState.OFFLINE && localStore.isDecommissionInProgress()) {
// If the local store is in OFFLINE state, we need more info to determine whether the replica is really in the
// Inactive-To-Offline transition, so we check if the decommission file is present. If present, we update the
// SyncUpManager with the peer's lag from the end offset in the local store.
replicaSyncUpManager.updateReplicaLagAndCheckSyncStatus(localReplica, remoteReplicaInfo.getReplicaId(), localStore.getSizeInBytes() - totalBytesRead, ReplicaState.OFFLINE);
}
}
}
}
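
The two branches compute the peer's lag from different reference points: for an INACTIVE (deactivating) store the lag is measured from the end offset of the last PUT, while for an OFFLINE store with a decommission in progress it is measured from the store's total size. A small worked example with made-up offsets, not values from a real store:

// Illustrative numbers only; real offsets come from the Store implementation.
public final class LagExample {
  public static void main(String[] args) {
    long totalBytesReadByPeer = 9_500;

    // INACTIVE store: deactivation only needs peers to catch up to the last PUT.
    long endPositionOfLastPut = 10_000;
    long inactiveLag = endPositionOfLastPut - totalBytesReadByPeer; // 500 bytes

    // OFFLINE store being decommissioned: peers must catch up to the full size,
    // which can include records written after the last PUT (e.g. delete/TTL-update records).
    long storeSizeInBytes = 12_000;
    long offlineLag = storeSizeInBytes - totalBytesReadByPeer; // 2500 bytes

    System.out.println("INACTIVE lag = " + inactiveLag + ", OFFLINE lag = " + offlineLag);
  }
}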
Use of com.github.ambry.clustermap.ReplicaId in project ambry by linkedin.
The class ReplicationManager, method createRemoteReplicaInfos.
/**
* Create {@link RemoteReplicaInfo}(s) associated with the given local replica.
* @param peerReplicas the list of peer replicas of the given local replica
* @param replicaId the local replica
* @return the list of {@link RemoteReplicaInfo} associated with the local replica.
*/
private List<RemoteReplicaInfo> createRemoteReplicaInfos(List<? extends ReplicaId> peerReplicas, ReplicaId replicaId) {
List<RemoteReplicaInfo> remoteReplicaInfos = new ArrayList<>();
PartitionId partition = replicaId.getPartitionId();
Store store = storeManager.getStore(partition);
for (ReplicaId remoteReplica : peerReplicas) {
// We need to ensure that a replica token gets persisted only after the corresponding data in the
// store gets flushed to disk. We use the store flush interval multiplied by a constant factor
// to determine the token flush interval
FindToken findToken = this.tokenHelper.getFindTokenFactoryFromReplicaType(remoteReplica.getReplicaType()).getNewFindToken();
RemoteReplicaInfo remoteReplicaInfo = new RemoteReplicaInfo(remoteReplica, replicaId, store, findToken, TimeUnit.SECONDS.toMillis(storeConfig.storeDataFlushIntervalSeconds) * Replication_Delay_Multiplier, SystemTime.getInstance(), remoteReplica.getDataNodeId().getPortToConnectTo());
replicationMetrics.addMetricsForRemoteReplicaInfo(remoteReplicaInfo, trackPerPartitionLagInMetric);
remoteReplicaInfos.add(remoteReplicaInfo);
}
replicationMetrics.addLagMetricForPartition(partition, replicationConfig.replicationTrackPerPartitionLagFromRemote);
return remoteReplicaInfos;
}
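
The token persist delay passed to the RemoteReplicaInfo constructor is the store flush interval (in seconds, from config) converted to milliseconds and multiplied by Replication_Delay_Multiplier, so a token is only persisted well after the corresponding data has had a chance to be flushed. A small worked example with assumed sample values (the actual config default and multiplier may differ):

import java.util.concurrent.TimeUnit;

// Worked example of the token persist delay computed in createRemoteReplicaInfos().
// The flush interval and multiplier below are assumed sample values, not the
// authoritative configuration defaults.
public final class TokenPersistDelayExample {
  public static void main(String[] args) {
    int storeDataFlushIntervalSeconds = 60; // assumed config value
    int replicationDelayMultiplier = 5;     // assumed multiplier

    long tokenPersistDelayMs =
        TimeUnit.SECONDS.toMillis(storeDataFlushIntervalSeconds) * replicationDelayMultiplier;
    // 60 s -> 60_000 ms, times 5 -> 300_000 ms (5 minutes)
    System.out.println("Token persist delay: " + tokenPersistDelayMs + " ms");
  }
}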