Example 31 with ReplicaId

Use of com.github.ambry.clustermap.ReplicaId in project ambry by linkedin.

From the class SocketNetworkClient, method prepareSends.

/**
 * Process the requests in the pendingRequestsQueue. Create {@link ResponseInfo} for those requests that have timed
 * out while waiting in the queue. Then, attempt to prepare {@link NetworkSend}s by checking out connections for
 * the rest of the requests in the queue.
 * @param requestsToDrop the list of correlation IDs representing the requests that can be dropped. If any of these
 *                       correlation IDs match pending requests, those pending requests will not be sent out.
 * @param responseInfoList the list to populate with responseInfos for requests that timed out waiting for
 *                         connections.
 * @return the list of {@link NetworkSend} objects to hand over to the Selector.
 */
private List<NetworkSend> prepareSends(Set<Integer> requestsToDrop, List<ResponseInfo> responseInfoList) {
    List<NetworkSend> sends = new ArrayList<>();
    ListIterator<RequestMetadata> iter = pendingRequests.listIterator();
    /* Drop requests that have waited too long */
    while (iter.hasNext()) {
        RequestMetadata requestMetadata = iter.next();
        if (time.milliseconds() - requestMetadata.requestQueuedAtMs > checkoutTimeoutMs) {
            responseInfoList.add(new ResponseInfo(requestMetadata.requestInfo, NetworkClientErrorCode.ConnectionUnavailable, null));
            requestMetadata.requestInfo.getRequest().release();
            logger.trace("Failing request to host {} port {} due to connection unavailability", requestMetadata.requestInfo.getHost(), requestMetadata.requestInfo.getPort());
            iter.remove();
            if (requestMetadata.pendingConnectionId != null) {
                pendingConnectionsToAssociatedRequests.remove(requestMetadata.pendingConnectionId);
                requestMetadata.pendingConnectionId = null;
            }
            String connId = correlationIdInFlightToConnectionId.get(requestMetadata.requestInfo.getRequest().getCorrelationId());
            if (connId != null) {
                connectionTracker.checkInConnection(connId);
                connectionIdToRequestInFlight.remove(connId);
            }
            networkMetrics.connectionCheckoutTimeoutError.inc();
        } else {
            // Since requests are ordered by time, once the first request that cannot be dropped is found,
            // we leave it and the rest to be iterated over in the next while loop. Just move the cursor back one
            // element so that this request is processed there.
            iter.previous();
            break;
        }
    }
    while (iter.hasNext()) {
        RequestMetadata requestMetadata = iter.next();
        try {
            String host = requestMetadata.requestInfo.getHost();
            Port port = requestMetadata.requestInfo.getPort();
            ReplicaId replicaId = requestMetadata.requestInfo.getReplicaId();
            if (replicaId == null) {
                throw new IllegalStateException("ReplicaId in request is null.");
            }
            if (requestsToDrop.contains(requestMetadata.requestInfo.getRequest().getCorrelationId())) {
                responseInfoList.add(new ResponseInfo(requestMetadata.requestInfo, NetworkClientErrorCode.ConnectionUnavailable, null));
                requestMetadata.requestInfo.getRequest().release();
                if (requestMetadata.pendingConnectionId != null) {
                    pendingConnectionsToAssociatedRequests.remove(requestMetadata.pendingConnectionId);
                    requestMetadata.pendingConnectionId = null;
                }
                iter.remove();
            } else {
                String connId = connectionTracker.checkOutConnection(host, port, replicaId.getDataNodeId());
                if (connId == null) {
                    networkMetrics.connectionNotAvailable.inc();
                    if (requestMetadata.pendingConnectionId == null) {
                        if (connectionTracker.mayCreateNewConnection(host, port, replicaId.getDataNodeId())) {
                            connId = connectionTracker.connectAndTrack(this::connect, host, port, replicaId.getDataNodeId());
                            requestMetadata.pendingConnectionId = connId;
                            pendingConnectionsToAssociatedRequests.put(connId, requestMetadata);
                            logger.trace("Initiated a connection to host {} port {} ", host, port);
                        } else {
                            networkMetrics.connectionReachLimit.inc();
                        }
                    }
                } else {
                    if (requestMetadata.pendingConnectionId != null) {
                        pendingConnectionsToAssociatedRequests.remove(requestMetadata.pendingConnectionId);
                        requestMetadata.pendingConnectionId = null;
                    }
                    logger.trace("Connection checkout succeeded for {}:{} with connectionId {} ", host, port, connId);
                    sends.add(new NetworkSend(connId, requestMetadata.requestInfo.getRequest(), null, time));
                    connectionIdToRequestInFlight.put(connId, requestMetadata);
                    correlationIdInFlightToConnectionId.put(requestMetadata.requestInfo.getRequest().getCorrelationId(), connId);
                    iter.remove();
                    requestMetadata.onRequestDequeue();
                }
            }
        } catch (IOException e) {
            networkMetrics.networkClientIOError.inc();
            logger.error("Received exception while checking out a connection", e);
        }
    }
    return sends;
}
Also used : ArrayList(java.util.ArrayList) IOException(java.io.IOException) ReplicaId(com.github.ambry.clustermap.ReplicaId)
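
The first loop in prepareSends relies on the pending queue being ordered by enqueue time, so the scan can stop at the first request that has not yet exceeded the checkout timeout. Below is a minimal, self-contained sketch of that drop-expired-head pattern; the PendingRequest record and the 5-second timeout are illustrative stand-ins, not Ambry types.

import java.util.ArrayDeque;
import java.util.Deque;

public class CheckoutTimeoutSketch {

    // Hypothetical stand-in for Ambry's RequestMetadata.
    record PendingRequest(int correlationId, long queuedAtMs) {}

    static final long CHECKOUT_TIMEOUT_MS = 5_000; // assumed value

    // Drops requests that have waited longer than the timeout. Because the deque is
    // ordered by enqueue time, the scan stops at the first request still within time,
    // just as the listIterator loop above breaks and rewinds one element.
    static int dropTimedOut(Deque<PendingRequest> pending, long nowMs) {
        int dropped = 0;
        while (!pending.isEmpty() && nowMs - pending.peekFirst().queuedAtMs() > CHECKOUT_TIMEOUT_MS) {
            pending.pollFirst();
            dropped++;
        }
        return dropped;
    }

    public static void main(String[] args) {
        Deque<PendingRequest> pending = new ArrayDeque<>();
        pending.add(new PendingRequest(1, 0));     // queued 10 seconds before "now"
        pending.add(new PendingRequest(2, 8_000)); // queued 2 seconds before "now"
        System.out.println(dropTimedOut(pending, 10_000) + " request(s) dropped"); // prints "1 request(s) dropped"
    }
}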

Example 32 with ReplicaId

Use of com.github.ambry.clustermap.ReplicaId in project ambry by linkedin.

From the class ReplicaThread, method exchangeMetadata.

/**
 * Gets all the metadata about messages from the remote replicas since the last token. Checks the messages against
 * the local store and finds all the messages that are missing. For the messages that are not missing, updates the
 * delete and TTL state.
 * @param connectedChannel The connected channel that represents a connection to the remote replica
 * @param replicasToReplicatePerNode The information about the replicas that are being replicated
 * @return List of ExchangeMetadataResponse that contains the set of store keys that are missing from the local
 *         store and are present in the remote replicas, along with the new token from the remote replicas
 * @throws IOException
 * @throws ReplicationException
 */
List<ExchangeMetadataResponse> exchangeMetadata(ConnectedChannel connectedChannel, List<RemoteReplicaInfo> replicasToReplicatePerNode) throws IOException, ReplicationException {
    long exchangeMetadataStartTimeInMs = time.milliseconds();
    List<ExchangeMetadataResponse> exchangeMetadataResponseList = new ArrayList<>();
    if (replicasToReplicatePerNode.size() > 0) {
        try {
            DataNodeId remoteNode = replicasToReplicatePerNode.get(0).getReplicaId().getDataNodeId();
            ReplicaMetadataResponse response = getReplicaMetadataResponse(replicasToReplicatePerNode, connectedChannel, remoteNode);
            long startTimeInMs = time.milliseconds();
            Map<StoreKey, StoreKey> remoteKeyToLocalKeyMap = batchConvertReplicaMetadataResponseKeys(response);
            for (int i = 0; i < response.getReplicaMetadataResponseInfoList().size(); i++) {
                RemoteReplicaInfo remoteReplicaInfo = replicasToReplicatePerNode.get(i);
                ReplicaMetadataResponseInfo replicaMetadataResponseInfo = response.getReplicaMetadataResponseInfoList().get(i);
                responseHandler.onEvent(remoteReplicaInfo.getReplicaId(), replicaMetadataResponseInfo.getError());
                if (replicaMetadataResponseInfo.getError() == ServerErrorCode.No_Error) {
                    // Skip stores that were stopped during call to getReplicaMetadataResponse
                    if (!remoteReplicaInfo.getLocalStore().isStarted()) {
                        exchangeMetadataResponseList.add(new ExchangeMetadataResponse(ServerErrorCode.Temporarily_Disabled));
                    } else {
                        try {
                            logger.trace("Remote node: {} Thread name: {} Remote replica: {} Token from remote: {} Replica lag: {} ", remoteNode, threadName, remoteReplicaInfo.getReplicaId(), replicaMetadataResponseInfo.getFindToken(), replicaMetadataResponseInfo.getRemoteReplicaLagInBytes());
                            Set<MessageInfo> remoteMissingStoreMessages = getMissingStoreMessages(replicaMetadataResponseInfo, remoteNode, remoteReplicaInfo);
                            processReplicaMetadataResponse(remoteMissingStoreMessages, replicaMetadataResponseInfo, remoteReplicaInfo, remoteNode, remoteKeyToLocalKeyMap);
                            // Get the converted keys for the missing keys of this replica (to store them along with missing keys in
                            // the exchange metadata response). For leader based replication, these are used during processing
                            // of missing keys for non-leader replica pairs which will come later via leader<->leader replication.
                            Map<StoreKey, StoreKey> remoteKeyToLocalKeySubMap = new HashMap<>();
                            remoteMissingStoreMessages.forEach(remoteMissingStoreMessage -> {
                                StoreKey remoteKey = remoteMissingStoreMessage.getStoreKey();
                                remoteKeyToLocalKeySubMap.put(remoteKey, remoteKeyToLocalKeyMap.get(remoteKey));
                            });
                            ExchangeMetadataResponse exchangeMetadataResponse = new ExchangeMetadataResponse(remoteMissingStoreMessages, replicaMetadataResponseInfo.getFindToken(), replicaMetadataResponseInfo.getRemoteReplicaLagInBytes(), remoteKeyToLocalKeySubMap, time);
                            // update replication lag in ReplicaSyncUpManager
                            if (replicaSyncUpManager != null && remoteReplicaInfo.getLocalStore().getCurrentState() == ReplicaState.BOOTSTRAP) {
                                ReplicaId localReplica = remoteReplicaInfo.getLocalReplicaId();
                                ReplicaId remoteReplica = remoteReplicaInfo.getReplicaId();
                                boolean isSyncCompleted = replicaSyncUpManager.updateReplicaLagAndCheckSyncStatus(localReplica, remoteReplica, exchangeMetadataResponse.localLagFromRemoteInBytes, ReplicaState.STANDBY);
                                // if catchup is completed by this update call, we can complete bootstrap in local store
                                if (isSyncCompleted) {
                                    // complete BOOTSTRAP -> STANDBY transition
                                    remoteReplicaInfo.getLocalStore().setCurrentState(ReplicaState.STANDBY);
                                    remoteReplicaInfo.getLocalStore().completeBootstrap();
                                }
                            }
                            // If the remote token has not moved forward, wait for the back-off time before sending the next metadata request
                            if (remoteReplicaInfo.getToken().equals(exchangeMetadataResponse.remoteToken)) {
                                remoteReplicaInfo.setReEnableReplicationTime(time.milliseconds() + replicationConfig.replicationSyncedReplicaBackoffDurationMs);
                                syncedBackOffCount.inc();
                            }
                            // If there are no missing keys, just advance the token
                            if (exchangeMetadataResponse.missingStoreMessages.size() == 0) {
                                remoteReplicaInfo.setToken(exchangeMetadataResponse.remoteToken);
                                remoteReplicaInfo.setLocalLagFromRemoteInBytes(exchangeMetadataResponse.localLagFromRemoteInBytes);
                                logger.trace("Remote node: {} Thread name: {} Remote replica: {} Token after speaking to remote node: {}", remoteNode, threadName, remoteReplicaInfo.getReplicaId(), exchangeMetadataResponse.remoteToken);
                            }
                            replicationMetrics.updateLagMetricForRemoteReplica(remoteReplicaInfo, exchangeMetadataResponse.localLagFromRemoteInBytes);
                            if (replicaMetadataResponseInfo.getMessageInfoList().size() > 0) {
                                replicationMetrics.updateCatchupPointMetricForCloudReplica(remoteReplicaInfo, replicaMetadataResponseInfo.getMessageInfoList().get(replicaMetadataResponseInfo.getMessageInfoList().size() - 1).getOperationTimeMs());
                            }
                            // Add exchangeMetadataResponse to the list at the end, after operations such as the
                            // replicaSyncUpManager update (if not null) have completed. The reason is we may get exceptions
                            // in between (e.g., replicaSyncUpManager may throw an exception) and end up adding one more
                            // exchangeMetadataResponse associated with the same RemoteReplicaInfo.
                            exchangeMetadataResponseList.add(exchangeMetadataResponse);
                        } catch (Exception e) {
                            if (e instanceof StoreException && ((StoreException) e).getErrorCode() == StoreErrorCodes.Store_Not_Started) {
                                // Must have just been stopped, just skip it and move on.
                                logger.info("Local store not started for remote replica: {}", remoteReplicaInfo.getReplicaId());
                                exchangeMetadataResponseList.add(new ExchangeMetadataResponse(ServerErrorCode.Temporarily_Disabled));
                            } else {
                                logger.error("Remote node: {} Thread name: {} Remote replica: {}", remoteNode, threadName, remoteReplicaInfo.getReplicaId(), e);
                                replicationMetrics.updateLocalStoreError(remoteReplicaInfo.getReplicaId());
                                responseHandler.onEvent(remoteReplicaInfo.getReplicaId(), e);
                                exchangeMetadataResponseList.add(new ExchangeMetadataResponse(ServerErrorCode.Unknown_Error));
                            }
                        }
                    }
                } else {
                    replicationMetrics.updateMetadataRequestError(remoteReplicaInfo.getReplicaId());
                    logger.error("Remote node: {} Thread name: {} Remote replica: {} Server error: {}", remoteNode, threadName, remoteReplicaInfo.getReplicaId(), replicaMetadataResponseInfo.getError());
                    exchangeMetadataResponseList.add(new ExchangeMetadataResponse(replicaMetadataResponseInfo.getError()));
                }
                if (replicatingFromRemoteColo && leaderBasedReplicationAdmin != null) {
                    ExchangeMetadataResponse exchangeMetadataResponse = exchangeMetadataResponseList.get(i);
                    if (exchangeMetadataResponse.serverErrorCode.equals(ServerErrorCode.No_Error)) {
                        // If leader-based replication is enabled, store the metadata exchange received for the remote
                        // replica, as standby replicas will not send GET requests for the missing store keys and instead
                        // track them via leader <-> leader exchanges and intra-dc replication.
                        remoteReplicaInfo.setExchangeMetadataResponse(new ExchangeMetadataResponse(exchangeMetadataResponse));
                        // It is possible that some of the missing keys found in exchange metadata response are written in parallel
                        // by other replica threads since the time we calculated it. Go through the local store once more and
                        // update missing keys set stored in the exchangeMetadataResponse for the remote replica.
                        refreshMissingStoreMessagesForStandbyReplica(remoteReplicaInfo);
                    }
                }
            }
            long processMetadataResponseTimeInMs = time.milliseconds() - startTimeInMs;
            logger.trace("Remote node: {} Thread name: {} processMetadataResponseTime: {}", remoteNode, threadName, processMetadataResponseTimeInMs);
        } finally {
            long exchangeMetadataTime = time.milliseconds() - exchangeMetadataStartTimeInMs;
            replicationMetrics.updateExchangeMetadataTime(exchangeMetadataTime, replicatingFromRemoteColo, replicatingOverSsl, datacenterName);
        }
    }
    return exchangeMetadataResponseList;
}
Also used : ReplicaMetadataResponse(com.github.ambry.protocol.ReplicaMetadataResponse) ReplicaMetadataResponseInfo(com.github.ambry.protocol.ReplicaMetadataResponseInfo) HashMap(java.util.HashMap) ArrayList(java.util.ArrayList) StoreKey(com.github.ambry.store.StoreKey) ReplicaId(com.github.ambry.clustermap.ReplicaId) StoreException(com.github.ambry.store.StoreException) IOException(java.io.IOException) MessageInfo(com.github.ambry.store.MessageInfo) DataNodeId(com.github.ambry.clustermap.DataNodeId)
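
A detail worth highlighting in exchangeMetadata: when the token returned by a remote replica equals the one already held, the replica is considered caught up and a back-off window is scheduled before the next metadata request to it. Here is a minimal sketch of that check under assumed names; RemoteReplicaState and the 1-second back-off are illustrative, not Ambry's actual classes or config values.

public class SyncedBackoffSketch {

    // Hypothetical stand-in for the replication state kept per remote replica.
    static class RemoteReplicaState {
        String currentToken = "token-0";
        long reEnableReplicationTimeMs = 0;
    }

    static final long SYNCED_BACKOFF_MS = 1_000; // assumed config value

    // If the remote token has not moved forward, schedule a back-off before the next
    // metadata request to this replica, mirroring setReEnableReplicationTime above.
    static boolean backOffIfNoProgress(RemoteReplicaState state, String remoteToken, long nowMs) {
        if (state.currentToken.equals(remoteToken)) {
            state.reEnableReplicationTimeMs = nowMs + SYNCED_BACKOFF_MS;
            return true;
        }
        return false;
    }

    public static void main(String[] args) {
        RemoteReplicaState state = new RemoteReplicaState();
        System.out.println(backOffIfNoProgress(state, "token-0", 0));   // true: token unchanged, back off
        System.out.println(backOffIfNoProgress(state, "token-1", 500)); // false: token advanced, keep going
    }
}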

Example 33 with ReplicaId

Use of com.github.ambry.clustermap.ReplicaId in project ambry by linkedin.

From the class ReplicaThread, method replicate.

/**
 * Does replication for replicas grouped by {@link DataNodeId}.
 * A replication cycle between two replicas involves the following steps:
 *    1. Exchange metadata: fetch the metadata of blobs added to the remote replica since the last synchronization
 *       point and identify the ones missing in the local store.
 *    2. Fetch missing blobs: fetch the missing blobs by issuing a GET request to the remote replica and write them
 *       to the local store.
 *
 *  During cross-colo replication, depending on the {@link ReplicationModelType}, the missing blobs are either fetched
 *  from all remote replicas (if modelType == ALL_TO_ALL) or only fetched for local leader replicas from their remote
 *  leader replicas (if modelType == LEADER_BASED). In the latter case, non-leader replica pairs (leader <-> standby,
 *  standby <-> leader, standby <-> standby) will get their missing blobs from their corresponding leader<->leader
 *  exchanges and intra-dc replication.
 *
 *  Here is a table listing what is exchanged between local and remote replicas based on their roles
 *  (leader/standby) when {@link ReplicationModelType} is LEADER_BASED.
 *
 *              |   Local Leader    |     Local Standby   |   Remote Leader   |  Remote Standby
 *            -------------------------------------------------------------------------------------
 *     Leader:  |        ---        |  metadata and data  | metadata and data |   metadata only
 *     Standby: | metadata and data |  metadata and data  | metadata only     |   metadata only
 */
public void replicate() {
    boolean allCaughtUp = true;
    Map<DataNodeId, List<RemoteReplicaInfo>> dataNodeToRemoteReplicaInfo = getRemoteReplicaInfos();
    logger.trace("Replicating from {} DataNodes.", replicasToReplicateGroupedByNode.size());
    for (Map.Entry<DataNodeId, List<RemoteReplicaInfo>> entry : dataNodeToRemoteReplicaInfo.entrySet()) {
        DataNodeId remoteNode = entry.getKey();
        if (!running) {
            break;
        }
        List<RemoteReplicaInfo> replicasToReplicatePerNode = entry.getValue();
        Timer.Context context = null;
        Timer.Context portTypeBasedContext = null;
        if (replicatingFromRemoteColo) {
            context = replicationMetrics.interColoReplicationLatency.get(remoteNode.getDatacenterName()).time();
            if (replicatingOverSsl) {
                portTypeBasedContext = replicationMetrics.sslInterColoReplicationLatency.get(remoteNode.getDatacenterName()).time();
            } else {
                portTypeBasedContext = replicationMetrics.plainTextInterColoReplicationLatency.get(remoteNode.getDatacenterName()).time();
            }
        } else {
            context = replicationMetrics.intraColoReplicationLatency.time();
            if (replicatingOverSsl) {
                portTypeBasedContext = replicationMetrics.sslIntraColoReplicationLatency.time();
            } else {
                portTypeBasedContext = replicationMetrics.plainTextIntraColoReplicationLatency.time();
            }
        }
        ConnectedChannel connectedChannel = null;
        long checkoutConnectionTimeInMs = -1;
        long exchangeMetadataTimeInMs = -1;
        long fixMissingStoreKeysTimeInMs = -1;
        long replicationStartTimeInMs = time.milliseconds();
        long startTimeInMs = replicationStartTimeInMs;
        // Get a list of active replicas that need to be included in this replication cycle
        List<RemoteReplicaInfo> activeReplicasPerNode = new ArrayList<>();
        List<RemoteReplicaInfo> standbyReplicasWithNoProgress = new ArrayList<>();
        for (RemoteReplicaInfo remoteReplicaInfo : replicasToReplicatePerNode) {
            ReplicaId replicaId = remoteReplicaInfo.getReplicaId();
            boolean inBackoff = time.milliseconds() < remoteReplicaInfo.getReEnableReplicationTime();
            if (replicaId.isDown() || inBackoff || remoteReplicaInfo.getLocalStore().getCurrentState() == ReplicaState.OFFLINE || replicationDisabledPartitions.contains(replicaId.getPartitionId())) {
                logger.debug("Skipping replication on replica {} because one of following conditions is true: remote replica is down " + "= {}; in backoff = {}; local store is offline = {}; replication is disabled = {}.", replicaId.getPartitionId().toPathString(), replicaId.isDown(), inBackoff, remoteReplicaInfo.getLocalStore().getCurrentState() == ReplicaState.OFFLINE, replicationDisabledPartitions.contains(replicaId.getPartitionId()));
                continue;
            }
            if (replicatingFromRemoteColo && leaderBasedReplicationAdmin != null) {
                // check if all the missing keys for standby replicas from the previous replication cycle have now been
                // obtained via the leader replica. If we still have missing keys, don't include those replicas in the
                // current replication cycle, to avoid sending duplicate metadata requests since their token wouldn't
                // have advanced.
                processMissingKeysFromPreviousMetadataResponse(remoteReplicaInfo);
                if (containsMissingKeysFromPreviousMetadataExchange(remoteReplicaInfo)) {
                    standbyReplicasWithNoProgress.add(remoteReplicaInfo);
                    continue;
                }
            }
            activeReplicasPerNode.add(remoteReplicaInfo);
        }
        logger.trace("Replicating from {} RemoteReplicaInfos.", activeReplicasPerNode.size());
        // use a variable to track the current replica list to replicate (for logging purposes)
        List<RemoteReplicaInfo> currentReplicaList = activeReplicasPerNode;
        try {
            if (activeReplicasPerNode.size() > 0) {
                allCaughtUp = false;
                // if maxReplicaCountPerRequest > 0, split remote replicas on same node into multiple lists; otherwise there is
                // no limit.
                List<List<RemoteReplicaInfo>> activeReplicaSubLists = maxReplicaCountPerRequest > 0 ? Utils.partitionList(activeReplicasPerNode, maxReplicaCountPerRequest) : Collections.singletonList(activeReplicasPerNode);
                startTimeInMs = time.milliseconds();
                connectedChannel = connectionPool.checkOutConnection(remoteNode.getHostname(), activeReplicasPerNode.get(0).getPort(), replicationConfig.replicationConnectionPoolCheckoutTimeoutMs);
                checkoutConnectionTimeInMs = time.milliseconds() - startTimeInMs;
                // we check out a ConnectedChannel once and replicate remote replicas in batches via the same ConnectedChannel
                for (List<RemoteReplicaInfo> replicaSubList : activeReplicaSubLists) {
                    exchangeMetadataTimeInMs = -1;
                    fixMissingStoreKeysTimeInMs = -1;
                    currentReplicaList = replicaSubList;
                    logger.debug("Exchanging metadata with {} remote replicas on {}", currentReplicaList.size(), remoteNode);
                    startTimeInMs = time.milliseconds();
                    List<ExchangeMetadataResponse> exchangeMetadataResponseList = exchangeMetadata(connectedChannel, replicaSubList);
                    exchangeMetadataTimeInMs = time.milliseconds() - startTimeInMs;
                    if (replicatingFromRemoteColo && leaderBasedReplicationAdmin != null) {
                        // If leader based replication is enabled and we are replicating from remote colo, fetch the missing blobs
                        // only for local leader replicas from their corresponding peer leader replicas (Leader <-> Leader).
                        // Non-leader replica pairs (standby <-> leaders, leader <-> standby, standby <-> standby) will get their
                        // missing blobs from their leader pair exchanges and intra-dc replication.
                        List<RemoteReplicaInfo> leaderReplicaList = new ArrayList<>();
                        List<ExchangeMetadataResponse> exchangeMetadataResponseListForLeaderReplicas = new ArrayList<>();
                        getLeaderReplicaList(replicaSubList, exchangeMetadataResponseList, leaderReplicaList, exchangeMetadataResponseListForLeaderReplicas);
                        replicaSubList = leaderReplicaList;
                        exchangeMetadataResponseList = exchangeMetadataResponseListForLeaderReplicas;
                    }
                    if (replicaSubList.size() > 0) {
                        startTimeInMs = time.milliseconds();
                        fixMissingStoreKeys(connectedChannel, replicaSubList, exchangeMetadataResponseList, false);
                        fixMissingStoreKeysTimeInMs = time.milliseconds() - startTimeInMs;
                    }
                }
            }
            if (replicatingFromRemoteColo && leaderBasedReplicationAdmin != null) {
                // Get a list of blocked standby replicas whose missing keys haven't arrived for a long time.
                // Use case: in leader-based cross-colo replication, standby replicas don't send GET requests for missing
                // keys found in metadata exchanges and expect them to come via leader <-> leader replication.
                // This is a safety measure that has such standby replicas fetch the missing keys themselves, so that
                // they are not stuck waiting indefinitely for the keys to come from the leader.
                // TODO: As an improvement to this, we can first fetch missing blobs from local leader/other replicas in intra-dc first.
                // TODO: If the result to fetch a blob from local dc is Blob_Not_Found, then we can fetch it from replicas in remote datacenter.
                // This will involve co-ordination between replica threads containing replicas of same partition.
                List<RemoteReplicaInfo> standbyReplicasTimedOutOnNoProgress = getRemoteStandbyReplicasTimedOutOnNoProgress(standbyReplicasWithNoProgress);
                if (standbyReplicasTimedOutOnNoProgress.size() > 0) {
                    allCaughtUp = false;
                    currentReplicaList = standbyReplicasTimedOutOnNoProgress;
                    if (connectedChannel == null) {
                        checkoutConnectionTimeInMs = -1;
                        startTimeInMs = time.milliseconds();
                        connectedChannel = connectionPool.checkOutConnection(remoteNode.getHostname(), standbyReplicasTimedOutOnNoProgress.get(0).getPort(), replicationConfig.replicationConnectionPoolCheckoutTimeoutMs);
                        checkoutConnectionTimeInMs = time.milliseconds() - startTimeInMs;
                    }
                    List<ExchangeMetadataResponse> exchangeMetadataResponseListForBlockedReplicas = standbyReplicasTimedOutOnNoProgress.stream().map(remoteReplicaInfo -> new ExchangeMetadataResponse(remoteReplicaInfo.getExchangeMetadataResponse())).collect(Collectors.toList());
                    // Convert (and cache) the remote keys being fetched, since the StoreKeyConverter may have cleared
                    // them from its cache while replicating with other replicas before these standby replicas timed out.
                    List<StoreKey> storeKeysToConvert = exchangeMetadataResponseListForBlockedReplicas.stream().map(ExchangeMetadataResponse::getMissingStoreKeys).flatMap(Collection::stream).collect(Collectors.toList());
                    convertStoreKeys(storeKeysToConvert);
                    exchangeMetadataTimeInMs = 0;
                    fixMissingStoreKeysTimeInMs = -1;
                    logger.debug("Sending GET request to fetch missing keys for standby remote replicas {} timed out on no progress", currentReplicaList);
                    startTimeInMs = time.milliseconds();
                    fixMissingStoreKeys(connectedChannel, standbyReplicasTimedOutOnNoProgress, exchangeMetadataResponseListForBlockedReplicas, true);
                    fixMissingStoreKeysTimeInMs = time.milliseconds() - startTimeInMs;
                }
            }
        } catch (Throwable e) {
            if (checkoutConnectionTimeInMs == -1) {
                // throwable happened in checkout connection phase
                checkoutConnectionTimeInMs = time.milliseconds() - startTimeInMs;
                responseHandler.onEvent(currentReplicaList.get(0).getReplicaId(), e);
            } else if (exchangeMetadataTimeInMs == -1) {
                // throwable happened in exchange metadata phase
                exchangeMetadataTimeInMs = time.milliseconds() - startTimeInMs;
            } else if (fixMissingStoreKeysTimeInMs == -1) {
            // throwable happened in fix missing store keys phase
                fixMissingStoreKeysTimeInMs = time.milliseconds() - startTimeInMs;
            }
            logger.error("Error while talking to peer: Remote node: {}, Thread name: {}, Remote replicas: {}, Current active " + "remote replica list: {}, Checkout connection time: {}, Exchange metadata time: {}, Fix missing " + "store key time {}", remoteNode, threadName, replicasToReplicatePerNode, currentReplicaList, checkoutConnectionTimeInMs, exchangeMetadataTimeInMs, fixMissingStoreKeysTimeInMs, e);
            replicationMetrics.incrementReplicationErrors(replicatingOverSsl);
            if (connectedChannel != null) {
                connectionPool.destroyConnection(connectedChannel);
                connectedChannel = null;
            }
        } finally {
            long totalReplicationTime = time.milliseconds() - replicationStartTimeInMs;
            replicationMetrics.updateTotalReplicationTime(totalReplicationTime, replicatingFromRemoteColo, replicatingOverSsl, datacenterName);
            if (connectedChannel != null) {
                connectionPool.checkInConnection(connectedChannel);
            }
            context.stop();
            portTypeBasedContext.stop();
        }
    }
    long sleepDurationMs = 0;
    if (allCaughtUp && replicationConfig.replicationReplicaThreadIdleSleepDurationMs > 0) {
        sleepDurationMs = replicationConfig.replicationReplicaThreadIdleSleepDurationMs;
        idleCount.inc();
    } else if (threadThrottleDurationMs > 0) {
        sleepDurationMs = threadThrottleDurationMs;
        throttleCount.inc();
    }
    if (sleepDurationMs > 0) {
        try {
            long currentTime = time.milliseconds();
            time.sleep(sleepDurationMs);
            logger.trace("Replica thread: {} slept for {} ms", threadName, time.milliseconds() - currentTime);
        } catch (InterruptedException e) {
            logger.error("Received interrupted exception during throttling", e);
        }
    }
}
Also used : GetOption(com.github.ambry.protocol.GetOption) StoreKeyConverter(com.github.ambry.store.StoreKeyConverter) DataNodeId(com.github.ambry.clustermap.DataNodeId) LoggerFactory(org.slf4j.LoggerFactory) MessageFormatWriteSet(com.github.ambry.messageformat.MessageFormatWriteSet) StoreErrorCodes(com.github.ambry.store.StoreErrorCodes) GetResponse(com.github.ambry.protocol.GetResponse) AtomicInteger(java.util.concurrent.atomic.AtomicInteger) Map(java.util.Map) Counter(com.codahale.metrics.Counter) ReplicaMetadataRequest(com.github.ambry.protocol.ReplicaMetadataRequest) GetRequest(com.github.ambry.protocol.GetRequest) ReplicationConfig(com.github.ambry.config.ReplicationConfig) NotificationSystem(com.github.ambry.notification.NotificationSystem) ReplicaSyncUpManager(com.github.ambry.clustermap.ReplicaSyncUpManager) PartitionResponseInfo(com.github.ambry.protocol.PartitionResponseInfo) Predicate(java.util.function.Predicate) Collection(java.util.Collection) Set(java.util.Set) Utils(com.github.ambry.utils.Utils) Collectors(java.util.stream.Collectors) ConnectedChannel(com.github.ambry.network.ConnectedChannel) ReplicaMetadataRequestInfo(com.github.ambry.protocol.ReplicaMetadataRequestInfo) CountDownLatch(java.util.concurrent.CountDownLatch) StoreKey(com.github.ambry.store.StoreKey) List(java.util.List) ReplicaMetadataResponse(com.github.ambry.protocol.ReplicaMetadataResponse) MessageFormatFlags(com.github.ambry.messageformat.MessageFormatFlags) UpdateType(com.github.ambry.notification.UpdateType) Timer(com.codahale.metrics.Timer) MessageSievingInputStream(com.github.ambry.messageformat.MessageSievingInputStream) PartitionId(com.github.ambry.clustermap.PartitionId) BlobId(com.github.ambry.commons.BlobId) ResponseHandler(com.github.ambry.commons.ResponseHandler) PartitionRequestInfo(com.github.ambry.protocol.PartitionRequestInfo) BlobReplicaSourceType(com.github.ambry.notification.BlobReplicaSourceType) ServerErrorCode(com.github.ambry.server.ServerErrorCode) HashMap(java.util.HashMap) ArrayList(java.util.ArrayList) NettyByteBufDataInputStream(com.github.ambry.utils.NettyByteBufDataInputStream) HashSet(java.util.HashSet) Transformer(com.github.ambry.store.Transformer) ChannelOutput(com.github.ambry.network.ChannelOutput) StoreException(com.github.ambry.store.StoreException) ReplicaMetadataResponseInfo(com.github.ambry.protocol.ReplicaMetadataResponseInfo) CloudDataNode(com.github.ambry.clustermap.CloudDataNode) Time(com.github.ambry.utils.Time) ReplicaState(com.github.ambry.clustermap.ReplicaState) MetricRegistry(com.codahale.metrics.MetricRegistry) Logger(org.slf4j.Logger) ReentrantLock(java.util.concurrent.locks.ReentrantLock) ConnectionPool(com.github.ambry.network.ConnectionPool) ClusterMap(com.github.ambry.clustermap.ClusterMap) IOException(java.io.IOException) Condition(java.util.concurrent.locks.Condition) MessageInfo(com.github.ambry.store.MessageInfo) ReplicaId(com.github.ambry.clustermap.ReplicaId) BlobStore(com.github.ambry.store.BlobStore) Collections(java.util.Collections)
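
replicate() caps the number of replicas included in one metadata request by splitting the per-node replica list into sublists of at most maxReplicaCountPerRequest. Below is a self-contained sketch of that partitioning step, using a plain helper in place of Ambry's Utils.partitionList (whose exact contract is not shown here).

import java.util.ArrayList;
import java.util.List;

public class PartitionListSketch {

    // Splits a list into consecutive sublists of at most maxPerSublist elements,
    // mirroring how replicate() caps the replicas included in one metadata request.
    static <T> List<List<T>> partition(List<T> items, int maxPerSublist) {
        List<List<T>> sublists = new ArrayList<>();
        for (int i = 0; i < items.size(); i += maxPerSublist) {
            sublists.add(items.subList(i, Math.min(i + maxPerSublist, items.size())));
        }
        return sublists;
    }

    public static void main(String[] args) {
        List<String> replicas = List.of("r1", "r2", "r3", "r4", "r5");
        // With maxReplicaCountPerRequest = 2, prints [[r1, r2], [r3, r4], [r5]]
        System.out.println(partition(replicas, 2));
    }
}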

Example 34 with ReplicaId

Use of com.github.ambry.clustermap.ReplicaId in project ambry by linkedin.

From the class ReplicationEngine, method updateTotalBytesReadByRemoteReplica.

@Override
public void updateTotalBytesReadByRemoteReplica(PartitionId partitionId, String hostName, String replicaPath, long totalBytesRead) throws StoreException {
    RemoteReplicaInfo remoteReplicaInfo = getRemoteReplicaInfo(partitionId, hostName, replicaPath);
    if (remoteReplicaInfo != null) {
        ReplicaId localReplica = remoteReplicaInfo.getLocalReplicaId();
        remoteReplicaInfo.setTotalBytesReadFromLocalStore(totalBytesRead);
        // update replication lag in ReplicaSyncUpManager
        if (replicaSyncUpManager != null) {
            Store localStore = storeManager.getStore(partitionId);
            if (localStore.getCurrentState() == ReplicaState.INACTIVE) {
                // if the local store is in INACTIVE state, the deactivation process has been initiated and is in
                // progress on this replica. We update the SyncUpManager with the peer's lag from the last PUT offset
                // in the local store.
                // It's ok if deactivation has completed and a concurrent metadata request attempts to update the lag
                // of the same replica again: SyncUpManager holds a lock to ensure only one request will call the
                // onDeactivationComplete() method, and the local replica should have been removed when another
                // request acquires the lock.
                replicaSyncUpManager.updateReplicaLagAndCheckSyncStatus(localReplica, remoteReplicaInfo.getReplicaId(), localStore.getEndPositionOfLastPut() - totalBytesRead, ReplicaState.INACTIVE);
            } else if (localStore.getCurrentState() == ReplicaState.OFFLINE && localStore.isDecommissionInProgress()) {
                // if the local store is in OFFLINE state, we need more info to determine whether the replica is
                // really in the Inactive-To-Offline transition, so we check if the decommission file is present.
                // If present, we update the SyncUpManager with the peer's lag from the end offset of the local store.
                replicaSyncUpManager.updateReplicaLagAndCheckSyncStatus(localReplica, remoteReplicaInfo.getReplicaId(), localStore.getSizeInBytes() - totalBytesRead, ReplicaState.OFFLINE);
            }
        }
    }
}
Also used : Store(com.github.ambry.store.Store) ReplicaId(com.github.ambry.clustermap.ReplicaId)
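
The lag handed to updateReplicaLagAndCheckSyncStatus above is plain arithmetic: a local-store offset minus the bytes the peer has read, where the offset is the end position of the last PUT during deactivation (INACTIVE) and the total store size during the Inactive-To-Offline transition (OFFLINE). A minimal sketch of that computation follows; the offsets are made up for illustration.

public class ReplicaLagSketch {

    // Computes the peer's lag from a local-store offset, as in
    // updateTotalBytesReadByRemoteReplica: offset - totalBytesRead.
    static long peerLagInBytes(long localOffset, long totalBytesReadByPeer) {
        return localOffset - totalBytesReadByPeer;
    }

    public static void main(String[] args) {
        long endPositionOfLastPut = 4_096; // INACTIVE: lag measured from the last PUT offset
        long sizeInBytes = 8_192;          // OFFLINE: lag measured from the store's end offset
        long totalBytesRead = 4_096;
        System.out.println(peerLagInBytes(endPositionOfLastPut, totalBytesRead)); // 0: peer caught up to last PUT
        System.out.println(peerLagInBytes(sizeInBytes, totalBytesRead));          // 4096: peer is 4096 bytes behind
    }
}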

Example 35 with ReplicaId

Use of com.github.ambry.clustermap.ReplicaId in project ambry by linkedin.

From the class ReplicationManager, method createRemoteReplicaInfos.

/**
 * Create {@link RemoteReplicaInfo}(s) that associates with given local replica.
 * @param peerReplicas the list of peer replicas of the given local replica
 * @param replicaId the local replica
 * @return the list of {@link RemoteReplicaInfo} associated with the local replica.
 */
private List<RemoteReplicaInfo> createRemoteReplicaInfos(List<? extends ReplicaId> peerReplicas, ReplicaId replicaId) {
    List<RemoteReplicaInfo> remoteReplicaInfos = new ArrayList<>();
    PartitionId partition = replicaId.getPartitionId();
    Store store = storeManager.getStore(partition);
    for (ReplicaId remoteReplica : peerReplicas) {
        // We need to ensure that a replica token gets persisted only after the corresponding data in the
        // store gets flushed to disk. We use the store flush interval multiplied by a constant factor
        // to determine the token flush interval.
        FindToken findToken = this.tokenHelper.getFindTokenFactoryFromReplicaType(remoteReplica.getReplicaType()).getNewFindToken();
        RemoteReplicaInfo remoteReplicaInfo = new RemoteReplicaInfo(remoteReplica, replicaId, store, findToken, TimeUnit.SECONDS.toMillis(storeConfig.storeDataFlushIntervalSeconds) * Replication_Delay_Multiplier, SystemTime.getInstance(), remoteReplica.getDataNodeId().getPortToConnectTo());
        replicationMetrics.addMetricsForRemoteReplicaInfo(remoteReplicaInfo, trackPerPartitionLagInMetric);
        remoteReplicaInfos.add(remoteReplicaInfo);
    }
    replicationMetrics.addLagMetricForPartition(partition, replicationConfig.replicationTrackPerPartitionLagFromRemote);
    return remoteReplicaInfos;
}
Also used : ArrayList(java.util.ArrayList) Store(com.github.ambry.store.Store) PartitionId(com.github.ambry.clustermap.PartitionId) ReplicaId(com.github.ambry.clustermap.ReplicaId)
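
The token flush interval passed to the RemoteReplicaInfo constructor above is the store's data flush interval times a constant factor, so a token can never be persisted before the data it points to has been flushed. A short sketch of that computation; the multiplier of 5 is an assumption here, standing in for Ambry's Replication_Delay_Multiplier constant.

import java.util.concurrent.TimeUnit;

public class TokenFlushIntervalSketch {

    // Assumed value; Ambry defines the actual constant as Replication_Delay_Multiplier.
    static final long REPLICATION_DELAY_MULTIPLIER = 5;

    // Token flush interval = store data flush interval * multiplier, so a replica
    // token is persisted only after the data it references has been flushed to disk.
    static long tokenPersistIntervalMs(long storeDataFlushIntervalSeconds) {
        return TimeUnit.SECONDS.toMillis(storeDataFlushIntervalSeconds) * REPLICATION_DELAY_MULTIPLIER;
    }

    public static void main(String[] args) {
        // With a 60-second store flush interval, tokens persist at roughly 300-second intervals.
        System.out.println(tokenPersistIntervalMs(60) + " ms"); // prints "300000 ms"
    }
}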

Aggregations

ReplicaId (com.github.ambry.clustermap.ReplicaId): 147
Test (org.junit.Test): 83
PartitionId (com.github.ambry.clustermap.PartitionId): 68
MockPartitionId (com.github.ambry.clustermap.MockPartitionId): 60
MockReplicaId (com.github.ambry.clustermap.MockReplicaId): 57
ArrayList (java.util.ArrayList): 55
MockDataNodeId (com.github.ambry.clustermap.MockDataNodeId): 43
DataNodeId (com.github.ambry.clustermap.DataNodeId): 32
MockClusterMap (com.github.ambry.clustermap.MockClusterMap): 31
MetricRegistry (com.codahale.metrics.MetricRegistry): 29
HashMap (java.util.HashMap): 28
HashSet (java.util.HashSet): 25
ClusterMapConfig (com.github.ambry.config.ClusterMapConfig): 24
VerifiableProperties (com.github.ambry.config.VerifiableProperties): 24
BlobStoreTest (com.github.ambry.store.BlobStoreTest): 24
File (java.io.File): 24
List (java.util.List): 21
Map (java.util.Map): 21
Port (com.github.ambry.network.Port): 20
Properties (java.util.Properties): 20