Search in sources :

Example 31 with StoreKey

use of com.github.ambry.store.StoreKey in project ambry by linkedin.

the class ReplicaThread method getMissingStoreMessages.

/**
 * Works out which of the messages reported by a remote replica are absent from the local store.
 * <p>
 * Every remote key is first translated through the store key converter. A message is dropped from consideration
 * when its key converts to {@code null}, or when container-deletion handling is enabled in the replication config,
 * a skip predicate is configured and that predicate accepts the message. The remaining converted keys are handed
 * to the local store's {@code findMissingKeys}, and the remote messages whose converted key is reported missing are
 * returned.
 * @param replicaMetadataResponseInfo The response that carries the message infos sent by the remote node
 * @param remoteNode The remote node being replicated from (used for logging only in this method)
 * @param remoteReplicaInfo The remote replica descriptor; supplies the replica id and the local store to check
 * @return Set of remote messages whose converted keys are missing from the local store
 * @throws StoreException if store error (usually IOError) occurs when getting missing keys.
 */
Set<MessageInfo> getMissingStoreMessages(ReplicaMetadataResponseInfo replicaMetadataResponseInfo, DataNodeId remoteNode, RemoteReplicaInfo remoteReplicaInfo) throws StoreException {
    final long beginMs = time.milliseconds();
    final List<MessageInfo> remoteMessages = replicaMetadataResponseInfo.getMessageInfoList();

    // Remote message -> non-null local (converted) key, for every message that is not filtered out.
    final Map<MessageInfo, StoreKey> candidates = new HashMap<>();
    for (MessageInfo remoteMessage : remoteMessages) {
        StoreKey remoteKey = remoteMessage.getStoreKey();
        logger.trace("Remote node: {} Thread name: {} Remote replica: {} Key from remote: {}", remoteNode, threadName, remoteReplicaInfo.getReplicaId(), remoteKey);
        StoreKey localKey = storeKeyConverter.getConverted(remoteKey);
        if (skipPredicate == null) {
            logger.debug("SkipPredicate is null");
        }
        if (localKey == null) {
            // No local equivalent for this key: nothing to look up in the store.
            continue;
        }
        // The predicate is consulted only when container deletion is enabled and a predicate exists.
        boolean skipped = replicationConfig.replicationContainerDeletionEnabled && skipPredicate != null && skipPredicate.test(remoteMessage);
        if (!skipped) {
            candidates.put(remoteMessage, localKey);
        }
    }

    final List<StoreKey> keysToCheck = new ArrayList<>(candidates.values());
    final Set<StoreKey> absentLocalKeys = remoteReplicaInfo.getLocalStore().findMissingKeys(keysToCheck);

    final Set<MessageInfo> missingRemoteMessages = new HashSet<>();
    for (Map.Entry<MessageInfo, StoreKey> candidate : candidates.entrySet()) {
        MessageInfo remoteMessage = candidate.getKey();
        StoreKey localKey = candidate.getValue();
        if (!absentLocalKeys.contains(localKey)) {
            continue;
        }
        logger.trace("Remote node: {} Thread name: {} Remote replica: {} Key missing id (converted): {} Key missing id (remote): {}", remoteNode, threadName, remoteReplicaInfo.getReplicaId(), localKey, remoteMessage.getStoreKey());
        missingRemoteMessages.add(remoteMessage);
    }

    // The remote sent messages but none of them is missing locally: count it as "catching up".
    if (!remoteMessages.isEmpty() && missingRemoteMessages.isEmpty()) {
        replicationMetrics.allResponsedKeysExist.inc();
    }
    replicationMetrics.updateCheckMissingKeysTime(time.milliseconds() - beginMs, replicatingFromRemoteColo, datacenterName);
    return missingRemoteMessages;
}
Also used : HashMap(java.util.HashMap) StoreKey(com.github.ambry.store.StoreKey) MessageInfo(com.github.ambry.store.MessageInfo) HashSet(java.util.HashSet)

Example 32 with StoreKey

use of com.github.ambry.store.StoreKey in project ambry by linkedin.

the class ReplicaThread method exchangeMetadata.

/**
 * Gets all the metadata about messages from the remote replicas since last token. Checks the messages with the local
 * store and finds all the messages that are missing. For the messages that are not missing, updates the delete
 * and ttl state.
 * <p>
 * If {@code replicasToReplicatePerNode} is empty, no request is sent and an empty list is returned. Otherwise one
 * {@link ExchangeMetadataResponse} is appended per entry of the remote response, in the order of the response, so
 * the i-th result corresponds to the i-th replica of {@code replicasToReplicatePerNode}. A per-replica failure
 * (remote error code, local store not started, or any exception while processing that replica) is reported through
 * the error code of that replica's {@link ExchangeMetadataResponse} rather than by throwing.
 * <p>
 * The exchange-metadata time metric is updated in a {@code finally} block, i.e. also when an exception escapes.
 * @param connectedChannel The connected channel that represents a connection to the remote replica
 * @param replicasToReplicatePerNode The information about the replicas that is being replicated
 * @return List of ExchangeMetadataResponse that contains the set of store keys that are missing from the local
 *           store and are present in the remote replicas and also the new token from the remote replicas
 * @throws IOException declared by this method; presumably raised by the metadata request/response helpers
 *                     (not visible here)
 * @throws ReplicationException declared by this method; presumably raised by the metadata request/response helpers
 *                              (not visible here)
 */
List<ExchangeMetadataResponse> exchangeMetadata(ConnectedChannel connectedChannel, List<RemoteReplicaInfo> replicasToReplicatePerNode) throws IOException, ReplicationException {
    long exchangeMetadataStartTimeInMs = time.milliseconds();
    List<ExchangeMetadataResponse> exchangeMetadataResponseList = new ArrayList<>();
    if (replicasToReplicatePerNode.size() > 0) {
        try {
            // The remote node is taken from the first replica; presumably every replica in the list lives on that node.
            DataNodeId remoteNode = replicasToReplicatePerNode.get(0).getReplicaId().getDataNodeId();
            ReplicaMetadataResponse response = getReplicaMetadataResponse(replicasToReplicatePerNode, connectedChannel, remoteNode);
            long startTimeInMs = time.milliseconds();
            Map<StoreKey, StoreKey> remoteKeyToLocalKeyMap = batchConvertReplicaMetadataResponseKeys(response);
            // Response infos are paired with the requested replicas purely by position.
            for (int i = 0; i < response.getReplicaMetadataResponseInfoList().size(); i++) {
                RemoteReplicaInfo remoteReplicaInfo = replicasToReplicatePerNode.get(i);
                ReplicaMetadataResponseInfo replicaMetadataResponseInfo = response.getReplicaMetadataResponseInfoList().get(i);
                responseHandler.onEvent(remoteReplicaInfo.getReplicaId(), replicaMetadataResponseInfo.getError());
                if (replicaMetadataResponseInfo.getError() == ServerErrorCode.No_Error) {
                    // Skip stores that were stopped during call to getReplicaMetadataResponse
                    if (!remoteReplicaInfo.getLocalStore().isStarted()) {
                        exchangeMetadataResponseList.add(new ExchangeMetadataResponse(ServerErrorCode.Temporarily_Disabled));
                    } else {
                        try {
                            logger.trace("Remote node: {} Thread name: {} Remote replica: {} Token from remote: {} Replica lag: {} ", remoteNode, threadName, remoteReplicaInfo.getReplicaId(), replicaMetadataResponseInfo.getFindToken(), replicaMetadataResponseInfo.getRemoteReplicaLagInBytes());
                            Set<MessageInfo> remoteMissingStoreMessages = getMissingStoreMessages(replicaMetadataResponseInfo, remoteNode, remoteReplicaInfo);
                            processReplicaMetadataResponse(remoteMissingStoreMessages, replicaMetadataResponseInfo, remoteReplicaInfo, remoteNode, remoteKeyToLocalKeyMap);
                            // Get the converted keys for the missing keys of this replica (to store them along with missing keys in
                            // the exchange metadata response). For leader based replication, these are used during processing
                            // of missing keys for non-leader replica pairs which will come later via leader<->leader replication.
                            Map<StoreKey, StoreKey> remoteKeyToLocalKeySubMap = new HashMap<>();
                            remoteMissingStoreMessages.forEach(remoteMissingStoreMessage -> {
                                StoreKey remoteKey = remoteMissingStoreMessage.getStoreKey();
                                remoteKeyToLocalKeySubMap.put(remoteKey, remoteKeyToLocalKeyMap.get(remoteKey));
                            });
                            ExchangeMetadataResponse exchangeMetadataResponse = new ExchangeMetadataResponse(remoteMissingStoreMessages, replicaMetadataResponseInfo.getFindToken(), replicaMetadataResponseInfo.getRemoteReplicaLagInBytes(), remoteKeyToLocalKeySubMap, time);
                            // update replication lag in ReplicaSyncUpManager
                            if (replicaSyncUpManager != null && remoteReplicaInfo.getLocalStore().getCurrentState() == ReplicaState.BOOTSTRAP) {
                                ReplicaId localReplica = remoteReplicaInfo.getLocalReplicaId();
                                ReplicaId remoteReplica = remoteReplicaInfo.getReplicaId();
                                boolean isSyncCompleted = replicaSyncUpManager.updateReplicaLagAndCheckSyncStatus(localReplica, remoteReplica, exchangeMetadataResponse.localLagFromRemoteInBytes, ReplicaState.STANDBY);
                                // if catchup is completed by this update call, we can complete bootstrap in local store
                                if (isSyncCompleted) {
                                    // complete BOOTSTRAP -> STANDBY transition
                                    remoteReplicaInfo.getLocalStore().setCurrentState(ReplicaState.STANDBY);
                                    remoteReplicaInfo.getLocalStore().completeBootstrap();
                                }
                            }
                            // If remote token has not moved forward, wait for back off time before resending next metadata request
                            if (remoteReplicaInfo.getToken().equals(exchangeMetadataResponse.remoteToken)) {
                                remoteReplicaInfo.setReEnableReplicationTime(time.milliseconds() + replicationConfig.replicationSyncedReplicaBackoffDurationMs);
                                syncedBackOffCount.inc();
                            }
                            // There are no missing keys. We just advance the token
                            if (exchangeMetadataResponse.missingStoreMessages.size() == 0) {
                                remoteReplicaInfo.setToken(exchangeMetadataResponse.remoteToken);
                                remoteReplicaInfo.setLocalLagFromRemoteInBytes(exchangeMetadataResponse.localLagFromRemoteInBytes);
                                logger.trace("Remote node: {} Thread name: {} Remote replica: {} Token after speaking to remote node: {}", remoteNode, threadName, remoteReplicaInfo.getReplicaId(), exchangeMetadataResponse.remoteToken);
                            }
                            replicationMetrics.updateLagMetricForRemoteReplica(remoteReplicaInfo, exchangeMetadataResponse.localLagFromRemoteInBytes);
                            // The catch-up point metric uses the operation time of the last message in the response.
                            if (replicaMetadataResponseInfo.getMessageInfoList().size() > 0) {
                                replicationMetrics.updateCatchupPointMetricForCloudReplica(remoteReplicaInfo, replicaMetadataResponseInfo.getMessageInfoList().get(replicaMetadataResponseInfo.getMessageInfoList().size() - 1).getOperationTimeMs());
                            }
                            // Add exchangeMetadataResponse to list at the end after operations such as replicaSyncUpManager(if not null)
                            // has completed update, etc. The reason is we may get exceptions in between (for ex: replicaSyncUpManager may
                            // throw exception) and end up adding one more exchangeMetadataResponse associated with same RemoteReplicaInfo.
                            exchangeMetadataResponseList.add(exchangeMetadataResponse);
                        } catch (Exception e) {
                            // Either branch below appends exactly one error response for this replica, keeping the result list
                            // positionally aligned with the replica list.
                            if (e instanceof StoreException && ((StoreException) e).getErrorCode() == StoreErrorCodes.Store_Not_Started) {
                                // Must have just been stopped, just skip it and move on.
                                logger.info("Local store not started for remote replica: {}", remoteReplicaInfo.getReplicaId());
                                exchangeMetadataResponseList.add(new ExchangeMetadataResponse(ServerErrorCode.Temporarily_Disabled));
                            } else {
                                logger.error("Remote node: {} Thread name: {} Remote replica: {}", remoteNode, threadName, remoteReplicaInfo.getReplicaId(), e);
                                replicationMetrics.updateLocalStoreError(remoteReplicaInfo.getReplicaId());
                                responseHandler.onEvent(remoteReplicaInfo.getReplicaId(), e);
                                exchangeMetadataResponseList.add(new ExchangeMetadataResponse(ServerErrorCode.Unknown_Error));
                            }
                        }
                    }
                } else {
                    // The remote node reported an error for this replica: record it and pass the error code through.
                    replicationMetrics.updateMetadataRequestError(remoteReplicaInfo.getReplicaId());
                    logger.error("Remote node: {} Thread name: {} Remote replica: {} Server error: {}", remoteNode, threadName, remoteReplicaInfo.getReplicaId(), replicaMetadataResponseInfo.getError());
                    exchangeMetadataResponseList.add(new ExchangeMetadataResponse(replicaMetadataResponseInfo.getError()));
                }
                if (replicatingFromRemoteColo && leaderBasedReplicationAdmin != null) {
                    // Index i is valid here because every path above appended exactly one response for this iteration.
                    ExchangeMetadataResponse exchangeMetadataResponse = exchangeMetadataResponseList.get(i);
                    if (exchangeMetadataResponse.serverErrorCode.equals(ServerErrorCode.No_Error)) {
                        // If leader-based replication is enabled, store the meta data exchange received for the remote replica as
                        // standby replicas will not send GET request for the missing store keys and track them from leader <->
                        // leader exchanges and intra-dc replication.
                        remoteReplicaInfo.setExchangeMetadataResponse(new ExchangeMetadataResponse(exchangeMetadataResponse));
                        // It is possible that some of the missing keys found in exchange metadata response are written in parallel
                        // by other replica threads since the time we calculated it. Go through the local store once more and
                        // update missing keys set stored in the exchangeMetadataResponse for the remote replica.
                        refreshMissingStoreMessagesForStandbyReplica(remoteReplicaInfo);
                    }
                }
            }
            long processMetadataResponseTimeInMs = time.milliseconds() - startTimeInMs;
            logger.trace("Remote node: {} Thread name: {} processMetadataResponseTime: {}", remoteNode, threadName, processMetadataResponseTimeInMs);
        } finally {
            // Record the total time spent in this method, whether it completed normally or threw.
            long exchangeMetadataTime = time.milliseconds() - exchangeMetadataStartTimeInMs;
            replicationMetrics.updateExchangeMetadataTime(exchangeMetadataTime, replicatingFromRemoteColo, replicatingOverSsl, datacenterName);
        }
    }
    return exchangeMetadataResponseList;
}
Also used : ReplicaMetadataResponse(com.github.ambry.protocol.ReplicaMetadataResponse) ReplicaMetadataResponseInfo(com.github.ambry.protocol.ReplicaMetadataResponseInfo) HashMap(java.util.HashMap) ArrayList(java.util.ArrayList) StoreKey(com.github.ambry.store.StoreKey) ReplicaId(com.github.ambry.clustermap.ReplicaId) StoreException(com.github.ambry.store.StoreException) IOException(java.io.IOException) MessageInfo(com.github.ambry.store.MessageInfo) StoreException(com.github.ambry.store.StoreException) DataNodeId(com.github.ambry.clustermap.DataNodeId)

Example 33 with StoreKey

use of com.github.ambry.store.StoreKey in project ambry by linkedin.

the class ReplicaThread method refreshMissingStoreMessagesForStandbyReplica.

/**
 * Re-checks, against the local store, the missing messages tracked in the exchange metadata response stored on the
 * given replica, and stops tracking those whose converted key is no longer reported missing.
 * <p>
 * Messages whose remote key has no (non-null) entry in the response's remote-to-local key map are left untouched.
 * If the store lookup fails with a {@link StoreException}, the stored response is replaced with a fresh
 * {@code No_Error} response so that a metadata request is sent again for this replica.
 * @param remoteReplicaInfo remote replica information
 */
private void refreshMissingStoreMessagesForStandbyReplica(RemoteReplicaInfo remoteReplicaInfo) {
    ExchangeMetadataResponse trackedResponse = remoteReplicaInfo.getExchangeMetadataResponse();
    Set<MessageInfo> trackedMissingMessages = trackedResponse.getMissingStoreMessages();
    if (trackedMissingMessages.isEmpty()) {
        // Nothing is being tracked for this replica, so there is nothing to refresh.
        return;
    }
    try {
        // Pair each tracked message with its converted local key, ignoring keys without a conversion.
        Map<StoreKey, StoreKey> remoteToLocalKeys = trackedResponse.remoteKeyToLocalKeyMap;
        Map<MessageInfo, StoreKey> messageToLocalKey = new HashMap<>();
        for (MessageInfo trackedMessage : trackedMissingMessages) {
            StoreKey localKey = remoteToLocalKeys.get(trackedMessage.getStoreKey());
            if (localKey != null) {
                messageToLocalKey.put(trackedMessage, localKey);
            }
        }

        // Ask the local store which of those keys are still absent.
        List<StoreKey> keysToCheck = new ArrayList<>(messageToLocalKey.values());
        Set<StoreKey> stillAbsentKeys = remoteReplicaInfo.getLocalStore().findMissingKeys(keysToCheck);

        // Everything not reported absent has shown up in the store since the last check.
        Set<MessageInfo> arrivedMessages = new HashSet<>();
        for (Map.Entry<MessageInfo, StoreKey> pair : messageToLocalKey.entrySet()) {
            if (!stillAbsentKeys.contains(pair.getValue())) {
                arrivedMessages.add(pair.getKey());
            }
        }

        // Stop tracking the messages that have arrived.
        trackedResponse.removeMissingStoreMessages(arrivedMessages);
    } catch (StoreException storeFailure) {
        logger.error("Exception occurred while checking for missing keys in local store for partition {} and Remote replica: {}", remoteReplicaInfo.getReplicaId().getPartitionId().toPathString(), remoteReplicaInfo.getReplicaId(), storeFailure);
        // reset stored metadata response so that metadata request is sent again for this replica
        remoteReplicaInfo.setExchangeMetadataResponse(new ExchangeMetadataResponse(ServerErrorCode.No_Error));
    }
}
Also used : HashMap(java.util.HashMap) StoreKey(com.github.ambry.store.StoreKey) MessageInfo(com.github.ambry.store.MessageInfo) HashSet(java.util.HashSet) StoreException(com.github.ambry.store.StoreException)

Example 34 with StoreKey

use of com.github.ambry.store.StoreKey in project ambry by linkedin.

the class ReplicaThread method replicate.

/**
 * Do replication for replicas grouped by {@link DataNodeId}
 * A replication cycle between two replicas involves the following steps:
 *    1. Exchange metadata : fetch the metadata of blobs added to remote replica since the last synchronization point
 *    and filter the ones missing in local store.
 *    2. Fetch missing blobs: fetch the missing blobs by issuing GET request to remote replica and write them to
 *       the local store
 *
 *  During cross-colo replication, depending on the {@link ReplicationModelType}, the missing blobs are either fetched
 *  from all remote replicas (if modelType == ALL_TO_ALL) or only fetched for local leader replicas from their remote
 *  leader replicas (if modelType == LEADER_BASED). In the latter case, non-leader replica pairs (leader <-> standby,
 *  standby <-> leader, standby <-> standby) will get their missing blobs from their corresponding leader<->leader
 *  exchanges and intra-dc replication.
 *
 *  Here is a table listing on what is exchanged between local and remote replicas based on their roles
 *  (leader/standby) when {@link ReplicationModelType} is LEADER_BASED.
 *
 *              |   Local Leader    |     Local Standby   |   Remote Leader   |  Remote Standby
 *            -------------------------------------------------------------------------------------
 *     Leader:  |        ---        |  metadata and data  | metadata and data |   metadata only
 *     Standby: | metadata and data |  metadata and data  | metadata only     |   metadata only
 *
 *  Errors raised while talking to a node are caught and logged per node; the connection used for that node is
 *  destroyed on error and checked back in otherwise. After all nodes have been visited, the thread sleeps for the
 *  configured idle duration if no node had anything to replicate, or else for the throttle duration if one is set.
 */
public void replicate() {
    // Stays true only if no node had active replicas or timed-out standby replicas in this cycle.
    boolean allCaughtUp = true;
    Map<DataNodeId, List<RemoteReplicaInfo>> dataNodeToRemoteReplicaInfo = getRemoteReplicaInfos();
    // NOTE(review): this logs the size of the field replicasToReplicateGroupedByNode, not of the local map that is
    // actually iterated below — confirm the two are always the same size.
    logger.trace("Replicating from {} DataNodes.", replicasToReplicateGroupedByNode.size());
    for (Map.Entry<DataNodeId, List<RemoteReplicaInfo>> entry : dataNodeToRemoteReplicaInfo.entrySet()) {
        DataNodeId remoteNode = entry.getKey();
        // Stop visiting further nodes once the running flag has been cleared.
        if (!running) {
            break;
        }
        List<RemoteReplicaInfo> replicasToReplicatePerNode = entry.getValue();
        // Two latency timers are started per node: an overall one and one specific to the port type (SSL / plain text).
        // Both are stopped in the finally block below.
        Timer.Context context = null;
        Timer.Context portTypeBasedContext = null;
        if (replicatingFromRemoteColo) {
            context = replicationMetrics.interColoReplicationLatency.get(remoteNode.getDatacenterName()).time();
            if (replicatingOverSsl) {
                portTypeBasedContext = replicationMetrics.sslInterColoReplicationLatency.get(remoteNode.getDatacenterName()).time();
            } else {
                portTypeBasedContext = replicationMetrics.plainTextInterColoReplicationLatency.get(remoteNode.getDatacenterName()).time();
            }
        } else {
            context = replicationMetrics.intraColoReplicationLatency.time();
            if (replicatingOverSsl) {
                portTypeBasedContext = replicationMetrics.sslIntraColoReplicationLatency.time();
            } else {
                portTypeBasedContext = replicationMetrics.plainTextIntraColoReplicationLatency.time();
            }
        }
        ConnectedChannel connectedChannel = null;
        // -1 marks a phase that has not completed yet. The catch block below uses the first phase still at -1 to decide
        // in which phase the failure happened and to compute that phase's elapsed time.
        long checkoutConnectionTimeInMs = -1;
        long exchangeMetadataTimeInMs = -1;
        long fixMissingStoreKeysTimeInMs = -1;
        long replicationStartTimeInMs = time.milliseconds();
        // Start time of the phase currently in progress; reset before each phase.
        long startTimeInMs = replicationStartTimeInMs;
        // Get a list of active replicas that needs be included for this replication cycle
        List<RemoteReplicaInfo> activeReplicasPerNode = new ArrayList<>();
        List<RemoteReplicaInfo> standbyReplicasWithNoProgress = new ArrayList<>();
        for (RemoteReplicaInfo remoteReplicaInfo : replicasToReplicatePerNode) {
            ReplicaId replicaId = remoteReplicaInfo.getReplicaId();
            boolean inBackoff = time.milliseconds() < remoteReplicaInfo.getReEnableReplicationTime();
            if (replicaId.isDown() || inBackoff || remoteReplicaInfo.getLocalStore().getCurrentState() == ReplicaState.OFFLINE || replicationDisabledPartitions.contains(replicaId.getPartitionId())) {
                logger.debug("Skipping replication on replica {} because one of following conditions is true: remote replica is down " + "= {}; in backoff = {}; local store is offline = {}; replication is disabled = {}.", replicaId.getPartitionId().toPathString(), replicaId.isDown(), inBackoff, remoteReplicaInfo.getLocalStore().getCurrentState() == ReplicaState.OFFLINE, replicationDisabledPartitions.contains(replicaId.getPartitionId()));
                continue;
            }
            if (replicatingFromRemoteColo && leaderBasedReplicationAdmin != null) {
                // check if all missing keys for standby replicas from previous replication cycle are now obtained
                // via leader replica. If we still have missing keys, don't include them in current replication cycle
                // to avoid sending duplicate metadata requests since their token wouldn't have advanced.
                processMissingKeysFromPreviousMetadataResponse(remoteReplicaInfo);
                if (containsMissingKeysFromPreviousMetadataExchange(remoteReplicaInfo)) {
                    standbyReplicasWithNoProgress.add(remoteReplicaInfo);
                    continue;
                }
            }
            activeReplicasPerNode.add(remoteReplicaInfo);
        }
        logger.trace("Replicating from {} RemoteReplicaInfos.", activeReplicasPerNode.size());
        // use a variable to track current replica list to replicate (for logging purpose)
        List<RemoteReplicaInfo> currentReplicaList = activeReplicasPerNode;
        try {
            if (activeReplicasPerNode.size() > 0) {
                allCaughtUp = false;
                // if maxReplicaCountPerRequest > 0, split remote replicas on same node into multiple lists; otherwise there is
                // no limit.
                List<List<RemoteReplicaInfo>> activeReplicaSubLists = maxReplicaCountPerRequest > 0 ? Utils.partitionList(activeReplicasPerNode, maxReplicaCountPerRequest) : Collections.singletonList(activeReplicasPerNode);
                startTimeInMs = time.milliseconds();
                // The port is taken from the first active replica of this node.
                connectedChannel = connectionPool.checkOutConnection(remoteNode.getHostname(), activeReplicasPerNode.get(0).getPort(), replicationConfig.replicationConnectionPoolCheckoutTimeoutMs);
                checkoutConnectionTimeInMs = time.milliseconds() - startTimeInMs;
                // we checkout ConnectedChannel once and replicate remote replicas in batch via same ConnectedChannel
                for (List<RemoteReplicaInfo> replicaSubList : activeReplicaSubLists) {
                    // Reset the per-batch phase markers so a failure is attributed to the right phase of this batch.
                    exchangeMetadataTimeInMs = -1;
                    fixMissingStoreKeysTimeInMs = -1;
                    currentReplicaList = replicaSubList;
                    logger.debug("Exchanging metadata with {} remote replicas on {}", currentReplicaList.size(), remoteNode);
                    startTimeInMs = time.milliseconds();
                    List<ExchangeMetadataResponse> exchangeMetadataResponseList = exchangeMetadata(connectedChannel, replicaSubList);
                    exchangeMetadataTimeInMs = time.milliseconds() - startTimeInMs;
                    if (replicatingFromRemoteColo && leaderBasedReplicationAdmin != null) {
                        // If leader based replication is enabled and we are replicating from remote colo, fetch the missing blobs
                        // only for local leader replicas from their corresponding peer leader replicas (Leader <-> Leader).
                        // Non-leader replica pairs (standby <-> leaders, leader <-> standby, standby <-> standby) will get their
                        // missing blobs from their leader pair exchanges and intra-dc replication.
                        List<RemoteReplicaInfo> leaderReplicaList = new ArrayList<>();
                        List<ExchangeMetadataResponse> exchangeMetadataResponseListForLeaderReplicas = new ArrayList<>();
                        getLeaderReplicaList(replicaSubList, exchangeMetadataResponseList, leaderReplicaList, exchangeMetadataResponseListForLeaderReplicas);
                        replicaSubList = leaderReplicaList;
                        exchangeMetadataResponseList = exchangeMetadataResponseListForLeaderReplicas;
                    }
                    if (replicaSubList.size() > 0) {
                        startTimeInMs = time.milliseconds();
                        fixMissingStoreKeys(connectedChannel, replicaSubList, exchangeMetadataResponseList, false);
                        fixMissingStoreKeysTimeInMs = time.milliseconds() - startTimeInMs;
                    }
                }
            }
            if (replicatingFromRemoteColo && leaderBasedReplicationAdmin != null) {
                // Get a list of blocked standby replicas whose missing keys haven't arrived for long time.
                // Use case: In leader-based cross colo replication, standby replicas don't send GET requests for missing keys
                // found in metadata exchange and expect them to come via leader <-> leader replication.
                // This is a safety condition to ensure that standby replicas are not stuck waiting for the keys to come from leader
                // by fetching the missing keys themselves.
                // TODO: As an improvement to this, we can first fetch missing blobs from local leader/other replicas in intra-dc first.
                // TODO: If the result to fetch a blob from local dc is Blob_Not_Found, then we can fetch it from replicas in remote datacenter.
                // This will involve co-ordination between replica threads containing replicas of same partition.
                List<RemoteReplicaInfo> standbyReplicasTimedOutOnNoProgress = getRemoteStandbyReplicasTimedOutOnNoProgress(standbyReplicasWithNoProgress);
                if (standbyReplicasTimedOutOnNoProgress.size() > 0) {
                    allCaughtUp = false;
                    currentReplicaList = standbyReplicasTimedOutOnNoProgress;
                    // Reuse the connection checked out above if there is one; otherwise check one out now.
                    if (connectedChannel == null) {
                        checkoutConnectionTimeInMs = -1;
                        startTimeInMs = time.milliseconds();
                        connectedChannel = connectionPool.checkOutConnection(remoteNode.getHostname(), standbyReplicasTimedOutOnNoProgress.get(0).getPort(), replicationConfig.replicationConnectionPoolCheckoutTimeoutMs);
                        checkoutConnectionTimeInMs = time.milliseconds() - startTimeInMs;
                    }
                    // Work on copies of the stored metadata responses of the blocked replicas.
                    List<ExchangeMetadataResponse> exchangeMetadataResponseListForBlockedReplicas = standbyReplicasTimedOutOnNoProgress.stream().map(remoteReplicaInfo -> new ExchangeMetadataResponse(remoteReplicaInfo.getExchangeMetadataResponse())).collect(Collectors.toList());
                    // Convert (and cache) the remote keys that are being fetched as the StoreKeyConverter would have cleared
                    // these keys from its cache while it is replicating with other replicas before time out happened for these standby replicas.
                    List<StoreKey> storeKeysToConvert = exchangeMetadataResponseListForBlockedReplicas.stream().map(ExchangeMetadataResponse::getMissingStoreKeys).flatMap(Collection::stream).collect(Collectors.toList());
                    convertStoreKeys(storeKeysToConvert);
                    // No metadata exchange happens on this path; 0 (rather than -1) makes the catch block attribute a
                    // failure from here on to the fix-missing-keys phase.
                    exchangeMetadataTimeInMs = 0;
                    fixMissingStoreKeysTimeInMs = -1;
                    logger.debug("Sending GET request to fetch missing keys for standby remote replicas {} timed out on no progress", currentReplicaList);
                    startTimeInMs = time.milliseconds();
                    fixMissingStoreKeys(connectedChannel, standbyReplicasTimedOutOnNoProgress, exchangeMetadataResponseListForBlockedReplicas, true);
                    fixMissingStoreKeysTimeInMs = time.milliseconds() - startTimeInMs;
                }
            }
        } catch (Throwable e) {
            if (checkoutConnectionTimeInMs == -1) {
                // throwable happened in checkout connection phase
                checkoutConnectionTimeInMs = time.milliseconds() - startTimeInMs;
                // NOTE(review): if the throwable was raised before any connection checkout while activeReplicasPerNode is
                // empty, currentReplicaList is still that empty list and get(0) would throw from inside this catch block —
                // confirm whether that path is reachable.
                responseHandler.onEvent(currentReplicaList.get(0).getReplicaId(), e);
            } else if (exchangeMetadataTimeInMs == -1) {
                // throwable happened in exchange metadata phase
                exchangeMetadataTimeInMs = time.milliseconds() - startTimeInMs;
            } else if (fixMissingStoreKeysTimeInMs == -1) {
                // throwable happened in fix missing store phase
                fixMissingStoreKeysTimeInMs = time.milliseconds() - startTimeInMs;
            }
            logger.error("Error while talking to peer: Remote node: {}, Thread name: {}, Remote replicas: {}, Current active " + "remote replica list: {}, Checkout connection time: {}, Exchange metadata time: {}, Fix missing " + "store key time {}", remoteNode, threadName, replicasToReplicatePerNode, currentReplicaList, checkoutConnectionTimeInMs, exchangeMetadataTimeInMs, fixMissingStoreKeysTimeInMs, e);
            replicationMetrics.incrementReplicationErrors(replicatingOverSsl);
            // After an error the connection is destroyed instead of being returned to the pool; nulling the reference
            // prevents the finally block from checking it in as well.
            if (connectedChannel != null) {
                connectionPool.destroyConnection(connectedChannel);
                connectedChannel = null;
            }
        } finally {
            long totalReplicationTime = time.milliseconds() - replicationStartTimeInMs;
            replicationMetrics.updateTotalReplicationTime(totalReplicationTime, replicatingFromRemoteColo, replicatingOverSsl, datacenterName);
            // Still non-null only when no error occurred: return the connection to the pool.
            if (connectedChannel != null) {
                connectionPool.checkInConnection(connectedChannel);
            }
            context.stop();
            portTypeBasedContext.stop();
        }
    }
    // Pick the sleep duration: the idle sleep takes precedence when nothing needed replicating in this cycle,
    // otherwise the throttle duration applies (if configured).
    long sleepDurationMs = 0;
    if (allCaughtUp && replicationConfig.replicationReplicaThreadIdleSleepDurationMs > 0) {
        sleepDurationMs = replicationConfig.replicationReplicaThreadIdleSleepDurationMs;
        idleCount.inc();
    } else if (threadThrottleDurationMs > 0) {
        sleepDurationMs = threadThrottleDurationMs;
        throttleCount.inc();
    }
    if (sleepDurationMs > 0) {
        try {
            long currentTime = time.milliseconds();
            time.sleep(sleepDurationMs);
            logger.trace("Replica thread: {} slept for {} ms", threadName, time.milliseconds() - currentTime);
        } catch (InterruptedException e) {
            // NOTE(review): the interrupt is only logged; the thread's interrupt flag is not restored here — confirm
            // this is intentional given how the thread is shut down.
            logger.error("Received interrupted exception during throttling", e);
        }
    }
}
Also used : GetOption(com.github.ambry.protocol.GetOption) StoreKeyConverter(com.github.ambry.store.StoreKeyConverter) DataNodeId(com.github.ambry.clustermap.DataNodeId) LoggerFactory(org.slf4j.LoggerFactory) MessageFormatWriteSet(com.github.ambry.messageformat.MessageFormatWriteSet) StoreErrorCodes(com.github.ambry.store.StoreErrorCodes) GetResponse(com.github.ambry.protocol.GetResponse) AtomicInteger(java.util.concurrent.atomic.AtomicInteger) Map(java.util.Map) Counter(com.codahale.metrics.Counter) ReplicaMetadataRequest(com.github.ambry.protocol.ReplicaMetadataRequest) GetRequest(com.github.ambry.protocol.GetRequest) ReplicationConfig(com.github.ambry.config.ReplicationConfig) NotificationSystem(com.github.ambry.notification.NotificationSystem) ReplicaSyncUpManager(com.github.ambry.clustermap.ReplicaSyncUpManager) PartitionResponseInfo(com.github.ambry.protocol.PartitionResponseInfo) Predicate(java.util.function.Predicate) Collection(java.util.Collection) Set(java.util.Set) Utils(com.github.ambry.utils.Utils) Collectors(java.util.stream.Collectors) ConnectedChannel(com.github.ambry.network.ConnectedChannel) ReplicaMetadataRequestInfo(com.github.ambry.protocol.ReplicaMetadataRequestInfo) CountDownLatch(java.util.concurrent.CountDownLatch) StoreKey(com.github.ambry.store.StoreKey) List(java.util.List) ReplicaMetadataResponse(com.github.ambry.protocol.ReplicaMetadataResponse) MessageFormatFlags(com.github.ambry.messageformat.MessageFormatFlags) UpdateType(com.github.ambry.notification.UpdateType) Timer(com.codahale.metrics.Timer) MessageSievingInputStream(com.github.ambry.messageformat.MessageSievingInputStream) PartitionId(com.github.ambry.clustermap.PartitionId) BlobId(com.github.ambry.commons.BlobId) ResponseHandler(com.github.ambry.commons.ResponseHandler) PartitionRequestInfo(com.github.ambry.protocol.PartitionRequestInfo) BlobReplicaSourceType(com.github.ambry.notification.BlobReplicaSourceType) ServerErrorCode(com.github.ambry.server.ServerErrorCode) 
HashMap(java.util.HashMap) ArrayList(java.util.ArrayList) NettyByteBufDataInputStream(com.github.ambry.utils.NettyByteBufDataInputStream) HashSet(java.util.HashSet) Transformer(com.github.ambry.store.Transformer) ChannelOutput(com.github.ambry.network.ChannelOutput) StoreException(com.github.ambry.store.StoreException) ReplicaMetadataResponseInfo(com.github.ambry.protocol.ReplicaMetadataResponseInfo) CloudDataNode(com.github.ambry.clustermap.CloudDataNode) Time(com.github.ambry.utils.Time) ReplicaState(com.github.ambry.clustermap.ReplicaState) MetricRegistry(com.codahale.metrics.MetricRegistry) Logger(org.slf4j.Logger) ReentrantLock(java.util.concurrent.locks.ReentrantLock) ConnectionPool(com.github.ambry.network.ConnectionPool) ClusterMap(com.github.ambry.clustermap.ClusterMap) IOException(java.io.IOException) Condition(java.util.concurrent.locks.Condition) MessageInfo(com.github.ambry.store.MessageInfo) ReplicaId(com.github.ambry.clustermap.ReplicaId) BlobStore(com.github.ambry.store.BlobStore) Collections(java.util.Collections) ArrayList(java.util.ArrayList) ConnectedChannel(com.github.ambry.network.ConnectedChannel) StoreKey(com.github.ambry.store.StoreKey) ReplicaId(com.github.ambry.clustermap.ReplicaId) Timer(com.codahale.metrics.Timer) List(java.util.List) ArrayList(java.util.ArrayList) DataNodeId(com.github.ambry.clustermap.DataNodeId) Map(java.util.Map) HashMap(java.util.HashMap) ClusterMap(com.github.ambry.clustermap.ClusterMap)

Example 35 with StoreKey

use of com.github.ambry.store.StoreKey in project ambry by linkedin.

the class InMemoryStore method get.

/**
 * Collects, for each requested id in order, every stored message whose key equals that id, together with the log
 * data at the same position, and wraps the matches in a {@link StoreInfo}.
 * An id that occurs several times in the store (or several times in {@code ids}) yields one entry per match.
 * @param ids the keys to look up
 * @param getOptions not consulted by this implementation
 * @return a {@link StoreInfo} built from the matching buffers, keys and message infos
 * @throws StoreException declared by the overridden method; not thrown directly by this code
 */
@Override
public StoreInfo get(List<? extends StoreKey> ids, EnumSet<StoreGetOptions> getOptions) throws StoreException {
    // unused function
    List<MessageInfo> matchedInfos = new ArrayList<>();
    List<ByteBuffer> matchedData = new ArrayList<>();
    List<StoreKey> matchedKeys = new ArrayList<>();
    for (StoreKey requestedId : ids) {
        // Linear scan: position in messageInfos doubles as the index into the log.
        for (int position = 0; position < messageInfos.size(); position++) {
            MessageInfo candidate = messageInfos.get(position);
            StoreKey storedKey = candidate.getStoreKey();
            if (!storedKey.equals(requestedId)) {
                continue;
            }
            matchedInfos.add(candidate);
            matchedData.add(log.getData(position));
            matchedKeys.add(storedKey);
        }
    }
    MockMessageReadSet readSet = new MockMessageReadSet(matchedData, matchedKeys);
    return new StoreInfo(readSet, matchedInfos);
}
Also used : ArrayList(java.util.ArrayList) StoreInfo(com.github.ambry.store.StoreInfo) ByteBuffer(java.nio.ByteBuffer) StoreKey(com.github.ambry.store.StoreKey) MessageInfo(com.github.ambry.store.MessageInfo)

Aggregations

StoreKey (com.github.ambry.store.StoreKey)89 ArrayList (java.util.ArrayList)56 MessageInfo (com.github.ambry.store.MessageInfo)43 ByteBuffer (java.nio.ByteBuffer)43 Test (org.junit.Test)37 DataInputStream (java.io.DataInputStream)30 BlobId (com.github.ambry.commons.BlobId)27 HashMap (java.util.HashMap)26 IOException (java.io.IOException)23 List (java.util.List)22 PartitionId (com.github.ambry.clustermap.PartitionId)21 ByteBufferInputStream (com.github.ambry.utils.ByteBufferInputStream)21 Map (java.util.Map)19 MockPartitionId (com.github.ambry.clustermap.MockPartitionId)18 MockId (com.github.ambry.store.MockId)18 MockClusterMap (com.github.ambry.clustermap.MockClusterMap)17 InputStream (java.io.InputStream)16 HashSet (java.util.HashSet)16 ClusterMap (com.github.ambry.clustermap.ClusterMap)15 MetricRegistry (com.codahale.metrics.MetricRegistry)14