Search in sources :

Example 1 with MessageSievingInputStream

Use of com.github.ambry.messageformat.MessageSievingInputStream in project ambry by LinkedIn.

From the class ReplicaThread, method writeMessagesToLocalStoreAndAdvanceTokens.

/**
 * Applies a GET response from a remote node to the local stores, one replica at a time, and advances the token of
 * every replica whose messages were handled without a {@link StoreException}.
 * <p>
 * The i-th element of {@code exchangeMetadataResponseList} is paired with the i-th element of
 * {@code replicasToReplicatePerNode}. A pair is processed only when its metadata exchange reported
 * {@code ServerErrorCode.No_Error} and it lists at least one missing message; every processed pair consumes the next
 * {@link PartitionResponseInfo} of {@code getResponse}, in order. All other pairs are skipped by this method (their
 * tokens are not modified here).
 * <p>
 * Once the loop is done, the elapsed time plus the accumulated byte and blob counts are reported through
 * {@code replicationMetrics.updateBatchStoreWriteTime}.
 *
 * @param exchangeMetadataResponseList The metadata responses received from the remote node, one per remote replica
 * @param getResponse The {@link GetResponse} carrying the missing messages. It is only dereferenced when at least
 *                    one pair has missing messages, so it may be null otherwise.
 * @param replicasToReplicatePerNode The remote replicas of the remote node, index-aligned with
 *                                   {@code exchangeMetadataResponseList}
 * @param remoteNode The remote node being replicated from (used for logging)
 * @param remoteColoGetRequestForStandby whether the missing keys are being fetched for standby or non-leader replica
 *                                       pairs during leader-based replication; forwarded to the metrics call
 * @throws IOException propagated from the calls made here; only {@link StoreException} is caught in this method
 * @throws IllegalStateException if a partition response does not belong to the partition of the paired replica
 */
private void writeMessagesToLocalStoreAndAdvanceTokens(List<ExchangeMetadataResponse> exchangeMetadataResponseList, GetResponse getResponse, List<RemoteReplicaInfo> replicasToReplicatePerNode, DataNodeId remoteNode, boolean remoteColoGetRequestForStandby) throws IOException {
    long bytesWritten = 0;
    long blobsWritten = 0;
    // Position of the next unread entry in the GET response's partition list.
    int nextPartitionResponse = 0;
    long beginMs = time.milliseconds();
    for (int idx = 0; idx < exchangeMetadataResponseList.size(); idx++) {
        ExchangeMetadataResponse metadataResponse = exchangeMetadataResponseList.get(idx);
        RemoteReplicaInfo replicaInfo = replicasToReplicatePerNode.get(idx);
        // TODO: if remoteReplicaInfo.getLocalStore() is closed, write will fail
        if (metadataResponse.serverErrorCode != ServerErrorCode.No_Error) {
            continue;
        }
        if (metadataResponse.missingStoreMessages.size() <= 0) {
            continue;
        }

        PartitionResponseInfo partitionInfo = getResponse.getPartitionResponseInfoList().get(nextPartitionResponse);
        responseHandler.onEvent(replicaInfo.getReplicaId(), partitionInfo.getErrorCode());
        nextPartitionResponse++;

        boolean samePartition = partitionInfo.getPartition().toPathString().equals(replicaInfo.getReplicaId().getPartitionId().toPathString());
        if (!samePartition) {
            throw new IllegalStateException("The partition id from partitionResponseInfo " + partitionInfo.getPartition() + " and from remoteReplicaInfo " + replicaInfo.getReplicaId().getPartitionId() + " are not the same");
        }

        // Anything other than a clean partition response is only counted and logged.
        if (partitionInfo.getErrorCode() != ServerErrorCode.No_Error) {
            if (partitionInfo.getErrorCode() == ServerErrorCode.Blob_Deleted) {
                replicationMetrics.blobDeletedOnGetCount.inc();
                logger.trace("One of the blobs to GET is deleted: Remote node: {} Thread name: {} Remote replica: {}", remoteNode, threadName, replicaInfo.getReplicaId());
            } else if (partitionInfo.getErrorCode() == ServerErrorCode.Blob_Authorization_Failure) {
                replicationMetrics.blobAuthorizationFailureCount.inc();
                logger.error("One of the blobs authorization failed: Remote node: {} Thread name: {} Remote replica: {} Keys are: {}", remoteNode, threadName, replicaInfo.getReplicaId(), metadataResponse.getMissingStoreKeys());
            } else {
                replicationMetrics.updateGetRequestError(replicaInfo.getReplicaId());
                logger.error("Remote node: {} Thread name: {} Remote replica: {} Server error: {}", remoteNode, threadName, replicaInfo.getReplicaId(), partitionInfo.getErrorCode());
            }
            continue;
        }

        List<MessageInfo> receivedInfos = partitionInfo.getMessageInfoList();
        try {
            logger.trace("Remote node: {} Thread name: {} Remote replica: {} Messages to fix: {} "
                + "Partition: {} Local mount path: {}", remoteNode, threadName, replicaInfo.getReplicaId(), metadataResponse.getMissingStoreKeys(), replicaInfo.getReplicaId().getPartitionId(), replicaInfo.getLocalReplicaId().getMountPath());
            MessageSievingInputStream sievedStream = new MessageSievingInputStream(getResponse.getInputStream(), receivedInfos, Collections.singletonList(transformer), metricRegistry);
            if (sievedStream.hasInvalidMessages()) {
                replicationMetrics.incrementInvalidMessageError(partitionInfo.getPartition());
                logger.error("Out of {} messages, {} invalid messages were found in message stream from {}", receivedInfos.size(), receivedInfos.size() - sievedStream.getValidMessageInfoList().size(), replicaInfo.getReplicaId());
            }

            // From here on only the messages that survived sieving are considered.
            List<MessageInfo> validInfos = sievedStream.getValidMessageInfoList();
            if (validInfos.size() != 0) {
                MessageFormatWriteSet toStore = new MessageFormatWriteSet(sievedStream, validInfos, false);
                replicaInfo.getLocalStore().put(toStore);
            } else {
                logger.debug("MessageInfoList is of size 0 as all messages are invalidated, deprecated, deleted or expired.");
            }

            for (MessageInfo written : validInfos) {
                bytesWritten += written.getSize();
                logger.trace("Remote node: {} Thread name: {} Remote replica: {} Message replicated: {} Partition: {} "
                    + "Local mount path: {} Message size: {}", remoteNode, threadName, replicaInfo.getReplicaId(), written.getStoreKey(), replicaInfo.getReplicaId().getPartitionId(), replicaInfo.getLocalReplicaId().getMountPath(), written.getSize());
                if (notification != null) {
                    notification.onBlobReplicaCreated(dataNodeId.getHostname(), dataNodeId.getPort(), written.getStoreKey().getID(), BlobReplicaSourceType.REPAIRED);
                }
                if (written.isTtlUpdated()) {
                    applyTtlUpdate(written, replicaInfo);
                }
            }
            blobsWritten += validInfos.size();

            if (leaderBasedReplicationAdmin != null) {
                // With leader based replication, missing blobs are fetched only for local leaders from their remote
                // leaders. Non-leader replica pairs (leader <-> standby, standby <-> leader, standby <-> standby)
                // keep their missing keys and track them through leader<->leader exchanges and intra-dc replication.
                // Tell every replica of the partition about the newly written messages so those pairs can update
                // their missing keys and advance their tokens if needed.
                leaderBasedReplicationAdmin.onMessageWriteForPartition(partitionInfo.getPartition(), validInfos);
            }

            replicaInfo.setToken(metadataResponse.remoteToken);
            replicaInfo.setLocalLagFromRemoteInBytes(metadataResponse.localLagFromRemoteInBytes);
            // Clear the metadata response stored on this replica.
            replicaInfo.setExchangeMetadataResponse(new ExchangeMetadataResponse(ServerErrorCode.No_Error));
            logger.trace("Remote node: {} Thread name: {} Remote replica: {} Token after speaking to remote node: {}", remoteNode, threadName, replicaInfo.getReplicaId(), metadataResponse.remoteToken);
        } catch (StoreException storeFailure) {
            // Already_Exist is deliberately neither counted nor logged; either way the token stays where it was.
            if (storeFailure.getErrorCode() != StoreErrorCodes.Already_Exist) {
                replicationMetrics.updateLocalStoreError(replicaInfo.getReplicaId());
                logger.error("Remote node: {} Thread name: {} Remote replica: {}", remoteNode, threadName, replicaInfo.getReplicaId(), storeFailure);
            }
        }
    }
    replicationMetrics.updateBatchStoreWriteTime(time.milliseconds() - beginMs, bytesWritten, blobsWritten, replicatingFromRemoteColo, replicatingOverSsl, datacenterName, remoteColoGetRequestForStandby);
}
Also used : MessageSievingInputStream(com.github.ambry.messageformat.MessageSievingInputStream) PartitionResponseInfo(com.github.ambry.protocol.PartitionResponseInfo) MessageInfo(com.github.ambry.store.MessageInfo) MessageFormatWriteSet(com.github.ambry.messageformat.MessageFormatWriteSet) StoreException(com.github.ambry.store.StoreException)

Example 2 with MessageSievingInputStream

Use of com.github.ambry.messageformat.MessageSievingInputStream in project ambry by LinkedIn.

From the class ReplicaThread, method writeMessagesToLocalStoreAndAdvanceTokens.

/**
 * Writes the messages (if any) to the local stores from the remote stores for the missing keys, and advances tokens.
 * <p>
 * The i-th element of {@code exchangeMetadataResponseList} is paired with the i-th element of
 * {@code replicasToReplicatePerNode}. Pairs whose metadata exchange did not report {@code ServerErrorCode.No_Error}
 * are skipped. A pair with no missing keys only has its token and lag advanced. A pair with missing keys consumes
 * the next {@link PartitionResponseInfo} of {@code getResponse}, in order; its token is advanced only if the write
 * path completes without a {@link StoreException}.
 *
 * @param exchangeMetadataResponseList The list of metadata response from the remote node
 * @param getResponse The {@link GetResponse} that contains the missing messages. This may be null if there are no
 *                    missing messages to write as per the exchange metadata response. In that case this method will
 *                    simply advance the tokens for every store.
 * @param replicasToReplicatePerNode The list of remote replicas for the remote node
 * @param remoteNode The remote node from which replication needs to happen
 * @throws IOException propagated from the calls made here; only {@link StoreException} is caught in this method
 * @throws IllegalStateException if a partition response does not belong to the partition of the paired replica
 */
private void writeMessagesToLocalStoreAndAdvanceTokens(List<ExchangeMetadataResponse> exchangeMetadataResponseList, GetResponse getResponse, List<RemoteReplicaInfo> replicasToReplicatePerNode, DataNodeId remoteNode) throws IOException {
    // Position of the next unread entry in the GET response's partition list.
    int partitionResponseInfoIndex = 0;
    long totalBytesFixed = 0;
    long totalBlobsFixed = 0;
    long startTime = SystemTime.getInstance().milliseconds();
    for (int i = 0; i < exchangeMetadataResponseList.size(); i++) {
        ExchangeMetadataResponse exchangeMetadataResponse = exchangeMetadataResponseList.get(i);
        RemoteReplicaInfo remoteReplicaInfo = replicasToReplicatePerNode.get(i);
        if (exchangeMetadataResponse.serverErrorCode == ServerErrorCode.No_Error) {
            if (exchangeMetadataResponse.missingStoreKeys.size() > 0) {
                PartitionResponseInfo partitionResponseInfo = getResponse.getPartitionResponseInfoList().get(partitionResponseInfoIndex);
                responseHandler.onEvent(remoteReplicaInfo.getReplicaId(), partitionResponseInfo.getErrorCode());
                partitionResponseInfoIndex++;
                if (partitionResponseInfo.getPartition().compareTo(remoteReplicaInfo.getReplicaId().getPartitionId()) != 0) {
                    throw new IllegalStateException("The partition id from partitionResponseInfo " + partitionResponseInfo.getPartition() + " and from remoteReplicaInfo " + remoteReplicaInfo.getReplicaId().getPartitionId() + " are not the same");
                }
                if (partitionResponseInfo.getErrorCode() == ServerErrorCode.No_Error) {
                    try {
                        List<MessageInfo> messageInfoList = partitionResponseInfo.getMessageInfoList();
                        logger.trace("Remote node: {} Thread name: {} Remote replica: {} Messages to fix: {} " + "Partition: {} Local mount path: {}", remoteNode, threadName, remoteReplicaInfo.getReplicaId(), exchangeMetadataResponse.missingStoreKeys, remoteReplicaInfo.getReplicaId().getPartitionId(), remoteReplicaInfo.getLocalReplicaId().getMountPath());
                        if (validateMessageStream) {
                            MessageSievingInputStream validMessageDetectionInputStream = new MessageSievingInputStream(getResponse.getInputStream(), messageInfoList, storeKeyFactory, metricRegistry);
                            if (validMessageDetectionInputStream.hasInvalidMessages()) {
                                replicationMetrics.incrementInvalidMessageError(partitionResponseInfo.getPartition());
                                logger.error("Out of {} messages, {} invalid messages were found in message stream from {}", messageInfoList.size(), messageInfoList.size() - validMessageDetectionInputStream.getValidMessageInfoList().size(), remoteReplicaInfo.getReplicaId());
                            }
                            // From here on only the messages that survived sieving are considered.
                            messageInfoList = validMessageDetectionInputStream.getValidMessageInfoList();
                            if (messageInfoList.size() == 0) {
                                logger.error("MessageInfoList is of size 0 as all messages are invalidated ");
                            } else {
                                MessageFormatWriteSet writeset = new MessageFormatWriteSet(validMessageDetectionInputStream, messageInfoList, false);
                                remoteReplicaInfo.getLocalStore().put(writeset);
                            }
                        } else {
                            // Validation disabled: hand the raw response stream to the store.
                            MessageFormatWriteSet writeset = new MessageFormatWriteSet(getResponse.getInputStream(), messageInfoList, true);
                            remoteReplicaInfo.getLocalStore().put(writeset);
                        }
                        for (MessageInfo messageInfo : messageInfoList) {
                            totalBytesFixed += messageInfo.getSize();
                            logger.trace("Remote node: {} Thread name: {} Remote replica: {} Message replicated: {} Partition: {} " + "Local mount path: {} Message size: {}", remoteNode, threadName, remoteReplicaInfo.getReplicaId(), messageInfo.getStoreKey(), remoteReplicaInfo.getReplicaId().getPartitionId(), remoteReplicaInfo.getLocalReplicaId().getMountPath(), messageInfo.getSize());
                            if (notification != null) {
                                notification.onBlobReplicaCreated(dataNodeId.getHostname(), dataNodeId.getPort(), messageInfo.getStoreKey().getID(), BlobReplicaSourceType.REPAIRED);
                            }
                        }
                        totalBlobsFixed += messageInfoList.size();
                        remoteReplicaInfo.setToken(exchangeMetadataResponse.remoteToken);
                        remoteReplicaInfo.setLocalLagFromRemoteInBytes(exchangeMetadataResponse.localLagFromRemoteInBytes);
                        logger.trace("Remote node: {} Thread name: {} Remote replica: {} Token after speaking to remote node: {}", remoteNode, threadName, remoteReplicaInfo.getReplicaId(), exchangeMetadataResponse.remoteToken);
                    } catch (StoreException e) {
                        // Already_Exist is deliberately neither counted nor logged; either way the token stays where it was.
                        if (e.getErrorCode() != StoreErrorCodes.Already_Exist) {
                            replicationMetrics.updateLocalStoreError(remoteReplicaInfo.getReplicaId());
                            // The trailing throwable has no placeholder, so the logger records it with its stack trace.
                            logger.error("Remote node: {} Thread name: {} Remote replica: {}", remoteNode, threadName, remoteReplicaInfo.getReplicaId(), e);
                        }
                    }
                } else if (partitionResponseInfo.getErrorCode() == ServerErrorCode.Blob_Deleted) {
                    replicationMetrics.blobDeletedOnGetCount.inc();
                    logger.trace("One of the blobs to GET is deleted: Remote node: {} Thread name: {} Remote replica: {}", remoteNode, threadName, remoteReplicaInfo.getReplicaId());
                } else {
                    replicationMetrics.updateGetRequestError(remoteReplicaInfo.getReplicaId());
                    logger.error("Remote node: {} Thread name: {} Remote replica: {} Server error: {}", remoteNode, threadName, remoteReplicaInfo.getReplicaId(), partitionResponseInfo.getErrorCode());
                }
            } else {
                // There are no missing keys. We just advance the token
                remoteReplicaInfo.setToken(exchangeMetadataResponse.remoteToken);
                remoteReplicaInfo.setLocalLagFromRemoteInBytes(exchangeMetadataResponse.localLagFromRemoteInBytes);
                logger.trace("Remote node: {} Thread name: {} Remote replica: {} Token after speaking to remote node: {}", remoteNode, threadName, remoteReplicaInfo.getReplicaId(), exchangeMetadataResponse.remoteToken);
            }
        }
    }
    long batchStoreWriteTime = SystemTime.getInstance().milliseconds() - startTime;
    replicationMetrics.updateBatchStoreWriteTime(batchStoreWriteTime, totalBytesFixed, totalBlobsFixed, replicatingFromRemoteColo, replicatingOverSsl, datacenterName);
}
Also used : MessageSievingInputStream(com.github.ambry.messageformat.MessageSievingInputStream) PartitionResponseInfo(com.github.ambry.protocol.PartitionResponseInfo) MessageInfo(com.github.ambry.store.MessageInfo) MessageFormatWriteSet(com.github.ambry.messageformat.MessageFormatWriteSet) StoreException(com.github.ambry.store.StoreException)

Aggregations

MessageFormatWriteSet (com.github.ambry.messageformat.MessageFormatWriteSet): 2, MessageSievingInputStream (com.github.ambry.messageformat.MessageSievingInputStream): 2, PartitionResponseInfo (com.github.ambry.protocol.PartitionResponseInfo): 2, MessageInfo (com.github.ambry.store.MessageInfo): 2, StoreException (com.github.ambry.store.StoreException): 2