Use of com.github.ambry.replication.FindToken in project ambry by linkedin.
The class AmbryRequests, method handleReplicaMetadataRequest:
@Override
public void handleReplicaMetadataRequest(NetworkRequest request) throws IOException, InterruptedException {
  if (replicationEngine == null) {
    throw new UnsupportedOperationException("Replication not supported on this node.");
  }
  ReplicaMetadataRequest replicaMetadataRequest =
      ReplicaMetadataRequest.readFrom(new DataInputStream(request.getInputStream()), clusterMap, findTokenHelper);
  long requestQueueTime = SystemTime.getInstance().milliseconds() - request.getStartTimeInMs();
  long totalTimeSpent = requestQueueTime;
  metrics.replicaMetadataRequestQueueTimeInMs.update(requestQueueTime);
  metrics.replicaMetadataRequestRate.mark();
  List<ReplicaMetadataRequestInfo> replicaMetadataRequestInfoList =
      replicaMetadataRequest.getReplicaMetadataRequestInfoList();
  int partitionCnt = replicaMetadataRequestInfoList.size();
  long startTimeInMs = SystemTime.getInstance().milliseconds();
  ReplicaMetadataResponse response = null;
  try {
    List<ReplicaMetadataResponseInfo> replicaMetadataResponseList = new ArrayList<>(partitionCnt);
    for (ReplicaMetadataRequestInfo replicaMetadataRequestInfo : replicaMetadataRequestInfoList) {
      long partitionStartTimeInMs = SystemTime.getInstance().milliseconds();
      PartitionId partitionId = replicaMetadataRequestInfo.getPartitionId();
      ReplicaType replicaType = replicaMetadataRequestInfo.getReplicaType();
      ServerErrorCode error = validateRequest(partitionId, RequestOrResponseType.ReplicaMetadataRequest, false);
      logger.trace("{} Time used to validate metadata request: {}", partitionId,
          (SystemTime.getInstance().milliseconds() - partitionStartTimeInMs));
      if (error != ServerErrorCode.No_Error) {
        logger.error("Validating replica metadata request failed with error {} for partition {}", error, partitionId);
        ReplicaMetadataResponseInfo replicaMetadataResponseInfo =
            new ReplicaMetadataResponseInfo(partitionId, replicaType, error,
                ReplicaMetadataResponse.getCompatibleResponseVersion(replicaMetadataRequest.getVersionId()));
        replicaMetadataResponseList.add(replicaMetadataResponseInfo);
      } else {
        try {
          FindToken findToken = replicaMetadataRequestInfo.getToken();
          String hostName = replicaMetadataRequestInfo.getHostName();
          String replicaPath = replicaMetadataRequestInfo.getReplicaPath();
          Store store = storeManager.getStore(partitionId);
          partitionStartTimeInMs = SystemTime.getInstance().milliseconds();
          FindInfo findInfo = store.findEntriesSince(findToken,
              replicaMetadataRequest.getMaxTotalSizeOfEntriesInBytes(), hostName, replicaPath);
          logger.trace("{} Time used to find entry since: {}", partitionId,
              (SystemTime.getInstance().milliseconds() - partitionStartTimeInMs));
          partitionStartTimeInMs = SystemTime.getInstance().milliseconds();
          long totalBytesRead = findInfo.getFindToken().getBytesRead();
          replicationEngine.updateTotalBytesReadByRemoteReplica(partitionId, hostName, replicaPath, totalBytesRead);
          logger.trace("{} Time used to update total bytes read: {}", partitionId,
              (SystemTime.getInstance().milliseconds() - partitionStartTimeInMs));
          // getRemoteReplicaLag runs inside the constructor call below, so the timer brackets it
          partitionStartTimeInMs = SystemTime.getInstance().milliseconds();
          ReplicaMetadataResponseInfo replicaMetadataResponseInfo =
              new ReplicaMetadataResponseInfo(partitionId, replicaType, findInfo.getFindToken(),
                  findInfo.getMessageEntries(), getRemoteReplicaLag(store, totalBytesRead),
                  ReplicaMetadataResponse.getCompatibleResponseVersion(replicaMetadataRequest.getVersionId()));
          logger.trace("{} Time used to get remote replica lag in bytes: {}", partitionId,
              (SystemTime.getInstance().milliseconds() - partitionStartTimeInMs));
          if (replicaMetadataResponseInfo.getTotalSizeOfAllMessages()
              > 5 * replicaMetadataRequest.getMaxTotalSizeOfEntriesInBytes()) {
            logger.debug("{} generated a metadata response {} where the cumulative size of messages is {}",
                replicaMetadataRequest, replicaMetadataResponseInfo,
                replicaMetadataResponseInfo.getTotalSizeOfAllMessages());
            metrics.replicationResponseMessageSizeTooHigh.inc();
          }
          replicaMetadataResponseList.add(replicaMetadataResponseInfo);
          metrics.replicaMetadataTotalSizeOfMessages.update(replicaMetadataResponseInfo.getTotalSizeOfAllMessages());
        } catch (StoreException e) {
          logger.error("Store exception on a replica metadata request with error code {} for partition {}",
              e.getErrorCode(), partitionId, e);
          if (e.getErrorCode() == StoreErrorCodes.IOError) {
            metrics.storeIOError.inc();
          } else {
            metrics.unExpectedStoreFindEntriesError.inc();
          }
          ReplicaMetadataResponseInfo replicaMetadataResponseInfo =
              new ReplicaMetadataResponseInfo(partitionId, replicaType,
                  ErrorMapping.getStoreErrorMapping(e.getErrorCode()),
                  ReplicaMetadataResponse.getCompatibleResponseVersion(replicaMetadataRequest.getVersionId()));
          replicaMetadataResponseList.add(replicaMetadataResponseInfo);
        }
      }
    }
    response = new ReplicaMetadataResponse(replicaMetadataRequest.getCorrelationId(),
        replicaMetadataRequest.getClientId(), ServerErrorCode.No_Error, replicaMetadataResponseList,
        ReplicaMetadataResponse.getCompatibleResponseVersion(replicaMetadataRequest.getVersionId()));
  } catch (Exception e) {
    logger.error("Unknown exception for request {}", replicaMetadataRequest, e);
    response = new ReplicaMetadataResponse(replicaMetadataRequest.getCorrelationId(),
        replicaMetadataRequest.getClientId(), ServerErrorCode.Unknown_Error,
        ReplicaMetadataResponse.getCompatibleResponseVersion(replicaMetadataRequest.getVersionId()));
  } finally {
    long processingTime = SystemTime.getInstance().milliseconds() - startTimeInMs;
    totalTimeSpent += processingTime;
    publicAccessLogger.info("{} {} processingTime {}", replicaMetadataRequest, response, processingTime);
    logger.trace("{} {} processingTime {}", replicaMetadataRequest, response, processingTime);
    metrics.replicaMetadataRequestProcessingTimeInMs.update(processingTime);
    // client id now has dc name at the end, for example: ClientId=replication-metadata-abc.example.com[dc1]
    String[] clientStrs = replicaMetadataRequest.getClientId().split("\\[");
    if (clientStrs.length > 1) {
      String clientDc = clientStrs[1].substring(0, clientStrs[1].length() - 1);
      if (!currentNode.getDatacenterName().equals(clientDc)) {
        metrics.updateCrossColoMetadataExchangeBytesRate(clientDc, response != null ? response.sizeInBytes() : 0L);
      }
    }
  }
  requestResponseChannel.sendResponse(response, request,
      new ServerNetworkResponseMetrics(metrics.replicaMetadataResponseQueueTimeInMs,
          metrics.replicaMetadataSendTimeInMs, metrics.replicaMetadataTotalTimeInMs, null, null, totalTimeSpent));
}
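The cross-colo accounting in the finally block relies on the convention, noted in the comment above, that the client id ends with the caller's datacenter name in square brackets. A minimal, self-contained sketch of just that parsing step (the class, helper name, and sample ids below are illustrative, not Ambry APIs):

import java.util.Optional;

public class ClientIdDcParser {
  // Extracts the datacenter suffix from ids like "replication-metadata-abc.example.com[dc1]".
  static Optional<String> datacenterOf(String clientId) {
    String[] parts = clientId.split("\\[");
    if (parts.length > 1 && parts[1].endsWith("]")) {
      return Optional.of(parts[1].substring(0, parts[1].length() - 1));
    }
    return Optional.empty();
  }

  public static void main(String[] args) {
    System.out.println(datacenterOf("replication-metadata-abc.example.com[dc1]")); // Optional[dc1]
    System.out.println(datacenterOf("replication-metadata-legacy-client"));        // Optional.empty
  }
}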
Use of com.github.ambry.replication.FindToken in project ambry by linkedin.
The class ReplicaMetadataRequestInfo, method readFrom:
public static ReplicaMetadataRequestInfo readFrom(DataInputStream stream, ClusterMap clusterMap,
    FindTokenHelper findTokenHelper, short requestVersion) throws IOException {
  String hostName = Utils.readIntString(stream);
  String replicaPath = Utils.readIntString(stream);
  ReplicaType replicaType;
  if (requestVersion == ReplicaMetadataRequest.Replica_Metadata_Request_Version_V2) {
    replicaType = ReplicaType.values()[stream.readShort()];
  } else {
    // before version 2 we only have disk based replicas
    replicaType = ReplicaType.DISK_BACKED;
  }
  PartitionId partitionId = clusterMap.getPartitionIdFromStream(stream);
  FindTokenFactory findTokenFactory = findTokenHelper.getFindTokenFactoryFromReplicaType(replicaType);
  FindToken token = findTokenFactory.getFindToken(stream);
  return new ReplicaMetadataRequestInfo(partitionId, token, hostName, replicaPath, replicaType, requestVersion);
}
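readFrom implies a straightforward wire layout: a length-prefixed host name, a length-prefixed replica path, a replica type short (V2 only), the partition id bytes, and finally the token bytes. A hypothetical writer for that layout, assuming Utils.readIntString consumes an int length followed by that many UTF-8 bytes (the RequestInfoWireFormat class below is illustrative, not an Ambry API):

import java.io.DataOutputStream;
import java.io.IOException;
import java.nio.charset.StandardCharsets;

// Illustrative only: mirrors the read order of readFrom above.
class RequestInfoWireFormat {
  static void write(DataOutputStream out, String hostName, String replicaPath, short replicaTypeOrdinal,
      boolean isVersion2, byte[] partitionIdBytes, byte[] tokenBytes) throws IOException {
    writeIntString(out, hostName);
    writeIntString(out, replicaPath);
    if (isVersion2) {
      out.writeShort(replicaTypeOrdinal); // read back via ReplicaType.values()[stream.readShort()]
    }
    out.write(partitionIdBytes); // read back via clusterMap.getPartitionIdFromStream(stream)
    out.write(tokenBytes);       // read back via findTokenFactory.getFindToken(stream)
  }

  // Assumed inverse of Utils.readIntString: an int length followed by UTF-8 bytes.
  static void writeIntString(DataOutputStream out, String s) throws IOException {
    byte[] bytes = s.getBytes(StandardCharsets.UTF_8);
    out.writeInt(bytes.length);
    out.write(bytes);
  }
}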
Use of com.github.ambry.replication.FindToken in project ambry by linkedin.
The class ReplicaMetadataResponseInfo, method readFrom:
public static ReplicaMetadataResponseInfo readFrom(DataInputStream stream, FindTokenHelper helper,
    ClusterMap clusterMap, short replicaMetadataResponseVersion) throws IOException {
  PartitionId partitionId = clusterMap.getPartitionIdFromStream(stream);
  ReplicaType replicaType;
  if (replicaMetadataResponseVersion == ReplicaMetadataResponse.REPLICA_METADATA_RESPONSE_VERSION_V_6) {
    replicaType = ReplicaType.values()[stream.readShort()];
  } else {
    // before REPLICA_METADATA_RESPONSE_VERSION_V_6 there were only disk based replicas
    replicaType = ReplicaType.DISK_BACKED;
  }
  ServerErrorCode error = ServerErrorCode.values()[stream.readShort()];
  if (error != ServerErrorCode.No_Error) {
    return new ReplicaMetadataResponseInfo(partitionId, replicaType, error, replicaMetadataResponseVersion);
  } else {
    FindTokenFactory findTokenFactory = helper.getFindTokenFactoryFromReplicaType(replicaType);
    FindToken token = findTokenFactory.getFindToken(stream);
    MessageInfoAndMetadataListSerde messageInfoAndMetadataList =
        MessageInfoAndMetadataListSerde.deserializeMessageInfoAndMetadataList(stream, clusterMap,
            getMessageInfoAndMetadataListSerDeVersion(replicaMetadataResponseVersion));
    long remoteReplicaLag = stream.readLong();
    return new ReplicaMetadataResponseInfo(partitionId, replicaType, token,
        messageInfoAndMetadataList.getMessageInfoList(), remoteReplicaLag, replicaMetadataResponseVersion);
  }
}
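Both readFrom methods above delegate token parsing to a factory selected by replica type, which is what lets disk-backed and cloud-backed replicas use different token encodings behind the same call site. A reduced sketch of that pattern, trimmed to the two operations these call sites need (the simplified interfaces below are illustrative, not the full Ambry FindToken/FindTokenFactory contracts):

import java.io.DataInputStream;
import java.io.IOException;
import java.nio.ByteBuffer;

// Reduced interfaces: just enough to show the factory indirection used above.
interface SimpleFindToken {
  long getBytesRead();
  byte[] toBytes();
}

interface SimpleFindTokenFactory {
  SimpleFindToken getFindToken(DataInputStream stream) throws IOException;
}

// A token that is just a byte offset into a log, with a trivial factory.
class OffsetToken implements SimpleFindToken {
  private final long offset;
  OffsetToken(long offset) { this.offset = offset; }
  @Override public long getBytesRead() { return offset; }
  @Override public byte[] toBytes() {
    return ByteBuffer.allocate(Long.BYTES).putLong(offset).array();
  }
}

class OffsetTokenFactory implements SimpleFindTokenFactory {
  @Override public OffsetToken getFindToken(DataInputStream stream) throws IOException {
    return new OffsetToken(stream.readLong());
  }
}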
Use of com.github.ambry.replication.FindToken in project ambry by linkedin.
The class BlobStoreCompactor, method isDeleteTombstoneRemovable:
/**
 * Check if the given delete tombstone is removable. There are two cases where a delete tombstone can be safely
 * removed:
 * 1. the delete record has a finite expiration time and has already expired;
 * 2. all peer replica tokens have passed the position of this delete (that is, they have already replicated it).
 * @param deleteIndexEntry the {@link IndexEntry} associated with the delete tombstone.
 * @param currentIndexSegment the {@link IndexSegment} the delete tombstone comes from.
 * @return {@code true} if this delete tombstone can be safely removed, {@code false} otherwise.
 */
private boolean isDeleteTombstoneRemovable(IndexEntry deleteIndexEntry, IndexSegment currentIndexSegment)
    throws StoreException {
  if (srcIndex.isExpired(deleteIndexEntry.getValue())) {
    return true;
  }
  if (remoteTokenTracker == null) {
    return false;
  }
  for (Map.Entry<String, FindToken> entry : remoteTokenTracker.getPeerReplicaAndToken().entrySet()) {
    FindToken token = srcIndex.resetTokenIfRequired((StoreFindToken) entry.getValue());
    if (!token.equals(entry.getValue())) {
      // the incarnation id has changed or there was an unclean shutdown
      return false;
    }
    token = srcIndex.revalidateFindToken(entry.getValue());
    if (!token.equals(entry.getValue())) {
      // the log segment the token refers to has already been compacted
      return false;
    }
    switch (token.getType()) {
      case Uninitialized:
        return false;
      case JournalBased:
        // compaction only operates on segments out of the journal, so a journal-based token must
        // already be past the delete tombstone.
        break;
      case IndexBased:
        // if code reaches here, the index-based token is valid (it didn't get reset). We check the
        // following two rules:
        // 1. the token's index segment is behind the delete tombstone's index segment;
        // 2. if both are in the same segment, compare the store keys (a sealed index segment is sorted by key).
        StoreFindToken indexBasedToken = (StoreFindToken) token;
        if (indexBasedToken.getOffset().compareTo(currentIndexSegment.getStartOffset()) < 0) {
          // the index-based token is behind the current index segment (it hasn't reached this delete tombstone)
          return false;
        }
        if (indexBasedToken.getOffset().compareTo(currentIndexSegment.getStartOffset()) == 0) {
          // the index-based token refers to the current index segment, so compare the keys
          if (indexBasedToken.getStoreKey().compareTo(deleteIndexEntry.getKey()) <= 0) {
            return false;
          }
        }
        // if the token's offset is greater than the current index segment's start offset, the token
        // is clearly past the tombstone.
        break;
      default:
        throw new IllegalArgumentException("Unsupported token type in compaction: " + token.getType());
    }
  }
  srcMetrics.permanentDeleteTombstonePurgeCount.inc();
  return true;
}
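The IndexBased branch boils down to an offset comparison with a key tiebreak inside the same index segment. The same rule restated in isolation, with generic stand-ins for Ambry's Offset and StoreKey types (the class and method names here are illustrative):

// Standalone restatement of the index-based check above: a token is safely past a
// tombstone only if it points to a later index segment, or to the same segment with a
// key strictly greater than the tombstone's key (sealed index segments are key-sorted).
class TombstoneCheck {
  static <O extends Comparable<O>, K extends Comparable<K>> boolean tokenIsPastTombstone(
      O tokenSegmentStart, K tokenKey, O tombstoneSegmentStart, K tombstoneKey) {
    int cmp = tokenSegmentStart.compareTo(tombstoneSegmentStart);
    if (cmp < 0) {
      return false; // token points to an earlier segment: the tombstone was not replicated yet
    }
    if (cmp == 0) {
      return tokenKey.compareTo(tombstoneKey) > 0; // same segment: fall back to the key comparison
    }
    return true; // token points to a later segment, so it is past the tombstone
  }
}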
Use of com.github.ambry.replication.FindToken in project ambry by linkedin.
The class CosmosChangeFeedBasedReplicationFeed, method getNextEntriesAndUpdatedToken:
/**
 * Get the next set of change feed entries for the specified partition, after the {@code curFindToken}.
 * The total size of the returned entries is capped by {@code maxTotalSizeOfEntries}.
 * This method maintains a cache of change feed entries. If the {@code curFindToken} is not valid,
 * or if all the items in the cache have been consumed, it queries Cosmos for new entries.
 * @param curFindToken {@link FindToken} after which the next entries have to be returned.
 * @param maxTotalSizeOfEntries maximum total size of all the blobs returned.
 * @param partitionPath partition for which change feed entries have to be returned.
 * @return a {@link FindResult} that contains the updated {@link FindToken}, which can act as a bookmark for
 * subsequent requests, and a {@link List} of {@link CloudBlobMetadata} entries.
 * @throws DocumentClientException if any Cosmos query encounters an error.
 */
@Override
public FindResult getNextEntriesAndUpdatedToken(FindToken curFindToken, long maxTotalSizeOfEntries,
    String partitionPath) throws DocumentClientException {
  Timer.Context operationTimer = azureMetrics.replicationFeedQueryTime.time();
  try {
    List<CloudBlobMetadata> nextEntries = new ArrayList<>();
    CosmosChangeFeedFindToken cosmosChangeFeedFindToken = (CosmosChangeFeedFindToken) curFindToken;
    int index = cosmosChangeFeedFindToken.getIndex();
    ChangeFeedCacheEntry changeFeedCacheEntry = changeFeedCache.get(cosmosChangeFeedFindToken.getCacheSessionId());
    boolean cacheHit = true;
    if (changeFeedCacheEntry == null || !isCacheValid(partitionPath, cosmosChangeFeedFindToken, changeFeedCacheEntry)) {
      // the cache may not be valid. So we cannot use session id
      azureMetrics.changeFeedCacheMissRate.mark();
      cacheHit = false;
      changeFeedCacheEntry = getNextChangeFeed(partitionPath, cosmosChangeFeedFindToken.getStartContinuationToken());
      // invalidate the previous token's cache
      changeFeedCache.remove(cosmosChangeFeedFindToken.getCacheSessionId());
      index = 0;
    }
    long resultSize = 0;
    List<CloudBlobMetadata> fetchedEntries = changeFeedCacheEntry.getFetchedEntries();
    while (true) {
      if (index < fetchedEntries.size()) {
        if (cacheHit) {
          azureMetrics.changeFeedCacheHitRate.mark();
          cacheHit = false;
        }
        if (resultSize + fetchedEntries.get(index).getSize() < maxTotalSizeOfEntries || resultSize == 0) {
          nextEntries.add(fetchedEntries.get(index));
          resultSize = resultSize + fetchedEntries.get(index).getSize();
          index++;
        } else {
          break;
        }
      } else {
        // we can reuse the session id in this case, because we know that the cache ran out of new items.
        changeFeedCacheEntry = getNextChangeFeed(partitionPath, changeFeedCacheEntry.getEndContinuationToken(),
            changeFeedCacheEntry.getCacheSessionId());
        fetchedEntries = changeFeedCacheEntry.getFetchedEntries();
        if (fetchedEntries.isEmpty()) {
          // return updated token. The source replication logic will retry replication with updated token.
          break;
        } else {
          azureMetrics.changeFeedCacheRefreshRate.mark();
        }
        index = 0;
      }
    }
    FindToken updatedToken = new CosmosChangeFeedFindToken(cosmosChangeFeedFindToken.getBytesRead() + resultSize,
        changeFeedCacheEntry.getStartContinuationToken(), changeFeedCacheEntry.getEndContinuationToken(), index,
        changeFeedCacheEntry.getFetchedEntries().size(), changeFeedCacheEntry.getCacheSessionId(),
        cosmosChangeFeedFindToken.getVersion());
    changeFeedCache.put(changeFeedCacheEntry.getCacheSessionId(), new ChangeFeedCacheEntry(changeFeedCacheEntry));
    return new FindResult(nextEntries, updatedToken);
  } finally {
    operationTimer.stop();
  }
}
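The inner loop implements a size-capped batch with an "always make progress" escape: an entry is admitted if it fits under the cap, or unconditionally when the batch is still empty, so a single oversized blob cannot stall replication forever. The same admission rule in isolation, with entry sizes reduced to plain longs (an illustrative sketch, not an Ambry API):

import java.util.ArrayList;
import java.util.List;

public class SizeCappedBatch {
  // Admit entries while the running total stays under the cap, but always admit the
  // first entry so one oversized item cannot block progress.
  static List<Long> takeBatch(List<Long> sizes, long maxTotalSize) {
    List<Long> batch = new ArrayList<>();
    long total = 0;
    for (long size : sizes) {
      if (total + size < maxTotalSize || total == 0) {
        batch.add(size);
        total += size;
      } else {
        break;
      }
    }
    return batch;
  }

  public static void main(String[] args) {
    // A 10 MB blob passes even with a 4 MB cap; the next entry is then cut off.
    System.out.println(takeBatch(List.of(10_000_000L, 1L), 4_000_000L)); // [10000000]
    System.out.println(takeBatch(List.of(1L, 2L, 3L), 4L));             // [1, 2]
  }
}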