Use of com.github.ambry.cloud.CloudBlobMetadata in project ambry by LinkedIn.
From the class CosmosChangeFeedBasedReplicationFeed, the method getNextEntriesAndUpdatedToken:
/**
 * Get the next set of change feed entries for the specified partition, starting after the {@code curFindToken}.
 * The total size of the returned entries is capped by {@code maxTotalSizeOfEntries}.
 * This method maintains a cache of change feed entries. If the {@code curFindToken} is not valid,
 * or if all the items in the cache have been consumed, then it queries Cosmos for new entries.
 * @param curFindToken {@link FindToken} after which the next entries have to be returned.
 * @param maxTotalSizeOfEntries maximum total size of all the blobs returned.
 * @param partitionPath partition for which change feed entries have to be returned.
 * @return {@link FindResult} instance that contains an updated {@link FindToken} object, which can act as a bookmark
 *         for subsequent requests, and a {@link List} of {@link CloudBlobMetadata} entries.
 * @throws DocumentClientException if any Cosmos query encounters an error.
 */
@Override
public FindResult getNextEntriesAndUpdatedToken(FindToken curFindToken, long maxTotalSizeOfEntries,
    String partitionPath) throws DocumentClientException {
  Timer.Context operationTimer = azureMetrics.replicationFeedQueryTime.time();
  try {
    List<CloudBlobMetadata> nextEntries = new ArrayList<>();
    CosmosChangeFeedFindToken cosmosChangeFeedFindToken = (CosmosChangeFeedFindToken) curFindToken;
    int index = cosmosChangeFeedFindToken.getIndex();
    ChangeFeedCacheEntry changeFeedCacheEntry = changeFeedCache.get(cosmosChangeFeedFindToken.getCacheSessionId());
    boolean cacheHit = true;
    if (changeFeedCacheEntry == null || !isCacheValid(partitionPath, cosmosChangeFeedFindToken, changeFeedCacheEntry)) {
      // The cache entry is missing or invalid, so its session id cannot be reused.
      azureMetrics.changeFeedCacheMissRate.mark();
      cacheHit = false;
      changeFeedCacheEntry = getNextChangeFeed(partitionPath, cosmosChangeFeedFindToken.getStartContinuationToken());
      // Invalidate the previous token's cache entry.
      changeFeedCache.remove(cosmosChangeFeedFindToken.getCacheSessionId());
      index = 0;
    }
    long resultSize = 0;
    List<CloudBlobMetadata> fetchedEntries = changeFeedCacheEntry.getFetchedEntries();
    while (true) {
      if (index < fetchedEntries.size()) {
        if (cacheHit) {
          azureMetrics.changeFeedCacheHitRate.mark();
          cacheHit = false;
        }
        if (resultSize + fetchedEntries.get(index).getSize() < maxTotalSizeOfEntries || resultSize == 0) {
          nextEntries.add(fetchedEntries.get(index));
          resultSize = resultSize + fetchedEntries.get(index).getSize();
          index++;
        } else {
          break;
        }
      } else {
        // The session id can be reused here, because the cache merely ran out of new items.
        changeFeedCacheEntry = getNextChangeFeed(partitionPath, changeFeedCacheEntry.getEndContinuationToken(),
            changeFeedCacheEntry.getCacheSessionId());
        fetchedEntries = changeFeedCacheEntry.getFetchedEntries();
        if (fetchedEntries.isEmpty()) {
          // Return the updated token; the source replication logic will retry with it.
          break;
        } else {
          azureMetrics.changeFeedCacheRefreshRate.mark();
        }
        index = 0;
      }
    }
    FindToken updatedToken = new CosmosChangeFeedFindToken(cosmosChangeFeedFindToken.getBytesRead() + resultSize,
        changeFeedCacheEntry.getStartContinuationToken(), changeFeedCacheEntry.getEndContinuationToken(), index,
        changeFeedCacheEntry.getFetchedEntries().size(), changeFeedCacheEntry.getCacheSessionId(),
        cosmosChangeFeedFindToken.getVersion());
    changeFeedCache.put(changeFeedCacheEntry.getCacheSessionId(), new ChangeFeedCacheEntry(changeFeedCacheEntry));
    return new FindResult(nextEntries, updatedToken);
  } finally {
    operationTimer.stop();
  }
}
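The size cap in the loop above admits an entry only while the running total stays under maxTotalSizeOfEntries, with one exception: the first entry is always admitted, so a single blob larger than the cap cannot stall replication. Below is a minimal standalone sketch of that admission rule; the helper name capBySize is illustrative and not part of the source.

// Sketch only: mirrors the admission rule of the loop above.
static List<CloudBlobMetadata> capBySize(List<CloudBlobMetadata> fetchedEntries, long maxTotalSizeOfEntries) {
  List<CloudBlobMetadata> result = new ArrayList<>();
  long resultSize = 0;
  for (CloudBlobMetadata metadata : fetchedEntries) {
    // Stop once the next entry would breach the cap, unless nothing has been admitted yet.
    if (resultSize + metadata.getSize() >= maxTotalSizeOfEntries && resultSize != 0) {
      break;
    }
    result.add(metadata);
    resultSize += metadata.getSize();
  }
  return result;
}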
Use of com.github.ambry.cloud.CloudBlobMetadata in project ambry by LinkedIn.
From the class CosmosDataAccessor, the method getContainerBlobs:
/**
 * Get the list of blobs in the specified partition that belong to the specified container.
 * @param partitionPath the partition to query.
 * @param accountId the account id of the container.
 * @param containerId the container id of the container.
 * @param queryLimit the maximum number of blobs to return.
 * @return a {@link List} of {@link CloudBlobMetadata} referencing the blobs belonging to the specified container.
 * @throws DocumentClientException in case of any error.
 */
List<CloudBlobMetadata> getContainerBlobs(String partitionPath, short accountId, short containerId, int queryLimit)
    throws DocumentClientException {
  SqlQuerySpec querySpec = new SqlQuerySpec(CONTAINER_BLOBS_QUERY,
      new SqlParameterCollection(new SqlParameter(LIMIT_PARAM, queryLimit),
          new SqlParameter(CONTAINER_ID_PARAM, containerId), new SqlParameter(ACCOUNT_ID_PARAM, accountId)));
  FeedOptions feedOptions = new FeedOptions();
  feedOptions.setMaxItemCount(queryLimit);
  feedOptions.setResponseContinuationTokenLimitInKb(continuationTokenLimitKb);
  feedOptions.setPartitionKey(new PartitionKey(partitionPath));
  try {
    Iterator<FeedResponse<Document>> iterator =
        executeCosmosQuery(partitionPath, querySpec, feedOptions, azureMetrics.deletedContainerBlobsQueryTime)
            .getIterator();
    List<CloudBlobMetadata> containerBlobsList = new ArrayList<>();
    double requestCharge = 0.0;
    while (iterator.hasNext()) {
      FeedResponse<Document> response = iterator.next();
      requestCharge += response.getRequestCharge();
      response.getResults().iterator().forEachRemaining(doc -> containerBlobsList.add(createMetadataFromDocument(doc)));
    }
    if (requestCharge >= requestChargeThreshold) {
      logger.info("Deleted container blobs query partition {} containerId {} accountId {} request charge {} for {} records",
          partitionPath, containerId, accountId, requestCharge, containerBlobsList.size());
    }
    return containerBlobsList;
  } catch (RuntimeException rex) {
    if (rex.getCause() instanceof DocumentClientException) {
      logger.warn("Container blobs query {} partition {} got {}", querySpec.getQueryText(), partitionPath,
          ((DocumentClientException) rex.getCause()).getStatusCode());
      throw (DocumentClientException) rex.getCause();
    }
    throw rex;
  }
}
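A hypothetical caller sketch follows; the accessor variable, partition path, account id, container id, and limit are illustrative values, not taken from the source.

// Sketch only: all concrete values below are made up for illustration.
List<CloudBlobMetadata> blobs =
    cosmosDataAccessor.getContainerBlobs("partition-7", (short) 101, (short) 5, 100);
for (CloudBlobMetadata metadata : blobs) {
  System.out.println(metadata.getId() + " (" + metadata.getSize() + " bytes)");
}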
Use of com.github.ambry.cloud.CloudBlobMetadata in project ambry by LinkedIn.
From the class CosmosDataAccessor, the method bulkDeleteMetadata:
/**
* Delete the blob metadata documents from CosmosDB using the BulkDelete stored procedure.
* @param blobMetadataList the list of blob metadata documents to delete.
* @return the number of documents deleted.
* @throws DocumentClientException if the operation failed.
*/
private int bulkDeleteMetadata(List<CloudBlobMetadata> blobMetadataList) throws DocumentClientException {
  String partitionPath = blobMetadataList.get(0).getPartitionId();
  RequestOptions options = getRequestOptions(partitionPath);
  // The stored proc link is provided in config. Test for it at startup and use it if available.
  String quotedBlobIds =
      blobMetadataList.stream().map(metadata -> '"' + metadata.getId() + '"').collect(Collectors.joining(","));
  String query = String.format(BULK_DELETE_QUERY, quotedBlobIds);
  String sprocLink = cosmosCollectionLink + BULK_DELETE_SPROC;
  boolean more = true;
  int deleteCount = 0;
  double requestCharge = 0;
  try {
    while (more) {
      StoredProcedureResponse response =
          asyncDocumentClient.executeStoredProcedure(sprocLink, options, new String[]{query}).toBlocking().single();
      requestCharge += response.getRequestCharge();
      Document responseDoc = response.getResponseAsDocument();
      more = responseDoc.getBoolean(PROPERTY_CONTINUATION);
      deleteCount += responseDoc.getInt(PROPERTY_DELETED);
    }
    if (requestCharge >= requestChargeThreshold) {
      logger.info("Bulk delete partition {} request charge {} for {} records", partitionPath, requestCharge,
          deleteCount);
    }
    return deleteCount;
  } catch (RuntimeException rex) {
    if (rex.getCause() instanceof DocumentClientException) {
      throw (DocumentClientException) rex.getCause();
    } else {
      throw rex;
    }
  }
}
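The loop above depends on a simple contract with the BulkDelete stored procedure: each bounded execution reports how many documents it deleted and whether matching documents remain, so the client keeps re-invoking it until the continuation flag goes false. A stripped-down sketch of that contract, where runBulkDeleteOnce() is a hypothetical helper wrapping the executeStoredProcedure call and the literal keys stand in for the PROPERTY_DELETED and PROPERTY_CONTINUATION constants:

// Sketch only: runBulkDeleteOnce() is hypothetical, and the literal keys
// stand in for the PROPERTY_DELETED / PROPERTY_CONTINUATION constants.
int totalDeleted = 0;
boolean more = true;
while (more) {
  Document responseDoc = runBulkDeleteOnce();     // one bounded sproc execution
  totalDeleted += responseDoc.getInt("deleted");  // documents removed this round
  more = responseDoc.getBoolean("continuation");  // true if matching docs remain
}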
Use of com.github.ambry.cloud.CloudBlobMetadata in project ambry by LinkedIn.
From the class CosmosDataAccessor, the method queryMetadata:
/**
 * Get the list of blobs in the specified partition matching the specified DocumentDB query spec.
 * @param partitionPath the partition to query.
 * @param querySpec the DocumentDB {@link SqlQuerySpec} to execute.
 * @param timer the {@link Timer} to use to record query time (excluding waiting).
 * @return a {@link List} of {@link CloudBlobMetadata} referencing the matching blobs.
 * @throws DocumentClientException in case of any error.
 */
List<CloudBlobMetadata> queryMetadata(String partitionPath, SqlQuerySpec querySpec, Timer timer)
    throws DocumentClientException {
  FeedOptions feedOptions = new FeedOptions();
  // TODO: set maxItemCount
  feedOptions.setResponseContinuationTokenLimitInKb(continuationTokenLimitKb);
  feedOptions.setPartitionKey(new PartitionKey(partitionPath));
  // TODO: consolidate error count here
  try {
    Iterator<FeedResponse<Document>> iterator =
        executeCosmosQuery(partitionPath, querySpec, feedOptions, timer).getIterator();
    List<CloudBlobMetadata> metadataList = new ArrayList<>();
    double requestCharge = 0.0;
    while (iterator.hasNext()) {
      FeedResponse<Document> response = iterator.next();
      requestCharge += response.getRequestCharge();
      response.getResults().iterator().forEachRemaining(doc -> metadataList.add(createMetadataFromDocument(doc)));
    }
    if (requestCharge >= requestChargeThreshold) {
      logger.info("Query partition {} request charge {} for {} records", partitionPath, requestCharge,
          metadataList.size());
    }
    return metadataList;
  } catch (RuntimeException rex) {
    if (rex.getCause() instanceof DocumentClientException) {
      logger.warn("Query {} on partition {} got {}", querySpec.getQueryText(), partitionPath,
          ((DocumentClientException) rex.getCause()).getStatusCode());
      throw (DocumentClientException) rex.getCause();
    }
    throw rex;
  }
}
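A hypothetical caller sketch; the query text, parameter value, partition path, and the standalone Timer are illustrative and not taken from the source.

// Sketch only: the query and values below are made up for illustration.
SqlQuerySpec spec = new SqlQuerySpec("SELECT * FROM c WHERE c.accountId = @accountId",
    new SqlParameterCollection(new SqlParameter("@accountId", 101)));
Timer queryTimer = new Timer(); // a standalone com.codahale.metrics.Timer for the sketch
List<CloudBlobMetadata> matches = cosmosDataAccessor.queryMetadata("partition-7", spec, queryTimer);
matches.forEach(metadata -> System.out.println(metadata.getId()));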
Use of com.github.ambry.cloud.CloudBlobMetadata in project ambry by LinkedIn.
From the class AzureBlobDataAccessorTest, the method testPurge:
/**
* Test purge
*/
@Test
public void testPurge() throws Exception {
  // Purge 3 blobs with response statuses 202, 404 and 503.
  String blobNameOkStatus = "andromeda";
  String blobNameNotFoundStatus = "sirius";
  String blobNameErrorStatus = "mutant";
  BlobBatch mockBatch = mock(BlobBatch.class);
  when(mockBatchClient.getBlobBatch()).thenReturn(mockBatch);
  Response<Void> okResponse = mock(Response.class);
  when(okResponse.getStatusCode()).thenReturn(202);
  when(mockBatch.deleteBlob(anyString(), endsWith(blobNameOkStatus))).thenReturn(okResponse);
  BlobStorageException notFoundException = mock(BlobStorageException.class);
  when(notFoundException.getStatusCode()).thenReturn(404);
  Response<Void> notFoundResponse = mock(Response.class);
  when(notFoundResponse.getStatusCode()).thenThrow(notFoundException);
  when(mockBatch.deleteBlob(anyString(), endsWith(blobNameNotFoundStatus))).thenReturn(notFoundResponse);
  BlobStorageException badException = mock(BlobStorageException.class);
  when(badException.getStatusCode()).thenReturn(503);
  Response<Void> badResponse = mock(Response.class);
  when(badResponse.getStatusCode()).thenThrow(badException);
  when(mockBatch.deleteBlob(anyString(), endsWith(blobNameErrorStatus))).thenReturn(badResponse);
  List<CloudBlobMetadata> purgeList = new ArrayList<>();
  purgeList.add(new CloudBlobMetadata().setId(blobNameOkStatus));
  purgeList.add(new CloudBlobMetadata().setId(blobNameNotFoundStatus));
  // Purge the first two and expect success (the 404 counts as already deleted).
  List<CloudBlobMetadata> purgeResponseList = dataAccessor.purgeBlobs(purgeList);
  assertEquals("Wrong response size", 2, purgeResponseList.size());
  assertEquals("Wrong blob name", blobNameOkStatus, purgeResponseList.get(0).getId());
  assertEquals("Wrong blob name", blobNameNotFoundStatus, purgeResponseList.get(1).getId());
  // Including the last one should fail, with the 503 propagated.
  purgeList.add(new CloudBlobMetadata().setId(blobNameErrorStatus));
  try {
    dataAccessor.purgeBlobs(purgeList);
    fail("Expected purge to fail");
  } catch (BlobStorageException bex) {
    assertEquals("Unexpected status code", 503, bex.getStatusCode());
  }
}
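The three mocked responses above follow one pattern: getStatusCode() either returns the status (success) or throws a mocked BlobStorageException carrying it (failure). A small helper could consolidate that setup; this is a sketch, not part of the original test, and the name mockDeleteResponse is illustrative.

// Sketch only: consolidates the repeated mock setup from the test above.
private static Response<Void> mockDeleteResponse(int statusCode, boolean throwOnAccess) {
  Response<Void> response = mock(Response.class);
  if (throwOnAccess) {
    BlobStorageException exception = mock(BlobStorageException.class);
    when(exception.getStatusCode()).thenReturn(statusCode);
    when(response.getStatusCode()).thenThrow(exception);
  } else {
    when(response.getStatusCode()).thenReturn(statusCode);
  }
  return response;
}

With it, each stub becomes a one-liner, e.g. when(mockBatch.deleteBlob(anyString(), endsWith(blobNameOkStatus))).thenReturn(mockDeleteResponse(202, false)).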