Search in sources :

Example 36 with CloudBlobMetadata

use of com.github.ambry.cloud.CloudBlobMetadata in project ambry by linkedin.

the class CosmosChangeFeedBasedReplicationFeed method getNextEntriesAndUpdatedToken.

/**
 * Get next set of change feed entries for the specified partition, after the {@code curFindToken}.
 * The total size of the returned entries is kept below {@code maxTotalSizeOfEntries}, except that an entry
 * is always admitted while nothing has been accumulated yet (so a single oversized entry is still returned).
 * This method creates a cache for change feed entries. If the {@code curFindToken} is not valid,
 * or if all the items in the cache are consumed, then it queries Cosmos for new entries.
 * @param curFindToken {@link FindToken} after which the next entries have to be returned. It is cast to
 *                     {@code CosmosChangeFeedFindToken} without a prior type check.
 * @param maxTotalSizeOfEntries maximum size of all the blobs returned.
 * @param partitionPath Partition for which change feed entries have to be returned.
 * @return {@link FindResult} instance that contains updated {@link FindToken} object which can act as a bookmark for
 * subsequent requests, and {@link List} of {@link CloudBlobMetadata} entries.
 * @throws DocumentClientException if any cosmos query encounters error.
 */
@Override
public FindResult getNextEntriesAndUpdatedToken(FindToken curFindToken, long maxTotalSizeOfEntries, String partitionPath) throws DocumentClientException {
    // Time the whole operation; the timer is stopped in the finally block on every exit path.
    Timer.Context operationTimer = azureMetrics.replicationFeedQueryTime.time();
    try {
        List<CloudBlobMetadata> nextEntries = new ArrayList<>();
        CosmosChangeFeedFindToken cosmosChangeFeedFindToken = (CosmosChangeFeedFindToken) curFindToken;
        // Position within the cached entries at which to resume, as recorded in the incoming token.
        int index = cosmosChangeFeedFindToken.getIndex();
        ChangeFeedCacheEntry changeFeedCacheEntry = changeFeedCache.get(cosmosChangeFeedFindToken.getCacheSessionId());
        // Tracks whether the cache-hit metric still needs to be marked; it is marked at most once per call.
        boolean cacheHit = true;
        if (changeFeedCacheEntry == null || !isCacheValid(partitionPath, cosmosChangeFeedFindToken, changeFeedCacheEntry)) {
            // the cache may not be valid. So we cannot use session id
            azureMetrics.changeFeedCacheMissRate.mark();
            cacheHit = false;
            // Re-fetch starting from the token's start continuation token, then restart at the first fetched entry.
            changeFeedCacheEntry = getNextChangeFeed(partitionPath, cosmosChangeFeedFindToken.getStartContinuationToken());
            // invalidate the previous token's cache
            changeFeedCache.remove(cosmosChangeFeedFindToken.getCacheSessionId());
            index = 0;
        }
        // Accumulated size of the entries added to nextEntries so far.
        long resultSize = 0;
        List<CloudBlobMetadata> fetchedEntries = changeFeedCacheEntry.getFetchedEntries();
        // Loop exits either when the size budget is reached or when a refresh yields no entries.
        while (true) {
            if (index < fetchedEntries.size()) {
                if (cacheHit) {
                    azureMetrics.changeFeedCacheHitRate.mark();
                    cacheHit = false;
                }
                // Admit the entry if the running total stays strictly below the budget, or if nothing has been
                // accumulated yet (guarantees progress even when one entry alone exceeds the budget).
                if (resultSize + fetchedEntries.get(index).getSize() < maxTotalSizeOfEntries || resultSize == 0) {
                    nextEntries.add(fetchedEntries.get(index));
                    resultSize = resultSize + fetchedEntries.get(index).getSize();
                    index++;
                } else {
                    break;
                }
            } else {
                // we can reuse the session id in this case, because we know that the cache ran out of new items.
                changeFeedCacheEntry = getNextChangeFeed(partitionPath, changeFeedCacheEntry.getEndContinuationToken(), changeFeedCacheEntry.getCacheSessionId());
                fetchedEntries = changeFeedCacheEntry.getFetchedEntries();
                if (fetchedEntries.isEmpty()) {
                    // return updated token. The source replication logic will retry replication with updated token.
                    // NOTE(review): index is not reset on this path, so the token below carries the previous
                    // index together with the new (empty) entry's size — confirm this is intended.
                    break;
                } else {
                    azureMetrics.changeFeedCacheRefreshRate.mark();
                }
                index = 0;
            }
        }
        // Build the bookmark from the current cache entry: bytes read so far plus this call's result size,
        // the entry's continuation tokens, the resume index, the entry count, the session id and the token version.
        FindToken updatedToken = new CosmosChangeFeedFindToken(cosmosChangeFeedFindToken.getBytesRead() + resultSize, changeFeedCacheEntry.getStartContinuationToken(), changeFeedCacheEntry.getEndContinuationToken(), index, changeFeedCacheEntry.getFetchedEntries().size(), changeFeedCacheEntry.getCacheSessionId(), cosmosChangeFeedFindToken.getVersion());
        // Store a copy of the current entry in the cache under its session id for the next call.
        changeFeedCache.put(changeFeedCacheEntry.getCacheSessionId(), new ChangeFeedCacheEntry(changeFeedCacheEntry));
        return new FindResult(nextEntries, updatedToken);
    } finally {
        operationTimer.stop();
    }
}
Also used : Timer(com.codahale.metrics.Timer) CloudBlobMetadata(com.github.ambry.cloud.CloudBlobMetadata) FindToken(com.github.ambry.replication.FindToken) ArrayList(java.util.ArrayList) FindResult(com.github.ambry.cloud.FindResult)

Example 37 with CloudBlobMetadata

use of com.github.ambry.cloud.CloudBlobMetadata in project ambry by linkedin.

the class CosmosDataAccessor method getContainerBlobs.

/**
 * Fetch the metadata of blobs in one partition that belong to the given account/container pair.
 * Runs {@code CONTAINER_BLOBS_QUERY} against the partition and drains every page of the response.
 * @param partitionPath the partition to query.
 * @param accountId account id of the container.
 * @param containerId container id of the container.
 * @param queryLimit max number of blobs to return; also used as the page size of the query.
 * @return a List of {@link CloudBlobMetadata}, one per document returned by the query.
 * @throws DocumentClientException if the query fails with a {@link DocumentClientException} as the cause.
 */
List<CloudBlobMetadata> getContainerBlobs(String partitionPath, short accountId, short containerId, int queryLimit) throws DocumentClientException {
    SqlParameterCollection queryParams = new SqlParameterCollection(
        new SqlParameter(LIMIT_PARAM, queryLimit),
        new SqlParameter(CONTAINER_ID_PARAM, containerId),
        new SqlParameter(ACCOUNT_ID_PARAM, accountId));
    SqlQuerySpec querySpec = new SqlQuerySpec(CONTAINER_BLOBS_QUERY, queryParams);

    FeedOptions feedOptions = new FeedOptions();
    feedOptions.setMaxItemCount(queryLimit);
    feedOptions.setResponseContinuationTokenLimitInKb(continuationTokenLimitKb);
    feedOptions.setPartitionKey(new PartitionKey(partitionPath));

    try {
        Iterator<FeedResponse<Document>> pages = executeCosmosQuery(partitionPath, querySpec, feedOptions, azureMetrics.deletedContainerBlobsQueryTime).getIterator();
        List<CloudBlobMetadata> blobs = new ArrayList<>();
        double totalCharge = 0.0;
        while (pages.hasNext()) {
            FeedResponse<Document> page = pages.next();
            totalCharge += page.getRequestCharge();
            for (Document doc : page.getResults()) {
                blobs.add(createMetadataFromDocument(doc));
            }
        }
        // Only expensive queries are worth a log line.
        if (totalCharge >= requestChargeThreshold) {
            logger.info("Deleted container blobs query partition {} containerId {} accountId {} request charge {} for {} records", partitionPath, containerId, accountId, totalCharge, blobs.size());
        }
        return blobs;
    } catch (RuntimeException rex) {
        // Surface the wrapped DocumentClientException, if any, as the checked exception callers expect.
        Throwable cause = rex.getCause();
        if (!(cause instanceof DocumentClientException)) {
            throw rex;
        }
        DocumentClientException dce = (DocumentClientException) cause;
        logger.warn("Dead blobs query {} partition {} got {}", querySpec.getQueryText(), partitionPath, dce.getStatusCode());
        throw dce;
    }
}
Also used : SqlParameter(com.microsoft.azure.cosmosdb.SqlParameter) CloudBlobMetadata(com.github.ambry.cloud.CloudBlobMetadata) ArrayList(java.util.ArrayList) FeedResponse(com.microsoft.azure.cosmosdb.FeedResponse) Document(com.microsoft.azure.cosmosdb.Document) SqlParameterCollection(com.microsoft.azure.cosmosdb.SqlParameterCollection) SqlQuerySpec(com.microsoft.azure.cosmosdb.SqlQuerySpec) ChangeFeedOptions(com.microsoft.azure.cosmosdb.ChangeFeedOptions) FeedOptions(com.microsoft.azure.cosmosdb.FeedOptions) PartitionKey(com.microsoft.azure.cosmosdb.PartitionKey) DocumentClientException(com.microsoft.azure.cosmosdb.DocumentClientException)

Example 38 with CloudBlobMetadata

use of com.github.ambry.cloud.CloudBlobMetadata in project ambry by linkedin.

the class CosmosDataAccessor method bulkDeleteMetadata.

/**
 * Delete the blob metadata documents from CosmosDB using the BulkDelete stored procedure.
 * The partition is taken from the first entry of the list and used for every call, so the list is
 * presumably expected to hold blobs of a single partition (NOTE(review): confirm against callers).
 * The stored procedure is invoked repeatedly until its response no longer reports a continuation.
 * @param blobMetadataList the list of blob metadata documents to delete. An empty list is a no-op.
 * @return the number of documents deleted; 0 if {@code blobMetadataList} is empty.
 * @throws DocumentClientException if the operation failed.
 */
private int bulkDeleteMetadata(List<CloudBlobMetadata> blobMetadataList) throws DocumentClientException {
    if (blobMetadataList.isEmpty()) {
        // Nothing to delete. Without this guard, get(0) below throws IndexOutOfBoundsException.
        return 0;
    }
    String partitionPath = blobMetadataList.get(0).getPartitionId();
    RequestOptions options = getRequestOptions(partitionPath);
    // Build the comma-separated, double-quoted id list that is substituted into the bulk delete query.
    String quotedBlobIds = blobMetadataList.stream().map(metadata -> '"' + metadata.getId() + '"').collect(Collectors.joining(","));
    String query = String.format(BULK_DELETE_QUERY, quotedBlobIds);
    // The stored procedure link is derived from the collection link.
    String sprocLink = cosmosCollectionLink + BULK_DELETE_SPROC;
    boolean more = true;
    int deleteCount = 0;
    double requestCharge = 0;
    try {
        // Each invocation reports how many documents it deleted and whether another call is needed.
        while (more) {
            StoredProcedureResponse response = asyncDocumentClient.executeStoredProcedure(sprocLink, options, new String[] { query }).toBlocking().single();
            requestCharge += response.getRequestCharge();
            Document responseDoc = response.getResponseAsDocument();
            more = responseDoc.getBoolean(PROPERTY_CONTINUATION);
            deleteCount += responseDoc.getInt(PROPERTY_DELETED);
        }
        if (requestCharge >= requestChargeThreshold) {
            logger.info("Bulk delete partition {} request charge {} for {} records", partitionPath, requestCharge, deleteCount);
        }
        return deleteCount;
    } catch (RuntimeException rex) {
        // Surface the wrapped DocumentClientException, if any, as the declared checked exception.
        if (rex.getCause() instanceof DocumentClientException) {
            throw (DocumentClientException) rex.getCause();
        } else {
            throw rex;
        }
    }
}
Also used : HttpURLConnection(java.net.HttpURLConnection) SqlParameterCollection(com.microsoft.azure.cosmosdb.SqlParameterCollection) ConnectionMode(com.microsoft.azure.cosmosdb.ConnectionMode) DocumentClientException(com.microsoft.azure.cosmosdb.DocumentClientException) Date(java.util.Date) LoggerFactory(org.slf4j.LoggerFactory) VcrMetrics(com.github.ambry.cloud.VcrMetrics) ConsistencyLevel(com.microsoft.azure.cosmosdb.ConsistencyLevel) FeedResponse(com.microsoft.azure.cosmosdb.FeedResponse) JSONObject(org.json.JSONObject) Map(java.util.Map) SecretClient(com.azure.security.keyvault.secrets.SecretClient) Container(com.github.ambry.account.Container) StoredProcedureResponse(com.microsoft.azure.cosmosdb.StoredProcedureResponse) Set(java.util.Set) Utils(com.github.ambry.utils.Utils) Collectors(java.util.stream.Collectors) PartitionKey(com.microsoft.azure.cosmosdb.PartitionKey) List(java.util.List) CloudRequestAgent(com.github.ambry.cloud.CloudRequestAgent) SqlQuerySpec(com.microsoft.azure.cosmosdb.SqlQuerySpec) Document(com.microsoft.azure.cosmosdb.Document) Timer(com.codahale.metrics.Timer) BlobId(com.github.ambry.commons.BlobId) ResourceResponse(com.microsoft.azure.cosmosdb.ResourceResponse) RequestOptions(com.microsoft.azure.cosmosdb.RequestOptions) DocumentCollection(com.microsoft.azure.cosmosdb.DocumentCollection) AtomicBoolean(java.util.concurrent.atomic.AtomicBoolean) Callable(java.util.concurrent.Callable) ArrayList(java.util.ArrayList) CloudConfig(com.github.ambry.config.CloudConfig) HashSet(java.util.HashSet) BiConsumer(java.util.function.BiConsumer) CloudStorageException(com.github.ambry.cloud.CloudStorageException) Properties(java.util.Properties) Logger(org.slf4j.Logger) AsyncDocumentClient(com.microsoft.azure.cosmosdb.rx.AsyncDocumentClient) Iterator(java.util.Iterator) VerifiableProperties(com.github.ambry.config.VerifiableProperties) ChangeFeedOptions(com.microsoft.azure.cosmosdb.ChangeFeedOptions) 
AccessCondition(com.microsoft.azure.cosmosdb.AccessCondition) ConnectionPolicy(com.microsoft.azure.cosmosdb.ConnectionPolicy) SecretClientBuilder(com.azure.security.keyvault.secrets.SecretClientBuilder) HttpConstants(com.microsoft.azure.cosmosdb.internal.HttpConstants) RetryOptions(com.microsoft.azure.cosmosdb.RetryOptions) FeedOptions(com.microsoft.azure.cosmosdb.FeedOptions) StoredProcedure(com.microsoft.azure.cosmosdb.StoredProcedure) SqlParameter(com.microsoft.azure.cosmosdb.SqlParameter) CloudBlobMetadata(com.github.ambry.cloud.CloudBlobMetadata) BlockingObservable(rx.observables.BlockingObservable) RequestOptions(com.microsoft.azure.cosmosdb.RequestOptions) StoredProcedureResponse(com.microsoft.azure.cosmosdb.StoredProcedureResponse) Document(com.microsoft.azure.cosmosdb.Document) DocumentClientException(com.microsoft.azure.cosmosdb.DocumentClientException)

Example 39 with CloudBlobMetadata

use of com.github.ambry.cloud.CloudBlobMetadata in project ambry by linkedin.

the class CosmosDataAccessor method queryMetadata.

/**
 * Run the given DocumentDB query against a single partition and collect the matching blob metadata.
 * Every page of the response is drained before returning.
 * @param partitionPath the partition to query.
 * @param querySpec the DocumentDB {@link SqlQuerySpec} to execute.
 * @param timer the {@link Timer} to use to record query time (excluding waiting).
 * @return a List of {@link CloudBlobMetadata} referencing the matching blobs.
 * @throws DocumentClientException if the query fails with a {@link DocumentClientException} as the cause.
 */
List<CloudBlobMetadata> queryMetadata(String partitionPath, SqlQuerySpec querySpec, Timer timer) throws DocumentClientException {
    // TODO: set maxItemCount
    FeedOptions feedOptions = new FeedOptions();
    feedOptions.setResponseContinuationTokenLimitInKb(continuationTokenLimitKb);
    feedOptions.setPartitionKey(new PartitionKey(partitionPath));

    // TODO: consolidate error count here
    try {
        Iterator<FeedResponse<Document>> pages = executeCosmosQuery(partitionPath, querySpec, feedOptions, timer).getIterator();
        List<CloudBlobMetadata> matches = new ArrayList<>();
        double totalCharge = 0.0;
        while (pages.hasNext()) {
            FeedResponse<Document> page = pages.next();
            totalCharge += page.getRequestCharge();
            for (Document doc : page.getResults()) {
                matches.add(createMetadataFromDocument(doc));
            }
        }
        // Only expensive queries are worth a log line.
        if (totalCharge >= requestChargeThreshold) {
            logger.info("Query partition {} request charge {} for {} records", partitionPath, totalCharge, matches.size());
        }
        return matches;
    } catch (RuntimeException rex) {
        // Surface the wrapped DocumentClientException, if any, as the checked exception callers expect.
        Throwable cause = rex.getCause();
        if (!(cause instanceof DocumentClientException)) {
            throw rex;
        }
        DocumentClientException dce = (DocumentClientException) cause;
        logger.warn("Query {} on partition {} got {}", querySpec.getQueryText(), partitionPath, dce.getStatusCode());
        throw dce;
    }
}
Also used : ChangeFeedOptions(com.microsoft.azure.cosmosdb.ChangeFeedOptions) FeedOptions(com.microsoft.azure.cosmosdb.FeedOptions) CloudBlobMetadata(com.github.ambry.cloud.CloudBlobMetadata) ArrayList(java.util.ArrayList) PartitionKey(com.microsoft.azure.cosmosdb.PartitionKey) FeedResponse(com.microsoft.azure.cosmosdb.FeedResponse) Document(com.microsoft.azure.cosmosdb.Document) DocumentClientException(com.microsoft.azure.cosmosdb.DocumentClientException)

Example 40 with CloudBlobMetadata

use of com.github.ambry.cloud.CloudBlobMetadata in project ambry by linkedin.

the class AzureBlobDataAccessorTest method testPurge.

/**
 * Test purge: blobs whose batch delete reports 202 or 404 are returned as purged,
 * while a 503 makes the whole purge call fail with the storage exception.
 */
@Test
public void testPurge() throws Exception {
    // One blob name per simulated batch-delete outcome (202, 404, 503).
    String acceptedBlob = "andromeda";
    String missingBlob = "sirius";
    String failingBlob = "mutant";

    BlobBatch batch = mock(BlobBatch.class);
    when(mockBatchClient.getBlobBatch()).thenReturn(batch);

    // 202: delete accepted.
    Response<Void> acceptedResponse = mock(Response.class);
    when(acceptedResponse.getStatusCode()).thenReturn(202);
    when(batch.deleteBlob(anyString(), endsWith(acceptedBlob))).thenReturn(acceptedResponse);

    // 404: reading the status code throws a not-found storage exception.
    BlobStorageException missingException = mock(BlobStorageException.class);
    when(missingException.getStatusCode()).thenReturn(404);
    Response<Void> missingResponse = mock(Response.class);
    when(missingResponse.getStatusCode()).thenThrow(missingException);
    when(batch.deleteBlob(anyString(), endsWith(missingBlob))).thenReturn(missingResponse);

    // 503: reading the status code throws a service-unavailable storage exception.
    BlobStorageException unavailableException = mock(BlobStorageException.class);
    when(unavailableException.getStatusCode()).thenReturn(503);
    Response<Void> unavailableResponse = mock(Response.class);
    when(unavailableResponse.getStatusCode()).thenThrow(unavailableException);
    when(batch.deleteBlob(anyString(), endsWith(failingBlob))).thenReturn(unavailableResponse);

    // Purge first 2 and expect success
    List<CloudBlobMetadata> toPurge = new ArrayList<>();
    toPurge.add(new CloudBlobMetadata().setId(acceptedBlob));
    toPurge.add(new CloudBlobMetadata().setId(missingBlob));
    List<CloudBlobMetadata> purged = dataAccessor.purgeBlobs(toPurge);
    assertEquals("Wrong response size", 2, purged.size());
    assertEquals("Wrong blob name", acceptedBlob, purged.get(0).getId());
    assertEquals("Wrong blob name", missingBlob, purged.get(1).getId());

    // Including last one should fail
    toPurge.add(new CloudBlobMetadata().setId(failingBlob));
    try {
        dataAccessor.purgeBlobs(toPurge);
        fail("Expected purge to fail");
    } catch (BlobStorageException bex) {
        assertEquals("Unexpected status code", 503, bex.getStatusCode());
    }
}
Also used : CloudBlobMetadata(com.github.ambry.cloud.CloudBlobMetadata) ArrayList(java.util.ArrayList) BlobStorageException(com.azure.storage.blob.models.BlobStorageException) BlobBatch(com.azure.storage.blob.batch.BlobBatch) Test(org.junit.Test)

Aggregations

CloudBlobMetadata (com.github.ambry.cloud.CloudBlobMetadata): 55, BlobId (com.github.ambry.commons.BlobId): 27, Test (org.junit.Test): 25, ArrayList (java.util.ArrayList): 19, Document (com.microsoft.azure.cosmosdb.Document): 14, PartitionId (com.github.ambry.clustermap.PartitionId): 12, FeedResponse (com.microsoft.azure.cosmosdb.FeedResponse): 12, FeedOptions (com.microsoft.azure.cosmosdb.FeedOptions): 11, SqlQuerySpec (com.microsoft.azure.cosmosdb.SqlQuerySpec): 11, MockPartitionId (com.github.ambry.clustermap.MockPartitionId): 10, InputStream (java.io.InputStream): 10, VerifiableProperties (com.github.ambry.config.VerifiableProperties): 9, ByteArrayInputStream (java.io.ByteArrayInputStream): 8, Timer (com.codahale.metrics.Timer): 7, FindResult (com.github.ambry.cloud.FindResult): 7, MetricRegistry (com.codahale.metrics.MetricRegistry): 6, CloudStorageException (com.github.ambry.cloud.CloudStorageException): 6, ChangeFeedOptions (com.microsoft.azure.cosmosdb.ChangeFeedOptions): 6, DocumentClientException (com.microsoft.azure.cosmosdb.DocumentClientException): 6, VcrMetrics (com.github.ambry.cloud.VcrMetrics): 5