Use of com.github.ambry.cloud.FindResult in project ambry by LinkedIn.
From the class AzureCloudDestinationTest, method testFindEntriesSinceWithUniqueUpdateTimes.
/**
 * Test findEntriesSince with all entries having unique updateTimes, verifying that
 * pagination via {@link CosmosUpdateTimeFindToken} resumes at the correct blob.
 * @param azureDest the {@link AzureCloudDestination} under test.
 * @throws Exception on any mock setup or query failure.
 */
private void testFindEntriesSinceWithUniqueUpdateTimes(AzureCloudDestination azureDest) throws Exception {
  long chunkSize = 110000;
  // between 9 and 10 chunks, so each full query returns 9 entries
  long maxTotalSize = 1000000;
  long startTime = System.currentTimeMillis() - TimeUnit.DAYS.toMillis(1);
  int totalBlobs = 20;
  // create metadata list where total size > maxTotalSize
  List<Document> docList = new ArrayList<>();
  List<String> blobIdList = new ArrayList<>();
  for (int j = 0; j < totalBlobs; j++) {
    BlobId blobId = generateBlobId();
    blobIdList.add(blobId.getID());
    CloudBlobMetadata inputMetadata = new CloudBlobMetadata(blobId, creationTime, Utils.Infinite_Time, chunkSize, CloudBlobMetadata.EncryptionOrigin.NONE);
    // each blob gets a distinct update time so the token can resume precisely
    inputMetadata.setUploadTime(startTime + j);
    docList.add(AzureTestUtils.createDocumentFromCloudBlobMetadata(inputMetadata, startTime + j));
  }
  Observable<FeedResponse<Document>> mockResponse = mock(Observable.class);
  mockObservableForQuery(docList, mockResponse);
  when(mockumentClient.queryDocuments(anyString(), any(SqlQuerySpec.class), any(FeedOptions.class))).thenReturn(mockResponse);
  CosmosUpdateTimeFindToken findToken = new CosmosUpdateTimeFindToken();
  // Run the query
  FindResult findResult = azureDest.findEntriesSince(blobId.getPartition().toPathString(), findToken, maxTotalSize);
  List<CloudBlobMetadata> firstResult = findResult.getMetadataList();
  findToken = (CosmosUpdateTimeFindToken) findResult.getUpdatedFindToken();
  assertEquals("Did not get expected doc count", maxTotalSize / chunkSize, firstResult.size());
  docList = docList.subList(firstResult.size(), docList.size());
  // JUnit convention: expected value first, actual (the token's state) second.
  assertEquals("Find token has wrong last update time", firstResult.get(firstResult.size() - 1).getLastUpdateTime(), findToken.getLastUpdateTime());
  assertEquals("Find token has wrong lastUpdateTimeReadBlobIds", new HashSet<>(Collections.singletonList(firstResult.get(firstResult.size() - 1).getId())), findToken.getLastUpdateTimeReadBlobIds());
  mockObservableForQuery(docList, mockResponse);
  findResult = azureDest.findEntriesSince(blobId.getPartition().toPathString(), findToken, maxTotalSize);
  List<CloudBlobMetadata> secondResult = findResult.getMetadataList();
  findToken = (CosmosUpdateTimeFindToken) findResult.getUpdatedFindToken();
  assertEquals("Unexpected doc count", maxTotalSize / chunkSize, secondResult.size());
  assertEquals("Unexpected first blobId", blobIdList.get(firstResult.size()), secondResult.get(0).getId());
  mockObservableForQuery(docList, mockResponse);
  // Rerun with max size below blob size, and make sure it returns one result
  findResult = azureDest.findEntriesSince(blobId.getPartition().toPathString(), findToken, chunkSize / 2);
  assertEquals("Expected one result", 1, findResult.getMetadataList().size());
}
Use of com.github.ambry.cloud.FindResult in project ambry by LinkedIn.
From the class AzureCloudDestinationTest, method testFindEntriesSinceWithNonUniqueUpdateTimes.
/**
 * Test findEntriesSince with entries having non unique updateTimes: 19 blobs share one
 * update time and the last blob has a later one, so the token must track the set of
 * already-read blob ids at the shared timestamp to avoid re-returning them.
 * @param azureDest the {@link AzureCloudDestination} under test.
 * @throws Exception on any mock setup or query failure.
 */
private void testFindEntriesSinceWithNonUniqueUpdateTimes(AzureCloudDestination azureDest) throws Exception {
  long chunkSize = 110000;
  // between 9 and 10 chunks, so each full query returns 9 entries
  long maxTotalSize = 1000000;
  long startTime = System.currentTimeMillis() - TimeUnit.DAYS.toMillis(1);
  int totalBlobs = 20;
  // create metadata list where total size > maxTotalSize
  List<Document> docList = new ArrayList<>();
  List<String> blobIdList = new ArrayList<>();
  for (int j = 0; j < totalBlobs - 1; j++) {
    BlobId blobId = generateBlobId();
    blobIdList.add(blobId.getID());
    CloudBlobMetadata inputMetadata = new CloudBlobMetadata(blobId, creationTime, Utils.Infinite_Time, chunkSize, CloudBlobMetadata.EncryptionOrigin.NONE);
    // first totalBlobs - 1 docs all share the same update time
    docList.add(AzureTestUtils.createDocumentFromCloudBlobMetadata(inputMetadata, startTime));
  }
  BlobId blobId = generateBlobId();
  blobIdList.add(blobId.getID());
  CloudBlobMetadata inputMetadata = new CloudBlobMetadata(blobId, creationTime, Utils.Infinite_Time, chunkSize, CloudBlobMetadata.EncryptionOrigin.NONE);
  // last doc gets a strictly later update time
  docList.add(AzureTestUtils.createDocumentFromCloudBlobMetadata(inputMetadata, startTime + 1));
  Observable<FeedResponse<Document>> mockResponse = mock(Observable.class);
  mockObservableForQuery(docList, mockResponse);
  when(mockumentClient.queryDocuments(anyString(), any(SqlQuerySpec.class), any(FeedOptions.class))).thenReturn(mockResponse);
  CosmosUpdateTimeFindToken findToken = new CosmosUpdateTimeFindToken();
  // Run the query
  FindResult findResult = azureDest.findEntriesSince(blobId.getPartition().toPathString(), findToken, maxTotalSize);
  List<CloudBlobMetadata> firstResult = findResult.getMetadataList();
  findToken = (CosmosUpdateTimeFindToken) findResult.getUpdatedFindToken();
  assertEquals("Did not get expected doc count", maxTotalSize / chunkSize, firstResult.size());
  // JUnit convention: expected value first, actual (the token's state) second.
  assertEquals("Find token has wrong last update time", firstResult.get(firstResult.size() - 1).getLastUpdateTime(), findToken.getLastUpdateTime());
  Set<String> resultBlobIdSet = firstResult.stream().map(CloudBlobMetadata::getId).collect(Collectors.toSet());
  assertEquals("Find token has wrong lastUpdateTimeReadBlobIds", resultBlobIdSet, findToken.getLastUpdateTimeReadBlobIds());
  mockObservableForQuery(docList, mockResponse);
  findResult = azureDest.findEntriesSince(blobId.getPartition().toPathString(), findToken, maxTotalSize);
  List<CloudBlobMetadata> secondResult = findResult.getMetadataList();
  CosmosUpdateTimeFindToken secondFindToken = (CosmosUpdateTimeFindToken) findResult.getUpdatedFindToken();
  assertEquals("Unexpected doc count", maxTotalSize / chunkSize, secondResult.size());
  assertEquals("Unexpected first blobId", blobIdList.get(firstResult.size()), secondResult.get(0).getId());
  assertEquals("Find token has wrong last update time", firstResult.get(firstResult.size() - 1).getLastUpdateTime(), secondFindToken.getLastUpdateTime());
  // blobs read so far at the shared update time accumulate across queries
  resultBlobIdSet.addAll(secondResult.stream().map(CloudBlobMetadata::getId).collect(Collectors.toSet()));
  assertEquals("Find token has wrong lastUpdateTimeReadBlobIds", resultBlobIdSet, secondFindToken.getLastUpdateTimeReadBlobIds());
  mockObservableForQuery(docList, mockResponse);
  // Rerun with max size below blob size, and make sure it returns one result
  findResult = azureDest.findEntriesSince(blobId.getPartition().toPathString(), findToken, chunkSize / 2);
  List<CloudBlobMetadata> finalResult = findResult.getMetadataList();
  assertEquals("Expected one result", 1, finalResult.size());
  mockObservableForQuery(docList, mockResponse);
  // Rerun final time, and make sure that it returns all the remaining blobs
  findResult = azureDest.findEntriesSince(blobId.getPartition().toPathString(), secondFindToken, maxTotalSize);
  List<CloudBlobMetadata> thirdResult = findResult.getMetadataList();
  CosmosUpdateTimeFindToken thirdFindToken = (CosmosUpdateTimeFindToken) findResult.getUpdatedFindToken();
  assertEquals("Unexpected doc count", totalBlobs - (firstResult.size() + secondResult.size()), thirdResult.size());
  assertEquals("Unexpected first blobId", blobIdList.get(firstResult.size() + secondResult.size()), thirdResult.get(0).getId());
  assertEquals("Find token has wrong last update time", startTime + 1, thirdFindToken.getLastUpdateTime());
  assertEquals("Find token has wrong lastUpdateTimeReadBlobIds", new HashSet<>(Collections.singletonList(thirdResult.get(thirdResult.size() - 1).getId())), thirdFindToken.getLastUpdateTimeReadBlobIds());
}
Use of com.github.ambry.cloud.FindResult in project ambry by LinkedIn.
From the class CosmosChangeFeedBasedReplicationFeed, method getNextEntriesAndUpdatedToken.
/**
 * Get next set of change feed entries for the specified partition, after the {@code curFindToken}.
 * The number of entries is capped by maxEntriesSize.
 * This method creates a cache for change feed entries. If the {@code curFindToken} is not valid,
 * or if all the items in the cache are consumed, then it queries Cosmos for new entries.
 * @param curFindToken {@link FindToken} after which the next entries have to be returned.
 * @param maxTotalSizeOfEntries maximum size of all the blobs returned.
 * @param partitionPath Partition for which change feed entries have to be returned.
 * @return {@link FindResult} instance that contains updated {@link FindToken} object which can act as a bookmark for
 * subsequent requests, and {@link List} of {@link CloudBlobMetadata} entries.
 * @throws DocumentClientException if any cosmos query encounters error.
 */
@Override
public FindResult getNextEntriesAndUpdatedToken(FindToken curFindToken, long maxTotalSizeOfEntries, String partitionPath) throws DocumentClientException {
Timer.Context operationTimer = azureMetrics.replicationFeedQueryTime.time();
try {
List<CloudBlobMetadata> nextEntries = new ArrayList<>();
CosmosChangeFeedFindToken cosmosChangeFeedFindToken = (CosmosChangeFeedFindToken) curFindToken;
// index points at the next unconsumed entry within the cached feed page
int index = cosmosChangeFeedFindToken.getIndex();
ChangeFeedCacheEntry changeFeedCacheEntry = changeFeedCache.get(cosmosChangeFeedFindToken.getCacheSessionId());
boolean cacheHit = true;
if (changeFeedCacheEntry == null || !isCacheValid(partitionPath, cosmosChangeFeedFindToken, changeFeedCacheEntry)) {
// the cache may not be valid. So we cannot use session id
azureMetrics.changeFeedCacheMissRate.mark();
cacheHit = false;
// re-query Cosmos starting from the token's start continuation token
changeFeedCacheEntry = getNextChangeFeed(partitionPath, cosmosChangeFeedFindToken.getStartContinuationToken());
// invalidate the previous token's cache
changeFeedCache.remove(cosmosChangeFeedFindToken.getCacheSessionId());
index = 0;
}
long resultSize = 0;
List<CloudBlobMetadata> fetchedEntries = changeFeedCacheEntry.getFetchedEntries();
while (true) {
if (index < fetchedEntries.size()) {
if (cacheHit) {
// count the hit only once per call, on first successful read from the cache
azureMetrics.changeFeedCacheHitRate.mark();
cacheHit = false;
}
// resultSize == 0 guarantees at least one entry is returned even if it alone
// exceeds maxTotalSizeOfEntries; otherwise stop before breaching the cap
if (resultSize + fetchedEntries.get(index).getSize() < maxTotalSizeOfEntries || resultSize == 0) {
nextEntries.add(fetchedEntries.get(index));
resultSize = resultSize + fetchedEntries.get(index).getSize();
index++;
} else {
break;
}
} else {
// we can reuse the session id in this case, because we know that the cache ran out of new items.
changeFeedCacheEntry = getNextChangeFeed(partitionPath, changeFeedCacheEntry.getEndContinuationToken(), changeFeedCacheEntry.getCacheSessionId());
fetchedEntries = changeFeedCacheEntry.getFetchedEntries();
if (fetchedEntries.isEmpty()) {
// return updated token. The source replication logic will retry replication with updated token.
break;
} else {
azureMetrics.changeFeedCacheRefreshRate.mark();
}
index = 0;
}
}
// bytesRead accumulates across calls; the token's index/continuation tokens bookmark
// the position within the (possibly refreshed) cached feed page
FindToken updatedToken = new CosmosChangeFeedFindToken(cosmosChangeFeedFindToken.getBytesRead() + resultSize, changeFeedCacheEntry.getStartContinuationToken(), changeFeedCacheEntry.getEndContinuationToken(), index, changeFeedCacheEntry.getFetchedEntries().size(), changeFeedCacheEntry.getCacheSessionId(), cosmosChangeFeedFindToken.getVersion());
// store a defensive copy so later mutations don't corrupt the cached entry
changeFeedCache.put(changeFeedCacheEntry.getCacheSessionId(), new ChangeFeedCacheEntry(changeFeedCacheEntry));
return new FindResult(nextEntries, updatedToken);
} finally {
operationTimer.stop();
}
}
Use of com.github.ambry.cloud.FindResult in project ambry by LinkedIn.
From the class AzureCloudDestinationTest, method testFindEntriesSinceUsingChangeFeed.
/**
 * Test findEntriesSince when cloud destination uses change feed based token, verifying
 * returned pages, token bookkeeping, and change feed cache hit/miss/refresh metrics.
 */
@Test
public void testFindEntriesSinceUsingChangeFeed() throws Exception {
  long chunkSize = 110000;
  // between 9 and 10 chunks, so each full query returns 9 entries
  long maxTotalSize = 1000000;
  long startTime = System.currentTimeMillis() - TimeUnit.DAYS.toMillis(1);
  int totalBlobs = 20;
  // create metadata list where total size > maxTotalSize
  List<String> blobIdList = new ArrayList<>();
  List<CloudBlobMetadata> cloudBlobMetadataList = new ArrayList<>();
  for (int j = 0; j < totalBlobs; j++) {
    BlobId blobId = generateBlobId();
    blobIdList.add(blobId.getID());
    CloudBlobMetadata inputMetadata = new CloudBlobMetadata(blobId, creationTime, Utils.Infinite_Time, chunkSize, CloudBlobMetadata.EncryptionOrigin.NONE);
    inputMetadata.setUploadTime(startTime + j);
    cloudBlobMetadataList.add(inputMetadata);
  }
  MockChangeFeedQuery mockChangeFeedQuery = new MockChangeFeedQuery();
  AzureReplicationFeed azureReplicationFeed = null;
  try {
    azureReplicationFeed = new CosmosChangeFeedBasedReplicationFeed(mockChangeFeedQuery, azureMetrics, azureDest.getQueryBatchSize());
    FieldSetter.setField(azureDest, azureDest.getClass().getDeclaredField("azureReplicationFeed"), azureReplicationFeed);
    // method reference instead of stream().forEach(lambda)
    cloudBlobMetadataList.forEach(mockChangeFeedQuery::add);
    CosmosChangeFeedFindToken findToken = new CosmosChangeFeedFindToken();
    // Run the query
    FindResult findResult = azureDest.findEntriesSince(blobId.getPartition().toPathString(), findToken, maxTotalSize);
    List<CloudBlobMetadata> firstResult = findResult.getMetadataList();
    findToken = (CosmosChangeFeedFindToken) findResult.getUpdatedFindToken();
    assertEquals("Did not get expected doc count", maxTotalSize / chunkSize, firstResult.size());
    // JUnit convention: expected value first, actual (the token's state) second.
    assertEquals("Find token has wrong index", firstResult.size(), findToken.getIndex());
    assertEquals("Find token has wrong totalItems count", Math.min(blobIdList.size(), azureDest.getQueryBatchSize()), findToken.getTotalItems());
    assertEquals("Unexpected change feed cache miss count", 1, azureMetrics.changeFeedCacheMissRate.getCount());
    assertEquals("Unexpected change feed cache refresh count", 0, azureMetrics.changeFeedCacheRefreshRate.getCount());
    assertEquals("Unexpected change feed cache hit count", 0, azureMetrics.changeFeedCacheHitRate.getCount());
    cloudBlobMetadataList = cloudBlobMetadataList.subList(firstResult.size(), cloudBlobMetadataList.size());
    findResult = azureDest.findEntriesSince(blobId.getPartition().toPathString(), findToken, maxTotalSize);
    List<CloudBlobMetadata> secondResult = findResult.getMetadataList();
    findToken = (CosmosChangeFeedFindToken) findResult.getUpdatedFindToken();
    assertEquals("Unexpected doc count", maxTotalSize / chunkSize, secondResult.size());
    assertEquals("Unexpected first blobId", blobIdList.get(firstResult.size()), secondResult.get(0).getId());
    assertEquals("Find token has wrong totalItems count", Math.min(blobIdList.size(), azureDest.getQueryBatchSize()), findToken.getTotalItems());
    assertEquals("Unexpected change feed cache miss count", 1, azureMetrics.changeFeedCacheMissRate.getCount());
    assertEquals("Unexpected change feed cache refresh count", 0, azureMetrics.changeFeedCacheRefreshRate.getCount());
    assertEquals("Unexpected change feed cache hit count", 1, azureMetrics.changeFeedCacheHitRate.getCount());
    // Rerun with max size below blob size, and make sure it returns one result
    findResult = azureDest.findEntriesSince(blobId.getPartition().toPathString(), findToken, chunkSize - 1);
    List<CloudBlobMetadata> thirdResult = findResult.getMetadataList();
    assertEquals("Expected one result", 1, thirdResult.size());
    findToken = (CosmosChangeFeedFindToken) findResult.getUpdatedFindToken();
    assertEquals("Unexpected change feed cache miss count", 1, azureMetrics.changeFeedCacheMissRate.getCount());
    assertEquals("Unexpected change feed cache refresh count", 0, azureMetrics.changeFeedCacheRefreshRate.getCount());
    assertEquals("Unexpected change feed cache hit count", 2, azureMetrics.changeFeedCacheHitRate.getCount());
    // Add more than AzureCloudConfig.cosmosQueryBatchSize blobs and test for correct change feed cache hits and misses.
    AzureCloudConfig azureConfig = new AzureCloudConfig(new VerifiableProperties(configProps));
    for (int j = 0; j < azureConfig.cosmosQueryBatchSize + 5; j++) {
      BlobId blobId = generateBlobId();
      blobIdList.add(blobId.getID());
      CloudBlobMetadata inputMetadata = new CloudBlobMetadata(blobId, creationTime, Utils.Infinite_Time, 10, CloudBlobMetadata.EncryptionOrigin.NONE);
      inputMetadata.setUploadTime(startTime + j);
      cloudBlobMetadataList.add(inputMetadata);
    }
    cloudBlobMetadataList.forEach(mockChangeFeedQuery::add);
    // Final correct query to drain out all the blobs and trigger a cache refresh.
    String prevEndToken = findToken.getEndContinuationToken();
    findResult = azureDest.findEntriesSince(blobId.getPartition().toPathString(), findToken, maxTotalSize);
    findToken = (CosmosChangeFeedFindToken) findResult.getUpdatedFindToken();
    assertEquals("Unexpected change feed cache miss count", 1, azureMetrics.changeFeedCacheMissRate.getCount());
    assertEquals("Unexpected change feed cache refresh count", 1, azureMetrics.changeFeedCacheRefreshRate.getCount());
    assertEquals("Unexpected change feed cache hit count", 3, azureMetrics.changeFeedCacheHitRate.getCount());
    assertEquals("Since this would have triggered refresh, start token should have been previous token's end token", prevEndToken, findToken.getStartContinuationToken());
    // Query changefeed with invalid token and check for cache miss
    testFindEntriesSinceUsingChangeFeedWithInvalidToken(findToken);
  } finally {
    if (azureReplicationFeed != null) {
      azureReplicationFeed.close();
    }
  }
}
Use of com.github.ambry.cloud.FindResult in project ambry by LinkedIn.
From the class AzureCloudDestinationTest, method testFindEntriesSinceUsingChangeFeedWithInvalidToken.
/**
 * Query changefeed with various invalid tokens (bad session id, end token, start token,
 * totalItems) and check that each one registers as a cache miss without a refresh or hit.
 * @param findToken {@link CosmosChangeFeedFindToken} to continue from.
 * @throws CloudStorageException if a query fails unexpectedly.
 */
private void testFindEntriesSinceUsingChangeFeedWithInvalidToken(CosmosChangeFeedFindToken findToken) throws CloudStorageException {
  // Invalid session id.
  CosmosChangeFeedFindToken invalidFindToken = new CosmosChangeFeedFindToken(findToken.getBytesRead(), findToken.getStartContinuationToken(), findToken.getEndContinuationToken(), findToken.getIndex(), findToken.getTotalItems(), UUID.randomUUID().toString(), findToken.getVersion());
  FindResult findResult = azureDest.findEntriesSince(blobId.getPartition().toPathString(), invalidFindToken, 10);
  findToken = (CosmosChangeFeedFindToken) findResult.getUpdatedFindToken();
  assertEquals("Unexpected change feed cache miss count", 2, azureMetrics.changeFeedCacheMissRate.getCount());
  assertEquals("Unexpected change feed cache refresh count", 1, azureMetrics.changeFeedCacheRefreshRate.getCount());
  assertEquals("Unexpected change feed cache hit count", 3, azureMetrics.changeFeedCacheHitRate.getCount());
  // invalid end token.
  invalidFindToken = new CosmosChangeFeedFindToken(findToken.getBytesRead(), findToken.getStartContinuationToken(), "5000", findToken.getIndex(), findToken.getTotalItems(), findToken.getCacheSessionId(), findToken.getVersion());
  findResult = azureDest.findEntriesSince(blobId.getPartition().toPathString(), invalidFindToken, 10);
  findToken = (CosmosChangeFeedFindToken) findResult.getUpdatedFindToken();
  assertEquals("Unexpected change feed cache miss count", 3, azureMetrics.changeFeedCacheMissRate.getCount());
  assertEquals("Unexpected change feed cache refresh count", 1, azureMetrics.changeFeedCacheRefreshRate.getCount());
  assertEquals("Unexpected change feed cache hit count", 3, azureMetrics.changeFeedCacheHitRate.getCount());
  // invalid start token.
  invalidFindToken = new CosmosChangeFeedFindToken(findToken.getBytesRead(), "5000", findToken.getEndContinuationToken(), findToken.getIndex(), findToken.getTotalItems(), findToken.getCacheSessionId(), findToken.getVersion());
  try {
    azureDest.findEntriesSince(blobId.getPartition().toPathString(), invalidFindToken, 10);
  } catch (Exception ignored) {
    // The bogus start continuation token may make the underlying query throw; either way
    // the metric assertions below verify the cache-miss path was taken.
  }
  assertEquals("Unexpected change feed cache miss count", 4, azureMetrics.changeFeedCacheMissRate.getCount());
  assertEquals("Unexpected change feed cache refresh count", 1, azureMetrics.changeFeedCacheRefreshRate.getCount());
  assertEquals("Unexpected change feed cache hit count", 3, azureMetrics.changeFeedCacheHitRate.getCount());
  // invalid total items.
  invalidFindToken = new CosmosChangeFeedFindToken(findToken.getBytesRead(), findToken.getStartContinuationToken(), findToken.getEndContinuationToken(), findToken.getIndex(), 9000, findToken.getCacheSessionId(), findToken.getVersion());
  azureDest.findEntriesSince(blobId.getPartition().toPathString(), invalidFindToken, 10);
  assertEquals("Unexpected change feed cache miss count", 5, azureMetrics.changeFeedCacheMissRate.getCount());
  assertEquals("Unexpected change feed cache refresh count", 1, azureMetrics.changeFeedCacheRefreshRate.getCount());
  assertEquals("Unexpected change feed cache hit count", 3, azureMetrics.changeFeedCacheHitRate.getCount());
}
Aggregations