Use of com.github.ambry.utils.Time in project ambry by linkedin.
The class MockSelector, method testConnectionReplenishment.
/**
* Test that connections get replenished in {@link SocketNetworkClient#sendAndPoll(List, Set, int)} to maintain the minimum
* number of active connections.
*/
@Test
public void testConnectionReplenishment() {
  AtomicInteger nextCorrelationId = new AtomicInteger(1);
  Function<Integer, List<RequestInfo>> requestGen = numRequests -> IntStream.range(0, numRequests)
      .mapToObj(i -> new RequestInfo(sslHost, sslPort, new MockSend(nextCorrelationId.getAndIncrement()),
          replicaOnSslNode, null))
      .collect(Collectors.toList());
  // 1 host x 1 port x 3 connections x 100%
  int warmUpPercentage = 100;
  AtomicInteger expectedConnectCalls = new AtomicInteger(warmUpPercentage * 3 / 100);
  Runnable checkConnectCalls = () -> Assert.assertEquals(expectedConnectCalls.get(), selector.connectCallCount());
  networkClient.warmUpConnections(Collections.singletonList(replicaOnSslNode.getDataNodeId()), warmUpPercentage,
      TIME_FOR_WARM_UP_MS, new ArrayList<>());
  checkConnectCalls.run();
  selector.setState(MockSelectorState.Good);
  // 1. this sendAndPoll() should use one of the pre-warmed connections
  List<ResponseInfo> responseInfoList =
      networkClient.sendAndPoll(requestGen.apply(3), Collections.emptySet(), POLL_TIMEOUT_MS);
  checkConnectCalls.run();
  Assert.assertEquals(3, responseInfoList.size());
  responseInfoList.forEach(ResponseInfo::release);
  // 2. this sendAndPoll() should disconnect two of the pre-warmed connections
  selector.setState(MockSelectorState.DisconnectOnSend);
  responseInfoList = networkClient.sendAndPoll(requestGen.apply(2), Collections.emptySet(), POLL_TIMEOUT_MS);
  checkConnectCalls.run();
  Assert.assertEquals(2, responseInfoList.size());
  responseInfoList.forEach(ResponseInfo::release);
  // 3. the two connections lost in the previous sendAndPoll should not be replenished yet since a second has not yet
  // passed since startup
  selector.setState(MockSelectorState.Good);
  responseInfoList = networkClient.sendAndPoll(requestGen.apply(1), Collections.emptySet(), POLL_TIMEOUT_MS);
  checkConnectCalls.run();
  Assert.assertEquals(1, responseInfoList.size());
  responseInfoList.forEach(ResponseInfo::release);
  // 4. one of the connections lost in sendAndPoll 2 should be replenished
  time.setCurrentMilliseconds(time.milliseconds() + Time.MsPerSec);
  selector.setState(MockSelectorState.Good);
  responseInfoList = networkClient.sendAndPoll(requestGen.apply(0), Collections.emptySet(), POLL_TIMEOUT_MS);
  expectedConnectCalls.addAndGet(1);
  checkConnectCalls.run();
  Assert.assertEquals(0, responseInfoList.size());
  // 5. no connections replenished this time since only half a second passed.
  time.setCurrentMilliseconds(time.milliseconds() + 500);
  selector.setState(MockSelectorState.Good);
  responseInfoList = networkClient.sendAndPoll(requestGen.apply(0), Collections.emptySet(), POLL_TIMEOUT_MS);
  checkConnectCalls.run();
  Assert.assertEquals(0, responseInfoList.size());
  // 6. the second connection lost in sendAndPoll 2 should be replenished
  time.setCurrentMilliseconds(time.milliseconds() + 500);
  selector.setState(MockSelectorState.Good);
  responseInfoList = networkClient.sendAndPoll(requestGen.apply(0), Collections.emptySet(), POLL_TIMEOUT_MS);
  expectedConnectCalls.addAndGet(1);
  checkConnectCalls.run();
  Assert.assertEquals(0, responseInfoList.size());
  // 7. this call should use the existing connections in the pool
  selector.setState(MockSelectorState.Good);
  responseInfoList = networkClient.sendAndPoll(requestGen.apply(3), Collections.emptySet(), POLL_TIMEOUT_MS);
  checkConnectCalls.run();
  Assert.assertEquals(3, responseInfoList.size());
  responseInfoList.forEach(ResponseInfo::release);
}
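The assertions above rely on the client replenishing at most one lost connection per elapsed second of (mock) time, which is why the test advances the clock by Time.MsPerSec once and then in two 500 ms steps. A minimal sketch of that kind of time-gated replenishment, using a hypothetical ReplenishmentGate helper rather than SocketNetworkClient's actual internals, could look like this:

// Sketch only: a gate that permits at most one replenished connection per second.
// ReplenishmentGate is a hypothetical name; the injected Time makes the cadence testable.
class ReplenishmentGate {
  private final Time time;
  private long lastReplenishMs;

  ReplenishmentGate(Time time) {
    this.time = time;
    this.lastReplenishMs = time.milliseconds();
  }

  boolean mayReplenish() {
    long nowMs = time.milliseconds();
    if (nowMs - lastReplenishMs >= Time.MsPerSec) {
      lastReplenishMs = nowMs;  // reset the window after permitting one connect
      return true;
    }
    return false;
  }
}

With a settable clock like the one the test uses, advancing time by exactly one second makes mayReplenish() return true once, which matches the behavior asserted in steps 4 through 6 above.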
Use of com.github.ambry.utils.Time in project ambry by linkedin.
The class ReplicaThread, method replicate.
/**
* Do replication for replicas grouped by {@link DataNodeId}.
* A replication cycle between two replicas involves the following steps:
* 1. Exchange metadata: fetch the metadata of blobs added to the remote replica since the last synchronization point
* and filter out the ones missing in the local store.
* 2. Fetch missing blobs: fetch the missing blobs by issuing a GET request to the remote replica and write them to
* the local store.
*
* During cross-colo replication, depending on the {@link ReplicationModelType}, the missing blobs are either fetched
* from all remote replicas (if modelType == ALL_TO_ALL) or only fetched for local leader replicas from their remote
* leader replicas (if modelType == LEADER_BASED). In the latter case, non-leader replica pairs (leader <-> standby,
* standby <-> leader, standby <-> standby) will get their missing blobs from their corresponding leader<->leader
* exchanges and intra-dc replication.
*
* Here is a table listing what is exchanged between local and remote replicas based on their roles
* (leader/standby) when {@link ReplicationModelType} is LEADER_BASED.
*
*           |   Local Leader    |   Local Standby   |   Remote Leader   |  Remote Standby
* -----------------------------------------------------------------------------------------
* Leader:   |        ---        | metadata and data | metadata and data |  metadata only
* Standby:  | metadata and data | metadata and data |   metadata only   |  metadata only
*/
public void replicate() {
  boolean allCaughtUp = true;
  Map<DataNodeId, List<RemoteReplicaInfo>> dataNodeToRemoteReplicaInfo = getRemoteReplicaInfos();
  logger.trace("Replicating from {} DataNodes.", replicasToReplicateGroupedByNode.size());
  for (Map.Entry<DataNodeId, List<RemoteReplicaInfo>> entry : dataNodeToRemoteReplicaInfo.entrySet()) {
    DataNodeId remoteNode = entry.getKey();
    if (!running) {
      break;
    }
    List<RemoteReplicaInfo> replicasToReplicatePerNode = entry.getValue();
    Timer.Context context = null;
    Timer.Context portTypeBasedContext = null;
    if (replicatingFromRemoteColo) {
      context = replicationMetrics.interColoReplicationLatency.get(remoteNode.getDatacenterName()).time();
      if (replicatingOverSsl) {
        portTypeBasedContext =
            replicationMetrics.sslInterColoReplicationLatency.get(remoteNode.getDatacenterName()).time();
      } else {
        portTypeBasedContext =
            replicationMetrics.plainTextInterColoReplicationLatency.get(remoteNode.getDatacenterName()).time();
      }
    } else {
      context = replicationMetrics.intraColoReplicationLatency.time();
      if (replicatingOverSsl) {
        portTypeBasedContext = replicationMetrics.sslIntraColoReplicationLatency.time();
      } else {
        portTypeBasedContext = replicationMetrics.plainTextIntraColoReplicationLatency.time();
      }
    }
    ConnectedChannel connectedChannel = null;
    long checkoutConnectionTimeInMs = -1;
    long exchangeMetadataTimeInMs = -1;
    long fixMissingStoreKeysTimeInMs = -1;
    long replicationStartTimeInMs = time.milliseconds();
    long startTimeInMs = replicationStartTimeInMs;
    // Get a list of active replicas that need to be included in this replication cycle
    List<RemoteReplicaInfo> activeReplicasPerNode = new ArrayList<>();
    List<RemoteReplicaInfo> standbyReplicasWithNoProgress = new ArrayList<>();
    for (RemoteReplicaInfo remoteReplicaInfo : replicasToReplicatePerNode) {
      ReplicaId replicaId = remoteReplicaInfo.getReplicaId();
      boolean inBackoff = time.milliseconds() < remoteReplicaInfo.getReEnableReplicationTime();
      if (replicaId.isDown() || inBackoff
          || remoteReplicaInfo.getLocalStore().getCurrentState() == ReplicaState.OFFLINE
          || replicationDisabledPartitions.contains(replicaId.getPartitionId())) {
        logger.debug(
            "Skipping replication on replica {} because one of following conditions is true: remote replica is down "
                + "= {}; in backoff = {}; local store is offline = {}; replication is disabled = {}.",
            replicaId.getPartitionId().toPathString(), replicaId.isDown(), inBackoff,
            remoteReplicaInfo.getLocalStore().getCurrentState() == ReplicaState.OFFLINE,
            replicationDisabledPartitions.contains(replicaId.getPartitionId()));
        continue;
      }
      if (replicatingFromRemoteColo && leaderBasedReplicationAdmin != null) {
        // check if all missing keys for standby replicas from the previous replication cycle have now been obtained
        // via the leader replica. If we still have missing keys, don't include them in the current replication cycle
        // to avoid sending duplicate metadata requests since their token wouldn't have advanced.
        processMissingKeysFromPreviousMetadataResponse(remoteReplicaInfo);
        if (containsMissingKeysFromPreviousMetadataExchange(remoteReplicaInfo)) {
          standbyReplicasWithNoProgress.add(remoteReplicaInfo);
          continue;
        }
      }
      activeReplicasPerNode.add(remoteReplicaInfo);
    }
    logger.trace("Replicating from {} RemoteReplicaInfos.", activeReplicasPerNode.size());
    // use a variable to track the current replica list to replicate (for logging purposes)
    List<RemoteReplicaInfo> currentReplicaList = activeReplicasPerNode;
    try {
      if (activeReplicasPerNode.size() > 0) {
        allCaughtUp = false;
        // if maxReplicaCountPerRequest > 0, split remote replicas on the same node into multiple lists; otherwise
        // there is no limit.
        List<List<RemoteReplicaInfo>> activeReplicaSubLists = maxReplicaCountPerRequest > 0
            ? Utils.partitionList(activeReplicasPerNode, maxReplicaCountPerRequest)
            : Collections.singletonList(activeReplicasPerNode);
        startTimeInMs = time.milliseconds();
        connectedChannel = connectionPool.checkOutConnection(remoteNode.getHostname(),
            activeReplicasPerNode.get(0).getPort(), replicationConfig.replicationConnectionPoolCheckoutTimeoutMs);
        checkoutConnectionTimeInMs = time.milliseconds() - startTimeInMs;
        // we check out a ConnectedChannel once and replicate remote replicas in batches via the same ConnectedChannel
        for (List<RemoteReplicaInfo> replicaSubList : activeReplicaSubLists) {
          exchangeMetadataTimeInMs = -1;
          fixMissingStoreKeysTimeInMs = -1;
          currentReplicaList = replicaSubList;
          logger.debug("Exchanging metadata with {} remote replicas on {}", currentReplicaList.size(), remoteNode);
          startTimeInMs = time.milliseconds();
          List<ExchangeMetadataResponse> exchangeMetadataResponseList =
              exchangeMetadata(connectedChannel, replicaSubList);
          exchangeMetadataTimeInMs = time.milliseconds() - startTimeInMs;
          if (replicatingFromRemoteColo && leaderBasedReplicationAdmin != null) {
            // If leader based replication is enabled and we are replicating from a remote colo, fetch the missing
            // blobs only for local leader replicas from their corresponding peer leader replicas (Leader <-> Leader).
            // Non-leader replica pairs (standby <-> leader, leader <-> standby, standby <-> standby) will get their
            // missing blobs from their leader pair exchanges and intra-dc replication.
            List<RemoteReplicaInfo> leaderReplicaList = new ArrayList<>();
            List<ExchangeMetadataResponse> exchangeMetadataResponseListForLeaderReplicas = new ArrayList<>();
            getLeaderReplicaList(replicaSubList, exchangeMetadataResponseList, leaderReplicaList,
                exchangeMetadataResponseListForLeaderReplicas);
            replicaSubList = leaderReplicaList;
            exchangeMetadataResponseList = exchangeMetadataResponseListForLeaderReplicas;
          }
          if (replicaSubList.size() > 0) {
            startTimeInMs = time.milliseconds();
            fixMissingStoreKeys(connectedChannel, replicaSubList, exchangeMetadataResponseList, false);
            fixMissingStoreKeysTimeInMs = time.milliseconds() - startTimeInMs;
          }
        }
      }
      if (replicatingFromRemoteColo && leaderBasedReplicationAdmin != null) {
        // Get a list of blocked standby replicas whose missing keys haven't arrived for a long time.
        // Use case: In leader-based cross colo replication, standby replicas don't send GET requests for missing keys
        // found in metadata exchange and expect them to come via leader <-> leader replication.
        // This is a safety condition to ensure that standby replicas are not stuck waiting for the keys to come from
        // the leader, by fetching the missing keys themselves.
        // TODO: As an improvement to this, we can first fetch missing blobs from local leader/other replicas in intra-dc first.
        // TODO: If the result to fetch a blob from local dc is Blob_Not_Found, then we can fetch it from replicas in remote datacenter.
        // This will involve co-ordination between replica threads containing replicas of the same partition.
        List<RemoteReplicaInfo> standbyReplicasTimedOutOnNoProgress =
            getRemoteStandbyReplicasTimedOutOnNoProgress(standbyReplicasWithNoProgress);
        if (standbyReplicasTimedOutOnNoProgress.size() > 0) {
          allCaughtUp = false;
          currentReplicaList = standbyReplicasTimedOutOnNoProgress;
          if (connectedChannel == null) {
            checkoutConnectionTimeInMs = -1;
            startTimeInMs = time.milliseconds();
            connectedChannel = connectionPool.checkOutConnection(remoteNode.getHostname(),
                standbyReplicasTimedOutOnNoProgress.get(0).getPort(),
                replicationConfig.replicationConnectionPoolCheckoutTimeoutMs);
            checkoutConnectionTimeInMs = time.milliseconds() - startTimeInMs;
          }
          List<ExchangeMetadataResponse> exchangeMetadataResponseListForBlockedReplicas =
              standbyReplicasTimedOutOnNoProgress.stream()
                  .map(remoteReplicaInfo -> new ExchangeMetadataResponse(
                      remoteReplicaInfo.getExchangeMetadataResponse()))
                  .collect(Collectors.toList());
          // Convert (and cache) the remote keys that are being fetched, as the StoreKeyConverter would have cleared
          // these keys from its cache while replicating with other replicas before the timeout happened for these
          // standby replicas.
          List<StoreKey> storeKeysToConvert = exchangeMetadataResponseListForBlockedReplicas.stream()
              .map(ExchangeMetadataResponse::getMissingStoreKeys)
              .flatMap(Collection::stream)
              .collect(Collectors.toList());
          convertStoreKeys(storeKeysToConvert);
          exchangeMetadataTimeInMs = 0;
          fixMissingStoreKeysTimeInMs = -1;
          logger.debug("Sending GET request to fetch missing keys for standby remote replicas {} timed out on no progress",
              currentReplicaList);
          startTimeInMs = time.milliseconds();
          fixMissingStoreKeys(connectedChannel, standbyReplicasTimedOutOnNoProgress,
              exchangeMetadataResponseListForBlockedReplicas, true);
          fixMissingStoreKeysTimeInMs = time.milliseconds() - startTimeInMs;
        }
      }
    } catch (Throwable e) {
      if (checkoutConnectionTimeInMs == -1) {
        // throwable happened in the connection checkout phase
        checkoutConnectionTimeInMs = time.milliseconds() - startTimeInMs;
        responseHandler.onEvent(currentReplicaList.get(0).getReplicaId(), e);
      } else if (exchangeMetadataTimeInMs == -1) {
        // throwable happened in the exchange metadata phase
        exchangeMetadataTimeInMs = time.milliseconds() - startTimeInMs;
      } else if (fixMissingStoreKeysTimeInMs == -1) {
        // throwable happened in the fix missing store keys phase
        fixMissingStoreKeysTimeInMs = time.milliseconds() - startTimeInMs;
      }
      logger.error("Error while talking to peer: Remote node: {}, Thread name: {}, Remote replicas: {}, Current active "
              + "remote replica list: {}, Checkout connection time: {}, Exchange metadata time: {}, Fix missing "
              + "store key time {}", remoteNode, threadName, replicasToReplicatePerNode, currentReplicaList,
          checkoutConnectionTimeInMs, exchangeMetadataTimeInMs, fixMissingStoreKeysTimeInMs, e);
      replicationMetrics.incrementReplicationErrors(replicatingOverSsl);
      if (connectedChannel != null) {
        connectionPool.destroyConnection(connectedChannel);
        connectedChannel = null;
      }
    } finally {
      long totalReplicationTime = time.milliseconds() - replicationStartTimeInMs;
      replicationMetrics.updateTotalReplicationTime(totalReplicationTime, replicatingFromRemoteColo,
          replicatingOverSsl, datacenterName);
      if (connectedChannel != null) {
        connectionPool.checkInConnection(connectedChannel);
      }
      context.stop();
      portTypeBasedContext.stop();
    }
  }
  long sleepDurationMs = 0;
  if (allCaughtUp && replicationConfig.replicationReplicaThreadIdleSleepDurationMs > 0) {
    sleepDurationMs = replicationConfig.replicationReplicaThreadIdleSleepDurationMs;
    idleCount.inc();
  } else if (threadThrottleDurationMs > 0) {
    sleepDurationMs = threadThrottleDurationMs;
    throttleCount.inc();
  }
  if (sleepDurationMs > 0) {
    try {
      long currentTime = time.milliseconds();
      time.sleep(sleepDurationMs);
      logger.trace("Replica thread: {} slept for {} ms", threadName, time.milliseconds() - currentTime);
    } catch (InterruptedException e) {
      logger.error("Received interrupted exception during throttling", e);
    }
  }
}
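Because replicate() takes com.github.ambry.utils.Time as a dependency rather than calling System.currentTimeMillis() directly, both the phase timings and the trailing idle/throttle sleep are deterministic under test. The sleep decision at the end of the method is simple enough to read as a pure function; a sketch with hypothetical names (this helper is not part of Ambry's actual code):

// Sketch only: the idle/throttle choice made at the end of replicate(). Idle sleep applies
// when all replicas are caught up; otherwise the thread throttles to bound replication work.
static long computeSleepDurationMs(boolean allCaughtUp, long idleSleepDurationMs, long throttleDurationMs) {
  if (allCaughtUp && idleSleepDurationMs > 0) {
    return idleSleepDurationMs;
  }
  if (throttleDurationMs > 0) {
    return throttleDurationMs;
  }
  return 0;
}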
Use of com.github.ambry.utils.Time in project ambry by linkedin.
The class BlobStoreCompactorTest, method compactAndVerify.
/**
* Compacts the {@code segmentsUnderCompaction} and verifies sanity of store and data. Also verifies that no change
* occurred if no change was expected and vice versa. Ensures that reloading the log has no effect on the
* integrity of the store and data.
* @param segmentsUnderCompaction the names of the log segments under compaction.
* @param deleteReferenceTimeMs the reference time in ms to use to decide whether deletes are valid.
* @param changeExpected {@code true} if compaction will cause a change in size of data. {@code false} otherwise.
* @throws Exception
*/
private void compactAndVerify(List<LogSegmentName> segmentsUnderCompaction, long deleteReferenceTimeMs,
    boolean changeExpected) throws Exception {
  long logSegmentSizeSumBeforeCompaction = getSumOfLogSegmentEndOffsets();
  long logSegmentCountBeforeCompaction = state.index.getLogSegmentCount();
  long indexSegmentCountBeforeCompaction = state.index.getIndexSegments().size();
  ScheduledExecutorService scheduler = Utils.newScheduler(1, true);
  BlobStoreStats stats = new BlobStoreStats("", state.index, 0, Time.MsPerSec, 0, 100, Time.SecsPerMin, false,
      purgeDeleteTombstone, state.time, scheduler, scheduler, DISK_IO_SCHEDULER,
      new StoreMetrics(new MetricRegistry()), 1, false);
  NavigableMap<LogSegmentName, Long> validDataSizeFromBlobStoreStats =
      stats.getValidDataSizeByLogSegment(new TimeRange(deleteReferenceTimeMs, 0L),
          getFileSpanForLogSegments(segmentsUnderCompaction)).getSecond();
  scheduler.shutdown();
  long totalSizeAfterCompactionFromBlobStoreStats =
      segmentsUnderCompaction.stream().mapToLong(validDataSizeFromBlobStoreStats::get).sum();
  CompactionDetails details = new CompactionDetails(deleteReferenceTimeMs, segmentsUnderCompaction, null);
  long expectedValidDataSize = getValidDataSize(segmentsUnderCompaction, deleteReferenceTimeMs, purgeDeleteTombstone);
  assertEquals("Valid size from blob store should be the same as compacted size", expectedValidDataSize,
      totalSizeAfterCompactionFromBlobStoreStats);
  List<LogSegmentName> unaffectedSegments = getUnaffectedSegments(segmentsUnderCompaction);
  Pair<Set<MockId>, Set<MockId>> expiredDeletes = new Pair<>(new HashSet<>(), new HashSet<>());
  List<LogEntry> validLogEntriesInOrder =
      getValidLogEntriesInOrder(segmentsUnderCompaction, deleteReferenceTimeMs, expiredDeletes, purgeDeleteTombstone);
  Set<MockId> idsInCompactedLogSegments = getIdsWithPutInSegments(segmentsUnderCompaction);
  // "compactedDeletes" are the tombstones that should be compacted in a single run (if no exception occurs);
  // "deletesWithPuts" are the tombstones that still have PUTs but may become eligible for compaction in a
  // subsequent cycle
  Set<MockId> compactedDeletes = expiredDeletes.getFirst();
  Set<MockId> deletesWithPuts = expiredDeletes.getSecond();
  compactor = getCompactor(state.log, DISK_IO_SCHEDULER, null, false);
  compactor.initialize(state.index);
  try {
    compactor.compact(details, bundleReadBuffer);
  } finally {
    compactor.close(0);
  }
  Set<MockId> remainingBlobIds = getCurrentBlobIdsFromWholeIndex(state.index, null, purgeDeleteTombstone);
  // since this method aims to verify the successful compaction case, we only need to account for deletes with PUTs
  // that get compacted in multi-cycle compaction (i.e. the PUT is in the 1st log segment and gets compacted in the
  // 1st cycle; the DELETE is in the 2nd log segment and the 2nd cycle may compact the DELETE as well, because the
  // source index is updated when the 1st log segment is switched out and the original PUT is no longer found in the
  // source index)
  deletesWithPuts.removeAll(remainingBlobIds);
  expectedValidDataSize -= deletesWithPuts.size() * DELETE_RECORD_SIZE;
  compactedDeletes.addAll(deletesWithPuts);
  // remove these deletes from the valid log entries (if any)
  validLogEntriesInOrder = deletesWithPuts.isEmpty() ? validLogEntriesInOrder : validLogEntriesInOrder.stream()
      .filter(logEntry -> !deletesWithPuts.contains(logEntry.getId()))
      .collect(Collectors.toList());
  assertFalse("No compaction should be in progress", CompactionLog.isCompactionInProgress(tempDirStr, STORE_ID));
  assertEquals("Swap segments should not be found", 0, compactor.getSwapSegmentsInUse().length);
  long logSegmentSizeAfterCompaction = getSumOfLogSegmentEndOffsets();
  long logSegmentCountAfterCompaction = state.index.getLogSegmentCount();
  long indexSegmentCountAfterCompaction = state.index.getIndexSegments().size();
  verifyCompaction(segmentsUnderCompaction, unaffectedSegments, expectedValidDataSize, validLogEntriesInOrder,
      idsInCompactedLogSegments, deleteReferenceTimeMs, compactedDeletes);
  state.reloadLog(true);
  verifyCompaction(segmentsUnderCompaction, unaffectedSegments, expectedValidDataSize, validLogEntriesInOrder,
      idsInCompactedLogSegments, deleteReferenceTimeMs, compactedDeletes);
  assertEquals("Sum of log segment capacities changed after reload", logSegmentSizeAfterCompaction,
      getSumOfLogSegmentEndOffsets());
  assertEquals("Log segment count changed after reload", logSegmentCountAfterCompaction,
      state.index.getLogSegmentCount());
  assertEquals("Index segment count changed after reload", indexSegmentCountAfterCompaction,
      state.index.getIndexSegments().size());
  checkVitals(changeExpected, logSegmentSizeSumBeforeCompaction, logSegmentCountBeforeCompaction,
      indexSegmentCountBeforeCompaction);
  verifySavedBytesCount(logSegmentCountBeforeCompaction, 0);
}
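One small hardening note on the snippet above: the temporary scheduler is shut down only on the success path, so a failure inside the stats query would leak its thread. Wrapping the query in try/finally guarantees cleanup; a sketch reusing the same constructor call from the test:

// Sketch: same one-off stats query as above, but with guaranteed scheduler cleanup.
ScheduledExecutorService scheduler = Utils.newScheduler(1, true);
try {
  BlobStoreStats stats = new BlobStoreStats("", state.index, 0, Time.MsPerSec, 0, 100, Time.SecsPerMin, false,
      purgeDeleteTombstone, state.time, scheduler, scheduler, DISK_IO_SCHEDULER,
      new StoreMetrics(new MetricRegistry()), 1, false);
  // ... query valid data sizes as compactAndVerify does ...
} finally {
  scheduler.shutdown();  // runs even if the query or an assertion throws
}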
Use of com.github.ambry.utils.Time in project ambry by linkedin.
The class BlobStoreStats, method getContainerStorageStats.
/**
* Gets the storage stats for all serviceIds and their containerIds as of now (the time when the API is called).
* Storage stats comprise three values: 1. valid data size (logicalStorageUsage), 2. physical data size, and
* 3. number of blobs.
* The following data are considered valid for this API:
* 1. PUT with no expiry and no corresponding DELETE
* 2. PUT expiring at t_exp but t_exp_ref < t_exp
* 3. PUT with corresponding DELETE at time t_delete but t_del_ref < t_delete
* For this API, both t_exp_ref and t_del_ref are specified by the given reference time.
* For physical data size, all records are taken into consideration, including deleted PUTs and even the DELETE
* records themselves.
* For number of blobs, all distinct blob ids are counted.
* @param referenceTimeInMs the reference time in ms until which deletes and expiration are relevant
* @return the storage stats of each container in the form of a nested {@link Map} of accountId to another map of containerId
* to {@link ContainerStorageStats}.
*/
Map<Short, Map<Short, ContainerStorageStats>> getContainerStorageStats(long referenceTimeInMs) throws StoreException {
  if (!enabled.get()) {
    throw new StoreException(String.format("BlobStoreStats is not enabled or closing for store %s", storeId),
        StoreErrorCodes.Store_Shutting_Down);
  }
  Map<Short, Map<Short, Long>> validSizeMap = null;
  Map<Short, Map<Short, Long>> physicalUsageMap = null;
  Map<Short, Map<Short, Long>> numberStoreKeyMap = null;
  ScanResults currentScanResults = scanResults.get();
  if (currentScanResults != null && isWithinRange(currentScanResults.containerForecastStartTimeMs,
      currentScanResults.containerForecastEndTimeMs, referenceTimeInMs)) {
    validSizeMap = currentScanResults.getValidSizePerContainer(referenceTimeInMs);
    physicalUsageMap = currentScanResults.getContainerPhysicalStorageUsage();
    numberStoreKeyMap = currentScanResults.getContainerNumberOfStoreKeys();
  } else {
    if (isScanning && isWithinRange(indexScanner.newScanResults.containerForecastStartTimeMs,
        indexScanner.newScanResults.containerForecastEndTimeMs, referenceTimeInMs)) {
      scanLock.lock();
      try {
        if (isScanning) {
          if (waitCondition.await(waitTimeoutInSecs, TimeUnit.SECONDS)) {
            currentScanResults = scanResults.get();
            if (isWithinRange(currentScanResults.containerForecastStartTimeMs,
                currentScanResults.containerForecastEndTimeMs, referenceTimeInMs)) {
              validSizeMap = currentScanResults.getValidSizePerContainer(referenceTimeInMs);
              physicalUsageMap = currentScanResults.getContainerPhysicalStorageUsage();
              numberStoreKeyMap = currentScanResults.getContainerNumberOfStoreKeys();
            }
          } else {
            metrics.blobStoreStatsIndexScannerErrorCount.inc();
            logger.error("Timed out while waiting for BlobStoreStats index scan to complete for store {}", storeId);
          }
        } else {
          currentScanResults = scanResults.get();
          if (isWithinRange(currentScanResults.containerForecastStartTimeMs,
              currentScanResults.containerForecastEndTimeMs, referenceTimeInMs)) {
            validSizeMap = currentScanResults.getValidSizePerContainer(referenceTimeInMs);
            physicalUsageMap = currentScanResults.getContainerPhysicalStorageUsage();
            numberStoreKeyMap = currentScanResults.getContainerNumberOfStoreKeys();
          }
        }
      } catch (InterruptedException e) {
        metrics.blobStoreStatsIndexScannerErrorCount.inc();
        throw new IllegalStateException(
            String.format("Illegal state, wait for scan to complete is interrupted for store %s", storeId), e);
      } finally {
        scanLock.unlock();
      }
    }
    if (validSizeMap == null) {
      // rare edge case where currentScanResults was updated twice since the start of the wait; fall back to an
      // on-demand scan
      return collectContainerStorageStats(referenceTimeInMs);
    }
  }
  Map<Short, Map<Short, ContainerStorageStats>> retValue = new HashMap<>();
  for (short accountId : validSizeMap.keySet()) {
    for (short containerId : validSizeMap.get(accountId).keySet()) {
      retValue.computeIfAbsent(accountId, k -> new HashMap<>())
          .put(containerId, new ContainerStorageStats(containerId, validSizeMap.get(accountId).get(containerId),
              physicalUsageMap.get(accountId).get(containerId), numberStoreKeyMap.get(accountId).get(containerId)));
    }
  }
  return retValue;
}
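The isWithinRange helper is not shown in this excerpt; from the way it guards the forecast windows above, it is presumably an inclusive bounds check on the reference time. A sketch under that assumption:

// Assumed semantics (helper not shown in this excerpt): a reference time can be answered from
// a forecast only if it falls within the forecast's start/end window.
private static boolean isWithinRange(long startTimeMs, long endTimeMs, long referenceTimeMs) {
  return referenceTimeMs >= startTimeMs && referenceTimeMs <= endTimeMs;
}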
Use of com.github.ambry.utils.Time in project ambry by linkedin.
The class BlobStoreStats, method collectContainerStorageStats.
/**
* Walk through the entire index and collect storage stats per container.
* @param referenceTimeInMs the reference time in ms until which deletes and expiration are relevant
* @return a nested {@link Map} of serviceId to containerId to {@link ContainerStorageStats}.
*/
private Map<Short, Map<Short, ContainerStorageStats>> collectContainerStorageStats(long referenceTimeInMs)
    throws StoreException {
  logger.trace("On demand index scanning to collect container valid data sizes for store {} wrt ref time {}", storeId,
      referenceTimeInMs);
  long startTimeMs = time.milliseconds();
  Map<StoreKey, IndexFinalState> keyFinalStates = new HashMap<>();
  Map<Short, Map<Short, Long>> validDataSizePerContainer = new HashMap<>();
  Map<Short, Map<Short, Long>> physicalDataSizePerContainer = new HashMap<>();
  Map<Short, Map<Short, Long>> storeKeysPerContainer = new HashMap<>();
  Map<Short, Map<Short, ContainerStorageStats>> result = new HashMap<>();
  int indexSegmentCount = 0;
  for (IndexSegment indexSegment : index.getIndexSegments().descendingMap().values()) {
    if (!enabled.get()) {
      throw new StoreException(String.format("BlobStoreStats is not enabled or closing for store %s", storeId),
          StoreErrorCodes.Store_Shutting_Down);
    }
    long indexSegmentStartProcessTimeMs = time.milliseconds();
    diskIOScheduler.getSlice(BlobStoreStats.IO_SCHEDULER_JOB_TYPE, BlobStoreStats.IO_SCHEDULER_JOB_ID,
        indexSegment.size());
    forEachIndexEntry(indexSegment, referenceTimeInMs, time.milliseconds(), null, keyFinalStates, true,
        (entry, isValid) -> {
          IndexValue indexValue = entry.getValue();
          if (isValid && indexValue.isPut()) {
            // DELETE and TTL update records do not count towards the valid data size for container usage
            updateNestedMapHelper(validDataSizePerContainer, indexValue.getAccountId(), indexValue.getContainerId(),
                indexValue.getSize());
          }
          updateNestedMapHelper(physicalDataSizePerContainer, indexValue.getAccountId(), indexValue.getContainerId(),
              indexValue.getSize());
          updateNestedMapHelper(storeKeysPerContainer, indexValue.getAccountId(), indexValue.getContainerId(),
              (long) (indexValue.isPut() ? 1 : 0));
        });
    metrics.statsOnDemandScanTimePerIndexSegmentMs.update(time.milliseconds() - indexSegmentStartProcessTimeMs,
        TimeUnit.MILLISECONDS);
    indexSegmentCount++;
    if (indexSegmentCount == 1 || indexSegmentCount % 10 == 0) {
      logger.info("Container Stats: Index segment {} processing complete (on-demand scanning) for store {}",
          indexSegment.getFile().getName(), storeId);
    }
  }
  for (short accountId : validDataSizePerContainer.keySet()) {
    for (short containerId : validDataSizePerContainer.get(accountId).keySet()) {
      result.computeIfAbsent(accountId, k -> new HashMap<>())
          .put(containerId, new ContainerStorageStats(containerId,
              validDataSizePerContainer.get(accountId).get(containerId),
              physicalDataSizePerContainer.get(accountId).get(containerId),
              storeKeysPerContainer.get(accountId).get(containerId)));
    }
  }
  // The remaining index entries in keyFinalStates are DELETE tombstones left by compaction (whose associated PUT is
  // not found)
  updateDeleteTombstoneStats(keyFinalStates.values());
  metrics.statsOnDemandScanTotalTimeMs.update(time.milliseconds() - startTimeMs, TimeUnit.MILLISECONDS);
  return result;
}
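updateNestedMapHelper is likewise not part of this excerpt; given how it is called above, it presumably accumulates a value into the two-level accountId -> containerId map. A sketch under that assumption:

// Assumed shape of the updateNestedMapHelper used above (helper not shown in this excerpt):
// add `value` to the running total for (accountId, containerId), creating entries as needed.
private static void updateNestedMapHelper(Map<Short, Map<Short, Long>> map, short accountId, short containerId,
    long value) {
  map.computeIfAbsent(accountId, k -> new HashMap<>()).merge(containerId, value, Long::sum);
}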