Example 6 with Time

Use of com.github.ambry.utils.Time in project ambry by linkedin.

The class MockSelector, method testConnectionReplenishment.

/**
 * Test that connections get replenished in {@link SocketNetworkClient#sendAndPoll(List, Set, int)} to maintain the minimum
 * number of active connections.
 */
@Test
public void testConnectionReplenishment() {
    AtomicInteger nextCorrelationId = new AtomicInteger(1);
    Function<Integer, List<RequestInfo>> requestGen = numRequests -> IntStream.range(0, numRequests).mapToObj(i -> new RequestInfo(sslHost, sslPort, new MockSend(nextCorrelationId.getAndIncrement()), replicaOnSslNode, null)).collect(Collectors.toList());
    // 1 host x 1 port x 3 connections x 100%
    int warmUpPercentage = 100;
    AtomicInteger expectedConnectCalls = new AtomicInteger(warmUpPercentage * 3 / 100);
    Runnable checkConnectCalls = () -> Assert.assertEquals(expectedConnectCalls.get(), selector.connectCallCount());
    networkClient.warmUpConnections(Collections.singletonList(replicaOnSslNode.getDataNodeId()), warmUpPercentage, TIME_FOR_WARM_UP_MS, new ArrayList<>());
    checkConnectCalls.run();
    selector.setState(MockSelectorState.Good);
    // 1. this sendAndPoll() should use one of the pre-warmed connections
    List<ResponseInfo> responseInfoList = networkClient.sendAndPoll(requestGen.apply(3), Collections.emptySet(), POLL_TIMEOUT_MS);
    checkConnectCalls.run();
    Assert.assertEquals(3, responseInfoList.size());
    responseInfoList.forEach(ResponseInfo::release);
    // 2. this sendAndPoll() should disconnect two of the pre-warmed connections
    selector.setState(MockSelectorState.DisconnectOnSend);
    responseInfoList = networkClient.sendAndPoll(requestGen.apply(2), Collections.emptySet(), POLL_TIMEOUT_MS);
    checkConnectCalls.run();
    Assert.assertEquals(2, responseInfoList.size());
    responseInfoList.forEach(ResponseInfo::release);
    // 3. the two connections lost in the previous sendAndPoll should not be replenished yet since a second has not yet
    // passed since startup
    selector.setState(MockSelectorState.Good);
    responseInfoList = networkClient.sendAndPoll(requestGen.apply(1), Collections.emptySet(), POLL_TIMEOUT_MS);
    checkConnectCalls.run();
    Assert.assertEquals(1, responseInfoList.size());
    responseInfoList.forEach(ResponseInfo::release);
    // 4. one of the connections lost in sendAndPoll 2 should be replenished
    time.setCurrentMilliseconds(time.milliseconds() + Time.MsPerSec);
    selector.setState(MockSelectorState.Good);
    responseInfoList = networkClient.sendAndPoll(requestGen.apply(0), Collections.emptySet(), POLL_TIMEOUT_MS);
    expectedConnectCalls.addAndGet(1);
    checkConnectCalls.run();
    Assert.assertEquals(0, responseInfoList.size());
    // 5. no connections replenished this time since only half a second passed.
    time.setCurrentMilliseconds(time.milliseconds() + 500);
    selector.setState(MockSelectorState.Good);
    responseInfoList = networkClient.sendAndPoll(requestGen.apply(0), Collections.emptySet(), POLL_TIMEOUT_MS);
    checkConnectCalls.run();
    Assert.assertEquals(0, responseInfoList.size());
    // 6. the second connection lost in sendAndPoll 2 should be replenished
    time.setCurrentMilliseconds(time.milliseconds() + 500);
    selector.setState(MockSelectorState.Good);
    responseInfoList = networkClient.sendAndPoll(requestGen.apply(0), Collections.emptySet(), POLL_TIMEOUT_MS);
    expectedConnectCalls.addAndGet(1);
    checkConnectCalls.run();
    Assert.assertEquals(0, responseInfoList.size());
    // 7. this call should use the existing connections in the pool
    selector.setState(MockSelectorState.Good);
    responseInfoList = networkClient.sendAndPoll(requestGen.apply(3), Collections.emptySet(), POLL_TIMEOUT_MS);
    checkConnectCalls.run();
    Assert.assertEquals(3, responseInfoList.size());
    responseInfoList.forEach(ResponseInfo::release);
}
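The pattern worth noting above is that the mock clock is stepped explicitly via time.setCurrentMilliseconds(...), so the once-per-second replenishment behavior can be verified deterministically. Below is a minimal sketch of such a time-gated check, assuming a hypothetical ReplenishmentGate helper; the real SocketNetworkClient logic may differ in detail, but the test's assertions are consistent with an at-most-one-connection-per-second gate driven by an injected Time.

// Hypothetical sketch: allow at most one replenishment per second, driven by an
// injected Time so tests can substitute a MockTime and step the clock manually.
class ReplenishmentGate {
    private final Time time;
    private long lastReplenishMs;

    ReplenishmentGate(Time time) {
        this.time = time;
        // treat startup as the last replenishment, which is why step 3 of the
        // test sees no new connections before a full second has elapsed
        this.lastReplenishMs = time.milliseconds();
    }

    // returns true if a connection may be replenished now
    boolean tryReplenish() {
        long nowMs = time.milliseconds();
        if (nowMs - lastReplenishMs >= Time.MsPerSec) {
            lastReplenishMs = nowMs;
            return true;
        }
        return false;
    }
}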

Example 7 with Time

Use of com.github.ambry.utils.Time in project ambry by linkedin.

The class ReplicaThread, method replicate.

/**
 * Do replication for replicas grouped by {@link DataNodeId}.
 * A replication cycle between two replicas involves the following steps:
 *    1. Exchange metadata: fetch the metadata of blobs added to the remote replica since the last synchronization
 *       point and filter out the ones missing in the local store.
 *    2. Fetch missing blobs: fetch the missing blobs by issuing GET requests to the remote replica and write them to
 *       the local store.
 *
 *  During cross-colo replication, depending on the {@link ReplicationModelType}, the missing blobs are either fetched
 *  from all remote replicas (if modelType == ALL_TO_ALL) or only fetched for local leader replicas from their remote
 *  leader replicas (if modelType == LEADER_BASED). In the latter case, non-leader replica pairs (leader <-> standby,
 *  standby <-> leader, standby <-> standby) will get their missing blobs from their corresponding leader <-> leader
 *  exchanges and intra-dc replication.
 *
 *  Here is a table listing what is exchanged between local and remote replicas based on their roles
 *  (leader/standby) when the {@link ReplicationModelType} is LEADER_BASED.
 *
 *              |   Local Leader    |     Local Standby   |   Remote Leader   |  Remote Standby
 *            -------------------------------------------------------------------------------------
 *     Leader:  |        ---        |  metadata and data  | metadata and data |   metadata only
 *     Standby: | metadata and data |  metadata and data  | metadata only     |   metadata only
 */
public void replicate() {
    boolean allCaughtUp = true;
    Map<DataNodeId, List<RemoteReplicaInfo>> dataNodeToRemoteReplicaInfo = getRemoteReplicaInfos();
    logger.trace("Replicating from {} DataNodes.", replicasToReplicateGroupedByNode.size());
    for (Map.Entry<DataNodeId, List<RemoteReplicaInfo>> entry : dataNodeToRemoteReplicaInfo.entrySet()) {
        DataNodeId remoteNode = entry.getKey();
        if (!running) {
            break;
        }
        List<RemoteReplicaInfo> replicasToReplicatePerNode = entry.getValue();
        Timer.Context context = null;
        Timer.Context portTypeBasedContext = null;
        if (replicatingFromRemoteColo) {
            context = replicationMetrics.interColoReplicationLatency.get(remoteNode.getDatacenterName()).time();
            if (replicatingOverSsl) {
                portTypeBasedContext = replicationMetrics.sslInterColoReplicationLatency.get(remoteNode.getDatacenterName()).time();
            } else {
                portTypeBasedContext = replicationMetrics.plainTextInterColoReplicationLatency.get(remoteNode.getDatacenterName()).time();
            }
        } else {
            context = replicationMetrics.intraColoReplicationLatency.time();
            if (replicatingOverSsl) {
                portTypeBasedContext = replicationMetrics.sslIntraColoReplicationLatency.time();
            } else {
                portTypeBasedContext = replicationMetrics.plainTextIntraColoReplicationLatency.time();
            }
        }
        ConnectedChannel connectedChannel = null;
        long checkoutConnectionTimeInMs = -1;
        long exchangeMetadataTimeInMs = -1;
        long fixMissingStoreKeysTimeInMs = -1;
        long replicationStartTimeInMs = time.milliseconds();
        long startTimeInMs = replicationStartTimeInMs;
        // Get a list of active replicas that need to be included for this replication cycle
        List<RemoteReplicaInfo> activeReplicasPerNode = new ArrayList<>();
        List<RemoteReplicaInfo> standbyReplicasWithNoProgress = new ArrayList<>();
        for (RemoteReplicaInfo remoteReplicaInfo : replicasToReplicatePerNode) {
            ReplicaId replicaId = remoteReplicaInfo.getReplicaId();
            boolean inBackoff = time.milliseconds() < remoteReplicaInfo.getReEnableReplicationTime();
            if (replicaId.isDown() || inBackoff || remoteReplicaInfo.getLocalStore().getCurrentState() == ReplicaState.OFFLINE || replicationDisabledPartitions.contains(replicaId.getPartitionId())) {
                logger.debug("Skipping replication on replica {} because one of following conditions is true: remote replica is down " + "= {}; in backoff = {}; local store is offline = {}; replication is disabled = {}.", replicaId.getPartitionId().toPathString(), replicaId.isDown(), inBackoff, remoteReplicaInfo.getLocalStore().getCurrentState() == ReplicaState.OFFLINE, replicationDisabledPartitions.contains(replicaId.getPartitionId()));
                continue;
            }
            if (replicatingFromRemoteColo && leaderBasedReplicationAdmin != null) {
                // check if all missing keys for standby replicas from previous replication cycle are now obtained
                // via leader replica. If we still have missing keys, don't include them in current replication cycle
                // to avoid sending duplicate metadata requests since their token wouldn't have advanced.
                processMissingKeysFromPreviousMetadataResponse(remoteReplicaInfo);
                if (containsMissingKeysFromPreviousMetadataExchange(remoteReplicaInfo)) {
                    standbyReplicasWithNoProgress.add(remoteReplicaInfo);
                    continue;
                }
            }
            activeReplicasPerNode.add(remoteReplicaInfo);
        }
        logger.trace("Replicating from {} RemoteReplicaInfos.", activeReplicasPerNode.size());
        // use a variable to track the current replica list to replicate (for logging purposes)
        List<RemoteReplicaInfo> currentReplicaList = activeReplicasPerNode;
        try {
            if (activeReplicasPerNode.size() > 0) {
                allCaughtUp = false;
                // if maxReplicaCountPerRequest > 0, split remote replicas on same node into multiple lists; otherwise there is
                // no limit.
                List<List<RemoteReplicaInfo>> activeReplicaSubLists = maxReplicaCountPerRequest > 0 ? Utils.partitionList(activeReplicasPerNode, maxReplicaCountPerRequest) : Collections.singletonList(activeReplicasPerNode);
                startTimeInMs = time.milliseconds();
                connectedChannel = connectionPool.checkOutConnection(remoteNode.getHostname(), activeReplicasPerNode.get(0).getPort(), replicationConfig.replicationConnectionPoolCheckoutTimeoutMs);
                checkoutConnectionTimeInMs = time.milliseconds() - startTimeInMs;
                // we checkout ConnectedChannel once and replicate remote replicas in batch via same ConnectedChannel
                for (List<RemoteReplicaInfo> replicaSubList : activeReplicaSubLists) {
                    exchangeMetadataTimeInMs = -1;
                    fixMissingStoreKeysTimeInMs = -1;
                    currentReplicaList = replicaSubList;
                    logger.debug("Exchanging metadata with {} remote replicas on {}", currentReplicaList.size(), remoteNode);
                    startTimeInMs = time.milliseconds();
                    List<ExchangeMetadataResponse> exchangeMetadataResponseList = exchangeMetadata(connectedChannel, replicaSubList);
                    exchangeMetadataTimeInMs = time.milliseconds() - startTimeInMs;
                    if (replicatingFromRemoteColo && leaderBasedReplicationAdmin != null) {
                        // If leader based replication is enabled and we are replicating from remote colo, fetch the missing blobs
                        // only for local leader replicas from their corresponding peer leader replicas (Leader <-> Leader).
                        // Non-leader replica pairs (standby <-> leaders, leader <-> standby, standby <-> standby) will get their
                        // missing blobs from their leader pair exchanges and intra-dc replication.
                        List<RemoteReplicaInfo> leaderReplicaList = new ArrayList<>();
                        List<ExchangeMetadataResponse> exchangeMetadataResponseListForLeaderReplicas = new ArrayList<>();
                        getLeaderReplicaList(replicaSubList, exchangeMetadataResponseList, leaderReplicaList, exchangeMetadataResponseListForLeaderReplicas);
                        replicaSubList = leaderReplicaList;
                        exchangeMetadataResponseList = exchangeMetadataResponseListForLeaderReplicas;
                    }
                    if (replicaSubList.size() > 0) {
                        startTimeInMs = time.milliseconds();
                        fixMissingStoreKeys(connectedChannel, replicaSubList, exchangeMetadataResponseList, false);
                        fixMissingStoreKeysTimeInMs = time.milliseconds() - startTimeInMs;
                    }
                }
            }
            if (replicatingFromRemoteColo && leaderBasedReplicationAdmin != null) {
                // Get a list of blocked standby replicas whose missing keys haven't arrived for a long time.
                // Use case: in leader-based cross-colo replication, standby replicas don't send GET requests for missing keys
                // found in metadata exchange and expect them to come via leader <-> leader replication.
                // As a safety net, standby replicas that have made no progress for too long fetch the missing keys
                // themselves here, so that they are not stuck waiting indefinitely for the keys to come from the leader.
                // TODO: As an improvement, we could first fetch missing blobs from the local leader/other replicas intra-dc.
                // TODO: If the result of fetching a blob from the local dc is Blob_Not_Found, we could then fetch it from replicas in the remote datacenter.
                // This would involve coordination between replica threads containing replicas of the same partition.
                List<RemoteReplicaInfo> standbyReplicasTimedOutOnNoProgress = getRemoteStandbyReplicasTimedOutOnNoProgress(standbyReplicasWithNoProgress);
                if (standbyReplicasTimedOutOnNoProgress.size() > 0) {
                    allCaughtUp = false;
                    currentReplicaList = standbyReplicasTimedOutOnNoProgress;
                    if (connectedChannel == null) {
                        checkoutConnectionTimeInMs = -1;
                        startTimeInMs = time.milliseconds();
                        connectedChannel = connectionPool.checkOutConnection(remoteNode.getHostname(), standbyReplicasTimedOutOnNoProgress.get(0).getPort(), replicationConfig.replicationConnectionPoolCheckoutTimeoutMs);
                        checkoutConnectionTimeInMs = time.milliseconds() - startTimeInMs;
                    }
                    List<ExchangeMetadataResponse> exchangeMetadataResponseListForBlockedReplicas = standbyReplicasTimedOutOnNoProgress.stream().map(remoteReplicaInfo -> new ExchangeMetadataResponse(remoteReplicaInfo.getExchangeMetadataResponse())).collect(Collectors.toList());
                    // Convert (and cache) the remote keys being fetched, since the StoreKeyConverter would have
                    // cleared these keys from its cache while replicating with other replicas before the timeout hit
                    // for these standby replicas.
                    List<StoreKey> storeKeysToConvert = exchangeMetadataResponseListForBlockedReplicas.stream().map(ExchangeMetadataResponse::getMissingStoreKeys).flatMap(Collection::stream).collect(Collectors.toList());
                    convertStoreKeys(storeKeysToConvert);
                    exchangeMetadataTimeInMs = 0;
                    fixMissingStoreKeysTimeInMs = -1;
                    logger.debug("Sending GET request to fetch missing keys for standby remote replicas {} that timed out on no progress", currentReplicaList);
                    startTimeInMs = time.milliseconds();
                    fixMissingStoreKeys(connectedChannel, standbyReplicasTimedOutOnNoProgress, exchangeMetadataResponseListForBlockedReplicas, true);
                    fixMissingStoreKeysTimeInMs = time.milliseconds() - startTimeInMs;
                }
            }
        } catch (Throwable e) {
            if (checkoutConnectionTimeInMs == -1) {
                // throwable happened in checkout connection phase
                checkoutConnectionTimeInMs = time.milliseconds() - startTimeInMs;
                responseHandler.onEvent(currentReplicaList.get(0).getReplicaId(), e);
            } else if (exchangeMetadataTimeInMs == -1) {
                // throwable happened in exchange metadata phase
                exchangeMetadataTimeInMs = time.milliseconds() - startTimeInMs;
            } else if (fixMissingStoreKeysTimeInMs == -1) {
                // throwable happened in fix missing store phase
                fixMissingStoreKeysTimeInMs = time.milliseconds() - startTimeInMs;
            }
            logger.error("Error while talking to peer: Remote node: {}, Thread name: {}, Remote replicas: {}, Current active " + "remote replica list: {}, Checkout connection time: {}, Exchange metadata time: {}, Fix missing " + "store key time {}", remoteNode, threadName, replicasToReplicatePerNode, currentReplicaList, checkoutConnectionTimeInMs, exchangeMetadataTimeInMs, fixMissingStoreKeysTimeInMs, e);
            replicationMetrics.incrementReplicationErrors(replicatingOverSsl);
            if (connectedChannel != null) {
                connectionPool.destroyConnection(connectedChannel);
                connectedChannel = null;
            }
        } finally {
            long totalReplicationTime = time.milliseconds() - replicationStartTimeInMs;
            replicationMetrics.updateTotalReplicationTime(totalReplicationTime, replicatingFromRemoteColo, replicatingOverSsl, datacenterName);
            if (connectedChannel != null) {
                connectionPool.checkInConnection(connectedChannel);
            }
            context.stop();
            portTypeBasedContext.stop();
        }
    }
    long sleepDurationMs = 0;
    if (allCaughtUp && replicationConfig.replicationReplicaThreadIdleSleepDurationMs > 0) {
        sleepDurationMs = replicationConfig.replicationReplicaThreadIdleSleepDurationMs;
        idleCount.inc();
    } else if (threadThrottleDurationMs > 0) {
        sleepDurationMs = threadThrottleDurationMs;
        throttleCount.inc();
    }
    if (sleepDurationMs > 0) {
        try {
            long currentTime = time.milliseconds();
            time.sleep(sleepDurationMs);
            logger.trace("Replica thread: {} slept for {} ms", threadName, time.milliseconds() - currentTime);
        } catch (InterruptedException e) {
            logger.error("Received interrupted exception during throttling", e);
        }
    }
}
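In the LEADER_BASED branch above, getLeaderReplicaList narrows the exchange down to leader <-> leader pairs. A minimal sketch of that filtering, assuming the two input lists are index-aligned and that isLeaderPair is a hypothetical stand-in for the real leadership check (which consults the leaderBasedReplicationAdmin):

// Hypothetical sketch: keep only the replicas whose local and remote sides are
// both leaders, along with their corresponding metadata responses.
private void getLeaderReplicaList(List<RemoteReplicaInfo> replicaSubList,
        List<ExchangeMetadataResponse> exchangeMetadataResponseList,
        List<RemoteReplicaInfo> leaderReplicaList,
        List<ExchangeMetadataResponse> leaderExchangeMetadataResponseList) {
    for (int i = 0; i < replicaSubList.size(); i++) {
        RemoteReplicaInfo remoteReplicaInfo = replicaSubList.get(i);
        if (isLeaderPair(remoteReplicaInfo)) {
            leaderReplicaList.add(remoteReplicaInfo);
            leaderExchangeMetadataResponseList.add(exchangeMetadataResponseList.get(i));
        }
    }
}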

Example 8 with Time

Use of com.github.ambry.utils.Time in project ambry by linkedin.

The class BlobStoreCompactorTest, method compactAndVerify.

/**
 * Compacts the {@code segmentsUnderCompaction} and verifies the sanity of the store and data. Also verifies that no
 * change occurred if none was expected, and vice versa. Ensures that reloading the log has no effect on the
 * integrity of the store and data.
 * @param segmentsUnderCompaction the names of the log segments under compaction.
 * @param deleteReferenceTimeMs the reference time in ms to use to decide whether deletes are valid.
 * @param changeExpected {@code true} if compaction will cause a change in size of data. {@code false} otherwise.
 * @throws Exception
 */
private void compactAndVerify(List<LogSegmentName> segmentsUnderCompaction, long deleteReferenceTimeMs, boolean changeExpected) throws Exception {
    long logSegmentSizeSumBeforeCompaction = getSumOfLogSegmentEndOffsets();
    long logSegmentCountBeforeCompaction = state.index.getLogSegmentCount();
    long indexSegmentCountBeforeCompaction = state.index.getIndexSegments().size();
    ScheduledExecutorService scheduler = Utils.newScheduler(1, true);
    BlobStoreStats stats = new BlobStoreStats("", state.index, 0, Time.MsPerSec, 0, 100, Time.SecsPerMin, false, purgeDeleteTombstone, state.time, scheduler, scheduler, DISK_IO_SCHEDULER, new StoreMetrics(new MetricRegistry()), 1, false);
    NavigableMap<LogSegmentName, Long> validDataSizeFromBlobStoreStats = stats.getValidDataSizeByLogSegment(new TimeRange(deleteReferenceTimeMs, 0L), getFileSpanForLogSegments(segmentsUnderCompaction)).getSecond();
    scheduler.shutdown();
    long totalSizeAfterCompactionFromBlobStoreStats = segmentsUnderCompaction.stream().mapToLong(validDataSizeFromBlobStoreStats::get).sum();
    CompactionDetails details = new CompactionDetails(deleteReferenceTimeMs, segmentsUnderCompaction, null);
    long expectedValidDataSize = getValidDataSize(segmentsUnderCompaction, deleteReferenceTimeMs, purgeDeleteTombstone);
    assertEquals("Valid size from blob store should be the same as compacted size", expectedValidDataSize, totalSizeAfterCompactionFromBlobStoreStats);
    List<LogSegmentName> unaffectedSegments = getUnaffectedSegments(segmentsUnderCompaction);
    Pair<Set<MockId>, Set<MockId>> expiredDeletes = new Pair<>(new HashSet<>(), new HashSet<>());
    List<LogEntry> validLogEntriesInOrder = getValidLogEntriesInOrder(segmentsUnderCompaction, deleteReferenceTimeMs, expiredDeletes, purgeDeleteTombstone);
    Set<MockId> idsInCompactedLogSegments = getIdsWithPutInSegments(segmentsUnderCompaction);
    // "compactedDeletes" are those tombstones that should be compacted in a single run (if no exception occurs);
    // "deletesWithPuts" are those tombstones that still have PUTs but may become eligible for compaction in a subsequent cycle
    Set<MockId> compactedDeletes = expiredDeletes.getFirst();
    Set<MockId> deletesWithPuts = expiredDeletes.getSecond();
    compactor = getCompactor(state.log, DISK_IO_SCHEDULER, null, false);
    compactor.initialize(state.index);
    try {
        compactor.compact(details, bundleReadBuffer);
    } finally {
        compactor.close(0);
    }
    Set<MockId> remainingBlobIds = getCurrentBlobIdsFromWholeIndex(state.index, null, purgeDeleteTombstone);
    // since this method aims to verify the successful compaction case, we only need to account for deletes with PUTs
    // that get compacted in multi-cycle compaction (i.e. the PUT is in the 1st log segment and gets compacted in the
    // 1st cycle; the DELETE is in the 2nd log segment and the 2nd compaction cycle may compact the DELETE as well,
    // because the source index is updated when the 1st log segment is switched out and the original PUT is no longer
    // found in the source index)
    deletesWithPuts.removeAll(remainingBlobIds);
    expectedValidDataSize -= deletesWithPuts.size() * DELETE_RECORD_SIZE;
    compactedDeletes.addAll(deletesWithPuts);
    // remove these deletes from valid log entries (if any)
    validLogEntriesInOrder = deletesWithPuts.isEmpty() ? validLogEntriesInOrder : validLogEntriesInOrder.stream().filter(logEntry -> !deletesWithPuts.contains(logEntry.getId())).collect(Collectors.toList());
    assertFalse("No compaction should be in progress", CompactionLog.isCompactionInProgress(tempDirStr, STORE_ID));
    assertEquals("Swap segments should not be found", 0, compactor.getSwapSegmentsInUse().length);
    long logSegmentSizeAfterCompaction = getSumOfLogSegmentEndOffsets();
    long logSegmentCountAfterCompaction = state.index.getLogSegmentCount();
    long indexSegmentCountAfterCompaction = state.index.getIndexSegments().size();
    verifyCompaction(segmentsUnderCompaction, unaffectedSegments, expectedValidDataSize, validLogEntriesInOrder, idsInCompactedLogSegments, deleteReferenceTimeMs, compactedDeletes);
    state.reloadLog(true);
    verifyCompaction(segmentsUnderCompaction, unaffectedSegments, expectedValidDataSize, validLogEntriesInOrder, idsInCompactedLogSegments, deleteReferenceTimeMs, compactedDeletes);
    assertEquals("Sum of log segment capacities changed after reload", logSegmentSizeAfterCompaction, getSumOfLogSegmentEndOffsets());
    assertEquals("Log segment count changed after reload", logSegmentCountAfterCompaction, state.index.getLogSegmentCount());
    assertEquals("Index segment count changed after reload", indexSegmentCountAfterCompaction, state.index.getIndexSegments().size());
    checkVitals(changeExpected, logSegmentSizeSumBeforeCompaction, logSegmentCountBeforeCompaction, indexSegmentCountBeforeCompaction);
    verifySavedBytesCount(logSegmentCountBeforeCompaction, 0);
}
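For context, a hypothetical invocation of this helper from a test body, assuming the curated log/index state is already populated (the segment-selection helper and arguments here are illustrative, not taken from the method above):

// Illustrative only: compact the first two log segments, treat deletes older than
// the current (mock) time as eligible, and expect the total data size to shrink.
List<LogSegmentName> segmentsUnderCompaction = getLogSegments(0, 2);
long deleteReferenceTimeMs = state.time.milliseconds();
compactAndVerify(segmentsUnderCompaction, deleteReferenceTimeMs, true);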

Example 9 with Time

Use of com.github.ambry.utils.Time in project ambry by linkedin.

The class BlobStoreStats, method getContainerStorageStats.

/**
 * Gets the storage stats for all serviceIds and their containerIds as of now (the time when the API is called).
 * Storage stats comprise three values: 1. valid data size (logicalStorageUsage), 2. physical data size, 3. number of blobs.
 * The following data are considered as valid data for this API:
 * 1. PUT with no expiry and no corresponding DELETE
 * 2. PUT expiring at t_exp but t_exp_ref < t_exp
 * 3. PUT with corresponding DELETE at time t_delete but t_del_ref < t_delete
 * For this API, t_ref is specified by the given reference time.
 * For physical data size, all the records are taken into consideration, including deleted PUTs and even the DELETE records themselves.
 * For the number of blobs, all distinct blob ids are counted.
 * @param referenceTimeInMs the reference time in ms until which deletes and expiration are relevant
 * @return the storage stats of each container in the form of a nested {@link Map} of accountId to another map of containerId
 * to {@link ContainerStorageStats}.
 */
Map<Short, Map<Short, ContainerStorageStats>> getContainerStorageStats(long referenceTimeInMs) throws StoreException {
    if (!enabled.get()) {
        throw new StoreException(String.format("BlobStoreStats is not enabled or closing for store %s", storeId), StoreErrorCodes.Store_Shutting_Down);
    }
    Map<Short, Map<Short, Long>> validSizeMap = null;
    Map<Short, Map<Short, Long>> physicalUsageMap = null;
    Map<Short, Map<Short, Long>> numberStoreKeyMap = null;
    ScanResults currentScanResults = scanResults.get();
    if (currentScanResults != null && isWithinRange(currentScanResults.containerForecastStartTimeMs, currentScanResults.containerForecastEndTimeMs, referenceTimeInMs)) {
        validSizeMap = currentScanResults.getValidSizePerContainer(referenceTimeInMs);
        physicalUsageMap = currentScanResults.getContainerPhysicalStorageUsage();
        numberStoreKeyMap = currentScanResults.getContainerNumberOfStoreKeys();
    } else {
        if (isScanning && isWithinRange(indexScanner.newScanResults.containerForecastStartTimeMs, indexScanner.newScanResults.containerForecastEndTimeMs, referenceTimeInMs)) {
            scanLock.lock();
            try {
                if (isScanning) {
                    if (waitCondition.await(waitTimeoutInSecs, TimeUnit.SECONDS)) {
                        currentScanResults = scanResults.get();
                        if (isWithinRange(currentScanResults.containerForecastStartTimeMs, currentScanResults.containerForecastEndTimeMs, referenceTimeInMs)) {
                            validSizeMap = currentScanResults.getValidSizePerContainer(referenceTimeInMs);
                            physicalUsageMap = currentScanResults.getContainerPhysicalStorageUsage();
                            numberStoreKeyMap = currentScanResults.getContainerNumberOfStoreKeys();
                        }
                    } else {
                        metrics.blobStoreStatsIndexScannerErrorCount.inc();
                        logger.error("Timed out while waiting for BlobStoreStats index scan to complete for store {}", storeId);
                    }
                } else {
                    currentScanResults = scanResults.get();
                    if (isWithinRange(currentScanResults.containerForecastStartTimeMs, currentScanResults.containerForecastEndTimeMs, referenceTimeInMs)) {
                        validSizeMap = currentScanResults.getValidSizePerContainer(referenceTimeInMs);
                        physicalUsageMap = currentScanResults.getContainerPhysicalStorageUsage();
                        numberStoreKeyMap = currentScanResults.getContainerNumberOfStoreKeys();
                    }
                }
            } catch (InterruptedException e) {
                metrics.blobStoreStatsIndexScannerErrorCount.inc();
                throw new IllegalStateException(String.format("Illegal state, wait for scan to complete is interrupted for store %s", storeId), e);
            } finally {
                scanLock.unlock();
            }
        }
        if (validSizeMap == null) {
            // validSizeMap could still be null in three scenarios:
            // 1. timed out while waiting for the index scan to complete.
            // 2. the reference time is outside the current forecast coverage and there is no ongoing index scan.
            // 3. rare edge case where currentScanResults was updated twice since the start of the wait.
            return collectContainerStorageStats(referenceTimeInMs);
        }
    }
    Map<Short, Map<Short, ContainerStorageStats>> retValue = new HashMap<>();
    for (short accountId : validSizeMap.keySet()) {
        for (short containerId : validSizeMap.get(accountId).keySet()) {
            retValue.computeIfAbsent(accountId, k -> new HashMap<>()).put(containerId, new ContainerStorageStats(containerId, validSizeMap.get(accountId).get(containerId), physicalUsageMap.get(accountId).get(containerId), numberStoreKeyMap.get(accountId).get(containerId)));
        }
    }
    return retValue;
}
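The validity rules in the javadoc above reduce to a simple predicate per PUT record. A minimal sketch, under the assumption that -1 marks "no expiry" / "not deleted" (ambry's Utils.Infinite_Time convention); the real check lives in the index-scanning code:

// Hypothetical sketch of the validity rules: a PUT counts as valid data if, as of
// the reference time, it has neither expired nor been deleted.
static boolean isValidPut(long expiresAtMs, long deletedAtMs, long referenceTimeInMs) {
    boolean notExpired = expiresAtMs == -1 || referenceTimeInMs < expiresAtMs;
    boolean notDeleted = deletedAtMs == -1 || referenceTimeInMs < deletedAtMs;
    return notExpired && notDeleted;
}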

Example 10 with Time

Use of com.github.ambry.utils.Time in project ambry by linkedin.

The class BlobStoreStats, method collectContainerStorageStats.

/**
 * Walk through the entire index and collect storage stats per container.
 * @param referenceTimeInMs the reference time in ms until which deletes and expiration are relevant
 * @return a nested {@link Map} of serviceId to containerId to {@link ContainerStorageStats}.
 */
private Map<Short, Map<Short, ContainerStorageStats>> collectContainerStorageStats(long referenceTimeInMs) throws StoreException {
    logger.trace("On demand index scanning to collect container valid data sizes for store {} wrt ref time {}", storeId, referenceTimeInMs);
    long startTimeMs = time.milliseconds();
    Map<StoreKey, IndexFinalState> keyFinalStates = new HashMap<>();
    Map<Short, Map<Short, Long>> validDataSizePerContainer = new HashMap<>();
    Map<Short, Map<Short, Long>> physicalDataSizePerContainer = new HashMap<>();
    Map<Short, Map<Short, Long>> storeKeysPerContainer = new HashMap<>();
    Map<Short, Map<Short, ContainerStorageStats>> result = new HashMap<>();
    int indexSegmentCount = 0;
    for (IndexSegment indexSegment : index.getIndexSegments().descendingMap().values()) {
        if (!enabled.get()) {
            throw new StoreException(String.format("BlobStoreStats is not enabled or closing for store %s", storeId), StoreErrorCodes.Store_Shutting_Down);
        }
        long indexSegmentStartProcessTimeMs = time.milliseconds();
        diskIOScheduler.getSlice(BlobStoreStats.IO_SCHEDULER_JOB_TYPE, BlobStoreStats.IO_SCHEDULER_JOB_ID, indexSegment.size());
        forEachIndexEntry(indexSegment, referenceTimeInMs, time.milliseconds(), null, keyFinalStates, true, (entry, isValid) -> {
            IndexValue indexValue = entry.getValue();
            if (isValid && indexValue.isPut()) {
                // delete and TTL update records do not count towards the valid data size for container usage
                updateNestedMapHelper(validDataSizePerContainer, indexValue.getAccountId(), indexValue.getContainerId(), indexValue.getSize());
            }
            updateNestedMapHelper(physicalDataSizePerContainer, indexValue.getAccountId(), indexValue.getContainerId(), indexValue.getSize());
            updateNestedMapHelper(storeKeysPerContainer, indexValue.getAccountId(), indexValue.getContainerId(), (long) (indexValue.isPut() ? 1 : 0));
        });
        metrics.statsOnDemandScanTimePerIndexSegmentMs.update(time.milliseconds() - indexSegmentStartProcessTimeMs, TimeUnit.MILLISECONDS);
        indexSegmentCount++;
        if (indexSegmentCount == 1 || indexSegmentCount % 10 == 0) {
            logger.info("Container Stats: Index segment {} processing complete (on-demand scanning) for store {}", indexSegment.getFile().getName(), storeId);
        }
    }
    for (short accountId : validDataSizePerContainer.keySet()) {
        for (short containerId : validDataSizePerContainer.get(accountId).keySet()) {
            result.computeIfAbsent(accountId, k -> new HashMap<>()).put(containerId, new ContainerStorageStats(containerId, validDataSizePerContainer.get(accountId).get(containerId), physicalDataSizePerContainer.get(accountId).get(containerId), storeKeysPerContainer.get(accountId).get(containerId)));
        }
    }
    // The remaining index entries in keyFinalStates are DELETE tombstones left by compaction (whose associated PUT is not found)
    updateDeleteTombstoneStats(keyFinalStates.values());
    metrics.statsOnDemandScanTotalTimeMs.update(time.milliseconds() - startTimeMs, TimeUnit.MILLISECONDS);
    return result;
}
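updateNestedMapHelper is not shown in this excerpt; from its call sites it accumulates a value into a two-level (accountId -> containerId -> running total) map. A minimal sketch of that presumed behavior:

// Presumed behavior based on the call sites above: add `value` to the running
// total for (accountId, containerId), creating nested entries as needed.
private static void updateNestedMapHelper(Map<Short, Map<Short, Long>> map, short accountId,
        short containerId, long value) {
    map.computeIfAbsent(accountId, k -> new HashMap<>()).merge(containerId, value, Long::sum);
}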

Aggregations

Time (com.github.ambry.utils.Time): 14 usages
ArrayList (java.util.ArrayList): 9 usages
List (java.util.List): 8 usages
StoreConfig (com.github.ambry.config.StoreConfig): 7 usages
VerifiableProperties (com.github.ambry.config.VerifiableProperties): 7 usages
MockTime (com.github.ambry.utils.MockTime): 7 usages
SystemTime (com.github.ambry.utils.SystemTime): 7 usages
IOException (java.io.IOException): 7 usages
HashSet (java.util.HashSet): 7 usages
AtomicInteger (java.util.concurrent.atomic.AtomicInteger): 7 usages
Test (org.junit.Test): 7 usages
MetricRegistry (com.codahale.metrics.MetricRegistry): 6 usages
Utils (com.github.ambry.utils.Utils): 6 usages
Collections (java.util.Collections): 6 usages
Set (java.util.Set): 6 usages
Collectors (java.util.stream.Collectors): 6 usages
Pair (com.github.ambry.utils.Pair): 5 usages
Arrays (java.util.Arrays): 5 usages
HashMap (java.util.HashMap): 5 usages
Map (java.util.Map): 5 usages