Example 1 with StoreKeyConverter

use of com.github.ambry.store.StoreKeyConverter in project ambry by linkedin.

the class ReplicaThread method replicate.

/**
 * Do replication for replicas grouped by {@link DataNodeId}
 * A replication cycle between two replicas involves the following steps:
 *    1. Exchange metadata: fetch the metadata of blobs added to the remote replica since the last synchronization
 *       point and keep the ones missing in the local store.
 *    2. Fetch missing blobs: fetch the missing blobs by issuing a GET request to the remote replica and write them
 *       to the local store.
 *
 *  During cross-colo replication, depending on the {@link ReplicationModelType}, the missing blobs are either fetched
 *  from all remote replicas (if modelType == ALL_TO_ALL) or only fetched for local leader replicas from their remote
 *  leader replicas (if modelType == LEADER_BASED). In the latter case, non-leader replica pairs (leader <-> standby,
 *  standby <-> leader, standby <-> standby) will get their missing blobs from their corresponding leader<->leader
 *  exchanges and intra-dc replication.
 *
 *  Here is a table listing what is exchanged between local and remote replicas based on their roles
 *  (leader/standby) when {@link ReplicationModelType} is LEADER_BASED.
 *
 *              |   Local Leader    |     Local Standby   |   Remote Leader   |  Remote Standby
 *            -------------------------------------------------------------------------------------
 *     Leader:  |        ---        |  metadata and data  | metadata and data |   metadata only
 *     Standby: | metadata and data |  metadata and data  | metadata only     |   metadata only
 */
public void replicate() {
    boolean allCaughtUp = true;
    Map<DataNodeId, List<RemoteReplicaInfo>> dataNodeToRemoteReplicaInfo = getRemoteReplicaInfos();
    logger.trace("Replicating from {} DataNodes.", replicasToReplicateGroupedByNode.size());
    for (Map.Entry<DataNodeId, List<RemoteReplicaInfo>> entry : dataNodeToRemoteReplicaInfo.entrySet()) {
        DataNodeId remoteNode = entry.getKey();
        if (!running) {
            break;
        }
        List<RemoteReplicaInfo> replicasToReplicatePerNode = entry.getValue();
        Timer.Context context = null;
        Timer.Context portTypeBasedContext = null;
        if (replicatingFromRemoteColo) {
            context = replicationMetrics.interColoReplicationLatency.get(remoteNode.getDatacenterName()).time();
            if (replicatingOverSsl) {
                portTypeBasedContext = replicationMetrics.sslInterColoReplicationLatency.get(remoteNode.getDatacenterName()).time();
            } else {
                portTypeBasedContext = replicationMetrics.plainTextInterColoReplicationLatency.get(remoteNode.getDatacenterName()).time();
            }
        } else {
            context = replicationMetrics.intraColoReplicationLatency.time();
            if (replicatingOverSsl) {
                portTypeBasedContext = replicationMetrics.sslIntraColoReplicationLatency.time();
            } else {
                portTypeBasedContext = replicationMetrics.plainTextIntraColoReplicationLatency.time();
            }
        }
        ConnectedChannel connectedChannel = null;
        long checkoutConnectionTimeInMs = -1;
        long exchangeMetadataTimeInMs = -1;
        long fixMissingStoreKeysTimeInMs = -1;
        long replicationStartTimeInMs = time.milliseconds();
        long startTimeInMs = replicationStartTimeInMs;
        // Get a list of active replicas that need to be included in this replication cycle
        List<RemoteReplicaInfo> activeReplicasPerNode = new ArrayList<>();
        List<RemoteReplicaInfo> standbyReplicasWithNoProgress = new ArrayList<>();
        for (RemoteReplicaInfo remoteReplicaInfo : replicasToReplicatePerNode) {
            ReplicaId replicaId = remoteReplicaInfo.getReplicaId();
            boolean inBackoff = time.milliseconds() < remoteReplicaInfo.getReEnableReplicationTime();
            if (replicaId.isDown() || inBackoff || remoteReplicaInfo.getLocalStore().getCurrentState() == ReplicaState.OFFLINE || replicationDisabledPartitions.contains(replicaId.getPartitionId())) {
                logger.debug("Skipping replication on replica {} because one of following conditions is true: remote replica is down " + "= {}; in backoff = {}; local store is offline = {}; replication is disabled = {}.", replicaId.getPartitionId().toPathString(), replicaId.isDown(), inBackoff, remoteReplicaInfo.getLocalStore().getCurrentState() == ReplicaState.OFFLINE, replicationDisabledPartitions.contains(replicaId.getPartitionId()));
                continue;
            }
            if (replicatingFromRemoteColo && leaderBasedReplicationAdmin != null) {
                // check if all missing keys for standby replicas from previous replication cycle are now obtained
                // via leader replica. If we still have missing keys, don't include them in current replication cycle
                // to avoid sending duplicate metadata requests since their token wouldn't have advanced.
                processMissingKeysFromPreviousMetadataResponse(remoteReplicaInfo);
                if (containsMissingKeysFromPreviousMetadataExchange(remoteReplicaInfo)) {
                    standbyReplicasWithNoProgress.add(remoteReplicaInfo);
                    continue;
                }
            }
            activeReplicasPerNode.add(remoteReplicaInfo);
        }
        logger.trace("Replicating from {} RemoteReplicaInfos.", activeReplicasPerNode.size());
        // use a variable to track the current replica list being replicated (for logging purposes)
        List<RemoteReplicaInfo> currentReplicaList = activeReplicasPerNode;
        try {
            if (activeReplicasPerNode.size() > 0) {
                allCaughtUp = false;
                // if maxReplicaCountPerRequest > 0, split the remote replicas on the same node into multiple sub-lists;
                // otherwise there is no limit.
                List<List<RemoteReplicaInfo>> activeReplicaSubLists = maxReplicaCountPerRequest > 0 ? Utils.partitionList(activeReplicasPerNode, maxReplicaCountPerRequest) : Collections.singletonList(activeReplicasPerNode);
                startTimeInMs = time.milliseconds();
                connectedChannel = connectionPool.checkOutConnection(remoteNode.getHostname(), activeReplicasPerNode.get(0).getPort(), replicationConfig.replicationConnectionPoolCheckoutTimeoutMs);
                checkoutConnectionTimeInMs = time.milliseconds() - startTimeInMs;
                // we check out the ConnectedChannel once and replicate the remote replicas in batches via the same ConnectedChannel
                for (List<RemoteReplicaInfo> replicaSubList : activeReplicaSubLists) {
                    exchangeMetadataTimeInMs = -1;
                    fixMissingStoreKeysTimeInMs = -1;
                    currentReplicaList = replicaSubList;
                    logger.debug("Exchanging metadata with {} remote replicas on {}", currentReplicaList.size(), remoteNode);
                    startTimeInMs = time.milliseconds();
                    List<ExchangeMetadataResponse> exchangeMetadataResponseList = exchangeMetadata(connectedChannel, replicaSubList);
                    exchangeMetadataTimeInMs = time.milliseconds() - startTimeInMs;
                    if (replicatingFromRemoteColo && leaderBasedReplicationAdmin != null) {
                        // If leader based replication is enabled and we are replicating from remote colo, fetch the missing blobs
                        // only for local leader replicas from their corresponding peer leader replicas (Leader <-> Leader).
                        // Non-leader replica pairs (standby <-> leaders, leader <-> standby, standby <-> standby) will get their
                        // missing blobs from their leader pair exchanges and intra-dc replication.
                        List<RemoteReplicaInfo> leaderReplicaList = new ArrayList<>();
                        List<ExchangeMetadataResponse> exchangeMetadataResponseListForLeaderReplicas = new ArrayList<>();
                        getLeaderReplicaList(replicaSubList, exchangeMetadataResponseList, leaderReplicaList, exchangeMetadataResponseListForLeaderReplicas);
                        replicaSubList = leaderReplicaList;
                        exchangeMetadataResponseList = exchangeMetadataResponseListForLeaderReplicas;
                    }
                    if (replicaSubList.size() > 0) {
                        startTimeInMs = time.milliseconds();
                        fixMissingStoreKeys(connectedChannel, replicaSubList, exchangeMetadataResponseList, false);
                        fixMissingStoreKeysTimeInMs = time.milliseconds() - startTimeInMs;
                    }
                }
            }
            if (replicatingFromRemoteColo && leaderBasedReplicationAdmin != null) {
                // Get a list of blocked standby replicas whose missing keys haven't arrived for a long time.
                // Use case: In leader-based cross-colo replication, standby replicas don't send GET requests for missing keys
                // found in metadata exchange and expect them to come via leader <-> leader replication.
                // This is a safety measure: if the keys don't arrive from the leader in time, the standby replicas
                // fetch the missing keys themselves.
                // TODO: As an improvement, we can first fetch missing blobs from the local leader/other replicas in the intra-dc.
                // TODO: If the result of fetching a blob from the local dc is Blob_Not_Found, then we can fetch it from replicas in the remote datacenter.
                // This will involve coordination between replica threads containing replicas of the same partition.
                List<RemoteReplicaInfo> standbyReplicasTimedOutOnNoProgress = getRemoteStandbyReplicasTimedOutOnNoProgress(standbyReplicasWithNoProgress);
                if (standbyReplicasTimedOutOnNoProgress.size() > 0) {
                    allCaughtUp = false;
                    currentReplicaList = standbyReplicasTimedOutOnNoProgress;
                    if (connectedChannel == null) {
                        checkoutConnectionTimeInMs = -1;
                        startTimeInMs = time.milliseconds();
                        connectedChannel = connectionPool.checkOutConnection(remoteNode.getHostname(), standbyReplicasTimedOutOnNoProgress.get(0).getPort(), replicationConfig.replicationConnectionPoolCheckoutTimeoutMs);
                        checkoutConnectionTimeInMs = time.milliseconds() - startTimeInMs;
                    }
                    List<ExchangeMetadataResponse> exchangeMetadataResponseListForBlockedReplicas = standbyReplicasTimedOutOnNoProgress.stream().map(remoteReplicaInfo -> new ExchangeMetadataResponse(remoteReplicaInfo.getExchangeMetadataResponse())).collect(Collectors.toList());
                    // Convert (and cache) the remote keys being fetched, since the StoreKeyConverter would have cleared
                    // them from its cache while replicating with other replicas before the timeout occurred for these standby replicas.
                    List<StoreKey> storeKeysToConvert = exchangeMetadataResponseListForBlockedReplicas.stream().map(ExchangeMetadataResponse::getMissingStoreKeys).flatMap(Collection::stream).collect(Collectors.toList());
                    convertStoreKeys(storeKeysToConvert);
                    exchangeMetadataTimeInMs = 0;
                    fixMissingStoreKeysTimeInMs = -1;
                    logger.debug("Sending GET request to fetch missing keys for standby remote replicas {} timed out on no progress", currentReplicaList);
                    startTimeInMs = time.milliseconds();
                    fixMissingStoreKeys(connectedChannel, standbyReplicasTimedOutOnNoProgress, exchangeMetadataResponseListForBlockedReplicas, true);
                    fixMissingStoreKeysTimeInMs = time.milliseconds() - startTimeInMs;
                }
            }
        } catch (Throwable e) {
            if (checkoutConnectionTimeInMs == -1) {
                // throwable happened in checkout connection phase
                checkoutConnectionTimeInMs = time.milliseconds() - startTimeInMs;
                responseHandler.onEvent(currentReplicaList.get(0).getReplicaId(), e);
            } else if (exchangeMetadataTimeInMs == -1) {
                // throwable happened in exchange metadata phase
                exchangeMetadataTimeInMs = time.milliseconds() - startTimeInMs;
            } else if (fixMissingStoreKeysTimeInMs == -1) {
                // throwable happened in fix missing store keys phase
                fixMissingStoreKeysTimeInMs = time.milliseconds() - startTimeInMs;
            }
            logger.error("Error while talking to peer: Remote node: {}, Thread name: {}, Remote replicas: {}, Current active " + "remote replica list: {}, Checkout connection time: {}, Exchange metadata time: {}, Fix missing " + "store key time {}", remoteNode, threadName, replicasToReplicatePerNode, currentReplicaList, checkoutConnectionTimeInMs, exchangeMetadataTimeInMs, fixMissingStoreKeysTimeInMs, e);
            replicationMetrics.incrementReplicationErrors(replicatingOverSsl);
            if (connectedChannel != null) {
                connectionPool.destroyConnection(connectedChannel);
                connectedChannel = null;
            }
        } finally {
            long totalReplicationTime = time.milliseconds() - replicationStartTimeInMs;
            replicationMetrics.updateTotalReplicationTime(totalReplicationTime, replicatingFromRemoteColo, replicatingOverSsl, datacenterName);
            if (connectedChannel != null) {
                connectionPool.checkInConnection(connectedChannel);
            }
            context.stop();
            portTypeBasedContext.stop();
        }
    }
    long sleepDurationMs = 0;
    if (allCaughtUp && replicationConfig.replicationReplicaThreadIdleSleepDurationMs > 0) {
        sleepDurationMs = replicationConfig.replicationReplicaThreadIdleSleepDurationMs;
        idleCount.inc();
    } else if (threadThrottleDurationMs > 0) {
        sleepDurationMs = threadThrottleDurationMs;
        throttleCount.inc();
    }
    if (sleepDurationMs > 0) {
        try {
            long currentTime = time.milliseconds();
            time.sleep(sleepDurationMs);
            logger.trace("Replica thread: {} slept for {} ms", threadName, time.milliseconds() - currentTime);
        } catch (InterruptedException e) {
            logger.error("Received interrupted exception during throttling", e);
        }
    }
}
Also used : GetOption(com.github.ambry.protocol.GetOption) StoreKeyConverter(com.github.ambry.store.StoreKeyConverter) DataNodeId(com.github.ambry.clustermap.DataNodeId) LoggerFactory(org.slf4j.LoggerFactory) MessageFormatWriteSet(com.github.ambry.messageformat.MessageFormatWriteSet) StoreErrorCodes(com.github.ambry.store.StoreErrorCodes) GetResponse(com.github.ambry.protocol.GetResponse) AtomicInteger(java.util.concurrent.atomic.AtomicInteger) Map(java.util.Map) Counter(com.codahale.metrics.Counter) ReplicaMetadataRequest(com.github.ambry.protocol.ReplicaMetadataRequest) GetRequest(com.github.ambry.protocol.GetRequest) ReplicationConfig(com.github.ambry.config.ReplicationConfig) NotificationSystem(com.github.ambry.notification.NotificationSystem) ReplicaSyncUpManager(com.github.ambry.clustermap.ReplicaSyncUpManager) PartitionResponseInfo(com.github.ambry.protocol.PartitionResponseInfo) Predicate(java.util.function.Predicate) Collection(java.util.Collection) Set(java.util.Set) Utils(com.github.ambry.utils.Utils) Collectors(java.util.stream.Collectors) ConnectedChannel(com.github.ambry.network.ConnectedChannel) ReplicaMetadataRequestInfo(com.github.ambry.protocol.ReplicaMetadataRequestInfo) CountDownLatch(java.util.concurrent.CountDownLatch) StoreKey(com.github.ambry.store.StoreKey) List(java.util.List) ReplicaMetadataResponse(com.github.ambry.protocol.ReplicaMetadataResponse) MessageFormatFlags(com.github.ambry.messageformat.MessageFormatFlags) UpdateType(com.github.ambry.notification.UpdateType) Timer(com.codahale.metrics.Timer) MessageSievingInputStream(com.github.ambry.messageformat.MessageSievingInputStream) PartitionId(com.github.ambry.clustermap.PartitionId) BlobId(com.github.ambry.commons.BlobId) ResponseHandler(com.github.ambry.commons.ResponseHandler) PartitionRequestInfo(com.github.ambry.protocol.PartitionRequestInfo) BlobReplicaSourceType(com.github.ambry.notification.BlobReplicaSourceType) ServerErrorCode(com.github.ambry.server.ServerErrorCode) HashMap(java.util.HashMap) ArrayList(java.util.ArrayList) NettyByteBufDataInputStream(com.github.ambry.utils.NettyByteBufDataInputStream) HashSet(java.util.HashSet) Transformer(com.github.ambry.store.Transformer) ChannelOutput(com.github.ambry.network.ChannelOutput) StoreException(com.github.ambry.store.StoreException) ReplicaMetadataResponseInfo(com.github.ambry.protocol.ReplicaMetadataResponseInfo) CloudDataNode(com.github.ambry.clustermap.CloudDataNode) Time(com.github.ambry.utils.Time) ReplicaState(com.github.ambry.clustermap.ReplicaState) MetricRegistry(com.codahale.metrics.MetricRegistry) Logger(org.slf4j.Logger) ReentrantLock(java.util.concurrent.locks.ReentrantLock) ConnectionPool(com.github.ambry.network.ConnectionPool) ClusterMap(com.github.ambry.clustermap.ClusterMap) IOException(java.io.IOException) Condition(java.util.concurrent.locks.Condition) MessageInfo(com.github.ambry.store.MessageInfo) ReplicaId(com.github.ambry.clustermap.ReplicaId) BlobStore(com.github.ambry.store.BlobStore) Collections(java.util.Collections) ArrayList(java.util.ArrayList) ConnectedChannel(com.github.ambry.network.ConnectedChannel) StoreKey(com.github.ambry.store.StoreKey) ReplicaId(com.github.ambry.clustermap.ReplicaId) Timer(com.codahale.metrics.Timer) List(java.util.List) ArrayList(java.util.ArrayList) DataNodeId(com.github.ambry.clustermap.DataNodeId) Map(java.util.Map) HashMap(java.util.HashMap) ClusterMap(com.github.ambry.clustermap.ClusterMap)
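
The batching mentioned in the comments above (a positive maxReplicaCountPerRequest splits the per-node replicas into sub-lists) can be illustrated with plain java.util code. The sketch below is a hypothetical stand-in for that splitting step, not ambry's Utils.partitionList: the method name partition and the Integer placeholders are invented for illustration, and a non-positive limit is treated as "no limit", matching the comment in replicate().

import java.util.ArrayList;
import java.util.Collections;
import java.util.List;

public class ReplicaBatchingSketch {

    /**
     * Split a replica list into batches of at most maxPerRequest elements; a non-positive
     * limit means "no limit", so the whole list is replicated in a single batch.
     */
    static <T> List<List<T>> partition(List<T> replicas, int maxPerRequest) {
        if (maxPerRequest <= 0) {
            return Collections.singletonList(replicas);
        }
        List<List<T>> batches = new ArrayList<>();
        for (int start = 0; start < replicas.size(); start += maxPerRequest) {
            batches.add(new ArrayList<>(replicas.subList(start, Math.min(start + maxPerRequest, replicas.size()))));
        }
        return batches;
    }

    public static void main(String[] args) {
        List<Integer> replicas = new ArrayList<>();
        for (int i = 0; i < 10; i++) {
            replicas.add(i);
        }
        // 10 replicas with a limit of 4 per request -> three batches of sizes 4, 4 and 2.
        System.out.println(partition(replicas, 4));
    }
}

Each batch produced this way is then replicated over the single ConnectedChannel checked out for the remote node, as the loop over activeReplicaSubLists in the example above shows.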

Example 2 with StoreKeyConverter

use of com.github.ambry.store.StoreKeyConverter in project ambry by linkedin.

the class ReplicationEngine method createThreadPool.

/**
 * Create thread pool for a datacenter.
 * @param datacenter The datacenter String.
 * @param numberOfThreads Number of threads to create for the thread pool.
 * @param startThread Whether the thread should be started upon creation.
 */
private List<ReplicaThread> createThreadPool(String datacenter, int numberOfThreads, boolean startThread) {
    nextReplicaThreadIndexByDc.put(datacenter, new AtomicInteger(0));
    List<ReplicaThread> replicaThreads = new ArrayList<>();
    logger.info("Number of replica threads to replicate from {}: {}", datacenter, numberOfThreads);
    ResponseHandler responseHandler = new ResponseHandler(clusterMap);
    for (int i = 0; i < numberOfThreads; i++) {
        boolean replicatingOverSsl = sslEnabledDatacenters.contains(datacenter);
        String threadIdentity = getReplicaThreadName(datacenter, i);
        try {
            StoreKeyConverter threadSpecificKeyConverter = storeKeyConverterFactory.getStoreKeyConverter();
            Transformer threadSpecificTransformer = Utils.getObj(transformerClassName, storeKeyFactory, threadSpecificKeyConverter);
            ReplicaThread replicaThread = new ReplicaThread(threadIdentity, tokenHelper, clusterMap, correlationIdGenerator, dataNodeId, connectionPool, replicationConfig, replicationMetrics, notification, threadSpecificKeyConverter, threadSpecificTransformer, metricRegistry, replicatingOverSsl, datacenter, responseHandler, time, replicaSyncUpManager, skipPredicate, leaderBasedReplicationAdmin);
            replicaThreads.add(replicaThread);
            if (startThread) {
                Thread thread = Utils.newThread(replicaThread.getName(), replicaThread, false);
                thread.start();
                logger.info("Started replica thread {}", thread.getName());
            }
        } catch (Exception e) {
            throw new RuntimeException("Encountered exception instantiating ReplicaThread", e);
        }
    }
    replicationMetrics.trackLiveThreadsCount(replicaThreads, datacenter);
    replicationMetrics.populateSingleColoMetrics(datacenter);
    return replicaThreads;
}
Also used : Transformer(com.github.ambry.store.Transformer) ResponseHandler(com.github.ambry.commons.ResponseHandler) ArrayList(java.util.ArrayList) StoreException(com.github.ambry.store.StoreException) IOException(java.io.IOException) AtomicInteger(java.util.concurrent.atomic.AtomicInteger) StoreKeyConverter(com.github.ambry.store.StoreKeyConverter)
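
The key point of createThreadPool is that every ReplicaThread receives its own StoreKeyConverter and Transformer from the factory, so converter state is never shared between threads. Below is a minimal sketch of that per-thread wiring with invented names (KeyConverter, Worker, and a Supplier-based factory); it is not ambry's API, just the pattern.

import java.util.ArrayList;
import java.util.List;
import java.util.function.Supplier;

public class PerThreadConverterSketch {

    /** Hypothetical stand-in for a non-thread-safe key converter; not ambry's StoreKeyConverter. */
    interface KeyConverter {
        String convert(String key);
    }

    /** Each worker owns its converter instance, so converter state is never shared across threads. */
    static class Worker implements Runnable {
        private final String name;
        private final KeyConverter converter;

        Worker(String name, KeyConverter converter) {
            this.name = name;
            this.converter = converter;
        }

        @Override
        public void run() {
            System.out.println(name + " converted blobId-42 to " + converter.convert("blobId-42"));
        }
    }

    static List<Thread> createThreadPool(String datacenter, int numberOfThreads,
                                         Supplier<KeyConverter> converterFactory, boolean startThread) {
        List<Thread> threads = new ArrayList<>();
        for (int i = 0; i < numberOfThreads; i++) {
            // One converter per thread, mirroring storeKeyConverterFactory.getStoreKeyConverter() in the example above.
            Worker worker = new Worker("ReplicaThread-" + datacenter + "-" + i, converterFactory.get());
            Thread thread = new Thread(worker, worker.name);
            threads.add(thread);
            if (startThread) {
                thread.start();
            }
        }
        return threads;
    }

    public static void main(String[] args) {
        // Hand every thread a fresh converter (here a trivial upper-casing one).
        createThreadPool("dc1", 2, () -> key -> key.toUpperCase(), true);
    }
}

In createThreadPool above the same idea applies, except the converter comes from storeKeyConverterFactory.getStoreKeyConverter() and the worker is a full ReplicaThread.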

Example 3 with StoreKeyConverter

use of com.github.ambry.store.StoreKeyConverter in project ambry by linkedin.

the class ReplicationTest method remoteReplicaInfoAddRemoveTest.

/**
 * Tests add/remove replicaInfo to {@link ReplicaThread}
 * @throws Exception
 */
@Test
public void remoteReplicaInfoAddRemoveTest() throws Exception {
    MockClusterMap clusterMap = new MockClusterMap();
    Pair<MockHost, MockHost> localAndRemoteHosts = getLocalAndRemoteHosts(clusterMap);
    MockHost localHost = localAndRemoteHosts.getFirst();
    MockHost remoteHost = localAndRemoteHosts.getSecond();
    StoreKeyFactory storeKeyFactory = Utils.getObj("com.github.ambry.commons.BlobIdFactory", clusterMap);
    MockStoreKeyConverterFactory mockStoreKeyConverterFactory = new MockStoreKeyConverterFactory(null, null);
    mockStoreKeyConverterFactory.setReturnInputIfAbsent(true);
    mockStoreKeyConverterFactory.setConversionMap(new HashMap<>());
    StoreKeyConverter storeKeyConverter = mockStoreKeyConverterFactory.getStoreKeyConverter();
    Transformer transformer = new ValidatingTransformer(storeKeyFactory, storeKeyConverter);
    ReplicationMetrics replicationMetrics = new ReplicationMetrics(new MetricRegistry(), clusterMap.getReplicaIds(localHost.dataNodeId));
    replicationMetrics.populateSingleColoMetrics(remoteHost.dataNodeId.getDatacenterName());
    List<RemoteReplicaInfo> remoteReplicaInfoList = localHost.getRemoteReplicaInfos(remoteHost, null);
    Map<DataNodeId, MockHost> hosts = new HashMap<>();
    hosts.put(remoteHost.dataNodeId, remoteHost);
    MockConnectionPool connectionPool = new MockConnectionPool(hosts, clusterMap, 4);
    ReplicaThread replicaThread = new ReplicaThread("threadtest", new MockFindTokenHelper(storeKeyFactory, replicationConfig), clusterMap, new AtomicInteger(0), localHost.dataNodeId, connectionPool, replicationConfig, replicationMetrics, null, mockStoreKeyConverterFactory.getStoreKeyConverter(), transformer, clusterMap.getMetricRegistry(), false, localHost.dataNodeId.getDatacenterName(), new ResponseHandler(clusterMap), time, null, null, null);
    for (RemoteReplicaInfo remoteReplicaInfo : remoteReplicaInfoList) {
        replicaThread.addRemoteReplicaInfo(remoteReplicaInfo);
    }
    List<RemoteReplicaInfo> actualRemoteReplicaInfoList = replicaThread.getRemoteReplicaInfos().get(remoteHost.dataNodeId);
    Comparator<RemoteReplicaInfo> remoteReplicaInfoComparator = Comparator.comparing(info -> info.getReplicaId().getPartitionId().toPathString());
    Collections.sort(remoteReplicaInfoList, remoteReplicaInfoComparator);
    Collections.sort(actualRemoteReplicaInfoList, remoteReplicaInfoComparator);
    assertEquals("getRemoteReplicaInfos not correct", remoteReplicaInfoList, actualRemoteReplicaInfoList);
    // Test remove remoteReplicaInfo.
    replicaThread.removeRemoteReplicaInfo(remoteReplicaInfoList.get(remoteReplicaInfoList.size() - 1));
    actualRemoteReplicaInfoList = replicaThread.getRemoteReplicaInfos().get(remoteHost.dataNodeId);
    Collections.sort(actualRemoteReplicaInfoList, remoteReplicaInfoComparator);
    remoteReplicaInfoList.remove(remoteReplicaInfoList.size() - 1);
    assertEquals("getRemoteReplicaInfos not correct", remoteReplicaInfoList, actualRemoteReplicaInfoList);
}
Also used : MockStoreKeyConverterFactory(com.github.ambry.store.MockStoreKeyConverterFactory) ValidatingTransformer(com.github.ambry.messageformat.ValidatingTransformer) Transformer(com.github.ambry.store.Transformer) ResponseHandler(com.github.ambry.commons.ResponseHandler) HashMap(java.util.HashMap) MetricRegistry(com.codahale.metrics.MetricRegistry) StoreKeyFactory(com.github.ambry.store.StoreKeyFactory) AtomicInteger(java.util.concurrent.atomic.AtomicInteger) ValidatingTransformer(com.github.ambry.messageformat.ValidatingTransformer) StoreKeyConverter(com.github.ambry.store.StoreKeyConverter) DataNodeId(com.github.ambry.clustermap.DataNodeId) MockDataNodeId(com.github.ambry.clustermap.MockDataNodeId) MockClusterMap(com.github.ambry.clustermap.MockClusterMap) Test(org.junit.Test)
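
The test above exercises two things: grouping RemoteReplicaInfo objects by their DataNodeId and comparing the resulting lists after sorting them by partition path. A minimal sketch of that bookkeeping, using plain Strings instead of ambry's DataNodeId/PartitionId types (the ReplicaRegistrySketch and ReplicaInfo names are hypothetical):

import java.util.ArrayList;
import java.util.Comparator;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

public class ReplicaRegistrySketch {

    /** Hypothetical, simplified replica record: a node name plus a partition path. */
    static final class ReplicaInfo {
        final String node;
        final String partitionPath;

        ReplicaInfo(String node, String partitionPath) {
            this.node = node;
            this.partitionPath = partitionPath;
        }

        @Override
        public String toString() {
            return node + ":" + partitionPath;
        }
    }

    private final Map<String, List<ReplicaInfo>> replicasByNode = new HashMap<>();

    void addRemoteReplicaInfo(ReplicaInfo info) {
        replicasByNode.computeIfAbsent(info.node, n -> new ArrayList<>()).add(info);
    }

    void removeRemoteReplicaInfo(ReplicaInfo info) {
        List<ReplicaInfo> list = replicasByNode.get(info.node);
        if (list != null) {
            list.remove(info);
        }
    }

    /** Return the replicas for a node sorted by partition path, so callers can compare against an expected list. */
    List<ReplicaInfo> getRemoteReplicaInfos(String node) {
        List<ReplicaInfo> copy = new ArrayList<>(replicasByNode.getOrDefault(node, new ArrayList<>()));
        copy.sort(Comparator.comparing((ReplicaInfo r) -> r.partitionPath));
        return copy;
    }

    public static void main(String[] args) {
        ReplicaRegistrySketch registry = new ReplicaRegistrySketch();
        ReplicaInfo first = new ReplicaInfo("remoteNode", "partition-1");
        ReplicaInfo second = new ReplicaInfo("remoteNode", "partition-0");
        registry.addRemoteReplicaInfo(first);
        registry.addRemoteReplicaInfo(second);
        System.out.println(registry.getRemoteReplicaInfos("remoteNode")); // sorted: partition-0 before partition-1
        registry.removeRemoteReplicaInfo(first);
        System.out.println(registry.getRemoteReplicaInfos("remoteNode")); // only partition-0 remains
    }
}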

Example 4 with StoreKeyConverter

use of com.github.ambry.store.StoreKeyConverter in project ambry by linkedin.

the class ReplicationTest method replicationPauseTest.

/**
 * Tests pausing replication for all and individual partitions. Also tests that replication pauses on a store that is
 * not started and resumes when the store is restarted.
 * @throws Exception
 */
@Test
public void replicationPauseTest() throws Exception {
    MockClusterMap clusterMap = new MockClusterMap();
    Pair<MockHost, MockHost> localAndRemoteHosts = getLocalAndRemoteHosts(clusterMap);
    MockHost localHost = localAndRemoteHosts.getFirst();
    MockHost remoteHost = localAndRemoteHosts.getSecond();
    List<PartitionId> partitionIds = clusterMap.getAllPartitionIds(null);
    for (PartitionId partitionId : partitionIds) {
        // add 10 messages to each partition on the remote host only
        addPutMessagesToReplicasOfPartition(partitionId, Collections.singletonList(remoteHost), 10);
    }
    StoreKeyFactory storeKeyFactory = new BlobIdFactory(clusterMap);
    MockStoreKeyConverterFactory storeKeyConverterFactory = new MockStoreKeyConverterFactory(null, null);
    storeKeyConverterFactory.setConversionMap(new HashMap<>());
    storeKeyConverterFactory.setReturnInputIfAbsent(true);
    StoreKeyConverter storeKeyConverter = storeKeyConverterFactory.getStoreKeyConverter();
    Transformer transformer = new ValidatingTransformer(storeKeyFactory, storeKeyConverter);
    int batchSize = 4;
    Pair<Map<DataNodeId, List<RemoteReplicaInfo>>, ReplicaThread> replicasAndThread = getRemoteReplicasAndReplicaThread(batchSize, clusterMap, localHost, remoteHost, storeKeyConverter, transformer, null, null);
    Map<DataNodeId, List<RemoteReplicaInfo>> replicasToReplicate = replicasAndThread.getFirst();
    ReplicaThread replicaThread = replicasAndThread.getSecond();
    Map<PartitionId, Integer> progressTracker = new HashMap<>();
    PartitionId partitionToResumeFirst = clusterMap.getAllPartitionIds(null).get(0);
    PartitionId partitionToShutdownLocally = clusterMap.getAllPartitionIds(null).get(1);
    boolean allStopped = false;
    boolean onlyOneResumed = false;
    boolean allReenabled = false;
    boolean shutdownStoreRestarted = false;
    Set<PartitionId> expectedPaused = new HashSet<>();
    assertEquals("There should be no disabled partitions", expectedPaused, replicaThread.getReplicationDisabledPartitions());
    while (true) {
        replicaThread.replicate();
        boolean replicationDone = true;
        for (RemoteReplicaInfo replicaInfo : replicasToReplicate.get(remoteHost.dataNodeId)) {
            PartitionId id = replicaInfo.getReplicaId().getPartitionId();
            MockFindToken token = (MockFindToken) replicaInfo.getToken();
            int lastProgress = progressTracker.computeIfAbsent(id, id1 -> 0);
            int currentProgress = token.getIndex();
            boolean partDone = currentProgress + 1 == remoteHost.infosByPartition.get(id).size();
            if (allStopped || (onlyOneResumed && !id.equals(partitionToResumeFirst)) || (allReenabled && !shutdownStoreRestarted && id.equals(partitionToShutdownLocally))) {
                assertEquals("There should have been no progress", lastProgress, currentProgress);
            } else if (!partDone) {
                assertTrue("There has been no progress", currentProgress > lastProgress);
                progressTracker.put(id, currentProgress);
            }
            replicationDone = replicationDone && partDone;
        }
        if (!allStopped && !onlyOneResumed && !allReenabled && !shutdownStoreRestarted) {
            replicaThread.controlReplicationForPartitions(clusterMap.getAllPartitionIds(null), false);
            expectedPaused.addAll(clusterMap.getAllPartitionIds(null));
            assertEquals("Disabled partitions sets do not match", expectedPaused, replicaThread.getReplicationDisabledPartitions());
            allStopped = true;
        } else if (!onlyOneResumed && !allReenabled && !shutdownStoreRestarted) {
            // resume replication for first partition
            replicaThread.controlReplicationForPartitions(Collections.singletonList(partitionIds.get(0)), true);
            expectedPaused.remove(partitionIds.get(0));
            assertEquals("Disabled partitions sets do not match", expectedPaused, replicaThread.getReplicationDisabledPartitions());
            allStopped = false;
            onlyOneResumed = true;
        } else if (!allReenabled && !shutdownStoreRestarted) {
            // not removing the first partition
            replicaThread.controlReplicationForPartitions(clusterMap.getAllPartitionIds(null), true);
            // shutdown one local store to pause replication against that store
            localHost.storesByPartition.get(partitionToShutdownLocally).shutdown();
            onlyOneResumed = false;
            allReenabled = true;
            expectedPaused.clear();
            assertEquals("Disabled partitions sets do not match", expectedPaused, replicaThread.getReplicationDisabledPartitions());
        } else if (!shutdownStoreRestarted) {
            localHost.storesByPartition.get(partitionToShutdownLocally).start();
            shutdownStoreRestarted = true;
        }
        if (replicationDone) {
            break;
        }
    }
    Map<PartitionId, List<MessageInfo>> missingInfos = remoteHost.getMissingInfos(localHost.infosByPartition);
    for (Map.Entry<PartitionId, List<MessageInfo>> entry : missingInfos.entrySet()) {
        assertEquals("No infos should be missing", 0, entry.getValue().size());
    }
    Map<PartitionId, List<ByteBuffer>> missingBuffers = remoteHost.getMissingBuffers(localHost.buffersByPartition);
    for (Map.Entry<PartitionId, List<ByteBuffer>> entry : missingBuffers.entrySet()) {
        assertEquals("No buffers should be missing", 0, entry.getValue().size());
    }
}
Also used : ValidatingTransformer(com.github.ambry.messageformat.ValidatingTransformer) Transformer(com.github.ambry.store.Transformer) HashMap(java.util.HashMap) StoreKeyFactory(com.github.ambry.store.StoreKeyFactory) ValidatingTransformer(com.github.ambry.messageformat.ValidatingTransformer) List(java.util.List) ArrayList(java.util.ArrayList) HashSet(java.util.HashSet) MockStoreKeyConverterFactory(com.github.ambry.store.MockStoreKeyConverterFactory) MockPartitionId(com.github.ambry.clustermap.MockPartitionId) PartitionId(com.github.ambry.clustermap.PartitionId) BlobIdFactory(com.github.ambry.commons.BlobIdFactory) AtomicInteger(java.util.concurrent.atomic.AtomicInteger) StoreKeyConverter(com.github.ambry.store.StoreKeyConverter) Map(java.util.Map) HashMap(java.util.HashMap) ClusterMap(com.github.ambry.clustermap.ClusterMap) MockClusterMap(com.github.ambry.clustermap.MockClusterMap) DataNodeId(com.github.ambry.clustermap.DataNodeId) MockDataNodeId(com.github.ambry.clustermap.MockDataNodeId) MockClusterMap(com.github.ambry.clustermap.MockClusterMap) Test(org.junit.Test)
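
The pause/resume mechanics that replicationPauseTest drives boil down to a set of disabled partition ids: controlReplicationForPartitions adds or removes entries, getReplicationDisabledPartitions exposes the set, and replicate() skips any replica whose partition is in it (the replicationDisabledPartitions.contains(...) check in Example 1). Below is a minimal sketch with String partition ids and invented class/method bodies, not ambry's implementation.

import java.util.Arrays;
import java.util.Collection;
import java.util.Collections;
import java.util.HashSet;
import java.util.Set;

public class ReplicationPauseControlSketch {

    // Partitions for which replication is currently disabled (paused).
    private final Set<String> replicationDisabledPartitions = new HashSet<>();

    /** Mirrors controlReplicationForPartitions: enable == true resumes, enable == false pauses. */
    synchronized void controlReplicationForPartitions(Collection<String> partitions, boolean enable) {
        if (enable) {
            replicationDisabledPartitions.removeAll(partitions);
        } else {
            replicationDisabledPartitions.addAll(partitions);
        }
    }

    synchronized Set<String> getReplicationDisabledPartitions() {
        return Collections.unmodifiableSet(new HashSet<>(replicationDisabledPartitions));
    }

    /** The replication loop consults the set before working on a partition, like the check in replicate() above. */
    synchronized boolean shouldSkip(String partition) {
        return replicationDisabledPartitions.contains(partition);
    }

    public static void main(String[] args) {
        ReplicationPauseControlSketch control = new ReplicationPauseControlSketch();
        control.controlReplicationForPartitions(Arrays.asList("p0", "p1", "p2"), false); // pause all
        control.controlReplicationForPartitions(Collections.singletonList("p0"), true);  // resume p0 only
        System.out.println(control.shouldSkip("p0")); // false
        System.out.println(control.shouldSkip("p1")); // true
        System.out.println(control.getReplicationDisabledPartitions()); // p1 and p2 remain paused
    }
}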

Example 5 with StoreKeyConverter

use of com.github.ambry.store.StoreKeyConverter in project ambry by linkedin.

the class ReplicationTest method limitMaxPartitionCountPerRequestTest.

/**
 * Test that max partition count per request is honored in {@link ReplicaThread} if there are too many partitions to
 * replicate from the remote node.
 * @throws Exception
 */
@Test
public void limitMaxPartitionCountPerRequestTest() throws Exception {
    MockClusterMap clusterMap = new MockClusterMap();
    Pair<MockHost, MockHost> localAndRemoteHosts = getLocalAndRemoteHosts(clusterMap);
    MockHost localHost = localAndRemoteHosts.getFirst();
    MockHost remoteHost = localAndRemoteHosts.getSecond();
    List<PartitionId> partitionIds = clusterMap.getAllPartitionIds(null);
    for (PartitionId partitionId : partitionIds) {
        // add 5 messages into each partition and place them on the remote host only
        addPutMessagesToReplicasOfPartition(partitionId, Collections.singletonList(remoteHost), 5);
    }
    StoreKeyFactory storeKeyFactory = Utils.getObj("com.github.ambry.commons.BlobIdFactory", clusterMap);
    MockStoreKeyConverterFactory mockStoreKeyConverterFactory = new MockStoreKeyConverterFactory(null, null);
    mockStoreKeyConverterFactory.setReturnInputIfAbsent(true);
    mockStoreKeyConverterFactory.setConversionMap(new HashMap<>());
    // we set batchSize to 10 in order to get all messages from one partition within a single replication cycle
    int batchSize = 10;
    StoreKeyConverter storeKeyConverter = mockStoreKeyConverterFactory.getStoreKeyConverter();
    Transformer transformer = new ValidatingTransformer(storeKeyFactory, storeKeyConverter);
    // we set the max partition count per request to 5, which forces the thread to split replication into two batches.
    // (Note that the number of partitions to replicate is 10, so they are replicated via two separate requests.)
    ReplicationConfig initialReplicationConfig = replicationConfig;
    properties.setProperty("replication.max.partition.count.per.request", String.valueOf(5));
    replicationConfig = new ReplicationConfig(new VerifiableProperties(properties));
    CountDownLatch replicationCompleted = new CountDownLatch(partitionIds.size());
    AtomicReference<Exception> exception = new AtomicReference<>();
    Pair<Map<DataNodeId, List<RemoteReplicaInfo>>, ReplicaThread> replicasAndThread = getRemoteReplicasAndReplicaThread(batchSize, clusterMap, localHost, remoteHost, storeKeyConverter, transformer, (store, messageInfos) -> {
        try {
            replicationCompleted.countDown();
            // for each partition, replication should complete within a single cycle (fetching once should suffice),
            // so we shut down the local store once the blobs are written. This avoids unnecessary metadata requests
            // being sent to the remote host.
            store.shutdown();
        } catch (Exception e) {
            exception.set(e);
        }
    }, null);
    ReplicaThread replicaThread = replicasAndThread.getSecond();
    Thread thread = Utils.newThread(replicaThread, false);
    thread.start();
    assertTrue("Replication didn't complete within 10 secs", replicationCompleted.await(10, TimeUnit.SECONDS));
    // verify the # of replicas per metadata request is limited to 5 (note that there are 10 replicas to replicate;
    // they are split into 2 small batches and replicated in separate requests)
    assertEquals("There should be 2 metadata requests and each has 5 replicas to replicate", Arrays.asList(5, 5), remoteHost.replicaCountPerRequestTracker);
    // shutdown
    replicaThread.shutdown();
    if (exception.get() != null) {
        throw exception.get();
    }
    replicationConfig = initialReplicationConfig;
}
Also used : MockStoreKeyConverterFactory(com.github.ambry.store.MockStoreKeyConverterFactory) ValidatingTransformer(com.github.ambry.messageformat.ValidatingTransformer) Transformer(com.github.ambry.store.Transformer) ReplicationConfig(com.github.ambry.config.ReplicationConfig) VerifiableProperties(com.github.ambry.config.VerifiableProperties) AtomicReference(java.util.concurrent.atomic.AtomicReference) MockPartitionId(com.github.ambry.clustermap.MockPartitionId) PartitionId(com.github.ambry.clustermap.PartitionId) CountDownLatch(java.util.concurrent.CountDownLatch) StateTransitionException(com.github.ambry.clustermap.StateTransitionException) StoreKeyFactory(com.github.ambry.store.StoreKeyFactory) ValidatingTransformer(com.github.ambry.messageformat.ValidatingTransformer) StoreKeyConverter(com.github.ambry.store.StoreKeyConverter) Map(java.util.Map) HashMap(java.util.HashMap) ClusterMap(com.github.ambry.clustermap.ClusterMap) MockClusterMap(com.github.ambry.clustermap.MockClusterMap) MockClusterMap(com.github.ambry.clustermap.MockClusterMap) Test(org.junit.Test)
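
The assertion at the end of the test expects Arrays.asList(5, 5): 10 partitions with replication.max.partition.count.per.request set to 5 yield two metadata requests carrying 5 replicas each. The arithmetic is just ceiling division plus a running remainder; the helper below is a hypothetical illustration, not part of ambry.

import java.util.ArrayList;
import java.util.List;

public class RequestCountSketch {

    /** Expected number of metadata requests when replicas are split into batches of maxPerRequest. */
    static int expectedRequests(int replicaCount, int maxPerRequest) {
        return (replicaCount + maxPerRequest - 1) / maxPerRequest; // ceiling division
    }

    /** Expected replica count carried by each request, e.g. 10 replicas with a limit of 5 -> [5, 5]. */
    static List<Integer> expectedReplicaCountPerRequest(int replicaCount, int maxPerRequest) {
        List<Integer> counts = new ArrayList<>();
        int remaining = replicaCount;
        while (remaining > 0) {
            counts.add(Math.min(maxPerRequest, remaining));
            remaining -= maxPerRequest;
        }
        return counts;
    }

    public static void main(String[] args) {
        System.out.println(expectedRequests(10, 5));               // 2
        System.out.println(expectedReplicaCountPerRequest(10, 5)); // [5, 5]
    }
}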

Aggregations

StoreKeyConverter (com.github.ambry.store.StoreKeyConverter)8 Transformer (com.github.ambry.store.Transformer)7 HashMap (java.util.HashMap)7 ClusterMap (com.github.ambry.clustermap.ClusterMap)6 PartitionId (com.github.ambry.clustermap.PartitionId)6 ArrayList (java.util.ArrayList)6 Map (java.util.Map)6 DataNodeId (com.github.ambry.clustermap.DataNodeId)5 MockClusterMap (com.github.ambry.clustermap.MockClusterMap)5 MockStoreKeyConverterFactory (com.github.ambry.store.MockStoreKeyConverterFactory)5 StoreKeyFactory (com.github.ambry.store.StoreKeyFactory)5 List (java.util.List)5 MockPartitionId (com.github.ambry.clustermap.MockPartitionId)4 ResponseHandler (com.github.ambry.commons.ResponseHandler)4 HashSet (java.util.HashSet)4 AtomicInteger (java.util.concurrent.atomic.AtomicInteger)4 MetricRegistry (com.codahale.metrics.MetricRegistry)3 ReplicaId (com.github.ambry.clustermap.ReplicaId)3 ReplicationConfig (com.github.ambry.config.ReplicationConfig)3 ValidatingTransformer (com.github.ambry.messageformat.ValidatingTransformer)3