Search in sources :

Example 26 with DataNodeId

use of com.github.ambry.clustermap.DataNodeId in project ambry by linkedin.

the class ReplicationTest method replicationLagMetricAndSyncUpTest.

/**
 * Tests {@link ReplicationMetrics#getMaxLagForPartition(PartitionId)}
 * @throws Exception
 */
@Test
public void replicationLagMetricAndSyncUpTest() throws Exception {
    MockClusterMap clusterMap = new MockClusterMap();
    ClusterMapConfig clusterMapConfig = new ClusterMapConfig(verifiableProperties);
    AmbryReplicaSyncUpManager replicaSyncUpService = new AmbryReplicaSyncUpManager(clusterMapConfig);
    Pair<MockHost, MockHost> localAndRemoteHosts = getLocalAndRemoteHosts(clusterMap);
    MockHost localHost = localAndRemoteHosts.getFirst();
    MockHost remoteHost1 = localAndRemoteHosts.getSecond();
    // create another remoteHost2 that shares spacial partition with localHost and remoteHost1
    PartitionId specialPartitionId = clusterMap.getWritablePartitionIds(MockClusterMap.SPECIAL_PARTITION_CLASS).get(0);
    MockHost remoteHost2 = new MockHost(specialPartitionId.getReplicaIds().get(2).getDataNodeId(), clusterMap);
    MockStoreKeyConverterFactory storeKeyConverterFactory = new MockStoreKeyConverterFactory(null, null);
    storeKeyConverterFactory.setConversionMap(new HashMap<>());
    storeKeyConverterFactory.setReturnInputIfAbsent(true);
    MockStoreKeyConverterFactory.MockStoreKeyConverter storeKeyConverter = storeKeyConverterFactory.getStoreKeyConverter();
    int batchSize = 4;
    List<PartitionId> partitionIds = clusterMap.getWritablePartitionIds(null);
    for (int i = 0; i < partitionIds.size(); i++) {
        PartitionId partitionId = partitionIds.get(i);
        // add batchSize + 1 messages to the remoteHost1 so that two rounds of replication is needed.
        addPutMessagesToReplicasOfPartition(partitionId, Collections.singletonList(remoteHost1), batchSize + 1);
    }
    // add batchSize - 1 messages to the remoteHost2 so that localHost can catch up during one cycle of replication
    for (ReplicaId replicaId : clusterMap.getReplicaIds(remoteHost2.dataNodeId)) {
        addPutMessagesToReplicasOfPartition(replicaId.getPartitionId(), Collections.singletonList(remoteHost2), batchSize - 1);
    }
    StoreKeyFactory storeKeyFactory = new BlobIdFactory(clusterMap);
    Transformer transformer = new BlobIdTransformer(storeKeyFactory, storeKeyConverter);
    Pair<Map<DataNodeId, List<RemoteReplicaInfo>>, ReplicaThread> replicasAndThread1 = getRemoteReplicasAndReplicaThread(batchSize, clusterMap, localHost, remoteHost1, storeKeyConverter, transformer, null, replicaSyncUpService);
    Map<DataNodeId, List<RemoteReplicaInfo>> replicasToReplicate1 = replicasAndThread1.getFirst();
    ReplicaThread replicaThread1 = replicasAndThread1.getSecond();
    // mock Bootstrap-To-Standby transition in ReplicationManager: 1. update store current state; 2. initiate bootstrap
    replicasToReplicate1.get(remoteHost1.dataNodeId).forEach(info -> info.getLocalStore().setCurrentState(ReplicaState.BOOTSTRAP));
    clusterMap.getReplicaIds(localHost.dataNodeId).forEach(replicaSyncUpService::initiateBootstrap);
    List<ReplicaThread.ExchangeMetadataResponse> response = replicaThread1.exchangeMetadata(new MockConnectionPool.MockConnection(remoteHost1, batchSize), replicasToReplicate1.get(remoteHost1.dataNodeId));
    replicaThread1.fixMissingStoreKeys(new MockConnectionPool.MockConnection(remoteHost1, batchSize), replicasToReplicate1.get(remoteHost1.dataNodeId), response, false);
    for (PartitionId partitionId : partitionIds) {
        List<MessageInfo> allMessageInfos = localAndRemoteHosts.getSecond().infosByPartition.get(partitionId);
        long expectedLag = allMessageInfos.subList(batchSize, allMessageInfos.size()).stream().mapToLong(MessageInfo::getSize).sum();
        assertEquals("Replication lag doesn't match expected value", expectedLag, replicaThread1.getReplicationMetrics().getMaxLagForPartition(partitionId));
    }
    response = replicaThread1.exchangeMetadata(new MockConnectionPool.MockConnection(remoteHost1, batchSize), replicasToReplicate1.get(remoteHost1.dataNodeId));
    replicaThread1.fixMissingStoreKeys(new MockConnectionPool.MockConnection(remoteHost1, batchSize), replicasToReplicate1.get(remoteHost1.dataNodeId), response, false);
    for (PartitionId partitionId : partitionIds) {
        assertEquals("Replication lag should equal to 0", 0, replicaThread1.getReplicationMetrics().getMaxLagForPartition(partitionId));
    }
    // replicate with remoteHost2 to ensure special replica has caught up with enough peers
    Pair<Map<DataNodeId, List<RemoteReplicaInfo>>, ReplicaThread> replicasAndThread2 = getRemoteReplicasAndReplicaThread(batchSize, clusterMap, localHost, remoteHost2, storeKeyConverter, transformer, null, replicaSyncUpService);
    Map<DataNodeId, List<RemoteReplicaInfo>> replicasToReplicate2 = replicasAndThread2.getFirst();
    ReplicaThread replicaThread2 = replicasAndThread2.getSecond();
    // initiate bootstrap on replica of special partition
    RemoteReplicaInfo specialReplicaInfo = replicasToReplicate2.get(remoteHost2.dataNodeId).stream().filter(info -> info.getReplicaId().getPartitionId() == specialPartitionId).findFirst().get();
    specialReplicaInfo.getLocalStore().setCurrentState(ReplicaState.BOOTSTRAP);
    replicaSyncUpService.initiateBootstrap(specialReplicaInfo.getLocalReplicaId());
    response = replicaThread2.exchangeMetadata(new MockConnectionPool.MockConnection(remoteHost2, batchSize), replicasToReplicate2.get(remoteHost2.dataNodeId));
    replicaThread2.fixMissingStoreKeys(new MockConnectionPool.MockConnection(remoteHost2, batchSize), replicasToReplicate2.get(remoteHost2.dataNodeId), response, false);
    // verify replica of special partition has completed bootstrap and becomes standby
    assertEquals("Store state is not expected", ReplicaState.STANDBY, specialReplicaInfo.getLocalStore().getCurrentState());
}
Also used : ValidatingTransformer(com.github.ambry.messageformat.ValidatingTransformer) Transformer(com.github.ambry.store.Transformer) StoreKeyFactory(com.github.ambry.store.StoreKeyFactory) List(java.util.List) ArrayList(java.util.ArrayList) MockStoreKeyConverterFactory(com.github.ambry.store.MockStoreKeyConverterFactory) MockPartitionId(com.github.ambry.clustermap.MockPartitionId) PartitionId(com.github.ambry.clustermap.PartitionId) ClusterMapConfig(com.github.ambry.config.ClusterMapConfig) MockReplicaId(com.github.ambry.clustermap.MockReplicaId) ReplicaId(com.github.ambry.clustermap.ReplicaId) BlobIdFactory(com.github.ambry.commons.BlobIdFactory) MessageInfo(com.github.ambry.store.MessageInfo) Map(java.util.Map) HashMap(java.util.HashMap) ClusterMap(com.github.ambry.clustermap.ClusterMap) MockClusterMap(com.github.ambry.clustermap.MockClusterMap) DataNodeId(com.github.ambry.clustermap.DataNodeId) MockDataNodeId(com.github.ambry.clustermap.MockDataNodeId) AmbryReplicaSyncUpManager(com.github.ambry.clustermap.AmbryReplicaSyncUpManager) MockClusterMap(com.github.ambry.clustermap.MockClusterMap) Test(org.junit.Test)

Example 27 with DataNodeId

use of com.github.ambry.clustermap.DataNodeId in project ambry by linkedin.

the class ReplicationTest method replicationPauseTest.

/**
 * Tests pausing replication for all and individual partitions. Also tests replication will pause on store that is not
 * started and resume when store restarted.
 * @throws Exception
 */
@Test
public void replicationPauseTest() throws Exception {
    MockClusterMap clusterMap = new MockClusterMap();
    Pair<MockHost, MockHost> localAndRemoteHosts = getLocalAndRemoteHosts(clusterMap);
    MockHost localHost = localAndRemoteHosts.getFirst();
    MockHost remoteHost = localAndRemoteHosts.getSecond();
    List<PartitionId> partitionIds = clusterMap.getAllPartitionIds(null);
    for (PartitionId partitionId : partitionIds) {
        // add  10 messages to the remote host only
        addPutMessagesToReplicasOfPartition(partitionId, Collections.singletonList(remoteHost), 10);
    }
    StoreKeyFactory storeKeyFactory = new BlobIdFactory(clusterMap);
    MockStoreKeyConverterFactory storeKeyConverterFactory = new MockStoreKeyConverterFactory(null, null);
    storeKeyConverterFactory.setConversionMap(new HashMap<>());
    storeKeyConverterFactory.setReturnInputIfAbsent(true);
    StoreKeyConverter storeKeyConverter = storeKeyConverterFactory.getStoreKeyConverter();
    Transformer transformer = new ValidatingTransformer(storeKeyFactory, storeKeyConverter);
    int batchSize = 4;
    Pair<Map<DataNodeId, List<RemoteReplicaInfo>>, ReplicaThread> replicasAndThread = getRemoteReplicasAndReplicaThread(batchSize, clusterMap, localHost, remoteHost, storeKeyConverter, transformer, null, null);
    Map<DataNodeId, List<RemoteReplicaInfo>> replicasToReplicate = replicasAndThread.getFirst();
    ReplicaThread replicaThread = replicasAndThread.getSecond();
    Map<PartitionId, Integer> progressTracker = new HashMap<>();
    PartitionId partitionToResumeFirst = clusterMap.getAllPartitionIds(null).get(0);
    PartitionId partitionToShutdownLocally = clusterMap.getAllPartitionIds(null).get(1);
    boolean allStopped = false;
    boolean onlyOneResumed = false;
    boolean allReenabled = false;
    boolean shutdownStoreRestarted = false;
    Set<PartitionId> expectedPaused = new HashSet<>();
    assertEquals("There should be no disabled partitions", expectedPaused, replicaThread.getReplicationDisabledPartitions());
    while (true) {
        replicaThread.replicate();
        boolean replicationDone = true;
        for (RemoteReplicaInfo replicaInfo : replicasToReplicate.get(remoteHost.dataNodeId)) {
            PartitionId id = replicaInfo.getReplicaId().getPartitionId();
            MockFindToken token = (MockFindToken) replicaInfo.getToken();
            int lastProgress = progressTracker.computeIfAbsent(id, id1 -> 0);
            int currentProgress = token.getIndex();
            boolean partDone = currentProgress + 1 == remoteHost.infosByPartition.get(id).size();
            if (allStopped || (onlyOneResumed && !id.equals(partitionToResumeFirst)) || (allReenabled && !shutdownStoreRestarted && id.equals(partitionToShutdownLocally))) {
                assertEquals("There should have been no progress", lastProgress, currentProgress);
            } else if (!partDone) {
                assertTrue("There has been no progress", currentProgress > lastProgress);
                progressTracker.put(id, currentProgress);
            }
            replicationDone = replicationDone && partDone;
        }
        if (!allStopped && !onlyOneResumed && !allReenabled && !shutdownStoreRestarted) {
            replicaThread.controlReplicationForPartitions(clusterMap.getAllPartitionIds(null), false);
            expectedPaused.addAll(clusterMap.getAllPartitionIds(null));
            assertEquals("Disabled partitions sets do not match", expectedPaused, replicaThread.getReplicationDisabledPartitions());
            allStopped = true;
        } else if (!onlyOneResumed && !allReenabled && !shutdownStoreRestarted) {
            // resume replication for first partition
            replicaThread.controlReplicationForPartitions(Collections.singletonList(partitionIds.get(0)), true);
            expectedPaused.remove(partitionIds.get(0));
            assertEquals("Disabled partitions sets do not match", expectedPaused, replicaThread.getReplicationDisabledPartitions());
            allStopped = false;
            onlyOneResumed = true;
        } else if (!allReenabled && !shutdownStoreRestarted) {
            // not removing the first partition
            replicaThread.controlReplicationForPartitions(clusterMap.getAllPartitionIds(null), true);
            // shutdown one local store to pause replication against that store
            localHost.storesByPartition.get(partitionToShutdownLocally).shutdown();
            onlyOneResumed = false;
            allReenabled = true;
            expectedPaused.clear();
            assertEquals("Disabled partitions sets do not match", expectedPaused, replicaThread.getReplicationDisabledPartitions());
        } else if (!shutdownStoreRestarted) {
            localHost.storesByPartition.get(partitionToShutdownLocally).start();
            shutdownStoreRestarted = true;
        }
        if (replicationDone) {
            break;
        }
    }
    Map<PartitionId, List<MessageInfo>> missingInfos = remoteHost.getMissingInfos(localHost.infosByPartition);
    for (Map.Entry<PartitionId, List<MessageInfo>> entry : missingInfos.entrySet()) {
        assertEquals("No infos should be missing", 0, entry.getValue().size());
    }
    Map<PartitionId, List<ByteBuffer>> missingBuffers = remoteHost.getMissingBuffers(localHost.buffersByPartition);
    for (Map.Entry<PartitionId, List<ByteBuffer>> entry : missingBuffers.entrySet()) {
        assertEquals("No buffers should be missing", 0, entry.getValue().size());
    }
}
Also used : ValidatingTransformer(com.github.ambry.messageformat.ValidatingTransformer) Transformer(com.github.ambry.store.Transformer) HashMap(java.util.HashMap) StoreKeyFactory(com.github.ambry.store.StoreKeyFactory) ValidatingTransformer(com.github.ambry.messageformat.ValidatingTransformer) List(java.util.List) ArrayList(java.util.ArrayList) HashSet(java.util.HashSet) MockStoreKeyConverterFactory(com.github.ambry.store.MockStoreKeyConverterFactory) MockPartitionId(com.github.ambry.clustermap.MockPartitionId) PartitionId(com.github.ambry.clustermap.PartitionId) BlobIdFactory(com.github.ambry.commons.BlobIdFactory) AtomicInteger(java.util.concurrent.atomic.AtomicInteger) StoreKeyConverter(com.github.ambry.store.StoreKeyConverter) Map(java.util.Map) HashMap(java.util.HashMap) ClusterMap(com.github.ambry.clustermap.ClusterMap) MockClusterMap(com.github.ambry.clustermap.MockClusterMap) DataNodeId(com.github.ambry.clustermap.DataNodeId) MockDataNodeId(com.github.ambry.clustermap.MockDataNodeId) MockClusterMap(com.github.ambry.clustermap.MockClusterMap) Test(org.junit.Test)

Example 28 with DataNodeId

use of com.github.ambry.clustermap.DataNodeId in project ambry by linkedin.

the class ReplicationTest method addAndRemoveReplicaTest.

/**
 * Test dynamically add/remove replica in {@link ReplicationManager}
 * @throws Exception
 */
@Test
public void addAndRemoveReplicaTest() throws Exception {
    MockClusterMap clusterMap = new MockClusterMap();
    ClusterMapConfig clusterMapConfig = new ClusterMapConfig(verifiableProperties);
    StoreConfig storeConfig = new StoreConfig(verifiableProperties);
    DataNodeId dataNodeId = clusterMap.getDataNodeIds().get(0);
    MockStoreKeyConverterFactory storeKeyConverterFactory = new MockStoreKeyConverterFactory(null, null);
    storeKeyConverterFactory.setConversionMap(new HashMap<>());
    StorageManager storageManager = new StorageManager(storeConfig, new DiskManagerConfig(verifiableProperties), Utils.newScheduler(1, true), new MetricRegistry(), null, clusterMap, dataNodeId, null, null, new MockTime(), null, new InMemAccountService(false, false));
    storageManager.start();
    MockReplicationManager replicationManager = new MockReplicationManager(replicationConfig, clusterMapConfig, storeConfig, storageManager, clusterMap, dataNodeId, storeKeyConverterFactory, null);
    ReplicaId replicaToTest = clusterMap.getReplicaIds(dataNodeId).get(0);
    // Attempting to add replica that already exists should fail
    assertFalse("Adding an existing replica should fail", replicationManager.addReplica(replicaToTest));
    // Create a brand new replica that sits on one of the disk of datanode, add it into replication manager
    PartitionId newPartition = clusterMap.createNewPartition(clusterMap.getDataNodes());
    for (ReplicaId replicaId : newPartition.getReplicaIds()) {
        if (replicaId.getDataNodeId() == dataNodeId) {
            replicaToTest = replicaId;
            break;
        }
    }
    // Before adding replica, partitionToPartitionInfo and mountPathToPartitionInfos should not contain new partition
    assertFalse("partitionToPartitionInfo should not contain new partition", replicationManager.getPartitionToPartitionInfoMap().containsKey(newPartition));
    for (PartitionInfo partitionInfo : replicationManager.getMountPathToPartitionInfosMap().get(replicaToTest.getMountPath())) {
        assertNotSame("mountPathToPartitionInfos should not contain new partition", partitionInfo.getPartitionId(), newPartition);
    }
    // Add new replica to replication manager
    assertTrue("Adding new replica to replication manager should succeed", replicationManager.addReplica(replicaToTest));
    // After adding replica, partitionToPartitionInfo and mountPathToPartitionInfos should contain new partition
    assertTrue("partitionToPartitionInfo should contain new partition", replicationManager.getPartitionToPartitionInfoMap().containsKey(newPartition));
    Optional<PartitionInfo> newPartitionInfo = replicationManager.getMountPathToPartitionInfosMap().get(replicaToTest.getMountPath()).stream().filter(partitionInfo -> partitionInfo.getPartitionId() == newPartition).findAny();
    assertTrue("mountPathToPartitionInfos should contain new partition info", newPartitionInfo.isPresent());
    // Verify that all remoteReplicaInfos of new added replica have assigned thread
    for (RemoteReplicaInfo remoteReplicaInfo : newPartitionInfo.get().getRemoteReplicaInfos()) {
        assertNotNull("The remote replica should be assigned to one replica thread", remoteReplicaInfo.getReplicaThread());
    }
    // Remove replica
    assertTrue("Remove replica from replication manager should succeed", replicationManager.removeReplica(replicaToTest));
    // Verify replica is removed, so partitionToPartitionInfo and mountPathToPartitionInfos should not contain new partition
    assertFalse("partitionToPartitionInfo should not contain new partition", replicationManager.getPartitionToPartitionInfoMap().containsKey(newPartition));
    for (PartitionInfo partitionInfo : replicationManager.getMountPathToPartitionInfosMap().get(replicaToTest.getMountPath())) {
        assertNotSame("mountPathToPartitionInfos should not contain new partition", partitionInfo.getPartitionId(), newPartition);
    }
    // Verify that none of remoteReplicaInfo should have assigned thread
    for (RemoteReplicaInfo remoteReplicaInfo : newPartitionInfo.get().getRemoteReplicaInfos()) {
        assertNull("The remote replica should be assigned to one replica thread", remoteReplicaInfo.getReplicaThread());
    }
    // Remove the same replica that doesn't exist should be no-op
    ReplicationManager mockManager = Mockito.spy(replicationManager);
    assertFalse("Remove non-existent replica should return false", replicationManager.removeReplica(replicaToTest));
    verify(mockManager, never()).removeRemoteReplicaInfoFromReplicaThread(anyList());
    storageManager.shutdown();
}
Also used : DiskManagerConfig(com.github.ambry.config.DiskManagerConfig) CoreMatchers(org.hamcrest.CoreMatchers) Arrays(java.util.Arrays) StorageManager(com.github.ambry.store.StorageManager) StoreKeyConverter(com.github.ambry.store.StoreKeyConverter) DataNodeId(com.github.ambry.clustermap.DataNodeId) Random(java.util.Random) ByteBuffer(java.nio.ByteBuffer) MockReplicaId(com.github.ambry.clustermap.MockReplicaId) PortType(com.github.ambry.network.PortType) AtomicInteger(java.util.concurrent.atomic.AtomicInteger) TestUtils(com.github.ambry.utils.TestUtils) Map(java.util.Map) DeleteMessageFormatInputStream(com.github.ambry.messageformat.DeleteMessageFormatInputStream) Parameterized(org.junit.runners.Parameterized) ReplicationConfig(com.github.ambry.config.ReplicationConfig) Container(com.github.ambry.account.Container) DiskManagerConfig(com.github.ambry.config.DiskManagerConfig) Predicate(java.util.function.Predicate) ValidatingTransformer(com.github.ambry.messageformat.ValidatingTransformer) Collection(java.util.Collection) StoreKeyFactory(com.github.ambry.store.StoreKeyFactory) Set(java.util.Set) Utils(com.github.ambry.utils.Utils) MockPartitionId(com.github.ambry.clustermap.MockPartitionId) Collectors(java.util.stream.Collectors) ConnectedChannel(com.github.ambry.network.ConnectedChannel) CountDownLatch(java.util.concurrent.CountDownLatch) StoreKey(com.github.ambry.store.StoreKey) List(java.util.List) ReplicaMetadataResponse(com.github.ambry.protocol.ReplicaMetadataResponse) PartitionStateChangeListener(com.github.ambry.clustermap.PartitionStateChangeListener) MockTime(com.github.ambry.utils.MockTime) Account(com.github.ambry.account.Account) Optional(java.util.Optional) TransitionErrorCode(com.github.ambry.clustermap.StateTransitionException.TransitionErrorCode) MockId(com.github.ambry.store.MockId) InMemAccountService(com.github.ambry.account.InMemAccountService) AmbryReplicaSyncUpManager(com.github.ambry.clustermap.AmbryReplicaSyncUpManager) PartitionId(com.github.ambry.clustermap.PartitionId) BlobId(com.github.ambry.commons.BlobId) ResponseHandler(com.github.ambry.commons.ResponseHandler) ClusterMapChangeListener(com.github.ambry.clustermap.ClusterMapChangeListener) RunWith(org.junit.runner.RunWith) AtomicBoolean(java.util.concurrent.atomic.AtomicBoolean) HashMap(java.util.HashMap) AtomicReference(java.util.concurrent.atomic.AtomicReference) ArrayList(java.util.ArrayList) HashSet(java.util.HashSet) Transformer(com.github.ambry.store.Transformer) MockHelixParticipant(com.github.ambry.clustermap.MockHelixParticipant) CommonTestUtils(com.github.ambry.commons.CommonTestUtils) ReplicaMetadataResponseInfo(com.github.ambry.protocol.ReplicaMetadataResponseInfo) MockStoreKeyConverterFactory(com.github.ambry.store.MockStoreKeyConverterFactory) Time(com.github.ambry.utils.Time) MockDataNodeId(com.github.ambry.clustermap.MockDataNodeId) MockMessageWriteSet(com.github.ambry.store.MockMessageWriteSet) ReplicaState(com.github.ambry.clustermap.ReplicaState) StateModelListenerType(com.github.ambry.clustermap.StateModelListenerType) StoreConfig(com.github.ambry.config.StoreConfig) MetricRegistry(com.codahale.metrics.MetricRegistry) Properties(java.util.Properties) Pair(com.github.ambry.utils.Pair) Iterator(java.util.Iterator) ReplicaType(com.github.ambry.clustermap.ReplicaType) VerifiableProperties(com.github.ambry.config.VerifiableProperties) ClusterMap(com.github.ambry.clustermap.ClusterMap) Test(org.junit.Test) BlobIdFactory(com.github.ambry.commons.BlobIdFactory) File(java.io.File) TimeUnit(java.util.concurrent.TimeUnit) Store(com.github.ambry.store.Store) Mockito(org.mockito.Mockito) MessageInfo(com.github.ambry.store.MessageInfo) StateTransitionException(com.github.ambry.clustermap.StateTransitionException) ReplicaId(com.github.ambry.clustermap.ReplicaId) ClusterMapConfig(com.github.ambry.config.ClusterMapConfig) Port(com.github.ambry.network.Port) Comparator(java.util.Comparator) Assert(org.junit.Assert) Collections(java.util.Collections) MockClusterMap(com.github.ambry.clustermap.MockClusterMap) MockStoreKeyConverterFactory(com.github.ambry.store.MockStoreKeyConverterFactory) MetricRegistry(com.codahale.metrics.MetricRegistry) StorageManager(com.github.ambry.store.StorageManager) MockPartitionId(com.github.ambry.clustermap.MockPartitionId) PartitionId(com.github.ambry.clustermap.PartitionId) ClusterMapConfig(com.github.ambry.config.ClusterMapConfig) MockReplicaId(com.github.ambry.clustermap.MockReplicaId) ReplicaId(com.github.ambry.clustermap.ReplicaId) InMemAccountService(com.github.ambry.account.InMemAccountService) StoreConfig(com.github.ambry.config.StoreConfig) DataNodeId(com.github.ambry.clustermap.DataNodeId) MockDataNodeId(com.github.ambry.clustermap.MockDataNodeId) MockTime(com.github.ambry.utils.MockTime) MockClusterMap(com.github.ambry.clustermap.MockClusterMap) Test(org.junit.Test)

Example 29 with DataNodeId

use of com.github.ambry.clustermap.DataNodeId in project ambry by linkedin.

the class ReplicationTest method replicaThreadSleepTest.

@Test
public void replicaThreadSleepTest() throws Exception {
    MockClusterMap clusterMap = new MockClusterMap();
    Pair<MockHost, MockHost> localAndRemoteHosts = getLocalAndRemoteHosts(clusterMap);
    MockHost localHost = localAndRemoteHosts.getFirst();
    MockHost remoteHost = localAndRemoteHosts.getSecond();
    long expectedThrottleDurationMs = localHost.dataNodeId.getDatacenterName().equals(remoteHost.dataNodeId.getDatacenterName()) ? replicationConfig.replicationIntraReplicaThreadThrottleSleepDurationMs : replicationConfig.replicationInterReplicaThreadThrottleSleepDurationMs;
    MockStoreKeyConverterFactory storeKeyConverterFactory = new MockStoreKeyConverterFactory(null, null);
    storeKeyConverterFactory.setConversionMap(new HashMap<>());
    storeKeyConverterFactory.setReturnInputIfAbsent(true);
    MockStoreKeyConverterFactory.MockStoreKeyConverter storeKeyConverter = storeKeyConverterFactory.getStoreKeyConverter();
    StoreKeyFactory storeKeyFactory = new BlobIdFactory(clusterMap);
    Transformer transformer = new BlobIdTransformer(storeKeyFactory, storeKeyConverter);
    int batchSize = 4;
    Pair<Map<DataNodeId, List<RemoteReplicaInfo>>, ReplicaThread> replicasAndThread = getRemoteReplicasAndReplicaThread(batchSize, clusterMap, localHost, remoteHost, storeKeyConverter, transformer, null, null);
    Map<DataNodeId, List<RemoteReplicaInfo>> replicasToReplicate = replicasAndThread.getFirst();
    ReplicaThread replicaThread = replicasAndThread.getSecond();
    // populate data, add 1 messages to both hosts.
    for (PartitionId partitionId : clusterMap.getAllPartitionIds(null)) {
        addPutMessagesToReplicasOfPartition(partitionId, Arrays.asList(localHost, remoteHost), 1);
    }
    // tests to verify replica thread throttling and idling functions in the following steps:
    // 1. all replicas are in sync, thread level sleep and replica quarantine are both enabled.
    // 2. add put messages to some replica and verify that replication for replicas remain disabled.
    // 3. forward the time so replication for replicas are re-enabled and check replication resumes.
    // 4. add more put messages to ensure replication happens continuously when needed and is throttled appropriately.
    // 1. verify that the replica thread sleeps and replicas are temporarily disable when all replicas are synced.
    List<List<RemoteReplicaInfo>> replicasToReplicateList = new ArrayList<>(replicasToReplicate.values());
    // replicate is called and time is moved forward to prepare the replicas for testing.
    replicaThread.replicate();
    time.sleep(replicationConfig.replicationSyncedReplicaBackoffDurationMs + 1);
    long currentTimeMs = time.milliseconds();
    replicaThread.replicate();
    for (List<RemoteReplicaInfo> replicaInfos : replicasToReplicateList) {
        for (RemoteReplicaInfo replicaInfo : replicaInfos) {
            assertEquals("Unexpected re-enable replication time", currentTimeMs + replicationConfig.replicationSyncedReplicaBackoffDurationMs, replicaInfo.getReEnableReplicationTime());
        }
    }
    currentTimeMs = time.milliseconds();
    replicaThread.replicate();
    assertEquals("Replicas are in sync, replica thread should sleep by replication.thread.idle.sleep.duration.ms", currentTimeMs + replicationConfig.replicationReplicaThreadIdleSleepDurationMs, time.milliseconds());
    // 2. add 3 messages to a partition in the remote host only and verify replication for all replicas should be disabled.
    PartitionId partitionId = clusterMap.getWritablePartitionIds(null).get(0);
    addPutMessagesToReplicasOfPartition(partitionId, Collections.singletonList(remoteHost), 3);
    int[] missingKeys = new int[replicasToReplicate.get(remoteHost.dataNodeId).size()];
    for (int i = 0; i < missingKeys.length; i++) {
        missingKeys[i] = replicasToReplicate.get(remoteHost.dataNodeId).get(i).getReplicaId().getPartitionId().isEqual(partitionId.toPathString()) ? 3 : 0;
    }
    currentTimeMs = time.milliseconds();
    replicaThread.replicate();
    assertEquals("Replication for all replicas should be disabled and the thread should sleep", currentTimeMs + replicationConfig.replicationReplicaThreadIdleSleepDurationMs, time.milliseconds());
    assertMissingKeys(missingKeys, batchSize, replicaThread, remoteHost, replicasToReplicate);
    // 3. forward the time and run replicate and verify the replication.
    time.sleep(replicationConfig.replicationSyncedReplicaBackoffDurationMs);
    replicaThread.replicate();
    missingKeys = new int[replicasToReplicate.get(remoteHost.dataNodeId).size()];
    assertMissingKeys(missingKeys, batchSize, replicaThread, remoteHost, replicasToReplicate);
    // Since, now we moved setting of remoteReplicaInfo::setReEnableReplicationTime inside replicaThread::exchangeMetaData and
    // above assertMissingKeys() does exchangeMetadata() for replicas up to date, each replica will have
    // ReEnableReplicationTime set by replicationSyncedReplicaBackoffDurationMs. Forward the time here.
    time.sleep(replicationConfig.replicationSyncedReplicaBackoffDurationMs);
    // 4. add more put messages and verify that replication continues and is throttled appropriately.
    addPutMessagesToReplicasOfPartition(partitionId, Arrays.asList(localHost, remoteHost), 3);
    currentTimeMs = time.milliseconds();
    replicaThread.replicate();
    assertEquals("Replica thread should sleep exactly " + expectedThrottleDurationMs + " since remote has new token", currentTimeMs + expectedThrottleDurationMs, time.milliseconds());
    assertMissingKeys(missingKeys, batchSize, replicaThread, remoteHost, replicasToReplicate);
    // Since, now we moved setting of remoteReplicaInfo::setReEnableReplicationTime inside replicaThread::exchangeMetaData and
    // above assertMissingKeys() does exchangeMetadata() for replicas up to date, each replica will have
    // ReEnableReplicationTime set by replicationSyncedReplicaBackoffDurationMs. Forward the time here.
    time.sleep(replicationConfig.replicationSyncedReplicaBackoffDurationMs);
    // verify that throttling on the replica thread is disabled when relevant configs are 0.
    Properties properties = new Properties();
    properties.setProperty("replication.intra.replica.thread.throttle.sleep.duration.ms", "0");
    properties.setProperty("replication.inter.replica.thread.throttle.sleep.duration.ms", "0");
    replicationConfig = new ReplicationConfig(new VerifiableProperties(properties));
    replicasAndThread = getRemoteReplicasAndReplicaThread(batchSize, clusterMap, localHost, remoteHost, storeKeyConverter, transformer, null, null);
    replicaThread = replicasAndThread.getSecond();
    currentTimeMs = time.milliseconds();
    replicaThread.replicate();
    assertEquals("Replica thread should not sleep when throttling is disabled and replicas are out of sync", currentTimeMs, time.milliseconds());
}
Also used : ValidatingTransformer(com.github.ambry.messageformat.ValidatingTransformer) Transformer(com.github.ambry.store.Transformer) ArrayList(java.util.ArrayList) Properties(java.util.Properties) VerifiableProperties(com.github.ambry.config.VerifiableProperties) StoreKeyFactory(com.github.ambry.store.StoreKeyFactory) List(java.util.List) ArrayList(java.util.ArrayList) MockStoreKeyConverterFactory(com.github.ambry.store.MockStoreKeyConverterFactory) ReplicationConfig(com.github.ambry.config.ReplicationConfig) VerifiableProperties(com.github.ambry.config.VerifiableProperties) MockPartitionId(com.github.ambry.clustermap.MockPartitionId) PartitionId(com.github.ambry.clustermap.PartitionId) BlobIdFactory(com.github.ambry.commons.BlobIdFactory) Map(java.util.Map) HashMap(java.util.HashMap) ClusterMap(com.github.ambry.clustermap.ClusterMap) MockClusterMap(com.github.ambry.clustermap.MockClusterMap) DataNodeId(com.github.ambry.clustermap.DataNodeId) MockDataNodeId(com.github.ambry.clustermap.MockDataNodeId) MockClusterMap(com.github.ambry.clustermap.MockClusterMap) Test(org.junit.Test)

Example 30 with DataNodeId

use of com.github.ambry.clustermap.DataNodeId in project ambry by linkedin.

the class ReplicationTestHelper method getRemoteReplicasAndReplicaThread.

/**
 * Creates and gets the remote replicas that the local host will deal with and the {@link ReplicaThread} to perform
 * replication with.
 * @param batchSize the number of messages to be returned in each iteration of replication
 * @param clusterMap the {@link ClusterMap} to use
 * @param localHost the local {@link MockHost} (the one running the replica thread)
 * @param remoteHost the remote {@link MockHost} (the target of replication)
 * @param storeKeyConverter the {@link StoreKeyConverter} to be used in {@link ReplicaThread}
 * @param transformer the {@link Transformer} to be used in {@link ReplicaThread}
 * @param listener the {@link StoreEventListener} to use.
 * @param replicaSyncUpManager the {@link ReplicaSyncUpManager} to help create replica thread
 * @return a pair whose first element is the set of remote replicas and the second element is the {@link ReplicaThread}
 */
protected Pair<Map<DataNodeId, List<RemoteReplicaInfo>>, ReplicaThread> getRemoteReplicasAndReplicaThread(int batchSize, ClusterMap clusterMap, MockHost localHost, MockHost remoteHost, StoreKeyConverter storeKeyConverter, Transformer transformer, StoreEventListener listener, ReplicaSyncUpManager replicaSyncUpManager) throws ReflectiveOperationException {
    ReplicationMetrics replicationMetrics = new ReplicationMetrics(new MetricRegistry(), clusterMap.getReplicaIds(localHost.dataNodeId));
    replicationMetrics.populateSingleColoMetrics(remoteHost.dataNodeId.getDatacenterName());
    List<RemoteReplicaInfo> remoteReplicaInfoList = localHost.getRemoteReplicaInfos(remoteHost, listener);
    Map<DataNodeId, List<RemoteReplicaInfo>> replicasToReplicate = Collections.singletonMap(remoteHost.dataNodeId, remoteReplicaInfoList);
    StoreKeyFactory storeKeyFactory = Utils.getObj("com.github.ambry.commons.BlobIdFactory", clusterMap);
    Map<DataNodeId, MockHost> hosts = new HashMap<>();
    hosts.put(remoteHost.dataNodeId, remoteHost);
    MockConnectionPool connectionPool = new MockConnectionPool(hosts, clusterMap, batchSize);
    ReplicaThread replicaThread = new ReplicaThread("threadtest", new MockFindTokenHelper(storeKeyFactory, replicationConfig), clusterMap, new AtomicInteger(0), localHost.dataNodeId, connectionPool, replicationConfig, replicationMetrics, null, storeKeyConverter, transformer, clusterMap.getMetricRegistry(), false, localHost.dataNodeId.getDatacenterName(), new ResponseHandler(clusterMap), time, replicaSyncUpManager, null, null);
    for (RemoteReplicaInfo remoteReplicaInfo : remoteReplicaInfoList) {
        replicaThread.addRemoteReplicaInfo(remoteReplicaInfo);
    }
    for (PartitionId partitionId : clusterMap.getAllPartitionIds(null)) {
        replicationMetrics.addLagMetricForPartition(partitionId, true);
    }
    return new Pair<>(replicasToReplicate, replicaThread);
}
Also used : ResponseHandler(com.github.ambry.commons.ResponseHandler) HashMap(java.util.HashMap) MetricRegistry(com.codahale.metrics.MetricRegistry) MockPartitionId(com.github.ambry.clustermap.MockPartitionId) PartitionId(com.github.ambry.clustermap.PartitionId) StoreKeyFactory(com.github.ambry.store.StoreKeyFactory) AtomicInteger(java.util.concurrent.atomic.AtomicInteger) List(java.util.List) ArrayList(java.util.ArrayList) DataNodeId(com.github.ambry.clustermap.DataNodeId) Pair(com.github.ambry.utils.Pair)

Aggregations

DataNodeId (com.github.ambry.clustermap.DataNodeId)92 ArrayList (java.util.ArrayList)45 Test (org.junit.Test)45 HashMap (java.util.HashMap)29 PartitionId (com.github.ambry.clustermap.PartitionId)28 MockDataNodeId (com.github.ambry.clustermap.MockDataNodeId)27 ReplicaId (com.github.ambry.clustermap.ReplicaId)25 MockClusterMap (com.github.ambry.clustermap.MockClusterMap)23 VerifiableProperties (com.github.ambry.config.VerifiableProperties)23 MetricRegistry (com.codahale.metrics.MetricRegistry)22 MockPartitionId (com.github.ambry.clustermap.MockPartitionId)22 List (java.util.List)22 Map (java.util.Map)22 Port (com.github.ambry.network.Port)21 ClusterMap (com.github.ambry.clustermap.ClusterMap)20 ClusterMapConfig (com.github.ambry.config.ClusterMapConfig)19 StoreKeyFactory (com.github.ambry.store.StoreKeyFactory)18 BlobIdFactory (com.github.ambry.commons.BlobIdFactory)17 HashSet (java.util.HashSet)16 Properties (java.util.Properties)16