Example 21 with ClusterMapConfig

use of com.github.ambry.config.ClusterMapConfig in project ambry by linkedin.

the class LeaderBasedReplicationTest method setUp.

public void setUp() throws IOException {
    properties.setProperty("replication.model.across.datacenters", "LEADER_BASED");
    replicationConfig = new ReplicationConfig(new VerifiableProperties(properties));
    clusterMap = new MockClusterMap();
    clusterMapConfig = new ClusterMapConfig(verifiableProperties);
    MockHelixParticipant.metricRegistry = new MetricRegistry();
    mockHelixParticipant = new MockHelixParticipant(clusterMapConfig);
    /*
      Setup:
      we have 3 nodes that have replicas belonging to the same partitions:
      a) localNode (local node that hosts partitions)
      b) remoteNodeInLocalDC (remote node in local data center that shares the partitions)
      c) remoteNodeInRemoteDC (remote node in remote data center that shares the partitions)

      Each node has a few of its partitions as leaders and the others as standbys. Leadership is randomly assigned
      during the creation of replicas for the mock partitions.
     */
    DataNodeId localNode = clusterMap.getDataNodeIds().get(0);
    List<DataNodeId> remoteNodes = getRemoteNodesFromLocalAndRemoteDCs(clusterMap, localNode);
    remoteNodeInLocalDC = remoteNodes.get(0);
    remoteNodeInRemoteDC = remoteNodes.get(1);
    // create mock hosts for the local node and the two remote nodes
    localHost = new MockHost(localNode, clusterMap);
    remoteHostInLocalDC = new MockHost(remoteNodeInLocalDC, clusterMap);
    remoteHostInRemoteDC = new MockHost(remoteNodeInRemoteDC, clusterMap);
}
Also used : ReplicationConfig(com.github.ambry.config.ReplicationConfig) MockHelixParticipant(com.github.ambry.clustermap.MockHelixParticipant) VerifiableProperties(com.github.ambry.config.VerifiableProperties) MetricRegistry(com.codahale.metrics.MetricRegistry) DataNodeId(com.github.ambry.clustermap.DataNodeId) ClusterMapConfig(com.github.ambry.config.ClusterMapConfig) MockClusterMap(com.github.ambry.clustermap.MockClusterMap)
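
The snippet above relies on test-class fields (properties, verifiableProperties and the replicationConfig/clusterMapConfig members) that are initialized elsewhere in the test hierarchy. As a rough, self-contained sketch of the config portion of that setup (the clustermap.* keys are the ones ClusterMapConfig reads; the values here are placeholders, not the ones the real test uses):

// Minimal sketch only; property values are placeholders.
Properties properties = new Properties();
properties.setProperty("clustermap.cluster.name", "test-cluster");
properties.setProperty("clustermap.datacenter.name", "DC1");
properties.setProperty("clustermap.host.name", "localhost");
// switch cross-datacenter replication to the leader-based model, as in the test above
properties.setProperty("replication.model.across.datacenters", "LEADER_BASED");
VerifiableProperties verifiableProperties = new VerifiableProperties(properties);
ReplicationConfig replicationConfig = new ReplicationConfig(verifiableProperties);
ClusterMapConfig clusterMapConfig = new ClusterMapConfig(verifiableProperties);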

Example 22 with ClusterMapConfig

use of com.github.ambry.config.ClusterMapConfig in project ambry by linkedin.

the class MockReplicationManager method getReplicationManager.

/**
 * Static construction helper
 * @param verifiableProperties the {@link VerifiableProperties} to use for config.
 * @param storageManager the {@link StorageManager} to use.
 * @param clusterMap the {@link ClusterMap} to use.
 * @param dataNodeId the {@link DataNodeId} to use.
 * @param storeKeyConverterFactory the {@link StoreKeyConverterFactory} to use.
 * @return an instance of {@link MockReplicationManager}
 * @throws ReplicationException
 */
public static MockReplicationManager getReplicationManager(VerifiableProperties verifiableProperties, StorageManager storageManager, ClusterMap clusterMap, DataNodeId dataNodeId, StoreKeyConverterFactory storeKeyConverterFactory) throws ReplicationException {
    ReplicationConfig replicationConfig = new ReplicationConfig(verifiableProperties);
    ClusterMapConfig clusterMapConfig = new ClusterMapConfig(verifiableProperties);
    StoreConfig storeConfig = new StoreConfig(verifiableProperties);
    return new MockReplicationManager(replicationConfig, clusterMapConfig, storeConfig, storageManager, clusterMap, dataNodeId, storeKeyConverterFactory, null);
}
Also used : ReplicationConfig(com.github.ambry.config.ReplicationConfig) StoreConfig(com.github.ambry.config.StoreConfig) ClusterMapConfig(com.github.ambry.config.ClusterMapConfig)
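
A hedged sketch of how a test might call this helper directly. In Examples 23 and 24 the call goes through a createStorageManagerAndReplicationManager helper that builds the StorageManager and MockClusterMap together, so both are taken as parameters here; the buildMockReplicationManager name and the property values are illustrative placeholders.

// Sketch only: storageManager is assumed to have been built against the same clusterMap by the test fixture.
static MockReplicationManager buildMockReplicationManager(MockClusterMap clusterMap, StorageManager storageManager) throws Exception {
    Properties props = new Properties();
    // ClusterMapConfig, created inside the helper, requires these keys; the values are placeholders.
    props.setProperty("clustermap.cluster.name", "test-cluster");
    props.setProperty("clustermap.datacenter.name", "DC1");
    props.setProperty("clustermap.host.name", "localhost");
    VerifiableProperties verifiableProperties = new VerifiableProperties(props);
    DataNodeId dataNodeId = clusterMap.getDataNodeIds().get(0);
    MockStoreKeyConverterFactory storeKeyConverterFactory = new MockStoreKeyConverterFactory(null, null);
    storeKeyConverterFactory.setConversionMap(new HashMap<>());
    storeKeyConverterFactory.setReturnInputIfAbsent(true);
    return MockReplicationManager.getReplicationManager(verifiableProperties, storageManager, clusterMap, dataNodeId, storeKeyConverterFactory);
}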

Example 23 with ClusterMapConfig

use of com.github.ambry.config.ClusterMapConfig in project ambry by linkedin.

the class ReplicationTest method replicaFromStandbyToInactiveTest.

/**
 * Test STANDBY -> INACTIVE transition on existing replica (both success and failure cases)
 */
@Test
public void replicaFromStandbyToInactiveTest() throws Exception {
    MockClusterMap clusterMap = new MockClusterMap();
    ClusterMapConfig clusterMapConfig = new ClusterMapConfig(verifiableProperties);
    MockHelixParticipant.metricRegistry = new MetricRegistry();
    MockHelixParticipant mockHelixParticipant = new MockHelixParticipant(clusterMapConfig);
    Pair<StorageManager, ReplicationManager> managers = createStorageManagerAndReplicationManager(clusterMap, clusterMapConfig, mockHelixParticipant);
    StorageManager storageManager = managers.getFirst();
    MockReplicationManager replicationManager = (MockReplicationManager) managers.getSecond();
    // get an existing partition to test both success and failure cases
    PartitionId existingPartition = replicationManager.partitionToPartitionInfo.keySet().iterator().next();
    storageManager.shutdownBlobStore(existingPartition);
    try {
        mockHelixParticipant.onPartitionBecomeInactiveFromStandby(existingPartition.toPathString());
        fail("should fail because store is not started");
    } catch (StateTransitionException e) {
        assertEquals("Error code doesn't match", StoreNotStarted, e.getErrorCode());
    }
    // restart the store and trigger Standby-To-Inactive transition again
    storageManager.startBlobStore(existingPartition);
    // write a blob with size = 100 into local store (end offset of last PUT = 100 + 18 = 118)
    Store localStore = storageManager.getStore(existingPartition);
    MockId id = new MockId(TestUtils.getRandomString(10), Utils.getRandomShort(TestUtils.RANDOM), Utils.getRandomShort(TestUtils.RANDOM));
    long crc = (new Random()).nextLong();
    long blobSize = 100;
    MessageInfo info = new MessageInfo(id, blobSize, false, false, Utils.Infinite_Time, crc, id.getAccountId(), id.getContainerId(), Utils.Infinite_Time);
    List<MessageInfo> infos = new ArrayList<>();
    List<ByteBuffer> buffers = new ArrayList<>();
    ByteBuffer buffer = ByteBuffer.wrap(TestUtils.getRandomBytes((int) blobSize));
    infos.add(info);
    buffers.add(buffer);
    localStore.put(new MockMessageWriteSet(infos, buffers));
    ReplicaId localReplica = storageManager.getReplica(existingPartition.toPathString());
    // override partition state change listener in ReplicationManager to help thread manipulation
    mockHelixParticipant.registerPartitionStateChangeListener(StateModelListenerType.ReplicationManagerListener, replicationManager.replicationListener);
    CountDownLatch participantLatch = new CountDownLatch(1);
    replicationManager.listenerExecutionLatch = new CountDownLatch(1);
    // create a new thread and trigger STANDBY -> INACTIVE transition
    Utils.newThread(() -> {
        mockHelixParticipant.onPartitionBecomeInactiveFromStandby(existingPartition.toPathString());
        participantLatch.countDown();
    }, false).start();
    assertTrue("Partition state change listener didn't get called within 1 sec", replicationManager.listenerExecutionLatch.await(1, TimeUnit.SECONDS));
    assertEquals("Local store state should be INACTIVE", ReplicaState.INACTIVE, storageManager.getStore(existingPartition).getCurrentState());
    List<RemoteReplicaInfo> remoteReplicaInfos = replicationManager.partitionToPartitionInfo.get(existingPartition).getRemoteReplicaInfos();
    ReplicaId peerReplica1 = remoteReplicaInfos.get(0).getReplicaId();
    assertFalse("Sync up should not complete because not enough replicas have caught up", mockHelixParticipant.getReplicaSyncUpManager().updateReplicaLagAndCheckSyncStatus(localReplica, peerReplica1, 10L, ReplicaState.INACTIVE));
    // pick another remote replica to update the replication lag
    ReplicaId peerReplica2 = remoteReplicaInfos.get(1).getReplicaId();
    replicationManager.updateTotalBytesReadByRemoteReplica(existingPartition, peerReplica1.getDataNodeId().getHostname(), peerReplica1.getReplicaPath(), 118);
    assertFalse("Sync up shouldn't complete because only one replica has caught up with local replica", mockHelixParticipant.getReplicaSyncUpManager().isSyncUpComplete(localReplica));
    // make second peer replica catch up with last PUT in local store
    replicationManager.updateTotalBytesReadByRemoteReplica(existingPartition, peerReplica2.getDataNodeId().getHostname(), peerReplica2.getReplicaPath(), 118);
    assertTrue("Standby-To-Inactive transition didn't complete within 1 sec", participantLatch.await(1, TimeUnit.SECONDS));
    // we purposely update lag against local replica to verify local replica is no longer in ReplicaSyncUpManager because
    // deactivation is complete and local replica should be removed from "replicaToLagInfos" map.
    assertFalse("Sync up should complete (2 replicas have caught up), hence updated should be false", mockHelixParticipant.getReplicaSyncUpManager().updateReplicaLagAndCheckSyncStatus(localReplica, peerReplica2, 0L, ReplicaState.INACTIVE));
    storageManager.shutdown();
}
Also used : MetricRegistry(com.codahale.metrics.MetricRegistry) StorageManager(com.github.ambry.store.StorageManager) ArrayList(java.util.ArrayList) Store(com.github.ambry.store.Store) MockId(com.github.ambry.store.MockId) MockPartitionId(com.github.ambry.clustermap.MockPartitionId) PartitionId(com.github.ambry.clustermap.PartitionId) CountDownLatch(java.util.concurrent.CountDownLatch) ByteBuffer(java.nio.ByteBuffer) ClusterMapConfig(com.github.ambry.config.ClusterMapConfig) MockReplicaId(com.github.ambry.clustermap.MockReplicaId) ReplicaId(com.github.ambry.clustermap.ReplicaId) MessageInfo(com.github.ambry.store.MessageInfo) MockMessageWriteSet(com.github.ambry.store.MockMessageWriteSet) MockHelixParticipant(com.github.ambry.clustermap.MockHelixParticipant) Random(java.util.Random) MockClusterMap(com.github.ambry.clustermap.MockClusterMap) StateTransitionException(com.github.ambry.clustermap.StateTransitionException) Test(org.junit.Test)

Example 24 with ClusterMapConfig

use of com.github.ambry.config.ClusterMapConfig in project ambry by linkedin.

the class ReplicationTest method replicaFromBootstrapToStandbyTest.

/**
 * Test BOOTSTRAP -> STANDBY transition on both existing and new replicas. For new replica, we test both failure and
 * success cases.
 * @throws Exception
 */
@Test
public void replicaFromBootstrapToStandbyTest() throws Exception {
    MockClusterMap clusterMap = new MockClusterMap();
    ClusterMapConfig clusterMapConfig = new ClusterMapConfig(verifiableProperties);
    MockHelixParticipant.metricRegistry = new MetricRegistry();
    MockHelixParticipant mockHelixParticipant = new MockHelixParticipant(clusterMapConfig);
    Pair<StorageManager, ReplicationManager> managers = createStorageManagerAndReplicationManager(clusterMap, clusterMapConfig, mockHelixParticipant);
    StorageManager storageManager = managers.getFirst();
    MockReplicationManager replicationManager = (MockReplicationManager) managers.getSecond();
    // 1. test an existing partition through the Bootstrap-To-Standby transition; this should be a no-op.
    PartitionId existingPartition = replicationManager.partitionToPartitionInfo.keySet().iterator().next();
    mockHelixParticipant.onPartitionBecomeStandbyFromBootstrap(existingPartition.toPathString());
    assertEquals("Store state doesn't match", ReplicaState.STANDBY, storageManager.getStore(existingPartition).getCurrentState());
    // 2. test transition failure due to store not started
    storageManager.shutdownBlobStore(existingPartition);
    try {
        mockHelixParticipant.onPartitionBecomeStandbyFromBootstrap(existingPartition.toPathString());
        fail("should fail because store is not started");
    } catch (StateTransitionException e) {
        assertEquals("Error code doesn't match", StoreNotStarted, e.getErrorCode());
    }
    // 3. create new replica and add it into storage manager, test replica that needs to initiate bootstrap
    ReplicaId newReplicaToAdd = getNewReplicaToAdd(clusterMap);
    assertTrue("Adding new replica to Storage Manager should succeed", storageManager.addBlobStore(newReplicaToAdd));
    // override partition state change listener in ReplicationManager to help thread manipulation
    mockHelixParticipant.registerPartitionStateChangeListener(StateModelListenerType.ReplicationManagerListener, replicationManager.replicationListener);
    CountDownLatch participantLatch = new CountDownLatch(1);
    replicationManager.listenerExecutionLatch = new CountDownLatch(1);
    // create a new thread and trigger BOOTSTRAP -> STANDBY transition
    Utils.newThread(() -> {
        mockHelixParticipant.onPartitionBecomeStandbyFromBootstrap(newReplicaToAdd.getPartitionId().toPathString());
        participantLatch.countDown();
    }, false).start();
    assertTrue("Partition state change listener in ReplicationManager didn't get called within 1 sec", replicationManager.listenerExecutionLatch.await(1, TimeUnit.SECONDS));
    assertEquals("Replica should be in BOOTSTRAP state before bootstrap is complete", ReplicaState.BOOTSTRAP, storageManager.getStore(newReplicaToAdd.getPartitionId()).getCurrentState());
    // make bootstrap succeed
    mockHelixParticipant.getReplicaSyncUpManager().onBootstrapComplete(newReplicaToAdd);
    assertTrue("Bootstrap-To-Standby transition didn't complete within 1 sec", participantLatch.await(1, TimeUnit.SECONDS));
    storageManager.shutdown();
}
Also used : MockHelixParticipant(com.github.ambry.clustermap.MockHelixParticipant) MetricRegistry(com.codahale.metrics.MetricRegistry) StorageManager(com.github.ambry.store.StorageManager) MockPartitionId(com.github.ambry.clustermap.MockPartitionId) PartitionId(com.github.ambry.clustermap.PartitionId) CountDownLatch(java.util.concurrent.CountDownLatch) ClusterMapConfig(com.github.ambry.config.ClusterMapConfig) MockReplicaId(com.github.ambry.clustermap.MockReplicaId) ReplicaId(com.github.ambry.clustermap.ReplicaId) MockClusterMap(com.github.ambry.clustermap.MockClusterMap) StateTransitionException(com.github.ambry.clustermap.StateTransitionException) Test(org.junit.Test)

Example 25 with ClusterMapConfig

use of com.github.ambry.config.ClusterMapConfig in project ambry by linkedin.

the class ReplicationTest method replicationLagMetricAndSyncUpTest.

/**
 * Tests {@link ReplicationMetrics#getMaxLagForPartition(PartitionId)}
 * @throws Exception
 */
@Test
public void replicationLagMetricAndSyncUpTest() throws Exception {
    MockClusterMap clusterMap = new MockClusterMap();
    ClusterMapConfig clusterMapConfig = new ClusterMapConfig(verifiableProperties);
    AmbryReplicaSyncUpManager replicaSyncUpService = new AmbryReplicaSyncUpManager(clusterMapConfig);
    Pair<MockHost, MockHost> localAndRemoteHosts = getLocalAndRemoteHosts(clusterMap);
    MockHost localHost = localAndRemoteHosts.getFirst();
    MockHost remoteHost1 = localAndRemoteHosts.getSecond();
    // create another remoteHost2 that shares the special partition with localHost and remoteHost1
    PartitionId specialPartitionId = clusterMap.getWritablePartitionIds(MockClusterMap.SPECIAL_PARTITION_CLASS).get(0);
    MockHost remoteHost2 = new MockHost(specialPartitionId.getReplicaIds().get(2).getDataNodeId(), clusterMap);
    MockStoreKeyConverterFactory storeKeyConverterFactory = new MockStoreKeyConverterFactory(null, null);
    storeKeyConverterFactory.setConversionMap(new HashMap<>());
    storeKeyConverterFactory.setReturnInputIfAbsent(true);
    MockStoreKeyConverterFactory.MockStoreKeyConverter storeKeyConverter = storeKeyConverterFactory.getStoreKeyConverter();
    int batchSize = 4;
    List<PartitionId> partitionIds = clusterMap.getWritablePartitionIds(null);
    for (int i = 0; i < partitionIds.size(); i++) {
        PartitionId partitionId = partitionIds.get(i);
        // add batchSize + 1 messages to remoteHost1 so that two rounds of replication are needed.
        addPutMessagesToReplicasOfPartition(partitionId, Collections.singletonList(remoteHost1), batchSize + 1);
    }
    // add batchSize - 1 messages to remoteHost2 so that localHost can catch up within one cycle of replication
    for (ReplicaId replicaId : clusterMap.getReplicaIds(remoteHost2.dataNodeId)) {
        addPutMessagesToReplicasOfPartition(replicaId.getPartitionId(), Collections.singletonList(remoteHost2), batchSize - 1);
    }
    StoreKeyFactory storeKeyFactory = new BlobIdFactory(clusterMap);
    Transformer transformer = new BlobIdTransformer(storeKeyFactory, storeKeyConverter);
    Pair<Map<DataNodeId, List<RemoteReplicaInfo>>, ReplicaThread> replicasAndThread1 = getRemoteReplicasAndReplicaThread(batchSize, clusterMap, localHost, remoteHost1, storeKeyConverter, transformer, null, replicaSyncUpService);
    Map<DataNodeId, List<RemoteReplicaInfo>> replicasToReplicate1 = replicasAndThread1.getFirst();
    ReplicaThread replicaThread1 = replicasAndThread1.getSecond();
    // mock Bootstrap-To-Standby transition in ReplicationManager: 1. update store current state; 2. initiate bootstrap
    replicasToReplicate1.get(remoteHost1.dataNodeId).forEach(info -> info.getLocalStore().setCurrentState(ReplicaState.BOOTSTRAP));
    clusterMap.getReplicaIds(localHost.dataNodeId).forEach(replicaSyncUpService::initiateBootstrap);
    List<ReplicaThread.ExchangeMetadataResponse> response = replicaThread1.exchangeMetadata(new MockConnectionPool.MockConnection(remoteHost1, batchSize), replicasToReplicate1.get(remoteHost1.dataNodeId));
    replicaThread1.fixMissingStoreKeys(new MockConnectionPool.MockConnection(remoteHost1, batchSize), replicasToReplicate1.get(remoteHost1.dataNodeId), response, false);
    for (PartitionId partitionId : partitionIds) {
        List<MessageInfo> allMessageInfos = localAndRemoteHosts.getSecond().infosByPartition.get(partitionId);
        long expectedLag = allMessageInfos.subList(batchSize, allMessageInfos.size()).stream().mapToLong(MessageInfo::getSize).sum();
        assertEquals("Replication lag doesn't match expected value", expectedLag, replicaThread1.getReplicationMetrics().getMaxLagForPartition(partitionId));
    }
    response = replicaThread1.exchangeMetadata(new MockConnectionPool.MockConnection(remoteHost1, batchSize), replicasToReplicate1.get(remoteHost1.dataNodeId));
    replicaThread1.fixMissingStoreKeys(new MockConnectionPool.MockConnection(remoteHost1, batchSize), replicasToReplicate1.get(remoteHost1.dataNodeId), response, false);
    for (PartitionId partitionId : partitionIds) {
        assertEquals("Replication lag should equal to 0", 0, replicaThread1.getReplicationMetrics().getMaxLagForPartition(partitionId));
    }
    // replicate with remoteHost2 to ensure special replica has caught up with enough peers
    Pair<Map<DataNodeId, List<RemoteReplicaInfo>>, ReplicaThread> replicasAndThread2 = getRemoteReplicasAndReplicaThread(batchSize, clusterMap, localHost, remoteHost2, storeKeyConverter, transformer, null, replicaSyncUpService);
    Map<DataNodeId, List<RemoteReplicaInfo>> replicasToReplicate2 = replicasAndThread2.getFirst();
    ReplicaThread replicaThread2 = replicasAndThread2.getSecond();
    // initiate bootstrap on replica of special partition
    RemoteReplicaInfo specialReplicaInfo = replicasToReplicate2.get(remoteHost2.dataNodeId).stream().filter(info -> info.getReplicaId().getPartitionId() == specialPartitionId).findFirst().get();
    specialReplicaInfo.getLocalStore().setCurrentState(ReplicaState.BOOTSTRAP);
    replicaSyncUpService.initiateBootstrap(specialReplicaInfo.getLocalReplicaId());
    response = replicaThread2.exchangeMetadata(new MockConnectionPool.MockConnection(remoteHost2, batchSize), replicasToReplicate2.get(remoteHost2.dataNodeId));
    replicaThread2.fixMissingStoreKeys(new MockConnectionPool.MockConnection(remoteHost2, batchSize), replicasToReplicate2.get(remoteHost2.dataNodeId), response, false);
    // verify replica of special partition has completed bootstrap and becomes standby
    assertEquals("Store state is not expected", ReplicaState.STANDBY, specialReplicaInfo.getLocalStore().getCurrentState());
}
Also used : ValidatingTransformer(com.github.ambry.messageformat.ValidatingTransformer) Transformer(com.github.ambry.store.Transformer) StoreKeyFactory(com.github.ambry.store.StoreKeyFactory) List(java.util.List) ArrayList(java.util.ArrayList) MockStoreKeyConverterFactory(com.github.ambry.store.MockStoreKeyConverterFactory) MockPartitionId(com.github.ambry.clustermap.MockPartitionId) PartitionId(com.github.ambry.clustermap.PartitionId) ClusterMapConfig(com.github.ambry.config.ClusterMapConfig) MockReplicaId(com.github.ambry.clustermap.MockReplicaId) ReplicaId(com.github.ambry.clustermap.ReplicaId) BlobIdFactory(com.github.ambry.commons.BlobIdFactory) MessageInfo(com.github.ambry.store.MessageInfo) Map(java.util.Map) HashMap(java.util.HashMap) ClusterMap(com.github.ambry.clustermap.ClusterMap) MockClusterMap(com.github.ambry.clustermap.MockClusterMap) DataNodeId(com.github.ambry.clustermap.DataNodeId) MockDataNodeId(com.github.ambry.clustermap.MockDataNodeId) AmbryReplicaSyncUpManager(com.github.ambry.clustermap.AmbryReplicaSyncUpManager) MockClusterMap(com.github.ambry.clustermap.MockClusterMap) Test(org.junit.Test)

Aggregations

ClusterMapConfig (com.github.ambry.config.ClusterMapConfig): 100 usages
VerifiableProperties (com.github.ambry.config.VerifiableProperties): 81 usages
Test (org.junit.Test): 56 usages
Properties (java.util.Properties): 52 usages
MetricRegistry (com.codahale.metrics.MetricRegistry): 47 usages
ArrayList (java.util.ArrayList): 31 usages
IOException (java.io.IOException): 26 usages
HashSet (java.util.HashSet): 25 usages
JSONObject (org.json.JSONObject): 25 usages
File (java.io.File): 24 usages
ClusterMap (com.github.ambry.clustermap.ClusterMap): 23 usages
HashMap (java.util.HashMap): 21 usages
MockClusterMap (com.github.ambry.clustermap.MockClusterMap): 19 usages
ClusterAgentsFactory (com.github.ambry.clustermap.ClusterAgentsFactory): 18 usages
DataNodeId (com.github.ambry.clustermap.DataNodeId): 18 usages
StoreConfig (com.github.ambry.config.StoreConfig): 18 usages
ReplicaId (com.github.ambry.clustermap.ReplicaId): 16 usages
List (java.util.List): 16 usages
Map (java.util.Map): 16 usages
CountDownLatch (java.util.concurrent.CountDownLatch): 16 usages