use of com.github.ambry.config.ClusterMapConfig in project ambry by linkedin.
the class BlockingChannelConnectionPoolTest method initializeTests.
/**
* Run only once for all tests
*/
@BeforeClass
public static void initializeTests() throws Exception {
  trustStoreFile = File.createTempFile("truststore", ".jks");
  serverSSLConfig1 =
      new SSLConfig(TestSSLUtils.createSslProps("DC2,DC3", SSLFactory.Mode.SERVER, trustStoreFile, "server1"));
  serverSSLConfig2 =
      new SSLConfig(TestSSLUtils.createSslProps("DC1,DC3", SSLFactory.Mode.SERVER, trustStoreFile, "server2"));
  serverSSLConfig3 =
      new SSLConfig(TestSSLUtils.createSslProps("DC1,DC2", SSLFactory.Mode.SERVER, trustStoreFile, "server3"));
  VerifiableProperties sslClientProps =
      TestSSLUtils.createSslProps("DC1,DC2,DC3", SSLFactory.Mode.CLIENT, trustStoreFile, "client");
  sslConfig = new SSLConfig(sslClientProps);
  sslEnabledClusterMapConfig = new ClusterMapConfig(sslClientProps);
  Properties props = new Properties();
  props.setProperty("clustermap.cluster.name", "test");
  props.setProperty("clustermap.datacenter.name", "dc1");
  props.setProperty("clustermap.host.name", "localhost");
  plainTextClusterMapConfig = new ClusterMapConfig(new VerifiableProperties(props));
  sslFactory = SSLFactory.getNewInstance(sslConfig);
  SSLContext sslContext = sslFactory.getSSLContext();
  sslSocketFactory = sslContext.getSocketFactory();
}
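Note that the sslSocketFactory built above is a plain javax.net.ssl.SSLSocketFactory, so a test can open TLS client connections with it directly. A minimal sketch using only standard JDK APIs (the class name, host, and port are illustrative placeholders, not values from this test):
import java.io.IOException;
import javax.net.ssl.SSLSocket;
import javax.net.ssl.SSLSocketFactory;

class SslClientSketch {
  // Open a TLS connection with a factory like the one initializeTests() builds.
  static SSLSocket connect(SSLSocketFactory factory, String host, int port) throws IOException {
    SSLSocket socket = (SSLSocket) factory.createSocket(host, port);
    socket.startHandshake(); // fails fast if the peer's certificate is not in the trust store
    return socket;
  }
}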
use of com.github.ambry.config.ClusterMapConfig in project ambry by linkedin.
the class ReplicationTest method replicaFromOfflineToBootstrapTest.
/**
* Test the state transition in the replication manager from OFFLINE to BOOTSTRAP.
* @throws Exception
*/
@Test
public void replicaFromOfflineToBootstrapTest() throws Exception {
  MockClusterMap clusterMap = new MockClusterMap();
  ClusterMapConfig clusterMapConfig = new ClusterMapConfig(verifiableProperties);
  MockHelixParticipant.metricRegistry = new MetricRegistry();
  MockHelixParticipant mockHelixParticipant = new MockHelixParticipant(clusterMapConfig);
  DataNodeId currentNode = clusterMap.getDataNodeIds().get(0);
  Pair<StorageManager, ReplicationManager> managers =
      createStorageManagerAndReplicationManager(clusterMap, clusterMapConfig, mockHelixParticipant);
  StorageManager storageManager = managers.getFirst();
  MockReplicationManager replicationManager = (MockReplicationManager) managers.getSecond();
  assertTrue("State change listener in cluster participant should contain replication manager listener",
      mockHelixParticipant.getPartitionStateChangeListeners()
          .containsKey(StateModelListenerType.ReplicationManagerListener));
  // 1. test partition not found case (should throw exception)
  try {
    mockHelixParticipant.onPartitionBecomeBootstrapFromOffline("-1");
    fail("should fail because replica is not found");
  } catch (StateTransitionException e) {
    assertEquals("Transition error doesn't match", ReplicaNotFound, e.getErrorCode());
  }
  // 2. create a new partition and test replica addition success case
  ReplicaId newReplicaToAdd = getNewReplicaToAdd(clusterMap);
  PartitionId newPartition = newReplicaToAdd.getPartitionId();
  assertTrue("Adding new replica to Storage Manager should succeed", storageManager.addBlobStore(newReplicaToAdd));
  assertFalse("partitionToPartitionInfo should not contain new partition",
      replicationManager.partitionToPartitionInfo.containsKey(newPartition));
  mockHelixParticipant.onPartitionBecomeBootstrapFromOffline(newPartition.toPathString());
  assertTrue("partitionToPartitionInfo should contain new partition",
      replicationManager.partitionToPartitionInfo.containsKey(newPartition));
  // 3. test replica addition failure case
  replicationManager.partitionToPartitionInfo.remove(newPartition);
  replicationManager.addReplicaReturnVal = false;
  try {
    mockHelixParticipant.onPartitionBecomeBootstrapFromOffline(newPartition.toPathString());
    fail("should fail due to replica addition failure");
  } catch (StateTransitionException e) {
    assertEquals("Transition error doesn't match", ReplicaOperationFailure, e.getErrorCode());
  }
  replicationManager.addReplicaReturnVal = null;
  // 4. test OFFLINE -> BOOTSTRAP on existing replica (should be no-op)
  ReplicaId existingReplica = clusterMap.getReplicaIds(currentNode).get(0);
  assertTrue("partitionToPartitionInfo should contain existing partition",
      replicationManager.partitionToPartitionInfo.containsKey(existingReplica.getPartitionId()));
  mockHelixParticipant.onPartitionBecomeBootstrapFromOffline(existingReplica.getPartitionId().toPathString());
  storageManager.shutdown();
}
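As an aside, the try/fail/catch pattern in steps 1 and 3 above can be written more compactly with assertThrows, assuming the project is on JUnit 4.13 or later where org.junit.Assert.assertThrows is available. A sketch reusing the variables from the test above:
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertThrows;

// Equivalent to the try/fail/catch block in step 1 above.
StateTransitionException e = assertThrows(StateTransitionException.class,
    () -> mockHelixParticipant.onPartitionBecomeBootstrapFromOffline("-1"));
assertEquals("Transition error doesn't match", ReplicaNotFound, e.getErrorCode());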
use of com.github.ambry.config.ClusterMapConfig in project ambry by linkedin.
the class ReplicationTest method dcLevelReplicationLagMetricsTest.
/**
* Test the metrics that track how far remote replicas lag behind local replicas in each dc.
* @throws Exception
*/
@Test
public void dcLevelReplicationLagMetricsTest() throws Exception {
  MockClusterMap clusterMap = new MockClusterMap();
  ClusterMapConfig clusterMapConfig = new ClusterMapConfig(verifiableProperties);
  Pair<StorageManager, ReplicationManager> managers =
      createStorageManagerAndReplicationManager(clusterMap, clusterMapConfig, null);
  MockReplicationManager replicationManager = (MockReplicationManager) managers.getSecond();
  Set<String> remoteDcNames = new HashSet<>(Arrays.asList("DC1", "DC2", "DC3"));
  String localDcName = clusterMap.getDataNodeIds().get(0).getDatacenterName();
  remoteDcNames.remove(localDcName);
  // before updating replication lag, the avg lag in each dc should still be the initial value (18 bytes)
  MetricRegistry metricRegistry = replicationManager.getMetricRegistry();
  String prefix = ReplicaThread.class.getName() + ".";
  String avgMetricSuffix = "-avgReplicaLagFromLocalInBytes";
  assertEquals("Average replication lag in local dc is not expected", 18.0,
      metricRegistry.getGauges().get(prefix + localDcName + avgMetricSuffix).getValue());
  for (String remoteDc : remoteDcNames) {
    assertEquals("Average replication lag in remote dc is not expected", 18.0,
        metricRegistry.getGauges().get(prefix + remoteDc + avgMetricSuffix).getValue());
  }
  // iterate over all partitions on current node and make sure all their peer replicas in local dc have fully caught up
  for (Map.Entry<PartitionId, PartitionInfo> entry : replicationManager.partitionToPartitionInfo.entrySet()) {
    PartitionId localPartition = entry.getKey();
    PartitionInfo partitionInfo = entry.getValue();
    List<RemoteReplicaInfo> remoteReplicaInfos = partitionInfo.getRemoteReplicaInfos()
        .stream()
        .filter(info -> info.getReplicaId().getDataNodeId().getDatacenterName().equals(localDcName))
        .collect(Collectors.toList());
    for (RemoteReplicaInfo remoteReplicaInfoInLocalDc : remoteReplicaInfos) {
      ReplicaId peerReplicaInLocalDc = remoteReplicaInfoInLocalDc.getReplicaId();
      replicationManager.updateTotalBytesReadByRemoteReplica(localPartition,
          peerReplicaInLocalDc.getDataNodeId().getHostname(), peerReplicaInLocalDc.getReplicaPath(), 18);
    }
  }
  // verify that after updating replication lag for all peer replicas in local dc, the avg lag in local dc has updated
  assertEquals("Average replication lag in local dc is not expected", 0.0,
      metricRegistry.getGauges().get(prefix + localDcName + avgMetricSuffix).getValue());
  // for remote dcs, the avg lag is still 18.0
  for (String remoteDc : remoteDcNames) {
    assertEquals("Average replication lag in remote dc is not expected", 18.0,
        metricRegistry.getGauges().get(prefix + remoteDc + avgMetricSuffix).getValue());
  }
}
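The gauge names asserted above follow the convention prefix + dcName + "-avgReplicaLagFromLocalInBytes". A small, self-contained sketch of registering and reading such a gauge with Dropwizard Metrics (the dc name and lag values are illustrative, not taken from the test):
import com.codahale.metrics.Gauge;
import com.codahale.metrics.MetricRegistry;

class LagGaugeSketch {
  public static void main(String[] args) {
    MetricRegistry registry = new MetricRegistry();
    // same naming scheme the test builds: <ReplicaThread class name>.<dc>-avgReplicaLagFromLocalInBytes
    String name = "com.github.ambry.replication.ReplicaThread.DC1-avgReplicaLagFromLocalInBytes";
    double[] lag = {18.0}; // mutable holder standing in for real replication state
    registry.register(name, (Gauge<Double>) () -> lag[0]);
    lag[0] = 0.0; // simulate local-dc peers catching up
    System.out.println(registry.getGauges().get(name).getValue()); // prints 0.0
  }
}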
use of com.github.ambry.config.ClusterMapConfig in project ambry by linkedin.
the class ReplicationTest method replicaFromLeaderToStandbyTest.
/**
* Test the state transition in the replication manager from LEADER to STANDBY.
* Test setup: when creating partitions, make sure that there is exactly one replica in LEADER state in each datacenter.
* Test condition: when a partition on the current node moves from leader to standby, verify that the in-memory map
* storing partition to peer leader replicas is updated correctly.
* @throws Exception
*/
@Test
public void replicaFromLeaderToStandbyTest() throws Exception {
  MockClusterMap clusterMap = new MockClusterMap();
  ClusterMapConfig clusterMapConfig = new ClusterMapConfig(verifiableProperties);
  MockHelixParticipant.metricRegistry = new MetricRegistry();
  MockHelixParticipant mockHelixParticipant = new MockHelixParticipant(clusterMapConfig);
  ReplicationConfig initialReplicationConfig = replicationConfig;
  properties.setProperty("replication.model.across.datacenters", "LEADER_BASED");
  replicationConfig = new ReplicationConfig(new VerifiableProperties(properties));
  Pair<StorageManager, ReplicationManager> managers =
      createStorageManagerAndReplicationManager(clusterMap, clusterMapConfig, mockHelixParticipant);
  StorageManager storageManager = managers.getFirst();
  MockReplicationManager replicationManager = (MockReplicationManager) managers.getSecond();
  PartitionId existingPartition = replicationManager.partitionToPartitionInfo.keySet().iterator().next();
  mockHelixParticipant.onPartitionBecomeLeaderFromStandby(existingPartition.toPathString());
  Map<String, Set<ReplicaId>> peerLeaderReplicasByPartition =
      replicationManager.leaderBasedReplicationAdmin.getLeaderPartitionToPeerLeaderReplicas();
  assertTrue("Partition is not present in the map of partition to peer leader replicas after it moved from standby to leader",
      peerLeaderReplicasByPartition.containsKey(existingPartition.toPathString()));
  mockHelixParticipant.onPartitionBecomeStandbyFromLeader(existingPartition.toPathString());
  assertFalse("Partition is still present in the map of partition to peer leader replicas after it moved from leader to standby",
      peerLeaderReplicasByPartition.containsKey(existingPartition.toPathString()));
  storageManager.shutdown();
  replicationConfig = initialReplicationConfig;
}
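To make the bookkeeping concrete: the leaderBasedReplicationAdmin map the test inspects associates each leader partition with its peer leader replicas, with entries added on STANDBY -> LEADER and removed on LEADER -> STANDBY. A simplified, hypothetical illustration of that pattern (not Ambry's actual implementation; all names here are invented for clarity):
import java.util.Map;
import java.util.Set;
import java.util.concurrent.ConcurrentHashMap;

// Hypothetical sketch of partition -> peer-leader-replicas bookkeeping.
class LeaderTrackerSketch {
  private final Map<String, Set<String>> leaderPartitionToPeerLeaders = new ConcurrentHashMap<>();

  void onStandbyToLeader(String partition, Set<String> peerLeaders) {
    leaderPartitionToPeerLeaders.put(partition, peerLeaders); // start tracking on promotion
  }

  void onLeaderToStandby(String partition) {
    leaderPartitionToPeerLeaders.remove(partition); // stop tracking on demotion
  }

  boolean isTrackedLeader(String partition) {
    return leaderPartitionToPeerLeaders.containsKey(partition);
  }
}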
use of com.github.ambry.config.ClusterMapConfig in project ambry by linkedin.
the class ReplicationTest method onReplicaAddedOrRemovedCallbackTest.
/**
* Test the cluster map change callback in {@link ReplicationManager} when any remote replicas are added or removed.
* Test setup: attempt to add 3 replicas and remove 3 replicas respectively. The three replicas are picked as follows:
* (1) 1st replica is on the current node (should be skipped)
* (2) 2nd replica is on a remote node and shares a partition with the current node (should be added or removed)
* (3) 3rd replica is on a remote node but doesn't share a partition with the current node (should be skipped)
* @throws Exception
*/
@Test
public void onReplicaAddedOrRemovedCallbackTest() throws Exception {
  MockClusterMap clusterMap = new MockClusterMap();
  ClusterMapConfig clusterMapConfig = new ClusterMapConfig(verifiableProperties);
  StoreConfig storeConfig = new StoreConfig(verifiableProperties);
  // pick a node with no special partition as current node
  Set<DataNodeId> specialPartitionNodes = clusterMap.getSpecialPartition()
      .getReplicaIds()
      .stream()
      .map(ReplicaId::getDataNodeId)
      .collect(Collectors.toSet());
  DataNodeId currentNode =
      clusterMap.getDataNodes().stream().filter(d -> !specialPartitionNodes.contains(d)).findFirst().get();
  MockStoreKeyConverterFactory storeKeyConverterFactory = new MockStoreKeyConverterFactory(null, null);
  storeKeyConverterFactory.setConversionMap(new HashMap<>());
  StorageManager storageManager =
      new StorageManager(storeConfig, new DiskManagerConfig(verifiableProperties), Utils.newScheduler(1, true),
          new MetricRegistry(), null, clusterMap, currentNode, null, null, new MockTime(), null,
          new InMemAccountService(false, false));
  storageManager.start();
  MockReplicationManager replicationManager =
      new MockReplicationManager(replicationConfig, clusterMapConfig, storeConfig, storageManager, clusterMap,
          currentNode, storeKeyConverterFactory, null);
  ClusterMapChangeListener clusterMapChangeListener = clusterMap.getClusterMapChangeListener();
  // find the special partition (not on current node) and get an irrelevant replica from it
  PartitionId absentPartition = clusterMap.getSpecialPartition();
  ReplicaId irrelevantReplica = absentPartition.getReplicaIds().get(0);
  // find an existing replica on current node and one of its peer replicas on remote node
  ReplicaId existingReplica = clusterMap.getReplicaIds(currentNode).get(0);
  ReplicaId peerReplicaToRemove =
      existingReplica.getPartitionId().getReplicaIds().stream().filter(r -> r != existingReplica).findFirst().get();
  // create a new node and place a peer of existing replica on it.
  MockDataNodeId remoteNode = createDataNode(
      getListOfPorts(PLAIN_TEXT_PORT_START_NUMBER + 10, SSL_PORT_START_NUMBER + 10, HTTP2_PORT_START_NUMBER + 10),
      clusterMap.getDatacenterName((byte) 0), 3);
  ReplicaId addedReplica =
      new MockReplicaId(remoteNode.getPort(), (MockPartitionId) existingReplica.getPartitionId(), remoteNode, 0);
  // populate added replica and removed replica lists
  List<ReplicaId> replicasToAdd = new ArrayList<>(Arrays.asList(existingReplica, addedReplica, irrelevantReplica));
  List<ReplicaId> replicasToRemove =
      new ArrayList<>(Arrays.asList(existingReplica, peerReplicaToRemove, irrelevantReplica));
  PartitionInfo partitionInfo =
      replicationManager.getPartitionToPartitionInfoMap().get(existingReplica.getPartitionId());
  assertNotNull("PartitionInfo is not found", partitionInfo);
  RemoteReplicaInfo peerReplicaInfo = partitionInfo.getRemoteReplicaInfos()
      .stream()
      .filter(info -> info.getReplicaId() == peerReplicaToRemove)
      .findFirst()
      .get();
  // get the replica-thread for this peer replica
  ReplicaThread peerReplicaThread = peerReplicaInfo.getReplicaThread();
  // Test Case 1: replication manager encountered exception during startup (remote replica addition/removal will be skipped)
  replicationManager.startWithException();
  clusterMapChangeListener.onReplicaAddedOrRemoved(replicasToAdd, replicasToRemove);
  // verify that PartitionInfo stays unchanged
  verifyRemoteReplicaInfo(partitionInfo, addedReplica, false);
  verifyRemoteReplicaInfo(partitionInfo, peerReplicaToRemove, true);
  // Test Case 2: startup latch is interrupted
  CountDownLatch initialLatch = replicationManager.startupLatch;
  CountDownLatch mockLatch = Mockito.mock(CountDownLatch.class);
  doThrow(new InterruptedException()).when(mockLatch).await();
  replicationManager.startupLatch = mockLatch;
  try {
    clusterMapChangeListener.onReplicaAddedOrRemoved(replicasToAdd, replicasToRemove);
    fail("should fail because startup latch is interrupted");
  } catch (IllegalStateException e) {
    // expected
  }
  replicationManager.startupLatch = initialLatch;
  // Test Case 3: replication manager is successfully started
  replicationManager.start();
  clusterMapChangeListener.onReplicaAddedOrRemoved(replicasToAdd, replicasToRemove);
  // verify that PartitionInfo has latest remote replica infos
  verifyRemoteReplicaInfo(partitionInfo, addedReplica, true);
  verifyRemoteReplicaInfo(partitionInfo, peerReplicaToRemove, false);
  verifyRemoteReplicaInfo(partitionInfo, irrelevantReplica, false);
  // verify new added replica is assigned to a certain thread
  ReplicaThread replicaThread = replicationManager.getDataNodeIdToReplicaThreadMap().get(addedReplica.getDataNodeId());
assertNotNull("There is no ReplicaThread assocated with new replica", replicaThread);
  Optional<RemoteReplicaInfo> findResult = replicaThread.getRemoteReplicaInfos()
      .get(remoteNode)
      .stream()
      .filter(info -> info.getReplicaId() == addedReplica)
      .findAny();
  assertTrue("New added remote replica info should exist in corresponding thread", findResult.isPresent());
  // verify the removed replica info's thread is null
  assertNull("Thread in removed replica info should be null", peerReplicaInfo.getReplicaThread());
  findResult = peerReplicaThread.getRemoteReplicaInfos()
      .get(peerReplicaToRemove.getDataNodeId())
      .stream()
      .filter(info -> info.getReplicaId() == peerReplicaToRemove)
      .findAny();
  assertFalse("Previous replica thread should not contain RemoteReplicaInfo that is already removed",
      findResult.isPresent());
  storageManager.shutdown();
}
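The selection logic the doc comment describes -- act only on replicas that live on a remote node and share a partition already hosted on the current node -- can be summarized in a hypothetical filter like the following. The class, method, and parameters are invented for illustration and are not Ambry's actual implementation; only the ReplicaId/DataNodeId/PartitionId interfaces are assumed from Ambry's clustermap package:
import java.util.List;
import java.util.Set;
import java.util.stream.Collectors;
import com.github.ambry.clustermap.DataNodeId;
import com.github.ambry.clustermap.PartitionId;
import com.github.ambry.clustermap.ReplicaId;

class ReplicaChangeFilterSketch {
  // Hypothetical filter: keep replicas on remote nodes whose partition is hosted locally.
  static List<ReplicaId> relevantReplicas(List<ReplicaId> changed, DataNodeId currentNode,
      Set<PartitionId> partitionsOnCurrentNode) {
    return changed.stream()
        .filter(r -> !r.getDataNodeId().equals(currentNode)) // case (1): skip the current node's own replica
        .filter(r -> partitionsOnCurrentNode.contains(r.getPartitionId())) // case (3): skip unshared partitions
        .collect(Collectors.toList());
  }
}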