use of com.github.ambry.config.ReplicationConfig in project ambry by linkedin.
the class ReplicationTest method onReplicaAddedOrRemovedCallbackTest.
/**
 * Test cluster map change callback in {@link ReplicationManager} when any remote replicas are added or removed.
 * Test setup: attempt to add 3 replicas and remove 3 replicas respectively. The three replicas are picked as follows:
 * (1) 1st replica on current node (should skip)
 * (2) 2nd replica on remote node sharing partition with current one (should be added or removed)
 * (3) 3rd replica on remote node but doesn't share partition with current one (should skip)
 * @throws Exception
 */
@Test
public void onReplicaAddedOrRemovedCallbackTest() throws Exception {
  MockClusterMap clusterMap = new MockClusterMap();
  ClusterMapConfig clusterMapConfig = new ClusterMapConfig(verifiableProperties);
  StoreConfig storeConfig = new StoreConfig(verifiableProperties);
  // pick a node with no special partition as current node
  Set<DataNodeId> specialPartitionNodes = clusterMap.getSpecialPartition()
      .getReplicaIds()
      .stream()
      .map(ReplicaId::getDataNodeId)
      .collect(Collectors.toSet());
  DataNodeId currentNode =
      clusterMap.getDataNodes().stream().filter(d -> !specialPartitionNodes.contains(d)).findFirst().get();
  MockStoreKeyConverterFactory storeKeyConverterFactory = new MockStoreKeyConverterFactory(null, null);
  storeKeyConverterFactory.setConversionMap(new HashMap<>());
  StorageManager storageManager =
      new StorageManager(storeConfig, new DiskManagerConfig(verifiableProperties), Utils.newScheduler(1, true),
          new MetricRegistry(), null, clusterMap, currentNode, null, null, new MockTime(), null,
          new InMemAccountService(false, false));
  storageManager.start();
  MockReplicationManager replicationManager =
      new MockReplicationManager(replicationConfig, clusterMapConfig, storeConfig, storageManager, clusterMap,
          currentNode, storeKeyConverterFactory, null);
  ClusterMapChangeListener clusterMapChangeListener = clusterMap.getClusterMapChangeListener();
  // find the special partition (not on current node) and get an irrelevant replica from it
  PartitionId absentPartition = clusterMap.getSpecialPartition();
  ReplicaId irrelevantReplica = absentPartition.getReplicaIds().get(0);
  // find an existing replica on current node and one of its peer replicas on remote node
  ReplicaId existingReplica = clusterMap.getReplicaIds(currentNode).get(0);
  ReplicaId peerReplicaToRemove =
      existingReplica.getPartitionId().getReplicaIds().stream().filter(r -> r != existingReplica).findFirst().get();
  // create a new node and place a peer of existing replica on it.
  MockDataNodeId remoteNode = createDataNode(
      getListOfPorts(PLAIN_TEXT_PORT_START_NUMBER + 10, SSL_PORT_START_NUMBER + 10, HTTP2_PORT_START_NUMBER + 10),
      clusterMap.getDatacenterName((byte) 0), 3);
  ReplicaId addedReplica =
      new MockReplicaId(remoteNode.getPort(), (MockPartitionId) existingReplica.getPartitionId(), remoteNode, 0);
  // populate added replica and removed replica lists
  List<ReplicaId> replicasToAdd = new ArrayList<>(Arrays.asList(existingReplica, addedReplica, irrelevantReplica));
  List<ReplicaId> replicasToRemove =
      new ArrayList<>(Arrays.asList(existingReplica, peerReplicaToRemove, irrelevantReplica));
  PartitionInfo partitionInfo =
      replicationManager.getPartitionToPartitionInfoMap().get(existingReplica.getPartitionId());
  assertNotNull("PartitionInfo is not found", partitionInfo);
  RemoteReplicaInfo peerReplicaInfo = partitionInfo.getRemoteReplicaInfos()
      .stream()
      .filter(info -> info.getReplicaId() == peerReplicaToRemove)
      .findFirst()
      .get();
  // get the replica-thread for this peer replica
  ReplicaThread peerReplicaThread = peerReplicaInfo.getReplicaThread();
  // Test Case 1: replication manager encountered exception during startup (remote replica addition/removal will be
  // skipped)
  replicationManager.startWithException();
  clusterMapChangeListener.onReplicaAddedOrRemoved(replicasToAdd, replicasToRemove);
  // verify that PartitionInfo stays unchanged
  verifyRemoteReplicaInfo(partitionInfo, addedReplica, false);
  verifyRemoteReplicaInfo(partitionInfo, peerReplicaToRemove, true);
  // Test Case 2: startup latch is interrupted
  CountDownLatch initialLatch = replicationManager.startupLatch;
  CountDownLatch mockLatch = Mockito.mock(CountDownLatch.class);
  doThrow(new InterruptedException()).when(mockLatch).await();
  replicationManager.startupLatch = mockLatch;
  try {
    clusterMapChangeListener.onReplicaAddedOrRemoved(replicasToAdd, replicasToRemove);
    fail("should fail because startup latch is interrupted");
  } catch (IllegalStateException e) {
    // expected
  }
  replicationManager.startupLatch = initialLatch;
  // Test Case 3: replication manager is successfully started
  replicationManager.start();
  clusterMapChangeListener.onReplicaAddedOrRemoved(replicasToAdd, replicasToRemove);
  // verify that PartitionInfo has latest remote replica infos
  verifyRemoteReplicaInfo(partitionInfo, addedReplica, true);
  verifyRemoteReplicaInfo(partitionInfo, peerReplicaToRemove, false);
  verifyRemoteReplicaInfo(partitionInfo, irrelevantReplica, false);
  // verify newly added replica is assigned to a certain thread
  ReplicaThread replicaThread =
      replicationManager.getDataNodeIdToReplicaThreadMap().get(addedReplica.getDataNodeId());
  assertNotNull("There is no ReplicaThread associated with new replica", replicaThread);
  Optional<RemoteReplicaInfo> findResult = replicaThread.getRemoteReplicaInfos()
      .get(remoteNode)
      .stream()
      .filter(info -> info.getReplicaId() == addedReplica)
      .findAny();
  assertTrue("Newly added remote replica info should exist in corresponding thread", findResult.isPresent());
  // verify the removed replica info's thread is null
  assertNull("Thread in removed replica info should be null", peerReplicaInfo.getReplicaThread());
  findResult = peerReplicaThread.getRemoteReplicaInfos()
      .get(peerReplicaToRemove.getDataNodeId())
      .stream()
      .filter(info -> info.getReplicaId() == peerReplicaToRemove)
      .findAny();
  assertFalse("Previous replica thread should not contain RemoteReplicaInfo that is already removed",
      findResult.isPresent());
  storageManager.shutdown();
}
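The assertions above funnel through a verifyRemoteReplicaInfo helper that checks whether a given replica is (or is not) tracked in the PartitionInfo. A minimal sketch of what such a helper plausibly looks like (the body here is an assumption; the real helper lives elsewhere in ReplicationTest and may differ in details):

// Hypothetical sketch of the verification helper used in the test above.
private void verifyRemoteReplicaInfo(PartitionInfo partitionInfo, ReplicaId replica, boolean shouldExist) {
  // A replica counts as "present" when some RemoteReplicaInfo in the
  // PartitionInfo references exactly this ReplicaId instance.
  boolean found = partitionInfo.getRemoteReplicaInfos()
      .stream()
      .anyMatch(info -> info.getReplicaId() == replica);
  assertEquals("Unexpected presence state for replica " + replica, shouldExist, found);
}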
use of com.github.ambry.config.ReplicationConfig in project ambry by linkedin.
the class LeaderBasedReplicationTest method replicationTypeFromConfigTest.
/**
 * Tests to verify that the replication model is correctly reflected in code based on config properties.
 */
@Test
public void replicationTypeFromConfigTest() {
  ReplicationConfig initialReplicationConfig = replicationConfig;
  properties.remove("replication.model.across.datacenters");
  replicationConfig = new ReplicationConfig(new VerifiableProperties(properties));
  // When the config is missing, replicationModelAcrossDatacenters should default to ALL_TO_ALL
  assertEquals("Replication model mismatch from the value present in config",
      replicationConfig.replicationModelAcrossDatacenters, ReplicationModelType.ALL_TO_ALL);
  // When the config is set to "LEADER_BASED", replicationModelAcrossDatacenters should be LEADER_BASED
  properties.setProperty("replication.model.across.datacenters", "LEADER_BASED");
  replicationConfig = new ReplicationConfig(new VerifiableProperties(properties));
  assertEquals("Replication model mismatch from the value present in config",
      replicationConfig.replicationModelAcrossDatacenters, ReplicationModelType.LEADER_BASED);
  // When the config is set to "ALL_TO_ALL", replicationModelAcrossDatacenters should be ALL_TO_ALL
  properties.setProperty("replication.model.across.datacenters", "ALL_TO_ALL");
  replicationConfig = new ReplicationConfig(new VerifiableProperties(properties));
  assertEquals("Replication model mismatch from the value present in config",
      replicationConfig.replicationModelAcrossDatacenters, ReplicationModelType.ALL_TO_ALL);
  replicationConfig = initialReplicationConfig;
}
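The default asserted here follows the usual Ambry config pattern: each config field is populated from VerifiableProperties with a declared fallback value. A minimal sketch of how such a field is plausibly wired up (the class name and constructor body are assumptions inferred from the test's observed behavior, not a copy of Ambry's source):

// Hypothetical sketch; the real declaration lives in
// com.github.ambry.config.ReplicationConfig.
public class ReplicationConfigSketch {
  public final ReplicationModelType replicationModelAcrossDatacenters;

  public ReplicationConfigSketch(VerifiableProperties verifiableProperties) {
    // An absent "replication.model.across.datacenters" property falls back to
    // "ALL_TO_ALL", which is why the first assertion in the test passes.
    replicationModelAcrossDatacenters = ReplicationModelType.valueOf(
        verifiableProperties.getString("replication.model.across.datacenters", "ALL_TO_ALL"));
  }
}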
use of com.github.ambry.config.ReplicationConfig in project ambry by linkedin.
the class CloudAndStoreReplicationTest method testGetOnServerNode.
/**
 * Do a get on the recovery server node to verify that all the blob ids uploaded to the vcr node have been
 * recovered on the recovery node.
 * @param blobIdToSizeMap {@link Map} of blob id to the size uploaded to the vcr node.
 * @param node recovery server node
 * @throws IOException if any I/O error occurs during the get.
 */
private void testGetOnServerNode(Map<BlobId, Integer> blobIdToSizeMap, DataNodeId node) throws IOException {
  ConnectedChannel channel =
      ServerTestUtil.getBlockingChannelBasedOnPortType(node.getPortToConnectTo(), node.getHostname(), null, null);
  channel.connect();
  AtomicInteger correlationIdGenerator = new AtomicInteger(0);
  List<BlobId> allBlobIds = Stream.concat(cloudBlobIds.stream(), serverBlobIds.stream()).collect(Collectors.toList());
  List<PartitionRequestInfo> partitionRequestInfoList =
      Collections.singletonList(new PartitionRequestInfo(partitionId, allBlobIds));
  GetRequest getRequest = new GetRequest(correlationIdGenerator.incrementAndGet(),
      GetRequest.Replication_Client_Id_Prefix + node.getHostname(), MessageFormatFlags.All, partitionRequestInfoList,
      new ReplicationConfig(new VerifiableProperties(recoveryProperties)).replicationIncludeAll ? GetOption.Include_All
          : GetOption.None);
  channel.send(getRequest);
  GetResponse getResponse = GetResponse.readFrom(channel.receive().getInputStream(), recoveryCluster.getClusterMap());
  for (PartitionResponseInfo partitionResponseInfo : getResponse.getPartitionResponseInfoList()) {
    assertEquals("Error in getting the recovered blobs", ServerErrorCode.No_Error,
        partitionResponseInfo.getErrorCode());
    // the old per-blob overhead was 272 bytes; two newly added 4-byte blob property fields add 8 bytes, for 280 total
    for (MessageInfo messageInfo : partitionResponseInfo.getMessageInfoList()) {
      assertEquals(blobIdToSizeMap.get(messageInfo.getStoreKey()) + 280, messageInfo.getSize());
    }
  }
}
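The magic number 280 in the size assertion encodes the overhead arithmetic from the comment: a previous per-blob overhead of 272 bytes plus 8 bytes for the two newly added 4-byte blob property fields. One way to make that arithmetic explicit (hypothetical constants, not part of the actual test):

// Hypothetical named constants spelling out the 280-byte overhead used above.
private static final int OLD_PER_BLOB_OVERHEAD_BYTES = 272;
private static final int ADDED_BLOB_PROPERTY_FIELDS_BYTES = 4 + 4; // two new 4-byte fields
private static final int PER_BLOB_OVERHEAD_BYTES =
    OLD_PER_BLOB_OVERHEAD_BYTES + ADDED_BLOB_PROPERTY_FIELDS_BYTES; // 280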
use of com.github.ambry.config.ReplicationConfig in project ambry by linkedin.
the class StatsManagerTest method testReplicaFromOfflineToDropped.
/**
 * Test Offline-To-Dropped transition (both failure and success cases)
 * @throws Exception
 */
@Test
public void testReplicaFromOfflineToDropped() throws Exception {
  ClusterMapConfig clusterMapConfig = new ClusterMapConfig(verifiableProperties);
  ReplicationConfig replicationConfig = new ReplicationConfig(verifiableProperties);
  StoreConfig storeConfig = new StoreConfig(verifiableProperties);
  MockClusterMap clusterMap = new MockClusterMap();
  DataNodeId currentNode = clusterMap.getDataNodeIds().get(0);
  List<ReplicaId> localReplicas = clusterMap.getReplicaIds(currentNode);
  StorageManager storageManager =
      new StorageManager(storeConfig, new DiskManagerConfig(verifiableProperties), Utils.newScheduler(1, true),
          new MetricRegistry(), null, clusterMap, currentNode, null, Collections.singletonList(clusterParticipant),
          new MockTime(), null, new InMemAccountService(false, false));
  storageManager.start();
  MockStoreKeyConverterFactory storeKeyConverterFactory = new MockStoreKeyConverterFactory(null, null);
  storeKeyConverterFactory.setConversionMap(new HashMap<>());
  MockReplicationManager mockReplicationManager =
      new MockReplicationManager(replicationConfig, clusterMapConfig, storeConfig, storageManager, clusterMap,
          currentNode, storeKeyConverterFactory, clusterParticipant);
  MockStatsManager mockStatsManager =
      new MockStatsManager(storageManager, localReplicas, new MetricRegistry(), statsManagerConfig, clusterParticipant);
  // 1. attempt to remove replica while store is still running (remove store failure case)
  ReplicaId replicaToDrop = localReplicas.get(0);
  try {
    clusterParticipant.onPartitionBecomeDroppedFromOffline(replicaToDrop.getPartitionId().toPathString());
    fail("should fail because store is still running");
  } catch (StateTransitionException e) {
    assertEquals("Error code doesn't match", ReplicaOperationFailure, e.getErrorCode());
  }
  // 2. shutdown the store but introduce file deletion failure (put an invalid dir in the store dir)
  storageManager.shutdownBlobStore(replicaToDrop.getPartitionId());
  File invalidDir = new File(replicaToDrop.getReplicaPath(), "invalidDir");
  invalidDir.deleteOnExit();
  assertTrue("Couldn't create dir within store dir", invalidDir.mkdir());
  assertTrue("Could not make unreadable", invalidDir.setReadable(false));
  try {
    clusterParticipant.onPartitionBecomeDroppedFromOffline(replicaToDrop.getPartitionId().toPathString());
    fail("should fail because store deletion fails");
  } catch (StateTransitionException e) {
    assertEquals("Error code doesn't match", ReplicaOperationFailure, e.getErrorCode());
  }
  // reset permission to allow deletion to succeed.
  assertTrue("Could not make readable", invalidDir.setReadable(true));
  assertTrue("Could not delete invalid dir", invalidDir.delete());
  // 3. success case (remove another replica because previous replica has been removed from in-mem data structures)
  ReplicaId replica = localReplicas.get(1);
  storageManager.shutdownBlobStore(replica.getPartitionId());
  MockHelixParticipant mockHelixParticipant = Mockito.spy(clusterParticipant);
  doNothing().when(mockHelixParticipant).setPartitionDisabledState(anyString(), anyBoolean());
  mockHelixParticipant.onPartitionBecomeDroppedFromOffline(replica.getPartitionId().toPathString());
  // verify that the replica is no longer present in StorageManager
  assertNull("Store of removed replica should not exist", storageManager.getStore(replica.getPartitionId(), true));
  // purposely remove the same replica in ReplicationManager again to verify it no longer exists
  assertFalse("Should return false because replica no longer exists", mockReplicationManager.removeReplica(replica));
  // purposely remove the same replica in StatsManager again to verify it no longer exists
  assertFalse("Should return false because replica no longer exists", mockStatsManager.removeReplica(replica));
  verify(mockHelixParticipant).setPartitionDisabledState(replica.getPartitionId().toPathString(), false);
  storageManager.shutdown();
  mockStatsManager.shutdown();
}
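The last two assertions rely on replica removal being idempotent: once the Offline-To-Dropped transition has dropped the replica, later removeReplica calls on ReplicationManager and StatsManager return false instead of throwing. A minimal sketch of that pattern, assuming the manager tracks replicas in a map keyed by partition (the field and map choice are illustrative, not Ambry's actual internals):

// Illustrative idempotent-removal pattern; names are hypothetical and do not
// mirror Ambry's actual ReplicationManager or StatsManager fields.
private final Map<PartitionId, ReplicaId> trackedReplicas = new ConcurrentHashMap<>();

boolean removeReplica(ReplicaId replica) {
  // Map.remove returns null when the key is absent, so removing the same
  // replica a second time reports false rather than failing the caller.
  return trackedReplicas.remove(replica.getPartitionId()) != null;
}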