use of com.github.ambry.clustermap.PartitionId in project ambry by linkedin.
the class LeaderBasedReplicationTest method replicaThreadLeaderBasedReplicationTokenCatchUpForStandbyToLeaderTest.
/**
* Test leader based replication to verify remote token is caught up for standby replicas and updated token is used
* when their state transitions to leader.
* @throws Exception
*/
@Test
public void replicaThreadLeaderBasedReplicationTokenCatchUpForStandbyToLeaderTest() throws Exception {
/*
Setup:
we have 3 nodes that have replicas belonging to same partitions:
a) localNode (local node that hosts partitions)
b) remoteNodeInLocalDC (remote node in local data center that shares the partitions)
c) remoteNodeInRemoteDC (remote node in remote data center that shares the partitions)
Each node have few of its partitions as leaders and others are standby. They are randomly assigned during creation
of replicas for mock partitions.
*/
Map<DataNodeId, MockHost> hosts = new HashMap<>();
hosts.put(remoteNodeInLocalDC, remoteHostInLocalDC);
hosts.put(remoteNodeInRemoteDC, remoteHostInRemoteDC);
int batchSize = 5;
ConnectionPool mockConnectionPool = new MockConnectionPool(hosts, clusterMap, batchSize);
Pair<StorageManager, ReplicationManager> managers = createStorageManagerAndReplicationManager(clusterMap, clusterMapConfig, mockHelixParticipant, mockConnectionPool);
StorageManager storageManager = managers.getFirst();
MockReplicationManager replicationManager = (MockReplicationManager) managers.getSecond();
// set mock local stores on all remoteReplicaInfos which will used during replication.
for (PartitionId partitionId : replicationManager.partitionToPartitionInfo.keySet()) {
localHost.addStore(partitionId, null);
Store localStore = localHost.getStore(partitionId);
localStore.start();
List<RemoteReplicaInfo> remoteReplicaInfos = replicationManager.partitionToPartitionInfo.get(partitionId).getRemoteReplicaInfos();
remoteReplicaInfos.forEach(remoteReplicaInfo -> remoteReplicaInfo.setLocalStore(localStore));
}
// get remote replicas and replica thread for remote host on local datacenter
ReplicaThread intraColoReplicaThread = replicationManager.dataNodeIdToReplicaThread.get(remoteNodeInLocalDC);
List<RemoteReplicaInfo> remoteReplicaInfosForLocalDC = intraColoReplicaThread.getRemoteReplicaInfos().get(remoteNodeInLocalDC);
// get remote replicas and replica thread for remote host on remote datacenter
ReplicaThread crossColoReplicaThread = replicationManager.dataNodeIdToReplicaThread.get(remoteNodeInRemoteDC);
List<RemoteReplicaInfo> remoteReplicaInfosForRemoteDC = crossColoReplicaThread.getRemoteReplicaInfos().get(remoteNodeInRemoteDC);
// mock helix transition state from standby to leader for local leader partitions
List<? extends ReplicaId> replicaIds = clusterMap.getReplicaIds(replicationManager.dataNodeId);
for (ReplicaId replicaId : replicaIds) {
MockReplicaId mockReplicaId = (MockReplicaId) replicaId;
if (mockReplicaId.getReplicaState() == ReplicaState.LEADER) {
MockPartitionId mockPartitionId = (MockPartitionId) replicaId.getPartitionId();
mockHelixParticipant.onPartitionBecomeLeaderFromStandby(mockPartitionId.toPathString());
}
}
// Add put messages to all partitions on remoteHost1 and remoteHost2
List<PartitionId> partitionIds = clusterMap.getWritablePartitionIds(null);
for (PartitionId partitionId : partitionIds) {
// add batchSize messages to the remoteHost1 and remote host 2 from which local host will replicate.
addPutMessagesToReplicasOfPartition(partitionId, Arrays.asList(remoteHostInLocalDC, remoteHostInRemoteDC), batchSize + batchSize);
}
// Choose partitions that are leaders on both local and remote nodes
Set<ReplicaId> leaderReplicasOnLocalAndRemoteNodes = new HashSet<>();
// Track a standby replica which has leader partition on remote node. We will update the state of replica to leader after one cycle of replication
// and verify that replication resumes from remote token.
MockReplicaId localStandbyReplicaWithLeaderPartitionOnRemoteNode = null;
List<? extends ReplicaId> localReplicas = clusterMap.getReplicaIds(replicationManager.dataNodeId);
List<? extends ReplicaId> remoteReplicas = clusterMap.getReplicaIds(remoteNodeInRemoteDC);
for (int i = 0; i < localReplicas.size(); i++) {
MockReplicaId localReplica = (MockReplicaId) localReplicas.get(i);
MockReplicaId remoteReplica = (MockReplicaId) remoteReplicas.get(i);
if (localReplica.getReplicaState() == ReplicaState.LEADER && remoteReplica.getReplicaState() == ReplicaState.LEADER) {
leaderReplicasOnLocalAndRemoteNodes.add(remoteReplicas.get(i));
}
if (localReplica.getReplicaState() == ReplicaState.STANDBY && remoteReplica.getReplicaState() == ReplicaState.LEADER && localStandbyReplicaWithLeaderPartitionOnRemoteNode == null) {
localStandbyReplicaWithLeaderPartitionOnRemoteNode = localReplica;
}
}
// replicate with remote node in remote DC
crossColoReplicaThread.replicate();
// missing messages are not fetched yet.
for (RemoteReplicaInfo remoteReplicaInfo : remoteReplicaInfosForRemoteDC) {
if (leaderReplicasOnLocalAndRemoteNodes.contains(remoteReplicaInfo.getReplicaId())) {
assertEquals("remote token mismatch for leader replicas", ((MockFindToken) remoteReplicaInfo.getToken()).getIndex(), batchSize - 1);
} else {
assertEquals("remote token should not move forward for standby replicas until missing keys are fetched", ((MockFindToken) remoteReplicaInfo.getToken()).getIndex(), 0);
}
}
// Replicate with remote node in local dc
intraColoReplicaThread.replicate();
// verify that remote token will be moved for all intra-colo replicas with token index = batchSize-1
for (RemoteReplicaInfo replicaInfo : remoteReplicaInfosForLocalDC) {
assertEquals("mismatch in remote token set for intra colo replicas", ((MockFindToken) replicaInfo.getToken()).getIndex(), batchSize - 1);
}
// process missing keys for cross colo replicas from previous metadata exchange
for (RemoteReplicaInfo remoteReplicaInfo : remoteReplicaInfosForRemoteDC) {
crossColoReplicaThread.processMissingKeysFromPreviousMetadataResponse(remoteReplicaInfo);
}
// as missing keys must now be obtained via intra-dc replication
for (RemoteReplicaInfo replicaInfo : remoteReplicaInfosForRemoteDC) {
assertEquals("mismatch in remote token set for inter colo replicas", ((MockFindToken) replicaInfo.getToken()).getIndex(), batchSize - 1);
}
// If we have a local standby replica with leader partition on remote node, change its state to leader
if (localStandbyReplicaWithLeaderPartitionOnRemoteNode != null) {
MockPartitionId mockPartitionId = (MockPartitionId) localStandbyReplicaWithLeaderPartitionOnRemoteNode.getPartitionId();
mockHelixParticipant.onPartitionBecomeLeaderFromStandby(mockPartitionId.toPathString());
}
// Trigger replication again with remote node in remote DC
crossColoReplicaThread.replicate();
// leader localStandbyReplicaWithLeaderPartitionOnRemoteNode whose replication should be have resumed with remote token index = batchSize-1
for (RemoteReplicaInfo remoteReplicaInfo : remoteReplicaInfosForRemoteDC) {
if (leaderReplicasOnLocalAndRemoteNodes.contains(remoteReplicaInfo.getReplicaId()) || (remoteReplicaInfo.getLocalReplicaId().equals(localStandbyReplicaWithLeaderPartitionOnRemoteNode))) {
assertEquals("remote token mismatch for leader replicas", ((MockFindToken) remoteReplicaInfo.getToken()).getIndex(), batchSize * 2 - 2);
} else {
assertEquals("remote token should not move forward for standby replicas until missing keys are fetched", ((MockFindToken) remoteReplicaInfo.getToken()).getIndex(), batchSize - 1);
}
}
// Trigger replication again with remote node in local DC
intraColoReplicaThread.replicate();
// verify that remote token is moved forward for all intra-colo replicas.
for (RemoteReplicaInfo replicaInfo : remoteReplicaInfosForLocalDC) {
assertEquals("mismatch in remote token set for intra colo replicas", ((MockFindToken) replicaInfo.getToken()).getIndex(), batchSize * 2 - 2);
}
// process missing keys for cross colo replicas from previous metadata exchange
for (RemoteReplicaInfo remoteReplicaInfo : remoteReplicaInfosForRemoteDC) {
crossColoReplicaThread.processMissingKeysFromPreviousMetadataResponse(remoteReplicaInfo);
}
// via intra-dc replication.
for (RemoteReplicaInfo remoteReplicaInfo : remoteReplicaInfosForRemoteDC) {
assertEquals("mismatch in remote token set for intra colo replicas", ((MockFindToken) remoteReplicaInfo.getToken()).getIndex(), batchSize * 2 - 2);
}
storageManager.shutdown();
}
use of com.github.ambry.clustermap.PartitionId in project ambry by linkedin.
the class MockHost method getRemoteReplicaInfos.
/**
* Gets the list of {@link RemoteReplicaInfo} from this host to the given {@code remoteHost}
* @param remoteHost the host whose replica info is required.
* @param listener the {@link ReplicationTest.StoreEventListener} to use.
* @return the list of {@link RemoteReplicaInfo} from this host to the given {@code remoteHost}
*/
List<RemoteReplicaInfo> getRemoteReplicaInfos(MockHost remoteHost, ReplicationTest.StoreEventListener listener) {
List<? extends ReplicaId> replicaIds = clusterMap.getReplicaIds(dataNodeId);
List<RemoteReplicaInfo> remoteReplicaInfos = new ArrayList<>();
for (ReplicaId replicaId : replicaIds) {
for (ReplicaId peerReplicaId : replicaId.getPeerReplicaIds()) {
if (peerReplicaId.getDataNodeId().equals(remoteHost.dataNodeId)) {
PartitionId partitionId = replicaId.getPartitionId();
InMemoryStore store = storesByPartition.computeIfAbsent(partitionId, partitionId1 -> new InMemoryStore(partitionId, infosByPartition.computeIfAbsent(partitionId1, (Function<PartitionId, List<MessageInfo>>) partitionId2 -> new ArrayList<>()), buffersByPartition.computeIfAbsent(partitionId1, (Function<PartitionId, List<ByteBuffer>>) partitionId22 -> new ArrayList<>()), listener));
store.start();
RemoteReplicaInfo remoteReplicaInfo = new RemoteReplicaInfo(peerReplicaId, replicaId, store, new MockFindToken(0, 0), Long.MAX_VALUE, SystemTime.getInstance(), new Port(peerReplicaId.getDataNodeId().getPort(), PortType.PLAINTEXT));
remoteReplicaInfos.add(remoteReplicaInfo);
}
}
}
return remoteReplicaInfos;
}
use of com.github.ambry.clustermap.PartitionId in project ambry by linkedin.
the class CloudToStoreReplicationManagerTest method cloudReplicaAdditionTest.
/**
* Test both success and failure cases when adding cloud replica
* @throws Exception
*/
@Test
public void cloudReplicaAdditionTest() throws Exception {
StorageManager storageManager = new StorageManager(storeConfig, new DiskManagerConfig(verifiableProperties), Utils.newScheduler(1, true), clusterMap.getMetricRegistry(), null, clusterMap, currentNode, null, Collections.singletonList(mockHelixParticipant), new MockTime(), null, new InMemAccountService(false, false));
CloudToStoreReplicationManager cloudToStoreReplicationManager = new CloudToStoreReplicationManager(replicationConfig, clusterMapConfig, storeConfig, storageManager, storeKeyFactory, clusterMap, mockScheduler, currentNode, null, clusterMap.getMetricRegistry(), null, storeKeyConverterFactory, serverConfig.serverMessageTransformer, mockClusterSpectator, mockHelixParticipant);
storageManager.start();
cloudToStoreReplicationManager.start();
mockClusterSpectator.spectate();
// 1. test adding cloud replica that is not present locally
mockHelixParticipant.onPartitionBecomeLeaderFromStandby(NEW_PARTITION_NAME);
assertNull("Cloud replica thread should not be created", TestUtils.getThreadByThisName(REPLICA_THREAD_PREFIX));
// create a new partition and add corresponding store in storage manager
PartitionId newPartition = new MockPartitionId(Long.parseLong(NEW_PARTITION_NAME), MockClusterMap.DEFAULT_PARTITION_CLASS, clusterMap.getDataNodes(), 0);
ReplicaId replicaToAdd = newPartition.getReplicaIds().get(0);
assertTrue("Adding new store should succeed", storageManager.addBlobStore(replicaToAdd));
// 2. we deliberately shut down the store to induce failure when adding cloud replica
storageManager.shutdownBlobStore(newPartition);
mockHelixParticipant.onPartitionBecomeLeaderFromStandby(NEW_PARTITION_NAME);
assertNull("Cloud replica thread should not be created", TestUtils.getThreadByThisName(REPLICA_THREAD_PREFIX));
storageManager.startBlobStore(newPartition);
// 3. mock success case
mockHelixParticipant.onPartitionBecomeLeaderFromStandby(NEW_PARTITION_NAME);
assertNotNull("Cloud replica thread should be created for DC1", TestUtils.getThreadByThisName(REPLICA_THREAD_PREFIX));
cloudToStoreReplicationManager.shutdown();
storageManager.shutdown();
}
use of com.github.ambry.clustermap.PartitionId in project ambry by linkedin.
the class ReplicationTest method blockDeprecatedContainerReplicationTest.
/**
* Tests if deprecated containers have been blocked during replication.
*/
@Test
public void blockDeprecatedContainerReplicationTest() throws Exception {
Properties properties = new Properties();
properties.setProperty("replication.container.deletion.enabled", "true");
replicationConfig = new ReplicationConfig(new VerifiableProperties(properties));
MockClusterMap clusterMap = new MockClusterMap();
Pair<MockHost, MockHost> localAndRemoteHosts = getLocalAndRemoteHosts(clusterMap);
MockHost localHost = localAndRemoteHosts.getFirst();
MockHost remoteHost = localAndRemoteHosts.getSecond();
MockStoreKeyConverterFactory storeKeyConverterFactory = new MockStoreKeyConverterFactory(null, null);
storeKeyConverterFactory.setConversionMap(new HashMap<>());
storeKeyConverterFactory.setReturnInputIfAbsent(true);
MockStoreKeyConverterFactory.MockStoreKeyConverter storeKeyConverter = storeKeyConverterFactory.getStoreKeyConverter();
Map<StoreKey, StoreKey> conversionMap = new HashMap<>();
storeKeyConverter.setConversionMap(conversionMap);
List<PartitionId> partitionIds = clusterMap.getWritablePartitionIds(null);
for (int i = 0; i < partitionIds.size(); i++) {
PartitionId partitionId = partitionIds.get(i);
BlobId b0 = generateRandomBlobId(partitionId);
conversionMap.put(b0, b0);
BlobId b1 = generateRandomBlobId(partitionId);
conversionMap.put(b1, b1);
// add 2 messages to both hosts.
storeKeyConverter.setConversionMap(conversionMap);
storeKeyConverter.convert(conversionMap.keySet());
// addPutMessagesToReplicasOfPartition(Arrays.asList(b0), Arrays.asList(localHost, remoteHost));
// add 3 messages to the remote host only
addPutMessagesToReplicasOfPartition(Arrays.asList(b0, b1), Collections.singletonList(remoteHost));
}
StoreKeyFactory storeKeyFactory = new BlobIdFactory(clusterMap);
Transformer transformer = new BlobIdTransformer(storeKeyFactory, storeKeyConverter);
int batchSize = 4;
ReplicationMetrics replicationMetrics = new ReplicationMetrics(new MetricRegistry(), clusterMap.getReplicaIds(localHost.dataNodeId));
replicationMetrics.populateSingleColoMetrics(remoteHost.dataNodeId.getDatacenterName());
List<RemoteReplicaInfo> remoteReplicaInfoList = localHost.getRemoteReplicaInfos(remoteHost, null);
Map<DataNodeId, List<RemoteReplicaInfo>> replicasToReplicate = Collections.singletonMap(remoteHost.dataNodeId, remoteReplicaInfoList);
storeKeyFactory = Utils.getObj("com.github.ambry.commons.BlobIdFactory", clusterMap);
Map<DataNodeId, MockHost> hosts = new HashMap<>();
hosts.put(remoteHost.dataNodeId, remoteHost);
MockConnectionPool connectionPool = new MockConnectionPool(hosts, clusterMap, batchSize);
Predicate<MessageInfo> skipPredicate = new ReplicationSkipPredicate(accountService, replicationConfig);
ReplicaThread replicaThread = new ReplicaThread("threadtest", new MockFindTokenHelper(storeKeyFactory, replicationConfig), clusterMap, new AtomicInteger(0), localHost.dataNodeId, connectionPool, replicationConfig, replicationMetrics, null, storeKeyConverter, transformer, clusterMap.getMetricRegistry(), false, localHost.dataNodeId.getDatacenterName(), new ResponseHandler(clusterMap), time, null, skipPredicate);
for (RemoteReplicaInfo remoteReplicaInfo : remoteReplicaInfoList) {
replicaThread.addRemoteReplicaInfo(remoteReplicaInfo);
}
List<RemoteReplicaInfo> remoteReplicaInfos = replicasToReplicate.get(remoteHost.dataNodeId);
DataNodeId remoteNode = remoteReplicaInfos.get(0).getReplicaId().getDataNodeId();
ReplicaMetadataResponse response = replicaThread.getReplicaMetadataResponse(remoteReplicaInfos, new MockConnectionPool.MockConnection(remoteHost, batchSize), remoteNode);
// case1 DELETE_IN_PROGRESS container with retention time qualified.
for (int i = 0; i < 2; i++) {
RemoteReplicaInfo remoteReplicaInfo = remoteReplicaInfos.get(i);
ReplicaMetadataResponseInfo replicaMetadataResponseInfo = response.getReplicaMetadataResponseInfoList().get(i);
new ResponseHandler(clusterMap).onEvent(remoteReplicaInfo.getReplicaId(), replicaMetadataResponseInfo.getError());
for (int j = 0; j < replicaMetadataResponseInfo.getMessageInfoList().size(); j++) {
short accountId = replicaMetadataResponseInfo.getMessageInfoList().get(j).getAccountId();
short containerId = replicaMetadataResponseInfo.getMessageInfoList().get(j).getContainerId();
Container container = Mockito.mock(Container.class);
Account account = Mockito.mock(Account.class);
Mockito.when(account.getContainerById(containerId)).thenReturn(container);
Mockito.when(accountService.getAccountById(accountId)).thenReturn(account);
Mockito.when(container.getDeleteTriggerTime()).thenReturn(System.currentTimeMillis() - TimeUnit.DAYS.toMillis(replicationConfig.replicationContainerDeletionRetentionDays + 1));
Mockito.when(container.getStatus()).thenReturn(Container.ContainerStatus.DELETE_IN_PROGRESS);
}
Set<MessageInfo> remoteMissingStoreKeys = replicaThread.getMissingStoreMessages(replicaMetadataResponseInfo, remoteNode, remoteReplicaInfo);
assertEquals("All DELETE_IN_PROGRESS blobs qualified with retention time should be skipped during replication", 0, remoteMissingStoreKeys.size());
Map<StoreKey, StoreKey> remoteKeyToLocalKeyMap = replicaThread.batchConvertReplicaMetadataResponseKeys(response);
replicaThread.processReplicaMetadataResponse(remoteMissingStoreKeys, replicaMetadataResponseInfo, remoteReplicaInfo, remoteNode, remoteKeyToLocalKeyMap);
}
// case2 DELETE_IN_PROGRESS container with retention time not qualified.
for (int i = 2; i < 4; i++) {
RemoteReplicaInfo remoteReplicaInfo = remoteReplicaInfos.get(i);
ReplicaMetadataResponseInfo replicaMetadataResponseInfo = response.getReplicaMetadataResponseInfoList().get(i);
new ResponseHandler(clusterMap).onEvent(remoteReplicaInfo.getReplicaId(), replicaMetadataResponseInfo.getError());
for (int j = 0; j < replicaMetadataResponseInfo.getMessageInfoList().size(); j++) {
short accountId = replicaMetadataResponseInfo.getMessageInfoList().get(j).getAccountId();
short containerId = replicaMetadataResponseInfo.getMessageInfoList().get(j).getContainerId();
Container container = Mockito.mock(Container.class);
Account account = Mockito.mock(Account.class);
Mockito.when(account.getContainerById(containerId)).thenReturn(container);
Mockito.when(accountService.getAccountById(accountId)).thenReturn(account);
Mockito.when(container.getStatus()).thenReturn(Container.ContainerStatus.DELETE_IN_PROGRESS);
Mockito.when(container.getDeleteTriggerTime()).thenReturn(System.currentTimeMillis());
}
Set<MessageInfo> remoteMissingStoreKeys = replicaThread.getMissingStoreMessages(replicaMetadataResponseInfo, remoteNode, remoteReplicaInfo);
assertEquals("All DELETE_IN_PROGRESS blobs not qualified with retention time should not be skipped during replication", 2, remoteMissingStoreKeys.size());
Map<StoreKey, StoreKey> remoteKeyToLocalKeyMap = replicaThread.batchConvertReplicaMetadataResponseKeys(response);
replicaThread.processReplicaMetadataResponse(remoteMissingStoreKeys, replicaMetadataResponseInfo, remoteReplicaInfo, remoteNode, remoteKeyToLocalKeyMap);
}
// case3 INACTIVE container
for (int i = 4; i < 6; i++) {
RemoteReplicaInfo remoteReplicaInfo = remoteReplicaInfos.get(i);
ReplicaMetadataResponseInfo replicaMetadataResponseInfo = response.getReplicaMetadataResponseInfoList().get(i);
new ResponseHandler(clusterMap).onEvent(remoteReplicaInfo.getReplicaId(), replicaMetadataResponseInfo.getError());
for (int j = 0; j < replicaMetadataResponseInfo.getMessageInfoList().size(); j++) {
short accountId = replicaMetadataResponseInfo.getMessageInfoList().get(j).getAccountId();
short containerId = replicaMetadataResponseInfo.getMessageInfoList().get(j).getContainerId();
Container container = Mockito.mock(Container.class);
Account account = Mockito.mock(Account.class);
Mockito.when(account.getContainerById(containerId)).thenReturn(container);
Mockito.when(accountService.getAccountById(accountId)).thenReturn(account);
Mockito.when(container.getStatus()).thenReturn(Container.ContainerStatus.INACTIVE);
}
Set<MessageInfo> remoteMissingStoreKeys = replicaThread.getMissingStoreMessages(replicaMetadataResponseInfo, remoteNode, remoteReplicaInfo);
assertEquals("All INACTIVE blobs should be skipped during replication", 0, remoteMissingStoreKeys.size());
Map<StoreKey, StoreKey> remoteKeyToLocalKeyMap = replicaThread.batchConvertReplicaMetadataResponseKeys(response);
replicaThread.processReplicaMetadataResponse(remoteMissingStoreKeys, replicaMetadataResponseInfo, remoteReplicaInfo, remoteNode, remoteKeyToLocalKeyMap);
}
// case 4 ACTIVE Container
for (int i = 6; i < 8; i++) {
RemoteReplicaInfo remoteReplicaInfo = remoteReplicaInfos.get(i);
ReplicaMetadataResponseInfo replicaMetadataResponseInfo = response.getReplicaMetadataResponseInfoList().get(i);
new ResponseHandler(clusterMap).onEvent(remoteReplicaInfo.getReplicaId(), replicaMetadataResponseInfo.getError());
for (int j = 0; j < replicaMetadataResponseInfo.getMessageInfoList().size(); j++) {
short accountId = replicaMetadataResponseInfo.getMessageInfoList().get(j).getAccountId();
short containerId = replicaMetadataResponseInfo.getMessageInfoList().get(j).getContainerId();
Container container = Mockito.mock(Container.class);
Account account = Mockito.mock(Account.class);
Mockito.when(account.getContainerById(containerId)).thenReturn(container);
Mockito.when(accountService.getAccountById(accountId)).thenReturn(account);
Mockito.when(container.getStatus()).thenReturn(Container.ContainerStatus.ACTIVE);
}
Set<MessageInfo> remoteMissingStoreKeys = replicaThread.getMissingStoreMessages(replicaMetadataResponseInfo, remoteNode, remoteReplicaInfo);
assertEquals("All non-deprecated blobs should not be skipped during replication", 2, remoteMissingStoreKeys.size());
Map<StoreKey, StoreKey> remoteKeyToLocalKeyMap = replicaThread.batchConvertReplicaMetadataResponseKeys(response);
replicaThread.processReplicaMetadataResponse(remoteMissingStoreKeys, replicaMetadataResponseInfo, remoteReplicaInfo, remoteNode, remoteKeyToLocalKeyMap);
}
}
use of com.github.ambry.clustermap.PartitionId in project ambry by linkedin.
the class ReplicationTest method replicaFromStandbyToInactiveTest.
/**
* Test STANDBY -> INACTIVE transition on existing replica (both success and failure cases)
*/
@Test
public void replicaFromStandbyToInactiveTest() throws Exception {
MockClusterMap clusterMap = new MockClusterMap();
ClusterMapConfig clusterMapConfig = new ClusterMapConfig(verifiableProperties);
MockHelixParticipant.metricRegistry = new MetricRegistry();
MockHelixParticipant mockHelixParticipant = new MockHelixParticipant(clusterMapConfig);
Pair<StorageManager, ReplicationManager> managers = createStorageManagerAndReplicationManager(clusterMap, clusterMapConfig, mockHelixParticipant);
StorageManager storageManager = managers.getFirst();
MockReplicationManager replicationManager = (MockReplicationManager) managers.getSecond();
// get an existing partition to test both success and failure cases
PartitionId existingPartition = replicationManager.partitionToPartitionInfo.keySet().iterator().next();
storageManager.shutdownBlobStore(existingPartition);
try {
mockHelixParticipant.onPartitionBecomeInactiveFromStandby(existingPartition.toPathString());
fail("should fail because store is not started");
} catch (StateTransitionException e) {
assertEquals("Error code doesn't match", StoreNotStarted, e.getErrorCode());
}
// restart the store and trigger Standby-To-Inactive transition again
storageManager.startBlobStore(existingPartition);
// write a blob with size = 100 into local store (end offset of last PUT = 100 + 18 = 118)
Store localStore = storageManager.getStore(existingPartition);
MockId id = new MockId(TestUtils.getRandomString(10), Utils.getRandomShort(TestUtils.RANDOM), Utils.getRandomShort(TestUtils.RANDOM));
long crc = (new Random()).nextLong();
long blobSize = 100;
MessageInfo info = new MessageInfo(id, blobSize, false, false, Utils.Infinite_Time, crc, id.getAccountId(), id.getContainerId(), Utils.Infinite_Time);
List<MessageInfo> infos = new ArrayList<>();
List<ByteBuffer> buffers = new ArrayList<>();
ByteBuffer buffer = ByteBuffer.wrap(TestUtils.getRandomBytes((int) blobSize));
infos.add(info);
buffers.add(buffer);
localStore.put(new MockMessageWriteSet(infos, buffers));
ReplicaId localReplica = storageManager.getReplica(existingPartition.toPathString());
// override partition state change listener in ReplicationManager to help thread manipulation
mockHelixParticipant.registerPartitionStateChangeListener(StateModelListenerType.ReplicationManagerListener, replicationManager.replicationListener);
CountDownLatch participantLatch = new CountDownLatch(1);
replicationManager.listenerExecutionLatch = new CountDownLatch(1);
// create a new thread and trigger STANDBY -> INACTIVE transition
Utils.newThread(() -> {
mockHelixParticipant.onPartitionBecomeInactiveFromStandby(existingPartition.toPathString());
participantLatch.countDown();
}, false).start();
assertTrue("Partition state change listener didn't get called within 1 sec", replicationManager.listenerExecutionLatch.await(1, TimeUnit.SECONDS));
assertEquals("Local store state should be INACTIVE", ReplicaState.INACTIVE, storageManager.getStore(existingPartition).getCurrentState());
List<RemoteReplicaInfo> remoteReplicaInfos = replicationManager.partitionToPartitionInfo.get(existingPartition).getRemoteReplicaInfos();
ReplicaId peerReplica1 = remoteReplicaInfos.get(0).getReplicaId();
assertFalse("Sync up should not complete because not enough replicas have caught up", mockHelixParticipant.getReplicaSyncUpManager().updateReplicaLagAndCheckSyncStatus(localReplica, peerReplica1, 10L, ReplicaState.INACTIVE));
// pick another remote replica to update the replication lag
ReplicaId peerReplica2 = remoteReplicaInfos.get(1).getReplicaId();
replicationManager.updateTotalBytesReadByRemoteReplica(existingPartition, peerReplica1.getDataNodeId().getHostname(), peerReplica1.getReplicaPath(), 118);
assertFalse("Sync up shouldn't complete because only one replica has caught up with local replica", mockHelixParticipant.getReplicaSyncUpManager().isSyncUpComplete(localReplica));
// make second peer replica catch up with last PUT in local store
replicationManager.updateTotalBytesReadByRemoteReplica(existingPartition, peerReplica2.getDataNodeId().getHostname(), peerReplica2.getReplicaPath(), 118);
assertTrue("Standby-To-Inactive transition didn't complete within 1 sec", participantLatch.await(1, TimeUnit.SECONDS));
// we purposely update lag against local replica to verify local replica is no longer in ReplicaSyncUpManager because
// deactivation is complete and local replica should be removed from "replicaToLagInfos" map.
assertFalse("Sync up should complete (2 replicas have caught up), hence updated should be false", mockHelixParticipant.getReplicaSyncUpManager().updateReplicaLagAndCheckSyncStatus(localReplica, peerReplica2, 0L, ReplicaState.INACTIVE));
storageManager.shutdown();
}
Aggregations