Use of com.github.ambry.clustermap.AmbryReplicaSyncUpManager in project ambry by linkedin.
The class ReplicationTest, method replicationLagMetricAndSyncUpTest.
/**
* Tests {@link ReplicationMetrics#getMaxLagForPartition(PartitionId)} and replica sync-up (bootstrap completion) via {@link AmbryReplicaSyncUpManager}.
* @throws Exception
*/
@Test
public void replicationLagMetricAndSyncUpTest() throws Exception {
MockClusterMap clusterMap = new MockClusterMap();
ClusterMapConfig clusterMapConfig = new ClusterMapConfig(verifiableProperties);
AmbryReplicaSyncUpManager replicaSyncUpService = new AmbryReplicaSyncUpManager(clusterMapConfig);
Pair<MockHost, MockHost> localAndRemoteHosts = getLocalAndRemoteHosts(clusterMap);
MockHost localHost = localAndRemoteHosts.getFirst();
MockHost remoteHost1 = localAndRemoteHosts.getSecond();
// create another host, remoteHost2, that shares the special partition with localHost and remoteHost1
PartitionId specialPartitionId = clusterMap.getWritablePartitionIds(MockClusterMap.SPECIAL_PARTITION_CLASS).get(0);
MockHost remoteHost2 = new MockHost(specialPartitionId.getReplicaIds().get(2).getDataNodeId(), clusterMap);
MockStoreKeyConverterFactory storeKeyConverterFactory = new MockStoreKeyConverterFactory(null, null);
storeKeyConverterFactory.setConversionMap(new HashMap<>());
storeKeyConverterFactory.setReturnInputIfAbsent(true);
MockStoreKeyConverterFactory.MockStoreKeyConverter storeKeyConverter = storeKeyConverterFactory.getStoreKeyConverter();
int batchSize = 4;
List<PartitionId> partitionIds = clusterMap.getWritablePartitionIds(null);
for (int i = 0; i < partitionIds.size(); i++) {
PartitionId partitionId = partitionIds.get(i);
// add batchSize + 1 messages to remoteHost1 so that two rounds of replication are needed.
addPutMessagesToReplicasOfPartition(partitionId, Collections.singletonList(remoteHost1), batchSize + 1);
}
// add batchSize - 1 messages to remoteHost2 so that localHost can catch up within one cycle of replication
for (ReplicaId replicaId : clusterMap.getReplicaIds(remoteHost2.dataNodeId)) {
addPutMessagesToReplicasOfPartition(replicaId.getPartitionId(), Collections.singletonList(remoteHost2), batchSize - 1);
}
StoreKeyFactory storeKeyFactory = new BlobIdFactory(clusterMap);
Transformer transformer = new BlobIdTransformer(storeKeyFactory, storeKeyConverter);
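// set up remote replica infos and a replica thread that replicates from remoteHost1; replicaSyncUpService is passed in so bootstrap progress can be tracked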
Pair<Map<DataNodeId, List<RemoteReplicaInfo>>, ReplicaThread> replicasAndThread1 = getRemoteReplicasAndReplicaThread(batchSize, clusterMap, localHost, remoteHost1, storeKeyConverter, transformer, null, replicaSyncUpService);
Map<DataNodeId, List<RemoteReplicaInfo>> replicasToReplicate1 = replicasAndThread1.getFirst();
ReplicaThread replicaThread1 = replicasAndThread1.getSecond();
// mock Bootstrap-To-Standby transition in ReplicationManager: 1. update store current state; 2. initiate bootstrap
replicasToReplicate1.get(remoteHost1.dataNodeId).forEach(info -> info.getLocalStore().setCurrentState(ReplicaState.BOOTSTRAP));
clusterMap.getReplicaIds(localHost.dataNodeId).forEach(replicaSyncUpService::initiateBootstrap);
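// first round of replication: exchange metadata with remoteHost1 and fetch the missing messages (up to batchSize per partition)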
List<ReplicaThread.ExchangeMetadataResponse> response = replicaThread1.exchangeMetadata(new MockConnectionPool.MockConnection(remoteHost1, batchSize), replicasToReplicate1.get(remoteHost1.dataNodeId));
replicaThread1.fixMissingStoreKeys(new MockConnectionPool.MockConnection(remoteHost1, batchSize), replicasToReplicate1.get(remoteHost1.dataNodeId), response, false);
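// after the first round only batchSize of the batchSize + 1 messages have been replicated, so the lag should equal the total size of the messages that are still missing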
for (PartitionId partitionId : partitionIds) {
List<MessageInfo> allMessageInfos = localAndRemoteHosts.getSecond().infosByPartition.get(partitionId);
long expectedLag = allMessageInfos.subList(batchSize, allMessageInfos.size()).stream().mapToLong(MessageInfo::getSize).sum();
assertEquals("Replication lag doesn't match expected value", expectedLag, replicaThread1.getReplicationMetrics().getMaxLagForPartition(partitionId));
}
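// second round of replication transfers the remaining messages, so the lag for every partition should drop to 0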
response = replicaThread1.exchangeMetadata(new MockConnectionPool.MockConnection(remoteHost1, batchSize), replicasToReplicate1.get(remoteHost1.dataNodeId));
replicaThread1.fixMissingStoreKeys(new MockConnectionPool.MockConnection(remoteHost1, batchSize), replicasToReplicate1.get(remoteHost1.dataNodeId), response, false);
for (PartitionId partitionId : partitionIds) {
assertEquals("Replication lag should equal to 0", 0, replicaThread1.getReplicationMetrics().getMaxLagForPartition(partitionId));
}
// replicate with remoteHost2 to ensure special replica has caught up with enough peers
Pair<Map<DataNodeId, List<RemoteReplicaInfo>>, ReplicaThread> replicasAndThread2 = getRemoteReplicasAndReplicaThread(batchSize, clusterMap, localHost, remoteHost2, storeKeyConverter, transformer, null, replicaSyncUpService);
Map<DataNodeId, List<RemoteReplicaInfo>> replicasToReplicate2 = replicasAndThread2.getFirst();
ReplicaThread replicaThread2 = replicasAndThread2.getSecond();
// initiate bootstrap on replica of special partition
RemoteReplicaInfo specialReplicaInfo = replicasToReplicate2.get(remoteHost2.dataNodeId).stream().filter(info -> info.getReplicaId().getPartitionId() == specialPartitionId).findFirst().get();
specialReplicaInfo.getLocalStore().setCurrentState(ReplicaState.BOOTSTRAP);
replicaSyncUpService.initiateBootstrap(specialReplicaInfo.getLocalReplicaId());
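// remoteHost2 holds only batchSize - 1 messages, so a single round of metadata exchange and fix-missing-keys is enough for the special replica to catch up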
response = replicaThread2.exchangeMetadata(new MockConnectionPool.MockConnection(remoteHost2, batchSize), replicasToReplicate2.get(remoteHost2.dataNodeId));
replicaThread2.fixMissingStoreKeys(new MockConnectionPool.MockConnection(remoteHost2, batchSize), replicasToReplicate2.get(remoteHost2.dataNodeId), response, false);
// verify replica of special partition has completed bootstrap and becomes standby
assertEquals("Store state is not expected", ReplicaState.STANDBY, specialReplicaInfo.getLocalStore().getCurrentState());
}
Use of com.github.ambry.clustermap.AmbryReplicaSyncUpManager in project ambry by linkedin.
The class ReplicationTest, method replicaResumeDecommissionTest.
/**
* Tests that resuming decommission on a replica behaves correctly.
* @throws Exception
*/
@Test
public void replicaResumeDecommissionTest() throws Exception {
MockClusterMap clusterMap = new MockClusterMap();
ClusterMapConfig clusterMapConfig = new ClusterMapConfig(verifiableProperties);
MockHelixParticipant.metricRegistry = new MetricRegistry();
MockHelixParticipant mockHelixParticipant = Mockito.spy(new MockHelixParticipant(clusterMapConfig));
doNothing().when(mockHelixParticipant).setPartitionDisabledState(anyString(), anyBoolean());
// choose a replica on the local node and put a decommission file into its directory
ReplicaId localReplica = clusterMap.getReplicaIds(clusterMap.getDataNodeIds().get(0)).get(0);
String partitionName = localReplica.getPartitionId().toPathString();
File decommissionFile = new File(localReplica.getReplicaPath(), "decommission_in_progress");
assertTrue("Can't create decommission file", decommissionFile.createNewFile());
Pair<StorageManager, ReplicationManager> managers = createStorageManagerAndReplicationManager(clusterMap, clusterMapConfig, mockHelixParticipant);
StorageManager storageManager = managers.getFirst();
// failure case 1: store is not started when resuming decommission
storageManager.shutdownBlobStore(localReplica.getPartitionId());
try {
mockHelixParticipant.onPartitionBecomeDroppedFromOffline(partitionName);
fail("should fail");
} catch (StateTransitionException e) {
assertEquals("Mismatch in error code", ReplicaOperationFailure, e.getErrorCode());
}
storageManager.startBlobStore(localReplica.getPartitionId());
// failure case 2: fail to remove replica from InstanceConfig in Helix
AmbryReplicaSyncUpManager replicaSyncUpManager = (AmbryReplicaSyncUpManager) mockHelixParticipant.getReplicaSyncUpManager();
mockHelixParticipant.updateNodeInfoReturnVal = false;
CountDownLatch executionLatch = new CountDownLatch(1);
AtomicBoolean exceptionOccurred = new AtomicBoolean(false);
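// run the Offline-To-Dropped transition on a separate thread, since it waits for deactivation and disconnection to be signalled through the sync-up manager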
Utils.newThread(() -> {
try {
mockHelixParticipant.onPartitionBecomeDroppedFromOffline(partitionName);
fail("should fail because updating node info returns false");
} catch (StateTransitionException e) {
exceptionOccurred.getAndSet(true);
assertEquals("Mismatch in error code", ReplicaOperationFailure, e.getErrorCode());
} finally {
executionLatch.countDown();
}
}, false).start();
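// wait until the transition thread registers its deactivation latch, then signal that deactivation is complete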
while (!replicaSyncUpManager.getPartitionToDeactivationLatch().containsKey(partitionName)) {
Thread.sleep(100);
}
replicaSyncUpManager.onDeactivationComplete(localReplica);
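// likewise, wait for the disconnection latch and signal that disconnection is complete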
while (!replicaSyncUpManager.getPartitionToDisconnectionLatch().containsKey(partitionName)) {
Thread.sleep(100);
}
replicaSyncUpManager.onDisconnectionComplete(localReplica);
assertTrue("Offline-To-Dropped transition didn't complete within 1 sec", executionLatch.await(1, TimeUnit.SECONDS));
assertTrue("State transition exception should be thrown", exceptionOccurred.get());
mockHelixParticipant.updateNodeInfoReturnVal = null;
storageManager.startBlobStore(localReplica.getPartitionId());
// success case
mockHelixParticipant.mockStatsManagerListener = Mockito.mock(PartitionStateChangeListener.class);
doNothing().when(mockHelixParticipant.mockStatsManagerListener).onPartitionBecomeDroppedFromOffline(anyString());
mockHelixParticipant.registerPartitionStateChangeListener(StateModelListenerType.StatsManagerListener, mockHelixParticipant.mockStatsManagerListener);
CountDownLatch participantLatch = new CountDownLatch(1);
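// rerun the Offline-To-Dropped transition on a separate thread; with updateNodeInfoReturnVal reset, it should now succeed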
Utils.newThread(() -> {
mockHelixParticipant.onPartitionBecomeDroppedFromOffline(partitionName);
participantLatch.countDown();
}, false).start();
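// unblock deactivation and disconnection again so the transition can run to completion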
while (!replicaSyncUpManager.getPartitionToDeactivationLatch().containsKey(partitionName)) {
Thread.sleep(100);
}
replicaSyncUpManager.onDeactivationComplete(localReplica);
while (!replicaSyncUpManager.getPartitionToDisconnectionLatch().containsKey(partitionName)) {
Thread.sleep(100);
}
replicaSyncUpManager.onDisconnectionComplete(localReplica);
assertTrue("Offline-To-Dropped transition didn't complete within 1 sec", participantLatch.await(1, TimeUnit.SECONDS));
// verify stats manager listener is called
verify(mockHelixParticipant.mockStatsManagerListener).onPartitionBecomeDroppedFromOffline(anyString());
// verify setPartitionDisabledState method is called
verify(mockHelixParticipant).setPartitionDisabledState(partitionName, false);
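// once decommission completes, the replica directory should have been removed from disk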
File storeDir = new File(localReplica.getReplicaPath());
assertFalse("Store dir should not exist", storeDir.exists());
storageManager.shutdown();
}