Use of com.github.ambry.clustermap.ReplicaId in project ambry by linkedin.
The class StorageManagerTest, method multiParticipantsMarkStoreInErrorStateTest.
/**
 * Test that, if a store is not started, all participants on this node are able to mark it in ERROR state during the
 * OFFLINE -> BOOTSTRAP transition. Concretely, each participant is expected to throw a
 * {@link StateTransitionException} carrying the {@code StoreNotStarted} error code.
 * @throws Exception
 */
@Test
public void multiParticipantsMarkStoreInErrorStateTest() throws Exception {
  MockDataNodeId localNode = clusterMap.getDataNodes().get(0);
  List<ReplicaId> localReplicas = clusterMap.getReplicaIds(localNode);
  List<ClusterParticipant> clusterParticipants =
      Arrays.asList(new MockClusterParticipant(), new MockClusterParticipant());
  StorageManager manager = createStorageManager(localNode, metricRegistry, clusterParticipants);
  manager.start();
  // shut down the store of the first replica so that the transition below cannot succeed
  PartitionId stoppedPartition = localReplicas.get(0).getPartitionId();
  manager.shutdownBlobStore(stoppedPartition);
  String stoppedPartitionName = stoppedPartition.toPathString();
  // every participant must fail the OFFLINE -> BOOTSTRAP transition for the stopped store
  for (ClusterParticipant clusterParticipant : clusterParticipants) {
    MockClusterParticipant mockParticipant = (MockClusterParticipant) clusterParticipant;
    try {
      mockParticipant.onPartitionBecomeBootstrapFromOffline(stoppedPartitionName);
      fail("should fail because store is not started");
    } catch (StateTransitionException expected) {
      assertEquals("Error code doesn't match", StoreNotStarted, expected.getErrorCode());
    }
  }
  shutdownAndAssertStoresInaccessible(manager, localReplicas);
}
Use of com.github.ambry.clustermap.ReplicaId in project ambry by linkedin.
The class StorageManagerTest, method replicaFromStandbyToInactiveTest.
/**
 * Test both success and failure cases during STANDBY -> INACTIVE transition. The cases are exercised in order:
 * replica not found, store removed, store not started, store disabled, success, and finally a failure that is
 * mocked through {@code MockStorageManager}.
 * @throws Exception
 */
@Test
public void replicaFromStandbyToInactiveTest() throws Exception {
  generateConfigs(true, false);
  MockDataNodeId localNode = clusterMap.getDataNodes().get(0);
  List<ReplicaId> localReplicas = clusterMap.getReplicaIds(localNode);
  MockClusterParticipant mockHelixParticipant = new MockClusterParticipant();
  StorageManager storageManager =
      createStorageManager(localNode, metricRegistry, Collections.singletonList(mockHelixParticipant));
  storageManager.start();
  // 1. get listeners from Helix participant and verify there is a storageManager listener.
  Map<StateModelListenerType, PartitionStateChangeListener> listeners =
      mockHelixParticipant.getPartitionStateChangeListeners();
  assertTrue("Should contain storage manager listener",
      listeners.containsKey(StateModelListenerType.StorageManagerListener));
  // 2. not found replica should encounter exception ("-1" is not the name of any local partition)
  try {
    mockHelixParticipant.onPartitionBecomeInactiveFromStandby("-1");
    fail("should fail because replica is not found");
  } catch (StateTransitionException e) {
    assertEquals("Error code doesn't match", ReplicaNotFound, e.getErrorCode());
  }
  // 3. not found store should throw exception (induced by removing the store)
  ReplicaId replicaToRemove = localReplicas.get(localReplicas.size() - 1);
  storageManager.controlCompactionForBlobStore(replicaToRemove.getPartitionId(), false);
  storageManager.shutdownBlobStore(replicaToRemove.getPartitionId());
  storageManager.getDiskManager(replicaToRemove.getPartitionId()).removeBlobStore(replicaToRemove.getPartitionId());
  try {
    mockHelixParticipant.onPartitionBecomeInactiveFromStandby(replicaToRemove.getPartitionId().toPathString());
    fail("should fail because store is not found");
  } catch (StateTransitionException e) {
    assertEquals("Error code doesn't match", ReplicaNotFound, e.getErrorCode());
  }
  // 4. store not started exception
  ReplicaId localReplica = localReplicas.get(0);
  storageManager.shutdownBlobStore(localReplica.getPartitionId());
  try {
    mockHelixParticipant.onPartitionBecomeInactiveFromStandby(localReplica.getPartitionId().toPathString());
    fail("should fail because store is not started");
  } catch (StateTransitionException e) {
    assertEquals("Error code doesn't match", StoreNotStarted, e.getErrorCode());
  }
  // restart the store so that the remaining cases operate on a running store
  storageManager.startBlobStore(localReplica.getPartitionId());
  // 5. store is disabled due to disk I/O error
  BlobStore localStore = (BlobStore) storageManager.getStore(localReplica.getPartitionId());
  localStore.setDisableState(true);
  try {
    mockHelixParticipant.onPartitionBecomeInactiveFromStandby(localReplica.getPartitionId().toPathString());
    fail("should fail because store is disabled");
  } catch (StateTransitionException e) {
    assertEquals("Error code doesn't match", ReplicaOperationFailure, e.getErrorCode());
  }
  localStore.setDisableState(false);
  // 6. success case (verify both replica's state and decommission file)
  mockHelixParticipant.onPartitionBecomeInactiveFromStandby(localReplica.getPartitionId().toPathString());
  assertEquals("local store state should be set to INACTIVE", ReplicaState.INACTIVE,
      storageManager.getStore(localReplica.getPartitionId()).getCurrentState());
  File decommissionFile = new File(localReplica.getReplicaPath(), BlobStore.DECOMMISSION_FILE_NAME);
  assertTrue("Decommission file is not found in local replica's dir", decommissionFile.exists());
  shutdownAndAssertStoresInaccessible(storageManager, localReplicas);
  // 7. mock disable compaction failure (a fresh participant and a MockStorageManager are used for this case)
  mockHelixParticipant = new MockClusterParticipant();
  MockStorageManager mockStorageManager =
      new MockStorageManager(localNode, Collections.singletonList(mockHelixParticipant));
  mockStorageManager.start();
  try {
    mockHelixParticipant.onPartitionBecomeInactiveFromStandby(localReplica.getPartitionId().toPathString());
    // like the other failure cases, the transition must not succeed; without this the step would pass silently
    fail("should fail because compaction cannot be disabled");
  } catch (StateTransitionException e) {
    assertEquals("Error code doesn't match", ReplicaNotFound, e.getErrorCode());
  } finally {
    // always release the mock storage manager, even if the assertions above fail
    shutdownAndAssertStoresInaccessible(mockStorageManager, localReplicas);
  }
}
Use of com.github.ambry.clustermap.ReplicaId in project ambry by linkedin.
The class StorageManagerTest, method isDiskAvailableTest.
/**
 * Tests that {@link StorageManager} can correctly determine whether a disk is available based on the states of the
 * stores on it: a disk is reported available while at least one of its stores is up, unavailable once all of its
 * stores are shut down, and available again after one store is restarted.
 */
@Test
public void isDiskAvailableTest() throws Exception {
  MockDataNodeId localNode = clusterMap.getDataNodes().get(0);
  List<ReplicaId> localReplicas = clusterMap.getReplicaIds(localNode);
  StorageManager manager = createStorageManager(localNode, metricRegistry, null);
  manager.start();
  assertEquals("There should be no unexpected partitions reported", 0, getNumUnrecognizedPartitionsReported());
  // group the local replicas by the disk they reside on
  Map<DiskId, List<ReplicaId>> replicasByDisk = new HashMap<>();
  for (ReplicaId localReplica : localReplicas) {
    DiskId disk = localReplica.getDiskId();
    List<ReplicaId> sameDiskReplicas = replicasByDisk.get(disk);
    if (sameDiskReplicas == null) {
      sameDiskReplicas = new ArrayList<>();
      replicasByDisk.put(disk, sameDiskReplicas);
    }
    sameDiskReplicas.add(localReplica);
  }
  // phase 1: on every disk, shut down all stores but the last one
  for (List<ReplicaId> diskReplicas : replicasByDisk.values()) {
    int lastIndex = diskReplicas.size() - 1;
    int index = 0;
    while (index < lastIndex) {
      manager.getStore(diskReplicas.get(index).getPartitionId(), false).shutdown();
      index++;
    }
  }
  // each disk still has one running store, so all disks must be reported as available
  for (List<ReplicaId> diskReplicas : replicasByDisk.values()) {
    ReplicaId firstReplica = diskReplicas.get(0);
    assertTrue("Disk should be available", manager.isDiskAvailable(firstReplica.getDiskId()));
    assertEquals("Disk state be available", HardwareState.AVAILABLE, firstReplica.getDiskId().getState());
  }
  // phase 2: shut down the remaining (last) store on every disk
  for (List<ReplicaId> diskReplicas : replicasByDisk.values()) {
    ReplicaId lastReplica = diskReplicas.get(diskReplicas.size() - 1);
    manager.getStore(lastReplica.getPartitionId(), false).shutdown();
  }
  // no store is running any more, so all disks must be reported as unavailable
  for (List<ReplicaId> diskReplicas : replicasByDisk.values()) {
    assertFalse("Disk should be unavailable", manager.isDiskAvailable(diskReplicas.get(0).getDiskId()));
  }
  // phase 3: restart the first store on every disk
  for (List<ReplicaId> diskReplicas : replicasByDisk.values()) {
    manager.startBlobStore(diskReplicas.get(0).getPartitionId());
  }
  // one store per disk is running again, so all disks must be reported as available once more
  for (List<ReplicaId> diskReplicas : replicasByDisk.values()) {
    ReplicaId firstReplica = diskReplicas.get(0);
    assertTrue("Disk should be available", manager.isDiskAvailable(firstReplica.getDiskId()));
    assertEquals("Disk state be available", HardwareState.AVAILABLE, firstReplica.getDiskId().getState());
  }
  shutdownAndAssertStoresInaccessible(manager, localReplicas);
}
Use of com.github.ambry.clustermap.ReplicaId in project ambry by linkedin.
The class StorageManagerTest, method updateInstanceConfigSuccessTest.
/**
 * Test the success case of updating InstanceConfig in Helix after a new replica is added in storage manager: once
 * the OFFLINE -> BOOTSTRAP transition has run for a newly created partition, the partition must be listed under
 * the "Replicas" entry of the disk (mount path) that hosts the new replica.
 */
@Test
public void updateInstanceConfigSuccessTest() throws Exception {
  generateConfigs(true, true);
  MockDataNodeId node = clusterMap.getDataNodes().get(0);
  List<ReplicaId> existingReplicas = clusterMap.getReplicaIds(node);
  MockClusterParticipant participant = new MockClusterParticipant();
  StorageManager manager = createStorageManager(node, metricRegistry, Collections.singletonList(participant));
  manager.start();
  // create a new partition whose only replica lives on the local node
  PartitionId addedPartition = clusterMap.createNewPartition(Collections.singletonList(node));
  ReplicaId addedReplica = addedPartition.getReplicaIds().get(0);
  // register an InstanceConfig for the current node so that there is something to update
  String instanceName =
      ClusterMapUtils.getInstanceName(clusterMapConfig.clusterMapHostName, clusterMapConfig.clusterMapPort);
  InstanceConfig config = new InstanceConfig(instanceName);
  config.setHostName(node.getHostname());
  config.setPort(Integer.toString(node.getPort()));
  // the InstanceConfig starts out empty here; the non-empty case is tested in HelixParticipantTest
  Map<String, Map<String, String>> emptyDiskInfos = new HashMap<>();
  config.getRecord().setMapFields(emptyDiskInfos);
  HelixAdmin admin = participant.getHelixAdmin();
  admin.addCluster(CLUSTER_NAME);
  admin.addInstance(CLUSTER_NAME, config);
  // trigger the transition that is expected to add the new replica to the InstanceConfig
  participant.onPartitionBecomeBootstrapFromOffline(addedPartition.toPathString());
  config = admin.getInstanceConfig(CLUSTER_NAME, instanceName);
  // collect the partition names recorded for the disk hosting the new replica; each comma-separated replica
  // entry starts with the partition name followed by ':'
  Map<String, String> hostingDiskInfo = config.getRecord().getMapFields().get(addedReplica.getMountPath());
  String[] replicaEntries = hostingDiskInfo.get("Replicas").split(",");
  Set<String> recordedPartitions = new HashSet<>();
  for (String replicaEntry : replicaEntries) {
    recordedPartitions.add(replicaEntry.split(":")[0]);
  }
  assertTrue("New replica info is not found in InstanceConfig",
      recordedPartitions.contains(addedPartition.toPathString()));
  shutdownAndAssertStoresInaccessible(manager, existingReplicas);
}
Use of com.github.ambry.clustermap.ReplicaId in project ambry by linkedin.
The class StorageManagerTest, method setBlobStoreStoppedStateSuccessTest.
/**
 * Test successfully setting the stopped state of blob stores for a given list of {@link PartitionId}: all local
 * stores are first added to the STOPPED list and then removed from it, verifying after each step the returned
 * fail-to-update list, the participant's stopped replica list and the per-disk calls made on the participant.
 * @throws Exception
 */
@Test
public void setBlobStoreStoppedStateSuccessTest() throws Exception {
  MockDataNodeId dataNode = clusterMap.getDataNodes().get(0);
  List<ReplicaId> replicas = clusterMap.getReplicaIds(dataNode);
  List<PartitionId> partitionIds = new ArrayList<>();
  Map<DiskId, List<ReplicaId>> diskToReplicas = new HashMap<>();
  // test setting the state of store via instantiated MockClusterParticipant (spied on to verify invocations)
  ClusterParticipant participant = new MockClusterParticipant();
  ClusterParticipant participantSpy = Mockito.spy(participant);
  StorageManager storageManager =
      createStorageManager(dataNode, metricRegistry, Collections.singletonList(participantSpy));
  storageManager.start();
  assertEquals("There should be no unexpected partitions reported", 0, getNumUnrecognizedPartitionsReported());
  // collect all local partitions and group the replicas by the disk they reside on
  for (ReplicaId replica : replicas) {
    partitionIds.add(replica.getPartitionId());
    diskToReplicas.computeIfAbsent(replica.getDiskId(), disk -> new ArrayList<>()).add(replica);
  }
  // add a list of stores to STOPPED list. Note that the stores are residing on multiple disks.
  List<PartitionId> failToUpdateList = storageManager.setBlobStoreStoppedState(partitionIds, true);
  // make sure the update operation succeeds
  assertTrue("Add stores to stopped list should succeed, failToUpdateList should be empty",
      failToUpdateList.isEmpty());
  // make sure the stopped list contains all the added stores
  Set<String> stoppedReplicasCopy = new HashSet<>(participantSpy.getStoppedReplicas());
  for (ReplicaId replica : replicas) {
    assertTrue("The stopped list should contain the replica: " + replica.getPartitionId().toPathString(),
        stoppedReplicasCopy.contains(replica.getPartitionId().toPathString()));
  }
  // make sure the participant is invoked once per disk and each time the input replica list conforms with the
  // stores on that particular disk
  for (List<ReplicaId> replicasPerDisk : diskToReplicas.values()) {
    verify(participantSpy, times(1)).setReplicaStoppedState(replicasPerDisk, true);
  }
  // remove a list of stores from STOPPED list. The result must be captured here; otherwise the assertion below
  // would only re-check the (already verified) result of the add operation.
  failToUpdateList = storageManager.setBlobStoreStoppedState(partitionIds, false);
  // make sure the update operation succeeds
  assertTrue("Remove stores from stopped list should succeed, failToUpdateList should be empty",
      failToUpdateList.isEmpty());
  // make sure the stopped list is empty because all the stores are successfully removed.
  assertTrue("The stopped list should be empty after removing all stores",
      participantSpy.getStoppedReplicas().isEmpty());
  // make sure the participant is invoked once per disk and each time the input replica list conforms with the
  // stores on that particular disk
  for (List<ReplicaId> replicasPerDisk : diskToReplicas.values()) {
    verify(participantSpy, times(1)).setReplicaStoppedState(replicasPerDisk, false);
  }
  shutdownAndAssertStoresInaccessible(storageManager, replicas);
}
Aggregations