Search in sources :

Example 16 with ReplicaId

use of com.github.ambry.clustermap.ReplicaId in project ambry by linkedin.

the class StorageManagerTest method multiParticipantsMarkStoreInErrorStateTest.

/**
 * Verifies that, once a store has been shut down, the OFFLINE -> BOOTSTRAP transition fails with
 * {@code StoreNotStarted} for every participant registered with the storage manager on this node.
 * @throws Exception
 */
@Test
public void multiParticipantsMarkStoreInErrorStateTest() throws Exception {
    MockDataNodeId localNode = clusterMap.getDataNodes().get(0);
    List<ReplicaId> localReplicas = clusterMap.getReplicaIds(localNode);
    List<ClusterParticipant> clusterParticipants = Arrays.asList(new MockClusterParticipant(), new MockClusterParticipant());
    StorageManager manager = createStorageManager(localNode, metricRegistry, clusterParticipants);
    manager.start();
    // shut down the store of the first replica so that the transition below cannot succeed
    PartitionId stoppedPartition = localReplicas.get(0).getPartitionId();
    manager.shutdownBlobStore(stoppedPartition);
    String stoppedPartitionName = stoppedPartition.toPathString();
    // each participant is expected to surface the failure as a StateTransitionException
    for (ClusterParticipant clusterParticipant : clusterParticipants) {
        MockClusterParticipant mockParticipant = (MockClusterParticipant) clusterParticipant;
        try {
            mockParticipant.onPartitionBecomeBootstrapFromOffline(stoppedPartitionName);
            fail("should fail because store is not started");
        } catch (StateTransitionException expected) {
            assertEquals("Error code doesn't match", StoreNotStarted, expected.getErrorCode());
        }
    }
    shutdownAndAssertStoresInaccessible(manager, localReplicas);
}
Also used : MockDataNodeId(com.github.ambry.clustermap.MockDataNodeId) MockPartitionId(com.github.ambry.clustermap.MockPartitionId) PartitionId(com.github.ambry.clustermap.PartitionId) ReplicaId(com.github.ambry.clustermap.ReplicaId) ClusterParticipant(com.github.ambry.clustermap.ClusterParticipant) StateTransitionException(com.github.ambry.clustermap.StateTransitionException) BlobStoreTest(com.github.ambry.store.BlobStoreTest) Test(org.junit.Test)

Example 17 with ReplicaId

use of com.github.ambry.clustermap.ReplicaId in project ambry by linkedin.

the class StorageManagerTest method replicaFromStandbyToInactiveTest.

/**
 * Exercises the STANDBY -> INACTIVE transition through a {@code MockClusterParticipant}: several failure cases
 * (unknown replica, removed store, stopped store, disabled store, mocked storage manager) and the success case.
 */
@Test
public void replicaFromStandbyToInactiveTest() throws Exception {
    generateConfigs(true, false);
    MockDataNodeId dataNode = clusterMap.getDataNodes().get(0);
    List<ReplicaId> replicas = clusterMap.getReplicaIds(dataNode);
    MockClusterParticipant participant = new MockClusterParticipant();
    StorageManager manager = createStorageManager(dataNode, metricRegistry, Collections.singletonList(participant));
    manager.start();

    // Case 1: the participant must expose a listener registered under the StorageManagerListener type.
    Map<StateModelListenerType, PartitionStateChangeListener> registeredListeners = participant.getPartitionStateChangeListeners();
    assertTrue("Should contain storage manager listener", registeredListeners.containsKey(StateModelListenerType.StorageManagerListener));

    // Case 2: a partition name that matches no replica is rejected with ReplicaNotFound.
    try {
        participant.onPartitionBecomeInactiveFromStandby("-1");
        fail("should fail because replica is not found");
    } catch (StateTransitionException expected) {
        assertEquals("Error code doesn't match", ReplicaNotFound, expected.getErrorCode());
    }

    // Case 3: after the store of the last replica has been removed from its disk manager, the transition
    // is also rejected with ReplicaNotFound.
    ReplicaId removedReplica = replicas.get(replicas.size() - 1);
    manager.controlCompactionForBlobStore(removedReplica.getPartitionId(), false);
    manager.shutdownBlobStore(removedReplica.getPartitionId());
    manager.getDiskManager(removedReplica.getPartitionId()).removeBlobStore(removedReplica.getPartitionId());
    String removedPartitionName = removedReplica.getPartitionId().toPathString();
    try {
        participant.onPartitionBecomeInactiveFromStandby(removedPartitionName);
        fail("should fail because store is not found");
    } catch (StateTransitionException expected) {
        assertEquals("Error code doesn't match", ReplicaNotFound, expected.getErrorCode());
    }

    // Case 4: a store that exists but has been shut down yields StoreNotStarted.
    ReplicaId firstReplica = replicas.get(0);
    String firstPartitionName = firstReplica.getPartitionId().toPathString();
    manager.shutdownBlobStore(firstReplica.getPartitionId());
    try {
        participant.onPartitionBecomeInactiveFromStandby(firstPartitionName);
        fail("should fail because store is not started");
    } catch (StateTransitionException expected) {
        assertEquals("Error code doesn't match", StoreNotStarted, expected.getErrorCode());
    }
    // bring the store back up for the remaining cases
    manager.startBlobStore(firstReplica.getPartitionId());

    // Case 5: a store flagged as disabled yields ReplicaOperationFailure.
    BlobStore firstStore = (BlobStore) manager.getStore(firstReplica.getPartitionId());
    firstStore.setDisableState(true);
    try {
        participant.onPartitionBecomeInactiveFromStandby(firstPartitionName);
        fail("should fail because store is disabled");
    } catch (StateTransitionException expected) {
        assertEquals("Error code doesn't match", ReplicaOperationFailure, expected.getErrorCode());
    }
    firstStore.setDisableState(false);

    // Case 6: success - the store reports INACTIVE and a decommission file exists in the replica directory.
    participant.onPartitionBecomeInactiveFromStandby(firstPartitionName);
    assertEquals("local store state should be set to INACTIVE", ReplicaState.INACTIVE, manager.getStore(firstReplica.getPartitionId()).getCurrentState());
    File decommissionMarker = new File(firstReplica.getReplicaPath(), BlobStore.DECOMMISSION_FILE_NAME);
    assertTrue("Decommission file is not found in local replica's dir", decommissionMarker.exists());
    shutdownAndAssertStoresInaccessible(manager, replicas);

    // Case 7: repeat the transition against a MockStorageManager (mocked disable-compaction failure).
    // NOTE(review): there is no fail() after the call below, so this case also passes when no exception
    // is thrown - confirm whether that is intended.
    participant = new MockClusterParticipant();
    MockStorageManager mockManager = new MockStorageManager(dataNode, Collections.singletonList(participant));
    mockManager.start();
    try {
        participant.onPartitionBecomeInactiveFromStandby(firstPartitionName);
    } catch (StateTransitionException expected) {
        assertEquals("Error code doesn't match", ReplicaNotFound, expected.getErrorCode());
    } finally {
        shutdownAndAssertStoresInaccessible(mockManager, replicas);
    }
}
Also used : MockDataNodeId(com.github.ambry.clustermap.MockDataNodeId) PartitionStateChangeListener(com.github.ambry.clustermap.PartitionStateChangeListener) File(java.io.File) ReplicaId(com.github.ambry.clustermap.ReplicaId) StateModelListenerType(com.github.ambry.clustermap.StateModelListenerType) StateTransitionException(com.github.ambry.clustermap.StateTransitionException) BlobStoreTest(com.github.ambry.store.BlobStoreTest) Test(org.junit.Test)

Example 18 with ReplicaId

use of com.github.ambry.clustermap.ReplicaId in project ambry by linkedin.

the class StorageManagerTest method isDiskAvailableTest.

/**
 * Tests that {@link StorageManager} reports a disk as available while at least one store on it is running, as
 * unavailable once every store on it has been shut down, and as available again after one store is restarted.
 */
@Test
public void isDiskAvailableTest() throws Exception {
    MockDataNodeId localNode = clusterMap.getDataNodes().get(0);
    List<ReplicaId> localReplicas = clusterMap.getReplicaIds(localNode);
    Map<DiskId, List<ReplicaId>> replicasByDisk = new HashMap<>();
    StorageManager manager = createStorageManager(localNode, metricRegistry, null);
    manager.start();
    assertEquals("There should be no unexpected partitions reported", 0, getNumUnrecognizedPartitionsReported());
    // group the node's replicas by the disk they live on
    for (ReplicaId localReplica : localReplicas) {
        replicasByDisk.computeIfAbsent(localReplica.getDiskId(), key -> new ArrayList<>()).add(localReplica);
    }
    // on every disk, stop all stores but the last one
    for (List<ReplicaId> onDisk : replicasByDisk.values()) {
        for (ReplicaId toStop : onDisk.subList(0, onDisk.size() - 1)) {
            manager.getStore(toStop.getPartitionId(), false).shutdown();
        }
    }
    // one running store per disk is enough for the disk to count as available
    for (List<ReplicaId> onDisk : replicasByDisk.values()) {
        DiskId disk = onDisk.get(0).getDiskId();
        assertTrue("Disk should be available", manager.isDiskAvailable(disk));
        assertEquals("Disk state be available", HardwareState.AVAILABLE, disk.getState());
    }
    // stop the one store that is still running on each disk
    for (List<ReplicaId> onDisk : replicasByDisk.values()) {
        ReplicaId lastReplica = onDisk.get(onDisk.size() - 1);
        manager.getStore(lastReplica.getPartitionId(), false).shutdown();
    }
    // with no store left running, every disk must be reported as unavailable
    for (List<ReplicaId> onDisk : replicasByDisk.values()) {
        DiskId disk = onDisk.get(0).getDiskId();
        assertFalse("Disk should be unavailable", manager.isDiskAvailable(disk));
    }
    // restart the first store on each disk
    for (List<ReplicaId> onDisk : replicasByDisk.values()) {
        manager.startBlobStore(onDisk.get(0).getPartitionId());
    }
    // a single restarted store must make the disk available again
    for (List<ReplicaId> onDisk : replicasByDisk.values()) {
        DiskId disk = onDisk.get(0).getDiskId();
        assertTrue("Disk should be available", manager.isDiskAvailable(disk));
        assertEquals("Disk state be available", HardwareState.AVAILABLE, disk.getState());
    }
    shutdownAndAssertStoresInaccessible(manager, localReplicas);
}
Also used : DiskId(com.github.ambry.clustermap.DiskId) Arrays(java.util.Arrays) ClusterMapUtils(com.github.ambry.clustermap.ClusterMapUtils) DataNodeId(com.github.ambry.clustermap.DataNodeId) Random(java.util.Random) ByteBuffer(java.nio.ByteBuffer) MockHelixManagerFactory(com.github.ambry.clustermap.MockHelixManagerFactory) JSONObject(org.json.JSONObject) TestUtils(com.github.ambry.utils.TestUtils) Map(java.util.Map) After(org.junit.After) Counter(com.codahale.metrics.Counter) ClusterParticipant(com.github.ambry.clustermap.ClusterParticipant) DiskManagerConfig(com.github.ambry.config.DiskManagerConfig) Set(java.util.Set) Utils(com.github.ambry.utils.Utils) MockPartitionId(com.github.ambry.clustermap.MockPartitionId) Collectors(java.util.stream.Collectors) AccountStatsStore(com.github.ambry.accountstats.AccountStatsStore) CountDownLatch(java.util.concurrent.CountDownLatch) List(java.util.List) PartitionStateChangeListener(com.github.ambry.clustermap.PartitionStateChangeListener) StatsSnapshot(com.github.ambry.server.StatsSnapshot) Callback(com.github.ambry.commons.Callback) TransitionErrorCode(com.github.ambry.clustermap.StateTransitionException.TransitionErrorCode) InMemAccountService(com.github.ambry.account.InMemAccountService) PartitionId(com.github.ambry.clustermap.PartitionId) HashMap(java.util.HashMap) HardwareState(com.github.ambry.clustermap.HardwareState) AmbryStatsReport(com.github.ambry.server.AmbryStatsReport) ArrayList(java.util.ArrayList) HashSet(java.util.HashSet) TestUtils(com.github.ambry.clustermap.TestUtils) SystemTime(com.github.ambry.utils.SystemTime) Before(org.junit.Before) BlobStoreTest(com.github.ambry.store.BlobStoreTest) MockDataNodeId(com.github.ambry.clustermap.MockDataNodeId) ReplicaState(com.github.ambry.clustermap.ReplicaState) StateModelListenerType(com.github.ambry.clustermap.StateModelListenerType) StoreConfig(com.github.ambry.config.StoreConfig) MetricRegistry(com.codahale.metrics.MetricRegistry) Properties(java.util.Properties) 
Pair(com.github.ambry.utils.Pair) HelixParticipant(com.github.ambry.clustermap.HelixParticipant) VerifiableProperties(com.github.ambry.config.VerifiableProperties) IOException(java.io.IOException) Test(org.junit.Test) InstanceConfig(org.apache.helix.model.InstanceConfig) File(java.io.File) TimeUnit(java.util.concurrent.TimeUnit) Mockito(org.mockito.Mockito) HelixAdmin(org.apache.helix.HelixAdmin) StateTransitionException(com.github.ambry.clustermap.StateTransitionException) ReplicaId(com.github.ambry.clustermap.ReplicaId) ClusterMapConfig(com.github.ambry.config.ClusterMapConfig) Assert(org.junit.Assert) Collections(java.util.Collections) MockClusterMap(com.github.ambry.clustermap.MockClusterMap) HashMap(java.util.HashMap) MockDataNodeId(com.github.ambry.clustermap.MockDataNodeId) ArrayList(java.util.ArrayList) List(java.util.List) ArrayList(java.util.ArrayList) ReplicaId(com.github.ambry.clustermap.ReplicaId) DiskId(com.github.ambry.clustermap.DiskId) BlobStoreTest(com.github.ambry.store.BlobStoreTest) Test(org.junit.Test)

Example 19 with ReplicaId

use of com.github.ambry.clustermap.ReplicaId in project ambry by linkedin.

the class StorageManagerTest method updateInstanceConfigSuccessTest.

/**
 * Success case: after an OFFLINE -> BOOTSTRAP transition for a newly created partition, the InstanceConfig
 * read back from the participant's HelixAdmin lists that partition among the replicas of its mount path.
 */
@Test
public void updateInstanceConfigSuccessTest() throws Exception {
    generateConfigs(true, true);
    MockDataNodeId dataNode = clusterMap.getDataNodes().get(0);
    List<ReplicaId> existingReplicas = clusterMap.getReplicaIds(dataNode);
    MockClusterParticipant participant = new MockClusterParticipant();
    StorageManager manager = createStorageManager(dataNode, metricRegistry, Collections.singletonList(participant));
    manager.start();
    // create a brand-new partition placed on the local node and pick up its replica
    PartitionId addedPartition = clusterMap.createNewPartition(Collections.singletonList(dataNode));
    ReplicaId addedReplica = addedPartition.getReplicaIds().get(0);
    // register an InstanceConfig for the current node whose map fields start out empty
    // (per the original note, the non-empty case is covered in HelixParticipantTest)
    String instanceName = ClusterMapUtils.getInstanceName(clusterMapConfig.clusterMapHostName, clusterMapConfig.clusterMapPort);
    InstanceConfig instanceConfig = new InstanceConfig(instanceName);
    instanceConfig.setHostName(dataNode.getHostname());
    instanceConfig.setPort(Integer.toString(dataNode.getPort()));
    Map<String, Map<String, String>> emptyDiskInfos = new HashMap<>();
    instanceConfig.getRecord().setMapFields(emptyDiskInfos);
    HelixAdmin admin = participant.getHelixAdmin();
    admin.addCluster(CLUSTER_NAME);
    admin.addInstance(CLUSTER_NAME, instanceConfig);
    // trigger the transition and read the InstanceConfig back
    participant.onPartitionBecomeBootstrapFromOffline(addedPartition.toPathString());
    instanceConfig = admin.getInstanceConfig(CLUSTER_NAME, instanceName);
    // the "Replicas" entry is a comma-separated list whose items start with the partition name before ':'
    Map<String, String> diskInfoForMountPath = instanceConfig.getRecord().getMapFields().get(addedReplica.getMountPath());
    String replicasField = diskInfoForMountPath.get("Replicas");
    Set<String> partitionNames = new HashSet<>();
    for (String replicaEntry : replicasField.split(",")) {
        partitionNames.add(replicaEntry.split(":")[0]);
    }
    assertTrue("New replica info is not found in InstanceConfig", partitionNames.contains(addedPartition.toPathString()));
    shutdownAndAssertStoresInaccessible(manager, existingReplicas);
}
Also used : HashMap(java.util.HashMap) MockPartitionId(com.github.ambry.clustermap.MockPartitionId) PartitionId(com.github.ambry.clustermap.PartitionId) HelixAdmin(org.apache.helix.HelixAdmin) ReplicaId(com.github.ambry.clustermap.ReplicaId) InstanceConfig(org.apache.helix.model.InstanceConfig) MockDataNodeId(com.github.ambry.clustermap.MockDataNodeId) Map(java.util.Map) HashMap(java.util.HashMap) MockClusterMap(com.github.ambry.clustermap.MockClusterMap) HashSet(java.util.HashSet) BlobStoreTest(com.github.ambry.store.BlobStoreTest) Test(org.junit.Test)

Example 20 with ReplicaId

use of com.github.ambry.clustermap.ReplicaId in project ambry by linkedin.

the class StorageManagerTest method setBlobStoreStoppedStateSuccessTest.

/**
 * Test successfully setting and clearing the stopped state of blob stores for a given list of {@link PartitionId}.
 * The storage manager is wired to a Mockito spy of a {@code MockClusterParticipant} so that the calls made to
 * {@code setReplicaStoppedState} can be verified.
 */
@Test
public void setBlobStoreStoppedStateSuccessTest() throws Exception {
    MockDataNodeId dataNode = clusterMap.getDataNodes().get(0);
    List<ReplicaId> replicas = clusterMap.getReplicaIds(dataNode);
    List<PartitionId> partitionIds = new ArrayList<>();
    Map<DiskId, List<ReplicaId>> diskToReplicas = new HashMap<>();
    // test setting the state of store via instantiated MockClusterParticipant
    ClusterParticipant participant = new MockClusterParticipant();
    ClusterParticipant participantSpy = Mockito.spy(participant);
    StorageManager storageManager = createStorageManager(dataNode, metricRegistry, Collections.singletonList(participantSpy));
    storageManager.start();
    assertEquals("There should be no unexpected partitions reported", 0, getNumUnrecognizedPartitionsReported());
    // collect every partition on the node and group the replicas by disk for the verifications below
    for (ReplicaId replica : replicas) {
        partitionIds.add(replica.getPartitionId());
        diskToReplicas.computeIfAbsent(replica.getDiskId(), disk -> new ArrayList<>()).add(replica);
    }
    List<PartitionId> failToUpdateList;
    // add all stores to the STOPPED list
    failToUpdateList = storageManager.setBlobStoreStoppedState(partitionIds, true);
    // make sure the update operation succeeds
    assertTrue("Add stores to stopped list should succeed, failToUpdateList should be empty", failToUpdateList.isEmpty());
    // make sure the stopped list contains all the added stores
    Set<String> stoppedReplicasCopy = new HashSet<>(participantSpy.getStoppedReplicas());
    for (ReplicaId replica : replicas) {
        assertTrue("The stopped list should contain the replica: " + replica.getPartitionId().toPathString(), stoppedReplicasCopy.contains(replica.getPartitionId().toPathString()));
    }
    // make sure the participant is invoked exactly once per disk, each time with the replicas residing on that disk
    for (List<ReplicaId> replicasPerDisk : diskToReplicas.values()) {
        verify(participantSpy, times(1)).setReplicaStoppedState(replicasPerDisk, true);
    }
    // remove all stores from the STOPPED list; capture the result so the assertion below checks THIS call
    // rather than the stale list returned by the earlier "add" call
    failToUpdateList = storageManager.setBlobStoreStoppedState(partitionIds, false);
    // make sure the update operation succeeds
    assertTrue("Remove stores from stopped list should succeed, failToUpdateList should be empty", failToUpdateList.isEmpty());
    // make sure the stopped list is empty because all the stores are successfully removed.
    assertTrue("The stopped list should be empty after removing all stores", participantSpy.getStoppedReplicas().isEmpty());
    // make sure the participant is invoked exactly once per disk, each time with the replicas residing on that disk
    for (List<ReplicaId> replicasPerDisk : diskToReplicas.values()) {
        verify(participantSpy, times(1)).setReplicaStoppedState(replicasPerDisk, false);
    }
    shutdownAndAssertStoresInaccessible(storageManager, replicas);
}
Also used : DiskId(com.github.ambry.clustermap.DiskId) Arrays(java.util.Arrays) ClusterMapUtils(com.github.ambry.clustermap.ClusterMapUtils) DataNodeId(com.github.ambry.clustermap.DataNodeId) Random(java.util.Random) ByteBuffer(java.nio.ByteBuffer) MockHelixManagerFactory(com.github.ambry.clustermap.MockHelixManagerFactory) JSONObject(org.json.JSONObject) TestUtils(com.github.ambry.utils.TestUtils) Map(java.util.Map) After(org.junit.After) Counter(com.codahale.metrics.Counter) ClusterParticipant(com.github.ambry.clustermap.ClusterParticipant) DiskManagerConfig(com.github.ambry.config.DiskManagerConfig) Set(java.util.Set) Utils(com.github.ambry.utils.Utils) MockPartitionId(com.github.ambry.clustermap.MockPartitionId) Collectors(java.util.stream.Collectors) AccountStatsStore(com.github.ambry.accountstats.AccountStatsStore) CountDownLatch(java.util.concurrent.CountDownLatch) List(java.util.List) PartitionStateChangeListener(com.github.ambry.clustermap.PartitionStateChangeListener) StatsSnapshot(com.github.ambry.server.StatsSnapshot) Callback(com.github.ambry.commons.Callback) TransitionErrorCode(com.github.ambry.clustermap.StateTransitionException.TransitionErrorCode) InMemAccountService(com.github.ambry.account.InMemAccountService) PartitionId(com.github.ambry.clustermap.PartitionId) HashMap(java.util.HashMap) HardwareState(com.github.ambry.clustermap.HardwareState) AmbryStatsReport(com.github.ambry.server.AmbryStatsReport) ArrayList(java.util.ArrayList) HashSet(java.util.HashSet) TestUtils(com.github.ambry.clustermap.TestUtils) SystemTime(com.github.ambry.utils.SystemTime) Before(org.junit.Before) BlobStoreTest(com.github.ambry.store.BlobStoreTest) MockDataNodeId(com.github.ambry.clustermap.MockDataNodeId) ReplicaState(com.github.ambry.clustermap.ReplicaState) StateModelListenerType(com.github.ambry.clustermap.StateModelListenerType) StoreConfig(com.github.ambry.config.StoreConfig) MetricRegistry(com.codahale.metrics.MetricRegistry) Properties(java.util.Properties) 
Pair(com.github.ambry.utils.Pair) HelixParticipant(com.github.ambry.clustermap.HelixParticipant) VerifiableProperties(com.github.ambry.config.VerifiableProperties) IOException(java.io.IOException) Test(org.junit.Test) InstanceConfig(org.apache.helix.model.InstanceConfig) File(java.io.File) TimeUnit(java.util.concurrent.TimeUnit) Mockito(org.mockito.Mockito) HelixAdmin(org.apache.helix.HelixAdmin) StateTransitionException(com.github.ambry.clustermap.StateTransitionException) ReplicaId(com.github.ambry.clustermap.ReplicaId) ClusterMapConfig(com.github.ambry.config.ClusterMapConfig) Assert(org.junit.Assert) Collections(java.util.Collections) MockClusterMap(com.github.ambry.clustermap.MockClusterMap) HashMap(java.util.HashMap) ArrayList(java.util.ArrayList) MockPartitionId(com.github.ambry.clustermap.MockPartitionId) PartitionId(com.github.ambry.clustermap.PartitionId) ReplicaId(com.github.ambry.clustermap.ReplicaId) MockDataNodeId(com.github.ambry.clustermap.MockDataNodeId) List(java.util.List) ArrayList(java.util.ArrayList) ClusterParticipant(com.github.ambry.clustermap.ClusterParticipant) DiskId(com.github.ambry.clustermap.DiskId) HashSet(java.util.HashSet) BlobStoreTest(com.github.ambry.store.BlobStoreTest) Test(org.junit.Test)

Aggregations

ReplicaId (com.github.ambry.clustermap.ReplicaId)147 Test (org.junit.Test)83 PartitionId (com.github.ambry.clustermap.PartitionId)68 MockPartitionId (com.github.ambry.clustermap.MockPartitionId)60 MockReplicaId (com.github.ambry.clustermap.MockReplicaId)57 ArrayList (java.util.ArrayList)55 MockDataNodeId (com.github.ambry.clustermap.MockDataNodeId)43 DataNodeId (com.github.ambry.clustermap.DataNodeId)32 MockClusterMap (com.github.ambry.clustermap.MockClusterMap)31 MetricRegistry (com.codahale.metrics.MetricRegistry)29 HashMap (java.util.HashMap)28 HashSet (java.util.HashSet)25 ClusterMapConfig (com.github.ambry.config.ClusterMapConfig)24 VerifiableProperties (com.github.ambry.config.VerifiableProperties)24 BlobStoreTest (com.github.ambry.store.BlobStoreTest)24 File (java.io.File)24 List (java.util.List)21 Map (java.util.Map)21 Port (com.github.ambry.network.Port)20 Properties (java.util.Properties)20