Search in sources :

Example 11 with StateTransitionException

use of com.github.ambry.clustermap.StateTransitionException in project ambry by linkedin.

From the class StatsManagerTest, method testReplicaFromOfflineToDropped.

/**
 * Test Offline-To-Dropped transition (both failure and success cases).
 * <p>
 * The scenarios are exercised in this order:
 * <ol>
 *   <li>dropping a replica whose store is still running must fail with {@code ReplicaOperationFailure};</li>
 *   <li>dropping the same replica after its store is shut down, but with an unreadable directory placed inside
 *       the replica path, must also fail with {@code ReplicaOperationFailure};</li>
 *   <li>dropping a second replica whose store has been shut down must succeed; afterwards the replica must be gone
 *       from the storage manager, replication manager and stats manager, and the participant must have been asked
 *       to set the partition's disabled state to {@code false}.</li>
 * </ol>
 * The storage manager and stats manager are shut down in a {@code finally} block so that a failed assertion does
 * not leave them running.
 * @throws Exception
 */
@Test
public void testReplicaFromOfflineToDropped() throws Exception {
    ClusterMapConfig clusterMapConfig = new ClusterMapConfig(verifiableProperties);
    ReplicationConfig replicationConfig = new ReplicationConfig(verifiableProperties);
    StoreConfig storeConfig = new StoreConfig(verifiableProperties);
    MockClusterMap clusterMap = new MockClusterMap();
    DataNodeId currentNode = clusterMap.getDataNodeIds().get(0);
    List<ReplicaId> localReplicas = clusterMap.getReplicaIds(currentNode);
    StorageManager storageManager = new StorageManager(storeConfig, new DiskManagerConfig(verifiableProperties), Utils.newScheduler(1, true), new MetricRegistry(), null, clusterMap, currentNode, null, Collections.singletonList(clusterParticipant), new MockTime(), null, new InMemAccountService(false, false));
    storageManager.start();
    // declared outside the try block so that the finally block can shut it down once it has been created
    MockStatsManager mockStatsManager = null;
    try {
        MockStoreKeyConverterFactory storeKeyConverterFactory = new MockStoreKeyConverterFactory(null, null);
        storeKeyConverterFactory.setConversionMap(new HashMap<>());
        MockReplicationManager mockReplicationManager = new MockReplicationManager(replicationConfig, clusterMapConfig, storeConfig, storageManager, clusterMap, currentNode, storeKeyConverterFactory, clusterParticipant);
        mockStatsManager = new MockStatsManager(storageManager, localReplicas, new MetricRegistry(), statsManagerConfig, clusterParticipant);
        // 1. attempt to remove replica while store is still running (remove store failure case)
        ReplicaId replicaToDrop = localReplicas.get(0);
        try {
            clusterParticipant.onPartitionBecomeDroppedFromOffline(replicaToDrop.getPartitionId().toPathString());
            fail("should fail because store is still running");
        } catch (StateTransitionException e) {
            assertEquals("Error code doesn't match", ReplicaOperationFailure, e.getErrorCode());
        }
        // 2. shutdown the store but introduce file deletion failure (put an invalid dir in store dir)
        storageManager.shutdownBlobStore(replicaToDrop.getPartitionId());
        File invalidDir = new File(replicaToDrop.getReplicaPath(), "invalidDir");
        invalidDir.deleteOnExit();
        assertTrue("Couldn't create dir within store dir", invalidDir.mkdir());
        assertTrue("Could not make unreadable", invalidDir.setReadable(false));
        try {
            clusterParticipant.onPartitionBecomeDroppedFromOffline(replicaToDrop.getPartitionId().toPathString());
            fail("should fail because store deletion fails");
        } catch (StateTransitionException e) {
            assertEquals("Error code doesn't match", ReplicaOperationFailure, e.getErrorCode());
        }
        // reset permission to allow deletion to succeed.
        assertTrue("Could not make readable", invalidDir.setReadable(true));
        assertTrue("Could not delete invalid dir", invalidDir.delete());
        // 3. success case (remove another replica because previous replica has been removed from in-mem data structures)
        ReplicaId replica = localReplicas.get(1);
        storageManager.shutdownBlobStore(replica.getPartitionId());
        // spy on the participant so that setPartitionDisabledState can be stubbed out and verified afterwards
        MockHelixParticipant mockHelixParticipant = Mockito.spy(clusterParticipant);
        doNothing().when(mockHelixParticipant).setPartitionDisabledState(anyString(), anyBoolean());
        mockHelixParticipant.onPartitionBecomeDroppedFromOffline(replica.getPartitionId().toPathString());
        // verify that the replica is no longer present in StorageManager
        assertNull("Store of removed replica should not exist", storageManager.getStore(replica.getPartitionId(), true));
        // purposely remove the same replica in ReplicationManager again to verify it no longer exists
        assertFalse("Should return false because replica no longer exists", mockReplicationManager.removeReplica(replica));
        // purposely remove the same replica in StatsManager again to verify it no longer exists
        assertFalse("Should return false because replica no longer exists", mockStatsManager.removeReplica(replica));
        verify(mockHelixParticipant).setPartitionDisabledState(replica.getPartitionId().toPathString(), false);
    } finally {
        // always release the managers, even when an assertion above fails
        try {
            storageManager.shutdown();
        } finally {
            if (mockStatsManager != null) {
                mockStatsManager.shutdown();
            }
        }
    }
}
Also used : DiskManagerConfig(com.github.ambry.config.DiskManagerConfig) MockStoreKeyConverterFactory(com.github.ambry.store.MockStoreKeyConverterFactory) ReplicationConfig(com.github.ambry.config.ReplicationConfig) MetricRegistry(com.codahale.metrics.MetricRegistry) StorageManager(com.github.ambry.store.StorageManager) ClusterMapConfig(com.github.ambry.config.ClusterMapConfig) ReplicaId(com.github.ambry.clustermap.ReplicaId) MockReplicationManager(com.github.ambry.replication.MockReplicationManager) InMemAccountService(com.github.ambry.account.InMemAccountService) MockHelixParticipant(com.github.ambry.clustermap.MockHelixParticipant) StoreConfig(com.github.ambry.config.StoreConfig) DataNodeId(com.github.ambry.clustermap.DataNodeId) MockDataNodeId(com.github.ambry.clustermap.MockDataNodeId) File(java.io.File) MockTime(com.github.ambry.utils.MockTime) MockClusterMap(com.github.ambry.clustermap.MockClusterMap) StateTransitionException(com.github.ambry.clustermap.StateTransitionException) Test(org.junit.Test)

Example 12 with StateTransitionException

use of com.github.ambry.clustermap.StateTransitionException in project ambry by linkedin.

From the class StatsManagerTest, method testReplicaFromOfflineToBootstrap.

/**
 * Verifies how the stats manager reacts to the OFFLINE to BOOTSTRAP state transition: listener registration,
 * an unknown partition, a forced replica-addition failure, a successful addition, and a transition on a replica
 * that already exists.
 */
@Test
public void testReplicaFromOfflineToBootstrap() {
    MetricRegistry registry = new MetricRegistry();
    MockStatsManager statsManagerUnderTest = new MockStatsManager(storageManager, replicas, registry, statsManagerConfig, clusterParticipant);

    // Step 1: constructing the stats manager should have registered its listener with the participant
    boolean listenerRegistered = clusterParticipant.getPartitionStateChangeListeners().containsKey(StateModelListenerType.StatsManagerListener);
    assertTrue("Stats manager listener is found in cluster participant", listenerRegistered);

    // Step 2: a partition name that does not exist must be rejected with ReplicaNotFound
    try {
        clusterParticipant.onPartitionBecomeBootstrapFromOffline("InvalidPartition");
        fail("should fail because partition is not found");
    } catch (StateTransitionException notFound) {
        assertEquals("Transition error doesn't match", ReplicaNotFound, notFound.getErrorCode());
    }

    // Step 3: build a brand-new partition on the local node and make the stats manager report an addition failure
    MockDataNodeId localNode = (MockDataNodeId) dataNodeId;
    PartitionId addedPartition = new MockPartitionId(3, MockClusterMap.DEFAULT_PARTITION_CLASS, Collections.singletonList(localNode), 0);
    String addedPartitionPath = addedPartition.toPathString();
    MockStorageManager mockStorageManager = (MockStorageManager) storageManager;
    mockStorageManager.getReplicaReturnVal = addedPartition.getReplicaIds().get(0);
    statsManagerUnderTest.returnValOfAddReplica = false;
    try {
        clusterParticipant.onPartitionBecomeBootstrapFromOffline(addedPartitionPath);
        fail("should fail because adding replica to stats manager failed");
    } catch (StateTransitionException additionFailed) {
        assertEquals("Transition error code doesn't match", ReplicaOperationFailure, additionFailed.getErrorCode());
    }

    // Step 4: with the forced return value cleared, the same transition should add the new partition
    boolean presentBeforeAdd = statsManagerUnderTest.partitionToReplicaMap.containsKey(addedPartition);
    assertFalse("Before adding new replica, in-mem data structure should not contain new partition", presentBeforeAdd);
    statsManagerUnderTest.returnValOfAddReplica = null;
    clusterParticipant.onPartitionBecomeBootstrapFromOffline(addedPartitionPath);
    boolean presentAfterAdd = statsManagerUnderTest.partitionToReplicaMap.containsKey(addedPartition);
    assertTrue("After adding new replica, in-mem data structure should contain new partition", presentAfterAdd);

    // Step 5: repeating the transition for a replica that already exists is expected to be a no-op
    String existingPartitionPath = replicas.get(0).getPartitionId().toPathString();
    clusterParticipant.onPartitionBecomeBootstrapFromOffline(existingPartitionPath);
}
Also used : MockPartitionId(com.github.ambry.clustermap.MockPartitionId) MetricRegistry(com.codahale.metrics.MetricRegistry) MockDataNodeId(com.github.ambry.clustermap.MockDataNodeId) MockPartitionId(com.github.ambry.clustermap.MockPartitionId) PartitionId(com.github.ambry.clustermap.PartitionId) StateTransitionException(com.github.ambry.clustermap.StateTransitionException) Test(org.junit.Test)

Example 13 with StateTransitionException

use of com.github.ambry.clustermap.StateTransitionException in project ambry by linkedin.

From the class StorageManagerTest, method replicaFromOfflineToBootstrapTest.

/**
 * Test both success and failure cases in storage manager when a replica becomes BOOTSTRAP from OFFLINE (update
 * InstanceConfig in Helix is turned off in this test).
 * <p>
 * Covered cases, in order: listener registration, unknown partition ({@code ReplicaNotFound}), store not started
 * ({@code StoreNotStarted}), new replica addition failing while the target disk manager is shut down
 * ({@code ReplicaOperationFailure}), successful new replica addition, and the resulting BOOTSTRAP state for both
 * a non-empty existing store and an empty existing store.
 * @throws Exception
 */
@Test
public void replicaFromOfflineToBootstrapTest() throws Exception {
    generateConfigs(true, false);
    MockDataNodeId localNode = clusterMap.getDataNodes().get(0);
    List<PartitionId> partitionIds = clusterMap.getAllPartitionIds(null);
    List<ReplicaId> localReplicas = clusterMap.getReplicaIds(localNode);
    MockClusterParticipant mockHelixParticipant = new MockClusterParticipant();
    StorageManager storageManager = createStorageManager(localNode, metricRegistry, Collections.singletonList(mockHelixParticipant));
    storageManager.start();
    // 1. get listeners from Helix participant and verify there is a storageManager listener.
    Map<StateModelListenerType, PartitionStateChangeListener> listeners = mockHelixParticipant.getPartitionStateChangeListeners();
    assertTrue("Should contain storage manager listener", listeners.containsKey(StateModelListenerType.StorageManagerListener));
    // 2. if new bootstrap replica is not found, there should be an exception
    try {
        mockHelixParticipant.onPartitionBecomeBootstrapFromOffline(String.valueOf(partitionIds.size() + 1));
        fail("should fail due to bootstrap replica not found");
    } catch (StateTransitionException e) {
        assertEquals("Error code doesn't match", ReplicaNotFound, e.getErrorCode());
    }
    // 3. test regular store didn't start up (which triggers StoreNotStarted exception)
    ReplicaId replicaId = localReplicas.get(0);
    Store localStore = storageManager.getStore(replicaId.getPartitionId(), true);
    localStore.shutdown();
    try {
        mockHelixParticipant.onPartitionBecomeBootstrapFromOffline(replicaId.getPartitionId().toPathString());
        fail("should fail due to store not started");
    } catch (StateTransitionException e) {
        assertEquals("Error code doesn't match", StoreNotStarted, e.getErrorCode());
    }
    localStore.start();
    // 4. test both failure and success cases regarding new replica addition
    PartitionId newPartition = clusterMap.createNewPartition(Collections.singletonList(localNode));
    assertNull("There should not be any store associated with new partition", storageManager.getStore(newPartition, true));
    // find an existing replica that shares disk with new replica
    ReplicaId newReplica = newPartition.getReplicaIds().get(0);
    ReplicaId replicaOnSameDisk = localReplicas.stream()
        .filter(r -> r.getDiskId().equals(newReplica.getDiskId()))
        .findFirst()
        .orElseThrow(() -> new IllegalStateException("No existing local replica shares a disk with the new replica"));
    // test add new store failure by shutting down target diskManager
    storageManager.getDiskManager(replicaOnSameDisk.getPartitionId()).shutdown();
    try {
        mockHelixParticipant.onPartitionBecomeBootstrapFromOffline(newPartition.toPathString());
        // without this, the failure case would pass silently if the transition unexpectedly succeeded
        fail("should fail because target disk manager is shut down");
    } catch (StateTransitionException e) {
        assertEquals("Error code doesn't match", ReplicaOperationFailure, e.getErrorCode());
    }
    // restart disk manager to test case where new replica(store) is successfully added into StorageManager
    storageManager.getDiskManager(replicaOnSameDisk.getPartitionId()).start();
    mockHelixParticipant.onPartitionBecomeBootstrapFromOffline(newPartition.toPathString());
    BlobStore newAddedStore = (BlobStore) storageManager.getStore(newPartition);
    assertNotNull("There should be a started store associated with new partition", newAddedStore);
    // 5. verify that new added store has bootstrap file
    assertTrue("There should be a bootstrap file indicating store is in BOOTSTRAP state", newAddedStore.isBootstrapInProgress());
    assertEquals("The store's current state should be BOOTSTRAP", ReplicaState.BOOTSTRAP, newAddedStore.getCurrentState());
    // 6. test that state transition should succeed for existing non-empty replicas (we write some data into store beforehand)
    MockId id = new MockId(TestUtils.getRandomString(MOCK_ID_STRING_LENGTH), Utils.getRandomShort(TestUtils.RANDOM), Utils.getRandomShort(TestUtils.RANDOM));
    MessageInfo info = new MessageInfo(id, PUT_RECORD_SIZE, id.getAccountId(), id.getContainerId(), Utils.Infinite_Time);
    MessageWriteSet writeSet = new MockMessageWriteSet(Collections.singletonList(info), Collections.singletonList(ByteBuffer.allocate(PUT_RECORD_SIZE)));
    Store storeToWrite = storageManager.getStore(localReplicas.get(1).getPartitionId());
    storeToWrite.put(writeSet);
    mockHelixParticipant.onPartitionBecomeBootstrapFromOffline(localReplicas.get(1).getPartitionId().toPathString());
    assertFalse("There should not be any bootstrap file for existing non-empty store", storeToWrite.isBootstrapInProgress());
    assertEquals("The store's current state should be BOOTSTRAP", ReplicaState.BOOTSTRAP, storeToWrite.getCurrentState());
    // 7. test that for new created (empty) store, state transition puts it into BOOTSTRAP state
    mockHelixParticipant.onPartitionBecomeBootstrapFromOffline(localReplicas.get(0).getPartitionId().toPathString());
    assertTrue("There should be a bootstrap file because store is empty and probably recreated", localStore.isBootstrapInProgress());
    assertEquals("The store's current state should be BOOTSTRAP", ReplicaState.BOOTSTRAP, localStore.getCurrentState());
    shutdownAndAssertStoresInaccessible(storageManager, localReplicas);
}
Also used : PartitionStateChangeListener(com.github.ambry.clustermap.PartitionStateChangeListener) AccountStatsStore(com.github.ambry.accountstats.AccountStatsStore) MockPartitionId(com.github.ambry.clustermap.MockPartitionId) PartitionId(com.github.ambry.clustermap.PartitionId) ReplicaId(com.github.ambry.clustermap.ReplicaId) StateModelListenerType(com.github.ambry.clustermap.StateModelListenerType) MockDataNodeId(com.github.ambry.clustermap.MockDataNodeId) StateTransitionException(com.github.ambry.clustermap.StateTransitionException) BlobStoreTest(com.github.ambry.store.BlobStoreTest) Test(org.junit.Test)

Aggregations

StateTransitionException (com.github.ambry.clustermap.StateTransitionException)13 Test (org.junit.Test)13 ReplicaId (com.github.ambry.clustermap.ReplicaId)12 MockDataNodeId (com.github.ambry.clustermap.MockDataNodeId)9 MockPartitionId (com.github.ambry.clustermap.MockPartitionId)8 PartitionId (com.github.ambry.clustermap.PartitionId)8 MetricRegistry (com.codahale.metrics.MetricRegistry)7 MockClusterMap (com.github.ambry.clustermap.MockClusterMap)6 MockHelixParticipant (com.github.ambry.clustermap.MockHelixParticipant)6 ClusterMapConfig (com.github.ambry.config.ClusterMapConfig)6 BlobStoreTest (com.github.ambry.store.BlobStoreTest)6 StorageManager (com.github.ambry.store.StorageManager)6 MockReplicaId (com.github.ambry.clustermap.MockReplicaId)5 File (java.io.File)5 CountDownLatch (java.util.concurrent.CountDownLatch)5 PartitionStateChangeListener (com.github.ambry.clustermap.PartitionStateChangeListener)3 DataNodeId (com.github.ambry.clustermap.DataNodeId)2 StateModelListenerType (com.github.ambry.clustermap.StateModelListenerType)2 MessageInfo (com.github.ambry.store.MessageInfo)2 MockId (com.github.ambry.store.MockId)2