Search in sources:

Example 1 with StateTransitionException

use of com.github.ambry.clustermap.StateTransitionException in project ambry by linkedin.

the class StorageManagerTest method replicaFromInactiveToOfflineTest.

/**
 * Test shutting down blob store failure during Inactive-To-Offline transition.
 * <p>
 * The transition is invoked on a separate thread, so any assertion failure raised there is captured and
 * re-thrown on the test thread. Otherwise it would be lost and the test would only report a latch timeout.
 * @throws Exception
 */
@Test
public void replicaFromInactiveToOfflineTest() throws Exception {
    generateConfigs(true, false);
    MockDataNodeId localNode = clusterMap.getDataNodes().get(0);
    List<ReplicaId> localReplicas = clusterMap.getReplicaIds(localNode);
    ReplicaId testReplica = localReplicas.get(0);
    MockClusterParticipant mockHelixParticipant = new MockClusterParticipant();
    StorageManager storageManager = createStorageManager(localNode, metricRegistry, Collections.singletonList(mockHelixParticipant));
    storageManager.start();
    // test shutdown store failure (this is induced by shutting down disk manager)
    storageManager.getDiskManager(testReplica.getPartitionId()).shutdown();
    mockHelixParticipant.getReplicaSyncUpManager().initiateDisconnection(testReplica);
    CountDownLatch participantLatch = new CountDownLatch(1);
    // holds whatever went wrong on the participant thread (unexpected success, wrong error code, other exception)
    java.util.concurrent.atomic.AtomicReference<Throwable> participantFailure = new java.util.concurrent.atomic.AtomicReference<>();
    Utils.newThread(() -> {
        try {
            try {
                mockHelixParticipant.onPartitionBecomeOfflineFromInactive(testReplica.getPartitionId().toPathString());
                fail("should fail because of shutting down store failure");
            } catch (StateTransitionException e) {
                assertEquals("Error code doesn't match", ReplicaOperationFailure, e.getErrorCode());
            }
        } catch (Throwable t) {
            // an AssertionError thrown here would otherwise die with this thread; hand it to the test thread
            participantFailure.set(t);
        } finally {
            // always release the test thread so that the real cause is reported instead of a timeout
            participantLatch.countDown();
        }
    }, false).start();
    // make sync-up complete to let code proceed and encounter exception in storage manager.
    mockHelixParticipant.getReplicaSyncUpManager().onDisconnectionComplete(testReplica);
    assertTrue("Helix participant transition didn't get invoked within 1 sec", participantLatch.await(1, TimeUnit.SECONDS));
    Throwable failure = participantFailure.get();
    if (failure != null) {
        throw new AssertionError("Inactive-To-Offline transition check failed on participant thread", failure);
    }
    shutdownAndAssertStoresInaccessible(storageManager, localReplicas);
}
Also used : MockDataNodeId(com.github.ambry.clustermap.MockDataNodeId) CountDownLatch(java.util.concurrent.CountDownLatch) ReplicaId(com.github.ambry.clustermap.ReplicaId) StateTransitionException(com.github.ambry.clustermap.StateTransitionException) BlobStoreTest(com.github.ambry.store.BlobStoreTest) Test(org.junit.Test)

Example 2 with StateTransitionException

use of com.github.ambry.clustermap.StateTransitionException in project ambry by linkedin.

the class StorageManagerTest method multiParticipantsMarkStoreInErrorStateTest.

/**
 * Verifies that when a store has been shut down, every participant registered on this node rejects the
 * OFFLINE -> BOOTSTRAP transition with a {@code StoreNotStarted} error code.
 * @throws Exception
 */
@Test
public void multiParticipantsMarkStoreInErrorStateTest() throws Exception {
    MockDataNodeId localNode = clusterMap.getDataNodes().get(0);
    List<ReplicaId> localReplicas = clusterMap.getReplicaIds(localNode);
    List<ClusterParticipant> clusterParticipants = Arrays.asList(new MockClusterParticipant(), new MockClusterParticipant());
    StorageManager manager = createStorageManager(localNode, metricRegistry, clusterParticipants);
    manager.start();
    // shut down the store of the first replica so the transition below has something to fail on
    PartitionId stoppedPartition = localReplicas.get(0).getPartitionId();
    manager.shutdownBlobStore(stoppedPartition);
    // each participant in turn is expected to throw during OFFLINE -> BOOTSTRAP
    for (ClusterParticipant clusterParticipant : clusterParticipants) {
        MockClusterParticipant mockParticipant = (MockClusterParticipant) clusterParticipant;
        try {
            mockParticipant.onPartitionBecomeBootstrapFromOffline(stoppedPartition.toPathString());
            fail("should fail because store is not started");
        } catch (StateTransitionException e) {
            assertEquals("Error code doesn't match", StoreNotStarted, e.getErrorCode());
        }
    }
    shutdownAndAssertStoresInaccessible(manager, localReplicas);
}
Also used : MockDataNodeId(com.github.ambry.clustermap.MockDataNodeId) MockPartitionId(com.github.ambry.clustermap.MockPartitionId) PartitionId(com.github.ambry.clustermap.PartitionId) ReplicaId(com.github.ambry.clustermap.ReplicaId) ClusterParticipant(com.github.ambry.clustermap.ClusterParticipant) StateTransitionException(com.github.ambry.clustermap.StateTransitionException) BlobStoreTest(com.github.ambry.store.BlobStoreTest) Test(org.junit.Test)

Example 3 with StateTransitionException

use of com.github.ambry.clustermap.StateTransitionException in project ambry by linkedin.

the class StorageManagerTest method replicaFromStandbyToInactiveTest.

/**
 * Test both success and failure cases during STANDBY -> INACTIVE transition.
 * <p>
 * Covered cases, in order: listener registration, unknown partition, removed store, stopped store, disabled
 * store, the success path (store state and decommission file), and a compaction-control failure exercised
 * through {@code MockStorageManager}.
 * @throws Exception
 */
@Test
public void replicaFromStandbyToInactiveTest() throws Exception {
    generateConfigs(true, false);
    MockDataNodeId localNode = clusterMap.getDataNodes().get(0);
    List<ReplicaId> localReplicas = clusterMap.getReplicaIds(localNode);
    MockClusterParticipant mockHelixParticipant = new MockClusterParticipant();
    StorageManager storageManager = createStorageManager(localNode, metricRegistry, Collections.singletonList(mockHelixParticipant));
    storageManager.start();
    // 1. get listeners from Helix participant and verify there is a storageManager listener.
    Map<StateModelListenerType, PartitionStateChangeListener> listeners = mockHelixParticipant.getPartitionStateChangeListeners();
    assertTrue("Should contain storage manager listener", listeners.containsKey(StateModelListenerType.StorageManagerListener));
    // 2. not found replica should encounter exception
    try {
        mockHelixParticipant.onPartitionBecomeInactiveFromStandby("-1");
        fail("should fail because replica is not found");
    } catch (StateTransitionException e) {
        assertEquals("Error code doesn't match", ReplicaNotFound, e.getErrorCode());
    }
    // 3. not found store should throw exception (induced by removing the store)
    ReplicaId replicaToRemove = localReplicas.get(localReplicas.size() - 1);
    storageManager.controlCompactionForBlobStore(replicaToRemove.getPartitionId(), false);
    storageManager.shutdownBlobStore(replicaToRemove.getPartitionId());
    storageManager.getDiskManager(replicaToRemove.getPartitionId()).removeBlobStore(replicaToRemove.getPartitionId());
    try {
        mockHelixParticipant.onPartitionBecomeInactiveFromStandby(replicaToRemove.getPartitionId().toPathString());
        fail("should fail because store is not found");
    } catch (StateTransitionException e) {
        assertEquals("Error code doesn't match", ReplicaNotFound, e.getErrorCode());
    }
    // 4. store not started exception
    ReplicaId localReplica = localReplicas.get(0);
    storageManager.shutdownBlobStore(localReplica.getPartitionId());
    try {
        mockHelixParticipant.onPartitionBecomeInactiveFromStandby(localReplica.getPartitionId().toPathString());
        fail("should fail because store is not started");
    } catch (StateTransitionException e) {
        assertEquals("Error code doesn't match", StoreNotStarted, e.getErrorCode());
    }
    // bring the store back so the remaining cases run against a started store
    storageManager.startBlobStore(localReplica.getPartitionId());
    // 5. store is disabled due to disk I/O error
    BlobStore localStore = (BlobStore) storageManager.getStore(localReplica.getPartitionId());
    localStore.setDisableState(true);
    try {
        mockHelixParticipant.onPartitionBecomeInactiveFromStandby(localReplica.getPartitionId().toPathString());
        fail("should fail because store is disabled");
    } catch (StateTransitionException e) {
        assertEquals("Error code doesn't match", ReplicaOperationFailure, e.getErrorCode());
    }
    localStore.setDisableState(false);
    // 6. success case (verify both replica's state and decommission file)
    mockHelixParticipant.onPartitionBecomeInactiveFromStandby(localReplica.getPartitionId().toPathString());
    assertEquals("local store state should be set to INACTIVE", ReplicaState.INACTIVE, storageManager.getStore(localReplica.getPartitionId()).getCurrentState());
    File decommissionFile = new File(localReplica.getReplicaPath(), BlobStore.DECOMMISSION_FILE_NAME);
    assertTrue("Decommission file is not found in local replica's dir", decommissionFile.exists());
    shutdownAndAssertStoresInaccessible(storageManager, localReplicas);
    // 7. mock disable compaction failure
    mockHelixParticipant = new MockClusterParticipant();
    MockStorageManager mockStorageManager = new MockStorageManager(localNode, Collections.singletonList(mockHelixParticipant));
    mockStorageManager.start();
    try {
        mockHelixParticipant.onPartitionBecomeInactiveFromStandby(localReplica.getPartitionId().toPathString());
        // NOTE(review): assumes MockStorageManager makes disabling compaction fail, as the step comment states.
        // Without this fail() the case would silently pass when no exception is thrown.
        fail("should fail because disabling compaction didn't succeed");
    } catch (StateTransitionException e) {
        assertEquals("Error code doesn't match", ReplicaNotFound, e.getErrorCode());
    } finally {
        // the mock manager is shut down whether or not the expectation above held
        shutdownAndAssertStoresInaccessible(mockStorageManager, localReplicas);
    }
}
Also used : MockDataNodeId(com.github.ambry.clustermap.MockDataNodeId) PartitionStateChangeListener(com.github.ambry.clustermap.PartitionStateChangeListener) File(java.io.File) ReplicaId(com.github.ambry.clustermap.ReplicaId) StateModelListenerType(com.github.ambry.clustermap.StateModelListenerType) StateTransitionException(com.github.ambry.clustermap.StateTransitionException) BlobStoreTest(com.github.ambry.store.BlobStoreTest) Test(org.junit.Test)

Example 4 with StateTransitionException

use of com.github.ambry.clustermap.StateTransitionException in project ambry by linkedin.

the class StorageManagerTest method residualDirDeletionTest.

/**
 * Checks that a leftover store directory is removed when the OFFLINE -> DROPPED transition is triggered for
 * its partition, and that a failed deletion surfaces as a {@code ReplicaOperationFailure} transition error.
 * @throws Exception
 */
@Test
public void residualDirDeletionTest() throws Exception {
    MockDataNodeId dataNode = clusterMap.getDataNodes().get(0);
    List<ReplicaId> nodeReplicas = clusterMap.getReplicaIds(dataNode);
    // spy on the participant so the call to setPartitionDisabledState can be stubbed out and verified later
    MockClusterParticipant participantSpy = Mockito.spy(new MockClusterParticipant());
    doNothing().when(participantSpy).setPartitionDisabledState(anyString(), anyBoolean());
    // put an additional store directory under the mount path of the first replica
    String residualPartitionName = "1000";
    File residualStoreDir = new File(nodeReplicas.get(0).getMountPath(), residualPartitionName);
    assertTrue("Can't create an extra store dir", residualStoreDir.mkdir());
    StorageManager manager = createStorageManager(dataNode, metricRegistry, Collections.singletonList(participantSpy));
    manager.start();
    // failure case: an unreadable nested directory is expected to make deleting the store dir fail
    File unreadableDir = new File(residualStoreDir.getAbsolutePath(), "invalidDir");
    unreadableDir.deleteOnExit();
    assertTrue("Couldn't create dir within store dir", unreadableDir.mkdir());
    assertTrue("Could not make unreadable", unreadableDir.setReadable(false));
    try {
        participantSpy.onPartitionBecomeDroppedFromOffline(residualPartitionName);
        fail("should fail because there is IOException when deleting store dir");
    } catch (StateTransitionException e) {
        assertEquals("Error code is not expected", ReplicaOperationFailure, e.getErrorCode());
    }
    // restore read permission, then retry the transition
    assertTrue("Could not make readable", unreadableDir.setReadable(true));
    // success case: the residual store dir should be gone after OFFLINE -> DROPPED
    participantSpy.onPartitionBecomeDroppedFromOffline(residualPartitionName);
    verify(participantSpy).setPartitionDisabledState(residualPartitionName, false);
    assertFalse("Extra store dir should not exist", residualStoreDir.exists());
    shutdownAndAssertStoresInaccessible(manager, nodeReplicas);
}
Also used : MockDataNodeId(com.github.ambry.clustermap.MockDataNodeId) File(java.io.File) ReplicaId(com.github.ambry.clustermap.ReplicaId) StateTransitionException(com.github.ambry.clustermap.StateTransitionException) BlobStoreTest(com.github.ambry.store.BlobStoreTest) Test(org.junit.Test)

Example 5 with StateTransitionException

use of com.github.ambry.clustermap.StateTransitionException in project ambry by linkedin.

the class StorageManagerTest method updateInstanceConfigFailureTest.

/**
 * Exercises failures to update InstanceConfig in Helix for the Offline-To-Bootstrap and Inactive-To-Offline
 * transitions. Every case expects a {@code HelixUpdateFailure} error code.
 */
@Test
public void updateInstanceConfigFailureTest() throws Exception {
    generateConfigs(true, true);
    MockDataNodeId dataNode = clusterMap.getDataNodes().get(0);
    List<ReplicaId> nodeReplicas = clusterMap.getReplicaIds(dataNode);
    MockClusterParticipant participant = new MockClusterParticipant();
    StorageManager manager = createStorageManager(dataNode, metricRegistry, Collections.singletonList(participant));
    manager.start();
    StateTransitionException.TransitionErrorCode expectedCode = StateTransitionException.TransitionErrorCode.HelixUpdateFailure;
    // add a new partition that has a replica on the local node
    PartitionId addedPartition = clusterMap.createNewPartition(Collections.singletonList(dataNode));
    // force the participant's node-info update to report failure
    participant.updateNodeInfoReturnVal = false;
    String bootstrapPartitionName = addedPartition.toPathString();
    try {
        participant.onPartitionBecomeBootstrapFromOffline(bootstrapPartitionName);
        fail("should fail because updating InstanceConfig didn't succeed during Offline-To-Bootstrap");
    } catch (StateTransitionException e) {
        assertEquals("Error code doesn't match", expectedCode, e.getErrorCode());
    }
    String offlinePartitionName = nodeReplicas.get(0).getPartitionId().toPathString();
    try {
        participant.onPartitionBecomeOfflineFromInactive(offlinePartitionName);
        fail("should fail because updating InstanceConfig didn't succeed during Inactive-To-Offline");
    } catch (StateTransitionException e) {
        assertEquals("Error code doesn't match", expectedCode, e.getErrorCode());
    }
    // clear the override for the remaining cases
    participant.updateNodeInfoReturnVal = null;
    // mock InstanceConfig not found error (note that MockHelixAdmin is empty by default, so no InstanceConfig is present)
    addedPartition = clusterMap.createNewPartition(Collections.singletonList(dataNode));
    bootstrapPartitionName = addedPartition.toPathString();
    try {
        participant.onPartitionBecomeBootstrapFromOffline(bootstrapPartitionName);
        fail("should fail because InstanceConfig is not found during Offline-To-Bootstrap");
    } catch (StateTransitionException e) {
        assertEquals("Error code doesn't match", expectedCode, e.getErrorCode());
    }
    offlinePartitionName = nodeReplicas.get(1).getPartitionId().toPathString();
    try {
        participant.onPartitionBecomeOfflineFromInactive(offlinePartitionName);
        fail("should fail because InstanceConfig is not found during Inactive-To-Offline");
    } catch (StateTransitionException e) {
        assertEquals("Error code doesn't match", expectedCode, e.getErrorCode());
    }
    shutdownAndAssertStoresInaccessible(manager, nodeReplicas);
}
Also used : MockDataNodeId(com.github.ambry.clustermap.MockDataNodeId) MockPartitionId(com.github.ambry.clustermap.MockPartitionId) PartitionId(com.github.ambry.clustermap.PartitionId) ReplicaId(com.github.ambry.clustermap.ReplicaId) StateTransitionException(com.github.ambry.clustermap.StateTransitionException) BlobStoreTest(com.github.ambry.store.BlobStoreTest) Test(org.junit.Test)

Aggregations

StateTransitionException (com.github.ambry.clustermap.StateTransitionException): 13; Test (org.junit.Test): 13; ReplicaId (com.github.ambry.clustermap.ReplicaId): 12; MockDataNodeId (com.github.ambry.clustermap.MockDataNodeId): 9; MockPartitionId (com.github.ambry.clustermap.MockPartitionId): 8; PartitionId (com.github.ambry.clustermap.PartitionId): 8; MetricRegistry (com.codahale.metrics.MetricRegistry): 7; MockClusterMap (com.github.ambry.clustermap.MockClusterMap): 6; MockHelixParticipant (com.github.ambry.clustermap.MockHelixParticipant): 6; ClusterMapConfig (com.github.ambry.config.ClusterMapConfig): 6; BlobStoreTest (com.github.ambry.store.BlobStoreTest): 6; StorageManager (com.github.ambry.store.StorageManager): 6; MockReplicaId (com.github.ambry.clustermap.MockReplicaId): 5; File (java.io.File): 5; CountDownLatch (java.util.concurrent.CountDownLatch): 5; PartitionStateChangeListener (com.github.ambry.clustermap.PartitionStateChangeListener): 3; DataNodeId (com.github.ambry.clustermap.DataNodeId): 2; StateModelListenerType (com.github.ambry.clustermap.StateModelListenerType): 2; MessageInfo (com.github.ambry.store.MessageInfo): 2; MockId (com.github.ambry.store.MockId): 2