use of com.github.ambry.clustermap.ReplicaId in project ambry by linkedin.
the class StorageManagerTest method storeStartFailureTest.
/**
 * Tests that {@link StorageManager} can start even when certain stores cannot be started, and checks that these
 * stores are not accessible afterwards. A store start failure is induced by making the replica path non-readable.
 * @throws Exception
 */
@Test
public void storeStartFailureTest() throws Exception {
  MockDataNodeId dataNode = clusterMap.getDataNodes().get(0);
  List<ReplicaId> replicas = clusterMap.getReplicaIds(dataNode);
  Set<Integer> badReplicaIndexes = new HashSet<>(Arrays.asList(2, 7));
  for (Integer badReplicaIndex : badReplicaIndexes) {
    // File.setReadable signals failure only through its return value. If the permission change did not take
    // effect, the start failure this test depends on may never be induced, so fail right here with a clear message.
    File replicaDir = new File(replicas.get(badReplicaIndex).getReplicaPath());
    assertTrue("Could not make replica path non-readable: " + replicaDir, replicaDir.setReadable(false));
  }
  StorageManager storageManager = createStorageManager(dataNode, metricRegistry, null);
  storageManager.start();
  assertEquals("There should be no unexpected partitions reported", 0, getNumUnrecognizedPartitionsReported());
  // Only the store-start failure counter should be non-zero, with one failure per bad replica.
  Map<String, Counter> counters = metricRegistry.getCounters();
  assertEquals(0, getCounterValue(counters, DiskSpaceAllocator.class.getName(), "DiskSpaceAllocatorInitFailureCount"));
  assertEquals(badReplicaIndexes.size(), getCounterValue(counters, DiskManager.class.getName(), "TotalStoreStartFailures"));
  assertEquals(0, getCounterValue(counters, DiskManager.class.getName(), "DiskMountPathFailures"));
  for (int i = 0; i < replicas.size(); i++) {
    ReplicaId replica = replicas.get(i);
    PartitionId id = replica.getPartitionId();
    if (badReplicaIndexes.contains(i)) {
      // Stores that failed to start must be neither retrievable nor schedulable for compaction.
      assertNull("This store should not be accessible.", storageManager.getStore(id, false));
      assertFalse("Compaction should not be scheduled", storageManager.scheduleNextForCompaction(id));
    } else {
      Store store = storageManager.getStore(id, false);
      assertTrue("Store should be started", store.isStarted());
      assertTrue("Compaction should be scheduled", storageManager.scheduleNextForCompaction(id));
    }
  }
  // One compaction thread is expected per mount path of the data node.
  assertEquals("Compaction thread count is incorrect", dataNode.getMountPaths().size(), TestUtils.numThreadsByThisName(CompactionManager.THREAD_NAME_PREFIX));
  verifyCompactionThreadCount(storageManager, dataNode.getMountPaths().size());
  shutdownAndAssertStoresInaccessible(storageManager, replicas);
  assertEquals("Compaction thread count is incorrect", 0, storageManager.getCompactionThreadCount());
}
use of com.github.ambry.clustermap.ReplicaId in project ambry by linkedin.
the class StorageManagerTest method setBlobStoreStoppedStateFailureTest.
/**
 * Exercises the failure cases of setting the stopped state of blob stores for a given list of
 * {@link PartitionId}s.
 */
@Test
public void setBlobStoreStoppedStateFailureTest() throws Exception {
  MockDataNodeId localNode = clusterMap.getDataNodes().get(0);
  List<ReplicaId> localReplicas = clusterMap.getReplicaIds(localNode);
  // Build a partition that the storage manager is not expected to recognize.
  List<MockDataNodeId> nodesOfUnknownPartition = new ArrayList<>();
  nodesOfUnknownPartition.add(localNode);
  MockPartitionId unknownPartition =
      new MockPartitionId(Long.MAX_VALUE, MockClusterMap.DEFAULT_PARTITION_CLASS, nodesOfUnknownPartition, 0);
  List<? extends ReplicaId> unknownPartitionReplicas = unknownPartition.getReplicaIds();
  StorageManager manager = createStorageManager(localNode, metricRegistry, null);
  manager.start();
  assertEquals("There should be 1 unexpected partition reported", 1, getNumUnrecognizedPartitionsReported());

  // Scenario 1: store whose replicaStatusDelegate is null (its disk manager is shut down first).
  PartitionId localPartitionId = localReplicas.get(0).getPartitionId();
  manager.getDiskManager(localPartitionId).shutdown();
  List<PartitionId> failedForLocal = manager.setBlobStoreStoppedState(Arrays.asList(localPartitionId), true);
  assertEquals("Set store stopped state should fail on given store whose replicaStatusDelegate is null",
      localPartitionId, failedForLocal.get(0));

  // Scenario 2: invalid partition (where diskManager == null).
  PartitionId unknownPartitionId = unknownPartitionReplicas.get(0).getPartitionId();
  List<PartitionId> failedForUnknown = manager.setBlobStoreStoppedState(Arrays.asList(unknownPartitionId), true);
  assertEquals("Set store stopped state should fail on given invalid replica", unknownPartitionId,
      failedForUnknown.get(0));

  shutdownAndAssertStoresInaccessible(manager, localReplicas);
}
use of com.github.ambry.clustermap.ReplicaId in project ambry by linkedin.
the class StorageManagerTest method removeBlobStoreTest.
/**
 * Verifies removal of a blob store identified by a {@link PartitionId}, covering both the successful path and the
 * cases in which removal must be rejected.
 * @throws Exception
 */
@Test
public void removeBlobStoreTest() throws Exception {
  MockDataNodeId localNode = clusterMap.getDataNodes().get(0);
  List<ReplicaId> localReplicas = clusterMap.getReplicaIds(localNode);
  List<MockDataNodeId> nodesOfUnknownPartition = new ArrayList<>();
  nodesOfUnknownPartition.add(localNode);
  MockPartitionId unknownPartition =
      new MockPartitionId(Long.MAX_VALUE, MockClusterMap.DEFAULT_PARTITION_CLASS, nodesOfUnknownPartition, 0);
  StorageManager manager = createStorageManager(localNode, metricRegistry, null);
  manager.start();

  // Replicas 0..2 are reserved for the rejection scenarios below; every other replica goes through the
  // regular disable-compaction / shutdown / remove sequence.
  for (int replicaIdx = 3; replicaIdx < localReplicas.size(); replicaIdx++) {
    disableCompactionShutdownAndRemoveStore(manager, localReplicas.get(replicaIdx).getPartitionId());
  }

  // Replica[2]: the store is shut down but compaction is still enabled on it.
  PartitionId compactionEnabledId = localReplicas.get(2).getPartitionId();
  assertTrue("Shutdown should succeed on given store", manager.shutdownBlobStore(compactionEnabledId));
  assertFalse("Removing store should fail because compaction is enabled on this store",
      manager.removeBlobStore(compactionEnabledId));

  // Replica[1]: the store is still started.
  PartitionId stillStartedId = localReplicas.get(1).getPartitionId();
  assertFalse("Removing store should fail because store is still started", manager.removeBlobStore(stillStartedId));

  // Replica[0]: the disk manager hosting the store is not running.
  PartitionId diskManagerDownId = localReplicas.get(0).getPartitionId();
  manager.getDiskManager(diskManagerDownId).shutdown();
  assertFalse("Removing store should fail because disk manager is not running",
      manager.removeBlobStore(diskManagerDownId));

  // A store that doesn't exist.
  assertFalse("Removing not-found store should return false", manager.removeBlobStore(unknownPartition));
  shutdownAndAssertStoresInaccessible(manager, localReplicas);

  // Removal when the compaction executor is not instantiated:
  // by default, storeCompactionTriggers = "" which makes compaction executor = null during initialization
  VerifiableProperties defaultProps = new VerifiableProperties(new Properties());
  StorageManager managerWithDefaults =
      new StorageManager(new StoreConfig(defaultProps), diskManagerConfig, Utils.newScheduler(1, false),
          metricRegistry, new MockIdFactory(), clusterMap, localNode, new DummyMessageStoreHardDelete(), null,
          SystemTime.getInstance(), new DummyMessageStoreRecovery(), new InMemAccountService(false, false));
  managerWithDefaults.start();
  for (ReplicaId localReplica : localReplicas) {
    disableCompactionShutdownAndRemoveStore(managerWithDefaults, localReplica.getPartitionId());
  }
  shutdownAndAssertStoresInaccessible(managerWithDefaults, localReplicas);
}

/**
 * Disables compaction on, shuts down and removes the store of the given partition, asserting that every step
 * succeeds and that the store can no longer be retrieved afterwards.
 * @param manager the {@link StorageManager} that hosts the store.
 * @param partitionId the {@link PartitionId} whose store is removed.
 */
private void disableCompactionShutdownAndRemoveStore(StorageManager manager, PartitionId partitionId) {
  assertTrue("Disable compaction should succeed", manager.controlCompactionForBlobStore(partitionId, false));
  assertTrue("Shutdown should succeed on given store", manager.shutdownBlobStore(partitionId));
  assertTrue("Removing store should succeed", manager.removeBlobStore(partitionId));
  assertNull("The store should not exist", manager.getStore(partitionId, false));
}
use of com.github.ambry.clustermap.ReplicaId in project ambry by linkedin.
the class StorageManagerTest method replicaFromInactiveToOfflineTest.
/**
 * Test shutting down blob store failure during Inactive-To-Offline transition.
 * The transition is driven from a helper thread; its outcome is handed back to the test thread so that every
 * assertion is evaluated where JUnit can observe it.
 * @throws Exception
 */
@Test
public void replicaFromInactiveToOfflineTest() throws Exception {
  generateConfigs(true, false);
  MockDataNodeId localNode = clusterMap.getDataNodes().get(0);
  List<ReplicaId> localReplicas = clusterMap.getReplicaIds(localNode);
  ReplicaId testReplica = localReplicas.get(0);
  MockClusterParticipant mockHelixParticipant = new MockClusterParticipant();
  StorageManager storageManager =
      createStorageManager(localNode, metricRegistry, Collections.singletonList(mockHelixParticipant));
  storageManager.start();
  // test shutdown store failure (this is induced by shutting down disk manager)
  storageManager.getDiskManager(testReplica.getPartitionId()).shutdown();
  mockHelixParticipant.getReplicaSyncUpManager().initiateDisconnection(testReplica);
  CountDownLatch participantLatch = new CountDownLatch(1);
  // Assertion errors thrown on a helper thread never reach JUnit, so the helper thread only records the
  // exception it observed (if any) and the checks are performed on the test thread below.
  java.util.concurrent.atomic.AtomicReference<StateTransitionException> transitionFailure =
      new java.util.concurrent.atomic.AtomicReference<>();
  Utils.newThread(() -> {
    try {
      mockHelixParticipant.onPartitionBecomeOfflineFromInactive(testReplica.getPartitionId().toPathString());
    } catch (StateTransitionException e) {
      transitionFailure.set(e);
    } finally {
      // Always release the test thread so it reports the actual outcome instead of a latch timeout.
      participantLatch.countDown();
    }
  }, false).start();
  // make sync-up complete to let code proceed and encounter exception in storage manager.
  mockHelixParticipant.getReplicaSyncUpManager().onDisconnectionComplete(testReplica);
  assertTrue("Helix participant transition didn't get invoked within 1 sec",
      participantLatch.await(1, TimeUnit.SECONDS));
  StateTransitionException observedFailure = transitionFailure.get();
  assertTrue("should fail because of shutting down store failure", observedFailure != null);
  assertEquals("Error code doesn't match", ReplicaOperationFailure, observedFailure.getErrorCode());
  shutdownAndAssertStoresInaccessible(storageManager, localReplicas);
}
use of com.github.ambry.clustermap.ReplicaId in project ambry by linkedin.
the class StorageManagerTest method multiParticipantsMarkStoreInErrorStateTest.
/**
 * Verifies that when a store is not started, every participant on this node fails the OFFLINE -> BOOTSTRAP
 * transition with the error code for a store that is not started, so each of them can mark it in ERROR state.
 * @throws Exception
 */
@Test
public void multiParticipantsMarkStoreInErrorStateTest() throws Exception {
  MockDataNodeId localNode = clusterMap.getDataNodes().get(0);
  List<ReplicaId> localReplicas = clusterMap.getReplicaIds(localNode);
  // Keep typed references to the mocks so the transition can be invoked without casting.
  MockClusterParticipant firstParticipant = new MockClusterParticipant();
  MockClusterParticipant secondParticipant = new MockClusterParticipant();
  List<ClusterParticipant> allParticipants = Arrays.asList(firstParticipant, secondParticipant);
  StorageManager manager = createStorageManager(localNode, metricRegistry, allParticipants);
  manager.start();

  // stop one of the stores to induce transition failure
  PartitionId stoppedPartition = localReplicas.get(0).getPartitionId();
  manager.shutdownBlobStore(stoppedPartition);

  // verify that both participants throw exception during OFFLINE -> BOOTSTRAP transition
  for (MockClusterParticipant mockParticipant : Arrays.asList(firstParticipant, secondParticipant)) {
    try {
      mockParticipant.onPartitionBecomeBootstrapFromOffline(stoppedPartition.toPathString());
      fail("should fail because store is not started");
    } catch (StateTransitionException expected) {
      assertEquals("Error code doesn't match", StoreNotStarted, expected.getErrorCode());
    }
  }
  shutdownAndAssertStoresInaccessible(manager, localReplicas);
}
Aggregations