use of com.github.ambry.store.StorageManager in project ambry by linkedin.
the class AmbryServerRequests method handleRemoveStoreRequest.
/**
 * Serves an admin request to remove the BlobStore of a partition from the current node.
 * <p>
 * The replica is first detached from the stats manager and the replication manager, then the store is removed
 * from the storage manager, its files are deleted and the replica is cleared from the sealed/stopped lists of
 * every {@link ReplicaStatusDelegate} attached to the store.
 * @param partitionId the {@link PartitionId} associated with BlobStore
 * @return {@link ServerErrorCode#No_Error} when the store was removed; {@link ServerErrorCode#Partition_Unknown}
 *         when the store manager has no replica for the partition; {@link ServerErrorCode#Unknown_Error} when the
 *         storage manager reports that removal failed or no {@link Store} was found for the partition.
 * @throws StoreException declared by the store operations invoked here.
 * @throws IOException declared by the store operations invoked here.
 */
private ServerErrorCode handleRemoveStoreRequest(PartitionId partitionId) throws StoreException, IOException {
  ReplicaId localReplica = storeManager.getReplica(partitionId.toPathString());
  if (localReplica == null) {
    logger.error("{} doesn't exist on current node", partitionId);
    return ServerErrorCode.Partition_Unknown;
  }

  // Detach the replica from stats and replication. The results of these calls are deliberately not inspected,
  // so they never influence the error code returned for this request.
  statsManager.removeReplica(localReplica);
  ((ReplicationManager) replicationEngine).removeReplica(localReplica);

  // Grab the store reference before asking the storage manager to drop it.
  Store store = ((StorageManager) storeManager).getStore(partitionId, true);
  boolean removedFromStorageManager = storeManager.removeBlobStore(partitionId);
  if (!removedFromStorageManager || store == null) {
    return ServerErrorCode.Unknown_Error;
  }

  BlobStore blobStore = (BlobStore) store;
  blobStore.deleteStoreFiles();
  for (ReplicaStatusDelegate delegate : blobStore.getReplicaStatusDelegates()) {
    // Remove store from sealed and stopped list (if present)
    logger.info("Removing store from sealed and stopped list(if present)");
    delegate.unseal(localReplica);
    delegate.unmarkStopped(Collections.singletonList(localReplica));
  }
  return ServerErrorCode.No_Error;
}
use of com.github.ambry.store.StorageManager in project ambry by linkedin.
the class CloudToStoreReplicationManagerTest method cloudReplicaRemovalTest.
/**
 * Test both success and failure cases when removing cloud replica.
 * <p>
 * Steps: (1) add a cloud replica through a Standby-To-Leader transition, (2) attempt a Leader-To-Standby
 * transition on a partition name that was never added and verify the replica thread count is unchanged,
 * (3) run Leader-To-Standby on the local partition and verify the remote replica info no longer has a thread.
 * Both managers are shut down in {@code finally} blocks so that a failing assertion does not leave them running.
 * @throws Exception
 */
@Test
public void cloudReplicaRemovalTest() throws Exception {
  StorageManager storageManager =
      new StorageManager(storeConfig, new DiskManagerConfig(verifiableProperties), Utils.newScheduler(1, true),
          clusterMap.getMetricRegistry(), null, clusterMap, currentNode, null,
          Collections.singletonList(mockHelixParticipant), new MockTime(), null,
          new InMemAccountService(false, false));
  CloudToStoreReplicationManager cloudToStoreReplicationManager =
      new CloudToStoreReplicationManager(replicationConfig, clusterMapConfig, storeConfig, storageManager,
          storeKeyFactory, clusterMap, mockScheduler, currentNode, null, clusterMap.getMetricRegistry(), null,
          storeKeyConverterFactory, serverConfig.serverMessageTransformer, mockClusterSpectator,
          mockHelixParticipant);
  storageManager.start();
  try {
    cloudToStoreReplicationManager.start();
    try {
      mockClusterSpectator.spectate();
      PartitionId localPartition = storageManager.getLocalPartitions().iterator().next();
      // 1. add cloud replica first for subsequent removal test
      mockHelixParticipant.onPartitionBecomeLeaderFromStandby(localPartition.toPathString());
      String replicaPath =
          Cloud_Replica_Keyword + File.separator + localPartition.toPathString() + File.separator
              + localPartition.toPathString();
      RemoteReplicaInfo remoteReplicaInfo =
          cloudToStoreReplicationManager.getRemoteReplicaInfo(localPartition, vcrNode.getHostname(), replicaPath);
      assertNotNull("Remote replica info should not be null", remoteReplicaInfo);
      assertEquals("There should be only one cloud replica thread created", 1,
          TestUtils.getAllThreadsByThisName(REPLICA_THREAD_PREFIX).size());
      // 2. before removing cloud replica of local partition let's remove a non-existent partition first
      mockHelixParticipant.onPartitionBecomeStandbyFromLeader(NEW_PARTITION_NAME);
      // ensure there is no change in replica thread
      assertEquals("There should be only one cloud replica thread created", 1,
          TestUtils.getAllThreadsByThisName(REPLICA_THREAD_PREFIX).size());
      // 3. remove the cloud replica by calling Leader-To-Standby transition on local partition
      mockHelixParticipant.onPartitionBecomeStandbyFromLeader(localPartition.toPathString());
      // ensure that the remote replica info has been successfully removed from replica thread
      assertNull("Cloud replica should be removed and no thread is assigned to it",
          remoteReplicaInfo.getReplicaThread());
    } finally {
      // Same shutdown order as before (replication manager first), but now also on assertion failure.
      cloudToStoreReplicationManager.shutdown();
    }
  } finally {
    storageManager.shutdown();
  }
}
use of com.github.ambry.store.StorageManager in project ambry by linkedin.
the class ReplicationTest method replicaFromOfflineToBootstrapTest.
/**
 * Test that state transition in replication manager from OFFLINE to BOOTSTRAP
 * <p>
 * Covers four cases: an unknown partition (expects {@code ReplicaNotFound}), successful addition of a new
 * replica, a forced replica-addition failure (expects {@code ReplicaOperationFailure}) and a transition on a
 * replica that is already tracked. The storage manager is shut down in a {@code finally} block so that a failing
 * assertion does not leave it running.
 * @throws Exception
 */
@Test
public void replicaFromOfflineToBootstrapTest() throws Exception {
  MockClusterMap clusterMap = new MockClusterMap();
  ClusterMapConfig clusterMapConfig = new ClusterMapConfig(verifiableProperties);
  MockHelixParticipant.metricRegistry = new MetricRegistry();
  MockHelixParticipant mockHelixParticipant = new MockHelixParticipant(clusterMapConfig);
  DataNodeId currentNode = clusterMap.getDataNodeIds().get(0);
  Pair<StorageManager, ReplicationManager> managers =
      createStorageManagerAndReplicationManager(clusterMap, clusterMapConfig, mockHelixParticipant);
  StorageManager storageManager = managers.getFirst();
  try {
    MockReplicationManager replicationManager = (MockReplicationManager) managers.getSecond();
    assertTrue("State change listener in cluster participant should contain replication manager listener",
        mockHelixParticipant.getPartitionStateChangeListeners()
            .containsKey(StateModelListenerType.ReplicationManagerListener));
    // 1. test partition not found case (should throw exception)
    try {
      mockHelixParticipant.onPartitionBecomeBootstrapFromOffline("-1");
      fail("should fail because replica is not found");
    } catch (StateTransitionException e) {
      assertEquals("Transition error doesn't match", ReplicaNotFound, e.getErrorCode());
    }
    // 2. create a new partition and test replica addition success case
    ReplicaId newReplicaToAdd = getNewReplicaToAdd(clusterMap);
    PartitionId newPartition = newReplicaToAdd.getPartitionId();
    assertTrue("Adding new replica to Storage Manager should succeed", storageManager.addBlobStore(newReplicaToAdd));
    assertFalse("partitionToPartitionInfo should not contain new partition",
        replicationManager.partitionToPartitionInfo.containsKey(newPartition));
    mockHelixParticipant.onPartitionBecomeBootstrapFromOffline(newPartition.toPathString());
    assertTrue("partitionToPartitionInfo should contain new partition",
        replicationManager.partitionToPartitionInfo.containsKey(newPartition));
    // 3. test replica addition failure case
    replicationManager.partitionToPartitionInfo.remove(newPartition);
    replicationManager.addReplicaReturnVal = false;
    try {
      mockHelixParticipant.onPartitionBecomeBootstrapFromOffline(newPartition.toPathString());
      fail("should fail due to replica addition failure");
    } catch (StateTransitionException e) {
      assertEquals("Transition error doesn't match", ReplicaOperationFailure, e.getErrorCode());
    }
    // clear the forced return value before exercising the next case
    replicationManager.addReplicaReturnVal = null;
    // 4. test OFFLINE -> BOOTSTRAP on existing replica (should be no-op)
    ReplicaId existingReplica = clusterMap.getReplicaIds(currentNode).get(0);
    assertTrue("partitionToPartitionInfo should contain existing partition",
        replicationManager.partitionToPartitionInfo.containsKey(existingReplica.getPartitionId()));
    mockHelixParticipant.onPartitionBecomeBootstrapFromOffline(existingReplica.getPartitionId().toPathString());
  } finally {
    // Release the storage manager even when one of the assertions above fails.
    storageManager.shutdown();
  }
}
use of com.github.ambry.store.StorageManager in project ambry by linkedin.
the class ReplicationTest method dcLevelReplicationLagMetricsTest.
/**
 * Test the per-datacenter gauges that track how far remote replicas lag behind local replicas.
 * <p>
 * The test first checks the initial average lag gauge of every datacenter, then reports bytes read for all peer
 * replicas in the local datacenter and verifies that only the local datacenter's gauge changes. The storage
 * manager created for the test is shut down in a {@code finally} block, matching the other tests that use
 * {@code createStorageManagerAndReplicationManager}.
 * @throws Exception
 */
@Test
public void dcLevelReplicationLagMetricsTest() throws Exception {
  MockClusterMap clusterMap = new MockClusterMap();
  ClusterMapConfig clusterMapConfig = new ClusterMapConfig(verifiableProperties);
  Pair<StorageManager, ReplicationManager> managers =
      createStorageManagerAndReplicationManager(clusterMap, clusterMapConfig, null);
  StorageManager storageManager = managers.getFirst();
  try {
    MockReplicationManager replicationManager = (MockReplicationManager) managers.getSecond();
    Set<String> remoteDcNames = new HashSet<>(Arrays.asList("DC1", "DC2", "DC3"));
    String localDcName = clusterMap.getDataNodeIds().get(0).getDatacenterName();
    remoteDcNames.remove(localDcName);
    // before updating replication lag, make sure avg lag in each dc is at its initial value of 18.0
    MetricRegistry metricRegistry = replicationManager.getMetricRegistry();
    String prefix = ReplicaThread.class.getName() + ".";
    String avgMetricSuffix = "-avgReplicaLagFromLocalInBytes";
    assertEquals("Average replication lag in local dc is not expected", 18.0,
        metricRegistry.getGauges().get(prefix + localDcName + avgMetricSuffix).getValue());
    for (String remoteDc : remoteDcNames) {
      assertEquals("Average replication lag in remote dc is not expected", 18.0,
          metricRegistry.getGauges().get(prefix + remoteDc + avgMetricSuffix).getValue());
    }
    // iterate over all partitions on current node and make sure all their peer replicas in local dc have fully
    // caught up
    for (Map.Entry<PartitionId, PartitionInfo> entry : replicationManager.partitionToPartitionInfo.entrySet()) {
      PartitionId localPartition = entry.getKey();
      PartitionInfo partitionInfo = entry.getValue();
      List<RemoteReplicaInfo> remoteReplicaInfos = partitionInfo.getRemoteReplicaInfos()
          .stream()
          .filter(info -> info.getReplicaId().getDataNodeId().getDatacenterName().equals(localDcName))
          .collect(Collectors.toList());
      for (RemoteReplicaInfo remoteReplicaInfoInLocalDc : remoteReplicaInfos) {
        ReplicaId peerReplicaInLocalDc = remoteReplicaInfoInLocalDc.getReplicaId();
        // report 18 bytes read by the peer; the assertion below expects this to bring the local dc lag to 0
        replicationManager.updateTotalBytesReadByRemoteReplica(localPartition,
            peerReplicaInLocalDc.getDataNodeId().getHostname(), peerReplicaInLocalDc.getReplicaPath(), 18);
      }
    }
    // verify that after updating replication lag for all peer replicas in local dc, the avg lag in local dc has
    // updated
    assertEquals("Average replication lag in local dc is not expected", 0.0,
        metricRegistry.getGauges().get(prefix + localDcName + avgMetricSuffix).getValue());
    // for remote dc, the avg lag is still 18.0
    for (String remoteDc : remoteDcNames) {
      assertEquals("Average replication lag in remote dc is not expected", 18.0,
          metricRegistry.getGauges().get(prefix + remoteDc + avgMetricSuffix).getValue());
    }
  } finally {
    // Previously the storage manager was never shut down by this test.
    storageManager.shutdown();
  }
}
use of com.github.ambry.store.StorageManager in project ambry by linkedin.
the class ReplicationTest method replicaFromLeaderToStandbyTest.
/**
 * Test state transition in replication manager from LEADER to STANDBY
 * Test setup: When creating partitions, make sure that there is exactly one replica in LEADER STATE on each data center
 * Test condition: When a partition on the current node moves from leader to standby, verify that in-memory map storing
 * partition to peer leader replicas is updated correctly
 * <p>
 * The test temporarily replaces the shared {@code replicationConfig} with a LEADER_BASED configuration. The
 * original configuration is restored and the storage manager is shut down in {@code finally} blocks so that a
 * failing assertion cannot leave either behind.
 * @throws Exception
 */
@Test
public void replicaFromLeaderToStandbyTest() throws Exception {
  MockClusterMap clusterMap = new MockClusterMap();
  ClusterMapConfig clusterMapConfig = new ClusterMapConfig(verifiableProperties);
  MockHelixParticipant.metricRegistry = new MetricRegistry();
  MockHelixParticipant mockHelixParticipant = new MockHelixParticipant(clusterMapConfig);
  ReplicationConfig initialReplicationConfig = replicationConfig;
  // NOTE(review): this property is set on the shared `properties` object and is never reset here - confirm
  // whether other tests can observe it.
  properties.setProperty("replication.model.across.datacenters", "LEADER_BASED");
  replicationConfig = new ReplicationConfig(new VerifiableProperties(properties));
  try {
    Pair<StorageManager, ReplicationManager> managers =
        createStorageManagerAndReplicationManager(clusterMap, clusterMapConfig, mockHelixParticipant);
    StorageManager storageManager = managers.getFirst();
    try {
      MockReplicationManager replicationManager = (MockReplicationManager) managers.getSecond();
      PartitionId existingPartition = replicationManager.partitionToPartitionInfo.keySet().iterator().next();
      // STANDBY -> LEADER: the partition must show up in the peer leader replica map
      mockHelixParticipant.onPartitionBecomeLeaderFromStandby(existingPartition.toPathString());
      Map<String, Set<ReplicaId>> peerLeaderReplicasByPartition =
          replicationManager.leaderBasedReplicationAdmin.getLeaderPartitionToPeerLeaderReplicas();
      assertTrue(
          "Partition is not present in the map of partition to peer leader replicas after it moved from standby to leader",
          peerLeaderReplicasByPartition.containsKey(existingPartition.toPathString()));
      // LEADER -> STANDBY: the partition must be gone from the same map
      mockHelixParticipant.onPartitionBecomeStandbyFromLeader(existingPartition.toPathString());
      assertFalse(
          "Partition is still present in the map of partition to peer leader replicas after it moved from leader to standby",
          peerLeaderReplicasByPartition.containsKey(existingPartition.toPathString()));
    } finally {
      storageManager.shutdown();
    }
  } finally {
    // Put the original replication config back regardless of the test outcome.
    replicationConfig = initialReplicationConfig;
  }
}
Aggregations