Search in sources :

Example 1 with DiskId

use of com.github.ambry.clustermap.DiskId in project ambry by linkedin.

the class StorageManagerTest method isDiskAvailableTest.

/**
 * Checks that {@link StorageManager} derives the availability of a disk from the states of the stores on it:
 * the disk is reported available while at least one store is up, unavailable once every store has been shut down,
 * and available again after a store on it is restarted.
 */
@Test
public void isDiskAvailableTest() throws Exception {
    MockDataNodeId dataNode = clusterMap.getDataNodes().get(0);
    List<ReplicaId> replicas = clusterMap.getReplicaIds(dataNode);
    StorageManager storageManager = createStorageManager(dataNode, metricRegistry, null);
    storageManager.start();
    assertEquals("There should be no unexpected partitions reported", 0, getNumUnrecognizedPartitionsReported());

    // group the replicas of this node by the disk they live on
    Map<DiskId, List<ReplicaId>> diskToReplicas = new HashMap<>();
    replicas.forEach(
        replica -> diskToReplicas.computeIfAbsent(replica.getDiskId(), disk -> new ArrayList<>()).add(replica));

    // phase 1: on every disk, stop every store but the last one
    for (List<ReplicaId> replicasOnDisk : diskToReplicas.values()) {
        for (ReplicaId replica : replicasOnDisk.subList(0, replicasOnDisk.size() - 1)) {
            storageManager.getStore(replica.getPartitionId(), false).shutdown();
        }
    }
    // one store per disk is still running, so every disk must still be available
    for (List<ReplicaId> replicasOnDisk : diskToReplicas.values()) {
        ReplicaId firstReplica = replicasOnDisk.get(0);
        assertTrue("Disk should be available", storageManager.isDiskAvailable(firstReplica.getDiskId()));
        assertEquals("Disk state be available", HardwareState.AVAILABLE, firstReplica.getDiskId().getState());
    }

    // phase 2: stop the remaining (last) store on every disk
    for (List<ReplicaId> replicasOnDisk : diskToReplicas.values()) {
        ReplicaId lastReplica = replicasOnDisk.get(replicasOnDisk.size() - 1);
        storageManager.getStore(lastReplica.getPartitionId(), false).shutdown();
    }
    // with no store running, every disk must now be unavailable
    for (List<ReplicaId> replicasOnDisk : diskToReplicas.values()) {
        ReplicaId firstReplica = replicasOnDisk.get(0);
        assertFalse("Disk should be unavailable", storageManager.isDiskAvailable(firstReplica.getDiskId()));
    }

    // phase 3: bring one store per disk back up
    for (List<ReplicaId> replicasOnDisk : diskToReplicas.values()) {
        ReplicaId firstReplica = replicasOnDisk.get(0);
        storageManager.startBlobStore(firstReplica.getPartitionId());
    }
    // a single running store is enough for the disk to be available again
    for (List<ReplicaId> replicasOnDisk : diskToReplicas.values()) {
        ReplicaId firstReplica = replicasOnDisk.get(0);
        assertTrue("Disk should be available", storageManager.isDiskAvailable(firstReplica.getDiskId()));
        assertEquals("Disk state be available", HardwareState.AVAILABLE, firstReplica.getDiskId().getState());
    }
    shutdownAndAssertStoresInaccessible(storageManager, replicas);
}
Also used : DiskId(com.github.ambry.clustermap.DiskId) Arrays(java.util.Arrays) ClusterMapUtils(com.github.ambry.clustermap.ClusterMapUtils) DataNodeId(com.github.ambry.clustermap.DataNodeId) Random(java.util.Random) ByteBuffer(java.nio.ByteBuffer) MockHelixManagerFactory(com.github.ambry.clustermap.MockHelixManagerFactory) JSONObject(org.json.JSONObject) TestUtils(com.github.ambry.utils.TestUtils) Map(java.util.Map) After(org.junit.After) Counter(com.codahale.metrics.Counter) ClusterParticipant(com.github.ambry.clustermap.ClusterParticipant) DiskManagerConfig(com.github.ambry.config.DiskManagerConfig) Set(java.util.Set) Utils(com.github.ambry.utils.Utils) MockPartitionId(com.github.ambry.clustermap.MockPartitionId) Collectors(java.util.stream.Collectors) AccountStatsStore(com.github.ambry.accountstats.AccountStatsStore) CountDownLatch(java.util.concurrent.CountDownLatch) List(java.util.List) PartitionStateChangeListener(com.github.ambry.clustermap.PartitionStateChangeListener) StatsSnapshot(com.github.ambry.server.StatsSnapshot) Callback(com.github.ambry.commons.Callback) TransitionErrorCode(com.github.ambry.clustermap.StateTransitionException.TransitionErrorCode) InMemAccountService(com.github.ambry.account.InMemAccountService) PartitionId(com.github.ambry.clustermap.PartitionId) HashMap(java.util.HashMap) HardwareState(com.github.ambry.clustermap.HardwareState) AmbryStatsReport(com.github.ambry.server.AmbryStatsReport) ArrayList(java.util.ArrayList) HashSet(java.util.HashSet) TestUtils(com.github.ambry.clustermap.TestUtils) SystemTime(com.github.ambry.utils.SystemTime) Before(org.junit.Before) BlobStoreTest(com.github.ambry.store.BlobStoreTest) MockDataNodeId(com.github.ambry.clustermap.MockDataNodeId) ReplicaState(com.github.ambry.clustermap.ReplicaState) StateModelListenerType(com.github.ambry.clustermap.StateModelListenerType) StoreConfig(com.github.ambry.config.StoreConfig) MetricRegistry(com.codahale.metrics.MetricRegistry) Properties(java.util.Properties) 
Pair(com.github.ambry.utils.Pair) HelixParticipant(com.github.ambry.clustermap.HelixParticipant) VerifiableProperties(com.github.ambry.config.VerifiableProperties) IOException(java.io.IOException) Test(org.junit.Test) InstanceConfig(org.apache.helix.model.InstanceConfig) File(java.io.File) TimeUnit(java.util.concurrent.TimeUnit) Mockito(org.mockito.Mockito) HelixAdmin(org.apache.helix.HelixAdmin) StateTransitionException(com.github.ambry.clustermap.StateTransitionException) ReplicaId(com.github.ambry.clustermap.ReplicaId) ClusterMapConfig(com.github.ambry.config.ClusterMapConfig) Assert(org.junit.Assert) Collections(java.util.Collections) MockClusterMap(com.github.ambry.clustermap.MockClusterMap) HashMap(java.util.HashMap) MockDataNodeId(com.github.ambry.clustermap.MockDataNodeId) ArrayList(java.util.ArrayList) List(java.util.List) ArrayList(java.util.ArrayList) ReplicaId(com.github.ambry.clustermap.ReplicaId) DiskId(com.github.ambry.clustermap.DiskId) BlobStoreTest(com.github.ambry.store.BlobStoreTest) Test(org.junit.Test)

Example 2 with DiskId

use of com.github.ambry.clustermap.DiskId in project ambry by linkedin.

the class StorageManagerTest method setBlobStoreStoppedStateSuccessTest.

/**
 * Tests that the stopped state of blob stores can be successfully set and then cleared for a given list of
 * {@link PartitionId}. For both directions the test checks the list returned by
 * {@code setBlobStoreStoppedState}, the stopped-replica list exposed by the {@link ClusterParticipant} spy, and that
 * the spy was invoked once per disk with exactly the replicas residing on that disk.
 */
@Test
public void setBlobStoreStoppedStateSuccessTest() throws Exception {
    MockDataNodeId dataNode = clusterMap.getDataNodes().get(0);
    List<ReplicaId> replicas = clusterMap.getReplicaIds(dataNode);
    List<PartitionId> partitionIds = new ArrayList<>();
    Map<DiskId, List<ReplicaId>> diskToReplicas = new HashMap<>();
    // test setting the state of store via instantiated MockClusterParticipant
    ClusterParticipant participant = new MockClusterParticipant();
    ClusterParticipant participantSpy = Mockito.spy(participant);
    StorageManager storageManager = createStorageManager(dataNode, metricRegistry, Collections.singletonList(participantSpy));
    storageManager.start();
    assertEquals("There should be no unexpected partitions reported", 0, getNumUnrecognizedPartitionsReported());
    for (ReplicaId replica : replicas) {
        partitionIds.add(replica.getPartitionId());
        diskToReplicas.computeIfAbsent(replica.getDiskId(), disk -> new ArrayList<>()).add(replica);
    }
    List<PartitionId> failToUpdateList;
    // add a list of stores to STOPPED list. Note that the stores are residing on 3 disks.
    failToUpdateList = storageManager.setBlobStoreStoppedState(partitionIds, true);
    // make sure the update operation succeeds
    assertTrue("Add stores to stopped list should succeed, failToUpdateList should be empty", failToUpdateList.isEmpty());
    // make sure the stopped list contains all the added stores
    Set<String> stoppedReplicasCopy = new HashSet<>(participantSpy.getStoppedReplicas());
    for (ReplicaId replica : replicas) {
        assertTrue("The stopped list should contain the replica: " + replica.getPartitionId().toPathString(), stoppedReplicasCopy.contains(replica.getPartitionId().toPathString()));
    }
    // make sure the participant is invoked once per disk and each time the input replica list conforms with stores on that disk
    for (List<ReplicaId> replicasPerDisk : diskToReplicas.values()) {
        verify(participantSpy, times(1)).setReplicaStoppedState(replicasPerDisk, true);
    }
    // remove a list of stores from STOPPED list. Note that the stores are residing on 3 disks.
    // The result must be captured here: otherwise the assertion below would only re-check the list returned by the
    // earlier "add" call and could never detect a failed removal.
    failToUpdateList = storageManager.setBlobStoreStoppedState(partitionIds, false);
    // make sure the update operation succeeds
    assertTrue("Remove stores from stopped list should succeed, failToUpdateList should be empty", failToUpdateList.isEmpty());
    // make sure the stopped list is empty because all the stores are successfully removed.
    assertTrue("The stopped list should be empty after removing all stores", participantSpy.getStoppedReplicas().isEmpty());
    // make sure the participant is invoked once per disk and each time the input replica list conforms with stores on that disk
    for (List<ReplicaId> replicasPerDisk : diskToReplicas.values()) {
        verify(participantSpy, times(1)).setReplicaStoppedState(replicasPerDisk, false);
    }
    shutdownAndAssertStoresInaccessible(storageManager, replicas);
}
Also used : DiskId(com.github.ambry.clustermap.DiskId) Arrays(java.util.Arrays) ClusterMapUtils(com.github.ambry.clustermap.ClusterMapUtils) DataNodeId(com.github.ambry.clustermap.DataNodeId) Random(java.util.Random) ByteBuffer(java.nio.ByteBuffer) MockHelixManagerFactory(com.github.ambry.clustermap.MockHelixManagerFactory) JSONObject(org.json.JSONObject) TestUtils(com.github.ambry.utils.TestUtils) Map(java.util.Map) After(org.junit.After) Counter(com.codahale.metrics.Counter) ClusterParticipant(com.github.ambry.clustermap.ClusterParticipant) DiskManagerConfig(com.github.ambry.config.DiskManagerConfig) Set(java.util.Set) Utils(com.github.ambry.utils.Utils) MockPartitionId(com.github.ambry.clustermap.MockPartitionId) Collectors(java.util.stream.Collectors) AccountStatsStore(com.github.ambry.accountstats.AccountStatsStore) CountDownLatch(java.util.concurrent.CountDownLatch) List(java.util.List) PartitionStateChangeListener(com.github.ambry.clustermap.PartitionStateChangeListener) StatsSnapshot(com.github.ambry.server.StatsSnapshot) Callback(com.github.ambry.commons.Callback) TransitionErrorCode(com.github.ambry.clustermap.StateTransitionException.TransitionErrorCode) InMemAccountService(com.github.ambry.account.InMemAccountService) PartitionId(com.github.ambry.clustermap.PartitionId) HashMap(java.util.HashMap) HardwareState(com.github.ambry.clustermap.HardwareState) AmbryStatsReport(com.github.ambry.server.AmbryStatsReport) ArrayList(java.util.ArrayList) HashSet(java.util.HashSet) TestUtils(com.github.ambry.clustermap.TestUtils) SystemTime(com.github.ambry.utils.SystemTime) Before(org.junit.Before) BlobStoreTest(com.github.ambry.store.BlobStoreTest) MockDataNodeId(com.github.ambry.clustermap.MockDataNodeId) ReplicaState(com.github.ambry.clustermap.ReplicaState) StateModelListenerType(com.github.ambry.clustermap.StateModelListenerType) StoreConfig(com.github.ambry.config.StoreConfig) MetricRegistry(com.codahale.metrics.MetricRegistry) Properties(java.util.Properties) 
Pair(com.github.ambry.utils.Pair) HelixParticipant(com.github.ambry.clustermap.HelixParticipant) VerifiableProperties(com.github.ambry.config.VerifiableProperties) IOException(java.io.IOException) Test(org.junit.Test) InstanceConfig(org.apache.helix.model.InstanceConfig) File(java.io.File) TimeUnit(java.util.concurrent.TimeUnit) Mockito(org.mockito.Mockito) HelixAdmin(org.apache.helix.HelixAdmin) StateTransitionException(com.github.ambry.clustermap.StateTransitionException) ReplicaId(com.github.ambry.clustermap.ReplicaId) ClusterMapConfig(com.github.ambry.config.ClusterMapConfig) Assert(org.junit.Assert) Collections(java.util.Collections) MockClusterMap(com.github.ambry.clustermap.MockClusterMap) HashMap(java.util.HashMap) ArrayList(java.util.ArrayList) MockPartitionId(com.github.ambry.clustermap.MockPartitionId) PartitionId(com.github.ambry.clustermap.PartitionId) ReplicaId(com.github.ambry.clustermap.ReplicaId) MockDataNodeId(com.github.ambry.clustermap.MockDataNodeId) List(java.util.List) ArrayList(java.util.ArrayList) ClusterParticipant(com.github.ambry.clustermap.ClusterParticipant) DiskId(com.github.ambry.clustermap.DiskId) HashSet(java.util.HashSet) BlobStoreTest(com.github.ambry.store.BlobStoreTest) Test(org.junit.Test)

Example 3 with DiskId

use of com.github.ambry.clustermap.DiskId in project ambry by linkedin.

the class CryptoJobMetricsTracker method initializeResourceToHistogramMap.

/**
 * Initialize resource-to-latency-histogram maps based on given resource type. Here resource can be {@link PartitionId},
 * {@link DataNodeId} or {@link DiskId}. The resource type is defined by
 * {@link RouterConfig#routerOperationTrackerMetricScope}; for any other scope the maps are left untouched.
 * <p>
 * For the {@code Partition} scope every partition gets a histogram in all five maps. For the {@code DataNode} and
 * {@code Disk} scopes a resource gets local-DC (and put-blob) histograms when it belongs to the local datacenter and
 * cross-DC histograms otherwise.
 * @param clusterMap the {@link ClusterMap} that contains info of all resources.
 * @param routerConfig the {@link RouterConfig} that specifies histogram parameters.
 */
private void initializeResourceToHistogramMap(ClusterMap clusterMap, RouterConfig routerConfig) {
    String localDatacenterName = clusterMap.getDatacenterName(clusterMap.getLocalDatacenterId());
    switch(routerConfig.routerOperationTrackerMetricScope) {
        case Partition:
            for (PartitionId partitionId : clusterMap.getAllPartitionIds(null)) {
                getBlobLocalDcResourceToLatency.put(partitionId, createHistogram(routerConfig, false));
                getBlobInfoLocalDcResourceToLatency.put(partitionId, createHistogram(routerConfig, false));
                getBlobCrossDcResourceToLatency.put(partitionId, createHistogram(routerConfig, false));
                getBlobInfoCrossDcResourceToLatency.put(partitionId, createHistogram(routerConfig, false));
                putBlobResourceToLatency.put(partitionId, createHistogram(routerConfig, false));
            }
            break;
        case DataNode:
            List<? extends DataNodeId> dataNodeIds = clusterMap.getDataNodeIds();
            for (DataNodeId dataNodeId : dataNodeIds) {
                if (dataNodeId.getDatacenterName().equals(localDatacenterName)) {
                    getBlobLocalDcResourceToLatency.put(dataNodeId, createHistogram(routerConfig, false));
                    getBlobInfoLocalDcResourceToLatency.put(dataNodeId, createHistogram(routerConfig, false));
                    // Put blob only cares about local DC data nodes.
                    putBlobResourceToLatency.put(dataNodeId, createHistogram(routerConfig, false));
                } else {
                    getBlobCrossDcResourceToLatency.put(dataNodeId, createHistogram(routerConfig, false));
                    getBlobInfoCrossDcResourceToLatency.put(dataNodeId, createHistogram(routerConfig, false));
                }
            }
            break;
        case Disk:
            for (PartitionId partitionId : clusterMap.getAllPartitionIds(null)) {
                for (ReplicaId replicaId : partitionId.getReplicaIds()) {
                    DiskId diskId = replicaId.getDiskId();
                    // A disk hosts several replicas; skip it if its histograms were already created.
                    if (getBlobLocalDcResourceToLatency.containsKey(diskId) || getBlobCrossDcResourceToLatency.containsKey(diskId)) {
                        continue;
                    }
                    if (replicaId.getDataNodeId().getDatacenterName().equals(localDatacenterName)) {
                        getBlobLocalDcResourceToLatency.put(diskId, createHistogram(routerConfig, false));
                        getBlobInfoLocalDcResourceToLatency.put(diskId, createHistogram(routerConfig, false));
                        // Put blob histograms are only created for local DC disks.
                        putBlobResourceToLatency.put(diskId, createHistogram(routerConfig, false));
                    } else {
                        getBlobCrossDcResourceToLatency.put(diskId, createHistogram(routerConfig, false));
                        getBlobInfoCrossDcResourceToLatency.put(diskId, createHistogram(routerConfig, false));
                    }
                }
            }
            // Explicit break: do not rely on falling through into the (currently empty) default branch.
            break;
        default:
            // Other scopes need no per-resource histograms here.
            break;
    }
}
Also used : PartitionId(com.github.ambry.clustermap.PartitionId) DataNodeId(com.github.ambry.clustermap.DataNodeId) ReplicaId(com.github.ambry.clustermap.ReplicaId) DiskId(com.github.ambry.clustermap.DiskId)

Example 4 with DiskId

use of com.github.ambry.clustermap.DiskId in project ambry by linkedin.

the class AdaptiveOperationTracker method getLatencyHistogram.

/**
 * Looks up the {@link Histogram} used to track request latencies for the group of replicas that {@code replicaId}
 * falls into. The group is determined by whether the replica is in the local datacenter and by the configured
 * {@code routerOperationTrackerMetricScope} (datacenter, partition, data node or disk). Per-resource histograms are
 * created on first use.
 * @param replicaId the {@link ReplicaId} whose request latency is going to be tracked.
 * @return the {@link CachedHistogram} associated with this replica.
 * @throws IllegalArgumentException if the configured metric scope is not one of the supported scopes.
 */
CachedHistogram getLatencyHistogram(ReplicaId replicaId) {
    String replicaDatacenterName = replicaId.getDataNodeId().getDatacenterName();
    boolean inLocalDc = replicaDatacenterName.equals(datacenterName);
    switch (routerConfig.routerOperationTrackerMetricScope) {
        case Datacenter: {
            if (inLocalDc) {
                return localDcHistogram;
            }
            return crossDcHistogram;
        }
        case Partition: {
            PartitionId partition = replicaId.getPartitionId();
            if (inLocalDc) {
                return localDcResourceToHistogram.computeIfAbsent(partition, key -> createHistogram(routerConfig, false));
            }
            return crossDcResourceToHistogram.computeIfAbsent(partition, key -> createHistogram(routerConfig, false));
        }
        case DataNode: {
            DataNodeId dataNode = replicaId.getDataNodeId();
            if (inLocalDc) {
                return localDcResourceToHistogram.computeIfAbsent(dataNode, key -> createHistogram(routerConfig, false));
            }
            return crossDcResourceToHistogram.computeIfAbsent(dataNode, key -> createHistogram(routerConfig, false));
        }
        case Disk: {
            DiskId disk = replicaId.getDiskId();
            if (inLocalDc) {
                return localDcResourceToHistogram.computeIfAbsent(disk, key -> createHistogram(routerConfig, false));
            }
            return crossDcResourceToHistogram.computeIfAbsent(disk, key -> createHistogram(routerConfig, false));
        }
        default:
            throw new IllegalArgumentException("Unsupported operation tracker metric scope.");
    }
}
Also used : PartitionId(com.github.ambry.clustermap.PartitionId) DataNodeId(com.github.ambry.clustermap.DataNodeId) CachedHistogram(com.github.ambry.utils.CachedHistogram) DiskId(com.github.ambry.clustermap.DiskId)

Aggregations

DataNodeId (com.github.ambry.clustermap.DataNodeId)4 DiskId (com.github.ambry.clustermap.DiskId)4 PartitionId (com.github.ambry.clustermap.PartitionId)4 ReplicaId (com.github.ambry.clustermap.ReplicaId)3 Counter (com.codahale.metrics.Counter)2 MetricRegistry (com.codahale.metrics.MetricRegistry)2 InMemAccountService (com.github.ambry.account.InMemAccountService)2 AccountStatsStore (com.github.ambry.accountstats.AccountStatsStore)2 ClusterMapUtils (com.github.ambry.clustermap.ClusterMapUtils)2 ClusterParticipant (com.github.ambry.clustermap.ClusterParticipant)2 HardwareState (com.github.ambry.clustermap.HardwareState)2 HelixParticipant (com.github.ambry.clustermap.HelixParticipant)2 MockClusterMap (com.github.ambry.clustermap.MockClusterMap)2 MockDataNodeId (com.github.ambry.clustermap.MockDataNodeId)2 MockHelixManagerFactory (com.github.ambry.clustermap.MockHelixManagerFactory)2 MockPartitionId (com.github.ambry.clustermap.MockPartitionId)2 PartitionStateChangeListener (com.github.ambry.clustermap.PartitionStateChangeListener)2 ReplicaState (com.github.ambry.clustermap.ReplicaState)2 StateModelListenerType (com.github.ambry.clustermap.StateModelListenerType)2 StateTransitionException (com.github.ambry.clustermap.StateTransitionException)2