Search in sources :

Example 21 with StorageManager

use of com.github.ambry.store.StorageManager in project ambry by linkedin.

the class ReplicationTestHelper method createStorageManagerAndReplicationManager.

/**
 * Helper method to create storage manager and replication manager
 * @param clusterMap {@link ClusterMap} to use
 * @param clusterMapConfig {@link ClusterMapConfig} to use
 * @param clusterParticipant {@link com.github.ambry.clustermap.ClusterParticipant} for listener registration.
 * @return a pair of storage manager and replication manager
 * @throws Exception
 */
protected Pair<StorageManager, ReplicationManager> createStorageManagerAndReplicationManager(ClusterMap clusterMap, ClusterMapConfig clusterMapConfig, MockHelixParticipant clusterParticipant, ConnectionPool mockConnectionPool) throws Exception {
    StoreConfig storeConfig = new StoreConfig(verifiableProperties);
    DataNodeId dataNodeId = clusterMap.getDataNodeIds().get(0);
    MockStoreKeyConverterFactory storeKeyConverterFactory = new MockStoreKeyConverterFactory(null, null);
    storeKeyConverterFactory.setConversionMap(new HashMap<>());
    storeKeyConverterFactory.setReturnInputIfAbsent(true);
    StoreKeyFactory storeKeyFactory = new BlobIdFactory(clusterMap);
    StorageManager storageManager = new StorageManager(storeConfig, new DiskManagerConfig(verifiableProperties), Utils.newScheduler(1, true), new MetricRegistry(), null, clusterMap, dataNodeId, null, clusterParticipant == null ? null : Collections.singletonList(clusterParticipant), new MockTime(), null, new InMemAccountService(false, false));
    storageManager.start();
    MockReplicationManager replicationManager = new MockReplicationManager(replicationConfig, clusterMapConfig, storeConfig, storageManager, clusterMap, dataNodeId, storeKeyConverterFactory, clusterParticipant, mockConnectionPool, new MockFindTokenHelper(storeKeyFactory, replicationConfig), BlobIdTransformer.class.getName(), storeKeyFactory, time);
    return new Pair<>(storageManager, replicationManager);
}
Also used : DiskManagerConfig(com.github.ambry.config.DiskManagerConfig) MockStoreKeyConverterFactory(com.github.ambry.store.MockStoreKeyConverterFactory) MetricRegistry(com.codahale.metrics.MetricRegistry) StorageManager(com.github.ambry.store.StorageManager) BlobIdFactory(com.github.ambry.commons.BlobIdFactory) StoreKeyFactory(com.github.ambry.store.StoreKeyFactory) InMemAccountService(com.github.ambry.account.InMemAccountService) StoreConfig(com.github.ambry.config.StoreConfig) DataNodeId(com.github.ambry.clustermap.DataNodeId) MockTime(com.github.ambry.utils.MockTime) Pair(com.github.ambry.utils.Pair)

Example 22 with StorageManager

use of com.github.ambry.store.StorageManager in project ambry by linkedin.

the class AmbryServerRequests method handleStopStoreRequest.

/**
 * Handles admin request that stops BlobStore
 * @param partitionId the {@link PartitionId} associated with BlobStore
 * @param numReplicasCaughtUpPerPartition the minimum number of peer replicas per partition that should catch up with
 *                                        local store before stopping it.
 * @return {@link ServerErrorCode} represents result of handling admin request.
 */
private ServerErrorCode handleStopStoreRequest(PartitionId partitionId, short numReplicasCaughtUpPerPartition) {
    ServerErrorCode error = validateRequest(partitionId, RequestOrResponseType.AdminRequest, false);
    if (!error.equals(ServerErrorCode.No_Error)) {
        logger.debug("Validate request fails for {} with error code {} when trying to stop store", partitionId, error);
        return error;
    }
    if (numReplicasCaughtUpPerPartition < 0) {
        logger.debug("The number of replicas to catch up should not be less than zero {}", numReplicasCaughtUpPerPartition);
        return ServerErrorCode.Bad_Request;
    }
    if (!storeManager.controlCompactionForBlobStore(partitionId, false)) {
        logger.error("Disable compaction fails on given BlobStore {}", partitionId);
        return ServerErrorCode.Unknown_Error;
    }
    Collection<PartitionId> partitionIds = Collections.singletonList(partitionId);
    controlRequestForPartitions(EnumSet.of(RequestOrResponseType.PutRequest, RequestOrResponseType.DeleteRequest, RequestOrResponseType.TtlUpdateRequest), partitionIds, false);
    if (!replicationEngine.controlReplicationForPartitions(partitionIds, Collections.<String>emptyList(), false)) {
        logger.error("Could not disable replication on {}", partitionIds);
        return ServerErrorCode.Unknown_Error;
    }
    if (!isRemoteLagLesserOrEqual(partitionIds, 0, numReplicasCaughtUpPerPartition)) {
        logger.debug("Catchup not done on {}", partitionIds);
        return ServerErrorCode.Retry_After_Backoff;
    }
    controlRequestForPartitions(EnumSet.of(RequestOrResponseType.ReplicaMetadataRequest, RequestOrResponseType.GetRequest), partitionIds, false);
    // Shutdown the BlobStore completely
    if (storeManager.shutdownBlobStore(partitionId)) {
        logger.info("Store is successfully shutdown for partition: {}", partitionId);
        List<PartitionId> failToUpdateList = storeManager.setBlobStoreStoppedState(Collections.singletonList(partitionId), true);
        if (!failToUpdateList.isEmpty() && clusterParticipant != null) {
            logger.error("Fail to add BlobStore(s) {} to stopped list after stop operation completed", failToUpdateList.toArray());
            error = ServerErrorCode.Unknown_Error;
        }
        // is adopted. The intention here is to force Helix to re-elect a new leader replica if necessary.
        if (storeManager instanceof StorageManager && clusterParticipant != null) {
            // Previous operation has guaranteed the store is not null
            Store store = ((StorageManager) storeManager).getStore(partitionId, true);
            // Setting disable state will trigger transition error at the very beginning of STANDBY -> INACTIVE transition.
            // Thus it doesn't go through decommission process.
            ((BlobStore) store).setDisableState(true);
            clusterParticipant.setReplicaDisabledState(storeManager.getReplica(partitionId.toPathString()), true);
        }
    } else {
        error = ServerErrorCode.Unknown_Error;
        logger.error("Shutting down BlobStore fails on {}", partitionId);
    }
    return error;
}
Also used : StorageManager(com.github.ambry.store.StorageManager) Store(com.github.ambry.store.Store) BlobStore(com.github.ambry.store.BlobStore) PartitionId(com.github.ambry.clustermap.PartitionId) BlobStore(com.github.ambry.store.BlobStore)

Example 23 with StorageManager

use of com.github.ambry.store.StorageManager in project ambry by linkedin.

the class StatsManagerTest method testReplicaFromOfflineToDropped.

/**
 * Test Offline-To-Dropped transition (both failure and success cases)
 * @throws Exception
 */
@Test
public void testReplicaFromOfflineToDropped() throws Exception {
    ClusterMapConfig clusterMapConfig = new ClusterMapConfig(verifiableProperties);
    ReplicationConfig replicationConfig = new ReplicationConfig(verifiableProperties);
    StoreConfig storeConfig = new StoreConfig(verifiableProperties);
    MockClusterMap clusterMap = new MockClusterMap();
    DataNodeId currentNode = clusterMap.getDataNodeIds().get(0);
    List<ReplicaId> localReplicas = clusterMap.getReplicaIds(currentNode);
    StorageManager storageManager = new StorageManager(storeConfig, new DiskManagerConfig(verifiableProperties), Utils.newScheduler(1, true), new MetricRegistry(), null, clusterMap, currentNode, null, Collections.singletonList(clusterParticipant), new MockTime(), null, new InMemAccountService(false, false));
    storageManager.start();
    MockStoreKeyConverterFactory storeKeyConverterFactory = new MockStoreKeyConverterFactory(null, null);
    storeKeyConverterFactory.setConversionMap(new HashMap<>());
    MockReplicationManager mockReplicationManager = new MockReplicationManager(replicationConfig, clusterMapConfig, storeConfig, storageManager, clusterMap, currentNode, storeKeyConverterFactory, clusterParticipant);
    MockStatsManager mockStatsManager = new MockStatsManager(storageManager, localReplicas, new MetricRegistry(), statsManagerConfig, clusterParticipant);
    // 1. attempt to remove replica while store is still running (remove store failure case)
    ReplicaId replicaToDrop = localReplicas.get(0);
    try {
        clusterParticipant.onPartitionBecomeDroppedFromOffline(replicaToDrop.getPartitionId().toPathString());
        fail("should fail because store is still running");
    } catch (StateTransitionException e) {
        assertEquals("Error code doesn't match", ReplicaOperationFailure, e.getErrorCode());
    }
    // 2. shutdown the store but introduce file deletion failure (put a invalid dir in store dir)
    storageManager.shutdownBlobStore(replicaToDrop.getPartitionId());
    File invalidDir = new File(replicaToDrop.getReplicaPath(), "invalidDir");
    invalidDir.deleteOnExit();
    assertTrue("Couldn't create dir within store dir", invalidDir.mkdir());
    assertTrue("Could not make unreadable", invalidDir.setReadable(false));
    try {
        clusterParticipant.onPartitionBecomeDroppedFromOffline(replicaToDrop.getPartitionId().toPathString());
        fail("should fail because store deletion fails");
    } catch (StateTransitionException e) {
        assertEquals("Error code doesn't match", ReplicaOperationFailure, e.getErrorCode());
    }
    // reset permission to allow deletion to succeed.
    assertTrue("Could not make readable", invalidDir.setReadable(true));
    assertTrue("Could not delete invalid dir", invalidDir.delete());
    // 3. success case (remove another replica because previous replica has been removed from in-mem data structures)
    ReplicaId replica = localReplicas.get(1);
    storageManager.shutdownBlobStore(replica.getPartitionId());
    MockHelixParticipant mockHelixParticipant = Mockito.spy(clusterParticipant);
    doNothing().when(mockHelixParticipant).setPartitionDisabledState(anyString(), anyBoolean());
    mockHelixParticipant.onPartitionBecomeDroppedFromOffline(replica.getPartitionId().toPathString());
    // verify that the replica is no longer present in StorageManager
    assertNull("Store of removed replica should not exist", storageManager.getStore(replica.getPartitionId(), true));
    // purposely remove the same replica in ReplicationManager again to verify it no longer exists
    assertFalse("Should return false because replica no longer exists", mockReplicationManager.removeReplica(replica));
    // purposely remove the same replica in StatsManager again to verify it no longer exists
    assertFalse("Should return false because replica no longer exists", mockStatsManager.removeReplica(replica));
    verify(mockHelixParticipant).setPartitionDisabledState(replica.getPartitionId().toPathString(), false);
    storageManager.shutdown();
    mockStatsManager.shutdown();
}
Also used : DiskManagerConfig(com.github.ambry.config.DiskManagerConfig) MockStoreKeyConverterFactory(com.github.ambry.store.MockStoreKeyConverterFactory) ReplicationConfig(com.github.ambry.config.ReplicationConfig) MetricRegistry(com.codahale.metrics.MetricRegistry) StorageManager(com.github.ambry.store.StorageManager) ClusterMapConfig(com.github.ambry.config.ClusterMapConfig) ReplicaId(com.github.ambry.clustermap.ReplicaId) MockReplicationManager(com.github.ambry.replication.MockReplicationManager) InMemAccountService(com.github.ambry.account.InMemAccountService) MockHelixParticipant(com.github.ambry.clustermap.MockHelixParticipant) StoreConfig(com.github.ambry.config.StoreConfig) DataNodeId(com.github.ambry.clustermap.DataNodeId) MockDataNodeId(com.github.ambry.clustermap.MockDataNodeId) File(java.io.File) MockTime(com.github.ambry.utils.MockTime) MockClusterMap(com.github.ambry.clustermap.MockClusterMap) StateTransitionException(com.github.ambry.clustermap.StateTransitionException) Test(org.junit.Test)

Example 24 with StorageManager

use of com.github.ambry.store.StorageManager in project ambry by linkedin.

the class StatsManagerTest method testAddAndRemoveReplica.

/**
 * Test to verify the {@link StatsManager} behaves correctly when dynamically adding/removing {@link ReplicaId}.
 * @throws Exception
 */
@Test
public void testAddAndRemoveReplica() throws Exception {
    // setup testing environment
    Map<PartitionId, Store> testStoreMap = new HashMap<>();
    List<ReplicaId> testReplicas = new ArrayList<>();
    DataNodeId dataNodeId = new MockDataNodeId(Collections.singletonList(new Port(6667, PortType.PLAINTEXT)), Collections.singletonList("/tmp"), "DC1");
    for (int i = 0; i < 3; i++) {
        PartitionId partitionId = new MockPartitionId(i, MockClusterMap.DEFAULT_PARTITION_CLASS, Collections.singletonList((MockDataNodeId) dataNodeId), 0);
        testStoreMap.put(partitionId, new MockStore(new MockStoreStats(hostAccountStorageStats.getStorageStats().get(i), false)));
        testReplicas.add(partitionId.getReplicaIds().get(0));
    }
    StorageManager mockStorageManager = new MockStorageManager(testStoreMap, dataNodeId);
    StatsManager testStatsManager = new StatsManager(mockStorageManager, testReplicas, new MetricRegistry(), statsManagerConfig, new MockTime(), null, null, inMemoryAccountService);
    // verify that adding an existing store to StatsManager should fail
    assertFalse("Adding a store which already exists should fail", testStatsManager.addReplica(testReplicas.get(0)));
    PartitionId partitionId3 = new MockPartitionId(3, MockClusterMap.DEFAULT_PARTITION_CLASS, Collections.singletonList((MockDataNodeId) dataNodeId), 0);
    testStoreMap.put(partitionId3, new MockStore(new MockStoreStats(hostAccountStorageStats.getStorageStats().get(0), false)));
    // verify that partitionId3 is not in stats report before adding to statsManager
    StatsManager.AccountStatsPublisher publisher = testStatsManager.new AccountStatsPublisher(accountStatsStore);
    publisher.run();
    HostAccountStorageStatsWrapper statsWrapper = accountStatsStore.queryHostAccountStorageStatsByHost("localhost", 0);
    assertFalse("Partition3 should not present in stats report", statsWrapper.getStats().getStorageStats().containsKey(partitionId3.getId()));
    // verify that after adding into statsManager, PartitionId3 is in stats report
    testStatsManager.addReplica(partitionId3.getReplicaIds().get(0));
    publisher.run();
    statsWrapper = accountStatsStore.queryHostAccountStorageStatsByHost("localhost", 0);
    assertTrue("Partition3 should present in stats report", statsWrapper.getStats().getStorageStats().containsKey(partitionId3.getId()));
    // verify that after removing PartitionId0 (corresponding to the first replica in replicas list), PartitionId0 is not in the stats report
    PartitionId partitionId0 = testReplicas.get(0).getPartitionId();
    assertTrue("Partition0 should present in stats report before removal", statsWrapper.getStats().getStorageStats().containsKey(partitionId0.getId()));
    testStoreMap.remove(testReplicas.get(0).getPartitionId());
    testStatsManager.removeReplica(testReplicas.get(0));
    publisher.run();
    statsWrapper = accountStatsStore.queryHostAccountStorageStatsByHost("localhost", 0);
    assertFalse("Partition0 should not present in stats report after removal", statsWrapper.getStats().getStorageStats().containsKey(partitionId0.getId()));
    // verify that removing the PartitionId0 should fail because it no longer exists in StatsManager
    assertFalse(testStatsManager.removeReplica(testReplicas.get(0)));
    // concurrent remove test
    CountDownLatch getStatsCountdown1 = new CountDownLatch(1);
    CountDownLatch waitRemoveCountdown = new CountDownLatch(1);
    ((MockStorageManager) mockStorageManager).waitOperationCountdown = waitRemoveCountdown;
    ((MockStorageManager) mockStorageManager).firstCall = true;
    ((MockStorageManager) mockStorageManager).unreachablePartitions.clear();
    for (Store store : testStoreMap.values()) {
        ((MockStore) store).getStatsCountdown = getStatsCountdown1;
        ((MockStore) store).isCollected = false;
    }
    List<PartitionId> partitionRemoved = new ArrayList<>();
    Utils.newThread(() -> {
        // wait until at least one store has been collected (this ensures stats aggregation using old snapshot of map)
        try {
            getStatsCountdown1.await();
        } catch (InterruptedException e) {
            throw new IllegalStateException("CountDown await was interrupted", e);
        }
        // find one store which hasn't been collected
        ReplicaId replicaToRemove = null;
        for (Map.Entry<PartitionId, Store> partitionToStore : testStoreMap.entrySet()) {
            MockStore store = (MockStore) partitionToStore.getValue();
            if (!store.isCollected) {
                replicaToRemove = partitionToStore.getKey().getReplicaIds().get(0);
                break;
            }
        }
        if (replicaToRemove != null) {
            testStatsManager.removeReplica(replicaToRemove);
            testStoreMap.remove(replicaToRemove.getPartitionId());
            partitionRemoved.add(replicaToRemove.getPartitionId());
            // count down to allow stats aggregation to proceed
            waitRemoveCountdown.countDown();
        }
    }, false).start();
    publisher.run();
    statsWrapper = accountStatsStore.queryHostAccountStorageStatsByHost("localhost", 0);
    // verify that the removed store is indeed unreachable during stats aggregation
    assertTrue("The removed partition should be unreachable during aggregation", ((MockStorageManager) mockStorageManager).unreachablePartitions.contains(partitionRemoved.get(0)));
    // verify unreachable store list doesn't contain the store which is removed.
    List<String> unreachableStores = statsWrapper.getHeader().getUnreachableStores();
    assertFalse("The removed partition should not present in unreachable list", unreachableStores.contains(partitionRemoved.get(0).toPathString()));
    // concurrent add test
    CountDownLatch getStatsCountdown2 = new CountDownLatch(1);
    CountDownLatch waitAddCountdown = new CountDownLatch(1);
    ((MockStorageManager) mockStorageManager).waitOperationCountdown = waitAddCountdown;
    ((MockStorageManager) mockStorageManager).firstCall = true;
    ((MockStorageManager) mockStorageManager).unreachablePartitions.clear();
    for (Store store : testStoreMap.values()) {
        ((MockStore) store).getStatsCountdown = getStatsCountdown2;
        ((MockStore) store).isCollected = false;
    }
    PartitionId partitionId4 = new MockPartitionId(4, MockClusterMap.DEFAULT_PARTITION_CLASS, Collections.singletonList((MockDataNodeId) dataNodeId), 0);
    Utils.newThread(() -> {
        // wait until at least one store has been collected (this ensures stats aggregation using old snapshot of map)
        try {
            getStatsCountdown2.await();
        } catch (InterruptedException e) {
            throw new IllegalStateException("CountDown await was interrupted", e);
        }
        testStatsManager.addReplica(partitionId4.getReplicaIds().get(0));
        testStoreMap.put(partitionId4, new MockStore(new MockStoreStats(hostAccountStorageStats.getStorageStats().get(0), false)));
        // count down to allow stats aggregation to proceed
        waitAddCountdown.countDown();
    }, false).start();
    publisher.run();
    statsWrapper = accountStatsStore.queryHostAccountStorageStatsByHost("localhost", 0);
    // verify that new added PartitionId4 is not in report for this round of aggregation
    assertFalse("Partition4 should not present in stats report", statsWrapper.getStats().getStorageStats().containsKey(partitionId4.getId()));
    // verify that new added PartitionId4 will be collected for next round of aggregation
    publisher.run();
    statsWrapper = accountStatsStore.queryHostAccountStorageStatsByHost("localhost", 0);
    assertTrue("Partition4 should present in stats report", statsWrapper.getStats().getStorageStats().containsKey(partitionId4.getId()));
}
Also used : HashMap(java.util.HashMap) Port(com.github.ambry.network.Port) ArrayList(java.util.ArrayList) StorageManager(com.github.ambry.store.StorageManager) InmemoryAccountStatsStore(com.github.ambry.accountstats.InmemoryAccountStatsStore) Store(com.github.ambry.store.Store) MockTime(com.github.ambry.utils.MockTime) MockPartitionId(com.github.ambry.clustermap.MockPartitionId) MetricRegistry(com.codahale.metrics.MetricRegistry) MockPartitionId(com.github.ambry.clustermap.MockPartitionId) PartitionId(com.github.ambry.clustermap.PartitionId) CountDownLatch(java.util.concurrent.CountDownLatch) ReplicaId(com.github.ambry.clustermap.ReplicaId) MockDataNodeId(com.github.ambry.clustermap.MockDataNodeId) DataNodeId(com.github.ambry.clustermap.DataNodeId) MockDataNodeId(com.github.ambry.clustermap.MockDataNodeId) Test(org.junit.Test)

Aggregations

StorageManager (com.github.ambry.store.StorageManager)24 Test (org.junit.Test)20 ReplicaId (com.github.ambry.clustermap.ReplicaId)19 MockPartitionId (com.github.ambry.clustermap.MockPartitionId)18 PartitionId (com.github.ambry.clustermap.PartitionId)18 MockReplicaId (com.github.ambry.clustermap.MockReplicaId)15 MetricRegistry (com.codahale.metrics.MetricRegistry)14 ClusterMapConfig (com.github.ambry.config.ClusterMapConfig)13 Store (com.github.ambry.store.Store)13 MockClusterMap (com.github.ambry.clustermap.MockClusterMap)12 MockHelixParticipant (com.github.ambry.clustermap.MockHelixParticipant)12 ArrayList (java.util.ArrayList)11 DataNodeId (com.github.ambry.clustermap.DataNodeId)10 StateTransitionException (com.github.ambry.clustermap.StateTransitionException)9 HashSet (java.util.HashSet)9 DiskManagerConfig (com.github.ambry.config.DiskManagerConfig)8 ReplicationConfig (com.github.ambry.config.ReplicationConfig)8 MockTime (com.github.ambry.utils.MockTime)8 HashMap (java.util.HashMap)8 CountDownLatch (java.util.concurrent.CountDownLatch)8