Search in sources :

Example 6 with StorageManager

use of com.github.ambry.store.StorageManager in project ambry by linkedin.

the class ReplicationTest method addAndRemoveReplicaTest.

/**
 * Test dynamically add/remove replica in {@link ReplicationManager}
 * @throws Exception
 */
@Test
public void addAndRemoveReplicaTest() throws Exception {
    MockClusterMap clusterMap = new MockClusterMap();
    ClusterMapConfig clusterMapConfig = new ClusterMapConfig(verifiableProperties);
    StoreConfig storeConfig = new StoreConfig(verifiableProperties);
    DataNodeId dataNodeId = clusterMap.getDataNodeIds().get(0);
    MockStoreKeyConverterFactory storeKeyConverterFactory = new MockStoreKeyConverterFactory(null, null);
    storeKeyConverterFactory.setConversionMap(new HashMap<>());
    StorageManager storageManager = new StorageManager(storeConfig, new DiskManagerConfig(verifiableProperties), Utils.newScheduler(1, true), new MetricRegistry(), null, clusterMap, dataNodeId, null, null, new MockTime(), null, new InMemAccountService(false, false));
    storageManager.start();
    MockReplicationManager replicationManager = new MockReplicationManager(replicationConfig, clusterMapConfig, storeConfig, storageManager, clusterMap, dataNodeId, storeKeyConverterFactory, null);
    ReplicaId replicaToTest = clusterMap.getReplicaIds(dataNodeId).get(0);
    // Attempting to add replica that already exists should fail
    assertFalse("Adding an existing replica should fail", replicationManager.addReplica(replicaToTest));
    // Create a brand new replica that sits on one of the disk of datanode, add it into replication manager
    PartitionId newPartition = clusterMap.createNewPartition(clusterMap.getDataNodes());
    for (ReplicaId replicaId : newPartition.getReplicaIds()) {
        if (replicaId.getDataNodeId() == dataNodeId) {
            replicaToTest = replicaId;
            break;
        }
    }
    // Before adding replica, partitionToPartitionInfo and mountPathToPartitionInfos should not contain new partition
    assertFalse("partitionToPartitionInfo should not contain new partition", replicationManager.getPartitionToPartitionInfoMap().containsKey(newPartition));
    for (PartitionInfo partitionInfo : replicationManager.getMountPathToPartitionInfosMap().get(replicaToTest.getMountPath())) {
        assertNotSame("mountPathToPartitionInfos should not contain new partition", partitionInfo.getPartitionId(), newPartition);
    }
    // Add new replica to replication manager
    assertTrue("Adding new replica to replication manager should succeed", replicationManager.addReplica(replicaToTest));
    // After adding replica, partitionToPartitionInfo and mountPathToPartitionInfos should contain new partition
    assertTrue("partitionToPartitionInfo should contain new partition", replicationManager.getPartitionToPartitionInfoMap().containsKey(newPartition));
    Optional<PartitionInfo> newPartitionInfo = replicationManager.getMountPathToPartitionInfosMap().get(replicaToTest.getMountPath()).stream().filter(partitionInfo -> partitionInfo.getPartitionId() == newPartition).findAny();
    assertTrue("mountPathToPartitionInfos should contain new partition info", newPartitionInfo.isPresent());
    // Verify that all remoteReplicaInfos of new added replica have assigned thread
    for (RemoteReplicaInfo remoteReplicaInfo : newPartitionInfo.get().getRemoteReplicaInfos()) {
        assertNotNull("The remote replica should be assigned to one replica thread", remoteReplicaInfo.getReplicaThread());
    }
    // Remove replica
    assertTrue("Remove replica from replication manager should succeed", replicationManager.removeReplica(replicaToTest));
    // Verify replica is removed, so partitionToPartitionInfo and mountPathToPartitionInfos should not contain new partition
    assertFalse("partitionToPartitionInfo should not contain new partition", replicationManager.getPartitionToPartitionInfoMap().containsKey(newPartition));
    for (PartitionInfo partitionInfo : replicationManager.getMountPathToPartitionInfosMap().get(replicaToTest.getMountPath())) {
        assertNotSame("mountPathToPartitionInfos should not contain new partition", partitionInfo.getPartitionId(), newPartition);
    }
    // Verify that none of remoteReplicaInfo should have assigned thread
    for (RemoteReplicaInfo remoteReplicaInfo : newPartitionInfo.get().getRemoteReplicaInfos()) {
        assertNull("The remote replica should be assigned to one replica thread", remoteReplicaInfo.getReplicaThread());
    }
    // Remove the same replica that doesn't exist should be no-op
    ReplicationManager mockManager = Mockito.spy(replicationManager);
    assertFalse("Remove non-existent replica should return false", replicationManager.removeReplica(replicaToTest));
    verify(mockManager, never()).removeRemoteReplicaInfoFromReplicaThread(anyList());
    storageManager.shutdown();
}
Also used : DiskManagerConfig(com.github.ambry.config.DiskManagerConfig) CoreMatchers(org.hamcrest.CoreMatchers) Arrays(java.util.Arrays) StorageManager(com.github.ambry.store.StorageManager) StoreKeyConverter(com.github.ambry.store.StoreKeyConverter) DataNodeId(com.github.ambry.clustermap.DataNodeId) Random(java.util.Random) ByteBuffer(java.nio.ByteBuffer) MockReplicaId(com.github.ambry.clustermap.MockReplicaId) PortType(com.github.ambry.network.PortType) AtomicInteger(java.util.concurrent.atomic.AtomicInteger) TestUtils(com.github.ambry.utils.TestUtils) Map(java.util.Map) DeleteMessageFormatInputStream(com.github.ambry.messageformat.DeleteMessageFormatInputStream) Parameterized(org.junit.runners.Parameterized) ReplicationConfig(com.github.ambry.config.ReplicationConfig) Container(com.github.ambry.account.Container) DiskManagerConfig(com.github.ambry.config.DiskManagerConfig) Predicate(java.util.function.Predicate) ValidatingTransformer(com.github.ambry.messageformat.ValidatingTransformer) Collection(java.util.Collection) StoreKeyFactory(com.github.ambry.store.StoreKeyFactory) Set(java.util.Set) Utils(com.github.ambry.utils.Utils) MockPartitionId(com.github.ambry.clustermap.MockPartitionId) Collectors(java.util.stream.Collectors) ConnectedChannel(com.github.ambry.network.ConnectedChannel) CountDownLatch(java.util.concurrent.CountDownLatch) StoreKey(com.github.ambry.store.StoreKey) List(java.util.List) ReplicaMetadataResponse(com.github.ambry.protocol.ReplicaMetadataResponse) PartitionStateChangeListener(com.github.ambry.clustermap.PartitionStateChangeListener) MockTime(com.github.ambry.utils.MockTime) Account(com.github.ambry.account.Account) Optional(java.util.Optional) TransitionErrorCode(com.github.ambry.clustermap.StateTransitionException.TransitionErrorCode) MockId(com.github.ambry.store.MockId) InMemAccountService(com.github.ambry.account.InMemAccountService) AmbryReplicaSyncUpManager(com.github.ambry.clustermap.AmbryReplicaSyncUpManager) PartitionId(com.github.ambry.clustermap.PartitionId) BlobId(com.github.ambry.commons.BlobId) ResponseHandler(com.github.ambry.commons.ResponseHandler) ClusterMapChangeListener(com.github.ambry.clustermap.ClusterMapChangeListener) RunWith(org.junit.runner.RunWith) AtomicBoolean(java.util.concurrent.atomic.AtomicBoolean) HashMap(java.util.HashMap) AtomicReference(java.util.concurrent.atomic.AtomicReference) ArrayList(java.util.ArrayList) HashSet(java.util.HashSet) Transformer(com.github.ambry.store.Transformer) MockHelixParticipant(com.github.ambry.clustermap.MockHelixParticipant) CommonTestUtils(com.github.ambry.commons.CommonTestUtils) ReplicaMetadataResponseInfo(com.github.ambry.protocol.ReplicaMetadataResponseInfo) MockStoreKeyConverterFactory(com.github.ambry.store.MockStoreKeyConverterFactory) Time(com.github.ambry.utils.Time) MockDataNodeId(com.github.ambry.clustermap.MockDataNodeId) MockMessageWriteSet(com.github.ambry.store.MockMessageWriteSet) ReplicaState(com.github.ambry.clustermap.ReplicaState) StateModelListenerType(com.github.ambry.clustermap.StateModelListenerType) StoreConfig(com.github.ambry.config.StoreConfig) MetricRegistry(com.codahale.metrics.MetricRegistry) Properties(java.util.Properties) Pair(com.github.ambry.utils.Pair) Iterator(java.util.Iterator) ReplicaType(com.github.ambry.clustermap.ReplicaType) VerifiableProperties(com.github.ambry.config.VerifiableProperties) ClusterMap(com.github.ambry.clustermap.ClusterMap) Test(org.junit.Test) BlobIdFactory(com.github.ambry.commons.BlobIdFactory) File(java.io.File) TimeUnit(java.util.concurrent.TimeUnit) Store(com.github.ambry.store.Store) Mockito(org.mockito.Mockito) MessageInfo(com.github.ambry.store.MessageInfo) StateTransitionException(com.github.ambry.clustermap.StateTransitionException) ReplicaId(com.github.ambry.clustermap.ReplicaId) ClusterMapConfig(com.github.ambry.config.ClusterMapConfig) Port(com.github.ambry.network.Port) Comparator(java.util.Comparator) Assert(org.junit.Assert) Collections(java.util.Collections) MockClusterMap(com.github.ambry.clustermap.MockClusterMap) MockStoreKeyConverterFactory(com.github.ambry.store.MockStoreKeyConverterFactory) MetricRegistry(com.codahale.metrics.MetricRegistry) StorageManager(com.github.ambry.store.StorageManager) MockPartitionId(com.github.ambry.clustermap.MockPartitionId) PartitionId(com.github.ambry.clustermap.PartitionId) ClusterMapConfig(com.github.ambry.config.ClusterMapConfig) MockReplicaId(com.github.ambry.clustermap.MockReplicaId) ReplicaId(com.github.ambry.clustermap.ReplicaId) InMemAccountService(com.github.ambry.account.InMemAccountService) StoreConfig(com.github.ambry.config.StoreConfig) DataNodeId(com.github.ambry.clustermap.DataNodeId) MockDataNodeId(com.github.ambry.clustermap.MockDataNodeId) MockTime(com.github.ambry.utils.MockTime) MockClusterMap(com.github.ambry.clustermap.MockClusterMap) Test(org.junit.Test)

Example 7 with StorageManager

use of com.github.ambry.store.StorageManager in project ambry by linkedin.

the class ReplicationTest method replicaResumeDecommissionTest.

/**
 * Test that resuming decommission on certain replica behaves correctly.
 * @throws Exception
 */
@Test
public void replicaResumeDecommissionTest() throws Exception {
    MockClusterMap clusterMap = new MockClusterMap();
    ClusterMapConfig clusterMapConfig = new ClusterMapConfig(verifiableProperties);
    MockHelixParticipant.metricRegistry = new MetricRegistry();
    MockHelixParticipant mockHelixParticipant = Mockito.spy(new MockHelixParticipant(clusterMapConfig));
    doNothing().when(mockHelixParticipant).setPartitionDisabledState(anyString(), anyBoolean());
    // choose a replica on local node and put decommission file into its dir
    ReplicaId localReplica = clusterMap.getReplicaIds(clusterMap.getDataNodeIds().get(0)).get(0);
    String partitionName = localReplica.getPartitionId().toPathString();
    File decommissionFile = new File(localReplica.getReplicaPath(), "decommission_in_progress");
    assertTrue("Can't create decommission file", decommissionFile.createNewFile());
    Pair<StorageManager, ReplicationManager> managers = createStorageManagerAndReplicationManager(clusterMap, clusterMapConfig, mockHelixParticipant);
    StorageManager storageManager = managers.getFirst();
    // failure case 1: store is not started when resuming decommission
    storageManager.shutdownBlobStore(localReplica.getPartitionId());
    try {
        mockHelixParticipant.onPartitionBecomeDroppedFromOffline(partitionName);
        fail("should fail");
    } catch (StateTransitionException e) {
        assertEquals("Mismatch in error code", ReplicaOperationFailure, e.getErrorCode());
    }
    storageManager.startBlobStore(localReplica.getPartitionId());
    // failure case 2: fail to remove replica from InstanceConfig in Helix
    AmbryReplicaSyncUpManager replicaSyncUpManager = (AmbryReplicaSyncUpManager) mockHelixParticipant.getReplicaSyncUpManager();
    mockHelixParticipant.updateNodeInfoReturnVal = false;
    CountDownLatch executionLatch = new CountDownLatch(1);
    AtomicBoolean exceptionOccurred = new AtomicBoolean(false);
    Utils.newThread(() -> {
        try {
            mockHelixParticipant.onPartitionBecomeDroppedFromOffline(partitionName);
            fail("should fail because updating node info returns false");
        } catch (StateTransitionException e) {
            exceptionOccurred.getAndSet(true);
            assertEquals("Mismatch in error code", ReplicaOperationFailure, e.getErrorCode());
        } finally {
            executionLatch.countDown();
        }
    }, false).start();
    while (!replicaSyncUpManager.getPartitionToDeactivationLatch().containsKey(partitionName)) {
        Thread.sleep(100);
    }
    replicaSyncUpManager.onDeactivationComplete(localReplica);
    while (!replicaSyncUpManager.getPartitionToDisconnectionLatch().containsKey(partitionName)) {
        Thread.sleep(100);
    }
    replicaSyncUpManager.onDisconnectionComplete(localReplica);
    assertTrue("Offline-To-Dropped transition didn't complete within 1 sec", executionLatch.await(1, TimeUnit.SECONDS));
    assertTrue("State transition exception should be thrown", exceptionOccurred.get());
    mockHelixParticipant.updateNodeInfoReturnVal = null;
    storageManager.startBlobStore(localReplica.getPartitionId());
    // success case
    mockHelixParticipant.mockStatsManagerListener = Mockito.mock(PartitionStateChangeListener.class);
    doNothing().when(mockHelixParticipant.mockStatsManagerListener).onPartitionBecomeDroppedFromOffline(anyString());
    mockHelixParticipant.registerPartitionStateChangeListener(StateModelListenerType.StatsManagerListener, mockHelixParticipant.mockStatsManagerListener);
    CountDownLatch participantLatch = new CountDownLatch(1);
    Utils.newThread(() -> {
        mockHelixParticipant.onPartitionBecomeDroppedFromOffline(partitionName);
        participantLatch.countDown();
    }, false).start();
    while (!replicaSyncUpManager.getPartitionToDeactivationLatch().containsKey(partitionName)) {
        Thread.sleep(100);
    }
    replicaSyncUpManager.onDeactivationComplete(localReplica);
    while (!replicaSyncUpManager.getPartitionToDisconnectionLatch().containsKey(partitionName)) {
        Thread.sleep(100);
    }
    replicaSyncUpManager.onDisconnectionComplete(localReplica);
    assertTrue("Offline-To-Dropped transition didn't complete within 1 sec", participantLatch.await(1, TimeUnit.SECONDS));
    // verify stats manager listener is called
    verify(mockHelixParticipant.mockStatsManagerListener).onPartitionBecomeDroppedFromOffline(anyString());
    // verify setPartitionDisabledState method is called
    verify(mockHelixParticipant).setPartitionDisabledState(partitionName, false);
    File storeDir = new File(localReplica.getReplicaPath());
    assertFalse("Store dir should not exist", storeDir.exists());
    storageManager.shutdown();
}
Also used : MetricRegistry(com.codahale.metrics.MetricRegistry) StorageManager(com.github.ambry.store.StorageManager) PartitionStateChangeListener(com.github.ambry.clustermap.PartitionStateChangeListener) CountDownLatch(java.util.concurrent.CountDownLatch) ClusterMapConfig(com.github.ambry.config.ClusterMapConfig) MockReplicaId(com.github.ambry.clustermap.MockReplicaId) ReplicaId(com.github.ambry.clustermap.ReplicaId) AtomicBoolean(java.util.concurrent.atomic.AtomicBoolean) MockHelixParticipant(com.github.ambry.clustermap.MockHelixParticipant) File(java.io.File) AmbryReplicaSyncUpManager(com.github.ambry.clustermap.AmbryReplicaSyncUpManager) MockClusterMap(com.github.ambry.clustermap.MockClusterMap) StateTransitionException(com.github.ambry.clustermap.StateTransitionException) Test(org.junit.Test)

Example 8 with StorageManager

use of com.github.ambry.store.StorageManager in project ambry by linkedin.

the class ReplicationTest method replicaFromInactiveToOfflineTest.

/**
 * Test INACTIVE -> OFFLINE transition on existing replica (both success and failure cases)
 */
@Test
public void replicaFromInactiveToOfflineTest() throws Exception {
    MockClusterMap clusterMap = new MockClusterMap();
    ClusterMapConfig clusterMapConfig = new ClusterMapConfig(verifiableProperties);
    MockHelixParticipant.metricRegistry = new MetricRegistry();
    MockHelixParticipant mockHelixParticipant = new MockHelixParticipant(clusterMapConfig);
    Pair<StorageManager, ReplicationManager> managers = createStorageManagerAndReplicationManager(clusterMap, clusterMapConfig, mockHelixParticipant);
    StorageManager storageManager = managers.getFirst();
    MockReplicationManager replicationManager = (MockReplicationManager) managers.getSecond();
    // 1. test replica not found case
    try {
        mockHelixParticipant.onPartitionBecomeOfflineFromInactive("-1");
        fail("should fail because of invalid partition");
    } catch (StateTransitionException e) {
        assertEquals("Error code doesn't match", ReplicaNotFound, e.getErrorCode());
    }
    // 2. test store not started case
    PartitionId existingPartition = replicationManager.partitionToPartitionInfo.keySet().iterator().next();
    storageManager.shutdownBlobStore(existingPartition);
    try {
        mockHelixParticipant.onPartitionBecomeOfflineFromInactive(existingPartition.toPathString());
        fail("should fail because store is not started");
    } catch (StateTransitionException e) {
        assertEquals("Error code doesn't match", StoreNotStarted, e.getErrorCode());
    }
    storageManager.startBlobStore(existingPartition);
    // before testing success case, let's write a blob (size = 100) into local store and add a delete record for new blob
    Store localStore = storageManager.getStore(existingPartition);
    MockId id = new MockId(TestUtils.getRandomString(10), Utils.getRandomShort(TestUtils.RANDOM), Utils.getRandomShort(TestUtils.RANDOM));
    long crc = (new Random()).nextLong();
    long blobSize = 100;
    MessageInfo info = new MessageInfo(id, blobSize, false, false, Utils.Infinite_Time, crc, id.getAccountId(), id.getContainerId(), Utils.Infinite_Time);
    List<MessageInfo> infos = new ArrayList<>();
    List<ByteBuffer> buffers = new ArrayList<>();
    ByteBuffer buffer = ByteBuffer.wrap(TestUtils.getRandomBytes((int) blobSize));
    infos.add(info);
    buffers.add(buffer);
    localStore.put(new MockMessageWriteSet(infos, buffers));
    // delete the blob
    int deleteRecordSize = (int) (new DeleteMessageFormatInputStream(id, (short) 0, (short) 0, 0).getSize());
    MessageInfo deleteInfo = new MessageInfo(id, deleteRecordSize, id.getAccountId(), id.getContainerId(), time.milliseconds());
    localStore.delete(Collections.singletonList(deleteInfo));
    int sizeOfPutAndHeader = 100 + 18;
    int sizeOfWhole = sizeOfPutAndHeader + deleteRecordSize;
    // note that end offset of last PUT = 100 + 18 = 118, end offset of the store is sizeOfWhole
    // 3. test success case (create a new thread and trigger INACTIVE -> OFFLINE transition)
    ReplicaId localReplica = storageManager.getReplica(existingPartition.toPathString());
    // put a decommission-in-progress file into local store dir
    File decommissionFile = new File(localReplica.getReplicaPath(), "decommission_in_progress");
    assertTrue("Couldn't create decommission file in local store", decommissionFile.createNewFile());
    decommissionFile.deleteOnExit();
    assertNotSame("Before disconnection, the local store state shouldn't be OFFLINE", ReplicaState.OFFLINE, localStore.getCurrentState());
    mockHelixParticipant.registerPartitionStateChangeListener(StateModelListenerType.ReplicationManagerListener, replicationManager.replicationListener);
    CountDownLatch participantLatch = new CountDownLatch(1);
    replicationManager.listenerExecutionLatch = new CountDownLatch(1);
    Utils.newThread(() -> {
        mockHelixParticipant.onPartitionBecomeOfflineFromInactive(existingPartition.toPathString());
        participantLatch.countDown();
    }, false).start();
    assertTrue("Partition state change listener in ReplicationManager didn't get called within 1 sec", replicationManager.listenerExecutionLatch.await(1, TimeUnit.SECONDS));
    // the state of local store should be updated to OFFLINE
    assertEquals("Local store state is not expected", ReplicaState.OFFLINE, localStore.getCurrentState());
    // update replication lag between local and peer replicas
    List<RemoteReplicaInfo> remoteReplicaInfos = replicationManager.partitionToPartitionInfo.get(existingPartition).getRemoteReplicaInfos();
    ReplicaId peerReplica1 = remoteReplicaInfos.get(0).getReplicaId();
    ReplicaId peerReplica2 = remoteReplicaInfos.get(1).getReplicaId();
    // peer1 catches up with last PUT, peer2 catches up with end offset of local store. In this case, SyncUp is not complete
    replicationManager.updateTotalBytesReadByRemoteReplica(existingPartition, peerReplica1.getDataNodeId().getHostname(), peerReplica1.getReplicaPath(), sizeOfPutAndHeader);
    replicationManager.updateTotalBytesReadByRemoteReplica(existingPartition, peerReplica2.getDataNodeId().getHostname(), peerReplica2.getReplicaPath(), sizeOfWhole);
    assertFalse("Only one peer replica has fully caught up with end offset so sync-up should not complete", mockHelixParticipant.getReplicaSyncUpManager().isSyncUpComplete(localReplica));
    // make peer1 catch up with end offset
    replicationManager.updateTotalBytesReadByRemoteReplica(existingPartition, peerReplica1.getDataNodeId().getHostname(), peerReplica1.getReplicaPath(), sizeOfWhole);
    // Now, sync-up should complete and transition should be able to proceed.
    assertTrue("Inactive-To-Offline transition didn't complete within 1 sec", participantLatch.await(1, TimeUnit.SECONDS));
    assertFalse("Local store should be stopped after transition", localStore.isStarted());
    storageManager.shutdown();
}
Also used : StorageManager(com.github.ambry.store.StorageManager) ArrayList(java.util.ArrayList) Store(com.github.ambry.store.Store) MockId(com.github.ambry.store.MockId) MockHelixParticipant(com.github.ambry.clustermap.MockHelixParticipant) Random(java.util.Random) MetricRegistry(com.codahale.metrics.MetricRegistry) DeleteMessageFormatInputStream(com.github.ambry.messageformat.DeleteMessageFormatInputStream) MockPartitionId(com.github.ambry.clustermap.MockPartitionId) PartitionId(com.github.ambry.clustermap.PartitionId) CountDownLatch(java.util.concurrent.CountDownLatch) ByteBuffer(java.nio.ByteBuffer) ClusterMapConfig(com.github.ambry.config.ClusterMapConfig) MockReplicaId(com.github.ambry.clustermap.MockReplicaId) ReplicaId(com.github.ambry.clustermap.ReplicaId) MessageInfo(com.github.ambry.store.MessageInfo) MockMessageWriteSet(com.github.ambry.store.MockMessageWriteSet) File(java.io.File) MockClusterMap(com.github.ambry.clustermap.MockClusterMap) StateTransitionException(com.github.ambry.clustermap.StateTransitionException) Test(org.junit.Test)

Example 9 with StorageManager

use of com.github.ambry.store.StorageManager in project ambry by linkedin.

the class ReplicationTest method replicaFromStandbyToLeaderTest.

/**
 * Test state transition in replication manager from STANDBY to LEADER
 * Test setup: When creating partitions, make sure that there is exactly one replica in LEADER STATE on each data center
 * Test condition: When a partition on current node moves from standby to leader, verify that in-memory map storing
 * partition to peer leader replicas is updated correctly
 * @throws Exception
 */
@Test
public void replicaFromStandbyToLeaderTest() throws Exception {
    MockClusterMap clusterMap = new MockClusterMap();
    ClusterMapConfig clusterMapConfig = new ClusterMapConfig(verifiableProperties);
    MockHelixParticipant.metricRegistry = new MetricRegistry();
    MockHelixParticipant mockHelixParticipant = new MockHelixParticipant(clusterMapConfig);
    ReplicationConfig initialReplicationConfig = replicationConfig;
    properties.setProperty("replication.model.across.datacenters", "LEADER_BASED");
    replicationConfig = new ReplicationConfig(new VerifiableProperties(properties));
    Pair<StorageManager, ReplicationManager> managers = createStorageManagerAndReplicationManager(clusterMap, clusterMapConfig, mockHelixParticipant);
    StorageManager storageManager = managers.getFirst();
    MockReplicationManager replicationManager = (MockReplicationManager) managers.getSecond();
    List<ReplicaId> replicaIds = clusterMap.getReplicaIds(replicationManager.dataNodeId);
    for (ReplicaId replicaId : replicaIds) {
        MockReplicaId mockReplicaId = (MockReplicaId) replicaId;
        if (mockReplicaId.getReplicaState() == ReplicaState.LEADER) {
            PartitionId existingPartition = mockReplicaId.getPartitionId();
            mockHelixParticipant.onPartitionBecomeLeaderFromStandby(existingPartition.toPathString());
            Set<ReplicaId> peerLeaderReplicasInReplicationManager = replicationManager.leaderBasedReplicationAdmin.getLeaderPartitionToPeerLeaderReplicas().get(existingPartition.toPathString());
            Set<ReplicaId> peerLeaderReplicasInClusterMap = new HashSet<>(existingPartition.getReplicaIdsByState(ReplicaState.LEADER, null));
            peerLeaderReplicasInClusterMap.remove(mockReplicaId);
            assertThat("Mismatch in list of leader peer replicas stored by partition in replication manager and cluster map", peerLeaderReplicasInReplicationManager, is(peerLeaderReplicasInClusterMap));
        }
    }
    storageManager.shutdown();
    replicationConfig = initialReplicationConfig;
}
Also used : ReplicationConfig(com.github.ambry.config.ReplicationConfig) VerifiableProperties(com.github.ambry.config.VerifiableProperties) MetricRegistry(com.codahale.metrics.MetricRegistry) StorageManager(com.github.ambry.store.StorageManager) MockPartitionId(com.github.ambry.clustermap.MockPartitionId) PartitionId(com.github.ambry.clustermap.PartitionId) ClusterMapConfig(com.github.ambry.config.ClusterMapConfig) MockReplicaId(com.github.ambry.clustermap.MockReplicaId) ReplicaId(com.github.ambry.clustermap.ReplicaId) MockHelixParticipant(com.github.ambry.clustermap.MockHelixParticipant) MockReplicaId(com.github.ambry.clustermap.MockReplicaId) MockClusterMap(com.github.ambry.clustermap.MockClusterMap) HashSet(java.util.HashSet) Test(org.junit.Test)

Example 10 with StorageManager

use of com.github.ambry.store.StorageManager in project ambry by linkedin.

the class AmbryServer method startup.

public void startup() throws InstantiationException {
    try {
        logger.info("starting");
        clusterParticipants = clusterAgentsFactory.getClusterParticipants();
        logger.info("Setting up JMX.");
        long startTime = SystemTime.getInstance().milliseconds();
        reporter = reporterFactory != null ? reporterFactory.apply(registry) : JmxReporter.forRegistry(registry).build();
        reporter.start();
        logger.info("creating configs");
        NetworkConfig networkConfig = new NetworkConfig(properties);
        StoreConfig storeConfig = new StoreConfig(properties);
        DiskManagerConfig diskManagerConfig = new DiskManagerConfig(properties);
        ServerConfig serverConfig = new ServerConfig(properties);
        ReplicationConfig replicationConfig = new ReplicationConfig(properties);
        ConnectionPoolConfig connectionPoolConfig = new ConnectionPoolConfig(properties);
        SSLConfig sslConfig = new SSLConfig(properties);
        ClusterMapConfig clusterMapConfig = new ClusterMapConfig(properties);
        StatsManagerConfig statsConfig = new StatsManagerConfig(properties);
        CloudConfig cloudConfig = new CloudConfig(properties);
        // verify the configs
        properties.verify();
        scheduler = Utils.newScheduler(serverConfig.serverSchedulerNumOfthreads, false);
        // mismatch in sealed/stopped replica lists that maintained by each participant.
        if (clusterParticipants != null && clusterParticipants.size() > 1 && serverConfig.serverParticipantsConsistencyCheckerPeriodSec > 0) {
            consistencyChecker = new ParticipantsConsistencyChecker(clusterParticipants, metrics);
            logger.info("Scheduling participants consistency checker with a period of {} secs", serverConfig.serverParticipantsConsistencyCheckerPeriodSec);
            consistencyCheckerScheduler = Utils.newScheduler(1, "consistency-checker-", false);
            consistencyCheckerTask = consistencyCheckerScheduler.scheduleAtFixedRate(consistencyChecker, 0, serverConfig.serverParticipantsConsistencyCheckerPeriodSec, TimeUnit.SECONDS);
        }
        logger.info("checking if node exists in clustermap host {} port {}", networkConfig.hostName, networkConfig.port);
        DataNodeId nodeId = clusterMap.getDataNodeId(networkConfig.hostName, networkConfig.port);
        if (nodeId == null) {
            throw new IllegalArgumentException("The node " + networkConfig.hostName + ":" + networkConfig.port + "is not present in the clustermap. Failing to start the datanode");
        }
        AccountServiceFactory accountServiceFactory = Utils.getObj(serverConfig.serverAccountServiceFactory, properties, registry);
        AccountService accountService = accountServiceFactory.getAccountService();
        StoreKeyFactory storeKeyFactory = Utils.getObj(storeConfig.storeKeyFactory, clusterMap);
        // In most cases, there should be only one participant in the clusterParticipants list. If there are more than one
        // and some components require sole participant, the first one in the list will be primary participant.
        storageManager = new StorageManager(storeConfig, diskManagerConfig, scheduler, registry, storeKeyFactory, clusterMap, nodeId, new BlobStoreHardDelete(), clusterParticipants, time, new BlobStoreRecovery(), accountService);
        storageManager.start();
        SSLFactory sslFactory = new NettySslHttp2Factory(sslConfig);
        if (clusterMapConfig.clusterMapEnableHttp2Replication) {
            connectionPool = new Http2BlockingChannelPool(sslFactory, new Http2ClientConfig(properties), new Http2ClientMetrics(registry));
        } else {
            connectionPool = new BlockingChannelConnectionPool(connectionPoolConfig, sslConfig, clusterMapConfig, registry);
        }
        connectionPool.start();
        StoreKeyConverterFactory storeKeyConverterFactory = Utils.getObj(serverConfig.serverStoreKeyConverterFactory, properties, registry);
        Predicate<MessageInfo> skipPredicate = new ReplicationSkipPredicate(accountService, replicationConfig);
        replicationManager = new ReplicationManager(replicationConfig, clusterMapConfig, storeConfig, storageManager, storeKeyFactory, clusterMap, scheduler, nodeId, connectionPool, registry, notificationSystem, storeKeyConverterFactory, serverConfig.serverMessageTransformer, clusterParticipants.get(0), skipPredicate);
        replicationManager.start();
        if (replicationConfig.replicationEnabledWithVcrCluster) {
            logger.info("Creating Helix cluster spectator for cloud to store replication.");
            vcrClusterSpectator = _vcrClusterAgentsFactory.getVcrClusterSpectator(cloudConfig, clusterMapConfig);
            cloudToStoreReplicationManager = new CloudToStoreReplicationManager(replicationConfig, clusterMapConfig, storeConfig, storageManager, storeKeyFactory, clusterMap, scheduler, nodeId, connectionPool, registry, notificationSystem, storeKeyConverterFactory, serverConfig.serverMessageTransformer, vcrClusterSpectator, clusterParticipants.get(0));
            cloudToStoreReplicationManager.start();
        }
        logger.info("Creating StatsManager to publish stats");
        accountStatsMySqlStore = statsConfig.enableMysqlReport ? (AccountStatsMySqlStore) new AccountStatsMySqlStoreFactory(properties, clusterMapConfig, registry).getAccountStatsStore() : null;
        statsManager = new StatsManager(storageManager, clusterMap.getReplicaIds(nodeId), registry, statsConfig, time, clusterParticipants.get(0), accountStatsMySqlStore, accountService);
        if (serverConfig.serverStatsPublishLocalEnabled) {
            statsManager.start();
        }
        ArrayList<Port> ports = new ArrayList<Port>();
        ports.add(new Port(networkConfig.port, PortType.PLAINTEXT));
        if (nodeId.hasSSLPort()) {
            ports.add(new Port(nodeId.getSSLPort(), PortType.SSL));
        }
        networkServer = new SocketServer(networkConfig, sslConfig, registry, ports);
        FindTokenHelper findTokenHelper = new FindTokenHelper(storeKeyFactory, replicationConfig);
        requests = new AmbryServerRequests(storageManager, networkServer.getRequestResponseChannel(), clusterMap, nodeId, registry, metrics, findTokenHelper, notificationSystem, replicationManager, storeKeyFactory, serverConfig, storeKeyConverterFactory, statsManager, clusterParticipants.get(0));
        requestHandlerPool = new RequestHandlerPool(serverConfig.serverRequestHandlerNumOfThreads, networkServer.getRequestResponseChannel(), requests);
        networkServer.start();
        // Start netty http2 server
        if (nodeId.hasHttp2Port()) {
            NettyConfig nettyConfig = new NettyConfig(properties);
            NettyMetrics nettyMetrics = new NettyMetrics(registry);
            Http2ServerMetrics http2ServerMetrics = new Http2ServerMetrics(registry);
            Http2ClientConfig http2ClientConfig = new Http2ClientConfig(properties);
            logger.info("Http2 port {} is enabled. Starting HTTP/2 service.", nodeId.getHttp2Port());
            NettyServerRequestResponseChannel requestResponseChannel = new NettyServerRequestResponseChannel(networkConfig.queuedMaxRequests, http2ServerMetrics);
            AmbryServerRequests ambryServerRequestsForHttp2 = new AmbryServerRequests(storageManager, requestResponseChannel, clusterMap, nodeId, registry, metrics, findTokenHelper, notificationSystem, replicationManager, storeKeyFactory, serverConfig, storeKeyConverterFactory, statsManager, clusterParticipants.get(0));
            requestHandlerPoolForHttp2 = new RequestHandlerPool(serverConfig.serverRequestHandlerNumOfThreads, requestResponseChannel, ambryServerRequestsForHttp2);
            NioServerFactory nioServerFactory = new StorageServerNettyFactory(nodeId.getHttp2Port(), requestResponseChannel, sslFactory, nettyConfig, http2ClientConfig, metrics, nettyMetrics, http2ServerMetrics, serverSecurityService);
            nettyHttp2Server = nioServerFactory.getNioServer();
            nettyHttp2Server.start();
        }
        // Other code
        List<AmbryStatsReport> ambryStatsReports = new ArrayList<>();
        Set<String> validStatsTypes = new HashSet<>();
        for (StatsReportType type : StatsReportType.values()) {
            validStatsTypes.add(type.toString());
        }
        if (serverConfig.serverStatsPublishReportEnabled) {
            serverConfig.serverStatsReportsToPublish.forEach(e -> {
                if (validStatsTypes.contains(e)) {
                    ambryStatsReports.add(new AmbryStatsReportImpl(serverConfig.serverQuotaStatsAggregateIntervalInMinutes, StatsReportType.valueOf(e)));
                }
            });
        }
        if (vcrClusterSpectator != null) {
            vcrClusterSpectator.spectate();
        }
        Callback<StatsSnapshot> accountServiceCallback = new AccountServiceCallback(accountService);
        for (ClusterParticipant clusterParticipant : clusterParticipants) {
            clusterParticipant.participate(ambryStatsReports, accountStatsMySqlStore, accountServiceCallback);
        }
        if (nettyInternalMetrics != null) {
            nettyInternalMetrics.start();
            logger.info("NettyInternalMetric starts");
        }
        logger.info("started");
        long processingTime = SystemTime.getInstance().milliseconds() - startTime;
        metrics.serverStartTimeInMs.update(processingTime);
        logger.info("Server startup time in Ms {}", processingTime);
    } catch (Exception e) {
        logger.error("Error during startup", e);
        throw new InstantiationException("failure during startup " + e);
    }
}
Also used : DiskManagerConfig(com.github.ambry.config.DiskManagerConfig) SSLFactory(com.github.ambry.commons.SSLFactory) Http2ClientMetrics(com.github.ambry.network.http2.Http2ClientMetrics) Port(com.github.ambry.network.Port) StorageManager(com.github.ambry.store.StorageManager) ReplicationSkipPredicate(com.github.ambry.replication.ReplicationSkipPredicate) ArrayList(java.util.ArrayList) NettySslHttp2Factory(com.github.ambry.commons.NettySslHttp2Factory) NettyConfig(com.github.ambry.config.NettyConfig) ServerConfig(com.github.ambry.config.ServerConfig) StoreKeyFactory(com.github.ambry.store.StoreKeyFactory) StoreKeyConverterFactory(com.github.ambry.store.StoreKeyConverterFactory) AccountStatsMySqlStoreFactory(com.github.ambry.accountstats.AccountStatsMySqlStoreFactory) HashSet(java.util.HashSet) CloudToStoreReplicationManager(com.github.ambry.replication.CloudToStoreReplicationManager) ReplicationManager(com.github.ambry.replication.ReplicationManager) ReplicationConfig(com.github.ambry.config.ReplicationConfig) Http2BlockingChannelPool(com.github.ambry.network.http2.Http2BlockingChannelPool) Http2ClientConfig(com.github.ambry.config.Http2ClientConfig) ClusterMapConfig(com.github.ambry.config.ClusterMapConfig) BlockingChannelConnectionPool(com.github.ambry.network.BlockingChannelConnectionPool) AccountServiceCallback(com.github.ambry.account.AccountServiceCallback) Http2ServerMetrics(com.github.ambry.network.http2.Http2ServerMetrics) BlobStoreRecovery(com.github.ambry.messageformat.BlobStoreRecovery) RequestHandlerPool(com.github.ambry.protocol.RequestHandlerPool) StoreConfig(com.github.ambry.config.StoreConfig) DataNodeId(com.github.ambry.clustermap.DataNodeId) AccountService(com.github.ambry.account.AccountService) ConnectionPoolConfig(com.github.ambry.config.ConnectionPoolConfig) CloudConfig(com.github.ambry.config.CloudConfig) StorageServerNettyFactory(com.github.ambry.rest.StorageServerNettyFactory) AccountStatsMySqlStore(com.github.ambry.accountstats.AccountStatsMySqlStore) SSLConfig(com.github.ambry.config.SSLConfig) StatsManagerConfig(com.github.ambry.config.StatsManagerConfig) SocketServer(com.github.ambry.network.SocketServer) NettyMetrics(com.github.ambry.rest.NettyMetrics) FindTokenHelper(com.github.ambry.replication.FindTokenHelper) NettyServerRequestResponseChannel(com.github.ambry.network.NettyServerRequestResponseChannel) NetworkConfig(com.github.ambry.config.NetworkConfig) BlobStoreHardDelete(com.github.ambry.messageformat.BlobStoreHardDelete) IOException(java.io.IOException) MessageInfo(com.github.ambry.store.MessageInfo) NioServerFactory(com.github.ambry.rest.NioServerFactory) CloudToStoreReplicationManager(com.github.ambry.replication.CloudToStoreReplicationManager) ClusterParticipant(com.github.ambry.clustermap.ClusterParticipant) AccountServiceFactory(com.github.ambry.account.AccountServiceFactory)

Aggregations

StorageManager (com.github.ambry.store.StorageManager)24 Test (org.junit.Test)20 ReplicaId (com.github.ambry.clustermap.ReplicaId)19 MockPartitionId (com.github.ambry.clustermap.MockPartitionId)18 PartitionId (com.github.ambry.clustermap.PartitionId)18 MockReplicaId (com.github.ambry.clustermap.MockReplicaId)15 MetricRegistry (com.codahale.metrics.MetricRegistry)14 ClusterMapConfig (com.github.ambry.config.ClusterMapConfig)13 Store (com.github.ambry.store.Store)13 MockClusterMap (com.github.ambry.clustermap.MockClusterMap)12 MockHelixParticipant (com.github.ambry.clustermap.MockHelixParticipant)12 ArrayList (java.util.ArrayList)11 DataNodeId (com.github.ambry.clustermap.DataNodeId)10 StateTransitionException (com.github.ambry.clustermap.StateTransitionException)9 HashSet (java.util.HashSet)9 DiskManagerConfig (com.github.ambry.config.DiskManagerConfig)8 ReplicationConfig (com.github.ambry.config.ReplicationConfig)8 MockTime (com.github.ambry.utils.MockTime)8 HashMap (java.util.HashMap)8 CountDownLatch (java.util.concurrent.CountDownLatch)8