use of com.github.ambry.store.StorageManager in project ambry by linkedin.
the class AmbryServer method startup.
public void startup() throws InstantiationException {
try {
logger.info("starting");
clusterParticipants = clusterAgentsFactory.getClusterParticipants();
logger.info("Setting up JMX.");
long startTime = SystemTime.getInstance().milliseconds();
reporter = reporterFactory != null ? reporterFactory.apply(registry) : JmxReporter.forRegistry(registry).build();
reporter.start();
logger.info("creating configs");
NetworkConfig networkConfig = new NetworkConfig(properties);
StoreConfig storeConfig = new StoreConfig(properties);
DiskManagerConfig diskManagerConfig = new DiskManagerConfig(properties);
ServerConfig serverConfig = new ServerConfig(properties);
ReplicationConfig replicationConfig = new ReplicationConfig(properties);
ConnectionPoolConfig connectionPoolConfig = new ConnectionPoolConfig(properties);
SSLConfig sslConfig = new SSLConfig(properties);
ClusterMapConfig clusterMapConfig = new ClusterMapConfig(properties);
StatsManagerConfig statsConfig = new StatsManagerConfig(properties);
CloudConfig cloudConfig = new CloudConfig(properties);
// verify the configs
properties.verify();
scheduler = Utils.newScheduler(serverConfig.serverSchedulerNumOfthreads, false);
// mismatch in sealed/stopped replica lists that maintained by each participant.
if (clusterParticipants != null && clusterParticipants.size() > 1 && serverConfig.serverParticipantsConsistencyCheckerPeriodSec > 0) {
consistencyChecker = new ParticipantsConsistencyChecker(clusterParticipants, metrics);
logger.info("Scheduling participants consistency checker with a period of {} secs", serverConfig.serverParticipantsConsistencyCheckerPeriodSec);
consistencyCheckerScheduler = Utils.newScheduler(1, "consistency-checker-", false);
consistencyCheckerTask = consistencyCheckerScheduler.scheduleAtFixedRate(consistencyChecker, 0, serverConfig.serverParticipantsConsistencyCheckerPeriodSec, TimeUnit.SECONDS);
}
logger.info("checking if node exists in clustermap host {} port {}", networkConfig.hostName, networkConfig.port);
DataNodeId nodeId = clusterMap.getDataNodeId(networkConfig.hostName, networkConfig.port);
if (nodeId == null) {
throw new IllegalArgumentException("The node " + networkConfig.hostName + ":" + networkConfig.port + "is not present in the clustermap. Failing to start the datanode");
}
AccountServiceFactory accountServiceFactory = Utils.getObj(serverConfig.serverAccountServiceFactory, properties, registry);
AccountService accountService = accountServiceFactory.getAccountService();
StoreKeyFactory storeKeyFactory = Utils.getObj(storeConfig.storeKeyFactory, clusterMap);
// In most cases, there should be only one participant in the clusterParticipants list. If there are more than one
// and some components require sole participant, the first one in the list will be primary participant.
storageManager = new StorageManager(storeConfig, diskManagerConfig, scheduler, registry, storeKeyFactory, clusterMap, nodeId, new BlobStoreHardDelete(), clusterParticipants, time, new BlobStoreRecovery(), accountService);
storageManager.start();
SSLFactory sslFactory = new NettySslHttp2Factory(sslConfig);
if (clusterMapConfig.clusterMapEnableHttp2Replication) {
connectionPool = new Http2BlockingChannelPool(sslFactory, new Http2ClientConfig(properties), new Http2ClientMetrics(registry));
} else {
connectionPool = new BlockingChannelConnectionPool(connectionPoolConfig, sslConfig, clusterMapConfig, registry);
}
connectionPool.start();
StoreKeyConverterFactory storeKeyConverterFactory = Utils.getObj(serverConfig.serverStoreKeyConverterFactory, properties, registry);
Predicate<MessageInfo> skipPredicate = new ReplicationSkipPredicate(accountService, replicationConfig);
replicationManager = new ReplicationManager(replicationConfig, clusterMapConfig, storeConfig, storageManager, storeKeyFactory, clusterMap, scheduler, nodeId, connectionPool, registry, notificationSystem, storeKeyConverterFactory, serverConfig.serverMessageTransformer, clusterParticipants.get(0), skipPredicate);
replicationManager.start();
if (replicationConfig.replicationEnabledWithVcrCluster) {
logger.info("Creating Helix cluster spectator for cloud to store replication.");
vcrClusterSpectator = _vcrClusterAgentsFactory.getVcrClusterSpectator(cloudConfig, clusterMapConfig);
cloudToStoreReplicationManager = new CloudToStoreReplicationManager(replicationConfig, clusterMapConfig, storeConfig, storageManager, storeKeyFactory, clusterMap, scheduler, nodeId, connectionPool, registry, notificationSystem, storeKeyConverterFactory, serverConfig.serverMessageTransformer, vcrClusterSpectator, clusterParticipants.get(0));
cloudToStoreReplicationManager.start();
}
logger.info("Creating StatsManager to publish stats");
accountStatsMySqlStore = statsConfig.enableMysqlReport ? (AccountStatsMySqlStore) new AccountStatsMySqlStoreFactory(properties, clusterMapConfig, registry).getAccountStatsStore() : null;
statsManager = new StatsManager(storageManager, clusterMap.getReplicaIds(nodeId), registry, statsConfig, time, clusterParticipants.get(0), accountStatsMySqlStore, accountService);
if (serverConfig.serverStatsPublishLocalEnabled) {
statsManager.start();
}
ArrayList<Port> ports = new ArrayList<Port>();
ports.add(new Port(networkConfig.port, PortType.PLAINTEXT));
if (nodeId.hasSSLPort()) {
ports.add(new Port(nodeId.getSSLPort(), PortType.SSL));
}
networkServer = new SocketServer(networkConfig, sslConfig, registry, ports);
FindTokenHelper findTokenHelper = new FindTokenHelper(storeKeyFactory, replicationConfig);
requests = new AmbryServerRequests(storageManager, networkServer.getRequestResponseChannel(), clusterMap, nodeId, registry, metrics, findTokenHelper, notificationSystem, replicationManager, storeKeyFactory, serverConfig, storeKeyConverterFactory, statsManager, clusterParticipants.get(0));
requestHandlerPool = new RequestHandlerPool(serverConfig.serverRequestHandlerNumOfThreads, networkServer.getRequestResponseChannel(), requests);
networkServer.start();
// Start netty http2 server
if (nodeId.hasHttp2Port()) {
NettyConfig nettyConfig = new NettyConfig(properties);
NettyMetrics nettyMetrics = new NettyMetrics(registry);
Http2ServerMetrics http2ServerMetrics = new Http2ServerMetrics(registry);
Http2ClientConfig http2ClientConfig = new Http2ClientConfig(properties);
logger.info("Http2 port {} is enabled. Starting HTTP/2 service.", nodeId.getHttp2Port());
NettyServerRequestResponseChannel requestResponseChannel = new NettyServerRequestResponseChannel(networkConfig.queuedMaxRequests, http2ServerMetrics);
AmbryServerRequests ambryServerRequestsForHttp2 = new AmbryServerRequests(storageManager, requestResponseChannel, clusterMap, nodeId, registry, metrics, findTokenHelper, notificationSystem, replicationManager, storeKeyFactory, serverConfig, storeKeyConverterFactory, statsManager, clusterParticipants.get(0));
requestHandlerPoolForHttp2 = new RequestHandlerPool(serverConfig.serverRequestHandlerNumOfThreads, requestResponseChannel, ambryServerRequestsForHttp2);
NioServerFactory nioServerFactory = new StorageServerNettyFactory(nodeId.getHttp2Port(), requestResponseChannel, sslFactory, nettyConfig, http2ClientConfig, metrics, nettyMetrics, http2ServerMetrics, serverSecurityService);
nettyHttp2Server = nioServerFactory.getNioServer();
nettyHttp2Server.start();
}
// Other code
List<AmbryStatsReport> ambryStatsReports = new ArrayList<>();
Set<String> validStatsTypes = new HashSet<>();
for (StatsReportType type : StatsReportType.values()) {
validStatsTypes.add(type.toString());
}
if (serverConfig.serverStatsPublishReportEnabled) {
serverConfig.serverStatsReportsToPublish.forEach(e -> {
if (validStatsTypes.contains(e)) {
ambryStatsReports.add(new AmbryStatsReportImpl(serverConfig.serverQuotaStatsAggregateIntervalInMinutes, StatsReportType.valueOf(e)));
}
});
}
if (vcrClusterSpectator != null) {
vcrClusterSpectator.spectate();
}
Callback<StatsSnapshot> accountServiceCallback = new AccountServiceCallback(accountService);
for (ClusterParticipant clusterParticipant : clusterParticipants) {
clusterParticipant.participate(ambryStatsReports, accountStatsMySqlStore, accountServiceCallback);
}
if (nettyInternalMetrics != null) {
nettyInternalMetrics.start();
logger.info("NettyInternalMetric starts");
}
logger.info("started");
long processingTime = SystemTime.getInstance().milliseconds() - startTime;
metrics.serverStartTimeInMs.update(processingTime);
logger.info("Server startup time in Ms {}", processingTime);
} catch (Exception e) {
logger.error("Error during startup", e);
throw new InstantiationException("failure during startup " + e);
}
}
use of com.github.ambry.store.StorageManager in project ambry by linkedin.
the class AmbryServerRequests method handleRemoveStoreRequest.
/**
* Handles admin request that removes a BlobStore from current node
* @param partitionId the {@link PartitionId} associated with BlobStore
* @return {@link ServerErrorCode} represents result of handling admin request.
*/
private ServerErrorCode handleRemoveStoreRequest(PartitionId partitionId) throws StoreException, IOException {
ServerErrorCode errorCode = ServerErrorCode.No_Error;
ReplicaId replicaId = storeManager.getReplica(partitionId.toPathString());
if (replicaId == null) {
logger.error("{} doesn't exist on current node", partitionId);
return ServerErrorCode.Partition_Unknown;
}
// Attempt to remove replica from stats manager. If replica doesn't exist, log info but don't fail the request
statsManager.removeReplica(replicaId);
// Attempt to remove replica from replication manager. If replica doesn't exist, log info but don't fail the request
((ReplicationManager) replicationEngine).removeReplica(replicaId);
Store store = ((StorageManager) storeManager).getStore(partitionId, true);
// Attempt to remove store from storage manager.
if (storeManager.removeBlobStore(partitionId) && store != null) {
((BlobStore) store).deleteStoreFiles();
for (ReplicaStatusDelegate replicaStatusDelegate : ((BlobStore) store).getReplicaStatusDelegates()) {
// Remove store from sealed and stopped list (if present)
logger.info("Removing store from sealed and stopped list(if present)");
replicaStatusDelegate.unseal(replicaId);
replicaStatusDelegate.unmarkStopped(Collections.singletonList(replicaId));
}
} else {
errorCode = ServerErrorCode.Unknown_Error;
}
return errorCode;
}
use of com.github.ambry.store.StorageManager in project ambry by linkedin.
the class ReplicationTest method replicaFromStandbyToInactiveTest.
/**
* Test STANDBY -> INACTIVE transition on existing replica (both success and failure cases)
*/
@Test
public void replicaFromStandbyToInactiveTest() throws Exception {
MockClusterMap clusterMap = new MockClusterMap();
ClusterMapConfig clusterMapConfig = new ClusterMapConfig(verifiableProperties);
MockHelixParticipant.metricRegistry = new MetricRegistry();
MockHelixParticipant mockHelixParticipant = new MockHelixParticipant(clusterMapConfig);
Pair<StorageManager, ReplicationManager> managers = createStorageManagerAndReplicationManager(clusterMap, clusterMapConfig, mockHelixParticipant);
StorageManager storageManager = managers.getFirst();
MockReplicationManager replicationManager = (MockReplicationManager) managers.getSecond();
// get an existing partition to test both success and failure cases
PartitionId existingPartition = replicationManager.partitionToPartitionInfo.keySet().iterator().next();
storageManager.shutdownBlobStore(existingPartition);
try {
mockHelixParticipant.onPartitionBecomeInactiveFromStandby(existingPartition.toPathString());
fail("should fail because store is not started");
} catch (StateTransitionException e) {
assertEquals("Error code doesn't match", StoreNotStarted, e.getErrorCode());
}
// restart the store and trigger Standby-To-Inactive transition again
storageManager.startBlobStore(existingPartition);
// write a blob with size = 100 into local store (end offset of last PUT = 100 + 18 = 118)
Store localStore = storageManager.getStore(existingPartition);
MockId id = new MockId(TestUtils.getRandomString(10), Utils.getRandomShort(TestUtils.RANDOM), Utils.getRandomShort(TestUtils.RANDOM));
long crc = (new Random()).nextLong();
long blobSize = 100;
MessageInfo info = new MessageInfo(id, blobSize, false, false, Utils.Infinite_Time, crc, id.getAccountId(), id.getContainerId(), Utils.Infinite_Time);
List<MessageInfo> infos = new ArrayList<>();
List<ByteBuffer> buffers = new ArrayList<>();
ByteBuffer buffer = ByteBuffer.wrap(TestUtils.getRandomBytes((int) blobSize));
infos.add(info);
buffers.add(buffer);
localStore.put(new MockMessageWriteSet(infos, buffers));
ReplicaId localReplica = storageManager.getReplica(existingPartition.toPathString());
// override partition state change listener in ReplicationManager to help thread manipulation
mockHelixParticipant.registerPartitionStateChangeListener(StateModelListenerType.ReplicationManagerListener, replicationManager.replicationListener);
CountDownLatch participantLatch = new CountDownLatch(1);
replicationManager.listenerExecutionLatch = new CountDownLatch(1);
// create a new thread and trigger STANDBY -> INACTIVE transition
Utils.newThread(() -> {
mockHelixParticipant.onPartitionBecomeInactiveFromStandby(existingPartition.toPathString());
participantLatch.countDown();
}, false).start();
assertTrue("Partition state change listener didn't get called within 1 sec", replicationManager.listenerExecutionLatch.await(1, TimeUnit.SECONDS));
assertEquals("Local store state should be INACTIVE", ReplicaState.INACTIVE, storageManager.getStore(existingPartition).getCurrentState());
List<RemoteReplicaInfo> remoteReplicaInfos = replicationManager.partitionToPartitionInfo.get(existingPartition).getRemoteReplicaInfos();
ReplicaId peerReplica1 = remoteReplicaInfos.get(0).getReplicaId();
assertFalse("Sync up should not complete because not enough replicas have caught up", mockHelixParticipant.getReplicaSyncUpManager().updateReplicaLagAndCheckSyncStatus(localReplica, peerReplica1, 10L, ReplicaState.INACTIVE));
// pick another remote replica to update the replication lag
ReplicaId peerReplica2 = remoteReplicaInfos.get(1).getReplicaId();
replicationManager.updateTotalBytesReadByRemoteReplica(existingPartition, peerReplica1.getDataNodeId().getHostname(), peerReplica1.getReplicaPath(), 118);
assertFalse("Sync up shouldn't complete because only one replica has caught up with local replica", mockHelixParticipant.getReplicaSyncUpManager().isSyncUpComplete(localReplica));
// make second peer replica catch up with last PUT in local store
replicationManager.updateTotalBytesReadByRemoteReplica(existingPartition, peerReplica2.getDataNodeId().getHostname(), peerReplica2.getReplicaPath(), 118);
assertTrue("Standby-To-Inactive transition didn't complete within 1 sec", participantLatch.await(1, TimeUnit.SECONDS));
// we purposely update lag against local replica to verify local replica is no longer in ReplicaSyncUpManager because
// deactivation is complete and local replica should be removed from "replicaToLagInfos" map.
assertFalse("Sync up should complete (2 replicas have caught up), hence updated should be false", mockHelixParticipant.getReplicaSyncUpManager().updateReplicaLagAndCheckSyncStatus(localReplica, peerReplica2, 0L, ReplicaState.INACTIVE));
storageManager.shutdown();
}
use of com.github.ambry.store.StorageManager in project ambry by linkedin.
the class ReplicationTest method replicaFromBootstrapToStandbyTest.
/**
* Test BOOTSTRAP -> STANDBY transition on both existing and new replicas. For new replica, we test both failure and
* success cases.
* @throws Exception
*/
@Test
public void replicaFromBootstrapToStandbyTest() throws Exception {
MockClusterMap clusterMap = new MockClusterMap();
ClusterMapConfig clusterMapConfig = new ClusterMapConfig(verifiableProperties);
MockHelixParticipant.metricRegistry = new MetricRegistry();
MockHelixParticipant mockHelixParticipant = new MockHelixParticipant(clusterMapConfig);
Pair<StorageManager, ReplicationManager> managers = createStorageManagerAndReplicationManager(clusterMap, clusterMapConfig, mockHelixParticipant);
StorageManager storageManager = managers.getFirst();
MockReplicationManager replicationManager = (MockReplicationManager) managers.getSecond();
// 1. test existing partition trough Bootstrap-To-Standby transition, should be no op.
PartitionId existingPartition = replicationManager.partitionToPartitionInfo.keySet().iterator().next();
mockHelixParticipant.onPartitionBecomeStandbyFromBootstrap(existingPartition.toPathString());
assertEquals("Store state doesn't match", ReplicaState.STANDBY, storageManager.getStore(existingPartition).getCurrentState());
// 2. test transition failure due to store not started
storageManager.shutdownBlobStore(existingPartition);
try {
mockHelixParticipant.onPartitionBecomeStandbyFromBootstrap(existingPartition.toPathString());
fail("should fail because store is not started");
} catch (StateTransitionException e) {
assertEquals("Error code doesn't match", StoreNotStarted, e.getErrorCode());
}
// 3. create new replica and add it into storage manager, test replica that needs to initiate bootstrap
ReplicaId newReplicaToAdd = getNewReplicaToAdd(clusterMap);
assertTrue("Adding new replica to Storage Manager should succeed", storageManager.addBlobStore(newReplicaToAdd));
// override partition state change listener in ReplicationManager to help thread manipulation
mockHelixParticipant.registerPartitionStateChangeListener(StateModelListenerType.ReplicationManagerListener, replicationManager.replicationListener);
CountDownLatch participantLatch = new CountDownLatch(1);
replicationManager.listenerExecutionLatch = new CountDownLatch(1);
// create a new thread and trigger BOOTSTRAP -> STANDBY transition
Utils.newThread(() -> {
mockHelixParticipant.onPartitionBecomeStandbyFromBootstrap(newReplicaToAdd.getPartitionId().toPathString());
participantLatch.countDown();
}, false).start();
assertTrue("Partition state change listener in ReplicationManager didn't get called within 1 sec", replicationManager.listenerExecutionLatch.await(1, TimeUnit.SECONDS));
assertEquals("Replica should be in BOOTSTRAP state before bootstrap is complete", ReplicaState.BOOTSTRAP, storageManager.getStore(newReplicaToAdd.getPartitionId()).getCurrentState());
// make bootstrap succeed
mockHelixParticipant.getReplicaSyncUpManager().onBootstrapComplete(newReplicaToAdd);
assertTrue("Bootstrap-To-Standby transition didn't complete within 1 sec", participantLatch.await(1, TimeUnit.SECONDS));
storageManager.shutdown();
}
use of com.github.ambry.store.StorageManager in project ambry by linkedin.
the class ReplicationTest method addAndRemoveReplicaTest.
/**
* Test dynamically add/remove replica in {@link ReplicationManager}
* @throws Exception
*/
@Test
public void addAndRemoveReplicaTest() throws Exception {
MockClusterMap clusterMap = new MockClusterMap();
ClusterMapConfig clusterMapConfig = new ClusterMapConfig(verifiableProperties);
StoreConfig storeConfig = new StoreConfig(verifiableProperties);
DataNodeId dataNodeId = clusterMap.getDataNodeIds().get(0);
MockStoreKeyConverterFactory storeKeyConverterFactory = new MockStoreKeyConverterFactory(null, null);
storeKeyConverterFactory.setConversionMap(new HashMap<>());
StorageManager storageManager = new StorageManager(storeConfig, new DiskManagerConfig(verifiableProperties), Utils.newScheduler(1, true), new MetricRegistry(), null, clusterMap, dataNodeId, null, null, new MockTime(), null, new InMemAccountService(false, false));
storageManager.start();
MockReplicationManager replicationManager = new MockReplicationManager(replicationConfig, clusterMapConfig, storeConfig, storageManager, clusterMap, dataNodeId, storeKeyConverterFactory, null);
ReplicaId replicaToTest = clusterMap.getReplicaIds(dataNodeId).get(0);
// Attempting to add replica that already exists should fail
assertFalse("Adding an existing replica should fail", replicationManager.addReplica(replicaToTest));
// Create a brand new replica that sits on one of the disk of datanode, add it into replication manager
PartitionId newPartition = clusterMap.createNewPartition(clusterMap.getDataNodes());
for (ReplicaId replicaId : newPartition.getReplicaIds()) {
if (replicaId.getDataNodeId() == dataNodeId) {
replicaToTest = replicaId;
break;
}
}
// Before adding replica, partitionToPartitionInfo and mountPathToPartitionInfos should not contain new partition
assertFalse("partitionToPartitionInfo should not contain new partition", replicationManager.getPartitionToPartitionInfoMap().containsKey(newPartition));
for (PartitionInfo partitionInfo : replicationManager.getMountPathToPartitionInfosMap().get(replicaToTest.getMountPath())) {
assertNotSame("mountPathToPartitionInfos should not contain new partition", partitionInfo.getPartitionId(), newPartition);
}
// Add new replica to replication manager
assertTrue("Adding new replica to replication manager should succeed", replicationManager.addReplica(replicaToTest));
// After adding replica, partitionToPartitionInfo and mountPathToPartitionInfos should contain new partition
assertTrue("partitionToPartitionInfo should contain new partition", replicationManager.getPartitionToPartitionInfoMap().containsKey(newPartition));
Optional<PartitionInfo> newPartitionInfo = replicationManager.getMountPathToPartitionInfosMap().get(replicaToTest.getMountPath()).stream().filter(partitionInfo -> partitionInfo.getPartitionId() == newPartition).findAny();
assertTrue("mountPathToPartitionInfos should contain new partition info", newPartitionInfo.isPresent());
// Verify that all remoteReplicaInfos of new added replica have assigned thread
for (RemoteReplicaInfo remoteReplicaInfo : newPartitionInfo.get().getRemoteReplicaInfos()) {
assertNotNull("The remote replica should be assigned to one replica thread", remoteReplicaInfo.getReplicaThread());
}
// Remove replica
assertTrue("Remove replica from replication manager should succeed", replicationManager.removeReplica(replicaToTest));
// Verify replica is removed, so partitionToPartitionInfo and mountPathToPartitionInfos should not contain new partition
assertFalse("partitionToPartitionInfo should not contain new partition", replicationManager.getPartitionToPartitionInfoMap().containsKey(newPartition));
for (PartitionInfo partitionInfo : replicationManager.getMountPathToPartitionInfosMap().get(replicaToTest.getMountPath())) {
assertNotSame("mountPathToPartitionInfos should not contain new partition", partitionInfo.getPartitionId(), newPartition);
}
// Verify that none of remoteReplicaInfo should have assigned thread
for (RemoteReplicaInfo remoteReplicaInfo : newPartitionInfo.get().getRemoteReplicaInfos()) {
assertNull("The remote replica should be assigned to one replica thread", remoteReplicaInfo.getReplicaThread());
}
// Remove the same replica that doesn't exist should be no-op
ReplicationManager mockManager = Mockito.spy(replicationManager);
assertFalse("Remove non-existent replica should return false", replicationManager.removeReplica(replicaToTest));
verify(mockManager, never()).removeRemoteReplicaInfoFromReplicaThread(anyList());
storageManager.shutdown();
}
Aggregations