use of io.pravega.controller.server.health.SegmentContainerMonitorHealthContributor in project pravega by pravega.
the class ControllerServiceStarter method startUp.
@Override
protected void startUp() {
long traceId = LoggerHelpers.traceEnterWithContext(log, this.objectId, "startUp");
log.info("Initiating controller service startUp");
log.info("Controller serviceConfig = {}", serviceConfig.toString());
log.info("Event processors enabled = {}", serviceConfig.getEventProcessorConfig().isPresent());
log.info("Cluster listener enabled = {}", serviceConfig.isControllerClusterListenerEnabled());
log.info(" Host monitor enabled = {}", serviceConfig.getHostMonitorConfig().isHostMonitorEnabled());
log.info(" gRPC server enabled = {}", serviceConfig.getGRPCServerConfig().isPresent());
log.info(" REST server enabled = {}", serviceConfig.getRestServerConfig().isPresent());
final BucketStore bucketStore;
final TaskMetadataStore taskMetadataStore;
final HostControllerStore hostStore;
final CheckpointStore checkpointStore;
try {
// Initialize the executor service.
controllerExecutor = ExecutorServiceHelpers.newScheduledThreadPool(serviceConfig.getThreadPoolSize(), "controllerpool");
eventExecutor = ExecutorServiceHelpers.newScheduledThreadPool(serviceConfig.getThreadPoolSize(), "eventprocessor");
retentionExecutor = ExecutorServiceHelpers.newScheduledThreadPool(Config.RETENTION_THREAD_POOL_SIZE, "retentionpool");
watermarkingExecutor = ExecutorServiceHelpers.newScheduledThreadPool(Config.WATERMARKING_THREAD_POOL_SIZE, "watermarkingpool");
bucketStore = StreamStoreFactory.createBucketStore(storeClient, controllerExecutor);
log.info("Created the bucket store.");
taskMetadataStore = TaskStoreFactory.createStore(storeClient, controllerExecutor);
log.info("Created the task store.");
hostStore = HostStoreFactory.createStore(serviceConfig.getHostMonitorConfig(), storeClient);
log.info("Created the host store.");
checkpointStore = CheckpointStoreFactory.create(storeClient);
log.info("Created the checkpoint store.");
// Initialize Stream and Transaction metrics.
StreamMetrics.initialize();
TransactionMetrics.initialize();
// On each controller process restart, we use a fresh hostId,
// which is a combination of hostname and random GUID.
String hostName = getHostName();
Host host = new Host(hostName, getPort(), UUID.randomUUID().toString());
// Create a RequestTracker instance to trace client requests end-to-end.
GRPCServerConfig grpcServerConfig = serviceConfig.getGRPCServerConfig().get();
RequestTracker requestTracker = new RequestTracker(grpcServerConfig.isRequestTracingEnabled());
// Create a Health Service Manager instance.
healthServiceManager = new HealthServiceManager(serviceConfig.getHealthCheckFrequency());
if (serviceConfig.getHostMonitorConfig().isHostMonitorEnabled()) {
// Start the Segment Container Monitor.
monitor = new SegmentContainerMonitor(hostStore, (CuratorFramework) storeClient.getClient(), new UniformContainerBalancer(), serviceConfig.getHostMonitorConfig().getHostMonitorMinRebalanceInterval());
monitor.startAsync();
log.info("Started Segment Container Monitor service.");
SegmentContainerMonitorHealthContributor segmentContainerMonitorHC = new SegmentContainerMonitorHealthContributor("segmentContainerMonitor", monitor);
healthServiceManager.register(segmentContainerMonitorHC);
}
// This client config is used by the segment store helper (SegmentHelper) to connect to the segment store.
ClientConfig.ClientConfigBuilder clientConfigBuilder = ClientConfig.builder().controllerURI(URI.create((grpcServerConfig.isTlsEnabled() ? "tls://" : "tcp://") + "localhost:" + grpcServerConfig.getPort())).trustStore(grpcServerConfig.getTlsTrustStore()).validateHostName(false);
Optional<Boolean> tlsEnabledForSegmentStore = BooleanUtils.extract(serviceConfig.getTlsEnabledForSegmentStore());
if (tlsEnabledForSegmentStore.isPresent()) {
clientConfigBuilder.enableTlsToSegmentStore(tlsEnabledForSegmentStore.get());
}
// Use one connection per Segment Store to save up resources.
ClientConfig clientConfig = clientConfigBuilder.maxConnectionsPerSegmentStore(1).build();
connectionFactory = connectionFactoryRef.orElseGet(() -> new SocketConnectionFactoryImpl(clientConfig));
connectionPool = new ConnectionPoolImpl(clientConfig, connectionFactory);
segmentHelper = segmentHelperRef.orElseGet(() -> new SegmentHelper(connectionPool, hostStore, controllerExecutor));
GrpcAuthHelper authHelper = new GrpcAuthHelper(serviceConfig.getGRPCServerConfig().get().isAuthorizationEnabled(), grpcServerConfig.getTokenSigningKey(), grpcServerConfig.getAccessTokenTTLInSeconds());
streamStore = streamMetadataStoreRef.orElseGet(() -> StreamStoreFactory.createStore(storeClient, segmentHelper, authHelper, controllerExecutor));
log.info("Created the stream store.");
streamMetadataTasks = new StreamMetadataTasks(streamStore, bucketStore, taskMetadataStore, segmentHelper, controllerExecutor, eventExecutor, host.getHostId(), authHelper, serviceConfig.getRetentionFrequency().toMillis());
streamTransactionMetadataTasks = new StreamTransactionMetadataTasks(streamStore, segmentHelper, controllerExecutor, eventExecutor, host.getHostId(), serviceConfig.getTimeoutServiceConfig(), authHelper);
BucketServiceFactory bucketServiceFactory = new BucketServiceFactory(host.getHostId(), bucketStore, 1000);
Duration executionDurationRetention = serviceConfig.getRetentionFrequency();
PeriodicRetention retentionWork = new PeriodicRetention(streamStore, streamMetadataTasks, retentionExecutor, requestTracker);
retentionService = bucketServiceFactory.createRetentionService(executionDurationRetention, retentionWork::retention, retentionExecutor);
retentionService.startAsync();
retentionService.awaitRunning();
log.info("Started background periodic service for Retention.");
RetentionServiceHealthContributor retentionServiceHC = new RetentionServiceHealthContributor("retentionService", retentionService);
healthServiceManager.register(retentionServiceHC);
Duration executionDurationWatermarking = Duration.ofSeconds(Config.MINIMUM_WATERMARKING_FREQUENCY_IN_SECONDS);
watermarkingWork = new PeriodicWatermarking(streamStore, bucketStore, clientConfig, watermarkingExecutor, requestTracker);
watermarkingService = bucketServiceFactory.createWatermarkingService(executionDurationWatermarking, watermarkingWork::watermark, watermarkingExecutor);
watermarkingService.startAsync();
watermarkingService.awaitRunning();
log.info("Started background periodic service for Watermarking.");
WatermarkingServiceHealthContributor watermarkingServiceHC = new WatermarkingServiceHealthContributor("watermarkingService", watermarkingService);
healthServiceManager.register(watermarkingServiceHC);
// Controller has a mechanism to track the currently active controller host instances. On detecting a failure of
// any controller instance, the failure detector stores the failed HostId in a failed hosts directory (FH), and
// invokes the taskSweeper.sweepOrphanedTasks for each failed host. When all resources under the failed hostId
// are processed and deleted, that failed HostId is removed from FH folder.
// Moreover, on controller process startup, it detects any hostIds not in the currently active set of
// controllers and starts sweeping tasks orphaned by those hostIds.
TaskSweeper taskSweeper = new TaskSweeper(taskMetadataStore, host.getHostId(), controllerExecutor, streamMetadataTasks);
TxnSweeper txnSweeper = new TxnSweeper(streamStore, streamTransactionMetadataTasks, serviceConfig.getTimeoutServiceConfig().getMaxLeaseValue(), controllerExecutor);
RequestSweeper requestSweeper = new RequestSweeper(streamStore, controllerExecutor, streamMetadataTasks);
if (serviceConfig.isControllerClusterListenerEnabled()) {
cluster = new ClusterZKImpl((CuratorFramework) storeClient.getClient(), ClusterType.CONTROLLER);
}
kvtMetadataStore = kvtMetaStoreRef.orElseGet(() -> KVTableStoreFactory.createStore(storeClient, segmentHelper, authHelper, controllerExecutor, streamStore));
kvtMetadataTasks = new TableMetadataTasks(kvtMetadataStore, segmentHelper, controllerExecutor, eventExecutor, host.getHostId(), authHelper);
controllerService = new ControllerService(kvtMetadataStore, kvtMetadataTasks, streamStore, bucketStore, streamMetadataTasks, streamTransactionMetadataTasks, segmentHelper, controllerExecutor, cluster, requestTracker);
// Setup event processors.
setController(new LocalController(controllerService, grpcServerConfig.isAuthorizationEnabled(), grpcServerConfig.getTokenSigningKey()));
CompletableFuture<Void> eventProcessorFuture = CompletableFuture.completedFuture(null);
if (serviceConfig.getEventProcessorConfig().isPresent()) {
// Create ControllerEventProcessor object.
controllerEventProcessors = new ControllerEventProcessors(host.getHostId(), serviceConfig.getEventProcessorConfig().get(), localController, checkpointStore, streamStore, bucketStore, connectionPool, streamMetadataTasks, streamTransactionMetadataTasks, kvtMetadataStore, kvtMetadataTasks, eventExecutor);
// Bootstrap and start it asynchronously.
eventProcessorFuture = controllerEventProcessors.bootstrap(streamTransactionMetadataTasks, streamMetadataTasks, kvtMetadataTasks).thenAcceptAsync(x -> controllerEventProcessors.startAsync(), eventExecutor);
EventProcessorHealthContributor eventProcessorHC = new EventProcessorHealthContributor("eventProcessor", controllerEventProcessors);
healthServiceManager.register(eventProcessorHC);
}
// Setup and start controller cluster listener after all sweepers have been initialized.
if (serviceConfig.isControllerClusterListenerEnabled()) {
List<FailoverSweeper> failoverSweepers = new ArrayList<>();
failoverSweepers.add(taskSweeper);
failoverSweepers.add(txnSweeper);
failoverSweepers.add(requestSweeper);
if (serviceConfig.getEventProcessorConfig().isPresent()) {
assert controllerEventProcessors != null;
failoverSweepers.add(controllerEventProcessors);
}
controllerClusterListener = new ControllerClusterListener(host, cluster, controllerExecutor, failoverSweepers);
controllerClusterListener.startAsync();
ClusterListenerHealthContributor clusterListenerHC = new ClusterListenerHealthContributor("clusterListener", controllerClusterListener);
healthServiceManager.register(clusterListenerHC);
}
// Start the Health Service.
healthServiceManager.start();
// Start RPC server.
if (serviceConfig.getGRPCServerConfig().isPresent()) {
grpcServer = new GRPCServer(controllerService, grpcServerConfig, requestTracker);
grpcServer.startAsync();
grpcServer.awaitRunning();
GRPCServerHealthContributor grpcServerHC = new GRPCServerHealthContributor("GRPCServer", grpcServer);
healthServiceManager.register(grpcServerHC);
}
// Start REST server.
if (serviceConfig.getRestServerConfig().isPresent()) {
List<Object> resources = new ArrayList<>();
resources.add(new StreamMetadataResourceImpl(this.localController, controllerService, grpcServer.getAuthHandlerManager(), connectionFactory, clientConfig));
resources.add(new HealthImpl(grpcServer.getAuthHandlerManager(), healthServiceManager.getEndpoint()));
resources.add(new PingImpl());
MetricsProvider.getMetricsProvider().prometheusResource().ifPresent(resources::add);
restServer = new RESTServer(serviceConfig.getRestServerConfig().get(), Set.copyOf(resources));
restServer.startAsync();
restServer.awaitRunning();
}
// Wait for controller event processors to start.
if (serviceConfig.getEventProcessorConfig().isPresent()) {
// if store client has failed because of session expiration, there are two possibilities where
// controllerEventProcessors.awaitRunning may be stuck forever -
// 1. stream creation is retried indefinitely and cannot complete because of zk session expiration
// 2. event writer after stream creation throws exception.
// In both of above cases controllerEventProcessors.startAsync may not get called.
CompletableFuture.anyOf(storeClientFailureFuture, eventProcessorFuture.thenAccept(x -> controllerEventProcessors.awaitRunning())).join();
}
// Wait for controller cluster listeners to start.
if (serviceConfig.isControllerClusterListenerEnabled()) {
controllerClusterListener.awaitRunning();
}
} catch (Exception e) {
log.error("Failed trying to start controller services", e);
throw e;
} finally {
LoggerHelpers.traceLeave(log, this.objectId, "startUp", traceId);
}
}
Aggregations