use of io.pravega.shared.health.HealthServiceManager in project pravega by pravega.
the class ControllerServiceStarter method startUp.
/**
 * Starts the controller's services.
 *
 * The sequence below is order-dependent: it creates the executors and metadata stores, initializes metrics,
 * creates the HealthServiceManager, optionally starts the segment container monitor, builds the client
 * configuration / connection pool / segment helper / stream store, starts the retention and watermarking
 * bucket services, creates the failover sweepers, the key-value table store and the ControllerService,
 * optionally bootstraps the event processors and the cluster listener, starts the health service, and
 * finally starts the gRPC and REST servers before blocking until the event processors and the cluster
 * listener report that they are running.
 *
 * Any exception raised during this sequence is logged and rethrown to the caller; the trace-leave call in
 * the finally block runs on both the success and the failure path.
 */
@Override
protected void startUp() {
long traceId = LoggerHelpers.traceEnterWithContext(log, this.objectId, "startUp");
log.info("Initiating controller service startUp");
log.info("Controller serviceConfig = {}", serviceConfig.toString());
log.info("Event processors enabled = {}", serviceConfig.getEventProcessorConfig().isPresent());
log.info("Cluster listener enabled = {}", serviceConfig.isControllerClusterListenerEnabled());
log.info(" Host monitor enabled = {}", serviceConfig.getHostMonitorConfig().isHostMonitorEnabled());
log.info(" gRPC server enabled = {}", serviceConfig.getGRPCServerConfig().isPresent());
log.info(" REST server enabled = {}", serviceConfig.getRestServerConfig().isPresent());
// Declared outside the try block and assigned exactly once inside it; they are captured by lambdas and
// passed to several components further down.
final BucketStore bucketStore;
final TaskMetadataStore taskMetadataStore;
final HostControllerStore hostStore;
final CheckpointStore checkpointStore;
try {
// Initialize the executor service.
controllerExecutor = ExecutorServiceHelpers.newScheduledThreadPool(serviceConfig.getThreadPoolSize(), "controllerpool");
eventExecutor = ExecutorServiceHelpers.newScheduledThreadPool(serviceConfig.getThreadPoolSize(), "eventprocessor");
retentionExecutor = ExecutorServiceHelpers.newScheduledThreadPool(Config.RETENTION_THREAD_POOL_SIZE, "retentionpool");
watermarkingExecutor = ExecutorServiceHelpers.newScheduledThreadPool(Config.WATERMARKING_THREAD_POOL_SIZE, "watermarkingpool");
bucketStore = StreamStoreFactory.createBucketStore(storeClient, controllerExecutor);
log.info("Created the bucket store.");
taskMetadataStore = TaskStoreFactory.createStore(storeClient, controllerExecutor);
log.info("Created the task store.");
hostStore = HostStoreFactory.createStore(serviceConfig.getHostMonitorConfig(), storeClient);
log.info("Created the host store.");
checkpointStore = CheckpointStoreFactory.create(storeClient);
log.info("Created the checkpoint store.");
// Initialize Stream and Transaction metrics.
StreamMetrics.initialize();
TransactionMetrics.initialize();
// On each controller process restart, we use a fresh hostId,
// which is a combination of hostname and random GUID.
String hostName = getHostName();
Host host = new Host(hostName, getPort(), UUID.randomUUID().toString());
// Create a RequestTracker instance to trace client requests end-to-end.
// NOTE(review): Optional.get() is called unconditionally here, while the gRPC server start further down is
// guarded by isPresent(). If the gRPC config were absent, this line would fail first — confirm that the
// config is always expected to be present.
GRPCServerConfig grpcServerConfig = serviceConfig.getGRPCServerConfig().get();
RequestTracker requestTracker = new RequestTracker(grpcServerConfig.isRequestTracingEnabled());
// Create a Health Service Manager instance.
healthServiceManager = new HealthServiceManager(serviceConfig.getHealthCheckFrequency());
if (serviceConfig.getHostMonitorConfig().isHostMonitorEnabled()) {
// Start the Segment Container Monitor.
monitor = new SegmentContainerMonitor(hostStore, (CuratorFramework) storeClient.getClient(), new UniformContainerBalancer(), serviceConfig.getHostMonitorConfig().getHostMonitorMinRebalanceInterval());
monitor.startAsync();
log.info("Started Segment Container Monitor service.");
SegmentContainerMonitorHealthContributor segmentContainerMonitorHC = new SegmentContainerMonitorHealthContributor("segmentContainerMonitor", monitor);
healthServiceManager.register(segmentContainerMonitorHC);
}
// This client config is used by the segment store helper (SegmentHelper) to connect to the segment store.
ClientConfig.ClientConfigBuilder clientConfigBuilder = ClientConfig.builder().controllerURI(URI.create((grpcServerConfig.isTlsEnabled() ? "tls://" : "tcp://") + "localhost:" + grpcServerConfig.getPort())).trustStore(grpcServerConfig.getTlsTrustStore()).validateHostName(false);
// The TLS-to-segment-store setting is applied to the builder only when a value was extracted.
Optional<Boolean> tlsEnabledForSegmentStore = BooleanUtils.extract(serviceConfig.getTlsEnabledForSegmentStore());
if (tlsEnabledForSegmentStore.isPresent()) {
clientConfigBuilder.enableTlsToSegmentStore(tlsEnabledForSegmentStore.get());
}
// Use one connection per Segment Store to save up resources.
ClientConfig clientConfig = clientConfigBuilder.maxConnectionsPerSegmentStore(1).build();
// The *Ref optionals let a caller-supplied instance take precedence; a default one is created otherwise.
connectionFactory = connectionFactoryRef.orElseGet(() -> new SocketConnectionFactoryImpl(clientConfig));
connectionPool = new ConnectionPoolImpl(clientConfig, connectionFactory);
segmentHelper = segmentHelperRef.orElseGet(() -> new SegmentHelper(connectionPool, hostStore, controllerExecutor));
GrpcAuthHelper authHelper = new GrpcAuthHelper(serviceConfig.getGRPCServerConfig().get().isAuthorizationEnabled(), grpcServerConfig.getTokenSigningKey(), grpcServerConfig.getAccessTokenTTLInSeconds());
streamStore = streamMetadataStoreRef.orElseGet(() -> StreamStoreFactory.createStore(storeClient, segmentHelper, authHelper, controllerExecutor));
log.info("Created the stream store.");
streamMetadataTasks = new StreamMetadataTasks(streamStore, bucketStore, taskMetadataStore, segmentHelper, controllerExecutor, eventExecutor, host.getHostId(), authHelper, serviceConfig.getRetentionFrequency().toMillis());
streamTransactionMetadataTasks = new StreamTransactionMetadataTasks(streamStore, segmentHelper, controllerExecutor, eventExecutor, host.getHostId(), serviceConfig.getTimeoutServiceConfig(), authHelper);
// NOTE(review): the meaning of the literal 1000 is not visible from here — confirm against
// BucketServiceFactory's constructor.
BucketServiceFactory bucketServiceFactory = new BucketServiceFactory(host.getHostId(), bucketStore, 1000);
// Retention service: started and awaited (blocking) before its health contributor is registered.
Duration executionDurationRetention = serviceConfig.getRetentionFrequency();
PeriodicRetention retentionWork = new PeriodicRetention(streamStore, streamMetadataTasks, retentionExecutor, requestTracker);
retentionService = bucketServiceFactory.createRetentionService(executionDurationRetention, retentionWork::retention, retentionExecutor);
retentionService.startAsync();
retentionService.awaitRunning();
log.info("Started background periodic service for Retention.");
RetentionServiceHealthContributor retentionServiceHC = new RetentionServiceHealthContributor("retentionService", retentionService);
healthServiceManager.register(retentionServiceHC);
// Watermarking service: same start / await / register pattern as the retention service above.
Duration executionDurationWatermarking = Duration.ofSeconds(Config.MINIMUM_WATERMARKING_FREQUENCY_IN_SECONDS);
watermarkingWork = new PeriodicWatermarking(streamStore, bucketStore, clientConfig, watermarkingExecutor, requestTracker);
watermarkingService = bucketServiceFactory.createWatermarkingService(executionDurationWatermarking, watermarkingWork::watermark, watermarkingExecutor);
watermarkingService.startAsync();
watermarkingService.awaitRunning();
log.info("Started background periodic service for Watermarking.");
WatermarkingServiceHealthContributor watermarkingServiceHC = new WatermarkingServiceHealthContributor("watermarkingService", watermarkingService);
healthServiceManager.register(watermarkingServiceHC);
// Controller has a mechanism to track the currently active controller host instances. On detecting a failure of
// any controller instance, the failure detector stores the failed HostId in a failed hosts directory (FH), and
// invokes the taskSweeper.sweepOrphanedTasks for each failed host. When all resources under the failed hostId
// are processed and deleted, that failed HostId is removed from FH folder.
// Moreover, on controller process startup, it detects any hostIds not in the currently active set of
// controllers and starts sweeping tasks orphaned by those hostIds.
TaskSweeper taskSweeper = new TaskSweeper(taskMetadataStore, host.getHostId(), controllerExecutor, streamMetadataTasks);
TxnSweeper txnSweeper = new TxnSweeper(streamStore, streamTransactionMetadataTasks, serviceConfig.getTimeoutServiceConfig().getMaxLeaseValue(), controllerExecutor);
RequestSweeper requestSweeper = new RequestSweeper(streamStore, controllerExecutor, streamMetadataTasks);
// The cluster handle is created here (when enabled) because ControllerService below takes it as an argument;
// the listener that uses it is only started further down, once all sweepers exist.
if (serviceConfig.isControllerClusterListenerEnabled()) {
cluster = new ClusterZKImpl((CuratorFramework) storeClient.getClient(), ClusterType.CONTROLLER);
}
kvtMetadataStore = kvtMetaStoreRef.orElseGet(() -> KVTableStoreFactory.createStore(storeClient, segmentHelper, authHelper, controllerExecutor, streamStore));
kvtMetadataTasks = new TableMetadataTasks(kvtMetadataStore, segmentHelper, controllerExecutor, eventExecutor, host.getHostId(), authHelper);
controllerService = new ControllerService(kvtMetadataStore, kvtMetadataTasks, streamStore, bucketStore, streamMetadataTasks, streamTransactionMetadataTasks, segmentHelper, controllerExecutor, cluster, requestTracker);
// Setup event processors.
setController(new LocalController(controllerService, grpcServerConfig.isAuthorizationEnabled(), grpcServerConfig.getTokenSigningKey()));
// Starts out as an already-completed future; replaced below when event processors are configured.
CompletableFuture<Void> eventProcessorFuture = CompletableFuture.completedFuture(null);
if (serviceConfig.getEventProcessorConfig().isPresent()) {
// Create ControllerEventProcessor object.
controllerEventProcessors = new ControllerEventProcessors(host.getHostId(), serviceConfig.getEventProcessorConfig().get(), localController, checkpointStore, streamStore, bucketStore, connectionPool, streamMetadataTasks, streamTransactionMetadataTasks, kvtMetadataStore, kvtMetadataTasks, eventExecutor);
// Bootstrap and start it asynchronously.
eventProcessorFuture = controllerEventProcessors.bootstrap(streamTransactionMetadataTasks, streamMetadataTasks, kvtMetadataTasks).thenAcceptAsync(x -> controllerEventProcessors.startAsync(), eventExecutor);
EventProcessorHealthContributor eventProcessorHC = new EventProcessorHealthContributor("eventProcessor", controllerEventProcessors);
healthServiceManager.register(eventProcessorHC);
}
// Setup and start controller cluster listener after all sweepers have been initialized.
if (serviceConfig.isControllerClusterListenerEnabled()) {
List<FailoverSweeper> failoverSweepers = new ArrayList<>();
failoverSweepers.add(taskSweeper);
failoverSweepers.add(txnSweeper);
failoverSweepers.add(requestSweeper);
if (serviceConfig.getEventProcessorConfig().isPresent()) {
assert controllerEventProcessors != null;
failoverSweepers.add(controllerEventProcessors);
}
controllerClusterListener = new ControllerClusterListener(host, cluster, controllerExecutor, failoverSweepers);
controllerClusterListener.startAsync();
ClusterListenerHealthContributor clusterListenerHC = new ClusterListenerHealthContributor("clusterListener", controllerClusterListener);
healthServiceManager.register(clusterListenerHC);
}
// Start the Health Service.
// Note that the gRPC server contributor below is registered after this start() call.
healthServiceManager.start();
// Start RPC server.
if (serviceConfig.getGRPCServerConfig().isPresent()) {
grpcServer = new GRPCServer(controllerService, grpcServerConfig, requestTracker);
grpcServer.startAsync();
grpcServer.awaitRunning();
GRPCServerHealthContributor grpcServerHC = new GRPCServerHealthContributor("GRPCServer", grpcServer);
healthServiceManager.register(grpcServerHC);
}
// Start REST server.
if (serviceConfig.getRestServerConfig().isPresent()) {
List<Object> resources = new ArrayList<>();
// NOTE(review): grpcServer is only assigned inside the isPresent() guard above, yet it is dereferenced
// unconditionally on the next two lines — confirm the REST server is never enabled without gRPC.
resources.add(new StreamMetadataResourceImpl(this.localController, controllerService, grpcServer.getAuthHandlerManager(), connectionFactory, clientConfig));
resources.add(new HealthImpl(grpcServer.getAuthHandlerManager(), healthServiceManager.getEndpoint()));
resources.add(new PingImpl());
// The Prometheus resource is added only when the metrics provider exposes one.
MetricsProvider.getMetricsProvider().prometheusResource().ifPresent(resources::add);
restServer = new RESTServer(serviceConfig.getRestServerConfig().get(), Set.copyOf(resources));
restServer.startAsync();
restServer.awaitRunning();
}
// Wait for controller event processors to start.
if (serviceConfig.getEventProcessorConfig().isPresent()) {
// if store client has failed because of session expiration, there are two possibilities where
// controllerEventProcessors.awaitRunning may be stuck forever -
// 1. stream creation is retried indefinitely and cannot complete because of zk session expiration
// 2. event writer after stream creation throws exception.
// In both of above cases controllerEventProcessors.startAsync may not get called.
// Hence the wait completes as soon as either the store-client failure future or the
// "event processors running" future completes.
CompletableFuture.anyOf(storeClientFailureFuture, eventProcessorFuture.thenAccept(x -> controllerEventProcessors.awaitRunning())).join();
}
// Wait for controller cluster listeners to start.
if (serviceConfig.isControllerClusterListenerEnabled()) {
controllerClusterListener.awaitRunning();
}
} catch (Exception e) {
// Log for diagnosis, then propagate the original exception unchanged.
log.error("Failed trying to start controller services", e);
throw e;
} finally {
LoggerHelpers.traceLeave(log, this.objectId, "startUp", traceId);
}
}
use of io.pravega.shared.health.HealthServiceManager in project pravega by pravega.
the class ServiceStarter method start.
// endregion
// region Service Operation
/**
 * Starts this service instance.
 *
 * Steps, in order: the health service manager is created and started; the metrics provider is initialized
 * when statistics are enabled; the ZooKeeper client and the service builder are initialized; the stream
 * segment and table store services are created; the auto-scale monitor is created; the data-plane
 * connection listener (and, when enabled, the admin listener) starts listening; health contributors are
 * registered; and finally the REST server is started when enabled.
 *
 * @throws Exception if any of the startup steps fails.
 */
public void start() throws Exception {
    Exceptions.checkNotClosed(this.closed, this);

    // Announce the step before performing it, so that a failure while creating or starting the
    // health service is preceded by the matching log line.
    log.info("Initializing HealthService ...");
    healthServiceManager = new HealthServiceManager(serviceConfig.getHealthCheckInterval());
    healthServiceManager.start();

    MetricsConfig metricsConfig = builderConfig.getConfig(MetricsConfig::builder);
    if (metricsConfig.isEnableStatistics()) {
        log.info("Initializing metrics provider ...");
        MetricsProvider.initialize(metricsConfig);
        statsProvider = MetricsProvider.getMetricsProvider();
        statsProvider.start();
    }

    log.info("Initializing ZooKeeper Client ...");
    this.zkClient = createZKClient();

    log.info("Initializing Service Builder ...");
    this.serviceBuilder.initialize();

    log.info("Creating StreamSegmentService ...");
    StreamSegmentStore service = this.serviceBuilder.createStreamSegmentService();

    log.info("Creating TableStoreService ...");
    TableStore tableStoreService = this.serviceBuilder.createTableStoreService();

    log.info("Creating Segment Stats recorder ...");
    // Build the auto-scaler configuration once and share it between the monitor, the token verifier
    // and the configuration log line below.
    AutoScalerConfig autoScalerConfig = builderConfig.getConfig(AutoScalerConfig::builder);
    autoScaleMonitor = new AutoScaleMonitor(service, autoScalerConfig);

    // The token verifier stays null when auth is disabled; it is passed as-is to the listeners.
    TokenVerifierImpl tokenVerifier = null;
    if (autoScalerConfig.isAuthEnabled()) {
        tokenVerifier = new TokenVerifierImpl(autoScalerConfig.getTokenSigningKey());
    }

    // Log the configuration
    log.info(serviceConfig.toString());
    log.info(autoScalerConfig.toString());

    this.listener = new PravegaConnectionListener(
            this.serviceConfig.isEnableTls(),
            this.serviceConfig.isEnableTlsReload(),
            this.serviceConfig.getListeningIPAddress(),
            this.serviceConfig.getListeningPort(),
            service,
            tableStoreService,
            autoScaleMonitor.getStatsRecorder(),
            autoScaleMonitor.getTableSegmentStatsRecorder(),
            tokenVerifier,
            this.serviceConfig.getCertFile(),
            this.serviceConfig.getKeyFile(),
            this.serviceConfig.isReplyWithStackTraceOnError(),
            serviceBuilder.getLowPriorityExecutor(),
            this.serviceConfig.getTlsProtocolVersion(),
            healthServiceManager);
    this.listener.startListening();
    log.info("PravegaConnectionListener started successfully.");

    if (serviceConfig.isEnableAdminGateway()) {
        this.adminListener = new AdminConnectionListener(
                this.serviceConfig.isEnableTls(),
                this.serviceConfig.isEnableTlsReload(),
                this.serviceConfig.getListeningIPAddress(),
                this.serviceConfig.getAdminGatewayPort(),
                service,
                tableStoreService,
                tokenVerifier,
                this.serviceConfig.getCertFile(),
                this.serviceConfig.getKeyFile(),
                this.serviceConfig.getTlsProtocolVersion(),
                healthServiceManager);
        this.adminListener.startListening();
        log.info("AdminConnectionListener started successfully.");
    }
    log.info("StreamSegmentService started.");

    // These contributors are registered on the already-started health service manager.
    healthServiceManager.register(new ZKHealthContributor(zkClient));
    healthServiceManager.register(new CacheManagerHealthContributor(serviceBuilder.getCacheManager()));
    healthServiceManager.register(new SegmentContainerRegistryHealthContributor(serviceBuilder.getSegmentContainerRegistry()));

    if (this.serviceConfig.isRestServerEnabled()) {
        log.info("Initializing RESTServer ...");
        List<Object> resources = new ArrayList<>();
        resources.add(new HealthImpl(new AuthHandlerManager(serviceConfig.getRestServerConfig()), healthServiceManager.getEndpoint()));
        // The Prometheus resource is added only when the metrics provider exposes one.
        MetricsProvider.getMetricsProvider().prometheusResource().ifPresent(resources::add);
        restServer = new RESTServer(serviceConfig.getRestServerConfig(), Set.copyOf(resources));
        restServer.startAsync();
        restServer.awaitRunning();
    }
}
use of io.pravega.shared.health.HealthServiceManager in project pravega by pravega.
the class PravegaConnectionListenerTest method testHealth.
/**
 * Tests the health status reported through a {@link PravegaConnectionListener}: the snapshot obtained from the
 * listener's health service manager is expected to be UP once the listener is listening, and DOWN after the
 * listener has been closed.
 */
@Test
public void testHealth() {
    @Cleanup
    HealthServiceManager healthServiceManager = new HealthServiceManager(Duration.ofSeconds(2));
    healthServiceManager.start();

    final int listenPort = TestUtils.getAvailableListenPort();
    final String[] tlsProtocolVersions = TLS_PROTOCOL_VERSION.getDefaultValue().split(",");
    @Cleanup
    PravegaConnectionListener listener = new PravegaConnectionListener(
            false,
            false,
            "localhost",
            listenPort,
            mock(StreamSegmentStore.class),
            mock(TableStore.class),
            SegmentStatsRecorder.noOp(),
            TableSegmentStatsRecorder.noOp(),
            new PassingTokenVerifier(),
            null,
            null,
            true,
            NoOpScheduledExecutor.get(),
            tlsProtocolVersions,
            healthServiceManager);

    // While the listener is listening, the snapshot must be UP.
    listener.startListening();
    Health whileListening = listener.getHealthServiceManager().getHealthSnapshot();
    Assert.assertEquals("HealthContributor should report an 'UP' Status.", Status.UP, whileListening.getStatus());

    // After an explicit close, a fresh snapshot must be DOWN.
    listener.close();
    Health afterClose = listener.getHealthServiceManager().getHealthSnapshot();
    Assert.assertEquals("HealthContributor should report an 'DOWN' Status.", Status.DOWN, afterClose.getStatus());
}
use of io.pravega.shared.health.HealthServiceManager in project pravega by pravega.
the class HealthTests method setup.
/**
 * Prepares the fixture for each test: loads the server configuration, creates a health service manager with a
 * 100 ms interval, exposes its endpoint through a REST server hosting a single {@link HealthImpl} resource,
 * starts both, and creates the Jersey client used by the tests.
 *
 * @throws Exception if any part of the fixture fails to initialize.
 */
@Before
public void setup() throws Exception {
    serverConfig = getServerConfig();
    healthServiceManager = new HealthServiceManager(Duration.ofMillis(100));

    // The REST server is constructed (but not yet started) before the health service manager is started.
    HealthImpl healthResource = new HealthImpl(null, healthServiceManager.getEndpoint());
    restServer = new RESTServer(serverConfig, Set.of(healthResource));

    healthServiceManager.start();

    // Block until the REST server is running before handing out a client.
    restServer.startAsync();
    restServer.awaitRunning();

    client = createJerseyClient();
}
Aggregations