Search in sources :

Example 1 with SegmentContainerMonitorHealthContributor

use of io.pravega.controller.server.health.SegmentContainerMonitorHealthContributor in project pravega by pravega.

the class ControllerServiceStarter method startUp.

@Override
protected void startUp() {
    long traceId = LoggerHelpers.traceEnterWithContext(log, this.objectId, "startUp");
    log.info("Initiating controller service startUp");
    log.info("Controller serviceConfig = {}", serviceConfig.toString());
    log.info("Event processors enabled = {}", serviceConfig.getEventProcessorConfig().isPresent());
    log.info("Cluster listener enabled = {}", serviceConfig.isControllerClusterListenerEnabled());
    log.info("    Host monitor enabled = {}", serviceConfig.getHostMonitorConfig().isHostMonitorEnabled());
    log.info("     gRPC server enabled = {}", serviceConfig.getGRPCServerConfig().isPresent());
    log.info("     REST server enabled = {}", serviceConfig.getRestServerConfig().isPresent());
    final BucketStore bucketStore;
    final TaskMetadataStore taskMetadataStore;
    final HostControllerStore hostStore;
    final CheckpointStore checkpointStore;
    try {
        // Initialize the executor service.
        controllerExecutor = ExecutorServiceHelpers.newScheduledThreadPool(serviceConfig.getThreadPoolSize(), "controllerpool");
        eventExecutor = ExecutorServiceHelpers.newScheduledThreadPool(serviceConfig.getThreadPoolSize(), "eventprocessor");
        retentionExecutor = ExecutorServiceHelpers.newScheduledThreadPool(Config.RETENTION_THREAD_POOL_SIZE, "retentionpool");
        watermarkingExecutor = ExecutorServiceHelpers.newScheduledThreadPool(Config.WATERMARKING_THREAD_POOL_SIZE, "watermarkingpool");
        bucketStore = StreamStoreFactory.createBucketStore(storeClient, controllerExecutor);
        log.info("Created the bucket store.");
        taskMetadataStore = TaskStoreFactory.createStore(storeClient, controllerExecutor);
        log.info("Created the task store.");
        hostStore = HostStoreFactory.createStore(serviceConfig.getHostMonitorConfig(), storeClient);
        log.info("Created the host store.");
        checkpointStore = CheckpointStoreFactory.create(storeClient);
        log.info("Created the checkpoint store.");
        // Initialize Stream and Transaction metrics.
        StreamMetrics.initialize();
        TransactionMetrics.initialize();
        // On each controller process restart, we use a fresh hostId,
        // which is a combination of hostname and random GUID.
        String hostName = getHostName();
        Host host = new Host(hostName, getPort(), UUID.randomUUID().toString());
        // Create a RequestTracker instance to trace client requests end-to-end.
        GRPCServerConfig grpcServerConfig = serviceConfig.getGRPCServerConfig().get();
        RequestTracker requestTracker = new RequestTracker(grpcServerConfig.isRequestTracingEnabled());
        // Create a Health Service Manager instance.
        healthServiceManager = new HealthServiceManager(serviceConfig.getHealthCheckFrequency());
        if (serviceConfig.getHostMonitorConfig().isHostMonitorEnabled()) {
            // Start the Segment Container Monitor.
            monitor = new SegmentContainerMonitor(hostStore, (CuratorFramework) storeClient.getClient(), new UniformContainerBalancer(), serviceConfig.getHostMonitorConfig().getHostMonitorMinRebalanceInterval());
            monitor.startAsync();
            log.info("Started Segment Container Monitor service.");
            SegmentContainerMonitorHealthContributor segmentContainerMonitorHC = new SegmentContainerMonitorHealthContributor("segmentContainerMonitor", monitor);
            healthServiceManager.register(segmentContainerMonitorHC);
        }
        // This client config is used by the segment store helper (SegmentHelper) to connect to the segment store.
        ClientConfig.ClientConfigBuilder clientConfigBuilder = ClientConfig.builder().controllerURI(URI.create((grpcServerConfig.isTlsEnabled() ? "tls://" : "tcp://") + "localhost:" + grpcServerConfig.getPort())).trustStore(grpcServerConfig.getTlsTrustStore()).validateHostName(false);
        Optional<Boolean> tlsEnabledForSegmentStore = BooleanUtils.extract(serviceConfig.getTlsEnabledForSegmentStore());
        if (tlsEnabledForSegmentStore.isPresent()) {
            clientConfigBuilder.enableTlsToSegmentStore(tlsEnabledForSegmentStore.get());
        }
        // Use one connection per Segment Store to save up resources.
        ClientConfig clientConfig = clientConfigBuilder.maxConnectionsPerSegmentStore(1).build();
        connectionFactory = connectionFactoryRef.orElseGet(() -> new SocketConnectionFactoryImpl(clientConfig));
        connectionPool = new ConnectionPoolImpl(clientConfig, connectionFactory);
        segmentHelper = segmentHelperRef.orElseGet(() -> new SegmentHelper(connectionPool, hostStore, controllerExecutor));
        GrpcAuthHelper authHelper = new GrpcAuthHelper(serviceConfig.getGRPCServerConfig().get().isAuthorizationEnabled(), grpcServerConfig.getTokenSigningKey(), grpcServerConfig.getAccessTokenTTLInSeconds());
        streamStore = streamMetadataStoreRef.orElseGet(() -> StreamStoreFactory.createStore(storeClient, segmentHelper, authHelper, controllerExecutor));
        log.info("Created the stream store.");
        streamMetadataTasks = new StreamMetadataTasks(streamStore, bucketStore, taskMetadataStore, segmentHelper, controllerExecutor, eventExecutor, host.getHostId(), authHelper, serviceConfig.getRetentionFrequency().toMillis());
        streamTransactionMetadataTasks = new StreamTransactionMetadataTasks(streamStore, segmentHelper, controllerExecutor, eventExecutor, host.getHostId(), serviceConfig.getTimeoutServiceConfig(), authHelper);
        BucketServiceFactory bucketServiceFactory = new BucketServiceFactory(host.getHostId(), bucketStore, 1000);
        Duration executionDurationRetention = serviceConfig.getRetentionFrequency();
        PeriodicRetention retentionWork = new PeriodicRetention(streamStore, streamMetadataTasks, retentionExecutor, requestTracker);
        retentionService = bucketServiceFactory.createRetentionService(executionDurationRetention, retentionWork::retention, retentionExecutor);
        retentionService.startAsync();
        retentionService.awaitRunning();
        log.info("Started background periodic service for Retention.");
        RetentionServiceHealthContributor retentionServiceHC = new RetentionServiceHealthContributor("retentionService", retentionService);
        healthServiceManager.register(retentionServiceHC);
        Duration executionDurationWatermarking = Duration.ofSeconds(Config.MINIMUM_WATERMARKING_FREQUENCY_IN_SECONDS);
        watermarkingWork = new PeriodicWatermarking(streamStore, bucketStore, clientConfig, watermarkingExecutor, requestTracker);
        watermarkingService = bucketServiceFactory.createWatermarkingService(executionDurationWatermarking, watermarkingWork::watermark, watermarkingExecutor);
        watermarkingService.startAsync();
        watermarkingService.awaitRunning();
        log.info("Started background periodic service for Watermarking.");
        WatermarkingServiceHealthContributor watermarkingServiceHC = new WatermarkingServiceHealthContributor("watermarkingService", watermarkingService);
        healthServiceManager.register(watermarkingServiceHC);
        // Controller has a mechanism to track the currently active controller host instances. On detecting a failure of
        // any controller instance, the failure detector stores the failed HostId in a failed hosts directory (FH), and
        // invokes the taskSweeper.sweepOrphanedTasks for each failed host. When all resources under the failed hostId
        // are processed and deleted, that failed HostId is removed from FH folder.
        // Moreover, on controller process startup, it detects any hostIds not in the currently active set of
        // controllers and starts sweeping tasks orphaned by those hostIds.
        TaskSweeper taskSweeper = new TaskSweeper(taskMetadataStore, host.getHostId(), controllerExecutor, streamMetadataTasks);
        TxnSweeper txnSweeper = new TxnSweeper(streamStore, streamTransactionMetadataTasks, serviceConfig.getTimeoutServiceConfig().getMaxLeaseValue(), controllerExecutor);
        RequestSweeper requestSweeper = new RequestSweeper(streamStore, controllerExecutor, streamMetadataTasks);
        if (serviceConfig.isControllerClusterListenerEnabled()) {
            cluster = new ClusterZKImpl((CuratorFramework) storeClient.getClient(), ClusterType.CONTROLLER);
        }
        kvtMetadataStore = kvtMetaStoreRef.orElseGet(() -> KVTableStoreFactory.createStore(storeClient, segmentHelper, authHelper, controllerExecutor, streamStore));
        kvtMetadataTasks = new TableMetadataTasks(kvtMetadataStore, segmentHelper, controllerExecutor, eventExecutor, host.getHostId(), authHelper);
        controllerService = new ControllerService(kvtMetadataStore, kvtMetadataTasks, streamStore, bucketStore, streamMetadataTasks, streamTransactionMetadataTasks, segmentHelper, controllerExecutor, cluster, requestTracker);
        // Setup event processors.
        setController(new LocalController(controllerService, grpcServerConfig.isAuthorizationEnabled(), grpcServerConfig.getTokenSigningKey()));
        CompletableFuture<Void> eventProcessorFuture = CompletableFuture.completedFuture(null);
        if (serviceConfig.getEventProcessorConfig().isPresent()) {
            // Create ControllerEventProcessor object.
            controllerEventProcessors = new ControllerEventProcessors(host.getHostId(), serviceConfig.getEventProcessorConfig().get(), localController, checkpointStore, streamStore, bucketStore, connectionPool, streamMetadataTasks, streamTransactionMetadataTasks, kvtMetadataStore, kvtMetadataTasks, eventExecutor);
            // Bootstrap and start it asynchronously.
            eventProcessorFuture = controllerEventProcessors.bootstrap(streamTransactionMetadataTasks, streamMetadataTasks, kvtMetadataTasks).thenAcceptAsync(x -> controllerEventProcessors.startAsync(), eventExecutor);
            EventProcessorHealthContributor eventProcessorHC = new EventProcessorHealthContributor("eventProcessor", controllerEventProcessors);
            healthServiceManager.register(eventProcessorHC);
        }
        // Setup and start controller cluster listener after all sweepers have been initialized.
        if (serviceConfig.isControllerClusterListenerEnabled()) {
            List<FailoverSweeper> failoverSweepers = new ArrayList<>();
            failoverSweepers.add(taskSweeper);
            failoverSweepers.add(txnSweeper);
            failoverSweepers.add(requestSweeper);
            if (serviceConfig.getEventProcessorConfig().isPresent()) {
                assert controllerEventProcessors != null;
                failoverSweepers.add(controllerEventProcessors);
            }
            controllerClusterListener = new ControllerClusterListener(host, cluster, controllerExecutor, failoverSweepers);
            controllerClusterListener.startAsync();
            ClusterListenerHealthContributor clusterListenerHC = new ClusterListenerHealthContributor("clusterListener", controllerClusterListener);
            healthServiceManager.register(clusterListenerHC);
        }
        // Start the Health Service.
        healthServiceManager.start();
        // Start RPC server.
        if (serviceConfig.getGRPCServerConfig().isPresent()) {
            grpcServer = new GRPCServer(controllerService, grpcServerConfig, requestTracker);
            grpcServer.startAsync();
            grpcServer.awaitRunning();
            GRPCServerHealthContributor grpcServerHC = new GRPCServerHealthContributor("GRPCServer", grpcServer);
            healthServiceManager.register(grpcServerHC);
        }
        // Start REST server.
        if (serviceConfig.getRestServerConfig().isPresent()) {
            List<Object> resources = new ArrayList<>();
            resources.add(new StreamMetadataResourceImpl(this.localController, controllerService, grpcServer.getAuthHandlerManager(), connectionFactory, clientConfig));
            resources.add(new HealthImpl(grpcServer.getAuthHandlerManager(), healthServiceManager.getEndpoint()));
            resources.add(new PingImpl());
            MetricsProvider.getMetricsProvider().prometheusResource().ifPresent(resources::add);
            restServer = new RESTServer(serviceConfig.getRestServerConfig().get(), Set.copyOf(resources));
            restServer.startAsync();
            restServer.awaitRunning();
        }
        // Wait for controller event processors to start.
        if (serviceConfig.getEventProcessorConfig().isPresent()) {
            // if store client has failed because of session expiration, there are two possibilities where
            // controllerEventProcessors.awaitRunning may be stuck forever -
            // 1. stream creation is retried indefinitely and cannot complete because of zk session expiration
            // 2. event writer after stream creation throws exception.
            // In both of above cases controllerEventProcessors.startAsync may not get called.
            CompletableFuture.anyOf(storeClientFailureFuture, eventProcessorFuture.thenAccept(x -> controllerEventProcessors.awaitRunning())).join();
        }
        // Wait for controller cluster listeners to start.
        if (serviceConfig.isControllerClusterListenerEnabled()) {
            controllerClusterListener.awaitRunning();
        }
    } catch (Exception e) {
        log.error("Failed trying to start controller services", e);
        throw e;
    } finally {
        LoggerHelpers.traceLeave(log, this.objectId, "startUp", traceId);
    }
}
Also used : GRPCServer(io.pravega.controller.server.rpc.grpc.GRPCServer) ControllerEventProcessors(io.pravega.controller.server.eventProcessor.ControllerEventProcessors) CheckpointStore(io.pravega.controller.store.checkpoint.CheckpointStore) StringUtils(org.apache.commons.lang3.StringUtils) InetAddress(java.net.InetAddress) Cluster(io.pravega.common.cluster.Cluster) LocalController(io.pravega.controller.server.eventProcessor.LocalController) HealthServiceManager(io.pravega.shared.health.HealthServiceManager) TaskMetadataStore(io.pravega.controller.store.task.TaskMetadataStore) TxnSweeper(io.pravega.controller.task.Stream.TxnSweeper) Duration(java.time.Duration) PeriodicRetention(io.pravega.controller.server.bucket.PeriodicRetention) WatermarkingServiceHealthContributor(io.pravega.controller.server.health.WatermarkingServiceHealthContributor) URI(java.net.URI) ClusterZKImpl(io.pravega.common.cluster.zkImpl.ClusterZKImpl) ControllerClusterListener(io.pravega.controller.fault.ControllerClusterListener) PeriodicWatermarking(io.pravega.controller.server.bucket.PeriodicWatermarking) RESTServer(io.pravega.shared.rest.RESTServer) Set(java.util.Set) RequestSweeper(io.pravega.controller.task.Stream.RequestSweeper) RequestTracker(io.pravega.common.tracing.RequestTracker) UUID(java.util.UUID) KVTableMetadataStore(io.pravega.controller.store.kvtable.KVTableMetadataStore) CountDownLatch(java.util.concurrent.CountDownLatch) MetricsProvider(io.pravega.shared.metrics.MetricsProvider) RetentionServiceHealthContributor(io.pravega.controller.server.health.RetentionServiceHealthContributor) List(java.util.List) Slf4j(lombok.extern.slf4j.Slf4j) CuratorFramework(org.apache.curator.framework.CuratorFramework) ClusterType(io.pravega.common.cluster.ClusterType) Config(io.pravega.controller.util.Config) GRPCServerHealthContributor(io.pravega.controller.server.health.GRPCServerHealthContributor) Optional(java.util.Optional) StoreType(io.pravega.controller.store.client.StoreType) BucketServiceFactory(io.pravega.controller.server.bucket.BucketServiceFactory) StreamMetadataStore(io.pravega.controller.store.stream.StreamMetadataStore) GrpcAuthHelper(io.pravega.controller.server.security.auth.GrpcAuthHelper) GRPCServerConfig(io.pravega.controller.server.rpc.grpc.GRPCServerConfig) KVTableStoreFactory(io.pravega.controller.store.kvtable.KVTableStoreFactory) StreamMetrics(io.pravega.controller.metrics.StreamMetrics) StreamStoreFactory(io.pravega.controller.store.stream.StreamStoreFactory) TransactionMetrics(io.pravega.controller.metrics.TransactionMetrics) Getter(lombok.Getter) ConnectionFactory(io.pravega.client.connection.impl.ConnectionFactory) BooleanUtils(io.pravega.common.util.BooleanUtils) CheckpointStoreFactory(io.pravega.controller.store.checkpoint.CheckpointStoreFactory) CompletableFuture(java.util.concurrent.CompletableFuture) ConnectionPoolImpl(io.pravega.client.connection.impl.ConnectionPoolImpl) PingImpl(io.pravega.controller.server.rest.resources.PingImpl) EventProcessorHealthContributor(io.pravega.controller.server.health.EventProcessorHealthContributor) StoreClient(io.pravega.controller.store.client.StoreClient) ArrayList(java.util.ArrayList) BucketStore(io.pravega.controller.store.stream.BucketStore) UniformContainerBalancer(io.pravega.controller.fault.UniformContainerBalancer) AccessLevel(lombok.AccessLevel) AbstractIdleService(com.google.common.util.concurrent.AbstractIdleService) BucketManager(io.pravega.controller.server.bucket.BucketManager) ScheduledExecutorService(java.util.concurrent.ScheduledExecutorService) StreamMetadataTasks(io.pravega.controller.task.Stream.StreamMetadataTasks) SocketConnectionFactoryImpl(io.pravega.client.connection.impl.SocketConnectionFactoryImpl) Host(io.pravega.common.cluster.Host) LoggerHelpers(io.pravega.common.LoggerHelpers) Callbacks(io.pravega.common.function.Callbacks) ConnectionPool(io.pravega.client.connection.impl.ConnectionPool) TableMetadataTasks(io.pravega.controller.task.KeyValueTable.TableMetadataTasks) UnknownHostException(java.net.UnknownHostException) FailoverSweeper(io.pravega.controller.fault.FailoverSweeper) HostStoreFactory(io.pravega.controller.store.host.HostStoreFactory) TimeUnit(java.util.concurrent.TimeUnit) TaskStoreFactory(io.pravega.controller.store.task.TaskStoreFactory) SegmentContainerMonitorHealthContributor(io.pravega.controller.server.health.SegmentContainerMonitorHealthContributor) HealthImpl(io.pravega.shared.health.bindings.resources.HealthImpl) HostControllerStore(io.pravega.controller.store.host.HostControllerStore) StreamTransactionMetadataTasks(io.pravega.controller.task.Stream.StreamTransactionMetadataTasks) TaskSweeper(io.pravega.controller.task.TaskSweeper) SegmentContainerMonitor(io.pravega.controller.fault.SegmentContainerMonitor) ClusterListenerHealthContributor(io.pravega.controller.server.health.ClusterListenerHealthContributor) StreamMetadataResourceImpl(io.pravega.controller.server.rest.resources.StreamMetadataResourceImpl) VisibleForTesting(com.google.common.annotations.VisibleForTesting) ExecutorServiceHelpers(io.pravega.common.concurrent.ExecutorServiceHelpers) ClientConfig(io.pravega.client.ClientConfig) GRPCServerHealthContributor(io.pravega.controller.server.health.GRPCServerHealthContributor) ArrayList(java.util.ArrayList) TxnSweeper(io.pravega.controller.task.Stream.TxnSweeper) CuratorFramework(org.apache.curator.framework.CuratorFramework) BucketServiceFactory(io.pravega.controller.server.bucket.BucketServiceFactory) ControllerEventProcessors(io.pravega.controller.server.eventProcessor.ControllerEventProcessors) HostControllerStore(io.pravega.controller.store.host.HostControllerStore) StreamTransactionMetadataTasks(io.pravega.controller.task.Stream.StreamTransactionMetadataTasks) TaskSweeper(io.pravega.controller.task.TaskSweeper) StreamMetadataTasks(io.pravega.controller.task.Stream.StreamMetadataTasks) PingImpl(io.pravega.controller.server.rest.resources.PingImpl) UniformContainerBalancer(io.pravega.controller.fault.UniformContainerBalancer) HealthServiceManager(io.pravega.shared.health.HealthServiceManager) TaskMetadataStore(io.pravega.controller.store.task.TaskMetadataStore) SegmentContainerMonitorHealthContributor(io.pravega.controller.server.health.SegmentContainerMonitorHealthContributor) Duration(java.time.Duration) SocketConnectionFactoryImpl(io.pravega.client.connection.impl.SocketConnectionFactoryImpl) EventProcessorHealthContributor(io.pravega.controller.server.health.EventProcessorHealthContributor) ControllerClusterListener(io.pravega.controller.fault.ControllerClusterListener) RetentionServiceHealthContributor(io.pravega.controller.server.health.RetentionServiceHealthContributor) StreamMetadataResourceImpl(io.pravega.controller.server.rest.resources.StreamMetadataResourceImpl) ClusterZKImpl(io.pravega.common.cluster.zkImpl.ClusterZKImpl) ConnectionPoolImpl(io.pravega.client.connection.impl.ConnectionPoolImpl) GRPCServerConfig(io.pravega.controller.server.rpc.grpc.GRPCServerConfig) PeriodicRetention(io.pravega.controller.server.bucket.PeriodicRetention) RequestSweeper(io.pravega.controller.task.Stream.RequestSweeper) CheckpointStore(io.pravega.controller.store.checkpoint.CheckpointStore) RequestTracker(io.pravega.common.tracing.RequestTracker) SegmentContainerMonitor(io.pravega.controller.fault.SegmentContainerMonitor) FailoverSweeper(io.pravega.controller.fault.FailoverSweeper) GRPCServer(io.pravega.controller.server.rpc.grpc.GRPCServer) LocalController(io.pravega.controller.server.eventProcessor.LocalController) HealthImpl(io.pravega.shared.health.bindings.resources.HealthImpl) TableMetadataTasks(io.pravega.controller.task.KeyValueTable.TableMetadataTasks) RESTServer(io.pravega.shared.rest.RESTServer) BucketStore(io.pravega.controller.store.stream.BucketStore) ClientConfig(io.pravega.client.ClientConfig) ClusterListenerHealthContributor(io.pravega.controller.server.health.ClusterListenerHealthContributor) Host(io.pravega.common.cluster.Host) UnknownHostException(java.net.UnknownHostException) GrpcAuthHelper(io.pravega.controller.server.security.auth.GrpcAuthHelper) WatermarkingServiceHealthContributor(io.pravega.controller.server.health.WatermarkingServiceHealthContributor) PeriodicWatermarking(io.pravega.controller.server.bucket.PeriodicWatermarking)

Aggregations

VisibleForTesting (com.google.common.annotations.VisibleForTesting)1 AbstractIdleService (com.google.common.util.concurrent.AbstractIdleService)1 ClientConfig (io.pravega.client.ClientConfig)1 ConnectionFactory (io.pravega.client.connection.impl.ConnectionFactory)1 ConnectionPool (io.pravega.client.connection.impl.ConnectionPool)1 ConnectionPoolImpl (io.pravega.client.connection.impl.ConnectionPoolImpl)1 SocketConnectionFactoryImpl (io.pravega.client.connection.impl.SocketConnectionFactoryImpl)1 LoggerHelpers (io.pravega.common.LoggerHelpers)1 Cluster (io.pravega.common.cluster.Cluster)1 ClusterType (io.pravega.common.cluster.ClusterType)1 Host (io.pravega.common.cluster.Host)1 ClusterZKImpl (io.pravega.common.cluster.zkImpl.ClusterZKImpl)1 ExecutorServiceHelpers (io.pravega.common.concurrent.ExecutorServiceHelpers)1 Callbacks (io.pravega.common.function.Callbacks)1 RequestTracker (io.pravega.common.tracing.RequestTracker)1 BooleanUtils (io.pravega.common.util.BooleanUtils)1 ControllerClusterListener (io.pravega.controller.fault.ControllerClusterListener)1 FailoverSweeper (io.pravega.controller.fault.FailoverSweeper)1 SegmentContainerMonitor (io.pravega.controller.fault.SegmentContainerMonitor)1 UniformContainerBalancer (io.pravega.controller.fault.UniformContainerBalancer)1