Use of io.pravega.controller.task.TaskSweeper in project pravega by pravega.
The class ControllerServiceStarter, method startUp.
/**
 * Builds and starts the controller's components, in this order:
 * <ol>
 * <li>logs the effective service configuration;</li>
 * <li>creates the four scheduled thread pools and the bucket, task, host and checkpoint stores;</li>
 * <li>initializes stream/transaction metrics, builds this process's {@code Host} identity, the
 *     {@code RequestTracker} and the {@code HealthServiceManager};</li>
 * <li>if host monitoring is enabled, starts the {@code SegmentContainerMonitor};</li>
 * <li>builds the {@code ClientConfig}, connection factory/pool, {@code SegmentHelper}, stream store and the
 *     stream / transaction metadata tasks (factory, helper and stores come from the corresponding
 *     {@code *Ref} holder when it has a value, otherwise they are created here);</li>
 * <li>starts the retention and watermarking services and blocks until each reports running;</li>
 * <li>creates the failover sweepers, the key-value-table store/tasks, the {@code ControllerService} and
 *     the local controller;</li>
 * <li>if configured, bootstraps the event processors asynchronously and starts the cluster listener;</li>
 * <li>starts the health service, then the gRPC and REST servers (blocking until each is running);</li>
 * <li>finally waits for the event processors (or a store-client failure) and for the cluster listener.</li>
 * </ol>
 * A health contributor is registered with the {@code HealthServiceManager} for each of the monitor,
 * retention, watermarking, event-processor, cluster-listener and gRPC components that is started.
 *
 * Any exception raised along the way is logged and rethrown unchanged; the trace-leave record is written
 * in all cases.
 */
@Override
protected void startUp() {
long traceId = LoggerHelpers.traceEnterWithContext(log, this.objectId, "startUp");
log.info("Initiating controller service startUp");
log.info("Controller serviceConfig = {}", serviceConfig.toString());
log.info("Event processors enabled = {}", serviceConfig.getEventProcessorConfig().isPresent());
log.info("Cluster listener enabled = {}", serviceConfig.isControllerClusterListenerEnabled());
log.info(" Host monitor enabled = {}", serviceConfig.getHostMonitorConfig().isHostMonitorEnabled());
log.info(" gRPC server enabled = {}", serviceConfig.getGRPCServerConfig().isPresent());
log.info(" REST server enabled = {}", serviceConfig.getRestServerConfig().isPresent());
// Declared outside the try block and assigned exactly once inside it.
final BucketStore bucketStore;
final TaskMetadataStore taskMetadataStore;
final HostControllerStore hostStore;
final CheckpointStore checkpointStore;
try {
// Initialize the executor service.
controllerExecutor = ExecutorServiceHelpers.newScheduledThreadPool(serviceConfig.getThreadPoolSize(), "controllerpool");
eventExecutor = ExecutorServiceHelpers.newScheduledThreadPool(serviceConfig.getThreadPoolSize(), "eventprocessor");
retentionExecutor = ExecutorServiceHelpers.newScheduledThreadPool(Config.RETENTION_THREAD_POOL_SIZE, "retentionpool");
watermarkingExecutor = ExecutorServiceHelpers.newScheduledThreadPool(Config.WATERMARKING_THREAD_POOL_SIZE, "watermarkingpool");
bucketStore = StreamStoreFactory.createBucketStore(storeClient, controllerExecutor);
log.info("Created the bucket store.");
taskMetadataStore = TaskStoreFactory.createStore(storeClient, controllerExecutor);
log.info("Created the task store.");
hostStore = HostStoreFactory.createStore(serviceConfig.getHostMonitorConfig(), storeClient);
log.info("Created the host store.");
checkpointStore = CheckpointStoreFactory.create(storeClient);
log.info("Created the checkpoint store.");
// Initialize Stream and Transaction metrics.
StreamMetrics.initialize();
TransactionMetrics.initialize();
// On each controller process restart, we use a fresh hostId,
// which is a combination of hostname and random GUID.
String hostName = getHostName();
Host host = new Host(hostName, getPort(), UUID.randomUUID().toString());
// Create a RequestTracker instance to trace client requests end-to-end.
// NOTE(review): get() is called here without an isPresent() check, while the "Start RPC server" step below
// does guard on isPresent(). If the gRPC config can really be absent, this line fails first - confirm.
GRPCServerConfig grpcServerConfig = serviceConfig.getGRPCServerConfig().get();
RequestTracker requestTracker = new RequestTracker(grpcServerConfig.isRequestTracingEnabled());
// Create a Health Service Manager instance.
healthServiceManager = new HealthServiceManager(serviceConfig.getHealthCheckFrequency());
if (serviceConfig.getHostMonitorConfig().isHostMonitorEnabled()) {
// Start the Segment Container Monitor.
monitor = new SegmentContainerMonitor(hostStore, (CuratorFramework) storeClient.getClient(), new UniformContainerBalancer(), serviceConfig.getHostMonitorConfig().getHostMonitorMinRebalanceInterval());
// Started asynchronously; this method does not wait for the monitor to reach the running state.
monitor.startAsync();
log.info("Started Segment Container Monitor service.");
SegmentContainerMonitorHealthContributor segmentContainerMonitorHC = new SegmentContainerMonitorHealthContributor("segmentContainerMonitor", monitor);
healthServiceManager.register(segmentContainerMonitorHC);
}
// This client config is used by the segment store helper (SegmentHelper) to connect to the segment store.
// The controller URI points at localhost on the gRPC port, with the scheme chosen by the gRPC TLS setting.
ClientConfig.ClientConfigBuilder clientConfigBuilder = ClientConfig.builder().controllerURI(URI.create((grpcServerConfig.isTlsEnabled() ? "tls://" : "tcp://") + "localhost:" + grpcServerConfig.getPort())).trustStore(grpcServerConfig.getTlsTrustStore()).validateHostName(false);
// Only override the builder's segment-store TLS setting when a value could be extracted from the config.
Optional<Boolean> tlsEnabledForSegmentStore = BooleanUtils.extract(serviceConfig.getTlsEnabledForSegmentStore());
if (tlsEnabledForSegmentStore.isPresent()) {
clientConfigBuilder.enableTlsToSegmentStore(tlsEnabledForSegmentStore.get());
}
// Use one connection per Segment Store to save up resources.
ClientConfig clientConfig = clientConfigBuilder.maxConnectionsPerSegmentStore(1).build();
// Each *Ref holder wins when it carries a value; otherwise the default implementation is created here.
connectionFactory = connectionFactoryRef.orElseGet(() -> new SocketConnectionFactoryImpl(clientConfig));
connectionPool = new ConnectionPoolImpl(clientConfig, connectionFactory);
segmentHelper = segmentHelperRef.orElseGet(() -> new SegmentHelper(connectionPool, hostStore, controllerExecutor));
GrpcAuthHelper authHelper = new GrpcAuthHelper(serviceConfig.getGRPCServerConfig().get().isAuthorizationEnabled(), grpcServerConfig.getTokenSigningKey(), grpcServerConfig.getAccessTokenTTLInSeconds());
streamStore = streamMetadataStoreRef.orElseGet(() -> StreamStoreFactory.createStore(storeClient, segmentHelper, authHelper, controllerExecutor));
log.info("Created the stream store.");
streamMetadataTasks = new StreamMetadataTasks(streamStore, bucketStore, taskMetadataStore, segmentHelper, controllerExecutor, eventExecutor, host.getHostId(), authHelper, serviceConfig.getRetentionFrequency().toMillis());
streamTransactionMetadataTasks = new StreamTransactionMetadataTasks(streamStore, segmentHelper, controllerExecutor, eventExecutor, host.getHostId(), serviceConfig.getTimeoutServiceConfig(), authHelper);
// NOTE(review): the meaning of the literal 1000 is not visible from this method - confirm against
// BucketServiceFactory before changing it.
BucketServiceFactory bucketServiceFactory = new BucketServiceFactory(host.getHostId(), bucketStore, 1000);
Duration executionDurationRetention = serviceConfig.getRetentionFrequency();
PeriodicRetention retentionWork = new PeriodicRetention(streamStore, streamMetadataTasks, retentionExecutor, requestTracker);
retentionService = bucketServiceFactory.createRetentionService(executionDurationRetention, retentionWork::retention, retentionExecutor);
// Unlike the monitor above, start-up blocks here until the retention service is running.
retentionService.startAsync();
retentionService.awaitRunning();
log.info("Started background periodic service for Retention.");
RetentionServiceHealthContributor retentionServiceHC = new RetentionServiceHealthContributor("retentionService", retentionService);
healthServiceManager.register(retentionServiceHC);
Duration executionDurationWatermarking = Duration.ofSeconds(Config.MINIMUM_WATERMARKING_FREQUENCY_IN_SECONDS);
watermarkingWork = new PeriodicWatermarking(streamStore, bucketStore, clientConfig, watermarkingExecutor, requestTracker);
watermarkingService = bucketServiceFactory.createWatermarkingService(executionDurationWatermarking, watermarkingWork::watermark, watermarkingExecutor);
// Same pattern as retention: start, then block until running.
watermarkingService.startAsync();
watermarkingService.awaitRunning();
log.info("Started background periodic service for Watermarking.");
WatermarkingServiceHealthContributor watermarkingServiceHC = new WatermarkingServiceHealthContributor("watermarkingService", watermarkingService);
healthServiceManager.register(watermarkingServiceHC);
// Controller has a mechanism to track the currently active controller host instances. On detecting a failure of
// any controller instance, the failure detector stores the failed HostId in a failed hosts directory (FH), and
// invokes the taskSweeper.sweepOrphanedTasks for each failed host. When all resources under the failed hostId
// are processed and deleted, that failed HostId is removed from FH folder.
// Moreover, on controller process startup, it detects any hostIds not in the currently active set of
// controllers and starts sweeping tasks orphaned by those hostIds.
TaskSweeper taskSweeper = new TaskSweeper(taskMetadataStore, host.getHostId(), controllerExecutor, streamMetadataTasks);
TxnSweeper txnSweeper = new TxnSweeper(streamStore, streamTransactionMetadataTasks, serviceConfig.getTimeoutServiceConfig().getMaxLeaseValue(), controllerExecutor);
RequestSweeper requestSweeper = new RequestSweeper(streamStore, controllerExecutor, streamMetadataTasks);
// The cluster handle is only created when the cluster listener is enabled; otherwise 'cluster' keeps
// whatever value it had before this method ran and is passed as-is to ControllerService below.
if (serviceConfig.isControllerClusterListenerEnabled()) {
cluster = new ClusterZKImpl((CuratorFramework) storeClient.getClient(), ClusterType.CONTROLLER);
}
kvtMetadataStore = kvtMetaStoreRef.orElseGet(() -> KVTableStoreFactory.createStore(storeClient, segmentHelper, authHelper, controllerExecutor, streamStore));
kvtMetadataTasks = new TableMetadataTasks(kvtMetadataStore, segmentHelper, controllerExecutor, eventExecutor, host.getHostId(), authHelper);
controllerService = new ControllerService(kvtMetadataStore, kvtMetadataTasks, streamStore, bucketStore, streamMetadataTasks, streamTransactionMetadataTasks, segmentHelper, controllerExecutor, cluster, requestTracker);
// Setup event processors.
setController(new LocalController(controllerService, grpcServerConfig.isAuthorizationEnabled(), grpcServerConfig.getTokenSigningKey()));
// Already-completed placeholder so the wait at the end of this method works uniformly; it is replaced
// by the real bootstrap future when event processors are configured.
CompletableFuture<Void> eventProcessorFuture = CompletableFuture.completedFuture(null);
if (serviceConfig.getEventProcessorConfig().isPresent()) {
// Create ControllerEventProcessor object.
controllerEventProcessors = new ControllerEventProcessors(host.getHostId(), serviceConfig.getEventProcessorConfig().get(), localController, checkpointStore, streamStore, bucketStore, connectionPool, streamMetadataTasks, streamTransactionMetadataTasks, kvtMetadataStore, kvtMetadataTasks, eventExecutor);
// Bootstrap and start it asynchronously.
eventProcessorFuture = controllerEventProcessors.bootstrap(streamTransactionMetadataTasks, streamMetadataTasks, kvtMetadataTasks).thenAcceptAsync(x -> controllerEventProcessors.startAsync(), eventExecutor);
EventProcessorHealthContributor eventProcessorHC = new EventProcessorHealthContributor("eventProcessor", controllerEventProcessors);
healthServiceManager.register(eventProcessorHC);
}
// Setup and start controller cluster listener after all sweepers have been initialized.
if (serviceConfig.isControllerClusterListenerEnabled()) {
List<FailoverSweeper> failoverSweepers = new ArrayList<>();
failoverSweepers.add(taskSweeper);
failoverSweepers.add(txnSweeper);
failoverSweepers.add(requestSweeper);
// The event processors take part in failover sweeping only when they were created above.
if (serviceConfig.getEventProcessorConfig().isPresent()) {
assert controllerEventProcessors != null;
failoverSweepers.add(controllerEventProcessors);
}
controllerClusterListener = new ControllerClusterListener(host, cluster, controllerExecutor, failoverSweepers);
// Started asynchronously here; awaited at the very end of this method.
controllerClusterListener.startAsync();
ClusterListenerHealthContributor clusterListenerHC = new ClusterListenerHealthContributor("clusterListener", controllerClusterListener);
healthServiceManager.register(clusterListenerHC);
}
// Start the Health Service.
healthServiceManager.start();
// Start RPC server.
if (serviceConfig.getGRPCServerConfig().isPresent()) {
grpcServer = new GRPCServer(controllerService, grpcServerConfig, requestTracker);
grpcServer.startAsync();
grpcServer.awaitRunning();
// NOTE(review): this contributor is registered after healthServiceManager.start(), unlike the ones
// above - confirm that late registration is picked up by the health service.
GRPCServerHealthContributor grpcServerHC = new GRPCServerHealthContributor("GRPCServer", grpcServer);
healthServiceManager.register(grpcServerHC);
}
// Start REST server.
if (serviceConfig.getRestServerConfig().isPresent()) {
List<Object> resources = new ArrayList<>();
// NOTE(review): grpcServer is dereferenced here but is only assigned inside the gRPC branch above, so the
// REST resources depend on the gRPC server having been created.
resources.add(new StreamMetadataResourceImpl(this.localController, controllerService, grpcServer.getAuthHandlerManager(), connectionFactory, clientConfig));
resources.add(new HealthImpl(grpcServer.getAuthHandlerManager(), healthServiceManager.getEndpoint()));
resources.add(new PingImpl());
// The Prometheus resource is exposed only when the metrics provider supplies one.
MetricsProvider.getMetricsProvider().prometheusResource().ifPresent(resources::add);
restServer = new RESTServer(serviceConfig.getRestServerConfig().get(), Set.copyOf(resources));
restServer.startAsync();
restServer.awaitRunning();
}
// Wait for controller event processors to start.
if (serviceConfig.getEventProcessorConfig().isPresent()) {
// if store client has failed because of session expiration, there are two possibilities where
// controllerEventProcessors.awaitRunning may be stuck forever -
// 1. stream creation is retried indefinitely and cannot complete because of zk session expiration
// 2. event writer after stream creation throws exception.
// In both of above cases controllerEventProcessors.startAsync may not get called.
// Hence the wait returns as soon as EITHER the store-client failure future completes OR the event
// processors have been bootstrapped and are running.
CompletableFuture.anyOf(storeClientFailureFuture, eventProcessorFuture.thenAccept(x -> controllerEventProcessors.awaitRunning())).join();
}
// Wait for controller cluster listeners to start.
if (serviceConfig.isControllerClusterListenerEnabled()) {
controllerClusterListener.awaitRunning();
}
} catch (Exception e) {
// Log for diagnosis, then propagate the original exception to the caller unchanged.
log.error("Failed trying to start controller services", e);
throw e;
} finally {
LoggerHelpers.traceLeave(log, this.objectId, "startUp", traceId);
}
}
Use of io.pravega.controller.task.TaskSweeper in project pravega by pravega.
The class ControllerClusterListenerTest, method clusterListenerTest.
/**
 * Starts a {@code ControllerClusterListener} backed by a task sweeper and a fully initialized txn sweeper,
 * checks that all sweepers report ready, then registers and deregisters a second host and finally stops
 * the listener, validating the node added/removed notifications at each step.
 */
@Test(timeout = 60000L)
public void clusterListenerTest() throws Exception {
    Host localHost = new Host("localhost", 10, "host1");

    // Task sweeper over an in-memory task store.
    TaskMetadataStore inMemoryTaskStore = TaskStoreFactory.createInMemoryStore(executor);
    TestTasks testTasks = new TestTasks(inMemoryTaskStore, executor, localHost.getHostId());
    TaskSweeper taskSweeper = new TaskSweeper(inMemoryTaskStore, localHost.getHostId(), executor, testTasks);

    // Txn sweeper; its metadata tasks get their event writers up front.
    @Cleanup StreamMetadataStore inMemoryStreamStore = StreamStoreFactory.createInMemoryStore();
    SegmentHelper mockSegmentHelper = SegmentHelperMock.getSegmentHelperMock();
    StreamTransactionMetadataTasks txnMetadataTasks = new StreamTransactionMetadataTasks(inMemoryStreamStore, mockSegmentHelper, executor, localHost.getHostId(), GrpcAuthHelper.getDisabledAuthHelper());
    txnMetadataTasks.initializeStreamWriters(new EventStreamWriterMock<>(), new EventStreamWriterMock<>());
    TxnSweeper txnSweeper = new TxnSweeper(inMemoryStreamStore, txnMetadataTasks, 100, executor);

    // Listener under test: start it and wait until it is running.
    ControllerClusterListener listener = new ControllerClusterListener(localHost, clusterZK, executor, Lists.newArrayList(taskSweeper, txnSweeper));
    listener.startAsync();
    listener.awaitRunning();
    Assert.assertTrue(listener.areAllSweepersReady());
    validateAddedNode(localHost.getHostId());

    // A second host joins and then leaves the cluster.
    Host transientHost = new Host("localhost", 20, "host2");
    clusterZK.registerHost(transientHost);
    validateAddedNode(transientHost.getHostId());
    clusterZK.deregisterHost(transientHost);
    validateRemovedNode(transientHost.getHostId());

    // Stopping the listener is followed by a removal notification for the local host.
    listener.stopAsync();
    listener.awaitTerminated();
    Assert.assertFalse(listener.isRunning());
    validateRemovedNode(localHost.getHostId());
}
Use of io.pravega.controller.task.TaskSweeper in project pravega by pravega.
The class ControllerClusterListenerTest, method clusterListenerStarterTest.
/**
 * Verifies how {@code ControllerClusterListener} drives its sweepers when some of them are not ready yet.
 *
 * The test spies a task sweeper, a txn sweeper and a request sweeper. The txn and request sweepers are
 * initially stubbed to report "not ready". It then checks, in order:
 * <ol>
 * <li>after the listener starts, only the task sweeper has swept failed processes;</li>
 * <li>after a host is added and removed, the task sweeper handled the failed host while the txn and request
 *     sweepers were only asked whether they are ready;</li>
 * <li>once the txn and request sweepers are allowed to become ready, each performs its sweep exactly once;</li>
 * <li>after a second host is added and removed, all three sweepers handle the failed host.</li>
 * </ol>
 * CompletableFutures completed from inside the Mockito answers act as latches between those steps.
 */
@Test(timeout = 60000L)
// Needed for the raw any(Supplier.class) matchers used below.
@SuppressWarnings("unchecked")
public void clusterListenerStarterTest() throws InterruptedException, ExecutionException {
String hostName = "localhost";
Host host = new Host(hostName, 10, "originalhost");
// Following futures are used as latches. When awaitRunning a sweeper, we wait on a latch by calling
// Futures.await across the test case.
// Future for ensuring that task sweeper is ready and we let the sweep happen.
CompletableFuture<Void> taskSweep = new CompletableFuture<>();
// Future for when taskSweeper.handleFailedProcess is called once
CompletableFuture<Void> taskHostSweep1 = new CompletableFuture<>();
// Future for when taskSweeper.handleFailedProcess is called second time
CompletableFuture<Void> taskHostSweep2 = new CompletableFuture<>();
// Future completed the first time txnSweeper.sweepFailedProcesses is called.
CompletableFuture<Void> txnSweep = new CompletableFuture<>();
// Future completed when txnSweeper.isReady is consulted while it is still stubbed to answer "not ready".
CompletableFuture<Void> txnHostSweepIgnore = new CompletableFuture<>();
// Future completed the first time txnSweeper.handleFailedProcess is called.
CompletableFuture<Void> txnHostSweep2 = new CompletableFuture<>();
// Create task sweeper.
TaskMetadataStore taskStore = TaskStoreFactory.createZKStore(PRAVEGA_ZK_CURATOR_RESOURCE.client, executor);
TaskSweeper taskSweeper = spy(new TaskSweeper(taskStore, host.getHostId(), executor, new TestTasks(taskStore, executor, host.getHostId())));
doAnswer(invocation -> {
if (!taskSweep.isDone()) {
// we complete the future when this method is called for the first time.
taskSweep.complete(null);
}
return CompletableFuture.completedFuture(null);
}).when(taskSweeper).sweepFailedProcesses(any(Supplier.class));
doAnswer(invocation -> {
if (!taskHostSweep1.isDone()) {
// we complete this future when task sweeper for a failed host is called for the first time.
taskHostSweep1.complete(null);
} else if (!taskHostSweep2.isDone()) {
// we complete this future when task sweeper for a failed host is called for the second time
taskHostSweep2.complete(null);
}
return CompletableFuture.completedFuture(null);
}).when(taskSweeper).handleFailedProcess(anyString());
// Create txn sweeper.
StreamMetadataStore streamStore = StreamStoreFactory.createInMemoryStore();
SegmentHelper segmentHelper = SegmentHelperMock.getSegmentHelperMock();
// create streamtransactionmetadatatasks but dont initialize it with writers. this will not be
// ready until writers are supplied.
StreamTransactionMetadataTasks txnTasks = new StreamTransactionMetadataTasks(streamStore, segmentHelper, executor, host.getHostId(), GrpcAuthHelper.getDisabledAuthHelper());
TxnSweeper txnSweeper = spy(new TxnSweeper(streamStore, txnTasks, 100, executor));
// any attempt to sweep txnHost should have been ignored
// While this flag is false, isReady() is forced to return false; once set to true the real method is used.
AtomicBoolean txnSweeperRealMethod = new AtomicBoolean(false);
doAnswer(invocation -> {
if (txnSweeperRealMethod.get()) {
return invocation.callRealMethod();
}
txnHostSweepIgnore.complete(null);
return false;
}).when(txnSweeper).isReady();
doAnswer(invocation -> {
if (!txnSweep.isDone()) {
txnSweep.complete(null);
}
return CompletableFuture.completedFuture(null);
}).when(txnSweeper).sweepFailedProcesses(any());
doAnswer(invocation -> {
if (!txnHostSweep2.isDone()) {
txnHostSweep2.complete(null);
}
return CompletableFuture.completedFuture(null);
}).when(txnSweeper).handleFailedProcess(anyString());
// Create request sweeper.
StreamMetadataTasks streamMetadataTasks = new StreamMetadataTasks(streamStore, mock(BucketStore.class), taskStore, segmentHelper, executor, host.getHostId(), GrpcAuthHelper.getDisabledAuthHelper(), mock(EventHelper.class));
RequestSweeper requestSweeper = spy(new RequestSweeper(streamStore, executor, streamMetadataTasks));
// Future completed the first time requestSweeper.sweepFailedProcesses is called.
CompletableFuture<Void> requestSweep = new CompletableFuture<>();
// Future completed when requestSweeper.isReady is consulted while it is still stubbed to answer "not ready".
// NOTE(review): unlike txnHostSweepIgnore, this future is never awaited in this test.
CompletableFuture<Void> requestHostSweepIgnore = new CompletableFuture<>();
// Future completed the first time requestSweeper.handleFailedProcess is called.
CompletableFuture<Void> requestHostSweep2 = new CompletableFuture<>();
// Same switch as for the txn sweeper: false forces isReady() to return false, true delegates to the real method.
AtomicBoolean requestSweeperRealMethod = new AtomicBoolean(false);
doAnswer(invocation -> {
if (requestSweeperRealMethod.get()) {
return invocation.callRealMethod();
}
requestHostSweepIgnore.complete(null);
return false;
}).when(requestSweeper).isReady();
doAnswer(invocation -> {
if (!requestSweep.isDone()) {
requestSweep.complete(null);
}
return CompletableFuture.completedFuture(null);
}).when(requestSweeper).sweepFailedProcesses(any());
doAnswer(invocation -> {
if (!requestHostSweep2.isDone()) {
requestHostSweep2.complete(null);
}
return CompletableFuture.completedFuture(null);
}).when(requestSweeper).handleFailedProcess(anyString());
// Create ControllerClusterListener.
ControllerClusterListener clusterListener = new ControllerClusterListener(host, clusterZK, executor, Lists.newArrayList(taskSweeper, txnSweeper, requestSweeper));
clusterListener.startAsync();
clusterListener.awaitRunning();
log.info("cluster started");
// ensure that task sweep happens after cluster listener becomes ready.
assertTrue(Futures.await(taskSweep, 3000));
log.info("task sweeper completed");
// ensure only tasks are swept
verify(taskSweeper, times(1)).sweepFailedProcesses(any(Supplier.class));
verify(txnSweeper, times(0)).sweepFailedProcesses(any());
verify(requestSweeper, times(0)).sweepFailedProcesses(any());
verify(taskSweeper, times(0)).handleFailedProcess(anyString());
verify(txnSweeper, times(0)).handleFailedProcess(anyString());
verify(requestSweeper, times(0)).handleFailedProcess(anyString());
validateAddedNode(host.getHostId());
log.info("adding new host");
// now add and remove a new host
Host newHost = new Host(hostName, 20, "newHost1");
clusterZK.registerHost(newHost);
validateAddedNode(newHost.getHostId());
clusterZK.deregisterHost(newHost);
validateRemovedNode(newHost.getHostId());
log.info("deregistering new host");
assertTrue(Futures.await(taskHostSweep1, 3000));
assertTrue(Futures.await(txnHostSweepIgnore, 10000));
log.info("task sweep for new host done");
// verify that all tasks are not swept again.
verify(taskSweeper, times(1)).sweepFailedProcesses(any(Supplier.class));
// verify that host specific sweep happens at least once.
verify(taskSweeper, atLeast(1)).handleFailedProcess(anyString());
// verify that txns are not yet swept as txnsweeper is not yet ready.
verify(txnSweeper, times(0)).sweepFailedProcesses(any());
verify(txnSweeper, times(0)).handleFailedProcess(anyString());
// verify that txn sweeper was checked to be ready. It would have found it not ready at this point
verify(txnSweeper, atLeast(1)).isReady();
// request sweeper
// verify that requests are not yet swept as request sweeper is not yet ready.
verify(requestSweeper, times(0)).sweepFailedProcesses(any());
verify(requestSweeper, times(0)).handleFailedProcess(anyString());
// verify that request sweeper was checked to be ready. It would have found it not ready at this point
verify(requestSweeper, atLeast(1)).isReady();
// From now on txnSweeper.isReady delegates to the real method.
txnSweeperRealMethod.set(true);
// Complete txn sweeper initialization by adding event writers.
txnTasks.initializeStreamWriters(new EventStreamWriterMock<>(), new EventStreamWriterMock<>());
txnSweeper.awaitInitialization();
assertTrue(Futures.await(txnSweep, 3000));
// verify that post initialization txns are swept exactly once.
verify(txnSweeper, times(1)).sweepFailedProcesses(any());
// From now on requestSweeper.isReady delegates to the real method.
requestSweeperRealMethod.set(true);
// Complete requestSweeper initialization by setting the request event writer.
streamMetadataTasks.setRequestEventWriter(new EventStreamWriterMock<>());
assertTrue(Futures.await(requestSweep, 3000));
// verify that post initialization requests are swept exactly once.
verify(requestSweeper, times(1)).sweepFailedProcesses(any());
// now add another host
newHost = new Host(hostName, 20, "newHost2");
clusterZK.registerHost(newHost);
validateAddedNode(newHost.getHostId());
clusterZK.deregisterHost(newHost);
log.info("removing newhost2");
validateRemovedNode(newHost.getHostId());
// This time all three sweepers are expected to handle the failed host.
assertTrue(Futures.await(taskHostSweep2, 3000));
assertTrue(Futures.await(txnHostSweep2, 3000));
assertTrue(Futures.await(requestHostSweep2, 3000));
verify(taskSweeper, atLeast(2)).handleFailedProcess(anyString());
verify(txnSweeper, atLeast(1)).handleFailedProcess(anyString());
verify(requestSweeper, atLeast(1)).handleFailedProcess(anyString());
clusterListener.stopAsync();
clusterListener.awaitTerminated();
}
Use of io.pravega.controller.task.TaskSweeper in project pravega by pravega.
The class ClusterListenerHealthContributorTest, method setup.
/**
 * Prepares the fixture for each test: a spied {@code ControllerClusterListener} built entirely from mocks
 * and forced to report ready, the {@code ClusterListenerHealthContributor} wrapping it, and a health builder
 * named "clusterlistener".
 */
@Before
public void setup() {
    // Sweepers handed to the listener; both are plain mocks with no stubbing.
    List<FailoverSweeper> sweepers = new ArrayList<>();
    sweepers.add(mock(TaskSweeper.class));
    sweepers.add(mock(TxnSweeper.class));

    // NOTE(review): this config mock is stubbed but not passed to anything within this method.
    ControllerServiceConfig config = mock(ControllerServiceConfigImpl.class);
    doReturn(true).when(config).isControllerClusterListenerEnabled();

    // Spy on a real listener so that isReady() can be pinned to true.
    ControllerClusterListener realListener = new ControllerClusterListener(
            mock(Host.class),
            mock(Cluster.class),
            mock(ScheduledExecutorService.class),
            sweepers);
    clusterListener = spy(realListener);
    doReturn(true).when(clusterListener).isReady();

    contributor = new ClusterListenerHealthContributor("clusterlistener", clusterListener);
    builder = Health.builder().name("clusterlistener");
}
Aggregations