Use of io.pravega.common.cluster.Host in project pravega by pravega.
The class SegmentContainerMonitorTest, method testMonitor:
private void testMonitor(HostControllerStore hostStore) throws Exception {
    // To coordinate the test cases.
    Semaphore sync = new Semaphore(0);

    // Decorate hostStore to add coordination logic using the semaphore.
    class MockHostControllerStore implements HostControllerStore {
        @Override
        public Map<Host, Set<Integer>> getHostContainersMap() {
            return hostStore.getHostContainersMap();
        }

        @Override
        public void updateHostContainersMap(Map<Host, Set<Integer>> newMapping) {
            hostStore.updateHostContainersMap(newMapping);
            // Notify the test case of the update.
            sync.release();
        }

        @Override
        public int getContainerCount() {
            return hostStore.getContainerCount();
        }

        @Override
        public Host getHostForSegment(String scope, String stream, int segmentNumber) {
            return null;
        }
    }

    SegmentContainerMonitor monitor = new SegmentContainerMonitor(new MockHostControllerStore(), zkClient,
            new UniformContainerBalancer(), 2);
    monitor.startAsync().awaitRunning();
    assertEquals(hostStore.getContainerCount(), Config.HOST_STORE_CONTAINER_COUNT);

    // Rebalance should be triggered for the very first attempt. Verify that no hosts are added to the store.
    assertTrue(sync.tryAcquire(10, TimeUnit.SECONDS));
    assertEquals(0, hostStore.getHostContainersMap().size());

    // New host added.
    cluster.registerHost(new Host("localhost1", 1, null));
    assertTrue(sync.tryAcquire(10, TimeUnit.SECONDS));
    assertEquals(1, hostStore.getHostContainersMap().size());

    // Multiple hosts added and removed.
    cluster.registerHost(new Host("localhost2", 2, null));
    cluster.registerHost(new Host("localhost3", 3, null));
    cluster.registerHost(new Host("localhost4", 4, null));
    cluster.deregisterHost(new Host("localhost1", 1, null));
    assertTrue(sync.tryAcquire(10, TimeUnit.SECONDS));
    assertEquals(3, hostStore.getHostContainersMap().size());

    // Add a host.
    cluster.registerHost(new Host("localhost1", 1, null));
    // Rebalance should not have been triggered since the minimum rebalance interval has not yet elapsed.
    assertEquals(3, hostStore.getHostContainersMap().size());

    // Wait for rebalance and verify the host update.
    assertTrue(sync.tryAcquire(10, TimeUnit.SECONDS));
    assertEquals(4, hostStore.getHostContainersMap().size());
    monitor.shutDown();
}
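
The key trick in this test is the decorator: updateHostContainersMap on the wrapping store releases a semaphore permit, so the test can block in tryAcquire until the monitor's asynchronous rebalance has actually been written to the store. Below is a minimal, self-contained sketch of the same coordination pattern; the Store interface and the values used here are illustrative stand-ins, not Pravega types.

    import java.util.concurrent.Semaphore;
    import java.util.concurrent.TimeUnit;

    // Hypothetical single-method store, standing in for HostControllerStore.
    interface Store {
        void update(String value);
    }

    class SemaphoreCoordinationSketch {
        public static void main(String[] args) throws InterruptedException {
            Semaphore sync = new Semaphore(0);

            // Decorate the store: every update releases one permit for the test to consume.
            Store base = value -> System.out.println("stored " + value);
            Store notifying = value -> {
                base.update(value);
                sync.release();
            };

            // Simulate the asynchronous component (the monitor) performing an update.
            new Thread(() -> notifying.update("rebalanced-mapping")).start();

            // The "test" blocks here until the update has actually happened, or times out.
            boolean updated = sync.tryAcquire(10, TimeUnit.SECONDS);
            System.out.println("update observed: " + updated);
        }
    }

Because the permit is released only after the delegate store has been updated, a successful tryAcquire guarantees the subsequent assertions read the post-update state.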
Use of io.pravega.common.cluster.Host in project pravega by pravega.
The class ControllerServiceStarter, method startUp:
@Override
protected void startUp() {
    long traceId = LoggerHelpers.traceEnterWithContext(log, this.objectId, "startUp");
    log.info("Initiating controller service startUp");
    log.info("Controller serviceConfig = {}", serviceConfig.toString());
    log.info("Event processors enabled = {}", serviceConfig.getEventProcessorConfig().isPresent());
    log.info("Cluster listener enabled = {}", serviceConfig.isControllerClusterListenerEnabled());
    log.info(" Host monitor enabled = {}", serviceConfig.getHostMonitorConfig().isHostMonitorEnabled());
    log.info(" gRPC server enabled = {}", serviceConfig.getGRPCServerConfig().isPresent());
    log.info(" REST server enabled = {}", serviceConfig.getRestServerConfig().isPresent());

    final BucketStore bucketStore;
    final TaskMetadataStore taskMetadataStore;
    final HostControllerStore hostStore;
    final CheckpointStore checkpointStore;
    try {
        // Initialize the executor services.
        controllerExecutor = ExecutorServiceHelpers.newScheduledThreadPool(serviceConfig.getThreadPoolSize(), "controllerpool");
        eventExecutor = ExecutorServiceHelpers.newScheduledThreadPool(serviceConfig.getThreadPoolSize(), "eventprocessor");
        retentionExecutor = ExecutorServiceHelpers.newScheduledThreadPool(Config.RETENTION_THREAD_POOL_SIZE, "retentionpool");
        watermarkingExecutor = ExecutorServiceHelpers.newScheduledThreadPool(Config.WATERMARKING_THREAD_POOL_SIZE, "watermarkingpool");

        bucketStore = StreamStoreFactory.createBucketStore(storeClient, controllerExecutor);
        log.info("Created the bucket store.");

        taskMetadataStore = TaskStoreFactory.createStore(storeClient, controllerExecutor);
        log.info("Created the task store.");

        hostStore = HostStoreFactory.createStore(serviceConfig.getHostMonitorConfig(), storeClient);
        log.info("Created the host store.");

        checkpointStore = CheckpointStoreFactory.create(storeClient);
        log.info("Created the checkpoint store.");

        // Initialize Stream and Transaction metrics.
        StreamMetrics.initialize();
        TransactionMetrics.initialize();

        // On each controller process restart, we use a fresh hostId,
        // which is a combination of the hostname and a random GUID.
        String hostName = getHostName();
        Host host = new Host(hostName, getPort(), UUID.randomUUID().toString());

        // Create a RequestTracker instance to trace client requests end-to-end.
        GRPCServerConfig grpcServerConfig = serviceConfig.getGRPCServerConfig().get();
        RequestTracker requestTracker = new RequestTracker(grpcServerConfig.isRequestTracingEnabled());

        // Create a Health Service Manager instance.
        healthServiceManager = new HealthServiceManager(serviceConfig.getHealthCheckFrequency());

        if (serviceConfig.getHostMonitorConfig().isHostMonitorEnabled()) {
            // Start the Segment Container Monitor.
            monitor = new SegmentContainerMonitor(hostStore, (CuratorFramework) storeClient.getClient(),
                    new UniformContainerBalancer(), serviceConfig.getHostMonitorConfig().getHostMonitorMinRebalanceInterval());
            monitor.startAsync();
            log.info("Started Segment Container Monitor service.");
            SegmentContainerMonitorHealthContributor segmentContainerMonitorHC =
                    new SegmentContainerMonitorHealthContributor("segmentContainerMonitor", monitor);
            healthServiceManager.register(segmentContainerMonitorHC);
        }

        // This client config is used by the segment store helper (SegmentHelper) to connect to the segment store.
        ClientConfig.ClientConfigBuilder clientConfigBuilder = ClientConfig.builder()
                .controllerURI(URI.create((grpcServerConfig.isTlsEnabled() ? "tls://" : "tcp://") + "localhost:" + grpcServerConfig.getPort()))
                .trustStore(grpcServerConfig.getTlsTrustStore())
                .validateHostName(false);
        Optional<Boolean> tlsEnabledForSegmentStore = BooleanUtils.extract(serviceConfig.getTlsEnabledForSegmentStore());
        if (tlsEnabledForSegmentStore.isPresent()) {
            clientConfigBuilder.enableTlsToSegmentStore(tlsEnabledForSegmentStore.get());
        }

        // Use one connection per Segment Store to conserve resources.
        ClientConfig clientConfig = clientConfigBuilder.maxConnectionsPerSegmentStore(1).build();
        connectionFactory = connectionFactoryRef.orElseGet(() -> new SocketConnectionFactoryImpl(clientConfig));
        connectionPool = new ConnectionPoolImpl(clientConfig, connectionFactory);
        segmentHelper = segmentHelperRef.orElseGet(() -> new SegmentHelper(connectionPool, hostStore, controllerExecutor));

        GrpcAuthHelper authHelper = new GrpcAuthHelper(serviceConfig.getGRPCServerConfig().get().isAuthorizationEnabled(),
                grpcServerConfig.getTokenSigningKey(), grpcServerConfig.getAccessTokenTTLInSeconds());

        streamStore = streamMetadataStoreRef.orElseGet(() -> StreamStoreFactory.createStore(storeClient, segmentHelper, authHelper, controllerExecutor));
        log.info("Created the stream store.");

        streamMetadataTasks = new StreamMetadataTasks(streamStore, bucketStore, taskMetadataStore, segmentHelper,
                controllerExecutor, eventExecutor, host.getHostId(), authHelper, serviceConfig.getRetentionFrequency().toMillis());
        streamTransactionMetadataTasks = new StreamTransactionMetadataTasks(streamStore, segmentHelper, controllerExecutor,
                eventExecutor, host.getHostId(), serviceConfig.getTimeoutServiceConfig(), authHelper);
        BucketServiceFactory bucketServiceFactory = new BucketServiceFactory(host.getHostId(), bucketStore, 1000);

        Duration executionDurationRetention = serviceConfig.getRetentionFrequency();
        PeriodicRetention retentionWork = new PeriodicRetention(streamStore, streamMetadataTasks, retentionExecutor, requestTracker);
        retentionService = bucketServiceFactory.createRetentionService(executionDurationRetention, retentionWork::retention, retentionExecutor);
        retentionService.startAsync();
        retentionService.awaitRunning();
        log.info("Started background periodic service for Retention.");
        RetentionServiceHealthContributor retentionServiceHC = new RetentionServiceHealthContributor("retentionService", retentionService);
        healthServiceManager.register(retentionServiceHC);

        Duration executionDurationWatermarking = Duration.ofSeconds(Config.MINIMUM_WATERMARKING_FREQUENCY_IN_SECONDS);
        watermarkingWork = new PeriodicWatermarking(streamStore, bucketStore, clientConfig, watermarkingExecutor, requestTracker);
        watermarkingService = bucketServiceFactory.createWatermarkingService(executionDurationWatermarking, watermarkingWork::watermark, watermarkingExecutor);
        watermarkingService.startAsync();
        watermarkingService.awaitRunning();
        log.info("Started background periodic service for Watermarking.");
        WatermarkingServiceHealthContributor watermarkingServiceHC = new WatermarkingServiceHealthContributor("watermarkingService", watermarkingService);
        healthServiceManager.register(watermarkingServiceHC);

        // The controller tracks the currently active controller host instances. On detecting a failure of any
        // controller instance, the failure detector stores the failed hostId in a failed-hosts directory (FH) and
        // invokes taskSweeper.sweepOrphanedTasks for each failed host. When all resources under the failed hostId
        // have been processed and deleted, that hostId is removed from the FH directory.
        // Moreover, on controller process startup, it detects any hostIds not in the currently active set of
        // controllers and starts sweeping tasks orphaned by those hostIds.
        TaskSweeper taskSweeper = new TaskSweeper(taskMetadataStore, host.getHostId(), controllerExecutor, streamMetadataTasks);
        TxnSweeper txnSweeper = new TxnSweeper(streamStore, streamTransactionMetadataTasks,
                serviceConfig.getTimeoutServiceConfig().getMaxLeaseValue(), controllerExecutor);
        RequestSweeper requestSweeper = new RequestSweeper(streamStore, controllerExecutor, streamMetadataTasks);

        if (serviceConfig.isControllerClusterListenerEnabled()) {
            cluster = new ClusterZKImpl((CuratorFramework) storeClient.getClient(), ClusterType.CONTROLLER);
        }

        kvtMetadataStore = kvtMetaStoreRef.orElseGet(() -> KVTableStoreFactory.createStore(storeClient, segmentHelper, authHelper, controllerExecutor, streamStore));
        kvtMetadataTasks = new TableMetadataTasks(kvtMetadataStore, segmentHelper, controllerExecutor, eventExecutor, host.getHostId(), authHelper);
        controllerService = new ControllerService(kvtMetadataStore, kvtMetadataTasks, streamStore, bucketStore, streamMetadataTasks,
                streamTransactionMetadataTasks, segmentHelper, controllerExecutor, cluster, requestTracker);

        // Set up event processors.
        setController(new LocalController(controllerService, grpcServerConfig.isAuthorizationEnabled(), grpcServerConfig.getTokenSigningKey()));
        CompletableFuture<Void> eventProcessorFuture = CompletableFuture.completedFuture(null);
        if (serviceConfig.getEventProcessorConfig().isPresent()) {
            // Create the ControllerEventProcessors object.
            controllerEventProcessors = new ControllerEventProcessors(host.getHostId(), serviceConfig.getEventProcessorConfig().get(),
                    localController, checkpointStore, streamStore, bucketStore, connectionPool, streamMetadataTasks,
                    streamTransactionMetadataTasks, kvtMetadataStore, kvtMetadataTasks, eventExecutor);
            // Bootstrap and start it asynchronously.
            eventProcessorFuture = controllerEventProcessors.bootstrap(streamTransactionMetadataTasks, streamMetadataTasks, kvtMetadataTasks)
                    .thenAcceptAsync(x -> controllerEventProcessors.startAsync(), eventExecutor);
            EventProcessorHealthContributor eventProcessorHC = new EventProcessorHealthContributor("eventProcessor", controllerEventProcessors);
            healthServiceManager.register(eventProcessorHC);
        }

        // Set up and start the controller cluster listener after all sweepers have been initialized.
        if (serviceConfig.isControllerClusterListenerEnabled()) {
            List<FailoverSweeper> failoverSweepers = new ArrayList<>();
            failoverSweepers.add(taskSweeper);
            failoverSweepers.add(txnSweeper);
            failoverSweepers.add(requestSweeper);
            if (serviceConfig.getEventProcessorConfig().isPresent()) {
                assert controllerEventProcessors != null;
                failoverSweepers.add(controllerEventProcessors);
            }
            controllerClusterListener = new ControllerClusterListener(host, cluster, controllerExecutor, failoverSweepers);
            controllerClusterListener.startAsync();
            ClusterListenerHealthContributor clusterListenerHC = new ClusterListenerHealthContributor("clusterListener", controllerClusterListener);
            healthServiceManager.register(clusterListenerHC);
        }

        // Start the Health Service.
        healthServiceManager.start();

        // Start the RPC server.
        if (serviceConfig.getGRPCServerConfig().isPresent()) {
            grpcServer = new GRPCServer(controllerService, grpcServerConfig, requestTracker);
            grpcServer.startAsync();
            grpcServer.awaitRunning();
            GRPCServerHealthContributor grpcServerHC = new GRPCServerHealthContributor("GRPCServer", grpcServer);
            healthServiceManager.register(grpcServerHC);
        }

        // Start the REST server.
        if (serviceConfig.getRestServerConfig().isPresent()) {
            List<Object> resources = new ArrayList<>();
            resources.add(new StreamMetadataResourceImpl(this.localController, controllerService, grpcServer.getAuthHandlerManager(), connectionFactory, clientConfig));
            resources.add(new HealthImpl(grpcServer.getAuthHandlerManager(), healthServiceManager.getEndpoint()));
            resources.add(new PingImpl());
            MetricsProvider.getMetricsProvider().prometheusResource().ifPresent(resources::add);
            restServer = new RESTServer(serviceConfig.getRestServerConfig().get(), Set.copyOf(resources));
            restServer.startAsync();
            restServer.awaitRunning();
        }

        // Wait for the controller event processors to start.
        if (serviceConfig.getEventProcessorConfig().isPresent()) {
            // If the store client has failed because of session expiration, there are two cases in which
            // controllerEventProcessors.awaitRunning may be stuck forever:
            //   1. stream creation is retried indefinitely and cannot complete because of ZK session expiration;
            //   2. the event writer throws an exception after stream creation.
            // In both of the above cases controllerEventProcessors.startAsync may never get called.
            CompletableFuture.anyOf(storeClientFailureFuture,
                    eventProcessorFuture.thenAccept(x -> controllerEventProcessors.awaitRunning())).join();
        }

        // Wait for the controller cluster listener to start.
        if (serviceConfig.isControllerClusterListenerEnabled()) {
            controllerClusterListener.awaitRunning();
        }
    } catch (Exception e) {
        log.error("Failed trying to start controller services", e);
        throw e;
    } finally {
        LoggerHelpers.traceLeave(log, this.objectId, "startUp", traceId);
    }
}
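
The protected startUp override and the repeated startAsync()/awaitRunning() pairs above follow Guava's Service lifecycle; ControllerServiceStarter appears to be built on Guava's AbstractIdleService. A minimal sketch of that lifecycle, assuming only Guava on the classpath (the class and log messages below are illustrative, not Pravega's):

    import com.google.common.util.concurrent.AbstractIdleService;

    // Minimal sketch of the Guava idle-service lifecycle used by the starter and its child services.
    class ExampleService extends AbstractIdleService {
        @Override
        protected void startUp() {
            // Allocate resources and start background work here.
            System.out.println("starting");
        }

        @Override
        protected void shutDown() {
            // Release resources here.
            System.out.println("stopping");
        }

        public static void main(String[] args) {
            ExampleService service = new ExampleService();
            service.startAsync();       // triggers startUp asynchronously
            service.awaitRunning();     // blocks until startUp has completed
            service.stopAsync();
            service.awaitTerminated();  // blocks until shutDown has completed
        }
    }

The same pairing explains why the snippet calls awaitRunning on the retention, watermarking, gRPC, and REST services before declaring startup complete: each call blocks until that child service has finished its own startUp.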
Use of io.pravega.common.cluster.Host in project pravega by pravega.
The class ClusterZKTest, method deregisterNode:
@Test(timeout = TEST_TIMEOUT)
public void deregisterNode() throws Exception {
    LinkedBlockingQueue<String> nodeAddedQueue = new LinkedBlockingQueue<>();
    LinkedBlockingQueue<String> nodeRemovedQueue = new LinkedBlockingQueue<>();
    LinkedBlockingQueue<Exception> exceptionsQueue = new LinkedBlockingQueue<>();

    @Cleanup
    CuratorFramework client2 = CuratorFrameworkFactory.builder()
            .connectString(zkUrl)
            .retryPolicy(new ExponentialBackoffRetry(RETRY_SLEEP_MS, MAX_RETRY))
            .namespace(CLUSTER_NAME_2)
            .build();
    @Cleanup
    Cluster clusterListener = new ClusterZKImpl(client2, ClusterType.HOST);
    clusterListener.addListener((eventType, host) -> {
        switch (eventType) {
            case HOST_ADDED:
                nodeAddedQueue.offer(host.getIpAddr());
                break;
            case HOST_REMOVED:
                nodeRemovedQueue.offer(host.getIpAddr());
                break;
            case ERROR:
                exceptionsQueue.offer(new RuntimeException("Encountered error"));
                break;
            default:
                exceptionsQueue.offer(new RuntimeException("Unhandled case"));
                break;
        }
    });

    @Cleanup
    CuratorFramework client = CuratorFrameworkFactory.builder()
            .connectString(zkUrl)
            .retryPolicy(new ExponentialBackoffRetry(RETRY_SLEEP_MS, MAX_RETRY))
            .namespace(CLUSTER_NAME_2)
            .build();

    // Create and add a node to the cluster.
    @Cleanup
    Cluster clusterZKInstance1 = new ClusterZKImpl(client, ClusterType.HOST);
    clusterZKInstance1.registerHost(new Host(HOST_1, PORT, null));
    assertEquals(HOST_1, nodeAddedQueue.poll(5, TimeUnit.SECONDS));

    clusterZKInstance1.deregisterHost(new Host(HOST_1, PORT, null));
    assertEquals(HOST_1, nodeRemovedQueue.poll(5, TimeUnit.SECONDS));

    Exception exception = exceptionsQueue.poll();
    if (exception != null) {
        throw exception;
    }
}
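
Both Curator clients in the test are built the same way: a connect string, an ExponentialBackoffRetry policy, and a shared namespace so the listening instance and the registering instance operate on the same cluster subtree. A minimal sketch of that client setup, with placeholder connection values (the URL, sleep, and retry counts below are illustrative, not the test's constants):

    import org.apache.curator.framework.CuratorFramework;
    import org.apache.curator.framework.CuratorFrameworkFactory;
    import org.apache.curator.retry.ExponentialBackoffRetry;

    class CuratorClientSketch {
        public static void main(String[] args) {
            // Placeholder values; the real test pulls these from its fixture constants.
            String zkUrl = "localhost:2181";
            int retrySleepMs = 100;
            int maxRetries = 5;

            CuratorFramework client = CuratorFrameworkFactory.builder()
                    .connectString(zkUrl)
                    .retryPolicy(new ExponentialBackoffRetry(retrySleepMs, maxRetries))
                    .namespace("cluster-test")
                    .build();
            // A freshly built client must be started before use; in the test above, the
            // Cluster implementation is assumed to take care of starting it.
            client.start();
            try {
                // ... register/deregister hosts via a Cluster implementation here ...
            } finally {
                client.close();
            }
        }
    }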
Use of io.pravega.common.cluster.Host in project pravega by pravega.
The class SegmentContainerMonitorTest, method testMonitor (overload taking completion latches):
private void testMonitor(HostControllerStore hostStore, List<CompletableFuture<Void>> latches) throws Exception {
    // To coordinate the test cases.
    Semaphore sync = new Semaphore(0);

    // Decorate hostStore to add coordination logic using the semaphore.
    class MockHostControllerStore implements HostControllerStore {
        @Override
        public Map<Host, Set<Integer>> getHostContainersMap() {
            return hostStore.getHostContainersMap();
        }

        @Override
        public void updateHostContainersMap(Map<Host, Set<Integer>> newMapping) {
            hostStore.updateHostContainersMap(newMapping);
            // Notify the test case of the update.
            sync.release();
        }

        @Override
        public int getContainerCount() {
            return hostStore.getContainerCount();
        }

        @Override
        public Host getHostForSegment(String scope, String stream, long segmentNumber) {
            return null;
        }

        @Override
        public Host getHostForTableSegment(String table) {
            return null;
        }
    }

    SegmentContainerMonitor monitor = new SegmentContainerMonitor(new MockHostControllerStore(), PRAVEGA_ZK_CURATOR_RESOURCE.client,
            new UniformContainerBalancer(), 2);
    monitor.startAsync().awaitRunning();
    assertEquals(hostStore.getContainerCount(), Config.HOST_STORE_CONTAINER_COUNT);

    // Rebalance should be triggered for the very first attempt. Verify that no hosts are added to the store.
    assertTrue(sync.tryAcquire(10, TimeUnit.SECONDS));
    if (latches != null) {
        latches.get(1).join();
    }
    assertEquals(0, hostStore.getHostContainersMap().size());

    // New host added.
    cluster.registerHost(new Host("localhost1", 1, null));
    assertTrue(sync.tryAcquire(10, TimeUnit.SECONDS));
    if (latches != null) {
        latches.get(2).join();
    }
    assertEquals(1, hostStore.getHostContainersMap().size());

    // Multiple hosts added and removed.
    cluster.registerHost(new Host("localhost2", 2, null));
    cluster.registerHost(new Host("localhost3", 3, null));
    cluster.registerHost(new Host("localhost4", 4, null));
    cluster.deregisterHost(new Host("localhost1", 1, null));
    assertTrue(sync.tryAcquire(10, TimeUnit.SECONDS));
    if (latches != null) {
        latches.get(3).join();
    }
    assertEquals(3, hostStore.getHostContainersMap().size());

    // Add a host.
    cluster.registerHost(new Host("localhost1", 1, null));
    // Rebalance should not have been triggered since the minimum rebalance interval has not yet elapsed.
    assertEquals(3, hostStore.getHostContainersMap().size());

    // Wait for rebalance and verify the host update.
    assertTrue(sync.tryAcquire(10, TimeUnit.SECONDS));
    if (latches != null) {
        latches.get(4).join();
    }
    assertEquals(4, hostStore.getHostContainersMap().size());
    monitor.shutDown();
}
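
This overload differs from the first testMonitor mainly in the latches argument: each CompletableFuture<Void> acts as a one-shot gate that an external coordinator completes before the corresponding assertion is allowed to proceed. A small sketch of that gating idea in isolation (the phase indices and thread here are illustrative only):

    import java.util.ArrayList;
    import java.util.List;
    import java.util.concurrent.CompletableFuture;

    class LatchGatingSketch {
        public static void main(String[] args) {
            // One latch per test phase; completing a latch lets the waiting side proceed.
            List<CompletableFuture<Void>> latches = new ArrayList<>();
            for (int i = 0; i < 5; i++) {
                latches.add(new CompletableFuture<>());
            }

            // Coordinator thread: releases phase 1 once its precondition holds.
            new Thread(() -> latches.get(1).complete(null)).start();

            // Test side: blocks until phase 1 has been released.
            latches.get(1).join();
            System.out.println("phase 1 unblocked");
        }
    }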
Use of io.pravega.common.cluster.Host in project pravega by pravega.
The class UniformContainerBalancerTest, method testRebalancer:
@Test(timeout = 5000)
public void testRebalancer() {
    UniformContainerBalancer balancer = new UniformContainerBalancer();

    // Validate empty host.
    HashSet<Host> hosts = new HashSet<>();
    Map<Host, Set<Integer>> rebalance = balancer.rebalance(new HashMap<>(), hosts);
    assertEquals(0, rebalance.size());

    // Validate initialization.
    hosts.add(new Host("host1", 123, null));
    rebalance = balancer.rebalance(new HashMap<>(), hosts);
    assertEquals(1, rebalance.size());
    validateContainerCount(rebalance, hosts);

    // New host added.
    hosts.add(new Host("host2", 123, null));
    rebalance = balancer.rebalance(rebalance, hosts);
    assertEquals(2, rebalance.size());
    validateContainerCount(rebalance, hosts);

    // Add multiple hosts.
    hosts.add(new Host("host3", 123, null));
    hosts.add(new Host("host4", 123, null));
    rebalance = balancer.rebalance(rebalance, hosts);
    assertEquals(4, rebalance.size());
    validateContainerCount(rebalance, hosts);

    // Remove host.
    hosts.remove(new Host("host2", 123, null));
    rebalance = balancer.rebalance(rebalance, hosts);
    assertEquals(3, rebalance.size());
    validateContainerCount(rebalance, hosts);

    // Add and remove multiple hosts.
    hosts.add(new Host("host2", 123, null));
    hosts.add(new Host("host5", 123, null));
    hosts.add(new Host("host6", 123, null));
    hosts.add(new Host("host7", 123, null));
    hosts.add(new Host("host8", 123, null));
    hosts.add(new Host("host9", 123, null));
    hosts.remove(new Host("host1", 123, null));
    hosts.remove(new Host("host3", 123, null));
    hosts.remove(new Host("host4", 123, null));
    rebalance = balancer.rebalance(rebalance, hosts);
    assertEquals(6, rebalance.size());
    validateContainerCount(rebalance, hosts);

    // Remove multiple hosts.
    hosts.remove(new Host("host2", 123, null));
    hosts.remove(new Host("host5", 123, null));
    hosts.remove(new Host("host6", 123, null));
    rebalance = balancer.rebalance(rebalance, hosts);
    assertEquals(3, rebalance.size());
    validateContainerCount(rebalance, hosts);

    // No changes.
    rebalance = balancer.rebalance(rebalance, hosts);
    assertEquals(3, rebalance.size());
    validateContainerCount(rebalance, hosts);
}
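
validateContainerCount is the test's own helper; the property a uniform balancer must preserve is that, for C containers and H hosts, every host ends up owning either floor(C/H) or ceil(C/H) containers, with the owned counts summing to C. A small sketch of that check, independent of Pravega's balancer (the helper below is hypothetical and is not the project's validateContainerCount):

    import java.util.Map;
    import java.util.Set;

    class UniformityCheckSketch {
        // Returns true when every host owns either floor(total/hosts) or ceil(total/hosts)
        // containers and the per-host counts add up to the total.
        static <H> boolean isUniform(Map<H, Set<Integer>> mapping, int totalContainers) {
            int hosts = mapping.size();
            if (hosts == 0) {
                return totalContainers == 0;
            }
            int floor = totalContainers / hosts;
            int ceil = (totalContainers + hosts - 1) / hosts;
            int assigned = 0;
            for (Set<Integer> owned : mapping.values()) {
                if (owned.size() < floor || owned.size() > ceil) {
                    return false;
                }
                assigned += owned.size();
            }
            return assigned == totalContainers;
        }
    }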