Use of io.pravega.common.cluster.zkImpl.ClusterZKImpl in project pravega by pravega.
The class SegmentMonitorLeader, method takeLeadership.
/**
 * This function is called when the current instance is made the leader. The leadership is relinquished when this
 * function exits.
 *
 * @param client The curator client.
 * @throws Exception On any error. This would result in leadership being relinquished.
 */
@Override
@Synchronized
public void takeLeadership(CuratorFramework client) throws Exception {
    log.info("Obtained leadership to monitor the Host to Segment Container Mapping");

    // Attempt a rebalance whenever leadership is obtained to ensure no host events are missed.
    hostsChange.release();

    // Start cluster monitor.
    pravegaServiceCluster = new ClusterZKImpl(client, ClusterType.HOST);

    // Add listener to track host changes on the monitored pravega cluster.
    pravegaServiceCluster.addListener((type, host) -> {
        switch (type) {
            case HOST_ADDED:
            case HOST_REMOVED:
                // We don't keep track of the hosts and we always query for the entire set from the cluster
                // when changes occur. This is to avoid any inconsistencies if we miss any notifications.
                log.info("Received event: {} for host: {}. Wake up leader for rebalancing", type, host);
                hostsChange.release();
                break;
            case ERROR:
                // This event should be due to ZK connection errors and would have been received by the monitor too,
                // hence not handling it explicitly here.
                log.info("Received error event when monitoring the pravega host cluster, ignoring...");
                break;
        }
    });

    // Keep looping here as long as possible to stay as the leader and exclusively monitor the pravega cluster.
    while (true) {
        try {
            if (suspended.get()) {
                log.info("Monitor is suspended, waiting for notification to resume");
                suspendMonitor.acquire();
                log.info("Resuming monitor");
            }
            hostsChange.acquire();
            log.info("Received rebalance event");

            // Wait here until rebalance can be performed.
            waitForRebalance();
            // Clear all events that have been received until this point, since they will be included in the
            // current rebalance operation.
            hostsChange.drainPermits();
            triggerRebalance();
        } catch (InterruptedException e) {
            log.warn("Leadership interrupted, releasing monitor thread");
            // Stop watching the pravega cluster.
            pravegaServiceCluster.close();
            throw e;
        } catch (Exception e) {
            // We will not release leadership if in suspended mode.
            if (!suspended.get()) {
                log.warn("Failed to perform rebalancing, relinquishing leadership");
                // Stop watching the pravega cluster.
                pravegaServiceCluster.close();
                throw e;
            }
        }
    }
}
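For orientation: takeLeadership follows Curator's LeaderSelectorListener contract, where leadership is held only for as long as the callback blocks, which is why the method above loops until it is interrupted or a rebalance fails. Below is a minimal, self-contained sketch of wiring such a listener into a Curator LeaderSelector; the class name, connect string, and leader path are illustrative placeholders, not taken from Pravega.

import org.apache.curator.framework.CuratorFramework;
import org.apache.curator.framework.CuratorFrameworkFactory;
import org.apache.curator.framework.recipes.leader.LeaderSelector;
import org.apache.curator.framework.recipes.leader.LeaderSelectorListenerAdapter;
import org.apache.curator.retry.ExponentialBackoffRetry;

public class LeaderElectionSketch {
    public static void main(String[] args) throws Exception {
        // Hypothetical connect string; adjust for your deployment.
        CuratorFramework client = CuratorFrameworkFactory.newClient(
                "localhost:2181", new ExponentialBackoffRetry(200, 10, 5000));
        client.start();

        LeaderSelectorListenerAdapter listener = new LeaderSelectorListenerAdapter() {
            @Override
            public void takeLeadership(CuratorFramework curatorClient) throws Exception {
                // Leadership lasts only while this method blocks, mirroring
                // the looping structure of SegmentMonitorLeader above.
                System.out.println("Acquired leadership; monitoring would start here");
                Thread.sleep(Long.MAX_VALUE);
            }
        };

        // autoRequeue() re-enters the election whenever leadership is relinquished.
        LeaderSelector selector = new LeaderSelector(client, "/example/leader", listener);
        selector.autoRequeue();
        selector.start();

        // Keep the process alive for the sketch.
        Thread.currentThread().join();
    }
}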
Use of io.pravega.common.cluster.zkImpl.ClusterZKImpl in project pravega by pravega.
The class ControllerServiceStarter, method startUp.
@Override
protected void startUp() {
    long traceId = LoggerHelpers.traceEnterWithContext(log, this.objectId, "startUp");
    log.info("Initiating controller service startUp");
    log.info("Event processors enabled = {}", serviceConfig.getEventProcessorConfig().isPresent());
    log.info("Cluster listener enabled = {}", serviceConfig.isControllerClusterListenerEnabled());
    log.info("    Host monitor enabled = {}", serviceConfig.getHostMonitorConfig().isHostMonitorEnabled());
    log.info("     gRPC server enabled = {}", serviceConfig.getGRPCServerConfig().isPresent());
    log.info("     REST server enabled = {}", serviceConfig.getRestServerConfig().isPresent());

    final StreamMetadataStore streamStore;
    final TaskMetadataStore taskMetadataStore;
    final HostControllerStore hostStore;
    final CheckpointStore checkpointStore;
    try {
        // Initialize the executor service.
        controllerExecutor = ExecutorServiceHelpers.newScheduledThreadPool(serviceConfig.getThreadPoolSize(), "controllerpool");
        retentionExecutor = ExecutorServiceHelpers.newScheduledThreadPool(Config.RETENTION_THREAD_POOL_SIZE, "retentionpool");

        log.info("Creating the stream store");
        streamStore = StreamStoreFactory.createStore(storeClient, controllerExecutor);
        log.info("Creating the task store");
        taskMetadataStore = TaskStoreFactory.createStore(storeClient, controllerExecutor);
        log.info("Creating the host store");
        hostStore = HostStoreFactory.createStore(serviceConfig.getHostMonitorConfig(), storeClient);
        log.info("Creating the checkpoint store");
        checkpointStore = CheckpointStoreFactory.create(storeClient);

        // On each controller process restart, we use a fresh hostId,
        // which is a combination of hostname and random GUID.
        String hostName = getHostName();
        Host host = new Host(hostName, getPort(), UUID.randomUUID().toString());

        if (serviceConfig.getHostMonitorConfig().isHostMonitorEnabled()) {
            // Start the Segment Container Monitor.
            monitor = new SegmentContainerMonitor(hostStore, (CuratorFramework) storeClient.getClient(),
                    new UniformContainerBalancer(),
                    serviceConfig.getHostMonitorConfig().getHostMonitorMinRebalanceInterval());
            log.info("Starting segment container monitor");
            monitor.startAsync();
        }

        ClientConfig clientConfig = ClientConfig.builder()
                .controllerURI(URI.create((serviceConfig.getGRPCServerConfig().get().isTlsEnabled() ? "tls://" : "tcp://") + "localhost"))
                .trustStore(serviceConfig.getGRPCServerConfig().get().getTlsTrustStore())
                .validateHostName(false)
                .build();
        connectionFactory = new ConnectionFactoryImpl(clientConfig);
        SegmentHelper segmentHelper = new SegmentHelper();
        streamMetadataTasks = new StreamMetadataTasks(streamStore, hostStore, taskMetadataStore, segmentHelper,
                controllerExecutor, host.getHostId(), connectionFactory,
                serviceConfig.getGRPCServerConfig().get().isAuthorizationEnabled(),
                serviceConfig.getGRPCServerConfig().get().getTokenSigningKey());
        streamTransactionMetadataTasks = new StreamTransactionMetadataTasks(streamStore, hostStore, segmentHelper,
                controllerExecutor, host.getHostId(), serviceConfig.getTimeoutServiceConfig(), connectionFactory,
                serviceConfig.getGRPCServerConfig().get().isAuthorizationEnabled(),
                serviceConfig.getGRPCServerConfig().get().getTokenSigningKey());
        streamCutService = new StreamCutService(Config.BUCKET_COUNT, host.getHostId(), streamStore, streamMetadataTasks, retentionExecutor);
        log.info("starting auto retention service asynchronously");
        streamCutService.startAsync();
        streamCutService.awaitRunning();

        // Controller has a mechanism to track the currently active controller host instances. On detecting a failure of
        // any controller instance, the failure detector stores the failed HostId in a failed hosts directory (FH), and
        // invokes the taskSweeper.sweepOrphanedTasks for each failed host. When all resources under the failed hostId
        // are processed and deleted, that failed HostId is removed from FH folder.
        // Moreover, on controller process startup, it detects any hostIds not in the currently active set of
        // controllers and starts sweeping tasks orphaned by those hostIds.
        TaskSweeper taskSweeper = new TaskSweeper(taskMetadataStore, host.getHostId(), controllerExecutor, streamMetadataTasks);
        TxnSweeper txnSweeper = new TxnSweeper(streamStore, streamTransactionMetadataTasks,
                serviceConfig.getTimeoutServiceConfig().getMaxLeaseValue(), controllerExecutor);

        if (serviceConfig.isControllerClusterListenerEnabled()) {
            cluster = new ClusterZKImpl((CuratorFramework) storeClient.getClient(), ClusterType.CONTROLLER);
        }

        controllerService = new ControllerService(streamStore, hostStore, streamMetadataTasks,
                streamTransactionMetadataTasks, new SegmentHelper(), controllerExecutor, cluster);

        // Setup event processors.
        setController(new LocalController(controllerService,
                serviceConfig.getGRPCServerConfig().get().isAuthorizationEnabled(),
                serviceConfig.getGRPCServerConfig().get().getTokenSigningKey()));
        if (serviceConfig.getEventProcessorConfig().isPresent()) {
            // Create ControllerEventProcessor object.
            controllerEventProcessors = new ControllerEventProcessors(host.getHostId(),
                    serviceConfig.getEventProcessorConfig().get(), localController, checkpointStore, streamStore,
                    hostStore, segmentHelper, connectionFactory, streamMetadataTasks, controllerExecutor);
            // Bootstrap and start it asynchronously.
            log.info("Starting event processors");
            controllerEventProcessors.bootstrap(streamTransactionMetadataTasks, streamMetadataTasks)
                    .thenAcceptAsync(x -> controllerEventProcessors.startAsync(), controllerExecutor);
        }

        // Setup and start controller cluster listener after all sweepers have been initialized.
        if (serviceConfig.isControllerClusterListenerEnabled()) {
            List<FailoverSweeper> failoverSweepers = new ArrayList<>();
            failoverSweepers.add(taskSweeper);
            failoverSweepers.add(txnSweeper);
            if (serviceConfig.getEventProcessorConfig().isPresent()) {
                assert controllerEventProcessors != null;
                failoverSweepers.add(controllerEventProcessors);
            }
            controllerClusterListener = new ControllerClusterListener(host, cluster, controllerExecutor, failoverSweepers);
            log.info("Starting controller cluster listener");
            controllerClusterListener.startAsync();
        }

        // Start RPC server.
        if (serviceConfig.getGRPCServerConfig().isPresent()) {
            grpcServer = new GRPCServer(controllerService, serviceConfig.getGRPCServerConfig().get());
            grpcServer.startAsync();
            log.info("Awaiting start of rpc server");
            grpcServer.awaitRunning();
        }

        // Start REST server.
        if (serviceConfig.getRestServerConfig().isPresent()) {
            restServer = new RESTServer(this.localController, controllerService, grpcServer.getPravegaAuthManager(),
                    serviceConfig.getRestServerConfig().get(), connectionFactory);
            restServer.startAsync();
            log.info("Awaiting start of REST server");
            restServer.awaitRunning();
        }

        // Wait for controller event processors to start.
        if (serviceConfig.getEventProcessorConfig().isPresent()) {
            log.info("Awaiting start of controller event processors");
            controllerEventProcessors.awaitRunning();
        }

        // Wait for controller cluster listeners to start.
        if (serviceConfig.isControllerClusterListenerEnabled()) {
            log.info("Awaiting start of controller cluster listener");
            controllerClusterListener.awaitRunning();
        }
    } finally {
        LoggerHelpers.traceLeave(log, this.objectId, "startUp", traceId);
    }
}
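The cluster-membership portion of this startup reduces to the small ClusterZKImpl surface these excerpts use: construct it against a started Curator client, add a listener, register the local host, and close it on shutdown. A minimal sketch under those assumptions follows; the connect string, port, and class name are placeholders, not Pravega code.

import io.pravega.common.cluster.ClusterType;
import io.pravega.common.cluster.Host;
import io.pravega.common.cluster.zkImpl.ClusterZKImpl;
import java.util.UUID;
import org.apache.curator.framework.CuratorFramework;
import org.apache.curator.framework.CuratorFrameworkFactory;
import org.apache.curator.retry.ExponentialBackoffRetry;

public class ClusterMembershipSketch {
    public static void main(String[] args) throws Exception {
        // Hypothetical ZooKeeper connect string.
        CuratorFramework zkClient = CuratorFrameworkFactory.newClient(
                "localhost:2181", new ExponentialBackoffRetry(200, 10, 5000));
        zkClient.start();

        // Track controller instances, as ControllerServiceStarter does when the
        // cluster listener is enabled.
        ClusterZKImpl cluster = new ClusterZKImpl(zkClient, ClusterType.CONTROLLER);
        cluster.addListener((type, host) ->
                System.out.println("Cluster event " + type + " for host " + host));

        // Announce this process with a fresh random hostId, mirroring startUp.
        cluster.registerHost(new Host("localhost", 9090, UUID.randomUUID().toString()));

        // ... run services here ...

        cluster.close();
        zkClient.close();
    }
}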
Use of io.pravega.common.cluster.zkImpl.ClusterZKImpl in project pravega by pravega.
The class SegmentContainerMonitorTest, method startZookeeper.
@Before
public void startZookeeper() throws Exception {
    zkTestServer = new TestingServerStarter().start();
    String zkUrl = zkTestServer.getConnectString();
    zkClient = CuratorFrameworkFactory.newClient(zkUrl, new ExponentialBackoffRetry(200, 10, 5000));
    zkClient.start();
    cluster = new ClusterZKImpl(zkClient, ClusterType.HOST);
}
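The matching @After is not part of this excerpt. A plausible counterpart, assuming the fields above and release in reverse order of creation (the method name is hypothetical), would be:

@After
public void stopZookeeper() throws Exception {
    // Reverse order of creation: cluster watch, Curator client, then the embedded ZK server.
    cluster.close();
    zkClient.close();
    zkTestServer.close();
}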
Use of io.pravega.common.cluster.zkImpl.ClusterZKImpl in project pravega by pravega.
The class ControllerClusterListenerTest, method setup.
@Before
public void setup() throws Exception {
    // 1. Start ZK server.
    zkServer = new TestingServerStarter().start();
    // 2. Start ZK client.
    curatorClient = CuratorFrameworkFactory.newClient(zkServer.getConnectString(), new ExponentialBackoffRetry(200, 10, 5000));
    curatorClient.start();
    // 3. Start executor service.
    executor = Executors.newScheduledThreadPool(5);
    // 4. Start cluster event listener.
    clusterZK = new ClusterZKImpl(curatorClient, ClusterType.CONTROLLER);
    clusterZK.addListener((eventType, host) -> {
        switch (eventType) {
            case HOST_ADDED:
                nodeAddedQueue.offer(host.getHostId());
                break;
            case HOST_REMOVED:
                nodeRemovedQueue.offer(host.getHostId());
                break;
            case ERROR:
            default:
                break;
        }
    });
}
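A test built on this fixture typically registers a host and then blocks on the queues the listener fills. A hypothetical example, assuming nodeAddedQueue is a BlockingQueue<String> and the usual JUnit and java.util.concurrent imports are in scope:

@Test(timeout = 30000)
public void testHostAddedEvent() throws Exception {
    Host host = new Host("localhost", 9090, UUID.randomUUID().toString());
    clusterZK.registerHost(host);
    // The listener above offers the hostId on HOST_ADDED; wait for it to arrive.
    String addedHostId = nodeAddedQueue.poll(10, TimeUnit.SECONDS);
    assertEquals(host.getHostId(), addedHostId);
}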
Use of io.pravega.common.cluster.zkImpl.ClusterZKImpl in project pravega by pravega.
The class ZKControllerServiceImplTest, method setup.
@Override
public void setup() throws Exception {
    final StreamMetadataStore streamStore;
    final HostControllerStore hostStore;
    final TaskMetadataStore taskMetadataStore;
    final SegmentHelper segmentHelper;
    zkServer = new TestingServerStarter().start();
    zkServer.start();
    zkClient = CuratorFrameworkFactory.newClient(zkServer.getConnectString(), new ExponentialBackoffRetry(200, 10, 5000));
    zkClient.start();
    storeClient = StoreClientFactory.createZKStoreClient(zkClient);
    executorService = ExecutorServiceHelpers.newScheduledThreadPool(20, "testpool");
    taskMetadataStore = TaskStoreFactory.createStore(storeClient, executorService);
    hostStore = HostStoreFactory.createInMemoryStore(HostMonitorConfigImpl.dummyConfig());
    streamStore = StreamStoreFactory.createZKStore(zkClient, executorService);
    segmentHelper = SegmentHelperMock.getSegmentHelperMock();
    ConnectionFactoryImpl connectionFactory = new ConnectionFactoryImpl(ClientConfig.builder().build());
    streamMetadataTasks = new StreamMetadataTasks(streamStore, hostStore, taskMetadataStore, segmentHelper,
            executorService, "host", connectionFactory, false, "");
    this.streamRequestHandler = new StreamRequestHandler(
            new AutoScaleTask(streamMetadataTasks, streamStore, executorService),
            new ScaleOperationTask(streamMetadataTasks, streamStore, executorService),
            new UpdateStreamTask(streamMetadataTasks, streamStore, executorService),
            new SealStreamTask(streamMetadataTasks, streamStore, executorService),
            new DeleteStreamTask(streamMetadataTasks, streamStore, executorService),
            new TruncateStreamTask(streamMetadataTasks, streamStore, executorService),
            executorService);
    streamMetadataTasks.setRequestEventWriter(new ControllerEventStreamWriterMock(streamRequestHandler, executorService));
    streamTransactionMetadataTasks = new StreamTransactionMetadataTasks(streamStore, hostStore, segmentHelper,
            executorService, "host", connectionFactory, false, "");
    streamTransactionMetadataTasks.initializeStreamWriters("commitStream", new EventStreamWriterMock<>(),
            "abortStream", new EventStreamWriterMock<>());

    cluster = new ClusterZKImpl(zkClient, ClusterType.CONTROLLER);
    final CountDownLatch latch = new CountDownLatch(1);
    cluster.addListener((type, host) -> latch.countDown());
    cluster.registerHost(new Host("localhost", 9090, null));
    latch.await();

    ControllerService controller = new ControllerService(streamStore, hostStore, streamMetadataTasks,
            streamTransactionMetadataTasks, new SegmentHelper(), executorService, cluster);
    controllerService = new ControllerServiceImpl(controller, "", false);
}