use of io.pravega.controller.task.Stream.StreamTransactionMetadataTasks in project pravega by pravega.
the class ControllerServiceStarter method startUp.
@Override
protected void startUp() {
long traceId = LoggerHelpers.traceEnterWithContext(log, this.objectId, "startUp");
log.info("Initiating controller service startUp");
log.info("Event processors enabled = {}", serviceConfig.getEventProcessorConfig().isPresent());
log.info("Cluster listener enabled = {}", serviceConfig.isControllerClusterListenerEnabled());
log.info(" Host monitor enabled = {}", serviceConfig.getHostMonitorConfig().isHostMonitorEnabled());
log.info(" gRPC server enabled = {}", serviceConfig.getGRPCServerConfig().isPresent());
log.info(" REST server enabled = {}", serviceConfig.getRestServerConfig().isPresent());
final StreamMetadataStore streamStore;
final TaskMetadataStore taskMetadataStore;
final HostControllerStore hostStore;
final CheckpointStore checkpointStore;
try {
// Initialize the executor service.
controllerExecutor = ExecutorServiceHelpers.newScheduledThreadPool(serviceConfig.getThreadPoolSize(), "controllerpool");
retentionExecutor = ExecutorServiceHelpers.newScheduledThreadPool(Config.RETENTION_THREAD_POOL_SIZE, "retentionpool");
log.info("Creating the stream store");
streamStore = StreamStoreFactory.createStore(storeClient, controllerExecutor);
log.info("Creating the task store");
taskMetadataStore = TaskStoreFactory.createStore(storeClient, controllerExecutor);
log.info("Creating the host store");
hostStore = HostStoreFactory.createStore(serviceConfig.getHostMonitorConfig(), storeClient);
log.info("Creating the checkpoint store");
checkpointStore = CheckpointStoreFactory.create(storeClient);
// On each controller process restart, we use a fresh hostId,
// which is a combination of hostname and random GUID.
String hostName = getHostName();
Host host = new Host(hostName, getPort(), UUID.randomUUID().toString());
if (serviceConfig.getHostMonitorConfig().isHostMonitorEnabled()) {
// Start the Segment Container Monitor.
monitor = new SegmentContainerMonitor(hostStore, (CuratorFramework) storeClient.getClient(), new UniformContainerBalancer(), serviceConfig.getHostMonitorConfig().getHostMonitorMinRebalanceInterval());
log.info("Starting segment container monitor");
monitor.startAsync();
}
ClientConfig clientConfig = ClientConfig.builder().controllerURI(URI.create((serviceConfig.getGRPCServerConfig().get().isTlsEnabled() ? "tls://" : "tcp://") + "localhost")).trustStore(serviceConfig.getGRPCServerConfig().get().getTlsTrustStore()).validateHostName(false).build();
connectionFactory = new ConnectionFactoryImpl(clientConfig);
SegmentHelper segmentHelper = new SegmentHelper();
streamMetadataTasks = new StreamMetadataTasks(streamStore, hostStore, taskMetadataStore, segmentHelper, controllerExecutor, host.getHostId(), connectionFactory, serviceConfig.getGRPCServerConfig().get().isAuthorizationEnabled(), serviceConfig.getGRPCServerConfig().get().getTokenSigningKey());
streamTransactionMetadataTasks = new StreamTransactionMetadataTasks(streamStore, hostStore, segmentHelper, controllerExecutor, host.getHostId(), serviceConfig.getTimeoutServiceConfig(), connectionFactory, serviceConfig.getGRPCServerConfig().get().isAuthorizationEnabled(), serviceConfig.getGRPCServerConfig().get().getTokenSigningKey());
streamCutService = new StreamCutService(Config.BUCKET_COUNT, host.getHostId(), streamStore, streamMetadataTasks, retentionExecutor);
log.info("starting auto retention service asynchronously");
streamCutService.startAsync();
streamCutService.awaitRunning();
// Controller has a mechanism to track the currently active controller host instances. On detecting a failure of
// any controller instance, the failure detector stores the failed HostId in a failed hosts directory (FH), and
// invokes the taskSweeper.sweepOrphanedTasks for each failed host. When all resources under the failed hostId
// are processed and deleted, that failed HostId is removed from FH folder.
// Moreover, on controller process startup, it detects any hostIds not in the currently active set of
// controllers and starts sweeping tasks orphaned by those hostIds.
TaskSweeper taskSweeper = new TaskSweeper(taskMetadataStore, host.getHostId(), controllerExecutor, streamMetadataTasks);
TxnSweeper txnSweeper = new TxnSweeper(streamStore, streamTransactionMetadataTasks, serviceConfig.getTimeoutServiceConfig().getMaxLeaseValue(), controllerExecutor);
if (serviceConfig.isControllerClusterListenerEnabled()) {
cluster = new ClusterZKImpl((CuratorFramework) storeClient.getClient(), ClusterType.CONTROLLER);
}
controllerService = new ControllerService(streamStore, hostStore, streamMetadataTasks, streamTransactionMetadataTasks, new SegmentHelper(), controllerExecutor, cluster);
// Setup event processors.
setController(new LocalController(controllerService, serviceConfig.getGRPCServerConfig().get().isAuthorizationEnabled(), serviceConfig.getGRPCServerConfig().get().getTokenSigningKey()));
if (serviceConfig.getEventProcessorConfig().isPresent()) {
// Create ControllerEventProcessor object.
controllerEventProcessors = new ControllerEventProcessors(host.getHostId(), serviceConfig.getEventProcessorConfig().get(), localController, checkpointStore, streamStore, hostStore, segmentHelper, connectionFactory, streamMetadataTasks, controllerExecutor);
// Bootstrap and start it asynchronously.
log.info("Starting event processors");
controllerEventProcessors.bootstrap(streamTransactionMetadataTasks, streamMetadataTasks).thenAcceptAsync(x -> controllerEventProcessors.startAsync(), controllerExecutor);
}
// Setup and start controller cluster listener after all sweepers have been initialized.
if (serviceConfig.isControllerClusterListenerEnabled()) {
List<FailoverSweeper> failoverSweepers = new ArrayList<>();
failoverSweepers.add(taskSweeper);
failoverSweepers.add(txnSweeper);
if (serviceConfig.getEventProcessorConfig().isPresent()) {
assert controllerEventProcessors != null;
failoverSweepers.add(controllerEventProcessors);
}
controllerClusterListener = new ControllerClusterListener(host, cluster, controllerExecutor, failoverSweepers);
log.info("Starting controller cluster listener");
controllerClusterListener.startAsync();
}
// Start RPC server.
if (serviceConfig.getGRPCServerConfig().isPresent()) {
grpcServer = new GRPCServer(controllerService, serviceConfig.getGRPCServerConfig().get());
grpcServer.startAsync();
log.info("Awaiting start of rpc server");
grpcServer.awaitRunning();
}
// Start REST server.
if (serviceConfig.getRestServerConfig().isPresent()) {
restServer = new RESTServer(this.localController, controllerService, grpcServer.getPravegaAuthManager(), serviceConfig.getRestServerConfig().get(), connectionFactory);
restServer.startAsync();
log.info("Awaiting start of REST server");
restServer.awaitRunning();
}
// Wait for controller event processors to start.
if (serviceConfig.getEventProcessorConfig().isPresent()) {
log.info("Awaiting start of controller event processors");
controllerEventProcessors.awaitRunning();
}
// Wait for controller cluster listeners to start.
if (serviceConfig.isControllerClusterListenerEnabled()) {
log.info("Awaiting start of controller cluster listener");
controllerClusterListener.awaitRunning();
}
} finally {
LoggerHelpers.traceLeave(log, this.objectId, "startUp", traceId);
}
}
use of io.pravega.controller.task.Stream.StreamTransactionMetadataTasks in project pravega by pravega.
the class ControllerEventProcessorsTest method testHandleOrphaned.
@Test(timeout = 10000)
public void testHandleOrphaned() {
Controller localController = mock(Controller.class);
CheckpointStore checkpointStore = mock(CheckpointStore.class);
StreamMetadataStore streamStore = mock(StreamMetadataStore.class);
HostControllerStore hostStore = mock(HostControllerStore.class);
ConnectionFactory connectionFactory = mock(ConnectionFactory.class);
StreamMetadataTasks streamMetadataTasks = mock(StreamMetadataTasks.class);
StreamTransactionMetadataTasks streamTransactionMetadataTasks = mock(StreamTransactionMetadataTasks.class);
executor = Executors.newSingleThreadScheduledExecutor();
ControllerEventProcessorConfig config = ControllerEventProcessorConfigImpl.withDefault();
EventProcessorSystem system = mock(EventProcessorSystem.class);
EventProcessorGroup<ControllerEvent> processor = new EventProcessorGroup<ControllerEvent>() {
@Override
public void notifyProcessFailure(String process) throws CheckpointStoreException {
}
@Override
public EventStreamWriter<ControllerEvent> getWriter() {
return null;
}
@Override
public Set<String> getProcesses() throws CheckpointStoreException {
return Sets.newHashSet("host1", "host2");
}
@Override
public Service startAsync() {
return null;
}
@Override
public boolean isRunning() {
return false;
}
@Override
public State state() {
return null;
}
@Override
public Service stopAsync() {
return null;
}
@Override
public void awaitRunning() {
}
@Override
public void awaitRunning(long timeout, TimeUnit unit) throws TimeoutException {
}
@Override
public void awaitTerminated() {
}
@Override
public void awaitTerminated(long timeout, TimeUnit unit) throws TimeoutException {
}
@Override
public Throwable failureCause() {
return null;
}
@Override
public void addListener(Listener listener, Executor executor) {
}
@Override
public void close() throws Exception {
}
};
try {
when(system.createEventProcessorGroup(any(), any())).thenReturn(processor);
} catch (CheckpointStoreException e) {
e.printStackTrace();
}
ControllerEventProcessors processors = new ControllerEventProcessors("host1", config, localController, checkpointStore, streamStore, hostStore, SegmentHelperMock.getSegmentHelperMock(), connectionFactory, streamMetadataTasks, system, executor);
processors.startAsync();
processors.awaitRunning();
assertTrue(Futures.await(processors.sweepFailedProcesses(() -> Sets.newHashSet("host1"))));
assertTrue(Futures.await(processors.handleFailedProcess("host1")));
processors.shutDown();
}
use of io.pravega.controller.task.Stream.StreamTransactionMetadataTasks in project pravega by pravega.
the class TimeoutServiceTest method testPingOwnershipTransfer.
@Test(timeout = 30000)
public void testPingOwnershipTransfer() throws Exception {
StreamMetadataStore streamStore2 = StreamStoreFactory.createZKStore(client, executor);
HostControllerStore hostStore = HostStoreFactory.createInMemoryStore(HostMonitorConfigImpl.dummyConfig());
TaskMetadataStore taskMetadataStore = TaskStoreFactory.createStore(storeClient, executor);
ConnectionFactoryImpl connectionFactory = new ConnectionFactoryImpl(ClientConfig.builder().build());
@Cleanup StreamMetadataTasks streamMetadataTasks2 = new StreamMetadataTasks(streamStore2, hostStore, taskMetadataStore, new SegmentHelper(), executor, "2", connectionFactory, false, "");
@Cleanup StreamTransactionMetadataTasks streamTransactionMetadataTasks2 = new StreamTransactionMetadataTasks(streamStore2, hostStore, SegmentHelperMock.getSegmentHelperMock(), executor, "2", TimeoutServiceConfig.defaultConfig(), new LinkedBlockingQueue<>(5), connectionFactory, false, "");
streamTransactionMetadataTasks2.initializeStreamWriters("commitStream", new EventStreamWriterMock<>(), "abortStream", new EventStreamWriterMock<>());
// Create TimeoutService
TimerWheelTimeoutService timeoutService2 = (TimerWheelTimeoutService) streamTransactionMetadataTasks2.getTimeoutService();
ControllerService controllerService2 = new ControllerService(streamStore2, hostStore, streamMetadataTasks2, streamTransactionMetadataTasks2, new SegmentHelper(), executor, null);
TxnId txnId = controllerService.createTransaction(SCOPE, STREAM, LEASE, SCALE_GRACE_PERIOD).thenApply(x -> ModelHelper.decode(x.getKey())).join();
VersionedTransactionData txnData = streamStore.getTransactionData(SCOPE, STREAM, ModelHelper.encode(txnId), null, executor).join();
Assert.assertEquals(txnData.getVersion(), 0);
Optional<Throwable> result = timeoutService.getTaskCompletionQueue().poll((long) (0.75 * LEASE), TimeUnit.MILLISECONDS);
Assert.assertNull(result);
TxnState txnState = controllerService.checkTransactionStatus(SCOPE, STREAM, txnId).join();
Assert.assertEquals(TxnState.State.OPEN, txnState.getState());
// increasing lease -> total effective lease = 3 * LEASE
PingTxnStatus pingStatus = controllerService2.pingTransaction(SCOPE, STREAM, txnId, 2 * LEASE).join();
Assert.assertEquals(PingTxnStatus.Status.OK, pingStatus.getStatus());
txnData = streamStore.getTransactionData(SCOPE, STREAM, ModelHelper.encode(txnId), null, executor).join();
Assert.assertEquals(txnData.getVersion(), 1);
// timeoutService1 should believe that LEASE has expired and should get non empty completion tasks
result = timeoutService.getTaskCompletionQueue().poll((long) (1.3 * LEASE + RETRY_DELAY), TimeUnit.MILLISECONDS);
Assert.assertNotNull(result);
// the txn may have been attempted to be aborted by timeoutService1 but would have failed. So txn to remain open
txnState = controllerService.checkTransactionStatus(SCOPE, STREAM, txnId).join();
Assert.assertEquals(TxnState.State.OPEN, txnState.getState());
// timeoutService2 should continue to wait on lease expiry and should get empty completion tasks
result = timeoutService2.getTaskCompletionQueue().poll(0L, TimeUnit.MILLISECONDS);
Assert.assertNull(result);
result = timeoutService2.getTaskCompletionQueue().poll(2 * LEASE + RETRY_DELAY, TimeUnit.MILLISECONDS);
Assert.assertNotNull(result);
// now txn should have moved to aborting because timeoutservice2 has initiated abort
txnState = controllerService.checkTransactionStatus(SCOPE, STREAM, txnId).join();
Assert.assertEquals(TxnState.State.ABORTING, txnState.getState());
}
use of io.pravega.controller.task.Stream.StreamTransactionMetadataTasks in project pravega by pravega.
the class ControllerClusterListenerTest method clusterListenerTest.
@Test(timeout = 60000L)
public void clusterListenerTest() throws InterruptedException {
String hostName = "localhost";
Host host = new Host(hostName, 10, "host1");
// Create task sweeper.
TaskMetadataStore taskStore = TaskStoreFactory.createInMemoryStore(executor);
TaskSweeper taskSweeper = new TaskSweeper(taskStore, host.getHostId(), executor, new TestTasks(taskStore, executor, host.getHostId()));
// Create txn sweeper.
StreamMetadataStore streamStore = StreamStoreFactory.createInMemoryStore(executor);
HostControllerStore hostStore = HostStoreFactory.createInMemoryStore(HostMonitorConfigImpl.dummyConfig());
SegmentHelper segmentHelper = SegmentHelperMock.getSegmentHelperMock();
ConnectionFactory connectionFactory = mock(ConnectionFactory.class);
StreamTransactionMetadataTasks txnTasks = new StreamTransactionMetadataTasks(streamStore, hostStore, segmentHelper, executor, host.getHostId(), connectionFactory, false, "");
txnTasks.initializeStreamWriters("commitStream", new EventStreamWriterMock<>(), "abortStream", new EventStreamWriterMock<>());
TxnSweeper txnSweeper = new TxnSweeper(streamStore, txnTasks, 100, executor);
// Create ControllerClusterListener.
ControllerClusterListener clusterListener = new ControllerClusterListener(host, clusterZK, executor, Lists.newArrayList(taskSweeper, txnSweeper));
clusterListener.startAsync();
clusterListener.awaitRunning();
validateAddedNode(host.getHostId());
// Add a new host
Host host1 = new Host(hostName, 20, "host2");
clusterZK.registerHost(host1);
validateAddedNode(host1.getHostId());
clusterZK.deregisterHost(host1);
validateRemovedNode(host1.getHostId());
clusterListener.stopAsync();
clusterListener.awaitTerminated();
validateRemovedNode(host.getHostId());
}
use of io.pravega.controller.task.Stream.StreamTransactionMetadataTasks in project pravega by pravega.
the class ControllerClusterListenerTest method clusterListenerStarterTest.
@Test(timeout = 60000L)
public void clusterListenerStarterTest() throws InterruptedException, ExecutionException {
String hostName = "localhost";
Host host = new Host(hostName, 10, "originalhost");
// Following futures are used as latches. When awaitRunning a sweeper, we wait on a latch by calling
// Futures.await across the test case.
// Future for ensuring that task sweeper is ready and we let the sweep happen.
CompletableFuture<Void> taskSweep = new CompletableFuture<>();
// Future for when taskSweeper.failedHost is called once
CompletableFuture<Void> taskHostSweep1 = new CompletableFuture<>();
// Future for when taskSweeper.failedHost is called second time
CompletableFuture<Void> taskHostSweep2 = new CompletableFuture<>();
// Future for txn sweeper to get ready.
CompletableFuture<Void> txnSweep = new CompletableFuture<>();
// Future for txnsweeper.failedProcess to be called the first time
CompletableFuture<Void> txnHostSweepIgnore = new CompletableFuture<>();
CompletableFuture<Void> txnHostSweep2 = new CompletableFuture<>();
// Create task sweeper.
TaskMetadataStore taskStore = TaskStoreFactory.createZKStore(curatorClient, executor);
TaskSweeper taskSweeper = spy(new TaskSweeper(taskStore, host.getHostId(), executor, new TestTasks(taskStore, executor, host.getHostId())));
when(taskSweeper.sweepFailedProcesses(any(Supplier.class))).thenAnswer(invocation -> {
if (!taskSweep.isDone()) {
// we complete the future when this method is called for the first time.
taskSweep.complete(null);
}
return CompletableFuture.completedFuture(null);
});
when(taskSweeper.handleFailedProcess(anyString())).thenAnswer(invocation -> {
if (!taskHostSweep1.isDone()) {
// we complete this future when task sweeper for a failed host is called for the first time.
taskHostSweep1.complete(null);
} else if (!taskHostSweep2.isDone()) {
// we complete this future when task sweeper for a failed host is called for the second time
taskHostSweep2.complete(null);
}
return CompletableFuture.completedFuture(null);
});
// Create txn sweeper.
StreamMetadataStore streamStore = StreamStoreFactory.createInMemoryStore(executor);
HostControllerStore hostStore = HostStoreFactory.createInMemoryStore(HostMonitorConfigImpl.dummyConfig());
SegmentHelper segmentHelper = SegmentHelperMock.getSegmentHelperMock();
ConnectionFactory connectionFactory = mock(ConnectionFactory.class);
// create streamtransactionmetadatatasks but dont initialize it with writers. this will not be
// ready until writers are supplied.
StreamTransactionMetadataTasks txnTasks = new StreamTransactionMetadataTasks(streamStore, hostStore, segmentHelper, executor, host.getHostId(), connectionFactory, false, "");
TxnSweeper txnSweeper = spy(new TxnSweeper(streamStore, txnTasks, 100, executor));
// any attempt to sweep txnHost should have been ignored
doAnswer(invocation -> {
txnHostSweepIgnore.complete(null);
return false;
}).when(txnSweeper).isReady();
when(txnSweeper.sweepFailedProcesses(any())).thenAnswer(invocation -> {
if (!txnSweep.isDone()) {
txnSweep.complete(null);
}
return CompletableFuture.completedFuture(null);
});
when(txnSweeper.handleFailedProcess(anyString())).thenAnswer(invocation -> {
if (!txnHostSweep2.isDone()) {
txnHostSweep2.complete(null);
}
return CompletableFuture.completedFuture(null);
});
// Create ControllerClusterListener.
ControllerClusterListener clusterListener = new ControllerClusterListener(host, clusterZK, executor, Lists.newArrayList(taskSweeper, txnSweeper));
clusterListener.startAsync();
clusterListener.awaitRunning();
log.info("cluster started");
// ensure that task sweep happens after cluster listener becomes ready.
assertTrue(Futures.await(taskSweep, 3000));
log.info("task sweeper completed");
// ensure only tasks are swept
verify(taskSweeper, times(1)).sweepFailedProcesses(any(Supplier.class));
verify(txnSweeper, times(0)).sweepFailedProcesses(any());
verify(taskSweeper, times(0)).handleFailedProcess(anyString());
verify(txnSweeper, times(0)).handleFailedProcess(anyString());
validateAddedNode(host.getHostId());
log.info("adding new host");
// now add and remove a new host
Host newHost = new Host(hostName, 20, "newHost1");
clusterZK.registerHost(newHost);
validateAddedNode(newHost.getHostId());
clusterZK.deregisterHost(newHost);
validateRemovedNode(newHost.getHostId());
log.info("deregistering new host");
assertTrue(Futures.await(taskHostSweep1, 3000));
assertTrue(Futures.await(txnHostSweepIgnore, 10000));
log.info("task sweep for new host done");
// verify that all tasks are not swept again.
verify(taskSweeper, times(1)).sweepFailedProcesses(any(Supplier.class));
// verify that host specific sweep happens once.
verify(taskSweeper, atLeast(1)).handleFailedProcess(anyString());
// verify that txns are not yet swept as txnsweeper is not yet ready.
verify(txnSweeper, times(0)).sweepFailedProcesses(any());
verify(txnSweeper, times(0)).handleFailedProcess(anyString());
// verify that txn sweeper was checked to be ready. It would have found it not ready at this point
verify(txnSweeper, atLeast(1)).isReady();
// Reset the mock to call real method on txnsweeper.isReady.
doCallRealMethod().when(txnSweeper).isReady();
// Complete txn sweeper initialization by adding event writers.
txnTasks.initializeStreamWriters("commitStream", new EventStreamWriterMock<>(), "abortStream", new EventStreamWriterMock<>());
txnSweeper.awaitInitialization();
assertTrue(Futures.await(txnSweep, 3000));
// verify that post initialization txns are swept. And host specific txn sweep is also performed.
verify(txnSweeper, times(1)).sweepFailedProcesses(any());
// now add another host
newHost = new Host(hostName, 20, "newHost2");
clusterZK.registerHost(newHost);
validateAddedNode(newHost.getHostId());
clusterZK.deregisterHost(newHost);
log.info("removing newhost2");
validateRemovedNode(newHost.getHostId());
assertTrue(Futures.await(taskHostSweep2, 3000));
assertTrue(Futures.await(txnHostSweep2, 3000));
verify(taskSweeper, atLeast(2)).handleFailedProcess(anyString());
verify(txnSweeper, atLeast(1)).handleFailedProcess(anyString());
clusterListener.stopAsync();
clusterListener.awaitTerminated();
}
Aggregations