Use of org.apache.samza.container.RunLoop in project samza by apache.
The class ContainerStorageManager, method startSideInputs.
/**
 * Reads side inputs until all side input streams are caught up, so that start() can return.
 *
 * Steps, in order:
 *  1. initialize every side input handler,
 *  2. build one {@link SideInputTask} (with its own side-input-prefixed metrics) per task that has side input SSPs,
 *  3. register every side input SSP with the side input consumers at its starting offset,
 *  4. start the consumers and run a dedicated {@link RunLoop} on the side inputs executor,
 *  5. wait on the calling thread until the side input tasks report completion, shutdown is requested, or the run loop
 *     fails.
 *
 * Throws a SamzaException if a starting offset is missing, if the run loop failed, or if the calling thread was
 * interrupted while waiting.
 */
private void startSideInputs() {
  LOG.info("SideInput Restore started");

  // initialize the sideInputStorageManagers
  getSideInputHandlers().forEach(TaskSideInputHandler::init);

  // one handler per task; several SSPs may map to the same handler, hence the distinct()
  Map<TaskName, TaskSideInputHandler> handlersByTask = this.sspSideInputHandlers.values().stream()
      .distinct()
      .collect(Collectors.toMap(TaskSideInputHandler::getTaskName, Function.identity()));

  Map<TaskName, TaskInstanceMetrics> sideInputTaskMetrics = new HashMap<>();
  Map<TaskName, RunLoopTask> sideInputTasks = new HashMap<>();

  this.taskSideInputStoreSSPs.forEach((taskName, storesToSSPs) -> {
    // union of the SSPs of all side input stores of this task
    Set<SystemStreamPartition> taskSSPs = storesToSSPs.values().stream()
        .flatMap(Set::stream)
        .collect(Collectors.toSet());
    if (taskSSPs.isEmpty()) {
      // task has no side inputs, nothing to run for it
      return;
    }

    TaskInstanceMetrics taskMetrics = this.taskInstanceMetrics.get(taskName);
    String sideInputSource = SIDEINPUTS_METRICS_PREFIX + taskMetrics.source();
    TaskInstanceMetrics sideInputMetrics =
        new TaskInstanceMetrics(sideInputSource, taskMetrics.registry(), SIDEINPUTS_METRICS_PREFIX);
    sideInputTaskMetrics.put(taskName, sideInputMetrics);

    sideInputTasks.put(taskName,
        new SideInputTask(taskName, taskSSPs, handlersByTask.get(taskName), sideInputMetrics));
  });

  // register all sideInput SSPs with the consumers
  for (SystemStreamPartition ssp : this.sspSideInputHandlers.keySet()) {
    TaskSideInputHandler handler = this.sspSideInputHandlers.get(ssp);
    String startingOffset = handler.getStartingOffset(ssp);
    if (startingOffset == null) {
      throw new SamzaException(
          "No starting offset could be obtained for SideInput SystemStreamPartition : " + ssp + ". Consumer cannot start.");
    }

    // register startingOffset with the sysConsumer and register a metric for it
    sideInputSystemConsumers.register(ssp, startingOffset);

    // the offset gauge is published on both the regular task metrics and the side input task metrics;
    // the handler is looked up each time the gauge is read
    TaskName owningTask = handler.getTaskName();
    taskInstanceMetrics.get(owningTask).addOffsetGauge(ssp,
        ScalaJavaUtil.toScalaFunction(() -> this.sspSideInputHandlers.get(ssp).getLastProcessedOffset(ssp)));
    sideInputTaskMetrics.get(owningTask).addOffsetGauge(ssp,
        ScalaJavaUtil.toScalaFunction(() -> this.sspSideInputHandlers.get(ssp).getLastProcessedOffset(ssp)));
  }

  // start the systemConsumers for consuming input
  this.sideInputSystemConsumers.start();

  TaskConfig taskConfig = new TaskConfig(this.config);
  SamzaContainerMetrics sideInputContainerMetrics = new SamzaContainerMetrics(
      SIDEINPUTS_METRICS_PREFIX + this.samzaContainerMetrics.source(),
      this.samzaContainerMetrics.registry(),
      SIDEINPUTS_METRICS_PREFIX);

  this.sideInputRunLoop = new RunLoop(
      sideInputTasks,
      null, // all operations are executed in the main runloop thread
      this.sideInputSystemConsumers,
      1, // single message in flight per task
      -1, // no windowing
      taskConfig.getCommitMs(),
      taskConfig.getCallbackTimeoutMs(),
      // TODO consolidate these container configs SAMZA-2275
      this.config.getLong("container.disk.quota.delay.max.ms", TimeUnit.SECONDS.toMillis(1)),
      taskConfig.getMaxIdleMs(),
      sideInputContainerMetrics,
      System::nanoTime,
      false); // commit must be synchronous to ensure integrity of state flush

  try {
    // the run loop runs on the side inputs executor; any failure is recorded for the waiting thread below
    sideInputsExecutor.submit(() -> {
      try {
        sideInputRunLoop.run();
      } catch (Exception e) {
        LOG.error("Exception in reading sideInputs", e);
        sideInputException = e;
      }
    });

    // Make the main thread wait until all sideInputs have been caughtup or an exception was thrown
    while (!shouldShutdown && sideInputException == null && !awaitSideInputTasks()) {
      LOG.debug("Waiting for SideInput bootstrap to complete");
    }

    // Throw exception if there was an exception in catching-up sideInputs
    if (sideInputException != null) {
      throw new SamzaException("Exception in restoring sideInputs", sideInputException);
    }
  } catch (InterruptedException e) {
    LOG.warn("Received an interrupt during side inputs store restoration." + " Exiting prematurely without completing store restore.");
    /*
     * We want to stop side input restoration and rethrow the exception upstream. Container should handle the
     * interrupt exception and shutdown the components and cleaning up the resource. We don't want to clean up the
     * resources prematurely here.
     */
    // todo: should we cancel the flush future right away or wait for container to handle it as part of shutdown sequence?
    shouldShutdown = true;
    throw new SamzaException("Side inputs read was interrupted", e);
  }

  LOG.info("SideInput Restore complete");
}
Use of org.apache.samza.container.RunLoop in project samza by apache.
The class TestStreamProcessor, method testContainerFailureCorrectlyStopsProcessor.
/**
 * Tests that a failure in container correctly stops a running JobCoordinator and propagates the exception
 * through the StreamProcessor
 *
 * Assertions:
 * - The mocked RunLoop has been invoked (and thrown) within the timeout
 * - JobCoordinator has been stopped from the JobCoordinatorListener callback
 * - ProcessorLifecycleListener#afterFailure(Throwable) has been invoked w/ the Throwable thrown by the RunLoop
 * - ProcessorLifecycleListener#afterStop() has not been invoked
 */
@Test
public void testContainerFailureCorrectlyStopsProcessor() throws InterruptedException {
  JobCoordinator mockJobCoordinator = Mockito.mock(JobCoordinator.class);
  Throwable expectedThrowable = new SamzaException("Failure in Container!");
  AtomicReference<Throwable> actualThrowable = new AtomicReference<>();
  final CountDownLatch runLoopStartedLatch = new CountDownLatch(1);

  // The run loop signals that it was entered and then fails immediately with the expected throwable.
  RunLoop failingRunLoop = mock(RunLoop.class);
  doAnswer(invocation -> {
    runLoopStartedLatch.countDown();
    throw expectedThrowable;
  }).when(failingRunLoop).run();

  SamzaContainer mockContainer = StreamProcessorTestUtils.getDummyContainer(failingRunLoop, mock(StreamTask.class));
  final CountDownLatch processorListenerFailed = new CountDownLatch(1);
  TestableStreamProcessor processor = new TestableStreamProcessor(new MapConfig(), new HashMap<>(), mock(StreamTaskFactory.class), new ProcessorLifecycleListener() {
    @Override
    public void beforeStart() {
      processorListenerState.put(ListenerCallback.BEFORE_START, true);
    }

    @Override
    public void afterStart() {
      processorListenerState.put(ListenerCallback.AFTER_START, true);
    }

    @Override
    public void afterStop() {
      processorListenerState.put(ListenerCallback.AFTER_STOP, true);
    }

    @Override
    public void afterFailure(Throwable t) {
      processorListenerState.put(ListenerCallback.AFTER_FAILURE, true);
      actualThrowable.set(t);
      processorListenerFailed.countDown();
    }
  }, mockJobCoordinator, mockContainer);

  // Record the moment the processor asks the coordinator to stop.
  final CountDownLatch coordinatorStop = new CountDownLatch(1);
  doAnswer(invocation -> {
    coordinatorStop.countDown();
    return null;
  }).when(mockJobCoordinator).stop();

  // Starting the coordinator drives the listener callbacks from a separate thread: a new job model is delivered,
  // and onCoordinatorStop is only reported once stop() has actually been requested on the coordinator.
  doAnswer(invocation -> {
    new Thread(() -> {
      try {
        processor.jobCoordinatorListener.onJobModelExpired();
        processor.jobCoordinatorListener.onNewJobModel("1", getMockJobModel());
        coordinatorStop.await();
        processor.jobCoordinatorListener.onCoordinatorStop();
      } catch (InterruptedException e) {
        // Preserve the interrupt status instead of swallowing it; the thread ends here.
        Thread.currentThread().interrupt();
      }
    }).start();
    return null;
  }).when(mockJobCoordinator).start();

  processor.start();

  // Wait until the mocked run loop has actually been invoked before checking the failure callbacks.
  // Bounded so that the test fails instead of hanging forever if the run loop is never started.
  assertTrue("Run loop was not started within timeout!", runLoopStartedLatch.await(30, TimeUnit.SECONDS));
  assertTrue("Container failed and processor listener failed was not invoked within timeout!", processorListenerFailed.await(30, TimeUnit.SECONDS));
  assertEquals(expectedThrowable, actualThrowable.get());
  assertTrue(processorListenerState.get(ListenerCallback.BEFORE_START));
  assertTrue(processorListenerState.get(ListenerCallback.AFTER_START));
  Assert.assertFalse(processorListenerState.get(ListenerCallback.AFTER_STOP));
  assertTrue(processorListenerState.get(ListenerCallback.AFTER_FAILURE));
}
Aggregations