Search in sources :

Example 1 with RunLoopTask

Use of org.apache.samza.container.RunLoopTask in the samza project by apache.

From the class ContainerStorageManager, method startSideInputs.

/**
 * Reads sideInputs until all sideInputStreams are caught up, so that start() can return.
 *
 * <p>Steps, in order: initialize every side input handler; build one {@code SideInputTask} (with its own
 * side-input-prefixed metrics) for each task that has at least one side input SSP; register every side input
 * SSP with {@code sideInputSystemConsumers} at its starting offset and add offset gauges for it; start the
 * consumers; run a dedicated {@code RunLoop} on {@code sideInputsExecutor}; then block the calling thread
 * until {@code awaitSideInputTasks()} returns true, {@code shouldShutdown} is set, or the run loop records
 * an exception.
 *
 * @throws SamzaException if an SSP has no starting offset, if the side input run loop failed, or if the
 *     calling thread was interrupted while waiting
 */
private void startSideInputs() {
    LOG.info("SideInput Restore started");

    // initialize the sideInputStorageManagers
    getSideInputHandlers().forEach(TaskSideInputHandler::init);

    // index the (de-duplicated) handlers by the task they belong to
    Map<TaskName, TaskSideInputHandler> handlersByTask = this.sspSideInputHandlers.values().stream()
        .distinct()
        .collect(Collectors.toMap(TaskSideInputHandler::getTaskName, Function.identity()));

    Map<TaskName, TaskInstanceMetrics> sideInputTaskMetrics = new HashMap<>();
    Map<TaskName, RunLoopTask> sideInputTasks = new HashMap<>();

    // create a run loop task, and matching metrics, for every task that actually has side input SSPs
    this.taskSideInputStoreSSPs.forEach((taskName, storesToSSPs) -> {
        Set<SystemStreamPartition> sspsForTask = storesToSSPs.values().stream()
            .flatMap(Set::stream)
            .collect(Collectors.toSet());
        if (sspsForTask.isEmpty()) {
            // nothing to consume for this task
            return;
        }

        String metricsSource = SIDEINPUTS_METRICS_PREFIX + this.taskInstanceMetrics.get(taskName).source();
        TaskInstanceMetrics metricsForTask = new TaskInstanceMetrics(
            metricsSource, this.taskInstanceMetrics.get(taskName).registry(), SIDEINPUTS_METRICS_PREFIX);
        sideInputTaskMetrics.put(taskName, metricsForTask);

        RunLoopTask runLoopTask =
            new SideInputTask(taskName, sspsForTask, handlersByTask.get(taskName), metricsForTask);
        sideInputTasks.put(taskName, runLoopTask);
    });

    // register all sideInput SSPs with the consumers
    for (SystemStreamPartition ssp : this.sspSideInputHandlers.keySet()) {
        TaskSideInputHandler handler = this.sspSideInputHandlers.get(ssp);

        String offsetToStartFrom = handler.getStartingOffset(ssp);
        if (offsetToStartFrom == null) {
            throw new SamzaException("No starting offset could be obtained for SideInput SystemStreamPartition : " + ssp + ". Consumer cannot start.");
        }

        // register startingOffset with the sysConsumer and register a metric for it
        sideInputSystemConsumers.register(ssp, offsetToStartFrom);

        // the gauge is added to both the regular task metrics and the side-input task metrics;
        // the handler is looked up again inside each gauge function, i.e. when the gauge is evaluated
        TaskName owningTask = handler.getTaskName();
        taskInstanceMetrics.get(owningTask).addOffsetGauge(ssp,
            ScalaJavaUtil.toScalaFunction(() -> this.sspSideInputHandlers.get(ssp).getLastProcessedOffset(ssp)));
        sideInputTaskMetrics.get(owningTask).addOffsetGauge(ssp,
            ScalaJavaUtil.toScalaFunction(() -> this.sspSideInputHandlers.get(ssp).getLastProcessedOffset(ssp)));
    }

    // start the systemConsumers for consuming input
    this.sideInputSystemConsumers.start();

    TaskConfig taskConfig = new TaskConfig(this.config);
    SamzaContainerMetrics sideInputContainerMetrics = new SamzaContainerMetrics(
        SIDEINPUTS_METRICS_PREFIX + this.samzaContainerMetrics.source(),
        this.samzaContainerMetrics.registry(),
        SIDEINPUTS_METRICS_PREFIX);

    this.sideInputRunLoop = new RunLoop(
        sideInputTasks,
        // all operations are executed in the main runloop thread
        null,
        this.sideInputSystemConsumers,
        // single message in flight per task
        1,
        // no windowing
        -1,
        taskConfig.getCommitMs(),
        taskConfig.getCallbackTimeoutMs(),
        // TODO consolidate these container configs SAMZA-2275
        this.config.getLong("container.disk.quota.delay.max.ms", TimeUnit.SECONDS.toMillis(1)),
        taskConfig.getMaxIdleMs(),
        sideInputContainerMetrics,
        System::nanoTime,
        // commit must be synchronous to ensure integrity of state flush
        false);

    try {
        // run the side input loop in the background, recording any failure for the waiting thread to pick up
        sideInputsExecutor.submit(() -> {
            try {
                sideInputRunLoop.run();
            } catch (Exception e) {
                LOG.error("Exception in reading sideInputs", e);
                sideInputException = e;
            }
        });

        // Make the main thread wait until all sideInputs have been caughtup or an exception was thrown
        while (!(shouldShutdown || sideInputException != null || awaitSideInputTasks())) {
            LOG.debug("Waiting for SideInput bootstrap to complete");
        }

        // Throw exception if there was an exception in catching-up sideInputs
        if (sideInputException != null) {
            throw new SamzaException("Exception in restoring sideInputs", sideInputException);
        }
    } catch (InterruptedException e) {
        LOG.warn("Received an interrupt during side inputs store restoration."
            + " Exiting prematurely without completing store restore.");
        /*
         * We want to stop side input restoration and rethrow the exception upstream. Container should handle the
         * interrupt exception and shutdown the components and cleaning up the resource. We don't want to clean up the
         * resources prematurely here.
         */
        // todo: should we cancel the flush future right away or wait for container to handle it as part of shutdown sequence?
        shouldShutdown = true;
        throw new SamzaException("Side inputs read was interrupted", e);
    }

    LOG.info("SideInput Restore complete");
}
Also used : HashMap(java.util.HashMap) TaskInstanceMetrics(org.apache.samza.container.TaskInstanceMetrics) TaskConfig(org.apache.samza.config.TaskConfig) SamzaException(org.apache.samza.SamzaException) SamzaException(org.apache.samza.SamzaException) TaskName(org.apache.samza.container.TaskName) RunLoopTask(org.apache.samza.container.RunLoopTask) RunLoop(org.apache.samza.container.RunLoop) SamzaContainerMetrics(org.apache.samza.container.SamzaContainerMetrics) SystemStreamPartition(org.apache.samza.system.SystemStreamPartition)

Aggregations

HashMap (java.util.HashMap)1 SamzaException (org.apache.samza.SamzaException)1 TaskConfig (org.apache.samza.config.TaskConfig)1 RunLoop (org.apache.samza.container.RunLoop)1 RunLoopTask (org.apache.samza.container.RunLoopTask)1 SamzaContainerMetrics (org.apache.samza.container.SamzaContainerMetrics)1 TaskInstanceMetrics (org.apache.samza.container.TaskInstanceMetrics)1 TaskName (org.apache.samza.container.TaskName)1 SystemStreamPartition (org.apache.samza.system.SystemStreamPartition)1