Search in sources :

Example 66 with TaskName

use of org.apache.samza.container.TaskName in project samza by apache.

the class TestStartpointManager method testStaleStartpoints.

/**
 * Writes a startpoint whose timestamps lie just beyond the default expiration window and checks that
 * reading it back yields nothing, both for the SSP-only key and for the SSP + task name key.
 */
@Test
public void testStaleStartpoints() {
    final SystemStreamPartition ssp = new SystemStreamPartition("mockSystem", "mockStream", new Partition(2));
    final TaskName taskName = new TaskName("MockTask");

    // Two milliseconds older than the default expiration duration, measured from "now".
    final long nowMillis = Instant.now().toEpochMilli();
    final long expirationMillis = StartpointManager.DEFAULT_EXPIRATION_DURATION.toMillis();
    final long staleTimestamp = nowMillis - expirationMillis - 2;
    final StartpointTimestamp startpoint = new StartpointTimestamp(staleTimestamp, staleTimestamp);

    // Keyed by SSP only.
    startpointManager.writeStartpoint(ssp, startpoint);
    final boolean sspStartpointPresent = startpointManager.readStartpoint(ssp).isPresent();
    Assert.assertFalse(sspStartpointPresent);

    // Keyed by SSP and task name.
    startpointManager.writeStartpoint(ssp, taskName, startpoint);
    final boolean taskStartpointPresent = startpointManager.readStartpoint(ssp, taskName).isPresent();
    Assert.assertFalse(taskStartpointPresent);
}
Also used : Partition(org.apache.samza.Partition) SystemStreamPartition(org.apache.samza.system.SystemStreamPartition) TaskName(org.apache.samza.container.TaskName) Test(org.junit.Test)

Example 67 with TaskName

use of org.apache.samza.container.TaskName in project samza by apache.

the class AllSspToSingleTaskGrouperFactory method group.

/**
 * Maps one task name per entry of {@code processorList} to the full set of input partitions.
 *
 * <p>Every task name is associated with the very same {@code ssps} instance that was passed in; the
 * set is not copied.
 *
 * @param ssps the input system stream partitions; must be neither {@code null} nor empty
 * @return a map from {@code Task-<processor>} task names to {@code ssps}
 * @throws SamzaException if {@code ssps} is {@code null} or empty
 */
@Override
public Map<TaskName, Set<SystemStreamPartition>> group(final Set<SystemStreamPartition> ssps) {
    if (ssps == null) {
        throw new SamzaException("ssp set cannot be null!");
    }
    if (ssps.isEmpty()) {
        throw new SamzaException("Cannot process stream task with no input system stream partitions");
    }

    final Map<TaskName, Set<SystemStreamPartition>> tasksToSsps = new HashMap<>();
    // One task name per processor, each one owning all of the partitions.
    processorList.forEach(processorId -> tasksToSsps.put(new TaskName(String.format("Task-%s", processorId)), ssps));
    return tasksToSsps;
}
Also used : Set(java.util.Set) TaskName(org.apache.samza.container.TaskName) HashMap(java.util.HashMap) SamzaException(org.apache.samza.SamzaException)

Example 68 with TaskName

use of org.apache.samza.container.TaskName in project samza by apache.

the class GroupBySystemStreamPartition method group.

/**
 * Creates one task per (system stream partition, key bucket) pair and then adds every broadcast
 * stream partition to each of those tasks.
 *
 * <p>Partitions contained in {@code broadcastStreams} never get a task of their own. For every other
 * partition, {@code elasticityFactor} tasks are created; the key bucket is {@code -1} when
 * {@code elasticityFactor} is 1 and the loop index otherwise. Each task is named after the string
 * form of its key-bucketed partition.
 *
 * @param ssps the input system stream partitions to group
 * @return a map from task name to the partitions assigned to that task
 */
@Override
public Map<TaskName, Set<SystemStreamPartition>> group(Set<SystemStreamPartition> ssps) {
    Map<TaskName, Set<SystemStreamPartition>> tasksToSsps = new HashMap<>();

    for (SystemStreamPartition inputSsp : ssps) {
        // Broadcast partitions are handled after this loop.
        if (!broadcastStreams.contains(inputSsp)) {
            for (int bucketIndex = 0; bucketIndex < elasticityFactor; bucketIndex++) {
                int keyBucket;
                if (elasticityFactor == 1) {
                    keyBucket = -1;
                } else {
                    keyBucket = bucketIndex;
                }
                SystemStreamPartition bucketedSsp = new SystemStreamPartition(inputSsp, keyBucket);
                // Must stay mutable: broadcast partitions may be appended below.
                HashSet<SystemStreamPartition> taskSsps = new HashSet<>();
                taskSsps.add(bucketedSsp);
                tasksToSsps.put(new TaskName(bucketedSsp.toString()), taskSsps);
            }
        }
    }

    // Every task additionally receives all broadcast partitions.
    if (!broadcastStreams.isEmpty()) {
        tasksToSsps.values().forEach(taskSsps -> taskSsps.addAll(broadcastStreams));
    }
    return tasksToSsps;
}
Also used : HashSet(java.util.HashSet) Set(java.util.Set) TaskName(org.apache.samza.container.TaskName) HashMap(java.util.HashMap) SystemStreamPartition(org.apache.samza.system.SystemStreamPartition)

Example 69 with TaskName

use of org.apache.samza.container.TaskName in project samza by apache.

the class ContainerStorageManager method startSideInputs.

/**
 * Read sideInputs until all sideInputStreams are caughtup, so start() can return.
 *
 * <p>Steps, in order: initialize every side-input handler, build one {@link SideInputTask} (with its own
 * prefixed metrics) per task that has at least one side-input SSP, register every side-input SSP with
 * {@code sideInputSystemConsumers} at its starting offset, start the consumers, submit a dedicated
 * {@link RunLoop} to {@code sideInputsExecutor}, and then block the calling thread until
 * {@code awaitSideInputTasks()} returns true, {@code shouldShutdown} is set, or the run loop reports an
 * exception.
 *
 * @throws SamzaException if no starting offset is available for a side-input SSP, if the run loop failed,
 *         or if the wait was interrupted
 */
private void startSideInputs() {
    LOG.info("SideInput Restore started");
    // initialize the sideInputStorageManagers
    getSideInputHandlers().forEach(TaskSideInputHandler::init);
    // Index the distinct handlers by the task name each one reports.
    Map<TaskName, TaskSideInputHandler> taskSideInputHandlers = this.sspSideInputHandlers.values().stream().distinct().collect(Collectors.toMap(TaskSideInputHandler::getTaskName, Function.identity()));
    Map<TaskName, TaskInstanceMetrics> sideInputTaskMetrics = new HashMap<>();
    Map<TaskName, RunLoopTask> sideInputTasks = new HashMap<>();
    this.taskSideInputStoreSSPs.forEach((taskName, storesToSSPs) -> {
        // Flatten the per-store SSP sets of this task into a single set.
        // NOTE(review): storesToSSPs looks like the same value this lookup returns - confirm and consider using it.
        Set<SystemStreamPartition> taskSSPs = this.taskSideInputStoreSSPs.get(taskName).values().stream().flatMap(Set::stream).collect(Collectors.toSet());
        // Tasks without any side-input SSP get neither metrics nor a run loop task.
        if (!taskSSPs.isEmpty()) {
            // Side-input metrics reuse the task's registry but use a prefixed source.
            String sideInputSource = SIDEINPUTS_METRICS_PREFIX + this.taskInstanceMetrics.get(taskName).source();
            TaskInstanceMetrics sideInputMetrics = new TaskInstanceMetrics(sideInputSource, this.taskInstanceMetrics.get(taskName).registry(), SIDEINPUTS_METRICS_PREFIX);
            sideInputTaskMetrics.put(taskName, sideInputMetrics);
            RunLoopTask sideInputTask = new SideInputTask(taskName, taskSSPs, taskSideInputHandlers.get(taskName), sideInputTaskMetrics.get(taskName));
            sideInputTasks.put(taskName, sideInputTask);
        }
    });
    // register all sideInput SSPs with the consumers
    for (SystemStreamPartition ssp : this.sspSideInputHandlers.keySet()) {
        String startingOffset = this.sspSideInputHandlers.get(ssp).getStartingOffset(ssp);
        if (startingOffset == null) {
            throw new SamzaException("No starting offset could be obtained for SideInput SystemStreamPartition : " + ssp + ". Consumer cannot start.");
        }
        // register startingOffset with the sysConsumer and register a metric for it
        sideInputSystemConsumers.register(ssp, startingOffset);
        // The last-processed-offset gauge is added to both the task's regular metrics and its side-input metrics.
        taskInstanceMetrics.get(this.sspSideInputHandlers.get(ssp).getTaskName()).addOffsetGauge(ssp, ScalaJavaUtil.toScalaFunction(() -> this.sspSideInputHandlers.get(ssp).getLastProcessedOffset(ssp)));
        sideInputTaskMetrics.get(this.sspSideInputHandlers.get(ssp).getTaskName()).addOffsetGauge(ssp, ScalaJavaUtil.toScalaFunction(() -> this.sspSideInputHandlers.get(ssp).getLastProcessedOffset(ssp)));
    }
    // start the systemConsumers for consuming input
    this.sideInputSystemConsumers.start();
    TaskConfig taskConfig = new TaskConfig(this.config);
    // Container-level metrics for the side-input run loop, again with a prefixed source.
    SamzaContainerMetrics sideInputContainerMetrics = new SamzaContainerMetrics(SIDEINPUTS_METRICS_PREFIX + this.samzaContainerMetrics.source(), this.samzaContainerMetrics.registry(), SIDEINPUTS_METRICS_PREFIX);
    this.sideInputRunLoop = new RunLoop(sideInputTasks, // all operations are executed in the main runloop thread
    null, this.sideInputSystemConsumers, // single message in flight per task
    1, // no windowing
    -1, taskConfig.getCommitMs(), taskConfig.getCallbackTimeoutMs(), // TODO consolidate these container configs SAMZA-2275
    this.config.getLong("container.disk.quota.delay.max.ms", TimeUnit.SECONDS.toMillis(1)), taskConfig.getMaxIdleMs(), sideInputContainerMetrics, System::nanoTime, // commit must be synchronous to ensure integrity of state flush
    false);
    try {
        // Run the side-input loop on the executor; a failure is logged and handed to this thread via sideInputException.
        sideInputsExecutor.submit(() -> {
            try {
                sideInputRunLoop.run();
            } catch (Exception e) {
                LOG.error("Exception in reading sideInputs", e);
                sideInputException = e;
            }
        });
        // Make the main thread wait until all sideInputs have been caughtup or an exception was thrown
        // (presumably awaitSideInputTasks() blocks with a timeout and is the source of InterruptedException - confirm)
        while (!shouldShutdown && sideInputException == null && !awaitSideInputTasks()) {
            LOG.debug("Waiting for SideInput bootstrap to complete");
        }
        if (sideInputException != null) {
            // Throw exception if there was an exception in catching-up sideInputs
            throw new SamzaException("Exception in restoring sideInputs", sideInputException);
        }
    } catch (InterruptedException e) {
        LOG.warn("Received an interrupt during side inputs store restoration." + " Exiting prematurely without completing store restore.");
        /*
       * We want to stop side input restoration and rethrow the exception upstream. Container should handle the
       * interrupt exception and shutdown the components and cleaning up the resource. We don't want to clean up the
       * resources prematurely here.
       */
        // todo: should we cancel the flush future right away or wait for container to handle it as part of shutdown sequence?
        // NOTE(review): the thread's interrupt flag is not restored here; the interrupt is only surfaced as the
        // cause of the SamzaException below - confirm callers do not rely on Thread.isInterrupted().
        shouldShutdown = true;
        throw new SamzaException("Side inputs read was interrupted", e);
    }
    LOG.info("SideInput Restore complete");
}
Also used : HashMap(java.util.HashMap) TaskInstanceMetrics(org.apache.samza.container.TaskInstanceMetrics) TaskConfig(org.apache.samza.config.TaskConfig) SamzaException(org.apache.samza.SamzaException) TaskName(org.apache.samza.container.TaskName) RunLoopTask(org.apache.samza.container.RunLoopTask) RunLoop(org.apache.samza.container.RunLoop) SamzaContainerMetrics(org.apache.samza.container.SamzaContainerMetrics) SystemStreamPartition(org.apache.samza.system.SystemStreamPartition)

Example 70 with TaskName

use of org.apache.samza.container.TaskName in project samza by apache.

the class ZkJobCoordinator method onNewJobModel.

/**
 * Handles a job model that the quorum has agreed upon.
 *
 * <p>If the job model had been marked as expired, the flag is cleared and, provided this processor is
 * one of the containers in the new model, the locality of each of its tasks is written and the
 * registered {@link JobCoordinatorListener} (if any) is notified. If the job model was not expired,
 * nothing is written or notified. In both cases the new model becomes the active job model.
 *
 * @param newJobModel new job model agreed by the quorum; must not be {@code null}
 */
@VisibleForTesting
void onNewJobModel(JobModel newJobModel) {
    Preconditions.checkNotNull(newJobModel, "JobModel cannot be null. Failing onNewJobModel");

    // Atomically consume the "expired" marker; true means the work assignment changed.
    final boolean workAssignmentChanged = jobModelExpired.compareAndSet(true, false);
    if (!workAssignmentChanged) {
        /*
         * The job model is not expired when the proposed work assignment equals the current one. That covers
         * two situations, and in both the processor simply stays in the state it had when the rebalance began:
         *   1. The processor is part of the job model: it keeps processing its assignment uninterrupted, i.e.
         *      the existing work is not expired (the samza container is not stopped) and the StreamProcessor
         *      is not told about the rebalance, since its assignment did not change.
         *   2. The processor is not part of the job model: it had no work, was idle, and remains idle.
         */
        LOG.info("Skipping onNewJobModel since there are no changes in work assignment.");
    } else {
        // start the container with the new model
        LOG.info("Work assignment changed for the processor {}. Updating task locality and notifying coordinator listener", processorId);
        final boolean processorInModel = newJobModel.getContainers().containsKey(processorId);
        if (processorInModel) {
            for (TaskName assignedTask : JobModelUtil.getTaskNamesForProcessor(processorId, newJobModel)) {
                zkUtils.writeTaskLocality(assignedTask, locationId);
            }
            if (coordinatorListener != null) {
                coordinatorListener.onNewJobModel(processorId, newJobModel);
            }
        }
    }

    /*
     * Always record the new job model as the last active one, whether or not this processor's work
     * assignment changed, so that all processors share a consistent view of the latest active job model.
     */
    activeJobModel = newJobModel;
}
Also used : TaskName(org.apache.samza.container.TaskName) VisibleForTesting(com.google.common.annotations.VisibleForTesting)

Aggregations

TaskName (org.apache.samza.container.TaskName)212 HashMap (java.util.HashMap)136 Test (org.junit.Test)133 SystemStreamPartition (org.apache.samza.system.SystemStreamPartition)117 Partition (org.apache.samza.Partition)106 MapConfig (org.apache.samza.config.MapConfig)92 TaskModel (org.apache.samza.job.model.TaskModel)90 Map (java.util.Map)75 Set (java.util.Set)73 Config (org.apache.samza.config.Config)71 ContainerModel (org.apache.samza.job.model.ContainerModel)63 ImmutableMap (com.google.common.collect.ImmutableMap)53 File (java.io.File)53 SystemStream (org.apache.samza.system.SystemStream)52 ImmutableSet (com.google.common.collect.ImmutableSet)50 TaskMode (org.apache.samza.job.model.TaskMode)46 TaskConfig (org.apache.samza.config.TaskConfig)43 ImmutableList (com.google.common.collect.ImmutableList)42 Collections (java.util.Collections)41 CheckpointId (org.apache.samza.checkpoint.CheckpointId)41