use of org.apache.samza.container.TaskName in project samza by apache.
the class TestStartpointManager method testStaleStartpoints.
@Test
public void testStaleStartpoints() {
  SystemStreamPartition ssp = new SystemStreamPartition("mockSystem", "mockStream", new Partition(2));
  TaskName taskName = new TaskName("MockTask");
  // Creation timestamp falls just outside the expiration window, making the startpoint stale.
  long staleTimestamp = Instant.now().toEpochMilli() - StartpointManager.DEFAULT_EXPIRATION_DURATION.toMillis() - 2;
  StartpointTimestamp startpoint = new StartpointTimestamp(staleTimestamp, staleTimestamp);
  // A stale startpoint keyed only by SSP should not be returned on read.
  startpointManager.writeStartpoint(ssp, startpoint);
  Assert.assertFalse(startpointManager.readStartpoint(ssp).isPresent());
  // The same holds for a stale startpoint keyed by SSP and TaskName.
  startpointManager.writeStartpoint(ssp, taskName, startpoint);
  Assert.assertFalse(startpointManager.readStartpoint(ssp, taskName).isPresent());
}
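For contrast, a minimal sketch of the non-stale case, reusing only the calls and variables shown in the test above (freshTimestamp and freshStartpoint are illustrative names); a startpoint created just now is assumed to be within the expiration window and therefore readable:
long freshTimestamp = Instant.now().toEpochMilli();
StartpointTimestamp freshStartpoint = new StartpointTimestamp(freshTimestamp, freshTimestamp);
startpointManager.writeStartpoint(ssp, freshStartpoint);
// Created within the expiration window, so the read is expected to return a value.
Assert.assertTrue(startpointManager.readStartpoint(ssp).isPresent());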
use of org.apache.samza.container.TaskName in project samza by apache.
the class AllSspToSingleTaskGrouperFactory method group.
@Override
public Map<TaskName, Set<SystemStreamPartition>> group(final Set<SystemStreamPartition> ssps) {
  Map<TaskName, Set<SystemStreamPartition>> groupedMap = new HashMap<>();
  if (ssps == null) {
    throw new SamzaException("ssp set cannot be null!");
  }
  if (ssps.size() == 0) {
    throw new SamzaException("Cannot process stream task with no input system stream partitions");
  }
  processorList.forEach(processor -> {
    // Create a task name for each processor and assign all partitions to each task name.
    final TaskName taskName = new TaskName(String.format("Task-%s", processor));
    groupedMap.put(taskName, ssps);
  });
  return groupedMap;
}
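A small, hypothetical illustration of the resulting assignment (system, stream, and processor names are made up): every task produced by this grouper receives the full SSP set.
Set<SystemStreamPartition> ssps = new HashSet<>();
ssps.add(new SystemStreamPartition("kafka", "pageviews", new Partition(0)));
ssps.add(new SystemStreamPartition("kafka", "pageviews", new Partition(1)));
// Assuming processorList contains "1" and "2", group(ssps) yields one task per processor,
// each assigned ALL partitions:
//   Task-1 -> {SSP[kafka,pageviews,0], SSP[kafka,pageviews,1]}
//   Task-2 -> {SSP[kafka,pageviews,0], SSP[kafka,pageviews,1]}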
use of org.apache.samza.container.TaskName in project samza by apache.
the class GroupBySystemStreamPartition method group.
@Override
public Map<TaskName, Set<SystemStreamPartition>> group(Set<SystemStreamPartition> ssps) {
  Map<TaskName, Set<SystemStreamPartition>> groupedMap = new HashMap<TaskName, Set<SystemStreamPartition>>();
  for (SystemStreamPartition ssp : ssps) {
    if (broadcastStreams.contains(ssp)) {
      continue;
    }
    // With elasticity (elasticityFactor > 1), create one task per keyBucket so that each task
    // processes only the portion of the SSP's keyspace corresponding to that keyBucket.
    for (int i = 0; i < elasticityFactor; i++) {
      int keyBucket = elasticityFactor == 1 ? -1 : i;
      SystemStreamPartition sspWithKeyBucket = new SystemStreamPartition(ssp, keyBucket);
      HashSet<SystemStreamPartition> sspSet = new HashSet<SystemStreamPartition>();
      sspSet.add(sspWithKeyBucket);
      groupedMap.put(new TaskName(sspWithKeyBucket.toString()), sspSet);
    }
  }
  // Assign the broadcast streams to all the taskNames.
  if (!broadcastStreams.isEmpty()) {
    for (Set<SystemStreamPartition> value : groupedMap.values()) {
      value.addAll(broadcastStreams);
    }
  }
  return groupedMap;
}
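A hypothetical sketch of the output shape (names illustrative; task names are the toString of the keyBucket-qualified SSP, shown here informally):
Set<SystemStreamPartition> ssps = new HashSet<>();
ssps.add(new SystemStreamPartition("kafka", "pageviews", new Partition(0)));
// With elasticityFactor = 1, one task per SSP, keyBucket = -1:
//   TaskName for the SSP -> {that SSP}
// With elasticityFactor = 2, two tasks per SSP, one per keyBucket (0 and 1):
//   TaskName for keyBucket 0 -> {SSP with keyBucket 0}
//   TaskName for keyBucket 1 -> {SSP with keyBucket 1}
// Any broadcast SSPs are then added to every task's set.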
use of org.apache.samza.container.TaskName in project samza by apache.
the class ContainerStorageManager method startSideInputs.
// Read sideInputs until all sideInputStreams are caught up, so start() can return.
private void startSideInputs() {
  LOG.info("SideInput Restore started");
  // initialize the sideInputStorageManagers
  getSideInputHandlers().forEach(TaskSideInputHandler::init);
  Map<TaskName, TaskSideInputHandler> taskSideInputHandlers = this.sspSideInputHandlers.values().stream()
      .distinct()
      .collect(Collectors.toMap(TaskSideInputHandler::getTaskName, Function.identity()));
  Map<TaskName, TaskInstanceMetrics> sideInputTaskMetrics = new HashMap<>();
  Map<TaskName, RunLoopTask> sideInputTasks = new HashMap<>();
  this.taskSideInputStoreSSPs.forEach((taskName, storesToSSPs) -> {
    Set<SystemStreamPartition> taskSSPs = this.taskSideInputStoreSSPs.get(taskName).values().stream()
        .flatMap(Set::stream)
        .collect(Collectors.toSet());
    if (!taskSSPs.isEmpty()) {
      String sideInputSource = SIDEINPUTS_METRICS_PREFIX + this.taskInstanceMetrics.get(taskName).source();
      TaskInstanceMetrics sideInputMetrics = new TaskInstanceMetrics(sideInputSource, this.taskInstanceMetrics.get(taskName).registry(), SIDEINPUTS_METRICS_PREFIX);
      sideInputTaskMetrics.put(taskName, sideInputMetrics);
      RunLoopTask sideInputTask = new SideInputTask(taskName, taskSSPs, taskSideInputHandlers.get(taskName), sideInputTaskMetrics.get(taskName));
      sideInputTasks.put(taskName, sideInputTask);
    }
  });
  // register all sideInput SSPs with the consumers
  for (SystemStreamPartition ssp : this.sspSideInputHandlers.keySet()) {
    String startingOffset = this.sspSideInputHandlers.get(ssp).getStartingOffset(ssp);
    if (startingOffset == null) {
      throw new SamzaException("No starting offset could be obtained for SideInput SystemStreamPartition : " + ssp + ". Consumer cannot start.");
    }
    // register startingOffset with the sysConsumer and register a metric for it
    sideInputSystemConsumers.register(ssp, startingOffset);
    taskInstanceMetrics.get(this.sspSideInputHandlers.get(ssp).getTaskName())
        .addOffsetGauge(ssp, ScalaJavaUtil.toScalaFunction(() -> this.sspSideInputHandlers.get(ssp).getLastProcessedOffset(ssp)));
    sideInputTaskMetrics.get(this.sspSideInputHandlers.get(ssp).getTaskName())
        .addOffsetGauge(ssp, ScalaJavaUtil.toScalaFunction(() -> this.sspSideInputHandlers.get(ssp).getLastProcessedOffset(ssp)));
  }
  // start the systemConsumers for consuming input
  this.sideInputSystemConsumers.start();
  TaskConfig taskConfig = new TaskConfig(this.config);
  SamzaContainerMetrics sideInputContainerMetrics = new SamzaContainerMetrics(SIDEINPUTS_METRICS_PREFIX + this.samzaContainerMetrics.source(), this.samzaContainerMetrics.registry(), SIDEINPUTS_METRICS_PREFIX);
  this.sideInputRunLoop = new RunLoop(sideInputTasks,
      null, // all operations are executed in the main runloop thread
      this.sideInputSystemConsumers,
      1, // single message in flight per task
      -1, // no windowing
      taskConfig.getCommitMs(),
      taskConfig.getCallbackTimeoutMs(),
      this.config.getLong("container.disk.quota.delay.max.ms", TimeUnit.SECONDS.toMillis(1)), // TODO consolidate these container configs SAMZA-2275
      taskConfig.getMaxIdleMs(),
      sideInputContainerMetrics,
      System::nanoTime,
      false); // commit must be synchronous to ensure integrity of state flush
  try {
    sideInputsExecutor.submit(() -> {
      try {
        sideInputRunLoop.run();
      } catch (Exception e) {
        LOG.error("Exception in reading sideInputs", e);
        sideInputException = e;
      }
    });
    // Make the main thread wait until all sideInputs have caught up or an exception was thrown.
    while (!shouldShutdown && sideInputException == null && !awaitSideInputTasks()) {
      LOG.debug("Waiting for SideInput bootstrap to complete");
    }
    if (sideInputException != null) {
      // Rethrow if an exception occurred while catching up the sideInputs.
      throw new SamzaException("Exception in restoring sideInputs", sideInputException);
    }
  } catch (InterruptedException e) {
    LOG.warn("Received an interrupt during side inputs store restoration." + " Exiting prematurely without completing store restore.");
    /*
     * We want to stop side input restoration and rethrow the exception upstream. The container should handle the
     * interrupt, shut down its components, and clean up the resources. We don't want to clean up the resources
     * prematurely here.
     */
    // TODO: should we cancel the flush future right away, or wait for the container to handle it as part of the shutdown sequence?
    shouldShutdown = true;
    throw new SamzaException("Side inputs read was interrupted", e);
  }
  LOG.info("SideInput Restore complete");
}
use of org.apache.samza.container.TaskName in project samza by apache.
the class ZkJobCoordinator method onNewJobModel.
/**
 * Checks if the new job model contains a different work assignment for the processor compared to the last active
 * job model. If the work assignment differs, updates the task locality of the tasks associated with the
 * processor and notifies the registered {@link JobCoordinatorListener} of the new job model.
 *
 * @param newJobModel new job model agreed by the quorum
 */
@VisibleForTesting
void onNewJobModel(JobModel newJobModel) {
  Preconditions.checkNotNull(newJobModel, "JobModel cannot be null. Failing onNewJobModel");
  // start the container with the new model
  if (jobModelExpired.compareAndSet(true, false)) {
    LOG.info("Work assignment changed for the processor {}. Updating task locality and notifying coordinator listener", processorId);
    if (newJobModel.getContainers().containsKey(processorId)) {
      for (TaskName taskName : JobModelUtil.getTaskNamesForProcessor(processorId, newJobModel)) {
        zkUtils.writeTaskLocality(taskName, locationId);
      }
      if (coordinatorListener != null) {
        coordinatorListener.onNewJobModel(processorId, newJobModel);
      }
    }
  } else {
    /*
     * We don't expire the job model if the proposed work assignment is the same as the current work assignment.
     * A processor whose work assignment is unchanged falls into one of two cases:
     * 1. The processor is part of the job model.
     * 2. The processor is not part of the job model.
     * In both cases, the state of the processor remains what it was when the rebalance started. e.g.,
     * [1] should continue processing its work assignment without any interruption as part of the rebalance, i.e.,
     * there is no expiration of the existing work (a.k.a. the samza container won't be stopped) and also no
     * notification to the StreamProcessor about the rebalance, since the work assignment didn't change.
     * [2] should have no work and should continue to be an idle processor.
     */
    LOG.info("Skipping onNewJobModel since there are no changes in work assignment.");
  }
  /*
   * Update the last active job model to the new job model regardless of whether the work assignment for the
   * processor has changed or not. This is important so that all the processors have a consistent view of what the
   * latest active job model is.
   */
  activeJobModel = newJobModel;
}
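A minimal, hypothetical sketch of the gating used above, assuming jobModelExpired is an AtomicBoolean that is flipped to true when the coordinator expires the current job model (e.g. at the start of a rebalance); compareAndSet(true, false) then ensures only the first onNewJobModel call after the expiry performs the locality update and listener notification:
AtomicBoolean jobModelExpired = new AtomicBoolean(false);  // assumption: same type as the field used above
jobModelExpired.set(true);                                 // rebalance: current job model expired
boolean firstCall = jobModelExpired.compareAndSet(true, false);   // true -> update locality and notify listener
boolean repeatCall = jobModelExpired.compareAndSet(true, false);  // false -> skip, work assignment unchanged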