use of org.apache.samza.container.grouper.task.TaskPartitionAssignmentManager in project samza by apache.
the class StaticResourceJobCoordinatorFactory method buildJobModelHelper.
/**
 * Creates a {@link JobModelHelper} whose managers all sit on top of the supplied metadata store.
 *
 * <p>Every manager receives its own {@link NamespaceAwareCoordinatorStreamStore} wrapping {@code metadataStore},
 * each constructed with the {@code TYPE} constant of the mapping message it deals with.
 *
 * @param metadataStore underlying store wrapped by each namespace-aware store
 * @param streamMetadataCache passed through unchanged to the {@link JobModelHelper}
 * @return a helper wired with the locality, task assignment and task partition assignment managers
 */
private static JobModelHelper buildJobModelHelper(MetadataStore metadataStore, StreamMetadataCache streamMetadataCache) {
  // Locality: container -> host mappings.
  NamespaceAwareCoordinatorStreamStore containerHostStore =
      new NamespaceAwareCoordinatorStreamStore(metadataStore, SetContainerHostMapping.TYPE);
  LocalityManager localityManager = new LocalityManager(containerHostStore);

  // Task assignment: task -> container mappings plus task -> mode mappings.
  NamespaceAwareCoordinatorStreamStore taskContainerStore =
      new NamespaceAwareCoordinatorStreamStore(metadataStore, SetTaskContainerMapping.TYPE);
  NamespaceAwareCoordinatorStreamStore taskModeStore =
      new NamespaceAwareCoordinatorStreamStore(metadataStore, SetTaskModeMapping.TYPE);
  TaskAssignmentManager taskAssignmentManager = new TaskAssignmentManager(taskContainerStore, taskModeStore);

  // Task partition assignment: task <-> partition mappings.
  NamespaceAwareCoordinatorStreamStore taskPartitionStore =
      new NamespaceAwareCoordinatorStreamStore(metadataStore, SetTaskPartitionMapping.TYPE);
  TaskPartitionAssignmentManager taskPartitionAssignmentManager =
      new TaskPartitionAssignmentManager(taskPartitionStore);

  return new JobModelHelper(
      localityManager,
      taskAssignmentManager,
      taskPartitionAssignmentManager,
      streamMetadataCache,
      JobModelCalculator.INSTANCE);
}
use of org.apache.samza.container.grouper.task.TaskPartitionAssignmentManager in project samza by apache.
the class JobModelHelper method updateTaskAssignments.
/**
 * Persists the task assignments of the given job model to the metadata store.
 *
 * <p>This method does the following:
 * <ol>
 *   <li>If the number of active tasks in {@code jobModel} differs from the number of previously assigned tasks,
 *       deletes the previous task-to-container mappings and the task-partition assignments of every partition
 *       consumed in {@code jobModel}.</li>
 *   <li>If the set of standby tasks differs from the previously stored one, deletes the task-to-container
 *       mappings of the previous standby tasks.</li>
 *   <li>Writes the newly generated container-to-task and partition-to-task assignments through
 *       {@code taskAssignmentManager} and {@code taskPartitionAssignmentManager}.</li>
 * </ol>
 *
 * @param jobModel represents the {@link JobModel} of the samza job.
 * @param taskAssignmentManager required to persist the processor to task assignments to the metadata store.
 * @param taskPartitionAssignmentManager required to persist the task to partition assignments to the metadata store.
 * @param grouperMetadata provides the historical metadata of the samza application.
 */
private void updateTaskAssignments(JobModel jobModel, TaskAssignmentManager taskAssignmentManager, TaskPartitionAssignmentManager taskPartitionAssignmentManager, GrouperMetadata grouperMetadata) {
  LOG.info("Storing the task assignments into metadata store.");

  // Collect the names of the active and standby tasks and every partition consumed in the new job model.
  Set<String> activeTaskNames = new HashSet<>();
  Set<String> standbyTaskNames = new HashSet<>();
  Set<SystemStreamPartition> systemStreamPartitions = new HashSet<>();
  for (ContainerModel containerModel : jobModel.getContainers().values()) {
    for (TaskModel taskModel : containerModel.getTasks().values()) {
      if (TaskMode.Active.equals(taskModel.getTaskMode())) {
        activeTaskNames.add(taskModel.getTaskName().getTaskName());
      }
      if (TaskMode.Standby.equals(taskModel.getTaskMode())) {
        standbyTaskNames.add(taskModel.getTaskName().getTaskName());
      }
      systemStreamPartitions.addAll(taskModel.getSystemStreamPartitions());
    }
  }

  Map<TaskName, String> previousTaskToContainerId = grouperMetadata.getPreviousTaskToProcessorAssignment();
  if (activeTaskNames.size() != previousTaskToContainerId.size()) {
    LOG.warn(String.format("Current task count %s does not match saved task count %s. Stateful jobs may observe misalignment of keys!", activeTaskNames.size(), previousTaskToContainerId.size()));
    // If the tasks changed, then the partition-task grouping is also likely changed and we can't handle that
    // without a much more complicated mapping. Further, the partition count may have changed, which means
    // input message keys are likely reshuffled w.r.t. partitions, so the local state may not contain necessary
    // data associated with the incoming keys. Warn the user and default to grouper.
    // In this scenario the tasks may have been reduced, so we need to delete all the existing messages.
    taskAssignmentManager.deleteTaskContainerMappings(previousTaskToContainerId.keySet().stream().map(TaskName::getTaskName).collect(Collectors.toList()));
    taskPartitionAssignmentManager.delete(systemStreamPartitions);
  }

  // If the set of standby tasks has changed, e.g., when the replication-factor changed, or the active-tasks-set
  // has changed, we log it and delete the existing mapping for these tasks.
  Set<String> previousStandbyTasks = taskAssignmentManager.readTaskModes().entrySet().stream()
      .filter(taskNameToTaskModeEntry -> TaskMode.Standby.equals(taskNameToTaskModeEntry.getValue()))
      .map(taskNameToTaskModeEntry -> taskNameToTaskModeEntry.getKey().getTaskName())
      .collect(Collectors.toSet());
  if (!standbyTaskNames.equals(previousStandbyTasks)) {
    LOG.info(String.format("The set of standby tasks has changed, current standby tasks %s, previous standby tasks %s", standbyTaskNames, previousStandbyTasks));
    taskAssignmentManager.deleteTaskContainerMappings(previousStandbyTasks);
  }

  // Task to partition assignments is stored as SystemStreamPartition to list of TaskName in
  // coordinator stream. This is done due to the 1 MB value size limit in a kafka topic.
  Map<SystemStreamPartition, List<String>> sspToTaskNameMap = new HashMap<>();
  Map<String, Map<String, TaskMode>> taskContainerMappings = new HashMap<>();
  for (ContainerModel containerModel : jobModel.getContainers().values()) {
    containerModel.getTasks().forEach((taskName, taskModel) -> {
      // The per-container map is created lazily inside the task loop on purpose: a container without
      // tasks must not get an (empty) entry in the mappings that are written out.
      taskContainerMappings
          .computeIfAbsent(containerModel.getId(), containerId -> new HashMap<>())
          .put(taskName.getTaskName(), taskModel.getTaskMode());
      taskModel.getSystemStreamPartitions().forEach(systemStreamPartition ->
          sspToTaskNameMap
              .computeIfAbsent(systemStreamPartition, ssp -> new ArrayList<>())
              .add(taskName.getTaskName()));
    });
  }

  taskAssignmentManager.writeTaskContainerMappings(taskContainerMappings);
  taskPartitionAssignmentManager.writeTaskPartitionAssignments(sspToTaskNameMap);
}
use of org.apache.samza.container.grouper.task.TaskPartitionAssignmentManager in project samza by apache.
the class JobModelHelper method getGrouperMetadata.
/**
 * Assembles the {@link GrouperMetadata} from the state currently readable through the given managers.
 *
 * <p>Only tasks accepted by {@code isActiveTask} contribute to the task-to-processor, task-locality and
 * task-to-partition views; the processor locality is taken as returned by {@code getProcessorLocality}.
 *
 * @param config forwarded to {@code getProcessorLocality}
 * @param localityManager forwarded to {@code getProcessorLocality}
 * @param taskAssignmentManager source of the stored task modes and task-to-container assignments
 * @param taskPartitionAssignmentManager source of the stored partition-to-task assignments
 * @return a {@link GrouperMetadataImpl} built from the processor locality, task locality, task partition
 *         assignments and task-to-processor assignments
 */
private GrouperMetadata getGrouperMetadata(Config config, LocalityManager localityManager, TaskAssignmentManager taskAssignmentManager, TaskPartitionAssignmentManager taskPartitionAssignmentManager) {
  Map<String, LocationId> processorLocality = getProcessorLocality(config, localityManager);
  Map<TaskName, TaskMode> taskModes = taskAssignmentManager.readTaskModes();

  Map<TaskName, String> processorIdByTask = new HashMap<>();
  Map<TaskName, LocationId> localityByTask = new HashMap<>();
  // Task assignments are only picked up for active tasks; per the original note these are tasks that
  // have no task-mode or have an active task mode (the check itself lives in isActiveTask).
  taskAssignmentManager.readTaskAssignment().forEach((rawTaskName, processorId) -> {
    TaskName task = new TaskName(rawTaskName);
    if (!isActiveTask(task, taskModes)) {
      return;
    }
    processorIdByTask.put(task, processorId);
    if (processorLocality.containsKey(processorId)) {
      localityByTask.put(task, processorLocality.get(processorId));
    }
  });

  // The coordinator stream keeps the assignment as SystemStreamPartition -> list of task names (done due to
  // the 1 MB value size limit in a kafka topic). It is inverted here into task -> partitions so that it
  // can be wired into the JobModel.
  Map<SystemStreamPartition, List<String>> sspToTaskMapping = taskPartitionAssignmentManager.readTaskPartitionAssignments();
  Map<TaskName, List<SystemStreamPartition>> partitionsByTask = new HashMap<>();
  for (Map.Entry<SystemStreamPartition, List<String>> sspEntry : sspToTaskMapping.entrySet()) {
    SystemStreamPartition partition = sspEntry.getKey();
    for (String rawTaskName : sspEntry.getValue()) {
      TaskName task = new TaskName(rawTaskName);
      if (isActiveTask(task, taskModes)) {
        partitionsByTask.computeIfAbsent(task, unused -> new ArrayList<>()).add(partition);
      }
    }
  }

  return new GrouperMetadataImpl(processorLocality, localityByTask, partitionsByTask, processorIdByTask);
}
Aggregations