Search in sources :

Example 1 with TaskPartitionAssignmentManager

use of org.apache.samza.container.grouper.task.TaskPartitionAssignmentManager in project samza by apache.

In class StaticResourceJobCoordinatorFactory, method buildJobModelHelper:

/**
 * Assembles a {@link JobModelHelper} from managers that all sit on top of the supplied metadata store.
 *
 * <p>Every manager receives its own {@link NamespaceAwareCoordinatorStreamStore} wrapping {@code metadataStore},
 * each constructed with the type constant of the mapping message that manager deals with.
 *
 * @param metadataStore       the store wrapped by each {@link NamespaceAwareCoordinatorStreamStore}.
 * @param streamMetadataCache passed through unchanged to the {@link JobModelHelper}.
 * @return a helper wired with the locality, task-assignment and task-partition-assignment managers.
 */
private static JobModelHelper buildJobModelHelper(MetadataStore metadataStore, StreamMetadataCache streamMetadataCache) {
    NamespaceAwareCoordinatorStreamStore containerHostStore =
        new NamespaceAwareCoordinatorStreamStore(metadataStore, SetContainerHostMapping.TYPE);
    LocalityManager localityManager = new LocalityManager(containerHostStore);

    // The task assignment manager needs two stores: one for task-container mappings, one for task modes.
    NamespaceAwareCoordinatorStreamStore taskContainerStore =
        new NamespaceAwareCoordinatorStreamStore(metadataStore, SetTaskContainerMapping.TYPE);
    NamespaceAwareCoordinatorStreamStore taskModeStore =
        new NamespaceAwareCoordinatorStreamStore(metadataStore, SetTaskModeMapping.TYPE);
    TaskAssignmentManager taskAssignmentManager = new TaskAssignmentManager(taskContainerStore, taskModeStore);

    NamespaceAwareCoordinatorStreamStore taskPartitionStore =
        new NamespaceAwareCoordinatorStreamStore(metadataStore, SetTaskPartitionMapping.TYPE);
    TaskPartitionAssignmentManager taskPartitionAssignmentManager = new TaskPartitionAssignmentManager(taskPartitionStore);

    return new JobModelHelper(
        localityManager,
        taskAssignmentManager,
        taskPartitionAssignmentManager,
        streamMetadataCache,
        JobModelCalculator.INSTANCE);
}
Also used : NamespaceAwareCoordinatorStreamStore(org.apache.samza.coordinator.metadatastore.NamespaceAwareCoordinatorStreamStore) TaskPartitionAssignmentManager(org.apache.samza.container.grouper.task.TaskPartitionAssignmentManager) TaskAssignmentManager(org.apache.samza.container.grouper.task.TaskAssignmentManager) JobModelHelper(org.apache.samza.coordinator.JobModelHelper) LocalityManager(org.apache.samza.container.LocalityManager)

Example 2 with TaskPartitionAssignmentManager

use of org.apache.samza.container.grouper.task.TaskPartitionAssignmentManager in project samza by apache.

In class JobModelHelper, method updateTaskAssignments:

/**
 * Persists the task assignments described by the given job model.
 *
 * <p>This method does the following:
 * <ol>
 *   <li>If the number of active tasks in {@code jobModel} differs from the number of previously saved
 *       task-to-processor assignments, deletes those previous task-to-container mappings and deletes the
 *       task-partition assignments of every partition referenced by {@code jobModel}.</li>
 *   <li>If the set of standby tasks in {@code jobModel} differs from the previously stored standby tasks,
 *       deletes the task-to-container mappings of the previous standby tasks.</li>
 *   <li>Saves the newly generated task-to-container mappings through the {@link TaskAssignmentManager} and the
 *       partition-to-task assignments through the {@link TaskPartitionAssignmentManager}.</li>
 * </ol>
 *
 * @param jobModel              represents the {@link JobModel} of the samza job.
 * @param taskAssignmentManager required to persist the processor to task assignments to the metadata store.
 * @param taskPartitionAssignmentManager required to persist the task to partition assignments to the metadata store.
 * @param grouperMetadata       provides the historical metadata of the samza application.
 */
private void updateTaskAssignments(JobModel jobModel, TaskAssignmentManager taskAssignmentManager, TaskPartitionAssignmentManager taskPartitionAssignmentManager, GrouperMetadata grouperMetadata) {
    LOG.info("Storing the task assignments into metadata store.");
    Set<String> activeTaskNames = new HashSet<>();
    Set<String> standbyTaskNames = new HashSet<>();
    Set<SystemStreamPartition> systemStreamPartitions = new HashSet<>();
    for (ContainerModel containerModel : jobModel.getContainers().values()) {
        for (TaskModel taskModel : containerModel.getTasks().values()) {
            if (TaskMode.Active.equals(taskModel.getTaskMode())) {
                activeTaskNames.add(taskModel.getTaskName().getTaskName());
            }
            if (TaskMode.Standby.equals(taskModel.getTaskMode())) {
                standbyTaskNames.add(taskModel.getTaskName().getTaskName());
            }
            systemStreamPartitions.addAll(taskModel.getSystemStreamPartitions());
        }
    }
    Map<TaskName, String> previousTaskToContainerId = grouperMetadata.getPreviousTaskToProcessorAssignment();
    if (activeTaskNames.size() != previousTaskToContainerId.size()) {
        LOG.warn("Current task count {} does not match saved task count {}. Stateful jobs may observe misalignment of keys!",
            activeTaskNames.size(), previousTaskToContainerId.size());
        // If the tasks changed, then the partition-task grouping is also likely changed and we can't handle that
        // without a much more complicated mapping. Further, the partition count may have changed, which means
        // input message keys are likely reshuffled w.r.t. partitions, so the local state may not contain necessary
        // data associated with the incoming keys. Warn the user and default to grouper
        // In this scenario the tasks may have been reduced, so we need to delete all the existing messages
        List<String> previousTaskNames = previousTaskToContainerId.keySet().stream()
            .map(TaskName::getTaskName)
            .collect(Collectors.toList());
        taskAssignmentManager.deleteTaskContainerMappings(previousTaskNames);
        taskPartitionAssignmentManager.delete(systemStreamPartitions);
    }
    // if the set of standby tasks has changed, e.g., when the replication-factor changed, or the active-tasks-set has
    // changed, we log a warning and delete the existing mapping for these tasks
    Set<String> previousStandbyTasks = taskAssignmentManager.readTaskModes().entrySet().stream()
        .filter(taskNameToTaskModeEntry -> TaskMode.Standby.equals(taskNameToTaskModeEntry.getValue()))
        .map(taskNameToTaskModeEntry -> taskNameToTaskModeEntry.getKey().getTaskName())
        .collect(Collectors.toSet());
    if (!standbyTaskNames.equals(previousStandbyTasks)) {
        LOG.info("The set of standby tasks has changed, current standby tasks {}, previous standby tasks {}",
            standbyTaskNames, previousStandbyTasks);
        taskAssignmentManager.deleteTaskContainerMappings(previousStandbyTasks);
    }
    // Task to partition assignments is stored as SystemStreamPartition to list of TaskName in
    // coordinator stream. This is done due to the 1 MB value size limit in a kafka topic.
    Map<SystemStreamPartition, List<String>> sspToTaskNameMap = new HashMap<>();
    Map<String, Map<String, TaskMode>> taskContainerMappings = new HashMap<>();
    for (ContainerModel containerModel : jobModel.getContainers().values()) {
        containerModel.getTasks().forEach((taskName, taskModel) -> {
            // computeIfAbsent only allocates the nested collection when the key is first seen.
            taskContainerMappings
                .computeIfAbsent(containerModel.getId(), containerId -> new HashMap<>())
                .put(taskName.getTaskName(), taskModel.getTaskMode());
            taskModel.getSystemStreamPartitions().forEach(systemStreamPartition ->
                sspToTaskNameMap
                    .computeIfAbsent(systemStreamPartition, ssp -> new ArrayList<>())
                    .add(taskName.getTaskName()));
        });
    }
    taskAssignmentManager.writeTaskContainerMappings(taskContainerMappings);
    taskPartitionAssignmentManager.writeTaskPartitionAssignments(sspToTaskNameMap);
}
Also used : StreamMetadataCache(org.apache.samza.system.StreamMetadataCache) TaskPartitionAssignmentManager(org.apache.samza.container.grouper.task.TaskPartitionAssignmentManager) LoggerFactory(org.slf4j.LoggerFactory) JobConfig(org.apache.samza.config.JobConfig) GrouperMetadataImpl(org.apache.samza.container.grouper.task.GrouperMetadataImpl) HashMap(java.util.HashMap) TaskModel(org.apache.samza.job.model.TaskModel) SystemStreamPartition(org.apache.samza.system.SystemStreamPartition) StringUtils(org.apache.commons.lang3.StringUtils) ArrayList(java.util.ArrayList) LocationId(org.apache.samza.runtime.LocationId) HashSet(java.util.HashSet) Map(java.util.Map) GrouperMetadata(org.apache.samza.container.grouper.task.GrouperMetadata) TaskAssignmentManager(org.apache.samza.container.grouper.task.TaskAssignmentManager) JobModel(org.apache.samza.job.model.JobModel) TaskName(org.apache.samza.container.TaskName) Logger(org.slf4j.Logger) Set(java.util.Set) Collectors(java.util.stream.Collectors) LocalityManager(org.apache.samza.container.LocalityManager) List(java.util.List) TaskMode(org.apache.samza.job.model.TaskMode) ContainerModel(org.apache.samza.job.model.ContainerModel) ProcessorLocality(org.apache.samza.job.model.ProcessorLocality) Optional(java.util.Optional) Config(org.apache.samza.config.Config) HashMap(java.util.HashMap) ContainerModel(org.apache.samza.job.model.ContainerModel) TaskName(org.apache.samza.container.TaskName) ArrayList(java.util.ArrayList) List(java.util.List) HashMap(java.util.HashMap) Map(java.util.Map) TaskModel(org.apache.samza.job.model.TaskModel) HashSet(java.util.HashSet) SystemStreamPartition(org.apache.samza.system.SystemStreamPartition)

Example 3 with TaskPartitionAssignmentManager

use of org.apache.samza.container.grouper.task.TaskPartitionAssignmentManager in project samza by apache.

In class JobModelHelper, method getGrouperMetadata:

/**
 * Builds the {@link GrouperMetadata} from the locality, task-assignment and task-partition-assignment data that the
 * given managers return.
 *
 * <p>Only tasks for which {@code isActiveTask} returns true are included in the task-to-processor, task-locality
 * and task-to-partition maps.
 *
 * @param config                         passed to {@code getProcessorLocality}.
 * @param localityManager                passed to {@code getProcessorLocality}.
 * @param taskAssignmentManager          source of the stored task modes and task-to-container assignments.
 * @param taskPartitionAssignmentManager source of the stored partition-to-task assignments.
 * @return a {@link GrouperMetadataImpl} holding the processor locality, task locality, task-to-partition
 *         assignments and task-to-processor assignments.
 */
private GrouperMetadata getGrouperMetadata(Config config, LocalityManager localityManager, TaskAssignmentManager taskAssignmentManager, TaskPartitionAssignmentManager taskPartitionAssignmentManager) {
    Map<String, LocationId> processorLocality = getProcessorLocality(config, localityManager);
    Map<TaskName, TaskMode> taskModes = taskAssignmentManager.readTaskModes();

    Map<TaskName, String> taskNameToProcessorId = new HashMap<>();
    Map<TaskName, LocationId> taskLocality = new HashMap<>();
    // Only active tasks (per isActiveTask) contribute to the processor and locality maps.
    taskAssignmentManager.readTaskAssignment().forEach((storedTaskName, processorId) -> {
        TaskName taskName = new TaskName(storedTaskName);
        if (!isActiveTask(taskName, taskModes)) {
            return;
        }
        taskNameToProcessorId.put(taskName, processorId);
        if (processorLocality.containsKey(processorId)) {
            taskLocality.put(taskName, processorLocality.get(processorId));
        }
    });

    Map<SystemStreamPartition, List<String>> sspToTaskMapping = taskPartitionAssignmentManager.readTaskPartitionAssignments();
    Map<TaskName, List<SystemStreamPartition>> taskPartitionAssignments = new HashMap<>();
    // The stored form is partition -> task names (kept that way because of the 1 MB value size limit in a
    // kafka topic); invert it here into task -> partitions.
    for (Map.Entry<SystemStreamPartition, List<String>> assignment : sspToTaskMapping.entrySet()) {
        SystemStreamPartition partition = assignment.getKey();
        for (String storedTaskName : assignment.getValue()) {
            TaskName taskName = new TaskName(storedTaskName);
            if (isActiveTask(taskName, taskModes)) {
                taskPartitionAssignments.computeIfAbsent(taskName, unused -> new ArrayList<>()).add(partition);
            }
        }
    }

    return new GrouperMetadataImpl(processorLocality, taskLocality, taskPartitionAssignments, taskNameToProcessorId);
}
Also used : StreamMetadataCache(org.apache.samza.system.StreamMetadataCache) TaskPartitionAssignmentManager(org.apache.samza.container.grouper.task.TaskPartitionAssignmentManager) LoggerFactory(org.slf4j.LoggerFactory) JobConfig(org.apache.samza.config.JobConfig) GrouperMetadataImpl(org.apache.samza.container.grouper.task.GrouperMetadataImpl) HashMap(java.util.HashMap) TaskModel(org.apache.samza.job.model.TaskModel) SystemStreamPartition(org.apache.samza.system.SystemStreamPartition) StringUtils(org.apache.commons.lang3.StringUtils) ArrayList(java.util.ArrayList) LocationId(org.apache.samza.runtime.LocationId) HashSet(java.util.HashSet) Map(java.util.Map) GrouperMetadata(org.apache.samza.container.grouper.task.GrouperMetadata) TaskAssignmentManager(org.apache.samza.container.grouper.task.TaskAssignmentManager) JobModel(org.apache.samza.job.model.JobModel) TaskName(org.apache.samza.container.TaskName) Logger(org.slf4j.Logger) Set(java.util.Set) Collectors(java.util.stream.Collectors) LocalityManager(org.apache.samza.container.LocalityManager) List(java.util.List) TaskMode(org.apache.samza.job.model.TaskMode) ContainerModel(org.apache.samza.job.model.ContainerModel) ProcessorLocality(org.apache.samza.job.model.ProcessorLocality) Optional(java.util.Optional) Config(org.apache.samza.config.Config) HashMap(java.util.HashMap) GrouperMetadataImpl(org.apache.samza.container.grouper.task.GrouperMetadataImpl) LocationId(org.apache.samza.runtime.LocationId) ArrayList(java.util.ArrayList) TaskMode(org.apache.samza.job.model.TaskMode) TaskName(org.apache.samza.container.TaskName) ArrayList(java.util.ArrayList) List(java.util.List) SystemStreamPartition(org.apache.samza.system.SystemStreamPartition)

Aggregations

LocalityManager (org.apache.samza.container.LocalityManager)3 TaskAssignmentManager (org.apache.samza.container.grouper.task.TaskAssignmentManager)3 TaskPartitionAssignmentManager (org.apache.samza.container.grouper.task.TaskPartitionAssignmentManager)3 ArrayList (java.util.ArrayList)2 HashMap (java.util.HashMap)2 HashSet (java.util.HashSet)2 List (java.util.List)2 Map (java.util.Map)2 Optional (java.util.Optional)2 Set (java.util.Set)2 Collectors (java.util.stream.Collectors)2 StringUtils (org.apache.commons.lang3.StringUtils)2 Config (org.apache.samza.config.Config)2 JobConfig (org.apache.samza.config.JobConfig)2 TaskName (org.apache.samza.container.TaskName)2 GrouperMetadata (org.apache.samza.container.grouper.task.GrouperMetadata)2 GrouperMetadataImpl (org.apache.samza.container.grouper.task.GrouperMetadataImpl)2 ContainerModel (org.apache.samza.job.model.ContainerModel)2 JobModel (org.apache.samza.job.model.JobModel)2 ProcessorLocality (org.apache.samza.job.model.ProcessorLocality)2