
Example 96 with TaskName

use of org.apache.samza.container.TaskName in project samza by apache.

In the class TaskGroup, the method buildContainerModels:

/**
 * Converts the {@link TaskGroup} list to a set of ContainerModel.
 *
 * @param taskModels    the TaskModels to assign to the ContainerModels.
 * @param taskGroups    the TaskGroups defining how the tasks should be grouped.
 * @return              a set of ContainerModels.
 */
public static Set<ContainerModel> buildContainerModels(Set<TaskModel> taskModels, Collection<TaskGroup> taskGroups) {
    // Map task names to models
    Map<String, TaskModel> taskNameToModel = new HashMap<>();
    for (TaskModel model : taskModels) {
        taskNameToModel.put(model.getTaskName().getTaskName(), model);
    }
    // Build container models
    Set<ContainerModel> containerModels = new HashSet<>();
    for (TaskGroup container : taskGroups) {
        Map<TaskName, TaskModel> containerTaskModels = new HashMap<>();
        for (String taskName : container.taskNames) {
            TaskModel model = taskNameToModel.get(taskName);
            containerTaskModels.put(model.getTaskName(), model);
        }
        containerModels.add(new ContainerModel(container.containerId, containerTaskModels));
    }
    return Collections.unmodifiableSet(containerModels);
}
Also used : TaskName(org.apache.samza.container.TaskName) TaskModel(org.apache.samza.job.model.TaskModel) ContainerModel(org.apache.samza.job.model.ContainerModel)
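
A minimal sketch (not from the original source; the task name, stream, and partition values are hypothetical) of the input and output structures that buildContainerModels works with:

TaskName taskName = new TaskName("Partition 0");
TaskModel taskModel = new TaskModel(taskName,
    Collections.singleton(new SystemStreamPartition("kafka", "input-topic", new Partition(0))),
    new Partition(0));
// buildContainerModels groups TaskModels like this one into ContainerModels keyed by container id.
ContainerModel containerModel = new ContainerModel("0", Collections.singletonMap(taskName, taskModel));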

Example 97 with TaskName

use of org.apache.samza.container.TaskName in project samza by apache.

In the class JobModelHelper, the method updateTaskAssignments:

/**
 * This method does the following:
 * 1. Deletes the existing task assignments if the partition-task grouping has changed from the previous run of the job.
 * 2. Saves the newly generated task assignments to the storage layer through the given {@code taskAssignmentManager}.
 *
 * @param jobModel              represents the {@link JobModel} of the Samza job.
 * @param taskAssignmentManager required to persist the processor to task assignments to the metadata store.
 * @param taskPartitionAssignmentManager required to persist the task to partition assignments to the metadata store.
 * @param grouperMetadata       provides the historical metadata of the samza application.
 */
private void updateTaskAssignments(JobModel jobModel, TaskAssignmentManager taskAssignmentManager, TaskPartitionAssignmentManager taskPartitionAssignmentManager, GrouperMetadata grouperMetadata) {
    LOG.info("Storing the task assignments into metadata store.");
    Set<String> activeTaskNames = new HashSet<>();
    Set<String> standbyTaskNames = new HashSet<>();
    Set<SystemStreamPartition> systemStreamPartitions = new HashSet<>();
    for (ContainerModel containerModel : jobModel.getContainers().values()) {
        for (TaskModel taskModel : containerModel.getTasks().values()) {
            if (TaskMode.Active.equals(taskModel.getTaskMode())) {
                activeTaskNames.add(taskModel.getTaskName().getTaskName());
            }
            if (TaskMode.Standby.equals(taskModel.getTaskMode())) {
                standbyTaskNames.add(taskModel.getTaskName().getTaskName());
            }
            systemStreamPartitions.addAll(taskModel.getSystemStreamPartitions());
        }
    }
    Map<TaskName, String> previousTaskToContainerId = grouperMetadata.getPreviousTaskToProcessorAssignment();
    if (activeTaskNames.size() != previousTaskToContainerId.size()) {
        LOG.warn(String.format("Current task count %s does not match saved task count %s. Stateful jobs may observe misalignment of keys!", activeTaskNames.size(), previousTaskToContainerId.size()));
        // If the tasks changed, then the partition-task grouping is also likely changed and we can't handle that
        // without a much more complicated mapping. Further, the partition count may have changed, which means
        // input message keys are likely reshuffled w.r.t. partitions, so the local state may not contain necessary
        // data associated with the incoming keys. Warn the user and default to the grouper's behavior.
        // In this scenario the tasks may have been reduced, so we need to delete all the existing messages
        taskAssignmentManager.deleteTaskContainerMappings(previousTaskToContainerId.keySet().stream().map(TaskName::getTaskName).collect(Collectors.toList()));
        taskPartitionAssignmentManager.delete(systemStreamPartitions);
    }
    // if the set of standby tasks has changed, e.g., when the replication-factor changed, or the active-tasks-set has
    // changed, we log a warning and delete the existing mapping for these tasks
    Set<String> previousStandbyTasks = taskAssignmentManager.readTaskModes().entrySet().stream().filter(taskNameToTaskModeEntry -> TaskMode.Standby.equals(taskNameToTaskModeEntry.getValue())).map(taskNameToTaskModeEntry -> taskNameToTaskModeEntry.getKey().getTaskName()).collect(Collectors.toSet());
    if (!standbyTaskNames.equals(previousStandbyTasks)) {
        LOG.info(String.format("The set of standby tasks has changed, current standby tasks %s, previous standby tasks %s", standbyTaskNames, previousStandbyTasks));
        taskAssignmentManager.deleteTaskContainerMappings(previousStandbyTasks);
    }
    // Task to partition assignments are stored as {@see SystemStreamPartition} to a list of {@see TaskName} in the
    // coordinator stream. This is done due to the 1 MB value size limit of a Kafka topic.
    Map<SystemStreamPartition, List<String>> sspToTaskNameMap = new HashMap<>();
    Map<String, Map<String, TaskMode>> taskContainerMappings = new HashMap<>();
    for (ContainerModel containerModel : jobModel.getContainers().values()) {
        containerModel.getTasks().forEach((taskName, taskModel) -> {
            taskContainerMappings.putIfAbsent(containerModel.getId(), new HashMap<>());
            taskContainerMappings.get(containerModel.getId()).put(taskName.getTaskName(), taskModel.getTaskMode());
            taskModel.getSystemStreamPartitions().forEach(systemStreamPartition -> {
                sspToTaskNameMap.putIfAbsent(systemStreamPartition, new ArrayList<>());
                sspToTaskNameMap.get(systemStreamPartition).add(taskName.getTaskName());
            });
        });
    }
    taskAssignmentManager.writeTaskContainerMappings(taskContainerMappings);
    taskPartitionAssignmentManager.writeTaskPartitionAssignments(sspToTaskNameMap);
}
Also used : StreamMetadataCache(org.apache.samza.system.StreamMetadataCache) TaskPartitionAssignmentManager(org.apache.samza.container.grouper.task.TaskPartitionAssignmentManager) LoggerFactory(org.slf4j.LoggerFactory) JobConfig(org.apache.samza.config.JobConfig) GrouperMetadataImpl(org.apache.samza.container.grouper.task.GrouperMetadataImpl) HashMap(java.util.HashMap) TaskModel(org.apache.samza.job.model.TaskModel) SystemStreamPartition(org.apache.samza.system.SystemStreamPartition) StringUtils(org.apache.commons.lang3.StringUtils) ArrayList(java.util.ArrayList) LocationId(org.apache.samza.runtime.LocationId) HashSet(java.util.HashSet) Map(java.util.Map) GrouperMetadata(org.apache.samza.container.grouper.task.GrouperMetadata) TaskAssignmentManager(org.apache.samza.container.grouper.task.TaskAssignmentManager) JobModel(org.apache.samza.job.model.JobModel) TaskName(org.apache.samza.container.TaskName) Logger(org.slf4j.Logger) Set(java.util.Set) Collectors(java.util.stream.Collectors) LocalityManager(org.apache.samza.container.LocalityManager) List(java.util.List) TaskMode(org.apache.samza.job.model.TaskMode) ContainerModel(org.apache.samza.job.model.ContainerModel) ProcessorLocality(org.apache.samza.job.model.ProcessorLocality) Optional(java.util.Optional) Config(org.apache.samza.config.Config)
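
For illustration, a hedged sketch of the shape of the two mappings that updateTaskAssignments persists (the container id, task name, and stream values below are invented):

// Processor/container id -> (task name -> task mode), written via writeTaskContainerMappings.
Map<String, Map<String, TaskMode>> taskContainerMappings = new HashMap<>();
taskContainerMappings.put("0", Collections.singletonMap("Partition 0", TaskMode.Active));

// SystemStreamPartition -> task names, written via writeTaskPartitionAssignments.
Map<SystemStreamPartition, List<String>> sspToTaskNames = new HashMap<>();
sspToTaskNames.put(new SystemStreamPartition("kafka", "input-topic", new Partition(0)),
    Collections.singletonList("Partition 0"));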

Example 98 with TaskName

use of org.apache.samza.container.TaskName in project samza by apache.

In the class TaskAssignmentManager, the method readTaskModes:

public Map<TaskName, TaskMode> readTaskModes() {
    Map<TaskName, TaskMode> taskModeMap = new HashMap<>();
    taskModeMappingMetadataStore.all().forEach((taskName, valueBytes) -> {
        String taskMode = taskModeSerde.fromBytes(valueBytes);
        if (taskMode != null) {
            taskModeMap.put(new TaskName(taskName), TaskMode.valueOf(taskMode));
        }
        LOG.debug("Task mode assignment for task {}: {}", taskName, taskMode);
    });
    return Collections.unmodifiableMap(new HashMap<>(taskModeMap));
}
Also used : TaskName(org.apache.samza.container.TaskName) HashMap(java.util.HashMap) TaskMode(org.apache.samza.job.model.TaskMode)
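
A short usage sketch, mirroring how JobModelHelper above consumes readTaskModes to collect the previous standby tasks (taskAssignmentManager is assumed to already exist):

Set<String> previousStandbyTasks = taskAssignmentManager.readTaskModes().entrySet().stream()
    .filter(entry -> TaskMode.Standby.equals(entry.getValue()))
    .map(entry -> entry.getKey().getTaskName())
    .collect(Collectors.toSet());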

Example 99 with TaskName

use of org.apache.samza.container.TaskName in project samza by apache.

In the class TaskNameGrouperProxy, the method getTaskModelForBuddyContainer:

// Helper method to populate the container model for a buddy container.
private static Map<TaskName, TaskModel> getTaskModelForBuddyContainer(Map<TaskName, TaskModel> activeContainerTaskModel, int replicaNum) {
    Map<TaskName, TaskModel> standbyTaskModels = new HashMap<>();
    for (TaskName taskName : activeContainerTaskModel.keySet()) {
        TaskName standbyTaskName = StandbyTaskUtil.getStandbyTaskName(taskName, replicaNum);
        TaskModel standbyTaskModel = new TaskModel(standbyTaskName, activeContainerTaskModel.get(taskName).getSystemStreamPartitions(), activeContainerTaskModel.get(taskName).getChangelogPartition(), TaskMode.Standby);
        standbyTaskModels.put(standbyTaskName, standbyTaskModel);
    }
    LOG.info("Generated standbyTaskModels : {} for active task models : {}", standbyTaskModels, activeContainerTaskModel);
    return standbyTaskModels;
}
Also used : TaskName(org.apache.samza.container.TaskName) HashMap(java.util.HashMap) TaskModel(org.apache.samza.job.model.TaskModel)
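
A hedged sketch of the input this helper expects: a map of active TaskModels keyed by TaskName (the task name, stream, and partition below are hypothetical). For each entry, the method derives a standby name via StandbyTaskUtil.getStandbyTaskName and builds a TaskModel marked TaskMode.Standby:

Map<TaskName, TaskModel> activeContainerTaskModel = new HashMap<>();
TaskName activeTask = new TaskName("Partition 0");
activeContainerTaskModel.put(activeTask, new TaskModel(activeTask,
    Collections.singleton(new SystemStreamPartition("kafka", "input-topic", new Partition(0))),
    new Partition(0)));
// For replicaNum 0, the result contains one standby TaskModel per active task, keyed by the
// standby TaskName returned by StandbyTaskUtil.getStandbyTaskName(activeTask, 0).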

Example 100 with TaskName

use of org.apache.samza.container.TaskName in project samza by apache.

In the class TestWindowOperator, the method setup:

@Before
public void setup() {
    Map<String, String> configMap = new HashMap<>();
    configMap.put("job.default.system", "kafka");
    configMap.put("job.name", "jobName");
    configMap.put("job.id", "jobId");
    this.config = new MapConfig(configMap);
    this.context = new MockContext();
    when(this.context.getJobContext().getConfig()).thenReturn(this.config);
    Serde storeKeySerde = new TimeSeriesKeySerde(new IntegerSerde());
    Serde storeValSerde = KVSerde.of(new IntegerSerde(), new IntegerSerde());
    SystemStreamPartition ssp = new SystemStreamPartition("kafka", "integers", new Partition(0));
    TaskModel taskModel = mock(TaskModel.class);
    when(taskModel.getSystemStreamPartitions()).thenReturn(ImmutableSet.of(ssp));
    when(taskModel.getTaskName()).thenReturn(new TaskName("task 1"));
    when(this.context.getTaskContext().getTaskModel()).thenReturn(taskModel);
    when(((TaskContextImpl) this.context.getTaskContext()).getSspsExcludingSideInputs()).thenReturn(ImmutableSet.of(ssp));
    when(this.context.getTaskContext().getTaskMetricsRegistry()).thenReturn(new MetricsRegistryMap());
    when(this.context.getContainerContext().getContainerMetricsRegistry()).thenReturn(new MetricsRegistryMap());
    when(this.context.getTaskContext().getStore("jobName-jobId-window-w1")).thenReturn(new TestInMemoryStore<>(storeKeySerde, storeValSerde));
}
Also used : KVSerde(org.apache.samza.serializers.KVSerde) Serde(org.apache.samza.serializers.Serde) IntegerSerde(org.apache.samza.serializers.IntegerSerde) TimeSeriesKeySerde(org.apache.samza.operators.impl.store.TimeSeriesKeySerde) SystemStreamPartition(org.apache.samza.system.SystemStreamPartition) Partition(org.apache.samza.Partition) MockContext(org.apache.samza.context.MockContext) HashMap(java.util.HashMap) TaskContextImpl(org.apache.samza.context.TaskContextImpl) TaskName(org.apache.samza.container.TaskName) MapConfig(org.apache.samza.config.MapConfig) MetricsRegistryMap(org.apache.samza.metrics.MetricsRegistryMap) TaskModel(org.apache.samza.job.model.TaskModel) Before(org.junit.Before)
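
A small follow-up sketch (not part of the original test, and assuming org.junit.Assert.assertEquals is statically imported) showing how the mocked task model wired up above could be sanity-checked from the same test class:

TaskModel mockedTaskModel = this.context.getTaskContext().getTaskModel();
// The mock returns the TaskName and SSP configured in setup().
assertEquals(new TaskName("task 1"), mockedTaskModel.getTaskName());
assertEquals(ImmutableSet.of(new SystemStreamPartition("kafka", "integers", new Partition(0))),
    mockedTaskModel.getSystemStreamPartitions());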

Aggregations

TaskName (org.apache.samza.container.TaskName): 212 usages
HashMap (java.util.HashMap): 136 usages
Test (org.junit.Test): 133 usages
SystemStreamPartition (org.apache.samza.system.SystemStreamPartition): 117 usages
Partition (org.apache.samza.Partition): 106 usages
MapConfig (org.apache.samza.config.MapConfig): 92 usages
TaskModel (org.apache.samza.job.model.TaskModel): 90 usages
Map (java.util.Map): 75 usages
Set (java.util.Set): 73 usages
Config (org.apache.samza.config.Config): 71 usages
ContainerModel (org.apache.samza.job.model.ContainerModel): 63 usages
ImmutableMap (com.google.common.collect.ImmutableMap): 53 usages
File (java.io.File): 53 usages
SystemStream (org.apache.samza.system.SystemStream): 52 usages
ImmutableSet (com.google.common.collect.ImmutableSet): 50 usages
TaskMode (org.apache.samza.job.model.TaskMode): 46 usages
TaskConfig (org.apache.samza.config.TaskConfig): 43 usages
ImmutableList (com.google.common.collect.ImmutableList): 42 usages
Collections (java.util.Collections): 41 usages
CheckpointId (org.apache.samza.checkpoint.CheckpointId): 41 usages