Search in sources :

Example 46 with ContainerModel

use of org.apache.samza.job.model.ContainerModel in project samza by apache.

the class JobModelHelper method updateTaskAssignments.

/**
 * Persists the task assignments of the given job model to the metadata store.
 *
 * <p>This method does the following:
 * <ol>
 *   <li>Deletes the previously saved task-to-container mappings, and the task-to-partition assignments of the
 *       current input partitions, when the number of active tasks differs from the number of previously saved
 *       task assignments.</li>
 *   <li>Deletes the previously saved standby task mappings when the set of standby tasks has changed.</li>
 *   <li>Saves the newly generated task assignments to the storage layer through the
 *       {@link TaskAssignmentManager} and the {@link TaskPartitionAssignmentManager}.</li>
 * </ol>
 *
 * @param jobModel              represents the {@link JobModel} of the samza job.
 * @param taskAssignmentManager required to persist the processor to task assignments to the metadata store.
 * @param taskPartitionAssignmentManager required to persist the task to partition assignments to the metadata store.
 * @param grouperMetadata       provides the historical metadata of the samza application.
 */
private void updateTaskAssignments(JobModel jobModel, TaskAssignmentManager taskAssignmentManager, TaskPartitionAssignmentManager taskPartitionAssignmentManager, GrouperMetadata grouperMetadata) {
    LOG.info("Storing the task assignments into metadata store.");
    // Collect the names of the active and standby tasks, and every input partition, of the new job model.
    Set<String> activeTaskNames = new HashSet<>();
    Set<String> standbyTaskNames = new HashSet<>();
    Set<SystemStreamPartition> systemStreamPartitions = new HashSet<>();
    for (ContainerModel containerModel : jobModel.getContainers().values()) {
        for (TaskModel taskModel : containerModel.getTasks().values()) {
            TaskMode taskMode = taskModel.getTaskMode();
            if (TaskMode.Active.equals(taskMode)) {
                activeTaskNames.add(taskModel.getTaskName().getTaskName());
            } else if (TaskMode.Standby.equals(taskMode)) {
                standbyTaskNames.add(taskModel.getTaskName().getTaskName());
            }
            systemStreamPartitions.addAll(taskModel.getSystemStreamPartitions());
        }
    }
    Map<TaskName, String> previousTaskToContainerId = grouperMetadata.getPreviousTaskToProcessorAssignment();
    if (activeTaskNames.size() != previousTaskToContainerId.size()) {
        LOG.warn("Current task count {} does not match saved task count {}. Stateful jobs may observe misalignment of keys!", activeTaskNames.size(), previousTaskToContainerId.size());
        // If the tasks changed, then the partition-task grouping is also likely changed and we can't handle that
        // without a much more complicated mapping. Further, the partition count may have changed, which means
        // input message keys are likely reshuffled w.r.t. partitions, so the local state may not contain necessary
        // data associated with the incoming keys. Warn the user and default to grouper.
        // In this scenario the tasks may have been reduced, so we need to delete all the existing messages.
        taskAssignmentManager.deleteTaskContainerMappings(previousTaskToContainerId.keySet().stream().map(TaskName::getTaskName).collect(Collectors.toList()));
        taskPartitionAssignmentManager.delete(systemStreamPartitions);
    }
    // If the set of standby tasks has changed, e.g., when the replication-factor changed, or the active-tasks-set has
    // changed, we log a message and delete the existing mapping for these tasks.
    Set<String> previousStandbyTasks = taskAssignmentManager.readTaskModes().entrySet().stream()
        .filter(taskNameToTaskModeEntry -> TaskMode.Standby.equals(taskNameToTaskModeEntry.getValue()))
        .map(taskNameToTaskModeEntry -> taskNameToTaskModeEntry.getKey().getTaskName())
        .collect(Collectors.toSet());
    if (!standbyTaskNames.equals(previousStandbyTasks)) {
        LOG.info("The set of standby tasks has changed, current standby tasks {}, previous standby tasks {}", standbyTaskNames, previousStandbyTasks);
        taskAssignmentManager.deleteTaskContainerMappings(previousStandbyTasks);
    }
    // Task to partition assignments is stored as SystemStreamPartition to list of task names in the
    // coordinator stream. This is done due to the 1 MB value size limit in a kafka topic.
    Map<SystemStreamPartition, List<String>> sspToTaskNameMap = new HashMap<>();
    Map<String, Map<String, TaskMode>> taskContainerMappings = new HashMap<>();
    for (ContainerModel containerModel : jobModel.getContainers().values()) {
        containerModel.getTasks().forEach((taskName, taskModel) -> {
            // computeIfAbsent only allocates the nested collection the first time a key is seen.
            taskContainerMappings.computeIfAbsent(containerModel.getId(), containerId -> new HashMap<>())
                .put(taskName.getTaskName(), taskModel.getTaskMode());
            taskModel.getSystemStreamPartitions().forEach(systemStreamPartition ->
                sspToTaskNameMap.computeIfAbsent(systemStreamPartition, ssp -> new ArrayList<>())
                    .add(taskName.getTaskName()));
        });
    }
    taskAssignmentManager.writeTaskContainerMappings(taskContainerMappings);
    taskPartitionAssignmentManager.writeTaskPartitionAssignments(sspToTaskNameMap);
}
Also used : StreamMetadataCache(org.apache.samza.system.StreamMetadataCache) TaskPartitionAssignmentManager(org.apache.samza.container.grouper.task.TaskPartitionAssignmentManager) LoggerFactory(org.slf4j.LoggerFactory) JobConfig(org.apache.samza.config.JobConfig) GrouperMetadataImpl(org.apache.samza.container.grouper.task.GrouperMetadataImpl) HashMap(java.util.HashMap) TaskModel(org.apache.samza.job.model.TaskModel) SystemStreamPartition(org.apache.samza.system.SystemStreamPartition) StringUtils(org.apache.commons.lang3.StringUtils) ArrayList(java.util.ArrayList) LocationId(org.apache.samza.runtime.LocationId) HashSet(java.util.HashSet) Map(java.util.Map) GrouperMetadata(org.apache.samza.container.grouper.task.GrouperMetadata) TaskAssignmentManager(org.apache.samza.container.grouper.task.TaskAssignmentManager) JobModel(org.apache.samza.job.model.JobModel) TaskName(org.apache.samza.container.TaskName) Logger(org.slf4j.Logger) Set(java.util.Set) Collectors(java.util.stream.Collectors) LocalityManager(org.apache.samza.container.LocalityManager) List(java.util.List) TaskMode(org.apache.samza.job.model.TaskMode) ContainerModel(org.apache.samza.job.model.ContainerModel) ProcessorLocality(org.apache.samza.job.model.ProcessorLocality) Optional(java.util.Optional) Config(org.apache.samza.config.Config) HashMap(java.util.HashMap) ContainerModel(org.apache.samza.job.model.ContainerModel) TaskName(org.apache.samza.container.TaskName) ArrayList(java.util.ArrayList) List(java.util.List) HashMap(java.util.HashMap) Map(java.util.Map) TaskModel(org.apache.samza.job.model.TaskModel) HashSet(java.util.HashSet) SystemStreamPartition(org.apache.samza.system.SystemStreamPartition)

Example 47 with ContainerModel

use of org.apache.samza.job.model.ContainerModel in project samza by apache.

the class TaskNameGrouperProxy method generateStandbyTasks.

/**
 *  Builds the set of container models that results from adding standby ("buddy") containers to the given
 *  active containers. For every active container, {@code replicationFactor - 1} buddy containers are created.
 *  Package-private for testing.
 *
 * @param containerModels The initial (active) container models.
 * @param replicationFactor The desired replication factor; a replication factor of n yields n-1 buddy containers
 *                          per active container.
 * @return A new set holding the generated buddy containers together with all the initial containers.
 */
Set<ContainerModel> generateStandbyTasks(Set<ContainerModel> containerModels, int replicationFactor) {
    LOG.info("Received current containerModel map : {}, replicationFactor : {}", containerModels, replicationFactor);
    final int standbyReplicasPerContainer = replicationFactor - 1;
    Set<ContainerModel> result = new HashSet<>();
    for (ContainerModel primary : containerModels) {
        int replicaIndex = 0;
        while (replicaIndex < standbyReplicasPerContainer) {
            String standbyId = StandbyTaskUtil.getStandbyContainerId(primary.getId(), replicaIndex);
            result.add(new ContainerModel(standbyId, getTaskModelForBuddyContainer(primary.getTasks(), replicaIndex)));
            replicaIndex++;
        }
    }
    // Log only the buddies; the initial containers are merged in afterwards.
    LOG.info("Adding buddy containers : {}", result);
    result.addAll(containerModels);
    return result;
}
Also used : ContainerModel(org.apache.samza.job.model.ContainerModel) HashSet(java.util.HashSet)

Example 48 with ContainerModel

use of org.apache.samza.job.model.ContainerModel in project samza by apache.

the class TestContainerAllocator method getJobModelReader.

// Builds a JobModelManager over a job model with containerCount task-less containers, whose ids are
// the decimal strings "0" .. containerCount-1, served through a mock HTTP server.
private static JobModelManager getJobModelReader(int containerCount) {
    // Ideally, the JobModelReader should be constructed independent of HttpServer.
    // That way it becomes easier to mock objects. Save it for later.
    HttpServer mockServer = new MockHttpServer("/", 7777, null, new ServletHolder(DefaultServlet.class));
    Map<String, ContainerModel> containersById = new java.util.HashMap<>();
    int index = 0;
    while (index < containerCount) {
        String containerId = String.valueOf(index);
        containersById.put(containerId, new ContainerModel(containerId, index, new HashMap<TaskName, TaskModel>()));
        index++;
    }
    return new JobModelManager(new JobModel(getConfig(), containersById), mockServer, null);
}
Also used : HashMap(java.util.HashMap) ServletHolder(org.eclipse.jetty.servlet.ServletHolder) JobModelManager(org.apache.samza.coordinator.JobModelManager) ContainerModel(org.apache.samza.job.model.ContainerModel) TaskName(org.apache.samza.container.TaskName) HttpServer(org.apache.samza.coordinator.server.HttpServer) JobModel(org.apache.samza.job.model.JobModel) DefaultServlet(org.eclipse.jetty.servlet.DefaultServlet) TaskModel(org.apache.samza.job.model.TaskModel)

Example 49 with ContainerModel

use of org.apache.samza.job.model.ContainerModel in project samza by apache.

the class TestContainerProcessManager method getCoordinator.

// Builds a JobModelManager over a job model with containerCount task-less containers (ids "0" ..
// containerCount-1) and a mocked LocalityManager whose readContainerLocality() returns a single entry,
// keyed "0", mapping SetContainerHostMapping.HOST_KEY to "abc". The job model is also stored into
// JobModelManager.jobModelRef() before the manager is returned.
private JobModelManager getCoordinator(int containerCount) {
    Map<String, ContainerModel> containers = new java.util.HashMap<>();
    for (int i = 0; i < containerCount; i++) {
        ContainerModel container = new ContainerModel(String.valueOf(i), i, new HashMap<TaskName, TaskModel>());
        containers.put(String.valueOf(i), container);
    }
    // A plain HashMap instead of double-brace initialization: the latter creates an anonymous subclass
    // that holds a hidden reference to the enclosing test instance.
    Map<String, String> hostMapping = new HashMap<>();
    hostMapping.put(SetContainerHostMapping.HOST_KEY, "abc");
    Map<String, Map<String, String>> localityMap = new HashMap<>();
    localityMap.put("0", hostMapping);
    LocalityManager mockLocalityManager = mock(LocalityManager.class);
    when(mockLocalityManager.readContainerLocality()).thenReturn(localityMap);
    JobModel jobModel = new JobModel(getConfig(), containers, mockLocalityManager);
    JobModelManager.jobModelRef().getAndSet(jobModel);
    return new JobModelManager(jobModel, this.server, null);
}
Also used : HashMap(java.util.HashMap) JobModelManager(org.apache.samza.coordinator.JobModelManager) ContainerModel(org.apache.samza.job.model.ContainerModel) TaskName(org.apache.samza.container.TaskName) JobModel(org.apache.samza.job.model.JobModel) LocalityManager(org.apache.samza.container.LocalityManager) HashMap(java.util.HashMap) Map(java.util.Map) MetricsRegistryMap(org.apache.samza.metrics.MetricsRegistryMap) TaskModel(org.apache.samza.job.model.TaskModel)

Example 50 with ContainerModel

use of org.apache.samza.job.model.ContainerModel in project samza by apache.

the class TestGroupByContainerCount method testBalancerNewContainerCountOne.

// Balancing three tasks, previously grouped into three containers, down to a single container must
// place every task in container "0", write one mapping per task and delete no existing mapping.
@Test
public void testBalancerNewContainerCountOne() {
    final int taskCount = 3;
    Set<TaskModel> tasks = generateTaskModels(taskCount);
    Set<ContainerModel> previousGrouping = new GroupByContainerCount(3).group(tasks);
    Map<String, String> previousTaskToContainer = generateTaskContainerMapping(previousGrouping);
    when(taskAssignmentManager.readTaskAssignment()).thenReturn(previousTaskToContainer);

    Set<ContainerModel> balanced = new GroupByContainerCount(1).balance(tasks, localityManager);

    // Results should be the same as calling group
    Map<String, ContainerModel> byProcessorId = new HashMap<>();
    for (ContainerModel model : balanced) {
        byProcessorId.put(model.getProcessorId(), model);
    }
    assertEquals(1, balanced.size());

    ContainerModel soleContainer = byProcessorId.get("0");
    assertNotNull(soleContainer);
    assertEquals("0", soleContainer.getProcessorId());
    assertEquals(3, soleContainer.getTasks().size());
    for (int taskIndex = 0; taskIndex < taskCount; taskIndex++) {
        assertTrue(soleContainer.getTasks().containsKey(getTaskName(taskIndex)));
    }

    for (int taskIndex = 0; taskIndex < taskCount; taskIndex++) {
        verify(taskAssignmentManager).writeTaskContainerMapping(getTaskName(taskIndex).getTaskName(), "0");
    }
    verify(taskAssignmentManager, never()).deleteTaskContainerMappings(anyCollection());
}
Also used : HashMap(java.util.HashMap) Mockito.anyString(org.mockito.Mockito.anyString) TaskModel(org.apache.samza.job.model.TaskModel) ContainerMocks.getTaskModel(org.apache.samza.container.mock.ContainerMocks.getTaskModel) ContainerModel(org.apache.samza.job.model.ContainerModel) Test(org.junit.Test)

Aggregations

ContainerModel (org.apache.samza.job.model.ContainerModel)96 TaskModel (org.apache.samza.job.model.TaskModel)68 TaskName (org.apache.samza.container.TaskName)60 Test (org.junit.Test)57 HashMap (java.util.HashMap)53 JobModel (org.apache.samza.job.model.JobModel)37 MapConfig (org.apache.samza.config.MapConfig)30 Config (org.apache.samza.config.Config)28 Partition (org.apache.samza.Partition)24 SystemStreamPartition (org.apache.samza.system.SystemStreamPartition)22 StorageConfig (org.apache.samza.config.StorageConfig)19 Map (java.util.Map)18 JobConfig (org.apache.samza.config.JobConfig)18 TaskConfig (org.apache.samza.config.TaskConfig)18 HashSet (java.util.HashSet)16 ArrayList (java.util.ArrayList)14 ClusterManagerConfig (org.apache.samza.config.ClusterManagerConfig)12 LocationId (org.apache.samza.runtime.LocationId)12 Collectors (java.util.stream.Collectors)10 SystemStream (org.apache.samza.system.SystemStream)10