Search in sources :

Example 76 with ContainerModel

use of org.apache.samza.job.model.ContainerModel in project samza by apache.

the class TestGroupByContainerIds method testShouldMinimizeTaskShuffleWhenAvailableProcessorInGroupChanges.

/**
 * Groups three tasks twice: first with three available processors, then with only the first
 * two. In the first round every task is expected on the processor that shares its location.
 * In the second round tasks 1 and 2 are expected to stay where they were, while task 3 is
 * expected to be placed on processor 1.
 */
@Test
public void testShouldMinimizeTaskShuffleWhenAvailableProcessorInGroupChanges() {
    TaskNameGrouper grouper = buildSimpleGrouper(3);

    String processorOne = "testProcessorId1";
    String processorTwo = "testProcessorId2";
    String processorThree = "testProcessorId3";

    LocationId locationOne = new LocationId("testLocationId1");
    LocationId locationTwo = new LocationId("testLocationId2");
    LocationId locationThree = new LocationId("testLocationId3");

    TaskName taskNameOne = new TaskName("testTasKId1");
    TaskName taskNameTwo = new TaskName("testTaskId2");
    TaskName taskNameThree = new TaskName("testTaskId3");

    TaskModel taskModelOne = new TaskModel(taskNameOne, new HashSet<>(), new Partition(0));
    TaskModel taskModelTwo = new TaskModel(taskNameTwo, new HashSet<>(), new Partition(1));
    TaskModel taskModelThree = new TaskModel(taskNameThree, new HashSet<>(), new Partition(2));

    // Round one: all three processors are available.
    Map<String, LocationId> allProcessors = ImmutableMap.of(
        processorOne, locationOne,
        processorTwo, locationTwo,
        processorThree, locationThree);
    Map<TaskName, LocationId> taskLocality = ImmutableMap.of(
        taskNameOne, locationOne,
        taskNameTwo, locationTwo,
        taskNameThree, locationThree);
    GrouperMetadataImpl fullMetadata =
        new GrouperMetadataImpl(allProcessors, taskLocality, new HashMap<>(), new HashMap<>());
    Set<TaskModel> taskModels = ImmutableSet.of(taskModelOne, taskModelTwo, taskModelThree);

    ContainerModel oneWithTaskOne = new ContainerModel(processorOne, ImmutableMap.of(taskNameOne, taskModelOne));
    ContainerModel twoWithTaskTwo = new ContainerModel(processorTwo, ImmutableMap.of(taskNameTwo, taskModelTwo));
    ContainerModel threeWithTaskThree = new ContainerModel(processorThree, ImmutableMap.of(taskNameThree, taskModelThree));
    Set<ContainerModel> expectedWithAllProcessors = ImmutableSet.of(oneWithTaskOne, twoWithTaskTwo, threeWithTaskThree);

    Set<ContainerModel> groupedWithAllProcessors = grouper.group(taskModels, fullMetadata);
    assertEquals(expectedWithAllProcessors, groupedWithAllProcessors);

    // Round two: the third processor is no longer part of the processor locality.
    Map<String, LocationId> remainingProcessors = ImmutableMap.of(
        processorOne, locationOne,
        processorTwo, locationTwo);
    GrouperMetadataImpl reducedMetadata =
        new GrouperMetadataImpl(remainingProcessors, taskLocality, new HashMap<>(), new HashMap<>());
    Set<ContainerModel> groupedWithTwoProcessors = grouper.group(taskModels, reducedMetadata);

    ContainerModel oneWithTasksOneAndThree = new ContainerModel(
        processorOne, ImmutableMap.of(taskNameOne, taskModelOne, taskNameThree, taskModelThree));
    ContainerModel twoStillWithTaskTwo = new ContainerModel(processorTwo, ImmutableMap.of(taskNameTwo, taskModelTwo));
    Set<ContainerModel> expectedWithTwoProcessors = ImmutableSet.of(oneWithTasksOneAndThree, twoStillWithTaskTwo);

    assertEquals(expectedWithTwoProcessors, groupedWithTwoProcessors);
}
Also used : Partition(org.apache.samza.Partition) HashMap(java.util.HashMap) LocationId(org.apache.samza.runtime.LocationId) ContainerModel(org.apache.samza.job.model.ContainerModel) TaskName(org.apache.samza.container.TaskName) ContainerMocks.getTaskName(org.apache.samza.container.mock.ContainerMocks.getTaskName) TaskModel(org.apache.samza.job.model.TaskModel) Test(org.junit.Test)

Example 77 with ContainerModel

use of org.apache.samza.job.model.ContainerModel in project samza by apache.

the class TestGroupByContainerCount method testGroupManyTasks.

/**
 * Groups 21 generated tasks into two containers and verifies both the size of each container
 * and the exact set of task indexes that each container is expected to hold.
 */
@Test
public void testGroupManyTasks() {
    Set<TaskModel> taskModels = generateTaskModels(21);
    Set<ContainerModel> containers = new GroupByContainerCount(2).group(taskModels);

    // Index the resulting containers by id so each one can be inspected individually.
    Map<String, ContainerModel> containersById = new HashMap<>();
    for (ContainerModel model : containers) {
        containersById.put(model.getId(), model);
    }
    assertEquals(2, containers.size());

    ContainerModel firstContainer = containersById.get("0");
    ContainerModel secondContainer = containersById.get("1");
    assertNotNull(firstContainer);
    assertNotNull(secondContainer);
    assertEquals("0", firstContainer.getId());
    assertEquals("1", secondContainer.getId());
    assertEquals(11, firstContainer.getTasks().size());
    assertEquals(10, secondContainer.getTasks().size());

    // NOTE: tasks are sorted lexicographically, so the container assignment
    // can seem odd, but the consistency is the key focus
    int[] expectedInFirstContainer = {0, 10, 12, 14, 16, 18, 2, 3, 5, 7, 9};
    int[] expectedInSecondContainer = {1, 11, 13, 15, 17, 19, 20, 4, 6, 8};
    for (int taskIndex : expectedInFirstContainer) {
        assertTrue(firstContainer.getTasks().containsKey(getTaskName(taskIndex)));
    }
    for (int taskIndex : expectedInSecondContainer) {
        assertTrue(secondContainer.getTasks().containsKey(getTaskName(taskIndex)));
    }
}
Also used : HashMap(java.util.HashMap) TaskModel(org.apache.samza.job.model.TaskModel) ContainerModel(org.apache.samza.job.model.ContainerModel) Test(org.junit.Test)

Example 78 with ContainerModel

use of org.apache.samza.job.model.ContainerModel in project samza by apache.

the class TestGroupByContainerCount method testBalancerAfterContainerIncrease.

/**
 * Verifies the task assignment when the container count grows from 2 to 4 and the previous
 * task-to-container mapping is supplied through the grouper metadata.
 *
 * Before:
 *  C0  C1
 * --------
 *  T0  T1
 *  T2  T3
 *  T4  T5
 *  T6  T7
 *  T8
 *
 * After:
 *  C0  C1  C2  C3
 * ----------------
 *  T0  T1  T6  T5
 *  T2  T3  T8  T7
 *  T4
 *
 *  NOTE on host affinity: extra logic could reassign the tasks leaving C0 and C1 to containers
 *  located on the same respective hosts. That was not implemented because the scenario is
 *  infrequent, the benefits are not guaranteed, and the added code complexity was not judged
 *  worthwhile. It could still be implemented in the future.
 */
@Test
public void testBalancerAfterContainerIncrease() {
    Set<TaskModel> taskModels = generateTaskModels(9);
    Set<ContainerModel> previousContainers = new GroupByContainerCount(2).group(taskModels);
    Map<TaskName, String> previousAssignment = generateTaskContainerMapping(previousContainers);
    GrouperMetadataImpl metadata =
        new GrouperMetadataImpl(new HashMap<>(), new HashMap<>(), new HashMap<>(), previousAssignment);
    Set<ContainerModel> containers = new GroupByContainerCount(4).group(taskModels, metadata);

    Map<String, ContainerModel> containersById = new HashMap<>();
    for (ContainerModel model : containers) {
        containersById.put(model.getId(), model);
    }
    assertEquals(4, containers.size());

    // Look the containers up by their expected ids "0" through "3".
    ContainerModel[] byIndex = new ContainerModel[4];
    for (int index = 0; index < byIndex.length; index++) {
        byIndex[index] = containersById.get(String.valueOf(index));
    }
    for (ContainerModel model : byIndex) {
        assertNotNull(model);
    }
    assertEquals("0", byIndex[0].getId());
    assertEquals("1", byIndex[1].getId());

    int[] expectedSizes = {3, 2, 2, 2};
    for (int index = 0; index < byIndex.length; index++) {
        assertEquals(expectedSizes[index], byIndex[index].getTasks().size());
    }

    // Rows 0 and 1: tasks 0-4 should stay on the same original containers.
    // Rows 2 and 3: tasks 5-8 should be reassigned to the new containers;
    // consistency is the goal with these reassignments.
    int[][] expectedTasks = {
        {0, 2, 4},
        {1, 3},
        {8, 6},
        {5, 7}
    };
    for (int index = 0; index < byIndex.length; index++) {
        for (int taskIndex : expectedTasks[index]) {
            assertTrue(byIndex[index].getTasks().containsKey(getTaskName(taskIndex)));
        }
    }
}
Also used : TaskName(org.apache.samza.container.TaskName) HashMap(java.util.HashMap) TaskModel(org.apache.samza.job.model.TaskModel) ContainerModel(org.apache.samza.job.model.ContainerModel) Test(org.junit.Test)

Example 79 with ContainerModel

use of org.apache.samza.job.model.ContainerModel in project samza by apache.

the class GroupByContainerCount method group.

/**
 * {@inheritDoc}
 *
 * The tasks are validated, sorted using their natural ordering, and then dealt out one at a
 * time across {@code containerCount} containers. Each container is identified by its index
 * rendered as a string ("0", "1", ...). The returned set is unmodifiable.
 */
@Override
public Set<ContainerModel> group(Set<TaskModel> tasks) {
    validateTasks(tasks);

    // Sorting first makes the assignment independent of the iteration order of the input set.
    List<TaskModel> orderedTasks = new ArrayList<>(tasks);
    Collections.sort(orderedTasks);

    // One bucket of tasks per container.
    Map<TaskName, TaskModel>[] buckets = new Map[containerCount];
    for (int bucket = 0; bucket < buckets.length; bucket++) {
        buckets[bucket] = new HashMap<>();
    }

    // Round-robin: the n-th sorted task lands in bucket (n mod containerCount).
    int position = 0;
    for (TaskModel task : orderedTasks) {
        buckets[position % containerCount].put(task.getTaskName(), task);
        position++;
    }

    // Wrap every bucket in a ContainerModel whose id is the bucket index.
    Set<ContainerModel> grouped = new HashSet<>();
    for (int bucket = 0; bucket < buckets.length; bucket++) {
        grouped.add(new ContainerModel(String.valueOf(bucket), buckets[bucket]));
    }
    return Collections.unmodifiableSet(grouped);
}
Also used : ArrayList(java.util.ArrayList) HashMap(java.util.HashMap) Map(java.util.Map) TaskModel(org.apache.samza.job.model.TaskModel) ContainerModel(org.apache.samza.job.model.ContainerModel) HashSet(java.util.HashSet)

Example 80 with ContainerModel

use of org.apache.samza.job.model.ContainerModel in project samza by apache.

the class GroupByContainerIds method group.

/**
 * {@inheritDoc}
 *
 * When there are more containerIds than tasks, only the lexicographically smallest
 * {@code tasks.size()} containerIds are used.
 *
 * Otherwise all containerIds are used in the order given. In both cases the sorted tasks are
 * assigned to the chosen containerIds in round-robin fashion, and every resulting container
 * model carries the containerId it was assigned to.
 *
 * A {@code null} containerIds list delegates to {@code group(tasks)}.
 *
 * @throws IllegalArgumentException if containerIds is empty, or if tasks is empty
 */
@Override
public Set<ContainerModel> group(Set<TaskModel> tasks, List<String> containerIds) {
    if (containerIds == null) {
        return this.group(tasks);
    }
    if (containerIds.isEmpty()) {
        throw new IllegalArgumentException("Must have at least one container");
    }
    if (tasks.isEmpty()) {
        throw new IllegalArgumentException("cannot group an empty set. containerIds=" + Arrays.toString(containerIds.toArray()));
    }

    List<String> chosenIds = containerIds;
    if (containerIds.size() > tasks.size()) {
        LOG.warn("Number of containers: {} is greater than number of tasks: {}.", containerIds.size(), tasks.size());
        // Choose lexicographically least `x` containerIds (where x = tasks.size()).
        chosenIds = containerIds.stream()
            .sorted()
            .limit(tasks.size())
            .collect(Collectors.toList());
        LOG.info("Generating containerModel with containers: {}.", chosenIds);
    }
    int containerCount = chosenIds.size();

    // Sorting first makes the assignment independent of the iteration order of the input set.
    List<TaskModel> orderedTasks = new ArrayList<>(tasks);
    Collections.sort(orderedTasks);

    // One bucket of tasks per chosen containerId.
    Map<TaskName, TaskModel>[] buckets = new Map[containerCount];
    for (int bucket = 0; bucket < buckets.length; bucket++) {
        buckets[bucket] = new HashMap<>();
    }

    // Round-robin: the n-th sorted task lands in bucket (n mod containerCount).
    int position = 0;
    for (TaskModel task : orderedTasks) {
        buckets[position % containerCount].put(task.getTaskName(), task);
        position++;
    }

    // Pair every bucket with the containerId at the same index.
    Set<ContainerModel> grouped = new HashSet<>();
    for (int bucket = 0; bucket < buckets.length; bucket++) {
        grouped.add(new ContainerModel(chosenIds.get(bucket), buckets[bucket]));
    }
    return Collections.unmodifiableSet(grouped);
}
Also used : ArrayList(java.util.ArrayList) HashMap(java.util.HashMap) TreeMap(java.util.TreeMap) Map(java.util.Map) TaskModel(org.apache.samza.job.model.TaskModel) ContainerModel(org.apache.samza.job.model.ContainerModel) HashSet(java.util.HashSet)

Aggregations

ContainerModel (org.apache.samza.job.model.ContainerModel)96 TaskModel (org.apache.samza.job.model.TaskModel)68 TaskName (org.apache.samza.container.TaskName)60 Test (org.junit.Test)57 HashMap (java.util.HashMap)53 JobModel (org.apache.samza.job.model.JobModel)37 MapConfig (org.apache.samza.config.MapConfig)30 Config (org.apache.samza.config.Config)28 Partition (org.apache.samza.Partition)24 SystemStreamPartition (org.apache.samza.system.SystemStreamPartition)22 StorageConfig (org.apache.samza.config.StorageConfig)19 Map (java.util.Map)18 JobConfig (org.apache.samza.config.JobConfig)18 TaskConfig (org.apache.samza.config.TaskConfig)18 HashSet (java.util.HashSet)16 ArrayList (java.util.ArrayList)14 ClusterManagerConfig (org.apache.samza.config.ClusterManagerConfig)12 LocationId (org.apache.samza.runtime.LocationId)12 Collectors (java.util.stream.Collectors)10 SystemStream (org.apache.samza.system.SystemStream)10