use of org.apache.samza.job.model.ContainerModel in project samza by apache.
the class TestGroupByContainerIds method testShouldMinimizeTaskShuffleWhenAvailableProcessorInGroupChanges.
/**
 * Groups three tasks over three processors, then regroups the same tasks after the third
 * processor has been dropped from the processor locality, and checks both assignments.
 */
@Test
public void testShouldMinimizeTaskShuffleWhenAvailableProcessorInGroupChanges() {
  TaskNameGrouper grouper = buildSimpleGrouper(3);

  // Three processors, each registered at its own location.
  String processorA = "testProcessorId1";
  String processorB = "testProcessorId2";
  String processorC = "testProcessorId3";
  LocationId locationA = new LocationId("testLocationId1");
  LocationId locationB = new LocationId("testLocationId2");
  LocationId locationC = new LocationId("testLocationId3");

  // Three tasks, one per partition.
  TaskName taskNameA = new TaskName("testTasKId1");
  TaskName taskNameB = new TaskName("testTaskId2");
  TaskName taskNameC = new TaskName("testTaskId3");
  TaskModel taskA = new TaskModel(taskNameA, new HashSet<>(), new Partition(0));
  TaskModel taskB = new TaskModel(taskNameB, new HashSet<>(), new Partition(1));
  TaskModel taskC = new TaskModel(taskNameC, new HashSet<>(), new Partition(2));
  Set<TaskModel> allTasks = ImmutableSet.of(taskA, taskB, taskC);

  // Task locality pairs every task with the location of the same-numbered processor;
  // it is reused unchanged for both grouping rounds.
  Map<TaskName, LocationId> taskLocality = ImmutableMap.of(
      taskNameA, locationA,
      taskNameB, locationB,
      taskNameC, locationC);

  // Round 1: all three processors are available, so each one should host exactly one task.
  Map<String, LocationId> processorLocality = ImmutableMap.of(
      processorA, locationA,
      processorB, locationB,
      processorC, locationC);
  GrouperMetadataImpl metadata =
      new GrouperMetadataImpl(processorLocality, taskLocality, new HashMap<>(), new HashMap<>());
  Set<ContainerModel> expected = ImmutableSet.of(
      new ContainerModel(processorA, ImmutableMap.of(taskNameA, taskA)),
      new ContainerModel(processorB, ImmutableMap.of(taskNameB, taskB)),
      new ContainerModel(processorC, ImmutableMap.of(taskNameC, taskC)));
  Set<ContainerModel> actual = grouper.group(allTasks, metadata);
  assertEquals(expected, actual);

  // Round 2: the third processor is gone. The first two tasks are expected to stay where
  // they were, and only the third task is expected to move (onto the first processor).
  processorLocality = ImmutableMap.of(
      processorA, locationA,
      processorB, locationB);
  metadata = new GrouperMetadataImpl(processorLocality, taskLocality, new HashMap<>(), new HashMap<>());
  actual = grouper.group(allTasks, metadata);
  expected = ImmutableSet.of(
      new ContainerModel(processorA, ImmutableMap.of(taskNameA, taskA, taskNameC, taskC)),
      new ContainerModel(processorB, ImmutableMap.of(taskNameB, taskB)));
  assertEquals(expected, actual);
}
use of org.apache.samza.job.model.ContainerModel in project samza by apache.
the class TestGroupByContainerCount method testGroupManyTasks.
/**
 * Groups 21 generated tasks into 2 containers and checks the size and the exact
 * membership of each resulting container.
 */
@Test
public void testGroupManyTasks() {
  Set<TaskModel> generatedTasks = generateTaskModels(21);
  Set<ContainerModel> grouped = new GroupByContainerCount(2).group(generatedTasks);

  // Index the resulting containers by id so they can be looked up directly.
  Map<String, ContainerModel> byId = new HashMap<>();
  for (ContainerModel model : grouped) {
    byId.put(model.getId(), model);
  }

  assertEquals(2, grouped.size());

  ContainerModel first = byId.get("0");
  ContainerModel second = byId.get("1");
  assertNotNull(first);
  assertNotNull(second);
  assertEquals("0", first.getId());
  assertEquals("1", second.getId());
  assertEquals(11, first.getTasks().size());
  assertEquals(10, second.getTasks().size());

  // NOTE: tasks are sorted lexicographically, so the container assignment
  // can seem odd, but the consistency is the key focus
  int[] expectedInFirst = {0, 10, 12, 14, 16, 18, 2, 3, 5, 7, 9};
  int[] expectedInSecond = {1, 11, 13, 15, 17, 19, 20, 4, 6, 8};

  for (int taskIndex : expectedInFirst) {
    assertTrue(first.getTasks().containsKey(getTaskName(taskIndex)));
  }
  for (int taskIndex : expectedInSecond) {
    assertTrue(second.getTasks().containsKey(getTaskName(taskIndex)));
  }
}
use of org.apache.samza.job.model.ContainerModel in project samza by apache.
the class TestGroupByContainerCount method testBalancerAfterContainerIncrease.
/**
 * Regroups 9 tasks from 2 containers into 4 containers, feeding the previous
 * task-to-container mapping to the grouper, and checks the resulting layout.
 *
 * Before:
 *  C0  C1
 * --------
 *  T0  T1
 *  T2  T3
 *  T4  T5
 *  T6  T7
 *  T8
 *
 * After:
 *  C0  C1  C2  C3
 * ----------------
 *  T0  T1  T6  T5
 *  T2  T3  T8  T7
 *  T4
 *
 * NOTE for host affinity, it would help to have some additional logic to reassign tasks
 * from C0 and C1 to containers that were on the same respective hosts. It wasn't implemented
 * because the scenario is infrequent, the benefits are not guaranteed, and the code complexity
 * wasn't worth it. It certainly could be implemented in the future.
 */
@Test
public void testBalancerAfterContainerIncrease() {
  Set<TaskModel> generatedTasks = generateTaskModels(9);

  // Previous layout: the same tasks grouped into 2 containers.
  Set<ContainerModel> previousContainers = new GroupByContainerCount(2).group(generatedTasks);
  Map<TaskName, String> previousAssignment = generateTaskContainerMapping(previousContainers);
  GrouperMetadataImpl metadata =
      new GrouperMetadataImpl(new HashMap<>(), new HashMap<>(), new HashMap<>(), previousAssignment);

  // New layout: 4 containers, computed with knowledge of the previous assignment.
  Set<ContainerModel> regrouped = new GroupByContainerCount(4).group(generatedTasks, metadata);
  Map<String, ContainerModel> byId = new HashMap<>();
  for (ContainerModel model : regrouped) {
    byId.put(model.getId(), model);
  }

  assertEquals(4, regrouped.size());

  ContainerModel c0 = byId.get("0");
  ContainerModel c1 = byId.get("1");
  ContainerModel c2 = byId.get("2");
  ContainerModel c3 = byId.get("3");
  assertNotNull(c0);
  assertNotNull(c1);
  assertNotNull(c2);
  assertNotNull(c3);
  assertEquals("0", c0.getId());
  assertEquals("1", c1.getId());
  assertEquals(3, c0.getTasks().size());
  assertEquals(2, c1.getTasks().size());
  assertEquals(2, c2.getTasks().size());
  assertEquals(2, c3.getTasks().size());

  // Tasks 0-4 should stay on the same original containers
  for (int taskIndex : new int[] {0, 2, 4}) {
    assertTrue(c0.getTasks().containsKey(getTaskName(taskIndex)));
  }
  for (int taskIndex : new int[] {1, 3}) {
    assertTrue(c1.getTasks().containsKey(getTaskName(taskIndex)));
  }

  // Tasks 5-8 should be reassigned to the new containers.
  // Consistency is the goal with these reassignments
  for (int taskIndex : new int[] {8, 6}) {
    assertTrue(c2.getTasks().containsKey(getTaskName(taskIndex)));
  }
  for (int taskIndex : new int[] {5, 7}) {
    assertTrue(c3.getTasks().containsKey(getTaskName(taskIndex)));
  }
}
use of org.apache.samza.job.model.ContainerModel in project samza by apache.
the class GroupByContainerCount method group.
/**
 * {@inheritDoc}
 *
 * Validates the tasks, sorts them, and deals them out round-robin over
 * {@code containerCount} containers whose ids are the strings "0", "1", ...
 * The returned set is unmodifiable.
 */
@Override
public Set<ContainerModel> group(Set<TaskModel> tasks) {
  validateTasks(tasks);

  // Work on a sorted copy so the assignment does not depend on the iteration order of the input set.
  List<TaskModel> orderedTasks = new ArrayList<>(tasks);
  Collections.sort(orderedTasks);

  // One bucket per container. Generic arrays cannot be created directly, hence the raw
  // Map[] allocation; every slot is filled with a correctly typed map right below.
  @SuppressWarnings("unchecked")
  Map<TaskName, TaskModel>[] buckets = new Map[containerCount];
  for (int slot = 0; slot < containerCount; slot++) {
    buckets[slot] = new HashMap<>();
  }

  // Round-robin: the n-th task (in sorted order) lands in bucket n mod containerCount.
  int position = 0;
  for (TaskModel task : orderedTasks) {
    buckets[position % containerCount].put(task.getTaskName(), task);
    position++;
  }

  // Wrap each bucket in a ContainerModel whose id is the bucket index.
  Set<ContainerModel> result = new HashSet<>();
  for (int slot = 0; slot < containerCount; slot++) {
    result.add(new ContainerModel(String.valueOf(slot), buckets[slot]));
  }
  return Collections.unmodifiableSet(result);
}
use of org.apache.samza.job.model.ContainerModel in project samza by apache.
the class GroupByContainerIds method group.
/**
 * {@inheritDoc}
 *
 * When {@code containerIds} is null, delegates to {@link #group(Set)}.
 *
 * When there are more containerIds than tasks, only the `x` least containerIds in
 * natural String order are used (where x = tasks.size()).
 *
 * The sorted tasks are then assigned to the selected containerIds in a round robin fashion,
 * preserving the containerId in the final assignment. The returned set is unmodifiable.
 *
 * @throws IllegalArgumentException if {@code containerIds} is empty, or if {@code tasks} is empty
 */
@Override
public Set<ContainerModel> group(Set<TaskModel> tasks, List<String> containerIds) {
  if (containerIds == null) {
    return this.group(tasks);
  }
  if (containerIds.isEmpty()) {
    throw new IllegalArgumentException("Must have at least one container");
  }
  if (tasks.isEmpty()) {
    throw new IllegalArgumentException("cannot group an empty set. containerIds=" + Arrays.toString(containerIds.toArray()));
  }

  // Container ids that will actually receive tasks; trimmed below if there are too many.
  List<String> selectedIds = containerIds;
  if (containerIds.size() > tasks.size()) {
    LOG.warn("Number of containers: {} is greater than number of tasks: {}.", containerIds.size(), tasks.size());
    // Choose the least `x` containerIds in sorted order (where x = tasks.size()).
    selectedIds = containerIds.stream()
        .sorted()
        .limit(tasks.size())
        .collect(Collectors.toList());
    LOG.info("Generating containerModel with containers: {}.", selectedIds);
  }
  int containerCount = selectedIds.size();

  // Work on a sorted copy so the assignment does not depend on the iteration order of the input set.
  List<TaskModel> orderedTasks = new ArrayList<>(tasks);
  Collections.sort(orderedTasks);

  // One bucket per selected container id, in the same order as selectedIds.
  List<Map<TaskName, TaskModel>> buckets = new ArrayList<>();
  for (int slot = 0; slot < containerCount; slot++) {
    buckets.add(new HashMap<>());
  }

  // Round-robin: the n-th task (in sorted order) lands in bucket n mod containerCount.
  int position = 0;
  for (TaskModel task : orderedTasks) {
    buckets.get(position % containerCount).put(task.getTaskName(), task);
    position++;
  }

  // Pair each bucket with the container id at the same index.
  Set<ContainerModel> result = new HashSet<>();
  for (int slot = 0; slot < containerCount; slot++) {
    result.add(new ContainerModel(selectedIds.get(slot), buckets.get(slot)));
  }
  return Collections.unmodifiableSet(result);
}
Aggregations