use of org.apache.samza.runtime.LocationId in project samza by apache.
the class JobModelHelper method getGrouperMetadata.
/**
 * Assembles the {@link GrouperMetadata} handed to the task name grouper.
 * <p>
 * The result combines the processor locality returned by {@code getProcessorLocality} with the task
 * assignments and task to partition assignments read from the supplied managers. Only tasks for which
 * {@code isActiveTask} returns true contribute to the task-level maps.
 *
 * @param config job configuration, forwarded to {@code getProcessorLocality}.
 * @param localityManager locality source, forwarded to {@code getProcessorLocality}.
 * @param taskAssignmentManager supplies the task modes and the task name to container id assignment.
 * @param taskPartitionAssignmentManager supplies the {@code SystemStreamPartition} to task names assignment.
 * @return a {@code GrouperMetadataImpl} built from the processor locality, task locality,
 *         task to partition assignments and task to processor id mapping.
 */
private GrouperMetadata getGrouperMetadata(Config config, LocalityManager localityManager,
    TaskAssignmentManager taskAssignmentManager, TaskPartitionAssignmentManager taskPartitionAssignmentManager) {
  Map<String, LocationId> processorLocality = getProcessorLocality(config, localityManager);
  Map<TaskName, TaskMode> taskModes = taskAssignmentManager.readTaskModes();

  Map<TaskName, String> taskNameToProcessorId = new HashMap<>();
  Map<TaskName, LocationId> taskLocality = new HashMap<>();
  // The task assignment is consumed for active tasks only, i.e. tasks that either have no task mode
  // or whose task mode is active.
  taskAssignmentManager.readTaskAssignment().forEach((rawTaskName, processorId) -> {
    TaskName taskName = new TaskName(rawTaskName);
    if (!isActiveTask(taskName, taskModes)) {
      return;
    }
    taskNameToProcessorId.put(taskName, processorId);
    // A task gets a locality entry only when its container has a known locality.
    if (processorLocality.containsKey(processorId)) {
      taskLocality.put(taskName, processorLocality.get(processorId));
    }
  });

  // The coordinator stream stores the assignment as SystemStreamPartition -> list of task names
  // (done because of the 1 MB value size limit in a kafka topic). It is inverted here into
  // TaskName -> list of SystemStreamPartition so that it can be wired into the JobModel.
  Map<SystemStreamPartition, List<String>> sspToTaskMapping = taskPartitionAssignmentManager.readTaskPartitionAssignments();
  Map<TaskName, List<SystemStreamPartition>> taskPartitionAssignments = new HashMap<>();
  for (Map.Entry<SystemStreamPartition, List<String>> sspEntry : sspToTaskMapping.entrySet()) {
    SystemStreamPartition systemStreamPartition = sspEntry.getKey();
    for (String rawTaskName : sspEntry.getValue()) {
      TaskName taskName = new TaskName(rawTaskName);
      if (isActiveTask(taskName, taskModes)) {
        taskPartitionAssignments.computeIfAbsent(taskName, ignored -> new ArrayList<>()).add(systemStreamPartition);
      }
    }
  }

  return new GrouperMetadataImpl(processorLocality, taskLocality, taskPartitionAssignments, taskNameToProcessorId);
}
use of org.apache.samza.runtime.LocationId in project samza by apache.
the class JobModelHelper method getProcessorLocality.
/**
 * Retrieves and returns the processor locality of a samza job using the provided {@link Config} and
 * {@link LocalityManager}.
 * <p>
 * One entry is produced for each container id {@code "0"} through {@code containerCount - 1}. A container
 * for which no locality with a non-empty host is found is mapped to the {@code "ANY_HOST"} location.
 *
 * @param config provides the configurations defined by the user. Used here to determine the container count.
 * @param localityManager provides the processor to host mapping persisted to the metadata store.
 * @return a map of container id to the {@link LocationId} of that container.
 */
private static Map<String, LocationId> getProcessorLocality(Config config, LocalityManager localityManager) {
  Map<String, LocationId> containerToLocationId = new HashMap<>();
  Map<String, ProcessorLocality> existingContainerLocality = localityManager.readLocality().getProcessorLocalities();
  // The container count does not change while iterating, so read it once rather than constructing a
  // JobConfig on every evaluation of the loop condition.
  int containerCount = new JobConfig(config).getContainerCount();
  for (int i = 0; i < containerCount; i++) {
    String containerId = Integer.toString(i);
    LocationId locationId = Optional.ofNullable(existingContainerLocality.get(containerId))
        .map(ProcessorLocality::host)
        .filter(StringUtils::isNotEmpty)
        .map(LocationId::new)
        .orElse(new LocationId("ANY_HOST"));
    containerToLocationId.put(containerId, locationId);
  }
  return containerToLocationId;
}
use of org.apache.samza.runtime.LocationId in project samza by apache.
the class TestGroupByContainerIds method testMoreTasksThanProcessors.
/**
 * Groups the task models produced by {@code generateTaskModels(1)} against a locality of two processors
 * and expects exactly one container model, for {@code testProcessorId1}, holding every generated task.
 * <p>
 * NOTE(review): the test name mentions "more tasks than processors" while the setup registers two
 * processors and calls {@code generateTaskModels(1)} -- confirm the intended scenario.
 */
@Test
public void testMoreTasksThanProcessors() {
  String testProcessorId1 = "testProcessorId1";
  String testProcessorId2 = "testProcessorId2";
  LocationId testLocationId1 = new LocationId("testLocationId1");
  LocationId testLocationId2 = new LocationId("testLocationId2");
  LocationId testLocationId3 = new LocationId("testLocationId3");
  TaskName testTaskName1 = new TaskName("testTasKId1");
  TaskName testTaskName2 = new TaskName("testTaskId2");
  TaskName testTaskName3 = new TaskName("testTaskId3");

  Map<String, LocationId> processorLocality = ImmutableMap.of(
      testProcessorId1, testLocationId1,
      testProcessorId2, testLocationId2);
  Map<TaskName, LocationId> taskLocality = ImmutableMap.of(
      testTaskName1, testLocationId1,
      testTaskName2, testLocationId2,
      testTaskName3, testLocationId3);
  GrouperMetadataImpl grouperMetadata = new GrouperMetadataImpl(processorLocality, taskLocality, new HashMap<>(), new HashMap<>());

  Set<TaskModel> taskModels = generateTaskModels(1);

  // All generated tasks are expected to end up in a single container owned by the first processor.
  Map<TaskName, TaskModel> expectedTasks = taskModels.stream().collect(Collectors.toMap(TaskModel::getTaskName, x -> x));
  ContainerModel expectedContainerModel = new ContainerModel(testProcessorId1, expectedTasks);

  Set<ContainerModel> actualContainerModels = buildSimpleGrouper().group(taskModels, grouperMetadata);

  assertEquals(1, actualContainerModels.size());
  assertEquals(ImmutableSet.of(expectedContainerModel), actualContainerModels);
}
use of org.apache.samza.runtime.LocationId in project samza by apache.
the class TestGroupByContainerIds method testShouldUseTaskLocalityWhenGeneratingContainerModels.
/**
 * Sets up three processors and three tasks whose locations pair up one-to-one and expects the grouper
 * to place each task in the container of the processor that shares its location.
 */
@Test
public void testShouldUseTaskLocalityWhenGeneratingContainerModels() {
  TaskNameGrouper grouper = buildSimpleGrouper(3);

  String processorA = "testProcessorId1";
  String processorB = "testProcessorId2";
  String processorC = "testProcessorId3";

  LocationId locationA = new LocationId("testLocationId1");
  LocationId locationB = new LocationId("testLocationId2");
  LocationId locationC = new LocationId("testLocationId3");

  TaskName taskNameA = new TaskName("testTasKId1");
  TaskName taskNameB = new TaskName("testTaskId2");
  TaskName taskNameC = new TaskName("testTaskId3");

  TaskModel taskModelA = new TaskModel(taskNameA, new HashSet<>(), new Partition(0));
  TaskModel taskModelB = new TaskModel(taskNameB, new HashSet<>(), new Partition(1));
  TaskModel taskModelC = new TaskModel(taskNameC, new HashSet<>(), new Partition(2));

  // Processor i and task i share location i.
  Map<String, LocationId> processorLocality = ImmutableMap.of(
      processorA, locationA,
      processorB, locationB,
      processorC, locationC);
  Map<TaskName, LocationId> taskLocality = ImmutableMap.of(
      taskNameA, locationA,
      taskNameB, locationB,
      taskNameC, locationC);
  GrouperMetadataImpl metadata = new GrouperMetadataImpl(processorLocality, taskLocality, new HashMap<>(), new HashMap<>());

  Set<TaskModel> allTaskModels = ImmutableSet.of(taskModelA, taskModelB, taskModelC);

  ContainerModel expectedContainerA = new ContainerModel(processorA, ImmutableMap.of(taskNameA, taskModelA));
  ContainerModel expectedContainerB = new ContainerModel(processorB, ImmutableMap.of(taskNameB, taskModelB));
  ContainerModel expectedContainerC = new ContainerModel(processorC, ImmutableMap.of(taskNameC, taskModelC));
  Set<ContainerModel> expected = ImmutableSet.of(expectedContainerA, expectedContainerB, expectedContainerC);

  Set<ContainerModel> actual = grouper.group(allTaskModels, metadata);

  assertEquals(expected, actual);
}
use of org.apache.samza.runtime.LocationId in project samza by apache.
the class TestGroupByContainerIds method testShouldMinimizeTaskShuffleWhenAvailableProcessorInGroupChanges.
/**
 * Runs the grouper twice over the same three tasks and task locality.
 * <p>
 * Phase one registers three processors and expects one task per container. Phase two drops the third
 * processor from the processor locality and expects the first two tasks to stay where they were while
 * the third task joins the container of the first processor.
 */
@Test
public void testShouldMinimizeTaskShuffleWhenAvailableProcessorInGroupChanges() {
  TaskNameGrouper grouper = buildSimpleGrouper(3);

  String processorA = "testProcessorId1";
  String processorB = "testProcessorId2";
  String processorC = "testProcessorId3";

  LocationId locationA = new LocationId("testLocationId1");
  LocationId locationB = new LocationId("testLocationId2");
  LocationId locationC = new LocationId("testLocationId3");

  TaskName taskNameA = new TaskName("testTasKId1");
  TaskName taskNameB = new TaskName("testTaskId2");
  TaskName taskNameC = new TaskName("testTaskId3");

  TaskModel taskModelA = new TaskModel(taskNameA, new HashSet<>(), new Partition(0));
  TaskModel taskModelB = new TaskModel(taskNameB, new HashSet<>(), new Partition(1));
  TaskModel taskModelC = new TaskModel(taskNameC, new HashSet<>(), new Partition(2));

  Map<TaskName, LocationId> taskLocality = ImmutableMap.of(
      taskNameA, locationA,
      taskNameB, locationB,
      taskNameC, locationC);
  Set<TaskModel> allTaskModels = ImmutableSet.of(taskModelA, taskModelB, taskModelC);

  // Phase one: all three processors are available.
  Map<String, LocationId> fullProcessorLocality = ImmutableMap.of(
      processorA, locationA,
      processorB, locationB,
      processorC, locationC);
  GrouperMetadataImpl fullMetadata = new GrouperMetadataImpl(fullProcessorLocality, taskLocality, new HashMap<>(), new HashMap<>());
  Set<ContainerModel> expectedWithAllProcessors = ImmutableSet.of(
      new ContainerModel(processorA, ImmutableMap.of(taskNameA, taskModelA)),
      new ContainerModel(processorB, ImmutableMap.of(taskNameB, taskModelB)),
      new ContainerModel(processorC, ImmutableMap.of(taskNameC, taskModelC)));
  Set<ContainerModel> actualWithAllProcessors = grouper.group(allTaskModels, fullMetadata);
  assertEquals(expectedWithAllProcessors, actualWithAllProcessors);

  // Phase two: the third processor is no longer part of the processor locality.
  Map<String, LocationId> reducedProcessorLocality = ImmutableMap.of(
      processorA, locationA,
      processorB, locationB);
  GrouperMetadataImpl reducedMetadata = new GrouperMetadataImpl(reducedProcessorLocality, taskLocality, new HashMap<>(), new HashMap<>());
  Set<ContainerModel> actualWithTwoProcessors = grouper.group(allTaskModels, reducedMetadata);
  Set<ContainerModel> expectedWithTwoProcessors = ImmutableSet.of(
      new ContainerModel(processorA, ImmutableMap.of(taskNameA, taskModelA, taskNameC, taskModelC)),
      new ContainerModel(processorB, ImmutableMap.of(taskNameB, taskModelB)));
  assertEquals(expectedWithTwoProcessors, actualWithTwoProcessors);
}
Aggregations