Example usage of org.apache.samza.job.model.ContainerModel in the Apache Samza project.
Taken from the method updateTaskAssignments of the class JobModelHelper.
/**
 * This method does the following:
 * 1. Deletes the existing task assignments if the partition-task grouping has changed from the previous run of the job.
 * 2. Saves the newly generated task assignments to the storage layer through the {@link TaskAssignmentManager}.
 *
 * @param jobModel represents the {@link JobModel} of the samza job.
 * @param taskAssignmentManager required to persist the processor to task assignments to the metadata store.
 * @param taskPartitionAssignmentManager required to persist the task to partition assignments to the metadata store.
 * @param grouperMetadata provides the historical metadata of the samza application.
 */
private void updateTaskAssignments(JobModel jobModel, TaskAssignmentManager taskAssignmentManager,
    TaskPartitionAssignmentManager taskPartitionAssignmentManager, GrouperMetadata grouperMetadata) {
  LOG.info("Storing the task assignments into metadata store.");
  Set<String> activeTaskNames = new HashSet<>();
  Set<String> standbyTaskNames = new HashSet<>();
  Set<SystemStreamPartition> systemStreamPartitions = new HashSet<>();
  // Partition the current tasks by mode and collect every SSP consumed by the job.
  // Active and Standby are distinct enum values, so the checks are mutually exclusive.
  for (ContainerModel containerModel : jobModel.getContainers().values()) {
    for (TaskModel taskModel : containerModel.getTasks().values()) {
      if (TaskMode.Active.equals(taskModel.getTaskMode())) {
        activeTaskNames.add(taskModel.getTaskName().getTaskName());
      } else if (TaskMode.Standby.equals(taskModel.getTaskMode())) {
        standbyTaskNames.add(taskModel.getTaskName().getTaskName());
      }
      systemStreamPartitions.addAll(taskModel.getSystemStreamPartitions());
    }
  }
  Map<TaskName, String> previousTaskToContainerId = grouperMetadata.getPreviousTaskToProcessorAssignment();
  if (activeTaskNames.size() != previousTaskToContainerId.size()) {
    LOG.warn(String.format("Current task count %s does not match saved task count %s. Stateful jobs may observe misalignment of keys!",
        activeTaskNames.size(), previousTaskToContainerId.size()));
    // If the tasks changed, then the partition-task grouping is also likely changed and we can't handle that
    // without a much more complicated mapping. Further, the partition count may have changed, which means
    // input message keys are likely reshuffled w.r.t. partitions, so the local state may not contain necessary
    // data associated with the incoming keys. Warn the user and default to grouper
    // In this scenario the tasks may have been reduced, so we need to delete all the existing messages
    taskAssignmentManager.deleteTaskContainerMappings(
        previousTaskToContainerId.keySet().stream().map(TaskName::getTaskName).collect(Collectors.toList()));
    taskPartitionAssignmentManager.delete(systemStreamPartitions);
  }
  // if the set of standby tasks has changed, e.g., when the replication-factor changed, or the active-tasks-set has
  // changed, we log a warning and delete the existing mapping for these tasks
  Set<String> previousStandbyTasks = taskAssignmentManager.readTaskModes().entrySet().stream()
      .filter(taskNameToTaskModeEntry -> TaskMode.Standby.equals(taskNameToTaskModeEntry.getValue()))
      .map(taskNameToTaskModeEntry -> taskNameToTaskModeEntry.getKey().getTaskName())
      .collect(Collectors.toSet());
  if (!standbyTaskNames.equals(previousStandbyTasks)) {
    LOG.info(String.format("The set of standby tasks has changed, current standby tasks %s, previous standby tasks %s",
        standbyTaskNames, previousStandbyTasks));
    taskAssignmentManager.deleteTaskContainerMappings(previousStandbyTasks);
  }
  // Task to partition assignments is stored as {@see SystemStreamPartition} to list of {@see TaskName} in
  // coordinator stream. This is done due to the 1 MB value size limit in a kafka topic.
  Map<SystemStreamPartition, List<String>> sspToTaskNameMap = new HashMap<>();
  Map<String, Map<String, TaskMode>> taskContainerMappings = new HashMap<>();
  for (ContainerModel containerModel : jobModel.getContainers().values()) {
    containerModel.getTasks().forEach((taskName, taskModel) -> {
      // computeIfAbsent avoids allocating a fresh map/list on every iteration when the key already exists.
      taskContainerMappings.computeIfAbsent(containerModel.getId(), id -> new HashMap<>())
          .put(taskName.getTaskName(), taskModel.getTaskMode());
      taskModel.getSystemStreamPartitions().forEach(systemStreamPartition ->
          sspToTaskNameMap.computeIfAbsent(systemStreamPartition, ssp -> new ArrayList<>())
              .add(taskName.getTaskName()));
    });
  }
  taskAssignmentManager.writeTaskContainerMappings(taskContainerMappings);
  taskPartitionAssignmentManager.writeTaskPartitionAssignments(sspToTaskNameMap);
}
Example usage of org.apache.samza.job.model.ContainerModel in the Apache Samza project.
Taken from the method generateStandbyTasks of the class TaskNameGrouperProxy.
/**
 * Generates a container model set with standby tasks added and grouped into buddy containers.
 * Package-private for testing.
 *
 * @param containerModels the initial set of active container models.
 * @param replicationFactor the desired replication factor; a factor of n adds n-1 standby tasks per active task.
 * @return the union of the generated buddy containers and the original active containers.
 */
Set<ContainerModel> generateStandbyTasks(Set<ContainerModel> containerModels, int replicationFactor) {
  LOG.info("Received current containerModel map : {}, replicationFactor : {}", containerModels, replicationFactor);
  Set<ContainerModel> result = new HashSet<>();
  int buddiesPerContainer = replicationFactor - 1;
  // Create one buddy container per active container for each extra replica.
  for (ContainerModel activeContainer : containerModels) {
    for (int replica = 0; replica < buddiesPerContainer; replica++) {
      String standbyId = StandbyTaskUtil.getStandbyContainerId(activeContainer.getId(), replica);
      result.add(new ContainerModel(standbyId, getTaskModelForBuddyContainer(activeContainer.getTasks(), replica)));
    }
  }
  // At this point the result holds only the buddy containers, so the log line reports just those.
  LOG.info("Adding buddy containers : {}", result);
  result.addAll(containerModels);
  return result;
}
Example usage of org.apache.samza.job.model.ContainerModel in the Apache Samza project.
Taken from the method getJobModelReader of the test class TestContainerAllocator.
// Builds a JobModelManager backed by a mock HTTP server and the given number of empty containers.
// Ideally, the JobModelReader should be constructed independent of HttpServer.
// That way it becomes easier to mock objects. Save it for later.
private static JobModelManager getJobModelReader(int containerCount) {
  HttpServer server = new MockHttpServer("/", 7777, null, new ServletHolder(DefaultServlet.class));
  Map<String, ContainerModel> containerModels = new HashMap<>();
  // Each container gets a processor id equal to its index and no tasks.
  for (int id = 0; id < containerCount; id++) {
    String processorId = String.valueOf(id);
    containerModels.put(processorId, new ContainerModel(processorId, id, new HashMap<TaskName, TaskModel>()));
  }
  return new JobModelManager(new JobModel(getConfig(), containerModels), server, null);
}
Example usage of org.apache.samza.job.model.ContainerModel in the Apache Samza project.
Taken from the method getCoordinator of the test class TestContainerProcessManager.
// Builds a JobModelManager with the given number of empty containers and a mocked
// locality mapping that pins container "0" to host "abc".
private JobModelManager getCoordinator(int containerCount) {
  Map<String, ContainerModel> containers = new HashMap<>();
  for (int i = 0; i < containerCount; i++) {
    ContainerModel container = new ContainerModel(String.valueOf(i), i, new HashMap<TaskName, TaskModel>());
    containers.put(String.valueOf(i), container);
  }
  // Plain map construction instead of double-brace initialization: the double-brace
  // idiom creates an anonymous HashMap subclass that retains a reference to the
  // enclosing test instance.
  Map<String, String> hostMapping = new HashMap<>();
  hostMapping.put(SetContainerHostMapping.HOST_KEY, "abc");
  Map<String, Map<String, String>> localityMap = new HashMap<>();
  localityMap.put("0", hostMapping);
  LocalityManager mockLocalityManager = mock(LocalityManager.class);
  when(mockLocalityManager.readContainerLocality()).thenReturn(localityMap);
  JobModel jobModel = new JobModel(getConfig(), containers, mockLocalityManager);
  JobModelManager.jobModelRef().getAndSet(jobModel);
  return new JobModelManager(jobModel, this.server, null);
}
Example usage of org.apache.samza.job.model.ContainerModel in the Apache Samza project.
Taken from the test method testBalancerNewContainerCountOne of the class TestGroupByContainerCount.
// Balancing 3 tasks from 3 containers down to 1 container must behave exactly like
// a fresh group(): all tasks land in container "0" and are re-written, none deleted.
@Test
public void testBalancerNewContainerCountOne() {
  Set<TaskModel> taskModels = generateTaskModels(3);
  Set<ContainerModel> previousContainers = new GroupByContainerCount(3).group(taskModels);
  when(taskAssignmentManager.readTaskAssignment()).thenReturn(generateTaskContainerMapping(previousContainers));

  Set<ContainerModel> balanced = new GroupByContainerCount(1).balance(taskModels, localityManager);

  // Results should be the same as calling group
  Map<String, ContainerModel> byProcessorId = new HashMap<>();
  for (ContainerModel containerModel : balanced) {
    byProcessorId.put(containerModel.getProcessorId(), containerModel);
  }
  assertEquals(1, balanced.size());
  ContainerModel onlyContainer = byProcessorId.get("0");
  assertNotNull(onlyContainer);
  assertEquals("0", onlyContainer.getProcessorId());
  assertEquals(3, onlyContainer.getTasks().size());
  for (int taskIndex = 0; taskIndex < 3; taskIndex++) {
    assertTrue(onlyContainer.getTasks().containsKey(getTaskName(taskIndex)));
    verify(taskAssignmentManager).writeTaskContainerMapping(getTaskName(taskIndex).getTaskName(), "0");
  }
  verify(taskAssignmentManager, never()).deleteTaskContainerMappings(anyCollection());
}
Aggregations