use of org.apache.samza.container.grouper.task.GrouperMetadata in project samza by apache.
the class ZkJobCoordinator method generateNewJobModel.
/**
* Generate new JobModel when becoming a leader or the list of processor changed.
*/
@VisibleForTesting
JobModel generateNewJobModel(List<ProcessorNode> processorNodes) {
String zkJobModelVersion = zkUtils.getJobModelVersion();
// If JobModel exists in zookeeper && cached JobModel version is unequal to JobModel version stored in zookeeper.
if (zkJobModelVersion != null && !Objects.equals(cachedJobModelVersion, zkJobModelVersion)) {
JobModel jobModel = readJobModelFromMetadataStore(zkJobModelVersion);
for (ContainerModel containerModel : jobModel.getContainers().values()) {
containerModel.getTasks().forEach((taskName, taskModel) -> changeLogPartitionMap.put(taskName, taskModel.getChangelogPartition().getPartitionId()));
}
cachedJobModelVersion = zkJobModelVersion;
}
GrouperMetadata grouperMetadata = getGrouperMetadata(zkJobModelVersion, processorNodes);
JobModel model = JobModelCalculator.INSTANCE.calculateJobModel(config, changeLogPartitionMap, streamMetadataCache, grouperMetadata);
return new JobModel(new MapConfig(), model.getContainers());
}
use of org.apache.samza.container.grouper.task.GrouperMetadata in project samza by apache.
the class JobModelHelper method newJobModel.
public JobModel newJobModel(Config config, Map<TaskName, Integer> changelogPartitionMapping) {
GrouperMetadata grouperMetadata = getGrouperMetadata(config, this.localityManager, this.taskAssignmentManager, this.taskPartitionAssignmentManager);
JobModel jobModel = this.jobModelCalculator.calculateJobModel(config, changelogPartitionMapping, this.streamMetadataCache, grouperMetadata);
updateTaskAssignments(jobModel, this.taskAssignmentManager, this.taskPartitionAssignmentManager, grouperMetadata);
return jobModel;
}
use of org.apache.samza.container.grouper.task.GrouperMetadata in project samza by apache.
the class JobModelHelper method getGrouperMetadata.
private GrouperMetadata getGrouperMetadata(Config config, LocalityManager localityManager, TaskAssignmentManager taskAssignmentManager, TaskPartitionAssignmentManager taskPartitionAssignmentManager) {
Map<String, LocationId> processorLocality = getProcessorLocality(config, localityManager);
Map<TaskName, TaskMode> taskModes = taskAssignmentManager.readTaskModes();
Map<TaskName, String> taskNameToProcessorId = new HashMap<>();
Map<TaskName, LocationId> taskLocality = new HashMap<>();
// We read the taskAssignment only for ActiveTasks, i.e., tasks that have no task-mode or have an active task mode
taskAssignmentManager.readTaskAssignment().forEach((taskNameString, containerId) -> {
TaskName taskName = new TaskName(taskNameString);
if (isActiveTask(taskName, taskModes)) {
taskNameToProcessorId.put(taskName, containerId);
if (processorLocality.containsKey(containerId)) {
taskLocality.put(taskName, processorLocality.get(containerId));
}
}
});
Map<SystemStreamPartition, List<String>> sspToTaskMapping = taskPartitionAssignmentManager.readTaskPartitionAssignments();
Map<TaskName, List<SystemStreamPartition>> taskPartitionAssignments = new HashMap<>();
// Task to partition assignments is stored as {@see SystemStreamPartition} to list of {@see TaskName} in
// coordinator stream. This is done due to the 1 MB value size limit in a kafka topic. Conversion to
// taskName to SystemStreamPartitions is done here to wire-in the data to {@see JobModel}.
sspToTaskMapping.forEach((systemStreamPartition, taskNames) -> taskNames.forEach(taskNameString -> {
TaskName taskName = new TaskName(taskNameString);
if (isActiveTask(taskName, taskModes)) {
taskPartitionAssignments.putIfAbsent(taskName, new ArrayList<>());
taskPartitionAssignments.get(taskName).add(systemStreamPartition);
}
}));
return new GrouperMetadataImpl(processorLocality, taskLocality, taskPartitionAssignments, taskNameToProcessorId);
}
use of org.apache.samza.container.grouper.task.GrouperMetadata in project samza by apache.
the class TestGroupBySystemStreamPartitionWithGrouperProxy method testRemovalOfPreviousStreamsAndThenAddNewStream.
@Test
public void testRemovalOfPreviousStreamsAndThenAddNewStream() {
Map<TaskName, List<SystemStreamPartition>> prevGroupingWithMultipleStreams = ImmutableMap.<TaskName, List<SystemStreamPartition>>builder().put(new TaskName("SystemStreamPartition [kafka, PVE, 0]"), ImmutableList.of(new SystemStreamPartition("kafka", "PVE", new Partition(0)))).put(new TaskName("SystemStreamPartition [kafka, PVE, 1]"), ImmutableList.of(new SystemStreamPartition("kafka", "PVE", new Partition(1)))).put(new TaskName("SystemStreamPartition [kafka, PVE, 2]"), ImmutableList.of(new SystemStreamPartition("kafka", "PVE", new Partition(2)))).put(new TaskName("SystemStreamPartition [kafka, PVE, 3]"), ImmutableList.of(new SystemStreamPartition("kafka", "PVE", new Partition(3)))).put(new TaskName("SystemStreamPartition [kafka, URE, 0]"), ImmutableList.of(new SystemStreamPartition("kafka", "PVE", new Partition(0)))).put(new TaskName("SystemStreamPartition [kafka, URE, 1]"), ImmutableList.of(new SystemStreamPartition("kafka", "PVE", new Partition(1)))).put(new TaskName("SystemStreamPartition [kafka, URE, 2]"), ImmutableList.of(new SystemStreamPartition("kafka", "PVE", new Partition(2)))).put(new TaskName("SystemStreamPartition [kafka, URE, 3]"), ImmutableList.of(new SystemStreamPartition("kafka", "PVE", new Partition(3)))).build();
Set<SystemStreamPartition> currSsps = IntStream.range(0, 8).mapToObj(partitionId -> new SystemStreamPartition("kafka", "BOB", new Partition(partitionId))).collect(Collectors.toSet());
Map<TaskName, Set<SystemStreamPartition>> expectedGroupingForStatefulAndStateless = ImmutableMap.<TaskName, Set<SystemStreamPartition>>builder().put(new TaskName("SystemStreamPartition [kafka, BOB, 1]"), ImmutableSet.of(new SystemStreamPartition("kafka", "BOB", new Partition(1)))).put(new TaskName("SystemStreamPartition [kafka, BOB, 0]"), ImmutableSet.of(new SystemStreamPartition("kafka", "BOB", new Partition(0)))).put(new TaskName("SystemStreamPartition [kafka, BOB, 3]"), ImmutableSet.of(new SystemStreamPartition("kafka", "BOB", new Partition(3)))).put(new TaskName("SystemStreamPartition [kafka, BOB, 2]"), ImmutableSet.of(new SystemStreamPartition("kafka", "BOB", new Partition(2)))).put(new TaskName("SystemStreamPartition [kafka, BOB, 5]"), ImmutableSet.of(new SystemStreamPartition("kafka", "BOB", new Partition(5)))).put(new TaskName("SystemStreamPartition [kafka, BOB, 4]"), ImmutableSet.of(new SystemStreamPartition("kafka", "BOB", new Partition(4)))).put(new TaskName("SystemStreamPartition [kafka, BOB, 7]"), ImmutableSet.of(new SystemStreamPartition("kafka", "BOB", new Partition(7)))).put(new TaskName("SystemStreamPartition [kafka, BOB, 6]"), ImmutableSet.of(new SystemStreamPartition("kafka", "BOB", new Partition(6)))).build();
// SSPGrouperProxy for stateful job
SSPGrouperProxy groupBySystemStreamPartition = buildSspGrouperProxy(true);
GrouperMetadata grouperMetadata = new GrouperMetadataImpl(new HashMap<>(), new HashMap<>(), prevGroupingWithMultipleStreams, new HashMap<>());
Map<TaskName, Set<SystemStreamPartition>> finalGrouping = groupBySystemStreamPartition.group(currSsps, grouperMetadata);
Assert.assertEquals(expectedGroupingForStatefulAndStateless, finalGrouping);
// SSPGrouperProxy for stateless job
groupBySystemStreamPartition = buildSspGrouperProxy(false);
finalGrouping = groupBySystemStreamPartition.group(currSsps, grouperMetadata);
Assert.assertEquals(expectedGroupingForStatefulAndStateless, finalGrouping);
}
use of org.apache.samza.container.grouper.task.GrouperMetadata in project samza by apache.
the class TestGroupBySystemStreamPartitionWithGrouperProxy method testSingleStreamRepartitioning.
@Test
public void testSingleStreamRepartitioning() {
Map<TaskName, List<SystemStreamPartition>> prevGroupingWithSingleStream = ImmutableMap.<TaskName, List<SystemStreamPartition>>builder().put(new TaskName("SystemStreamPartition [kafka, PVE, 0]"), ImmutableList.of(new SystemStreamPartition("kafka", "PVE", new Partition(0)))).put(new TaskName("SystemStreamPartition [kafka, PVE, 1]"), ImmutableList.of(new SystemStreamPartition("kafka", "PVE", new Partition(1)))).put(new TaskName("SystemStreamPartition [kafka, PVE, 2]"), ImmutableList.of(new SystemStreamPartition("kafka", "PVE", new Partition(2)))).put(new TaskName("SystemStreamPartition [kafka, PVE, 3]"), ImmutableList.of(new SystemStreamPartition("kafka", "PVE", new Partition(3)))).build();
Set<SystemStreamPartition> currSsps = IntStream.range(0, 8).mapToObj(partitionId -> new SystemStreamPartition("kafka", "PVE", new Partition(partitionId))).collect(Collectors.toSet());
Map<TaskName, Set<SystemStreamPartition>> expectedGroupingForStateful = ImmutableMap.<TaskName, Set<SystemStreamPartition>>builder().put(new TaskName("SystemStreamPartition [kafka, PVE, 1]"), ImmutableSet.of(new SystemStreamPartition("kafka", "PVE", new Partition(1)), new SystemStreamPartition("kafka", "PVE", new Partition(5)))).put(new TaskName("SystemStreamPartition [kafka, PVE, 0]"), ImmutableSet.of(new SystemStreamPartition("kafka", "PVE", new Partition(0)), new SystemStreamPartition("kafka", "PVE", new Partition(4)))).put(new TaskName("SystemStreamPartition [kafka, PVE, 3]"), ImmutableSet.of(new SystemStreamPartition("kafka", "PVE", new Partition(7)), new SystemStreamPartition("kafka", "PVE", new Partition(3)))).put(new TaskName("SystemStreamPartition [kafka, PVE, 2]"), ImmutableSet.of(new SystemStreamPartition("kafka", "PVE", new Partition(2)), new SystemStreamPartition("kafka", "PVE", new Partition(6)))).build();
Map<TaskName, Set<SystemStreamPartition>> expectedGroupingForStateless = ImmutableMap.<TaskName, Set<SystemStreamPartition>>builder().put(new TaskName("SystemStreamPartition [kafka, PVE, 1]"), ImmutableSet.of(new SystemStreamPartition("kafka", "PVE", new Partition(1)))).put(new TaskName("SystemStreamPartition [kafka, PVE, 0]"), ImmutableSet.of(new SystemStreamPartition("kafka", "PVE", new Partition(0)))).put(new TaskName("SystemStreamPartition [kafka, PVE, 3]"), ImmutableSet.of(new SystemStreamPartition("kafka", "PVE", new Partition(3)))).put(new TaskName("SystemStreamPartition [kafka, PVE, 2]"), ImmutableSet.of(new SystemStreamPartition("kafka", "PVE", new Partition(2)))).put(new TaskName("SystemStreamPartition [kafka, PVE, 5]"), ImmutableSet.of(new SystemStreamPartition("kafka", "PVE", new Partition(5)))).put(new TaskName("SystemStreamPartition [kafka, PVE, 4]"), ImmutableSet.of(new SystemStreamPartition("kafka", "PVE", new Partition(4)))).put(new TaskName("SystemStreamPartition [kafka, PVE, 7]"), ImmutableSet.of(new SystemStreamPartition("kafka", "PVE", new Partition(7)))).put(new TaskName("SystemStreamPartition [kafka, PVE, 6]"), ImmutableSet.of(new SystemStreamPartition("kafka", "PVE", new Partition(6)))).build();
// SSPGrouperProxy for stateful job
SSPGrouperProxy groupBySystemStreamPartition = buildSspGrouperProxy(true);
GrouperMetadata grouperMetadata = new GrouperMetadataImpl(new HashMap<>(), new HashMap<>(), prevGroupingWithSingleStream, new HashMap<>());
Map<TaskName, Set<SystemStreamPartition>> finalGrouping = groupBySystemStreamPartition.group(currSsps, grouperMetadata);
Assert.assertEquals(expectedGroupingForStateful, finalGrouping);
// SSPGrouperProxy for stateless job
groupBySystemStreamPartition = buildSspGrouperProxy(false);
finalGrouping = groupBySystemStreamPartition.group(currSsps, grouperMetadata);
Assert.assertEquals(expectedGroupingForStateless, finalGrouping);
}
Aggregations