Search in sources:

Example 11 with LocationId

use of org.apache.samza.runtime.LocationId in project samza by apache.

the class JobModelHelper method getGrouperMetadata.

/**
 * Builds the {@link GrouperMetadata} of a job from the processor locality, the task assignments and the
 * task-to-partition assignments read through the supplied managers. Only tasks accepted by
 * {@code isActiveTask} contribute to the task-level maps.
 * @param config the job configuration, forwarded to {@code getProcessorLocality}.
 * @param localityManager forwarded to {@code getProcessorLocality} to read the processor locality.
 * @param taskAssignmentManager supplies the task modes and the task name to container id assignments.
 * @param taskPartitionAssignmentManager supplies the partition to task names assignments.
 * @return the grouper metadata assembled from the values read above.
 */
private GrouperMetadata getGrouperMetadata(Config config, LocalityManager localityManager, TaskAssignmentManager taskAssignmentManager, TaskPartitionAssignmentManager taskPartitionAssignmentManager) {
    Map<String, LocationId> processorLocality = getProcessorLocality(config, localityManager);
    Map<TaskName, TaskMode> taskModes = taskAssignmentManager.readTaskModes();

    Map<TaskName, String> taskNameToProcessorId = new HashMap<>();
    Map<TaskName, LocationId> taskLocality = new HashMap<>();
    // Task assignments are only recorded for active tasks; everything else is skipped.
    for (Map.Entry<String, String> assignment : taskAssignmentManager.readTaskAssignment().entrySet()) {
        TaskName taskName = new TaskName(assignment.getKey());
        if (!isActiveTask(taskName, taskModes)) {
            continue;
        }
        String processorId = assignment.getValue();
        taskNameToProcessorId.put(taskName, processorId);
        // A task inherits the location of its processor, when that processor has a known locality.
        if (processorLocality.containsKey(processorId)) {
            taskLocality.put(taskName, processorLocality.get(processorId));
        }
    }

    // The assignments are read keyed by partition (partition -> task names). They are inverted here into
    // task name -> partitions, which is the shape handed to GrouperMetadataImpl.
    Map<SystemStreamPartition, List<String>> sspToTaskMapping = taskPartitionAssignmentManager.readTaskPartitionAssignments();
    Map<TaskName, List<SystemStreamPartition>> taskPartitionAssignments = new HashMap<>();
    for (Map.Entry<SystemStreamPartition, List<String>> mapping : sspToTaskMapping.entrySet()) {
        SystemStreamPartition systemStreamPartition = mapping.getKey();
        for (String taskNameString : mapping.getValue()) {
            TaskName taskName = new TaskName(taskNameString);
            if (isActiveTask(taskName, taskModes)) {
                taskPartitionAssignments.computeIfAbsent(taskName, ignored -> new ArrayList<>()).add(systemStreamPartition);
            }
        }
    }

    return new GrouperMetadataImpl(processorLocality, taskLocality, taskPartitionAssignments, taskNameToProcessorId);
}
Also used : StreamMetadataCache(org.apache.samza.system.StreamMetadataCache) TaskPartitionAssignmentManager(org.apache.samza.container.grouper.task.TaskPartitionAssignmentManager) LoggerFactory(org.slf4j.LoggerFactory) JobConfig(org.apache.samza.config.JobConfig) GrouperMetadataImpl(org.apache.samza.container.grouper.task.GrouperMetadataImpl) HashMap(java.util.HashMap) TaskModel(org.apache.samza.job.model.TaskModel) SystemStreamPartition(org.apache.samza.system.SystemStreamPartition) StringUtils(org.apache.commons.lang3.StringUtils) ArrayList(java.util.ArrayList) LocationId(org.apache.samza.runtime.LocationId) HashSet(java.util.HashSet) Map(java.util.Map) GrouperMetadata(org.apache.samza.container.grouper.task.GrouperMetadata) TaskAssignmentManager(org.apache.samza.container.grouper.task.TaskAssignmentManager) JobModel(org.apache.samza.job.model.JobModel) TaskName(org.apache.samza.container.TaskName) Logger(org.slf4j.Logger) Set(java.util.Set) Collectors(java.util.stream.Collectors) LocalityManager(org.apache.samza.container.LocalityManager) List(java.util.List) TaskMode(org.apache.samza.job.model.TaskMode) ContainerModel(org.apache.samza.job.model.ContainerModel) ProcessorLocality(org.apache.samza.job.model.ProcessorLocality) Optional(java.util.Optional) Config(org.apache.samza.config.Config) HashMap(java.util.HashMap) GrouperMetadataImpl(org.apache.samza.container.grouper.task.GrouperMetadataImpl) LocationId(org.apache.samza.runtime.LocationId) ArrayList(java.util.ArrayList) TaskMode(org.apache.samza.job.model.TaskMode) TaskName(org.apache.samza.container.TaskName) ArrayList(java.util.ArrayList) List(java.util.List) SystemStreamPartition(org.apache.samza.system.SystemStreamPartition)

Example 12 with LocationId

use of org.apache.samza.runtime.LocationId in project samza by apache.

the class JobModelHelper method getProcessorLocality.

/**
 * Retrieves and returns the processor locality of a samza job using provided {@link Config} and {@link LocalityManager}.
 * Container ids are the integers from {@code 0} up to (but excluding) the configured container count, rendered as
 * strings. A container with no persisted locality, or with an empty host, is mapped to the {@code "ANY_HOST"} location.
 * @param config provides the configurations defined by the user. Used to read the container count of the job.
 * @param localityManager provides the processor to host mapping persisted to the metadata store.
 * @return the processor locality, keyed by container id.
 */
private static Map<String, LocationId> getProcessorLocality(Config config, LocalityManager localityManager) {
    Map<String, ProcessorLocality> existingContainerLocality = localityManager.readLocality().getProcessorLocalities();
    // The container count does not change while iterating; read it once rather than rebuilding JobConfig per iteration.
    int containerCount = new JobConfig(config).getContainerCount();
    Map<String, LocationId> containerToLocationId = new HashMap<>();
    for (int i = 0; i < containerCount; i++) {
        String containerId = Integer.toString(i);
        LocationId locationId = Optional.ofNullable(existingContainerLocality.get(containerId))
            .map(ProcessorLocality::host)
            .filter(StringUtils::isNotEmpty)
            .map(LocationId::new)
            // orElseGet only builds the fallback location when no usable host was found.
            .orElseGet(() -> new LocationId("ANY_HOST"));
        containerToLocationId.put(containerId, locationId);
    }
    return containerToLocationId;
}
Also used : ProcessorLocality(org.apache.samza.job.model.ProcessorLocality) HashMap(java.util.HashMap) StringUtils(org.apache.commons.lang3.StringUtils) LocationId(org.apache.samza.runtime.LocationId) JobConfig(org.apache.samza.config.JobConfig)

Example 13 with LocationId

use of org.apache.samza.runtime.LocationId in project samza by apache.

the class TestGroupByContainerIds method testMoreTasksThanProcessors.

@Test
public void testMoreTasksThanProcessors() {
    // Locality fixtures: two processors with their own locations, and three task names spread over three locations.
    String firstProcessorId = "testProcessorId1";
    String secondProcessorId = "testProcessorId2";
    LocationId firstLocation = new LocationId("testLocationId1");
    LocationId secondLocation = new LocationId("testLocationId2");
    LocationId thirdLocation = new LocationId("testLocationId3");
    Map<String, LocationId> processorLocality = ImmutableMap.of(firstProcessorId, firstLocation, secondProcessorId, secondLocation);
    Map<TaskName, LocationId> taskLocality = ImmutableMap.of(
        new TaskName("testTasKId1"), firstLocation,
        new TaskName("testTaskId2"), secondLocation,
        new TaskName("testTaskId3"), thirdLocation);
    GrouperMetadataImpl grouperMetadata = new GrouperMetadataImpl(processorLocality, taskLocality, new HashMap<>(), new HashMap<>());

    // NOTE(review): the argument 1 presumably yields a single task model, which reads at odds with the
    // test name - confirm against generateTaskModels.
    Set<TaskModel> taskModels = generateTaskModels(1);
    // NOTE(review): containerIds is never read in this test.
    List<String> containerIds = ImmutableList.of(firstProcessorId, secondProcessorId);

    // All generated task models are expected in one container that belongs to the first processor.
    Map<TaskName, TaskModel> expectedTasks = taskModels.stream()
        .collect(Collectors.toMap(TaskModel::getTaskName, taskModel -> taskModel));
    Set<ContainerModel> expectedContainerModels = ImmutableSet.of(new ContainerModel(firstProcessorId, expectedTasks));

    Set<ContainerModel> actualContainerModels = buildSimpleGrouper().group(taskModels, grouperMetadata);

    assertEquals(1, actualContainerModels.size());
    assertEquals(expectedContainerModels, actualContainerModels);
}
Also used : ImmutableSet(com.google.common.collect.ImmutableSet) TaskName(org.apache.samza.container.TaskName) ImmutableMap(com.google.common.collect.ImmutableMap) Assert.assertNotNull(org.junit.Assert.assertNotNull) Partition(org.apache.samza.Partition) Set(java.util.Set) Assert.assertTrue(org.junit.Assert.assertTrue) HashMap(java.util.HashMap) TaskModel(org.apache.samza.job.model.TaskModel) Test(org.junit.Test) Collectors(java.util.stream.Collectors) ArrayList(java.util.ArrayList) LocationId(org.apache.samza.runtime.LocationId) HashSet(java.util.HashSet) List(java.util.List) ImmutableList(com.google.common.collect.ImmutableList) ContainerModel(org.apache.samza.job.model.ContainerModel) ContainerMocks.generateTaskModels(org.apache.samza.container.mock.ContainerMocks.generateTaskModels) Map(java.util.Map) ContainerMocks.getTaskName(org.apache.samza.container.mock.ContainerMocks.getTaskName) Config(org.apache.samza.config.Config) Collections(java.util.Collections) MapConfig(org.apache.samza.config.MapConfig) Assert.assertEquals(org.junit.Assert.assertEquals) LocationId(org.apache.samza.runtime.LocationId) ContainerModel(org.apache.samza.job.model.ContainerModel) TaskName(org.apache.samza.container.TaskName) ContainerMocks.getTaskName(org.apache.samza.container.mock.ContainerMocks.getTaskName) TaskModel(org.apache.samza.job.model.TaskModel) Test(org.junit.Test)

Example 14 with LocationId

use of org.apache.samza.runtime.LocationId in project samza by apache.

the class TestGroupByContainerIds method testShouldUseTaskLocalityWhenGeneratingContainerModels.

@Test
public void testShouldUseTaskLocalityWhenGeneratingContainerModels() {
    // Three processors and three tasks; processor N and task N are both registered at location N.
    String firstProcessorId = "testProcessorId1";
    String secondProcessorId = "testProcessorId2";
    String thirdProcessorId = "testProcessorId3";
    LocationId firstLocation = new LocationId("testLocationId1");
    LocationId secondLocation = new LocationId("testLocationId2");
    LocationId thirdLocation = new LocationId("testLocationId3");
    Map<String, LocationId> processorLocality = ImmutableMap.of(
        firstProcessorId, firstLocation,
        secondProcessorId, secondLocation,
        thirdProcessorId, thirdLocation);

    TaskName firstTaskName = new TaskName("testTasKId1");
    TaskName secondTaskName = new TaskName("testTaskId2");
    TaskName thirdTaskName = new TaskName("testTaskId3");
    Map<TaskName, LocationId> taskLocality = ImmutableMap.of(
        firstTaskName, firstLocation,
        secondTaskName, secondLocation,
        thirdTaskName, thirdLocation);

    TaskModel firstTaskModel = new TaskModel(firstTaskName, new HashSet<>(), new Partition(0));
    TaskModel secondTaskModel = new TaskModel(secondTaskName, new HashSet<>(), new Partition(1));
    TaskModel thirdTaskModel = new TaskModel(thirdTaskName, new HashSet<>(), new Partition(2));
    Set<TaskModel> taskModels = ImmutableSet.of(firstTaskModel, secondTaskModel, thirdTaskModel);

    // Each task is expected on the processor that shares its location.
    ContainerModel firstContainer = new ContainerModel(firstProcessorId, ImmutableMap.of(firstTaskName, firstTaskModel));
    ContainerModel secondContainer = new ContainerModel(secondProcessorId, ImmutableMap.of(secondTaskName, secondTaskModel));
    ContainerModel thirdContainer = new ContainerModel(thirdProcessorId, ImmutableMap.of(thirdTaskName, thirdTaskModel));
    Set<ContainerModel> expectedContainerModels = ImmutableSet.of(firstContainer, secondContainer, thirdContainer);

    GrouperMetadataImpl grouperMetadata = new GrouperMetadataImpl(processorLocality, taskLocality, new HashMap<>(), new HashMap<>());
    TaskNameGrouper taskNameGrouper = buildSimpleGrouper(3);
    Set<ContainerModel> actualContainerModels = taskNameGrouper.group(taskModels, grouperMetadata);

    assertEquals(expectedContainerModels, actualContainerModels);
}
Also used : Partition(org.apache.samza.Partition) LocationId(org.apache.samza.runtime.LocationId) ContainerModel(org.apache.samza.job.model.ContainerModel) TaskName(org.apache.samza.container.TaskName) ContainerMocks.getTaskName(org.apache.samza.container.mock.ContainerMocks.getTaskName) TaskModel(org.apache.samza.job.model.TaskModel) Test(org.junit.Test)

Example 15 with LocationId

use of org.apache.samza.runtime.LocationId in project samza by apache.

the class TestGroupByContainerIds method testShouldMinimizeTaskShuffleWhenAvailableProcessorInGroupChanges.

@Test
public void testShouldMinimizeTaskShuffleWhenAvailableProcessorInGroupChanges() {
    TaskNameGrouper taskNameGrouper = buildSimpleGrouper(3);

    // Shared fixtures: three processors and three tasks; processor N and task N are both at location N.
    String firstProcessorId = "testProcessorId1";
    String secondProcessorId = "testProcessorId2";
    String thirdProcessorId = "testProcessorId3";
    LocationId firstLocation = new LocationId("testLocationId1");
    LocationId secondLocation = new LocationId("testLocationId2");
    LocationId thirdLocation = new LocationId("testLocationId3");
    TaskName firstTaskName = new TaskName("testTasKId1");
    TaskName secondTaskName = new TaskName("testTaskId2");
    TaskName thirdTaskName = new TaskName("testTaskId3");
    TaskModel firstTaskModel = new TaskModel(firstTaskName, new HashSet<>(), new Partition(0));
    TaskModel secondTaskModel = new TaskModel(secondTaskName, new HashSet<>(), new Partition(1));
    TaskModel thirdTaskModel = new TaskModel(thirdTaskName, new HashSet<>(), new Partition(2));
    Set<TaskModel> taskModels = ImmutableSet.of(firstTaskModel, secondTaskModel, thirdTaskModel);
    Map<TaskName, LocationId> taskLocality = ImmutableMap.of(
        firstTaskName, firstLocation,
        secondTaskName, secondLocation,
        thirdTaskName, thirdLocation);

    // Phase 1: all three processors are available, so each task is expected on its own processor.
    Map<String, LocationId> allProcessorsLocality = ImmutableMap.of(
        firstProcessorId, firstLocation,
        secondProcessorId, secondLocation,
        thirdProcessorId, thirdLocation);
    GrouperMetadataImpl allProcessorsMetadata = new GrouperMetadataImpl(allProcessorsLocality, taskLocality, new HashMap<>(), new HashMap<>());
    Set<ContainerModel> expectedWithAllProcessors = ImmutableSet.of(
        new ContainerModel(firstProcessorId, ImmutableMap.of(firstTaskName, firstTaskModel)),
        new ContainerModel(secondProcessorId, ImmutableMap.of(secondTaskName, secondTaskModel)),
        new ContainerModel(thirdProcessorId, ImmutableMap.of(thirdTaskName, thirdTaskModel)));
    Set<ContainerModel> actualWithAllProcessors = taskNameGrouper.group(taskModels, allProcessorsMetadata);
    assertEquals(expectedWithAllProcessors, actualWithAllProcessors);

    // Phase 2: the third processor is gone; tasks one and two are expected to stay put and only task three moves.
    Map<String, LocationId> remainingProcessorsLocality = ImmutableMap.of(
        firstProcessorId, firstLocation,
        secondProcessorId, secondLocation);
    GrouperMetadataImpl remainingProcessorsMetadata = new GrouperMetadataImpl(remainingProcessorsLocality, taskLocality, new HashMap<>(), new HashMap<>());
    Set<ContainerModel> actualWithRemainingProcessors = taskNameGrouper.group(taskModels, remainingProcessorsMetadata);
    Set<ContainerModel> expectedWithRemainingProcessors = ImmutableSet.of(
        new ContainerModel(firstProcessorId, ImmutableMap.of(firstTaskName, firstTaskModel, thirdTaskName, thirdTaskModel)),
        new ContainerModel(secondProcessorId, ImmutableMap.of(secondTaskName, secondTaskModel)));
    assertEquals(expectedWithRemainingProcessors, actualWithRemainingProcessors);
}
Also used : Partition(org.apache.samza.Partition) HashMap(java.util.HashMap) LocationId(org.apache.samza.runtime.LocationId) ContainerModel(org.apache.samza.job.model.ContainerModel) TaskName(org.apache.samza.container.TaskName) ContainerMocks.getTaskName(org.apache.samza.container.mock.ContainerMocks.getTaskName) TaskModel(org.apache.samza.job.model.TaskModel) Test(org.junit.Test)

Aggregations

LocationId (org.apache.samza.runtime.LocationId)16 TaskName (org.apache.samza.container.TaskName)13 ContainerModel (org.apache.samza.job.model.ContainerModel)11 Test (org.junit.Test)11 TaskModel (org.apache.samza.job.model.TaskModel)9 HashMap (java.util.HashMap)7 Partition (org.apache.samza.Partition)6 ContainerMocks.getTaskName (org.apache.samza.container.mock.ContainerMocks.getTaskName)6 ArrayList (java.util.ArrayList)5 List (java.util.List)4 ProcessorLocality (org.apache.samza.job.model.ProcessorLocality)4 HashSet (java.util.HashSet)3 Map (java.util.Map)3 Set (java.util.Set)3 Collectors (java.util.stream.Collectors)3 Collections (java.util.Collections)2 Optional (java.util.Optional)2 StringUtils (org.apache.commons.lang3.StringUtils)2 Config (org.apache.samza.config.Config)2 JobConfig (org.apache.samza.config.JobConfig)2