use of org.apache.samza.job.model.TaskModel in project samza by apache.
the class TestGroupByContainerCount method testGroupTaskCountIncrease.
/**
 * Balances three tasks onto a single container after a previous assignment that was
 * produced by grouping two tasks into two containers, then checks that every task lands
 * on container "0" and that the new mapping is written through the task assignment manager.
 */
@Test
public void testGroupTaskCountIncrease() {
  int taskCount = 3;
  Set<TaskModel> currentTasks = generateTaskModels(taskCount);

  // The stored assignment comes from grouping one task fewer across two containers.
  Set<ContainerModel> previousContainers = new GroupByContainerCount(2).group(generateTaskModels(taskCount - 1));
  Map<String, String> storedAssignment = generateTaskContainerMapping(previousContainers);
  when(taskAssignmentManager.readTaskAssignment()).thenReturn(storedAssignment);

  Set<ContainerModel> balanced = new GroupByContainerCount(1).balance(currentTasks, localityManager);

  // Results should be the same as calling group
  Map<String, ContainerModel> byProcessorId = new HashMap<>();
  balanced.forEach(model -> byProcessorId.put(model.getProcessorId(), model));

  assertEquals(1, balanced.size());
  ContainerModel onlyContainer = byProcessorId.get("0");
  assertNotNull(onlyContainer);
  assertEquals("0", onlyContainer.getProcessorId());
  assertEquals(taskCount, onlyContainer.getTasks().size());

  // Every task index 0..taskCount-1 must be present in the single container.
  for (int taskIndex = 0; taskIndex < taskCount; taskIndex++) {
    assertTrue(onlyContainer.getTasks().containsKey(getTaskName(taskIndex)));
  }

  // Each task must have been recorded against container "0".
  for (int taskIndex = 0; taskIndex < taskCount; taskIndex++) {
    verify(taskAssignmentManager).writeTaskContainerMapping(getTaskName(taskIndex).getTaskName(), "0");
  }
  verify(taskAssignmentManager).deleteTaskContainerMappings(anyCollection());
}
use of org.apache.samza.job.model.TaskModel in project samza by apache.
the class TestGroupByContainerCount method testGroupManyTasks.
/**
 * Groups 21 tasks into two containers and checks both the split sizes (11 and 10)
 * and the exact task membership of each container.
 */
@Test
public void testGroupManyTasks() {
  Set<TaskModel> allTasks = generateTaskModels(21);
  Set<ContainerModel> grouped = new GroupByContainerCount(2).group(allTasks);

  Map<String, ContainerModel> byProcessorId = new HashMap<>();
  grouped.forEach(model -> byProcessorId.put(model.getProcessorId(), model));

  assertEquals(2, grouped.size());
  ContainerModel first = byProcessorId.get("0");
  ContainerModel second = byProcessorId.get("1");
  assertNotNull(first);
  assertNotNull(second);
  assertEquals("0", first.getProcessorId());
  assertEquals("1", second.getProcessorId());
  assertEquals(11, first.getTasks().size());
  assertEquals(10, second.getTasks().size());

  // NOTE: tasks are sorted lexicographically, so the container assignment
  // can seem odd, but the consistency is the key focus
  int[] expectedInFirst = {0, 10, 12, 14, 16, 18, 2, 3, 5, 7, 9};
  int[] expectedInSecond = {1, 11, 13, 15, 17, 19, 20, 4, 6, 8};

  for (int taskIndex : expectedInFirst) {
    assertTrue(first.getTasks().containsKey(getTaskName(taskIndex)));
  }
  for (int taskIndex : expectedInSecond) {
    assertTrue(second.getTasks().containsKey(getTaskName(taskIndex)));
  }
}
use of org.apache.samza.job.model.TaskModel in project samza by apache.
the class StorageRecovery method getChangeLogMaxPartitionNumber.
/**
 * Computes the changelog partition count and stores it in {@code maxPartitionNumber}.
 *
 * The value is one more than the largest changelog partition id found across the tasks
 * of all containers. The search starts from 0, so with no tasks the result is 1.
 */
private void getChangeLogMaxPartitionNumber() {
  int highestPartitionId = containers.values().stream()
      .flatMap(container -> container.getTasks().values().stream())
      .mapToInt(task -> task.getChangelogPartition().getPartitionId())
      // Seeded with 0 so an empty job still yields a partition count of 1.
      .reduce(0, Math::max);
  maxPartitionNumber = highestPartitionId + 1;
}
use of org.apache.samza.job.model.TaskModel in project samza by apache.
the class StorageRecovery method getTaskStorageManagers.
/**
 * Builds one TaskStorageManager per task of every container and appends each of them to
 * {@code taskStorageManagers}.
 *
 * For each task, a storage engine is created from every configured factory whose store name
 * has an entry in {@code changeLogSystemStreams}; stores without a changelog stream are skipped.
 */
@SuppressWarnings({ "unchecked", "rawtypes" })
private void getTaskStorageManagers() {
  StreamMetadataCache metadataCache =
      new StreamMetadataCache(Util.javaMapAsScalaMap(systemAdmins), 5000, SystemClock.instance());

  for (ContainerModel container : containers.values()) {
    // Created once per container and reused (never cleared) across that container's tasks.
    HashMap<String, StorageEngine> engines = new HashMap<>();
    SamzaContainerContext context =
        new SamzaContainerContext(container.getProcessorId(), jobConfig, container.getTasks().keySet());

    for (TaskModel task : container.getTasks().values()) {
      HashMap<String, SystemConsumer> consumers = getStoreConsumers();

      for (Entry<String, StorageEngineFactory<?, ?>> factoryEntry : storageEngineFactories.entrySet()) {
        String store = factoryEntry.getKey();
        if (!changeLogSystemStreams.containsKey(store)) {
          // No changelog stream registered for this store, so no engine is built for it.
          continue;
        }

        SystemStreamPartition changelogPartition =
            new SystemStreamPartition(changeLogSystemStreams.get(store), task.getChangelogPartition());
        File partitionDir = TaskStorageManager.getStorePartitionDir(storeBaseDir, store, task.getTaskName());
        log.info("Got storage engine directory: " + partitionDir);

        // Keys and values are handled as raw bytes; the collector argument is passed as null.
        StorageEngine engine = (factoryEntry.getValue()).getStorageEngine(
            store,
            partitionDir,
            (Serde) new ByteSerde(),
            (Serde) new ByteSerde(),
            null,
            new MetricsRegistryMap(),
            changelogPartition,
            context);
        engines.put(store, engine);
      }

      // storeBaseDir is deliberately passed twice, matching the two directory arguments.
      TaskStorageManager manager = new TaskStorageManager(
          task.getTaskName(),
          Util.javaMapAsScalaMap(engines),
          Util.javaMapAsScalaMap(consumers),
          Util.javaMapAsScalaMap(changeLogSystemStreams),
          maxPartitionNumber,
          metadataCache,
          storeBaseDir,
          storeBaseDir,
          task.getChangelogPartition(),
          Util.javaMapAsScalaMap(systemAdmins),
          new StorageConfig(jobConfig).getChangeLogDeleteRetentionsInMs(),
          new SystemClock());
      taskStorageManagers.add(manager);
    }
  }
}
use of org.apache.samza.job.model.TaskModel in project samza by apache.
the class SamzaTaskProxy method getTasks.
/**
 * Loads the job model for the given {@link JobInstance} and flattens it into a list of
 * {@link Task}, one per task model of every container.
 *
 * Each {@link Task} carries the host looked up from the job model's container locality,
 * the task name, the container's processor id, the task's partitions, and the store names
 * read from the job's storage config.
 * {@inheritDoc}
 */
@Override
public List<Task> getTasks(JobInstance jobInstance) throws IOException, InterruptedException {
  Preconditions.checkArgument(installFinder.isInstalled(jobInstance), String.format("Invalid job instance : %s", jobInstance));

  JobModel model = getJobModel(jobInstance);
  StorageConfig storeSettings = new StorageConfig(model.getConfig());
  List<String> storeNames = JavaConverters.seqAsJavaListConverter(storeSettings.getStoreNames()).asJava();
  Map<String, String> hostByContainer = model.getAllContainerLocality();

  List<Task> result = new ArrayList<>();
  model.getContainers().values().forEach(container -> {
    String processorId = container.getProcessorId();
    // The lookup result is passed through as-is, including when no locality is recorded.
    String hostName = hostByContainer.get(processorId);

    container.getTasks().values().forEach(taskModel -> {
      String name = taskModel.getTaskName().getTaskName();
      List<Partition> taskPartitions = taskModel.getSystemStreamPartitions()
          .stream()
          .map(Partition::new)
          .collect(Collectors.toList());
      result.add(new Task(hostName, name, processorId, taskPartitions, storeNames));
    });
  });
  return result;
}
Aggregations