Use of org.apache.samza.job.model.TaskModel in the Apache Samza project.
Class GroupByContainerCount, method buildContainerModels.
/**
 * Translates the list of TaskGroup instances to a set of ContainerModel instances, using the
 * set of TaskModel instances.
 *
 * @param tasks the TaskModels to assign to the ContainerModels.
 * @param containerTasks the TaskGroups defining how the tasks should be grouped.
 * @return an unmodifiable set of ContainerModels, one per TaskGroup.
 * @throws IllegalStateException if a TaskGroup names a task that has no TaskModel in {@code tasks}.
 */
private Set<ContainerModel> buildContainerModels(Set<TaskModel> tasks, List<TaskGroup> containerTasks) {
  // Index the models by their string task name, which is how TaskGroup refers to tasks.
  Map<String, TaskModel> taskNameToModel = new HashMap<>();
  for (TaskModel model : tasks) {
    taskNameToModel.put(model.getTaskName().getTaskName(), model);
  }

  // Build one ContainerModel per TaskGroup.
  Set<ContainerModel> containerModels = new HashSet<>();
  for (TaskGroup container : containerTasks) {
    Map<TaskName, TaskModel> containerTaskModels = new HashMap<>();
    for (String taskName : container.taskNames) {
      TaskModel model = taskNameToModel.get(taskName);
      if (model == null) {
        // Fail with context instead of a bare NullPointerException on the next line.
        throw new IllegalStateException("No TaskModel found for task " + taskName + " assigned to container " + container.containerId);
      }
      containerTaskModels.put(model.getTaskName(), model);
    }
    containerModels.add(new ContainerModel(container.containerId, Integer.valueOf(container.containerId), containerTaskModels));
  }
  return Collections.unmodifiableSet(containerModels);
}
Use of org.apache.samza.job.model.TaskModel in the Apache Samza project.
Class GroupByContainerIds, method group.
/**
 * Groups the given tasks into one ContainerModel per supplied container id. Tasks are sorted
 * by their natural ordering and then dealt out to the containers round-robin.
 *
 * @param tasks the TaskModels to distribute; must not be empty when {@code containersIds} is given.
 * @param containersIds the ids of the containers to create; when {@code null} this delegates
 *                      to {@code group(tasks)}.
 * @return an unmodifiable set of ContainerModels, one per container id.
 * @throws IllegalArgumentException if {@code tasks} is empty, if {@code containersIds} is empty,
 *                                  or if there are more container ids than tasks.
 */
public Set<ContainerModel> group(Set<TaskModel> tasks, List<String> containersIds) {
  // This guard must come first: every check below dereferences containersIds.
  if (containersIds == null) {
    return this.group(tasks);
  }
  if (tasks.isEmpty()) {
    throw new IllegalArgumentException("cannot group an empty set. containersIds=" + Arrays.toString(containersIds.toArray()));
  }
  if (containersIds.isEmpty()) {
    // Without this, the round-robin modulo below would divide by zero.
    throw new IllegalArgumentException("cannot group tasks into an empty list of containers");
  }
  if (containersIds.size() > tasks.size()) {
    throw new IllegalArgumentException("number of containers " + containersIds.size() + " is bigger than number of tasks " + tasks.size());
  }

  int containerCount = containersIds.size();

  // Sort tasks by taskName.
  List<TaskModel> sortedTasks = new ArrayList<>(tasks);
  Collections.sort(sortedTasks);

  // Map every task to a container in round-robin fashion.
  List<Map<TaskName, TaskModel>> taskGroups = new ArrayList<>(containerCount);
  for (int i = 0; i < containerCount; i++) {
    taskGroups.add(new HashMap<TaskName, TaskModel>());
  }
  for (int i = 0; i < sortedTasks.size(); i++) {
    TaskModel tm = sortedTasks.get(i);
    taskGroups.get(i % containerCount).put(tm.getTaskName(), tm);
  }

  // Convert to a Set of ContainerModel
  Set<ContainerModel> containerModels = new HashSet<>();
  for (int i = 0; i < containerCount; i++) {
    // containerId in ContainerModel constructor is set to -1 because processorId can be any string and does
    // not have an integer equivalent. So, we set it to -1. After 0.13, this parameter will be removed.
    containerModels.add(new ContainerModel(containersIds.get(i), -1, taskGroups.get(i)));
  }
  return Collections.unmodifiableSet(containerModels);
}
Use of org.apache.samza.job.model.TaskModel in the Apache Samza project.
Class SamzaObjectMapper, method getObjectMapper.
/**
 * @return Returns a new ObjectMapper that's been configured to (de)serialize
 *         Samza's job data model, and simple data types such as TaskName,
 *         Partition, Config, and SystemStreamPartition.
 */
public static ObjectMapper getObjectMapper() {
  ObjectMapper mapper = new ObjectMapper();
  SimpleModule module = new SimpleModule("SamzaModule", new Version(1, 0, 0, ""));

  // Setup custom serdes for simple data types.
  module.addSerializer(Partition.class, new PartitionSerializer());
  module.addSerializer(SystemStreamPartition.class, new SystemStreamPartitionSerializer());
  module.addKeySerializer(SystemStreamPartition.class, new SystemStreamPartitionKeySerializer());
  module.addSerializer(TaskName.class, new TaskNameSerializer());
  module.addDeserializer(Partition.class, new PartitionDeserializer());
  module.addDeserializer(SystemStreamPartition.class, new SystemStreamPartitionDeserializer());
  module.addKeyDeserializer(SystemStreamPartition.class, new SystemStreamPartitionKeyDeserializer());
  module.addDeserializer(Config.class, new ConfigDeserializer());

  // Setup mixins for data models.
  mapper.getSerializationConfig().addMixInAnnotations(TaskModel.class, JsonTaskModelMixIn.class);
  mapper.getDeserializationConfig().addMixInAnnotations(TaskModel.class, JsonTaskModelMixIn.class);
  mapper.getSerializationConfig().addMixInAnnotations(ContainerModel.class, JsonContainerModelMixIn.class);
  mapper.getSerializationConfig().addMixInAnnotations(JobModel.class, JsonJobModelMixIn.class);
  mapper.getDeserializationConfig().addMixInAnnotations(JobModel.class, JsonJobModelMixIn.class);

  // ContainerModel is deserialized by hand (no deserialization mixin is registered for it)
  // so that a missing "processor-id" can fall back to the numeric container id.
  module.addDeserializer(ContainerModel.class, new JsonDeserializer<ContainerModel>() {
    @Override
    public ContainerModel deserialize(JsonParser jp, DeserializationContext ctxt) throws IOException, JsonProcessingException {
      ObjectCodec oc = jp.getCodec();
      JsonNode node = oc.readTree(jp);

      // Check for the field before reading it; dereferencing first would raise a
      // NullPointerException and make the SamzaException below unreachable.
      JsonNode containerIdNode = node.get("container-id");
      if (containerIdNode == null) {
        throw new SamzaException("JobModel did not contain a container-id. This can never happen. JobModel corrupt!");
      }
      int containerId = containerIdNode.getIntValue();

      // "processor-id" is optional; when absent, the container id's string form is used.
      JsonNode processorIdNode = node.get("processor-id");
      String processorId;
      if (processorIdNode == null) {
        processorId = String.valueOf(containerId);
      } else {
        processorId = processorIdNode.getTextValue();
      }

      Map<TaskName, TaskModel> tasksMapping = OBJECT_MAPPER.readValue(node.get("tasks"), new TypeReference<Map<TaskName, TaskModel>>() {
      });
      return new ContainerModel(processorId, containerId, tasksMapping);
    }
  });

  // Convert camel case to hyphenated field names, and register the module.
  mapper.setPropertyNamingStrategy(new CamelCaseToDashesStrategy());
  mapper.registerModule(module);
  return mapper;
}
Use of org.apache.samza.job.model.TaskModel in the Apache Samza project.
Class StorageRecovery, method getChangeLogMaxPartitionNumber.
/**
 * Scans every task of every container for its changelog partition id and stores
 * (highest id found + 1) in {@code maxPartitionNumber}. With no tasks the result is 1.
 */
private void getChangeLogMaxPartitionNumber() {
  int highestId = 0;
  for (ContainerModel container : containers.values()) {
    for (TaskModel task : container.getTasks().values()) {
      int changelogId = task.getChangelogPartition().getPartitionId();
      if (changelogId > highestId) {
        highestId = changelogId;
      }
    }
  }
  // Partition ids are used here as zero-based indices, hence the +1 to get a count.
  maxPartitionNumber = highestId + 1;
}
Use of org.apache.samza.job.model.TaskModel in the Apache Samza project.
Class StorageRecovery, method getTaskStorageManagers.
/**
 * Builds one TaskStorageManager for each task of every container and appends it to
 * {@code taskStorageManagers}. A storage engine is created only for stores that have an
 * entry in {@code changeLogSystemStreams}.
 */
@SuppressWarnings({ "unchecked", "rawtypes" })
private void getTaskStorageManagers() {
  StreamMetadataCache metadataCache = new StreamMetadataCache(Util.javaMapAsScalaMap(systemAdmins), 5000, SystemClock.instance());

  for (ContainerModel container : containers.values()) {
    // NOTE(review): this map is created once per container and reused for each of its
    // tasks, so later tasks overwrite earlier entries with the same store name.
    // Whether that matters depends on whether Util.javaMapAsScalaMap copies — confirm.
    HashMap<String, StorageEngine> engines = new HashMap<String, StorageEngine>();
    SamzaContainerContext context = new SamzaContainerContext(container.getProcessorId(), jobConfig, container.getTasks().keySet());

    for (TaskModel task : container.getTasks().values()) {
      HashMap<String, SystemConsumer> consumers = getStoreConsumers();

      for (Entry<String, StorageEngineFactory<?, ?>> factoryEntry : storageEngineFactories.entrySet()) {
        String store = factoryEntry.getKey();
        if (!changeLogSystemStreams.containsKey(store)) {
          // Stores without a changelog stream are skipped.
          continue;
        }
        SystemStreamPartition changelogSsp = new SystemStreamPartition(changeLogSystemStreams.get(store), task.getChangelogPartition());
        File partitionDir = TaskStorageManager.getStorePartitionDir(storeBaseDir, store, task.getTaskName());
        log.info("Got storage engine directory: " + partitionDir);
        // Keys and messages are both handled as raw bytes (ByteSerde).
        StorageEngine engine = factoryEntry.getValue().getStorageEngine(store, partitionDir, (Serde) new ByteSerde(), (Serde) new ByteSerde(), null, new MetricsRegistryMap(), changelogSsp, context);
        engines.put(store, engine);
      }

      TaskStorageManager manager = new TaskStorageManager(task.getTaskName(), Util.javaMapAsScalaMap(engines), Util.javaMapAsScalaMap(consumers), Util.javaMapAsScalaMap(changeLogSystemStreams), maxPartitionNumber, metadataCache, storeBaseDir, storeBaseDir, task.getChangelogPartition(), Util.javaMapAsScalaMap(systemAdmins), new StorageConfig(jobConfig).getChangeLogDeleteRetentionsInMs(), new SystemClock());
      taskStorageManagers.add(manager);
    }
  }
}
Aggregations