Search in sources :

Example 26 with TaskModel

use of org.apache.samza.job.model.TaskModel in project samza by apache.

the class GroupByContainerCount method buildContainerModels.

/**
   * Translates the list of TaskGroup instances to a set of ContainerModel instances, using the
   * set of TaskModel instances.
   *
   * @param tasks             the TaskModels to assign to the ContainerModels.
   * @param containerTasks    the TaskGroups defining how the tasks should be grouped.
   * @return                  a mutable set of ContainerModels.
   */
private Set<ContainerModel> buildContainerModels(Set<TaskModel> tasks, List<TaskGroup> containerTasks) {
    // Map task names to models
    Map<String, TaskModel> taskNameToModel = new HashMap<>();
    for (TaskModel model : tasks) {
        taskNameToModel.put(model.getTaskName().getTaskName(), model);
    }
    // Build container models
    Set<ContainerModel> containerModels = new HashSet<>();
    for (TaskGroup container : containerTasks) {
        Map<TaskName, TaskModel> containerTaskModels = new HashMap<>();
        for (String taskName : container.taskNames) {
            TaskModel model = taskNameToModel.get(taskName);
            containerTaskModels.put(model.getTaskName(), model);
        }
        containerModels.add(new ContainerModel(container.containerId, Integer.valueOf(container.containerId), containerTaskModels));
    }
    return Collections.unmodifiableSet(containerModels);
}
Also used : HashMap(java.util.HashMap) TaskName(org.apache.samza.container.TaskName) TaskModel(org.apache.samza.job.model.TaskModel) HashSet(java.util.HashSet) ContainerModel(org.apache.samza.job.model.ContainerModel)

Example 27 with TaskModel

use of org.apache.samza.job.model.TaskModel in project samza by apache.

the class GroupByContainerIds method group.

public Set<ContainerModel> group(Set<TaskModel> tasks, List<String> containersIds) {
    if (tasks.isEmpty())
        throw new IllegalArgumentException("cannot group an empty set. containersIds=" + Arrays.toString(containersIds.toArray()));
    if (containersIds.size() > tasks.size())
        throw new IllegalArgumentException("number of containers " + containersIds.size() + " is bigger than number of tasks " + tasks.size());
    if (containersIds == null)
        return this.group(tasks);
    int containerCount = containersIds.size();
    // Sort tasks by taskName.
    List<TaskModel> sortedTasks = new ArrayList<>(tasks);
    Collections.sort(sortedTasks);
    // Map every task to a container in round-robin fashion.
    Map<TaskName, TaskModel>[] taskGroups = new Map[containerCount];
    for (int i = 0; i < containerCount; i++) {
        taskGroups[i] = new HashMap<>();
    }
    for (int i = 0; i < sortedTasks.size(); i++) {
        TaskModel tm = sortedTasks.get(i);
        taskGroups[i % containerCount].put(tm.getTaskName(), tm);
    }
    // Convert to a Set of ContainerModel
    Set<ContainerModel> containerModels = new HashSet<>();
    for (int i = 0; i < containerCount; i++) {
        // containerId in ContainerModel constructor is set to -1 because processorId can be any string and does
        // not have an integer equivalent. So, we set it to -1. After 0.13, this parameter will be removed.
        containerModels.add(new ContainerModel(containersIds.get(i), -1, taskGroups[i]));
    }
    return Collections.unmodifiableSet(containerModels);
}
Also used : ArrayList(java.util.ArrayList) Map(java.util.Map) HashMap(java.util.HashMap) TaskModel(org.apache.samza.job.model.TaskModel) HashSet(java.util.HashSet) ContainerModel(org.apache.samza.job.model.ContainerModel)

Example 28 with TaskModel

use of org.apache.samza.job.model.TaskModel in project samza by apache.

the class SamzaObjectMapper method getObjectMapper.

/**
   * @return Returns a new ObjectMapper that's been configured to (de)serialize
   *         Samza's job data model, and simple data types such as TaskName,
   *         Partition, Config, and SystemStreamPartition.
   */
public static ObjectMapper getObjectMapper() {
    ObjectMapper mapper = new ObjectMapper();
    SimpleModule module = new SimpleModule("SamzaModule", new Version(1, 0, 0, ""));
    // Setup custom serdes for simple data types.
    module.addSerializer(Partition.class, new PartitionSerializer());
    module.addSerializer(SystemStreamPartition.class, new SystemStreamPartitionSerializer());
    module.addKeySerializer(SystemStreamPartition.class, new SystemStreamPartitionKeySerializer());
    module.addSerializer(TaskName.class, new TaskNameSerializer());
    module.addDeserializer(Partition.class, new PartitionDeserializer());
    module.addDeserializer(SystemStreamPartition.class, new SystemStreamPartitionDeserializer());
    module.addKeyDeserializer(SystemStreamPartition.class, new SystemStreamPartitionKeyDeserializer());
    module.addDeserializer(Config.class, new ConfigDeserializer());
    // Setup mixins for data models.
    mapper.getSerializationConfig().addMixInAnnotations(TaskModel.class, JsonTaskModelMixIn.class);
    mapper.getDeserializationConfig().addMixInAnnotations(TaskModel.class, JsonTaskModelMixIn.class);
    mapper.getSerializationConfig().addMixInAnnotations(ContainerModel.class, JsonContainerModelMixIn.class);
    mapper.getSerializationConfig().addMixInAnnotations(JobModel.class, JsonJobModelMixIn.class);
    mapper.getDeserializationConfig().addMixInAnnotations(JobModel.class, JsonJobModelMixIn.class);
    module.addDeserializer(ContainerModel.class, new JsonDeserializer<ContainerModel>() {

        @Override
        public ContainerModel deserialize(JsonParser jp, DeserializationContext ctxt) throws IOException, JsonProcessingException {
            ObjectCodec oc = jp.getCodec();
            JsonNode node = oc.readTree(jp);
            int containerId = node.get("container-id").getIntValue();
            if (node.get("container-id") == null) {
                throw new SamzaException("JobModel did not contain a container-id. This can never happen. JobModel corrupt!");
            }
            String processorId;
            if (node.get("processor-id") == null) {
                processorId = String.valueOf(containerId);
            } else {
                processorId = node.get("processor-id").getTextValue();
            }
            Map<TaskName, TaskModel> tasksMapping = OBJECT_MAPPER.readValue(node.get("tasks"), new TypeReference<Map<TaskName, TaskModel>>() {
            });
            return new ContainerModel(processorId, containerId, tasksMapping);
        }
    });
    // Convert camel case to hyphenated field names, and register the module.
    mapper.setPropertyNamingStrategy(new CamelCaseToDashesStrategy());
    mapper.registerModule(module);
    return mapper;
}
Also used : JsonNode(org.codehaus.jackson.JsonNode) ObjectCodec(org.codehaus.jackson.ObjectCodec) SamzaException(org.apache.samza.SamzaException) ContainerModel(org.apache.samza.job.model.ContainerModel) Version(org.codehaus.jackson.Version) DeserializationContext(org.codehaus.jackson.map.DeserializationContext) TypeReference(org.codehaus.jackson.type.TypeReference) JsonProcessingException(org.codehaus.jackson.JsonProcessingException) ObjectMapper(org.codehaus.jackson.map.ObjectMapper) JsonParser(org.codehaus.jackson.JsonParser) IOException(java.io.IOException) TaskName(org.apache.samza.container.TaskName) HashMap(java.util.HashMap) Map(java.util.Map) SimpleModule(org.codehaus.jackson.map.module.SimpleModule) TaskModel(org.apache.samza.job.model.TaskModel)

Example 29 with TaskModel

use of org.apache.samza.job.model.TaskModel in project samza by apache.

the class StorageRecovery method getChangeLogMaxPartitionNumber.

/**
   * get the max partition number of the changelog stream
   */
private void getChangeLogMaxPartitionNumber() {
    int maxPartitionId = 0;
    for (ContainerModel containerModel : containers.values()) {
        for (TaskModel taskModel : containerModel.getTasks().values()) {
            maxPartitionId = Math.max(maxPartitionId, taskModel.getChangelogPartition().getPartitionId());
        }
    }
    maxPartitionNumber = maxPartitionId + 1;
}
Also used : TaskModel(org.apache.samza.job.model.TaskModel) ContainerModel(org.apache.samza.job.model.ContainerModel)

Example 30 with TaskModel

use of org.apache.samza.job.model.TaskModel in project samza by apache.

the class StorageRecovery method getTaskStorageManagers.

/**
   * create one TaskStorageManager for each task. Add all of them to the
   * List<TaskStorageManager>
   */
@SuppressWarnings({ "unchecked", "rawtypes" })
private void getTaskStorageManagers() {
    StreamMetadataCache streamMetadataCache = new StreamMetadataCache(Util.javaMapAsScalaMap(systemAdmins), 5000, SystemClock.instance());
    for (ContainerModel containerModel : containers.values()) {
        HashMap<String, StorageEngine> taskStores = new HashMap<String, StorageEngine>();
        SamzaContainerContext containerContext = new SamzaContainerContext(containerModel.getProcessorId(), jobConfig, containerModel.getTasks().keySet());
        for (TaskModel taskModel : containerModel.getTasks().values()) {
            HashMap<String, SystemConsumer> storeConsumers = getStoreConsumers();
            for (Entry<String, StorageEngineFactory<?, ?>> entry : storageEngineFactories.entrySet()) {
                String storeName = entry.getKey();
                if (changeLogSystemStreams.containsKey(storeName)) {
                    SystemStreamPartition changeLogSystemStreamPartition = new SystemStreamPartition(changeLogSystemStreams.get(storeName), taskModel.getChangelogPartition());
                    File storePartitionDir = TaskStorageManager.getStorePartitionDir(storeBaseDir, storeName, taskModel.getTaskName());
                    log.info("Got storage engine directory: " + storePartitionDir);
                    StorageEngine storageEngine = (entry.getValue()).getStorageEngine(storeName, storePartitionDir, (Serde) new ByteSerde(), (Serde) new ByteSerde(), null, new MetricsRegistryMap(), changeLogSystemStreamPartition, containerContext);
                    taskStores.put(storeName, storageEngine);
                }
            }
            TaskStorageManager taskStorageManager = new TaskStorageManager(taskModel.getTaskName(), Util.javaMapAsScalaMap(taskStores), Util.javaMapAsScalaMap(storeConsumers), Util.javaMapAsScalaMap(changeLogSystemStreams), maxPartitionNumber, streamMetadataCache, storeBaseDir, storeBaseDir, taskModel.getChangelogPartition(), Util.javaMapAsScalaMap(systemAdmins), new StorageConfig(jobConfig).getChangeLogDeleteRetentionsInMs(), new SystemClock());
            taskStorageManagers.add(taskStorageManager);
        }
    }
}
Also used : StreamMetadataCache(org.apache.samza.system.StreamMetadataCache) SamzaContainerContext(org.apache.samza.container.SamzaContainerContext) SystemConsumer(org.apache.samza.system.SystemConsumer) SystemClock(org.apache.samza.util.SystemClock) HashMap(java.util.HashMap) StorageConfig(org.apache.samza.config.StorageConfig) JavaStorageConfig(org.apache.samza.config.JavaStorageConfig) ContainerModel(org.apache.samza.job.model.ContainerModel) ByteSerde(org.apache.samza.serializers.ByteSerde) MetricsRegistryMap(org.apache.samza.metrics.MetricsRegistryMap) File(java.io.File) TaskModel(org.apache.samza.job.model.TaskModel) SystemStreamPartition(org.apache.samza.system.SystemStreamPartition)

Aggregations

ContainerModel (org.apache.samza.job.model.ContainerModel)30 TaskModel (org.apache.samza.job.model.TaskModel)30 HashMap (java.util.HashMap)26 ContainerMocks.getTaskModel (org.apache.samza.container.mock.ContainerMocks.getTaskModel)17 Test (org.junit.Test)17 Mockito.anyString (org.mockito.Mockito.anyString)14 TaskName (org.apache.samza.container.TaskName)8 JobModel (org.apache.samza.job.model.JobModel)6 ArrayList (java.util.ArrayList)5 HashSet (java.util.HashSet)5 Map (java.util.Map)4 JobModelManager (org.apache.samza.coordinator.JobModelManager)3 MapConfig (org.apache.samza.config.MapConfig)2 StorageConfig (org.apache.samza.config.StorageConfig)2 LocalityManager (org.apache.samza.container.LocalityManager)2 HttpServer (org.apache.samza.coordinator.server.HttpServer)2 MetricsRegistryMap (org.apache.samza.metrics.MetricsRegistryMap)2 SystemStreamPartition (org.apache.samza.system.SystemStreamPartition)2 DefaultServlet (org.eclipse.jetty.servlet.DefaultServlet)2 ServletHolder (org.eclipse.jetty.servlet.ServletHolder)2