Search in sources :

Example 6 with TaskMode

use of org.apache.samza.job.model.TaskMode in project samza by apache.

the class StorageManagerUtil method getTaskStoreCheckpointDirs.

/**
 * Lists the checkpoint directories found for a task's store.
 *
 * <p>A directory entry under {@code <storeBaseDir>/<storeName>} is reported when its name contains
 * the task store directory name (as returned by {@code getTaskStoreDir(...).getName()}) followed by
 * a {@code "-"}.
 *
 * @param storeBaseDir the base directory that holds the store directories
 * @param storeName the name of the store
 * @param taskName the task whose checkpoint directories are looked up
 * @param taskMode the mode of the task
 * @return the matching checkpoint directories, or an empty list if the store directory does not exist
 * @throws SamzaException if listing the store directory fails with an {@link IOException}
 */
public List<File> getTaskStoreCheckpointDirs(File storeBaseDir, String storeName, TaskName taskName, TaskMode taskMode) {
    try {
        File storeDir = new File(storeBaseDir, storeName);
        String taskStoreName = getTaskStoreDir(storeBaseDir, storeName, taskName, taskMode).getName();
        if (!storeDir.exists()) {
            // new store or no local state
            return Collections.emptyList();
        }
        String checkpointDirMarker = taskStoreName + "-";
        // Files.list keeps the directory open until the stream is closed, so it must be
        // wrapped in try-with-resources to avoid leaking a file handle on every call.
        try (java.util.stream.Stream<Path> entries = Files.list(storeDir.toPath())) {
            return entries.map(Path::toFile).filter(file -> file.getName().contains(checkpointDirMarker)).collect(Collectors.toList());
        }
    } catch (IOException e) {
        throw new SamzaException(String.format("Error finding checkpoint dirs for task: %s mode: %s store: %s in dir: %s", taskName, taskMode, storeName, storeBaseDir), e);
    }
}
Also used : Path(java.nio.file.Path) StandbyTaskUtil(org.apache.samza.clustermanager.StandbyTaskUtil) CheckpointV2(org.apache.samza.checkpoint.CheckpointV2) LoggerFactory(org.slf4j.LoggerFactory) HashMap(java.util.HashMap) TaskModel(org.apache.samza.job.model.TaskModel) SystemStreamPartition(org.apache.samza.system.SystemStreamPartition) StandardCopyOption(java.nio.file.StandardCopyOption) FileUtil(org.apache.samza.util.FileUtil) SystemStream(org.apache.samza.system.SystemStream) Map(java.util.Map) SamzaObjectMapper(org.apache.samza.serializers.model.SamzaObjectMapper) TypeReference(com.fasterxml.jackson.core.type.TypeReference) JsonParseException(com.fasterxml.jackson.core.JsonParseException) Path(java.nio.file.Path) StorageConfig(org.apache.samza.config.StorageConfig) TaskName(org.apache.samza.container.TaskName) Logger(org.slf4j.Logger) ObjectWriter(com.fasterxml.jackson.databind.ObjectWriter) ImmutableMap(com.google.common.collect.ImmutableMap) Files(java.nio.file.Files) ObjectMapper(com.fasterxml.jackson.databind.ObjectMapper) Set(java.util.Set) IOException(java.io.IOException) Clock(org.apache.samza.util.Clock) Collectors(java.util.stream.Collectors) File(java.io.File) SamzaException(org.apache.samza.SamzaException) CheckpointId(org.apache.samza.checkpoint.CheckpointId) List(java.util.List) TaskMode(org.apache.samza.job.model.TaskMode) CheckpointV2Serde(org.apache.samza.serializers.CheckpointV2Serde) SystemAdmin(org.apache.samza.system.SystemAdmin) JsonMappingException(com.fasterxml.jackson.databind.JsonMappingException) Config(org.apache.samza.config.Config) Collections(java.util.Collections) IOException(java.io.IOException) File(java.io.File) SamzaException(org.apache.samza.SamzaException)

Example 7 with TaskMode

use of org.apache.samza.job.model.TaskMode in project samza by apache.

the class ContainerStorageManager method createSideInputProcessors.

/**
 * Creates the side-input store processors, one per store per task.
 *
 * <p>For every store listed in {@code taskSideInputStoreSSPs} for a task, the processor is chosen in
 * this order:
 * <ol>
 *   <li>the serialized processor instance from config, if present;</li>
 *   <li>the processor built by the configured {@link SideInputsProcessorFactory}, if present;</li>
 *   <li>otherwise an identity processor that deserializes the incoming key and message with the
 *       store's configured key and message serdes.</li>
 * </ol>
 *
 * @param config the storage config used to look up processors, factories and serde names
 * @param containerModel the container model whose tasks are iterated
 * @param taskInstanceMetrics per-task metrics; the registry is handed to the processor factory
 * @return a map from task name to (store name to processor)
 * @throws SamzaException if the identity processor is needed but the store has no key or msg serde configured
 */
private Map<TaskName, Map<String, SideInputsProcessor>> createSideInputProcessors(StorageConfig config, ContainerModel containerModel, Map<TaskName, TaskInstanceMetrics> taskInstanceMetrics) {
    Map<TaskName, Map<String, SideInputsProcessor>> sideInputStoresToProcessors = new HashMap<>();
    containerModel.getTasks().forEach((taskName, taskModel) -> {
        Map<String, SideInputsProcessor> storeToProcessor = new HashMap<>();
        sideInputStoresToProcessors.put(taskName, storeToProcessor);
        for (String storeName : this.taskSideInputStoreSSPs.get(taskName).keySet()) {
            SideInputsProcessor sideInputsProcessor;
            Optional<String> sideInputsProcessorSerializedInstance = config.getSideInputsProcessorSerializedInstance(storeName);
            if (sideInputsProcessorSerializedInstance.isPresent()) {
                sideInputsProcessor = SerdeUtils.deserialize("Side Inputs Processor", sideInputsProcessorSerializedInstance.get());
                LOG.info("Using serialized side-inputs-processor for store: {}, task: {}", storeName, taskName);
            } else {
                // Look the factory up once instead of re-reading the config for every use.
                Optional<String> sideInputsProcessorFactoryClassName = config.getSideInputsProcessorFactory(storeName);
                if (sideInputsProcessorFactoryClassName.isPresent()) {
                    SideInputsProcessorFactory sideInputsProcessorFactory = ReflectionUtil.getObj(sideInputsProcessorFactoryClassName.get(), SideInputsProcessorFactory.class);
                    sideInputsProcessor = sideInputsProcessorFactory.getSideInputsProcessor(config, taskInstanceMetrics.get(taskName).registry());
                    LOG.info("Using side-inputs-processor from factory: {} for store: {}, task: {}", sideInputsProcessorFactoryClassName.get(), storeName, taskName);
                } else {
                    // If this is an active task with a side-input store but no side-inputs-processor factory
                    // defined in config, we rely on upstream validations to fail the deploy.
                    // If this is a standby task and the store is a non-side-input changelog store, we create
                    // an identity side-inputs processor for it. The store's own key and msg serdes must be
                    // used because the store is created as a side-input store.
                    Serde keySerde = serdes.get(config.getStorageKeySerde(storeName).orElseThrow(() -> new SamzaException("Could not find storage key serde for store: " + storeName)));
                    Serde msgSerde = serdes.get(config.getStorageMsgSerde(storeName).orElseThrow(() -> new SamzaException("Could not find storage msg serde for store: " + storeName)));
                    sideInputsProcessor = new SideInputsProcessor() {

                        @Override
                        public Collection<Entry<?, ?>> process(IncomingMessageEnvelope message, KeyValueStore store) {
                            // Ignore message if the key is null
                            if (message.getKey() == null) {
                                return ImmutableList.of();
                            } else {
                                // Skip serde if the message is null
                                return ImmutableList.of(new Entry<>(keySerde.fromBytes((byte[]) message.getKey()), message.getMessage() == null ? null : msgSerde.fromBytes((byte[]) message.getMessage())));
                            }
                        }
                    };
                    LOG.info("Using identity side-inputs-processor for store: {}, task: {}", storeName, taskName);
                }
            }
            storeToProcessor.put(storeName, sideInputsProcessor);
        }
    });
    return sideInputStoresToProcessors;
}
Also used : Serde(org.apache.samza.serializers.Serde) HashMap(java.util.HashMap) IncomingMessageEnvelope(org.apache.samza.system.IncomingMessageEnvelope) KeyValueStore(org.apache.samza.storage.kv.KeyValueStore) TaskMode(org.apache.samza.job.model.TaskMode) SamzaException(org.apache.samza.SamzaException) Entry(org.apache.samza.storage.kv.Entry) TaskName(org.apache.samza.container.TaskName) Map(java.util.Map) MetricsRegistryMap(org.apache.samza.metrics.MetricsRegistryMap) HashMap(java.util.HashMap)

Example 8 with TaskMode

use of org.apache.samza.job.model.TaskMode in project samza by apache.

the class JobModelHelper method getGrouperMetadata.

/**
 * Builds the {@link GrouperMetadata} from the processor locality, the task assignments and the
 * task-to-partition assignments read through the given managers.
 *
 * <p>Only tasks for which {@code isActiveTask} returns true are included in the task-level maps.
 *
 * @param config the job config, passed to {@code getProcessorLocality}
 * @param localityManager the locality manager, passed to {@code getProcessorLocality}
 * @param taskAssignmentManager source of the task modes and the task-to-container assignment
 * @param taskPartitionAssignmentManager source of the partition-to-task assignment
 * @return the assembled grouper metadata
 */
private GrouperMetadata getGrouperMetadata(Config config, LocalityManager localityManager, TaskAssignmentManager taskAssignmentManager, TaskPartitionAssignmentManager taskPartitionAssignmentManager) {
    Map<String, LocationId> processorLocality = getProcessorLocality(config, localityManager);
    Map<TaskName, TaskMode> taskModes = taskAssignmentManager.readTaskModes();

    // The task assignment is only kept for active tasks, as decided by isActiveTask.
    Map<TaskName, String> processorIdByTask = new HashMap<>();
    Map<TaskName, LocationId> localityByTask = new HashMap<>();
    taskAssignmentManager.readTaskAssignment().forEach((rawTaskName, processorId) -> {
        TaskName task = new TaskName(rawTaskName);
        if (!isActiveTask(task, taskModes)) {
            return;
        }
        processorIdByTask.put(task, processorId);
        if (processorLocality.containsKey(processorId)) {
            localityByTask.put(task, processorLocality.get(processorId));
        }
    });

    // Task to partition assignments is stored as {@see SystemStreamPartition} to list of {@see TaskName} in
    // coordinator stream. This is done due to the 1 MB value size limit in a kafka topic. The mapping is
    // inverted here (taskName to SystemStreamPartitions) to wire the data into {@see JobModel}.
    Map<SystemStreamPartition, List<String>> sspToTaskMapping = taskPartitionAssignmentManager.readTaskPartitionAssignments();
    Map<TaskName, List<SystemStreamPartition>> partitionsByTask = new HashMap<>();
    for (Map.Entry<SystemStreamPartition, List<String>> sspEntry : sspToTaskMapping.entrySet()) {
        SystemStreamPartition ssp = sspEntry.getKey();
        for (String rawTaskName : sspEntry.getValue()) {
            TaskName task = new TaskName(rawTaskName);
            if (isActiveTask(task, taskModes)) {
                partitionsByTask.computeIfAbsent(task, ignored -> new ArrayList<>()).add(ssp);
            }
        }
    }

    return new GrouperMetadataImpl(processorLocality, localityByTask, partitionsByTask, processorIdByTask);
}
Also used : StreamMetadataCache(org.apache.samza.system.StreamMetadataCache) TaskPartitionAssignmentManager(org.apache.samza.container.grouper.task.TaskPartitionAssignmentManager) LoggerFactory(org.slf4j.LoggerFactory) JobConfig(org.apache.samza.config.JobConfig) GrouperMetadataImpl(org.apache.samza.container.grouper.task.GrouperMetadataImpl) HashMap(java.util.HashMap) TaskModel(org.apache.samza.job.model.TaskModel) SystemStreamPartition(org.apache.samza.system.SystemStreamPartition) StringUtils(org.apache.commons.lang3.StringUtils) ArrayList(java.util.ArrayList) LocationId(org.apache.samza.runtime.LocationId) HashSet(java.util.HashSet) Map(java.util.Map) GrouperMetadata(org.apache.samza.container.grouper.task.GrouperMetadata) TaskAssignmentManager(org.apache.samza.container.grouper.task.TaskAssignmentManager) JobModel(org.apache.samza.job.model.JobModel) TaskName(org.apache.samza.container.TaskName) Logger(org.slf4j.Logger) Set(java.util.Set) Collectors(java.util.stream.Collectors) LocalityManager(org.apache.samza.container.LocalityManager) List(java.util.List) TaskMode(org.apache.samza.job.model.TaskMode) ContainerModel(org.apache.samza.job.model.ContainerModel) ProcessorLocality(org.apache.samza.job.model.ProcessorLocality) Optional(java.util.Optional) Config(org.apache.samza.config.Config) HashMap(java.util.HashMap) GrouperMetadataImpl(org.apache.samza.container.grouper.task.GrouperMetadataImpl) LocationId(org.apache.samza.runtime.LocationId) ArrayList(java.util.ArrayList) TaskMode(org.apache.samza.job.model.TaskMode) TaskName(org.apache.samza.container.TaskName) ArrayList(java.util.ArrayList) List(java.util.List) SystemStreamPartition(org.apache.samza.system.SystemStreamPartition)

Example 9 with TaskMode

use of org.apache.samza.job.model.TaskMode in project samza by apache.

the class TaskAssignmentManager method writeTaskContainerMappings.

/**
 * Batch-writes task-to-container and task-to-mode information to the metadata stores and keeps
 * the in-memory {@code taskNameToContainerId} map in step with them.
 *
 * <p>An entry whose container id is {@code null} removes the task from both stores and from the
 * in-memory map. Both stores are flushed once after all entries have been processed.
 *
 * @param mappings the task and container mappings: (ContainerId, (TaskName, TaskMode))
 */
public void writeTaskContainerMappings(Map<String, Map<String, TaskMode>> mappings) {
    for (Map.Entry<String, Map<String, TaskMode>> containerEntry : mappings.entrySet()) {
        String containerId = containerEntry.getKey();
        for (Map.Entry<String, TaskMode> taskEntry : containerEntry.getValue().entrySet()) {
            String taskName = taskEntry.getKey();
            TaskMode taskMode = taskEntry.getValue();
            LOG.info("Storing task: {} and container ID: {} into metadata store", taskName, containerId);

            String previousContainerId = taskNameToContainerId.get(taskName);
            boolean movedContainers = previousContainerId != null && !previousContainerId.equals(containerId);
            if (movedContainers) {
                LOG.info("Task \"{}\" in mode {} moved from container {} to container {}", taskName, taskMode, previousContainerId, containerId);
            } else {
                LOG.debug("Task \"{}\" in mode {} assigned to container {}", taskName, taskMode, containerId);
            }

            if (containerId != null) {
                taskContainerMappingMetadataStore.put(taskName, containerIdSerde.toBytes(containerId));
                taskModeMappingMetadataStore.put(taskName, taskModeSerde.toBytes(taskMode.toString()));
                taskNameToContainerId.put(taskName, containerId);
            } else {
                // A null container id means the task's mapping is being removed.
                taskContainerMappingMetadataStore.delete(taskName);
                taskModeMappingMetadataStore.delete(taskName);
                taskNameToContainerId.remove(taskName);
            }
        }
    }
    taskContainerMappingMetadataStore.flush();
    taskModeMappingMetadataStore.flush();
}
Also used : TaskMode(org.apache.samza.job.model.TaskMode)

Aggregations

TaskMode (org.apache.samza.job.model.TaskMode): 9, TaskName (org.apache.samza.container.TaskName): 8, HashMap (java.util.HashMap): 7, Map (java.util.Map): 6, Set (java.util.Set): 5, SamzaException (org.apache.samza.SamzaException): 5, SystemStreamPartition (org.apache.samza.system.SystemStreamPartition): 5, Logger (org.slf4j.Logger): 5, LoggerFactory (org.slf4j.LoggerFactory): 5, File (java.io.File): 4, List (java.util.List): 4, Optional (java.util.Optional): 4, Collectors (java.util.stream.Collectors): 4, Config (org.apache.samza.config.Config): 4, TaskModel (org.apache.samza.job.model.TaskModel): 4, VisibleForTesting (com.google.common.annotations.VisibleForTesting): 3, StringUtils (org.apache.commons.lang3.StringUtils): 3, ImmutableMap (com.google.common.collect.ImmutableMap): 2, ArrayList (java.util.ArrayList): 2, Collections (java.util.Collections): 2