Search in sources :

Example 1 with TaskMode

use of org.apache.samza.job.model.TaskMode in project samza by apache.

the class TransactionalStateTaskRestoreManager method getStoreActions.

/**
 * Decides, for every store of the task, which local directories to delete, which checkpoint directory
 * (if any) to retain, and which changelog offset range to restore.
 *
 * Stores that are neither persistent nor logged are ignored.
 * Each persistent but non-logged store has its current directory marked for deletion.
 *
 * For each logged store, based on the current, checkpointed and local changelog offsets,
 * 1. decides which directories (current and checkpoints) to delete for persistent stores.
 * 2. decides which directories (checkpoints) to retain for persistent stores.
 * 3. decides which stores (persistent or not) need to be restored, and the beginning and end offsets for the restore.
 *
 * When this method returns, in StoreActions,
 * 1. all persistent store current directories will be present in storeDirsToDelete
 * 2. each persistent store checkpoint directory will be present in either storeDirToRetain or storeDirsToDelete.
 * 3. there will be at most one storeDirToRetain per persistent store, which will be a checkpoint directory.
 * 4. any stores (persistent or not) that need to be restored from changelogs will be present in
 *    storesToRestore with appropriate offsets.
 *
 * @param taskModel supplies the task name, task mode and changelog partition
 * @param storeEngines store name to storage engine; drives the iteration
 * @param storeChangelogs store name to changelog system stream
 * @param kafkaStateCheckpointMarkers store name to checkpoint marker; a missing marker or a blank
 *                                    changelog offset is treated as "no checkpointed offset"
 * @param checkpointId id of the last checkpoint, or null (then the checkpoint is treated as infinitely old)
 * @param currentChangelogOffsets oldest/newest offsets per changelog partition
 * @param systemAdmins used to obtain the offset comparator for the changelog system
 * @param storageManagerUtil resolves store/checkpoint directories, validates them and reads offset files
 * @param loggedStoreBaseDirectory base directory of logged stores
 * @param nonLoggedStoreBaseDirectory base directory of non-logged stores
 * @param config job config (clean-on-start, retain-existing-state and min compaction lag settings)
 * @param clock passed through to the checkpoint directory validity check
 * @return the directories to retain, the directories to delete and the restore offsets, per store
 */
@VisibleForTesting
static StoreActions getStoreActions(TaskModel taskModel, Map<String, StorageEngine> storeEngines, Map<String, SystemStream> storeChangelogs, Map<String, KafkaStateCheckpointMarker> kafkaStateCheckpointMarkers, CheckpointId checkpointId, Map<SystemStreamPartition, SystemStreamPartitionMetadata> currentChangelogOffsets, SystemAdmins systemAdmins, StorageManagerUtil storageManagerUtil, File loggedStoreBaseDirectory, File nonLoggedStoreBaseDirectory, Config config, Clock clock) {
    TaskName taskName = taskModel.getTaskName();
    TaskMode taskMode = taskModel.getTaskMode();
    Map<String, File> storeDirToRetain = new HashMap<>();
    ListMultimap<String, File> storeDirsToDelete = ArrayListMultimap.create();
    Map<String, RestoreOffsets> storesToRestore = new HashMap<>();
    storeEngines.forEach((storeName, storageEngine) -> {
        // read the two store properties once; every decision below depends only on these flags
        boolean isPersistent = storageEngine.getStoreProperties().isPersistedToDisk();
        boolean isLogged = storageEngine.getStoreProperties().isLoggedStore();

        // do nothing if store is non persistent and not logged (e.g. in memory cache only)
        if (!isPersistent && !isLogged) {
            return;
        }
        // persistent but non-logged stores are always deleted
        if (isPersistent && !isLogged) {
            File currentDir = storageManagerUtil.getTaskStoreDir(nonLoggedStoreBaseDirectory, storeName, taskName, taskMode);
            LOG.info("Marking current directory: {} for store: {} in task: {} for deletion since it is not a logged store.", currentDir, storeName, taskName);
            storeDirsToDelete.put(storeName, currentDir);
            // persistent but non-logged stores should not have checkpoint dirs
            return;
        }

        // from here on the store is logged (persistent or not).
        // get the oldest and newest current changelog SSP offsets as well as the checkpointed changelog SSP offset
        SystemStream changelog = storeChangelogs.get(storeName);
        SystemStreamPartition changelogSSP = new SystemStreamPartition(changelog, taskModel.getChangelogPartition());
        SystemAdmin admin = systemAdmins.getSystemAdmin(changelogSSP.getSystem());
        SystemStreamPartitionMetadata changelogSSPMetadata = currentChangelogOffsets.get(changelogSSP);
        String oldestOffset = changelogSSPMetadata.getOldestOffset();
        String newestOffset = changelogSSPMetadata.getNewestOffset();

        // can be null if no message, or message has null offset. a single lookup also tolerates a
        // null marker stored under the key.
        KafkaStateCheckpointMarker checkpointMarker = kafkaStateCheckpointMarkers.get(storeName);
        String markerOffset = checkpointMarker == null ? null : checkpointMarker.getChangelogOffset();
        String checkpointedOffset = StringUtils.isNotBlank(markerOffset) ? markerOffset : null;

        // NOTE(review): this uses the system clock although a Clock is injected; confirm whether
        // clock.currentTimeMillis() is intended here before changing it.
        long timeSinceLastCheckpointInMs = checkpointId == null ? Long.MAX_VALUE : System.currentTimeMillis() - checkpointId.getMillis();

        // if the clean.store.start config is set, delete current and checkpoint dirs, restore from oldest offset to checkpointed
        if (isPersistent && new StorageConfig(config).cleanLoggedStoreDirsOnStart(storeName)) {
            File currentDir = storageManagerUtil.getTaskStoreDir(loggedStoreBaseDirectory, storeName, taskName, taskMode);
            LOG.info("Marking current directory: {} for store: {} in task: {} for deletion due to clean.on.container.start config.", currentDir, storeName, taskName);
            storeDirsToDelete.put(storeName, currentDir);
            storageManagerUtil.getTaskStoreCheckpointDirs(loggedStoreBaseDirectory, storeName, taskName, taskMode).forEach(checkpointDir -> {
                LOG.info("Marking checkpoint directory: {} for store: {} in task: {} for deletion due to clean.on.container.start config.", checkpointDir, storeName, taskName);
                storeDirsToDelete.put(storeName, checkpointDir);
            });
            LOG.info("Marking restore offsets for store: {} in task: {} to {}, {} ", storeName, taskName, oldestOffset, checkpointedOffset);
            storesToRestore.put(storeName, new RestoreOffsets(oldestOffset, checkpointedOffset));
            return;
        }

        // only persistent stores have directories on disk
        Optional<File> currentDirOptional;
        Optional<List<File>> checkpointDirsOptional;
        if (!isPersistent) {
            currentDirOptional = Optional.empty();
            checkpointDirsOptional = Optional.empty();
        } else {
            currentDirOptional = Optional.of(storageManagerUtil.getTaskStoreDir(loggedStoreBaseDirectory, storeName, taskName, taskMode));
            checkpointDirsOptional = Optional.of(storageManagerUtil.getTaskStoreCheckpointDirs(loggedStoreBaseDirectory, storeName, taskName, taskMode));
        }
        LOG.info("For store: {} in task: {} got current dir: {}, checkpoint dirs: {}, checkpointed changelog offset: {}", storeName, taskName, currentDirOptional, checkpointDirsOptional, checkpointedOffset);

        // the current directory is never retained; only a checkpoint directory may be
        currentDirOptional.ifPresent(currentDir -> {
            LOG.info("Marking current directory: {} for store: {} in task: {} for deletion.", currentDir, storeName, taskName);
            storeDirsToDelete.put(storeName, currentDir);
        });

        if (checkpointedOffset == null && oldestOffset != null) {
            // this can mean that either this is the initial migration for this feature and there are no previously
            // checkpointed changelog offsets, or that this is a new store or changelog topic after the initial migration.
            // if this is the first time migration, it might be desirable to retain existing data.
            // if this is new store or topic, it is possible that the container previously died after writing some data to
            // the changelog but before a commit, so it is desirable to delete the store, not restore anything and
            // trim the changelog
            // since we can't tell the difference b/w the two scenarios by just looking at the store and changelogs,
            // we'll request users to indicate whether to retain existing data using a config flag. this flag should only
            // be set during migrations, and turned off after the first successful commit of the new container (i.e. next
            // deploy). for simplicity, we'll always delete the local store, and restore from changelog if necessary.
            // the former scenario should not be common. the recommended way to opt-in to the transactional state feature
            // is to first upgrade to the latest samza version but keep the transactional state restore config off.
            // this will create the store checkpoint directories and write the changelog offset to the checkpoint, but
            // will not use them during restore. once this is done (i.e. at least one commit after upgrade), the
            // transactional state restore feature can be turned on on subsequent deploys. this code path exists as a
            // fail-safe against clearing changelogs in case users do not follow upgrade instructions and enable the
            // feature directly.
            checkpointDirsOptional.ifPresent(checkpointDirs -> checkpointDirs.forEach(checkpointDir -> {
                LOG.info("Marking checkpoint directory: {} for store: {} in task: {} for deletion since checkpointed "
                    + "offset is null and oldest offset: {} is not.", checkpointDir, storeName, taskName, oldestOffset);
                storeDirsToDelete.put(storeName, checkpointDir);
            }));
            if (new TaskConfig(config).getTransactionalStateRetainExistingState()) {
                // mark for restore from (oldest, newest) to recreate local state.
                LOG.warn("Checkpointed offset for store: {} in task: {} is null. Since retain existing state is true, "
                    + "local state will be fully restored from current changelog contents. "
                    + "There is no transactional local state guarantee.", storeName, taskName);
                storesToRestore.put(storeName, new RestoreOffsets(oldestOffset, newestOffset));
            } else {
                LOG.warn("Checkpointed offset for store: {} in task: {} is null. Since retain existing state is false, "
                    + "any local state and changelog topic contents will be deleted.", storeName, taskName);
                // mark for restore from (oldest, null) to trim entire changelog.
                storesToRestore.put(storeName, new RestoreOffsets(oldestOffset, null));
            }
        } else if (admin.offsetComparator(oldestOffset, checkpointedOffset) > 0
            || admin.offsetComparator(checkpointedOffset, newestOffset) > 0) {
            // checkpointed offset is out of range of current oldest and newest offsets. this could mean that this
            // is a TTL topic and the checkpointed offset was TTLd, or that the changelog topic was manually deleted
            // and then recreated.
            // we cannot guarantee transactional state for TTL stores, so delete everything and do a full restore
            // for local store. if the topic was deleted and recreated, this will have the side effect of
            // clearing the store as well.
            LOG.warn("Checkpointed offset: {} for store: {} in task: {} is out of range of oldest: {} or newest: {} offset. "
                + "Deleting existing store and fully restoring from changelog topic from oldest to newest offset. If the topic "
                + "has time-based retention, there is no transactional local state guarantees. If the topic was changed, "
                + "local state will be cleaned up and fully restored to match the new topic contents.",
                checkpointedOffset, storeName, taskName, oldestOffset, newestOffset);
            checkpointDirsOptional.ifPresent(checkpointDirs -> checkpointDirs.forEach(checkpointDir -> storeDirsToDelete.put(storeName, checkpointDir)));
            storesToRestore.put(storeName, new RestoreOffsets(oldestOffset, newestOffset));
        } else {
            // happy path. checkpointed offset is in range of current oldest and newest offsets
            if (!checkpointDirsOptional.isPresent()) {
                // non-persistent logged store
                LOG.info("Did not find any checkpoint directories for logged (maybe non-persistent) store: {}. Local state "
                    + "will be fully restored from current changelog contents.", storeName);
                storesToRestore.put(storeName, new RestoreOffsets(oldestOffset, checkpointedOffset));
            } else {
                // persistent logged store
                String targetOffset;
                // check checkpoint time against min.compaction.lag.ms. if older, restore from checkpointed offset to newest
                // with no trim. be conservative. allow 10% safety margin to avoid deletions when the downtime is close
                // to min.compaction.lag.ms
                long minCompactionLagMs = new StorageConfig(config).getChangelogMinCompactionLagMs(storeName);
                if (timeSinceLastCheckpointInMs > .9 * minCompactionLagMs) {
                    LOG.warn("Checkpointed offset for store: {} in task: {} is: {}. It is in range of oldest: {} and "
                        + "newest: {} changelog offset. However, time since last checkpoint is: {}, which is greater than "
                        + "0.9 * min.compaction.lag.ms: {} for the changelog topic. Since there is a chance that "
                        + "the changelog topic has been compacted, restoring store to the end of the current changelog contents. "
                        + "There is no transactional local state guarantee.",
                        storeName, taskName, checkpointedOffset, oldestOffset, newestOffset, timeSinceLastCheckpointInMs, minCompactionLagMs);
                    targetOffset = newestOffset;
                } else {
                    targetOffset = checkpointedOffset;
                }
                // if there exists a valid store checkpoint directory with oldest offset <= local offset <= target offset,
                // retain it and restore the delta. delete all other checkpoint directories for the store. if more than one such
                // checkpoint directory exists, retain the one with the highest local offset and delete the rest.
                boolean hasValidCheckpointDir = false;
                for (File checkpointDir : checkpointDirsOptional.get()) {
                    if (storageManagerUtil.isLoggedStoreValid(storeName, checkpointDir, config, storeChangelogs, taskModel, clock, storeEngines)) {
                        String localOffset = storageManagerUtil.readOffsetFile(checkpointDir, Collections.singleton(changelogSSP), false).get(changelogSSP);
                        LOG.info("Read local offset: {} for store: {} checkpoint dir: {} in task: {}", localOffset, storeName, checkpointDir, taskName);
                        // the last clause keeps only the candidate with the highest local offset seen so far
                        if (admin.offsetComparator(localOffset, oldestOffset) >= 0
                            && admin.offsetComparator(localOffset, targetOffset) <= 0
                            && (storesToRestore.get(storeName) == null
                                || admin.offsetComparator(localOffset, storesToRestore.get(storeName).startingOffset) > 0)) {
                            hasValidCheckpointDir = true;
                            LOG.info("Temporarily marking checkpoint dir: {} for store: {} in task: {} for retention. "
                                + "May be overridden later.", checkpointDir, storeName, taskName);
                            storeDirToRetain.put(storeName, checkpointDir);
                            // mark for restore even if local == checkpointed, so that the changelog gets trimmed.
                            LOG.info("Temporarily marking store: {} in task: {} for restore from beginning offset: {} to "
                                + "ending offset: {}. May be overridden later", storeName, taskName, localOffset, targetOffset);
                            storesToRestore.put(storeName, new RestoreOffsets(localOffset, targetOffset));
                        }
                    }
                }
                // delete all non-retained checkpoint directories
                File retainedDir = storeDirToRetain.get(storeName);
                for (File checkpointDir : checkpointDirsOptional.get()) {
                    if (!checkpointDir.equals(retainedDir)) {
                        LOG.info("Marking checkpoint directory: {} for store: {} in task: {} for deletion since it is not "
                            + "marked for retention.", checkpointDir, storeName, taskName);
                        storeDirsToDelete.put(storeName, checkpointDir);
                    }
                }
                // if the store had not valid checkpoint dirs to retain, restore from changelog
                if (!hasValidCheckpointDir) {
                    storesToRestore.put(storeName, new RestoreOffsets(oldestOffset, targetOffset));
                }
            }
        }
    });
    LOG.info("Store directories to be retained in Task: {} are: {}", taskName, storeDirToRetain);
    LOG.info("Store directories to be deleted in Task: {} are: {}", taskName, storeDirsToDelete);
    LOG.info("Stores to be restored in Task: {} are: {}", taskName, storesToRestore);
    return new StoreActions(storeDirToRetain, storeDirsToDelete, storesToRestore);
}
Also used : ArrayListMultimap(com.google.common.collect.ArrayListMultimap) ListMultimap(com.google.common.collect.ListMultimap) SSPMetadataCache(org.apache.samza.system.SSPMetadataCache) CheckpointV2(org.apache.samza.checkpoint.CheckpointV2) LoggerFactory(org.slf4j.LoggerFactory) HashMap(java.util.HashMap) CompletableFuture(java.util.concurrent.CompletableFuture) TaskModel(org.apache.samza.job.model.TaskModel) Serde(org.apache.samza.serializers.Serde) SystemStreamPartition(org.apache.samza.system.SystemStreamPartition) StringUtils(org.apache.commons.lang3.StringUtils) CheckpointV1(org.apache.samza.checkpoint.CheckpointV1) FileUtil(org.apache.samza.util.FileUtil) SystemConsumer(org.apache.samza.system.SystemConsumer) MessageCollector(org.apache.samza.task.MessageCollector) SystemStream(org.apache.samza.system.SystemStream) Map(java.util.Map) ExecutorService(java.util.concurrent.ExecutorService) StorageConfig(org.apache.samza.config.StorageConfig) KafkaChangelogSSPOffset(org.apache.samza.checkpoint.kafka.KafkaChangelogSSPOffset) TaskName(org.apache.samza.container.TaskName) Logger(org.slf4j.Logger) ImmutableMap(com.google.common.collect.ImmutableMap) TaskConfig(org.apache.samza.config.TaskConfig) JobContext(org.apache.samza.context.JobContext) Partition(org.apache.samza.Partition) ContainerContext(org.apache.samza.context.ContainerContext) Set(java.util.Set) Checkpoint(org.apache.samza.checkpoint.Checkpoint) Clock(org.apache.samza.util.Clock) MetricsRegistry(org.apache.samza.metrics.MetricsRegistry) File(java.io.File) SamzaException(org.apache.samza.SamzaException) CheckpointId(org.apache.samza.checkpoint.CheckpointId) List(java.util.List) TaskMode(org.apache.samza.job.model.TaskMode) ChangelogSSPIterator(org.apache.samza.system.ChangelogSSPIterator) SystemAdmin(org.apache.samza.system.SystemAdmin) SystemStreamPartitionMetadata(org.apache.samza.system.SystemStreamMetadata.SystemStreamPartitionMetadata) Optional(java.util.Optional) 
Preconditions(com.google.common.base.Preconditions) KafkaStateCheckpointMarker(org.apache.samza.checkpoint.kafka.KafkaStateCheckpointMarker) VisibleForTesting(com.google.common.annotations.VisibleForTesting) Config(org.apache.samza.config.Config) Collections(java.util.Collections) SystemAdmins(org.apache.samza.system.SystemAdmins) HashMap(java.util.HashMap) StorageConfig(org.apache.samza.config.StorageConfig) SystemStream(org.apache.samza.system.SystemStream) TaskConfig(org.apache.samza.config.TaskConfig) SystemStreamPartitionMetadata(org.apache.samza.system.SystemStreamMetadata.SystemStreamPartitionMetadata) TaskMode(org.apache.samza.job.model.TaskMode) TaskName(org.apache.samza.container.TaskName) List(java.util.List) SystemAdmin(org.apache.samza.system.SystemAdmin) File(java.io.File) SystemStreamPartition(org.apache.samza.system.SystemStreamPartition) VisibleForTesting(com.google.common.annotations.VisibleForTesting)

Example 2 with TaskMode

use of org.apache.samza.job.model.TaskMode in project samza by apache.

the class TransactionalStateTaskRestoreManager method setupStoreDirs.

/**
 * Applies the directory decisions recorded in {@code storeActions} to the local file system.
 *
 * For each store for this task,
 * a. Deletes every directory marked for deletion (current and checkpoint directories).
 * b. Restores the files of the checkpoint directory marked for retention (if any) into the store's
 *    current directory via {@code storageManagerUtil.restoreCheckpointFiles}.
 * c. Creates the current directory of every persistent store (logged or not) if it does not exist.
 *
 * When this method returns,
 * a. There will be a current dir for each persistent but non-logged store.
 * b. There will be a current dir for each persistent logged store. This dir may or may not be empty.
 * c. All directories marked for deletion will have been removed. A retained checkpoint dir is
 *    intentionally left in place so that the restore can be retried if the container restarts
 *    before the next commit.
 *
 * @param taskModel supplies the task name and task mode used to resolve store directories
 * @param storeEngines store name to storage engine, used to find persistent stores
 * @param storeActions the directories to delete and to retain
 * @param storageManagerUtil resolves store directories and restores checkpoint files
 * @param fileUtil performs the delete / exists / create operations
 * @param loggedStoreBaseDirectory base directory of logged stores
 * @param nonLoggedStoreBaseDirectory base directory of non-logged stores
 * @throws SamzaException if a missing current directory cannot be checked for or created
 */
@VisibleForTesting
static void setupStoreDirs(TaskModel taskModel, Map<String, StorageEngine> storeEngines, StoreActions storeActions, StorageManagerUtil storageManagerUtil, FileUtil fileUtil, File loggedStoreBaseDirectory, File nonLoggedStoreBaseDirectory) {
    TaskName taskName = taskModel.getTaskName();
    TaskMode taskMode = taskModel.getTaskMode();
    ListMultimap<String, File> storeDirsToDelete = storeActions.storeDirsToDelete;
    Map<String, File> storeDirsToRetain = storeActions.storeDirsToRetain;
    // delete all persistent store directories marked for deletion
    storeDirsToDelete.entries().forEach(entry -> {
        String storeName = entry.getKey();
        File storeDirToDelete = entry.getValue();
        LOG.info("Deleting persistent store directory: {} for store: {} in task: {}", storeDirToDelete, storeName, taskName);
        fileUtil.rm(storeDirToDelete);
    });
    // rename all retained persistent logged store checkpoint directories to current directory
    storeDirsToRetain.forEach((storeName, storeDirToRetain) -> {
        File currentDir = storageManagerUtil.getTaskStoreDir(loggedStoreBaseDirectory, storeName, taskName, taskMode);
        // log the single directory being moved, not the whole retain map
        LOG.info("Moving logged store checkpoint directory: {} for store: {} in task: {} to current directory: {}", storeDirToRetain, storeName, taskName, currentDir);
        storageManagerUtil.restoreCheckpointFiles(storeDirToRetain, currentDir);
        // do not remove the checkpoint directory yet. in case commit fails and container restarts,
        // we can retry the move. if we delete the checkpoint, the current dir will be deleted as well on
        // restart, and we will have to do a full restore.
    });
    // create any missing (not retained) current directories for persistent stores
    storeEngines.forEach((storeName, storageEngine) -> {
        if (storageEngine.getStoreProperties().isPersistedToDisk()) {
            File currentDir;
            if (storageEngine.getStoreProperties().isLoggedStore()) {
                currentDir = storageManagerUtil.getTaskStoreDir(loggedStoreBaseDirectory, storeName, taskName, taskMode);
            } else {
                currentDir = storageManagerUtil.getTaskStoreDir(nonLoggedStoreBaseDirectory, storeName, taskName, taskMode);
            }
            try {
                if (!fileUtil.exists(currentDir.toPath())) {
                    LOG.info("Creating missing persistent store current directory: {} for store: {} in task: {}", currentDir, storeName, taskName);
                    fileUtil.createDirectories(currentDir.toPath());
                }
            } catch (Exception e) {
                throw new SamzaException(String.format("Error setting up current directory for store: %s", storeName), e);
            }
        }
    });
}
Also used : TaskName(org.apache.samza.container.TaskName) TaskMode(org.apache.samza.job.model.TaskMode) File(java.io.File) SamzaException(org.apache.samza.SamzaException) SamzaException(org.apache.samza.SamzaException) VisibleForTesting(com.google.common.annotations.VisibleForTesting)

Example 3 with TaskMode

use of org.apache.samza.job.model.TaskMode in project samza by apache.

the class JobModelHelper method updateTaskAssignments.

/**
 * Reconciles the task assignments held in the metadata store with the given job model:
 * 1. Deletes the existing task assignments if the number of active tasks differs from the previous run of the job.
 * 2. Deletes the existing standby task mappings if the set of standby tasks has changed.
 * 3. Saves the newly generated task assignments to the storage layer through the {@code TaskAssignmentManager}
 *    and the {@code TaskPartitionAssignmentManager}.
 *
 * @param jobModel              represents the {@code JobModel} of the samza job.
 * @param taskAssignmentManager required to persist the processor to task assignments to the metadata store.
 * @param taskPartitionAssignmentManager required to persist the task to partition assignments to the metadata store.
 * @param grouperMetadata       provides the historical metadata of the samza application.
 */
private void updateTaskAssignments(JobModel jobModel, TaskAssignmentManager taskAssignmentManager, TaskPartitionAssignmentManager taskPartitionAssignmentManager, GrouperMetadata grouperMetadata) {
    LOG.info("Storing the task assignments into metadata store.");

    // first pass: collect the names of the current active / standby tasks and every input partition
    Set<String> currentActiveTasks = new HashSet<>();
    Set<String> currentStandbyTasks = new HashSet<>();
    Set<SystemStreamPartition> allPartitions = new HashSet<>();
    for (ContainerModel container : jobModel.getContainers().values()) {
        for (TaskModel task : container.getTasks().values()) {
            TaskMode mode = task.getTaskMode();
            if (TaskMode.Active.equals(mode)) {
                currentActiveTasks.add(task.getTaskName().getTaskName());
            }
            if (TaskMode.Standby.equals(mode)) {
                currentStandbyTasks.add(task.getTaskName().getTaskName());
            }
            allPartitions.addAll(task.getSystemStreamPartitions());
        }
    }

    Map<TaskName, String> previousAssignment = grouperMetadata.getPreviousTaskToProcessorAssignment();
    int currentTaskCount = currentActiveTasks.size();
    int savedTaskCount = previousAssignment.size();
    if (currentTaskCount != savedTaskCount) {
        LOG.warn(String.format("Current task count %s does not match saved task count %s. Stateful jobs may observe misalignment of keys!", currentTaskCount, savedTaskCount));
        // If the tasks changed, then the partition-task grouping is also likely changed and we can't handle that
        // without a much more complicated mapping. Further, the partition count may have changed, which means
        // input message keys are likely reshuffled w.r.t. partitions, so the local state may not contain necessary
        // data associated with the incoming keys. Warn the user and default to grouper
        // In this scenario the tasks may have been reduced, so we need to delete all the existing messages
        List<String> previousTaskNames = previousAssignment.keySet().stream()
            .map(TaskName::getTaskName)
            .collect(Collectors.toList());
        taskAssignmentManager.deleteTaskContainerMappings(previousTaskNames);
        taskPartitionAssignmentManager.delete(allPartitions);
    }

    // if the set of standby tasks has changed, e.g., when the replication-factor changed, or the active-tasks-set has
    // changed, we log a warning and delete the existing mapping for these tasks
    Set<String> previousStandbyTasks = new HashSet<>();
    for (Map.Entry<TaskName, TaskMode> savedMode : taskAssignmentManager.readTaskModes().entrySet()) {
        if (TaskMode.Standby.equals(savedMode.getValue())) {
            previousStandbyTasks.add(savedMode.getKey().getTaskName());
        }
    }
    if (!currentStandbyTasks.equals(previousStandbyTasks)) {
        LOG.info(String.format("The set of standby tasks has changed, current standby tasks %s, previous standby tasks %s", currentStandbyTasks, previousStandbyTasks));
        taskAssignmentManager.deleteTaskContainerMappings(previousStandbyTasks);
    }

    // Task to partition assignments is stored as SystemStreamPartition to list of TaskName in
    // coordinator stream. This is done due to the 1 MB value size limit in a kafka topic.
    Map<SystemStreamPartition, List<String>> tasksByPartition = new HashMap<>();
    Map<String, Map<String, TaskMode>> taskModesByContainer = new HashMap<>();
    for (ContainerModel container : jobModel.getContainers().values()) {
        for (Map.Entry<TaskName, TaskModel> taskEntry : container.getTasks().entrySet()) {
            String taskNameString = taskEntry.getKey().getTaskName();
            TaskModel task = taskEntry.getValue();
            taskModesByContainer
                .computeIfAbsent(container.getId(), id -> new HashMap<>())
                .put(taskNameString, task.getTaskMode());
            for (SystemStreamPartition partition : task.getSystemStreamPartitions()) {
                tasksByPartition
                    .computeIfAbsent(partition, ssp -> new ArrayList<>())
                    .add(taskNameString);
            }
        }
    }
    taskAssignmentManager.writeTaskContainerMappings(taskModesByContainer);
    taskPartitionAssignmentManager.writeTaskPartitionAssignments(tasksByPartition);
}
Also used : StreamMetadataCache(org.apache.samza.system.StreamMetadataCache) TaskPartitionAssignmentManager(org.apache.samza.container.grouper.task.TaskPartitionAssignmentManager) LoggerFactory(org.slf4j.LoggerFactory) JobConfig(org.apache.samza.config.JobConfig) GrouperMetadataImpl(org.apache.samza.container.grouper.task.GrouperMetadataImpl) HashMap(java.util.HashMap) TaskModel(org.apache.samza.job.model.TaskModel) SystemStreamPartition(org.apache.samza.system.SystemStreamPartition) StringUtils(org.apache.commons.lang3.StringUtils) ArrayList(java.util.ArrayList) LocationId(org.apache.samza.runtime.LocationId) HashSet(java.util.HashSet) Map(java.util.Map) GrouperMetadata(org.apache.samza.container.grouper.task.GrouperMetadata) TaskAssignmentManager(org.apache.samza.container.grouper.task.TaskAssignmentManager) JobModel(org.apache.samza.job.model.JobModel) TaskName(org.apache.samza.container.TaskName) Logger(org.slf4j.Logger) Set(java.util.Set) Collectors(java.util.stream.Collectors) LocalityManager(org.apache.samza.container.LocalityManager) List(java.util.List) TaskMode(org.apache.samza.job.model.TaskMode) ContainerModel(org.apache.samza.job.model.ContainerModel) ProcessorLocality(org.apache.samza.job.model.ProcessorLocality) Optional(java.util.Optional) Config(org.apache.samza.config.Config) HashMap(java.util.HashMap) ContainerModel(org.apache.samza.job.model.ContainerModel) TaskName(org.apache.samza.container.TaskName) ArrayList(java.util.ArrayList) List(java.util.List) HashMap(java.util.HashMap) Map(java.util.Map) TaskModel(org.apache.samza.job.model.TaskModel) HashSet(java.util.HashSet) SystemStreamPartition(org.apache.samza.system.SystemStreamPartition)

Example 4 with TaskMode

use of org.apache.samza.job.model.TaskMode in project samza by apache.

the class TaskAssignmentManager method readTaskModes.

/**
 * Reads every entry of the task mode metadata store and decodes it into a task mode.
 * Entries whose deserialized value is null are skipped (but still logged at debug level).
 *
 * @return an unmodifiable map from task name to its task mode
 */
public Map<TaskName, TaskMode> readTaskModes() {
    Map<TaskName, TaskMode> modesByTask = new HashMap<>();
    taskModeMappingMetadataStore.all().forEach((storedTaskName, serializedMode) -> {
        String modeName = taskModeSerde.fromBytes(serializedMode);
        if (modeName != null) {
            TaskName key = new TaskName(storedTaskName);
            TaskMode mode = TaskMode.valueOf(modeName);
            modesByTask.put(key, mode);
        }
        LOG.debug("Task mode assignment for task {}: {}", storedTaskName, modeName);
    });
    Map<TaskName, TaskMode> snapshot = new HashMap<>(modesByTask);
    return Collections.unmodifiableMap(snapshot);
}
Also used : TaskName(org.apache.samza.container.TaskName) HashMap(java.util.HashMap) TaskMode(org.apache.samza.job.model.TaskMode)

Example 5 with TaskMode

use of org.apache.samza.job.model.TaskMode in project samza by apache.

the class TaskSideInputStorageManager method writeFileOffsets.

/**
 * Writes one offset file per persisted side input store, store by store. In-memory side input
 * stores are skipped.
 * The offsets written for a store are those of its side input SSPs that have an entry in
 * {@code lastProcessedOffsets}.
 *
 * @param lastProcessedOffsets The offset per SSP to write
 * @throws SamzaException if resolving the store directory or writing the offset file fails
 */
public void writeFileOffsets(Map<SystemStreamPartition, String> lastProcessedOffsets) {
    storeToSSps.forEach((storeName, sideInputSsps) -> {
        // filter out in-memory side input stores
        if (!isPersistedStore(storeName)) {
            return;
        }
        // keep only the SSPs of this store for which an offset was supplied
        Map<SystemStreamPartition, String> storeOffsets = sideInputSsps.stream()
            .filter(lastProcessedOffsets::containsKey)
            .collect(Collectors.toMap(Function.identity(), lastProcessedOffsets::get));
        try {
            File storeDir = storageManagerUtil.getTaskStoreDir(storeBaseDir, storeName, taskName, taskMode);
            storageManagerUtil.writeOffsetFile(storeDir, storeOffsets, true);
        } catch (Exception e) {
            throw new SamzaException("Failed to write offset file for side input store: " + storeName, e);
        }
    });
}
Also used : TaskName(org.apache.samza.container.TaskName) Logger(org.slf4j.Logger) LoggerFactory(org.slf4j.LoggerFactory) Set(java.util.Set) HashMap(java.util.HashMap) Clock(org.apache.samza.util.Clock) SystemStreamPartition(org.apache.samza.system.SystemStreamPartition) Function(java.util.function.Function) Collectors(java.util.stream.Collectors) File(java.io.File) SamzaException(org.apache.samza.SamzaException) TimeUnit(java.util.concurrent.TimeUnit) FileUtil(org.apache.samza.util.FileUtil) TaskMode(org.apache.samza.job.model.TaskMode) Map(java.util.Map) Optional(java.util.Optional) VisibleForTesting(com.google.common.annotations.VisibleForTesting) File(java.io.File) SamzaException(org.apache.samza.SamzaException) SamzaException(org.apache.samza.SamzaException) SystemStreamPartition(org.apache.samza.system.SystemStreamPartition)

Aggregations

TaskMode (org.apache.samza.job.model.TaskMode)9 TaskName (org.apache.samza.container.TaskName)8 HashMap (java.util.HashMap)7 Map (java.util.Map)6 Set (java.util.Set)5 SamzaException (org.apache.samza.SamzaException)5 SystemStreamPartition (org.apache.samza.system.SystemStreamPartition)5 Logger (org.slf4j.Logger)5 LoggerFactory (org.slf4j.LoggerFactory)5 File (java.io.File)4 List (java.util.List)4 Optional (java.util.Optional)4 Collectors (java.util.stream.Collectors)4 Config (org.apache.samza.config.Config)4 TaskModel (org.apache.samza.job.model.TaskModel)4 VisibleForTesting (com.google.common.annotations.VisibleForTesting)3 StringUtils (org.apache.commons.lang3.StringUtils)3 ImmutableMap (com.google.common.collect.ImmutableMap)2 ArrayList (java.util.ArrayList)2 Collections (java.util.Collections)2