Use of org.apache.samza.job.model.TaskMode in project samza by apache.
The class StorageManagerUtil, method getTaskStoreCheckpointDirs.
/**
 * Lists the checkpoint directories for the given store and task under {@code storeBaseDir}.
 * Checkpoint directories are siblings of the task store directory whose names contain the
 * task store directory name followed by a "-" suffix.
 *
 * @param storeBaseDir base directory containing all stores
 * @param storeName name of the store
 * @param taskName name of the task the store belongs to
 * @param taskMode mode (active/standby) of the task
 * @return list of checkpoint directories, or an empty list if the store directory does not exist
 * @throws SamzaException if listing the store directory fails with an {@link IOException}
 */
public List<File> getTaskStoreCheckpointDirs(File storeBaseDir, String storeName, TaskName taskName, TaskMode taskMode) {
  File storeDir = new File(storeBaseDir, storeName);
  String taskStoreName = getTaskStoreDir(storeBaseDir, storeName, taskName, taskMode).getName();

  if (!storeDir.exists()) {
    // new store or no local state: nothing to list
    return Collections.emptyList();
  }

  // Files.list returns a stream backed by an open directory handle; it must be
  // closed, so hold it in a try-with-resources block (the original leaked it).
  try (java.util.stream.Stream<Path> dirContents = Files.list(storeDir.toPath())) {
    return dirContents
        .map(Path::toFile)
        .filter(file -> file.getName().contains(taskStoreName + "-"))
        .collect(Collectors.toList());
  } catch (IOException e) {
    throw new SamzaException(String.format("Error finding checkpoint dirs for task: %s mode: %s store: %s in dir: %s", taskName, taskMode, storeName, storeBaseDir), e);
  }
}
Use of org.apache.samza.job.model.TaskMode in project samza by apache.
The class ContainerStorageManager, method createSideInputProcessors.
/**
 * Creates side-input store processors, one per side-input store per task.
 *
 * Resolution order per store: (1) a serialized processor instance from config,
 * (2) a processor factory class from config, (3) an identity processor that
 * deserializes key/message with the store's configured serdes (used for
 * standby-task changelog stores; active tasks missing a factory are expected
 * to be rejected by upstream config validation).
 *
 * @param config storage configuration to read processor/serde settings from
 * @param containerModel container model providing the tasks and their modes
 * @param taskInstanceMetrics per-task metrics, used to obtain a metrics registry for factories
 * @return map of task name to (store name to processor) for all side-input stores
 */
private Map<TaskName, Map<String, SideInputsProcessor>> createSideInputProcessors(StorageConfig config, ContainerModel containerModel, Map<TaskName, TaskInstanceMetrics> taskInstanceMetrics) {
  Map<TaskName, Map<String, SideInputsProcessor>> sideInputStoresToProcessors = new HashMap<>();
  containerModel.getTasks().forEach((taskName, taskModel) -> {
    sideInputStoresToProcessors.put(taskName, new HashMap<>());
    TaskMode taskMode = taskModel.getTaskMode();

    for (String storeName : this.taskSideInputStoreSSPs.get(taskName).keySet()) {
      SideInputsProcessor sideInputsProcessor;
      Optional<String> sideInputsProcessorSerializedInstance = config.getSideInputsProcessorSerializedInstance(storeName);

      if (sideInputsProcessorSerializedInstance.isPresent()) {
        // (1) Serialized processor instance wins over everything else.
        sideInputsProcessor = SerdeUtils.deserialize("Side Inputs Processor", sideInputsProcessorSerializedInstance.get());
        LOG.info("Using serialized side-inputs-processor for store: {}, task: {}", storeName, taskName);
      } else if (config.getSideInputsProcessorFactory(storeName).isPresent()) {
        // (2) Instantiate a processor from the configured factory class.
        String sideInputsProcessorFactoryClassName = config.getSideInputsProcessorFactory(storeName).get();
        SideInputsProcessorFactory sideInputsProcessorFactory = ReflectionUtil.getObj(sideInputsProcessorFactoryClassName, SideInputsProcessorFactory.class);
        sideInputsProcessor = sideInputsProcessorFactory.getSideInputsProcessor(config, taskInstanceMetrics.get(taskName).registry());
        // Reuse the already-unwrapped class name instead of re-reading the Optional.
        LOG.info("Using side-inputs-processor from factory: {} for store: {}, task: {}", sideInputsProcessorFactoryClassName, storeName, taskName);
      } else {
        // (3) No processor configured for this store.
        // For an active task with a side-input store but no processor factory in config,
        // we rely on upstream validation to fail the deploy.
        // For a standby task whose store is a non-side-input changelog store, create an
        // identity processor, using the store's configured serdes so entries match how
        // the side-input stores were created.
        Serde keySerde = serdes.get(config.getStorageKeySerde(storeName).orElseThrow(() -> new SamzaException("Could not find storage key serde for store: " + storeName)));
        Serde msgSerde = serdes.get(config.getStorageMsgSerde(storeName).orElseThrow(() -> new SamzaException("Could not find storage msg serde for store: " + storeName)));
        sideInputsProcessor = new SideInputsProcessor() {
          @Override
          public Collection<Entry<?, ?>> process(IncomingMessageEnvelope message, KeyValueStore store) {
            // Ignore message if the key is null
            if (message.getKey() == null) {
              return ImmutableList.of();
            } else {
              // Skip serde if the message is null
              return ImmutableList.of(new Entry<>(keySerde.fromBytes((byte[]) message.getKey()), message.getMessage() == null ? null : msgSerde.fromBytes((byte[]) message.getMessage())));
            }
          }
        };
        LOG.info("Using identity side-inputs-processor for store: {}, task: {}", storeName, taskName);
      }

      sideInputStoresToProcessors.get(taskName).put(storeName, sideInputsProcessor);
    }
  });
  return sideInputStoresToProcessors;
}
Use of org.apache.samza.job.model.TaskMode in project samza by apache.
The class JobModelHelper, method getGrouperMetadata.
/**
 * Builds the {@link GrouperMetadata} for job model generation from the previously
 * persisted locality, task-container, and task-partition assignments.
 *
 * Only active tasks are considered: a task with no recorded task-mode, or with an
 * explicitly active mode, is active (see {@code isActiveTask}).
 *
 * @param config job configuration
 * @param localityManager source of processor locality information
 * @param taskAssignmentManager source of previous task-to-container assignments and task modes
 * @param taskPartitionAssignmentManager source of previous SSP-to-task assignments
 * @return grouper metadata combining processor locality, task locality,
 *         task partition assignments, and task-to-processor assignments
 */
private GrouperMetadata getGrouperMetadata(Config config, LocalityManager localityManager, TaskAssignmentManager taskAssignmentManager, TaskPartitionAssignmentManager taskPartitionAssignmentManager) {
  Map<String, LocationId> processorLocality = getProcessorLocality(config, localityManager);
  Map<TaskName, TaskMode> taskModes = taskAssignmentManager.readTaskModes();
  Map<TaskName, String> taskNameToProcessorId = new HashMap<>();
  Map<TaskName, LocationId> taskLocality = new HashMap<>();

  // We read the taskAssignment only for active tasks, i.e., tasks that have no
  // task-mode recorded or have an active task mode.
  taskAssignmentManager.readTaskAssignment().forEach((taskNameString, containerId) -> {
    TaskName taskName = new TaskName(taskNameString);
    if (isActiveTask(taskName, taskModes)) {
      taskNameToProcessorId.put(taskName, containerId);
      if (processorLocality.containsKey(containerId)) {
        taskLocality.put(taskName, processorLocality.get(containerId));
      }
    }
  });

  Map<SystemStreamPartition, List<String>> sspToTaskMapping = taskPartitionAssignmentManager.readTaskPartitionAssignments();
  Map<TaskName, List<SystemStreamPartition>> taskPartitionAssignments = new HashMap<>();

  // Task to partition assignments is stored as {@see SystemStreamPartition} to list of {@see TaskName} in
  // coordinator stream. This is done due to the 1 MB value size limit in a kafka topic. Conversion to
  // taskName to SystemStreamPartitions is done here to wire-in the data to {@see JobModel}.
  sspToTaskMapping.forEach((systemStreamPartition, taskNames) -> taskNames.forEach(taskNameString -> {
    TaskName taskName = new TaskName(taskNameString);
    if (isActiveTask(taskName, taskModes)) {
      // Single-lookup idiom instead of putIfAbsent followed by get.
      taskPartitionAssignments.computeIfAbsent(taskName, k -> new ArrayList<>()).add(systemStreamPartition);
    }
  }));

  return new GrouperMetadataImpl(processorLocality, taskLocality, taskPartitionAssignments, taskNameToProcessorId);
}
Use of org.apache.samza.job.model.TaskMode in project samza by apache.
The class TaskAssignmentManager, method writeTaskContainerMappings.
/**
 * Method to batch write task container info to {@link MetadataStore}.
 *
 * A {@code null} container id for a task deletes the task's container and mode
 * mappings; a non-null id upserts both. Both backing stores are flushed once
 * after all mappings have been written.
 *
 * @param mappings the task and container mappings: (ContainerId, (TaskName, TaskMode))
 */
public void writeTaskContainerMappings(Map<String, Map<String, TaskMode>> mappings) {
  // Iterate entries directly instead of keySet + get to avoid redundant lookups.
  for (Map.Entry<String, Map<String, TaskMode>> containerEntry : mappings.entrySet()) {
    String containerId = containerEntry.getKey();
    for (Map.Entry<String, TaskMode> taskEntry : containerEntry.getValue().entrySet()) {
      String taskName = taskEntry.getKey();
      TaskMode taskMode = taskEntry.getValue();
      LOG.info("Storing task: {} and container ID: {} into metadata store", taskName, containerId);

      String existingContainerId = taskNameToContainerId.get(taskName);
      if (existingContainerId != null && !existingContainerId.equals(containerId)) {
        // SLF4J varargs make the explicit Object[] wrapper unnecessary.
        LOG.info("Task \"{}\" in mode {} moved from container {} to container {}", taskName, taskMode, existingContainerId, containerId);
      } else {
        LOG.debug("Task \"{}\" in mode {} assigned to container {}", taskName, taskMode, containerId);
      }

      if (containerId == null) {
        // A null container id means the task assignment is being removed.
        taskContainerMappingMetadataStore.delete(taskName);
        taskModeMappingMetadataStore.delete(taskName);
        taskNameToContainerId.remove(taskName);
      } else {
        taskContainerMappingMetadataStore.put(taskName, containerIdSerde.toBytes(containerId));
        taskModeMappingMetadataStore.put(taskName, taskModeSerde.toBytes(taskMode.toString()));
        taskNameToContainerId.put(taskName, containerId);
      }
    }
  }
  // Single flush per store after the whole batch, not per entry.
  taskContainerMappingMetadataStore.flush();
  taskModeMappingMetadataStore.flush();
}
Aggregations