use of org.apache.samza.system.SystemStream in project samza by apache.
the class KafkaChangelogStateBackendFactory method getBackupManager.
/**
 * Creates the changelog-backed {@link TaskBackupManager} for one task.
 *
 * <p>The transactional implementation is returned when the task config reports that transactional state
 * checkpointing is enabled; otherwise the non-transactional implementation is returned. Both are built from
 * the task's name, the store-to-changelog mapping read from the storage config, a {@link SystemAdmins}
 * created from {@code config}, and the task's changelog partition. Only {@code taskModel} and
 * {@code config} are read by this implementation.
 *
 * @param taskModel supplies the task name and changelog partition
 * @param config job configuration used to build the system admins and to read storage/task settings
 * @return the backup manager matching the configured checkpointing mode
 */
@Override
public TaskBackupManager getBackupManager(JobContext jobContext, ContainerModel containerModel, TaskModel taskModel, ExecutorService backupExecutor, MetricsRegistry metricsRegistry, Config config, Clock clock, File loggedStoreBaseDir, File nonLoggedStoreBaseDir) {
  SystemAdmins admins = new SystemAdmins(config);
  Map<String, SystemStream> changelogsByStore = new StorageConfig(config).getStoreChangelogs();
  boolean transactional = new TaskConfig(config).getTransactionalStateCheckpointEnabled();
  return transactional
      ? new KafkaTransactionalStateTaskBackupManager(taskModel.getTaskName(), changelogsByStore, admins, taskModel.getChangelogPartition())
      : new KafkaNonTransactionalStateTaskBackupManager(taskModel.getTaskName(), changelogsByStore, admins, taskModel.getChangelogPartition());
}
use of org.apache.samza.system.SystemStream in project samza by apache.
the class KafkaChangelogStateBackendFactory method filterStandbySystemStreams.
/**
 * Returns the store-name to changelog-stream mapping that is left once the changelog partitions of the
 * container's standby tasks have been discarded.
 *
 * <p>A changelog {@link SystemStreamPartition} is first registered for every (store, task) pair in the
 * container. Every SSP whose partition belongs to a standby task is then removed, and the surviving SSPs
 * are mapped back to their store name and reduced to their {@link SystemStream}.
 *
 * @param changelogSystemStreams store name to changelog stream, for all configured stores
 * @param containerModel the container whose tasks decide which changelog partitions survive
 * @return store name to changelog stream for stores that still have at least one remaining changelog SSP
 */
@VisibleForTesting
Map<String, SystemStream> filterStandbySystemStreams(Map<String, SystemStream> changelogSystemStreams, ContainerModel containerModel) {
  // Keyed by SSP so that entries can be dropped by (stream, partition) below.
  Map<SystemStreamPartition, String> storeBySsp = new HashMap<>();
  for (Map.Entry<String, SystemStream> changelog : changelogSystemStreams.entrySet()) {
    for (TaskModel task : containerModel.getTasks().values()) {
      storeBySsp.put(new SystemStreamPartition(changelog.getValue(), task.getChangelogPartition()), changelog.getKey());
    }
  }

  // Drop every changelog SSP that sits on a standby task's changelog partition.
  for (TaskModel task : containerModel.getTasks().values()) {
    if (task.getTaskMode().equals(TaskMode.Standby)) {
      for (SystemStream stream : changelogSystemStreams.values()) {
        storeBySsp.remove(new SystemStreamPartition(stream, task.getChangelogPartition()));
      }
    }
  }

  // Flip back to store -> SSP and keep only the stream part of each surviving SSP.
  return MapUtils.invertMap(storeBySsp)
      .entrySet()
      .stream()
      .collect(Collectors.toMap(Map.Entry::getKey, storeAndSsp -> storeAndSsp.getValue().getSystemStream()));
}
use of org.apache.samza.system.SystemStream in project samza by apache.
the class TaskSideInputHandler method getOldestOffsets.
/**
 * Gets the oldest offset for the {@link SystemStreamPartition}s associated with all the store side inputs.
 * 1. Groups the list of the SSPs based on system stream
 * 2. Fetches the {@link SystemStreamMetadata} from {@link StreamMetadataCache}
 * 3. Fetches the partition metadata for each system stream and populates the oldest offset for SSPs
 *    belonging to the system stream.
 *
 * <p>Only system streams present in the metadata returned by the cache contribute entries; SSPs of a
 * stream that is absent from that metadata do not appear in the result.
 *
 * @return a {@link Map} of {@link SystemStreamPartition} to their oldest offset. If partitionMetadata could not be
 * obtained for any {@link SystemStreamPartition} the offset for it is populated as null.
 */
@VisibleForTesting
Map<SystemStreamPartition, String> getOldestOffsets() {
  Map<SystemStreamPartition, String> oldestOffsets = new HashMap<>();
  // Step 1
  Map<SystemStream, List<SystemStreamPartition>> systemStreamToSsp = this.sspToStores.keySet().stream().collect(Collectors.groupingBy(SystemStreamPartition::getSystemStream));
  // Step 2
  Map<SystemStream, SystemStreamMetadata> metadata = JavaConverters.mapAsJavaMapConverter(this.streamMetadataCache.getStreamMetadata(JavaConverters.asScalaSetConverter(systemStreamToSsp.keySet()).asScala().toSet(), false)).asJava();
  // Step 3
  metadata.forEach((systemStream, systemStreamMetadata) -> {
    // get the partition metadata for each system stream
    Map<Partition, SystemStreamMetadata.SystemStreamPartitionMetadata> partitionMetadata = systemStreamMetadata.getSystemStreamPartitionMetadata();
    // Because of https://bugs.openjdk.java.net/browse/JDK-8148463 using lambda will NPE when getOldestOffset() is null
    for (SystemStreamPartition ssp : systemStreamToSsp.get(systemStream)) {
      SystemStreamMetadata.SystemStreamPartitionMetadata sspMetadata = partitionMetadata.get(ssp.getPartition());
      // Honour the documented contract: a partition without metadata yields a null offset instead of an NPE.
      oldestOffsets.put(ssp, sspMetadata == null ? null : sspMetadata.getOldestOffset());
    }
  });
  return oldestOffsets;
}
use of org.apache.samza.system.SystemStream in project samza by apache.
the class TransactionalStateTaskRestoreManager method getCurrentChangelogOffsets.
/**
 * Looks up the current offset metadata of every changelog SSP belonging to the given task.
 *
 * <p>Each store may have its own changelog stream, but all of them are read at the task's single
 * changelog partition, so one SSP per changelog stream is built and resolved through the metadata cache.
 * The resulting map is logged at info level before being returned.
 *
 * @param taskModel supplies the changelog partition (and the task name used for logging)
 * @param storeChangelogs store name to changelog stream; only the streams are used
 * @param sspMetadataCache cache queried for each changelog SSP's metadata
 * @return changelog SSP to the metadata returned by the cache for it
 */
@VisibleForTesting
static Map<SystemStreamPartition, SystemStreamPartitionMetadata> getCurrentChangelogOffsets(TaskModel taskModel, Map<String, SystemStream> storeChangelogs, SSPMetadataCache sspMetadataCache) {
  final Partition partition = taskModel.getChangelogPartition();
  final Map<SystemStreamPartition, SystemStreamPartitionMetadata> offsetsBySsp = new HashMap<>();
  storeChangelogs.values().forEach(stream -> {
    SystemStreamPartition ssp = new SystemStreamPartition(stream.getSystem(), stream.getStream(), partition);
    offsetsBySsp.put(ssp, sspMetadataCache.getMetadata(ssp));
  });
  LOG.info("Got current changelog offsets for taskName: {} as: {}", taskModel.getTaskName(), offsetsBySsp);
  return offsetsBySsp;
}
use of org.apache.samza.system.SystemStream in project samza by apache.
the class ChangelogStreamManager method createChangelogStreams.
/**
 * Creates and validates the changelog streams of a samza job.
 *
 * <p>For every store with a non-blank changelog stream configured, the admin of the changelog's system is
 * obtained, started, used to create (if needed) and validate the changelog stream, and then stopped. When
 * access logging is enabled for the store, the corresponding access log stream is created and validated
 * with the same admin. The admin is stopped even if creation or validation fails.
 *
 * @param config the configuration with changelog info.
 * @param maxChangeLogStreamPartitions the maximum number of changelog stream partitions to create.
 * @throws SamzaException if a changelog refers to a system that has no factory in the configuration, or
 *         whose factory does not provide an admin.
 */
public static void createChangelogStreams(Config config, int maxChangeLogStreamPartitions) {
  // Get changelog store config
  StorageConfig storageConfig = new StorageConfig(config);
  ImmutableMap.Builder<String, SystemStream> storeNameSystemStreamMapBuilder = new ImmutableMap.Builder<>();
  storageConfig.getStoreNames().forEach(storeName -> {
    Optional<String> changelogStream = storageConfig.getChangelogStream(storeName);
    if (changelogStream.isPresent() && StringUtils.isNotBlank(changelogStream.get())) {
      storeNameSystemStreamMapBuilder.put(storeName, StreamUtil.getSystemStreamFromNames(changelogStream.get()));
    }
  });
  Map<String, SystemStream> storeNameSystemStreamMapping = storeNameSystemStreamMapBuilder.build();
  // Get SystemAdmin for changelog store's system and attempt to create the stream
  SystemConfig systemConfig = new SystemConfig(config);
  storeNameSystemStreamMapping.forEach((storeName, systemStream) -> {
    // Load system admin for this system. A missing factory (system not configured) and a factory that
    // yields no admin are both reported with the same descriptive error instead of a bare NPE.
    SystemAdmin systemAdmin = Optional.ofNullable(systemConfig.getSystemFactories().get(systemStream.getSystem()))
        .map(factory -> factory.getAdmin(systemStream.getSystem(), config, ChangelogStreamManager.class.getSimpleName()))
        .orElseThrow(() -> new SamzaException(String.format("Error creating changelog. Changelog on store %s uses system %s, which is missing from the configuration.", storeName, systemStream.getSystem())));
    StreamSpec changelogSpec = StreamSpec.createChangeLogStreamSpec(systemStream.getStream(), systemStream.getSystem(), maxChangeLogStreamPartitions);
    systemAdmin.start();
    try {
      if (systemAdmin.createStream(changelogSpec)) {
        LOG.info(String.format("created changelog stream %s.", systemStream.getStream()));
      } else {
        LOG.info(String.format("changelog stream %s already exists.", systemStream.getStream()));
      }
      systemAdmin.validateStream(changelogSpec);
      if (storageConfig.getAccessLogEnabled(storeName)) {
        String accesslogStream = storageConfig.getAccessLogStream(systemStream.getStream());
        StreamSpec accesslogSpec = new StreamSpec(accesslogStream, accesslogStream, systemStream.getSystem(), maxChangeLogStreamPartitions);
        systemAdmin.createStream(accesslogSpec);
        systemAdmin.validateStream(accesslogSpec);
      }
    } finally {
      // Always release the started admin, even when stream creation or validation throws.
      systemAdmin.stop();
    }
  });
}
Aggregations