Search in sources:

Example 1 with StreamMetadataCache

Use of org.apache.samza.system.StreamMetadataCache in the Apache Samza project.

Class AzureJobCoordinator, method start.

/**
 * Starts this Azure job coordinator.
 *
 * <p>In order, this method: creates and starts the {@code SystemAdmins} and builds the
 * {@code StreamMetadataCache} on top of them; adds this processor's entry to {@code table};
 * schedules the heartbeat task; attempts leader election; and finally schedules the job model
 * version-upgrade and leader-liveness tasks, attaching each task's state change listener
 * before the task is scheduled.
 */
@Override
public void start() {
    LOG.info("Starting Azure job coordinator.");
    // The systemAdmins should be started before streamMetadataCache can be used. And it should be stopped when this coordinator is stopped.
    systemAdmins = new SystemAdmins(config);
    systemAdmins.start();
    streamMetadataCache = new StreamMetadataCache(systemAdmins, METADATA_CACHE_TTL_MS, SystemClock.instance());
    // Record this processor in the table before any scheduler runs.
    // NOTE(review): the meaning of the trailing 'false' argument is not visible here
    // (presumably an "is leader" flag) -- confirm against the table API.
    table.addProcessorEntity(INITIAL_STATE, processorId, false);
    // Start scheduler for heartbeating
    LOG.info("Starting scheduler for heartbeating.");
    heartbeat.scheduleTask();
    // Leader election is attempted only after the heartbeat task has been scheduled.
    azureLeaderElector.tryBecomeLeader();
    // Start scheduler to check for job model version upgrades
    LOG.info("Starting scheduler to check for job model version upgrades.");
    // The listener is attached before scheduling so it is in place when the task first runs.
    versionUpgrade.setStateChangeListener(createJMVersionUpgradeListener());
    versionUpgrade.scheduleTask();
    // Start scheduler to check for leader liveness
    LOG.info("Starting scheduler to check for leader liveness.");
    leaderAlive.setStateChangeListener(createLeaderLivenessListener());
    leaderAlive.scheduleTask();
}
Also used : StreamMetadataCache(org.apache.samza.system.StreamMetadataCache) SystemAdmins(org.apache.samza.system.SystemAdmins)

Example 2 with StreamMetadataCache

Use of org.apache.samza.system.StreamMetadataCache in the Apache Samza project.

Class PassthroughJobCoordinator, method getJobModel.

/**
 * Calculates the {@link JobModel} for this coordinator.
 *
 * <p>A dedicated {@code SystemAdmins} instance is created and started for the duration of the
 * calculation and is always stopped afterwards, whether or not the calculation succeeds. The
 * grouper metadata passed to the calculator contains a single processor-to-location entry
 * (this processor's id mapped to {@code locationId}) and otherwise empty maps.
 *
 * @return the job model computed from the current config.
 */
@Override
public JobModel getJobModel() {
    final SystemAdmins admins = new SystemAdmins(config, this.getClass().getSimpleName());
    final StreamMetadataCache metadataCache = new StreamMetadataCache(admins, 5000, SystemClock.instance());
    // The admins must be running before the metadata cache is queried by the calculator.
    admins.start();
    try {
        final String processorId = Integer.toString(config.getInt(JobConfig.PROCESSOR_ID));
        final GrouperMetadata singleProcessorMetadata = new GrouperMetadataImpl(
            ImmutableMap.of(processorId, locationId),
            Collections.emptyMap(),
            Collections.emptyMap(),
            Collections.emptyMap());
        return JobModelCalculator.INSTANCE.calculateJobModel(
            this.config, Collections.emptyMap(), metadataCache, singleProcessorMetadata);
    } finally {
        admins.stop();
    }
}
Also used : StreamMetadataCache(org.apache.samza.system.StreamMetadataCache) GrouperMetadataImpl(org.apache.samza.container.grouper.task.GrouperMetadataImpl) GrouperMetadata(org.apache.samza.container.grouper.task.GrouperMetadata) SystemAdmins(org.apache.samza.system.SystemAdmins)

Example 3 with StreamMetadataCache

Use of org.apache.samza.system.StreamMetadataCache in the Apache Samza project.

Class JobModelCalculator, method getInputStreamPartitions.

/**
 * Computes the input system stream partitions of a samza job using the provided {@code config}
 * and {@code streamMetadataCache}.
 *
 * <p>The input streams are read from the job's {@link TaskConfig}; their metadata is fetched
 * from the cache, and every partition key found in that metadata is paired with its stream.
 *
 * @param config the configuration of the job.
 * @param streamMetadataCache to query the partition metadata of the input streams.
 * @return a new mutable set holding the input {@link SystemStreamPartition}s of the samza job.
 */
private static Set<SystemStreamPartition> getInputStreamPartitions(Config config, StreamMetadataCache streamMetadataCache) {
    TaskConfig taskConfig = new TaskConfig(config);
    // Get the set of partitions for each SystemStream from the stream metadata.
    // The cache exposes a Scala API, so the Java input-stream set is converted to a Scala set
    // on the way in and the resulting Scala map is converted back to a Java map.
    // NOTE(review): the meaning of the trailing 'true' flag is not visible here
    // (presumably "partitions metadata only") -- confirm against StreamMetadataCache.
    Map<SystemStream, SystemStreamMetadata> allMetadata = JavaConverters.mapAsJavaMapConverter(
        streamMetadataCache.getStreamMetadata(
            JavaConverters.asScalaSetConverter(taskConfig.getInputStreams()).asScala().toSet(), true)).asJava();
    // Expand each stream into one SystemStreamPartition per partition key. Collecting into an
    // explicit HashSet keeps the returned set mutable, as before.
    return allMetadata.entrySet().stream()
        .flatMap(entry -> entry.getValue().getSystemStreamPartitionMetadata().keySet().stream()
            .map(partition -> new SystemStreamPartition(entry.getKey(), partition)))
        .collect(Collectors.toCollection(HashSet::new));
}
Also used : ConfigException(org.apache.samza.config.ConfigException) StreamMetadataCache(org.apache.samza.system.StreamMetadataCache) ConfigUtil(org.apache.samza.util.ConfigUtil) SystemStreamPartitionMatcher(org.apache.samza.system.SystemStreamPartitionMatcher) SystemStreamPartitionGrouperFactory(org.apache.samza.container.grouper.stream.SystemStreamPartitionGrouperFactory) LoggerFactory(org.slf4j.LoggerFactory) JobConfig(org.apache.samza.config.JobConfig) HashMap(java.util.HashMap) ClusterManagerConfig(org.apache.samza.config.ClusterManagerConfig) TaskModel(org.apache.samza.job.model.TaskModel) SSPGrouperProxy(org.apache.samza.container.grouper.stream.SSPGrouperProxy) SystemStreamPartition(org.apache.samza.system.SystemStreamPartition) Function(java.util.function.Function) SystemStreamMetadata(org.apache.samza.system.SystemStreamMetadata) ArrayList(java.util.ArrayList) HashSet(java.util.HashSet) SystemStream(org.apache.samza.system.SystemStream) Map(java.util.Map) GrouperMetadata(org.apache.samza.container.grouper.task.GrouperMetadata) MapConfig(org.apache.samza.config.MapConfig) JobModel(org.apache.samza.job.model.JobModel) RegExTopicGenerator(org.apache.samza.config.RegExTopicGenerator) TaskName(org.apache.samza.container.TaskName) Logger(org.slf4j.Logger) TaskConfig(org.apache.samza.config.TaskConfig) TaskNameGrouperFactory(org.apache.samza.container.grouper.task.TaskNameGrouperFactory) Partition(org.apache.samza.Partition) Set(java.util.Set) Collectors(java.util.stream.Collectors) SystemStreamPartitionGrouper(org.apache.samza.container.grouper.stream.SystemStreamPartitionGrouper) TreeMap(java.util.TreeMap) TaskNameGrouperProxy(org.apache.samza.container.grouper.task.TaskNameGrouperProxy) ReflectionUtil(org.apache.samza.util.ReflectionUtil) ContainerModel(org.apache.samza.job.model.ContainerModel) JavaConverters(scala.collection.JavaConverters) Optional(java.util.Optional) Pattern(java.util.regex.Pattern) Config(org.apache.samza.config.Config) 
Comparator(java.util.Comparator) SystemStream(org.apache.samza.system.SystemStream) SystemStreamMetadata(org.apache.samza.system.SystemStreamMetadata) TaskConfig(org.apache.samza.config.TaskConfig) SystemStreamPartition(org.apache.samza.system.SystemStreamPartition) HashSet(java.util.HashSet)

Example 4 with StreamMetadataCache

Use of org.apache.samza.system.StreamMetadataCache in the Apache Samza project.

Class StorageRecovery, method getTaskStorageManagers.

/**
 * Builds one {@code TaskStorageManager} for every task of every container and appends each of
 * them to {@code taskStorageManagers}.
 *
 * <p>For each task, a storage engine is created for every configured store that has an entry
 * in {@code changeLogSystemStreams}; stores without a changelog stream are skipped.
 */
@SuppressWarnings({ "unchecked", "rawtypes" })
private void getTaskStorageManagers() {
    StreamMetadataCache metadataCache = new StreamMetadataCache(Util.javaMapAsScalaMap(systemAdmins), 5000, SystemClock.instance());

    for (ContainerModel container : containers.values()) {
        // The engine map is created once per container and reused by all of its tasks; each
        // task overwrites the entries for the stores it builds.
        HashMap<String, StorageEngine> enginesByStore = new HashMap<>();
        SamzaContainerContext context = new SamzaContainerContext(
            container.getProcessorId(), jobConfig, container.getTasks().keySet());

        for (TaskModel task : container.getTasks().values()) {
            HashMap<String, SystemConsumer> consumersByStore = getStoreConsumers();

            for (Entry<String, StorageEngineFactory<?, ?>> factoryEntry : storageEngineFactories.entrySet()) {
                String storeName = factoryEntry.getKey();
                if (!changeLogSystemStreams.containsKey(storeName)) {
                    // Only stores with a changelog stream get a storage engine.
                    continue;
                }

                SystemStreamPartition changelogPartition = new SystemStreamPartition(
                    changeLogSystemStreams.get(storeName), task.getChangelogPartition());
                File storeDir = TaskStorageManager.getStorePartitionDir(storeBaseDir, storeName, task.getTaskName());
                log.info("Got storage engine directory: " + storeDir);

                StorageEngineFactory<?, ?> engineFactory = factoryEntry.getValue();
                StorageEngine engine = engineFactory.getStorageEngine(
                    storeName,
                    storeDir,
                    (Serde) new ByteSerde(),
                    (Serde) new ByteSerde(),
                    null,
                    new MetricsRegistryMap(),
                    changelogPartition,
                    context);
                enginesByStore.put(storeName, engine);
            }

            TaskStorageManager manager = new TaskStorageManager(
                task.getTaskName(),
                Util.javaMapAsScalaMap(enginesByStore),
                Util.javaMapAsScalaMap(consumersByStore),
                Util.javaMapAsScalaMap(changeLogSystemStreams),
                maxPartitionNumber,
                metadataCache,
                storeBaseDir,
                storeBaseDir,
                task.getChangelogPartition(),
                Util.javaMapAsScalaMap(systemAdmins),
                new StorageConfig(jobConfig).getChangeLogDeleteRetentionsInMs(),
                new SystemClock());
            taskStorageManagers.add(manager);
        }
    }
}
Also used : StreamMetadataCache(org.apache.samza.system.StreamMetadataCache) SamzaContainerContext(org.apache.samza.container.SamzaContainerContext) SystemConsumer(org.apache.samza.system.SystemConsumer) SystemClock(org.apache.samza.util.SystemClock) HashMap(java.util.HashMap) StorageConfig(org.apache.samza.config.StorageConfig) JavaStorageConfig(org.apache.samza.config.JavaStorageConfig) ContainerModel(org.apache.samza.job.model.ContainerModel) ByteSerde(org.apache.samza.serializers.ByteSerde) MetricsRegistryMap(org.apache.samza.metrics.MetricsRegistryMap) File(java.io.File) TaskModel(org.apache.samza.job.model.TaskModel) SystemStreamPartition(org.apache.samza.system.SystemStreamPartition)

Example 5 with StreamMetadataCache

Use of org.apache.samza.system.StreamMetadataCache in the Apache Samza project.

Class SamzaTaskProxy, method getJobModel.

/**
 * Retrieves the jobModel from the jobCoordinator.
 *
 * <p>A coordinator stream consumer and producer are created from the job instance's
 * coordinator system config. The consumer is registered, started and bootstrapped, the job
 * config is read from it, and the job model is then read via {@code JobModelManager}. The
 * consumer and producer are stopped before returning, whether or not an exception is thrown.
 *
 * @param jobInstance the job instance (jobId, jobName).
 * @return the JobModel fetched from the coordinator stream.
 */
protected JobModel getJobModel(JobInstance jobInstance) {
    // Declared outside the try block so the finally block can stop whichever one was created.
    CoordinatorStreamSystemConsumer coordinatorSystemConsumer = null;
    CoordinatorStreamSystemProducer coordinatorSystemProducer = null;
    try {
        CoordinatorStreamSystemFactory coordinatorStreamSystemFactory = new CoordinatorStreamSystemFactory();
        Config coordinatorSystemConfig = getCoordinatorSystemConfig(jobInstance);
        LOG.info("Using config: {} to create coordinatorStream producer and consumer.", coordinatorSystemConfig);
        coordinatorSystemConsumer = coordinatorStreamSystemFactory.getCoordinatorStreamSystemConsumer(coordinatorSystemConfig, METRICS_REGISTRY);
        coordinatorSystemProducer = coordinatorStreamSystemFactory.getCoordinatorStreamSystemProducer(coordinatorSystemConfig, METRICS_REGISTRY);
        // The consumer is registered, started and bootstrapped, in that order, before its config is read.
        LOG.info("Registering coordinator system stream consumer.");
        coordinatorSystemConsumer.register();
        LOG.debug("Starting coordinator system stream consumer.");
        coordinatorSystemConsumer.start();
        LOG.debug("Bootstrapping coordinator system stream consumer.");
        coordinatorSystemConsumer.bootstrap();
        LOG.info("Registering coordinator system stream producer.");
        coordinatorSystemProducer.register(SOURCE);
        Config config = coordinatorSystemConsumer.getConfig();
        LOG.info("Got config from coordinatorSystemConsumer: {}.", config);
        // NOTE(review): both managers below are started here, but no matching stop() call is
        // visible in this method -- confirm that stopping the shared consumer/producer in the
        // finally block is sufficient cleanup.
        ChangelogPartitionManager changelogManager = new ChangelogPartitionManager(coordinatorSystemProducer, coordinatorSystemConsumer, SOURCE);
        changelogManager.start();
        LocalityManager localityManager = new LocalityManager(coordinatorSystemProducer, coordinatorSystemConsumer);
        localityManager.start();
        String jobCoordinatorSystemName = config.get(JobConfig.JOB_COORDINATOR_SYSTEM());
        // Select job coordinator system properties from config and instantiate SystemAdmin for it alone.
        // Instantiating SystemAdmin's for other input/output systems defined in config is unnecessary.
        Config systemAdminConfig = config.subset(String.format("systems.%s", jobCoordinatorSystemName), false);
        scala.collection.immutable.Map<String, SystemAdmin> systemAdmins = JobModelManager.getSystemAdmins(systemAdminConfig);
        // NOTE(review): the literal 0 is presumably the cache TTL in milliseconds (i.e. no
        // caching) -- confirm against the StreamMetadataCache constructor.
        StreamMetadataCache streamMetadataCache = new StreamMetadataCache(systemAdmins, 0, SystemClock.instance());
        Map<TaskName, Integer> changeLogPartitionMapping = changelogManager.readChangeLogPartitionMapping();
        // NOTE(review): the meaning of the trailing null argument is not visible here -- confirm
        // against JobModelManager.readJobModel.
        return JobModelManager.readJobModel(config, changeLogPartitionMapping, localityManager, streamMetadataCache, null);
    } finally {
        // Null checks cover the case where creation failed before the variable was assigned.
        if (coordinatorSystemConsumer != null) {
            coordinatorSystemConsumer.stop();
        }
        if (coordinatorSystemProducer != null) {
            coordinatorSystemProducer.stop();
        }
    }
}
Also used : StreamMetadataCache(org.apache.samza.system.StreamMetadataCache) JobConfig(org.apache.samza.config.JobConfig) MapConfig(org.apache.samza.config.MapConfig) StorageConfig(org.apache.samza.config.StorageConfig) Config(org.apache.samza.config.Config) CoordinatorStreamSystemFactory(org.apache.samza.coordinator.stream.CoordinatorStreamSystemFactory) CoordinatorStreamSystemProducer(org.apache.samza.coordinator.stream.CoordinatorStreamSystemProducer) CoordinatorStreamSystemConsumer(org.apache.samza.coordinator.stream.CoordinatorStreamSystemConsumer) TaskName(org.apache.samza.container.TaskName) ChangelogPartitionManager(org.apache.samza.storage.ChangelogPartitionManager) SystemAdmin(org.apache.samza.system.SystemAdmin) LocalityManager(org.apache.samza.container.LocalityManager)

Aggregations

StreamMetadataCache (org.apache.samza.system.StreamMetadataCache)13 HashMap (java.util.HashMap)7 HashSet (java.util.HashSet)5 SystemAdmins (org.apache.samza.system.SystemAdmins)5 SystemStream (org.apache.samza.system.SystemStream)5 SystemStreamPartition (org.apache.samza.system.SystemStreamPartition)5 Map (java.util.Map)4 Partition (org.apache.samza.Partition)4 Config (org.apache.samza.config.Config)4 StorageConfig (org.apache.samza.config.StorageConfig)4 ContainerModel (org.apache.samza.job.model.ContainerModel)4 SystemStreamMetadata (org.apache.samza.system.SystemStreamMetadata)4 Set (java.util.Set)3 JobConfig (org.apache.samza.config.JobConfig)3 MapConfig (org.apache.samza.config.MapConfig)3 TaskConfig (org.apache.samza.config.TaskConfig)3 TaskName (org.apache.samza.container.TaskName)3 TaskModel (org.apache.samza.job.model.TaskModel)3 SystemClock (org.apache.samza.util.SystemClock)3 File (java.io.File)2