Example 31 with TaskName

Use of org.apache.samza.container.TaskName in project samza by apache.

The class TestAsyncRunLoop, method testEndOfStreamOffsetManagement.

// TODO: Add assertions.
//@Test
public void testEndOfStreamOffsetManagement() throws Exception {
    // Explicitly disable commits inside process and window calls; commit is invoked at end of stream instead.
    TestTask mockStreamTask1 = new TestTask(true, false, false, null);
    TestTask mockStreamTask2 = new TestTask(true, false, false, null);
    Partition p1 = new Partition(1);
    Partition p2 = new Partition(2);
    SystemStreamPartition ssp1 = new SystemStreamPartition("system1", "stream1", p1);
    SystemStreamPartition ssp2 = new SystemStreamPartition("system1", "stream2", p2);
    IncomingMessageEnvelope envelope1 = new IncomingMessageEnvelope(ssp2, "1", "key1", "message1");
    IncomingMessageEnvelope envelope2 = new IncomingMessageEnvelope(ssp2, "2", "key1", "message1");
    IncomingMessageEnvelope envelope3 = IncomingMessageEnvelope.buildEndOfStreamEnvelope(ssp2);
    Map<SystemStreamPartition, List<IncomingMessageEnvelope>> sspMap = new HashMap<>();
    List<IncomingMessageEnvelope> messageList = new ArrayList<>();
    messageList.add(envelope1);
    messageList.add(envelope2);
    messageList.add(envelope3);
    sspMap.put(ssp2, messageList);
    SystemConsumer mockConsumer = mock(SystemConsumer.class);
    when(mockConsumer.poll(anyObject(), anyLong())).thenReturn(sspMap);
    HashMap<String, SystemConsumer> systemConsumerMap = new HashMap<>();
    systemConsumerMap.put("system1", mockConsumer);
    SystemConsumers consumers = TestSystemConsumers.getSystemConsumers(systemConsumerMap);
    TaskName taskName1 = new TaskName("task1");
    TaskName taskName2 = new TaskName("task2");
    Set<TaskName> taskNames = new HashSet<>();
    taskNames.add(taskName1);
    taskNames.add(taskName2);
    OffsetManager offsetManager = mock(OffsetManager.class);
    when(offsetManager.getLastProcessedOffset(taskName1, ssp1)).thenReturn(Option.apply("3"));
    when(offsetManager.getLastProcessedOffset(taskName2, ssp2)).thenReturn(Option.apply("0"));
    when(offsetManager.getStartingOffset(taskName1, ssp1)).thenReturn(Option.apply(IncomingMessageEnvelope.END_OF_STREAM_OFFSET));
    when(offsetManager.getStartingOffset(taskName2, ssp2)).thenReturn(Option.apply("1"));
    TaskInstance taskInstance1 = createTaskInstance(mockStreamTask1, taskName1, ssp1, offsetManager, consumers);
    TaskInstance taskInstance2 = createTaskInstance(mockStreamTask2, taskName2, ssp2, offsetManager, consumers);
    Map<TaskName, TaskInstance> tasks = new HashMap<>();
    tasks.put(taskName1, taskInstance1);
    tasks.put(taskName2, taskInstance2);
    taskInstance1.registerConsumers();
    taskInstance2.registerConsumers();
    consumers.start();
    int maxMessagesInFlight = 1;
    AsyncRunLoop runLoop = new AsyncRunLoop(tasks, executor, consumers, maxMessagesInFlight, windowMs, commitMs, callbackTimeoutMs, maxThrottlingDelayMs, containerMetrics, () -> 0L, false);
    runLoop.run();
}
Also used: SystemStreamPartition (org.apache.samza.system.SystemStreamPartition), Partition (org.apache.samza.Partition), SystemConsumer (org.apache.samza.system.SystemConsumer), TaskInstance (org.apache.samza.container.TaskInstance), SystemConsumers (org.apache.samza.system.SystemConsumers), TestSystemConsumers (org.apache.samza.system.TestSystemConsumers), HashMap (java.util.HashMap), IncomingMessageEnvelope (org.apache.samza.system.IncomingMessageEnvelope), OffsetManager (org.apache.samza.checkpoint.OffsetManager), ArrayList (java.util.ArrayList), List (java.util.List), TaskName (org.apache.samza.container.TaskName), HashSet (java.util.HashSet)
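
A minimal sketch of how a processing loop could react to the end-of-stream envelope built above (imports as in the list above). It assumes IncomingMessageEnvelope.isEndOfStream() is available, which compares the envelope's offset to END_OF_STREAM_OFFSET in current Samza versions; the helper name is illustrative.

private static void drainUntilEndOfStream(List<IncomingMessageEnvelope> envelopes) {
    for (IncomingMessageEnvelope envelope : envelopes) {
        if (envelope.isEndOfStream()) {
            // With commits disabled in process/window, the run loop commits here instead.
            break;
        }
        // A real task would process(envelope) here.
    }
}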

Example 32 with TaskName

Use of org.apache.samza.container.TaskName in project samza by apache.

The class ChangelogPartitionManager, method writeChangeLogPartitionMapping.

/**
   * Write the taskName-to-partition mapping that is maintained by this ChangelogManager.
   * @param changelogEntries the entries to write to the coordinator stream; the map keys are
   *                         taskNames and the values are their corresponding changelog partitions.
   */
public void writeChangeLogPartitionMapping(Map<TaskName, Integer> changelogEntries) {
    log.debug("Updating changelog information with: ");
    for (Map.Entry<TaskName, Integer> entry : changelogEntries.entrySet()) {
        log.debug("TaskName: {} to Partition: {}", entry.getKey().getTaskName(), entry.getValue());
        send(new SetChangelogMapping(getSource(), entry.getKey().getTaskName(), entry.getValue()));
    }
}
Also used: TaskName (org.apache.samza.container.TaskName), SetChangelogMapping (org.apache.samza.coordinator.stream.messages.SetChangelogMapping), Map (java.util.Map), HashMap (java.util.HashMap)
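
A short usage sketch: assigning consecutive changelog partitions to a set of tasks and persisting the mapping. The helper name and the naive sequential assignment are illustrative only; the manager is assumed to be constructed and started as in Example 33 below, and real jobs would first consult readChangeLogPartitionMapping() to reuse prior assignments.

static void assignChangelogPartitions(ChangelogPartitionManager changelogManager, Set<TaskName> taskNames) {
    Map<TaskName, Integer> mapping = new HashMap<>();
    int partition = 0;
    for (TaskName taskName : taskNames) {
        // One changelog partition per task, assigned in iteration order.
        mapping.put(taskName, partition++);
    }
    changelogManager.writeChangeLogPartitionMapping(mapping);
}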

Example 33 with TaskName

Use of org.apache.samza.container.TaskName in project samza by apache.

The class SamzaTaskProxy, method getJobModel.

/**
   * Retrieves the jobModel from the jobCoordinator.
   * @param jobInstance the job instance (jobId, jobName).
   * @return the JobModel fetched from the coordinator stream.
   */
protected JobModel getJobModel(JobInstance jobInstance) {
    CoordinatorStreamSystemConsumer coordinatorSystemConsumer = null;
    CoordinatorStreamSystemProducer coordinatorSystemProducer = null;
    try {
        CoordinatorStreamSystemFactory coordinatorStreamSystemFactory = new CoordinatorStreamSystemFactory();
        Config coordinatorSystemConfig = getCoordinatorSystemConfig(jobInstance);
        LOG.info("Using config: {} to create coordinatorStream producer and consumer.", coordinatorSystemConfig);
        coordinatorSystemConsumer = coordinatorStreamSystemFactory.getCoordinatorStreamSystemConsumer(coordinatorSystemConfig, METRICS_REGISTRY);
        coordinatorSystemProducer = coordinatorStreamSystemFactory.getCoordinatorStreamSystemProducer(coordinatorSystemConfig, METRICS_REGISTRY);
        LOG.info("Registering coordinator system stream consumer.");
        coordinatorSystemConsumer.register();
        LOG.debug("Starting coordinator system stream consumer.");
        coordinatorSystemConsumer.start();
        LOG.debug("Bootstrapping coordinator system stream consumer.");
        coordinatorSystemConsumer.bootstrap();
        LOG.info("Registering coordinator system stream producer.");
        coordinatorSystemProducer.register(SOURCE);
        Config config = coordinatorSystemConsumer.getConfig();
        LOG.info("Got config from coordinatorSystemConsumer: {}.", config);
        ChangelogPartitionManager changelogManager = new ChangelogPartitionManager(coordinatorSystemProducer, coordinatorSystemConsumer, SOURCE);
        changelogManager.start();
        LocalityManager localityManager = new LocalityManager(coordinatorSystemProducer, coordinatorSystemConsumer);
        localityManager.start();
        String jobCoordinatorSystemName = config.get(JobConfig.JOB_COORDINATOR_SYSTEM());
        /**
       * Select the job coordinator system's properties from config and instantiate a SystemAdmin
       * for it alone. Instantiating SystemAdmins for the other input/output systems defined in
       * config is unnecessary.
       */
        Config systemAdminConfig = config.subset(String.format("systems.%s", jobCoordinatorSystemName), false);
        scala.collection.immutable.Map<String, SystemAdmin> systemAdmins = JobModelManager.getSystemAdmins(systemAdminConfig);
        StreamMetadataCache streamMetadataCache = new StreamMetadataCache(systemAdmins, 0, SystemClock.instance());
        Map<TaskName, Integer> changeLogPartitionMapping = changelogManager.readChangeLogPartitionMapping();
        return JobModelManager.readJobModel(config, changeLogPartitionMapping, localityManager, streamMetadataCache, null);
    } finally {
        if (coordinatorSystemConsumer != null) {
            coordinatorSystemConsumer.stop();
        }
        if (coordinatorSystemProducer != null) {
            coordinatorSystemProducer.stop();
        }
    }
}
Also used: StreamMetadataCache (org.apache.samza.system.StreamMetadataCache), JobConfig (org.apache.samza.config.JobConfig), MapConfig (org.apache.samza.config.MapConfig), StorageConfig (org.apache.samza.config.StorageConfig), Config (org.apache.samza.config.Config), CoordinatorStreamSystemFactory (org.apache.samza.coordinator.stream.CoordinatorStreamSystemFactory), CoordinatorStreamSystemProducer (org.apache.samza.coordinator.stream.CoordinatorStreamSystemProducer), CoordinatorStreamSystemConsumer (org.apache.samza.coordinator.stream.CoordinatorStreamSystemConsumer), TaskName (org.apache.samza.container.TaskName), ChangelogPartitionManager (org.apache.samza.storage.ChangelogPartitionManager), SystemAdmin (org.apache.samza.system.SystemAdmin), LocalityManager (org.apache.samza.container.LocalityManager)
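
The register → start → bootstrap ordering is the important part of this method: bootstrap() reads the coordinator stream to its current end, so the subsequent getConfig() call sees every configuration message written so far. A condensed sketch of just that lifecycle, using only the calls shown above (coordinatorSystemConfig and METRICS_REGISTRY as in the example):

CoordinatorStreamSystemConsumer consumer = new CoordinatorStreamSystemFactory()
    .getCoordinatorStreamSystemConsumer(coordinatorSystemConfig, METRICS_REGISTRY);
try {
    consumer.register();
    consumer.start();
    // Without bootstrap(), getConfig() would reflect only a partial view of the stream.
    consumer.bootstrap();
    Config config = consumer.getConfig();
    // ... build the ChangelogPartitionManager, LocalityManager, and JobModel from config ...
} finally {
    consumer.stop();
}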

Example 34 with TaskName

Use of org.apache.samza.container.TaskName in project samza by apache.

The class LocalStoreMonitor, method monitor.

/**
   * This monitor method is invoked periodically to delete the stale state stores
   * of dead jobs/tasks.
   * @throws Exception if there was any problem in running the monitor.
   */
@Override
public void monitor() throws Exception {
    File localStoreDir = new File(config.getLocalStoreBaseDir());
    Preconditions.checkState(localStoreDir.isDirectory(), String.format("LocalStoreDir: %s is not a directory", localStoreDir.getAbsolutePath()));
    String localHostName = InetAddress.getLocalHost().getHostName();
    for (JobInstance jobInstance : getHostAffinityEnabledJobs(localStoreDir)) {
        File jobDir = new File(localStoreDir, String.format("%s-%s", jobInstance.getJobName(), jobInstance.getJobId()));
        try {
            JobStatus jobStatus = jobsClient.getJobStatus(jobInstance);
            for (Task task : jobsClient.getTasks(jobInstance)) {
                for (String storeName : jobDir.list(DirectoryFileFilter.DIRECTORY)) {
                    LOG.info("Job: {} has the running status: {} with preferred host: {}.", jobInstance, jobStatus, task.getPreferredHost());
                    /**
              *  A task store is active if all of the following conditions are true:
              *  a) the store is among the active stores of the task,
              *  b) the job has been started, and
              *  c) the task's preferred host is the localhost on which this monitor runs.
              */
                    if (jobStatus.hasBeenStarted() && task.getStoreNames().contains(storeName) && task.getPreferredHost().equals(localHostName)) {
                        LOG.info(String.format("Store %s is actively used by the task: %s.", storeName, task.getTaskName()));
                    } else {
                        LOG.info(String.format("Store %s not used by the task: %s.", storeName, task.getTaskName()));
                        markSweepTaskStore(TaskStorageManager.getStorePartitionDir(jobDir, storeName, new TaskName(task.getTaskName())));
                    }
                }
            }
        } catch (Exception ex) {
            if (!config.getIgnoreFailures()) {
                throw ex;
            }
            LOG.warn("Config: {} turned on, failures will be ignored. Local store cleanup for job: {} resulted in exception: {}.", new Object[] { LocalStoreMonitorConfig.CONFIG_IGNORE_FAILURES, jobInstance, ex });
        }
    }
}
Also used: JobStatus (org.apache.samza.rest.model.JobStatus), Task (org.apache.samza.rest.model.Task), JobInstance (org.apache.samza.rest.proxy.job.JobInstance), TaskName (org.apache.samza.container.TaskName), File (java.io.File), IOException (java.io.IOException)
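
The keep-or-sweep decision above is a three-part predicate; extracting it makes the loop easier to read. A hedged refactoring sketch (the helper name is illustrative) using only the accessors shown in the example:

static boolean isTaskStoreActive(JobStatus jobStatus, Task task, String storeName, String localHostName) {
    // Active means: the job is running, the task declares the store, and the task prefers this host.
    return jobStatus.hasBeenStarted()
        && task.getStoreNames().contains(storeName)
        && task.getPreferredHost().equals(localHostName);
}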

Example 35 with TaskName

Use of org.apache.samza.container.TaskName in project samza by apache.

The class GroupByPartition, method group.

@Override
public Map<TaskName, Set<SystemStreamPartition>> group(Set<SystemStreamPartition> ssps) {
    Map<TaskName, Set<SystemStreamPartition>> groupedMap = new HashMap<TaskName, Set<SystemStreamPartition>>();
    for (SystemStreamPartition ssp : ssps) {
        // Skip broadcast streams, if there are any.
        if (broadcastStreams.contains(ssp)) {
            continue;
        }
        TaskName taskName = new TaskName("Partition " + ssp.getPartition().getPartitionId());
        if (!groupedMap.containsKey(taskName)) {
            groupedMap.put(taskName, new HashSet<SystemStreamPartition>());
        }
        groupedMap.get(taskName).add(ssp);
    }
    // assign the broadcast streams to all the taskNames
    if (!broadcastStreams.isEmpty()) {
        for (Set<SystemStreamPartition> value : groupedMap.values()) {
            for (SystemStreamPartition ssp : broadcastStreams) {
                value.add(ssp);
            }
        }
    }
    return groupedMap;
}
Also used: HashSet (java.util.HashSet), Set (java.util.Set), HashMap (java.util.HashMap), TaskName (org.apache.samza.container.TaskName), SystemStreamPartition (org.apache.samza.system.SystemStreamPartition)
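
The grouping key is just the partition id, so SSPs from different streams that share a partition id are assigned to the same task. A self-contained sketch of that convention (the system and stream names are made up), reproducing the grouping logic inline rather than calling GroupByPartition:

Set<SystemStreamPartition> ssps = new HashSet<>();
ssps.add(new SystemStreamPartition("kafka", "orders", new Partition(0)));
ssps.add(new SystemStreamPartition("kafka", "payments", new Partition(0)));
ssps.add(new SystemStreamPartition("kafka", "orders", new Partition(1)));

Map<TaskName, Set<SystemStreamPartition>> grouped = new HashMap<>();
for (SystemStreamPartition ssp : ssps) {
    // The task name encodes only the partition id, not the stream.
    TaskName taskName = new TaskName("Partition " + ssp.getPartition().getPartitionId());
    grouped.computeIfAbsent(taskName, k -> new HashSet<>()).add(ssp);
}
// grouped now maps TaskName("Partition 0") to {orders/0, payments/0}
// and TaskName("Partition 1") to {orders/1}.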

Aggregations

TaskName (org.apache.samza.container.TaskName): 38 usages
HashMap (java.util.HashMap): 30 usages
TaskInstance (org.apache.samza.container.TaskInstance): 13 usages
SystemStreamPartition (org.apache.samza.system.SystemStreamPartition): 13 usages
CountDownLatch (java.util.concurrent.CountDownLatch): 11 usages
HashSet (java.util.HashSet): 10 usages
Test (org.junit.Test): 10 usages
ContainerModel (org.apache.samza.job.model.ContainerModel): 9 usages
TaskModel (org.apache.samza.job.model.TaskModel): 8 usages
Set (java.util.Set): 7 usages
MapConfig (org.apache.samza.config.MapConfig): 7 usages
Partition (org.apache.samza.Partition): 6 usages
Config (org.apache.samza.config.Config): 6 usages
IncomingMessageEnvelope (org.apache.samza.system.IncomingMessageEnvelope): 6 usages
JobModel (org.apache.samza.job.model.JobModel): 5 usages
Map (java.util.Map): 3 usages
JobModelManager (org.apache.samza.coordinator.JobModelManager): 3 usages
IOException (java.io.IOException): 2 usages
LocalityManager (org.apache.samza.container.LocalityManager): 2 usages
HttpServer (org.apache.samza.coordinator.server.HttpServer): 2 usages