Use of org.apache.samza.container.TaskName in project samza by apache.
The class TestAsyncRunLoop, method testEndOfStreamOffsetManagement.
// TODO: Add assertions.
//@Test
public void testEndOfStreamOffsetManagement() throws Exception {
  // Explicitly configure to disable commits inside process or window calls and invoke commit from end of stream.
  TestTask mockStreamTask1 = new TestTask(true, false, false, null);
  TestTask mockStreamTask2 = new TestTask(true, false, false, null);
  Partition p1 = new Partition(1);
  Partition p2 = new Partition(2);
  SystemStreamPartition ssp1 = new SystemStreamPartition("system1", "stream1", p1);
  SystemStreamPartition ssp2 = new SystemStreamPartition("system1", "stream2", p2);
  IncomingMessageEnvelope envelope1 = new IncomingMessageEnvelope(ssp2, "1", "key1", "message1");
  IncomingMessageEnvelope envelope2 = new IncomingMessageEnvelope(ssp2, "2", "key1", "message1");
  IncomingMessageEnvelope envelope3 = IncomingMessageEnvelope.buildEndOfStreamEnvelope(ssp2);
  Map<SystemStreamPartition, List<IncomingMessageEnvelope>> sspMap = new HashMap<>();
  List<IncomingMessageEnvelope> messageList = new ArrayList<>();
  messageList.add(envelope1);
  messageList.add(envelope2);
  messageList.add(envelope3);
  sspMap.put(ssp2, messageList);
  SystemConsumer mockConsumer = mock(SystemConsumer.class);
  when(mockConsumer.poll(anyObject(), anyLong())).thenReturn(sspMap);
  HashMap<String, SystemConsumer> systemConsumerMap = new HashMap<>();
  systemConsumerMap.put("system1", mockConsumer);
  SystemConsumers consumers = TestSystemConsumers.getSystemConsumers(systemConsumerMap);
  TaskName taskName1 = new TaskName("task1");
  TaskName taskName2 = new TaskName("task2");
  Set<TaskName> taskNames = new HashSet<>();
  taskNames.add(taskName1);
  taskNames.add(taskName2);
  OffsetManager offsetManager = mock(OffsetManager.class);
  when(offsetManager.getLastProcessedOffset(taskName1, ssp1)).thenReturn(Option.apply("3"));
  when(offsetManager.getLastProcessedOffset(taskName2, ssp2)).thenReturn(Option.apply("0"));
  when(offsetManager.getStartingOffset(taskName1, ssp1)).thenReturn(Option.apply(IncomingMessageEnvelope.END_OF_STREAM_OFFSET));
  when(offsetManager.getStartingOffset(taskName2, ssp2)).thenReturn(Option.apply("1"));
  TaskInstance taskInstance1 = createTaskInstance(mockStreamTask1, taskName1, ssp1, offsetManager, consumers);
  TaskInstance taskInstance2 = createTaskInstance(mockStreamTask2, taskName2, ssp2, offsetManager, consumers);
  Map<TaskName, TaskInstance> tasks = new HashMap<>();
  tasks.put(taskName1, taskInstance1);
  tasks.put(taskName2, taskInstance2);
  taskInstance1.registerConsumers();
  taskInstance2.registerConsumers();
  consumers.start();
  int maxMessagesInFlight = 1;
  AsyncRunLoop runLoop = new AsyncRunLoop(tasks, executor, consumers, maxMessagesInFlight, windowMs, commitMs, callbackTimeoutMs, maxThrottlingDelayMs, containerMetrics, () -> 0L, false);
  runLoop.run();
}
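The TODO above leaves the test without assertions, but the end-of-stream contract the run loop relies on can be illustrated directly. A minimal sketch: buildEndOfStreamEnvelope produces an envelope carrying the END_OF_STREAM_OFFSET sentinel (both names appear in the test above; getOffset() is the standard IncomingMessageEnvelope accessor).

  // Sketch of the end-of-stream contract this test exercises: the sentinel
  // offset is what lets the run loop detect that a partition is drained.
  SystemStreamPartition ssp = new SystemStreamPartition("system1", "stream2", new Partition(2));
  IncomingMessageEnvelope eos = IncomingMessageEnvelope.buildEndOfStreamEnvelope(ssp);
  assertEquals(IncomingMessageEnvelope.END_OF_STREAM_OFFSET, eos.getOffset());

Once the run loop drains ssp2 and sees this sentinel, the commit is expected to fire from end of stream rather than from process or window, per the TestTask flags configured above.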
Use of org.apache.samza.container.TaskName in project samza by apache.
The class ChangelogPartitionManager, method writeChangeLogPartitionMapping.
/**
 * Writes the taskName-to-partition mapping maintained by this ChangelogManager.
 * @param changelogEntries the entries to write to the coordinator stream; the map keys are
 *                         taskNames and the values are their corresponding changelog partitions.
 */
public void writeChangeLogPartitionMapping(Map<TaskName, Integer> changelogEntries) {
  log.debug("Updating changelog information with: ");
  for (Map.Entry<TaskName, Integer> entry : changelogEntries.entrySet()) {
    log.debug("TaskName: {} to Partition: {}", entry.getKey().getTaskName(), entry.getValue());
    send(new SetChangelogMapping(getSource(), entry.getKey().getTaskName(), entry.getValue()));
  }
}
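A minimal caller sketch, assuming a ChangelogPartitionManager named changelogManager that has already been started against the coordinator stream (as in getJobModel below):

  // Persist a taskName -> changelog partition assignment, then read it back.
  Map<TaskName, Integer> mapping = new HashMap<>();
  mapping.put(new TaskName("Partition 0"), 0);
  mapping.put(new TaskName("Partition 1"), 1);
  changelogManager.writeChangeLogPartitionMapping(mapping);
  // readChangeLogPartitionMapping() is the inverse, used by SamzaTaskProxy.getJobModel below.
  Map<TaskName, Integer> readBack = changelogManager.readChangeLogPartitionMapping();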
Use of org.apache.samza.container.TaskName in project samza by apache.
The class SamzaTaskProxy, method getJobModel.
/**
 * Retrieves the jobModel from the jobCoordinator.
 * @param jobInstance the job instance (jobId, jobName).
 * @return the JobModel fetched from the coordinator stream.
 */
protected JobModel getJobModel(JobInstance jobInstance) {
  CoordinatorStreamSystemConsumer coordinatorSystemConsumer = null;
  CoordinatorStreamSystemProducer coordinatorSystemProducer = null;
  try {
    CoordinatorStreamSystemFactory coordinatorStreamSystemFactory = new CoordinatorStreamSystemFactory();
    Config coordinatorSystemConfig = getCoordinatorSystemConfig(jobInstance);
    LOG.info("Using config: {} to create coordinatorStream producer and consumer.", coordinatorSystemConfig);
    coordinatorSystemConsumer = coordinatorStreamSystemFactory.getCoordinatorStreamSystemConsumer(coordinatorSystemConfig, METRICS_REGISTRY);
    coordinatorSystemProducer = coordinatorStreamSystemFactory.getCoordinatorStreamSystemProducer(coordinatorSystemConfig, METRICS_REGISTRY);
    LOG.info("Registering coordinator system stream consumer.");
    coordinatorSystemConsumer.register();
    LOG.debug("Starting coordinator system stream consumer.");
    coordinatorSystemConsumer.start();
    LOG.debug("Bootstrapping coordinator system stream consumer.");
    coordinatorSystemConsumer.bootstrap();
    LOG.info("Registering coordinator system stream producer.");
    coordinatorSystemProducer.register(SOURCE);
    Config config = coordinatorSystemConsumer.getConfig();
    LOG.info("Got config from coordinatorSystemConsumer: {}.", config);
    ChangelogPartitionManager changelogManager = new ChangelogPartitionManager(coordinatorSystemProducer, coordinatorSystemConsumer, SOURCE);
    changelogManager.start();
    LocalityManager localityManager = new LocalityManager(coordinatorSystemProducer, coordinatorSystemConsumer);
    localityManager.start();
    String jobCoordinatorSystemName = config.get(JobConfig.JOB_COORDINATOR_SYSTEM());
    /*
     * Select the job coordinator system properties from config and instantiate a SystemAdmin
     * for it alone. Instantiating SystemAdmins for the other input/output systems defined in
     * config is unnecessary.
     */
    Config systemAdminConfig = config.subset(String.format("systems.%s", jobCoordinatorSystemName), false);
    scala.collection.immutable.Map<String, SystemAdmin> systemAdmins = JobModelManager.getSystemAdmins(systemAdminConfig);
    StreamMetadataCache streamMetadataCache = new StreamMetadataCache(systemAdmins, 0, SystemClock.instance());
    Map<TaskName, Integer> changeLogPartitionMapping = changelogManager.readChangeLogPartitionMapping();
    return JobModelManager.readJobModel(config, changeLogPartitionMapping, localityManager, streamMetadataCache, null);
  } finally {
    if (coordinatorSystemConsumer != null) {
      coordinatorSystemConsumer.stop();
    }
    if (coordinatorSystemProducer != null) {
      coordinatorSystemProducer.stop();
    }
  }
}
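A hedged usage sketch of getJobModel; the JobInstance constructor argument order and the container-id keying of JobModel.getContainers() are assumptions here, not confirmed by the snippet above.

  // Fetch the JobModel for a job instance and list the tasks per container.
  JobInstance instance = new JobInstance("my-job", "1"); // assumed (jobName, jobId) order
  JobModel jobModel = getJobModel(instance);
  for (Map.Entry<String, ContainerModel> container : jobModel.getContainers().entrySet()) {
    // ContainerModel.getTasks() maps TaskName to TaskModel, including the changelog
    // partition resolved via readChangeLogPartitionMapping() above.
    System.out.println("Container " + container.getKey() + " runs: " + container.getValue().getTasks().keySet());
  }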
Use of org.apache.samza.container.TaskName in project samza by apache.
The class LocalStoreMonitor, method monitor.
/**
 * This monitor method is invoked periodically to delete the stale state stores
 * of dead jobs/tasks.
 * @throws Exception if there was any problem in running the monitor.
 */
@Override
public void monitor() throws Exception {
  File localStoreDir = new File(config.getLocalStoreBaseDir());
  Preconditions.checkState(localStoreDir.isDirectory(), String.format("LocalStoreDir: %s is not a directory", localStoreDir.getAbsolutePath()));
  String localHostName = InetAddress.getLocalHost().getHostName();
  for (JobInstance jobInstance : getHostAffinityEnabledJobs(localStoreDir)) {
    File jobDir = new File(localStoreDir, String.format("%s-%s", jobInstance.getJobName(), jobInstance.getJobId()));
    try {
      JobStatus jobStatus = jobsClient.getJobStatus(jobInstance);
      for (Task task : jobsClient.getTasks(jobInstance)) {
        for (String storeName : jobDir.list(DirectoryFileFilter.DIRECTORY)) {
          LOG.info("Job: {} has the running status: {} with preferred host: {}.", jobInstance, jobStatus, task.getPreferredHost());
          /*
           * A task store is active if all of the following conditions are true:
           * a) the store is amongst the active stores of the task;
           * b) the job has been started;
           * c) the preferred host of the task is the localhost on which the monitor is run.
           */
          if (jobStatus.hasBeenStarted() && task.getStoreNames().contains(storeName) && task.getPreferredHost().equals(localHostName)) {
            LOG.info(String.format("Store %s is actively used by the task: %s.", storeName, task.getTaskName()));
          } else {
            LOG.info(String.format("Store %s is not used by the task: %s.", storeName, task.getTaskName()));
            markSweepTaskStore(TaskStorageManager.getStorePartitionDir(jobDir, storeName, new TaskName(task.getTaskName())));
          }
        }
      }
    } catch (Exception ex) {
      if (!config.getIgnoreFailures()) {
        throw ex;
      }
      LOG.warn("Config: {} turned on, failures will be ignored. Local store cleanup for job: {} resulted in exception: {}.", new Object[] { LocalStoreMonitorConfig.CONFIG_IGNORE_FAILURES, jobInstance, ex });
    }
  }
}
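For orientation, getStorePartitionDir (used in the mark-and-sweep call above) resolves the on-disk location of a task's store. A minimal sketch, assuming a layout along the lines of jobDir/storeName/taskName, with taskName escaping left as an implementation detail; the base path is hypothetical:

  // Resolve where a task's store lives under the job's local store directory.
  File jobDir = new File("/tmp/local-store", "my-job-1"); // hypothetical base path
  File storeDir = TaskStorageManager.getStorePartitionDir(jobDir, "my-store", new TaskName("Partition 0"));
  System.out.println(storeDir.getAbsolutePath());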
Use of org.apache.samza.container.TaskName in project samza by apache.
The class GroupByPartition, method group.
@Override
public Map<TaskName, Set<SystemStreamPartition>> group(Set<SystemStreamPartition> ssps) {
  Map<TaskName, Set<SystemStreamPartition>> groupedMap = new HashMap<TaskName, Set<SystemStreamPartition>>();
  for (SystemStreamPartition ssp : ssps) {
    // Skip the broadcast streams, if there are any.
    if (broadcastStreams.contains(ssp)) {
      continue;
    }
    TaskName taskName = new TaskName("Partition " + ssp.getPartition().getPartitionId());
    if (!groupedMap.containsKey(taskName)) {
      groupedMap.put(taskName, new HashSet<SystemStreamPartition>());
    }
    groupedMap.get(taskName).add(ssp);
  }
  // Assign the broadcast streams to all the taskNames.
  if (!broadcastStreams.isEmpty()) {
    for (Set<SystemStreamPartition> value : groupedMap.values()) {
      for (SystemStreamPartition ssp : broadcastStreams) {
        value.add(ssp);
      }
    }
  }
  return groupedMap;
}
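A usage sketch of the grouping behavior: partitions sharing a partition id across different streams land in the same task. The grouper instance is assumed to be constructed elsewhere with the job's broadcast-stream configuration, since the constructor wiring of broadcastStreams is not shown above.

  // `grouper` is an already-constructed GroupByPartition (broadcastStreams wiring elided).
  Set<SystemStreamPartition> ssps = new HashSet<>();
  ssps.add(new SystemStreamPartition("kafka", "pageviews", new Partition(0)));
  ssps.add(new SystemStreamPartition("kafka", "clicks", new Partition(0)));
  ssps.add(new SystemStreamPartition("kafka", "pageviews", new Partition(1)));
  Map<TaskName, Set<SystemStreamPartition>> grouped = grouper.group(ssps);
  // Expected keys: "Partition 0" (pageviews/0 and clicks/0) and "Partition 1" (pageviews/1).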