Search in sources :

Example 86 with SystemStream

use of org.apache.samza.system.SystemStream in project samza by apache.

the class TestJoinOperator method getTestJoinStreamGraph.

/**
 * Builds the application descriptor exercised by the join tests.
 *
 * <p>Two input streams ("inStream" and "inStream2") are registered on the "insystem" system,
 * joined with the supplied join function, and every joined message is forwarded to the
 * "outputSystem"/"outputStream" system stream through the task's message collector.
 *
 * @param joinFn the join function applied to the two input streams
 * @return the application descriptor wrapping the join topology and its config
 * @throws IOException declared by the signature; nothing in this body throws it directly
 */
private StreamApplicationDescriptorImpl getTestJoinStreamGraph(TestJoinFunction joinFn) throws IOException {
    Map<String, String> settings = new HashMap<>();
    settings.put("job.name", "jobName");
    settings.put("job.id", "jobId");
    StreamTestUtils.addStreamConfigs(settings, "inStream", "insystem", "instream");
    StreamTestUtils.addStreamConfigs(settings, "inStream2", "insystem", "instream2");
    Config jobConfig = new MapConfig(settings);

    return new StreamApplicationDescriptorImpl(appDesc -> {
        // Keys and values of both inputs are plain integers.
        IntegerSerde intSerde = new IntegerSerde();
        KVSerde<Integer, Integer> pairSerde = KVSerde.of(intSerde, intSerde);

        GenericSystemDescriptor inputSystem = new GenericSystemDescriptor("insystem", "mockFactoryClassName");
        GenericInputDescriptor<KV<Integer, Integer>> leftDescriptor = inputSystem.getInputDescriptor("inStream", pairSerde);
        GenericInputDescriptor<KV<Integer, Integer>> rightDescriptor = inputSystem.getInputDescriptor("inStream2", pairSerde);

        MessageStream<KV<Integer, Integer>> left = appDesc.getInputStream(leftDescriptor);
        MessageStream<KV<Integer, Integer>> right = appDesc.getInputStream(rightDescriptor);

        left
            .join(right, joinFn, intSerde, pairSerde, pairSerde, JOIN_TTL, "j1")
            .sink((joined, collector, coordinator) ->
                // The target stream is created per message, exactly as the sink is invoked.
                collector.send(new OutgoingMessageEnvelope(new SystemStream("outputSystem", "outputStream"), joined)));
    }, jobConfig);
}
Also used : HashMap(java.util.HashMap) MapConfig(org.apache.samza.config.MapConfig) Config(org.apache.samza.config.Config) SystemStream(org.apache.samza.system.SystemStream) IntegerSerde(org.apache.samza.serializers.IntegerSerde) StreamApplicationDescriptorImpl(org.apache.samza.application.descriptors.StreamApplicationDescriptorImpl) MapConfig(org.apache.samza.config.MapConfig) OutgoingMessageEnvelope(org.apache.samza.system.OutgoingMessageEnvelope) GenericSystemDescriptor(org.apache.samza.system.descriptors.GenericSystemDescriptor)

Example 87 with SystemStream

use of org.apache.samza.system.SystemStream in project samza by apache.

the class TestControlMessageSender method testSend.

/**
 * Checks {@code ControlMessageSender.send} against a stream whose mocked metadata reports
 * four partitions: every envelope handed to the collector must target the expected system
 * stream, and all envelopes together must use exactly one distinct partition key.
 */
@Test
public void testSend() {
    // Mocked metadata describing a stream with partitions 0..3.
    SystemStreamMetadata metadata = mock(SystemStreamMetadata.class);
    Map<Partition, SystemStreamMetadata.SystemStreamPartitionMetadata> partitionMetadata = new HashMap<>();
    for (int partitionId = 0; partitionId < 4; partitionId++) {
        partitionMetadata.put(new Partition(partitionId), mock(SystemStreamMetadata.SystemStreamPartitionMetadata.class));
    }
    when(metadata.getSystemStreamPartitionMetadata()).thenReturn(partitionMetadata);
    StreamMetadataCache metadataCache = mock(StreamMetadataCache.class);
    when(metadataCache.getSystemStreamMetadata(anyObject(), anyBoolean())).thenReturn(metadata);

    SystemStream systemStream = new SystemStream("test-system", "test-stream");
    // Collects the partition key of every envelope the sender emits.
    Set<Integer> partitions = new HashSet<>();
    MessageCollector collector = mock(MessageCollector.class);
    doAnswer(invocation -> {
        OutgoingMessageEnvelope envelope = (OutgoingMessageEnvelope) invocation.getArguments()[0];
        partitions.add((Integer) envelope.getPartitionKey());
        // JUnit order is (expected, actual) so a failure reports the values the right way round.
        assertEquals(systemStream, envelope.getSystemStream());
        return null;
    }).when(collector).send(any());

    ControlMessageSender sender = new ControlMessageSender(metadataCache);
    WatermarkMessage watermark = new WatermarkMessage(System.currentTimeMillis(), "task 0");
    sender.send(watermark, systemStream, collector);

    assertEquals(1, partitions.size());
}
Also used : StreamMetadataCache(org.apache.samza.system.StreamMetadataCache) Partition(org.apache.samza.Partition) SystemStreamPartition(org.apache.samza.system.SystemStreamPartition) HashMap(java.util.HashMap) SystemStream(org.apache.samza.system.SystemStream) SystemStreamMetadata(org.apache.samza.system.SystemStreamMetadata) WatermarkMessage(org.apache.samza.system.WatermarkMessage) MessageCollector(org.apache.samza.task.MessageCollector) OutgoingMessageEnvelope(org.apache.samza.system.OutgoingMessageEnvelope) HashSet(java.util.HashSet) Test(org.junit.Test)

Example 88 with SystemStream

use of org.apache.samza.system.SystemStream in project samza by apache.

the class TestControlMessageSender method testBroadcast.

/**
 * Checks {@code ControlMessageSender.broadcastToOtherPartitions} against a stream whose
 * mocked metadata reports four partitions: when broadcasting from partition 0, every
 * envelope must target the expected system stream and the envelopes together must use
 * exactly three distinct partition keys.
 */
@Test
public void testBroadcast() {
    // Mocked metadata describing a stream with partitions 0..3.
    SystemStreamMetadata metadata = mock(SystemStreamMetadata.class);
    Map<Partition, SystemStreamMetadata.SystemStreamPartitionMetadata> partitionMetadata = new HashMap<>();
    for (int partitionId = 0; partitionId < 4; partitionId++) {
        partitionMetadata.put(new Partition(partitionId), mock(SystemStreamMetadata.SystemStreamPartitionMetadata.class));
    }
    when(metadata.getSystemStreamPartitionMetadata()).thenReturn(partitionMetadata);
    StreamMetadataCache metadataCache = mock(StreamMetadataCache.class);
    when(metadataCache.getSystemStreamMetadata(anyObject(), anyBoolean())).thenReturn(metadata);

    SystemStream systemStream = new SystemStream("test-system", "test-stream");
    // Collects the partition key of every envelope the sender emits.
    Set<Integer> partitions = new HashSet<>();
    MessageCollector collector = mock(MessageCollector.class);
    doAnswer(invocation -> {
        OutgoingMessageEnvelope envelope = (OutgoingMessageEnvelope) invocation.getArguments()[0];
        partitions.add((Integer) envelope.getPartitionKey());
        // JUnit order is (expected, actual) so a failure reports the values the right way round.
        assertEquals(systemStream, envelope.getSystemStream());
        return null;
    }).when(collector).send(any());

    ControlMessageSender sender = new ControlMessageSender(metadataCache);
    WatermarkMessage watermark = new WatermarkMessage(System.currentTimeMillis(), "task 0");
    SystemStreamPartition ssp = new SystemStreamPartition(systemStream, new Partition(0));
    sender.broadcastToOtherPartitions(watermark, ssp, collector);

    assertEquals(3, partitions.size());
}
Also used : StreamMetadataCache(org.apache.samza.system.StreamMetadataCache) Partition(org.apache.samza.Partition) SystemStreamPartition(org.apache.samza.system.SystemStreamPartition) HashMap(java.util.HashMap) SystemStream(org.apache.samza.system.SystemStream) SystemStreamMetadata(org.apache.samza.system.SystemStreamMetadata) WatermarkMessage(org.apache.samza.system.WatermarkMessage) MessageCollector(org.apache.samza.task.MessageCollector) OutgoingMessageEnvelope(org.apache.samza.system.OutgoingMessageEnvelope) HashSet(java.util.HashSet) SystemStreamPartition(org.apache.samza.system.SystemStreamPartition) Test(org.junit.Test)

Example 89 with SystemStream

use of org.apache.samza.system.SystemStream in project samza by apache.

the class AzureJobCoordinator method getInputStreamPartitions.

/**
 * Resolves the partitions of every input stream named in the task config.
 *
 * <p>The configured input streams are looked up in the stream metadata cache, and each
 * returned metadata entry is expanded through {@code mapSSMToSSP}; the results are gathered
 * into a single set.
 *
 * @return the set of SystemStreamPartitions collected across all configured input streams
 */
private Set<SystemStreamPartition> getInputStreamPartitions() {
    // The metadata cache takes a Scala immutable set, so convert the Java set first.
    scala.collection.immutable.Set<SystemStream> inputSystemStreams =
        JavaConverters.asScalaSetConverter(new TaskConfig(config).getInputStreams())
            .asScala()
            .toSet();

    // Expand each (stream, metadata) entry into its partitions and merge them.
    return JavaConverters.mapAsJavaMapConverter(streamMetadataCache.getStreamMetadata(inputSystemStreams, true))
        .asJava()
        .entrySet()
        .stream()
        .flatMap(this::mapSSMToSSP)
        .collect(Collectors.toSet());
}
Also used : SystemStream(org.apache.samza.system.SystemStream) TaskConfig(org.apache.samza.config.TaskConfig) SystemStreamPartition(org.apache.samza.system.SystemStreamPartition)

Example 90 with SystemStream

use of org.apache.samza.system.SystemStream in project samza by apache.

the class ContainerStorageManager method getChangelogSystemStreams.

/**
 * Computes the changelog system streams that remain after standby tasks are set aside.
 *
 * <p>For each standby task, its changelog SSPs are removed from the changelog SSP map and
 * registered in {@code taskSideInputStoreSSPs} under that task instead, one singleton set
 * per store name.
 *
 * @param containerModel the container's model
 * @param changelogSystemStreams the passed in map of store name to changelog system stream
 * @return a map of store name to changelog system stream, built from the changelog SSPs
 *         that were not removed for standby tasks
 * @throws SamzaException if two stores are configured with the same changelog system stream
 */
private Map<String, SystemStream> getChangelogSystemStreams(ContainerModel containerModel, Map<String, SystemStream> changelogSystemStreams) {
    // Inverting drops duplicate values, so a size mismatch means two stores share a stream.
    if (MapUtils.invertMap(changelogSystemStreams).size() != changelogSystemStreams.size()) {
        throw new SamzaException("Two stores cannot have the same changelog system-stream");
    }
    // Start with one changelog SSP per (store, task) pair across every task in the container.
    Map<SystemStreamPartition, String> changelogSSPToStore = new HashMap<>();
    changelogSystemStreams.forEach((storeName, systemStream) -> containerModel.getTasks().forEach((taskName, taskModel) -> changelogSSPToStore.put(new SystemStreamPartition(systemStream, taskModel.getChangelogPartition()), storeName)));
    getTasks(containerModel, TaskMode.Standby).forEach((taskName, taskModel) -> {
        // Done outside the store loop so a standby task gets an entry even when there are no stores.
        Map<String, Set<SystemStreamPartition>> sideInputSSPs = taskSideInputStoreSSPs.computeIfAbsent(taskName, ignored -> new HashMap<>());
        changelogSystemStreams.forEach((storeName, systemStream) -> {
            SystemStreamPartition ssp = new SystemStreamPartition(systemStream, taskModel.getChangelogPartition());
            changelogSSPToStore.remove(ssp);
            sideInputSSPs.put(storeName, Collections.singleton(ssp));
        });
    });
    // changelogSystemStreams correspond only to active tasks (since those of standby-tasks moved to sideInputs above)
    return MapUtils.invertMap(changelogSSPToStore).entrySet().stream().collect(Collectors.toMap(Map.Entry::getKey, x -> x.getValue().getSystemStream()));
}
Also used : StreamMetadataCache(org.apache.samza.system.StreamMetadataCache) SerdeUtils(org.apache.samza.table.utils.SerdeUtils) LoggerFactory(org.slf4j.LoggerFactory) TaskModel(org.apache.samza.job.model.TaskModel) Future(java.util.concurrent.Future) SystemConsumer(org.apache.samza.system.SystemConsumer) SamzaContainerMetrics(org.apache.samza.container.SamzaContainerMetrics) Map(java.util.Map) TaskInstanceCollector(org.apache.samza.task.TaskInstanceCollector) RoundRobinChooserFactory(org.apache.samza.system.chooser.RoundRobinChooserFactory) Path(java.nio.file.Path) StorageConfig(org.apache.samza.config.StorageConfig) RunLoopTask(org.apache.samza.container.RunLoopTask) TaskName(org.apache.samza.container.TaskName) IncomingMessageEnvelope(org.apache.samza.system.IncomingMessageEnvelope) Collection(java.util.Collection) Set(java.util.Set) DefaultChooser(org.apache.samza.system.chooser.DefaultChooser) Checkpoint(org.apache.samza.checkpoint.Checkpoint) MetricsRegistry(org.apache.samza.metrics.MetricsRegistry) Collectors(java.util.stream.Collectors) Executors(java.util.concurrent.Executors) CountDownLatch(java.util.concurrent.CountDownLatch) List(java.util.List) Optional(java.util.Optional) Config(org.apache.samza.config.Config) MetricsRegistryMap(org.apache.samza.metrics.MetricsRegistryMap) SystemAdmins(org.apache.samza.system.SystemAdmins) ScalaJavaUtil(org.apache.samza.util.ScalaJavaUtil) ThreadFactoryBuilder(com.google.common.util.concurrent.ThreadFactoryBuilder) MessageChooser(org.apache.samza.system.chooser.MessageChooser) CheckpointV2(org.apache.samza.checkpoint.CheckpointV2) JobConfig(org.apache.samza.config.JobConfig) HashMap(java.util.HashMap) CompletableFuture(java.util.concurrent.CompletableFuture) Serde(org.apache.samza.serializers.Serde) SystemStreamPartition(org.apache.samza.system.SystemStreamPartition) Function(java.util.function.Function) SystemStreamMetadata(org.apache.samza.system.SystemStreamMetadata) ArrayList(java.util.ArrayList) 
HashSet(java.util.HashSet) Gauge(org.apache.samza.metrics.Gauge) ImmutableList(com.google.common.collect.ImmutableList) SerdeManager(org.apache.samza.serializers.SerdeManager) MessageCollector(org.apache.samza.task.MessageCollector) CheckpointManager(org.apache.samza.checkpoint.CheckpointManager) SystemStream(org.apache.samza.system.SystemStream) RunLoop(org.apache.samza.container.RunLoop) SystemConsumersMetrics(org.apache.samza.system.SystemConsumersMetrics) ExecutorService(java.util.concurrent.ExecutorService) MapUtils(org.apache.commons.collections4.MapUtils) JavaConversions(scala.collection.JavaConversions) TaskInstanceMetrics(org.apache.samza.container.TaskInstanceMetrics) Logger(org.slf4j.Logger) TaskConfig(org.apache.samza.config.TaskConfig) JobContext(org.apache.samza.context.JobContext) ContainerContext(org.apache.samza.context.ContainerContext) SystemFactory(org.apache.samza.system.SystemFactory) Clock(org.apache.samza.util.Clock) SystemConsumers(org.apache.samza.system.SystemConsumers) File(java.io.File) SamzaException(org.apache.samza.SamzaException) TimeUnit(java.util.concurrent.TimeUnit) TaskMode(org.apache.samza.job.model.TaskMode) Entry(org.apache.samza.storage.kv.Entry) ReflectionUtil(org.apache.samza.util.ReflectionUtil) ContainerModel(org.apache.samza.job.model.ContainerModel) VisibleForTesting(com.google.common.annotations.VisibleForTesting) KeyValueStore(org.apache.samza.storage.kv.KeyValueStore) Collections(java.util.Collections) HashMap(java.util.HashMap) SamzaException(org.apache.samza.SamzaException) Map(java.util.Map) MetricsRegistryMap(org.apache.samza.metrics.MetricsRegistryMap) HashMap(java.util.HashMap) SystemStreamPartition(org.apache.samza.system.SystemStreamPartition)

Aggregations

SystemStream (org.apache.samza.system.SystemStream)143 HashMap (java.util.HashMap)75 Test (org.junit.Test)74 SystemStreamPartition (org.apache.samza.system.SystemStreamPartition)72 Partition (org.apache.samza.Partition)58 Map (java.util.Map)55 TaskName (org.apache.samza.container.TaskName)52 MapConfig (org.apache.samza.config.MapConfig)49 Config (org.apache.samza.config.Config)46 SystemAdmin (org.apache.samza.system.SystemAdmin)42 SystemAdmins (org.apache.samza.system.SystemAdmins)40 TaskModel (org.apache.samza.job.model.TaskModel)39 Collections (java.util.Collections)37 Set (java.util.Set)37 TaskConfig (org.apache.samza.config.TaskConfig)37 Clock (org.apache.samza.util.Clock)36 File (java.io.File)35 ImmutableMap (com.google.common.collect.ImmutableMap)34 SystemStreamPartitionMetadata (org.apache.samza.system.SystemStreamMetadata.SystemStreamPartitionMetadata)33 TaskMode (org.apache.samza.job.model.TaskMode)32