Search in sources :

Example 1 with OutputStream

use of org.apache.samza.operators.OutputStream in project samza by apache.

the class TestExecutionPlanner method createSimpleGraph.

private StreamGraphImpl createSimpleGraph() {
    /**
     * a simple graph of partitionBy and map
     *
     * input1 -> partitionBy -> map -> output1
     *
     */
    StreamGraphImpl streamGraph = new StreamGraphImpl(runner, config);
    Function mockFn = mock(Function.class);
    OutputStream<Object, Object, Object> output1 = streamGraph.getOutputStream("output1", mockFn, mockFn);
    BiFunction mockBuilder = mock(BiFunction.class);
    streamGraph.getInputStream("input1", mockBuilder).partitionBy(m -> "yes!!!").map(m -> m).sendTo(output1);
    return streamGraph;
}
Also used : BiFunction(java.util.function.BiFunction) JobConfig(org.apache.samza.config.JobConfig) HashMap(java.util.HashMap) SystemStreamPartition(org.apache.samza.system.SystemStreamPartition) SystemStreamMetadata(org.apache.samza.system.SystemStreamMetadata) Function(java.util.function.Function) ArrayList(java.util.ArrayList) Duration(java.time.Duration) Map(java.util.Map) MapConfig(org.apache.samza.config.MapConfig) MessageStream(org.apache.samza.operators.MessageStream) Before(org.junit.Before) ApplicationRunner(org.apache.samza.runtime.ApplicationRunner) Windows(org.apache.samza.operators.windows.Windows) TaskConfig(org.apache.samza.config.TaskConfig) Collection(java.util.Collection) Partition(org.apache.samza.Partition) Set(java.util.Set) Assert.assertTrue(org.junit.Assert.assertTrue) StreamSpec(org.apache.samza.system.StreamSpec) Test(org.junit.Test) Mockito.when(org.mockito.Mockito.when) JoinFunction(org.apache.samza.operators.functions.JoinFunction) StreamGraphImpl(org.apache.samza.operators.StreamGraphImpl) List(java.util.List) Assert.assertFalse(org.junit.Assert.assertFalse) SystemAdmin(org.apache.samza.system.SystemAdmin) Config(org.apache.samza.config.Config) OutputStream(org.apache.samza.operators.OutputStream) Collections(java.util.Collections) Assert.assertEquals(org.junit.Assert.assertEquals) Mockito.mock(org.mockito.Mockito.mock) BiFunction(java.util.function.BiFunction) Function(java.util.function.Function) JoinFunction(org.apache.samza.operators.functions.JoinFunction) BiFunction(java.util.function.BiFunction) StreamGraphImpl(org.apache.samza.operators.StreamGraphImpl)

Example 2 with OutputStream

use of org.apache.samza.operators.OutputStream in project samza by apache.

the class TestJobGraphJsonGenerator method test.

@Test
public void test() throws Exception {
    /**
     * the graph looks like the following. number of partitions in parentheses. quotes indicate expected value.
     *
     *                               input1 (64) -> map -> join -> output1 (8)
     *                                                       |
     *          input2 (16) -> partitionBy ("64") -> filter -|
     *                                                       |
     * input3 (32) -> filter -> partitionBy ("64") -> map -> join -> output2 (16)
     *
     */
    Map<String, String> configMap = new HashMap<>();
    configMap.put(JobConfig.JOB_NAME(), "test-app");
    configMap.put(JobConfig.JOB_DEFAULT_SYSTEM(), "test-system");
    Config config = new MapConfig(configMap);
    StreamSpec input1 = new StreamSpec("input1", "input1", "system1");
    StreamSpec input2 = new StreamSpec("input2", "input2", "system2");
    StreamSpec input3 = new StreamSpec("input3", "input3", "system2");
    StreamSpec output1 = new StreamSpec("output1", "output1", "system1");
    StreamSpec output2 = new StreamSpec("output2", "output2", "system2");
    ApplicationRunner runner = mock(ApplicationRunner.class);
    when(runner.getStreamSpec("input1")).thenReturn(input1);
    when(runner.getStreamSpec("input2")).thenReturn(input2);
    when(runner.getStreamSpec("input3")).thenReturn(input3);
    when(runner.getStreamSpec("output1")).thenReturn(output1);
    when(runner.getStreamSpec("output2")).thenReturn(output2);
    // intermediate streams used in tests
    when(runner.getStreamSpec("test-app-1-partition_by-0")).thenReturn(new StreamSpec("test-app-1-partition_by-0", "test-app-1-partition_by-0", "default-system"));
    when(runner.getStreamSpec("test-app-1-partition_by-1")).thenReturn(new StreamSpec("test-app-1-partition_by-1", "test-app-1-partition_by-1", "default-system"));
    when(runner.getStreamSpec("test-app-1-partition_by-4")).thenReturn(new StreamSpec("test-app-1-partition_by-4", "test-app-1-partition_by-4", "default-system"));
    // set up external partition count
    Map<String, Integer> system1Map = new HashMap<>();
    system1Map.put("input1", 64);
    system1Map.put("output1", 8);
    Map<String, Integer> system2Map = new HashMap<>();
    system2Map.put("input2", 16);
    system2Map.put("input3", 32);
    system2Map.put("output2", 16);
    Map<String, SystemAdmin> systemAdmins = new HashMap<>();
    SystemAdmin systemAdmin1 = createSystemAdmin(system1Map);
    SystemAdmin systemAdmin2 = createSystemAdmin(system2Map);
    systemAdmins.put("system1", systemAdmin1);
    systemAdmins.put("system2", systemAdmin2);
    StreamManager streamManager = new StreamManager(systemAdmins);
    StreamGraphImpl streamGraph = new StreamGraphImpl(runner, config);
    BiFunction mockBuilder = mock(BiFunction.class);
    MessageStream m1 = streamGraph.getInputStream("input1", mockBuilder).map(m -> m);
    MessageStream m2 = streamGraph.getInputStream("input2", mockBuilder).partitionBy(m -> "haha").filter(m -> true);
    MessageStream m3 = streamGraph.getInputStream("input3", mockBuilder).filter(m -> true).partitionBy(m -> "hehe").map(m -> m);
    Function mockFn = mock(Function.class);
    OutputStream<Object, Object, Object> outputStream1 = streamGraph.getOutputStream("output1", mockFn, mockFn);
    OutputStream<Object, Object, Object> outputStream2 = streamGraph.getOutputStream("output2", mockFn, mockFn);
    m1.join(m2, mock(JoinFunction.class), Duration.ofHours(2)).sendTo(outputStream1);
    m2.sink((message, collector, coordinator) -> {
    });
    m3.join(m2, mock(JoinFunction.class), Duration.ofHours(1)).sendTo(outputStream2);
    ExecutionPlanner planner = new ExecutionPlanner(config, streamManager);
    ExecutionPlan plan = planner.plan(streamGraph);
    String json = plan.getPlanAsJson();
    System.out.println(json);
    // deserialize
    ObjectMapper mapper = new ObjectMapper();
    JobGraphJsonGenerator.JobGraphJson nodes = mapper.readValue(json, JobGraphJsonGenerator.JobGraphJson.class);
    assertTrue(nodes.jobs.get(0).operatorGraph.inputStreams.size() == 5);
    assertTrue(nodes.jobs.get(0).operatorGraph.operators.size() == 13);
    assertTrue(nodes.sourceStreams.size() == 3);
    assertTrue(nodes.sinkStreams.size() == 2);
    assertTrue(nodes.intermediateStreams.size() == 2);
}
Also used : ApplicationRunner(org.apache.samza.runtime.ApplicationRunner) BiFunction(java.util.function.BiFunction) JobConfig(org.apache.samza.config.JobConfig) Assert.assertTrue(org.junit.Assert.assertTrue) HashMap(java.util.HashMap) StreamSpec(org.apache.samza.system.StreamSpec) Test(org.junit.Test) Mockito.when(org.mockito.Mockito.when) JoinFunction(org.apache.samza.operators.functions.JoinFunction) Function(java.util.function.Function) TestExecutionPlanner.createSystemAdmin(org.apache.samza.execution.TestExecutionPlanner.createSystemAdmin) StreamGraphImpl(org.apache.samza.operators.StreamGraphImpl) Duration(java.time.Duration) Map(java.util.Map) SystemAdmin(org.apache.samza.system.SystemAdmin) Config(org.apache.samza.config.Config) MapConfig(org.apache.samza.config.MapConfig) OutputStream(org.apache.samza.operators.OutputStream) ObjectMapper(org.codehaus.jackson.map.ObjectMapper) MessageStream(org.apache.samza.operators.MessageStream) Mockito.mock(org.mockito.Mockito.mock) StreamSpec(org.apache.samza.system.StreamSpec) HashMap(java.util.HashMap) JobConfig(org.apache.samza.config.JobConfig) Config(org.apache.samza.config.Config) MapConfig(org.apache.samza.config.MapConfig) BiFunction(java.util.function.BiFunction) JoinFunction(org.apache.samza.operators.functions.JoinFunction) Function(java.util.function.Function) ApplicationRunner(org.apache.samza.runtime.ApplicationRunner) BiFunction(java.util.function.BiFunction) MessageStream(org.apache.samza.operators.MessageStream) StreamGraphImpl(org.apache.samza.operators.StreamGraphImpl) MapConfig(org.apache.samza.config.MapConfig) TestExecutionPlanner.createSystemAdmin(org.apache.samza.execution.TestExecutionPlanner.createSystemAdmin) SystemAdmin(org.apache.samza.system.SystemAdmin) ObjectMapper(org.codehaus.jackson.map.ObjectMapper) Test(org.junit.Test)

Example 3 with OutputStream

use of org.apache.samza.operators.OutputStream in project samza by apache.

the class TestOperatorImplGraph method testGetOutputToInputStreams.

@Test
public void testGetOutputToInputStreams() {
    String inputStreamId1 = "input1";
    String inputStreamId2 = "input2";
    String inputStreamId3 = "input3";
    String inputSystem = "input-system";
    String outputStreamId1 = "output1";
    String outputStreamId2 = "output2";
    String outputSystem = "output-system";
    String intStreamId1 = "test-app-1-partition_by-p1";
    String intStreamId2 = "test-app-1-partition_by-p2";
    String intSystem = "test-system";
    HashMap<String, String> configs = new HashMap<>();
    configs.put(JobConfig.JOB_NAME, "test-app");
    configs.put(JobConfig.JOB_DEFAULT_SYSTEM, intSystem);
    StreamTestUtils.addStreamConfigs(configs, inputStreamId1, inputSystem, inputStreamId1);
    StreamTestUtils.addStreamConfigs(configs, inputStreamId2, inputSystem, inputStreamId2);
    StreamTestUtils.addStreamConfigs(configs, inputStreamId3, inputSystem, inputStreamId3);
    StreamTestUtils.addStreamConfigs(configs, outputStreamId1, outputSystem, outputStreamId1);
    StreamTestUtils.addStreamConfigs(configs, outputStreamId2, outputSystem, outputStreamId2);
    Config config = new MapConfig(configs);
    when(this.context.getJobContext().getConfig()).thenReturn(config);
    StreamApplicationDescriptorImpl graphSpec = new StreamApplicationDescriptorImpl(appDesc -> {
        GenericSystemDescriptor isd = new GenericSystemDescriptor(inputSystem, "mockFactoryClass");
        GenericInputDescriptor inputDescriptor1 = isd.getInputDescriptor(inputStreamId1, mock(Serde.class));
        GenericInputDescriptor inputDescriptor2 = isd.getInputDescriptor(inputStreamId2, mock(Serde.class));
        GenericInputDescriptor inputDescriptor3 = isd.getInputDescriptor(inputStreamId3, mock(Serde.class));
        GenericSystemDescriptor osd = new GenericSystemDescriptor(outputSystem, "mockFactoryClass");
        GenericOutputDescriptor outputDescriptor1 = osd.getOutputDescriptor(outputStreamId1, mock(Serde.class));
        GenericOutputDescriptor outputDescriptor2 = osd.getOutputDescriptor(outputStreamId2, mock(Serde.class));
        MessageStream messageStream1 = appDesc.getInputStream(inputDescriptor1).map(m -> m);
        MessageStream messageStream2 = appDesc.getInputStream(inputDescriptor2).filter(m -> true);
        MessageStream messageStream3 = appDesc.getInputStream(inputDescriptor3).filter(m -> true).partitionBy(m -> "m", m -> m, mock(KVSerde.class), "p1").map(m -> m);
        OutputStream<Object> outputStream1 = appDesc.getOutputStream(outputDescriptor1);
        OutputStream<Object> outputStream2 = appDesc.getOutputStream(outputDescriptor2);
        messageStream1.join(messageStream2, mock(JoinFunction.class), mock(Serde.class), mock(Serde.class), mock(Serde.class), Duration.ofHours(2), "j1").partitionBy(m -> "m", m -> m, mock(KVSerde.class), "p2").sendTo(outputStream1);
        messageStream3.join(messageStream2, mock(JoinFunction.class), mock(Serde.class), mock(Serde.class), mock(Serde.class), Duration.ofHours(1), "j2").sendTo(outputStream2);
    }, config);
    Multimap<SystemStream, SystemStream> outputToInput = OperatorImplGraph.getIntermediateToInputStreamsMap(graphSpec.getOperatorSpecGraph(), new StreamConfig(config));
    Collection<SystemStream> inputs = outputToInput.get(new SystemStream(intSystem, intStreamId2));
    assertEquals(inputs.size(), 2);
    assertTrue(inputs.contains(new SystemStream(inputSystem, inputStreamId1)));
    assertTrue(inputs.contains(new SystemStream(inputSystem, inputStreamId2)));
    inputs = outputToInput.get(new SystemStream(intSystem, intStreamId1));
    assertEquals(inputs.size(), 1);
    assertEquals(inputs.iterator().next(), new SystemStream(inputSystem, inputStreamId3));
}
Also used : StringSerde(org.apache.samza.serializers.StringSerde) KVSerde(org.apache.samza.serializers.KVSerde) Serde(org.apache.samza.serializers.Serde) IntegerSerde(org.apache.samza.serializers.IntegerSerde) StreamApplicationDescriptorImpl(org.apache.samza.application.descriptors.StreamApplicationDescriptorImpl) BiFunction(java.util.function.BiFunction) Assert.assertNotSame(org.junit.Assert.assertNotSame) TaskModel(org.apache.samza.job.model.TaskModel) TimestampedValue(org.apache.samza.util.TimestampedValue) GenericInputDescriptor(org.apache.samza.system.descriptors.GenericInputDescriptor) StringSerde(org.apache.samza.serializers.StringSerde) HashMultimap(com.google.common.collect.HashMultimap) Matchers.eq(org.mockito.Matchers.eq) After(org.junit.After) Duration(java.time.Duration) Map(java.util.Map) MapConfig(org.apache.samza.config.MapConfig) KV(org.apache.samza.operators.KV) TaskName(org.apache.samza.container.TaskName) IncomingMessageEnvelope(org.apache.samza.system.IncomingMessageEnvelope) Collection(java.util.Collection) Set(java.util.Set) Serializable(java.io.Serializable) Context(org.apache.samza.context.Context) List(java.util.List) SystemClock(org.apache.samza.util.SystemClock) Config(org.apache.samza.config.Config) KVSerde(org.apache.samza.serializers.KVSerde) OutputStream(org.apache.samza.operators.OutputStream) MetricsRegistryMap(org.apache.samza.metrics.MetricsRegistryMap) Mockito.mock(org.mockito.Mockito.mock) GenericSystemDescriptor(org.apache.samza.system.descriptors.GenericSystemDescriptor) JobConfig(org.apache.samza.config.JobConfig) ClosableFunction(org.apache.samza.operators.functions.ClosableFunction) Serde(org.apache.samza.serializers.Serde) HashMap(java.util.HashMap) SystemStreamPartition(org.apache.samza.system.SystemStreamPartition) Multimap(com.google.common.collect.Multimap) Function(java.util.function.Function) StreamConfig(org.apache.samza.config.StreamConfig) MapFunction(org.apache.samza.operators.functions.MapFunction) ArrayList(java.util.ArrayList) HashSet(java.util.HashSet) StreamTestUtils(org.apache.samza.testUtils.StreamTestUtils) MessageCollector(org.apache.samza.task.MessageCollector) SystemStream(org.apache.samza.system.SystemStream) MockContext(org.apache.samza.context.MockContext) IntegerSerde(org.apache.samza.serializers.IntegerSerde) JobModel(org.apache.samza.job.model.JobModel) MessageStream(org.apache.samza.operators.MessageStream) Before(org.junit.Before) OpCode(org.apache.samza.operators.spec.OperatorSpec.OpCode) FilterFunction(org.apache.samza.operators.functions.FilterFunction) GenericOutputDescriptor(org.apache.samza.system.descriptors.GenericOutputDescriptor) Partition(org.apache.samza.Partition) Assert.assertTrue(org.junit.Assert.assertTrue) InitableFunction(org.apache.samza.operators.functions.InitableFunction) Clock(org.apache.samza.util.Clock) Test(org.junit.Test) Mockito.when(org.mockito.Mockito.when) JoinFunction(org.apache.samza.operators.functions.JoinFunction) TaskCoordinator(org.apache.samza.task.TaskCoordinator) ContainerModel(org.apache.samza.job.model.ContainerModel) TaskContextImpl(org.apache.samza.context.TaskContextImpl) KeyValueStore(org.apache.samza.storage.kv.KeyValueStore) Collections(java.util.Collections) Assert.assertEquals(org.junit.Assert.assertEquals) HashMap(java.util.HashMap) GenericOutputDescriptor(org.apache.samza.system.descriptors.GenericOutputDescriptor) MapConfig(org.apache.samza.config.MapConfig) Config(org.apache.samza.config.Config) JobConfig(org.apache.samza.config.JobConfig) StreamConfig(org.apache.samza.config.StreamConfig) SystemStream(org.apache.samza.system.SystemStream) StreamConfig(org.apache.samza.config.StreamConfig) GenericInputDescriptor(org.apache.samza.system.descriptors.GenericInputDescriptor) StreamApplicationDescriptorImpl(org.apache.samza.application.descriptors.StreamApplicationDescriptorImpl) JoinFunction(org.apache.samza.operators.functions.JoinFunction) MessageStream(org.apache.samza.operators.MessageStream) MapConfig(org.apache.samza.config.MapConfig) GenericSystemDescriptor(org.apache.samza.system.descriptors.GenericSystemDescriptor) Test(org.junit.Test)

Example 4 with OutputStream

use of org.apache.samza.operators.OutputStream in project samza by apache.

the class QueryTranslator method sendToOutputStream.

private void sendToOutputStream(String queryLogicalId, String logicalOpId, String sinkStream, StreamApplicationDescriptor appDesc, TranslatorContext translatorContext, RelNode node, int queryId) {
    SqlIOConfig sinkConfig = sqlConfig.getOutputSystemStreamConfigsBySource().get(sinkStream);
    MessageStream<SamzaSqlRelMessage> stream = translatorContext.getMessageStream(node.getId());
    MessageStream<KV<Object, Object>> outputStream = stream.map(new OutputMapFunction(queryLogicalId, logicalOpId, sinkStream, queryId));
    Optional<TableDescriptor> tableDescriptor = sinkConfig.getTableDescriptor();
    if (!tableDescriptor.isPresent()) {
        KVSerde<Object, Object> noOpKVSerde = KVSerde.of(new NoOpSerde<>(), new NoOpSerde<>());
        String systemName = sinkConfig.getSystemName();
        DelegatingSystemDescriptor sd = systemDescriptors.computeIfAbsent(systemName, DelegatingSystemDescriptor::new);
        GenericOutputDescriptor<KV<Object, Object>> osd = sd.getOutputDescriptor(sinkConfig.getStreamId(), noOpKVSerde);
        OutputStream stm = outputMsgStreams.computeIfAbsent(sinkConfig.getSource(), v -> appDesc.getOutputStream(osd));
        outputStream.sendTo(stm);
        // Process system events only if the output is a stream.
        if (sqlConfig.isProcessSystemEvents()) {
            for (MessageStream<SamzaSqlInputMessage> inputStream : inputMsgStreams.values()) {
                MessageStream<KV<Object, Object>> systemEventStream = inputStream.filter(message -> message.getMetadata().isSystemMessage()).map(SamzaSqlInputMessage::getKeyAndMessageKV);
                systemEventStream.sendTo(stm);
            }
        }
    } else {
        Table outputTable = appDesc.getTable(tableDescriptor.get());
        if (outputTable == null) {
            String msg = "Failed to obtain table descriptor of " + sinkConfig.getSource();
            throw new SamzaException(msg);
        }
        outputStream.sendTo(outputTable);
    }
}
Also used : SqlIOConfig(org.apache.samza.sql.interfaces.SqlIOConfig) TableScan(org.apache.calcite.rel.core.TableScan) SamzaSqlRelMsgMetadata(org.apache.samza.sql.data.SamzaSqlRelMsgMetadata) LogicalFilter(org.apache.calcite.rel.logical.LogicalFilter) RelShuttleImpl(org.apache.calcite.rel.RelShuttleImpl) TableDescriptor(org.apache.samza.table.descriptors.TableDescriptor) LoggerFactory(org.slf4j.LoggerFactory) HashMap(java.util.HashMap) RelRoot(org.apache.calcite.rel.RelRoot) TaskContext(org.apache.samza.context.TaskContext) MapFunction(org.apache.samza.operators.functions.MapFunction) Counter(org.apache.samza.metrics.Counter) LogicalJoin(org.apache.calcite.rel.logical.LogicalJoin) QueryPlanner(org.apache.samza.sql.planner.QueryPlanner) ApplicationContainerContext(org.apache.samza.context.ApplicationContainerContext) DelegatingSystemDescriptor(org.apache.samza.system.descriptors.DelegatingSystemDescriptor) Map(java.util.Map) TableModify(org.apache.calcite.rel.core.TableModify) SamzaSqlRelMessage(org.apache.samza.sql.data.SamzaSqlRelMessage) SamzaHistogram(org.apache.samza.metrics.SamzaHistogram) ExternalContext(org.apache.samza.context.ExternalContext) KV(org.apache.samza.operators.KV) NoOpSerde(org.apache.samza.serializers.NoOpSerde) MessageStream(org.apache.samza.operators.MessageStream) Table(org.apache.samza.table.Table) Logger(org.slf4j.Logger) LogicalProject(org.apache.calcite.rel.logical.LogicalProject) GenericOutputDescriptor(org.apache.samza.system.descriptors.GenericOutputDescriptor) JobContext(org.apache.samza.context.JobContext) ContainerContext(org.apache.samza.context.ContainerContext) SamzaRelConverter(org.apache.samza.sql.interfaces.SamzaRelConverter) SamzaSqlExecutionContext(org.apache.samza.sql.data.SamzaSqlExecutionContext) SamzaSqlApplicationConfig(org.apache.samza.sql.runner.SamzaSqlApplicationConfig) MetricsRegistry(org.apache.samza.metrics.MetricsRegistry) RelNode(org.apache.calcite.rel.RelNode) SamzaException(org.apache.samza.SamzaException) ApplicationTaskContextFactory(org.apache.samza.context.ApplicationTaskContextFactory) SqlIOConfig(org.apache.samza.sql.interfaces.SqlIOConfig) Context(org.apache.samza.context.Context) LogicalAggregate(org.apache.calcite.rel.logical.LogicalAggregate) Validate(org.apache.commons.lang3.Validate) SamzaSqlQueryParser(org.apache.samza.sql.util.SamzaSqlQueryParser) StreamApplicationDescriptor(org.apache.samza.application.descriptors.StreamApplicationDescriptor) SamzaSqlInputMessage(org.apache.samza.sql.SamzaSqlInputMessage) Optional(java.util.Optional) SamzaSqlApplicationContext(org.apache.samza.sql.runner.SamzaSqlApplicationContext) VisibleForTesting(com.google.common.annotations.VisibleForTesting) KVSerde(org.apache.samza.serializers.KVSerde) OutputStream(org.apache.samza.operators.OutputStream) Table(org.apache.samza.table.Table) OutputStream(org.apache.samza.operators.OutputStream) KV(org.apache.samza.operators.KV) SamzaSqlInputMessage(org.apache.samza.sql.SamzaSqlInputMessage) SamzaException(org.apache.samza.SamzaException) TableDescriptor(org.apache.samza.table.descriptors.TableDescriptor) DelegatingSystemDescriptor(org.apache.samza.system.descriptors.DelegatingSystemDescriptor) SamzaSqlRelMessage(org.apache.samza.sql.data.SamzaSqlRelMessage)

Example 5 with OutputStream

use of org.apache.samza.operators.OutputStream in project samza by apache.

the class SessionWindowApp method describe.

@Override
public void describe(StreamApplicationDescriptor appDescriptor) {
    JsonSerdeV2<PageView> inputSerde = new JsonSerdeV2<>(PageView.class);
    KVSerde<String, Integer> outputSerde = KVSerde.of(new StringSerde(), new IntegerSerde());
    KafkaSystemDescriptor ksd = new KafkaSystemDescriptor(SYSTEM);
    KafkaInputDescriptor<PageView> id = ksd.getInputDescriptor(INPUT_TOPIC, inputSerde);
    KafkaOutputDescriptor<KV<String, Integer>> od = ksd.getOutputDescriptor(OUTPUT_TOPIC, outputSerde);
    MessageStream<PageView> pageViews = appDescriptor.getInputStream(id);
    OutputStream<KV<String, Integer>> outputStream = appDescriptor.getOutputStream(od);
    pageViews.filter(m -> !FILTER_KEY.equals(m.getUserId())).window(Windows.keyedSessionWindow(PageView::getUserId, Duration.ofSeconds(3), new StringSerde(), new JsonSerdeV2<>(PageView.class)), "sessionWindow").map(m -> KV.of(m.getKey().getKey(), m.getMessage().size())).sendTo(outputStream);
}
Also used : ApplicationRunner(org.apache.samza.runtime.ApplicationRunner) Windows(org.apache.samza.operators.windows.Windows) KafkaInputDescriptor(org.apache.samza.system.kafka.descriptors.KafkaInputDescriptor) CommandLine(org.apache.samza.util.CommandLine) KafkaSystemDescriptor(org.apache.samza.system.kafka.descriptors.KafkaSystemDescriptor) PageView(org.apache.samza.test.operator.data.PageView) StringSerde(org.apache.samza.serializers.StringSerde) KafkaOutputDescriptor(org.apache.samza.system.kafka.descriptors.KafkaOutputDescriptor) StreamApplicationDescriptor(org.apache.samza.application.descriptors.StreamApplicationDescriptor) Duration(java.time.Duration) Config(org.apache.samza.config.Config) ApplicationRunners(org.apache.samza.runtime.ApplicationRunners) JsonSerdeV2(org.apache.samza.serializers.JsonSerdeV2) KVSerde(org.apache.samza.serializers.KVSerde) StreamApplication(org.apache.samza.application.StreamApplication) KV(org.apache.samza.operators.KV) OutputStream(org.apache.samza.operators.OutputStream) IntegerSerde(org.apache.samza.serializers.IntegerSerde) MessageStream(org.apache.samza.operators.MessageStream) PageView(org.apache.samza.test.operator.data.PageView) StringSerde(org.apache.samza.serializers.StringSerde) KV(org.apache.samza.operators.KV) KafkaSystemDescriptor(org.apache.samza.system.kafka.descriptors.KafkaSystemDescriptor) JsonSerdeV2(org.apache.samza.serializers.JsonSerdeV2) IntegerSerde(org.apache.samza.serializers.IntegerSerde)

Aggregations

MessageStream (org.apache.samza.operators.MessageStream)17 OutputStream (org.apache.samza.operators.OutputStream)17 Config (org.apache.samza.config.Config)16 Duration (java.time.Duration)14 KV (org.apache.samza.operators.KV)12 Windows (org.apache.samza.operators.windows.Windows)12 KVSerde (org.apache.samza.serializers.KVSerde)12 ApplicationRunner (org.apache.samza.runtime.ApplicationRunner)11 StringSerde (org.apache.samza.serializers.StringSerde)11 HashMap (java.util.HashMap)10 Map (java.util.Map)10 Collections (java.util.Collections)9 JobConfig (org.apache.samza.config.JobConfig)9 MapConfig (org.apache.samza.config.MapConfig)9 JoinFunction (org.apache.samza.operators.functions.JoinFunction)9 JsonSerdeV2 (org.apache.samza.serializers.JsonSerdeV2)9 Test (org.junit.Test)9 Collection (java.util.Collection)8 List (java.util.List)8 Set (java.util.Set)8