Search in sources :

Example 1 with JsonSerdeV2

use of org.apache.samza.serializers.JsonSerdeV2 in project samza by apache.

the class TestJobGraphJsonGenerator method setUp.

/**
 * Rebuilds the fixtures used by the tests: stream specs, system/stream/table descriptors, a spied
 * job config, and a mocked {@code JobNode} stubbed with edges, config, job identity and tables.
 */
@Before
public void setUp() {
    // Key/value serde shared by every descriptor created below.
    defaultSerde = KVSerde.of(new StringSerde(), new JsonSerdeV2<>());

    // Specs for the two inputs, the output, and the repartition stream.
    input1Spec = new StreamSpec("input1", "input1", "input-system");
    input2Spec = new StreamSpec("input2", "input2", "input-system");
    outputSpec = new StreamSpec("output", "output", "output-system");
    repartitionSpec = new StreamSpec("jobName-jobId-partition_by-p1", "partition_by-p1", "intermediate-system");

    // One generic system descriptor per system, all using the same placeholder factory class name.
    inputSystemDescriptor = new GenericSystemDescriptor("input-system", "mockSystemFactoryClassName");
    outputSystemDescriptor = new GenericSystemDescriptor("output-system", "mockSystemFactoryClassName");
    intermediateSystemDescriptor = new GenericSystemDescriptor("intermediate-system", "mockSystemFactoryClassName");

    // Stream and table descriptors built on top of the systems above.
    input1Descriptor = inputSystemDescriptor.getInputDescriptor("input1", defaultSerde);
    input2Descriptor = inputSystemDescriptor.getInputDescriptor("input2", defaultSerde);
    outputDescriptor = outputSystemDescriptor.getOutputDescriptor("output", defaultSerde);
    table1Descriptor = new TestLocalTableDescriptor.MockLocalTableDescriptor("table1", defaultSerde);
    table2Descriptor = new TestLocalTableDescriptor.MockLocalTableDescriptor("table2", defaultSerde);

    // Minimal job config (name + id), wrapped in a spy.
    Map<String, String> jobConfigs = new HashMap<>();
    jobConfigs.put(JobConfig.JOB_NAME, "jobName");
    jobConfigs.put(JobConfig.JOB_ID, "jobId");
    mockConfig = spy(new MapConfig(jobConfigs));

    // Edges for each spec; only the repartition edge passes true for the first boolean flag.
    StreamEdge firstInputEdge = new StreamEdge(input1Spec, false, false, mockConfig);
    StreamEdge secondInputEdge = new StreamEdge(input2Spec, false, false, mockConfig);
    StreamEdge sinkEdge = new StreamEdge(outputSpec, false, false, mockConfig);
    StreamEdge partitionByEdge = new StreamEdge(repartitionSpec, true, false, mockConfig);

    // The repartition edge is registered as both an in-edge and an out-edge of the job node.
    Map<String, StreamEdge> inEdgesById = new HashMap<>();
    inEdgesById.put(input1Descriptor.getStreamId(), firstInputEdge);
    inEdgesById.put(input2Descriptor.getStreamId(), secondInputEdge);
    inEdgesById.put(repartitionSpec.getId(), partitionByEdge);

    Map<String, StreamEdge> outEdgesById = new HashMap<>();
    outEdgesById.put(outputDescriptor.getStreamId(), sinkEdge);
    outEdgesById.put(repartitionSpec.getId(), partitionByEdge);

    Map<String, TableDescriptor> tablesById = new HashMap<>();
    tablesById.put(table1Descriptor.getTableId(), table1Descriptor);
    tablesById.put(table2Descriptor.getTableId(), table2Descriptor);

    // Stub the mocked job node so it reports the fixtures assembled above.
    mockJobNode = mock(JobNode.class);
    when(mockJobNode.getInEdges()).thenReturn(inEdgesById);
    when(mockJobNode.getOutEdges()).thenReturn(outEdgesById);
    when(mockJobNode.getConfig()).thenReturn(mockConfig);
    when(mockJobNode.getJobName()).thenReturn("jobName");
    when(mockJobNode.getJobId()).thenReturn("jobId");
    when(mockJobNode.getJobNameAndId()).thenReturn(JobNode.createJobNameAndId("jobName", "jobId"));
    when(mockJobNode.getTables()).thenReturn(tablesById);
}
Also used : StreamSpec(org.apache.samza.system.StreamSpec) StringSerde(org.apache.samza.serializers.StringSerde) TestLocalTableDescriptor(org.apache.samza.table.descriptors.TestLocalTableDescriptor) HashMap(java.util.HashMap) JsonSerdeV2(org.apache.samza.serializers.JsonSerdeV2) TableDescriptor(org.apache.samza.table.descriptors.TableDescriptor) TestLocalTableDescriptor(org.apache.samza.table.descriptors.TestLocalTableDescriptor) MapConfig(org.apache.samza.config.MapConfig) GenericSystemDescriptor(org.apache.samza.system.descriptors.GenericSystemDescriptor) Before(org.junit.Before)

Example 2 with JsonSerdeV2

use of org.apache.samza.serializers.JsonSerdeV2 in project samza by apache.

the class ExecutionPlannerTestBase method setUp.

/**
 * Rebuilds the shared fixtures: the default serde, system and stream descriptors, a spied config
 * assembled from those descriptors, and the application descriptor under test.
 */
@Before
public void setUp() {
    // Stream ids reused as both the stream id and the physical name of the intermediate streams.
    String partitionByStreamId = "jobName-jobId-partition_by-p1";
    String broadcastStreamId = "jobName-jobId-broadcast-b1";

    defaultSerde = KVSerde.of(new StringSerde(), new JsonSerdeV2<>());

    inputSystemDescriptor = new GenericSystemDescriptor("input-system", "mockSystemFactoryClassName");
    outputSystemDescriptor = new GenericSystemDescriptor("output-system", "mockSystemFactoryClassName");
    intermediateSystemDescriptor = new GenericSystemDescriptor("intermediate-system", "mockSystemFactoryClassName");

    input1Descriptor = inputSystemDescriptor.getInputDescriptor("input1", defaultSerde);
    input2Descriptor = inputSystemDescriptor.getInputDescriptor("input2", defaultSerde);
    outputDescriptor = outputSystemDescriptor.getOutputDescriptor("output", defaultSerde);

    intermediateInputDescriptor = intermediateSystemDescriptor
        .getInputDescriptor(partitionByStreamId, defaultSerde)
        .withPhysicalName(partitionByStreamId);
    intermediateOutputDescriptor = intermediateSystemDescriptor
        .getOutputDescriptor(partitionByStreamId, defaultSerde)
        .withPhysicalName(partitionByStreamId);
    broadcastInputDesriptor = intermediateSystemDescriptor
        .getInputDescriptor(broadcastStreamId, defaultSerde)
        .withPhysicalName(broadcastStreamId);

    // Insertion order is kept as-is: later putAll calls could overwrite keys written by earlier ones.
    Map<String, String> jobConfigs = new HashMap<>();
    jobConfigs.put(JobConfig.JOB_NAME, "jobName");
    jobConfigs.put(JobConfig.JOB_ID, "jobId");
    jobConfigs.putAll(input1Descriptor.toConfig());
    jobConfigs.putAll(input2Descriptor.toConfig());
    jobConfigs.putAll(outputDescriptor.toConfig());
    jobConfigs.putAll(inputSystemDescriptor.toConfig());
    jobConfigs.putAll(outputSystemDescriptor.toConfig());
    jobConfigs.putAll(intermediateSystemDescriptor.toConfig());
    // The intermediate system is also configured as the job's default system.
    jobConfigs.put(JobConfig.JOB_DEFAULT_SYSTEM, intermediateSystemDescriptor.getSystemName());

    mockConfig = spy(new MapConfig(jobConfigs));
    mockStreamAppDesc = new StreamApplicationDescriptorImpl(getRepartitionJoinStreamApplication(), mockConfig);
}
Also used : StringSerde(org.apache.samza.serializers.StringSerde) HashMap(java.util.HashMap) StreamApplicationDescriptorImpl(org.apache.samza.application.descriptors.StreamApplicationDescriptorImpl) MapConfig(org.apache.samza.config.MapConfig) JsonSerdeV2(org.apache.samza.serializers.JsonSerdeV2) GenericSystemDescriptor(org.apache.samza.system.descriptors.GenericSystemDescriptor) Before(org.junit.Before)

Example 3 with JsonSerdeV2

use of org.apache.samza.serializers.JsonSerdeV2 in project samza by apache.

the class TestSchedulingApp method describe.

/**
 * Reads page views from the {@code PAGE_VIEWS} input on the "kafka" system, runs them through
 * {@code FlatmapScheduledFn}, and asserts that the resulting stream contains the four expected
 * "-complete" page views in any order.
 *
 * @param appDescriptor descriptor on which the input stream is obtained
 */
@Override
public void describe(StreamApplicationDescriptor appDescriptor) {
    JsonSerdeV2<PageView> pageViewSerde = new JsonSerdeV2<>(PageView.class);
    KafkaSystemDescriptor kafka = new KafkaSystemDescriptor("kafka");
    KafkaInputDescriptor<PageView> pageViewInput = kafka.getInputDescriptor(PAGE_VIEWS, pageViewSerde);

    MessageStream<PageView> incoming = appDescriptor.getInputStream(pageViewInput);
    MessageStream<PageView> scheduled = incoming.flatMap(new FlatmapScheduledFn());

    // NOTE(review): "container" in the string below looks like a typo for "contain"; it is left
    // untouched because it is runtime text passed to MessageStreamAssert.
    MessageStreamAssert
        .that("Output from scheduling function should container all complete messages", scheduled, pageViewSerde)
        .containsInAnyOrder(Arrays.asList(
            new PageView("v1-complete", "p1", "u1"),
            new PageView("v2-complete", "p2", "u1"),
            new PageView("v3-complete", "p1", "u2"),
            new PageView("v4-complete", "p3", "u2")));
}
Also used : PageView(org.apache.samza.test.operator.data.PageView) KafkaSystemDescriptor(org.apache.samza.system.kafka.descriptors.KafkaSystemDescriptor) JsonSerdeV2(org.apache.samza.serializers.JsonSerdeV2)

Example 4 with JsonSerdeV2

use of org.apache.samza.serializers.JsonSerdeV2 in project samza by apache.

the class RepartitionJoinWindowApp method describe.

/**
 * Wires up the repartition / join / window pipeline:
 * <ol>
 *   <li>reads the two input topic names and the output topic name from the application config;</li>
 *   <li>repartitions page views and ad clicks by view id and joins them;</li>
 *   <li>repartitions the joined records by user id and applies a keyed session window;</li>
 *   <li>emits, per window pane, the key and the pane's message count (as a string) to the output topic.</li>
 * </ol>
 * The ids of the three repartitioned streams are appended to {@code intermediateStreamIds}.
 *
 * @param appDescriptor descriptor supplying the config and the input streams
 */
@Override
public void describe(StreamApplicationDescriptor appDescriptor) {
    // offset.default = oldest required for tests since checkpoint topic is empty on start and messages are published
    // before the application is run
    Config appConfig = appDescriptor.getConfig();
    String pageViewTopic = appConfig.get(INPUT_TOPIC_1_CONFIG_KEY);
    String adClickTopic = appConfig.get(INPUT_TOPIC_2_CONFIG_KEY);
    String sinkTopic = appConfig.get(OUTPUT_TOPIC_CONFIG_KEY);

    KafkaSystemDescriptor kafka = new KafkaSystemDescriptor(SYSTEM);
    KafkaInputDescriptor<PageView> pageViewInput = kafka.getInputDescriptor(pageViewTopic, new JsonSerdeV2<>(PageView.class));
    KafkaInputDescriptor<AdClick> adClickInput = kafka.getInputDescriptor(adClickTopic, new JsonSerdeV2<>(AdClick.class));
    MessageStream<PageView> rawPageViews = appDescriptor.getInputStream(pageViewInput);
    MessageStream<AdClick> rawAdClicks = appDescriptor.getInputStream(adClickInput);

    // Repartition both inputs by view id, then drop the key again so the join sees plain values.
    MessageStream<KV<String, PageView>> pageViewsByViewId = rawPageViews.partitionBy(
        PageView::getViewId,
        view -> view,
        new KVSerde<>(new StringSerde(), new JsonSerdeV2<>(PageView.class)),
        "pageViewsByViewId");
    MessageStream<PageView> pageViewValues = pageViewsByViewId.map(KV::getValue);

    MessageStream<KV<String, AdClick>> adClicksByViewId = rawAdClicks.partitionBy(
        AdClick::getViewId,
        click -> click,
        new KVSerde<>(new StringSerde(), new JsonSerdeV2<>(AdClick.class)),
        "adClicksByViewId");
    MessageStream<AdClick> adClickValues = adClicksByViewId.map(KV::getValue);

    MessageStream<UserPageAdClick> joined = pageViewValues.join(
        adClickValues,
        new UserPageViewAdClicksJoiner(),
        new StringSerde(),
        new JsonSerdeV2<>(PageView.class),
        new JsonSerdeV2<>(AdClick.class),
        Duration.ofMinutes(1),
        "pageViewAdClickJoin");

    MessageStream<KV<String, UserPageAdClick>> joinedByUserId = joined.partitionBy(
        UserPageAdClick::getUserId,
        record -> record,
        KVSerde.of(new StringSerde(), new JsonSerdeV2<>(UserPageAdClick.class)),
        "userPageAdClicksByUserId");

    // NOTE(review): the sink targets the literal system name "kafka" while the descriptor above uses
    // SYSTEM; presumably the two are equal - confirm against the constant's definition.
    joinedByUserId
        .map(KV::getValue)
        .window(
            Windows.keyedSessionWindow(UserPageAdClick::getUserId, Duration.ofSeconds(3), new StringSerde(), new JsonSerdeV2<>(UserPageAdClick.class)),
            "userAdClickWindow")
        .map(pane -> KV.of(pane.getKey().getKey(), String.valueOf(pane.getMessage().size())))
        .sink((entry, collector, coordinator) -> {
            coordinator.commit(TaskCoordinator.RequestScope.ALL_TASKS_IN_CONTAINER);
            collector.send(new OutgoingMessageEnvelope(new SystemStream("kafka", sinkTopic), null, entry.getKey(), entry.getValue()));
        });

    // Record the ids of the three repartitioned streams.
    intermediateStreamIds.add(((IntermediateMessageStreamImpl) pageViewsByViewId).getStreamId());
    intermediateStreamIds.add(((IntermediateMessageStreamImpl) adClicksByViewId).getStreamId());
    intermediateStreamIds.add(((IntermediateMessageStreamImpl) joinedByUserId).getStreamId());
}
Also used : Windows(org.apache.samza.operators.windows.Windows) KafkaInputDescriptor(org.apache.samza.system.kafka.descriptors.KafkaInputDescriptor) AdClick(org.apache.samza.test.operator.data.AdClick) UserPageAdClick(org.apache.samza.test.operator.data.UserPageAdClick) KafkaSystemDescriptor(org.apache.samza.system.kafka.descriptors.KafkaSystemDescriptor) IntermediateMessageStreamImpl(org.apache.samza.operators.stream.IntermediateMessageStreamImpl) JoinFunction(org.apache.samza.operators.functions.JoinFunction) PageView(org.apache.samza.test.operator.data.PageView) TaskCoordinator(org.apache.samza.task.TaskCoordinator) ArrayList(java.util.ArrayList) StringSerde(org.apache.samza.serializers.StringSerde) List(java.util.List) StreamApplicationDescriptor(org.apache.samza.application.descriptors.StreamApplicationDescriptor) SystemStream(org.apache.samza.system.SystemStream) Duration(java.time.Duration) OutgoingMessageEnvelope(org.apache.samza.system.OutgoingMessageEnvelope) Config(org.apache.samza.config.Config) JsonSerdeV2(org.apache.samza.serializers.JsonSerdeV2) KVSerde(org.apache.samza.serializers.KVSerde) StreamApplication(org.apache.samza.application.StreamApplication) KV(org.apache.samza.operators.KV) MessageStream(org.apache.samza.operators.MessageStream) PageView(org.apache.samza.test.operator.data.PageView) StringSerde(org.apache.samza.serializers.StringSerde) Config(org.apache.samza.config.Config) SystemStream(org.apache.samza.system.SystemStream) KV(org.apache.samza.operators.KV) KafkaSystemDescriptor(org.apache.samza.system.kafka.descriptors.KafkaSystemDescriptor) JsonSerdeV2(org.apache.samza.serializers.JsonSerdeV2) AdClick(org.apache.samza.test.operator.data.AdClick) UserPageAdClick(org.apache.samza.test.operator.data.UserPageAdClick) UserPageAdClick(org.apache.samza.test.operator.data.UserPageAdClick) OutgoingMessageEnvelope(org.apache.samza.system.OutgoingMessageEnvelope)

Example 5 with JsonSerdeV2

use of org.apache.samza.serializers.JsonSerdeV2 in project samza by apache.

the class SessionWindowApp method describe.

/**
 * Builds the session-window pipeline: page views from {@code INPUT_TOPIC} are filtered to drop
 * those whose user id equals {@code FILTER_KEY}, grouped by user id with a keyed session window,
 * and each window pane is sent to {@code OUTPUT_TOPIC} as (key, number of messages in the pane).
 *
 * @param appDescriptor descriptor on which the input and output streams are obtained
 */
@Override
public void describe(StreamApplicationDescriptor appDescriptor) {
    JsonSerdeV2<PageView> pageViewSerde = new JsonSerdeV2<>(PageView.class);
    KVSerde<String, Integer> countSerde = KVSerde.of(new StringSerde(), new IntegerSerde());

    KafkaSystemDescriptor kafka = new KafkaSystemDescriptor(SYSTEM);
    KafkaInputDescriptor<PageView> pageViewInput = kafka.getInputDescriptor(INPUT_TOPIC, pageViewSerde);
    KafkaOutputDescriptor<KV<String, Integer>> countOutput = kafka.getOutputDescriptor(OUTPUT_TOPIC, countSerde);

    MessageStream<PageView> incoming = appDescriptor.getInputStream(pageViewInput);
    OutputStream<KV<String, Integer>> counts = appDescriptor.getOutputStream(countOutput);

    incoming
        .filter(view -> !FILTER_KEY.equals(view.getUserId()))
        .window(
            Windows.keyedSessionWindow(PageView::getUserId, Duration.ofSeconds(3), new StringSerde(), new JsonSerdeV2<>(PageView.class)),
            "sessionWindow")
        .map(pane -> KV.of(pane.getKey().getKey(), pane.getMessage().size()))
        .sendTo(counts);
}
Also used : ApplicationRunner(org.apache.samza.runtime.ApplicationRunner) Windows(org.apache.samza.operators.windows.Windows) KafkaInputDescriptor(org.apache.samza.system.kafka.descriptors.KafkaInputDescriptor) CommandLine(org.apache.samza.util.CommandLine) KafkaSystemDescriptor(org.apache.samza.system.kafka.descriptors.KafkaSystemDescriptor) PageView(org.apache.samza.test.operator.data.PageView) StringSerde(org.apache.samza.serializers.StringSerde) KafkaOutputDescriptor(org.apache.samza.system.kafka.descriptors.KafkaOutputDescriptor) StreamApplicationDescriptor(org.apache.samza.application.descriptors.StreamApplicationDescriptor) Duration(java.time.Duration) Config(org.apache.samza.config.Config) ApplicationRunners(org.apache.samza.runtime.ApplicationRunners) JsonSerdeV2(org.apache.samza.serializers.JsonSerdeV2) KVSerde(org.apache.samza.serializers.KVSerde) StreamApplication(org.apache.samza.application.StreamApplication) KV(org.apache.samza.operators.KV) OutputStream(org.apache.samza.operators.OutputStream) IntegerSerde(org.apache.samza.serializers.IntegerSerde) MessageStream(org.apache.samza.operators.MessageStream) PageView(org.apache.samza.test.operator.data.PageView) StringSerde(org.apache.samza.serializers.StringSerde) KV(org.apache.samza.operators.KV) KafkaSystemDescriptor(org.apache.samza.system.kafka.descriptors.KafkaSystemDescriptor) JsonSerdeV2(org.apache.samza.serializers.JsonSerdeV2) IntegerSerde(org.apache.samza.serializers.IntegerSerde)

Aggregations

JsonSerdeV2 (org.apache.samza.serializers.JsonSerdeV2): 17, Config (org.apache.samza.config.Config): 13, StringSerde (org.apache.samza.serializers.StringSerde): 13, KafkaSystemDescriptor (org.apache.samza.system.kafka.descriptors.KafkaSystemDescriptor): 13, StreamApplication (org.apache.samza.application.StreamApplication): 11, StreamApplicationDescriptor (org.apache.samza.application.descriptors.StreamApplicationDescriptor): 11, KV (org.apache.samza.operators.KV): 11, KVSerde (org.apache.samza.serializers.KVSerde): 11, KafkaInputDescriptor (org.apache.samza.system.kafka.descriptors.KafkaInputDescriptor): 11, MessageStream (org.apache.samza.operators.MessageStream): 10, ApplicationRunner (org.apache.samza.runtime.ApplicationRunner): 10, ApplicationRunners (org.apache.samza.runtime.ApplicationRunners): 10, KafkaOutputDescriptor (org.apache.samza.system.kafka.descriptors.KafkaOutputDescriptor): 10, CommandLine (org.apache.samza.util.CommandLine): 10, Duration (java.time.Duration): 9, OutputStream (org.apache.samza.operators.OutputStream): 8, Windows (org.apache.samza.operators.windows.Windows): 8, PageViewEvent (org.apache.samza.example.models.PageViewEvent): 6, HashMap (java.util.HashMap): 4, WindowPane (org.apache.samza.operators.windows.WindowPane): 4