Search in sources :

Example 1 with SupplierFunction

use of org.apache.samza.operators.functions.SupplierFunction in project samza by apache.

the class TestMessageStreamImpl method testWindowWithRelaxedTypes.

/**
 * Checks that a keyed tumbling window whose key extractor and aggregator are typed on
 * {@code TestMessageEnvelope} can be applied to a stream of {@code TestInputMessageEnvelope},
 * and that applying it registers a {@code WindowOperatorSpec} holding that same window on the
 * upstream operator spec.
 */
@Test
public void testWindowWithRelaxedTypes() throws Exception {
    // Stream under test, backed entirely by mocks.
    OperatorSpec upstreamSpec = mock(OperatorSpec.class);
    StreamApplicationDescriptorImpl appDescriptor = mock(StreamApplicationDescriptorImpl.class);
    MessageStream<TestInputMessageEnvelope> source = new MessageStreamImpl<>(appDescriptor, upstreamSpec);

    // The key extractor and the aggregator accept TestMessageEnvelope, not the stream's message type.
    SupplierFunction<Integer> zero = () -> 0;
    FoldLeftFunction<TestMessageEnvelope, Integer> countFn = (envelope, count) -> count + 1;
    MapFunction<TestMessageEnvelope, String> keyFn = envelope -> envelope.getKey();

    // Must compile: TestMessageEnvelope (the functions' input) is the base class of
    // TestInputMessageEnvelope (the stream's message type M).
    Window<TestInputMessageEnvelope, String, Integer> hourlyWindow =
        Windows.keyedTumblingWindow(keyFn, Duration.ofHours(1), zero, countFn, null, mock(Serde.class));
    MessageStream<WindowPane<String, Integer>> panes = source.window(hourlyWindow, "w1");

    // Capture whatever spec the window() call chained onto the upstream spec.
    ArgumentCaptor<OperatorSpec> specCaptor = ArgumentCaptor.forClass(OperatorSpec.class);
    verify(upstreamSpec).registerNextOperatorSpec(specCaptor.capture());
    OperatorSpec<?, TestMessageEnvelope> capturedSpec = specCaptor.getValue();

    assertTrue(capturedSpec instanceof WindowOperatorSpec);
    assertEquals(OpCode.WINDOW, capturedSpec.getOpCode());
    WindowOperatorSpec windowSpec = (WindowOperatorSpec) capturedSpec;
    assertEquals(hourlyWindow, windowSpec.getWindow());
}
Also used : StreamOperatorSpec(org.apache.samza.operators.spec.StreamOperatorSpec) PartitionByOperatorSpec(org.apache.samza.operators.spec.PartitionByOperatorSpec) StreamApplicationDescriptorImpl(org.apache.samza.application.descriptors.StreamApplicationDescriptorImpl) IntermediateMessageStreamImpl(org.apache.samza.operators.stream.IntermediateMessageStreamImpl) JoinOperatorSpec(org.apache.samza.operators.spec.JoinOperatorSpec) Serde(org.apache.samza.serializers.Serde) SendToTableOperatorSpec(org.apache.samza.operators.spec.SendToTableOperatorSpec) MapFunction(org.apache.samza.operators.functions.MapFunction) OutputStreamImpl(org.apache.samza.operators.spec.OutputStreamImpl) WindowPane(org.apache.samza.operators.windows.WindowPane) OperatorSpec(org.apache.samza.operators.spec.OperatorSpec) ArgumentCaptor(org.mockito.ArgumentCaptor) ImmutableList(com.google.common.collect.ImmutableList) SinkOperatorSpec(org.apache.samza.operators.spec.SinkOperatorSpec) Matchers.eq(org.mockito.Matchers.eq) Duration(java.time.Duration) Matchers.anyObject(org.mockito.Matchers.anyObject) TestOutputMessageEnvelope(org.apache.samza.operators.data.TestOutputMessageEnvelope) TestMessageEnvelope(org.apache.samza.operators.data.TestMessageEnvelope) OpCode(org.apache.samza.operators.spec.OperatorSpec.OpCode) FilterFunction(org.apache.samza.operators.functions.FilterFunction) Windows(org.apache.samza.operators.windows.Windows) StreamTableJoinFunction(org.apache.samza.operators.functions.StreamTableJoinFunction) Assert.assertNotNull(org.junit.Assert.assertNotNull) Collection(java.util.Collection) OutputOperatorSpec(org.apache.samza.operators.spec.OutputOperatorSpec) WindowOperatorSpec(org.apache.samza.operators.spec.WindowOperatorSpec) Assert.assertTrue(org.junit.Assert.assertTrue) IOException(java.io.IOException) Test(org.junit.Test) Mockito.times(org.mockito.Mockito.times) Mockito.when(org.mockito.Mockito.when) JoinFunction(org.apache.samza.operators.functions.JoinFunction) 
FlatMapFunction(org.apache.samza.operators.functions.FlatMapFunction) Mockito.verify(org.mockito.Mockito.verify) StreamTableJoinOperatorSpec(org.apache.samza.operators.spec.StreamTableJoinOperatorSpec) Window(org.apache.samza.operators.windows.Window) SinkFunction(org.apache.samza.operators.functions.SinkFunction) KVSerde(org.apache.samza.serializers.KVSerde) Collections(java.util.Collections) FoldLeftFunction(org.apache.samza.operators.functions.FoldLeftFunction) SupplierFunction(org.apache.samza.operators.functions.SupplierFunction) Assert.assertEquals(org.junit.Assert.assertEquals) Mockito.mock(org.mockito.Mockito.mock) Serde(org.apache.samza.serializers.Serde) KVSerde(org.apache.samza.serializers.KVSerde) IntermediateMessageStreamImpl(org.apache.samza.operators.stream.IntermediateMessageStreamImpl) WindowOperatorSpec(org.apache.samza.operators.spec.WindowOperatorSpec) WindowPane(org.apache.samza.operators.windows.WindowPane) StreamOperatorSpec(org.apache.samza.operators.spec.StreamOperatorSpec) PartitionByOperatorSpec(org.apache.samza.operators.spec.PartitionByOperatorSpec) JoinOperatorSpec(org.apache.samza.operators.spec.JoinOperatorSpec) SendToTableOperatorSpec(org.apache.samza.operators.spec.SendToTableOperatorSpec) OperatorSpec(org.apache.samza.operators.spec.OperatorSpec) SinkOperatorSpec(org.apache.samza.operators.spec.SinkOperatorSpec) OutputOperatorSpec(org.apache.samza.operators.spec.OutputOperatorSpec) WindowOperatorSpec(org.apache.samza.operators.spec.WindowOperatorSpec) StreamTableJoinOperatorSpec(org.apache.samza.operators.spec.StreamTableJoinOperatorSpec) TestMessageEnvelope(org.apache.samza.operators.data.TestMessageEnvelope) StreamApplicationDescriptorImpl(org.apache.samza.application.descriptors.StreamApplicationDescriptorImpl) Test(org.junit.Test)

Example 2 with SupplierFunction

use of org.apache.samza.operators.functions.SupplierFunction in project samza by apache.

the class LogicalAggregateTranslator method translate.

/**
 * Translates a {@code LogicalAggregate} into a keyed tumbling-window count over its input stream
 * and registers the resulting stream in the translator context under the aggregate's id.
 *
 * <p>Each emitted window pane becomes a new {@code SamzaSqlRelMessage} whose fields are the key
 * record's fields followed by the first aggregate field name and the pane's folded count.
 *
 * @param aggregate the aggregate node to translate; checked by {@code validateAggregateFunctions}
 * @param context   translator context supplying the input stream and receiving the output stream
 */
void translate(final LogicalAggregate aggregate, final TranslatorContext context) {
    validateAggregateFunctions(aggregate);
    MessageStream<SamzaSqlRelMessage> inputStream = context.getMessageStream(aggregate.getInput().getId());
    // At this point, the assumption is that only count function is supported.
    SupplierFunction<Long> initialValue = () -> 0L;
    FoldLeftFunction<SamzaSqlRelMessage, Long> foldCountFn = (m, c) -> c + 1;
    final List<String> aggFieldNames = getAggFieldNames(aggregate);
    MessageStream<SamzaSqlRelMessage> outputStream = inputStream
        .map(new TranslatorInputMetricsMapFunction(logicalOpId))
        .window(Windows.keyedTumblingWindow(m -> m, Duration.ofMillis(context.getExecutionContext().getSamzaSqlApplicationConfig().getWindowDurationMs()), initialValue, foldCountFn, new SamzaSqlRelMessageSerdeFactory.SamzaSqlRelMessageSerde(), new LongSerde()).setAccumulationMode(AccumulationMode.DISCARDING), changeLogStorePrefix + "_tumblingWindow_" + logicalOpId)
        .map(windowPane -> {
            // Copy the key record's lists before appending: adding to them directly would mutate
            // the window key itself (and would fail outright if the lists are unmodifiable).
            List<String> fieldNames = new ArrayList<>(windowPane.getKey().getKey().getSamzaSqlRelRecord().getFieldNames());
            List<Object> fieldValues = new ArrayList<>(windowPane.getKey().getKey().getSamzaSqlRelRecord().getFieldValues());
            // Only the first aggregate field is used, in line with the count-only assumption above.
            fieldNames.add(aggFieldNames.get(0));
            fieldValues.add(windowPane.getMessage());
            return new SamzaSqlRelMessage(fieldNames, fieldValues, new SamzaSqlRelMsgMetadata(0L, 0L));
        });
    context.registerMessageStream(aggregate.getId(), outputStream);
    // NOTE(review): the stream returned by this map() is not used; presumably the call is made
    // only to attach the output-metrics operator to the graph — confirm.
    outputStream.map(new TranslatorOutputMetricsMapFunction(logicalOpId));
}
Also used : SqlKind(org.apache.calcite.sql.SqlKind) Windows(org.apache.samza.operators.windows.Windows) Logger(org.slf4j.Logger) SamzaSqlRelMsgMetadata(org.apache.samza.sql.data.SamzaSqlRelMsgMetadata) LoggerFactory(org.slf4j.LoggerFactory) SamzaException(org.apache.samza.SamzaException) ArrayList(java.util.ArrayList) List(java.util.List) LogicalAggregate(org.apache.calcite.rel.logical.LogicalAggregate) AccumulationMode(org.apache.samza.operators.windows.AccumulationMode) Duration(java.time.Duration) SamzaSqlRelMessage(org.apache.samza.sql.data.SamzaSqlRelMessage) SamzaSqlRelMessageSerdeFactory(org.apache.samza.sql.serializers.SamzaSqlRelMessageSerdeFactory) FoldLeftFunction(org.apache.samza.operators.functions.FoldLeftFunction) SupplierFunction(org.apache.samza.operators.functions.SupplierFunction) MessageStream(org.apache.samza.operators.MessageStream) LongSerde(org.apache.samza.serializers.LongSerde) LongSerde(org.apache.samza.serializers.LongSerde) SamzaSqlRelMsgMetadata(org.apache.samza.sql.data.SamzaSqlRelMsgMetadata) SamzaSqlRelMessage(org.apache.samza.sql.data.SamzaSqlRelMessage)

Example 3 with SupplierFunction

use of org.apache.samza.operators.functions.SupplierFunction in project samza by apache.

the class WindowExample method describe.

/**
 * Describes the windowed page-view counting pipeline: reads {@code PageViewEvent}s from the
 * "pageViewEvent" stream of the "tracking" Kafka system, counts them in 10-minute tumbling
 * windows, and sends each window's count to the "pageViewEventPerMember" stream.
 *
 * @param appDescriptor descriptor used to obtain the input and output streams
 */
@Override
public void describe(StreamApplicationDescriptor appDescriptor) {
    KafkaSystemDescriptor trackingSystem = new KafkaSystemDescriptor("tracking");
    KafkaInputDescriptor<PageViewEvent> inputStreamDescriptor = trackingSystem.getInputDescriptor("pageViewEvent", new JsonSerdeV2<>(PageViewEvent.class));
    KafkaOutputDescriptor<Integer> outputStreamDescriptor = trackingSystem.getOutputDescriptor("pageViewEventPerMember", new IntegerSerde());
    SupplierFunction<Integer> initialValue = () -> 0;
    // Null-tolerant increment: treats a missing accumulator as zero.
    FoldLeftFunction<PageViewEvent, Integer> counter = (m, c) -> c == null ? 1 : c + 1;
    MessageStream<PageViewEvent> inputStream = appDescriptor.getInputStream(inputStreamDescriptor);
    OutputStream<Integer> outputStream = appDescriptor.getOutputStream(outputStreamDescriptor);
    // Create a tumbling window that outputs the number of messages collected every 10 minutes.
    // Also emit early results if either the number of messages collected reaches 30000, or if no
    // new messages arrive for 1 minute. These are early results, so the trigger is registered
    // with setEarlyTrigger rather than setLateTrigger.
    inputStream
        .window(Windows.tumblingWindow(Duration.ofMinutes(10), initialValue, counter, new IntegerSerde())
            .setEarlyTrigger(Triggers.any(Triggers.count(30000), Triggers.timeSinceLastMessage(Duration.ofMinutes(1)))), "window")
        .map(WindowPane::getMessage)
        .sendTo(outputStream);
}
Also used : ApplicationRunner(org.apache.samza.runtime.ApplicationRunner) Windows(org.apache.samza.operators.windows.Windows) KafkaInputDescriptor(org.apache.samza.system.kafka.descriptors.KafkaInputDescriptor) CommandLine(org.apache.samza.util.CommandLine) KafkaSystemDescriptor(org.apache.samza.system.kafka.descriptors.KafkaSystemDescriptor) PageViewEvent(org.apache.samza.example.models.PageViewEvent) Triggers(org.apache.samza.operators.triggers.Triggers) WindowPane(org.apache.samza.operators.windows.WindowPane) KafkaOutputDescriptor(org.apache.samza.system.kafka.descriptors.KafkaOutputDescriptor) StreamApplicationDescriptor(org.apache.samza.application.descriptors.StreamApplicationDescriptor) Duration(java.time.Duration) Config(org.apache.samza.config.Config) ApplicationRunners(org.apache.samza.runtime.ApplicationRunners) JsonSerdeV2(org.apache.samza.serializers.JsonSerdeV2) StreamApplication(org.apache.samza.application.StreamApplication) OutputStream(org.apache.samza.operators.OutputStream) FoldLeftFunction(org.apache.samza.operators.functions.FoldLeftFunction) IntegerSerde(org.apache.samza.serializers.IntegerSerde) SupplierFunction(org.apache.samza.operators.functions.SupplierFunction) MessageStream(org.apache.samza.operators.MessageStream) PageViewEvent(org.apache.samza.example.models.PageViewEvent) KafkaSystemDescriptor(org.apache.samza.system.kafka.descriptors.KafkaSystemDescriptor) IntegerSerde(org.apache.samza.serializers.IntegerSerde)

Example 4 with SupplierFunction

use of org.apache.samza.operators.functions.SupplierFunction in project samza by apache.

the class PageViewCounterExample method describe.

/**
 * Wires up the page-view counter: consumes {@code PageViewEvent}s from the "pageViewEvent"
 * stream of the "tracking" Kafka system, folds them into a per-member count over 10-second
 * keyed tumbling windows, and publishes each pane as a {@code KV} of member id and
 * {@code PageViewCount} to the "pageViewEventPerMember" stream.
 *
 * @param appDescriptor descriptor from which the input and output streams are obtained
 */
@Override
public void describe(StreamApplicationDescriptor appDescriptor) {
    // Fold state: each window starts at zero and every event yields the previous value plus one.
    SupplierFunction<Integer> zeroCount = () -> 0;
    FoldLeftFunction<PageViewEvent, Integer> incrementCount = (event, count) -> count + 1;

    // Stream descriptors on the "tracking" Kafka system.
    KafkaSystemDescriptor kafka = new KafkaSystemDescriptor("tracking");
    KafkaInputDescriptor<PageViewEvent> pageViewInput =
        kafka.getInputDescriptor("pageViewEvent", new JsonSerdeV2<>(PageViewEvent.class));
    KafkaOutputDescriptor<KV<String, PageViewCount>> countOutput =
        kafka.getOutputDescriptor("pageViewEventPerMember", KVSerde.of(new StringSerde(), new JsonSerdeV2<>(PageViewCount.class)));

    MessageStream<PageViewEvent> pageViews = appDescriptor.getInputStream(pageViewInput);
    OutputStream<KV<String, PageViewCount>> countsPerMember = appDescriptor.getOutputStream(countOutput);

    // Window keyed by member id, with a repeating count(5) early trigger and DISCARDING accumulation;
    // both serdes are passed as null.
    pageViews
        .window(
            Windows.keyedTumblingWindow(PageViewEvent::getMemberId, Duration.ofSeconds(10), zeroCount, incrementCount, null, null)
                .setEarlyTrigger(Triggers.repeat(Triggers.count(5)))
                .setAccumulationMode(AccumulationMode.DISCARDING),
            "tumblingWindow")
        .map(pane -> KV.of(pane.getKey().getKey(), buildPageViewCount(pane)))
        .sendTo(countsPerMember);
}
Also used : ApplicationRunner(org.apache.samza.runtime.ApplicationRunner) Windows(org.apache.samza.operators.windows.Windows) KafkaInputDescriptor(org.apache.samza.system.kafka.descriptors.KafkaInputDescriptor) CommandLine(org.apache.samza.util.CommandLine) KafkaSystemDescriptor(org.apache.samza.system.kafka.descriptors.KafkaSystemDescriptor) PageViewEvent(org.apache.samza.example.models.PageViewEvent) Triggers(org.apache.samza.operators.triggers.Triggers) WindowPane(org.apache.samza.operators.windows.WindowPane) StringSerde(org.apache.samza.serializers.StringSerde) PageViewCount(org.apache.samza.example.models.PageViewCount) KafkaOutputDescriptor(org.apache.samza.system.kafka.descriptors.KafkaOutputDescriptor) StreamApplicationDescriptor(org.apache.samza.application.descriptors.StreamApplicationDescriptor) AccumulationMode(org.apache.samza.operators.windows.AccumulationMode) Duration(java.time.Duration) Config(org.apache.samza.config.Config) ApplicationRunners(org.apache.samza.runtime.ApplicationRunners) JsonSerdeV2(org.apache.samza.serializers.JsonSerdeV2) KVSerde(org.apache.samza.serializers.KVSerde) StreamApplication(org.apache.samza.application.StreamApplication) KV(org.apache.samza.operators.KV) OutputStream(org.apache.samza.operators.OutputStream) FoldLeftFunction(org.apache.samza.operators.functions.FoldLeftFunction) SupplierFunction(org.apache.samza.operators.functions.SupplierFunction) MessageStream(org.apache.samza.operators.MessageStream) StringSerde(org.apache.samza.serializers.StringSerde) PageViewEvent(org.apache.samza.example.models.PageViewEvent) KV(org.apache.samza.operators.KV) KafkaSystemDescriptor(org.apache.samza.system.kafka.descriptors.KafkaSystemDescriptor) JsonSerdeV2(org.apache.samza.serializers.JsonSerdeV2)

Aggregations

Duration (java.time.Duration)4 FoldLeftFunction (org.apache.samza.operators.functions.FoldLeftFunction)4 SupplierFunction (org.apache.samza.operators.functions.SupplierFunction)4 Windows (org.apache.samza.operators.windows.Windows)4 MessageStream (org.apache.samza.operators.MessageStream)3 WindowPane (org.apache.samza.operators.windows.WindowPane)3 StreamApplication (org.apache.samza.application.StreamApplication)2 StreamApplicationDescriptor (org.apache.samza.application.descriptors.StreamApplicationDescriptor)2 Config (org.apache.samza.config.Config)2 PageViewEvent (org.apache.samza.example.models.PageViewEvent)2 OutputStream (org.apache.samza.operators.OutputStream)2 Triggers (org.apache.samza.operators.triggers.Triggers)2 AccumulationMode (org.apache.samza.operators.windows.AccumulationMode)2 ApplicationRunner (org.apache.samza.runtime.ApplicationRunner)2 ApplicationRunners (org.apache.samza.runtime.ApplicationRunners)2 JsonSerdeV2 (org.apache.samza.serializers.JsonSerdeV2)2 KVSerde (org.apache.samza.serializers.KVSerde)2 KafkaInputDescriptor (org.apache.samza.system.kafka.descriptors.KafkaInputDescriptor)2 KafkaOutputDescriptor (org.apache.samza.system.kafka.descriptors.KafkaOutputDescriptor)2 KafkaSystemDescriptor (org.apache.samza.system.kafka.descriptors.KafkaSystemDescriptor)2