Search in sources :

Example 76 with StreamApplicationDescriptorImpl

use of org.apache.samza.application.descriptors.StreamApplicationDescriptorImpl in project samza by apache.

the class TestJoinOperator method joinNoMatch.

/**
 * Checks that the join produces no output when the messages on the second
 * stream carry keys that differ from those sent on the first stream.
 */
@Test
public void joinNoMatch() throws Exception {
    StreamApplicationDescriptorImpl appDescriptor = this.getTestJoinStreamGraph(new TestJoinFunction());
    StreamOperatorTask operatorTask = createStreamOperatorTask(new SystemClock(), appDescriptor);
    List<Integer> joinedValues = new ArrayList<>();
    // Record every message the task emits so the result can be inspected afterwards.
    MessageCollector collector = envelope -> {
        joinedValues.add((Integer) envelope.getMessage());
    };

    // First stream: one message per number.
    for (Integer n : numbers) {
        operatorTask.processAsync(new FirstStreamIME(n, n), collector, taskCoordinator, taskCallback);
    }
    // Second stream: same values, but with each key offset by 100 so the keys differ.
    for (Integer n : numbers) {
        operatorTask.processAsync(new SecondStreamIME(n + 100, n), collector, taskCoordinator, taskCallback);
    }

    assertTrue(joinedValues.isEmpty());
}
Also used : StreamApplicationDescriptorImpl(org.apache.samza.application.descriptors.StreamApplicationDescriptorImpl) StreamOperatorTask(org.apache.samza.task.StreamOperatorTask) GenericSystemDescriptor(org.apache.samza.system.descriptors.GenericSystemDescriptor) TaskModel(org.apache.samza.job.model.TaskModel) HashMap(java.util.HashMap) SystemStreamPartition(org.apache.samza.system.SystemStreamPartition) GenericInputDescriptor(org.apache.samza.system.descriptors.GenericInputDescriptor) ArrayList(java.util.ArrayList) StreamTestUtils(org.apache.samza.testUtils.StreamTestUtils) MessageCollector(org.apache.samza.task.MessageCollector) SystemStream(org.apache.samza.system.SystemStream) Matchers.eq(org.mockito.Matchers.eq) MockContext(org.apache.samza.context.MockContext) Duration(java.time.Duration) Map(java.util.Map) MapConfig(org.apache.samza.config.MapConfig) IntegerSerde(org.apache.samza.serializers.IntegerSerde) TaskCallback(org.apache.samza.task.TaskCallback) ImmutableSet(com.google.common.collect.ImmutableSet) IncomingMessageEnvelope(org.apache.samza.system.IncomingMessageEnvelope) Partition(org.apache.samza.Partition) Set(java.util.Set) Assert.assertTrue(org.junit.Assert.assertTrue) Clock(org.apache.samza.util.Clock) Test(org.junit.Test) IOException(java.io.IOException) Mockito.times(org.mockito.Mockito.times) Mockito.when(org.mockito.Mockito.when) JoinFunction(org.apache.samza.operators.functions.JoinFunction) SamzaException(org.apache.samza.SamzaException) TaskCoordinator(org.apache.samza.task.TaskCoordinator) Mockito.verify(org.mockito.Mockito.verify) Context(org.apache.samza.context.Context) TimestampedValueSerde(org.apache.samza.operators.impl.store.TimestampedValueSerde) Matchers.any(org.mockito.Matchers.any) List(java.util.List) SystemClock(org.apache.samza.util.SystemClock) TestClock(org.apache.samza.testUtils.TestClock) OutgoingMessageEnvelope(org.apache.samza.system.OutgoingMessageEnvelope) 
TestInMemoryStore(org.apache.samza.operators.impl.store.TestInMemoryStore) Config(org.apache.samza.config.Config) KVSerde(org.apache.samza.serializers.KVSerde) Assert.assertEquals(org.junit.Assert.assertEquals) MetricsRegistryMap(org.apache.samza.metrics.MetricsRegistryMap) Mockito.mock(org.mockito.Mockito.mock) SystemClock(org.apache.samza.util.SystemClock) StreamApplicationDescriptorImpl(org.apache.samza.application.descriptors.StreamApplicationDescriptorImpl) StreamOperatorTask(org.apache.samza.task.StreamOperatorTask) MessageCollector(org.apache.samza.task.MessageCollector) ArrayList(java.util.ArrayList) Test(org.junit.Test)

Example 77 with StreamApplicationDescriptorImpl

use of org.apache.samza.application.descriptors.StreamApplicationDescriptorImpl in project samza by apache.

the class TestJoinOperator method join.

/**
 * Checks the join output when both streams receive messages with the same keys:
 * the sum of all emitted values is expected to be 110.
 */
@Test
public void join() throws Exception {
    StreamApplicationDescriptorImpl appDescriptor = this.getTestJoinStreamGraph(new TestJoinFunction());
    StreamOperatorTask operatorTask = createStreamOperatorTask(new SystemClock(), appDescriptor);
    List<Integer> joinedValues = new ArrayList<>();
    // Record every message the task emits so the result can be inspected afterwards.
    MessageCollector collector = envelope -> {
        joinedValues.add((Integer) envelope.getMessage());
    };

    // First stream: one message per number.
    for (Integer n : numbers) {
        operatorTask.processAsync(new FirstStreamIME(n, n), collector, taskCoordinator, taskCallback);
    }
    // Second stream: identical keys, so each message has a counterpart on the first stream.
    for (Integer n : numbers) {
        operatorTask.processAsync(new SecondStreamIME(n, n), collector, taskCoordinator, taskCallback);
    }

    int total = joinedValues.stream().reduce(0, Integer::sum);
    assertEquals(110, total);
}
Also used : StreamApplicationDescriptorImpl(org.apache.samza.application.descriptors.StreamApplicationDescriptorImpl) StreamOperatorTask(org.apache.samza.task.StreamOperatorTask) GenericSystemDescriptor(org.apache.samza.system.descriptors.GenericSystemDescriptor) TaskModel(org.apache.samza.job.model.TaskModel) HashMap(java.util.HashMap) SystemStreamPartition(org.apache.samza.system.SystemStreamPartition) GenericInputDescriptor(org.apache.samza.system.descriptors.GenericInputDescriptor) ArrayList(java.util.ArrayList) StreamTestUtils(org.apache.samza.testUtils.StreamTestUtils) MessageCollector(org.apache.samza.task.MessageCollector) SystemStream(org.apache.samza.system.SystemStream) Matchers.eq(org.mockito.Matchers.eq) MockContext(org.apache.samza.context.MockContext) Duration(java.time.Duration) Map(java.util.Map) MapConfig(org.apache.samza.config.MapConfig) IntegerSerde(org.apache.samza.serializers.IntegerSerde) TaskCallback(org.apache.samza.task.TaskCallback) ImmutableSet(com.google.common.collect.ImmutableSet) IncomingMessageEnvelope(org.apache.samza.system.IncomingMessageEnvelope) Partition(org.apache.samza.Partition) Set(java.util.Set) Assert.assertTrue(org.junit.Assert.assertTrue) Clock(org.apache.samza.util.Clock) Test(org.junit.Test) IOException(java.io.IOException) Mockito.times(org.mockito.Mockito.times) Mockito.when(org.mockito.Mockito.when) JoinFunction(org.apache.samza.operators.functions.JoinFunction) SamzaException(org.apache.samza.SamzaException) TaskCoordinator(org.apache.samza.task.TaskCoordinator) Mockito.verify(org.mockito.Mockito.verify) Context(org.apache.samza.context.Context) TimestampedValueSerde(org.apache.samza.operators.impl.store.TimestampedValueSerde) Matchers.any(org.mockito.Matchers.any) List(java.util.List) SystemClock(org.apache.samza.util.SystemClock) TestClock(org.apache.samza.testUtils.TestClock) OutgoingMessageEnvelope(org.apache.samza.system.OutgoingMessageEnvelope) 
TestInMemoryStore(org.apache.samza.operators.impl.store.TestInMemoryStore) Config(org.apache.samza.config.Config) KVSerde(org.apache.samza.serializers.KVSerde) Assert.assertEquals(org.junit.Assert.assertEquals) MetricsRegistryMap(org.apache.samza.metrics.MetricsRegistryMap) Mockito.mock(org.mockito.Mockito.mock) SystemClock(org.apache.samza.util.SystemClock) StreamApplicationDescriptorImpl(org.apache.samza.application.descriptors.StreamApplicationDescriptorImpl) StreamOperatorTask(org.apache.samza.task.StreamOperatorTask) MessageCollector(org.apache.samza.task.MessageCollector) ArrayList(java.util.ArrayList) Test(org.junit.Test)

Example 78 with StreamApplicationDescriptorImpl

use of org.apache.samza.application.descriptors.StreamApplicationDescriptorImpl in project samza by apache.

the class TestJoinOperator method joinNoMatchReverse.

/**
 * Mirror of the no-match case with the stream order swapped: the second stream
 * is fed first, then the first stream receives messages with different keys.
 * No output is expected.
 */
@Test
public void joinNoMatchReverse() throws Exception {
    StreamApplicationDescriptorImpl appDescriptor = this.getTestJoinStreamGraph(new TestJoinFunction());
    StreamOperatorTask operatorTask = createStreamOperatorTask(new SystemClock(), appDescriptor);
    List<Integer> joinedValues = new ArrayList<>();
    // Record every message the task emits so the result can be inspected afterwards.
    MessageCollector collector = envelope -> {
        joinedValues.add((Integer) envelope.getMessage());
    };

    // Second stream goes first this time: one message per number.
    for (Integer n : numbers) {
        operatorTask.processAsync(new SecondStreamIME(n, n), collector, taskCoordinator, taskCallback);
    }
    // First stream: same values, but with each key offset by 100 so the keys differ.
    for (Integer n : numbers) {
        operatorTask.processAsync(new FirstStreamIME(n + 100, n), collector, taskCoordinator, taskCallback);
    }

    assertTrue(joinedValues.isEmpty());
}
Also used : StreamApplicationDescriptorImpl(org.apache.samza.application.descriptors.StreamApplicationDescriptorImpl) StreamOperatorTask(org.apache.samza.task.StreamOperatorTask) GenericSystemDescriptor(org.apache.samza.system.descriptors.GenericSystemDescriptor) TaskModel(org.apache.samza.job.model.TaskModel) HashMap(java.util.HashMap) SystemStreamPartition(org.apache.samza.system.SystemStreamPartition) GenericInputDescriptor(org.apache.samza.system.descriptors.GenericInputDescriptor) ArrayList(java.util.ArrayList) StreamTestUtils(org.apache.samza.testUtils.StreamTestUtils) MessageCollector(org.apache.samza.task.MessageCollector) SystemStream(org.apache.samza.system.SystemStream) Matchers.eq(org.mockito.Matchers.eq) MockContext(org.apache.samza.context.MockContext) Duration(java.time.Duration) Map(java.util.Map) MapConfig(org.apache.samza.config.MapConfig) IntegerSerde(org.apache.samza.serializers.IntegerSerde) TaskCallback(org.apache.samza.task.TaskCallback) ImmutableSet(com.google.common.collect.ImmutableSet) IncomingMessageEnvelope(org.apache.samza.system.IncomingMessageEnvelope) Partition(org.apache.samza.Partition) Set(java.util.Set) Assert.assertTrue(org.junit.Assert.assertTrue) Clock(org.apache.samza.util.Clock) Test(org.junit.Test) IOException(java.io.IOException) Mockito.times(org.mockito.Mockito.times) Mockito.when(org.mockito.Mockito.when) JoinFunction(org.apache.samza.operators.functions.JoinFunction) SamzaException(org.apache.samza.SamzaException) TaskCoordinator(org.apache.samza.task.TaskCoordinator) Mockito.verify(org.mockito.Mockito.verify) Context(org.apache.samza.context.Context) TimestampedValueSerde(org.apache.samza.operators.impl.store.TimestampedValueSerde) Matchers.any(org.mockito.Matchers.any) List(java.util.List) SystemClock(org.apache.samza.util.SystemClock) TestClock(org.apache.samza.testUtils.TestClock) OutgoingMessageEnvelope(org.apache.samza.system.OutgoingMessageEnvelope) 
TestInMemoryStore(org.apache.samza.operators.impl.store.TestInMemoryStore) Config(org.apache.samza.config.Config) KVSerde(org.apache.samza.serializers.KVSerde) Assert.assertEquals(org.junit.Assert.assertEquals) MetricsRegistryMap(org.apache.samza.metrics.MetricsRegistryMap) Mockito.mock(org.mockito.Mockito.mock) SystemClock(org.apache.samza.util.SystemClock) StreamApplicationDescriptorImpl(org.apache.samza.application.descriptors.StreamApplicationDescriptorImpl) StreamOperatorTask(org.apache.samza.task.StreamOperatorTask) MessageCollector(org.apache.samza.task.MessageCollector) ArrayList(java.util.ArrayList) Test(org.junit.Test)

Example 79 with StreamApplicationDescriptorImpl

use of org.apache.samza.application.descriptors.StreamApplicationDescriptorImpl in project samza by apache.

the class TestPartitionByOperatorSpec method testPartitionBy.

/**
 * Checks the operator specs created by a {@code partitionBy} call on an input stream:
 * an additional input operator keyed by the generated
 * {@code <jobName>-<jobId>-partition_by-<streamName>} id, and a
 * {@link PartitionByOperatorSpec} registered on the original input operator that
 * carries the supplied key and value functions.
 */
@Test
public void testPartitionBy() {
    MapFunction<Object, String> keyFn = m -> m.toString();
    MapFunction<Object, Object> valueFn = m -> m;
    KVSerde<Object, Object> partitionBySerde = KVSerde.of(new NoOpSerde<>(), new NoOpSerde<>());
    StreamApplicationDescriptorImpl streamAppDesc = new StreamApplicationDescriptorImpl(appDesc -> {
        MessageStream inputStream = appDesc.getInputStream(testInputDescriptor);
        inputStream.partitionBy(keyFn, valueFn, partitionBySerde, testRepartitionedStreamName);
    }, getConfig());

    // Id expected for both the repartition operator and its intermediate stream.
    String expectedId = String.format("%s-%s-partition_by-%s", testJobName, testJobId, testRepartitionedStreamName);

    // Two input operators are expected: the original input and the repartitioned stream.
    Map<String, InputOperatorSpec> inputOpSpecs = streamAppDesc.getInputOperators();
    assertEquals(2, inputOpSpecs.size());
    assertTrue(inputOpSpecs.containsKey(expectedId));

    InputOperatorSpec inputOpSpec = inputOpSpecs.get(expectedId);
    assertEquals(expectedId, inputOpSpec.getStreamId());
    assertTrue(inputOpSpec.getKeySerde() instanceof NoOpSerde);
    assertTrue(inputOpSpec.getValueSerde() instanceof NoOpSerde);
    assertTrue(inputOpSpec.isKeyed());
    assertNull(inputOpSpec.getScheduledFn());
    assertNull(inputOpSpec.getWatermarkFn());

    // The first operator registered on the original input must be the partitionBy spec.
    InputOperatorSpec originInputSpec = inputOpSpecs.get(testInputDescriptor.getStreamId());
    Object firstRegisteredSpec = originInputSpec.getRegisteredOperatorSpecs().toArray()[0];
    assertTrue(firstRegisteredSpec instanceof PartitionByOperatorSpec);
    PartitionByOperatorSpec reparOpSpec = (PartitionByOperatorSpec) firstRegisteredSpec;

    // assertEquals takes (expected, actual); keeping that order makes failure messages accurate.
    assertEquals(expectedId, reparOpSpec.getOpId());
    assertEquals(keyFn, reparOpSpec.getKeyFunction());
    assertEquals(valueFn, reparOpSpec.getValueFunction());
    assertEquals(reparOpSpec.getOpId(), reparOpSpec.getOutputStream().getStreamId());
    assertNull(reparOpSpec.getScheduledFn());
    assertNull(reparOpSpec.getWatermarkFn());
}
Also used : StreamApplicationDescriptorImpl(org.apache.samza.application.descriptors.StreamApplicationDescriptorImpl) ScheduledFunction(org.apache.samza.operators.functions.ScheduledFunction) Assert.assertNotNull(org.junit.Assert.assertNotNull) Collection(java.util.Collection) GenericSystemDescriptor(org.apache.samza.system.descriptors.GenericSystemDescriptor) JobConfig(org.apache.samza.config.JobConfig) Assert.assertTrue(org.junit.Assert.assertTrue) HashMap(java.util.HashMap) Scheduler(org.apache.samza.operators.Scheduler) Serde(org.apache.samza.serializers.Serde) Test(org.junit.Test) GenericInputDescriptor(org.apache.samza.system.descriptors.GenericInputDescriptor) OperatorSpecGraph(org.apache.samza.operators.OperatorSpecGraph) MapFunction(org.apache.samza.operators.functions.MapFunction) WatermarkFunction(org.apache.samza.operators.functions.WatermarkFunction) Assert.assertNull(org.junit.Assert.assertNull) Map(java.util.Map) Config(org.apache.samza.config.Config) KVSerde(org.apache.samza.serializers.KVSerde) MapConfig(org.apache.samza.config.MapConfig) NoOpSerde(org.apache.samza.serializers.NoOpSerde) Assert.assertEquals(org.junit.Assert.assertEquals) MessageStream(org.apache.samza.operators.MessageStream) Mockito.mock(org.mockito.Mockito.mock) StreamApplicationDescriptorImpl(org.apache.samza.application.descriptors.StreamApplicationDescriptorImpl) MessageStream(org.apache.samza.operators.MessageStream) NoOpSerde(org.apache.samza.serializers.NoOpSerde) Test(org.junit.Test)

Example 80 with StreamApplicationDescriptorImpl

use of org.apache.samza.application.descriptors.StreamApplicationDescriptorImpl in project samza by apache.

the class TestExecutionPlanner method createStreamGraphWithStreamTableJoin.

/**
 * Builds the application descriptor for a sample stream-table join application.
 *
 * @return a {@link StreamApplicationDescriptorImpl} wired with three inputs, one table
 *         and one output, created with this class's {@code config}
 */
private StreamApplicationDescriptorImpl createStreamGraphWithStreamTableJoin() {
    /*
     * Example stream-table join app. Expected partition counts of intermediate streams introduced
     * by partitionBy operations are enclosed in quotes.
     *
     *    input2 (16) -> partitionBy ("32") -> send-to-table t
     *
     *                                      join-table t —————
     *                                       |                |
     *    input1 (64) -> partitionBy ("32") _|                |
     *                                                       join -> output1 (8)
     *                                                        |
     *                                      input3 (32) ——————
     */
    return new StreamApplicationDescriptorImpl(appDesc -> {
        MessageStream<KV<Object, Object>> firstInput = appDesc.getInputStream(input1Descriptor);
        MessageStream<KV<Object, Object>> secondInput = appDesc.getInputStream(input2Descriptor);
        MessageStream<KV<Object, Object>> thirdInput = appDesc.getInputStream(input3Descriptor);
        OutputStream<KV<Object, Object>> joinedOutput = appDesc.getOutputStream(output1Descriptor);
        TableDescriptor tableDesc = new TestLocalTableDescriptor.MockLocalTableDescriptor("table-id", new KVSerde(new StringSerde(), new StringSerde()));
        Table joinTable = appDesc.getTable(tableDesc);

        // input2: repartition, then write into the table.
        secondInput
            .partitionBy(kv -> kv.key, kv -> kv.value, mock(KVSerde.class), "p1")
            .sendTo(joinTable);

        // input1: repartition, join against the table, join with input3, then emit to the output.
        firstInput
            .partitionBy(kv -> kv.key, kv -> kv.value, mock(KVSerde.class), "p2")
            .join(joinTable, mock(StreamTableJoinFunction.class))
            .join(thirdInput, mock(JoinFunction.class), mock(Serde.class), mock(Serde.class), mock(Serde.class), Duration.ofHours(1), "j2")
            .sendTo(joinedOutput);
    }, config);
}
Also used : Arrays(java.util.Arrays) TaskApplicationDescriptorImpl(org.apache.samza.application.descriptors.TaskApplicationDescriptorImpl) LegacyTaskApplication(org.apache.samza.application.LegacyTaskApplication) StreamApplicationDescriptorImpl(org.apache.samza.application.descriptors.StreamApplicationDescriptorImpl) TableDescriptor(org.apache.samza.table.descriptors.TableDescriptor) GenericInputDescriptor(org.apache.samza.system.descriptors.GenericInputDescriptor) StringSerde(org.apache.samza.serializers.StringSerde) Duration(java.time.Duration) Map(java.util.Map) SamzaApplication(org.apache.samza.application.SamzaApplication) MapConfig(org.apache.samza.config.MapConfig) KV(org.apache.samza.operators.KV) NoOpSerde(org.apache.samza.serializers.NoOpSerde) Mockito.doReturn(org.mockito.Mockito.doReturn) OutputDescriptor(org.apache.samza.system.descriptors.OutputDescriptor) Table(org.apache.samza.table.Table) StreamTableJoinFunction(org.apache.samza.operators.functions.StreamTableJoinFunction) Collection(java.util.Collection) Set(java.util.Set) Collectors(java.util.stream.Collectors) List(java.util.List) Assert.assertFalse(org.junit.Assert.assertFalse) Config(org.apache.samza.config.Config) KVSerde(org.apache.samza.serializers.KVSerde) OutputStream(org.apache.samza.operators.OutputStream) SystemAdmins(org.apache.samza.system.SystemAdmins) Mockito.mock(org.mockito.Mockito.mock) SystemDescriptor(org.apache.samza.system.descriptors.SystemDescriptor) GenericSystemDescriptor(org.apache.samza.system.descriptors.GenericSystemDescriptor) JobConfig(org.apache.samza.config.JobConfig) HashMap(java.util.HashMap) Serde(org.apache.samza.serializers.Serde) TestLocalTableDescriptor(org.apache.samza.table.descriptors.TestLocalTableDescriptor) SystemStreamPartition(org.apache.samza.system.SystemStreamPartition) SystemStreamMetadata(org.apache.samza.system.SystemStreamMetadata) StreamConfig(org.apache.samza.config.StreamConfig) ArrayList(java.util.ArrayList) HashSet(java.util.HashSet) 
StreamTestUtils(org.apache.samza.testUtils.StreamTestUtils) ApplicationDescriptor(org.apache.samza.application.descriptors.ApplicationDescriptor) MessageStream(org.apache.samza.operators.MessageStream) Before(org.junit.Before) InputDescriptor(org.apache.samza.system.descriptors.InputDescriptor) Windows(org.apache.samza.operators.windows.Windows) TaskConfig(org.apache.samza.config.TaskConfig) GenericOutputDescriptor(org.apache.samza.system.descriptors.GenericOutputDescriptor) Partition(org.apache.samza.Partition) Assert.assertTrue(org.junit.Assert.assertTrue) StreamSpec(org.apache.samza.system.StreamSpec) Test(org.junit.Test) Mockito.when(org.mockito.Mockito.when) JoinFunction(org.apache.samza.operators.functions.JoinFunction) SideInputsProcessor(org.apache.samza.storage.SideInputsProcessor) SamzaException(org.apache.samza.SamzaException) SystemAdmin(org.apache.samza.system.SystemAdmin) Assert(org.junit.Assert) Collections(java.util.Collections) Assert.assertEquals(org.junit.Assert.assertEquals) KVSerde(org.apache.samza.serializers.KVSerde) StringSerde(org.apache.samza.serializers.StringSerde) Table(org.apache.samza.table.Table) StreamApplicationDescriptorImpl(org.apache.samza.application.descriptors.StreamApplicationDescriptorImpl) StreamTableJoinFunction(org.apache.samza.operators.functions.StreamTableJoinFunction) KV(org.apache.samza.operators.KV) TableDescriptor(org.apache.samza.table.descriptors.TableDescriptor) TestLocalTableDescriptor(org.apache.samza.table.descriptors.TestLocalTableDescriptor)

Aggregations

StreamApplicationDescriptorImpl (org.apache.samza.application.descriptors.StreamApplicationDescriptorImpl)104 Test (org.junit.Test)93 MapConfig (org.apache.samza.config.MapConfig)67 Config (org.apache.samza.config.Config)62 StreamConfig (org.apache.samza.config.StreamConfig)39 HashMap (java.util.HashMap)36 Collection (java.util.Collection)32 KVSerde (org.apache.samza.serializers.KVSerde)31 JobConfig (org.apache.samza.config.JobConfig)27 GenericSystemDescriptor (org.apache.samza.system.descriptors.GenericSystemDescriptor)26 SamzaSqlApplicationConfig (org.apache.samza.sql.runner.SamzaSqlApplicationConfig)24 SamzaSqlQueryParser (org.apache.samza.sql.util.SamzaSqlQueryParser)23 SamzaSqlTestConfig (org.apache.samza.sql.util.SamzaSqlTestConfig)23 GenericInputDescriptor (org.apache.samza.system.descriptors.GenericInputDescriptor)23 Duration (java.time.Duration)20 Map (java.util.Map)20 JoinFunction (org.apache.samza.operators.functions.JoinFunction)20 Serde (org.apache.samza.serializers.Serde)20 StringSerde (org.apache.samza.serializers.StringSerde)20 IntegerSerde (org.apache.samza.serializers.IntegerSerde)19