Search in sources:

Example 21 with StreamApplicationDescriptorImpl

use of org.apache.samza.application.descriptors.StreamApplicationDescriptorImpl in project samza by apache.

From the class TestJoinOperator, method joinRetainsLatestMessageForKeyReverse:

/**
 * Feeds the second stream twice (values {@code n}, then {@code 2 * n}, under the
 * same keys) before feeding the first stream, and checks that the joined output
 * sums to 165.
 *
 * <p>NOTE(review): the expected total presumably relies on {@code numbers} being
 * 1..10 and on {@code TestJoinFunction} adding the two joined values; neither is
 * visible from this method, so confirm against their definitions.
 */
@Test
public void joinRetainsLatestMessageForKeyReverse() throws Exception {
    StreamApplicationDescriptorImpl appDescriptor = this.getTestJoinStreamGraph(new TestJoinFunction());
    StreamOperatorTask task = createStreamOperatorTask(new SystemClock(), appDescriptor);
    List<Integer> joined = new ArrayList<>();
    MessageCollector collector = envelope -> joined.add((Integer) envelope.getMessage());

    // First pass over the second stream: value equals key.
    for (Integer n : numbers) {
        task.processAsync(new SecondStreamIME(n, n), collector, taskCoordinator, taskCallback);
    }
    // Second pass over the second stream: same keys, doubled values.
    for (Integer n : numbers) {
        task.processAsync(new SecondStreamIME(n, 2 * n), collector, taskCoordinator, taskCallback);
    }
    // Finally feed the first stream with the same keys.
    for (Integer n : numbers) {
        task.processAsync(new FirstStreamIME(n, n), collector, taskCoordinator, taskCallback);
    }

    int total = 0;
    for (Integer value : joined) {
        total += value;
    }
    // The total only matches if the later second-stream values were the ones joined.
    assertEquals(165, total);
}
Also used : StreamApplicationDescriptorImpl(org.apache.samza.application.descriptors.StreamApplicationDescriptorImpl) StreamOperatorTask(org.apache.samza.task.StreamOperatorTask) GenericSystemDescriptor(org.apache.samza.system.descriptors.GenericSystemDescriptor) TaskModel(org.apache.samza.job.model.TaskModel) HashMap(java.util.HashMap) SystemStreamPartition(org.apache.samza.system.SystemStreamPartition) GenericInputDescriptor(org.apache.samza.system.descriptors.GenericInputDescriptor) ArrayList(java.util.ArrayList) StreamTestUtils(org.apache.samza.testUtils.StreamTestUtils) MessageCollector(org.apache.samza.task.MessageCollector) SystemStream(org.apache.samza.system.SystemStream) Matchers.eq(org.mockito.Matchers.eq) MockContext(org.apache.samza.context.MockContext) Duration(java.time.Duration) Map(java.util.Map) MapConfig(org.apache.samza.config.MapConfig) IntegerSerde(org.apache.samza.serializers.IntegerSerde) TaskCallback(org.apache.samza.task.TaskCallback) ImmutableSet(com.google.common.collect.ImmutableSet) IncomingMessageEnvelope(org.apache.samza.system.IncomingMessageEnvelope) Partition(org.apache.samza.Partition) Set(java.util.Set) Assert.assertTrue(org.junit.Assert.assertTrue) Clock(org.apache.samza.util.Clock) Test(org.junit.Test) IOException(java.io.IOException) Mockito.times(org.mockito.Mockito.times) Mockito.when(org.mockito.Mockito.when) JoinFunction(org.apache.samza.operators.functions.JoinFunction) SamzaException(org.apache.samza.SamzaException) TaskCoordinator(org.apache.samza.task.TaskCoordinator) Mockito.verify(org.mockito.Mockito.verify) Context(org.apache.samza.context.Context) TimestampedValueSerde(org.apache.samza.operators.impl.store.TimestampedValueSerde) Matchers.any(org.mockito.Matchers.any) List(java.util.List) SystemClock(org.apache.samza.util.SystemClock) TestClock(org.apache.samza.testUtils.TestClock) OutgoingMessageEnvelope(org.apache.samza.system.OutgoingMessageEnvelope) 
TestInMemoryStore(org.apache.samza.operators.impl.store.TestInMemoryStore) Config(org.apache.samza.config.Config) KVSerde(org.apache.samza.serializers.KVSerde) Assert.assertEquals(org.junit.Assert.assertEquals) MetricsRegistryMap(org.apache.samza.metrics.MetricsRegistryMap) Mockito.mock(org.mockito.Mockito.mock) SystemClock(org.apache.samza.util.SystemClock) StreamApplicationDescriptorImpl(org.apache.samza.application.descriptors.StreamApplicationDescriptorImpl) StreamOperatorTask(org.apache.samza.task.StreamOperatorTask) MessageCollector(org.apache.samza.task.MessageCollector) ArrayList(java.util.ArrayList) Test(org.junit.Test)

Example 22 with StreamApplicationDescriptorImpl

use of org.apache.samza.application.descriptors.StreamApplicationDescriptorImpl in project samza by apache.

From the class TestJoinOperator, method joinWithSelfThrowsException:

/**
 * Builds an application descriptor whose single input stream is joined with
 * itself and expects a {@link SamzaException} to surface from the test
 * (declared via {@code expected} on the annotation).
 */
@Test(expected = SamzaException.class)
public void joinWithSelfThrowsException() throws Exception {
    // Minimal job configuration plus the stream configs for the one input stream.
    Map<String, String> rawConfig = new HashMap<>();
    rawConfig.put("job.name", "jobName");
    rawConfig.put("job.id", "jobId");
    StreamTestUtils.addStreamConfigs(rawConfig, "inStream", "insystem", "instream");
    Config jobConfig = new MapConfig(rawConfig);

    StreamApplicationDescriptorImpl selfJoinDescriptor = new StreamApplicationDescriptorImpl(descriptor -> {
        IntegerSerde intSerde = new IntegerSerde();
        KVSerde<Integer, Integer> pairSerde = KVSerde.of(intSerde, intSerde);
        GenericSystemDescriptor systemDescriptor = new GenericSystemDescriptor("insystem", "mockFactoryClassName");
        GenericInputDescriptor<KV<Integer, Integer>> input = systemDescriptor.getInputDescriptor("inStream", pairSerde);
        MessageStream<KV<Integer, Integer>> stream = descriptor.getInputStream(input);
        // Both sides of the join are the very same stream instance.
        stream.join(stream, new TestJoinFunction(), intSerde, pairSerde, pairSerde, JOIN_TTL, "join");
    }, jobConfig);

    // Creating the task is the last step; the expected exception must have been raised by this point.
    createStreamOperatorTask(new SystemClock(), selfJoinDescriptor);
}
Also used : SystemClock(org.apache.samza.util.SystemClock) HashMap(java.util.HashMap) MapConfig(org.apache.samza.config.MapConfig) Config(org.apache.samza.config.Config) IntegerSerde(org.apache.samza.serializers.IntegerSerde) StreamApplicationDescriptorImpl(org.apache.samza.application.descriptors.StreamApplicationDescriptorImpl) MapConfig(org.apache.samza.config.MapConfig) GenericSystemDescriptor(org.apache.samza.system.descriptors.GenericSystemDescriptor) Test(org.junit.Test)

Example 23 with StreamApplicationDescriptorImpl

use of org.apache.samza.application.descriptors.StreamApplicationDescriptorImpl in project samza by apache.

From the class TestJoinOperator, method joinFnInitAndClose:

/**
 * Runs first-stream messages through a task built around a specific
 * {@code TestJoinFunction} instance, closes the task, and then checks that
 * (a) nothing was sent to the collector and (b) that instance itself recorded
 * zero init and zero close calls.
 */
@Test
public void joinFnInitAndClose() throws Exception {
    TestJoinFunction originalJoinFn = new TestJoinFunction();
    StreamApplicationDescriptorImpl appDescriptor = this.getTestJoinStreamGraph(originalJoinFn);
    StreamOperatorTask task = createStreamOperatorTask(new SystemClock(), appDescriptor);
    MessageCollector collector = mock(MessageCollector.class);

    // Only the first stream receives messages in this test.
    for (Integer n : numbers) {
        task.processAsync(new FirstStreamIME(n, n), collector, taskCoordinator, taskCallback);
    }

    // Close the task explicitly before checking the collector and the counters.
    task.close();

    // No envelope should have reached the collector.
    verify(collector, times(0)).send(any(OutgoingMessageEnvelope.class));

    // The instance handed to the graph must be untouched; presumably the task
    // works on a copy of the join function rather than on this reference.
    assertEquals(0, originalJoinFn.getNumInitCalls());
    assertEquals(0, originalJoinFn.getNumCloseCalls());
}
Also used : SystemClock(org.apache.samza.util.SystemClock) StreamApplicationDescriptorImpl(org.apache.samza.application.descriptors.StreamApplicationDescriptorImpl) StreamOperatorTask(org.apache.samza.task.StreamOperatorTask) MessageCollector(org.apache.samza.task.MessageCollector) OutgoingMessageEnvelope(org.apache.samza.system.OutgoingMessageEnvelope) Test(org.junit.Test)

Example 24 with StreamApplicationDescriptorImpl

use of org.apache.samza.application.descriptors.StreamApplicationDescriptorImpl in project samza by apache.

From the class TestJoinOperator, method joinRetainsMatchedMessagesReverse:

/**
 * Feeds the first stream, then the second stream with the same keys, and checks
 * that the joined output sums to 110. It then replays the second stream and
 * checks that the output sums to 110 again.
 */
@Test
public void joinRetainsMatchedMessagesReverse() throws Exception {
    StreamApplicationDescriptorImpl appDescriptor = this.getTestJoinStreamGraph(new TestJoinFunction());
    StreamOperatorTask task = createStreamOperatorTask(new SystemClock(), appDescriptor);
    List<Integer> joined = new ArrayList<>();
    MessageCollector collector = envelope -> joined.add((Integer) envelope.getMessage());

    // First stream goes in first.
    for (Integer n : numbers) {
        task.processAsync(new FirstStreamIME(n, n), collector, taskCoordinator, taskCallback);
    }
    // Then the second stream, using the same keys.
    for (Integer n : numbers) {
        task.processAsync(new SecondStreamIME(n, n), collector, taskCoordinator, taskCallback);
    }

    int firstRoundTotal = 0;
    for (Integer value : joined) {
        firstRoundTotal += value;
    }
    assertEquals(110, firstRoundTotal);

    // Start the second round from an empty output list.
    joined.clear();

    // Replay the second stream with identical keys and values.
    for (Integer n : numbers) {
        task.processAsync(new SecondStreamIME(n, n), collector, taskCoordinator, taskCallback);
    }

    int secondRoundTotal = 0;
    for (Integer value : joined) {
        secondRoundTotal += value;
    }
    // The replay must yield the same total as the first round.
    assertEquals(110, secondRoundTotal);
}
Also used : StreamApplicationDescriptorImpl(org.apache.samza.application.descriptors.StreamApplicationDescriptorImpl) StreamOperatorTask(org.apache.samza.task.StreamOperatorTask) GenericSystemDescriptor(org.apache.samza.system.descriptors.GenericSystemDescriptor) TaskModel(org.apache.samza.job.model.TaskModel) HashMap(java.util.HashMap) SystemStreamPartition(org.apache.samza.system.SystemStreamPartition) GenericInputDescriptor(org.apache.samza.system.descriptors.GenericInputDescriptor) ArrayList(java.util.ArrayList) StreamTestUtils(org.apache.samza.testUtils.StreamTestUtils) MessageCollector(org.apache.samza.task.MessageCollector) SystemStream(org.apache.samza.system.SystemStream) Matchers.eq(org.mockito.Matchers.eq) MockContext(org.apache.samza.context.MockContext) Duration(java.time.Duration) Map(java.util.Map) MapConfig(org.apache.samza.config.MapConfig) IntegerSerde(org.apache.samza.serializers.IntegerSerde) TaskCallback(org.apache.samza.task.TaskCallback) ImmutableSet(com.google.common.collect.ImmutableSet) IncomingMessageEnvelope(org.apache.samza.system.IncomingMessageEnvelope) Partition(org.apache.samza.Partition) Set(java.util.Set) Assert.assertTrue(org.junit.Assert.assertTrue) Clock(org.apache.samza.util.Clock) Test(org.junit.Test) IOException(java.io.IOException) Mockito.times(org.mockito.Mockito.times) Mockito.when(org.mockito.Mockito.when) JoinFunction(org.apache.samza.operators.functions.JoinFunction) SamzaException(org.apache.samza.SamzaException) TaskCoordinator(org.apache.samza.task.TaskCoordinator) Mockito.verify(org.mockito.Mockito.verify) Context(org.apache.samza.context.Context) TimestampedValueSerde(org.apache.samza.operators.impl.store.TimestampedValueSerde) Matchers.any(org.mockito.Matchers.any) List(java.util.List) SystemClock(org.apache.samza.util.SystemClock) TestClock(org.apache.samza.testUtils.TestClock) OutgoingMessageEnvelope(org.apache.samza.system.OutgoingMessageEnvelope) 
TestInMemoryStore(org.apache.samza.operators.impl.store.TestInMemoryStore) Config(org.apache.samza.config.Config) KVSerde(org.apache.samza.serializers.KVSerde) Assert.assertEquals(org.junit.Assert.assertEquals) MetricsRegistryMap(org.apache.samza.metrics.MetricsRegistryMap) Mockito.mock(org.mockito.Mockito.mock) SystemClock(org.apache.samza.util.SystemClock) StreamApplicationDescriptorImpl(org.apache.samza.application.descriptors.StreamApplicationDescriptorImpl) StreamOperatorTask(org.apache.samza.task.StreamOperatorTask) MessageCollector(org.apache.samza.task.MessageCollector) ArrayList(java.util.ArrayList) Test(org.junit.Test)

Example 25 with StreamApplicationDescriptorImpl

use of org.apache.samza.application.descriptors.StreamApplicationDescriptorImpl in project samza by apache.

From the class TestJoinOperator, method joinRemovesExpiredMessagesReverse:

/**
 * Feeds the second stream, moves the test clock one minute past
 * {@code JOIN_TTL}, triggers {@code window}, and then feeds the first stream
 * with the same keys. The test passes only if no joined output was collected.
 */
@Test
public void joinRemovesExpiredMessagesReverse() throws Exception {
    TestClock clock = new TestClock();
    StreamApplicationDescriptorImpl appDescriptor = this.getTestJoinStreamGraph(new TestJoinFunction());
    StreamOperatorTask task = createStreamOperatorTask(clock, appDescriptor);
    List<Integer> joined = new ArrayList<>();
    MessageCollector collector = envelope -> joined.add((Integer) envelope.getMessage());

    // Second stream goes in first.
    for (Integer n : numbers) {
        task.processAsync(new SecondStreamIME(n, n), collector, taskCoordinator, taskCallback);
    }

    // Move the clock to one minute beyond the join TTL.
    Duration pastTtl = JOIN_TTL.plus(Duration.ofMinutes(1));
    clock.advanceTime(pastTtl);

    // Trigger the window call; this is expected to expire the second-stream messages.
    task.window(collector, taskCoordinator);

    // Same keys on the first stream.
    for (Integer n : numbers) {
        task.processAsync(new FirstStreamIME(n, n), collector, taskCoordinator, taskCallback);
    }

    assertTrue(joined.isEmpty());
}
Also used : TestClock(org.apache.samza.testUtils.TestClock) StreamApplicationDescriptorImpl(org.apache.samza.application.descriptors.StreamApplicationDescriptorImpl) StreamOperatorTask(org.apache.samza.task.StreamOperatorTask) GenericSystemDescriptor(org.apache.samza.system.descriptors.GenericSystemDescriptor) TaskModel(org.apache.samza.job.model.TaskModel) HashMap(java.util.HashMap) SystemStreamPartition(org.apache.samza.system.SystemStreamPartition) GenericInputDescriptor(org.apache.samza.system.descriptors.GenericInputDescriptor) ArrayList(java.util.ArrayList) StreamTestUtils(org.apache.samza.testUtils.StreamTestUtils) MessageCollector(org.apache.samza.task.MessageCollector) SystemStream(org.apache.samza.system.SystemStream) Matchers.eq(org.mockito.Matchers.eq) MockContext(org.apache.samza.context.MockContext) Duration(java.time.Duration) Map(java.util.Map) MapConfig(org.apache.samza.config.MapConfig) IntegerSerde(org.apache.samza.serializers.IntegerSerde) TaskCallback(org.apache.samza.task.TaskCallback) ImmutableSet(com.google.common.collect.ImmutableSet) IncomingMessageEnvelope(org.apache.samza.system.IncomingMessageEnvelope) Partition(org.apache.samza.Partition) Set(java.util.Set) Assert.assertTrue(org.junit.Assert.assertTrue) Clock(org.apache.samza.util.Clock) Test(org.junit.Test) IOException(java.io.IOException) Mockito.times(org.mockito.Mockito.times) Mockito.when(org.mockito.Mockito.when) JoinFunction(org.apache.samza.operators.functions.JoinFunction) SamzaException(org.apache.samza.SamzaException) TaskCoordinator(org.apache.samza.task.TaskCoordinator) Mockito.verify(org.mockito.Mockito.verify) Context(org.apache.samza.context.Context) TimestampedValueSerde(org.apache.samza.operators.impl.store.TimestampedValueSerde) Matchers.any(org.mockito.Matchers.any) List(java.util.List) SystemClock(org.apache.samza.util.SystemClock) TestClock(org.apache.samza.testUtils.TestClock) OutgoingMessageEnvelope(org.apache.samza.system.OutgoingMessageEnvelope) 
TestInMemoryStore(org.apache.samza.operators.impl.store.TestInMemoryStore) Config(org.apache.samza.config.Config) KVSerde(org.apache.samza.serializers.KVSerde) Assert.assertEquals(org.junit.Assert.assertEquals) MetricsRegistryMap(org.apache.samza.metrics.MetricsRegistryMap) Mockito.mock(org.mockito.Mockito.mock) StreamApplicationDescriptorImpl(org.apache.samza.application.descriptors.StreamApplicationDescriptorImpl) StreamOperatorTask(org.apache.samza.task.StreamOperatorTask) MessageCollector(org.apache.samza.task.MessageCollector) ArrayList(java.util.ArrayList) Test(org.junit.Test)

Aggregations

StreamApplicationDescriptorImpl (org.apache.samza.application.descriptors.StreamApplicationDescriptorImpl)104 Test (org.junit.Test)93 MapConfig (org.apache.samza.config.MapConfig)67 Config (org.apache.samza.config.Config)62 StreamConfig (org.apache.samza.config.StreamConfig)39 HashMap (java.util.HashMap)36 Collection (java.util.Collection)32 KVSerde (org.apache.samza.serializers.KVSerde)31 JobConfig (org.apache.samza.config.JobConfig)27 GenericSystemDescriptor (org.apache.samza.system.descriptors.GenericSystemDescriptor)26 SamzaSqlApplicationConfig (org.apache.samza.sql.runner.SamzaSqlApplicationConfig)24 SamzaSqlQueryParser (org.apache.samza.sql.util.SamzaSqlQueryParser)23 SamzaSqlTestConfig (org.apache.samza.sql.util.SamzaSqlTestConfig)23 GenericInputDescriptor (org.apache.samza.system.descriptors.GenericInputDescriptor)23 Duration (java.time.Duration)20 Map (java.util.Map)20 JoinFunction (org.apache.samza.operators.functions.JoinFunction)20 Serde (org.apache.samza.serializers.Serde)20 StringSerde (org.apache.samza.serializers.StringSerde)20 IntegerSerde (org.apache.samza.serializers.IntegerSerde)19