Search in sources :

Example 81 with SystemStream

use of org.apache.samza.system.SystemStream in project samza by apache.

the class TestEndOfStreamStates method testUpdate.

@Test
public void testUpdate() {
    SystemStream input = new SystemStream("system", "input");
    SystemStream intermediate = new SystemStream("system", "intermediate");
    Set<SystemStreamPartition> ssps = new HashSet<>();
    SystemStreamPartition inputPartition0 = new SystemStreamPartition(input, new Partition(0));
    SystemStreamPartition intPartition0 = new SystemStreamPartition(intermediate, new Partition(0));
    SystemStreamPartition intPartition1 = new SystemStreamPartition(intermediate, new Partition(1));
    ssps.add(inputPartition0);
    ssps.add(intPartition0);
    ssps.add(intPartition1);
    Map<SystemStream, Integer> producerCounts = new HashMap<>();
    producerCounts.put(intermediate, 2);
    EndOfStreamStates endOfStreamStates = new EndOfStreamStates(ssps, producerCounts);
    assertFalse(endOfStreamStates.isEndOfStream(input));
    assertFalse(endOfStreamStates.isEndOfStream(intermediate));
    assertFalse(endOfStreamStates.allEndOfStream());
    IncomingMessageEnvelope envelope = IncomingMessageEnvelope.buildEndOfStreamEnvelope(inputPartition0);
    endOfStreamStates.update((EndOfStreamMessage) envelope.getMessage(), envelope.getSystemStreamPartition());
    assertTrue(endOfStreamStates.isEndOfStream(input));
    assertFalse(endOfStreamStates.isEndOfStream(intermediate));
    assertFalse(endOfStreamStates.allEndOfStream());
    EndOfStreamMessage eos = new EndOfStreamMessage("task 0");
    endOfStreamStates.update(eos, intPartition0);
    endOfStreamStates.update(eos, intPartition1);
    assertFalse(endOfStreamStates.isEndOfStream(intermediate));
    assertFalse(endOfStreamStates.allEndOfStream());
    eos = new EndOfStreamMessage("task 1");
    endOfStreamStates.update(eos, intPartition0);
    endOfStreamStates.update(eos, intPartition1);
    assertTrue(endOfStreamStates.isEndOfStream(intermediate));
    assertTrue(endOfStreamStates.allEndOfStream());
}
Also used : Partition(org.apache.samza.Partition) SystemStreamPartition(org.apache.samza.system.SystemStreamPartition) HashMap(java.util.HashMap) SystemStream(org.apache.samza.system.SystemStream) IncomingMessageEnvelope(org.apache.samza.system.IncomingMessageEnvelope) EndOfStreamMessage(org.apache.samza.system.EndOfStreamMessage) SystemStreamPartition(org.apache.samza.system.SystemStreamPartition) HashSet(java.util.HashSet) Test(org.junit.Test)

Example 82 with SystemStream

use of org.apache.samza.system.SystemStream in project samza by apache.

the class TestOperatorImplGraph method testJoinChain.

@Test
public void testJoinChain() {
    String inputStreamId1 = "input1";
    String inputStreamId2 = "input2";
    String inputSystem = "input-system";
    String inputPhysicalName1 = "input-stream1";
    String inputPhysicalName2 = "input-stream2";
    HashMap<String, String> configs = new HashMap<>();
    configs.put(JobConfig.JOB_NAME, "jobName");
    configs.put(JobConfig.JOB_ID, "jobId");
    StreamTestUtils.addStreamConfigs(configs, inputStreamId1, inputSystem, inputPhysicalName1);
    StreamTestUtils.addStreamConfigs(configs, inputStreamId2, inputSystem, inputPhysicalName2);
    Config config = new MapConfig(configs);
    when(this.context.getJobContext().getConfig()).thenReturn(config);
    Integer joinKey = new Integer(1);
    Function<Object, Integer> keyFn = (Function & Serializable) m -> joinKey;
    JoinFunction testJoinFunction = new TestJoinFunction("jobName-jobId-join-j1", (BiFunction & Serializable) (m1, m2) -> KV.of(m1, m2), keyFn, keyFn);
    StreamApplicationDescriptorImpl graphSpec = new StreamApplicationDescriptorImpl(appDesc -> {
        GenericSystemDescriptor sd = new GenericSystemDescriptor(inputSystem, "mockFactoryClass");
        GenericInputDescriptor inputDescriptor1 = sd.getInputDescriptor(inputStreamId1, mock(Serde.class));
        GenericInputDescriptor inputDescriptor2 = sd.getInputDescriptor(inputStreamId2, mock(Serde.class));
        MessageStream<Object> inputStream1 = appDesc.getInputStream(inputDescriptor1);
        MessageStream<Object> inputStream2 = appDesc.getInputStream(inputDescriptor2);
        inputStream1.join(inputStream2, testJoinFunction, mock(Serde.class), mock(Serde.class), mock(Serde.class), Duration.ofHours(1), "j1");
    }, config);
    TaskName mockTaskName = mock(TaskName.class);
    TaskModel taskModel = mock(TaskModel.class);
    when(taskModel.getTaskName()).thenReturn(mockTaskName);
    when(this.context.getTaskContext().getTaskModel()).thenReturn(taskModel);
    KeyValueStore mockLeftStore = mock(KeyValueStore.class);
    when(this.context.getTaskContext().getStore(eq("jobName-jobId-join-j1-L"))).thenReturn(mockLeftStore);
    KeyValueStore mockRightStore = mock(KeyValueStore.class);
    when(this.context.getTaskContext().getStore(eq("jobName-jobId-join-j1-R"))).thenReturn(mockRightStore);
    OperatorImplGraph opImplGraph = new OperatorImplGraph(graphSpec.getOperatorSpecGraph(), this.context, mock(Clock.class));
    // verify that join function is initialized once.
    assertEquals(TestJoinFunction.getInstanceByTaskName(mockTaskName, "jobName-jobId-join-j1").numInitCalled, 1);
    InputOperatorImpl inputOpImpl1 = opImplGraph.getInputOperator(new SystemStream(inputSystem, inputPhysicalName1));
    InputOperatorImpl inputOpImpl2 = opImplGraph.getInputOperator(new SystemStream(inputSystem, inputPhysicalName2));
    PartialJoinOperatorImpl leftPartialJoinOpImpl = (PartialJoinOperatorImpl) inputOpImpl1.registeredOperators.iterator().next();
    PartialJoinOperatorImpl rightPartialJoinOpImpl = (PartialJoinOperatorImpl) inputOpImpl2.registeredOperators.iterator().next();
    assertEquals(leftPartialJoinOpImpl.getOperatorSpec(), rightPartialJoinOpImpl.getOperatorSpec());
    assertNotSame(leftPartialJoinOpImpl, rightPartialJoinOpImpl);
    // verify that left partial join operator calls getFirstKey
    Object mockLeftMessage = mock(Object.class);
    long currentTimeMillis = System.currentTimeMillis();
    when(mockLeftStore.get(eq(joinKey))).thenReturn(new TimestampedValue<>(mockLeftMessage, currentTimeMillis));
    IncomingMessageEnvelope leftMessage = new IncomingMessageEnvelope(mock(SystemStreamPartition.class), "", "", mockLeftMessage);
    inputOpImpl1.onMessage(leftMessage, mock(MessageCollector.class), mock(TaskCoordinator.class));
    // verify that right partial join operator calls getSecondKey
    Object mockRightMessage = mock(Object.class);
    when(mockRightStore.get(eq(joinKey))).thenReturn(new TimestampedValue<>(mockRightMessage, currentTimeMillis));
    IncomingMessageEnvelope rightMessage = new IncomingMessageEnvelope(mock(SystemStreamPartition.class), "", "", mockRightMessage);
    inputOpImpl2.onMessage(rightMessage, mock(MessageCollector.class), mock(TaskCoordinator.class));
    // verify that the join function apply is called with the correct messages on match
    assertEquals(((TestJoinFunction) TestJoinFunction.getInstanceByTaskName(mockTaskName, "jobName-jobId-join-j1")).joinResults.size(), 1);
    KV joinResult = (KV) ((TestJoinFunction) TestJoinFunction.getInstanceByTaskName(mockTaskName, "jobName-jobId-join-j1")).joinResults.iterator().next();
    assertEquals(joinResult.getKey(), mockLeftMessage);
    assertEquals(joinResult.getValue(), mockRightMessage);
}
Also used : StreamApplicationDescriptorImpl(org.apache.samza.application.descriptors.StreamApplicationDescriptorImpl) BiFunction(java.util.function.BiFunction) Assert.assertNotSame(org.junit.Assert.assertNotSame) TaskModel(org.apache.samza.job.model.TaskModel) TimestampedValue(org.apache.samza.util.TimestampedValue) GenericInputDescriptor(org.apache.samza.system.descriptors.GenericInputDescriptor) StringSerde(org.apache.samza.serializers.StringSerde) HashMultimap(com.google.common.collect.HashMultimap) Matchers.eq(org.mockito.Matchers.eq) After(org.junit.After) Duration(java.time.Duration) Map(java.util.Map) MapConfig(org.apache.samza.config.MapConfig) KV(org.apache.samza.operators.KV) TaskName(org.apache.samza.container.TaskName) IncomingMessageEnvelope(org.apache.samza.system.IncomingMessageEnvelope) Collection(java.util.Collection) Set(java.util.Set) Serializable(java.io.Serializable) Context(org.apache.samza.context.Context) List(java.util.List) SystemClock(org.apache.samza.util.SystemClock) Config(org.apache.samza.config.Config) KVSerde(org.apache.samza.serializers.KVSerde) OutputStream(org.apache.samza.operators.OutputStream) MetricsRegistryMap(org.apache.samza.metrics.MetricsRegistryMap) Mockito.mock(org.mockito.Mockito.mock) GenericSystemDescriptor(org.apache.samza.system.descriptors.GenericSystemDescriptor) JobConfig(org.apache.samza.config.JobConfig) ClosableFunction(org.apache.samza.operators.functions.ClosableFunction) Serde(org.apache.samza.serializers.Serde) HashMap(java.util.HashMap) SystemStreamPartition(org.apache.samza.system.SystemStreamPartition) Multimap(com.google.common.collect.Multimap) Function(java.util.function.Function) StreamConfig(org.apache.samza.config.StreamConfig) MapFunction(org.apache.samza.operators.functions.MapFunction) ArrayList(java.util.ArrayList) HashSet(java.util.HashSet) StreamTestUtils(org.apache.samza.testUtils.StreamTestUtils) MessageCollector(org.apache.samza.task.MessageCollector) SystemStream(org.apache.samza.system.SystemStream) MockContext(org.apache.samza.context.MockContext) IntegerSerde(org.apache.samza.serializers.IntegerSerde) JobModel(org.apache.samza.job.model.JobModel) MessageStream(org.apache.samza.operators.MessageStream) Before(org.junit.Before) OpCode(org.apache.samza.operators.spec.OperatorSpec.OpCode) FilterFunction(org.apache.samza.operators.functions.FilterFunction) GenericOutputDescriptor(org.apache.samza.system.descriptors.GenericOutputDescriptor) Partition(org.apache.samza.Partition) Assert.assertTrue(org.junit.Assert.assertTrue) InitableFunction(org.apache.samza.operators.functions.InitableFunction) Clock(org.apache.samza.util.Clock) Test(org.junit.Test) Mockito.when(org.mockito.Mockito.when) JoinFunction(org.apache.samza.operators.functions.JoinFunction) TaskCoordinator(org.apache.samza.task.TaskCoordinator) ContainerModel(org.apache.samza.job.model.ContainerModel) TaskContextImpl(org.apache.samza.context.TaskContextImpl) KeyValueStore(org.apache.samza.storage.kv.KeyValueStore) Collections(java.util.Collections) Assert.assertEquals(org.junit.Assert.assertEquals) StringSerde(org.apache.samza.serializers.StringSerde) KVSerde(org.apache.samza.serializers.KVSerde) Serde(org.apache.samza.serializers.Serde) IntegerSerde(org.apache.samza.serializers.IntegerSerde) Serializable(java.io.Serializable) HashMap(java.util.HashMap) MapConfig(org.apache.samza.config.MapConfig) Config(org.apache.samza.config.Config) JobConfig(org.apache.samza.config.JobConfig) StreamConfig(org.apache.samza.config.StreamConfig) IncomingMessageEnvelope(org.apache.samza.system.IncomingMessageEnvelope) KeyValueStore(org.apache.samza.storage.kv.KeyValueStore) SystemClock(org.apache.samza.util.SystemClock) Clock(org.apache.samza.util.Clock) GenericInputDescriptor(org.apache.samza.system.descriptors.GenericInputDescriptor) StreamApplicationDescriptorImpl(org.apache.samza.application.descriptors.StreamApplicationDescriptorImpl) MessageCollector(org.apache.samza.task.MessageCollector) MapConfig(org.apache.samza.config.MapConfig) SystemStream(org.apache.samza.system.SystemStream) TaskCoordinator(org.apache.samza.task.TaskCoordinator) KV(org.apache.samza.operators.KV) BiFunction(java.util.function.BiFunction) TaskName(org.apache.samza.container.TaskName) JoinFunction(org.apache.samza.operators.functions.JoinFunction) GenericSystemDescriptor(org.apache.samza.system.descriptors.GenericSystemDescriptor) TaskModel(org.apache.samza.job.model.TaskModel) SystemStreamPartition(org.apache.samza.system.SystemStreamPartition) Test(org.junit.Test)

Example 83 with SystemStream

use of org.apache.samza.system.SystemStream in project samza by apache.

the class TestOperatorImplGraph method testPartitionByChain.

@Test
public void testPartitionByChain() {
    String inputStreamId = "input";
    String inputSystem = "input-system";
    String inputPhysicalName = "input-stream";
    String outputStreamId = "output";
    String outputSystem = "output-system";
    String outputPhysicalName = "output-stream";
    String intermediateStreamId = "jobName-jobId-partition_by-p1";
    String intermediateSystem = "intermediate-system";
    HashMap<String, String> configs = new HashMap<>();
    configs.put(JobConfig.JOB_NAME, "jobName");
    configs.put(JobConfig.JOB_ID, "jobId");
    configs.put(JobConfig.JOB_DEFAULT_SYSTEM, intermediateSystem);
    StreamTestUtils.addStreamConfigs(configs, inputStreamId, inputSystem, inputPhysicalName);
    StreamTestUtils.addStreamConfigs(configs, outputStreamId, outputSystem, outputPhysicalName);
    Config config = new MapConfig(configs);
    when(this.context.getJobContext().getConfig()).thenReturn(config);
    StreamApplicationDescriptorImpl graphSpec = new StreamApplicationDescriptorImpl(appDesc -> {
        GenericSystemDescriptor isd = new GenericSystemDescriptor(inputSystem, "mockFactoryClass");
        GenericSystemDescriptor osd = new GenericSystemDescriptor(outputSystem, "mockFactoryClass");
        GenericInputDescriptor inputDescriptor = isd.getInputDescriptor(inputStreamId, mock(Serde.class));
        GenericOutputDescriptor outputDescriptor = osd.getOutputDescriptor(outputStreamId, KVSerde.of(mock(IntegerSerde.class), mock(StringSerde.class)));
        MessageStream<Object> inputStream = appDesc.getInputStream(inputDescriptor);
        OutputStream<KV<Integer, String>> outputStream = appDesc.getOutputStream(outputDescriptor);
        inputStream.partitionBy(Object::hashCode, Object::toString, KVSerde.of(mock(IntegerSerde.class), mock(StringSerde.class)), "p1").sendTo(outputStream);
    }, config);
    JobModel jobModel = mock(JobModel.class);
    ContainerModel containerModel = mock(ContainerModel.class);
    TaskModel taskModel = mock(TaskModel.class);
    when(jobModel.getContainers()).thenReturn(Collections.singletonMap("0", containerModel));
    when(containerModel.getTasks()).thenReturn(Collections.singletonMap(new TaskName("task 0"), taskModel));
    when(taskModel.getSystemStreamPartitions()).thenReturn(Collections.emptySet());
    when(((TaskContextImpl) this.context.getTaskContext()).getJobModel()).thenReturn(jobModel);
    OperatorImplGraph opImplGraph = new OperatorImplGraph(graphSpec.getOperatorSpecGraph(), this.context, mock(Clock.class));
    InputOperatorImpl inputOpImpl = opImplGraph.getInputOperator(new SystemStream(inputSystem, inputPhysicalName));
    assertEquals(1, inputOpImpl.registeredOperators.size());
    OperatorImpl partitionByOpImpl = (PartitionByOperatorImpl) inputOpImpl.registeredOperators.iterator().next();
    // is terminal but paired with an input operator
    assertEquals(0, partitionByOpImpl.registeredOperators.size());
    assertEquals(OpCode.PARTITION_BY, partitionByOpImpl.getOperatorSpec().getOpCode());
    InputOperatorImpl repartitionedInputOpImpl = opImplGraph.getInputOperator(new SystemStream(intermediateSystem, intermediateStreamId));
    assertEquals(1, repartitionedInputOpImpl.registeredOperators.size());
    OperatorImpl sendToOpImpl = (OutputOperatorImpl) repartitionedInputOpImpl.registeredOperators.iterator().next();
    assertEquals(0, sendToOpImpl.registeredOperators.size());
    assertEquals(OpCode.SEND_TO, sendToOpImpl.getOperatorSpec().getOpCode());
}
Also used : StringSerde(org.apache.samza.serializers.StringSerde) KVSerde(org.apache.samza.serializers.KVSerde) Serde(org.apache.samza.serializers.Serde) IntegerSerde(org.apache.samza.serializers.IntegerSerde) StringSerde(org.apache.samza.serializers.StringSerde) HashMap(java.util.HashMap) GenericOutputDescriptor(org.apache.samza.system.descriptors.GenericOutputDescriptor) MapConfig(org.apache.samza.config.MapConfig) Config(org.apache.samza.config.Config) JobConfig(org.apache.samza.config.JobConfig) StreamConfig(org.apache.samza.config.StreamConfig) SystemClock(org.apache.samza.util.SystemClock) Clock(org.apache.samza.util.Clock) IntegerSerde(org.apache.samza.serializers.IntegerSerde) ContainerModel(org.apache.samza.job.model.ContainerModel) GenericInputDescriptor(org.apache.samza.system.descriptors.GenericInputDescriptor) StreamApplicationDescriptorImpl(org.apache.samza.application.descriptors.StreamApplicationDescriptorImpl) JobModel(org.apache.samza.job.model.JobModel) MapConfig(org.apache.samza.config.MapConfig) SystemStream(org.apache.samza.system.SystemStream) KV(org.apache.samza.operators.KV) TaskContextImpl(org.apache.samza.context.TaskContextImpl) TaskName(org.apache.samza.container.TaskName) GenericSystemDescriptor(org.apache.samza.system.descriptors.GenericSystemDescriptor) TaskModel(org.apache.samza.job.model.TaskModel) Test(org.junit.Test)

Example 84 with SystemStream

use of org.apache.samza.system.SystemStream in project samza by apache.

the class TestOperatorImplGraph method testGetStreamToConsumerTasks.

@Test
public void testGetStreamToConsumerTasks() {
    String system = "test-system";
    String streamId0 = "test-stream-0";
    String streamId1 = "test-stream-1";
    HashMap<String, String> configs = new HashMap<>();
    configs.put(JobConfig.JOB_NAME, "test-app");
    configs.put(JobConfig.JOB_DEFAULT_SYSTEM, "test-system");
    StreamTestUtils.addStreamConfigs(configs, streamId0, system, streamId0);
    StreamTestUtils.addStreamConfigs(configs, streamId1, system, streamId1);
    Config config = new MapConfig(configs);
    when(this.context.getJobContext().getConfig()).thenReturn(config);
    SystemStreamPartition ssp0 = new SystemStreamPartition(system, streamId0, new Partition(0));
    SystemStreamPartition ssp1 = new SystemStreamPartition(system, streamId0, new Partition(1));
    SystemStreamPartition ssp2 = new SystemStreamPartition(system, streamId1, new Partition(0));
    TaskName task0 = new TaskName("Task 0");
    TaskName task1 = new TaskName("Task 1");
    Set<SystemStreamPartition> ssps = new HashSet<>();
    ssps.add(ssp0);
    ssps.add(ssp2);
    TaskModel tm0 = new TaskModel(task0, ssps, new Partition(0));
    ContainerModel cm0 = new ContainerModel("c0", Collections.singletonMap(task0, tm0));
    TaskModel tm1 = new TaskModel(task1, Collections.singleton(ssp1), new Partition(1));
    ContainerModel cm1 = new ContainerModel("c1", Collections.singletonMap(task1, tm1));
    Map<String, ContainerModel> cms = new HashMap<>();
    cms.put(cm0.getId(), cm0);
    cms.put(cm1.getId(), cm1);
    JobModel jobModel = new JobModel(config, cms);
    Multimap<SystemStream, String> streamToTasks = OperatorImplGraph.getStreamToConsumerTasks(jobModel);
    assertEquals(streamToTasks.get(ssp0.getSystemStream()).size(), 2);
    assertEquals(streamToTasks.get(ssp2.getSystemStream()).size(), 1);
}
Also used : SystemStreamPartition(org.apache.samza.system.SystemStreamPartition) Partition(org.apache.samza.Partition) HashMap(java.util.HashMap) MapConfig(org.apache.samza.config.MapConfig) Config(org.apache.samza.config.Config) JobConfig(org.apache.samza.config.JobConfig) StreamConfig(org.apache.samza.config.StreamConfig) SystemStream(org.apache.samza.system.SystemStream) ContainerModel(org.apache.samza.job.model.ContainerModel) TaskName(org.apache.samza.container.TaskName) JobModel(org.apache.samza.job.model.JobModel) MapConfig(org.apache.samza.config.MapConfig) TaskModel(org.apache.samza.job.model.TaskModel) SystemStreamPartition(org.apache.samza.system.SystemStreamPartition) HashSet(java.util.HashSet) Test(org.junit.Test)

Example 85 with SystemStream

use of org.apache.samza.system.SystemStream in project samza by apache.

the class TestMetricsSnapshotReporterFactory method testGetSystemStream.

@Test
public void testGetSystemStream() {
    Config config = new MapConfig(ImmutableMap.of("metrics.reporter.metrics-reporter.stream", "system0.stream0"));
    assertEquals(new SystemStream("system0", "stream0"), this.factory.getSystemStream(REPORTER, config));
}
Also used : Config(org.apache.samza.config.Config) MapConfig(org.apache.samza.config.MapConfig) SystemStream(org.apache.samza.system.SystemStream) MapConfig(org.apache.samza.config.MapConfig) Test(org.junit.Test)

Aggregations

SystemStream (org.apache.samza.system.SystemStream)143 HashMap (java.util.HashMap)75 Test (org.junit.Test)74 SystemStreamPartition (org.apache.samza.system.SystemStreamPartition)72 Partition (org.apache.samza.Partition)58 Map (java.util.Map)55 TaskName (org.apache.samza.container.TaskName)52 MapConfig (org.apache.samza.config.MapConfig)49 Config (org.apache.samza.config.Config)46 SystemAdmin (org.apache.samza.system.SystemAdmin)42 SystemAdmins (org.apache.samza.system.SystemAdmins)40 TaskModel (org.apache.samza.job.model.TaskModel)39 Collections (java.util.Collections)37 Set (java.util.Set)37 TaskConfig (org.apache.samza.config.TaskConfig)37 Clock (org.apache.samza.util.Clock)36 File (java.io.File)35 ImmutableMap (com.google.common.collect.ImmutableMap)34 SystemStreamPartitionMetadata (org.apache.samza.system.SystemStreamMetadata.SystemStreamPartitionMetadata)33 TaskMode (org.apache.samza.job.model.TaskMode)32