Search in sources :

Example 51 with SystemStream

use of org.apache.samza.system.SystemStream in project samza by apache.

the class TestOperatorImplGraph method testGetOutputToInputStreams.

/**
 * Checks the intermediate-stream to input-stream mapping computed by
 * {@code OperatorImplGraph.getIntermediateToInputStreamsMap}.
 *
 * The application wires three inputs through two joins and two {@code partitionBy} stages:
 * input1 and input2 are joined ("j1") and repartitioned via "p2"; input3 is repartitioned via
 * "p1" and then joined ("j2") with input2. The assertions expect the "p2" intermediate stream to
 * map back to input1 and input2, and the "p1" intermediate stream to map back to input3 only.
 */
@Test
public void testGetOutputToInputStreams() {
    String inputStreamId1 = "input1";
    String inputStreamId2 = "input2";
    String inputStreamId3 = "input3";
    String inputSystem = "input-system";
    String outputStreamId1 = "output1";
    String outputStreamId2 = "output2";
    String outputSystem = "output-system";
    // Stream ids the test expects for the two partitionBy stages ("p1" and "p2") of job "test-app".
    String intStreamId1 = "test-app-1-partition_by-p1";
    String intStreamId2 = "test-app-1-partition_by-p2";
    String intSystem = "test-system";
    HashMap<String, String> configs = new HashMap<>();
    configs.put(JobConfig.JOB_NAME, "test-app");
    // The intermediate streams are looked up below under this default system.
    configs.put(JobConfig.JOB_DEFAULT_SYSTEM, intSystem);
    StreamTestUtils.addStreamConfigs(configs, inputStreamId1, inputSystem, inputStreamId1);
    StreamTestUtils.addStreamConfigs(configs, inputStreamId2, inputSystem, inputStreamId2);
    StreamTestUtils.addStreamConfigs(configs, inputStreamId3, inputSystem, inputStreamId3);
    StreamTestUtils.addStreamConfigs(configs, outputStreamId1, outputSystem, outputStreamId1);
    StreamTestUtils.addStreamConfigs(configs, outputStreamId2, outputSystem, outputStreamId2);
    Config config = new MapConfig(configs);
    when(this.context.getJobContext().getConfig()).thenReturn(config);
    StreamApplicationDescriptorImpl graphSpec = new StreamApplicationDescriptorImpl(appDesc -> {
        GenericSystemDescriptor isd = new GenericSystemDescriptor(inputSystem, "mockFactoryClass");
        GenericInputDescriptor inputDescriptor1 = isd.getInputDescriptor(inputStreamId1, mock(Serde.class));
        GenericInputDescriptor inputDescriptor2 = isd.getInputDescriptor(inputStreamId2, mock(Serde.class));
        GenericInputDescriptor inputDescriptor3 = isd.getInputDescriptor(inputStreamId3, mock(Serde.class));
        GenericSystemDescriptor osd = new GenericSystemDescriptor(outputSystem, "mockFactoryClass");
        GenericOutputDescriptor outputDescriptor1 = osd.getOutputDescriptor(outputStreamId1, mock(Serde.class));
        GenericOutputDescriptor outputDescriptor2 = osd.getOutputDescriptor(outputStreamId2, mock(Serde.class));
        MessageStream messageStream1 = appDesc.getInputStream(inputDescriptor1).map(m -> m);
        MessageStream messageStream2 = appDesc.getInputStream(inputDescriptor2).filter(m -> true);
        // input3 is repartitioned ("p1") before it takes part in any join.
        MessageStream messageStream3 = appDesc.getInputStream(inputDescriptor3).filter(m -> true).partitionBy(m -> "m", m -> m, mock(KVSerde.class), "p1").map(m -> m);
        OutputStream<Object> outputStream1 = appDesc.getOutputStream(outputDescriptor1);
        OutputStream<Object> outputStream2 = appDesc.getOutputStream(outputDescriptor2);
        // input1 joined with input2, then repartitioned ("p2") on the way to output1.
        messageStream1.join(messageStream2, mock(JoinFunction.class), mock(Serde.class), mock(Serde.class), mock(Serde.class), Duration.ofHours(2), "j1").partitionBy(m -> "m", m -> m, mock(KVSerde.class), "p2").sendTo(outputStream1);
        messageStream3.join(messageStream2, mock(JoinFunction.class), mock(Serde.class), mock(Serde.class), mock(Serde.class), Duration.ofHours(1), "j2").sendTo(outputStream2);
    }, config);
    Multimap<SystemStream, SystemStream> outputToInput = OperatorImplGraph.getIntermediateToInputStreamsMap(graphSpec.getOperatorSpecGraph(), new StreamConfig(config));
    // "p2" sits downstream of the input1/input2 join, so both inputs are expected.
    Collection<SystemStream> inputs = outputToInput.get(new SystemStream(intSystem, intStreamId2));
    assertEquals(2, inputs.size());
    assertTrue(inputs.contains(new SystemStream(inputSystem, inputStreamId1)));
    assertTrue(inputs.contains(new SystemStream(inputSystem, inputStreamId2)));
    // "p1" sits directly downstream of input3 only.
    inputs = outputToInput.get(new SystemStream(intSystem, intStreamId1));
    assertEquals(1, inputs.size());
    assertEquals(new SystemStream(inputSystem, inputStreamId3), inputs.iterator().next());
}
Also used : StringSerde(org.apache.samza.serializers.StringSerde) KVSerde(org.apache.samza.serializers.KVSerde) Serde(org.apache.samza.serializers.Serde) IntegerSerde(org.apache.samza.serializers.IntegerSerde) StreamApplicationDescriptorImpl(org.apache.samza.application.descriptors.StreamApplicationDescriptorImpl) BiFunction(java.util.function.BiFunction) Assert.assertNotSame(org.junit.Assert.assertNotSame) TaskModel(org.apache.samza.job.model.TaskModel) TimestampedValue(org.apache.samza.util.TimestampedValue) GenericInputDescriptor(org.apache.samza.system.descriptors.GenericInputDescriptor) StringSerde(org.apache.samza.serializers.StringSerde) HashMultimap(com.google.common.collect.HashMultimap) Matchers.eq(org.mockito.Matchers.eq) After(org.junit.After) Duration(java.time.Duration) Map(java.util.Map) MapConfig(org.apache.samza.config.MapConfig) KV(org.apache.samza.operators.KV) TaskName(org.apache.samza.container.TaskName) IncomingMessageEnvelope(org.apache.samza.system.IncomingMessageEnvelope) Collection(java.util.Collection) Set(java.util.Set) Serializable(java.io.Serializable) Context(org.apache.samza.context.Context) List(java.util.List) SystemClock(org.apache.samza.util.SystemClock) Config(org.apache.samza.config.Config) KVSerde(org.apache.samza.serializers.KVSerde) OutputStream(org.apache.samza.operators.OutputStream) MetricsRegistryMap(org.apache.samza.metrics.MetricsRegistryMap) Mockito.mock(org.mockito.Mockito.mock) GenericSystemDescriptor(org.apache.samza.system.descriptors.GenericSystemDescriptor) JobConfig(org.apache.samza.config.JobConfig) ClosableFunction(org.apache.samza.operators.functions.ClosableFunction) Serde(org.apache.samza.serializers.Serde) HashMap(java.util.HashMap) SystemStreamPartition(org.apache.samza.system.SystemStreamPartition) Multimap(com.google.common.collect.Multimap) Function(java.util.function.Function) StreamConfig(org.apache.samza.config.StreamConfig) MapFunction(org.apache.samza.operators.functions.MapFunction) 
ArrayList(java.util.ArrayList) HashSet(java.util.HashSet) StreamTestUtils(org.apache.samza.testUtils.StreamTestUtils) MessageCollector(org.apache.samza.task.MessageCollector) SystemStream(org.apache.samza.system.SystemStream) MockContext(org.apache.samza.context.MockContext) IntegerSerde(org.apache.samza.serializers.IntegerSerde) JobModel(org.apache.samza.job.model.JobModel) MessageStream(org.apache.samza.operators.MessageStream) Before(org.junit.Before) OpCode(org.apache.samza.operators.spec.OperatorSpec.OpCode) FilterFunction(org.apache.samza.operators.functions.FilterFunction) GenericOutputDescriptor(org.apache.samza.system.descriptors.GenericOutputDescriptor) Partition(org.apache.samza.Partition) Assert.assertTrue(org.junit.Assert.assertTrue) InitableFunction(org.apache.samza.operators.functions.InitableFunction) Clock(org.apache.samza.util.Clock) Test(org.junit.Test) Mockito.when(org.mockito.Mockito.when) JoinFunction(org.apache.samza.operators.functions.JoinFunction) TaskCoordinator(org.apache.samza.task.TaskCoordinator) ContainerModel(org.apache.samza.job.model.ContainerModel) TaskContextImpl(org.apache.samza.context.TaskContextImpl) KeyValueStore(org.apache.samza.storage.kv.KeyValueStore) Collections(java.util.Collections) Assert.assertEquals(org.junit.Assert.assertEquals) HashMap(java.util.HashMap) GenericOutputDescriptor(org.apache.samza.system.descriptors.GenericOutputDescriptor) MapConfig(org.apache.samza.config.MapConfig) Config(org.apache.samza.config.Config) JobConfig(org.apache.samza.config.JobConfig) StreamConfig(org.apache.samza.config.StreamConfig) SystemStream(org.apache.samza.system.SystemStream) StreamConfig(org.apache.samza.config.StreamConfig) GenericInputDescriptor(org.apache.samza.system.descriptors.GenericInputDescriptor) StreamApplicationDescriptorImpl(org.apache.samza.application.descriptors.StreamApplicationDescriptorImpl) JoinFunction(org.apache.samza.operators.functions.JoinFunction) 
MessageStream(org.apache.samza.operators.MessageStream) MapConfig(org.apache.samza.config.MapConfig) GenericSystemDescriptor(org.apache.samza.system.descriptors.GenericSystemDescriptor) Test(org.junit.Test)

Example 52 with SystemStream

use of org.apache.samza.system.SystemStream in project samza by apache.

the class TestOperatorImplGraph method testLinearChain.

/**
 * Builds a single input -> filter -> map -> sendTo pipeline and walks the resulting
 * {@code OperatorImplGraph} from the input operator, asserting at each hop that exactly one
 * downstream operator is registered and that it carries the expected {@code OpCode}. The final
 * send-to operator is expected to have no downstream operators.
 */
@Test
public void testLinearChain() {
    String srcSystem = "input-system";
    String srcStreamId = "input";
    String srcPhysicalName = "input-stream";
    String sinkSystem = "output-system";
    String sinkStreamId = "output";
    String sinkPhysicalName = "output-stream";
    String defaultSystem = "intermediate-system";

    HashMap<String, String> rawConfig = new HashMap<>();
    rawConfig.put(JobConfig.JOB_NAME, "jobName");
    rawConfig.put(JobConfig.JOB_ID, "jobId");
    rawConfig.put(JobConfig.JOB_DEFAULT_SYSTEM, defaultSystem);
    StreamTestUtils.addStreamConfigs(rawConfig, srcStreamId, srcSystem, srcPhysicalName);
    StreamTestUtils.addStreamConfigs(rawConfig, sinkStreamId, sinkSystem, sinkPhysicalName);
    Config jobConfig = new MapConfig(rawConfig);
    when(this.context.getJobContext().getConfig()).thenReturn(jobConfig);

    StreamApplicationDescriptorImpl appDescriptor = new StreamApplicationDescriptorImpl(app -> {
        // Both descriptors are obtained from the same system descriptor, as in the original wiring.
        GenericSystemDescriptor systemDescriptor = new GenericSystemDescriptor(srcSystem, "mockFactoryClass");
        GenericInputDescriptor sourceDescriptor = systemDescriptor.getInputDescriptor(srcStreamId, mock(Serde.class));
        GenericOutputDescriptor sinkDescriptor = systemDescriptor.getOutputDescriptor(sinkStreamId, mock(Serde.class));
        MessageStream<Object> source = app.getInputStream(sourceDescriptor);
        OutputStream<Object> sink = app.getOutputStream(sinkDescriptor);
        FilterFunction filterFn = mock(FilterFunction.class);
        MapFunction mapFn = mock(MapFunction.class);
        source.filter(filterFn).map(mapFn).sendTo(sink);
    }, jobConfig);

    OperatorImplGraph implGraph = new OperatorImplGraph(appDescriptor.getOperatorSpecGraph(), this.context, mock(Clock.class));

    // Hop 0: the input operator, looked up by the physical stream name.
    SystemStream sourceStream = new SystemStream(srcSystem, srcPhysicalName);
    InputOperatorImpl inputImpl = implGraph.getInputOperator(sourceStream);
    assertEquals(1, inputImpl.registeredOperators.size());

    // Hop 1: filter.
    OperatorImpl filterImpl = (FlatmapOperatorImpl) inputImpl.registeredOperators.iterator().next();
    assertEquals(1, filterImpl.registeredOperators.size());
    assertEquals(OpCode.FILTER, filterImpl.getOperatorSpec().getOpCode());

    // Hop 2: map.
    OperatorImpl mapImpl = (FlatmapOperatorImpl) filterImpl.registeredOperators.iterator().next();
    assertEquals(1, mapImpl.registeredOperators.size());
    assertEquals(OpCode.MAP, mapImpl.getOperatorSpec().getOpCode());

    // Hop 3: send-to is the end of the chain.
    OperatorImpl sinkImpl = (OutputOperatorImpl) mapImpl.registeredOperators.iterator().next();
    assertEquals(0, sinkImpl.registeredOperators.size());
    assertEquals(OpCode.SEND_TO, sinkImpl.getOperatorSpec().getOpCode());
}
Also used : StringSerde(org.apache.samza.serializers.StringSerde) KVSerde(org.apache.samza.serializers.KVSerde) Serde(org.apache.samza.serializers.Serde) IntegerSerde(org.apache.samza.serializers.IntegerSerde) FilterFunction(org.apache.samza.operators.functions.FilterFunction) HashMap(java.util.HashMap) GenericOutputDescriptor(org.apache.samza.system.descriptors.GenericOutputDescriptor) MapConfig(org.apache.samza.config.MapConfig) Config(org.apache.samza.config.Config) JobConfig(org.apache.samza.config.JobConfig) StreamConfig(org.apache.samza.config.StreamConfig) SystemStream(org.apache.samza.system.SystemStream) SystemClock(org.apache.samza.util.SystemClock) Clock(org.apache.samza.util.Clock) GenericInputDescriptor(org.apache.samza.system.descriptors.GenericInputDescriptor) StreamApplicationDescriptorImpl(org.apache.samza.application.descriptors.StreamApplicationDescriptorImpl) MapConfig(org.apache.samza.config.MapConfig) GenericSystemDescriptor(org.apache.samza.system.descriptors.GenericSystemDescriptor) Test(org.junit.Test)

Example 53 with SystemStream

use of org.apache.samza.system.SystemStream in project samza by apache.

the class TestOperatorImplGraph method testGetProducerTaskCountForIntermediateStreams.

/**
 * Checks {@code OperatorImplGraph.getProducerTaskCountForIntermediateStreams} against a
 * hand-built task assignment: the expected producer count for an intermediate stream is the
 * number of distinct tasks consuming any of the input streams that feed it.
 */
@Test
public void testGetProducerTaskCountForIntermediateStreams() {
    String inputStreamId1 = "input1";
    String inputStreamId2 = "input2";
    String inputStreamId3 = "input3";
    String inputSystem1 = "system1";
    String inputSystem2 = "system2";
    SystemStream input1 = new SystemStream(inputSystem1, inputStreamId1);
    SystemStream input2 = new SystemStream(inputSystem2, inputStreamId2);
    SystemStream input3 = new SystemStream(inputSystem2, inputStreamId3);
    SystemStream int1 = new SystemStream(inputSystem1, "int1");
    SystemStream int2 = new SystemStream(inputSystem1, "int2");
    /**
     * the task assignment looks like the following:
     *
     * input1 -----> task0, task1 -----> int1
     *                                    ^
     * input2 ------> task1, task2--------|
     *                                    v
     * input3 ------> task1 -----------> int2
     */
    String task0 = "Task 0";
    String task1 = "Task 1";
    String task2 = "Task 2";
    Multimap<SystemStream, String> streamToConsumerTasks = HashMultimap.create();
    streamToConsumerTasks.put(input1, task0);
    streamToConsumerTasks.put(input1, task1);
    streamToConsumerTasks.put(input2, task1);
    streamToConsumerTasks.put(input2, task2);
    streamToConsumerTasks.put(input3, task1);
    streamToConsumerTasks.put(int1, task0);
    streamToConsumerTasks.put(int1, task1);
    streamToConsumerTasks.put(int2, task0);
    Multimap<SystemStream, SystemStream> intermediateToInputStreams = HashMultimap.create();
    intermediateToInputStreams.put(int1, input1);
    intermediateToInputStreams.put(int1, input2);
    intermediateToInputStreams.put(int2, input2);
    intermediateToInputStreams.put(int2, input3);
    Map<SystemStream, Integer> counts = OperatorImplGraph.getProducerTaskCountForIntermediateStreams(streamToConsumerTasks, intermediateToInputStreams);
    // Boxed expected values select assertEquals(Object, Object): a missing key then fails the
    // assertion (reporting null) instead of throwing a NullPointerException while unboxing.
    // int1 <- {task0, task1} (input1) U {task1, task2} (input2) = 3 distinct tasks
    assertEquals(Integer.valueOf(3), counts.get(int1));
    // int2 <- {task1, task2} (input2) U {task1} (input3) = 2 distinct tasks
    assertEquals(Integer.valueOf(2), counts.get(int2));
}
Also used : SystemStream(org.apache.samza.system.SystemStream) Test(org.junit.Test)

Example 54 with SystemStream

use of org.apache.samza.system.SystemStream in project samza by apache.

the class TestWatermarkStates method testUpdate.

/**
 * Drives a {@code WatermarkStates} instance tracking one input partition and two partitions of
 * an intermediate stream that is configured with two producers, and checks the reported
 * watermarks after each update.
 *
 * The assertions expect that an intermediate partition reports no watermark until both producer
 * tasks have sent one, that a partition's watermark is the minimum across its producers, and that
 * the stream-level watermark is the minimum across its partitions.
 */
@Test
public void testUpdate() {
    SystemStream input = new SystemStream("system", "input");
    SystemStream intermediate = new SystemStream("system", "intermediate");
    Set<SystemStreamPartition> ssps = new HashSet<>();
    SystemStreamPartition inputPartition0 = new SystemStreamPartition(input, new Partition(0));
    SystemStreamPartition intPartition0 = new SystemStreamPartition(intermediate, new Partition(0));
    SystemStreamPartition intPartition1 = new SystemStreamPartition(intermediate, new Partition(1));
    ssps.add(inputPartition0);
    ssps.add(intPartition0);
    ssps.add(intPartition1);
    // the intermediate stream has two producer tasks ("task 0" and "task 1" below)
    Map<SystemStream, Integer> producerCounts = new HashMap<>();
    producerCounts.put(intermediate, 2);
    WatermarkStates watermarkStates = new WatermarkStates(ssps, producerCounts, new MetricsRegistryMap());
    // advance watermark on input to 5
    IncomingMessageEnvelope envelope = IncomingMessageEnvelope.buildWatermarkEnvelope(inputPartition0, 5L);
    watermarkStates.update((WatermarkMessage) envelope.getMessage(), envelope.getSystemStreamPartition());
    assertEquals(5L, watermarkStates.getWatermark(input));
    assertEquals(WATERMARK_NOT_EXIST, watermarkStates.getWatermark(intermediate));
    // watermark from task 0 on int p0 to 6
    WatermarkMessage watermarkMessage = new WatermarkMessage(6L, "task 0");
    watermarkStates.update(watermarkMessage, intPartition0);
    assertEquals(WATERMARK_NOT_EXIST, watermarkStates.getWatermarkPerSSP(intPartition0));
    assertEquals(WATERMARK_NOT_EXIST, watermarkStates.getWatermark(intermediate));
    // watermark from task 1 on int p0 to 3
    watermarkMessage = new WatermarkMessage(3L, "task 1");
    watermarkStates.update(watermarkMessage, intPartition0);
    assertEquals(3L, watermarkStates.getWatermarkPerSSP(intPartition0));
    assertEquals(WATERMARK_NOT_EXIST, watermarkStates.getWatermark(intermediate));
    // watermark from task 0 on int p1 to 10
    watermarkMessage = new WatermarkMessage(10L, "task 0");
    watermarkStates.update(watermarkMessage, intPartition1);
    assertEquals(WATERMARK_NOT_EXIST, watermarkStates.getWatermarkPerSSP(intPartition1));
    assertEquals(WATERMARK_NOT_EXIST, watermarkStates.getWatermark(intermediate));
    // watermark from task 1 on int p1 to 4
    watermarkMessage = new WatermarkMessage(4L, "task 1");
    watermarkStates.update(watermarkMessage, intPartition1);
    assertEquals(4L, watermarkStates.getWatermarkPerSSP(intPartition1));
    // verify we got a watermark 3 (min) for int stream
    assertEquals(3L, watermarkStates.getWatermark(intermediate));
    // advance watermark from task 1 on int p0 to 8; p0 is now min(6, 8) = 6
    watermarkMessage = new WatermarkMessage(8L, "task 1");
    watermarkStates.update(watermarkMessage, intPartition0);
    assertEquals(6L, watermarkStates.getWatermarkPerSSP(intPartition0));
    // verify we got a watermark 4 (min) for int stream
    assertEquals(4L, watermarkStates.getWatermark(intermediate));
    // advance watermark from task 1 on int p1 to 7; p1 is now min(10, 7) = 7
    watermarkMessage = new WatermarkMessage(7L, "task 1");
    watermarkStates.update(watermarkMessage, intPartition1);
    assertEquals(7L, watermarkStates.getWatermarkPerSSP(intPartition1));
    // verify we got a watermark 6 (min) for int stream
    assertEquals(6L, watermarkStates.getWatermark(intermediate));
}
Also used : Partition(org.apache.samza.Partition) SystemStreamPartition(org.apache.samza.system.SystemStreamPartition) WatermarkMessage(org.apache.samza.system.WatermarkMessage) HashMap(java.util.HashMap) SystemStream(org.apache.samza.system.SystemStream) IncomingMessageEnvelope(org.apache.samza.system.IncomingMessageEnvelope) MetricsRegistryMap(org.apache.samza.metrics.MetricsRegistryMap) SystemStreamPartition(org.apache.samza.system.SystemStreamPartition) HashSet(java.util.HashSet) Test(org.junit.Test)

Example 55 with SystemStream

use of org.apache.samza.system.SystemStream in project samza by apache.

the class StreamAppender method setupSystem.

/**
 * Sets up everything this appender needs to ship log events to a stream: resolves the stream
 * name and system factory from config, configures the serde, optionally creates the stream,
 * then creates, registers and starts the system producer and starts the transfer thread.
 *
 * This should only be called after verifying that the {@link LoggingContextHolder} has the config.
 *
 * @throws SamzaException if no system factory is configured for the log4j system
 */
protected void setupSystem() {
    config = getConfig();
    Log4jSystemConfig log4jSystemConfig = new Log4jSystemConfig(config);
    // Only derive the stream name when one has not already been set.
    if (streamName == null) {
        streamName = getStreamName(log4jSystemConfig.getJobName(), log4jSystemConfig.getJobId());
    }
    // TODO we need the ACTUAL metrics registry, or the metrics won't get reported by the metric reporters!
    MetricsRegistry metricsRegistry = new MetricsRegistryMap();
    metrics = new StreamAppenderMetrics("stream-appender", metricsRegistry);
    String systemName = log4jSystemConfig.getSystemName();
    String systemFactoryName = log4jSystemConfig.getSystemFactory(systemName).orElseThrow(() -> new SamzaException("Could not figure out \"" + systemName + "\" system factory for log4j StreamAppender to use"));
    SystemFactory systemFactory = ReflectionUtil.getObj(systemFactoryName, SystemFactory.class);
    setSerde(log4jSystemConfig, systemName, streamName);
    if (config.getBoolean(CREATE_STREAM_ENABLED, false)) {
        int streamPartitionCount = getPartitionCount();
        System.out.println("[StreamAppender] creating stream " + streamName + " with partition count " + streamPartitionCount);
        StreamSpec streamSpec = StreamSpec.createStreamAppenderStreamSpec(streamName, systemName, streamPartitionCount);
        // SystemAdmin only needed for stream creation here.
        SystemAdmin systemAdmin = systemFactory.getAdmin(systemName, config);
        systemAdmin.start();
        try {
            systemAdmin.createStream(streamSpec);
        } finally {
            // Always stop the started admin, even when stream creation fails, so it is not leaked.
            systemAdmin.stop();
        }
    }
    systemProducer = systemFactory.getProducer(systemName, config, metricsRegistry, this.getClass().getSimpleName());
    systemStream = new SystemStream(systemName, streamName);
    systemProducer.register(SOURCE);
    systemProducer.start();
    log.info(SOURCE + " has been registered in " + systemName + ". So all the logs will be sent to " + streamName + " in " + systemName + ". Logs are partitioned by " + key);
    startTransferThread();
}
Also used : StreamSpec(org.apache.samza.system.StreamSpec) MetricsRegistry(org.apache.samza.metrics.MetricsRegistry) SystemFactory(org.apache.samza.system.SystemFactory) SystemStream(org.apache.samza.system.SystemStream) SystemAdmin(org.apache.samza.system.SystemAdmin) MetricsRegistryMap(org.apache.samza.metrics.MetricsRegistryMap) Log4jSystemConfig(org.apache.samza.config.Log4jSystemConfig) SamzaException(org.apache.samza.SamzaException)

Aggregations

SystemStream (org.apache.samza.system.SystemStream)143 HashMap (java.util.HashMap)75 Test (org.junit.Test)74 SystemStreamPartition (org.apache.samza.system.SystemStreamPartition)72 Partition (org.apache.samza.Partition)58 Map (java.util.Map)55 TaskName (org.apache.samza.container.TaskName)52 MapConfig (org.apache.samza.config.MapConfig)49 Config (org.apache.samza.config.Config)46 SystemAdmin (org.apache.samza.system.SystemAdmin)42 SystemAdmins (org.apache.samza.system.SystemAdmins)40 TaskModel (org.apache.samza.job.model.TaskModel)39 Collections (java.util.Collections)37 Set (java.util.Set)37 TaskConfig (org.apache.samza.config.TaskConfig)37 Clock (org.apache.samza.util.Clock)36 File (java.io.File)35 ImmutableMap (com.google.common.collect.ImmutableMap)34 SystemStreamPartitionMetadata (org.apache.samza.system.SystemStreamMetadata.SystemStreamPartitionMetadata)33 TaskMode (org.apache.samza.job.model.TaskMode)32