Use of org.apache.samza.system.SystemStream in the Apache Samza project.
Class TestOperatorImplGraph, method testGetOutputToInputStreams.
/**
 * Checks that {@code OperatorImplGraph.getIntermediateToInputStreamsMap} reports, for each
 * intermediate stream introduced by a {@code partitionBy}, the input streams that feed it.
 *
 * The application built below wires three inputs as follows:
 * <ul>
 *   <li>input1 (map) is joined with input2 (filter) via "j1", repartitioned via "p2" and sent to output1;</li>
 *   <li>input3 (filter) is repartitioned via "p1", mapped, joined with input2 via "j2" and sent to output2.</li>
 * </ul>
 * The test therefore expects the "p2" intermediate stream to map to input1 and input2, and the
 * "p1" intermediate stream to map to input3 only.
 */
@Test
public void testGetOutputToInputStreams() {
    String inputStreamId1 = "input1";
    String inputStreamId2 = "input2";
    String inputStreamId3 = "input3";
    String inputSystem = "input-system";
    String outputStreamId1 = "output1";
    String outputStreamId2 = "output2";
    String outputSystem = "output-system";
    // Ids the test expects for the streams created by partitionBy "p1" and "p2".
    // NOTE(review): presumably <jobName>-<jobId>-partition_by-<opId> with a default job id of "1" - confirm.
    String intStreamId1 = "test-app-1-partition_by-p1";
    String intStreamId2 = "test-app-1-partition_by-p2";
    // Configured as the job's default system; the assertions look the intermediate streams up on it.
    String intSystem = "test-system";

    HashMap<String, String> configs = new HashMap<>();
    configs.put(JobConfig.JOB_NAME, "test-app");
    configs.put(JobConfig.JOB_DEFAULT_SYSTEM, intSystem);
    StreamTestUtils.addStreamConfigs(configs, inputStreamId1, inputSystem, inputStreamId1);
    StreamTestUtils.addStreamConfigs(configs, inputStreamId2, inputSystem, inputStreamId2);
    StreamTestUtils.addStreamConfigs(configs, inputStreamId3, inputSystem, inputStreamId3);
    StreamTestUtils.addStreamConfigs(configs, outputStreamId1, outputSystem, outputStreamId1);
    StreamTestUtils.addStreamConfigs(configs, outputStreamId2, outputSystem, outputStreamId2);
    Config config = new MapConfig(configs);
    when(this.context.getJobContext().getConfig()).thenReturn(config);

    StreamApplicationDescriptorImpl graphSpec = new StreamApplicationDescriptorImpl(appDesc -> {
        GenericSystemDescriptor isd = new GenericSystemDescriptor(inputSystem, "mockFactoryClass");
        GenericInputDescriptor inputDescriptor1 = isd.getInputDescriptor(inputStreamId1, mock(Serde.class));
        GenericInputDescriptor inputDescriptor2 = isd.getInputDescriptor(inputStreamId2, mock(Serde.class));
        GenericInputDescriptor inputDescriptor3 = isd.getInputDescriptor(inputStreamId3, mock(Serde.class));
        GenericSystemDescriptor osd = new GenericSystemDescriptor(outputSystem, "mockFactoryClass");
        GenericOutputDescriptor outputDescriptor1 = osd.getOutputDescriptor(outputStreamId1, mock(Serde.class));
        GenericOutputDescriptor outputDescriptor2 = osd.getOutputDescriptor(outputStreamId2, mock(Serde.class));

        MessageStream messageStream1 = appDesc.getInputStream(inputDescriptor1).map(m -> m);
        MessageStream messageStream2 = appDesc.getInputStream(inputDescriptor2).filter(m -> true);
        // input3 is repartitioned ("p1") before it takes part in any join.
        MessageStream messageStream3 = appDesc.getInputStream(inputDescriptor3).filter(m -> true).partitionBy(m -> "m", m -> m, mock(KVSerde.class), "p1").map(m -> m);
        OutputStream<Object> outputStream1 = appDesc.getOutputStream(outputDescriptor1);
        OutputStream<Object> outputStream2 = appDesc.getOutputStream(outputDescriptor2);

        // input1 joined with input2, then repartitioned ("p2") on the way to output1.
        messageStream1.join(messageStream2, mock(JoinFunction.class), mock(Serde.class), mock(Serde.class), mock(Serde.class), Duration.ofHours(2), "j1").partitionBy(m -> "m", m -> m, mock(KVSerde.class), "p2").sendTo(outputStream1);
        // The repartitioned input3 joined with input2, sent straight to output2.
        messageStream3.join(messageStream2, mock(JoinFunction.class), mock(Serde.class), mock(Serde.class), mock(Serde.class), Duration.ofHours(1), "j2").sendTo(outputStream2);
    }, config);

    Multimap<SystemStream, SystemStream> outputToInput = OperatorImplGraph.getIntermediateToInputStreamsMap(graphSpec.getOperatorSpecGraph(), new StreamConfig(config));

    // "p2" sits downstream of the input1/input2 join. Expected value goes first, per the JUnit contract.
    Collection<SystemStream> inputs = outputToInput.get(new SystemStream(intSystem, intStreamId2));
    assertEquals(2, inputs.size());
    assertTrue(inputs.contains(new SystemStream(inputSystem, inputStreamId1)));
    assertTrue(inputs.contains(new SystemStream(inputSystem, inputStreamId2)));

    // "p1" sits downstream of input3 only.
    inputs = outputToInput.get(new SystemStream(intSystem, intStreamId1));
    assertEquals(1, inputs.size());
    assertEquals(new SystemStream(inputSystem, inputStreamId3), inputs.iterator().next());
}
Use of org.apache.samza.system.SystemStream in the Apache Samza project.
Class TestOperatorImplGraph, method testLinearChain.
/**
 * Builds a single linear pipeline (input -> filter -> map -> sendTo) and walks the resulting
 * {@code OperatorImplGraph} one hop at a time, checking that every operator has exactly one
 * registered successor (none for the terminal sendTo) and carries the expected op code.
 */
@Test
public void testLinearChain() {
    // Stream ids, the systems they live on, and their physical names.
    final String srcStreamId = "input";
    final String srcSystem = "input-system";
    final String srcPhysicalName = "input-stream";
    final String sinkStreamId = "output";
    final String sinkSystem = "output-system";
    final String sinkPhysicalName = "output-stream";
    final String defaultSystem = "intermediate-system";

    // Job-level settings followed by the per-stream settings.
    HashMap<String, String> rawConfig = new HashMap<>();
    rawConfig.put(JobConfig.JOB_NAME, "jobName");
    rawConfig.put(JobConfig.JOB_ID, "jobId");
    rawConfig.put(JobConfig.JOB_DEFAULT_SYSTEM, defaultSystem);
    StreamTestUtils.addStreamConfigs(rawConfig, srcStreamId, srcSystem, srcPhysicalName);
    StreamTestUtils.addStreamConfigs(rawConfig, sinkStreamId, sinkSystem, sinkPhysicalName);
    Config config = new MapConfig(rawConfig);
    when(this.context.getJobContext().getConfig()).thenReturn(config);

    // Application under test: input -> filter -> map -> output.
    StreamApplicationDescriptorImpl appSpec = new StreamApplicationDescriptorImpl(appDesc -> {
        GenericSystemDescriptor systemDescriptor = new GenericSystemDescriptor(srcSystem, "mockFactoryClass");
        GenericInputDescriptor srcDescriptor = systemDescriptor.getInputDescriptor(srcStreamId, mock(Serde.class));
        GenericOutputDescriptor sinkDescriptor = systemDescriptor.getOutputDescriptor(sinkStreamId, mock(Serde.class));
        MessageStream<Object> source = appDesc.getInputStream(srcDescriptor);
        OutputStream<Object> sink = appDesc.getOutputStream(sinkDescriptor);
        source
            .filter(mock(FilterFunction.class))
            .map(mock(MapFunction.class))
            .sendTo(sink);
    }, config);

    OperatorImplGraph implGraph =
        new OperatorImplGraph(appSpec.getOperatorSpecGraph(), this.context, mock(Clock.class));

    // Hop 0: the input operator is looked up by the stream's physical name.
    SystemStream sourceStream = new SystemStream(srcSystem, srcPhysicalName);
    InputOperatorImpl inputOp = implGraph.getInputOperator(sourceStream);
    assertEquals(1, inputOp.registeredOperators.size());

    // Hop 1: filter. The cast doubles as a check on the implementation class.
    OperatorImpl filterOp = (FlatmapOperatorImpl) inputOp.registeredOperators.iterator().next();
    assertEquals(1, filterOp.registeredOperators.size());
    assertEquals(OpCode.FILTER, filterOp.getOperatorSpec().getOpCode());

    // Hop 2: map, also expected to be a FlatmapOperatorImpl.
    OperatorImpl mapOp = (FlatmapOperatorImpl) filterOp.registeredOperators.iterator().next();
    assertEquals(1, mapOp.registeredOperators.size());
    assertEquals(OpCode.MAP, mapOp.getOperatorSpec().getOpCode());

    // Hop 3: sendTo terminates the chain, so nothing is registered after it.
    OperatorImpl sendToOp = (OutputOperatorImpl) mapOp.registeredOperators.iterator().next();
    assertEquals(0, sendToOp.registeredOperators.size());
    assertEquals(OpCode.SEND_TO, sendToOp.getOperatorSpec().getOpCode());
}
Use of org.apache.samza.system.SystemStream in the Apache Samza project.
Class TestOperatorImplGraph, method testGetProducerTaskCountForIntermediateStreams.
/**
 * Checks {@code OperatorImplGraph.getProducerTaskCountForIntermediateStreams} against a
 * hand-built task assignment.
 *
 * Consumers of each input stream:
 * <pre>
 *   input1 -> Task 0, Task 1
 *   input2 -> Task 1, Task 2
 *   input3 -> Task 1
 * </pre>
 * Inputs feeding each intermediate stream:
 * <pre>
 *   int1 <- input1, input2
 *   int2 <- input2, input3
 * </pre>
 * The expected producer counts match the number of distinct tasks consuming the inputs of each
 * intermediate stream: three for int1 (Task 0, 1, 2) and two for int2 (Task 1, 2).
 */
@Test
public void testGetProducerTaskCountForIntermediateStreams() {
    SystemStream input1 = new SystemStream("system1", "intput1");
    SystemStream input2 = new SystemStream("system2", "intput2");
    SystemStream input3 = new SystemStream("system2", "intput3");
    SystemStream int1 = new SystemStream("system1", "int1");
    SystemStream int2 = new SystemStream("system1", "int2");

    String task0 = "Task 0";
    String task1 = "Task 1";
    String task2 = "Task 2";

    Multimap<SystemStream, String> streamToConsumerTasks = HashMultimap.create();
    streamToConsumerTasks.put(input1, task0);
    streamToConsumerTasks.put(input1, task1);
    streamToConsumerTasks.put(input2, task1);
    streamToConsumerTasks.put(input2, task2);
    streamToConsumerTasks.put(input3, task1);
    // Consumers of the intermediate streams are registered as well; the expected counts below
    // are consistent with them not being counted as producers.
    streamToConsumerTasks.put(int1, task0);
    streamToConsumerTasks.put(int1, task1);
    streamToConsumerTasks.put(int2, task0);

    Multimap<SystemStream, SystemStream> intermediateToInputStreams = HashMultimap.create();
    intermediateToInputStreams.put(int1, input1);
    intermediateToInputStreams.put(int1, input2);
    intermediateToInputStreams.put(int2, input2);
    intermediateToInputStreams.put(int2, input3);

    Map<SystemStream, Integer> counts = OperatorImplGraph.getProducerTaskCountForIntermediateStreams(streamToConsumerTasks, intermediateToInputStreams);

    // assertEquals reports the actual count on failure; intValue() avoids the
    // assertEquals(long, long) / assertEquals(Object, Object) overload ambiguity.
    assertEquals(3, counts.get(int1).intValue());
    assertEquals(2, counts.get(int2).intValue());
}
Use of org.apache.samza.system.SystemStream in the Apache Samza project.
Class TestWatermarkStates, method testUpdate.
/**
 * Drives {@code WatermarkStates} through a sequence of watermark updates on one input partition
 * and two partitions of an intermediate stream that is configured with two producer tasks.
 *
 * The expectations encoded below are that a partition of the intermediate stream has no watermark
 * until both producers have reported on it, that a partition's watermark is then the minimum over
 * its producers, and that the stream's watermark is the minimum over its partitions.
 */
@Test
public void testUpdate() {
    SystemStream input = new SystemStream("system", "input");
    SystemStream intermediate = new SystemStream("system", "intermediate");

    Set<SystemStreamPartition> ssps = new HashSet<>();
    SystemStreamPartition inputPartition0 = new SystemStreamPartition(input, new Partition(0));
    SystemStreamPartition intPartition0 = new SystemStreamPartition(intermediate, new Partition(0));
    SystemStreamPartition intPartition1 = new SystemStreamPartition(intermediate, new Partition(1));
    ssps.add(inputPartition0);
    ssps.add(intPartition0);
    ssps.add(intPartition1);

    // The intermediate stream is written to by two producer tasks ("task 0" and "task 1" below).
    Map<SystemStream, Integer> producerCounts = new HashMap<>();
    producerCounts.put(intermediate, 2);

    WatermarkStates watermarkStates = new WatermarkStates(ssps, producerCounts, new MetricsRegistryMap());

    // advance watermark on input to 5
    IncomingMessageEnvelope envelope = IncomingMessageEnvelope.buildWatermarkEnvelope(inputPartition0, 5L);
    watermarkStates.update((WatermarkMessage) envelope.getMessage(), envelope.getSystemStreamPartition());
    assertEquals(5L, watermarkStates.getWatermark(input));
    assertEquals(WATERMARK_NOT_EXIST, watermarkStates.getWatermark(intermediate));

    // watermark from task 0 on int p0 to 6: only one of the two producers has reported so far
    WatermarkMessage watermarkMessage = new WatermarkMessage(6L, "task 0");
    watermarkStates.update(watermarkMessage, intPartition0);
    assertEquals(WATERMARK_NOT_EXIST, watermarkStates.getWatermarkPerSSP(intPartition0));
    assertEquals(WATERMARK_NOT_EXIST, watermarkStates.getWatermark(intermediate));

    // watermark from task 1 on int p0 to 3: p0 is now min(6, 3) = 3, but p1 has nothing yet
    watermarkMessage = new WatermarkMessage(3L, "task 1");
    watermarkStates.update(watermarkMessage, intPartition0);
    assertEquals(3L, watermarkStates.getWatermarkPerSSP(intPartition0));
    assertEquals(WATERMARK_NOT_EXIST, watermarkStates.getWatermark(intermediate));

    // watermark from task 0 on int p1 to 10: p1 is still missing task 1
    watermarkMessage = new WatermarkMessage(10L, "task 0");
    watermarkStates.update(watermarkMessage, intPartition1);
    assertEquals(WATERMARK_NOT_EXIST, watermarkStates.getWatermarkPerSSP(intPartition1));
    assertEquals(WATERMARK_NOT_EXIST, watermarkStates.getWatermark(intermediate));

    // watermark from task 1 on int p1 to 4: p1 is now min(10, 4) = 4
    watermarkMessage = new WatermarkMessage(4L, "task 1");
    watermarkStates.update(watermarkMessage, intPartition1);
    assertEquals(4L, watermarkStates.getWatermarkPerSSP(intPartition1));
    // verify we got a watermark 3 (min of p0 = 3 and p1 = 4) for int stream
    assertEquals(3L, watermarkStates.getWatermark(intermediate));

    // advance watermark from task 1 on int p0 to 8: p0 becomes min(6, 8) = 6
    watermarkMessage = new WatermarkMessage(8L, "task 1");
    watermarkStates.update(watermarkMessage, intPartition0);
    assertEquals(6L, watermarkStates.getWatermarkPerSSP(intPartition0));
    // verify we got a watermark 4 (min of p0 = 6 and p1 = 4) for int stream
    assertEquals(4L, watermarkStates.getWatermark(intermediate));

    // advance watermark from task 1 on int p1 to 7: p1 becomes min(10, 7) = 7
    watermarkMessage = new WatermarkMessage(7L, "task 1");
    watermarkStates.update(watermarkMessage, intPartition1);
    assertEquals(7L, watermarkStates.getWatermarkPerSSP(intPartition1));
    // verify we got a watermark 6 (min of p0 = 6 and p1 = 7) for int stream
    assertEquals(6L, watermarkStates.getWatermark(intermediate));
}
Use of org.apache.samza.system.SystemStream in the Apache Samza project.
Class StreamAppender, method setupSystem.
/**
 * Resolves the log4j system configuration, optionally creates the target stream, and starts the
 * producer and the transfer thread that ship log events to {@code streamName}.
 *
 * This should only be called after verifying that the {@link LoggingContextHolder} has the config.
 *
 * @throws SamzaException if no system factory is configured for the log4j system
 */
protected void setupSystem() {
    config = getConfig();
    Log4jSystemConfig log4jSystemConfig = new Log4jSystemConfig(config);

    // Derive the stream name from the job name and id only when none has been set already.
    if (streamName == null) {
        streamName = getStreamName(log4jSystemConfig.getJobName(), log4jSystemConfig.getJobId());
    }

    // TODO we need the ACTUAL metrics registry, or the metrics won't get reported by the metric reporters!
    MetricsRegistry metricsRegistry = new MetricsRegistryMap();
    metrics = new StreamAppenderMetrics("stream-appender", metricsRegistry);

    String systemName = log4jSystemConfig.getSystemName();
    String systemFactoryName = log4jSystemConfig.getSystemFactory(systemName).orElseThrow(() -> new SamzaException("Could not figure out \"" + systemName + "\" system factory for log4j StreamAppender to use"));
    SystemFactory systemFactory = ReflectionUtil.getObj(systemFactoryName, SystemFactory.class);

    setSerde(log4jSystemConfig, systemName, streamName);

    if (config.getBoolean(CREATE_STREAM_ENABLED, false)) {
        int streamPartitionCount = getPartitionCount();
        System.out.println("[StreamAppender] creating stream " + streamName + " with partition count " + streamPartitionCount);
        StreamSpec streamSpec = StreamSpec.createStreamAppenderStreamSpec(streamName, systemName, streamPartitionCount);

        // SystemAdmin only needed for stream creation here.
        SystemAdmin systemAdmin = systemFactory.getAdmin(systemName, config);
        systemAdmin.start();
        try {
            systemAdmin.createStream(streamSpec);
        } finally {
            // Always stop the admin once it has been started, even if stream creation fails,
            // so a failed createStream does not leave it running.
            systemAdmin.stop();
        }
    }

    systemProducer = systemFactory.getProducer(systemName, config, metricsRegistry, this.getClass().getSimpleName());
    systemStream = new SystemStream(systemName, streamName);
    systemProducer.register(SOURCE);
    systemProducer.start();

    log.info(SOURCE + " has been registered in " + systemName + ". So all the logs will be sent to " + streamName + " in " + systemName + ". Logs are partitioned by " + key);

    startTransferThread();
}
Aggregations