Use of org.apache.samza.operators.OutputStream in project samza by apache.
In the class TestExecutionPlanner, the method createSimpleGraph:
private StreamGraphImpl createSimpleGraph() {
  /**
   * a simple graph of partitionBy and map
   *
   * input1 -> partitionBy -> map -> output1
   *
   */
  StreamGraphImpl streamGraph = new StreamGraphImpl(runner, config);
  Function mockFn = mock(Function.class);
  OutputStream<Object, Object, Object> output1 = streamGraph.getOutputStream("output1", mockFn, mockFn);
  BiFunction mockBuilder = mock(BiFunction.class);
  streamGraph.getInputStream("input1", mockBuilder).partitionBy(m -> "yes!!!").map(m -> m).sendTo(output1);
  return streamGraph;
}
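For context, a helper graph like this is typically handed straight to the planner; a minimal hedged sketch, assuming config and streamManager objects like the ones built in the JSON-generator test below, and using only calls that appear elsewhere on this page:

// Hypothetical usage sketch (not part of the Samza sources): plan the simple
// graph built by the helper above and print the serialized plan.
StreamGraphImpl streamGraph = createSimpleGraph();
ExecutionPlanner planner = new ExecutionPlanner(config, streamManager);
ExecutionPlan plan = planner.plan(streamGraph);   // plan(...) may throw Exception
System.out.println(plan.getPlanAsJson());         // JSON description of the generated plan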
Use of org.apache.samza.operators.OutputStream in project samza by apache.
In the class TestJobGraphJsonGenerator, the method test:
@Test
public void test() throws Exception {
  /**
   * the graph looks like the following. number of partitions in parentheses. quotes indicate expected value.
   *
   *                                 input1 (64) -> map -> join -> output1 (8)
   *                                                        |
   *           input2 (16) -> partitionBy ("64") -> filter -|
   *                                                        |
   * input3 (32) -> filter -> partitionBy ("64") -> map -> join -> output2 (16)
   *
   */
  Map<String, String> configMap = new HashMap<>();
  configMap.put(JobConfig.JOB_NAME(), "test-app");
  configMap.put(JobConfig.JOB_DEFAULT_SYSTEM(), "test-system");
  Config config = new MapConfig(configMap);
  StreamSpec input1 = new StreamSpec("input1", "input1", "system1");
  StreamSpec input2 = new StreamSpec("input2", "input2", "system2");
  StreamSpec input3 = new StreamSpec("input3", "input3", "system2");
  StreamSpec output1 = new StreamSpec("output1", "output1", "system1");
  StreamSpec output2 = new StreamSpec("output2", "output2", "system2");
  ApplicationRunner runner = mock(ApplicationRunner.class);
  when(runner.getStreamSpec("input1")).thenReturn(input1);
  when(runner.getStreamSpec("input2")).thenReturn(input2);
  when(runner.getStreamSpec("input3")).thenReturn(input3);
  when(runner.getStreamSpec("output1")).thenReturn(output1);
  when(runner.getStreamSpec("output2")).thenReturn(output2);
  // intermediate streams used in tests
  when(runner.getStreamSpec("test-app-1-partition_by-0"))
      .thenReturn(new StreamSpec("test-app-1-partition_by-0", "test-app-1-partition_by-0", "default-system"));
  when(runner.getStreamSpec("test-app-1-partition_by-1"))
      .thenReturn(new StreamSpec("test-app-1-partition_by-1", "test-app-1-partition_by-1", "default-system"));
  when(runner.getStreamSpec("test-app-1-partition_by-4"))
      .thenReturn(new StreamSpec("test-app-1-partition_by-4", "test-app-1-partition_by-4", "default-system"));
  // set up external partition count
  Map<String, Integer> system1Map = new HashMap<>();
  system1Map.put("input1", 64);
  system1Map.put("output1", 8);
  Map<String, Integer> system2Map = new HashMap<>();
  system2Map.put("input2", 16);
  system2Map.put("input3", 32);
  system2Map.put("output2", 16);
  Map<String, SystemAdmin> systemAdmins = new HashMap<>();
  SystemAdmin systemAdmin1 = createSystemAdmin(system1Map);
  SystemAdmin systemAdmin2 = createSystemAdmin(system2Map);
  systemAdmins.put("system1", systemAdmin1);
  systemAdmins.put("system2", systemAdmin2);
  StreamManager streamManager = new StreamManager(systemAdmins);
  StreamGraphImpl streamGraph = new StreamGraphImpl(runner, config);
  BiFunction mockBuilder = mock(BiFunction.class);
  MessageStream m1 = streamGraph.getInputStream("input1", mockBuilder).map(m -> m);
  MessageStream m2 = streamGraph.getInputStream("input2", mockBuilder).partitionBy(m -> "haha").filter(m -> true);
  MessageStream m3 = streamGraph.getInputStream("input3", mockBuilder).filter(m -> true).partitionBy(m -> "hehe").map(m -> m);
  Function mockFn = mock(Function.class);
  OutputStream<Object, Object, Object> outputStream1 = streamGraph.getOutputStream("output1", mockFn, mockFn);
  OutputStream<Object, Object, Object> outputStream2 = streamGraph.getOutputStream("output2", mockFn, mockFn);
  m1.join(m2, mock(JoinFunction.class), Duration.ofHours(2)).sendTo(outputStream1);
  m2.sink((message, collector, coordinator) -> {
  });
  m3.join(m2, mock(JoinFunction.class), Duration.ofHours(1)).sendTo(outputStream2);
  ExecutionPlanner planner = new ExecutionPlanner(config, streamManager);
  ExecutionPlan plan = planner.plan(streamGraph);
  String json = plan.getPlanAsJson();
  System.out.println(json);
  // deserialize
  ObjectMapper mapper = new ObjectMapper();
  JobGraphJsonGenerator.JobGraphJson nodes = mapper.readValue(json, JobGraphJsonGenerator.JobGraphJson.class);
  assertTrue(nodes.jobs.get(0).operatorGraph.inputStreams.size() == 5);
  assertTrue(nodes.jobs.get(0).operatorGraph.operators.size() == 13);
  assertTrue(nodes.sourceStreams.size() == 3);
  assertTrue(nodes.sinkStreams.size() == 2);
  assertTrue(nodes.intermediateStreams.size() == 2);
}
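The createSystemAdmin helper used above is not shown on this page; a plausible sketch of such a helper, assuming Mockito's anySet/thenAnswer stubbing and the SystemAdmin/SystemStreamMetadata API, would simply report a fixed partition count per stream. Everything below (name, offsets, structure) is illustrative, not the real helper from the test class:

// Hypothetical sketch of a createSystemAdmin helper: a Mockito stub that answers
// getSystemStreamMetadata with the configured number of partitions per stream.
private static SystemAdmin createSystemAdmin(Map<String, Integer> streamToPartitionCount) {
  SystemAdmin systemAdmin = mock(SystemAdmin.class);
  when(systemAdmin.getSystemStreamMetadata(anySet())).thenAnswer(invocation -> {
    Set<String> streamNames = (Set<String>) invocation.getArguments()[0];
    Map<String, SystemStreamMetadata> result = new HashMap<>();
    for (String streamName : streamNames) {
      Map<Partition, SystemStreamMetadata.SystemStreamPartitionMetadata> partitionMetadata = new HashMap<>();
      for (int i = 0; i < streamToPartitionCount.get(streamName); i++) {
        // Offsets are irrelevant for partition counting, so nulls are used here.
        partitionMetadata.put(new Partition(i), new SystemStreamMetadata.SystemStreamPartitionMetadata(null, null, null));
      }
      result.put(streamName, new SystemStreamMetadata(streamName, partitionMetadata));
    }
    return result;
  });
  return systemAdmin;
}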
Use of org.apache.samza.operators.OutputStream in project samza by apache.
In the class TestOperatorImplGraph, the method testGetOutputToInputStreams:
@Test
public void testGetOutputToInputStreams() {
  String inputStreamId1 = "input1";
  String inputStreamId2 = "input2";
  String inputStreamId3 = "input3";
  String inputSystem = "input-system";
  String outputStreamId1 = "output1";
  String outputStreamId2 = "output2";
  String outputSystem = "output-system";
  String intStreamId1 = "test-app-1-partition_by-p1";
  String intStreamId2 = "test-app-1-partition_by-p2";
  String intSystem = "test-system";
  HashMap<String, String> configs = new HashMap<>();
  configs.put(JobConfig.JOB_NAME, "test-app");
  configs.put(JobConfig.JOB_DEFAULT_SYSTEM, intSystem);
  StreamTestUtils.addStreamConfigs(configs, inputStreamId1, inputSystem, inputStreamId1);
  StreamTestUtils.addStreamConfigs(configs, inputStreamId2, inputSystem, inputStreamId2);
  StreamTestUtils.addStreamConfigs(configs, inputStreamId3, inputSystem, inputStreamId3);
  StreamTestUtils.addStreamConfigs(configs, outputStreamId1, outputSystem, outputStreamId1);
  StreamTestUtils.addStreamConfigs(configs, outputStreamId2, outputSystem, outputStreamId2);
  Config config = new MapConfig(configs);
  when(this.context.getJobContext().getConfig()).thenReturn(config);
  StreamApplicationDescriptorImpl graphSpec = new StreamApplicationDescriptorImpl(appDesc -> {
    GenericSystemDescriptor isd = new GenericSystemDescriptor(inputSystem, "mockFactoryClass");
    GenericInputDescriptor inputDescriptor1 = isd.getInputDescriptor(inputStreamId1, mock(Serde.class));
    GenericInputDescriptor inputDescriptor2 = isd.getInputDescriptor(inputStreamId2, mock(Serde.class));
    GenericInputDescriptor inputDescriptor3 = isd.getInputDescriptor(inputStreamId3, mock(Serde.class));
    GenericSystemDescriptor osd = new GenericSystemDescriptor(outputSystem, "mockFactoryClass");
    GenericOutputDescriptor outputDescriptor1 = osd.getOutputDescriptor(outputStreamId1, mock(Serde.class));
    GenericOutputDescriptor outputDescriptor2 = osd.getOutputDescriptor(outputStreamId2, mock(Serde.class));
    MessageStream messageStream1 = appDesc.getInputStream(inputDescriptor1).map(m -> m);
    MessageStream messageStream2 = appDesc.getInputStream(inputDescriptor2).filter(m -> true);
    MessageStream messageStream3 = appDesc.getInputStream(inputDescriptor3)
        .filter(m -> true)
        .partitionBy(m -> "m", m -> m, mock(KVSerde.class), "p1")
        .map(m -> m);
    OutputStream<Object> outputStream1 = appDesc.getOutputStream(outputDescriptor1);
    OutputStream<Object> outputStream2 = appDesc.getOutputStream(outputDescriptor2);
    messageStream1
        .join(messageStream2, mock(JoinFunction.class), mock(Serde.class), mock(Serde.class), mock(Serde.class), Duration.ofHours(2), "j1")
        .partitionBy(m -> "m", m -> m, mock(KVSerde.class), "p2")
        .sendTo(outputStream1);
    messageStream3
        .join(messageStream2, mock(JoinFunction.class), mock(Serde.class), mock(Serde.class), mock(Serde.class), Duration.ofHours(1), "j2")
        .sendTo(outputStream2);
  }, config);
  Multimap<SystemStream, SystemStream> outputToInput =
      OperatorImplGraph.getIntermediateToInputStreamsMap(graphSpec.getOperatorSpecGraph(), new StreamConfig(config));
  Collection<SystemStream> inputs = outputToInput.get(new SystemStream(intSystem, intStreamId2));
  assertEquals(inputs.size(), 2);
  assertTrue(inputs.contains(new SystemStream(inputSystem, inputStreamId1)));
  assertTrue(inputs.contains(new SystemStream(inputSystem, inputStreamId2)));
  inputs = outputToInput.get(new SystemStream(intSystem, intStreamId1));
  assertEquals(inputs.size(), 1);
  assertEquals(inputs.iterator().next(), new SystemStream(inputSystem, inputStreamId3));
}
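When tracing these assertions it can help to dump the whole Guava Multimap; a tiny hedged sketch (illustrative only, not part of the test), using the outputToInput variable from above:

// Illustrative only: print each intermediate (partitionBy) stream together with
// the input streams whose messages can reach it.
for (Map.Entry<SystemStream, Collection<SystemStream>> entry : outputToInput.asMap().entrySet()) {
  System.out.println(entry.getKey() + " <- " + entry.getValue());
}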
Use of org.apache.samza.operators.OutputStream in project samza by apache.
In the class QueryTranslator, the method sendToOutputStream:
private void sendToOutputStream(String queryLogicalId, String logicalOpId, String sinkStream,
    StreamApplicationDescriptor appDesc, TranslatorContext translatorContext, RelNode node, int queryId) {
  SqlIOConfig sinkConfig = sqlConfig.getOutputSystemStreamConfigsBySource().get(sinkStream);
  MessageStream<SamzaSqlRelMessage> stream = translatorContext.getMessageStream(node.getId());
  MessageStream<KV<Object, Object>> outputStream =
      stream.map(new OutputMapFunction(queryLogicalId, logicalOpId, sinkStream, queryId));
  Optional<TableDescriptor> tableDescriptor = sinkConfig.getTableDescriptor();
  if (!tableDescriptor.isPresent()) {
    KVSerde<Object, Object> noOpKVSerde = KVSerde.of(new NoOpSerde<>(), new NoOpSerde<>());
    String systemName = sinkConfig.getSystemName();
    DelegatingSystemDescriptor sd = systemDescriptors.computeIfAbsent(systemName, DelegatingSystemDescriptor::new);
    GenericOutputDescriptor<KV<Object, Object>> osd = sd.getOutputDescriptor(sinkConfig.getStreamId(), noOpKVSerde);
    OutputStream stm = outputMsgStreams.computeIfAbsent(sinkConfig.getSource(), v -> appDesc.getOutputStream(osd));
    outputStream.sendTo(stm);
    // Process system events only if the output is a stream.
    if (sqlConfig.isProcessSystemEvents()) {
      for (MessageStream<SamzaSqlInputMessage> inputStream : inputMsgStreams.values()) {
        MessageStream<KV<Object, Object>> systemEventStream =
            inputStream.filter(message -> message.getMetadata().isSystemMessage())
                .map(SamzaSqlInputMessage::getKeyAndMessageKV);
        systemEventStream.sendTo(stm);
      }
    }
  } else {
    Table outputTable = appDesc.getTable(tableDescriptor.get());
    if (outputTable == null) {
      String msg = "Failed to obtain table descriptor of " + sinkConfig.getSource();
      throw new SamzaException(msg);
    }
    outputStream.sendTo(outputTable);
  }
}
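Stripped of the SQL-specific plumbing, the non-table branch above is the standard descriptor-based way of obtaining an OutputStream and wiring sendTo. A condensed sketch, with made-up system/stream names and a hypothetical kvStream variable, using only calls that appear in the method above:

// Condensed sketch of the non-table branch (names are illustrative):
KVSerde<Object, Object> noOpKVSerde = KVSerde.of(new NoOpSerde<>(), new NoOpSerde<>());
DelegatingSystemDescriptor sd = new DelegatingSystemDescriptor("kafka");
GenericOutputDescriptor<KV<Object, Object>> osd = sd.getOutputDescriptor("sql-sink-stream", noOpKVSerde);
OutputStream<KV<Object, Object>> outputStream = appDesc.getOutputStream(osd);
kvStream.sendTo(outputStream);   // kvStream: a MessageStream<KV<Object, Object>>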
Use of org.apache.samza.operators.OutputStream in project samza by apache.
In the class SessionWindowApp, the method describe:
@Override
public void describe(StreamApplicationDescriptor appDescriptor) {
  JsonSerdeV2<PageView> inputSerde = new JsonSerdeV2<>(PageView.class);
  KVSerde<String, Integer> outputSerde = KVSerde.of(new StringSerde(), new IntegerSerde());
  KafkaSystemDescriptor ksd = new KafkaSystemDescriptor(SYSTEM);
  KafkaInputDescriptor<PageView> id = ksd.getInputDescriptor(INPUT_TOPIC, inputSerde);
  KafkaOutputDescriptor<KV<String, Integer>> od = ksd.getOutputDescriptor(OUTPUT_TOPIC, outputSerde);
  MessageStream<PageView> pageViews = appDescriptor.getInputStream(id);
  OutputStream<KV<String, Integer>> outputStream = appDescriptor.getOutputStream(od);
  pageViews
      .filter(m -> !FILTER_KEY.equals(m.getUserId()))
      .window(Windows.keyedSessionWindow(PageView::getUserId, Duration.ofSeconds(3),
          new StringSerde(), new JsonSerdeV2<>(PageView.class)), "sessionWindow")
      .map(m -> KV.of(m.getKey().getKey(), m.getMessage().size()))
      .sendTo(outputStream);
}
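For completeness, an application like SessionWindowApp is usually launched from a small main method; a sketch assuming the standard CommandLine and ApplicationRunners pattern, which is not part of the snippet above:

// Hypothetical launcher sketch for SessionWindowApp; assumes the usual
// CommandLine/ApplicationRunners pattern used by Samza example apps.
public static void main(String[] args) {
  CommandLine cmdLine = new CommandLine();
  Config config = cmdLine.loadConfig(cmdLine.parser().parse(args));
  ApplicationRunner runner = ApplicationRunners.getApplicationRunner(new SessionWindowApp(), config);
  runner.run();
  runner.waitForFinish();
}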