Use of org.apache.samza.operators.MessageStream in project samza by apache.
The class ExecutionPlanner, method findReachableJoins.
/**
 * This function traverses the StreamGraph to find and update mappings for all joins reachable from this input StreamEdge.
 * @param inputMessageStream the next input {@link MessageStream} to traverse
 * @param sourceStreamEdge source {@link StreamEdge}
 * @param joinSpecToStreamEdges mapping from a join spec to its source {@link StreamEdge}s
 * @param streamEdgeToJoinSpecs mapping from a source {@link StreamEdge} to the join specs that consume it
 * @param outputStreamToJoinSpec mapping from the output stream to the join spec
 * @param joinQ queue of join specs for which at least one input stream edge's partition count is known
 * @param visited set of join specs that have already been added to the queue
 */
private static void findReachableJoins(MessageStream inputMessageStream, StreamEdge sourceStreamEdge,
    Multimap<OperatorSpec, StreamEdge> joinSpecToStreamEdges, Multimap<StreamEdge, OperatorSpec> streamEdgeToJoinSpecs,
    Map<MessageStream, OperatorSpec> outputStreamToJoinSpec, Queue<OperatorSpec> joinQ, Set<OperatorSpec> visited) {
  Collection<OperatorSpec> specs = ((MessageStreamImpl) inputMessageStream).getRegisteredOperatorSpecs();
  for (OperatorSpec spec : specs) {
    if (spec instanceof PartialJoinOperatorSpec) {
      // every join will have two partial join operators
      // we will choose one of them in order to consolidate the inputs
      // the first one who registered with the outputStreamToJoinSpec will win
      MessageStream output = spec.getNextStream();
      OperatorSpec joinSpec = outputStreamToJoinSpec.get(output);
      if (joinSpec == null) {
        joinSpec = spec;
        outputStreamToJoinSpec.put(output, joinSpec);
      }
      joinSpecToStreamEdges.put(joinSpec, sourceStreamEdge);
      streamEdgeToJoinSpecs.put(sourceStreamEdge, joinSpec);
      if (!visited.contains(joinSpec) && sourceStreamEdge.getPartitionCount() > 0) {
        // put the joins with known input partitions into the queue
        joinQ.add(joinSpec);
        visited.add(joinSpec);
      }
    }
    if (spec.getNextStream() != null) {
      findReachableJoins(spec.getNextStream(), sourceStreamEdge, joinSpecToStreamEdges, streamEdgeToJoinSpecs,
          outputStreamToJoinSpec, joinQ, visited);
    }
  }
}
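For orientation, the sketch below shows how such a traversal might be seeded and then drained. It is not the actual ExecutionPlanner code: the inputStreams map (each input MessageStream paired with its source StreamEdge) and the final partition-agreement step are assumptions made for illustration; the Guava multimaps simply mirror the parameter types of the method above.

// Hypothetical driver (illustration only); assumes inputStreams is a Map<MessageStream, StreamEdge>
// describing the graph's inputs and their source edges.
Multimap<OperatorSpec, StreamEdge> joinSpecToStreamEdges = HashMultimap.create();
Multimap<StreamEdge, OperatorSpec> streamEdgeToJoinSpecs = HashMultimap.create();
Map<MessageStream, OperatorSpec> outputStreamToJoinSpec = new HashMap<>();
Queue<OperatorSpec> joinQ = new ArrayDeque<>();
Set<OperatorSpec> visited = new HashSet<>();
// Seed the search once per input stream; edges with a known partition count enqueue their joins.
inputStreams.forEach((messageStream, sourceEdge) ->
    findReachableJoins(messageStream, sourceEdge, joinSpecToStreamEdges, streamEdgeToJoinSpecs,
        outputStreamToJoinSpec, joinQ, visited));
// Drain the queue: for each join whose partition count is now known, the planner can size the
// join's other input edges to match (the agreement step itself is omitted here).
while (!joinQ.isEmpty()) {
  OperatorSpec join = joinQ.poll();
  for (StreamEdge edge : joinSpecToStreamEdges.get(join)) {
    // ... align this edge's partition count with the rest of the join's inputs ...
  }
}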
Use of org.apache.samza.operators.MessageStream in project samza by apache.
The class TestJobGraphJsonGenerator, method test.
@Test
public void test() throws Exception {
  /**
   * the graph looks like the following. number of partitions in parentheses. quotes indicate expected value.
   *
   *                                  input1 (64) -> map -> join -> output1 (8)
   *                                                        |
   *           input2 (16) -> partitionBy ("64") -> filter -|
   *                                                        |
   *  input3 (32) -> filter -> partitionBy ("64") -> map -> join -> output2 (16)
   *
   */
  Map<String, String> configMap = new HashMap<>();
  configMap.put(JobConfig.JOB_NAME(), "test-app");
  configMap.put(JobConfig.JOB_DEFAULT_SYSTEM(), "test-system");
  Config config = new MapConfig(configMap);
  StreamSpec input1 = new StreamSpec("input1", "input1", "system1");
  StreamSpec input2 = new StreamSpec("input2", "input2", "system2");
  StreamSpec input3 = new StreamSpec("input3", "input3", "system2");
  StreamSpec output1 = new StreamSpec("output1", "output1", "system1");
  StreamSpec output2 = new StreamSpec("output2", "output2", "system2");
  ApplicationRunner runner = mock(ApplicationRunner.class);
  when(runner.getStreamSpec("input1")).thenReturn(input1);
  when(runner.getStreamSpec("input2")).thenReturn(input2);
  when(runner.getStreamSpec("input3")).thenReturn(input3);
  when(runner.getStreamSpec("output1")).thenReturn(output1);
  when(runner.getStreamSpec("output2")).thenReturn(output2);
  // intermediate streams used in tests
  when(runner.getStreamSpec("test-app-1-partition_by-0"))
      .thenReturn(new StreamSpec("test-app-1-partition_by-0", "test-app-1-partition_by-0", "default-system"));
  when(runner.getStreamSpec("test-app-1-partition_by-1"))
      .thenReturn(new StreamSpec("test-app-1-partition_by-1", "test-app-1-partition_by-1", "default-system"));
  when(runner.getStreamSpec("test-app-1-partition_by-4"))
      .thenReturn(new StreamSpec("test-app-1-partition_by-4", "test-app-1-partition_by-4", "default-system"));
  // set up external partition count
  Map<String, Integer> system1Map = new HashMap<>();
  system1Map.put("input1", 64);
  system1Map.put("output1", 8);
  Map<String, Integer> system2Map = new HashMap<>();
  system2Map.put("input2", 16);
  system2Map.put("input3", 32);
  system2Map.put("output2", 16);
  Map<String, SystemAdmin> systemAdmins = new HashMap<>();
  SystemAdmin systemAdmin1 = createSystemAdmin(system1Map);
  SystemAdmin systemAdmin2 = createSystemAdmin(system2Map);
  systemAdmins.put("system1", systemAdmin1);
  systemAdmins.put("system2", systemAdmin2);
  StreamManager streamManager = new StreamManager(systemAdmins);
  StreamGraphImpl streamGraph = new StreamGraphImpl(runner, config);
  BiFunction mockBuilder = mock(BiFunction.class);
  MessageStream m1 = streamGraph.getInputStream("input1", mockBuilder).map(m -> m);
  MessageStream m2 = streamGraph.getInputStream("input2", mockBuilder).partitionBy(m -> "haha").filter(m -> true);
  MessageStream m3 = streamGraph.getInputStream("input3", mockBuilder).filter(m -> true).partitionBy(m -> "hehe").map(m -> m);
  Function mockFn = mock(Function.class);
  OutputStream<Object, Object, Object> outputStream1 = streamGraph.getOutputStream("output1", mockFn, mockFn);
  OutputStream<Object, Object, Object> outputStream2 = streamGraph.getOutputStream("output2", mockFn, mockFn);
  m1.join(m2, mock(JoinFunction.class), Duration.ofHours(2)).sendTo(outputStream1);
  m2.sink((message, collector, coordinator) -> {
  });
  m3.join(m2, mock(JoinFunction.class), Duration.ofHours(1)).sendTo(outputStream2);
  ExecutionPlanner planner = new ExecutionPlanner(config, streamManager);
  ExecutionPlan plan = planner.plan(streamGraph);
  String json = plan.getPlanAsJson();
  System.out.println(json);
  // deserialize
  ObjectMapper mapper = new ObjectMapper();
  JobGraphJsonGenerator.JobGraphJson nodes = mapper.readValue(json, JobGraphJsonGenerator.JobGraphJson.class);
  assertTrue(nodes.jobs.get(0).operatorGraph.inputStreams.size() == 5);
  assertTrue(nodes.jobs.get(0).operatorGraph.operators.size() == 13);
  assertTrue(nodes.sourceStreams.size() == 3);
  assertTrue(nodes.sinkStreams.size() == 2);
  assertTrue(nodes.intermediateStreams.size() == 2);
}
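A reading aid for the assertions above (not part of the test): sourceStreams are the three physical inputs {input1, input2, input3}, sinkStreams are {output1, output2}, and intermediateStreams are the two streams created by the partitionBy calls, so the single job's operatorGraph sees 3 + 2 = 5 input streams. The quoted "64" values in the diagram follow from the join bookkeeping illustrated by findReachableJoins above: input2's repartitioned stream joins with data from input1 (64 partitions), and input3's repartitioned stream joins with that already-64-partition stream, so the planner is expected to assign 64 partitions to both intermediate streams.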
Use of org.apache.samza.operators.MessageStream in project samza by apache.
The class TestOperatorImplGraph, method testBroadcastChain.
@Test
public void testBroadcastChain() {
  String inputStreamId = "input";
  String inputSystem = "input-system";
  String inputPhysicalName = "input-stream";
  HashMap<String, String> configMap = new HashMap<>();
  configMap.put(JobConfig.JOB_NAME, "test-job");
  configMap.put(JobConfig.JOB_ID, "1");
  StreamTestUtils.addStreamConfigs(configMap, inputStreamId, inputSystem, inputPhysicalName);
  Config config = new MapConfig(configMap);
  when(this.context.getJobContext().getConfig()).thenReturn(config);
  StreamApplicationDescriptorImpl graphSpec = new StreamApplicationDescriptorImpl(appDesc -> {
    GenericSystemDescriptor sd = new GenericSystemDescriptor(inputSystem, "mockFactoryClass");
    GenericInputDescriptor inputDescriptor = sd.getInputDescriptor(inputStreamId, mock(Serde.class));
    MessageStream<Object> inputStream = appDesc.getInputStream(inputDescriptor);
    inputStream.filter(mock(FilterFunction.class));
    inputStream.map(mock(MapFunction.class));
  }, config);
  OperatorImplGraph opImplGraph = new OperatorImplGraph(graphSpec.getOperatorSpecGraph(), this.context, mock(Clock.class));
  InputOperatorImpl inputOpImpl = opImplGraph.getInputOperator(new SystemStream(inputSystem, inputPhysicalName));
  assertEquals(2, inputOpImpl.registeredOperators.size());
  assertTrue(inputOpImpl.registeredOperators.stream()
      .anyMatch(opImpl -> ((OperatorImpl) opImpl).getOperatorSpec().getOpCode() == OpCode.FILTER));
  assertTrue(inputOpImpl.registeredOperators.stream()
      .anyMatch(opImpl -> ((OperatorImpl) opImpl).getOperatorSpec().getOpCode() == OpCode.MAP));
}
Use of org.apache.samza.operators.MessageStream in project samza by apache.
The class TestOperatorImplGraph, method testGetOutputToInputStreams.
@Test
public void testGetOutputToInputStreams() {
  String inputStreamId1 = "input1";
  String inputStreamId2 = "input2";
  String inputStreamId3 = "input3";
  String inputSystem = "input-system";
  String outputStreamId1 = "output1";
  String outputStreamId2 = "output2";
  String outputSystem = "output-system";
  String intStreamId1 = "test-app-1-partition_by-p1";
  String intStreamId2 = "test-app-1-partition_by-p2";
  String intSystem = "test-system";
  HashMap<String, String> configs = new HashMap<>();
  configs.put(JobConfig.JOB_NAME, "test-app");
  configs.put(JobConfig.JOB_DEFAULT_SYSTEM, intSystem);
  StreamTestUtils.addStreamConfigs(configs, inputStreamId1, inputSystem, inputStreamId1);
  StreamTestUtils.addStreamConfigs(configs, inputStreamId2, inputSystem, inputStreamId2);
  StreamTestUtils.addStreamConfigs(configs, inputStreamId3, inputSystem, inputStreamId3);
  StreamTestUtils.addStreamConfigs(configs, outputStreamId1, outputSystem, outputStreamId1);
  StreamTestUtils.addStreamConfigs(configs, outputStreamId2, outputSystem, outputStreamId2);
  Config config = new MapConfig(configs);
  when(this.context.getJobContext().getConfig()).thenReturn(config);
  StreamApplicationDescriptorImpl graphSpec = new StreamApplicationDescriptorImpl(appDesc -> {
    GenericSystemDescriptor isd = new GenericSystemDescriptor(inputSystem, "mockFactoryClass");
    GenericInputDescriptor inputDescriptor1 = isd.getInputDescriptor(inputStreamId1, mock(Serde.class));
    GenericInputDescriptor inputDescriptor2 = isd.getInputDescriptor(inputStreamId2, mock(Serde.class));
    GenericInputDescriptor inputDescriptor3 = isd.getInputDescriptor(inputStreamId3, mock(Serde.class));
    GenericSystemDescriptor osd = new GenericSystemDescriptor(outputSystem, "mockFactoryClass");
    GenericOutputDescriptor outputDescriptor1 = osd.getOutputDescriptor(outputStreamId1, mock(Serde.class));
    GenericOutputDescriptor outputDescriptor2 = osd.getOutputDescriptor(outputStreamId2, mock(Serde.class));
    MessageStream messageStream1 = appDesc.getInputStream(inputDescriptor1).map(m -> m);
    MessageStream messageStream2 = appDesc.getInputStream(inputDescriptor2).filter(m -> true);
    MessageStream messageStream3 = appDesc.getInputStream(inputDescriptor3)
        .filter(m -> true)
        .partitionBy(m -> "m", m -> m, mock(KVSerde.class), "p1")
        .map(m -> m);
    OutputStream<Object> outputStream1 = appDesc.getOutputStream(outputDescriptor1);
    OutputStream<Object> outputStream2 = appDesc.getOutputStream(outputDescriptor2);
    messageStream1
        .join(messageStream2, mock(JoinFunction.class), mock(Serde.class), mock(Serde.class), mock(Serde.class),
            Duration.ofHours(2), "j1")
        .partitionBy(m -> "m", m -> m, mock(KVSerde.class), "p2")
        .sendTo(outputStream1);
    messageStream3
        .join(messageStream2, mock(JoinFunction.class), mock(Serde.class), mock(Serde.class), mock(Serde.class),
            Duration.ofHours(1), "j2")
        .sendTo(outputStream2);
  }, config);
  Multimap<SystemStream, SystemStream> outputToInput =
      OperatorImplGraph.getIntermediateToInputStreamsMap(graphSpec.getOperatorSpecGraph(), new StreamConfig(config));
  Collection<SystemStream> inputs = outputToInput.get(new SystemStream(intSystem, intStreamId2));
  assertEquals(inputs.size(), 2);
  assertTrue(inputs.contains(new SystemStream(inputSystem, inputStreamId1)));
  assertTrue(inputs.contains(new SystemStream(inputSystem, inputStreamId2)));
  inputs = outputToInput.get(new SystemStream(intSystem, intStreamId1));
  assertEquals(inputs.size(), 1);
  assertEquals(inputs.iterator().next(), new SystemStream(inputSystem, inputStreamId3));
}
Use of org.apache.samza.operators.MessageStream in project samza by apache.
The class TestPartitionByOperatorSpec, method testPartitionByWithNoSerde.
@Test
public void testPartitionByWithNoSerde() {
  MapFunction<Object, String> keyFn = m -> m.toString();
  MapFunction<Object, Object> valueFn = m -> m;
  StreamApplicationDescriptorImpl streamAppDesc = new StreamApplicationDescriptorImpl(appDesc -> {
    MessageStream inputStream = appDesc.getInputStream(testInputDescriptor);
    inputStream.partitionBy(keyFn, valueFn, mock(KVSerde.class), testRepartitionedStreamName);
  }, getConfig());
  InputOperatorSpec inputOpSpec = streamAppDesc.getInputOperators()
      .get(String.format("%s-%s-partition_by-%s", testJobName, testJobId, testRepartitionedStreamName));
  assertNotNull(inputOpSpec);
  assertNull(inputOpSpec.getKeySerde());
  assertNull(inputOpSpec.getValueSerde());
  assertTrue(inputOpSpec.isKeyed());
  assertNull(inputOpSpec.getScheduledFn());
  assertNull(inputOpSpec.getWatermarkFn());
  InputOperatorSpec originInputSpec = streamAppDesc.getInputOperators().get(testInputDescriptor.getStreamId());
  assertTrue(originInputSpec.getRegisteredOperatorSpecs().toArray()[0] instanceof PartitionByOperatorSpec);
  PartitionByOperatorSpec reparOpSpec = (PartitionByOperatorSpec) originInputSpec.getRegisteredOperatorSpecs().toArray()[0];
  assertEquals(reparOpSpec.getOpId(), String.format("%s-%s-partition_by-%s", testJobName, testJobId, testRepartitionedStreamName));
  assertEquals(reparOpSpec.getKeyFunction(), keyFn);
  assertEquals(reparOpSpec.getValueFunction(), valueFn);
  assertEquals(reparOpSpec.getOutputStream().getStreamId(), reparOpSpec.getOpId());
  assertNull(reparOpSpec.getScheduledFn());
  assertNull(reparOpSpec.getWatermarkFn());
}