use of org.apache.samza.system.descriptors.GenericInputDescriptor in project samza by apache.
the class TestExecutionPlanner method testCreateJobGraphForTaskApplication.
/**
 * Builds a job graph for a mocked {@code TaskApplicationDescriptorImpl} that declares two
 * intermediate streams -- one regular and one broadcast -- as both inputs and outputs.
 *
 * <p>Asserts that the resulting graph has exactly one job node, that no input edge name is a key
 * of {@code inputDescriptors}, and that the two intermediate stream ids are exactly the
 * descriptor ids left over after removing every stream id reported by the graph's input streams.
 */
@Test
public void testCreateJobGraphForTaskApplication() {
  TaskApplicationDescriptorImpl taskAppDesc = mock(TaskApplicationDescriptorImpl.class);

  // add intermediate streams
  String intermediateStream1 = "intermediate-stream1";
  String intermediateBroadcast = "intermediate-broadcast1";

  // intermediate stream1, not broadcast
  GenericInputDescriptor<KV<Object, Object>> intermediateInput1 = system1Descriptor.getInputDescriptor(
      intermediateStream1, new KVSerde<>(new NoOpSerde<>(), new NoOpSerde<>()));
  GenericOutputDescriptor<KV<Object, Object>> intermediateOutput1 = system1Descriptor.getOutputDescriptor(
      intermediateStream1, new KVSerde<>(new NoOpSerde<>(), new NoOpSerde<>()));

  // intermediate stream2, broadcast
  GenericInputDescriptor<KV<Object, Object>> intermediateBroacastInput1 = system1Descriptor.getInputDescriptor(
      intermediateBroadcast, new KVSerde<>(new NoOpSerde<>(), new NoOpSerde<>()));
  GenericOutputDescriptor<KV<Object, Object>> intermediateBroacastOutput1 = system1Descriptor.getOutputDescriptor(
      intermediateBroadcast, new KVSerde<>(new NoOpSerde<>(), new NoOpSerde<>()));

  // Register the intermediate streams on the shared fixture maps as both inputs and outputs.
  inputDescriptors.put(intermediateStream1, intermediateInput1);
  outputDescriptors.put(intermediateStream1, intermediateOutput1);
  inputDescriptors.put(intermediateBroadcast, intermediateBroacastInput1);
  outputDescriptors.put(intermediateBroadcast, intermediateBroacastOutput1);

  Set<String> broadcastStreams = new HashSet<>();
  broadcastStreams.add(intermediateBroadcast);

  // Stub the mocked application descriptor so it exposes the fixture descriptors.
  when(taskAppDesc.getInputDescriptors()).thenReturn(inputDescriptors);
  when(taskAppDesc.getInputStreamIds()).thenReturn(inputDescriptors.keySet());
  when(taskAppDesc.getOutputDescriptors()).thenReturn(outputDescriptors);
  when(taskAppDesc.getOutputStreamIds()).thenReturn(outputDescriptors.keySet());
  when(taskAppDesc.getTableDescriptors()).thenReturn(Collections.emptySet());
  when(taskAppDesc.getSystemDescriptors()).thenReturn(systemDescriptors);
  when(taskAppDesc.getIntermediateBroadcastStreamIds()).thenReturn(broadcastStreams);
  doReturn(MockTaskApplication.class).when(taskAppDesc).getAppClass();

  ExecutionPlanner planner = new ExecutionPlanner(config, streamManager);
  JobGraph jobGraph = planner.createJobGraph(taskAppDesc);
  assertEquals(1, jobGraph.getJobNodes().size());

  // No input edge may be named after any of the registered input descriptor ids.
  assertTrue(jobGraph.getInputStreams().stream()
      .map(edge -> edge.getName())
      .noneMatch(streamId -> inputDescriptors.containsKey(streamId)));

  // Drop every descriptor id that the graph reports as an input stream; only the two
  // intermediate streams should remain.
  Set<String> intermediateStreams = new HashSet<>(inputDescriptors.keySet());
  jobGraph.getInputStreams().forEach(edge -> intermediateStreams.remove(edge.getStreamSpec().getId()));
  assertEquals(new HashSet<>(Arrays.asList(intermediateStream1, intermediateBroadcast)), intermediateStreams);
}
use of org.apache.samza.system.descriptors.GenericInputDescriptor in project samza by apache.
the class TestOperatorImplGraph method testBroadcastChain.
/**
 * Attaches a filter and a map operator directly to the same input stream and checks that the
 * input operator of the resulting {@code OperatorImplGraph} has both of them registered.
 */
@Test
public void testBroadcastChain() {
  final String streamId = "input";
  final String systemName = "input-system";
  final String physicalName = "input-stream";

  // Job-level settings plus the stream-to-system mapping for the single input.
  HashMap<String, String> rawConfig = new HashMap<>();
  rawConfig.put(JobConfig.JOB_NAME, "test-job");
  rawConfig.put(JobConfig.JOB_ID, "1");
  StreamTestUtils.addStreamConfigs(rawConfig, streamId, systemName, physicalName);
  Config jobConfig = new MapConfig(rawConfig);
  when(this.context.getJobContext().getConfig()).thenReturn(jobConfig);

  StreamApplicationDescriptorImpl appDescriptor = new StreamApplicationDescriptorImpl(app -> {
    GenericInputDescriptor descriptor =
        new GenericSystemDescriptor(systemName, "mockFactoryClass").getInputDescriptor(streamId, mock(Serde.class));
    MessageStream<Object> source = app.getInputStream(descriptor);
    // Two independent branches hanging off the same source stream.
    source.filter(mock(FilterFunction.class));
    source.map(mock(MapFunction.class));
  }, jobConfig);

  OperatorImplGraph graph =
      new OperatorImplGraph(appDescriptor.getOperatorSpecGraph(), this.context, mock(Clock.class));
  SystemStream inputSystemStream = new SystemStream(systemName, physicalName);
  InputOperatorImpl inputOperator = graph.getInputOperator(inputSystemStream);

  assertEquals(2, inputOperator.registeredOperators.size());

  // Scan the registered operators once and remember which op codes were seen.
  boolean sawFilter = false;
  boolean sawMap = false;
  for (Object registered : inputOperator.registeredOperators) {
    OpCode code = ((OperatorImpl) registered).getOperatorSpec().getOpCode();
    if (code == OpCode.FILTER) {
      sawFilter = true;
    }
    if (code == OpCode.MAP) {
      sawMap = true;
    }
  }
  assertTrue(sawFilter);
  assertTrue(sawMap);
}
use of org.apache.samza.system.descriptors.GenericInputDescriptor in project samza by apache.
the class TestOperatorImplGraph method testGetOutputToInputStreams.
/**
 * Builds an application with three inputs, two joins and two {@code partitionBy} stages, then
 * checks the map returned by {@code OperatorImplGraph.getIntermediateToInputStreamsMap}.
 *
 * <p>Expected mapping: the intermediate stream of the "p2" partition maps to inputs 1 and 2,
 * and the intermediate stream of the "p1" partition maps to input 3 only.
 */
@Test
public void testGetOutputToInputStreams() {
  String inputStreamId1 = "input1";
  String inputStreamId2 = "input2";
  String inputStreamId3 = "input3";
  String inputSystem = "input-system";
  String outputStreamId1 = "output1";
  String outputStreamId2 = "output2";
  String outputSystem = "output-system";
  // NOTE(review): these ids presumably follow the generated intermediate stream naming
  // (job name, job id, op name, user-supplied id) -- confirm against the partitionBy implementation.
  String intStreamId1 = "test-app-1-partition_by-p1";
  String intStreamId2 = "test-app-1-partition_by-p2";
  String intSystem = "test-system";

  HashMap<String, String> configs = new HashMap<>();
  configs.put(JobConfig.JOB_NAME, "test-app");
  configs.put(JobConfig.JOB_DEFAULT_SYSTEM, intSystem);
  StreamTestUtils.addStreamConfigs(configs, inputStreamId1, inputSystem, inputStreamId1);
  StreamTestUtils.addStreamConfigs(configs, inputStreamId2, inputSystem, inputStreamId2);
  StreamTestUtils.addStreamConfigs(configs, inputStreamId3, inputSystem, inputStreamId3);
  StreamTestUtils.addStreamConfigs(configs, outputStreamId1, outputSystem, outputStreamId1);
  StreamTestUtils.addStreamConfigs(configs, outputStreamId2, outputSystem, outputStreamId2);
  Config config = new MapConfig(configs);
  when(this.context.getJobContext().getConfig()).thenReturn(config);

  StreamApplicationDescriptorImpl graphSpec = new StreamApplicationDescriptorImpl(appDesc -> {
    GenericSystemDescriptor isd = new GenericSystemDescriptor(inputSystem, "mockFactoryClass");
    GenericInputDescriptor inputDescriptor1 = isd.getInputDescriptor(inputStreamId1, mock(Serde.class));
    GenericInputDescriptor inputDescriptor2 = isd.getInputDescriptor(inputStreamId2, mock(Serde.class));
    GenericInputDescriptor inputDescriptor3 = isd.getInputDescriptor(inputStreamId3, mock(Serde.class));
    GenericSystemDescriptor osd = new GenericSystemDescriptor(outputSystem, "mockFactoryClass");
    GenericOutputDescriptor outputDescriptor1 = osd.getOutputDescriptor(outputStreamId1, mock(Serde.class));
    GenericOutputDescriptor outputDescriptor2 = osd.getOutputDescriptor(outputStreamId2, mock(Serde.class));

    MessageStream messageStream1 = appDesc.getInputStream(inputDescriptor1).map(m -> m);
    MessageStream messageStream2 = appDesc.getInputStream(inputDescriptor2).filter(m -> true);
    // input3 is repartitioned ("p1") before it takes part in any join.
    MessageStream messageStream3 = appDesc.getInputStream(inputDescriptor3)
        .filter(m -> true)
        .partitionBy(m -> "m", m -> m, mock(KVSerde.class), "p1")
        .map(m -> m);
    OutputStream<Object> outputStream1 = appDesc.getOutputStream(outputDescriptor1);
    OutputStream<Object> outputStream2 = appDesc.getOutputStream(outputDescriptor2);

    // input1 joined with input2, then repartitioned ("p2") and sent to output1.
    messageStream1
        .join(messageStream2, mock(JoinFunction.class), mock(Serde.class), mock(Serde.class), mock(Serde.class),
            Duration.ofHours(2), "j1")
        .partitionBy(m -> "m", m -> m, mock(KVSerde.class), "p2")
        .sendTo(outputStream1);
    // The repartitioned input3 joined with input2 and sent to output2.
    messageStream3
        .join(messageStream2, mock(JoinFunction.class), mock(Serde.class), mock(Serde.class), mock(Serde.class),
            Duration.ofHours(1), "j2")
        .sendTo(outputStream2);
  }, config);

  Multimap<SystemStream, SystemStream> outputToInput =
      OperatorImplGraph.getIntermediateToInputStreamsMap(graphSpec.getOperatorSpecGraph(), new StreamConfig(config));

  // JUnit's assertEquals takes (expected, actual); keeping that order makes failure messages accurate.
  Collection<SystemStream> inputs = outputToInput.get(new SystemStream(intSystem, intStreamId2));
  assertEquals(2, inputs.size());
  assertTrue(inputs.contains(new SystemStream(inputSystem, inputStreamId1)));
  assertTrue(inputs.contains(new SystemStream(inputSystem, inputStreamId2)));

  inputs = outputToInput.get(new SystemStream(intSystem, intStreamId1));
  assertEquals(1, inputs.size());
  assertEquals(new SystemStream(inputSystem, inputStreamId3), inputs.iterator().next());
}
use of org.apache.samza.system.descriptors.GenericInputDescriptor in project samza by apache.
the class TestOperatorImplGraph method testLinearChain.
/**
 * Wires input -> filter -> map -> sendTo as a single chain and walks the resulting
 * {@code OperatorImplGraph} one hop at a time, checking at each hop the number of registered
 * downstream operators and the op code.
 */
@Test
public void testLinearChain() {
  final String sourceStreamId = "input";
  final String sourceSystem = "input-system";
  final String sourcePhysicalName = "input-stream";
  final String sinkStreamId = "output";
  final String sinkSystem = "output-system";
  final String sinkPhysicalName = "output-stream";
  final String defaultSystem = "intermediate-system";

  HashMap<String, String> rawConfig = new HashMap<>();
  rawConfig.put(JobConfig.JOB_NAME, "jobName");
  rawConfig.put(JobConfig.JOB_ID, "jobId");
  rawConfig.put(JobConfig.JOB_DEFAULT_SYSTEM, defaultSystem);
  StreamTestUtils.addStreamConfigs(rawConfig, sourceStreamId, sourceSystem, sourcePhysicalName);
  StreamTestUtils.addStreamConfigs(rawConfig, sinkStreamId, sinkSystem, sinkPhysicalName);
  Config jobConfig = new MapConfig(rawConfig);
  when(this.context.getJobContext().getConfig()).thenReturn(jobConfig);

  StreamApplicationDescriptorImpl appDescriptor = new StreamApplicationDescriptorImpl(app -> {
    // Both descriptors are created from the same system descriptor.
    GenericSystemDescriptor systemDescriptor = new GenericSystemDescriptor(sourceSystem, "mockFactoryClass");
    GenericInputDescriptor sourceDescriptor = systemDescriptor.getInputDescriptor(sourceStreamId, mock(Serde.class));
    GenericOutputDescriptor sinkDescriptor = systemDescriptor.getOutputDescriptor(sinkStreamId, mock(Serde.class));
    MessageStream<Object> source = app.getInputStream(sourceDescriptor);
    OutputStream<Object> sink = app.getOutputStream(sinkDescriptor);
    source
        .filter(mock(FilterFunction.class))
        .map(mock(MapFunction.class))
        .sendTo(sink);
  }, jobConfig);

  OperatorImplGraph graph =
      new OperatorImplGraph(appDescriptor.getOperatorSpecGraph(), this.context, mock(Clock.class));
  InputOperatorImpl inputOperator = graph.getInputOperator(new SystemStream(sourceSystem, sourcePhysicalName));
  assertEquals(1, inputOperator.registeredOperators.size());

  // Hop 1: the filter, which the cast requires to be a FlatmapOperatorImpl.
  Object afterInput = inputOperator.registeredOperators.iterator().next();
  OperatorImpl filterOperator = (FlatmapOperatorImpl) afterInput;
  assertEquals(1, filterOperator.registeredOperators.size());
  assertEquals(OpCode.FILTER, filterOperator.getOperatorSpec().getOpCode());

  // Hop 2: the map, likewise cast to FlatmapOperatorImpl.
  Object afterFilter = filterOperator.registeredOperators.iterator().next();
  OperatorImpl mapOperator = (FlatmapOperatorImpl) afterFilter;
  assertEquals(1, mapOperator.registeredOperators.size());
  assertEquals(OpCode.MAP, mapOperator.getOperatorSpec().getOpCode());

  // Hop 3: the terminal sendTo, which has nothing registered downstream.
  Object afterMap = mapOperator.registeredOperators.iterator().next();
  OperatorImpl sendToOperator = (OutputOperatorImpl) afterMap;
  assertEquals(0, sendToOperator.registeredOperators.size());
  assertEquals(OpCode.SEND_TO, sendToOperator.getOperatorSpec().getOpCode());
}
use of org.apache.samza.system.descriptors.GenericInputDescriptor in project samza by apache.
the class TestOperatorImplGraph method testOperatorGraphInitAndClose.
/**
 * Builds two independent two-step map chains ("1" -> "2" on the first input, "3" -> "4" on the
 * second) and checks the order in which the map functions are initialized when the
 * {@code OperatorImplGraph} is constructed and the order in which they are closed on
 * {@code close()}.
 *
 * <p>Expected init order is 1, 2, 3, 4; expected close order is the reverse.
 */
@Test
public void testOperatorGraphInitAndClose() {
  String inputStreamId1 = "input1";
  String inputStreamId2 = "input2";
  String inputSystem = "input-system";

  // The init/close lists are looked up by this task name below.
  TaskName mockTaskName = mock(TaskName.class);
  TaskModel taskModel = mock(TaskModel.class);
  when(taskModel.getTaskName()).thenReturn(mockTaskName);
  when(this.context.getTaskContext().getTaskModel()).thenReturn(taskModel);

  StreamApplicationDescriptorImpl graphSpec = new StreamApplicationDescriptorImpl(appDesc -> {
    GenericSystemDescriptor sd = new GenericSystemDescriptor(inputSystem, "mockFactoryClass");
    GenericInputDescriptor inputDescriptor1 = sd.getInputDescriptor(inputStreamId1, mock(Serde.class));
    GenericInputDescriptor inputDescriptor2 = sd.getInputDescriptor(inputStreamId2, mock(Serde.class));
    MessageStream<Object> inputStream1 = appDesc.getInputStream(inputDescriptor1);
    MessageStream<Object> inputStream2 = appDesc.getInputStream(inputDescriptor2);

    // Identity function, declared with an intersection cast so the lambda is also Serializable.
    Function mapFn = (Function & Serializable) m -> m;

    inputStream1.map(new TestMapFunction<Object, Object>("1", mapFn))
        .map(new TestMapFunction<Object, Object>("2", mapFn));
    inputStream2.map(new TestMapFunction<Object, Object>("3", mapFn))
        .map(new TestMapFunction<Object, Object>("4", mapFn));
  }, getConfig());

  OperatorImplGraph opImplGraph =
      new OperatorImplGraph(graphSpec.getOperatorSpecGraph(), this.context, SystemClock.instance());

  // Assert that initialization occurs in topological order.
  // assertEquals takes (expected, actual); that order keeps failure messages accurate.
  List<String> initializedOperators = BaseTestFunction.getInitListByTaskName(mockTaskName);
  assertEquals("1", initializedOperators.get(0));
  assertEquals("2", initializedOperators.get(1));
  assertEquals("3", initializedOperators.get(2));
  assertEquals("4", initializedOperators.get(3));

  // Assert that finalization occurs in reverse topological order.
  opImplGraph.close();
  List<String> closedOperators = BaseTestFunction.getCloseListByTaskName(mockTaskName);
  assertEquals("4", closedOperators.get(0));
  assertEquals("3", closedOperators.get(1));
  assertEquals("2", closedOperators.get(2));
  assertEquals("1", closedOperators.get(3));
}
Aggregations