use of org.apache.samza.serializers.KVSerde in project samza by apache.
the class TestExecutionPlanner method testCreateJobGraphForTaskApplication.
@Test
public void testCreateJobGraphForTaskApplication() {
TaskApplicationDescriptorImpl taskAppDesc = mock(TaskApplicationDescriptorImpl.class);
// add interemediate streams
String intermediateStream1 = "intermediate-stream1";
String intermediateBroadcast = "intermediate-broadcast1";
// intermediate stream1, not broadcast
GenericInputDescriptor<KV<Object, Object>> intermediateInput1 = system1Descriptor.getInputDescriptor(intermediateStream1, new KVSerde<>(new NoOpSerde(), new NoOpSerde()));
GenericOutputDescriptor<KV<Object, Object>> intermediateOutput1 = system1Descriptor.getOutputDescriptor(intermediateStream1, new KVSerde<>(new NoOpSerde(), new NoOpSerde()));
// intermediate stream2, broadcast
GenericInputDescriptor<KV<Object, Object>> intermediateBroacastInput1 = system1Descriptor.getInputDescriptor(intermediateBroadcast, new KVSerde<>(new NoOpSerde<>(), new NoOpSerde<>()));
GenericOutputDescriptor<KV<Object, Object>> intermediateBroacastOutput1 = system1Descriptor.getOutputDescriptor(intermediateBroadcast, new KVSerde<>(new NoOpSerde<>(), new NoOpSerde<>()));
inputDescriptors.put(intermediateStream1, intermediateInput1);
outputDescriptors.put(intermediateStream1, intermediateOutput1);
inputDescriptors.put(intermediateBroadcast, intermediateBroacastInput1);
outputDescriptors.put(intermediateBroadcast, intermediateBroacastOutput1);
Set<String> broadcastStreams = new HashSet<>();
broadcastStreams.add(intermediateBroadcast);
when(taskAppDesc.getInputDescriptors()).thenReturn(inputDescriptors);
when(taskAppDesc.getInputStreamIds()).thenReturn(inputDescriptors.keySet());
when(taskAppDesc.getOutputDescriptors()).thenReturn(outputDescriptors);
when(taskAppDesc.getOutputStreamIds()).thenReturn(outputDescriptors.keySet());
when(taskAppDesc.getTableDescriptors()).thenReturn(Collections.emptySet());
when(taskAppDesc.getSystemDescriptors()).thenReturn(systemDescriptors);
when(taskAppDesc.getIntermediateBroadcastStreamIds()).thenReturn(broadcastStreams);
doReturn(MockTaskApplication.class).when(taskAppDesc).getAppClass();
Map<String, String> systemStreamConfigs = new HashMap<>();
inputDescriptors.forEach((key, value) -> systemStreamConfigs.putAll(value.toConfig()));
outputDescriptors.forEach((key, value) -> systemStreamConfigs.putAll(value.toConfig()));
systemDescriptors.forEach(sd -> systemStreamConfigs.putAll(sd.toConfig()));
ExecutionPlanner planner = new ExecutionPlanner(config, streamManager);
JobGraph jobGraph = planner.createJobGraph(taskAppDesc);
assertEquals(1, jobGraph.getJobNodes().size());
assertTrue(jobGraph.getInputStreams().stream().map(edge -> edge.getName()).filter(streamId -> inputDescriptors.containsKey(streamId)).collect(Collectors.toList()).isEmpty());
Set<String> intermediateStreams = new HashSet<>(inputDescriptors.keySet());
jobGraph.getInputStreams().forEach(edge -> {
if (intermediateStreams.contains(edge.getStreamSpec().getId())) {
intermediateStreams.remove(edge.getStreamSpec().getId());
}
});
assertEquals(new HashSet<>(Arrays.asList(intermediateStream1, intermediateBroadcast)), intermediateStreams);
}
use of org.apache.samza.serializers.KVSerde in project samza by apache.
the class TestExecutionPlanner method createStreamGraphWithInvalidStreamTableJoinWithSideInputs.
private StreamApplicationDescriptorImpl createStreamGraphWithInvalidStreamTableJoinWithSideInputs() {
/**
* Example stream-table join that is invalid due to disagreement in partition count between the
* stream behind table t and another joined stream. Table t is configured with input2 (16) as
* side-input stream.
*
* join-table t -> output1 (8)
* |
* input1 (64) —————————
*/
return new StreamApplicationDescriptorImpl(appDesc -> {
MessageStream<KV<Object, Object>> messageStream1 = appDesc.getInputStream(input1Descriptor);
OutputStream<KV<Object, Object>> output1 = appDesc.getOutputStream(output1Descriptor);
TableDescriptor tableDescriptor = new TestLocalTableDescriptor.MockLocalTableDescriptor("table-id", new KVSerde(new StringSerde(), new StringSerde())).withSideInputs(Arrays.asList("input2")).withSideInputsProcessor(mock(SideInputsProcessor.class));
Table table = appDesc.getTable(tableDescriptor);
messageStream1.join(table, mock(StreamTableJoinFunction.class)).sendTo(output1);
}, config);
}
use of org.apache.samza.serializers.KVSerde in project samza by apache.
the class TestJoinOperator method createStreamOperatorTask.
private StreamOperatorTask createStreamOperatorTask(Clock clock, StreamApplicationDescriptorImpl graphSpec) throws Exception {
Map<String, String> mapConfig = new HashMap<>();
mapConfig.put("job.name", "jobName");
mapConfig.put("job.id", "jobId");
StreamTestUtils.addStreamConfigs(mapConfig, "inStream", "insystem", "instream");
StreamTestUtils.addStreamConfigs(mapConfig, "inStream2", "insystem", "instream2");
Context context = new MockContext(new MapConfig(mapConfig));
TaskModel taskModel = mock(TaskModel.class);
when(taskModel.getSystemStreamPartitions()).thenReturn(ImmutableSet.of(new SystemStreamPartition("insystem", "instream", new Partition(0)), new SystemStreamPartition("insystem", "instream2", new Partition(0))));
when(context.getTaskContext().getTaskModel()).thenReturn(taskModel);
when(context.getTaskContext().getTaskMetricsRegistry()).thenReturn(new MetricsRegistryMap());
when(context.getContainerContext().getContainerMetricsRegistry()).thenReturn(new MetricsRegistryMap());
// need to return different stores for left and right side
IntegerSerde integerSerde = new IntegerSerde();
TimestampedValueSerde timestampedValueSerde = new TimestampedValueSerde(new KVSerde(integerSerde, integerSerde));
when(context.getTaskContext().getStore(eq("jobName-jobId-join-j1-L"))).thenReturn(new TestInMemoryStore(integerSerde, timestampedValueSerde));
when(context.getTaskContext().getStore(eq("jobName-jobId-join-j1-R"))).thenReturn(new TestInMemoryStore(integerSerde, timestampedValueSerde));
StreamOperatorTask sot = new StreamOperatorTask(graphSpec.getOperatorSpecGraph(), clock);
sot.init(context);
return sot;
}
use of org.apache.samza.serializers.KVSerde in project samza by apache.
the class TestMessageStreamImpl method testPartitionBy.
@Test
public void testPartitionBy() throws IOException {
StreamApplicationDescriptorImpl mockGraph = mock(StreamApplicationDescriptorImpl.class);
OperatorSpec mockOpSpec = mock(OperatorSpec.class);
String mockOpName = "mockName";
when(mockGraph.getNextOpId(anyObject(), anyObject())).thenReturn(mockOpName);
OutputStreamImpl mockOutputStreamImpl = mock(OutputStreamImpl.class);
KVSerde mockKVSerde = mock(KVSerde.class);
IntermediateMessageStreamImpl mockIntermediateStream = mock(IntermediateMessageStreamImpl.class);
when(mockGraph.getIntermediateStream(eq(mockOpName), eq(mockKVSerde), eq(false))).thenReturn(mockIntermediateStream);
when(mockIntermediateStream.getOutputStream()).thenReturn(mockOutputStreamImpl);
when(mockIntermediateStream.isKeyed()).thenReturn(true);
MessageStreamImpl<TestMessageEnvelope> inputStream = new MessageStreamImpl<>(mockGraph, mockOpSpec);
MapFunction mockKeyFunction = mock(MapFunction.class);
MapFunction mockValueFunction = mock(MapFunction.class);
inputStream.partitionBy(mockKeyFunction, mockValueFunction, mockKVSerde, "p1");
ArgumentCaptor<OperatorSpec> registeredOpCaptor = ArgumentCaptor.forClass(OperatorSpec.class);
verify(mockOpSpec).registerNextOperatorSpec(registeredOpCaptor.capture());
OperatorSpec<?, TestMessageEnvelope> registeredOpSpec = registeredOpCaptor.getValue();
assertTrue(registeredOpSpec instanceof PartitionByOperatorSpec);
assertEquals(OpCode.PARTITION_BY, registeredOpSpec.getOpCode());
assertEquals(mockOutputStreamImpl, ((PartitionByOperatorSpec) registeredOpSpec).getOutputStream());
assertEquals(mockKeyFunction, ((PartitionByOperatorSpec) registeredOpSpec).getKeyFunction());
assertEquals(mockValueFunction, ((PartitionByOperatorSpec) registeredOpSpec).getValueFunction());
}
use of org.apache.samza.serializers.KVSerde in project samza by apache.
the class QueryTranslator method sendToOutputStream.
private void sendToOutputStream(String queryLogicalId, String logicalOpId, String sinkStream, StreamApplicationDescriptor appDesc, TranslatorContext translatorContext, RelNode node, int queryId) {
SqlIOConfig sinkConfig = sqlConfig.getOutputSystemStreamConfigsBySource().get(sinkStream);
MessageStream<SamzaSqlRelMessage> stream = translatorContext.getMessageStream(node.getId());
MessageStream<KV<Object, Object>> outputStream = stream.map(new OutputMapFunction(queryLogicalId, logicalOpId, sinkStream, queryId));
Optional<TableDescriptor> tableDescriptor = sinkConfig.getTableDescriptor();
if (!tableDescriptor.isPresent()) {
KVSerde<Object, Object> noOpKVSerde = KVSerde.of(new NoOpSerde<>(), new NoOpSerde<>());
String systemName = sinkConfig.getSystemName();
DelegatingSystemDescriptor sd = systemDescriptors.computeIfAbsent(systemName, DelegatingSystemDescriptor::new);
GenericOutputDescriptor<KV<Object, Object>> osd = sd.getOutputDescriptor(sinkConfig.getStreamId(), noOpKVSerde);
OutputStream stm = outputMsgStreams.computeIfAbsent(sinkConfig.getSource(), v -> appDesc.getOutputStream(osd));
outputStream.sendTo(stm);
// Process system events only if the output is a stream.
if (sqlConfig.isProcessSystemEvents()) {
for (MessageStream<SamzaSqlInputMessage> inputStream : inputMsgStreams.values()) {
MessageStream<KV<Object, Object>> systemEventStream = inputStream.filter(message -> message.getMetadata().isSystemMessage()).map(SamzaSqlInputMessage::getKeyAndMessageKV);
systemEventStream.sendTo(stm);
}
}
} else {
Table outputTable = appDesc.getTable(tableDescriptor.get());
if (outputTable == null) {
String msg = "Failed to obtain table descriptor of " + sinkConfig.getSource();
throw new SamzaException(msg);
}
outputStream.sendTo(outputTable);
}
}
Aggregations