Use of org.apache.samza.operators.MessageStream in the Apache Samza project.
Class KeyValueStoreExample, method describe.
/**
 * Declares the page-view statistics pipeline on the given application descriptor.
 *
 * <p>The {@code pageViewEvent} input of the "tracking" Kafka system is repartitioned by member id,
 * passed through a {@code MyStatsCounter} flat-map, re-keyed by the member id of each emitted stats
 * record, and sent to the {@code pageViewEventPerMember} output.
 *
 * @param appDescriptor the descriptor on which the system, streams and operators are declared
 */
@Override
public void describe(StreamApplicationDescriptor appDescriptor) {
  KafkaSystemDescriptor kafka = new KafkaSystemDescriptor("tracking");

  KafkaInputDescriptor<PageViewEvent> pageViewInput =
      kafka.getInputDescriptor("pageViewEvent", new JsonSerdeV2<>(PageViewEvent.class));
  KafkaOutputDescriptor<KV<String, StatsOutput>> statsOutput =
      kafka.getOutputDescriptor(
          "pageViewEventPerMember",
          KVSerde.of(new StringSerde(), new JsonSerdeV2<>(StatsOutput.class)));

  // The default system is registered before any stream is looked up on the descriptor.
  appDescriptor.withDefaultSystem(kafka);
  MessageStream<PageViewEvent> views = appDescriptor.getInputStream(pageViewInput);
  OutputStream<KV<String, StatsOutput>> statsPerMember = appDescriptor.getOutputStream(statsOutput);

  // Serde used for the repartitioned (member id -> page view) stream.
  KVSerde<String, PageViewEvent> repartitionSerde =
      KVSerde.of(new StringSerde(), new JsonSerdeV2<>(PageViewEvent.class));

  views
      .partitionBy(event -> event.getMemberId(), event -> event, repartitionSerde, "partitionBy")
      .map(KV::getValue)
      .flatMap(new MyStatsCounter())
      .map(memberStats -> KV.of(memberStats.memberId, memberStats))
      .sendTo(statsPerMember);
}
Use of org.apache.samza.operators.MessageStream in the Apache Samza project.
Class TestExecutionPlanner, method createStreamGraphWithJoin.
/**
 * Builds a stream graph with three inputs, two joins and two outputs for planner tests.
 *
 * @return the populated stream graph
 */
private StreamGraphImpl createStreamGraphWithJoin() {
  /*
   * Topology (partition counts in parentheses; quoted counts are the expected, inferred values):
   *
   *                               input1 (64) -> map -> join -> output1 (8)
   *                                                       |
   *          input2 (16) -> partitionBy ("64") -> filter -|
   *                                                       |
   * input3 (32) -> filter -> partitionBy ("64") -> map -> join -> output2 (16)
   */
  StreamGraphImpl graph = new StreamGraphImpl(runner, config);
  BiFunction messageBuilder = mock(BiFunction.class);

  MessageStream fromInput1 = graph.getInputStream("input1", messageBuilder)
      .map(m -> m);
  MessageStream fromInput2 = graph.getInputStream("input2", messageBuilder)
      .partitionBy(m -> "haha")
      .filter(m -> true);
  MessageStream fromInput3 = graph.getInputStream("input3", messageBuilder)
      .filter(m -> true)
      .partitionBy(m -> "hehe")
      .map(m -> m);

  // A single mocked extractor is shared by both outputs.
  Function extractor = mock(Function.class);
  OutputStream<Object, Object, Object> firstOutput = graph.getOutputStream("output1", extractor, extractor);
  OutputStream<Object, Object, Object> secondOutput = graph.getOutputStream("output2", extractor, extractor);

  // input2 participates in both joins.
  fromInput1
      .join(fromInput2, mock(JoinFunction.class), Duration.ofHours(2))
      .sendTo(firstOutput);
  fromInput3
      .join(fromInput2, mock(JoinFunction.class), Duration.ofHours(1))
      .sendTo(secondOutput);

  return graph;
}
Use of org.apache.samza.operators.MessageStream in the Apache Samza project.
Class TestExecutionPlanner, method createStreamGraphWithJoinAndWindow.
/**
 * Builds a stream graph with three inputs, two keyed tumbling windows (8 ms and 16 ms) and three
 * joins (1600 ms, 100 ms and 252 ms) feeding two outputs, for planner tests.
 *
 * @return the populated stream graph
 */
private StreamGraphImpl createStreamGraphWithJoinAndWindow() {
  StreamGraphImpl graph = new StreamGraphImpl(runner, config);
  BiFunction messageBuilder = mock(BiFunction.class);

  MessageStream fromInput1 = graph.getInputStream("input1", messageBuilder)
      .map(m -> m);
  MessageStream fromInput2 = graph.getInputStream("input2", messageBuilder)
      .partitionBy(m -> "haha")
      .filter(m -> true);
  MessageStream fromInput3 = graph.getInputStream("input3", messageBuilder)
      .filter(m -> true)
      .partitionBy(m -> "hehe")
      .map(m -> m);

  // A single mocked extractor is shared by both outputs.
  Function extractor = mock(Function.class);
  OutputStream<Object, Object, Object> firstOutput = graph.getOutputStream("output1", extractor, extractor);
  OutputStream<Object, Object, Object> secondOutput = graph.getOutputStream("output2", extractor, extractor);

  // Windowed branches; their results are not routed to any output.
  fromInput1
      .map(m -> m)
      .filter(m -> true)
      .window(Windows.<Object, Object>keyedTumblingWindow(m -> m, Duration.ofMillis(8)));
  fromInput2
      .map(m -> m)
      .filter(m -> true)
      .window(Windows.<Object, Object>keyedTumblingWindow(m -> m, Duration.ofMillis(16)));

  // Joins; input2 participates in all three.
  fromInput1
      .join(fromInput2, mock(JoinFunction.class), Duration.ofMillis(1600))
      .sendTo(firstOutput);
  fromInput3
      .join(fromInput2, mock(JoinFunction.class), Duration.ofMillis(100))
      .sendTo(secondOutput);
  fromInput3
      .join(fromInput2, mock(JoinFunction.class), Duration.ofMillis(252))
      .sendTo(secondOutput);

  return graph;
}
Use of org.apache.samza.operators.MessageStream in the Apache Samza project.
Class ExecutionPlanner, method calculateJoinInputPartitions.
/**
 * Calculates the partition counts for the input streams of join operators.
 *
 * <p>Joins are taken from a work queue one at a time. For each join, the known partition counts of
 * its input edges must all agree; that count is then assigned to every input edge whose count is
 * not yet positive, and any not-yet-visited join fed by such an edge is queued in turn.
 *
 * @param streamGraph the stream graph whose input streams are the traversal starting points
 * @param jobGraph the job graph from which the stream edge of each input is obtained
 * @throws SamzaException if two input edges of the same join have different known partition counts
 */
/* package private */
static void calculateJoinInputPartitions(StreamGraphImpl streamGraph, JobGraph jobGraph) {
  // join spec -> stream edges associated with it
  Multimap<OperatorSpec, StreamEdge> joinSpecToStreamEdges = HashMultimap.create();
  // stream edge -> join specs associated with it (the reverse of the map above)
  Multimap<StreamEdge, OperatorSpec> streamEdgeToJoinSpecs = HashMultimap.create();
  // Output stream -> join spec. StreamGraph creates two partial join operators per join, both with
  // the same output stream; this map picks one of them as the unique representative of the join
  // (whichever registers first wins).
  Map<MessageStream, OperatorSpec> outputStreamToJoinSpec = new HashMap<>();
  // Work queue of joins with known input partitions.
  Queue<OperatorSpec> pendingJoins = new LinkedList<>();
  // Join specs that have already been placed on the work queue at some point.
  Set<OperatorSpec> enqueued = new HashSet<>();

  streamGraph.getInputStreams().forEach((inputSpec, inputStream) -> {
    StreamEdge inputEdge = jobGraph.getOrCreateStreamEdge(inputSpec);
    findReachableJoins(inputStream, inputEdge, joinSpecToStreamEdges, streamEdgeToJoinSpecs,
        outputStreamToJoinSpec, pendingJoins, enqueued);
  });

  // findReachableJoins is expected to have queued the join specs for which at least one input
  // stream edge has a known partition count.
  while (!pendingJoins.isEmpty()) {
    OperatorSpec joinSpec = pendingJoins.poll();

    // Pass 1: determine the single partition count shared by all known input edges.
    int resolved = StreamEdge.PARTITIONS_UNKNOWN;
    for (StreamEdge edge : joinSpecToStreamEdges.get(joinSpec)) {
      int edgePartitions = edge.getPartitionCount();
      if (edgePartitions == StreamEdge.PARTITIONS_UNKNOWN) {
        continue;
      }
      if (resolved == StreamEdge.PARTITIONS_UNKNOWN) {
        // first known count seen for this join
        resolved = edgePartitions;
        continue;
      }
      if (resolved != edgePartitions) {
        throw new SamzaException(String.format(
            "Unable to resolve input partitions of stream %s for join. Expected: %d, Actual: %d",
            edge.getFormattedSystemStream(), resolved, edgePartitions));
      }
    }

    // Pass 2: assign the resolved count to the edges that do not have a positive count yet.
    for (StreamEdge edge : joinSpecToStreamEdges.get(joinSpec)) {
      if (edge.getPartitionCount() > 0) {
        continue;
      }
      edge.setPartitionCount(resolved);

      // Setting this edge may make other joins resolvable; queue each of them once.
      for (OperatorSpec dependentJoin : streamEdgeToJoinSpecs.get(edge)) {
        if (enqueued.add(dependentJoin)) {
          pendingJoins.add(dependentJoin);
        }
      }
    }
  }
}
Use of org.apache.samza.operators.MessageStream in the Apache Samza project.
Class BroadcastExample, method describe.
/**
 * Declares a fan-out of the {@code pageViewEvent} input of the "tracking" Kafka system into three
 * outputs, selected by message key: {@code "key1"} goes to {@code outStream1}, {@code "key2"} to
 * {@code outStream2} and {@code "key3"} to {@code outStream3}. Messages with any other key,
 * including a {@code null} key, match none of the filters and are not forwarded.
 *
 * @param appDescriptor the descriptor on which the streams and operators are declared
 */
@Override
public void describe(StreamApplicationDescriptor appDescriptor) {
  KVSerde<String, PageViewEvent> serde =
      KVSerde.of(new StringSerde("UTF-8"), new JsonSerdeV2<>(PageViewEvent.class));
  KafkaSystemDescriptor trackingSystem = new KafkaSystemDescriptor("tracking");

  KafkaInputDescriptor<KV<String, PageViewEvent>> pageViewEvent =
      trackingSystem.getInputDescriptor("pageViewEvent", serde);
  KafkaOutputDescriptor<KV<String, PageViewEvent>> outStream1 =
      trackingSystem.getOutputDescriptor("outStream1", serde);
  KafkaOutputDescriptor<KV<String, PageViewEvent>> outStream2 =
      trackingSystem.getOutputDescriptor("outStream2", serde);
  KafkaOutputDescriptor<KV<String, PageViewEvent>> outStream3 =
      trackingSystem.getOutputDescriptor("outStream3", serde);

  MessageStream<KV<String, PageViewEvent>> inputStream = appDescriptor.getInputStream(pageViewEvent);

  // Constant-first equals: a message whose key is null is filtered out instead of raising a
  // NullPointerException inside the filter.
  inputStream.filter(m -> "key1".equals(m.key)).sendTo(appDescriptor.getOutputStream(outStream1));
  inputStream.filter(m -> "key2".equals(m.key)).sendTo(appDescriptor.getOutputStream(outStream2));
  inputStream.filter(m -> "key3".equals(m.key)).sendTo(appDescriptor.getOutputStream(outStream3));
}
Aggregations