use of org.apache.samza.operators.spec.OperatorSpec in project samza by apache.
the class TestMessageStreamImpl method testFlatMap.
/**
 * Verifies that {@code flatMap} registers exactly one {@link StreamOperatorSpec} on the input stream,
 * that the spec points at the returned output stream, and that its transform function is the
 * user-supplied {@link FlatMapFunction}.
 */
@Test
public void testFlatMap() {
  MessageStreamImpl<TestMessageEnvelope> inputStream = new MessageStreamImpl<>(mockGraph);
  // Fixed output returned by the flat-map function for every input. A plain ArrayList is used
  // instead of double-brace initialization to avoid creating an anonymous subclass.
  List<TestOutputMessageEnvelope> flatOuts = new ArrayList<>();
  flatOuts.add(mock(TestOutputMessageEnvelope.class));
  flatOuts.add(mock(TestOutputMessageEnvelope.class));
  flatOuts.add(mock(TestOutputMessageEnvelope.class));
  // Records every message handed to the flat-map function so the invocation can be verified.
  final List<TestMessageEnvelope> inputMsgs = new ArrayList<>();
  FlatMapFunction<TestMessageEnvelope, TestOutputMessageEnvelope> xFlatMap = (TestMessageEnvelope message) -> {
    inputMsgs.add(message);
    return flatOuts;
  };
  MessageStream<TestOutputMessageEnvelope> outputStream = inputStream.flatMap(xFlatMap);
  Collection<OperatorSpec> subs = inputStream.getRegisteredOperatorSpecs();
  // JUnit's assertEquals takes (expected, actual); keeping that order makes failure messages accurate.
  assertEquals(1, subs.size());
  OperatorSpec<TestOutputMessageEnvelope> flatMapOp = subs.iterator().next();
  assertTrue(flatMapOp instanceof StreamOperatorSpec);
  assertEquals(outputStream, flatMapOp.getNextStream());
  assertEquals(xFlatMap, ((StreamOperatorSpec) flatMapOp).getTransformFn());
  TestMessageEnvelope mockInput = mock(TestMessageEnvelope.class);
  // assert that the transformation function is what we defined above
  List<TestOutputMessageEnvelope> result = (List<TestOutputMessageEnvelope>) ((StreamOperatorSpec<TestMessageEnvelope, TestOutputMessageEnvelope>) flatMapOp).getTransformFn().apply(mockInput);
  assertEquals(flatOuts, result);
  assertEquals(1, inputMsgs.size());
  assertEquals(mockInput, inputMsgs.get(0));
}
use of org.apache.samza.operators.spec.OperatorSpec in project samza by apache.
the class ExecutionPlanner method calculateJoinInputPartitions.
/**
 * Resolves the partition counts of the stream edges that feed join operators.
 *
 * Every input edge of a join must agree on a single partition count. Edges whose count is already
 * known determine the count for the join; edges whose count is not yet assigned receive that count,
 * which may in turn allow further joins fed by those edges to be resolved.
 *
 * @param streamGraph the stream graph whose input streams are the traversal starting points
 * @param jobGraph the job graph used to look up (or create) the {@link StreamEdge} of each input
 * @throws SamzaException if two input edges of the same join have different known partition counts
 */
/* package private */
static void calculateJoinInputPartitions(StreamGraphImpl streamGraph, JobGraph jobGraph) {
  // join spec -> the stream edges associated with that join
  Multimap<OperatorSpec, StreamEdge> edgesByJoin = HashMultimap.create();
  // stream edge -> the join specs associated with that edge (reverse of the above)
  Multimap<StreamEdge, OperatorSpec> joinsByEdge = HashMultimap.create();
  // Output stream -> join spec. StreamGraph creates two partial join operators per join and both
  // share one output stream, so this map lets a single spec represent the join
  // (whichever registers first wins).
  Map<MessageStream, OperatorSpec> joinByOutputStream = new HashMap<>();
  // Work list of joins that are ready to be resolved
  Queue<OperatorSpec> pendingJoins = new LinkedList<>();
  // Join specs that have already been placed on the work list at some point
  Set<OperatorSpec> enqueuedJoins = new HashSet<>();

  streamGraph.getInputStreams().entrySet().forEach(inputEntry -> {
    StreamEdge inputEdge = jobGraph.getOrCreateStreamEdge(inputEntry.getKey());
    findReachableJoins(inputEntry.getValue(), inputEdge, edgesByJoin, joinsByEdge, joinByOutputStream, pendingJoins, enqueuedJoins);
  });

  // The work list is expected to hold joins for which at least one input edge has a known partition count.
  while (!pendingJoins.isEmpty()) {
    OperatorSpec currentJoin = pendingJoins.poll();
    int resolved = StreamEdge.PARTITIONS_UNKNOWN;

    // First pass: derive the partition count from the edges that already know theirs.
    for (StreamEdge inputEdge : edgesByJoin.get(currentJoin)) {
      int known = inputEdge.getPartitionCount();
      if (known == StreamEdge.PARTITIONS_UNKNOWN) {
        continue;
      }
      if (resolved == StreamEdge.PARTITIONS_UNKNOWN) {
        // first known count seen for this join
        resolved = known;
      } else if (resolved != known) {
        throw new SamzaException(String.format("Unable to resolve input partitions of stream %s for join. Expected: %d, Actual: %d", inputEdge.getFormattedSystemStream(), resolved, known));
      }
    }

    // Second pass: push the resolved count onto edges that have none assigned yet.
    for (StreamEdge inputEdge : edgesByJoin.get(currentJoin)) {
      if (inputEdge.getPartitionCount() > 0) {
        continue;
      }
      inputEdge.setPartitionCount(resolved);
      // Setting this edge may make other joins resolvable; schedule each of them at most once.
      for (OperatorSpec dependentJoin : joinsByEdge.get(inputEdge)) {
        if (enqueuedJoins.add(dependentJoin)) {
          pendingJoins.add(dependentJoin);
        }
      }
    }
  }
}
use of org.apache.samza.operators.spec.OperatorSpec in project samza by apache.
the class StreamGraphImpl method getAllOperatorSpecs.
/**
 * Collects every {@link OperatorSpec} reachable from the input streams of this {@link StreamGraphImpl}.
 *
 * @return a freshly created set holding all {@link OperatorSpec}s found by the traversal
 */
public Collection<OperatorSpec> getAllOperatorSpecs() {
  Set<OperatorSpec> collected = new HashSet<>();
  // Each input stream is a traversal root; the helper accumulates into the shared set.
  inStreams.values().forEach(input -> doGetOperatorSpecs((MessageStreamImpl) input, collected));
  return collected;
}
use of org.apache.samza.operators.spec.OperatorSpec in project samza by apache.
the class OperatorImplGraph method createAndRegisterOperatorImpl.
/**
 * Recursively walks the {@link OperatorSpec} DAG starting at {@code operatorSpec}, creating the
 * matching {@link OperatorImpl} for each spec and wiring each one to its downstream operators.
 *
 * @param operatorSpec the operatorSpec to create the {@link OperatorImpl} for
 * @param config the {@link Config} required to instantiate operators
 * @param context the {@link TaskContext} required to instantiate operators
 * @param <M> type of input message
 * @return the operator implementation registered for the operatorSpec
 */
private <M> OperatorImpl<M, ?> createAndRegisterOperatorImpl(OperatorSpec operatorSpec, Config config, TaskContext context) {
  if (operatorImpls.containsKey(operatorSpec)) {
    // An implementation for this spec is already registered, so there is no need
    // to traverse the DAG below it again.
    return operatorImpls.get(operatorSpec);
  }

  OperatorImpl<M, ?> created = createOperatorImpl(operatorSpec, config, context);
  if (operatorImpls.putIfAbsent(operatorSpec, created) != null) {
    // Another implementation was registered for this spec in the meantime; use that one.
    return operatorImpls.get(operatorSpec);
  }

  // First registration for this spec: initialize its operator function, then build,
  // register and link everything downstream of it.
  created.init(config, context);
  MessageStreamImpl downstream = operatorSpec.getNextStream();
  if (downstream != null) {
    Collection<OperatorSpec> downstreamSpecs = downstream.getRegisteredOperatorSpecs();
    for (OperatorSpec downstreamSpec : downstreamSpecs) {
      OperatorImpl downstreamImpl = createAndRegisterOperatorImpl(downstreamSpec, config, context);
      created.registerNextOperator(downstreamImpl);
    }
  }
  return created;
}
use of org.apache.samza.operators.spec.OperatorSpec in project samza by apache.
the class StreamGraphImpl method doGetOperatorSpecs.
/**
 * Depth-first helper that adds every {@link OperatorSpec} reachable from {@code stream} to {@code specs}.
 *
 * A spec that is already present in {@code specs} has had (or is having) its downstream traversed,
 * so it is skipped. This avoids walking the same sub-DAG once per path leading to it, which happens
 * whenever several operators share one output stream.
 *
 * @param stream the stream whose registered operator specs are the traversal starting points
 * @param specs the accumulator receiving all reachable specs; it also serves as the visited set
 */
private void doGetOperatorSpecs(MessageStreamImpl stream, Set<OperatorSpec> specs) {
  Collection<OperatorSpec> registeredOperatorSpecs = stream.getRegisteredOperatorSpecs();
  for (OperatorSpec spec : registeredOperatorSpecs) {
    if (!specs.add(spec)) {
      // Already collected via another path; its downstream specs are covered by that visit.
      continue;
    }
    MessageStreamImpl nextStream = spec.getNextStream();
    if (nextStream != null) {
      // Recursively traverse and obtain all reachable operators
      doGetOperatorSpecs(nextStream, specs);
    }
  }
}
Aggregations