Search in sources :

Example 11 with OperatorSpec

use of org.apache.samza.operators.spec.OperatorSpec in project samza by apache.

the class TestMessageStreamImpl method testFlatMap.

/**
 * Verifies that {@code flatMap} registers exactly one {@link StreamOperatorSpec} on the input stream,
 * that the registered spec points at the returned output stream, and that its transform function is
 * the user-supplied {@link FlatMapFunction}.
 */
@Test
public void testFlatMap() {
    MessageStreamImpl<TestMessageEnvelope> inputStream = new MessageStreamImpl<>(mockGraph);
    // Plain ArrayList rather than double-brace initialization, which creates an anonymous subclass.
    List<TestOutputMessageEnvelope> flatOuts = new ArrayList<>();
    flatOuts.add(mock(TestOutputMessageEnvelope.class));
    flatOuts.add(mock(TestOutputMessageEnvelope.class));
    flatOuts.add(mock(TestOutputMessageEnvelope.class));
    // Records every message handed to the flat-map function so the invocation can be checked below.
    final List<TestMessageEnvelope> inputMsgs = new ArrayList<>();
    FlatMapFunction<TestMessageEnvelope, TestOutputMessageEnvelope> xFlatMap = (TestMessageEnvelope message) -> {
        inputMsgs.add(message);
        return flatOuts;
    };
    MessageStream<TestOutputMessageEnvelope> outputStream = inputStream.flatMap(xFlatMap);
    Collection<OperatorSpec> subs = inputStream.getRegisteredOperatorSpecs();
    // JUnit's assertEquals takes (expected, actual); keeping that order makes failure messages accurate.
    assertEquals(1, subs.size());
    OperatorSpec<TestOutputMessageEnvelope> flatMapOp = subs.iterator().next();
    assertTrue(flatMapOp instanceof StreamOperatorSpec);
    assertEquals(outputStream, flatMapOp.getNextStream());
    // assert that the transformation function is what we defined above
    assertEquals(xFlatMap, ((StreamOperatorSpec) flatMapOp).getTransformFn());
    TestMessageEnvelope mockInput = mock(TestMessageEnvelope.class);
    // applying the registered function must return our list and pass the input through to our lambda
    List<TestOutputMessageEnvelope> result = (List<TestOutputMessageEnvelope>) ((StreamOperatorSpec<TestMessageEnvelope, TestOutputMessageEnvelope>) flatMapOp).getTransformFn().apply(mockInput);
    assertEquals(flatOuts, result);
    assertEquals(1, inputMsgs.size());
    assertEquals(mockInput, inputMsgs.get(0));
}
Also used : ArrayList(java.util.ArrayList) StreamOperatorSpec(org.apache.samza.operators.spec.StreamOperatorSpec) OperatorSpec(org.apache.samza.operators.spec.OperatorSpec) PartialJoinOperatorSpec(org.apache.samza.operators.spec.PartialJoinOperatorSpec) SinkOperatorSpec(org.apache.samza.operators.spec.SinkOperatorSpec) StreamOperatorSpec(org.apache.samza.operators.spec.StreamOperatorSpec) TestMessageEnvelope(org.apache.samza.operators.data.TestMessageEnvelope) TestOutputMessageEnvelope(org.apache.samza.operators.data.TestOutputMessageEnvelope) ArrayList(java.util.ArrayList) ImmutableList(com.google.common.collect.ImmutableList) List(java.util.List) Test(org.junit.Test)

Example 12 with OperatorSpec

use of org.apache.samza.operators.spec.OperatorSpec in project samza by apache.

the class ExecutionPlanner method calculateJoinInputPartitions.

/**
 * Calculate the partitions for the input streams of join operators.
 *
 * Joins are processed from a work queue: for each join, the partition count already known on any of its
 * input stream edges is applied to the input edges whose count is still unset, and any join that shares
 * a newly assigned edge is queued in turn.
 *
 * @param streamGraph  the stream graph whose input streams are the starting points of the search
 * @param jobGraph  the job graph used to look up (or create) the {@link StreamEdge} of each input stream
 * @throws SamzaException  if two input edges of the same join carry different known partition counts
 */
/* package private */
static void calculateJoinInputPartitions(StreamGraphImpl streamGraph, JobGraph jobGraph) {
    // join spec -> the stream edges associated with it (filled in by findReachableJoins)
    Multimap<OperatorSpec, StreamEdge> joinSpecToStreamEdges = HashMultimap.create();
    // stream edge -> join specs; the inverse of the map above
    Multimap<StreamEdge, OperatorSpec> streamEdgeToJoinSpecs = HashMultimap.create();
    // Output stream -> join spec. StreamGraph creates two partial join operators per join and both share
    // the same output stream, so this map is used to pick a single representative spec for each join
    // (whichever registers first wins).
    Map<MessageStream, OperatorSpec> outputStreamToJoinSpec = new HashMap<>();
    // Work queue of joins with known input partitions
    Queue<OperatorSpec> joinQ = new LinkedList<>();
    // Join specs that have been put on the queue at some point, so none is queued twice
    Set<OperatorSpec> visited = new HashSet<>();

    streamGraph.getInputStreams().entrySet().forEach(input -> {
        StreamEdge inputEdge = jobGraph.getOrCreateStreamEdge(input.getKey());
        findReachableJoins(input.getValue(), inputEdge, joinSpecToStreamEdges, streamEdgeToJoinSpecs, outputStreamToJoinSpec, joinQ, visited);
    });

    // At this point, joinQ contains joinSpecs where at least one of the input stream edge partitions is known.
    while (!joinQ.isEmpty()) {
        OperatorSpec join = joinQ.poll();

        // Pass 1: derive the partition count from the edges that already have one, insisting they all agree.
        int resolved = StreamEdge.PARTITIONS_UNKNOWN;
        for (StreamEdge candidate : joinSpecToStreamEdges.get(join)) {
            int count = candidate.getPartitionCount();
            if (count == StreamEdge.PARTITIONS_UNKNOWN) {
                continue;
            }
            if (resolved == StreamEdge.PARTITIONS_UNKNOWN) {
                // first known count seen for this join
                resolved = count;
            } else if (resolved != count) {
                throw new SamzaException(String.format("Unable to resolve input partitions of stream %s for join. Expected: %d, Actual: %d", candidate.getFormattedSystemStream(), resolved, count));
            }
        }

        // Pass 2: assign the partition count for intermediate streams.
        // NOTE(review): "unset" is tested here as <= 0 whereas pass 1 compares against
        // StreamEdge.PARTITIONS_UNKNOWN -- confirm the two checks are meant to be equivalent.
        for (StreamEdge target : joinSpecToStreamEdges.get(join)) {
            if (target.getPartitionCount() > 0) {
                continue;
            }
            target.setPartitionCount(resolved);
            // joins sharing this edge may now be inferable; queue each of them at most once
            for (OperatorSpec dependent : streamEdgeToJoinSpecs.get(target)) {
                if (visited.add(dependent)) {
                    joinQ.add(dependent);
                }
            }
        }
    }
}
Also used : HashMap(java.util.HashMap) SamzaException(org.apache.samza.SamzaException) LinkedList(java.util.LinkedList) OperatorSpec(org.apache.samza.operators.spec.OperatorSpec) PartialJoinOperatorSpec(org.apache.samza.operators.spec.PartialJoinOperatorSpec) MessageStream(org.apache.samza.operators.MessageStream) HashSet(java.util.HashSet)

Example 13 with OperatorSpec

use of org.apache.samza.operators.spec.OperatorSpec in project samza by apache.

the class StreamGraphImpl method getAllOperatorSpecs.

/**
 * Gather the {@link OperatorSpec}s of this {@link StreamGraphImpl} by walking from each of its
 * input streams.
 *
 * @return  a set of all available {@link OperatorSpec}s
 */
public Collection<OperatorSpec> getAllOperatorSpecs() {
    Set<OperatorSpec> collected = new HashSet<>();
    // every input stream is a traversal root; the shared set accumulates the specs found from each one
    inStreams.values().forEach(input -> doGetOperatorSpecs((MessageStreamImpl) input, collected));
    return collected;
}
Also used : OperatorSpec(org.apache.samza.operators.spec.OperatorSpec) InputStreamInternal(org.apache.samza.operators.stream.InputStreamInternal) HashSet(java.util.HashSet)

Example 14 with OperatorSpec

use of org.apache.samza.operators.spec.OperatorSpec in project samza by apache.

the class OperatorImplGraph method createAndRegisterOperatorImpl.

/**
 * Recursively walks the {@link OperatorSpec} DAG starting at the given spec, creating the matching
 * {@link OperatorImpl} for each spec and linking every implementation to its downstream ones.
 *
 * @param operatorSpec  the operatorSpec to create the {@link OperatorImpl} for
 * @param config  the {@link Config} required to instantiate operators
 * @param context  the {@link TaskContext} required to instantiate operators
 * @param <M>  type of input message
 * @return  the operator implementation for the operatorSpec
 */
private <M> OperatorImpl<M, ?> createAndRegisterOperatorImpl(OperatorSpec operatorSpec, Config config, TaskContext context) {
    if (operatorImpls.containsKey(operatorSpec)) {
        // an implementation for this spec is already registered, so there is no need to
        // traverse the DAG further
        return operatorImpls.get(operatorSpec);
    }

    OperatorImpl<M, ?> created = createOperatorImpl(operatorSpec, config, context);
    if (operatorImpls.putIfAbsent(operatorSpec, created) != null) {
        // an implementation was registered for this spec in the meantime; return the registered one
        return operatorImpls.get(operatorSpec);
    }

    // First registration for this spec: initialize its operator function, then create,
    // register and link the implementations for the rest of the DAG below it.
    created.init(config, context);
    MessageStreamImpl downstream = operatorSpec.getNextStream();
    if (downstream != null) {
        Collection<OperatorSpec> downstreamSpecs = downstream.getRegisteredOperatorSpecs();
        for (OperatorSpec downstreamSpec : downstreamSpecs) {
            OperatorImpl downstreamImpl = createAndRegisterOperatorImpl(downstreamSpec, config, context);
            created.registerNextOperator(downstreamImpl);
        }
    }
    return created;
}
Also used : StreamOperatorSpec(org.apache.samza.operators.spec.StreamOperatorSpec) WindowOperatorSpec(org.apache.samza.operators.spec.WindowOperatorSpec) OperatorSpec(org.apache.samza.operators.spec.OperatorSpec) PartialJoinOperatorSpec(org.apache.samza.operators.spec.PartialJoinOperatorSpec) SinkOperatorSpec(org.apache.samza.operators.spec.SinkOperatorSpec) MessageStreamImpl(org.apache.samza.operators.MessageStreamImpl)

Example 15 with OperatorSpec

use of org.apache.samza.operators.spec.OperatorSpec in project samza by apache.

the class StreamGraphImpl method doGetOperatorSpecs.

/**
 * Recursively adds to {@code specs} every {@link OperatorSpec} registered on {@code stream} and on
 * the streams reachable through those specs' next streams.
 *
 * A spec that is already present in {@code specs} is not descended into again. This avoids walking
 * the same downstream sub-graph repeatedly when several paths lead to one spec, and keeps the
 * recursion bounded.
 *
 * @param stream  the stream whose registered operator specs are the starting points
 * @param specs  the accumulator the discovered specs are added to
 */
private void doGetOperatorSpecs(MessageStreamImpl stream, Set<OperatorSpec> specs) {
    Collection<OperatorSpec> registeredOperatorSpecs = stream.getRegisteredOperatorSpecs();
    for (OperatorSpec spec : registeredOperatorSpecs) {
        if (!specs.add(spec)) {
            // add() returned false: the spec was collected earlier, so skip its downstream
            continue;
        }
        MessageStreamImpl nextStream = spec.getNextStream();
        if (nextStream != null) {
            //Recursively traverse and obtain all reachable operators
            doGetOperatorSpecs(nextStream, specs);
        }
    }
}
Also used : OperatorSpec(org.apache.samza.operators.spec.OperatorSpec)

Aggregations

OperatorSpec (org.apache.samza.operators.spec.OperatorSpec)15 PartialJoinOperatorSpec (org.apache.samza.operators.spec.PartialJoinOperatorSpec)12 SinkOperatorSpec (org.apache.samza.operators.spec.SinkOperatorSpec)11 StreamOperatorSpec (org.apache.samza.operators.spec.StreamOperatorSpec)9 TestMessageEnvelope (org.apache.samza.operators.data.TestMessageEnvelope)8 Test (org.junit.Test)7 ArrayList (java.util.ArrayList)5 TestOutputMessageEnvelope (org.apache.samza.operators.data.TestOutputMessageEnvelope)5 MessageType (org.apache.samza.operators.data.MessageType)4 ImmutableList (com.google.common.collect.ImmutableList)3 HashMap (java.util.HashMap)3 List (java.util.List)3 HashSet (java.util.HashSet)2 Config (org.apache.samza.config.Config)2 JobConfig (org.apache.samza.config.JobConfig)2 MapConfig (org.apache.samza.config.MapConfig)2 MessageStream (org.apache.samza.operators.MessageStream)2 MessageStreamImpl (org.apache.samza.operators.MessageStreamImpl)2 TestExtOutputMessageEnvelope (org.apache.samza.operators.data.TestExtOutputMessageEnvelope)2 TestInputMessageEnvelope (org.apache.samza.operators.data.TestInputMessageEnvelope)2