Search in sources :

Example 21 with OperatorSpec

use of org.apache.samza.operators.spec.OperatorSpec in project samza by apache.

the class StreamGraphImpl method doGetOperatorSpecs.

/**
 * Collects every operator spec reachable from {@code stream} into {@code specs}.
 *
 * <p>Each spec registered on the stream is added to the set, and the traversal then
 * continues into that spec's next stream, if it has one.
 *
 * @param stream the stream whose registered operator specs are the traversal roots
 * @param specs  accumulator that receives every spec found; mutated in place
 */
private void doGetOperatorSpecs(MessageStreamImpl stream, Set<OperatorSpec> specs) {
    Collection<OperatorSpec> registeredOperatorSpecs = stream.getRegisteredOperatorSpecs();
    for (OperatorSpec spec : registeredOperatorSpecs) {
        // Set.add returns false when the spec is already present. Its downstream
        // operators were (or are being) collected by the visit that added it, so
        // descending again would only repeat work, and would never terminate if
        // the operator graph contained a cycle.
        // NOTE(review): assumes callers only populate `specs` through this method - confirm.
        if (!specs.add(spec)) {
            continue;
        }
        MessageStreamImpl nextStream = spec.getNextStream();
        if (nextStream != null) {
            // Recursively traverse and obtain all reachable operators
            doGetOperatorSpecs(nextStream, specs);
        }
    }
}
Also used : OperatorSpec(org.apache.samza.operators.spec.OperatorSpec)

Example 22 with OperatorSpec

use of org.apache.samza.operators.spec.OperatorSpec in project samza by apache.

the class ExecutionPlanner method calculateJoinInputPartitions.

/**
 * Calculate the partitions for the input streams of join operators.
 *
 * <p>Starting from the joins collected by {@code findReachableJoins}, each join takes the
 * partition count shared by its input edges that already have one and assigns it to its
 * input edges whose count is not yet positive. Joins fed by a newly assigned edge are then
 * queued so that their inputs can be resolved in turn.
 *
 * @throws SamzaException if two input edges of the same join carry different known
 *                        partition counts
 */
/* package private */
static void calculateJoinInputPartitions(StreamGraphImpl streamGraph, JobGraph jobGraph) {
    // join spec -> the stream edges that feed it
    Multimap<OperatorSpec, StreamEdge> edgesByJoin = HashMultimap.create();
    // stream edge -> the join specs it feeds (reverse of the mapping above)
    Multimap<StreamEdge, OperatorSpec> joinsByEdge = HashMultimap.create();
    // Output stream -> join spec. Since StreamGraph creates two partial join operators for a
    // join and both share the same output stream, this map picks one of them as the unique
    // representative of the join (whichever registers first wins).
    Map<MessageStream, OperatorSpec> joinByOutputStream = new HashMap<>();
    // Joins with known input partitions, waiting to be processed
    Queue<OperatorSpec> pendingJoins = new LinkedList<>();
    // Join specs that have already been put on the queue at some point
    Set<OperatorSpec> enqueued = new HashSet<>();

    streamGraph.getInputStreams().forEach((streamKey, inputStream) -> {
        StreamEdge inputEdge = jobGraph.getOrCreateStreamEdge(streamKey);
        findReachableJoins(inputStream, inputEdge, edgesByJoin, joinsByEdge, joinByOutputStream, pendingJoins, enqueued);
    });

    // From here on the queue holds join specs with at least one input edge whose partition count is known.
    while (!pendingJoins.isEmpty()) {
        OperatorSpec currentJoin = pendingJoins.poll();

        // Pass 1: derive the single partition count shared by all inputs that already have one.
        int resolved = StreamEdge.PARTITIONS_UNKNOWN;
        for (StreamEdge input : edgesByJoin.get(currentJoin)) {
            int inputPartitions = input.getPartitionCount();
            if (inputPartitions == StreamEdge.PARTITIONS_UNKNOWN) {
                continue;
            }
            if (resolved == StreamEdge.PARTITIONS_UNKNOWN) {
                // first known count seen for this join
                resolved = inputPartitions;
            } else if (resolved != inputPartitions) {
                throw new SamzaException(String.format("Unable to resolve input partitions of stream %s for join. Expected: %d, Actual: %d", input.getFormattedSystemStream(), resolved, inputPartitions));
            }
        }

        // Pass 2: push the resolved count onto the intermediate streams that still lack one.
        for (StreamEdge input : edgesByJoin.get(currentJoin)) {
            if (input.getPartitionCount() > 0) {
                continue;
            }
            input.setPartitionCount(resolved);
            // Setting this edge may make other joins resolvable; queue each of them once.
            for (OperatorSpec dependentJoin : joinsByEdge.get(input)) {
                if (enqueued.add(dependentJoin)) {
                    pendingJoins.add(dependentJoin);
                }
            }
        }
    }
}
Also used : HashMap(java.util.HashMap) SamzaException(org.apache.samza.SamzaException) LinkedList(java.util.LinkedList) OperatorSpec(org.apache.samza.operators.spec.OperatorSpec) PartialJoinOperatorSpec(org.apache.samza.operators.spec.PartialJoinOperatorSpec) MessageStream(org.apache.samza.operators.MessageStream) HashSet(java.util.HashSet)

Example 23 with OperatorSpec

use of org.apache.samza.operators.spec.OperatorSpec in project samza by apache.

the class TestQueryTranslator method testTranslateGroupBy.

/**
 * Translates a group-by SQL statement and checks the shape of the resulting operator
 * spec graph: one input operator, one output stream, and a window or join present.
 */
@Test
public void testTranslateGroupBy() {
    Map<String, String> staticConfigs = SamzaSqlTestConfig.fetchStaticConfigsWithFactories(configs, 10);
    String groupByQuery = "Insert into testavro.pageViewCountTopic(jobName, pageKey, `count`)"
        + " select 'SampleJob' as jobName, pv.pageKey, count(*) as `count`"
        + " from testavro.PAGEVIEW as pv"
        + " where pv.pageKey = 'job' or pv.pageKey = 'inbox'"
        + " group by (pv.pageKey)";
    staticConfigs.put(SamzaSqlApplicationConfig.CFG_SQL_STMT, groupByQuery);

    Config samzaConfig = SamzaSqlApplicationRunner.computeSamzaConfigs(true, new MapConfig(staticConfigs));
    List<String> statements = fetchSqlFromConfig(staticConfigs);
    List<SamzaSqlQueryParser.QueryInfo> parsedQueries = fetchQueryInfo(statements);

    // The SQL application config is built from the sources and sinks of every parsed query.
    SamzaSqlApplicationConfig sqlAppConfig = new SamzaSqlApplicationConfig(
        new MapConfig(staticConfigs),
        parsedQueries.stream()
            .map(SamzaSqlQueryParser.QueryInfo::getSources)
            .flatMap(Collection::stream)
            .collect(Collectors.toList()),
        parsedQueries.stream()
            .map(SamzaSqlQueryParser.QueryInfo::getSink)
            .collect(Collectors.toList()));

    // The application body is a no-op; the translator populates the descriptor.
    StreamApplicationDescriptorImpl appDescriptor = new StreamApplicationDescriptorImpl(streamApp -> { }, samzaConfig);
    QueryTranslator queryTranslator = new QueryTranslator(appDescriptor, sqlAppConfig);
    queryTranslator.translate(parsedQueries.get(0), appDescriptor, 0);

    OperatorSpecGraph graph = appDescriptor.getOperatorSpecGraph();
    Assert.assertEquals(1, graph.getInputOperators().size());
    Assert.assertEquals(1, graph.getOutputStreams().size());
    assertTrue(graph.hasWindowOrJoins());
    // Retrieved only; no assertion is currently made on the returned specs.
    Collection<OperatorSpec> allSpecs = graph.getAllOperatorSpecs();
}
Also used : SamzaSqlApplicationConfig(org.apache.samza.sql.runner.SamzaSqlApplicationConfig) OperatorSpecGraph(org.apache.samza.operators.OperatorSpecGraph) StreamConfig(org.apache.samza.config.StreamConfig) MapConfig(org.apache.samza.config.MapConfig) SamzaSqlTestConfig(org.apache.samza.sql.util.SamzaSqlTestConfig) SamzaSqlApplicationConfig(org.apache.samza.sql.runner.SamzaSqlApplicationConfig) Config(org.apache.samza.config.Config) OperatorSpec(org.apache.samza.operators.spec.OperatorSpec) StreamApplicationDescriptorImpl(org.apache.samza.application.descriptors.StreamApplicationDescriptorImpl) Collection(java.util.Collection) MapConfig(org.apache.samza.config.MapConfig) SamzaSqlQueryParser(org.apache.samza.sql.util.SamzaSqlQueryParser) Test(org.junit.Test)

Example 24 with OperatorSpec

use of org.apache.samza.operators.spec.OperatorSpec in project samza by apache.

the class TestMessageStreamImpl method testSink.

/**
 * Verifies that calling {@code sink} on a stream registers a {@code SinkOperatorSpec}
 * with a non-null sink function and the SINK op code on the upstream operator spec.
 */
@Test
public void testSink() {
    // Arrange: a stream backed by a mocked upstream operator spec.
    StreamApplicationDescriptorImpl appDescriptor = mock(StreamApplicationDescriptorImpl.class);
    OperatorSpec upstreamSpec = mock(OperatorSpec.class);
    MessageStreamImpl<TestMessageEnvelope> stream = new MessageStreamImpl<>(appDescriptor, upstreamSpec);

    // Act
    stream.sink(mock(SinkFunction.class));

    // Capture whatever spec the stream registered downstream of the mock.
    ArgumentCaptor<OperatorSpec> specCaptor = ArgumentCaptor.forClass(OperatorSpec.class);
    verify(upstreamSpec).registerNextOperatorSpec(specCaptor.capture());
    OperatorSpec<?, TestMessageEnvelope> capturedSpec = specCaptor.getValue();

    // Assert
    assertTrue(capturedSpec instanceof SinkOperatorSpec);
    SinkOperatorSpec sinkSpec = (SinkOperatorSpec) capturedSpec;
    assertNotNull(sinkSpec.getSinkFn());
    assertEquals(OpCode.SINK, capturedSpec.getOpCode());
}
Also used : StreamOperatorSpec(org.apache.samza.operators.spec.StreamOperatorSpec) PartitionByOperatorSpec(org.apache.samza.operators.spec.PartitionByOperatorSpec) JoinOperatorSpec(org.apache.samza.operators.spec.JoinOperatorSpec) SendToTableOperatorSpec(org.apache.samza.operators.spec.SendToTableOperatorSpec) OperatorSpec(org.apache.samza.operators.spec.OperatorSpec) SinkOperatorSpec(org.apache.samza.operators.spec.SinkOperatorSpec) OutputOperatorSpec(org.apache.samza.operators.spec.OutputOperatorSpec) WindowOperatorSpec(org.apache.samza.operators.spec.WindowOperatorSpec) StreamTableJoinOperatorSpec(org.apache.samza.operators.spec.StreamTableJoinOperatorSpec) IntermediateMessageStreamImpl(org.apache.samza.operators.stream.IntermediateMessageStreamImpl) TestMessageEnvelope(org.apache.samza.operators.data.TestMessageEnvelope) SinkFunction(org.apache.samza.operators.functions.SinkFunction) StreamApplicationDescriptorImpl(org.apache.samza.application.descriptors.StreamApplicationDescriptorImpl) SinkOperatorSpec(org.apache.samza.operators.spec.SinkOperatorSpec) Test(org.junit.Test)

Example 25 with OperatorSpec

use of org.apache.samza.operators.spec.OperatorSpec in project samza by apache.

the class TestMessageStreamImpl method testMap.

/**
 * Verifies that calling {@code map} on a stream registers a {@code StreamOperatorSpec}
 * with the MAP op code, and that its transform function wraps the map function's result:
 * a collection containing the mapped value, or an empty collection when the map function
 * returns null.
 */
@Test
public void testMap() {
    // Arrange: a stream backed by a mocked upstream operator spec.
    StreamApplicationDescriptorImpl appDescriptor = mock(StreamApplicationDescriptorImpl.class);
    OperatorSpec upstreamSpec = mock(OperatorSpec.class);
    MessageStreamImpl<TestMessageEnvelope> stream = new MessageStreamImpl<>(appDescriptor, upstreamSpec);
    MapFunction<TestMessageEnvelope, TestOutputMessageEnvelope> mapper = mock(MapFunction.class);

    // Act
    stream.map(mapper);

    // Capture whatever spec the stream registered downstream of the mock.
    ArgumentCaptor<OperatorSpec> specCaptor = ArgumentCaptor.forClass(OperatorSpec.class);
    verify(upstreamSpec).registerNextOperatorSpec(specCaptor.capture());
    OperatorSpec<?, TestMessageEnvelope> capturedSpec = specCaptor.getValue();

    // Assert on the registered spec.
    assertTrue(capturedSpec instanceof StreamOperatorSpec);
    FlatMapFunction flatMapper = ((StreamOperatorSpec) capturedSpec).getTransformFn();
    assertNotNull(flatMapper);
    assertEquals(OpCode.MAP, capturedSpec.getOpCode());

    // A mapped value must appear in the transform function's output.
    TestOutputMessageEnvelope expectedOutput = mock(TestOutputMessageEnvelope.class);
    when(mapper.apply(anyObject())).thenReturn(expectedOutput);
    assertTrue(flatMapper.apply(new Object()).contains(expectedOutput));

    // A null result from the map function must yield an empty output.
    when(mapper.apply(anyObject())).thenReturn(null);
    assertTrue(flatMapper.apply(null).isEmpty());
}
Also used : StreamOperatorSpec(org.apache.samza.operators.spec.StreamOperatorSpec) PartitionByOperatorSpec(org.apache.samza.operators.spec.PartitionByOperatorSpec) JoinOperatorSpec(org.apache.samza.operators.spec.JoinOperatorSpec) SendToTableOperatorSpec(org.apache.samza.operators.spec.SendToTableOperatorSpec) OperatorSpec(org.apache.samza.operators.spec.OperatorSpec) SinkOperatorSpec(org.apache.samza.operators.spec.SinkOperatorSpec) OutputOperatorSpec(org.apache.samza.operators.spec.OutputOperatorSpec) WindowOperatorSpec(org.apache.samza.operators.spec.WindowOperatorSpec) StreamTableJoinOperatorSpec(org.apache.samza.operators.spec.StreamTableJoinOperatorSpec) IntermediateMessageStreamImpl(org.apache.samza.operators.stream.IntermediateMessageStreamImpl) StreamOperatorSpec(org.apache.samza.operators.spec.StreamOperatorSpec) TestMessageEnvelope(org.apache.samza.operators.data.TestMessageEnvelope) StreamApplicationDescriptorImpl(org.apache.samza.application.descriptors.StreamApplicationDescriptorImpl) FlatMapFunction(org.apache.samza.operators.functions.FlatMapFunction) TestOutputMessageEnvelope(org.apache.samza.operators.data.TestOutputMessageEnvelope) Matchers.anyObject(org.mockito.Matchers.anyObject) Test(org.junit.Test)

Aggregations

- OperatorSpec (org.apache.samza.operators.spec.OperatorSpec): 34
- SinkOperatorSpec (org.apache.samza.operators.spec.SinkOperatorSpec): 20
- JoinOperatorSpec (org.apache.samza.operators.spec.JoinOperatorSpec): 18
- StreamOperatorSpec (org.apache.samza.operators.spec.StreamOperatorSpec): 18
- StreamTableJoinOperatorSpec (org.apache.samza.operators.spec.StreamTableJoinOperatorSpec): 18
- OutputOperatorSpec (org.apache.samza.operators.spec.OutputOperatorSpec): 17
- SendToTableOperatorSpec (org.apache.samza.operators.spec.SendToTableOperatorSpec): 17
- WindowOperatorSpec (org.apache.samza.operators.spec.WindowOperatorSpec): 16
- Test (org.junit.Test): 16
- TestMessageEnvelope (org.apache.samza.operators.data.TestMessageEnvelope): 15
- PartitionByOperatorSpec (org.apache.samza.operators.spec.PartitionByOperatorSpec): 15
- StreamApplicationDescriptorImpl (org.apache.samza.application.descriptors.StreamApplicationDescriptorImpl): 14
- IntermediateMessageStreamImpl (org.apache.samza.operators.stream.IntermediateMessageStreamImpl): 12
- InputOperatorSpec (org.apache.samza.operators.spec.InputOperatorSpec): 11
- FlatMapFunction (org.apache.samza.operators.functions.FlatMapFunction): 7
- HashMap (java.util.HashMap): 5
- HashSet (java.util.HashSet): 5
- PartialJoinOperatorSpec (org.apache.samza.operators.spec.PartialJoinOperatorSpec): 5
- KVSerde (org.apache.samza.serializers.KVSerde): 5
- Collection (java.util.Collection): 4