use of org.apache.samza.operators.spec.OperatorSpec in project samza by apache.
the class StreamGraphImpl method doGetOperatorSpecs.
/**
 * Recursively collects every {@link OperatorSpec} reachable from {@code stream} into {@code specs}.
 *
 * <p>{@code specs} is both the result accumulator and the visited set: a spec that is already
 * present is not descended into again. This keeps a sub-graph that is reachable through several
 * paths from being walked once per path, and stops the recursion if the graph ever contains a
 * cycle.
 *
 * <p>NOTE(review): this assumes {@code specs} only ever contains specs that were added by this
 * traversal (i.e. callers start from an empty set) — confirm against the caller.
 *
 * @param stream the stream whose registered operator specs are the starting points
 * @param specs  the set that receives all reachable operator specs
 */
private void doGetOperatorSpecs(MessageStreamImpl stream, Set<OperatorSpec> specs) {
  Collection<OperatorSpec> registeredOperatorSpecs = stream.getRegisteredOperatorSpecs();
  for (OperatorSpec spec : registeredOperatorSpecs) {
    // Set.add returns false when the spec was collected earlier; everything downstream of it
    // has then already been (or is currently being) traversed, so skip it.
    if (!specs.add(spec)) {
      continue;
    }
    MessageStreamImpl nextStream = spec.getNextStream();
    if (nextStream != null) {
      // Recursively traverse and obtain all reachable operators
      doGetOperatorSpecs(nextStream, specs);
    }
  }
}
use of org.apache.samza.operators.spec.OperatorSpec in project samza by apache.
the class ExecutionPlanner method calculateJoinInputPartitions.
/**
 * Resolves the partition counts of the stream edges that feed join operators.
 *
 * <p>The joins reachable from each input stream of {@code streamGraph} are discovered first.
 * Each queued join is then processed in turn: the partition count already known on one of its
 * input edges is copied to its input edges that have none, and any other join fed by a newly
 * assigned edge is queued once.
 *
 * @param streamGraph the graph whose input streams seed the search for joins
 * @param jobGraph    the job graph that supplies the {@link StreamEdge} of each input stream
 * @throws SamzaException if two input edges of the same join carry different known partition counts
 */
/* package private */
static void calculateJoinInputPartitions(StreamGraphImpl streamGraph, JobGraph jobGraph) {
  // join spec -> the stream edges that are inputs to that join
  Multimap<OperatorSpec, StreamEdge> edgesByJoin = HashMultimap.create();
  // stream edge -> the join specs it is an input to (inverse of the map above)
  Multimap<StreamEdge, OperatorSpec> joinsByEdge = HashMultimap.create();
  // output stream -> join spec. StreamGraph creates two partial join operators per join and both
  // share one output stream; this map picks a single representative (first one registered wins).
  Map<MessageStream, OperatorSpec> joinByOutputStream = new HashMap<>();
  // joins waiting to be processed
  Queue<OperatorSpec> pendingJoins = new LinkedList<>();
  // joins that have been put on the queue at some point, so none is queued twice
  Set<OperatorSpec> enqueued = new HashSet<>();

  streamGraph.getInputStreams().forEach((streamSpec, inputStream) ->
      findReachableJoins(inputStream, jobGraph.getOrCreateStreamEdge(streamSpec), edgesByJoin, joinsByEdge,
          joinByOutputStream, pendingJoins, enqueued));

  // From here on, each queued join has at least one input edge with a known partition count.
  while (!pendingJoins.isEmpty()) {
    OperatorSpec join = pendingJoins.poll();

    // Pass 1: take the partition count from the first input edge that has one and insist that
    // every other input edge with a known count agrees with it.
    int resolved = StreamEdge.PARTITIONS_UNKNOWN;
    for (StreamEdge input : edgesByJoin.get(join)) {
      int count = input.getPartitionCount();
      if (count == StreamEdge.PARTITIONS_UNKNOWN) {
        continue;
      }
      if (resolved == StreamEdge.PARTITIONS_UNKNOWN) {
        resolved = count;
      } else if (resolved != count) {
        throw new SamzaException(String.format("Unable to resolve input partitions of stream %s for join. Expected: %d, Actual: %d", input.getFormattedSystemStream(), resolved, count));
      }
    }

    // Pass 2: push the resolved count onto the input edges that do not have one yet.
    // NOTE(review): "not assigned" is tested here as <= 0, whereas pass 1 compares against
    // StreamEdge.PARTITIONS_UNKNOWN — confirm the two are meant to be equivalent.
    for (StreamEdge input : edgesByJoin.get(join)) {
      if (input.getPartitionCount() > 0) {
        continue;
      }
      input.setPartitionCount(resolved);
      // Other joins fed by this edge now have a known input; queue each of them once.
      for (OperatorSpec affectedJoin : joinsByEdge.get(input)) {
        if (enqueued.add(affectedJoin)) {
          pendingJoins.add(affectedJoin);
        }
      }
    }
  }
}
use of org.apache.samza.operators.spec.OperatorSpec in project samza by apache.
the class TestQueryTranslator method testTranslateGroupBy.
/**
 * Translates a GROUP BY query and checks the resulting operator graph: one input operator,
 * one output stream, and at least one window or join.
 */
@Test
public void testTranslateGroupBy() {
  Map<String, String> staticConfigs = SamzaSqlTestConfig.fetchStaticConfigsWithFactories(configs, 10);
  String groupBySql = "Insert into testavro.pageViewCountTopic(jobName, pageKey, `count`)"
      + " select 'SampleJob' as jobName, pv.pageKey, count(*) as `count`"
      + " from testavro.PAGEVIEW as pv"
      + " where pv.pageKey = 'job' or pv.pageKey = 'inbox'"
      + " group by (pv.pageKey)";
  staticConfigs.put(SamzaSqlApplicationConfig.CFG_SQL_STMT, groupBySql);

  Config jobConfig = SamzaSqlApplicationRunner.computeSamzaConfigs(true, new MapConfig(staticConfigs));
  List<String> statements = fetchSqlFromConfig(staticConfigs);
  List<SamzaSqlQueryParser.QueryInfo> queries = fetchQueryInfo(statements);

  // The SQL application config is built from the union of all query sources and the list of sinks.
  SamzaSqlApplicationConfig sqlAppConfig = new SamzaSqlApplicationConfig(
      new MapConfig(staticConfigs),
      queries.stream()
          .map(SamzaSqlQueryParser.QueryInfo::getSources)
          .flatMap(Collection::stream)
          .collect(Collectors.toList()),
      queries.stream()
          .map(SamzaSqlQueryParser.QueryInfo::getSink)
          .collect(Collectors.toList()));

  StreamApplicationDescriptorImpl appDescriptor = new StreamApplicationDescriptorImpl(streamApp -> {
  }, jobConfig);
  QueryTranslator queryTranslator = new QueryTranslator(appDescriptor, sqlAppConfig);
  queryTranslator.translate(queries.get(0), appDescriptor, 0);

  OperatorSpecGraph graph = appDescriptor.getOperatorSpecGraph();
  Assert.assertEquals(1, graph.getInputOperators().size());
  Assert.assertEquals(1, graph.getOutputStreams().size());
  assertTrue(graph.hasWindowOrJoins());
  // The collected specs are not asserted on; the call only has to complete.
  Collection<OperatorSpec> allSpecs = graph.getAllOperatorSpecs();
}
use of org.apache.samza.operators.spec.OperatorSpec in project samza by apache.
the class TestMessageStreamImpl method testSink.
/**
 * Checks that {@code sink} registers, on the stream's operator spec, a {@link SinkOperatorSpec}
 * that carries a sink function and the {@code SINK} op code.
 */
@Test
public void testSink() {
  StreamApplicationDescriptorImpl appDescriptor = mock(StreamApplicationDescriptorImpl.class);
  OperatorSpec upstreamSpec = mock(OperatorSpec.class);
  MessageStreamImpl<TestMessageEnvelope> stream = new MessageStreamImpl<>(appDescriptor, upstreamSpec);

  stream.sink(mock(SinkFunction.class));

  // Capture whatever spec the call registered on the upstream operator spec.
  ArgumentCaptor<OperatorSpec> specCaptor = ArgumentCaptor.forClass(OperatorSpec.class);
  verify(upstreamSpec).registerNextOperatorSpec(specCaptor.capture());
  OperatorSpec<?, TestMessageEnvelope> captured = specCaptor.getValue();

  assertTrue(captured instanceof SinkOperatorSpec);
  SinkOperatorSpec sinkSpec = (SinkOperatorSpec) captured;
  assertNotNull(sinkSpec.getSinkFn());
  assertEquals(OpCode.SINK, captured.getOpCode());
}
use of org.apache.samza.operators.spec.OperatorSpec in project samza by apache.
the class TestMessageStreamImpl method testMap.
/**
 * Checks that {@code map} registers a {@link StreamOperatorSpec} with the {@code MAP} op code,
 * and that its transform function yields a collection containing the map function's result,
 * or an empty collection when the map function returns {@code null}.
 */
@Test
public void testMap() {
  StreamApplicationDescriptorImpl appDescriptor = mock(StreamApplicationDescriptorImpl.class);
  OperatorSpec upstreamSpec = mock(OperatorSpec.class);
  MessageStreamImpl<TestMessageEnvelope> stream = new MessageStreamImpl<>(appDescriptor, upstreamSpec);
  MapFunction<TestMessageEnvelope, TestOutputMessageEnvelope> mapFn = mock(MapFunction.class);

  stream.map(mapFn);

  // Capture whatever spec the call registered on the upstream operator spec.
  ArgumentCaptor<OperatorSpec> specCaptor = ArgumentCaptor.forClass(OperatorSpec.class);
  verify(upstreamSpec).registerNextOperatorSpec(specCaptor.capture());
  OperatorSpec<?, TestMessageEnvelope> captured = specCaptor.getValue();

  assertTrue(captured instanceof StreamOperatorSpec);
  FlatMapFunction flatMapFn = ((StreamOperatorSpec) captured).getTransformFn();
  assertNotNull(flatMapFn);
  assertEquals(OpCode.MAP, captured.getOpCode());

  // The map function returns a value: the transform output must contain it.
  TestOutputMessageEnvelope mappedOutput = mock(TestOutputMessageEnvelope.class);
  when(mapFn.apply(anyObject())).thenReturn(mappedOutput);
  assertTrue(flatMapFn.apply(new Object()).contains(mappedOutput));

  // The map function returns null: the transform output must be empty.
  when(mapFn.apply(anyObject())).thenReturn(null);
  assertTrue(flatMapFn.apply(null).isEmpty());
}
Aggregations