Use of org.apache.samza.table.descriptors.TableDescriptor in the Apache Samza project.
Class TestJobGraphJsonGenerator, method testTaskApplication.
/**
 * Verifies the JSON that {@link JobGraphJsonGenerator} produces for a mocked {@link JobGraph}
 * containing a single job node: application name and id, source / sink / intermediate stream ids,
 * table ids, and the job node's id, name and operator-graph contents.
 */
@Test
public void testTaskApplication() throws Exception {
  JobGraphJsonGenerator jsonGenerator = new JobGraphJsonGenerator();
  JobGraph mockJobGraph = mock(JobGraph.class);
  ApplicationConfig mockAppConfig = mock(ApplicationConfig.class);
  when(mockAppConfig.getAppName()).thenReturn("testTaskApp");
  when(mockAppConfig.getAppId()).thenReturn("testTaskAppId");
  when(mockJobGraph.getApplicationConfig()).thenReturn(mockAppConfig);

  // Compute the three disjoint sets of the JobGraph: input only, output only, and intermediate streams.
  Set<StreamEdge> inEdges = new HashSet<>(mockJobNode.getInEdges().values());
  Set<StreamEdge> outEdges = new HashSet<>(mockJobNode.getOutEdges().values());
  // Intermediate streams are the intersection between input and output.
  Set<StreamEdge> intermediateEdges = new HashSet<>(inEdges);
  intermediateEdges.retainAll(outEdges);
  // Strip the intermediate streams from both the input and the output sets.
  inEdges.removeAll(intermediateEdges);
  outEdges.removeAll(intermediateEdges);

  // Set the return values for mockJobGraph.
  when(mockJobGraph.getInputStreams()).thenReturn(inEdges);
  when(mockJobGraph.getOutputStreams()).thenReturn(outEdges);
  when(mockJobGraph.getIntermediateStreamEdges()).thenReturn(intermediateEdges);
  Set<TableDescriptor> tables = new HashSet<>(mockJobNode.getTables().values());
  when(mockJobGraph.getTables()).thenReturn(tables);
  when(mockJobGraph.getJobNodes()).thenReturn(Collections.singletonList(mockJobNode));

  String graphJson = jsonGenerator.toJson(mockJobGraph);
  ObjectMapper objectMapper = new ObjectMapper();
  // Parse the String directly: going through getBytes() would encode with the platform-default
  // charset, which is not guaranteed to be an encoding the JSON parser can detect.
  JobGraphJsonGenerator.JobGraphJson jsonObject = objectMapper.readValue(graphJson, JobGraphJsonGenerator.JobGraphJson.class);

  assertEquals("testTaskAppId", jsonObject.applicationId);
  assertEquals("testTaskApp", jsonObject.applicationName);

  // Every stream/table category in the JSON must be keyed by exactly the expected ids.
  Set<String> inStreamIds = inEdges.stream().map(stream -> stream.getStreamSpec().getId()).collect(Collectors.toSet());
  assertThat(jsonObject.sourceStreams.keySet(), Matchers.containsInAnyOrder(inStreamIds.toArray()));
  Set<String> outStreamIds = outEdges.stream().map(stream -> stream.getStreamSpec().getId()).collect(Collectors.toSet());
  assertThat(jsonObject.sinkStreams.keySet(), Matchers.containsInAnyOrder(outStreamIds.toArray()));
  Set<String> intStreamIds = intermediateEdges.stream().map(stream -> stream.getStreamSpec().getId()).collect(Collectors.toSet());
  assertThat(jsonObject.intermediateStreams.keySet(), Matchers.containsInAnyOrder(intStreamIds.toArray()));
  Set<String> tableIds = tables.stream().map(t -> t.getTableId()).collect(Collectors.toSet());
  assertThat(jsonObject.tables.keySet(), Matchers.containsInAnyOrder(tableIds.toArray()));

  JobGraphJsonGenerator.JobNodeJson expectedNodeJson = new JobGraphJsonGenerator.JobNodeJson();
  expectedNodeJson.jobId = mockJobNode.getJobId();
  expectedNodeJson.jobName = mockJobNode.getJobName();
  assertEquals(1, jsonObject.jobs.size());
  JobGraphJsonGenerator.JobNodeJson actualNodeJson = jsonObject.jobs.get(0);
  assertEquals(expectedNodeJson.jobId, actualNodeJson.jobId);
  assertEquals(expectedNodeJson.jobName, actualNodeJson.jobName);
  // NOTE(review): 3 and 2 presumably correspond to all in/out edges of the mocked job node,
  // intermediate edge included -- confirm against the fixture built in setUp().
  assertEquals(3, actualNodeJson.operatorGraph.inputStreams.size());
  assertEquals(2, actualNodeJson.operatorGraph.outputStreams.size());
  assertEquals(0, actualNodeJson.operatorGraph.operators.size());
}
Use of org.apache.samza.table.descriptors.TableDescriptor in the Apache Samza project.
Class TestJobGraphJsonGenerator, method setUp.
/**
 * Builds the fixtures shared by the tests in this class: stream specs, system / stream / table
 * descriptors, a spied job config, and a mocked {@link JobNode} exposing three in-edges
 * (two inputs plus the repartition stream), two out-edges (one output plus the repartition
 * stream) and two tables.
 */
@Before
public void setUp() {
  // Job-level configuration; built first because the stream edges below are created from it.
  Map<String, String> jobConfigs = new HashMap<>();
  jobConfigs.put(JobConfig.JOB_NAME, "jobName");
  jobConfigs.put(JobConfig.JOB_ID, "jobId");
  mockConfig = spy(new MapConfig(jobConfigs));

  // Physical stream specs.
  input1Spec = new StreamSpec("input1", "input1", "input-system");
  input2Spec = new StreamSpec("input2", "input2", "input-system");
  outputSpec = new StreamSpec("output", "output", "output-system");
  repartitionSpec = new StreamSpec("jobName-jobId-partition_by-p1", "partition_by-p1", "intermediate-system");

  // System, stream and table descriptors, all sharing one key/value serde.
  defaultSerde = KVSerde.of(new StringSerde(), new JsonSerdeV2<>());
  inputSystemDescriptor = new GenericSystemDescriptor("input-system", "mockSystemFactoryClassName");
  outputSystemDescriptor = new GenericSystemDescriptor("output-system", "mockSystemFactoryClassName");
  intermediateSystemDescriptor = new GenericSystemDescriptor("intermediate-system", "mockSystemFactoryClassName");
  input1Descriptor = inputSystemDescriptor.getInputDescriptor("input1", defaultSerde);
  input2Descriptor = inputSystemDescriptor.getInputDescriptor("input2", defaultSerde);
  outputDescriptor = outputSystemDescriptor.getOutputDescriptor("output", defaultSerde);
  table1Descriptor = new TestLocalTableDescriptor.MockLocalTableDescriptor("table1", defaultSerde);
  table2Descriptor = new TestLocalTableDescriptor.MockLocalTableDescriptor("table2", defaultSerde);

  // The repartition edge is registered as both an in-edge and an out-edge, so a single instance is shared.
  StreamEdge sharedRepartitionEdge = new StreamEdge(repartitionSpec, true, false, mockConfig);

  Map<String, StreamEdge> inEdgesById = new HashMap<>();
  inEdgesById.put(input1Descriptor.getStreamId(), new StreamEdge(input1Spec, false, false, mockConfig));
  inEdgesById.put(input2Descriptor.getStreamId(), new StreamEdge(input2Spec, false, false, mockConfig));
  inEdgesById.put(repartitionSpec.getId(), sharedRepartitionEdge);

  Map<String, StreamEdge> outEdgesById = new HashMap<>();
  outEdgesById.put(outputDescriptor.getStreamId(), new StreamEdge(outputSpec, false, false, mockConfig));
  outEdgesById.put(repartitionSpec.getId(), sharedRepartitionEdge);

  Map<String, TableDescriptor> tablesById = new HashMap<>();
  tablesById.put(table1Descriptor.getTableId(), table1Descriptor);
  tablesById.put(table2Descriptor.getTableId(), table2Descriptor);

  // Stub the job node so it reports the identity, config, edges and tables assembled above.
  mockJobNode = mock(JobNode.class);
  when(mockJobNode.getJobName()).thenReturn("jobName");
  when(mockJobNode.getJobId()).thenReturn("jobId");
  when(mockJobNode.getJobNameAndId()).thenReturn(JobNode.createJobNameAndId("jobName", "jobId"));
  when(mockJobNode.getConfig()).thenReturn(mockConfig);
  when(mockJobNode.getInEdges()).thenReturn(inEdgesById);
  when(mockJobNode.getOutEdges()).thenReturn(outEdgesById);
  when(mockJobNode.getTables()).thenReturn(tablesById);
}
Use of org.apache.samza.table.descriptors.TableDescriptor in the Apache Samza project.
Class TestExecutionPlanner, method createStreamGraphWithInvalidStreamTableJoinWithSideInputs.
private StreamApplicationDescriptorImpl createStreamGraphWithInvalidStreamTableJoinWithSideInputs() {
  /*
   * Builds a stream-table join that is invalid: the stream backing table t and the stream
   * joined against it disagree on partition count. Table t uses input2 (16) as its
   * side-input stream, while the joined stream is input1 (64).
   *
   *                   join-table t -> output1 (8)
   *                         |
   *    input1 (64) ---------
   */
  return new StreamApplicationDescriptorImpl(appDesc -> {
    MessageStream<KV<Object, Object>> joinedInput = appDesc.getInputStream(input1Descriptor);
    OutputStream<KV<Object, Object>> joinOutput = appDesc.getOutputStream(output1Descriptor);

    // Local table fed by "input2" as its only side input.
    SideInputsProcessor sideInputsProcessor = mock(SideInputsProcessor.class);
    TableDescriptor sideInputTableDescriptor =
        new TestLocalTableDescriptor.MockLocalTableDescriptor("table-id", new KVSerde(new StringSerde(), new StringSerde()))
            .withSideInputs(Arrays.asList("input2"))
            .withSideInputsProcessor(sideInputsProcessor);
    Table sideInputTable = appDesc.getTable(sideInputTableDescriptor);

    StreamTableJoinFunction joinFunction = mock(StreamTableJoinFunction.class);
    joinedInput.join(sideInputTable, joinFunction).sendTo(joinOutput);
  }, config);
}
Use of org.apache.samza.table.descriptors.TableDescriptor in the Apache Samza project.
Class ExecutionPlanner, method createJobGraph.
/**
 * Builds the physical {@link JobGraph} for the given {@link ApplicationDescriptorImpl}.
 *
 * <p>Streams that are both read and written by the application are registered as intermediate
 * streams; the remaining ones are registered as inputs or outputs. Every stream and table is
 * attached to one single {@link JobNode}, and the side-input streams of local tables are added
 * to the graph as well.
 *
 * @param appDesc the application descriptor to build the graph from
 * @return the populated job graph, validated unless the application is a {@link LegacyTaskApplication}
 */
/* package private */
JobGraph createJobGraph(ApplicationDescriptorImpl<? extends ApplicationDescriptor> appDesc) {
  JobGraph jobGraph = new JobGraph(config, appDesc);

  // "Sources" are inputs plus intermediates; "sinks" are outputs plus intermediates.
  Set<StreamSpec> sourceSpecs = getStreamSpecs(appDesc.getInputStreamIds(), streamConfig);
  Set<StreamSpec> sinkSpecs = getStreamSpecs(appDesc.getOutputStreamIds(), streamConfig);
  Set<StreamSpec> intermediateSpecs = Sets.intersection(sourceSpecs, sinkSpecs);
  Set<StreamSpec> inputOnlySpecs = Sets.difference(sourceSpecs, intermediateSpecs);
  Set<StreamSpec> outputOnlySpecs = Sets.difference(sinkSpecs, intermediateSpecs);
  Set<TableDescriptor> tableDescriptors = appDesc.getTableDescriptors();

  // job.name and job.id are derived from app.name and app.id when those are defined.
  MapConfig singleJobConfig = JobPlanner.generateSingleJobConfig(config);
  // At this phase the whole DAG is represented by one job node.
  JobNode node = jobGraph.getOrCreateJobNode(
      singleJobConfig.get(JobConfig.JOB_NAME),
      singleJobConfig.get(JobConfig.JOB_ID, "1"));

  for (StreamSpec inputSpec : inputOnlySpecs) {
    jobGraph.addInputStream(inputSpec, node);
  }
  for (StreamSpec outputSpec : outputOnlySpecs) {
    jobGraph.addOutputStream(outputSpec, node);
  }
  for (StreamSpec intermediateSpec : intermediateSpecs) {
    // An intermediate stream is both produced and consumed by the same node.
    jobGraph.addIntermediateStream(intermediateSpec, node, node);
  }

  for (TableDescriptor tableDescriptor : tableDescriptors) {
    jobGraph.addTable(tableDescriptor, node);
    if (!(tableDescriptor instanceof LocalTableDescriptor)) {
      // Only local tables can declare side inputs.
      continue;
    }
    LocalTableDescriptor localTableDescriptor = (LocalTableDescriptor) tableDescriptor;
    Iterable<String> sideInputIds = ListUtils.emptyIfNull(localTableDescriptor.getSideInputs());
    for (String sideInputId : sideInputIds) {
      jobGraph.addSideInputStream(getStreamSpec(sideInputId, streamConfig));
    }
  }

  // Validation is skipped for LegacyTaskApplication, the only application type for which the
  // input streamIds may be empty.
  boolean isLegacyTaskApp = LegacyTaskApplication.class.isAssignableFrom(appDesc.getAppClass());
  if (!isLegacyTaskApp) {
    jobGraph.validate();
  }
  return jobGraph;
}
Use of org.apache.samza.table.descriptors.TableDescriptor in the Apache Samza project.
Class QueryTranslator, method sendToOutputStream.
/**
 * Routes the translated output of {@code node} to the sink identified by {@code sinkStream}.
 *
 * <p>The node's message stream is first mapped through an {@link OutputMapFunction}. If the sink's
 * config carries a table descriptor, the mapped stream is sent to that table; otherwise it is sent
 * to an output stream (created once per sink source and reused), and, when system-event processing
 * is enabled, the system messages of every input stream are forwarded to that same output stream.
 *
 * @throws SamzaException if the sink is a table and the application descriptor returns no table for it
 */
private void sendToOutputStream(String queryLogicalId, String logicalOpId, String sinkStream, StreamApplicationDescriptor appDesc, TranslatorContext translatorContext, RelNode node, int queryId) {
  SqlIOConfig sinkConfig = sqlConfig.getOutputSystemStreamConfigsBySource().get(sinkStream);
  MessageStream<SamzaSqlRelMessage> relStream = translatorContext.getMessageStream(node.getId());
  MessageStream<KV<Object, Object>> outputStream =
      relStream.map(new OutputMapFunction(queryLogicalId, logicalOpId, sinkStream, queryId));
  Optional<TableDescriptor> sinkTableDescriptor = sinkConfig.getTableDescriptor();

  // Table sink: write to the table and finish.
  if (sinkTableDescriptor.isPresent()) {
    Table outputTable = appDesc.getTable(sinkTableDescriptor.get());
    if (outputTable == null) {
      throw new SamzaException("Failed to obtain table descriptor of " + sinkConfig.getSource());
    }
    outputStream.sendTo(outputTable);
    return;
  }

  // Stream sink: resolve (or lazily register) the system descriptor and the output stream.
  KVSerde<Object, Object> passThroughSerde = KVSerde.of(new NoOpSerde<>(), new NoOpSerde<>());
  DelegatingSystemDescriptor systemDescriptor =
      systemDescriptors.computeIfAbsent(sinkConfig.getSystemName(), DelegatingSystemDescriptor::new);
  GenericOutputDescriptor<KV<Object, Object>> outputDescriptor =
      systemDescriptor.getOutputDescriptor(sinkConfig.getStreamId(), passThroughSerde);
  OutputStream sink = outputMsgStreams.computeIfAbsent(sinkConfig.getSource(), v -> appDesc.getOutputStream(outputDescriptor));
  outputStream.sendTo(sink);

  // System events are processed only when the output is a stream, and only if enabled.
  if (!sqlConfig.isProcessSystemEvents()) {
    return;
  }
  for (MessageStream<SamzaSqlInputMessage> inputStream : inputMsgStreams.values()) {
    MessageStream<SamzaSqlInputMessage> systemMessages =
        inputStream.filter(message -> message.getMetadata().isSystemMessage());
    MessageStream<KV<Object, Object>> systemEventStream = systemMessages.map(SamzaSqlInputMessage::getKeyAndMessageKV);
    systemEventStream.sendTo(sink);
  }
}
Aggregations