Use of org.apache.samza.operators.KV in the Apache Samza project.
Class TestExecutionPlanner, method testMaxPartitionLimit.
/**
 * Plans a single {@code partitionBy} pipeline and verifies that every intermediate stream of the
 * resulting job graph has exactly {@link IntermediateStreamManager#MAX_INFERRED_PARTITIONS}
 * partitions.
 */
@Test
public void testMaxPartitionLimit() {
  final int expectedPartitions = IntermediateStreamManager.MAX_INFERRED_PARTITIONS;
  ExecutionPlanner executionPlanner = new ExecutionPlanner(config, streamManager);

  // Application under test: input4 -> partitionBy("p1") -> identity map -> output1.
  StreamApplicationDescriptorImpl appDescriptor = new StreamApplicationDescriptorImpl(appDesc -> {
    MessageStream<KV<Object, Object>> source = appDesc.getInputStream(input4Descriptor);
    OutputStream<KV<Object, Object>> sink = appDesc.getOutputStream(output1Descriptor);
    source
        .partitionBy(msg -> msg.key, msg -> msg.value, mock(KVSerde.class), "p1")
        .map(pair -> pair)
        .sendTo(sink);
  }, config);

  JobGraph plannedGraph = (JobGraph) executionPlanner.plan(appDescriptor);

  // Every intermediate stream must carry the maximum inferred partition count.
  plannedGraph.getIntermediateStreams()
      .forEach(intermediateEdge -> assertEquals(expectedPartitions, intermediateEdge.getPartitionCount()));
}
Use of org.apache.samza.operators.KV in the Apache Samza project.
Class TestExecutionPlanner, method createStreamGraphWithStreamTableJoinAndSendToSameTable.
/**
 * Builds a special stream-table join application in which a stream is joined with a table and
 * the join result is sent back to that same table.
 *
 * <p>This example is necessary to ensure {@link ExecutionPlanner} does not get stuck traversing
 * the virtual cycle between the stream-table-join and send-to-table operator specs indefinitely.
 *
 * <p>Such a virtual cycle is present in order to support computing the partitions of intermediate
 * streams participating in stream-table joins. Please refer to SAMZA SEP-16 for more details.
 *
 * @return the application descriptor for the join-and-send-to-same-table pipeline
 */
private StreamApplicationDescriptorImpl createStreamGraphWithStreamTableJoinAndSendToSameTable() {
  return new StreamApplicationDescriptorImpl(appDesc -> {
    MessageStream<KV<Object, Object>> source = appDesc.getInputStream(input1Descriptor);
    TableDescriptor sharedTableDescriptor = new TestLocalTableDescriptor.MockLocalTableDescriptor(
        "table-id", new KVSerde(new StringSerde(), new StringSerde()));
    // The same table object serves as both the join side and the sink.
    Table sharedTable = appDesc.getTable(sharedTableDescriptor);
    source
        .join(sharedTable, mock(StreamTableJoinFunction.class))
        .sendTo(sharedTable);
  }, config);
}
Use of org.apache.samza.operators.KV in the Apache Samza project.
Class TestJobNodeConfigurationGenerator, method testTaskApplicationWithTableAndSideInput.
/**
 * Generates the job config for a task application that declares a table with a side input, then
 * validates the generated job config, its stream configuration and its table configuration.
 */
@Test
public void testTaskApplicationWithTableAndSideInput() {
  // Describe the side input stream and a mock table that lists it as a side input.
  GenericInputDescriptor<KV<String, Object>> sideInputDescriptor =
      inputSystemDescriptor.getInputDescriptor("sideInput1", defaultSerde);
  BaseTableDescriptor tableDescriptor = new MockLocalTableDescriptor("testTable", defaultSerde)
      .withSideInputs(Arrays.asList(sideInputDescriptor.getStreamId()))
      .withSideInputsProcessor(mock(SideInputsProcessor.class, withSettings().serializable()))
      .withConfig("mock.table.provider.config", "mock.config.value");
  StreamSpec sideInputSpec =
      new StreamSpec(sideInputDescriptor.getStreamId(), "sideInput1", inputSystemDescriptor.getSystemName());
  StreamEdge sideInputEdge = new StreamEdge(sideInputSpec, false, false, mockConfig);

  // The side-input stream configuration has to be merged into the original config by hand.
  // TODO: this is confusing, since part of the system and stream related configuration is
  // generated outside the JobNodeConfigurationGenerator. It would be nice if all system and
  // stream related configuration were generated in one place, with only the intermediate stream
  // configuration generated by the JobNodeConfigurationGenerator.
  Map<String, String> mergedConfigs = new HashMap<>(mockConfig);
  mergedConfigs.putAll(sideInputEdge.generateConfig());
  mockConfig = spy(new MapConfig(mergedConfigs));

  // Use a TaskApplication, which still wires up all input/output/intermediate streams, and
  // register the table and the side input stream on its descriptor.
  TaskApplicationDescriptorImpl appDescriptor = new TaskApplicationDescriptorImpl(getTaskApplication(), mockConfig);
  appDescriptor.withTable(tableDescriptor);
  appDescriptor.withInputStream(inputSystemDescriptor.getInputDescriptor("sideInput1", defaultSerde));
  configureJobNode(appDescriptor);

  // Generate the job config for the job node.
  JobConfig generatedConfig =
      new JobNodeConfigurationGenerator().generateJobConfig(mockJobNode, "testJobGraphJson");

  // Verify the results.
  Config expectedConfig = getExpectedJobConfig(mockConfig, mockJobNode.getInEdges());
  validateJobConfig(expectedConfig, generatedConfig);
  Map<String, Serde> serdes = validateAndGetDeserializedSerdes(generatedConfig, 2);
  validateStreamConfigures(generatedConfig, serdes);
  validateTableConfigure(generatedConfig, serdes, tableDescriptor);
}
Aggregations