Search in sources :

Example 66 with KV

Use of org.apache.samza.operators.KV in the Apache Samza project.

In class TestExecutionPlanner, method testMaxPartitionLimit:

/**
 * Plans a graph that reads {@code input4Descriptor}, repartitions it under the id "p1" and writes to
 * {@code output1Descriptor}, then checks that every intermediate stream in the resulting job graph
 * has exactly {@code IntermediateStreamManager.MAX_INFERRED_PARTITIONS} partitions.
 */
@Test
public void testMaxPartitionLimit() {
    ExecutionPlanner planner = new ExecutionPlanner(config, streamManager);
    final int expectedPartitionCount = IntermediateStreamManager.MAX_INFERRED_PARTITIONS;
    StreamApplicationDescriptorImpl repartitionedApp = new StreamApplicationDescriptorImpl(appDesc -> {
        MessageStream<KV<Object, Object>> source = appDesc.getInputStream(input4Descriptor);
        OutputStream<KV<Object, Object>> sink = appDesc.getOutputStream(output1Descriptor);
        // NOTE(review): presumably the partitionBy stage is what yields the intermediate
        // stream(s) inspected below - confirm against ExecutionPlanner.
        source
            .partitionBy(m -> m.key, m -> m.value, mock(KVSerde.class), "p1")
            .map(kv -> kv)
            .sendTo(sink);
    }, config);
    JobGraph plannedGraph = (JobGraph) planner.plan(repartitionedApp);
    // Each intermediate stream must carry exactly the inferred-partition limit.
    plannedGraph.getIntermediateStreams()
        .forEach(edge -> assertEquals(expectedPartitionCount, edge.getPartitionCount()));
}
Also used : Arrays(java.util.Arrays) TaskApplicationDescriptorImpl(org.apache.samza.application.descriptors.TaskApplicationDescriptorImpl) LegacyTaskApplication(org.apache.samza.application.LegacyTaskApplication) StreamApplicationDescriptorImpl(org.apache.samza.application.descriptors.StreamApplicationDescriptorImpl) TableDescriptor(org.apache.samza.table.descriptors.TableDescriptor) GenericInputDescriptor(org.apache.samza.system.descriptors.GenericInputDescriptor) StringSerde(org.apache.samza.serializers.StringSerde) Duration(java.time.Duration) Map(java.util.Map) SamzaApplication(org.apache.samza.application.SamzaApplication) MapConfig(org.apache.samza.config.MapConfig) KV(org.apache.samza.operators.KV) NoOpSerde(org.apache.samza.serializers.NoOpSerde) Mockito.doReturn(org.mockito.Mockito.doReturn) OutputDescriptor(org.apache.samza.system.descriptors.OutputDescriptor) Table(org.apache.samza.table.Table) StreamTableJoinFunction(org.apache.samza.operators.functions.StreamTableJoinFunction) Collection(java.util.Collection) Set(java.util.Set) Collectors(java.util.stream.Collectors) List(java.util.List) Assert.assertFalse(org.junit.Assert.assertFalse) Config(org.apache.samza.config.Config) KVSerde(org.apache.samza.serializers.KVSerde) OutputStream(org.apache.samza.operators.OutputStream) SystemAdmins(org.apache.samza.system.SystemAdmins) Mockito.mock(org.mockito.Mockito.mock) SystemDescriptor(org.apache.samza.system.descriptors.SystemDescriptor) GenericSystemDescriptor(org.apache.samza.system.descriptors.GenericSystemDescriptor) JobConfig(org.apache.samza.config.JobConfig) HashMap(java.util.HashMap) Serde(org.apache.samza.serializers.Serde) TestLocalTableDescriptor(org.apache.samza.table.descriptors.TestLocalTableDescriptor) SystemStreamPartition(org.apache.samza.system.SystemStreamPartition) SystemStreamMetadata(org.apache.samza.system.SystemStreamMetadata) StreamConfig(org.apache.samza.config.StreamConfig) ArrayList(java.util.ArrayList) HashSet(java.util.HashSet) 
StreamTestUtils(org.apache.samza.testUtils.StreamTestUtils) ApplicationDescriptor(org.apache.samza.application.descriptors.ApplicationDescriptor) MessageStream(org.apache.samza.operators.MessageStream) Before(org.junit.Before) InputDescriptor(org.apache.samza.system.descriptors.InputDescriptor) Windows(org.apache.samza.operators.windows.Windows) TaskConfig(org.apache.samza.config.TaskConfig) GenericOutputDescriptor(org.apache.samza.system.descriptors.GenericOutputDescriptor) Partition(org.apache.samza.Partition) Assert.assertTrue(org.junit.Assert.assertTrue) StreamSpec(org.apache.samza.system.StreamSpec) Test(org.junit.Test) Mockito.when(org.mockito.Mockito.when) JoinFunction(org.apache.samza.operators.functions.JoinFunction) SideInputsProcessor(org.apache.samza.storage.SideInputsProcessor) SamzaException(org.apache.samza.SamzaException) SystemAdmin(org.apache.samza.system.SystemAdmin) Assert(org.junit.Assert) Collections(java.util.Collections) Assert.assertEquals(org.junit.Assert.assertEquals) KVSerde(org.apache.samza.serializers.KVSerde) StreamApplicationDescriptorImpl(org.apache.samza.application.descriptors.StreamApplicationDescriptorImpl) KV(org.apache.samza.operators.KV) Test(org.junit.Test)

Example 67 with KV

Use of org.apache.samza.operators.KV in the Apache Samza project.

In class TestExecutionPlanner, method createStreamGraphWithStreamTableJoinAndSendToSameTable:

/**
 * Builds a special stream-table join example in which a stream is joined with a table and the
 * join result is sent back to that same table.
 *
 * <p>This example is necessary to ensure {@link ExecutionPlanner} does not get stuck traversing
 * the virtual cycle between the stream-table-join and send-to-table operator specs indefinitely.
 *
 * <p>The virtual cycle exists to support computing partitions of intermediate streams
 * participating in stream-table joins. Please refer to SAMZA SEP-16 for more details.
 *
 * @return an application descriptor wiring {@code input1Descriptor} through a join with the table
 *         "table-id" and back into that table
 */
private StreamApplicationDescriptorImpl createStreamGraphWithStreamTableJoinAndSendToSameTable() {
    return new StreamApplicationDescriptorImpl(appDesc -> {
        MessageStream<KV<Object, Object>> inputStream = appDesc.getInputStream(input1Descriptor);
        // String keys and string values for the mock local table.
        KVSerde tableSerde = new KVSerde(new StringSerde(), new StringSerde());
        TableDescriptor sharedTableDescriptor =
            new TestLocalTableDescriptor.MockLocalTableDescriptor("table-id", tableSerde);
        Table sharedTable = appDesc.getTable(sharedTableDescriptor);
        // The same table instance is both the join side and the sink: this forms the cycle.
        inputStream
            .join(sharedTable, mock(StreamTableJoinFunction.class))
            .sendTo(sharedTable);
    }, config);
}
Also used : KVSerde(org.apache.samza.serializers.KVSerde) StringSerde(org.apache.samza.serializers.StringSerde) Table(org.apache.samza.table.Table) StreamApplicationDescriptorImpl(org.apache.samza.application.descriptors.StreamApplicationDescriptorImpl) KV(org.apache.samza.operators.KV) TableDescriptor(org.apache.samza.table.descriptors.TableDescriptor) TestLocalTableDescriptor(org.apache.samza.table.descriptors.TestLocalTableDescriptor)

Example 68 with KV

Use of org.apache.samza.operators.KV in the Apache Samza project.

In class TestJobNodeConfigurationGenerator, method testTaskApplicationWithTableAndSideInput:

/**
 * Generates the job config for a task application that declares a table backed by a side input,
 * then validates the generated job config, the deserialized serdes, the stream configuration and
 * the table configuration.
 */
@Test
public void testTaskApplicationWithTableAndSideInput() {
    // Describe the side input stream and a mock table that lists it as its side input.
    GenericInputDescriptor<KV<String, Object>> sideInputDescriptor =
        inputSystemDescriptor.getInputDescriptor("sideInput1", defaultSerde);
    BaseTableDescriptor tableWithSideInput = new MockLocalTableDescriptor("testTable", defaultSerde)
        .withSideInputs(Arrays.asList(sideInputDescriptor.getStreamId()))
        .withSideInputsProcessor(mock(SideInputsProcessor.class, withSettings().serializable()))
        .withConfig("mock.table.provider.config", "mock.config.value");

    // The side input's stream configuration has to be merged into the original config by hand.
    // TODO: this is confusing since part of the system and stream related configuration is generated
    // outside the JobNodeConfigurationGenerator. It would be nice if all system and stream related
    // configuration were generated in one place and only intermediate stream configuration were
    // generated by JobNodeConfigurationGenerator.
    StreamSpec sideInputSpec = new StreamSpec(
        sideInputDescriptor.getStreamId(), "sideInput1", inputSystemDescriptor.getSystemName());
    StreamEdge sideInputEdge = new StreamEdge(sideInputSpec, false, false, mockConfig);
    Map<String, String> mergedConfigs = new HashMap<>(mockConfig);
    mergedConfigs.putAll(sideInputEdge.generateConfig());
    mockConfig = spy(new MapConfig(mergedConfigs));

    // Use a TaskApplication, which still wires up all input/output/intermediate streams,
    // and register both the table and its side input stream on it.
    TaskApplicationDescriptorImpl taskAppDesc =
        new TaskApplicationDescriptorImpl(getTaskApplication(), mockConfig);
    taskAppDesc.withTable(tableWithSideInput);
    taskAppDesc.withInputStream(inputSystemDescriptor.getInputDescriptor("sideInput1", defaultSerde));
    configureJobNode(taskAppDesc);

    // Generate the job config for the job node.
    JobNodeConfigurationGenerator generator = new JobNodeConfigurationGenerator();
    JobConfig generatedJobConfig = generator.generateJobConfig(mockJobNode, "testJobGraphJson");

    // Verify the results against the expected config derived from the job node's input edges.
    Config expectedJobConfig = getExpectedJobConfig(mockConfig, mockJobNode.getInEdges());
    validateJobConfig(expectedJobConfig, generatedJobConfig);
    // NOTE(review): the literal 2 is presumably the expected number of serdes - confirm in the helper.
    Map<String, Serde> deserializedSerdes = validateAndGetDeserializedSerdes(generatedJobConfig, 2);
    validateStreamConfigures(generatedJobConfig, deserializedSerdes);
    validateTableConfigure(generatedJobConfig, deserializedSerdes, tableWithSideInput);
}
Also used : Serde(org.apache.samza.serializers.Serde) StringSerde(org.apache.samza.serializers.StringSerde) SerializableSerde(org.apache.samza.serializers.SerializableSerde) TimestampedValueSerde(org.apache.samza.operators.impl.store.TimestampedValueSerde) StreamSpec(org.apache.samza.system.StreamSpec) HashMap(java.util.HashMap) JobConfig(org.apache.samza.config.JobConfig) MapConfig(org.apache.samza.config.MapConfig) SerializerConfig(org.apache.samza.config.SerializerConfig) TaskConfig(org.apache.samza.config.TaskConfig) Config(org.apache.samza.config.Config) TaskApplicationDescriptorImpl(org.apache.samza.application.descriptors.TaskApplicationDescriptorImpl) KV(org.apache.samza.operators.KV) MockLocalTableDescriptor(org.apache.samza.table.descriptors.TestLocalTableDescriptor.MockLocalTableDescriptor) BaseTableDescriptor(org.apache.samza.table.descriptors.BaseTableDescriptor) JobConfig(org.apache.samza.config.JobConfig) MapConfig(org.apache.samza.config.MapConfig) Test(org.junit.Test)

Aggregations

KV (org.apache.samza.operators.KV)68 Test (org.junit.Test)38 StringSerde (org.apache.samza.serializers.StringSerde)33 KVSerde (org.apache.samza.serializers.KVSerde)30 HashMap (java.util.HashMap)28 NoOpSerde (org.apache.samza.serializers.NoOpSerde)26 List (java.util.List)25 Duration (java.time.Duration)24 ArrayList (java.util.ArrayList)24 StreamApplication (org.apache.samza.application.StreamApplication)22 Config (org.apache.samza.config.Config)22 Map (java.util.Map)20 StreamApplicationDescriptor (org.apache.samza.application.descriptors.StreamApplicationDescriptor)20 Table (org.apache.samza.table.Table)19 MapConfig (org.apache.samza.config.MapConfig)18 MessageStream (org.apache.samza.operators.MessageStream)18 GenericInputDescriptor (org.apache.samza.system.descriptors.GenericInputDescriptor)18 InMemorySystemDescriptor (org.apache.samza.test.framework.system.descriptors.InMemorySystemDescriptor)17 Collectors (java.util.stream.Collectors)16 SamzaException (org.apache.samza.SamzaException)16