Search in sources :

Example 16 with Table

use of org.apache.samza.table.Table in project samza by apache.

the class TestLocalTableEndToEnd method testSendTo.

@Test
public void testSendTo() {
    MyMapFunction mapFn = new MyMapFunction();
    StreamApplication app = appDesc -> {
        Table<KV<Integer, Profile>> table = appDesc.getTable(new InMemoryTableDescriptor<>("t1", KVSerde.of(new IntegerSerde(), new ProfileJsonSerde())));
        DelegatingSystemDescriptor ksd = new DelegatingSystemDescriptor(SYSTEM_NAME);
        GenericInputDescriptor<Profile> isd = ksd.getInputDescriptor(PROFILE_STREAM, new NoOpSerde<>());
        appDesc.getInputStream(isd).map(mapFn).sendTo(table);
    };
    InMemorySystemDescriptor isd = new InMemorySystemDescriptor(SYSTEM_NAME);
    InMemoryInputDescriptor<Profile> profileStreamDesc = isd.getInputDescriptor(PROFILE_STREAM, new NoOpSerde<>());
    int numProfilesPerPartition = 10;
    int numInputPartitions = 4;
    Map<Integer, List<Profile>> inputProfiles = TestTableData.generatePartitionedProfiles(numProfilesPerPartition * numInputPartitions, numInputPartitions);
    TestRunner.of(app).addInputStream(profileStreamDesc, inputProfiles).run(Duration.ofSeconds(10));
    for (int i = 0; i < numInputPartitions; i++) {
        MyMapFunction mapFnCopy = MyMapFunction.getMapFunctionByTask(String.format("Partition %d", i));
        assertEquals(numProfilesPerPartition, mapFnCopy.received.size());
        mapFnCopy.received.forEach(p -> assertNotNull(mapFnCopy.table.get(p.getMemberId())));
    }
}
Also used : PageViewJsonSerdeFactory(org.apache.samza.test.table.TestTableData.PageViewJsonSerdeFactory) InMemorySystemDescriptor(org.apache.samza.test.framework.system.descriptors.InMemorySystemDescriptor) JobConfig(org.apache.samza.config.JobConfig) HashMap(java.util.HashMap) SingleContainerGrouperFactory(org.apache.samza.container.grouper.task.SingleContainerGrouperFactory) Profile(org.apache.samza.test.table.TestTableData.Profile) GenericInputDescriptor(org.apache.samza.system.descriptors.GenericInputDescriptor) ArrayList(java.util.ArrayList) MapFunction(org.apache.samza.operators.functions.MapFunction) InMemoryInputDescriptor(org.apache.samza.test.framework.system.descriptors.InMemoryInputDescriptor) DelegatingSystemDescriptor(org.apache.samza.system.descriptors.DelegatingSystemDescriptor) Duration(java.time.Duration) Map(java.util.Map) ArraySystemFactory(org.apache.samza.test.util.ArraySystemFactory) LinkedList(java.util.LinkedList) KV(org.apache.samza.operators.KV) IntegerSerde(org.apache.samza.serializers.IntegerSerde) NoOpSerde(org.apache.samza.serializers.NoOpSerde) ProfileJsonSerde(org.apache.samza.test.table.TestTableData.ProfileJsonSerde) InMemoryTableDescriptor(org.apache.samza.storage.kv.inmemory.descriptors.InMemoryTableDescriptor) MessageStream(org.apache.samza.operators.MessageStream) Table(org.apache.samza.table.Table) TaskConfig(org.apache.samza.config.TaskConfig) Assert.assertNotNull(org.junit.Assert.assertNotNull) EnrichedPageView(org.apache.samza.test.table.TestTableData.EnrichedPageView) Test(org.junit.Test) PageViewJsonSerde(org.apache.samza.test.table.TestTableData.PageViewJsonSerde) PassthroughJobCoordinatorFactory(org.apache.samza.standalone.PassthroughJobCoordinatorFactory) Context(org.apache.samza.context.Context) TestRunner(org.apache.samza.test.framework.TestRunner) List(java.util.List) ReadWriteUpdateTable(org.apache.samza.table.ReadWriteUpdateTable) StreamApplicationDescriptor(org.apache.samza.application.descriptors.StreamApplicationDescriptor) JobCoordinatorConfig(org.apache.samza.config.JobCoordinatorConfig) KVSerde(org.apache.samza.serializers.KVSerde) PageView(org.apache.samza.test.table.TestTableData.PageView) StreamApplication(org.apache.samza.application.StreamApplication) Assert.assertEquals(org.junit.Assert.assertEquals) Table(org.apache.samza.table.Table) ReadWriteUpdateTable(org.apache.samza.table.ReadWriteUpdateTable) ProfileJsonSerde(org.apache.samza.test.table.TestTableData.ProfileJsonSerde) StreamApplication(org.apache.samza.application.StreamApplication) Profile(org.apache.samza.test.table.TestTableData.Profile) InMemorySystemDescriptor(org.apache.samza.test.framework.system.descriptors.InMemorySystemDescriptor) IntegerSerde(org.apache.samza.serializers.IntegerSerde) DelegatingSystemDescriptor(org.apache.samza.system.descriptors.DelegatingSystemDescriptor) GenericInputDescriptor(org.apache.samza.system.descriptors.GenericInputDescriptor) NoOpSerde(org.apache.samza.serializers.NoOpSerde) InMemoryTableDescriptor(org.apache.samza.storage.kv.inmemory.descriptors.InMemoryTableDescriptor) ArrayList(java.util.ArrayList) LinkedList(java.util.LinkedList) List(java.util.List) Test(org.junit.Test)

Example 17 with Table

use of org.apache.samza.table.Table in project samza by apache.

the class TestExecutionPlanner method createStreamGraphWithStreamTableJoin.

private StreamApplicationDescriptorImpl createStreamGraphWithStreamTableJoin() {
    /**
     * Example stream-table join app. Expected partition counts of intermediate streams introduced
     * by partitionBy operations are enclosed in quotes.
     *
     *    input2 (16) -> partitionBy ("32") -> send-to-table t
     *
     *                                      join-table t —————
     *                                       |                |
     *    input1 (64) -> partitionBy ("32") _|                |
     *                                                       join -> output1 (8)
     *                                                        |
     *                                      input3 (32) ——————
     */
    return new StreamApplicationDescriptorImpl(appDesc -> {
        MessageStream<KV<Object, Object>> messageStream1 = appDesc.getInputStream(input1Descriptor);
        MessageStream<KV<Object, Object>> messageStream2 = appDesc.getInputStream(input2Descriptor);
        MessageStream<KV<Object, Object>> messageStream3 = appDesc.getInputStream(input3Descriptor);
        OutputStream<KV<Object, Object>> output1 = appDesc.getOutputStream(output1Descriptor);
        TableDescriptor tableDescriptor = new TestLocalTableDescriptor.MockLocalTableDescriptor("table-id", new KVSerde(new StringSerde(), new StringSerde()));
        Table table = appDesc.getTable(tableDescriptor);
        messageStream2.partitionBy(m -> m.key, m -> m.value, mock(KVSerde.class), "p1").sendTo(table);
        messageStream1.partitionBy(m -> m.key, m -> m.value, mock(KVSerde.class), "p2").join(table, mock(StreamTableJoinFunction.class)).join(messageStream3, mock(JoinFunction.class), mock(Serde.class), mock(Serde.class), mock(Serde.class), Duration.ofHours(1), "j2").sendTo(output1);
    }, config);
}
Also used : Arrays(java.util.Arrays) TaskApplicationDescriptorImpl(org.apache.samza.application.descriptors.TaskApplicationDescriptorImpl) LegacyTaskApplication(org.apache.samza.application.LegacyTaskApplication) StreamApplicationDescriptorImpl(org.apache.samza.application.descriptors.StreamApplicationDescriptorImpl) TableDescriptor(org.apache.samza.table.descriptors.TableDescriptor) GenericInputDescriptor(org.apache.samza.system.descriptors.GenericInputDescriptor) StringSerde(org.apache.samza.serializers.StringSerde) Duration(java.time.Duration) Map(java.util.Map) SamzaApplication(org.apache.samza.application.SamzaApplication) MapConfig(org.apache.samza.config.MapConfig) KV(org.apache.samza.operators.KV) NoOpSerde(org.apache.samza.serializers.NoOpSerde) Mockito.doReturn(org.mockito.Mockito.doReturn) OutputDescriptor(org.apache.samza.system.descriptors.OutputDescriptor) Table(org.apache.samza.table.Table) StreamTableJoinFunction(org.apache.samza.operators.functions.StreamTableJoinFunction) Collection(java.util.Collection) Set(java.util.Set) Collectors(java.util.stream.Collectors) List(java.util.List) Assert.assertFalse(org.junit.Assert.assertFalse) Config(org.apache.samza.config.Config) KVSerde(org.apache.samza.serializers.KVSerde) OutputStream(org.apache.samza.operators.OutputStream) SystemAdmins(org.apache.samza.system.SystemAdmins) Mockito.mock(org.mockito.Mockito.mock) SystemDescriptor(org.apache.samza.system.descriptors.SystemDescriptor) GenericSystemDescriptor(org.apache.samza.system.descriptors.GenericSystemDescriptor) JobConfig(org.apache.samza.config.JobConfig) HashMap(java.util.HashMap) Serde(org.apache.samza.serializers.Serde) TestLocalTableDescriptor(org.apache.samza.table.descriptors.TestLocalTableDescriptor) SystemStreamPartition(org.apache.samza.system.SystemStreamPartition) SystemStreamMetadata(org.apache.samza.system.SystemStreamMetadata) StreamConfig(org.apache.samza.config.StreamConfig) ArrayList(java.util.ArrayList) HashSet(java.util.HashSet) StreamTestUtils(org.apache.samza.testUtils.StreamTestUtils) ApplicationDescriptor(org.apache.samza.application.descriptors.ApplicationDescriptor) MessageStream(org.apache.samza.operators.MessageStream) Before(org.junit.Before) InputDescriptor(org.apache.samza.system.descriptors.InputDescriptor) Windows(org.apache.samza.operators.windows.Windows) TaskConfig(org.apache.samza.config.TaskConfig) GenericOutputDescriptor(org.apache.samza.system.descriptors.GenericOutputDescriptor) Partition(org.apache.samza.Partition) Assert.assertTrue(org.junit.Assert.assertTrue) StreamSpec(org.apache.samza.system.StreamSpec) Test(org.junit.Test) Mockito.when(org.mockito.Mockito.when) JoinFunction(org.apache.samza.operators.functions.JoinFunction) SideInputsProcessor(org.apache.samza.storage.SideInputsProcessor) SamzaException(org.apache.samza.SamzaException) SystemAdmin(org.apache.samza.system.SystemAdmin) Assert(org.junit.Assert) Collections(java.util.Collections) Assert.assertEquals(org.junit.Assert.assertEquals) KVSerde(org.apache.samza.serializers.KVSerde) StringSerde(org.apache.samza.serializers.StringSerde) Table(org.apache.samza.table.Table) StreamApplicationDescriptorImpl(org.apache.samza.application.descriptors.StreamApplicationDescriptorImpl) StreamTableJoinFunction(org.apache.samza.operators.functions.StreamTableJoinFunction) KV(org.apache.samza.operators.KV) TableDescriptor(org.apache.samza.table.descriptors.TableDescriptor) TestLocalTableDescriptor(org.apache.samza.table.descriptors.TestLocalTableDescriptor)

Example 18 with Table

use of org.apache.samza.table.Table in project samza by apache.

the class TestExecutionPlanner method createStreamGraphWithInvalidStreamTableJoin.

private StreamApplicationDescriptorImpl createStreamGraphWithInvalidStreamTableJoin() {
    /**
     * Example stream-table join that is invalid due to disagreement in partition count
     * between the 2 input streams.
     *
     *    input1 (64) -> send-to-table t
     *
     *                   join-table t -> output1 (8)
     *                         |
     *    input2 (16) —————————
     */
    return new StreamApplicationDescriptorImpl(appDesc -> {
        MessageStream<KV<Object, Object>> messageStream1 = appDesc.getInputStream(input1Descriptor);
        MessageStream<KV<Object, Object>> messageStream2 = appDesc.getInputStream(input2Descriptor);
        OutputStream<KV<Object, Object>> output1 = appDesc.getOutputStream(output1Descriptor);
        TableDescriptor tableDescriptor = new TestLocalTableDescriptor.MockLocalTableDescriptor("table-id", new KVSerde(new StringSerde(), new StringSerde()));
        Table table = appDesc.getTable(tableDescriptor);
        messageStream1.sendTo(table);
        messageStream1.join(table, mock(StreamTableJoinFunction.class)).join(messageStream2, mock(JoinFunction.class), mock(Serde.class), mock(Serde.class), mock(Serde.class), Duration.ofHours(1), "j2").sendTo(output1);
    }, config);
}
Also used : KVSerde(org.apache.samza.serializers.KVSerde) StringSerde(org.apache.samza.serializers.StringSerde) Table(org.apache.samza.table.Table) StreamApplicationDescriptorImpl(org.apache.samza.application.descriptors.StreamApplicationDescriptorImpl) StreamTableJoinFunction(org.apache.samza.operators.functions.StreamTableJoinFunction) KV(org.apache.samza.operators.KV) TableDescriptor(org.apache.samza.table.descriptors.TableDescriptor) TestLocalTableDescriptor(org.apache.samza.table.descriptors.TestLocalTableDescriptor)

Example 19 with Table

use of org.apache.samza.table.Table in project samza by apache.

the class TestExecutionPlanner method createStreamGraphWithStreamTableJoinAndSendToSameTable.

private StreamApplicationDescriptorImpl createStreamGraphWithStreamTableJoinAndSendToSameTable() {
    /**
     * A special example of stream-table join where a stream is joined with a table, and the result is
     * sent to the same table. This example is necessary to ensure {@link ExecutionPlanner} does not
     * get stuck traversing the virtual cycle between stream-table-join and send-to-table operator specs
     * indefinitely.
     *
     * The reason such virtual cycle is present is to support computing partitions of intermediate
     * streams participating in stream-table joins. Please, refer to SAMZA SEP-16 for more details.
     */
    return new StreamApplicationDescriptorImpl(appDesc -> {
        MessageStream<KV<Object, Object>> messageStream1 = appDesc.getInputStream(input1Descriptor);
        TableDescriptor tableDescriptor = new TestLocalTableDescriptor.MockLocalTableDescriptor("table-id", new KVSerde(new StringSerde(), new StringSerde()));
        Table table = appDesc.getTable(tableDescriptor);
        messageStream1.join(table, mock(StreamTableJoinFunction.class)).sendTo(table);
    }, config);
}
Also used : KVSerde(org.apache.samza.serializers.KVSerde) StringSerde(org.apache.samza.serializers.StringSerde) Table(org.apache.samza.table.Table) StreamApplicationDescriptorImpl(org.apache.samza.application.descriptors.StreamApplicationDescriptorImpl) KV(org.apache.samza.operators.KV) TableDescriptor(org.apache.samza.table.descriptors.TableDescriptor) TestLocalTableDescriptor(org.apache.samza.table.descriptors.TestLocalTableDescriptor)

Aggregations

Table (org.apache.samza.table.Table)19 KV (org.apache.samza.operators.KV)17 Map (java.util.Map)13 TableDescriptor (org.apache.samza.table.descriptors.TableDescriptor)12 HashMap (java.util.HashMap)11 SamzaException (org.apache.samza.SamzaException)11 NoOpSerde (org.apache.samza.serializers.NoOpSerde)11 Test (org.junit.Test)11 Duration (java.time.Duration)10 ArrayList (java.util.ArrayList)10 List (java.util.List)10 Collectors (java.util.stream.Collectors)10 DelegatingSystemDescriptor (org.apache.samza.system.descriptors.DelegatingSystemDescriptor)10 GenericInputDescriptor (org.apache.samza.system.descriptors.GenericInputDescriptor)10 Assert (org.junit.Assert)10 Arrays (java.util.Arrays)9 StreamApplication (org.apache.samza.application.StreamApplication)9 TestRunner (org.apache.samza.test.framework.TestRunner)9 InMemoryInputDescriptor (org.apache.samza.test.framework.system.descriptors.InMemoryInputDescriptor)9 InMemorySystemDescriptor (org.apache.samza.test.framework.system.descriptors.InMemorySystemDescriptor)9