Search in sources :

Example 1 with PageView

use of org.apache.samza.test.table.TestTableData.PageView in project samza by apache.

In class TestLocalTableWithSideInputsEndToEnd, method buildExpectedEnrichedPageViews:

/**
 * Computes the join output the test expects: one {@link EnrichedPageView} for every page view whose member id
 * has a matching profile. Page views without a matching profile contribute nothing to the result.
 *
 * @param pageViews page views grouped under integer keys; the grouping itself is ignored, only the values are read
 * @param profiles profiles grouped under integer keys; the grouping itself is ignored, only the values are read
 * @return an immutable list of the expected enriched page views, in the iteration order of {@code pageViews}
 */
private static List<EnrichedPageView> buildExpectedEnrichedPageViews(Map<Integer, List<PageView>> pageViews, Map<Integer, List<Profile>> profiles) {
    // Flatten all profile groups into a single lookup keyed by member id.
    ImmutableMap.Builder<Integer, Profile> lookupBuilder = new ImmutableMap.Builder<>();
    for (List<Profile> profileGroup : profiles.values()) {
        for (Profile candidate : profileGroup) {
            lookupBuilder.put(candidate.getMemberId(), candidate);
        }
    }
    Map<Integer, Profile> profileLookup = lookupBuilder.build();

    // Walk every page view and emit an enriched record only when its member has a profile.
    ImmutableList.Builder<EnrichedPageView> expected = new ImmutableList.Builder<>();
    for (List<PageView> pageViewGroup : pageViews.values()) {
        for (PageView view : pageViewGroup) {
            Profile matched = profileLookup.get(view.getMemberId());
            if (matched != null) {
                expected.add(new EnrichedPageView(view.getPageKey(), matched.getMemberId(), matched.getCompany()));
            }
        }
    }
    return expected.build();
}
Also used : RocksDbTableDescriptor(org.apache.samza.storage.kv.descriptors.RocksDbTableDescriptor) Arrays(java.util.Arrays) InMemorySystemDescriptor(org.apache.samza.test.framework.system.descriptors.InMemorySystemDescriptor) TableDescriptor(org.apache.samza.table.descriptors.TableDescriptor) Profile(org.apache.samza.test.table.TestTableData.Profile) InMemoryInputDescriptor(org.apache.samza.test.framework.system.descriptors.InMemoryInputDescriptor) ImmutableList(com.google.common.collect.ImmutableList) InitableTask(org.apache.samza.task.InitableTask) MessageCollector(org.apache.samza.task.MessageCollector) SystemStream(org.apache.samza.system.SystemStream) DelegatingSystemDescriptor(org.apache.samza.system.descriptors.DelegatingSystemDescriptor) Duration(java.time.Duration) Map(java.util.Map) StreamTask(org.apache.samza.task.StreamTask) SamzaApplication(org.apache.samza.application.SamzaApplication) ApplicationDescriptor(org.apache.samza.application.descriptors.ApplicationDescriptor) KV(org.apache.samza.operators.KV) IntegerSerde(org.apache.samza.serializers.IntegerSerde) NoOpSerde(org.apache.samza.serializers.NoOpSerde) ProfileJsonSerde(org.apache.samza.test.table.TestTableData.ProfileJsonSerde) InMemoryTableDescriptor(org.apache.samza.storage.kv.inmemory.descriptors.InMemoryTableDescriptor) Table(org.apache.samza.table.Table) StreamAssert(org.apache.samza.test.framework.StreamAssert) TaskApplicationDescriptor(org.apache.samza.application.descriptors.TaskApplicationDescriptor) IncomingMessageEnvelope(org.apache.samza.system.IncomingMessageEnvelope) InMemoryOutputDescriptor(org.apache.samza.test.framework.system.descriptors.InMemoryOutputDescriptor) ImmutableMap(com.google.common.collect.ImmutableMap) TaskApplication(org.apache.samza.application.TaskApplication) StreamTaskFactory(org.apache.samza.task.StreamTaskFactory) EnrichedPageView(org.apache.samza.test.table.TestTableData.EnrichedPageView) Test(org.junit.Test) Collectors(java.util.stream.Collectors) 
TaskCoordinator(org.apache.samza.task.TaskCoordinator) Context(org.apache.samza.context.Context) TestRunner(org.apache.samza.test.framework.TestRunner) List(java.util.List) Entry(org.apache.samza.storage.kv.Entry) ReadWriteUpdateTable(org.apache.samza.table.ReadWriteUpdateTable) StreamApplicationDescriptor(org.apache.samza.application.descriptors.StreamApplicationDescriptor) Optional(java.util.Optional) OutgoingMessageEnvelope(org.apache.samza.system.OutgoingMessageEnvelope) KVSerde(org.apache.samza.serializers.KVSerde) PageView(org.apache.samza.test.table.TestTableData.PageView) StreamApplication(org.apache.samza.application.StreamApplication) ImmutableList(com.google.common.collect.ImmutableList) EnrichedPageView(org.apache.samza.test.table.TestTableData.EnrichedPageView) ImmutableMap(com.google.common.collect.ImmutableMap) Profile(org.apache.samza.test.table.TestTableData.Profile)

Example 2 with PageView

use of org.apache.samza.test.table.TestTableData.PageView in project samza by apache.

In class TestLocalTableWithSideInputsEndToEnd, method runTest:

/**
 * Runs {@code app} through {@link TestRunner} with the given page views and profiles as in-memory input
 * streams, then asserts that the enriched page-view output stream contains exactly the records produced by
 * {@code buildExpectedEnrichedPageViews}, in any order.
 *
 * @param app the application under test
 * @param pageViews page-view input handed to the page-view input stream
 * @param profiles profile input handed to the profile input stream
 * @throws InterruptedException declared by the original signature; propagated from the calls made here
 */
private <T extends ApplicationDescriptor<?>> void runTest(SamzaApplication<T> app, Map<Integer, List<PageView>> pageViews, Map<Integer, List<Profile>> profiles) throws InterruptedException {
    InMemorySystemDescriptor inMemorySystem = new InMemorySystemDescriptor(SYSTEM_NAME);

    // Stream descriptors: two inputs (page views, profiles) and one output (enriched page views).
    InMemoryInputDescriptor<PageView> pageViewInput = inMemorySystem.getInputDescriptor(PAGEVIEW_STREAM, new NoOpSerde<>());
    InMemoryInputDescriptor<Profile> profileInput = inMemorySystem.getInputDescriptor(PROFILE_STREAM, new NoOpSerde<>());
    InMemoryOutputDescriptor<EnrichedPageView> enrichedOutput = inMemorySystem.getOutputDescriptor(ENRICHED_PAGEVIEW_STREAM, new NoOpSerde<>());

    TestRunner.of(app)
        .addInputStream(pageViewInput, pageViews)
        .addInputStream(profileInput, profiles)
        .addOutputStream(enrichedOutput, 1)
        .run(Duration.ofSeconds(10));

    // The expected output is derived from the same inputs after the run has completed.
    StreamAssert.containsInAnyOrder(buildExpectedEnrichedPageViews(pageViews, profiles), enrichedOutput, Duration.ofSeconds(1));
}
Also used : EnrichedPageView(org.apache.samza.test.table.TestTableData.EnrichedPageView) PageView(org.apache.samza.test.table.TestTableData.PageView) EnrichedPageView(org.apache.samza.test.table.TestTableData.EnrichedPageView) InMemorySystemDescriptor(org.apache.samza.test.framework.system.descriptors.InMemorySystemDescriptor) Profile(org.apache.samza.test.table.TestTableData.Profile)

Example 3 with PageView

use of org.apache.samza.test.table.TestTableData.PageView in project samza by apache.

In class TestLocalTableWithSideInputsEndToEnd, method testLowLevelJoinWithSideInputsTable:

/**
 * Exercises the low-level page-view/profile join application with 20 generated page views and 10 generated
 * profiles spread over 4 partitions.
 */
@Test
public void testLowLevelJoinWithSideInputsTable() throws InterruptedException {
    int partitionCount = 4;
    IntegerSerde memberIdSerde = new IntegerSerde();

    // For the low-level API the input must be pre-partitioned in the same way that the profiles are
    // partitioned, so the generated page views are flattened and regrouped by a hash of the member id.
    // NOTE(review): Math.abs(Integer.MIN_VALUE) is negative; kept as-is because the formula is meant to
    // mirror the profile partitioning — confirm against the profile generator before changing it.
    Map<Integer, List<PageView>> pageViewsPartitionedByMemberId =
        TestTableData.generatePartitionedPageViews(20, partitionCount)
            .values()
            .stream()
            .flatMap(List::stream)
            .collect(Collectors.groupingBy(view -> {
                int memberIdHash = Arrays.hashCode(memberIdSerde.toBytes(view.getMemberId()));
                return Math.abs(memberIdHash) % partitionCount;
            }));

    runTest(
        new LowLevelPageViewProfileJoin(),
        pageViewsPartitionedByMemberId,
        TestTableData.generatePartitionedProfiles(10, partitionCount));
}
Also used : RocksDbTableDescriptor(org.apache.samza.storage.kv.descriptors.RocksDbTableDescriptor) Arrays(java.util.Arrays) InMemorySystemDescriptor(org.apache.samza.test.framework.system.descriptors.InMemorySystemDescriptor) TableDescriptor(org.apache.samza.table.descriptors.TableDescriptor) Profile(org.apache.samza.test.table.TestTableData.Profile) InMemoryInputDescriptor(org.apache.samza.test.framework.system.descriptors.InMemoryInputDescriptor) ImmutableList(com.google.common.collect.ImmutableList) InitableTask(org.apache.samza.task.InitableTask) MessageCollector(org.apache.samza.task.MessageCollector) SystemStream(org.apache.samza.system.SystemStream) DelegatingSystemDescriptor(org.apache.samza.system.descriptors.DelegatingSystemDescriptor) Duration(java.time.Duration) Map(java.util.Map) StreamTask(org.apache.samza.task.StreamTask) SamzaApplication(org.apache.samza.application.SamzaApplication) ApplicationDescriptor(org.apache.samza.application.descriptors.ApplicationDescriptor) KV(org.apache.samza.operators.KV) IntegerSerde(org.apache.samza.serializers.IntegerSerde) NoOpSerde(org.apache.samza.serializers.NoOpSerde) ProfileJsonSerde(org.apache.samza.test.table.TestTableData.ProfileJsonSerde) InMemoryTableDescriptor(org.apache.samza.storage.kv.inmemory.descriptors.InMemoryTableDescriptor) Table(org.apache.samza.table.Table) StreamAssert(org.apache.samza.test.framework.StreamAssert) TaskApplicationDescriptor(org.apache.samza.application.descriptors.TaskApplicationDescriptor) IncomingMessageEnvelope(org.apache.samza.system.IncomingMessageEnvelope) InMemoryOutputDescriptor(org.apache.samza.test.framework.system.descriptors.InMemoryOutputDescriptor) ImmutableMap(com.google.common.collect.ImmutableMap) TaskApplication(org.apache.samza.application.TaskApplication) StreamTaskFactory(org.apache.samza.task.StreamTaskFactory) EnrichedPageView(org.apache.samza.test.table.TestTableData.EnrichedPageView) Test(org.junit.Test) Collectors(java.util.stream.Collectors) 
TaskCoordinator(org.apache.samza.task.TaskCoordinator) Context(org.apache.samza.context.Context) TestRunner(org.apache.samza.test.framework.TestRunner) List(java.util.List) Entry(org.apache.samza.storage.kv.Entry) ReadWriteUpdateTable(org.apache.samza.table.ReadWriteUpdateTable) StreamApplicationDescriptor(org.apache.samza.application.descriptors.StreamApplicationDescriptor) Optional(java.util.Optional) OutgoingMessageEnvelope(org.apache.samza.system.OutgoingMessageEnvelope) KVSerde(org.apache.samza.serializers.KVSerde) PageView(org.apache.samza.test.table.TestTableData.PageView) StreamApplication(org.apache.samza.application.StreamApplication) ImmutableList(com.google.common.collect.ImmutableList) List(java.util.List) IntegerSerde(org.apache.samza.serializers.IntegerSerde) Test(org.junit.Test)

Example 4 with PageView

use of org.apache.samza.test.table.TestTableData.PageView in project samza by apache.

In class TestContext, method testStatefulTaskWithLocalTable:

/**
 * Runs {@code JoinTaskApplication} with 10 generated page views and 10 generated profiles (the profile
 * stream is marked as bootstrap) and checks that 10 records are read back from the first entry of the
 * consumed output stream.
 */
@Test
public void testStatefulTaskWithLocalTable() {
    List<PageView> generatedPageViews = Arrays.asList(TestTableData.generatePageViews(10));
    List<Profile> generatedProfiles = Arrays.asList(TestTableData.generateProfiles(10));

    InMemorySystemDescriptor inMemorySystem = new InMemorySystemDescriptor("test");
    InMemoryInputDescriptor<TestTableData.PageView> pageViewInput =
        inMemorySystem.getInputDescriptor("PageView", new NoOpSerde<TestTableData.PageView>());
    InMemoryInputDescriptor<TestTableData.Profile> profileInput =
        inMemorySystem.getInputDescriptor("Profile", new NoOpSerde<TestTableData.Profile>()).shouldBootstrap();
    InMemoryOutputDescriptor<TestTableData.EnrichedPageView> enrichedOutput =
        inMemorySystem.getOutputDescriptor("EnrichedPageView", new NoOpSerde<>());

    TestRunner.of(new JoinTaskApplication())
        .addInputStream(pageViewInput, generatedPageViews)
        .addInputStream(profileInput, generatedProfiles)
        .addOutputStream(enrichedOutput, 1)
        .run(Duration.ofSeconds(2));

    // Entry 0 of the consumed output is expected to hold all 10 joined records.
    int joinedRecordCount = TestRunner.consumeStream(enrichedOutput, Duration.ofSeconds(1)).get(0).size();
    Assert.assertEquals(10, joinedRecordCount);
}
Also used : EnrichedPageView(org.apache.samza.test.table.TestTableData.EnrichedPageView) PageView(org.apache.samza.test.table.TestTableData.PageView) Profile(org.apache.samza.test.table.TestTableData.Profile) InMemorySystemDescriptor(org.apache.samza.test.framework.system.descriptors.InMemorySystemDescriptor) NoOpSerde(org.apache.samza.serializers.NoOpSerde) EnrichedPageView(org.apache.samza.test.table.TestTableData.EnrichedPageView) TestTableData(org.apache.samza.test.table.TestTableData) Test(org.junit.Test)

Example 5 with PageView

use of org.apache.samza.test.table.TestTableData.PageView in project samza by apache.

In class TestRemoteTableEndToEnd, method testSendToUpdatesWithoutUpdateOptions:

/**
 * Expects a {@link SamzaException}: the application calls {@code sendTo} on a stream of
 * {@code KV<K, UpdateMessage>} without supplying UpdateOptions.
 */
@Test(expected = SamzaException.class)
public void testSendToUpdatesWithoutUpdateOptions() throws Exception {
    // max member id for page views is 10
    final String serializedProfiles = Base64Serializer.serialize(generateProfiles(10));
    final RateLimiter mockReadRateLimiter = mock(RateLimiter.class, withSettings().serializable());
    final TableRateLimiter.CreditFunction oneCreditPerCall = (k, v, args) -> 1;

    final StreamApplication app = descriptor -> {
        // Read-side table that page views are joined against.
        final RemoteTableDescriptor profileTableDesc =
            new RemoteTableDescriptor<Integer, TestTableData.Profile, Void>("profile-table-1")
                .withReadFunction(InMemoryProfileReadFunction.getInMemoryReadFunction(serializedProfiles))
                .withRateLimiter(mockReadRateLimiter, oneCreditPerCall, null);

        // Write-side table that receives the update messages.
        final RemoteTableDescriptor enrichedTableDesc =
            new RemoteTableDescriptor<Integer, EnrichedPageView, EnrichedPageView>("enriched-page-view-table-1")
                .withReadFunction(new NoOpTableReadFunction<>())
                .withReadRateLimiterDisabled()
                .withWriteFunction(new InMemoryEnrichedPageViewWriteFunction2("testUpdateWithoutUpdateOptions", false))
                .withWriteRateLimit(1000);

        final Table<KV<Integer, Profile>> enrichedTable = descriptor.getTable(enrichedTableDesc);
        final Table<KV<Integer, Profile>> profileTable = descriptor.getTable(profileTableDesc);

        final DelegatingSystemDescriptor delegatingSystem = new DelegatingSystemDescriptor("test");
        final GenericInputDescriptor<PageView> pageViewInput = delegatingSystem.getInputDescriptor("PageView", new NoOpSerde<>());

        // Key by member id, join with profiles, wrap each result in an UpdateMessage, then sendTo the
        // output table with no UpdateOptions argument.
        descriptor.getInputStream(pageViewInput)
            .map(pv -> new KV<>(pv.getMemberId(), pv))
            .join(profileTable, new PageViewToProfileJoinFunction())
            .map(m -> new KV(m.getMemberId(), UpdateMessage.of(m, m)))
            .sendTo(enrichedTable);
    };

    int numPageViews = 40;
    InMemorySystemDescriptor inMemorySystem = new InMemorySystemDescriptor("test");
    InMemoryInputDescriptor<PageView> pageViewStream = inMemorySystem.getInputDescriptor("PageView", new NoOpSerde<>());
    TestRunner.of(app)
        .addInputStream(pageViewStream, TestTableData.generatePartitionedPageViews(numPageViews, 4))
        .run(Duration.ofSeconds(10));
}
Also used : GuavaCacheTableDescriptor(org.apache.samza.table.descriptors.GuavaCacheTableDescriptor) Arrays(java.util.Arrays) InMemorySystemDescriptor(org.apache.samza.test.framework.system.descriptors.InMemorySystemDescriptor) TableDescriptor(org.apache.samza.table.descriptors.TableDescriptor) ObjectInputStream(java.io.ObjectInputStream) GenericInputDescriptor(org.apache.samza.system.descriptors.GenericInputDescriptor) RemoteTable(org.apache.samza.table.remote.RemoteTable) CachingTableDescriptor(org.apache.samza.table.descriptors.CachingTableDescriptor) Counter(org.apache.samza.metrics.Counter) InMemoryInputDescriptor(org.apache.samza.test.framework.system.descriptors.InMemoryInputDescriptor) AtomicInteger(java.util.concurrent.atomic.AtomicInteger) DelegatingSystemDescriptor(org.apache.samza.system.descriptors.DelegatingSystemDescriptor) Duration(java.time.Duration) Map(java.util.Map) NoOpTableReadFunction(org.apache.samza.table.remote.NoOpTableReadFunction) MapConfig(org.apache.samza.config.MapConfig) KV(org.apache.samza.operators.KV) NoOpSerde(org.apache.samza.serializers.NoOpSerde) Table(org.apache.samza.table.Table) EnrichedPageView(org.apache.samza.test.table.TestTableData.EnrichedPageView) MetricsRegistry(org.apache.samza.metrics.MetricsRegistry) Collectors(java.util.stream.Collectors) Executors(java.util.concurrent.Executors) Context(org.apache.samza.context.Context) RecordNotFoundException(org.apache.samza.table.RecordNotFoundException) TestRunner(org.apache.samza.test.framework.TestRunner) Matchers.any(org.mockito.Matchers.any) List(java.util.List) StreamApplicationDescriptor(org.apache.samza.application.descriptors.StreamApplicationDescriptor) TestTableData.generateProfiles(org.apache.samza.test.table.TestTableData.generateProfiles) CacheBuilder(com.google.common.cache.CacheBuilder) PageView(org.apache.samza.test.table.TestTableData.PageView) StreamApplication(org.apache.samza.application.StreamApplication) 
UpdateMessage(org.apache.samza.operators.UpdateMessage) HashMap(java.util.HashMap) CompletableFuture(java.util.concurrent.CompletableFuture) Profile(org.apache.samza.test.table.TestTableData.Profile) Function(java.util.function.Function) TableReadFunction(org.apache.samza.table.remote.TableReadFunction) Matchers.anyString(org.mockito.Matchers.anyString) ArrayList(java.util.ArrayList) Base64Serializer(org.apache.samza.test.util.Base64Serializer) MockContext(org.apache.samza.context.MockContext) TableWriteFunction(org.apache.samza.table.remote.TableWriteFunction) ExpectedException(org.junit.rules.ExpectedException) Timer(org.apache.samza.metrics.Timer) BaseTableFunction(org.apache.samza.table.remote.BaseTableFunction) IOException(java.io.IOException) Test(org.junit.Test) SamzaException(org.apache.samza.SamzaException) TimeUnit(java.util.concurrent.TimeUnit) Mockito(org.mockito.Mockito) Rule(org.junit.Rule) RateLimiter(org.apache.samza.util.RateLimiter) UpdateOptions(org.apache.samza.operators.UpdateOptions) Assert(org.junit.Assert) RemoteTableDescriptor(org.apache.samza.table.descriptors.RemoteTableDescriptor) TableRateLimiter(org.apache.samza.table.remote.TableRateLimiter) Matchers.anyString(org.mockito.Matchers.anyString) Profile(org.apache.samza.test.table.TestTableData.Profile) InMemorySystemDescriptor(org.apache.samza.test.framework.system.descriptors.InMemorySystemDescriptor) GenericInputDescriptor(org.apache.samza.system.descriptors.GenericInputDescriptor) EnrichedPageView(org.apache.samza.test.table.TestTableData.EnrichedPageView) PageView(org.apache.samza.test.table.TestTableData.PageView) RemoteTable(org.apache.samza.table.remote.RemoteTable) Table(org.apache.samza.table.Table) StreamApplication(org.apache.samza.application.StreamApplication) RemoteTableDescriptor(org.apache.samza.table.descriptors.RemoteTableDescriptor) KV(org.apache.samza.operators.KV) RateLimiter(org.apache.samza.util.RateLimiter) 
TableRateLimiter(org.apache.samza.table.remote.TableRateLimiter) TableRateLimiter(org.apache.samza.table.remote.TableRateLimiter) AtomicInteger(java.util.concurrent.atomic.AtomicInteger) DelegatingSystemDescriptor(org.apache.samza.system.descriptors.DelegatingSystemDescriptor) NoOpSerde(org.apache.samza.serializers.NoOpSerde) EnrichedPageView(org.apache.samza.test.table.TestTableData.EnrichedPageView) Test(org.junit.Test)

Aggregations

InMemorySystemDescriptor (org.apache.samza.test.framework.system.descriptors.InMemorySystemDescriptor)12 PageView (org.apache.samza.test.table.TestTableData.PageView)12 EnrichedPageView (org.apache.samza.test.table.TestTableData.EnrichedPageView)11 Profile (org.apache.samza.test.table.TestTableData.Profile)11 Test (org.junit.Test)11 List (java.util.List)10 NoOpSerde (org.apache.samza.serializers.NoOpSerde)9 Duration (java.time.Duration)8 ArrayList (java.util.ArrayList)8 StreamApplication (org.apache.samza.application.StreamApplication)8 StreamApplicationDescriptor (org.apache.samza.application.descriptors.StreamApplicationDescriptor)8 KV (org.apache.samza.operators.KV)8 DelegatingSystemDescriptor (org.apache.samza.system.descriptors.DelegatingSystemDescriptor)8 TestRunner (org.apache.samza.test.framework.TestRunner)8 InMemoryInputDescriptor (org.apache.samza.test.framework.system.descriptors.InMemoryInputDescriptor)8 Arrays (java.util.Arrays)7 Map (java.util.Map)7 Collectors (java.util.stream.Collectors)7 Context (org.apache.samza.context.Context)7 Table (org.apache.samza.table.Table)7