Search in sources :

Example 11 with Table

use of org.apache.samza.table.Table in project samza by apache.

the class JoinTranslator method getTable.

private Table getTable(JoinInputNode tableNode, TranslatorContext context) {
    SqlIOConfig sourceTableConfig = resolveSQlIOForTable(tableNode.getRelNode(), context.getExecutionContext().getSamzaSqlApplicationConfig().getInputSystemStreamConfigBySource());
    if (sourceTableConfig == null || !sourceTableConfig.getTableDescriptor().isPresent()) {
        String errMsg = "Failed to resolve table source in join operation: node=" + tableNode.getRelNode();
        log.error(errMsg);
        throw new SamzaException(errMsg);
    }
    Table<KV<SamzaSqlRelRecord, SamzaSqlRelMessage>> table = context.getStreamAppDescriptor().getTable(sourceTableConfig.getTableDescriptor().get());
    if (tableNode.isRemoteTable()) {
        return table;
    }
    // If local table, load the table.
    // Load the local table with the fields in the join condition as composite key and relational message as the value.
    // Send the messages from the input stream denoted as 'table' to the created table store.
    MessageStream<SamzaSqlRelMessage> relOutputStream = context.getMessageStream(tableNode.getRelNode().getId());
    SamzaSqlRelRecordSerdeFactory.SamzaSqlRelRecordSerde keySerde = (SamzaSqlRelRecordSerdeFactory.SamzaSqlRelRecordSerde) new SamzaSqlRelRecordSerdeFactory().getSerde(null, null);
    SamzaSqlRelMessageSerdeFactory.SamzaSqlRelMessageSerde valueSerde = (SamzaSqlRelMessageSerdeFactory.SamzaSqlRelMessageSerde) new SamzaSqlRelMessageSerdeFactory().getSerde(null, null);
    List<Integer> tableKeyIds = tableNode.getKeyIds();
    // Let's always repartition by the join fields as key before sending the key and value to the table.
    // We need to repartition the stream denoted as table to ensure that both the stream and table that are joined
    // have the same partitioning scheme with the same partition key and number. Please note that bootstrap semantic is
    // not propagated to the intermediate streams. Please refer SAMZA-1613 for more details on this. Subsequently, the
    // results are consistent only after the local table is caught up.
    relOutputStream.partitionBy(m -> createSamzaSqlCompositeKey(m, tableKeyIds), m -> m, KVSerde.of(keySerde, valueSerde), intermediateStreamPrefix + "table_" + logicalOpId).sendTo(table);
    return table;
}
Also used : SqlIOConfig(org.apache.samza.sql.interfaces.SqlIOConfig) TableScan(org.apache.calcite.rel.core.TableScan) LogicalFilter(org.apache.calcite.rel.logical.LogicalFilter) LoggerFactory(org.slf4j.LoggerFactory) RelOptUtil(org.apache.calcite.plan.RelOptUtil) ArrayList(java.util.ArrayList) LogicalJoin(org.apache.calcite.rel.logical.LogicalJoin) SamzaSqlRelRecordSerdeFactory(org.apache.samza.sql.serializers.SamzaSqlRelRecordSerdeFactory) RexNode(org.apache.calcite.rex.RexNode) Map(java.util.Map) SamzaSqlRelMessage(org.apache.samza.sql.data.SamzaSqlRelMessage) LinkedList(java.util.LinkedList) KV(org.apache.samza.operators.KV) MessageStream(org.apache.samza.operators.MessageStream) Table(org.apache.samza.table.Table) SqlKind(org.apache.calcite.sql.SqlKind) Logger(org.slf4j.Logger) LogicalProject(org.apache.calcite.rel.logical.LogicalProject) SqlTypeName(org.apache.calcite.sql.type.SqlTypeName) StreamTableJoinFunction(org.apache.samza.operators.functions.StreamTableJoinFunction) RexLiteral(org.apache.calcite.rex.RexLiteral) SqlExplainLevel(org.apache.calcite.sql.SqlExplainLevel) SamzaSqlRelMessage.getSamzaSqlCompositeKeyFieldNames(org.apache.samza.sql.data.SamzaSqlRelMessage.getSamzaSqlCompositeKeyFieldNames) RelNode(org.apache.calcite.rel.RelNode) Collectors(java.util.stream.Collectors) SamzaSqlRelMessage.createSamzaSqlCompositeKey(org.apache.samza.sql.data.SamzaSqlRelMessage.createSamzaSqlCompositeKey) SamzaException(org.apache.samza.SamzaException) SqlIOConfig(org.apache.samza.sql.interfaces.SqlIOConfig) RexInputRef(org.apache.calcite.rex.RexInputRef) List(java.util.List) Validate(org.apache.commons.lang3.Validate) SamzaSqlRelRecord(org.apache.samza.sql.SamzaSqlRelRecord) HepRelVertex(org.apache.calcite.plan.hep.HepRelVertex) JoinRelType(org.apache.calcite.rel.core.JoinRelType) Preconditions(com.google.common.base.Preconditions) VisibleForTesting(com.google.common.annotations.VisibleForTesting) KVSerde(org.apache.samza.serializers.KVSerde) RexShuttle(org.apache.calcite.rex.RexShuttle) SamzaSqlRelMessageSerdeFactory(org.apache.samza.sql.serializers.SamzaSqlRelMessageSerdeFactory) Collections(java.util.Collections) SqlExplainFormat(org.apache.calcite.sql.SqlExplainFormat) RexCall(org.apache.calcite.rex.RexCall) SamzaSqlRelRecordSerdeFactory(org.apache.samza.sql.serializers.SamzaSqlRelRecordSerdeFactory) KV(org.apache.samza.operators.KV) SamzaException(org.apache.samza.SamzaException) SamzaSqlRelMessageSerdeFactory(org.apache.samza.sql.serializers.SamzaSqlRelMessageSerdeFactory) SamzaSqlRelMessage(org.apache.samza.sql.data.SamzaSqlRelMessage)

Example 12 with Table

use of org.apache.samza.table.Table in project samza by apache.

the class JoinTranslator method translate.

void translate(final LogicalJoin join, final TranslatorContext translatorContext) {
    JoinInputNode.InputType inputTypeOnLeft = JoinInputNode.getInputType(join.getLeft(), translatorContext.getExecutionContext().getSamzaSqlApplicationConfig().getInputSystemStreamConfigBySource());
    JoinInputNode.InputType inputTypeOnRight = JoinInputNode.getInputType(join.getRight(), translatorContext.getExecutionContext().getSamzaSqlApplicationConfig().getInputSystemStreamConfigBySource());
    // Do the validation of join query
    validateJoinQuery(join, inputTypeOnLeft, inputTypeOnRight);
    // At this point, one of the sides is a table. Let's figure out if it is on left or right side.
    boolean isTablePosOnRight = inputTypeOnRight != JoinInputNode.InputType.STREAM;
    // stream and table keyIds are used to extract the join condition field (key) names and values out of the stream
    // and table records.
    List<Integer> streamKeyIds = new LinkedList<>();
    List<Integer> tableKeyIds = new LinkedList<>();
    // Fetch the stream and table indices corresponding to the fields given in the join condition.
    final int leftSideSize = join.getLeft().getRowType().getFieldCount();
    final int tableStartIdx = isTablePosOnRight ? leftSideSize : 0;
    final int streamStartIdx = isTablePosOnRight ? 0 : leftSideSize;
    final int tableEndIdx = isTablePosOnRight ? join.getRowType().getFieldCount() : leftSideSize;
    join.getCondition().accept(new RexShuttle() {

        @Override
        public RexNode visitInputRef(RexInputRef inputRef) {
            // Validate the type of the input ref.
            validateJoinKeyType(inputRef);
            int index = inputRef.getIndex();
            if (index >= tableStartIdx && index < tableEndIdx) {
                tableKeyIds.add(index - tableStartIdx);
            } else {
                streamKeyIds.add(index - streamStartIdx);
            }
            return inputRef;
        }
    });
    Collections.sort(tableKeyIds);
    Collections.sort(streamKeyIds);
    // Get the two input nodes (stream and table nodes) for the join.
    JoinInputNode streamNode = new JoinInputNode(isTablePosOnRight ? join.getLeft() : join.getRight(), streamKeyIds, isTablePosOnRight ? inputTypeOnLeft : inputTypeOnRight, !isTablePosOnRight);
    JoinInputNode tableNode = new JoinInputNode(isTablePosOnRight ? join.getRight() : join.getLeft(), tableKeyIds, isTablePosOnRight ? inputTypeOnRight : inputTypeOnLeft, isTablePosOnRight);
    MessageStream<SamzaSqlRelMessage> inputStream = translatorContext.getMessageStream(streamNode.getRelNode().getId());
    Table table = getTable(tableNode, translatorContext);
    MessageStream<SamzaSqlRelMessage> outputStream = joinStreamWithTable(inputStream, table, streamNode, tableNode, join, translatorContext);
    translatorContext.registerMessageStream(join.getId(), outputStream);
    outputStream.map(outputMetricsMF);
}
Also used : RexShuttle(org.apache.calcite.rex.RexShuttle) Table(org.apache.samza.table.Table) LinkedList(java.util.LinkedList) RexInputRef(org.apache.calcite.rex.RexInputRef) SamzaSqlRelMessage(org.apache.samza.sql.data.SamzaSqlRelMessage) RexNode(org.apache.calcite.rex.RexNode)

Example 13 with Table

use of org.apache.samza.table.Table in project samza by apache.

the class TestRemoteTableWithBatchEndToEnd method doTestStreamTableJoinRemoteTablePartialUpdates.

private void doTestStreamTableJoinRemoteTablePartialUpdates(String testName, boolean isCompactBatch) throws Exception {
    final InMemoryWriteFunction writer = new InMemoryWriteFunction(testName);
    BATCH_READS.put(testName, new AtomicInteger());
    BATCH_WRITES.put(testName, new AtomicInteger());
    WRITTEN_RECORDS.put(testName, new HashMap<>());
    int count = 16;
    int batchSize = 4;
    String profiles = Base64Serializer.serialize(generateProfiles(count));
    final RateLimiter readRateLimiter = mock(RateLimiter.class, withSettings().serializable());
    final RateLimiter writeRateLimiter = mock(RateLimiter.class, withSettings().serializable());
    final TableRateLimiter.CreditFunction creditFunction = (k, v, args) -> 1;
    final StreamApplication app = appDesc -> {
        RemoteTableDescriptor<Integer, Profile, Void> inputTableDesc = new RemoteTableDescriptor<>("profile-table-1");
        inputTableDesc.withReadFunction(InMemoryReadFunction.getInMemoryReadFunction(testName, profiles)).withRateLimiter(readRateLimiter, creditFunction, null);
        // dummy reader
        TableReadFunction<Integer, EnrichedPageView> readFn = new MyReadFunction();
        RemoteTableDescriptor<Integer, EnrichedPageView, EnrichedPageView> outputTableDesc = new RemoteTableDescriptor<>("enriched-page-view-table-1");
        outputTableDesc.withReadFunction(readFn).withWriteFunction(writer).withRateLimiter(writeRateLimiter, creditFunction, creditFunction);
        if (isCompactBatch) {
            outputTableDesc.withBatchProvider(new CompactBatchProvider<Integer, EnrichedPageView, EnrichedPageView>().withMaxBatchSize(batchSize).withMaxBatchDelay(Duration.ofHours(1)));
        } else {
            outputTableDesc.withBatchProvider(new CompleteBatchProvider<Integer, EnrichedPageView, EnrichedPageView>().withMaxBatchSize(batchSize).withMaxBatchDelay(Duration.ofHours(1)));
        }
        Table<KV<Integer, EnrichedPageView>> table = appDesc.getTable(outputTableDesc);
        Table<KV<Integer, Profile>> inputTable = appDesc.getTable(inputTableDesc);
        DelegatingSystemDescriptor ksd = new DelegatingSystemDescriptor("test");
        GenericInputDescriptor<PageView> isd = ksd.getInputDescriptor("PageView", new NoOpSerde<>());
        appDesc.getInputStream(isd).map(pv -> new KV<>(pv.getMemberId(), pv)).join(inputTable, new PageViewToProfileJoinFunction()).map(m -> new KV<>(m.getMemberId(), UpdateMessage.of(m, m))).sendTo(table, UpdateOptions.UPDATE_WITH_DEFAULTS);
    };
    InMemorySystemDescriptor isd = new InMemorySystemDescriptor("test");
    InMemoryInputDescriptor<PageView> inputDescriptor = isd.getInputDescriptor("PageView", new NoOpSerde<>());
    TestRunner.of(app).addInputStream(inputDescriptor, Arrays.asList(generatePageViewsWithDistinctKeys(count))).addConfig("task.max.concurrency", String.valueOf(count)).addConfig("task.async.commit", String.valueOf(true)).run(Duration.ofSeconds(10));
    Assert.assertEquals(count, WRITTEN_RECORDS.get(testName).size());
    Assert.assertNotNull(WRITTEN_RECORDS.get(testName).get(0));
    Assert.assertEquals(count / batchSize, BATCH_WRITES.get(testName).get());
}
Also used : Arrays(java.util.Arrays) UpdateMessage(org.apache.samza.operators.UpdateMessage) InMemorySystemDescriptor(org.apache.samza.test.framework.system.descriptors.InMemorySystemDescriptor) ObjectInputStream(java.io.ObjectInputStream) HashMap(java.util.HashMap) CompletableFuture(java.util.concurrent.CompletableFuture) GenericInputDescriptor(org.apache.samza.system.descriptors.GenericInputDescriptor) Function(java.util.function.Function) TableReadFunction(org.apache.samza.table.remote.TableReadFunction) Base64Serializer(org.apache.samza.test.util.Base64Serializer) InMemoryInputDescriptor(org.apache.samza.test.framework.system.descriptors.InMemoryInputDescriptor) AtomicInteger(java.util.concurrent.atomic.AtomicInteger) TestTableData(org.apache.samza.test.table.TestTableData) DelegatingSystemDescriptor(org.apache.samza.system.descriptors.DelegatingSystemDescriptor) Duration(java.time.Duration) Map(java.util.Map) TableWriteFunction(org.apache.samza.table.remote.TableWriteFunction) KV(org.apache.samza.operators.KV) NoOpSerde(org.apache.samza.serializers.NoOpSerde) Table(org.apache.samza.table.Table) Collection(java.util.Collection) BaseTableFunction(org.apache.samza.table.remote.BaseTableFunction) IOException(java.io.IOException) Test(org.junit.Test) Collectors(java.util.stream.Collectors) SamzaException(org.apache.samza.SamzaException) TestRunner(org.apache.samza.test.framework.TestRunner) Mockito(org.mockito.Mockito) Entry(org.apache.samza.storage.kv.Entry) RateLimiter(org.apache.samza.util.RateLimiter) UpdateOptions(org.apache.samza.operators.UpdateOptions) CompactBatchProvider(org.apache.samza.table.batching.CompactBatchProvider) StreamApplication(org.apache.samza.application.StreamApplication) Assert(org.junit.Assert) CompleteBatchProvider(org.apache.samza.table.batching.CompleteBatchProvider) RemoteTableDescriptor(org.apache.samza.table.descriptors.RemoteTableDescriptor) TableRateLimiter(org.apache.samza.table.remote.TableRateLimiter) TableReadFunction(org.apache.samza.table.remote.TableReadFunction) InMemorySystemDescriptor(org.apache.samza.test.framework.system.descriptors.InMemorySystemDescriptor) CompleteBatchProvider(org.apache.samza.table.batching.CompleteBatchProvider) GenericInputDescriptor(org.apache.samza.system.descriptors.GenericInputDescriptor) Table(org.apache.samza.table.Table) StreamApplication(org.apache.samza.application.StreamApplication) RemoteTableDescriptor(org.apache.samza.table.descriptors.RemoteTableDescriptor) RateLimiter(org.apache.samza.util.RateLimiter) TableRateLimiter(org.apache.samza.table.remote.TableRateLimiter) TableRateLimiter(org.apache.samza.table.remote.TableRateLimiter) AtomicInteger(java.util.concurrent.atomic.AtomicInteger) DelegatingSystemDescriptor(org.apache.samza.system.descriptors.DelegatingSystemDescriptor) AtomicInteger(java.util.concurrent.atomic.AtomicInteger) NoOpSerde(org.apache.samza.serializers.NoOpSerde) CompactBatchProvider(org.apache.samza.table.batching.CompactBatchProvider)

Example 14 with Table

use of org.apache.samza.table.Table in project samza by apache.

the class TestRemoteTableEndToEnd method doTestStreamTableJoinRemoteTable.

private void doTestStreamTableJoinRemoteTable(boolean withCache, boolean defaultCache, boolean withUpdate, String testName) throws Exception {
    WRITTEN_RECORDS.put(testName, new ArrayList<>());
    // max member id for page views is 10
    final String profiles = Base64Serializer.serialize(generateProfiles(10));
    final RateLimiter readRateLimiter = mock(RateLimiter.class, withSettings().serializable());
    final TableRateLimiter.CreditFunction creditFunction = (k, v, args) -> 1;
    final StreamApplication app = appDesc -> {
        final RemoteTableDescriptor joinTableDesc = new RemoteTableDescriptor<Integer, TestTableData.Profile, Void>("profile-table-1").withReadFunction(InMemoryProfileReadFunction.getInMemoryReadFunction(profiles)).withRateLimiter(readRateLimiter, creditFunction, null);
        final RemoteTableDescriptor outputTableDesc = new RemoteTableDescriptor<Integer, EnrichedPageView, EnrichedPageView>("enriched-page-view-table-1").withReadFunction(new NoOpTableReadFunction<>()).withReadRateLimiterDisabled().withWriteFunction(new InMemoryEnrichedPageViewWriteFunction(testName)).withWriteRateLimit(1000);
        final Table<KV<Integer, Profile>> outputTable = withCache ? getCachingTable(outputTableDesc, defaultCache, appDesc) : appDesc.getTable(outputTableDesc);
        final Table<KV<Integer, Profile>> joinTable = withCache ? getCachingTable(joinTableDesc, defaultCache, appDesc) : appDesc.getTable(joinTableDesc);
        final DelegatingSystemDescriptor ksd = new DelegatingSystemDescriptor("test");
        final GenericInputDescriptor<PageView> isd = ksd.getInputDescriptor("PageView", new NoOpSerde<>());
        if (withUpdate) {
            appDesc.getInputStream(isd).map(pv -> new KV<>(pv.getMemberId(), pv)).join(joinTable, new PageViewToProfileJoinFunction()).map(m -> new KV(m.getMemberId(), UpdateMessage.of(m, m))).sendTo(outputTable, UpdateOptions.UPDATE_WITH_DEFAULTS);
        } else {
            appDesc.getInputStream(isd).map(pv -> new KV<>(pv.getMemberId(), pv)).join(joinTable, new PageViewToProfileJoinFunction()).map(m -> KV.of(m.getMemberId(), m)).sendTo(outputTable);
        }
    };
    int numPageViews = 40;
    InMemorySystemDescriptor isd = new InMemorySystemDescriptor("test");
    InMemoryInputDescriptor<PageView> inputDescriptor = isd.getInputDescriptor("PageView", new NoOpSerde<>());
    TestRunner.of(app).addInputStream(inputDescriptor, TestTableData.generatePartitionedPageViews(numPageViews, 4)).run(Duration.ofSeconds(10));
    Assert.assertEquals(numPageViews, WRITTEN_RECORDS.get(testName).size());
    Assert.assertNotNull(WRITTEN_RECORDS.get(testName).get(0));
    WRITTEN_RECORDS.get(testName).forEach(epv -> Assert.assertFalse(epv.company.contains("-")));
}
Also used : GuavaCacheTableDescriptor(org.apache.samza.table.descriptors.GuavaCacheTableDescriptor) Arrays(java.util.Arrays) InMemorySystemDescriptor(org.apache.samza.test.framework.system.descriptors.InMemorySystemDescriptor) TableDescriptor(org.apache.samza.table.descriptors.TableDescriptor) ObjectInputStream(java.io.ObjectInputStream) GenericInputDescriptor(org.apache.samza.system.descriptors.GenericInputDescriptor) RemoteTable(org.apache.samza.table.remote.RemoteTable) CachingTableDescriptor(org.apache.samza.table.descriptors.CachingTableDescriptor) Counter(org.apache.samza.metrics.Counter) InMemoryInputDescriptor(org.apache.samza.test.framework.system.descriptors.InMemoryInputDescriptor) AtomicInteger(java.util.concurrent.atomic.AtomicInteger) DelegatingSystemDescriptor(org.apache.samza.system.descriptors.DelegatingSystemDescriptor) Duration(java.time.Duration) Map(java.util.Map) NoOpTableReadFunction(org.apache.samza.table.remote.NoOpTableReadFunction) MapConfig(org.apache.samza.config.MapConfig) KV(org.apache.samza.operators.KV) NoOpSerde(org.apache.samza.serializers.NoOpSerde) Table(org.apache.samza.table.Table) EnrichedPageView(org.apache.samza.test.table.TestTableData.EnrichedPageView) MetricsRegistry(org.apache.samza.metrics.MetricsRegistry) Collectors(java.util.stream.Collectors) Executors(java.util.concurrent.Executors) Context(org.apache.samza.context.Context) RecordNotFoundException(org.apache.samza.table.RecordNotFoundException) TestRunner(org.apache.samza.test.framework.TestRunner) Matchers.any(org.mockito.Matchers.any) List(java.util.List) StreamApplicationDescriptor(org.apache.samza.application.descriptors.StreamApplicationDescriptor) TestTableData.generateProfiles(org.apache.samza.test.table.TestTableData.generateProfiles) CacheBuilder(com.google.common.cache.CacheBuilder) PageView(org.apache.samza.test.table.TestTableData.PageView) StreamApplication(org.apache.samza.application.StreamApplication) UpdateMessage(org.apache.samza.operators.UpdateMessage) HashMap(java.util.HashMap) CompletableFuture(java.util.concurrent.CompletableFuture) Profile(org.apache.samza.test.table.TestTableData.Profile) Function(java.util.function.Function) TableReadFunction(org.apache.samza.table.remote.TableReadFunction) Matchers.anyString(org.mockito.Matchers.anyString) ArrayList(java.util.ArrayList) Base64Serializer(org.apache.samza.test.util.Base64Serializer) MockContext(org.apache.samza.context.MockContext) TableWriteFunction(org.apache.samza.table.remote.TableWriteFunction) ExpectedException(org.junit.rules.ExpectedException) Timer(org.apache.samza.metrics.Timer) BaseTableFunction(org.apache.samza.table.remote.BaseTableFunction) IOException(java.io.IOException) Test(org.junit.Test) SamzaException(org.apache.samza.SamzaException) TimeUnit(java.util.concurrent.TimeUnit) Mockito(org.mockito.Mockito) Rule(org.junit.Rule) RateLimiter(org.apache.samza.util.RateLimiter) UpdateOptions(org.apache.samza.operators.UpdateOptions) Assert(org.junit.Assert) RemoteTableDescriptor(org.apache.samza.table.descriptors.RemoteTableDescriptor) TableRateLimiter(org.apache.samza.table.remote.TableRateLimiter) Matchers.anyString(org.mockito.Matchers.anyString) Profile(org.apache.samza.test.table.TestTableData.Profile) InMemorySystemDescriptor(org.apache.samza.test.framework.system.descriptors.InMemorySystemDescriptor) GenericInputDescriptor(org.apache.samza.system.descriptors.GenericInputDescriptor) EnrichedPageView(org.apache.samza.test.table.TestTableData.EnrichedPageView) PageView(org.apache.samza.test.table.TestTableData.PageView) RemoteTable(org.apache.samza.table.remote.RemoteTable) Table(org.apache.samza.table.Table) StreamApplication(org.apache.samza.application.StreamApplication) RemoteTableDescriptor(org.apache.samza.table.descriptors.RemoteTableDescriptor) KV(org.apache.samza.operators.KV) RateLimiter(org.apache.samza.util.RateLimiter) TableRateLimiter(org.apache.samza.table.remote.TableRateLimiter) TableRateLimiter(org.apache.samza.table.remote.TableRateLimiter) AtomicInteger(java.util.concurrent.atomic.AtomicInteger) DelegatingSystemDescriptor(org.apache.samza.system.descriptors.DelegatingSystemDescriptor) NoOpSerde(org.apache.samza.serializers.NoOpSerde) EnrichedPageView(org.apache.samza.test.table.TestTableData.EnrichedPageView)

Example 15 with Table

use of org.apache.samza.table.Table in project samza by apache.

the class TestRemoteTableEndToEnd method doTestStreamTableJoinRemoteTableWithFirstTimeUpdates.

private void doTestStreamTableJoinRemoteTableWithFirstTimeUpdates(String testName, boolean withDefaults, boolean failUpdatesAlways) throws IOException {
    final String profiles = Base64Serializer.serialize(generateProfiles(30));
    final RateLimiter readRateLimiter = mock(RateLimiter.class, withSettings().serializable());
    final TableRateLimiter.CreditFunction creditFunction = (k, v, args) -> 1;
    final StreamApplication app = appDesc -> {
        final RemoteTableDescriptor joinTableDesc = new RemoteTableDescriptor<Integer, TestTableData.Profile, Void>("profile-table-1").withReadFunction(InMemoryProfileReadFunction.getInMemoryReadFunction(profiles)).withRateLimiter(readRateLimiter, creditFunction, null);
        final RemoteTableDescriptor outputTableDesc = new RemoteTableDescriptor<Integer, EnrichedPageView, EnrichedPageView>("enriched-page-view-table-1").withReadFunction(new NoOpTableReadFunction<>()).withReadRateLimiterDisabled().withWriteFunction(new InMemoryEnrichedPageViewWriteFunction2(testName, failUpdatesAlways)).withWriteRateLimit(1000);
        // counters to count puts and updates
        COUNTERS.put(testName + "-put", new AtomicInteger());
        COUNTERS.put(testName + "-update", new AtomicInteger());
        final Table<KV<Integer, Profile>> outputTable = appDesc.getTable(outputTableDesc);
        final Table<KV<Integer, Profile>> joinTable = appDesc.getTable(joinTableDesc);
        final DelegatingSystemDescriptor ksd = new DelegatingSystemDescriptor("test");
        final GenericInputDescriptor<PageView> isd = ksd.getInputDescriptor("PageView", new NoOpSerde<>());
        if (withDefaults) {
            appDesc.getInputStream(isd).map(pv -> new KV<>(pv.getMemberId(), pv)).join(joinTable, new PageViewToProfileJoinFunction()).map(m -> new KV(m.getMemberId(), UpdateMessage.of(m, m))).sendTo(outputTable, UpdateOptions.UPDATE_WITH_DEFAULTS);
        } else {
            appDesc.getInputStream(isd).map(pv -> new KV<>(pv.getMemberId(), pv)).join(joinTable, new PageViewToProfileJoinFunction()).map(m -> new KV(m.getMemberId(), UpdateMessage.of(m))).sendTo(outputTable, UpdateOptions.UPDATE_ONLY);
        }
    };
    int numPageViews = 15;
    InMemorySystemDescriptor isd = new InMemorySystemDescriptor("test");
    InMemoryInputDescriptor<PageView> inputDescriptor = isd.getInputDescriptor("PageView", new NoOpSerde<>());
    Map<Integer, List<PageView>> integerListMap = TestTableData.generatePartitionedPageViews(numPageViews, 1);
    TestRunner.of(app).addInputStream(inputDescriptor, integerListMap).run(Duration.ofSeconds(10));
    if (withDefaults) {
        Assert.assertEquals(10, COUNTERS.get(testName + "-put").intValue());
        Assert.assertEquals(15, COUNTERS.get(testName + "-update").intValue());
    }
}
Also used : GuavaCacheTableDescriptor(org.apache.samza.table.descriptors.GuavaCacheTableDescriptor) Arrays(java.util.Arrays) InMemorySystemDescriptor(org.apache.samza.test.framework.system.descriptors.InMemorySystemDescriptor) TableDescriptor(org.apache.samza.table.descriptors.TableDescriptor) ObjectInputStream(java.io.ObjectInputStream) GenericInputDescriptor(org.apache.samza.system.descriptors.GenericInputDescriptor) RemoteTable(org.apache.samza.table.remote.RemoteTable) CachingTableDescriptor(org.apache.samza.table.descriptors.CachingTableDescriptor) Counter(org.apache.samza.metrics.Counter) InMemoryInputDescriptor(org.apache.samza.test.framework.system.descriptors.InMemoryInputDescriptor) AtomicInteger(java.util.concurrent.atomic.AtomicInteger) DelegatingSystemDescriptor(org.apache.samza.system.descriptors.DelegatingSystemDescriptor) Duration(java.time.Duration) Map(java.util.Map) NoOpTableReadFunction(org.apache.samza.table.remote.NoOpTableReadFunction) MapConfig(org.apache.samza.config.MapConfig) KV(org.apache.samza.operators.KV) NoOpSerde(org.apache.samza.serializers.NoOpSerde) Table(org.apache.samza.table.Table) EnrichedPageView(org.apache.samza.test.table.TestTableData.EnrichedPageView) MetricsRegistry(org.apache.samza.metrics.MetricsRegistry) Collectors(java.util.stream.Collectors) Executors(java.util.concurrent.Executors) Context(org.apache.samza.context.Context) RecordNotFoundException(org.apache.samza.table.RecordNotFoundException) TestRunner(org.apache.samza.test.framework.TestRunner) Matchers.any(org.mockito.Matchers.any) List(java.util.List) StreamApplicationDescriptor(org.apache.samza.application.descriptors.StreamApplicationDescriptor) TestTableData.generateProfiles(org.apache.samza.test.table.TestTableData.generateProfiles) CacheBuilder(com.google.common.cache.CacheBuilder) PageView(org.apache.samza.test.table.TestTableData.PageView) StreamApplication(org.apache.samza.application.StreamApplication) UpdateMessage(org.apache.samza.operators.UpdateMessage) HashMap(java.util.HashMap) CompletableFuture(java.util.concurrent.CompletableFuture) Profile(org.apache.samza.test.table.TestTableData.Profile) Function(java.util.function.Function) TableReadFunction(org.apache.samza.table.remote.TableReadFunction) Matchers.anyString(org.mockito.Matchers.anyString) ArrayList(java.util.ArrayList) Base64Serializer(org.apache.samza.test.util.Base64Serializer) MockContext(org.apache.samza.context.MockContext) TableWriteFunction(org.apache.samza.table.remote.TableWriteFunction) ExpectedException(org.junit.rules.ExpectedException) Timer(org.apache.samza.metrics.Timer) BaseTableFunction(org.apache.samza.table.remote.BaseTableFunction) IOException(java.io.IOException) Test(org.junit.Test) SamzaException(org.apache.samza.SamzaException) TimeUnit(java.util.concurrent.TimeUnit) Mockito(org.mockito.Mockito) Rule(org.junit.Rule) RateLimiter(org.apache.samza.util.RateLimiter) UpdateOptions(org.apache.samza.operators.UpdateOptions) Assert(org.junit.Assert) RemoteTableDescriptor(org.apache.samza.table.descriptors.RemoteTableDescriptor) TableRateLimiter(org.apache.samza.table.remote.TableRateLimiter) Matchers.anyString(org.mockito.Matchers.anyString) Profile(org.apache.samza.test.table.TestTableData.Profile) InMemorySystemDescriptor(org.apache.samza.test.framework.system.descriptors.InMemorySystemDescriptor) GenericInputDescriptor(org.apache.samza.system.descriptors.GenericInputDescriptor) List(java.util.List) ArrayList(java.util.ArrayList) EnrichedPageView(org.apache.samza.test.table.TestTableData.EnrichedPageView) PageView(org.apache.samza.test.table.TestTableData.PageView) RemoteTable(org.apache.samza.table.remote.RemoteTable) Table(org.apache.samza.table.Table) StreamApplication(org.apache.samza.application.StreamApplication) RemoteTableDescriptor(org.apache.samza.table.descriptors.RemoteTableDescriptor) KV(org.apache.samza.operators.KV) RateLimiter(org.apache.samza.util.RateLimiter) TableRateLimiter(org.apache.samza.table.remote.TableRateLimiter) TableRateLimiter(org.apache.samza.table.remote.TableRateLimiter) AtomicInteger(java.util.concurrent.atomic.AtomicInteger) DelegatingSystemDescriptor(org.apache.samza.system.descriptors.DelegatingSystemDescriptor) AtomicInteger(java.util.concurrent.atomic.AtomicInteger) NoOpSerde(org.apache.samza.serializers.NoOpSerde) EnrichedPageView(org.apache.samza.test.table.TestTableData.EnrichedPageView)

Aggregations

Table (org.apache.samza.table.Table)19 KV (org.apache.samza.operators.KV)17 Map (java.util.Map)13 TableDescriptor (org.apache.samza.table.descriptors.TableDescriptor)12 HashMap (java.util.HashMap)11 SamzaException (org.apache.samza.SamzaException)11 NoOpSerde (org.apache.samza.serializers.NoOpSerde)11 Test (org.junit.Test)11 Duration (java.time.Duration)10 ArrayList (java.util.ArrayList)10 List (java.util.List)10 Collectors (java.util.stream.Collectors)10 DelegatingSystemDescriptor (org.apache.samza.system.descriptors.DelegatingSystemDescriptor)10 GenericInputDescriptor (org.apache.samza.system.descriptors.GenericInputDescriptor)10 Assert (org.junit.Assert)10 Arrays (java.util.Arrays)9 StreamApplication (org.apache.samza.application.StreamApplication)9 TestRunner (org.apache.samza.test.framework.TestRunner)9 InMemoryInputDescriptor (org.apache.samza.test.framework.system.descriptors.InMemoryInputDescriptor)9 InMemorySystemDescriptor (org.apache.samza.test.framework.system.descriptors.InMemorySystemDescriptor)9