Search in sources :

Example 6 with DelegatingSystemDescriptor

use of org.apache.samza.system.descriptors.DelegatingSystemDescriptor in project samza by apache.

the class TestRemoteTableEndToEnd method testSendToWithDefaultsAndUpdateOnly.

/**
 * Joins page views against the profile table and sends the enriched result to a remote
 * output table using {@code UpdateOptions.UPDATE_ONLY}; the run is expected to end in a
 * {@link SamzaException}.
 *
 * <p>Failure noted for this scenario:
 * "org.apache.samza.SamzaException: Put default failed for update as the UpdateOptions was
 * set to UPDATE_ONLY. Please use UpdateOptions.UPDATE_WITH_DEFAULTS instead."
 *
 * @throws Exception if the test run fails in a way other than the expected exception
 */
@Test(expected = SamzaException.class)
public void testSendToWithDefaultsAndUpdateOnly() throws Exception {
    final String testName = "testSendToWithDefaultsAndUpdateOnly";
    final String profiles = Base64Serializer.serialize(generateProfiles(30));
    final RateLimiter readRateLimiter = mock(RateLimiter.class, withSettings().serializable());
    // Every table operation costs a single credit.
    final TableRateLimiter.CreditFunction creditFunction = (k, v, args) -> 1;

    final StreamApplication app = appDesc -> {
        // Read-only table holding the profiles that page views are joined against.
        final RemoteTableDescriptor profileTableDescriptor =
            new RemoteTableDescriptor<Integer, TestTableData.Profile, Void>("profile-table-1")
                .withReadFunction(InMemoryProfileReadFunction.getInMemoryReadFunction(profiles))
                .withRateLimiter(readRateLimiter, creditFunction, null);
        // Output table: no-op reads, rate-limited in-memory writes.
        final RemoteTableDescriptor enrichedTableDescriptor =
            new RemoteTableDescriptor<Integer, EnrichedPageView, EnrichedPageView>("enriched-page-view-table-1")
                .withReadFunction(new NoOpTableReadFunction<>())
                .withReadRateLimiterDisabled()
                .withWriteFunction(new InMemoryEnrichedPageViewWriteFunction2(testName, false))
                .withWriteRateLimit(1000);

        // Register the per-test put and update counters.
        COUNTERS.put(testName + "-put", new AtomicInteger());
        COUNTERS.put(testName + "-update", new AtomicInteger());

        final Table<KV<Integer, Profile>> enrichedTable = appDesc.getTable(enrichedTableDescriptor);
        final Table<KV<Integer, Profile>> profileTable = appDesc.getTable(profileTableDescriptor);

        final DelegatingSystemDescriptor delegatingSystem = new DelegatingSystemDescriptor("test");
        final GenericInputDescriptor<PageView> pageViewInput =
            delegatingSystem.getInputDescriptor("PageView", new NoOpSerde<>());

        appDesc.getInputStream(pageViewInput)
            .map(pv -> new KV<>(pv.getMemberId(), pv))
            .join(profileTable, new PageViewToProfileJoinFunction())
            .map(m -> new KV(m.getMemberId(), UpdateMessage.of(m, m)))
            .sendTo(enrichedTable, UpdateOptions.UPDATE_ONLY);
    };

    final int pageViewCount = 15;
    InMemorySystemDescriptor inMemorySystem = new InMemorySystemDescriptor("test");
    InMemoryInputDescriptor<PageView> pageViewDescriptor =
        inMemorySystem.getInputDescriptor("PageView", new NoOpSerde<>());
    Map<Integer, List<PageView>> partitionedPageViews =
        TestTableData.generatePartitionedPageViews(pageViewCount, 1);

    TestRunner.of(app)
        .addInputStream(pageViewDescriptor, partitionedPageViews)
        .run(Duration.ofSeconds(10));
}
Also used : GuavaCacheTableDescriptor(org.apache.samza.table.descriptors.GuavaCacheTableDescriptor) Arrays(java.util.Arrays) InMemorySystemDescriptor(org.apache.samza.test.framework.system.descriptors.InMemorySystemDescriptor) TableDescriptor(org.apache.samza.table.descriptors.TableDescriptor) ObjectInputStream(java.io.ObjectInputStream) GenericInputDescriptor(org.apache.samza.system.descriptors.GenericInputDescriptor) RemoteTable(org.apache.samza.table.remote.RemoteTable) CachingTableDescriptor(org.apache.samza.table.descriptors.CachingTableDescriptor) Counter(org.apache.samza.metrics.Counter) InMemoryInputDescriptor(org.apache.samza.test.framework.system.descriptors.InMemoryInputDescriptor) AtomicInteger(java.util.concurrent.atomic.AtomicInteger) DelegatingSystemDescriptor(org.apache.samza.system.descriptors.DelegatingSystemDescriptor) Duration(java.time.Duration) Map(java.util.Map) NoOpTableReadFunction(org.apache.samza.table.remote.NoOpTableReadFunction) MapConfig(org.apache.samza.config.MapConfig) KV(org.apache.samza.operators.KV) NoOpSerde(org.apache.samza.serializers.NoOpSerde) Table(org.apache.samza.table.Table) EnrichedPageView(org.apache.samza.test.table.TestTableData.EnrichedPageView) MetricsRegistry(org.apache.samza.metrics.MetricsRegistry) Collectors(java.util.stream.Collectors) Executors(java.util.concurrent.Executors) Context(org.apache.samza.context.Context) RecordNotFoundException(org.apache.samza.table.RecordNotFoundException) TestRunner(org.apache.samza.test.framework.TestRunner) Matchers.any(org.mockito.Matchers.any) List(java.util.List) StreamApplicationDescriptor(org.apache.samza.application.descriptors.StreamApplicationDescriptor) TestTableData.generateProfiles(org.apache.samza.test.table.TestTableData.generateProfiles) CacheBuilder(com.google.common.cache.CacheBuilder) PageView(org.apache.samza.test.table.TestTableData.PageView) StreamApplication(org.apache.samza.application.StreamApplication) 
UpdateMessage(org.apache.samza.operators.UpdateMessage) HashMap(java.util.HashMap) CompletableFuture(java.util.concurrent.CompletableFuture) Profile(org.apache.samza.test.table.TestTableData.Profile) Function(java.util.function.Function) TableReadFunction(org.apache.samza.table.remote.TableReadFunction) Matchers.anyString(org.mockito.Matchers.anyString) ArrayList(java.util.ArrayList) Base64Serializer(org.apache.samza.test.util.Base64Serializer) MockContext(org.apache.samza.context.MockContext) TableWriteFunction(org.apache.samza.table.remote.TableWriteFunction) ExpectedException(org.junit.rules.ExpectedException) Timer(org.apache.samza.metrics.Timer) BaseTableFunction(org.apache.samza.table.remote.BaseTableFunction) IOException(java.io.IOException) Test(org.junit.Test) SamzaException(org.apache.samza.SamzaException) TimeUnit(java.util.concurrent.TimeUnit) Mockito(org.mockito.Mockito) Rule(org.junit.Rule) RateLimiter(org.apache.samza.util.RateLimiter) UpdateOptions(org.apache.samza.operators.UpdateOptions) Assert(org.junit.Assert) RemoteTableDescriptor(org.apache.samza.table.descriptors.RemoteTableDescriptor) TableRateLimiter(org.apache.samza.table.remote.TableRateLimiter) Matchers.anyString(org.mockito.Matchers.anyString) Profile(org.apache.samza.test.table.TestTableData.Profile) InMemorySystemDescriptor(org.apache.samza.test.framework.system.descriptors.InMemorySystemDescriptor) GenericInputDescriptor(org.apache.samza.system.descriptors.GenericInputDescriptor) List(java.util.List) ArrayList(java.util.ArrayList) EnrichedPageView(org.apache.samza.test.table.TestTableData.EnrichedPageView) PageView(org.apache.samza.test.table.TestTableData.PageView) RemoteTable(org.apache.samza.table.remote.RemoteTable) Table(org.apache.samza.table.Table) StreamApplication(org.apache.samza.application.StreamApplication) RemoteTableDescriptor(org.apache.samza.table.descriptors.RemoteTableDescriptor) KV(org.apache.samza.operators.KV) RateLimiter(org.apache.samza.util.RateLimiter) 
TableRateLimiter(org.apache.samza.table.remote.TableRateLimiter) TableRateLimiter(org.apache.samza.table.remote.TableRateLimiter) AtomicInteger(java.util.concurrent.atomic.AtomicInteger) DelegatingSystemDescriptor(org.apache.samza.system.descriptors.DelegatingSystemDescriptor) AtomicInteger(java.util.concurrent.atomic.AtomicInteger) NoOpSerde(org.apache.samza.serializers.NoOpSerde) EnrichedPageView(org.apache.samza.test.table.TestTableData.EnrichedPageView) Test(org.junit.Test)

Example 7 with DelegatingSystemDescriptor

use of org.apache.samza.system.descriptors.DelegatingSystemDescriptor in project samza by apache.

the class QueryTranslator method sendToOutputStream.

/**
 * Sends the translated output of {@code node} to the sink identified by {@code sinkStream}.
 *
 * <p>The rel-message stream registered for the node is mapped through an
 * {@code OutputMapFunction}. If the sink configuration carries a table descriptor, the mapped
 * stream is sent to that table. Otherwise it is sent to an output stream on the sink's system,
 * and, when system-event processing is enabled, the system messages of every registered input
 * stream are forwarded to the same output stream.
 *
 * @param queryLogicalId logical id of the query, handed to the output map function
 * @param logicalOpId logical id of the operator, handed to the output map function
 * @param sinkStream source name used to look up the sink configuration
 * @param appDesc application descriptor used to obtain the output stream or table
 * @param translatorContext context holding the message stream registered for {@code node}
 * @param node relational node whose output is being sent
 * @param queryId id of the query, handed to the output map function
 * @throws SamzaException if the application descriptor returns no table for the sink's table descriptor
 */
private void sendToOutputStream(String queryLogicalId, String logicalOpId, String sinkStream, StreamApplicationDescriptor appDesc, TranslatorContext translatorContext, RelNode node, int queryId) {
    SqlIOConfig sinkConfig = sqlConfig.getOutputSystemStreamConfigsBySource().get(sinkStream);
    MessageStream<SamzaSqlRelMessage> relStream = translatorContext.getMessageStream(node.getId());
    MessageStream<KV<Object, Object>> mappedOutput =
        relStream.map(new OutputMapFunction(queryLogicalId, logicalOpId, sinkStream, queryId));
    Optional<TableDescriptor> sinkTableDescriptor = sinkConfig.getTableDescriptor();

    // Table sink: write to the table and finish.
    if (sinkTableDescriptor.isPresent()) {
        Table sinkTable = appDesc.getTable(sinkTableDescriptor.get());
        if (sinkTable == null) {
            throw new SamzaException("Failed to obtain table descriptor of " + sinkConfig.getSource());
        }
        mappedOutput.sendTo(sinkTable);
        return;
    }

    // Stream sink: reuse the cached system descriptor and output stream when they already exist.
    KVSerde<Object, Object> passThroughSerde = KVSerde.of(new NoOpSerde<>(), new NoOpSerde<>());
    String sinkSystem = sinkConfig.getSystemName();
    DelegatingSystemDescriptor sinkSystemDescriptor =
        systemDescriptors.computeIfAbsent(sinkSystem, DelegatingSystemDescriptor::new);
    GenericOutputDescriptor<KV<Object, Object>> sinkDescriptor =
        sinkSystemDescriptor.getOutputDescriptor(sinkConfig.getStreamId(), passThroughSerde);
    OutputStream sink =
        outputMsgStreams.computeIfAbsent(sinkConfig.getSource(), v -> appDesc.getOutputStream(sinkDescriptor));
    mappedOutput.sendTo(sink);

    // System events are forwarded only when the output is a stream and the feature is enabled.
    if (!sqlConfig.isProcessSystemEvents()) {
        return;
    }
    for (MessageStream<SamzaSqlInputMessage> input : inputMsgStreams.values()) {
        MessageStream<KV<Object, Object>> systemEvents = input
            .filter(message -> message.getMetadata().isSystemMessage())
            .map(SamzaSqlInputMessage::getKeyAndMessageKV);
        systemEvents.sendTo(sink);
    }
}
Also used : SqlIOConfig(org.apache.samza.sql.interfaces.SqlIOConfig) TableScan(org.apache.calcite.rel.core.TableScan) SamzaSqlRelMsgMetadata(org.apache.samza.sql.data.SamzaSqlRelMsgMetadata) LogicalFilter(org.apache.calcite.rel.logical.LogicalFilter) RelShuttleImpl(org.apache.calcite.rel.RelShuttleImpl) TableDescriptor(org.apache.samza.table.descriptors.TableDescriptor) LoggerFactory(org.slf4j.LoggerFactory) HashMap(java.util.HashMap) RelRoot(org.apache.calcite.rel.RelRoot) TaskContext(org.apache.samza.context.TaskContext) MapFunction(org.apache.samza.operators.functions.MapFunction) Counter(org.apache.samza.metrics.Counter) LogicalJoin(org.apache.calcite.rel.logical.LogicalJoin) QueryPlanner(org.apache.samza.sql.planner.QueryPlanner) ApplicationContainerContext(org.apache.samza.context.ApplicationContainerContext) DelegatingSystemDescriptor(org.apache.samza.system.descriptors.DelegatingSystemDescriptor) Map(java.util.Map) TableModify(org.apache.calcite.rel.core.TableModify) SamzaSqlRelMessage(org.apache.samza.sql.data.SamzaSqlRelMessage) SamzaHistogram(org.apache.samza.metrics.SamzaHistogram) ExternalContext(org.apache.samza.context.ExternalContext) KV(org.apache.samza.operators.KV) NoOpSerde(org.apache.samza.serializers.NoOpSerde) MessageStream(org.apache.samza.operators.MessageStream) Table(org.apache.samza.table.Table) Logger(org.slf4j.Logger) LogicalProject(org.apache.calcite.rel.logical.LogicalProject) GenericOutputDescriptor(org.apache.samza.system.descriptors.GenericOutputDescriptor) JobContext(org.apache.samza.context.JobContext) ContainerContext(org.apache.samza.context.ContainerContext) SamzaRelConverter(org.apache.samza.sql.interfaces.SamzaRelConverter) SamzaSqlExecutionContext(org.apache.samza.sql.data.SamzaSqlExecutionContext) SamzaSqlApplicationConfig(org.apache.samza.sql.runner.SamzaSqlApplicationConfig) MetricsRegistry(org.apache.samza.metrics.MetricsRegistry) RelNode(org.apache.calcite.rel.RelNode) SamzaException(org.apache.samza.SamzaException) 
ApplicationTaskContextFactory(org.apache.samza.context.ApplicationTaskContextFactory) SqlIOConfig(org.apache.samza.sql.interfaces.SqlIOConfig) Context(org.apache.samza.context.Context) LogicalAggregate(org.apache.calcite.rel.logical.LogicalAggregate) Validate(org.apache.commons.lang3.Validate) SamzaSqlQueryParser(org.apache.samza.sql.util.SamzaSqlQueryParser) StreamApplicationDescriptor(org.apache.samza.application.descriptors.StreamApplicationDescriptor) SamzaSqlInputMessage(org.apache.samza.sql.SamzaSqlInputMessage) Optional(java.util.Optional) SamzaSqlApplicationContext(org.apache.samza.sql.runner.SamzaSqlApplicationContext) VisibleForTesting(com.google.common.annotations.VisibleForTesting) KVSerde(org.apache.samza.serializers.KVSerde) OutputStream(org.apache.samza.operators.OutputStream) Table(org.apache.samza.table.Table) OutputStream(org.apache.samza.operators.OutputStream) KV(org.apache.samza.operators.KV) SamzaSqlInputMessage(org.apache.samza.sql.SamzaSqlInputMessage) SamzaException(org.apache.samza.SamzaException) TableDescriptor(org.apache.samza.table.descriptors.TableDescriptor) DelegatingSystemDescriptor(org.apache.samza.system.descriptors.DelegatingSystemDescriptor) SamzaSqlRelMessage(org.apache.samza.sql.data.SamzaSqlRelMessage)

Example 8 with DelegatingSystemDescriptor

use of org.apache.samza.system.descriptors.DelegatingSystemDescriptor in project samza by apache.

the class ScanTranslator method translate.

// ScanMapFunction
/**
 * Translates a table scan into a stream of {@code SamzaSqlRelMessage} and registers that
 * stream on the translator context under the scan's id.
 *
 * <p>Sources backed by a {@code RemoteTableDescriptor} or {@code CachingTableDescriptor}
 * return early: no system descriptor or input stream is set up for them here.
 *
 * @param tableScan the scan node being translated
 * @param queryLogicalId logical id of the query, passed to the scan map function
 * @param logicalOpId logical id of the operator, passed to the scan map function
 * @param context translator context providing the application descriptor and stream registry
 * @param systemDescriptors system descriptors keyed by system name; a new entry is added
 *        when the scan's system has none yet
 * @param inputMsgStreams input message streams keyed by source; a new entry is added when
 *        the scan's source has none yet
 * @throws SamzaException if the system already has a descriptor whose transformer is present
 *         and is not a {@code SamzaSqlInputTransformer}
 */
void translate(final TableScan tableScan, final String queryLogicalId, final String logicalOpId, final TranslatorContext context, Map<String, DelegatingSystemDescriptor> systemDescriptors, Map<String, MessageStream<SamzaSqlInputMessage>> inputMsgStreams) {
    StreamApplicationDescriptor appDescriptor = context.getStreamAppDescriptor();
    String sourceName = SqlIOConfig.getSourceFromSourceParts(tableScan.getTable().getQualifiedName());
    Validate.isTrue(relMsgConverters.containsKey(sourceName), String.format("Unknown source %s", sourceName));

    SqlIOConfig sqlIOConfig = systemStreamConfig.get(sourceName);
    final String systemName = sqlIOConfig.getSystemName();
    final String streamId = sqlIOConfig.getStreamId();
    final String source = sqlIOConfig.getSource();

    final boolean isRemoteTable = sqlIOConfig.getTableDescriptor().isPresent()
        && (sqlIOConfig.getTableDescriptor().get() instanceof RemoteTableDescriptor
            || sqlIOConfig.getTableDescriptor().get() instanceof CachingTableDescriptor);
    // Remote and caching tables need no input stream from this method.
    if (isRemoteTable) {
        return;
    }

    // The system descriptor must carry the SamzaSQL wrapper input transformer.
    DelegatingSystemDescriptor systemDescriptor = systemDescriptors.get(systemName);
    if (systemDescriptor != null) {
        // SamzaSQL users are not expected to set up system descriptors themselves, so an
        // existing one means fan-out (the same input used by several SQL statements) or the
        // same input used twice in one statement (e.g. select ... from input as i1, input as i2).
        // Any other kind of transformer on it is an error.
        boolean foreignTransformer = systemDescriptor.getTransformer().isPresent()
            && !(systemDescriptor.getTransformer().get() instanceof SamzaSqlInputTransformer);
        if (foreignTransformer) {
            throw new SamzaException("SamzaSQL Exception: existing transformer for " + systemName + " is not SamzaSqlInputTransformer");
        }
    } else {
        systemDescriptor = new DelegatingSystemDescriptor(systemName, new SamzaSqlInputTransformer());
        systemDescriptors.put(systemName, systemDescriptor);
    }

    InputDescriptor inputDescriptor = systemDescriptor.getInputDescriptor(streamId, new NoOpSerde<>());
    // Create the input stream for a source only once; later scans of the same source reuse it.
    if (!inputMsgStreams.containsKey(source)) {
        MessageStream<SamzaSqlInputMessage> rawInput = appDescriptor.getInputStream(inputDescriptor);
        inputMsgStreams.put(source, rawInput.map(new SystemMessageMapperFunction(source, queryId)));
    }

    MessageStream<SamzaSqlRelMessage> relMessages = inputMsgStreams.get(source)
        .filter(new FilterSystemMessageFunction(sourceName, queryId))
        .map(new ScanMapFunction(sourceName, queryId, queryLogicalId, logicalOpId));
    context.registerMessageStream(tableScan.getId(), relMessages);
}
Also used : SqlIOConfig(org.apache.samza.sql.interfaces.SqlIOConfig) InputDescriptor(org.apache.samza.system.descriptors.InputDescriptor) CachingTableDescriptor(org.apache.samza.table.descriptors.CachingTableDescriptor) RemoteTableDescriptor(org.apache.samza.table.descriptors.RemoteTableDescriptor) SamzaSqlInputMessage(org.apache.samza.sql.SamzaSqlInputMessage) SamzaSqlInputTransformer(org.apache.samza.sql.SamzaSqlInputTransformer) InputTransformer(org.apache.samza.system.descriptors.InputTransformer) SamzaException(org.apache.samza.SamzaException) StreamApplicationDescriptor(org.apache.samza.application.descriptors.StreamApplicationDescriptor) DelegatingSystemDescriptor(org.apache.samza.system.descriptors.DelegatingSystemDescriptor) SamzaSqlInputTransformer(org.apache.samza.sql.SamzaSqlInputTransformer) SamzaSqlRelMessage(org.apache.samza.sql.data.SamzaSqlRelMessage)

Example 9 with DelegatingSystemDescriptor

use of org.apache.samza.system.descriptors.DelegatingSystemDescriptor in project samza by apache.

the class TestRemoteTableWithBatchEndToEnd method doTestStreamTableJoinRemoteTablePartialUpdates.

/**
 * Runs a stream-table join whose enriched page views are sent as update messages
 * ({@code UpdateOptions.UPDATE_WITH_DEFAULTS}) to a batching remote table, then checks the
 * number of written records and the number of batch writes.
 *
 * @param testName key under which this run's counters and written records are stored
 * @param isCompactBatch {@code true} to batch with {@code CompactBatchProvider},
 *        {@code false} to batch with {@code CompleteBatchProvider}
 * @throws Exception if the test run fails
 */
private void doTestStreamTableJoinRemoteTablePartialUpdates(String testName, boolean isCompactBatch) throws Exception {
    final InMemoryWriteFunction writeFunction = new InMemoryWriteFunction(testName);

    // Fresh bookkeeping for this test name.
    BATCH_READS.put(testName, new AtomicInteger());
    BATCH_WRITES.put(testName, new AtomicInteger());
    WRITTEN_RECORDS.put(testName, new HashMap<>());

    final int recordCount = 16;
    final int maxBatchSize = 4;
    final String serializedProfiles = Base64Serializer.serialize(generateProfiles(recordCount));
    final RateLimiter readRateLimiter = mock(RateLimiter.class, withSettings().serializable());
    final RateLimiter writeRateLimiter = mock(RateLimiter.class, withSettings().serializable());
    // Every table operation costs a single credit.
    final TableRateLimiter.CreditFunction creditFunction = (k, v, args) -> 1;

    final StreamApplication app = appDesc -> {
        RemoteTableDescriptor<Integer, Profile, Void> profileTableDesc =
            new RemoteTableDescriptor<>("profile-table-1");
        profileTableDesc
            .withReadFunction(InMemoryReadFunction.getInMemoryReadFunction(testName, serializedProfiles))
            .withRateLimiter(readRateLimiter, creditFunction, null);

        // Dummy reader for the output table.
        TableReadFunction<Integer, EnrichedPageView> dummyReader = new MyReadFunction();
        RemoteTableDescriptor<Integer, EnrichedPageView, EnrichedPageView> enrichedTableDesc =
            new RemoteTableDescriptor<>("enriched-page-view-table-1");
        enrichedTableDesc
            .withReadFunction(dummyReader)
            .withWriteFunction(writeFunction)
            .withRateLimiter(writeRateLimiter, creditFunction, creditFunction);

        // Both providers share the same batch size and a one-hour max delay.
        if (isCompactBatch) {
            enrichedTableDesc.withBatchProvider(
                new CompactBatchProvider<Integer, EnrichedPageView, EnrichedPageView>()
                    .withMaxBatchSize(maxBatchSize)
                    .withMaxBatchDelay(Duration.ofHours(1)));
        } else {
            enrichedTableDesc.withBatchProvider(
                new CompleteBatchProvider<Integer, EnrichedPageView, EnrichedPageView>()
                    .withMaxBatchSize(maxBatchSize)
                    .withMaxBatchDelay(Duration.ofHours(1)));
        }

        Table<KV<Integer, EnrichedPageView>> enrichedTable = appDesc.getTable(enrichedTableDesc);
        Table<KV<Integer, Profile>> profileTable = appDesc.getTable(profileTableDesc);

        DelegatingSystemDescriptor delegatingSystem = new DelegatingSystemDescriptor("test");
        GenericInputDescriptor<PageView> pageViewInput =
            delegatingSystem.getInputDescriptor("PageView", new NoOpSerde<>());

        appDesc.getInputStream(pageViewInput)
            .map(pv -> new KV<>(pv.getMemberId(), pv))
            .join(profileTable, new PageViewToProfileJoinFunction())
            .map(m -> new KV<>(m.getMemberId(), UpdateMessage.of(m, m)))
            .sendTo(enrichedTable, UpdateOptions.UPDATE_WITH_DEFAULTS);
    };

    InMemorySystemDescriptor inMemorySystem = new InMemorySystemDescriptor("test");
    InMemoryInputDescriptor<PageView> pageViewDescriptor =
        inMemorySystem.getInputDescriptor("PageView", new NoOpSerde<>());

    TestRunner.of(app)
        .addInputStream(pageViewDescriptor, Arrays.asList(generatePageViewsWithDistinctKeys(recordCount)))
        .addConfig("task.max.concurrency", String.valueOf(recordCount))
        .addConfig("task.async.commit", String.valueOf(true))
        .run(Duration.ofSeconds(10));

    // One written record per page view, grouped into batches of maxBatchSize.
    Assert.assertEquals(recordCount, WRITTEN_RECORDS.get(testName).size());
    Assert.assertNotNull(WRITTEN_RECORDS.get(testName).get(0));
    Assert.assertEquals(recordCount / maxBatchSize, BATCH_WRITES.get(testName).get());
}
Also used : Arrays(java.util.Arrays) UpdateMessage(org.apache.samza.operators.UpdateMessage) InMemorySystemDescriptor(org.apache.samza.test.framework.system.descriptors.InMemorySystemDescriptor) ObjectInputStream(java.io.ObjectInputStream) HashMap(java.util.HashMap) CompletableFuture(java.util.concurrent.CompletableFuture) GenericInputDescriptor(org.apache.samza.system.descriptors.GenericInputDescriptor) Function(java.util.function.Function) TableReadFunction(org.apache.samza.table.remote.TableReadFunction) Base64Serializer(org.apache.samza.test.util.Base64Serializer) InMemoryInputDescriptor(org.apache.samza.test.framework.system.descriptors.InMemoryInputDescriptor) AtomicInteger(java.util.concurrent.atomic.AtomicInteger) TestTableData(org.apache.samza.test.table.TestTableData) DelegatingSystemDescriptor(org.apache.samza.system.descriptors.DelegatingSystemDescriptor) Duration(java.time.Duration) Map(java.util.Map) TableWriteFunction(org.apache.samza.table.remote.TableWriteFunction) KV(org.apache.samza.operators.KV) NoOpSerde(org.apache.samza.serializers.NoOpSerde) Table(org.apache.samza.table.Table) Collection(java.util.Collection) BaseTableFunction(org.apache.samza.table.remote.BaseTableFunction) IOException(java.io.IOException) Test(org.junit.Test) Collectors(java.util.stream.Collectors) SamzaException(org.apache.samza.SamzaException) TestRunner(org.apache.samza.test.framework.TestRunner) Mockito(org.mockito.Mockito) Entry(org.apache.samza.storage.kv.Entry) RateLimiter(org.apache.samza.util.RateLimiter) UpdateOptions(org.apache.samza.operators.UpdateOptions) CompactBatchProvider(org.apache.samza.table.batching.CompactBatchProvider) StreamApplication(org.apache.samza.application.StreamApplication) Assert(org.junit.Assert) CompleteBatchProvider(org.apache.samza.table.batching.CompleteBatchProvider) RemoteTableDescriptor(org.apache.samza.table.descriptors.RemoteTableDescriptor) TableRateLimiter(org.apache.samza.table.remote.TableRateLimiter) 
TableReadFunction(org.apache.samza.table.remote.TableReadFunction) InMemorySystemDescriptor(org.apache.samza.test.framework.system.descriptors.InMemorySystemDescriptor) CompleteBatchProvider(org.apache.samza.table.batching.CompleteBatchProvider) GenericInputDescriptor(org.apache.samza.system.descriptors.GenericInputDescriptor) Table(org.apache.samza.table.Table) StreamApplication(org.apache.samza.application.StreamApplication) RemoteTableDescriptor(org.apache.samza.table.descriptors.RemoteTableDescriptor) RateLimiter(org.apache.samza.util.RateLimiter) TableRateLimiter(org.apache.samza.table.remote.TableRateLimiter) TableRateLimiter(org.apache.samza.table.remote.TableRateLimiter) AtomicInteger(java.util.concurrent.atomic.AtomicInteger) DelegatingSystemDescriptor(org.apache.samza.system.descriptors.DelegatingSystemDescriptor) AtomicInteger(java.util.concurrent.atomic.AtomicInteger) NoOpSerde(org.apache.samza.serializers.NoOpSerde) CompactBatchProvider(org.apache.samza.table.batching.CompactBatchProvider)

Example 10 with DelegatingSystemDescriptor

use of org.apache.samza.system.descriptors.DelegatingSystemDescriptor in project samza by apache.

the class TestRemoteTableEndToEnd method doTestStreamTableJoinRemoteTable.

/**
 * Runs a stream-table join of page views against the profile table and sends the result to
 * a remote output table, then checks what was written.
 *
 * @param withCache whether both tables are obtained through {@code getCachingTable}
 * @param defaultCache passed through to {@code getCachingTable} when {@code withCache} is set
 * @param withUpdate {@code true} to send update messages with
 *        {@code UpdateOptions.UPDATE_WITH_DEFAULTS}, {@code false} to send plain key/value pairs
 * @param testName key under which this run's written records are stored
 * @throws Exception if the test run fails
 */
private void doTestStreamTableJoinRemoteTable(boolean withCache, boolean defaultCache, boolean withUpdate, String testName) throws Exception {
    WRITTEN_RECORDS.put(testName, new ArrayList<>());

    // Page views use member ids up to 10, so 10 profiles are generated.
    final String profiles = Base64Serializer.serialize(generateProfiles(10));
    final RateLimiter readRateLimiter = mock(RateLimiter.class, withSettings().serializable());
    // Every table operation costs a single credit.
    final TableRateLimiter.CreditFunction creditFunction = (k, v, args) -> 1;

    final StreamApplication app = appDesc -> {
        final RemoteTableDescriptor profileTableDescriptor =
            new RemoteTableDescriptor<Integer, TestTableData.Profile, Void>("profile-table-1")
                .withReadFunction(InMemoryProfileReadFunction.getInMemoryReadFunction(profiles))
                .withRateLimiter(readRateLimiter, creditFunction, null);
        final RemoteTableDescriptor enrichedTableDescriptor =
            new RemoteTableDescriptor<Integer, EnrichedPageView, EnrichedPageView>("enriched-page-view-table-1")
                .withReadFunction(new NoOpTableReadFunction<>())
                .withReadRateLimiterDisabled()
                .withWriteFunction(new InMemoryEnrichedPageViewWriteFunction(testName))
                .withWriteRateLimit(1000);

        // Output table first, then join table, either cached or obtained directly.
        final Table<KV<Integer, Profile>> enrichedTable;
        final Table<KV<Integer, Profile>> profileTable;
        if (withCache) {
            enrichedTable = getCachingTable(enrichedTableDescriptor, defaultCache, appDesc);
            profileTable = getCachingTable(profileTableDescriptor, defaultCache, appDesc);
        } else {
            enrichedTable = appDesc.getTable(enrichedTableDescriptor);
            profileTable = appDesc.getTable(profileTableDescriptor);
        }

        final DelegatingSystemDescriptor delegatingSystem = new DelegatingSystemDescriptor("test");
        final GenericInputDescriptor<PageView> pageViewInput =
            delegatingSystem.getInputDescriptor("PageView", new NoOpSerde<>());

        if (withUpdate) {
            appDesc.getInputStream(pageViewInput)
                .map(pv -> new KV<>(pv.getMemberId(), pv))
                .join(profileTable, new PageViewToProfileJoinFunction())
                .map(m -> new KV(m.getMemberId(), UpdateMessage.of(m, m)))
                .sendTo(enrichedTable, UpdateOptions.UPDATE_WITH_DEFAULTS);
        } else {
            appDesc.getInputStream(pageViewInput)
                .map(pv -> new KV<>(pv.getMemberId(), pv))
                .join(profileTable, new PageViewToProfileJoinFunction())
                .map(m -> KV.of(m.getMemberId(), m))
                .sendTo(enrichedTable);
        }
    };

    final int pageViewCount = 40;
    InMemorySystemDescriptor inMemorySystem = new InMemorySystemDescriptor("test");
    InMemoryInputDescriptor<PageView> pageViewDescriptor =
        inMemorySystem.getInputDescriptor("PageView", new NoOpSerde<>());

    TestRunner.of(app)
        .addInputStream(pageViewDescriptor, TestTableData.generatePartitionedPageViews(pageViewCount, 4))
        .run(Duration.ofSeconds(10));

    // One record per page view must be written, and no written company name may contain "-".
    Assert.assertEquals(pageViewCount, WRITTEN_RECORDS.get(testName).size());
    Assert.assertNotNull(WRITTEN_RECORDS.get(testName).get(0));
    WRITTEN_RECORDS.get(testName).forEach(epv -> Assert.assertFalse(epv.company.contains("-")));
}
Also used : GuavaCacheTableDescriptor(org.apache.samza.table.descriptors.GuavaCacheTableDescriptor) Arrays(java.util.Arrays) InMemorySystemDescriptor(org.apache.samza.test.framework.system.descriptors.InMemorySystemDescriptor) TableDescriptor(org.apache.samza.table.descriptors.TableDescriptor) ObjectInputStream(java.io.ObjectInputStream) GenericInputDescriptor(org.apache.samza.system.descriptors.GenericInputDescriptor) RemoteTable(org.apache.samza.table.remote.RemoteTable) CachingTableDescriptor(org.apache.samza.table.descriptors.CachingTableDescriptor) Counter(org.apache.samza.metrics.Counter) InMemoryInputDescriptor(org.apache.samza.test.framework.system.descriptors.InMemoryInputDescriptor) AtomicInteger(java.util.concurrent.atomic.AtomicInteger) DelegatingSystemDescriptor(org.apache.samza.system.descriptors.DelegatingSystemDescriptor) Duration(java.time.Duration) Map(java.util.Map) NoOpTableReadFunction(org.apache.samza.table.remote.NoOpTableReadFunction) MapConfig(org.apache.samza.config.MapConfig) KV(org.apache.samza.operators.KV) NoOpSerde(org.apache.samza.serializers.NoOpSerde) Table(org.apache.samza.table.Table) EnrichedPageView(org.apache.samza.test.table.TestTableData.EnrichedPageView) MetricsRegistry(org.apache.samza.metrics.MetricsRegistry) Collectors(java.util.stream.Collectors) Executors(java.util.concurrent.Executors) Context(org.apache.samza.context.Context) RecordNotFoundException(org.apache.samza.table.RecordNotFoundException) TestRunner(org.apache.samza.test.framework.TestRunner) Matchers.any(org.mockito.Matchers.any) List(java.util.List) StreamApplicationDescriptor(org.apache.samza.application.descriptors.StreamApplicationDescriptor) TestTableData.generateProfiles(org.apache.samza.test.table.TestTableData.generateProfiles) CacheBuilder(com.google.common.cache.CacheBuilder) PageView(org.apache.samza.test.table.TestTableData.PageView) StreamApplication(org.apache.samza.application.StreamApplication) 
UpdateMessage(org.apache.samza.operators.UpdateMessage) HashMap(java.util.HashMap) CompletableFuture(java.util.concurrent.CompletableFuture) Profile(org.apache.samza.test.table.TestTableData.Profile) Function(java.util.function.Function) TableReadFunction(org.apache.samza.table.remote.TableReadFunction) Matchers.anyString(org.mockito.Matchers.anyString) ArrayList(java.util.ArrayList) Base64Serializer(org.apache.samza.test.util.Base64Serializer) MockContext(org.apache.samza.context.MockContext) TableWriteFunction(org.apache.samza.table.remote.TableWriteFunction) ExpectedException(org.junit.rules.ExpectedException) Timer(org.apache.samza.metrics.Timer) BaseTableFunction(org.apache.samza.table.remote.BaseTableFunction) IOException(java.io.IOException) Test(org.junit.Test) SamzaException(org.apache.samza.SamzaException) TimeUnit(java.util.concurrent.TimeUnit) Mockito(org.mockito.Mockito) Rule(org.junit.Rule) RateLimiter(org.apache.samza.util.RateLimiter) UpdateOptions(org.apache.samza.operators.UpdateOptions) Assert(org.junit.Assert) RemoteTableDescriptor(org.apache.samza.table.descriptors.RemoteTableDescriptor) TableRateLimiter(org.apache.samza.table.remote.TableRateLimiter) Matchers.anyString(org.mockito.Matchers.anyString) Profile(org.apache.samza.test.table.TestTableData.Profile) InMemorySystemDescriptor(org.apache.samza.test.framework.system.descriptors.InMemorySystemDescriptor) GenericInputDescriptor(org.apache.samza.system.descriptors.GenericInputDescriptor) EnrichedPageView(org.apache.samza.test.table.TestTableData.EnrichedPageView) PageView(org.apache.samza.test.table.TestTableData.PageView) RemoteTable(org.apache.samza.table.remote.RemoteTable) Table(org.apache.samza.table.Table) StreamApplication(org.apache.samza.application.StreamApplication) RemoteTableDescriptor(org.apache.samza.table.descriptors.RemoteTableDescriptor) KV(org.apache.samza.operators.KV) RateLimiter(org.apache.samza.util.RateLimiter) 
TableRateLimiter(org.apache.samza.table.remote.TableRateLimiter) TableRateLimiter(org.apache.samza.table.remote.TableRateLimiter) AtomicInteger(java.util.concurrent.atomic.AtomicInteger) DelegatingSystemDescriptor(org.apache.samza.system.descriptors.DelegatingSystemDescriptor) NoOpSerde(org.apache.samza.serializers.NoOpSerde) EnrichedPageView(org.apache.samza.test.table.TestTableData.EnrichedPageView)

Aggregations

DelegatingSystemDescriptor (org.apache.samza.system.descriptors.DelegatingSystemDescriptor): 13; KV (org.apache.samza.operators.KV): 12; NoOpSerde (org.apache.samza.serializers.NoOpSerde): 12; StreamApplication (org.apache.samza.application.StreamApplication): 11; GenericInputDescriptor (org.apache.samza.system.descriptors.GenericInputDescriptor): 11; Test (org.junit.Test): 11; Duration (java.time.Duration): 10; HashMap (java.util.HashMap): 10; Map (java.util.Map): 10; StreamApplicationDescriptor (org.apache.samza.application.descriptors.StreamApplicationDescriptor): 10; Table (org.apache.samza.table.Table): 10; TestRunner (org.apache.samza.test.framework.TestRunner): 10; InMemoryInputDescriptor (org.apache.samza.test.framework.system.descriptors.InMemoryInputDescriptor): 10; InMemorySystemDescriptor (org.apache.samza.test.framework.system.descriptors.InMemorySystemDescriptor): 10; ArrayList (java.util.ArrayList): 9; List (java.util.List): 9; SamzaException (org.apache.samza.SamzaException): 9; RemoteTableDescriptor (org.apache.samza.table.descriptors.RemoteTableDescriptor): 9; Arrays (java.util.Arrays): 8; Assert (org.junit.Assert): 8