use of org.apache.samza.table.descriptors.RemoteTableDescriptor in project samza by apache.
the class TestRemoteTableEndToEnd method testSendToWithDefaultsAndUpdateOnly.
/**
 * Joins page views against the profile table and sends the joined records, wrapped via
 * {@code UpdateMessage.of(m, m)}, to a remote output table using {@code UpdateOptions.UPDATE_ONLY}.
 *
 * <p>The run is expected to fail with the following exception:
 * org.apache.samza.SamzaException: Put default failed for update as the UpdateOptions was set to UPDATE_ONLY.
 * Please use UpdateOptions.UPDATE_WITH_DEFAULTS instead.
 *
 * @throws Exception propagated from profile serialization or from the test run
 */
@Test(expected = SamzaException.class)
public void testSendToWithDefaultsAndUpdateOnly() throws Exception {
  final String testName = "testSendToWithDefaultsAndUpdateOnly";
  final String serializedProfiles = Base64Serializer.serialize(generateProfiles(30));
  final RateLimiter readLimiter = mock(RateLimiter.class, withSettings().serializable());
  // Every table operation costs exactly one credit.
  final TableRateLimiter.CreditFunction oneCreditPerOp = (k, v, args) -> 1;

  final StreamApplication app = appDesc -> {
    // Table the page views are joined against (read path only).
    final RemoteTableDescriptor profileTableDesc =
        new RemoteTableDescriptor<Integer, TestTableData.Profile, Void>("profile-table-1")
            .withReadFunction(InMemoryProfileReadFunction.getInMemoryReadFunction(serializedProfiles))
            .withRateLimiter(readLimiter, oneCreditPerOp, null);
    // Table the joined records are sent to.
    final RemoteTableDescriptor enrichedTableDesc =
        new RemoteTableDescriptor<Integer, EnrichedPageView, EnrichedPageView>("enriched-page-view-table-1")
            .withReadFunction(new NoOpTableReadFunction<>())
            .withReadRateLimiterDisabled()
            .withWriteFunction(new InMemoryEnrichedPageViewWriteFunction2(testName, false))
            .withWriteRateLimit(1000);

    // Register the per-test put/update counters.
    COUNTERS.put(testName + "-put", new AtomicInteger());
    COUNTERS.put(testName + "-update", new AtomicInteger());

    final Table<KV<Integer, Profile>> enrichedTable = appDesc.getTable(enrichedTableDesc);
    final Table<KV<Integer, Profile>> profileTable = appDesc.getTable(profileTableDesc);
    final DelegatingSystemDescriptor testSystem = new DelegatingSystemDescriptor("test");
    final GenericInputDescriptor<PageView> pageViewInput =
        testSystem.getInputDescriptor("PageView", new NoOpSerde<>());

    appDesc.getInputStream(pageViewInput)
        .map(pageView -> new KV<>(pageView.getMemberId(), pageView))
        .join(profileTable, new PageViewToProfileJoinFunction())
        .map(enriched -> new KV(enriched.getMemberId(), UpdateMessage.of(enriched, enriched)))
        .sendTo(enrichedTable, UpdateOptions.UPDATE_ONLY);
  };

  final int pageViewCount = 15;
  final InMemorySystemDescriptor inMemorySystem = new InMemorySystemDescriptor("test");
  final InMemoryInputDescriptor<PageView> inMemoryInput =
      inMemorySystem.getInputDescriptor("PageView", new NoOpSerde<>());
  // All page views go into a single partition.
  final Map<Integer, List<PageView>> partitionedPageViews =
      TestTableData.generatePartitionedPageViews(pageViewCount, 1);

  TestRunner.of(app)
      .addInputStream(inMemoryInput, partitionedPageViews)
      .run(Duration.ofSeconds(10));
}
use of org.apache.samza.table.descriptors.RemoteTableDescriptor in project samza by apache.
the class ScanTranslator method translate.
// ScanMapFunction
/**
 * Translates a table scan into a stream of {@code SamzaSqlRelMessage}s and registers that stream
 * with the translator context under the scan's id.
 *
 * <p>When the source is backed by a {@code RemoteTableDescriptor} or {@code CachingTableDescriptor}
 * the method returns without creating or registering any stream.
 * NOTE(review): presumably such tables need no input stream because their descriptor is resolved
 * elsewhere - confirm against the IO resolver.
 *
 * @param tableScan the scan node whose qualified table name identifies the source
 * @param queryLogicalId passed through to the {@code ScanMapFunction}
 * @param logicalOpId passed through to the {@code ScanMapFunction}
 * @param context supplies the stream application descriptor and receives the resulting stream
 * @param systemDescriptors system descriptors keyed by system name; a new entry is added when the
 *     source's system has none yet
 * @param inputMsgStreams input message streams keyed by source; a new entry is added when the
 *     source has none yet
 * @throws SamzaException if the system already has a transformer that is not a
 *     {@code SamzaSqlInputTransformer}
 */
void translate(final TableScan tableScan, final String queryLogicalId, final String logicalOpId, final TranslatorContext context, Map<String, DelegatingSystemDescriptor> systemDescriptors, Map<String, MessageStream<SamzaSqlInputMessage>> inputMsgStreams) {
  final StreamApplicationDescriptor appDescriptor = context.getStreamAppDescriptor();
  final String sourceName = SqlIOConfig.getSourceFromSourceParts(tableScan.getTable().getQualifiedName());

  Validate.isTrue(relMsgConverters.containsKey(sourceName), String.format("Unknown source %s", sourceName));

  final SqlIOConfig ioConfig = systemStreamConfig.get(sourceName);
  final String systemName = ioConfig.getSystemName();
  final String streamId = ioConfig.getStreamId();
  final String source = ioConfig.getSource();

  // Nothing to set up for remote/caching tables: leave without registering a stream.
  if (ioConfig.getTableDescriptor().isPresent()) {
    final Object tableDescriptor = ioConfig.getTableDescriptor().get();
    if (tableDescriptor instanceof RemoteTableDescriptor || tableDescriptor instanceof CachingTableDescriptor) {
      return;
    }
  }

  // Make sure the system descriptor carries the wrapper input transformer (SamzaSqlInputTransformer).
  DelegatingSystemDescriptor sysDescriptor = systemDescriptors.get(systemName);
  if (sysDescriptor == null) {
    sysDescriptor = new DelegatingSystemDescriptor(systemName, new SamzaSqlInputTransformer());
    systemDescriptors.put(systemName, sysDescriptor);
  } else if (sysDescriptor.getTransformer().isPresent()
      && !(sysDescriptor.getTransformer().get() instanceof SamzaSqlInputTransformer)) {
    /* In SamzaSQL no system descriptor should be set up by the user, so an existing descriptor is
     * expected only for fan-out (the same input stream used in multiple sql statements) or when the
     * same input is used twice in one statement (e.g., select ... from input as i1, input as i2 ...).
     * Any other transformer is an error. */
    throw new SamzaException("SamzaSQL Exception: existing transformer for " + systemName + " is not SamzaSqlInputTransformer");
  }

  InputDescriptor streamDescriptor = sysDescriptor.getInputDescriptor(streamId, new NoOpSerde<>());

  // Create the mapped input stream only once per source; later scans of the same source reuse it.
  if (!inputMsgStreams.containsKey(source)) {
    MessageStream<SamzaSqlInputMessage> rawStream = appDescriptor.getInputStream(streamDescriptor);
    inputMsgStreams.put(source, rawStream.map(new SystemMessageMapperFunction(source, queryId)));
  }

  MessageStream<SamzaSqlInputMessage> sourceStream = inputMsgStreams.get(source);
  MessageStream<SamzaSqlRelMessage> relMessageStream = sourceStream
      .filter(new FilterSystemMessageFunction(sourceName, queryId))
      .map(new ScanMapFunction(sourceName, queryId, queryLogicalId, logicalOpId));

  context.registerMessageStream(tableScan.getId(), relMessageStream);
}
use of org.apache.samza.table.descriptors.RemoteTableDescriptor in project samza by apache.
the class TestRemoteTableWithBatchEndToEnd method doTestStreamTableJoinRemoteTablePartialUpdates.
/**
 * Joins page views with profiles and sends the results as update messages
 * ({@code UpdateOptions.UPDATE_WITH_DEFAULTS}) to a batching remote table, then checks the number
 * of written records and the number of batch writes.
 *
 * @param testName key under which this run's counters and written records are stored
 * @param isCompactBatch {@code true} to use a {@code CompactBatchProvider}, {@code false} to use a
 *     {@code CompleteBatchProvider}
 * @throws Exception propagated from profile serialization or from the test run
 */
private void doTestStreamTableJoinRemoteTablePartialUpdates(String testName, boolean isCompactBatch) throws Exception {
  final InMemoryWriteFunction writeFn = new InMemoryWriteFunction(testName);

  // Fresh per-test bookkeeping.
  BATCH_READS.put(testName, new AtomicInteger());
  BATCH_WRITES.put(testName, new AtomicInteger());
  WRITTEN_RECORDS.put(testName, new HashMap<>());

  final int recordCount = 16;
  final int maxBatchSize = 4;
  final String serializedProfiles = Base64Serializer.serialize(generateProfiles(recordCount));

  final RateLimiter readLimiter = mock(RateLimiter.class, withSettings().serializable());
  final RateLimiter writeLimiter = mock(RateLimiter.class, withSettings().serializable());
  // Every table operation costs exactly one credit.
  final TableRateLimiter.CreditFunction oneCreditPerOp = (k, v, args) -> 1;

  final StreamApplication app = appDesc -> {
    RemoteTableDescriptor<Integer, Profile, Void> profileTableDesc =
        new RemoteTableDescriptor<>("profile-table-1");
    profileTableDesc
        .withReadFunction(InMemoryReadFunction.getInMemoryReadFunction(testName, serializedProfiles))
        .withRateLimiter(readLimiter, oneCreditPerOp, null);

    // The output table only needs a placeholder reader.
    TableReadFunction<Integer, EnrichedPageView> placeholderReadFn = new MyReadFunction();
    RemoteTableDescriptor<Integer, EnrichedPageView, EnrichedPageView> enrichedTableDesc =
        new RemoteTableDescriptor<>("enriched-page-view-table-1");
    enrichedTableDesc
        .withReadFunction(placeholderReadFn)
        .withWriteFunction(writeFn)
        .withRateLimiter(writeLimiter, oneCreditPerOp, oneCreditPerOp);

    // Both providers share the same batch size and a one-hour max delay.
    if (isCompactBatch) {
      enrichedTableDesc.withBatchProvider(
          new CompactBatchProvider<Integer, EnrichedPageView, EnrichedPageView>()
              .withMaxBatchSize(maxBatchSize)
              .withMaxBatchDelay(Duration.ofHours(1)));
    } else {
      enrichedTableDesc.withBatchProvider(
          new CompleteBatchProvider<Integer, EnrichedPageView, EnrichedPageView>()
              .withMaxBatchSize(maxBatchSize)
              .withMaxBatchDelay(Duration.ofHours(1)));
    }

    Table<KV<Integer, EnrichedPageView>> enrichedTable = appDesc.getTable(enrichedTableDesc);
    Table<KV<Integer, Profile>> profileTable = appDesc.getTable(profileTableDesc);
    DelegatingSystemDescriptor testSystem = new DelegatingSystemDescriptor("test");
    GenericInputDescriptor<PageView> pageViewInput =
        testSystem.getInputDescriptor("PageView", new NoOpSerde<>());

    appDesc.getInputStream(pageViewInput)
        .map(pageView -> new KV<>(pageView.getMemberId(), pageView))
        .join(profileTable, new PageViewToProfileJoinFunction())
        .map(enriched -> new KV<>(enriched.getMemberId(), UpdateMessage.of(enriched, enriched)))
        .sendTo(enrichedTable, UpdateOptions.UPDATE_WITH_DEFAULTS);
  };

  InMemorySystemDescriptor inMemorySystem = new InMemorySystemDescriptor("test");
  InMemoryInputDescriptor<PageView> inMemoryInput =
      inMemorySystem.getInputDescriptor("PageView", new NoOpSerde<>());

  TestRunner.of(app)
      .addInputStream(inMemoryInput, Arrays.asList(generatePageViewsWithDistinctKeys(recordCount)))
      .addConfig("task.max.concurrency", String.valueOf(recordCount))
      .addConfig("task.async.commit", String.valueOf(true))
      .run(Duration.ofSeconds(10));

  // One written entry per input record, delivered in recordCount / maxBatchSize batch writes.
  Assert.assertEquals(recordCount, WRITTEN_RECORDS.get(testName).size());
  Assert.assertNotNull(WRITTEN_RECORDS.get(testName).get(0));
  Assert.assertEquals(recordCount / maxBatchSize, BATCH_WRITES.get(testName).get());
}
use of org.apache.samza.table.descriptors.RemoteTableDescriptor in project samza by apache.
the class TestRemoteTableEndToEnd method doTestStreamTableJoinRemoteTable.
/**
 * Joins page views with profiles from a remote table and writes the joined records to a second
 * remote table, then verifies what was written.
 *
 * @param withCache when {@code true} both tables are obtained through {@code getCachingTable}
 *     instead of directly from the application descriptor
 * @param defaultCache forwarded to {@code getCachingTable}; only relevant when {@code withCache}
 * @param withUpdate when {@code true} records are sent as update messages with
 *     {@code UpdateOptions.UPDATE_WITH_DEFAULTS}; otherwise they are sent as plain key/value pairs
 * @param testName key under which the written records of this run are stored
 * @throws Exception propagated from profile serialization or from the test run
 */
private void doTestStreamTableJoinRemoteTable(boolean withCache, boolean defaultCache, boolean withUpdate, String testName) throws Exception {
  WRITTEN_RECORDS.put(testName, new ArrayList<>());

  // Page views use member ids up to 10, so 10 profiles are generated.
  final String serializedProfiles = Base64Serializer.serialize(generateProfiles(10));
  final RateLimiter readLimiter = mock(RateLimiter.class, withSettings().serializable());
  // Every table operation costs exactly one credit.
  final TableRateLimiter.CreditFunction oneCreditPerOp = (k, v, args) -> 1;

  final StreamApplication app = appDesc -> {
    final RemoteTableDescriptor profileTableDesc =
        new RemoteTableDescriptor<Integer, TestTableData.Profile, Void>("profile-table-1")
            .withReadFunction(InMemoryProfileReadFunction.getInMemoryReadFunction(serializedProfiles))
            .withRateLimiter(readLimiter, oneCreditPerOp, null);
    final RemoteTableDescriptor enrichedTableDesc =
        new RemoteTableDescriptor<Integer, EnrichedPageView, EnrichedPageView>("enriched-page-view-table-1")
            .withReadFunction(new NoOpTableReadFunction<>())
            .withReadRateLimiterDisabled()
            .withWriteFunction(new InMemoryEnrichedPageViewWriteFunction(testName))
            .withWriteRateLimit(1000);

    // Output table first, then join table, optionally behind a cache.
    final Table<KV<Integer, Profile>> enrichedTable;
    final Table<KV<Integer, Profile>> profileTable;
    if (withCache) {
      enrichedTable = getCachingTable(enrichedTableDesc, defaultCache, appDesc);
      profileTable = getCachingTable(profileTableDesc, defaultCache, appDesc);
    } else {
      enrichedTable = appDesc.getTable(enrichedTableDesc);
      profileTable = appDesc.getTable(profileTableDesc);
    }

    final DelegatingSystemDescriptor testSystem = new DelegatingSystemDescriptor("test");
    final GenericInputDescriptor<PageView> pageViewInput =
        testSystem.getInputDescriptor("PageView", new NoOpSerde<>());

    if (!withUpdate) {
      // Plain write path.
      appDesc.getInputStream(pageViewInput)
          .map(pageView -> new KV<>(pageView.getMemberId(), pageView))
          .join(profileTable, new PageViewToProfileJoinFunction())
          .map(enriched -> KV.of(enriched.getMemberId(), enriched))
          .sendTo(enrichedTable);
    } else {
      // Update path.
      appDesc.getInputStream(pageViewInput)
          .map(pageView -> new KV<>(pageView.getMemberId(), pageView))
          .join(profileTable, new PageViewToProfileJoinFunction())
          .map(enriched -> new KV(enriched.getMemberId(), UpdateMessage.of(enriched, enriched)))
          .sendTo(enrichedTable, UpdateOptions.UPDATE_WITH_DEFAULTS);
    }
  };

  final int pageViewCount = 40;
  final InMemorySystemDescriptor inMemorySystem = new InMemorySystemDescriptor("test");
  final InMemoryInputDescriptor<PageView> inMemoryInput =
      inMemorySystem.getInputDescriptor("PageView", new NoOpSerde<>());

  TestRunner.of(app)
      .addInputStream(inMemoryInput, TestTableData.generatePartitionedPageViews(pageViewCount, 4))
      .run(Duration.ofSeconds(10));

  // One record per page view, and no written company value may contain "-".
  Assert.assertEquals(pageViewCount, WRITTEN_RECORDS.get(testName).size());
  Assert.assertNotNull(WRITTEN_RECORDS.get(testName).get(0));
  WRITTEN_RECORDS.get(testName).forEach(written -> Assert.assertFalse(written.company.contains("-")));
}
use of org.apache.samza.table.descriptors.RemoteTableDescriptor in project samza by apache.
the class TestRemoteTableEndToEnd method doTestStreamTableJoinRemoteTableWithFirstTimeUpdates.
/**
 * Joins page views with profiles and sends the results as update messages to a remote table whose
 * write function counts puts and updates.
 *
 * <p>With {@code withDefaults} the messages are built via {@code UpdateMessage.of(m, m)} and sent
 * with {@code UpdateOptions.UPDATE_WITH_DEFAULTS}, and the put/update counters are asserted after
 * the run. Without it the messages are built via {@code UpdateMessage.of(m)} and sent with
 * {@code UpdateOptions.UPDATE_ONLY}, and nothing is asserted here.
 *
 * @param testName prefix of the counter keys used by this run
 * @param withDefaults selects the update mode as described above
 * @param failUpdatesAlways forwarded to {@code InMemoryEnrichedPageViewWriteFunction2}
 * @throws IOException declared by the original signature; presumably from profile serialization
 */
private void doTestStreamTableJoinRemoteTableWithFirstTimeUpdates(String testName, boolean withDefaults, boolean failUpdatesAlways) throws IOException {
  final String serializedProfiles = Base64Serializer.serialize(generateProfiles(30));
  final RateLimiter readLimiter = mock(RateLimiter.class, withSettings().serializable());
  // Every table operation costs exactly one credit.
  final TableRateLimiter.CreditFunction oneCreditPerOp = (k, v, args) -> 1;

  final StreamApplication app = appDesc -> {
    final RemoteTableDescriptor profileTableDesc =
        new RemoteTableDescriptor<Integer, TestTableData.Profile, Void>("profile-table-1")
            .withReadFunction(InMemoryProfileReadFunction.getInMemoryReadFunction(serializedProfiles))
            .withRateLimiter(readLimiter, oneCreditPerOp, null);
    final RemoteTableDescriptor enrichedTableDesc =
        new RemoteTableDescriptor<Integer, EnrichedPageView, EnrichedPageView>("enriched-page-view-table-1")
            .withReadFunction(new NoOpTableReadFunction<>())
            .withReadRateLimiterDisabled()
            .withWriteFunction(new InMemoryEnrichedPageViewWriteFunction2(testName, failUpdatesAlways))
            .withWriteRateLimit(1000);

    // Register the per-test put/update counters.
    COUNTERS.put(testName + "-put", new AtomicInteger());
    COUNTERS.put(testName + "-update", new AtomicInteger());

    final Table<KV<Integer, Profile>> enrichedTable = appDesc.getTable(enrichedTableDesc);
    final Table<KV<Integer, Profile>> profileTable = appDesc.getTable(profileTableDesc);
    final DelegatingSystemDescriptor testSystem = new DelegatingSystemDescriptor("test");
    final GenericInputDescriptor<PageView> pageViewInput =
        testSystem.getInputDescriptor("PageView", new NoOpSerde<>());

    if (!withDefaults) {
      // Update-only: the message is built from the single-argument factory.
      appDesc.getInputStream(pageViewInput)
          .map(pageView -> new KV<>(pageView.getMemberId(), pageView))
          .join(profileTable, new PageViewToProfileJoinFunction())
          .map(enriched -> new KV(enriched.getMemberId(), UpdateMessage.of(enriched)))
          .sendTo(enrichedTable, UpdateOptions.UPDATE_ONLY);
    } else {
      // Update with defaults: the message is built from the two-argument factory.
      appDesc.getInputStream(pageViewInput)
          .map(pageView -> new KV<>(pageView.getMemberId(), pageView))
          .join(profileTable, new PageViewToProfileJoinFunction())
          .map(enriched -> new KV(enriched.getMemberId(), UpdateMessage.of(enriched, enriched)))
          .sendTo(enrichedTable, UpdateOptions.UPDATE_WITH_DEFAULTS);
    }
  };

  final int pageViewCount = 15;
  final InMemorySystemDescriptor inMemorySystem = new InMemorySystemDescriptor("test");
  final InMemoryInputDescriptor<PageView> inMemoryInput =
      inMemorySystem.getInputDescriptor("PageView", new NoOpSerde<>());
  // All page views go into a single partition.
  final Map<Integer, List<PageView>> partitionedPageViews =
      TestTableData.generatePartitionedPageViews(pageViewCount, 1);

  TestRunner.of(app)
      .addInputStream(inMemoryInput, partitionedPageViews)
      .run(Duration.ofSeconds(10));

  // Counter expectations are only checked for the with-defaults mode.
  if (!withDefaults) {
    return;
  }
  Assert.assertEquals(10, COUNTERS.get(testName + "-put").intValue());
  Assert.assertEquals(15, COUNTERS.get(testName + "-update").intValue());
}
Aggregations