use of org.apache.samza.table.Table in project samza by apache.
the class TestRemoteTableEndToEnd method testSendToUpdatesWithoutUpdateOptions.
/**
 * Expects a {@link SamzaException} because a {@code KV<K, UpdateMessage>} stream is handed to
 * {@code sendTo} without any {@code UpdateOptions}.
 */
@Test(expected = SamzaException.class)
public void testSendToUpdatesWithoutUpdateOptions() throws Exception {
  final int pageViewCount = 40;
  // the largest member id used by the page views is 10
  final String serializedProfiles = Base64Serializer.serialize(generateProfiles(10));
  final RateLimiter mockReadLimiter = mock(RateLimiter.class, withSettings().serializable());
  final TableRateLimiter.CreditFunction oneCreditPerCall = (k, v, args) -> 1;

  final StreamApplication application = descriptor -> {
    // Table the page views are joined against.
    final RemoteTableDescriptor profileTableDesc =
        new RemoteTableDescriptor<Integer, TestTableData.Profile, Void>("profile-table-1")
            .withReadFunction(InMemoryProfileReadFunction.getInMemoryReadFunction(serializedProfiles))
            .withRateLimiter(mockReadLimiter, oneCreditPerCall, null);
    // Table the update messages are sent to.
    final RemoteTableDescriptor enrichedTableDesc =
        new RemoteTableDescriptor<Integer, EnrichedPageView, EnrichedPageView>("enriched-page-view-table-1")
            .withReadFunction(new NoOpTableReadFunction<>())
            .withReadRateLimiterDisabled()
            .withWriteFunction(new InMemoryEnrichedPageViewWriteFunction2("testUpdateWithoutUpdateOptions", false))
            .withWriteRateLimit(1000);

    final Table<KV<Integer, Profile>> enrichedTable = descriptor.getTable(enrichedTableDesc);
    final Table<KV<Integer, Profile>> profileTable = descriptor.getTable(profileTableDesc);

    final DelegatingSystemDescriptor delegatingSystem = new DelegatingSystemDescriptor("test");
    final GenericInputDescriptor<PageView> pageViewInput =
        delegatingSystem.getInputDescriptor("PageView", new NoOpSerde<>());

    // Note: sendTo is deliberately called without UpdateOptions here.
    descriptor.getInputStream(pageViewInput)
        .map(pageView -> new KV<>(pageView.getMemberId(), pageView))
        .join(profileTable, new PageViewToProfileJoinFunction())
        .map(enriched -> new KV(enriched.getMemberId(), UpdateMessage.of(enriched, enriched)))
        .sendTo(enrichedTable);
  };

  final InMemorySystemDescriptor inMemorySystem = new InMemorySystemDescriptor("test");
  final InMemoryInputDescriptor<PageView> pageViewStream =
      inMemorySystem.getInputDescriptor("PageView", new NoOpSerde<>());
  TestRunner.of(application)
      .addInputStream(pageViewStream, TestTableData.generatePartitionedPageViews(pageViewCount, 4))
      .run(Duration.ofSeconds(10));
}
use of org.apache.samza.table.Table in project samza by apache.
the class TestRemoteTableEndToEnd method testSendToUpdatesFailureAfterPutDefault.
/**
 * Checks the failure path where the update issued after a put-default fails.
 *
 * <p>The test fails with the following exception:
 * {@code org.apache.samza.SamzaException: Update after Put default failed with exception.}
 */
@Test(expected = SamzaException.class)
public void testSendToUpdatesFailureAfterPutDefault() throws Exception {
  final String testName = "testSendToUpdatesFailureAfterPutDefault";
  final int pageViewCount = 15;
  final String serializedProfiles = Base64Serializer.serialize(generateProfiles(30));
  final RateLimiter mockReadLimiter = mock(RateLimiter.class, withSettings().serializable());
  final TableRateLimiter.CreditFunction oneCreditPerCall = (k, v, args) -> 1;

  final StreamApplication application = descriptor -> {
    // Table the page views are joined against.
    final RemoteTableDescriptor profileTableDesc =
        new RemoteTableDescriptor<Integer, TestTableData.Profile, Void>("profile-table-1")
            .withReadFunction(InMemoryProfileReadFunction.getInMemoryReadFunction(serializedProfiles))
            .withRateLimiter(mockReadLimiter, oneCreditPerCall, null);
    // Table the update messages are sent to.
    final RemoteTableDescriptor enrichedTableDesc =
        new RemoteTableDescriptor<Integer, EnrichedPageView, EnrichedPageView>("enriched-page-view-table-1")
            .withReadFunction(new NoOpTableReadFunction<>())
            .withReadRateLimiterDisabled()
            .withWriteFunction(new InMemoryEnrichedPageViewWriteFunction2(testName, false, true))
            .withWriteRateLimit(1000);

    final Table<KV<Integer, Profile>> enrichedTable = descriptor.getTable(enrichedTableDesc);
    final Table<KV<Integer, Profile>> profileTable = descriptor.getTable(profileTableDesc);

    final DelegatingSystemDescriptor delegatingSystem = new DelegatingSystemDescriptor("test");
    final GenericInputDescriptor<PageView> pageViewInput =
        delegatingSystem.getInputDescriptor("PageView", new NoOpSerde<>());

    descriptor.getInputStream(pageViewInput)
        .map(pageView -> new KV<>(pageView.getMemberId(), pageView))
        .join(profileTable, new PageViewToProfileJoinFunction())
        .map(enriched -> new KV(enriched.getMemberId(), UpdateMessage.of(enriched, enriched)))
        .sendTo(enrichedTable, UpdateOptions.UPDATE_WITH_DEFAULTS);
  };

  final InMemorySystemDescriptor inMemorySystem = new InMemorySystemDescriptor("test");
  final InMemoryInputDescriptor<PageView> pageViewStream =
      inMemorySystem.getInputDescriptor("PageView", new NoOpSerde<>());
  final Map<Integer, List<PageView>> partitionedPageViews =
      TestTableData.generatePartitionedPageViews(pageViewCount, 1);
  TestRunner.of(application)
      .addInputStream(pageViewStream, partitionedPageViews)
      .run(Duration.ofSeconds(10));
}
use of org.apache.samza.table.Table in project samza by apache.
the class TestRemoteTableEndToEnd method testSendToWithDefaultsAndUpdateOnly.
/**
 * Sends update messages to the output table with {@code UpdateOptions.UPDATE_ONLY} and expects
 * the run to fail.
 *
 * <p>The test fails with the following exception:
 * {@code org.apache.samza.SamzaException: Put default failed for update as the UpdateOptions was
 * set to UPDATE_ONLY. Please use UpdateOptions.UPDATE_WITH_DEFAULTS instead.}
 */
@Test(expected = SamzaException.class)
public void testSendToWithDefaultsAndUpdateOnly() throws Exception {
  final String testName = "testSendToWithDefaultsAndUpdateOnly";
  final int pageViewCount = 15;
  final String serializedProfiles = Base64Serializer.serialize(generateProfiles(30));
  final RateLimiter mockReadLimiter = mock(RateLimiter.class, withSettings().serializable());
  final TableRateLimiter.CreditFunction oneCreditPerCall = (k, v, args) -> 1;

  final StreamApplication application = descriptor -> {
    // Table the page views are joined against.
    final RemoteTableDescriptor profileTableDesc =
        new RemoteTableDescriptor<Integer, TestTableData.Profile, Void>("profile-table-1")
            .withReadFunction(InMemoryProfileReadFunction.getInMemoryReadFunction(serializedProfiles))
            .withRateLimiter(mockReadLimiter, oneCreditPerCall, null);
    // Table the update messages are sent to.
    final RemoteTableDescriptor enrichedTableDesc =
        new RemoteTableDescriptor<Integer, EnrichedPageView, EnrichedPageView>("enriched-page-view-table-1")
            .withReadFunction(new NoOpTableReadFunction<>())
            .withReadRateLimiterDisabled()
            .withWriteFunction(new InMemoryEnrichedPageViewWriteFunction2(testName, false))
            .withWriteRateLimit(1000);

    // register the counters that count puts and updates for this test
    COUNTERS.put(testName + "-put", new AtomicInteger());
    COUNTERS.put(testName + "-update", new AtomicInteger());

    final Table<KV<Integer, Profile>> enrichedTable = descriptor.getTable(enrichedTableDesc);
    final Table<KV<Integer, Profile>> profileTable = descriptor.getTable(profileTableDesc);

    final DelegatingSystemDescriptor delegatingSystem = new DelegatingSystemDescriptor("test");
    final GenericInputDescriptor<PageView> pageViewInput =
        delegatingSystem.getInputDescriptor("PageView", new NoOpSerde<>());

    descriptor.getInputStream(pageViewInput)
        .map(pageView -> new KV<>(pageView.getMemberId(), pageView))
        .join(profileTable, new PageViewToProfileJoinFunction())
        .map(enriched -> new KV(enriched.getMemberId(), UpdateMessage.of(enriched, enriched)))
        .sendTo(enrichedTable, UpdateOptions.UPDATE_ONLY);
  };

  final InMemorySystemDescriptor inMemorySystem = new InMemorySystemDescriptor("test");
  final InMemoryInputDescriptor<PageView> pageViewStream =
      inMemorySystem.getInputDescriptor("PageView", new NoOpSerde<>());
  final Map<Integer, List<PageView>> partitionedPageViews =
      TestTableData.generatePartitionedPageViews(pageViewCount, 1);
  TestRunner.of(application)
      .addInputStream(pageViewStream, partitionedPageViews)
      .run(Duration.ofSeconds(10));
}
use of org.apache.samza.table.Table in project samza by apache.
the class QueryTranslator method sendToOutputStream.
/**
 * Connects the message stream produced for {@code node} to the sink named by {@code sinkStream}.
 *
 * <p>The relational messages are first converted with an {@link OutputMapFunction}. When the sink
 * configuration carries a table descriptor, the converted stream is sent to that table. Otherwise
 * it is sent to the sink's output stream and, if system-event processing is enabled, the system
 * messages of every registered input stream are forwarded to that same output stream.
 *
 * @param queryLogicalId logical id of the query, passed to the output map function
 * @param logicalOpId logical id of the operator, passed to the output map function
 * @param sinkStream source name used to look up the sink's I/O configuration
 * @param appDesc application descriptor used to obtain the output stream or table
 * @param translatorContext context holding the message stream registered for {@code node}
 * @param node relational node whose message stream is written to the sink
 * @param queryId id of the query, passed to the output map function
 * @throws SamzaException if the sink is a table and the table cannot be obtained
 */
private void sendToOutputStream(String queryLogicalId, String logicalOpId, String sinkStream, StreamApplicationDescriptor appDesc, TranslatorContext translatorContext, RelNode node, int queryId) {
  SqlIOConfig sinkConfig = sqlConfig.getOutputSystemStreamConfigsBySource().get(sinkStream);
  MessageStream<SamzaSqlRelMessage> relMessages = translatorContext.getMessageStream(node.getId());
  MessageStream<KV<Object, Object>> sinkMessages =
      relMessages.map(new OutputMapFunction(queryLogicalId, logicalOpId, sinkStream, queryId));
  Optional<TableDescriptor> sinkTableDescriptor = sinkConfig.getTableDescriptor();

  // Table sink: write to the table and finish.
  if (sinkTableDescriptor.isPresent()) {
    Table sinkTable = appDesc.getTable(sinkTableDescriptor.get());
    if (sinkTable == null) {
      throw new SamzaException("Failed to obtain table descriptor of " + sinkConfig.getSource());
    }
    sinkMessages.sendTo(sinkTable);
    return;
  }

  // Stream sink: reuse the system descriptor and output stream if they were created before.
  KVSerde<Object, Object> passThroughSerde = KVSerde.of(new NoOpSerde<>(), new NoOpSerde<>());
  DelegatingSystemDescriptor systemDescriptor =
      systemDescriptors.computeIfAbsent(sinkConfig.getSystemName(), DelegatingSystemDescriptor::new);
  GenericOutputDescriptor<KV<Object, Object>> outputDescriptor =
      systemDescriptor.getOutputDescriptor(sinkConfig.getStreamId(), passThroughSerde);
  OutputStream sink =
      outputMsgStreams.computeIfAbsent(sinkConfig.getSource(), v -> appDesc.getOutputStream(outputDescriptor));
  sinkMessages.sendTo(sink);

  // Process system events only if the output is a stream.
  if (!sqlConfig.isProcessSystemEvents()) {
    return;
  }
  for (MessageStream<SamzaSqlInputMessage> inputStream : inputMsgStreams.values()) {
    MessageStream<KV<Object, Object>> systemEvents = inputStream
        .filter(message -> message.getMetadata().isSystemMessage())
        .map(SamzaSqlInputMessage::getKeyAndMessageKV);
    systemEvents.sendTo(sink);
  }
}
use of org.apache.samza.table.Table in project samza by apache.
the class JoinTranslator method validateJoinQuery.
/**
 * Validates that a join can be translated, throwing a {@link SamzaException} otherwise.
 *
 * <p>Checks, in order:
 * <ol>
 *   <li>the join type is INNER, LEFT or RIGHT;</li>
 *   <li>exactly one side of the join is a stream (any non-stream input counts as a table);</li>
 *   <li>for a LEFT join the table is on the right, for a RIGHT join the table is on the left;</li>
 *   <li>the join condition decomposes into at least one conjunct (no cross join);</li>
 *   <li>no conjunct contains a literal;</li>
 *   <li>for a remote table only: the condition is a single predicate referencing exactly one
 *       table-side column, and that reference passes {@code isValidRemoteJoinRef}.</li>
 * </ol>
 *
 * @param join the logical join to validate
 * @param inputTypeOnLeft input type of the left side of the join
 * @param inputTypeOnRight input type of the right side of the join
 * @throws SamzaException if any of the checks above fails
 */
private void validateJoinQuery(LogicalJoin join, JoinInputNode.InputType inputTypeOnLeft, JoinInputNode.InputType inputTypeOnRight) {
  final JoinRelType joinType = join.getJoinType();
  switch (joinType) {
    case INNER:
    case LEFT:
    case RIGHT:
      break;
    default:
      throw new SamzaException("Query with only INNER and LEFT/RIGHT OUTER join are supported.");
  }

  final boolean leftIsTable = inputTypeOnLeft != JoinInputNode.InputType.STREAM;
  final boolean rightIsTable = inputTypeOnRight != JoinInputNode.InputType.STREAM;
  if (leftIsTable == rightIsTable) {
    // Both sides have the same kind; exactly one stream and one table are required.
    if (!leftIsTable) {
      throw new SamzaException("Invalid query with both sides of join being denoted as 'stream'. "
          + "Stream-stream join is not yet supported. " + dumpRelPlanForNode(join));
    }
    throw new SamzaException("Invalid query with both sides of join being denoted as 'table'. "
        + dumpRelPlanForNode(join));
  }

  // For an outer join the stream has to be on the side named by the join type.
  if (joinType == JoinRelType.LEFT && leftIsTable) {
    throw new SamzaException("Invalid query for outer left join. Left side of the join should be a 'stream' and "
        + "right side of join should be a 'table'. " + dumpRelPlanForNode(join));
  }
  if (joinType == JoinRelType.RIGHT && rightIsTable) {
    throw new SamzaException("Invalid query for outer right join. Left side of the join should be a 'table' and "
        + "right side of join should be a 'stream'. " + dumpRelPlanForNode(join));
  }

  final List<RexNode> conjunctionList = new ArrayList<>();
  decomposeAndValidateConjunction(join.getCondition(), conjunctionList);
  if (conjunctionList.isEmpty()) {
    throw new SamzaException("Query results in a cross join, which is not supported. Please optimize the query."
        + " It is expected that the joins should include JOIN ON operator in the sql query.");
  }

  // TODO Not sure why we can not allow literal as part of the join condition will revisit this in another scope
  final RexShuttle literalRejector = new RexShuttle() {
    @Override
    public RexNode visitLiteral(RexLiteral literal) {
      throw new SamzaException("Join Condition can not allow literal " + literal.toString() + " join node" + join.getDigest());
    }
  };
  for (RexNode conjunct : conjunctionList) {
    conjunct.accept(literalRejector);
  }

  final JoinInputNode.InputType tableInputType = rightIsTable ? inputTypeOnRight : inputTypeOnLeft;
  if (!tableInputType.equals(JoinInputNode.InputType.REMOTE_TABLE)) {
    // Not a remote table, so the project on the key column does not need validating.
    return;
  }

  /*
    For a remote table the join condition and the project above the remote table scan are validated.
    - As of today the filter needs to be exactly one equi-join using the __key__ column (see SAMZA-2554)
    - The project on top of the remote table has to contain only simple input references to any of the
      columns used in the join.
   */
  // Collect every input reference used by the join condition.
  final List<RexInputRef> conditionRefs = new ArrayList<>();
  join.getCondition().accept(new RexShuttle() {
    @Override
    public RexNode visitInputRef(RexInputRef ref) {
      conditionRefs.add(ref);
      return ref;
    }
  });

  // Range [tableStartIndex, tableEndIndex) of the remote table's columns within the join row.
  final int leftFieldCount = join.getLeft().getRowType().getFieldCount();
  final int tableStartIndex = rightIsTable ? leftFieldCount : 0;
  final int tableEndIndex = rightIsTable ? join.getRowType().getFieldCount() : leftFieldCount;

  // Keep only the table-side references and rebase them to 0-based indexes within the table.
  final List<Integer> tableRefIndexes = conditionRefs.stream()
      .map(RexInputRef::getIndex)
      .filter(index -> index >= tableStartIndex && index < tableEndIndex)
      .map(index -> index - tableStartIndex)
      .sorted()
      .collect(Collectors.toList());

  if (conjunctionList.size() != 1 || tableRefIndexes.size() != 1) {
    // TODO We can relax this by allowing another filter to be evaluated post lookup see SAMZA-2554
    throw new SamzaException("Invalid query for join condition must contain exactly one predicate for remote table on __key__ column "
        + dumpRelPlanForNode(join));
  }

  // Validate the project: follow the input and ensure it is a simple ref with no rexCall in the way.
  if (!isValidRemoteJoinRef(tableRefIndexes.get(0), rightIsTable ? join.getRight() : join.getLeft())) {
    throw new SamzaException("Invalid query for join condition can not have an expression and must be reference "
        + SamzaSqlRelMessage.KEY_NAME + " column " + dumpRelPlanForNode(join));
  }
}
Aggregations