Use of org.apache.beam.vendor.calcite.v1_28_0.org.apache.calcite.schema.Table in project beam by apache: class PubsubTableProviderIT, method testSQLWithBytePayload.
@Test
public void testSQLWithBytePayload() throws Exception {
  // Prepare messages to send later
  List<PubsubMessage> messages =
      ImmutableList.of(
          objectsProvider.messageIdName(ts(1), 3, "foo"),
          objectsProvider.messageIdName(ts(2), 5, "bar"),
          objectsProvider.messageIdName(ts(3), 7, "baz"));
  String createTableString =
      String.format(
          "CREATE EXTERNAL TABLE message (\n"
              + "event_timestamp TIMESTAMP, \n"
              + "attributes MAP<VARCHAR, VARCHAR>, \n"
              + "payload VARBINARY \n"
              + ") \n"
              + "TYPE '%s' \n"
              + "LOCATION '%s' \n"
              + "TBLPROPERTIES '{ " + "\"protoClass\" : \"%s\", " + "\"timestampAttributeKey\" : \"ts\" }'",
          tableProvider.getTableType(),
          eventsTopic.topicPath(),
          PayloadMessages.SimpleMessage.class.getName());
  String queryString = "SELECT message.payload AS some_bytes FROM message";
  // Initialize SQL environment and create the pubsub table
  BeamSqlEnv sqlEnv = BeamSqlEnv.inMemory(new PubsubTableProvider());
  sqlEnv.executeDdl(createTableString);
  // Apply the PTransform to query the pubsub topic
  PCollection<Row> queryOutput = query(sqlEnv, pipeline, queryString);
  // Observe the query results and send success signal after seeing the expected messages
  Schema justBytesSchema =
      Schema.builder().addField("some_bytes", FieldType.BYTES.withNullable(true)).build();
  Row expectedRow0 = row(justBytesSchema, (Object) messages.get(0).getPayload());
  Row expectedRow1 = row(justBytesSchema, (Object) messages.get(1).getPayload());
  Row expectedRow2 = row(justBytesSchema, (Object) messages.get(2).getPayload());
  Set<Row> expected = ImmutableSet.of(expectedRow0, expectedRow1, expectedRow2);
  queryOutput.apply(
      "waitForSuccess",
      resultSignal.signalSuccessWhen(
          SchemaCoder.of(justBytesSchema), observedRows -> observedRows.equals(expected)));
  // Start the pipeline
  pipeline.run();
  // Block until a subscription for this topic exists
  eventsTopic.assertSubscriptionEventuallyCreated(
      pipeline.getOptions().as(GcpOptions.class).getProject(), Duration.standardMinutes(5));
  // Start publishing the messages when main pipeline is started and signaling topic is ready
  eventsTopic.publish(messages);
  // Poll the signaling topic for success message
  resultSignal.waitForSuccess(timeout);
}
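The row(schema, values...) helper used above is test scaffolding that is not shown in this snippet. As a minimal sketch, assuming it simply wraps Beam's Row builder, one expected row could be constructed directly like this (the payload bytes here are illustrative):

import java.nio.charset.StandardCharsets;
import org.apache.beam.sdk.schemas.Schema;
import org.apache.beam.sdk.schemas.Schema.FieldType;
import org.apache.beam.sdk.values.Row;

public class ExpectedRowSketch {
  public static void main(String[] args) {
    // Same single-field schema the test builds: a nullable BYTES column named "some_bytes".
    Schema justBytesSchema =
        Schema.builder().addField("some_bytes", FieldType.BYTES.withNullable(true)).build();
    // Assumed equivalent of row(justBytesSchema, payload): wrap the raw payload bytes in a Row.
    byte[] payload = "foo".getBytes(StandardCharsets.UTF_8);
    Row expectedRow = Row.withSchema(justBytesSchema).addValue(payload).build();
    System.out.println(expectedRow);
  }
}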
Use of org.apache.beam.vendor.calcite.v1_28_0.org.apache.calcite.schema.Table in project beam by apache: class PubsubTableProviderIT, method testSQLSelectsArrayAttributes.
@Ignore("https://issues.apache.org/jira/browse/BEAM-12320")
@Test
public void testSQLSelectsArrayAttributes() throws Exception {
  String createTableString =
      String.format(
          "CREATE EXTERNAL TABLE message (\n"
              + "event_timestamp TIMESTAMP, \n"
              + "attributes ARRAY<ROW<key VARCHAR, `value` VARCHAR>>, \n"
              + "payload ROW< \n" + " id INTEGER, \n" + " name VARCHAR \n" + " > \n"
              + ") \n"
              + "TYPE '%s' \n"
              + "LOCATION '%s' \n"
              + "TBLPROPERTIES '{ " + "%s" + "\"protoClass\" : \"%s\", " + "\"timestampAttributeKey\" : \"ts\" }'",
          tableProvider.getTableType(),
          eventsTopic.topicPath(),
          payloadFormatParam(),
          PayloadMessages.SimpleMessage.class.getName());
  String queryString =
      "SELECT message.payload.id, attributes[1].key AS a1, attributes[2].key AS a2 FROM message";
  // Initialize SQL environment and create the pubsub table
  BeamSqlEnv sqlEnv = BeamSqlEnv.inMemory(new PubsubTableProvider());
  sqlEnv.executeDdl(createTableString);
  // Apply the PTransform to query the pubsub topic
  PCollection<Row> queryOutput = query(sqlEnv, pipeline, queryString);
  // Observe the query results and send success signal after seeing the expected messages
  queryOutput.apply(
      "waitForSuccess",
      resultSignal.signalSuccessWhen(
          SchemaCoder.of(PAYLOAD_SCHEMA),
          observedRows -> {
            Map<Integer, String> entries = new HashMap<>();
            for (Row row : observedRows) {
              if ("ts".equals(row.getString("a1"))) {
                entries.put(row.getInt32("id"), row.getString("a2"));
              } else {
                entries.put(row.getInt32("id"), row.getString("a1"));
              }
            }
            LOG.info("Entries: {}", entries);
            return entries.equals(ImmutableMap.of(3, "foo", 5, "bar", 7, "baz"));
          }));
  // Start the pipeline
  pipeline.run();
  // Block until a subscription for this topic exists
  eventsTopic.assertSubscriptionEventuallyCreated(
      pipeline.getOptions().as(GcpOptions.class).getProject(), Duration.standardMinutes(5));
  // Start publishing the messages when main pipeline is started and signaling topic is ready
  eventsTopic.publish(
      ImmutableList.of(
          objectsProvider.messageIdName(ts(1), 3, "foo"),
          objectsProvider.messageIdName(ts(2), 5, "bar"),
          objectsProvider.messageIdName(ts(3), 7, "baz")));
  // Poll the signaling topic for success message
  resultSignal.waitForSuccess(timeout);
}
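The DDL above models the Pub/Sub attributes as ARRAY<ROW<key, `value`>> rather than a map, which is why the query reads attributes[1].key and attributes[2].key and the assertion accepts the attribute entries in either order. As a rough, hedged illustration (not taken from the test code), the same table shape expressed directly as a Beam Schema might look like this:

import org.apache.beam.sdk.schemas.Schema;
import org.apache.beam.sdk.schemas.Schema.FieldType;

public class ArrayAttributesSchemaSketch {
  public static void main(String[] args) {
    // ROW<key VARCHAR, `value` VARCHAR>: one entry of the attributes array.
    Schema attributeSchema = Schema.builder().addStringField("key").addStringField("value").build();
    // ROW<id INTEGER, name VARCHAR>: the decoded message payload.
    Schema payloadSchema = Schema.builder().addInt32Field("id").addStringField("name").build();
    // Approximate Beam-schema equivalent of the CREATE EXTERNAL TABLE statement above.
    Schema tableSchema =
        Schema.builder()
            .addDateTimeField("event_timestamp")
            .addArrayField("attributes", FieldType.row(attributeSchema))
            .addRowField("payload", payloadSchema)
            .build();
    System.out.println(tableSchema);
  }
}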
Use of org.apache.beam.vendor.calcite.v1_28_0.org.apache.calcite.schema.Table in project beam by apache: class PubsubTableProviderIT, method testUsesDlq.
@Test
@SuppressWarnings("unchecked")
public void testUsesDlq() throws Exception {
  String createTableString =
      String.format(
          "CREATE EXTERNAL TABLE message (\n"
              + "event_timestamp TIMESTAMP, \n"
              + "attributes MAP<VARCHAR, VARCHAR>, \n"
              + "payload ROW< \n" + " id INTEGER, \n" + " name VARCHAR \n" + " > \n"
              + ") \n"
              + "TYPE '%s' \n"
              + "LOCATION '%s' \n"
              + "TBLPROPERTIES " + " '{ " + " %s" + " \"timestampAttributeKey\" : \"ts\", "
              + " \"deadLetterQueue\" : \"%s\", " + " \"protoClass\" : \"%s\" " + " }'",
          tableProvider.getTableType(),
          eventsTopic.topicPath(),
          payloadFormatParam(),
          dlqTopic.topicPath(),
          PayloadMessages.SimpleMessage.class.getName());
  String queryString = "SELECT message.payload.id, message.payload.name from message";
  // Initialize SQL environment and create the pubsub table
  BeamSqlEnv sqlEnv = BeamSqlEnv.inMemory(new PubsubTableProvider());
  sqlEnv.executeDdl(createTableString);
  // Apply the PTransform to query the pubsub topic
  PCollection<Row> queryOutput = query(sqlEnv, pipeline, queryString);
  // Observe the query results and send success signal after seeing the expected messages
  queryOutput.apply(
      "waitForSuccess",
      resultSignal.signalSuccessWhen(
          SchemaCoder.of(PAYLOAD_SCHEMA),
          observedRows -> observedRows.equals(ImmutableSet.of(
              row(PAYLOAD_SCHEMA, 3, "foo"), row(PAYLOAD_SCHEMA, 5, "bar"), row(PAYLOAD_SCHEMA, 7, "baz")))));
  // Start the pipeline
  pipeline.run();
  // Block until a subscription for this topic exists
  eventsTopic.assertSubscriptionEventuallyCreated(
      pipeline.getOptions().as(GcpOptions.class).getProject(), Duration.standardMinutes(5));
  // Start publishing the messages when main pipeline is started and signaling topics are ready
  eventsTopic.publish(
      ImmutableList.of(
          objectsProvider.messageIdName(ts(1), 3, "foo"),
          objectsProvider.messageIdName(ts(2), 5, "bar"),
          objectsProvider.messageIdName(ts(3), 7, "baz"),
          messagePayload(ts(4), "{ - }", ImmutableMap.of()), // invalid message, will go to DLQ
          messagePayload(ts(5), "{ + }", ImmutableMap.of()))); // invalid message, will go to DLQ
  // Poll the signaling topic for success message
  resultSignal.waitForSuccess(timeout);
  dlqTopic
      .assertThatTopicEventuallyReceives(matcherPayload(ts(4), "{ - }"), matcherPayload(ts(5), "{ + }"))
      .waitForUpTo(Duration.standardSeconds(40));
}
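The two messages with unparseable payloads never reach the query output; the "deadLetterQueue" table property routes them to dlqTopic, which the final assertion checks. As a hedged sketch of a separate, hypothetical recovery job (not part of this test), the raw failures could be consumed from that topic with Beam's PubsubIO; the topic path below is a placeholder:

import org.apache.beam.sdk.Pipeline;
import org.apache.beam.sdk.io.gcp.pubsub.PubsubIO;
import org.apache.beam.sdk.io.gcp.pubsub.PubsubMessage;
import org.apache.beam.sdk.options.PipelineOptionsFactory;
import org.apache.beam.sdk.transforms.MapElements;
import org.apache.beam.sdk.values.TypeDescriptors;

public class DlqReaderSketch {
  public static void main(String[] args) {
    Pipeline p = Pipeline.create(PipelineOptionsFactory.fromArgs(args).create());
    // Placeholder dead-letter topic; in the test it would be dlqTopic.topicPath().
    String dlqTopicPath = "projects/my-project/topics/my-dlq-topic";
    p.apply("ReadDlq", PubsubIO.readMessagesWithAttributes().fromTopic(dlqTopicPath))
        // Turn each failed message into a short description; a real recovery job would
        // repair, re-publish, or archive the payload instead.
        .apply(
            "Describe",
            MapElements.into(TypeDescriptors.strings())
                .via((PubsubMessage m) -> "dead-lettered payload of " + m.getPayload().length + " bytes"));
    p.run();
  }
}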
Use of org.apache.beam.vendor.calcite.v1_28_0.org.apache.calcite.schema.Table in project beam by apache: class BeamZetaSqlCatalog, method addTableToLeafCatalog.
/**
 * Assumes the last element of {@code tablePath} is a table name and every element before it is a
 * catalog, so the logic is to create nested catalogs down to the last level and then add the
 * table at that level.
 *
 * <p>The table schema is extracted from the Calcite schema based on the table name resolution
 * strategy, e.g. either by drilling down the schema.getSubSchema() path or by joining the table
 * name with dots to construct a single compound identifier (e.g. the Data Catalog use case).
 */
private void addTableToLeafCatalog(List<String> tablePath, QueryTrait queryTrait) {
  SimpleCatalog leafCatalog = createNestedCatalogs(zetaSqlCatalog, tablePath);
  org.apache.beam.vendor.calcite.v1_28_0.org.apache.calcite.schema.Table calciteTable =
      TableResolution.resolveCalciteTable(calciteSchema, tablePath);
  if (calciteTable == null) {
    throw new ZetaSqlException(
        "Wasn't able to resolve the path " + tablePath + " in schema: " + calciteSchema.getName());
  }
  RelDataType rowType = calciteTable.getRowType(typeFactory);
  TableResolution.SimpleTableWithPath tableWithPath =
      TableResolution.SimpleTableWithPath.of(tablePath);
  queryTrait.addResolvedTable(tableWithPath);
  addFieldsToTable(tableWithPath, rowType);
  leafCatalog.addSimpleTable(tableWithPath.getTable());
}
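createNestedCatalogs is not shown in this snippet. The plain-Java sketch below is only an illustration of the traversal the Javadoc describes (hypothetical names, not the ZetaSQL SimpleCatalog API): walk every path element except the last, creating a child level whenever one is missing, and return the leaf so the table can be attached there.

import java.util.HashMap;
import java.util.List;
import java.util.Map;

public class NestedCatalogSketch {
  // Stand-in for a catalog: a name plus named child catalogs.
  static final class Node {
    final String name;
    final Map<String, Node> children = new HashMap<>();
    Node(String name) {
      this.name = name;
    }
  }

  // Walk every path element except the last (the table name), creating levels as needed,
  // and return the leaf level that should receive the table.
  static Node createNestedLevels(Node root, List<String> tablePath) {
    Node current = root;
    for (int i = 0; i < tablePath.size() - 1; i++) {
      current = current.children.computeIfAbsent(tablePath.get(i), Node::new);
    }
    return current;
  }

  public static void main(String[] args) {
    Node root = new Node("root");
    Node leaf = createNestedLevels(root, List.of("dataset", "subschema", "my_table"));
    System.out.println(leaf.name); // prints "subschema": the level where "my_table" would be added
  }
}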
Use of org.apache.beam.vendor.calcite.v1_28_0.org.apache.calcite.schema.Table in project beam by apache: class JdbcDriverTest, method testSelectsFromExistingComplexTable.
@Test
public void testSelectsFromExistingComplexTable() throws Exception {
  TestTableProvider tableProvider = new TestTableProvider();
  Connection connection = JdbcDriver.connect(tableProvider, PipelineOptionsFactory.create());
  connection.createStatement().executeUpdate(
      "CREATE EXTERNAL TABLE person ( \n"
          + "description VARCHAR, \n"
          + "nestedRow ROW< \n" + " id BIGINT, \n" + " name VARCHAR> \n"
          + ") \n"
          + "TYPE 'test'");
  tableProvider.addRows(
      "person",
      row(COMPLEX_SCHEMA, "description1", row(1L, "aaa")),
      row(COMPLEX_SCHEMA, "description2", row(2L, "bbb")));
  ResultSet selectResult =
      connection.createStatement().executeQuery(
          "SELECT person.nestedRow.id, person.nestedRow.name FROM person");
  List<Row> resultRows =
      readResultSet(selectResult).stream()
          .map(values -> values.stream().collect(toRow(BASIC_SCHEMA)))
          .collect(Collectors.toList());
  assertThat(resultRows, containsInAnyOrder(row(1L, "aaa"), row(2L, "bbb")));
}
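The readResultSet(...) helper is test scaffolding that is not shown here. A minimal sketch, assuming it returns one List<Object> of column values per row and using only standard java.sql calls, could look like this:

import java.sql.ResultSet;
import java.sql.ResultSetMetaData;
import java.sql.SQLException;
import java.util.ArrayList;
import java.util.List;

public class ResultSetReaderSketch {
  // Assumed shape of readResultSet(...): one List<Object> of column values per result row.
  static List<List<Object>> readResultSet(ResultSet resultSet) throws SQLException {
    ResultSetMetaData metaData = resultSet.getMetaData();
    List<List<Object>> rows = new ArrayList<>();
    while (resultSet.next()) {
      List<Object> values = new ArrayList<>();
      // JDBC columns are 1-indexed.
      for (int column = 1; column <= metaData.getColumnCount(); column++) {
        values.add(resultSet.getObject(column));
      }
      rows.add(values);
    }
    return rows;
  }
}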