Usage of org.apache.beam.sdk.extensions.sql.impl.BeamSqlEnv in the Apache Beam project.
Class PubsubTableProviderIT, method testSQLInsertRowsToPubsubWithTimestampAttributeFlat.
/**
 * Inserts a single row with an explicit {@code event_timestamp} into a Pub/Sub-backed SQL table
 * whose TBLPROPERTIES declare {@code "timestampAttributeKey" : "ts"}, then asserts that the events
 * topic eventually receives a message accepted by {@code matcherTsNameHeightKnowsJS}.
 *
 * @throws Exception propagated from any call made by the test
 */
@Test
@SuppressWarnings("unchecked")
public void testSQLInsertRowsToPubsubWithTimestampAttributeFlat() throws Exception {
  // DDL template for a flat table; the placeholders are filled in by String.format below.
  String ddlTemplate =
      "CREATE EXTERNAL TABLE message (\n"
          + " event_timestamp TIMESTAMP, \n"
          + " name VARCHAR, \n"
          + " height INTEGER, \n"
          + " knows_javascript BOOLEAN \n"
          + ") \n"
          + "TYPE '%s' \n"
          + "LOCATION '%s' \n"
          + "TBLPROPERTIES "
          + " '{ "
          + " %s "
          + " \"protoClass\" : \"%s\", "
          + " \"deadLetterQueue\" : \"%s\","
          + " \"timestampAttributeKey\" : \"ts\""
          + " }'";
  String ddl =
      String.format(
          ddlTemplate,
          tableProvider.getTableType(),
          eventsTopic.topicPath(),
          payloadFormatParam(),
          PayloadMessages.NameHeightKnowsJSMessage.class.getName(),
          dlqTopic.topicPath());

  // Register the Pub/Sub table in a fresh in-memory SQL environment.
  BeamSqlEnv env = BeamSqlEnv.inMemory(new PubsubTableProvider());
  env.executeDdl(ddl);

  // No column list: the timestamp is supplied explicitly as the first value.
  String insertStatement =
      "INSERT INTO message VALUES (TIMESTAMP '1970-01-01 00:00:00.002', 'person2', 70, FALSE)";
  query(env, pipeline, insertStatement);

  // Run the insert pipeline, waiting at most five minutes for it to finish.
  pipeline.run().waitUntilFinish(Duration.standardMinutes(5));

  // NOTE(review): ts(2) presumably corresponds to the .002 timestamp literal above -- confirm
  // against the ts() helper.
  eventsTopic
      .assertThatTopicEventuallyReceives(matcherTsNameHeightKnowsJS(ts(2), "person2", 70, false))
      .waitForUpTo(Duration.standardSeconds(40));
}
Usage of org.apache.beam.sdk.extensions.sql.impl.BeamSqlEnv in the Apache Beam project.
Class PubsubTableProviderIT, method testSQLWithBytePayload.
/**
 * Reads the raw {@code payload} column (declared as VARBINARY) through SQL and signals success
 * once the observed rows equal the payloads of the three published messages.
 *
 * @throws Exception propagated from any call made by the test
 */
@Test
public void testSQLWithBytePayload() throws Exception {
  // Messages to publish once the pipeline's subscription exists.
  List<PubsubMessage> messages =
      ImmutableList.of(
          objectsProvider.messageIdName(ts(1), 3, "foo"),
          objectsProvider.messageIdName(ts(2), 5, "bar"),
          objectsProvider.messageIdName(ts(3), 7, "baz"));

  // This table definition takes no payload-format placeholder, unlike the other DDLs here.
  String ddlTemplate =
      "CREATE EXTERNAL TABLE message (\n"
          + "event_timestamp TIMESTAMP, \n"
          + "attributes MAP<VARCHAR, VARCHAR>, \n"
          + "payload VARBINARY \n"
          + ") \n"
          + "TYPE '%s' \n"
          + "LOCATION '%s' \n"
          + "TBLPROPERTIES '{ "
          + "\"protoClass\" : \"%s\", "
          + "\"timestampAttributeKey\" : \"ts\" }'";
  String ddl =
      String.format(
          ddlTemplate,
          tableProvider.getTableType(),
          eventsTopic.topicPath(),
          PayloadMessages.SimpleMessage.class.getName());
  String selectStatement = "SELECT message.payload AS some_bytes FROM message";

  // Register the Pub/Sub table in a fresh in-memory SQL environment.
  BeamSqlEnv env = BeamSqlEnv.inMemory(new PubsubTableProvider());
  env.executeDdl(ddl);

  // Build the SELECT against the topic.
  PCollection<Row> selected = query(env, pipeline, selectStatement);

  // Expected output: one single-column row per published message, holding its payload.
  Schema justBytesSchema =
      Schema.builder().addField("some_bytes", FieldType.BYTES.withNullable(true)).build();
  ImmutableSet.Builder<Row> expectedRows = ImmutableSet.builder();
  for (PubsubMessage message : messages) {
    expectedRows.add(row(justBytesSchema, (Object) message.getPayload()));
  }
  Set<Row> expected = expectedRows.build();

  // Signal success as soon as the observed rows equal the expected set.
  selected.apply(
      "waitForSuccess",
      resultSignal.signalSuccessWhen(
          SchemaCoder.of(justBytesSchema), seen -> seen.equals(expected)));

  // Launch the pipeline without blocking on its completion.
  pipeline.run();

  // Wait until a subscription on the events topic has been created.
  String project = pipeline.getOptions().as(GcpOptions.class).getProject();
  eventsTopic.assertSubscriptionEventuallyCreated(project, Duration.standardMinutes(5));

  // Publish only after the subscription exists, then wait for the success signal.
  eventsTopic.publish(messages);
  resultSignal.waitForSuccess(timeout);
}
Usage of org.apache.beam.sdk.extensions.sql.impl.BeamSqlEnv in the Apache Beam project.
Class PubsubTableProviderIT, method testSQLInsertRowsToPubsubFlat.
/**
 * Inserts two rows into a flat Pub/Sub-backed SQL table (no timestamp attribute key configured)
 * and asserts that the events topic eventually receives a message matching each of them.
 *
 * @throws Exception propagated from any call made by the test
 */
@Test
@SuppressWarnings("unchecked")
public void testSQLInsertRowsToPubsubFlat() throws Exception {
  // DDL template for a flat table with a dead-letter queue and no timestampAttributeKey.
  String ddlTemplate =
      "CREATE EXTERNAL TABLE message (\n"
          + "event_timestamp TIMESTAMP, \n"
          + "name VARCHAR, \n"
          + "height INTEGER, \n"
          + "knows_javascript BOOLEAN \n"
          + ") \n"
          + "TYPE '%s' \n"
          + "LOCATION '%s' \n"
          + "TBLPROPERTIES "
          + " '{ "
          + " %s"
          + " \"protoClass\" : \"%s\", "
          + " \"deadLetterQueue\" : \"%s\""
          + " }'";
  String ddl =
      String.format(
          ddlTemplate,
          tableProvider.getTableType(),
          eventsTopic.topicPath(),
          payloadFormatParam(),
          PayloadMessages.NameHeightKnowsJSMessage.class.getName(),
          dlqTopic.topicPath());

  // Register the Pub/Sub table in a fresh in-memory SQL environment.
  BeamSqlEnv env = BeamSqlEnv.inMemory(new PubsubTableProvider());
  env.executeDdl(ddl);

  // TODO(BEAM-8741): The explicit column list should ideally be unnecessary, since writing to
  // event_timestamp should not be possible when it is mapped to publish time.
  String insertStatement =
      "INSERT INTO message (name, height, knows_javascript) \n"
          + "VALUES \n"
          + "('person1', 80, TRUE), \n"
          + "('person2', 70, FALSE)";

  // Build the insert and run it, waiting at most five minutes for it to finish.
  query(env, pipeline, insertStatement);
  pipeline.run().waitUntilFinish(Duration.standardMinutes(5));

  // Both inserted rows must eventually show up on the events topic.
  eventsTopic
      .assertThatTopicEventuallyReceives(
          objectsProvider.matcherNameHeightKnowsJS("person1", 80, true),
          objectsProvider.matcherNameHeightKnowsJS("person2", 70, false))
      .waitForUpTo(Duration.standardSeconds(40));
}
Usage of org.apache.beam.sdk.extensions.sql.impl.BeamSqlEnv in the Apache Beam project.
Class PubsubTableProviderIT, method testSQLSelectsArrayAttributes.
/**
 * Declares {@code attributes} as an array of key/value rows and selects the keys of its first
 * two entries alongside the payload id. For every observed row the success predicate records
 * whichever of the two keys is not {@code "ts"} and expects the mapping
 * {@code {3=foo, 5=bar, 7=baz}}.
 *
 * <p>Currently ignored; see the issue referenced by the {@code @Ignore} annotation.
 *
 * @throws Exception propagated from any call made by the test
 */
@Ignore("https://issues.apache.org/jira/browse/BEAM-12320")
@Test
public void testSQLSelectsArrayAttributes() throws Exception {
  String ddlTemplate =
      "CREATE EXTERNAL TABLE message (\n"
          + "event_timestamp TIMESTAMP, \n"
          + "attributes ARRAY<ROW<key VARCHAR, `value` VARCHAR>>, \n"
          + "payload ROW< \n"
          + " id INTEGER, \n"
          + " name VARCHAR \n"
          + " > \n"
          + ") \n"
          + "TYPE '%s' \n"
          + "LOCATION '%s' \n"
          + "TBLPROPERTIES '{ "
          + "%s"
          + "\"protoClass\" : \"%s\", "
          + "\"timestampAttributeKey\" : \"ts\" }'";
  String ddl =
      String.format(
          ddlTemplate,
          tableProvider.getTableType(),
          eventsTopic.topicPath(),
          payloadFormatParam(),
          PayloadMessages.SimpleMessage.class.getName());
  String selectStatement =
      "SELECT message.payload.id, attributes[1].key AS a1, attributes[2].key AS a2 FROM message";

  // Register the Pub/Sub table in a fresh in-memory SQL environment.
  BeamSqlEnv env = BeamSqlEnv.inMemory(new PubsubTableProvider());
  env.executeDdl(ddl);

  // Build the SELECT against the topic.
  PCollection<Row> selected = query(env, pipeline, selectStatement);

  // NOTE(review): the coder is built from PAYLOAD_SCHEMA while the query projects id, a1 and
  // a2; PAYLOAD_SCHEMA is defined outside this method -- confirm the two are compatible.
  selected.apply(
      "waitForSuccess",
      resultSignal.signalSuccessWhen(
          SchemaCoder.of(PAYLOAD_SCHEMA),
          seen -> {
            Map<Integer, String> attributeKeyById = new HashMap<>();
            for (Row observed : seen) {
              // The "ts" key may land in either array slot; take the other one.
              String column = "ts".equals(observed.getString("a1")) ? "a2" : "a1";
              attributeKeyById.put(observed.getInt32("id"), observed.getString(column));
            }
            LOG.info("Entries: {}", attributeKeyById);
            return attributeKeyById.equals(ImmutableMap.of(3, "foo", 5, "bar", 7, "baz"));
          }));

  // Launch the pipeline without blocking on its completion.
  pipeline.run();

  // Wait until a subscription on the events topic has been created.
  String project = pipeline.getOptions().as(GcpOptions.class).getProject();
  eventsTopic.assertSubscriptionEventuallyCreated(project, Duration.standardMinutes(5));

  // Publish only after the subscription exists, then wait for the success signal.
  List<PubsubMessage> messages =
      ImmutableList.of(
          objectsProvider.messageIdName(ts(1), 3, "foo"),
          objectsProvider.messageIdName(ts(2), 5, "bar"),
          objectsProvider.messageIdName(ts(3), 7, "baz"));
  eventsTopic.publish(messages);
  resultSignal.waitForSuccess(timeout);
}
Usage of org.apache.beam.sdk.extensions.sql.impl.BeamSqlEnv in the Apache Beam project.
Class PubsubTableProviderIT, method testUsesDlq.
/**
 * Publishes three well-formed messages plus two with the payloads {@code "{ - }"} and
 * {@code "{ + }"} to a table configured with a dead-letter queue. The query must yield the three
 * valid rows, and the dead-letter topic must eventually receive the other two messages.
 *
 * @throws Exception propagated from any call made by the test
 */
@Test
@SuppressWarnings("unchecked")
public void testUsesDlq() throws Exception {
  String ddlTemplate =
      "CREATE EXTERNAL TABLE message (\n"
          + "event_timestamp TIMESTAMP, \n"
          + "attributes MAP<VARCHAR, VARCHAR>, \n"
          + "payload ROW< \n"
          + " id INTEGER, \n"
          + " name VARCHAR \n"
          + " > \n"
          + ") \n"
          + "TYPE '%s' \n"
          + "LOCATION '%s' \n"
          + "TBLPROPERTIES "
          + " '{ "
          + " %s"
          + " \"timestampAttributeKey\" : \"ts\", "
          + " \"deadLetterQueue\" : \"%s\", "
          + " \"protoClass\" : \"%s\" "
          + " }'";
  String ddl =
      String.format(
          ddlTemplate,
          tableProvider.getTableType(),
          eventsTopic.topicPath(),
          payloadFormatParam(),
          dlqTopic.topicPath(),
          PayloadMessages.SimpleMessage.class.getName());
  String selectStatement = "SELECT message.payload.id, message.payload.name from message";

  // Register the Pub/Sub table in a fresh in-memory SQL environment.
  BeamSqlEnv env = BeamSqlEnv.inMemory(new PubsubTableProvider());
  env.executeDdl(ddl);

  // Build the SELECT against the topic.
  PCollection<Row> selected = query(env, pipeline, selectStatement);

  // Signal success once exactly the three valid rows have been observed. The expected set is
  // built inside the predicate, as in the expression-lambda form.
  selected.apply(
      "waitForSuccess",
      resultSignal.signalSuccessWhen(
          SchemaCoder.of(PAYLOAD_SCHEMA),
          seen -> {
            Set<Row> wanted =
                ImmutableSet.of(
                    row(PAYLOAD_SCHEMA, 3, "foo"),
                    row(PAYLOAD_SCHEMA, 5, "bar"),
                    row(PAYLOAD_SCHEMA, 7, "baz"));
            return seen.equals(wanted);
          }));

  // Launch the pipeline without blocking on its completion.
  pipeline.run();

  // Wait until a subscription on the events topic has been created.
  String project = pipeline.getOptions().as(GcpOptions.class).getProject();
  eventsTopic.assertSubscriptionEventuallyCreated(project, Duration.standardMinutes(5));

  // Publish only after the subscription exists.
  eventsTopic.publish(
      ImmutableList.of(
          objectsProvider.messageIdName(ts(1), 3, "foo"),
          objectsProvider.messageIdName(ts(2), 5, "bar"),
          objectsProvider.messageIdName(ts(3), 7, "baz"),
          // invalid message, will go to DLQ
          messagePayload(ts(4), "{ - }", ImmutableMap.of()),
          // invalid message, will go to DLQ
          messagePayload(ts(5), "{ + }", ImmutableMap.of())));

  // First wait for the query's success signal, then check the dead-letter topic.
  resultSignal.waitForSuccess(timeout);
  dlqTopic
      .assertThatTopicEventuallyReceives(
          matcherPayload(ts(4), "{ - }"), matcherPayload(ts(5), "{ + }"))
      .waitForUpTo(Duration.standardSeconds(40));
}
Aggregations