use of org.apache.beam.sdk.io.gcp.pubsub.PubsubMessage in project beam by apache.
the class PubsubTableProviderIT method testSQLWithBytePayload.
/**
 * Reads a Pub/Sub-backed SQL table whose {@code payload} column is declared as {@code VARBINARY}
 * and checks that the query emits the payload bytes of every published message.
 *
 * <p>The pipeline is started first; messages are published only once a subscription on the events
 * topic has been observed, and the test then waits for the success signal.
 */
@Test
public void testSQLWithBytePayload() throws Exception {
  // Messages that will be published once the pipeline is listening.
  List<PubsubMessage> published =
      ImmutableList.of(
          objectsProvider.messageIdName(ts(1), 3, "foo"),
          objectsProvider.messageIdName(ts(2), 5, "bar"),
          objectsProvider.messageIdName(ts(3), 7, "baz"));

  // DDL template: table type, topic location and proto class name are substituted below.
  String ddlTemplate =
      "CREATE EXTERNAL TABLE message (\n"
          + "event_timestamp TIMESTAMP, \n"
          + "attributes MAP<VARCHAR, VARCHAR>, \n"
          + "payload VARBINARY \n"
          + ") \n"
          + "TYPE '%s' \n"
          + "LOCATION '%s' \n"
          + "TBLPROPERTIES '{ "
          + "\"protoClass\" : \"%s\", "
          + "\"timestampAttributeKey\" : \"ts\" }'";
  String ddl =
      String.format(
          ddlTemplate,
          tableProvider.getTableType(),
          eventsTopic.topicPath(),
          PayloadMessages.SimpleMessage.class.getName());
  String selectPayload = "SELECT message.payload AS some_bytes FROM message";

  // Register the Pub/Sub table in a fresh in-memory SQL environment and wire up the query.
  BeamSqlEnv env = BeamSqlEnv.inMemory(new PubsubTableProvider());
  env.executeDdl(ddl);
  PCollection<Row> payloadRows = query(env, pipeline, selectPayload);

  // One expected row per published message, each holding that message's raw payload.
  Schema bytesOnlySchema =
      Schema.builder().addField("some_bytes", FieldType.BYTES.withNullable(true)).build();
  ImmutableSet.Builder<Row> expectedRows = ImmutableSet.builder();
  for (PubsubMessage message : published) {
    // The cast keeps the byte[] from being spread as individual varargs values.
    expectedRows.add(row(bytesOnlySchema, (Object) message.getPayload()));
  }
  Set<Row> expected = expectedRows.build();

  // Signal success as soon as the observed rows match the expected set.
  payloadRows.apply(
      "waitForSuccess",
      resultSignal.signalSuccessWhen(
          SchemaCoder.of(bytesOnlySchema), seenRows -> seenRows.equals(expected)));

  // Start the pipeline, then wait until a subscription on the events topic exists.
  pipeline.run();
  String project = pipeline.getOptions().as(GcpOptions.class).getProject();
  eventsTopic.assertSubscriptionEventuallyCreated(project, Duration.standardMinutes(5));

  // Publish only now that the subscription exists, then poll for the success signal.
  eventsTopic.publish(published);
  resultSignal.waitForSuccess(timeout);
}
use of org.apache.beam.sdk.io.gcp.pubsub.PubsubMessage in project components by Talend.
the class PubSubOutputRuntime method expand.
/**
 * Writes the incoming records to the Pub/Sub topic configured in the dataset properties.
 *
 * <p>The topic/subscription is created first if needed. Records are then serialized according to
 * the dataset's value format (CSV or Avro) and handed to a {@link PubsubIO.Write}.
 *
 * @param in the records to publish
 * @return the terminal {@link PDone} of the write
 * @throws RuntimeException if the configured value format is neither CSV nor AVRO
 */
@Override
public PDone expand(PCollection<IndexedRecord> in) {
  PubSubDatasetProperties dataset = properties.getDatasetProperties();
  PubSubDatastoreProperties datastore = dataset.getDatastoreProperties();
  try {
    createTopicSubscriptionIfNeeded(properties);
  } catch (IOException e) {
    throw TalendRuntimeException.createUnexpectedException(e);
  }

  String topicPath =
      String.format(
          "projects/%s/topics/%s", datastore.projectName.getValue(), dataset.topic.getValue());
  PubsubIO.Write<PubsubMessage> pubsubWrite = PubsubIO.writeMessages().to(topicPath);

  // PubsubIO.Write is immutable: each with* call returns a new transform, so the result must be
  // reassigned or the attribute configuration is silently dropped.
  String idLabel = properties.idLabel.getValue();
  if (idLabel != null && !idLabel.isEmpty()) {
    pubsubWrite = pubsubWrite.withIdAttribute(idLabel);
  }
  String timestampLabel = properties.timestampLabel.getValue();
  if (timestampLabel != null && !timestampLabel.isEmpty()) {
    pubsubWrite = pubsubWrite.withTimestampAttribute(timestampLabel);
  }

  switch (dataset.valueFormat.getValue()) {
    case CSV:
      {
        return in.apply(MapElements.via(new FormatCsv(dataset.fieldDelimiter.getValue())))
            .apply(pubsubWrite);
      }
    case AVRO:
      {
        return in.apply(MapElements.via(new FormatAvro())).apply(pubsubWrite);
      }
    default:
      throw new RuntimeException("To be implemented: " + dataset.valueFormat.getValue());
  }
}
use of org.apache.beam.sdk.io.gcp.pubsub.PubsubMessage in project beam by apache.
the class PubsubTableProviderIT method testSQLLimit.
/**
 * Runs a {@code LIMIT 3} query against the Pub/Sub-backed table through the JDBC connection and
 * checks that exactly three rows come back even though seven messages are published.
 *
 * <p>The blocking query runs on a single worker thread so that the main thread can wait for the
 * subscription to appear and then publish the messages.
 */
@Test
public void testSQLLimit() throws Exception {
  String createTableString = String.format("CREATE EXTERNAL TABLE message (\n" + "event_timestamp TIMESTAMP, \n" + "attributes MAP<VARCHAR, VARCHAR>, \n" + "payload ROW< \n" + " id INTEGER, \n" + " name VARCHAR \n" + " > \n" + ") \n" + "TYPE '%s' \n" + "LOCATION '%s' \n" + "TBLPROPERTIES " + " '{ " + " %s" + " \"timestampAttributeKey\" : \"ts\", " + " \"deadLetterQueue\" : \"%s\", " + " \"protoClass\" : \"%s\" " + " }'", tableProvider.getTableType(), eventsTopic.topicPath(), payloadFormatParam(), dlqTopic.topicPath(), PayloadMessages.SimpleMessage.class.getName());
  List<PubsubMessage> messages =
      ImmutableList.of(
          objectsProvider.messageIdName(ts(1), 3, "foo"),
          objectsProvider.messageIdName(ts(2), 5, "bar"),
          objectsProvider.messageIdName(ts(3), 7, "baz"),
          objectsProvider.messageIdName(ts(4), 9, "ba2"),
          objectsProvider.messageIdName(ts(5), 10, "ba3"),
          objectsProvider.messageIdName(ts(6), 13, "ba4"),
          objectsProvider.messageIdName(ts(7), 15, "ba5"));
  // We need the default options on the schema to include the project passed in for the
  // integration test
  CalciteConnection connection = connect(pipeline.getOptions(), new PubsubTableProvider());
  Statement statement = connection.createStatement();
  statement.execute(createTableString);

  // Because Pubsub only allow new subscription receives message after the subscription is
  // created, eventsTopic.publish(messages) can only be called after statement.executeQuery.
  // However, because statement.executeQuery is a blocking call, it has to be put into a
  // separate thread to execute.
  Callable<List<String>> limitQuery =
      () -> {
        ResultSet resultSet =
            statement.executeQuery("SELECT message.payload.id FROM message LIMIT 3");
        ImmutableList.Builder<String> result = ImmutableList.builder();
        while (resultSet.next()) {
          result.add(resultSet.getString(1));
        }
        return result.build();
      };

  ExecutorService pool = Executors.newFixedThreadPool(1);
  try {
    Future<List<String>> queryResult = pool.submit(limitQuery);
    try {
      eventsTopic.assertSubscriptionEventuallyCreated(
          pipeline.getOptions().as(GcpOptions.class).getProject(), Duration.standardMinutes(5));
    } catch (AssertionError assertionError) {
      // Check if the forked thread had an exception.
      try {
        queryResult.get(0, TimeUnit.SECONDS);
      } catch (TimeoutException e) {
        // Nothing went wrong on the forked thread, but a subscription still wasn't created.
      } catch (ExecutionException e) {
        // The query thread failed; surface that failure (with its cause) instead of the
        // less informative subscription timeout.
        throw new AssertionError("Exception occurred in statement.executeQuery thread", e);
      }
      // Just re-throw the timeout assertion.
      throw assertionError;
    }
    eventsTopic.publish(messages);
    assertThat(queryResult.get(2, TimeUnit.MINUTES).size(), equalTo(3));
  } finally {
    // Always release the worker thread. On the success path the task has already finished; on a
    // failure path this interrupts a query that may still be blocked so the thread is not leaked.
    pool.shutdownNow();
  }
}
Aggregations