Search in sources :

Example 1 with PubsubMessage

use of org.apache.beam.sdk.io.gcp.pubsub.PubsubMessage in project beam by apache.

the class PubsubTableProviderIT method testSQLWithBytePayload.

/**
 * Queries a Pubsub-backed SQL table whose {@code payload} column is declared as
 * {@code VARBINARY} and checks that the selected bytes match the payloads of the published
 * messages.
 *
 * <p>The flow is: build the messages, register the table, wire the query output to the success
 * signal, start the pipeline, wait for the topic subscription, publish, then wait for the signal.
 */
@Test
public void testSQLWithBytePayload() throws Exception {
    // Messages that are published once the pipeline's subscription is in place.
    PubsubMessage fooMessage = objectsProvider.messageIdName(ts(1), 3, "foo");
    PubsubMessage barMessage = objectsProvider.messageIdName(ts(2), 5, "bar");
    PubsubMessage bazMessage = objectsProvider.messageIdName(ts(3), 7, "baz");
    List<PubsubMessage> toPublish = ImmutableList.of(fooMessage, barMessage, bazMessage);

    // DDL for the external table; the payload column is kept as raw bytes.
    String ddl = String.format(
            "CREATE EXTERNAL TABLE message (\n" + "event_timestamp TIMESTAMP, \n" + "attributes MAP<VARCHAR, VARCHAR>, \n" + "payload VARBINARY \n" + ") \n" + "TYPE '%s' \n" + "LOCATION '%s' \n" + "TBLPROPERTIES '{ " + "\"protoClass\" : \"%s\", " + "\"timestampAttributeKey\" : \"ts\" }'",
            tableProvider.getTableType(),
            eventsTopic.topicPath(),
            PayloadMessages.SimpleMessage.class.getName());
    String sql = "SELECT message.payload AS some_bytes FROM message";

    // Register the pubsub table in a fresh in-memory SQL environment.
    BeamSqlEnv env = BeamSqlEnv.inMemory(new PubsubTableProvider());
    env.executeDdl(ddl);

    // Attach the SQL query to the test pipeline.
    PCollection<Row> payloadRows = query(env, pipeline, sql);

    // One single-column row per published message, holding that message's payload.
    Schema bytesOnlySchema =
            Schema.builder().addField("some_bytes", FieldType.BYTES.withNullable(true)).build();
    Row wantedFoo = row(bytesOnlySchema, (Object) toPublish.get(0).getPayload());
    Row wantedBar = row(bytesOnlySchema, (Object) toPublish.get(1).getPayload());
    Row wantedBaz = row(bytesOnlySchema, (Object) toPublish.get(2).getPayload());
    Set<Row> wantedRows = ImmutableSet.of(wantedFoo, wantedBar, wantedBaz);

    // Signal success as soon as the observed rows equal the wanted set.
    payloadRows.apply(
            "waitForSuccess",
            resultSignal.signalSuccessWhen(
                    SchemaCoder.of(bytesOnlySchema), seenRows -> seenRows.equals(wantedRows)));

    // Launch the pipeline.
    pipeline.run();

    // Hold off publishing until a subscription on the events topic exists.
    String gcpProject = pipeline.getOptions().as(GcpOptions.class).getProject();
    eventsTopic.assertSubscriptionEventuallyCreated(gcpProject, Duration.standardMinutes(5));

    // The pipeline is running and subscribed, so the messages can be sent now.
    eventsTopic.publish(toPublish);

    // Wait on the signaling topic for the success message.
    resultSignal.waitForSuccess(timeout);
}
Also used : Arrays(java.util.Arrays) LoggerFactory(org.slf4j.LoggerFactory) TimeoutException(java.util.concurrent.TimeoutException) PubsubMessage(org.apache.beam.sdk.io.gcp.pubsub.PubsubMessage) Future(java.util.concurrent.Future) TestPubsub(org.apache.beam.sdk.io.gcp.pubsub.TestPubsub) ResultSet(java.sql.ResultSet) Map(java.util.Map) TestPubsubSignal(org.apache.beam.sdk.io.gcp.pubsub.TestPubsubSignal) Parameterized(org.junit.runners.Parameterized) ImmutableMap(org.apache.beam.vendor.calcite.v1_28_0.com.google.common.collect.ImmutableMap) GcpOptions(org.apache.beam.sdk.extensions.gcp.options.GcpOptions) Matchers.allOf(org.hamcrest.Matchers.allOf) Collection(java.util.Collection) SchemaCoder(org.apache.beam.sdk.schemas.SchemaCoder) Set(java.util.Set) FieldType(org.apache.beam.sdk.schemas.Schema.FieldType) SchemaIOTableProviderWrapper(org.apache.beam.sdk.extensions.sql.meta.provider.SchemaIOTableProviderWrapper) Collectors(java.util.stream.Collectors) StandardCharsets(java.nio.charset.StandardCharsets) Executors(java.util.concurrent.Executors) ImmutableSet(org.apache.beam.vendor.calcite.v1_28_0.com.google.common.collect.ImmutableSet) Serializable(java.io.Serializable) List(java.util.List) Matchers.equalTo(org.hamcrest.Matchers.equalTo) JdbcDriver(org.apache.beam.sdk.extensions.sql.impl.JdbcDriver) ReflectHelpers(org.apache.beam.sdk.util.common.ReflectHelpers) ImmutableList(org.apache.beam.vendor.calcite.v1_28_0.com.google.common.collect.ImmutableList) GenericRecordBuilder(org.apache.avro.generic.GenericRecordBuilder) JsonMatcher.jsonBytesLike(org.apache.beam.sdk.testing.JsonMatcher.jsonBytesLike) ByteArrayOutputStream(java.io.ByteArrayOutputStream) InMemoryMetaStore(org.apache.beam.sdk.extensions.sql.meta.store.InMemoryMetaStore) Duration(org.joda.time.Duration) RunWith(org.junit.runner.RunWith) Parameters(org.junit.runners.Parameterized.Parameters) HashMap(java.util.HashMap) Callable(java.util.concurrent.Callable) 
Matchers.hasProperty(org.hamcrest.Matchers.hasProperty) BeamSqlRelUtils(org.apache.beam.sdk.extensions.sql.impl.rel.BeamSqlRelUtils) TestPipeline(org.apache.beam.sdk.testing.TestPipeline) MatcherAssert.assertThat(org.hamcrest.MatcherAssert.assertThat) BeamSqlEnv(org.apache.beam.sdk.extensions.sql.impl.BeamSqlEnv) Row(org.apache.beam.sdk.values.Row) PipelineOptions(org.apache.beam.sdk.options.PipelineOptions) ExecutorService(java.util.concurrent.ExecutorService) AvroUtils(org.apache.beam.sdk.schemas.utils.AvroUtils) Matchers.hasEntry(org.hamcrest.Matchers.hasEntry) GenericRecord(org.apache.avro.generic.GenericRecord) Logger(org.slf4j.Logger) UTF_8(java.nio.charset.StandardCharsets.UTF_8) Parameter(org.junit.runners.Parameterized.Parameter) ObjectMapper(com.fasterxml.jackson.databind.ObjectMapper) JdbcConnection(org.apache.beam.sdk.extensions.sql.impl.JdbcConnection) JsonProcessingException(com.fasterxml.jackson.core.JsonProcessingException) IOException(java.io.IOException) Test(org.junit.Test) PCollection(org.apache.beam.sdk.values.PCollection) AvroCoder(org.apache.beam.sdk.coders.AvroCoder) Schema(org.apache.beam.sdk.schemas.Schema) ExecutionException(java.util.concurrent.ExecutionException) TimeUnit(java.util.concurrent.TimeUnit) PayloadMessages(org.apache.beam.sdk.extensions.protobuf.PayloadMessages) Rule(org.junit.Rule) Ignore(org.junit.Ignore) Matcher(org.hamcrest.Matcher) Instant(org.joda.time.Instant) Statement(java.sql.Statement) TableProvider(org.apache.beam.sdk.extensions.sql.meta.provider.TableProvider) CalciteConnection(org.apache.beam.vendor.calcite.v1_28_0.org.apache.calcite.jdbc.CalciteConnection) Schema(org.apache.beam.sdk.schemas.Schema) BeamSqlEnv(org.apache.beam.sdk.extensions.sql.impl.BeamSqlEnv) Row(org.apache.beam.sdk.values.Row) PubsubMessage(org.apache.beam.sdk.io.gcp.pubsub.PubsubMessage) Test(org.junit.Test)

Example 2 with PubsubMessage

use of org.apache.beam.sdk.io.gcp.pubsub.PubsubMessage in project components by Talend.

the class PubSubOutputRuntime method expand.

/**
 * Formats the incoming records according to the dataset's value format and writes them to the
 * configured Pub/Sub topic.
 *
 * <p>Before the write is built, {@code createTopicSubscriptionIfNeeded} is invoked with the
 * component properties. When {@code idLabel} and/or {@code timestampLabel} are set to a non-empty
 * value, they are applied to the write as the id attribute and the timestamp attribute.
 *
 * @param in the records to publish
 * @return the result of applying the Pub/Sub write
 * @throws RuntimeException if the dataset's value format is neither CSV nor AVRO; an
 *     {@link IOException} from the topic/subscription setup is rethrown through
 *     {@code TalendRuntimeException.createUnexpectedException}
 */
@Override
public PDone expand(PCollection<IndexedRecord> in) {
    PubSubDatasetProperties dataset = properties.getDatasetProperties();
    PubSubDatastoreProperties datastore = dataset.getDatastoreProperties();
    try {
        createTopicSubscriptionIfNeeded(properties);
    } catch (IOException e) {
        throw TalendRuntimeException.createUnexpectedException(e);
    }
    PubsubIO.Write<PubsubMessage> pubsubWrite = PubsubIO.writeMessages().to(String.format("projects/%s/topics/%s", datastore.projectName.getValue(), dataset.topic.getValue()));
    // PubsubIO.Write is immutable: each with* call returns a new transform, so the result has to
    // be reassigned or the setting is silently dropped.
    String idLabel = properties.idLabel.getValue();
    if (idLabel != null && !idLabel.isEmpty()) {
        pubsubWrite = pubsubWrite.withIdAttribute(idLabel);
    }
    String timestampLabel = properties.timestampLabel.getValue();
    if (timestampLabel != null && !timestampLabel.isEmpty()) {
        pubsubWrite = pubsubWrite.withTimestampAttribute(timestampLabel);
    }
    switch(dataset.valueFormat.getValue()) {
        case CSV:
            {
                return in.apply(MapElements.via(new FormatCsv(dataset.fieldDelimiter.getValue()))).apply(pubsubWrite);
            }
        case AVRO:
            {
                return in.apply(MapElements.via(new FormatAvro())).apply(pubsubWrite);
            }
        default:
            throw new RuntimeException("To be implemented: " + dataset.valueFormat.getValue());
    }
}
Also used : PubSubDatasetProperties(org.talend.components.pubsub.PubSubDatasetProperties) TalendRuntimeException(org.talend.daikon.exception.TalendRuntimeException) PubsubIO(org.apache.beam.sdk.io.gcp.pubsub.PubsubIO) PubSubDatastoreProperties(org.talend.components.pubsub.PubSubDatastoreProperties) IOException(java.io.IOException) PubsubMessage(org.apache.beam.sdk.io.gcp.pubsub.PubsubMessage)

Example 3 with PubsubMessage

use of org.apache.beam.sdk.io.gcp.pubsub.PubsubMessage in project beam by apache.

the class PubsubTableProviderIT method testSQLLimit.

/**
 * Publishes seven messages to the events topic and checks that a {@code LIMIT 3} query over the
 * Pubsub table completes with exactly three rows.
 *
 * <p>The query runs on a single-thread executor because {@code statement.executeQuery} blocks,
 * while the messages may only be published after the query's subscription exists. The executor is
 * always shut down, including when an assertion fails or a wait times out.
 */
@Test
public void testSQLLimit() throws Exception {
    String createTableString = String.format("CREATE EXTERNAL TABLE message (\n" + "event_timestamp TIMESTAMP, \n" + "attributes MAP<VARCHAR, VARCHAR>, \n" + "payload ROW< \n" + "             id INTEGER, \n" + "             name VARCHAR \n" + "           > \n" + ") \n" + "TYPE '%s' \n" + "LOCATION '%s' \n" + "TBLPROPERTIES " + "    '{ " + "       %s" + "       \"timestampAttributeKey\" : \"ts\", " + "       \"deadLetterQueue\" : \"%s\", " + "       \"protoClass\" : \"%s\" " + "     }'", tableProvider.getTableType(), eventsTopic.topicPath(), payloadFormatParam(), dlqTopic.topicPath(), PayloadMessages.SimpleMessage.class.getName());
    List<PubsubMessage> messages = ImmutableList.of(objectsProvider.messageIdName(ts(1), 3, "foo"), objectsProvider.messageIdName(ts(2), 5, "bar"), objectsProvider.messageIdName(ts(3), 7, "baz"), objectsProvider.messageIdName(ts(4), 9, "ba2"), objectsProvider.messageIdName(ts(5), 10, "ba3"), objectsProvider.messageIdName(ts(6), 13, "ba4"), objectsProvider.messageIdName(ts(7), 15, "ba5"));
    // We need the default options on the schema to include the project passed in for the
    // integration test
    CalciteConnection connection = connect(pipeline.getOptions(), new PubsubTableProvider());
    Statement statement = connection.createStatement();
    statement.execute(createTableString);
    // Typed task (instead of a raw Callable cast) that runs the blocking query and collects the
    // first column of every returned row.
    Callable<List<String>> runLimitQuery = () -> {
        ResultSet resultSet = statement.executeQuery("SELECT message.payload.id FROM message LIMIT 3");
        ImmutableList.Builder<String> result = ImmutableList.builder();
        while (resultSet.next()) {
            result.add(resultSet.getString(1));
        }
        return result.build();
    };
    // Because a new Pubsub subscription only receives messages published after the subscription
    // was created, eventsTopic.publish(messages) can only be called after statement.executeQuery.
    // However, because statement.executeQuery is a blocking call, it has to be put into a
    // separate thread to execute.
    ExecutorService pool = Executors.newFixedThreadPool(1);
    try {
        Future<List<String>> queryResult = pool.submit(runLimitQuery);
        try {
            eventsTopic.assertSubscriptionEventuallyCreated(pipeline.getOptions().as(GcpOptions.class).getProject(), Duration.standardMinutes(5));
        } catch (AssertionError assertionError) {
            // Check if the forked thread had an exception.
            try {
                queryResult.get(0, TimeUnit.SECONDS);
            } catch (TimeoutException ignored) {
                // Nothing went wrong on the forked thread, but a subscription still wasn't created.
            } catch (ExecutionException e) {
                // The forked thread failed; surface that failure to the user instead of the
                // less informative subscription timeout.
                throw new AssertionError("Exception occurred in statement.executeQuery thread", e);
            }
            // Just re-throw the timeout assertion.
            throw assertionError;
        }
        eventsTopic.publish(messages);
        assertThat(queryResult.get(2, TimeUnit.MINUTES).size(), equalTo(3));
    } finally {
        // Always stop the worker; on a failure path it may still be blocked in executeQuery, and
        // a plain shutdown() would leave that non-daemon thread running.
        pool.shutdownNow();
    }
}
Also used : Statement(java.sql.Statement) GenericRecordBuilder(org.apache.avro.generic.GenericRecordBuilder) PubsubMessage(org.apache.beam.sdk.io.gcp.pubsub.PubsubMessage) ExecutorService(java.util.concurrent.ExecutorService) ResultSet(java.sql.ResultSet) List(java.util.List) ImmutableList(org.apache.beam.vendor.calcite.v1_28_0.com.google.common.collect.ImmutableList) ExecutionException(java.util.concurrent.ExecutionException) CalciteConnection(org.apache.beam.vendor.calcite.v1_28_0.org.apache.calcite.jdbc.CalciteConnection) TimeoutException(java.util.concurrent.TimeoutException) Test(org.junit.Test)

Aggregations

PubsubMessage (org.apache.beam.sdk.io.gcp.pubsub.PubsubMessage)3 IOException (java.io.IOException)2 ResultSet (java.sql.ResultSet)2 Statement (java.sql.Statement)2 List (java.util.List)2 ExecutionException (java.util.concurrent.ExecutionException)2 ExecutorService (java.util.concurrent.ExecutorService)2 TimeoutException (java.util.concurrent.TimeoutException)2 GenericRecordBuilder (org.apache.avro.generic.GenericRecordBuilder)2 JsonProcessingException (com.fasterxml.jackson.core.JsonProcessingException)1 ObjectMapper (com.fasterxml.jackson.databind.ObjectMapper)1 ByteArrayOutputStream (java.io.ByteArrayOutputStream)1 Serializable (java.io.Serializable)1 StandardCharsets (java.nio.charset.StandardCharsets)1 UTF_8 (java.nio.charset.StandardCharsets.UTF_8)1 Arrays (java.util.Arrays)1 Collection (java.util.Collection)1 HashMap (java.util.HashMap)1 Map (java.util.Map)1 Set (java.util.Set)1