
Example 11 with FailsafeElement

use of com.google.cloud.teleport.v2.values.FailsafeElement in project DataflowTemplates by GoogleCloudPlatform.

the class SpannerStreamingWriteIntegrationTest method canIgnoreCaseWhileEventProcessing.

@Test
public void canIgnoreCaseWhileEventProcessing() throws Exception {
    JSONObject json1 = getChangeEvent("Table1", "INSERT", "1");
    json1.put("ID", "1");
    json1.put("dAtA", "23");
    JSONObject json2 = getChangeEvent("Table1", "INSERT", "1");
    json2.put("iD", "2");
    json2.put("DaTa", "23");
    PCollection<FailsafeElement<String, String>> jsonRecords =
        testPipeline.apply(
            Create.of(
                    Arrays.asList(
                        FailsafeElement.of(json1.toString(), json1.toString()),
                        FailsafeElement.of(json2.toString(), json2.toString())))
                .withCoder(FailsafeElementCoder.of(StringUtf8Coder.of(), StringUtf8Coder.of())));
    constructAndRunPipeline(jsonRecords);
    verifyRecordCountinTable("Table1", 2);
    verifyDataInTable1(1, 23);
    verifyDataInTable1(2, 23);
}
Also used : JSONObject(org.json.JSONObject) FailsafeElement(com.google.cloud.teleport.v2.values.FailsafeElement) Test(org.junit.Test) IntegrationTest(com.google.cloud.teleport.v2.spanner.IntegrationTest)
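
The getChangeEvent helper is defined elsewhere in SpannerStreamingWriteIntegrationTest and is not shown on this page. A minimal sketch of what it might look like, assuming the change event carries the table name, change type, and commit timestamp in DataStream-style metadata fields (the exact field names are assumptions, not confirmed by this snippet):

// Hypothetical test helper; the metadata field names below are assumptions.
private JSONObject getChangeEvent(String tableName, String changeType, String timestamp) {
    JSONObject json = new JSONObject();
    json.put("_metadata_table", tableName);        // assumed field name
    json.put("_metadata_change_type", changeType); // assumed field name
    json.put("_metadata_timestamp", timestamp);    // assumed field name
    return json;
}

The test then writes the column keys in mixed case ("ID", "dAtA", "iD", "DaTa") and asserts that both rows land in Table1, which is what verifies case-insensitive event processing.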

Example 12 with FailsafeElement

use of com.google.cloud.teleport.v2.values.FailsafeElement in project DataflowTemplates by GoogleCloudPlatform.

the class SpannerStreamingWriteIntegrationTest method canUpdateWithDisorderedAndDuplicatedEvents.

// @Test
public void canUpdateWithDisorderedAndDuplicatedEvents() throws Exception {
    JSONObject json1 = getChangeEventForTable1("1", "10", "INSERT", "1");
    JSONObject json2 = getChangeEventForTable1("1", "20", "UPDATE", "3");
    PCollection<FailsafeElement<String, String>> jsonRecords =
        testPipeline.apply(
            Create.of(
                    Arrays.asList(
                        FailsafeElement.of(json2.toString(), json2.toString()),
                        FailsafeElement.of(json1.toString(), json1.toString()),
                        FailsafeElement.of(json2.toString(), json2.toString()),
                        FailsafeElement.of(json1.toString(), json1.toString()),
                        FailsafeElement.of(json2.toString(), json2.toString()),
                        FailsafeElement.of(json2.toString(), json2.toString()),
                        FailsafeElement.of(json1.toString(), json1.toString())))
                .withCoder(FailsafeElementCoder.of(StringUtf8Coder.of(), StringUtf8Coder.of())));
    constructAndRunPipeline(jsonRecords);
    verifyRecordCountinTable("Table1", 1);
    verifyDataInTable1(1, 20);
}
Also used : JSONObject(org.json.JSONObject) FailsafeElement(com.google.cloud.teleport.v2.values.FailsafeElement)
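
This test injects seven copies of two events, duplicated and out of order, and asserts that exactly one row survives with the value from the later event (timestamp "3" beats timestamp "1"). The getChangeEventForTable1 helper is not shown; a plausible sketch, building on the getChangeEvent sketch above, with the parameter meanings (id, data, change type, timestamp) inferred from the assertions rather than confirmed:

// Hypothetical helper; parameter meanings are inferred, not confirmed.
private JSONObject getChangeEventForTable1(
        String id, String data, String changeType, String timestamp) {
    JSONObject json = getChangeEvent("Table1", changeType, timestamp);
    json.put("id", id);
    json.put("data", data);
    return json;
}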

Example 13 with FailsafeElement

use of com.google.cloud.teleport.v2.values.FailsafeElement in project DataflowTemplates by GoogleCloudPlatform.

the class DataStreamToMongoDB method run.

/**
 * Runs the pipeline with the supplied options.
 *
 * @param options The execution parameters to the pipeline.
 * @return  The result of the pipeline execution.
 */
public static PipelineResult run(Options options) {
    /*
     * Stages:
     *   1) Ingest and Normalize Data to FailsafeElement with JSON Strings
     *   2) Push the data to MongoDB
     */
    Pipeline pipeline = Pipeline.create(options);
    /*
     * Stage 1: Ingest and Normalize Data to FailsafeElement with JSON Strings
     *   a) Read DataStream data from GCS into JSON String FailsafeElements (datastreamJsonRecords)
     */
    PCollection<FailsafeElement<String, String>> datastreamJsonRecords =
        pipeline.apply(
            new DataStreamIO(
                    options.getStreamName(),
                    options.getInputFilePattern(),
                    options.getInputFileFormat(),
                    options.getInputSubscription(),
                    options.getRfcStartDateTime())
                .withFileReadConcurrency(options.getFileReadConcurrency()));
    PCollection<FailsafeElement<String, String>> jsonRecords =
        PCollectionList.of(datastreamJsonRecords).apply(Flatten.pCollections());
    /*
     * Performs the following steps:
     *   1. Converts JSON to BSON documents.
     *   2. Removes the metadata fields.
     *   3. Inserts the data into MongoDB collections.
     */
    jsonRecords
        .apply(
            "jsonToDocuments",
            MapElements.via(
                new SimpleFunction<FailsafeElement<String, String>, Document>() {
                  @Override
                  public Document apply(FailsafeElement<String, String> jsonString) {
                    String s = jsonString.getOriginalPayload();
                    Document doc = Document.parse(s);
                    return removeTableRowFields(doc, MAPPER_IGNORE_FIELDS);
                  }
                }))
        .apply(
            "Write To MongoDB",
            MongoDbIO.write()
                .withUri(options.getMongoDBUri())
                .withDatabase(options.getDatabase())
                .withCollection(options.getCollection()));
    // Execute the pipeline and return the result.
    return pipeline.run();
}
Also used : DataStreamIO(com.google.cloud.teleport.v2.cdc.sources.DataStreamIO) Document(org.bson.Document) Pipeline(org.apache.beam.sdk.Pipeline) FailsafeElement(com.google.cloud.teleport.v2.values.FailsafeElement)
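
The removeTableRowFields helper and the MAPPER_IGNORE_FIELDS constant are defined elsewhere in DataStreamToMongoDB. A minimal sketch of the field-stripping logic, assuming MAPPER_IGNORE_FIELDS is a Set<String> of DataStream metadata field names; the real implementation in the project may differ:

import java.util.Map;
import java.util.Set;
import org.bson.Document;

// Hypothetical sketch: copies every entry except the ignored metadata fields.
// org.bson.Document implements Map<String, Object>, so entrySet() is available.
private static Document removeTableRowFields(Document doc, Set<String> ignoreFields) {
    Document cleaned = new Document();
    for (Map.Entry<String, Object> entry : doc.entrySet()) {
        if (!ignoreFields.contains(entry.getKey())) {
            cleaned.append(entry.getKey(), entry.getValue());
        }
    }
    return cleaned;
}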

Example 14 with FailsafeElement

use of com.google.cloud.teleport.v2.values.FailsafeElement in project DataflowTemplates by GoogleCloudPlatform.

the class DataStreamToSQL method run.

/**
 * Runs the pipeline with the supplied options.
 *
 * @param options The execution parameters to the pipeline.
 * @return The result of the pipeline execution.
 */
public static PipelineResult run(Options options) {
    /*
     * Stages:
     *   1) Ingest and Normalize Data to FailsafeElement with JSON Strings
     *   2) Write JSON Strings to SQL DML Objects
     *   3) Filter stale rows using stateful PK transform
     *   4) Write DML statements to SQL Database via jdbc
     */
    Pipeline pipeline = Pipeline.create(options);
    CdcJdbcIO.DataSourceConfiguration dataSourceConfiguration = getDataSourceConfiguration(options);
    validateOptions(options, dataSourceConfiguration);
    Map<String, String> schemaMap = parseSchemaMap(options.getSchemaMap());
    /*
     * Stage 1: Ingest and Normalize Data to FailsafeElement with JSON Strings
     *   a) Read DataStream data from GCS into JSON String FailsafeElements (datastreamJsonRecords)
     */
    PCollection<FailsafeElement<String, String>> datastreamJsonRecords =
        pipeline.apply(
            new DataStreamIO(
                    options.getStreamName(),
                    options.getInputFilePattern(),
                    options.getInputFileFormat(),
                    options.getGcsPubSubSubscription(),
                    options.getRfcStartDateTime())
                .withLowercaseSourceColumns()
                .withRenameColumnValue("_metadata_row_id", "rowid")
                .withHashRowId());
    /*
     * Stage 2: Write JSON Strings to SQL Insert Strings
     *   a) Convert JSON String FailsafeElements to DML objects (dmlStatements)
     * Stage 3: Filter stale rows using stateful PK transform
     */
    PCollection<KV<String, DmlInfo>> dmlStatements =
        datastreamJsonRecords
            .apply("Format to DML", CreateDml.of(dataSourceConfiguration).withSchemaMap(schemaMap))
            .apply("DML Stateful Processing", ProcessDml.statefulOrderByPK());
    /*
     * Stage 4: Write Inserts to CloudSQL
     */
    dmlStatements.apply(
        "Write to SQL",
        CdcJdbcIO.<KV<String, DmlInfo>>write()
            .withDataSourceConfiguration(dataSourceConfiguration)
            .withStatementFormatter(
                new CdcJdbcIO.StatementFormatter<KV<String, DmlInfo>>() {
                  public String formatStatement(KV<String, DmlInfo> element) {
                    LOG.debug("Executing SQL: {}", element.getValue().getDmlSql());
                    return element.getValue().getDmlSql();
                  }
                }));
    // Execute the pipeline and return the result.
    return pipeline.run();
}
Also used : DataStreamIO(com.google.cloud.teleport.v2.cdc.sources.DataStreamIO) KV(org.apache.beam.sdk.values.KV) DmlInfo(com.google.cloud.teleport.v2.values.DmlInfo) CdcJdbcIO(com.google.cloud.teleport.v2.io.CdcJdbcIO) Pipeline(org.apache.beam.sdk.Pipeline) FailsafeElement(com.google.cloud.teleport.v2.values.FailsafeElement)
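
The parseSchemaMap helper converts the getSchemaMap() option into the Map<String, String> handed to CreateDml.withSchemaMap above. A sketch under the assumption that the option is a comma-separated list of colon-separated source:target pairs; the format is inferred from the option name, not confirmed by this page:

import java.util.HashMap;
import java.util.Map;

// Hypothetical parser; the "src:dest,src2:dest2" input format is an assumption.
private static Map<String, String> parseSchemaMap(String schemaMapString) {
    Map<String, String> schemaMap = new HashMap<>();
    if (schemaMapString == null || schemaMapString.isEmpty()) {
        return schemaMap;
    }
    for (String pair : schemaMapString.split(",")) {
        String[] parts = pair.split(":", 2);
        schemaMap.put(parts[0].trim(), parts[1].trim());
    }
    return schemaMap;
}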

Example 15 with FailsafeElement

use of com.google.cloud.teleport.v2.values.FailsafeElement in project DataflowTemplates by GoogleCloudPlatform.

the class DataStreamToPostgres method run.

/**
 * Runs the pipeline with the supplied options.
 *
 * @param options The execution parameters to the pipeline.
 * @return The result of the pipeline execution.
 */
public static PipelineResult run(Options options) {
    /*
     * Stages:
     *   1) Ingest and Normalize Data to FailsafeElement with JSON Strings
     *   2) Write JSON Strings to Postgres DML Objects
     *   3) Filter stale rows using stateful PK transform
     *   4) Write DML statements to Postgres
     */
    Pipeline pipeline = Pipeline.create(options);
    String jdbcDriverConnectionString =
        String.format(
            "jdbc:postgresql://%s:%s/%s",
            options.getDatabaseHost(), options.getDatabasePort(), options.getDatabaseName());
    CdcJdbcIO.DataSourceConfiguration dataSourceConfiguration =
        CdcJdbcIO.DataSourceConfiguration.create("org.postgresql.Driver", jdbcDriverConnectionString)
            .withUsername(options.getDatabaseUser())
            .withPassword(options.getDatabasePassword())
            .withMaxIdleConnections(0);
    validateOptions(options, dataSourceConfiguration);
    /*
     * Stage 1: Ingest and Normalize Data to FailsafeElement with JSON Strings
     *   a) Read DataStream data from GCS into JSON String FailsafeElements (datastreamJsonRecords)
     */
    PCollection<FailsafeElement<String, String>> datastreamJsonRecords =
        pipeline.apply(
            new DataStreamIO(
                    options.getStreamName(),
                    options.getInputFilePattern(),
                    options.getInputFileFormat(),
                    options.getGcsPubSubSubscription(),
                    options.getRfcStartDateTime())
                .withLowercaseSourceColumns()
                .withRenameColumnValue("_metadata_row_id", "rowid")
                .withHashRowId());
    /*
     * Stage 2: Write JSON Strings to Postgres Insert Strings
     *   a) Convert JSON String FailsafeElements to DML objects (dmlStatements)
     * Stage 3: Filter stale rows using stateful PK transform
     */
    PCollection<DmlInfo> dmlStatements =
        datastreamJsonRecords
            .apply("Format to Postgres DML", CreateDml.createDmlObjects(dataSourceConfiguration))
            .apply("DML Stateful Processing", ProcessDml.statefulOrderByPK());
    /*
     * Stage 4: Write Inserts to CloudSQL
     */
    dmlStatements.apply(
        "Write to Postgres",
        CdcJdbcIO.<DmlInfo>write()
            .withDataSourceConfiguration(dataSourceConfiguration)
            .withStatementFormatter(
                new CdcJdbcIO.StatementFormatter<DmlInfo>() {
                  public String formatStatement(DmlInfo element) {
                    return element.getDmlSql();
                  }
                }));
    // Execute the pipeline and return the result.
    return pipeline.run();
}
Also used : DataStreamIO(com.google.cloud.teleport.v2.cdc.sources.DataStreamIO) DmlInfo(com.google.cloud.teleport.v2.values.DmlInfo) CdcJdbcIO(com.google.cloud.teleport.v2.io.CdcJdbcIO) Pipeline(org.apache.beam.sdk.Pipeline) FailsafeElement(com.google.cloud.teleport.v2.values.FailsafeElement)
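
Common to all five examples, FailsafeElement pairs the original input payload with the current (possibly transformed) payload so that failing records can be routed to a dead-letter queue without losing the source data. A minimal standalone illustration: of() and getOriginalPayload() appear in the examples above, while getPayload() and setErrorMessage() are part of the same class in the project, noted here as assumptions about its API:

FailsafeElement<String, String> element =
    FailsafeElement.of("{\"id\": 1}", "{\"id\": 1}");
String original = element.getOriginalPayload(); // untouched input record
String current = element.getPayload();          // payload after any transforms
element.setErrorMessage("parse failed");        // recorded for DLQ routing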

Aggregations

FailsafeElement (com.google.cloud.teleport.v2.values.FailsafeElement) 31
PCollectionTuple (org.apache.beam.sdk.values.PCollectionTuple) 26
CoderRegistry (org.apache.beam.sdk.coders.CoderRegistry) 21
Test (org.junit.Test) 21
Pipeline (org.apache.beam.sdk.Pipeline) 14
TableRow (com.google.api.services.bigquery.model.TableRow) 8
PubsubMessage (org.apache.beam.sdk.io.gcp.pubsub.PubsubMessage) 6
DoFn (org.apache.beam.sdk.transforms.DoFn) 6
PubSubToElasticsearchOptions (com.google.cloud.teleport.v2.elasticsearch.options.PubSubToElasticsearchOptions) 5
IntegrationTest (com.google.cloud.teleport.v2.spanner.IntegrationTest) 5
JSONObject (org.json.JSONObject) 5
DeadLetterQueueManager (com.google.cloud.teleport.v2.cdc.dlq.DeadLetterQueueManager) 4
StringDeadLetterQueueSanitizer (com.google.cloud.teleport.v2.cdc.dlq.StringDeadLetterQueueSanitizer) 4
DataStreamIO (com.google.cloud.teleport.v2.cdc.sources.DataStreamIO) 4
FailsafeElementCoder (com.google.cloud.teleport.v2.coders.FailsafeElementCoder) 4
GCSToSplunk.flattenErrorsAndConvertToString (com.google.cloud.teleport.v2.templates.GCSToSplunk.flattenErrorsAndConvertToString) 4
PipelineResult (org.apache.beam.sdk.PipelineResult) 4
SpannerConfig (org.apache.beam.sdk.io.gcp.spanner.SpannerConfig) 4
KV (org.apache.beam.sdk.values.KV) 4
GCSToElasticsearchOptions (com.google.cloud.teleport.v2.elasticsearch.options.GCSToElasticsearchOptions) 3