Use of com.google.cloud.teleport.v2.values.FailsafeElement in project DataflowTemplates by GoogleCloudPlatform.
From the class SpannerStreamingWriteIntegrationTest, method canIgnoreCaseWhileEventProcessing:
@Test
public void canIgnoreCaseWhileEventProcessing() throws Exception {
  JSONObject json1 = getChangeEvent("Table1", "INSERT", "1");
  json1.put("ID", "1");
  json1.put("dAtA", "23");
  JSONObject json2 = getChangeEvent("Table1", "INSERT", "1");
  json2.put("iD", "2");
  json2.put("DaTa", "23");
  PCollection<FailsafeElement<String, String>> jsonRecords =
      testPipeline.apply(
          Create.of(
                  Arrays.asList(
                      FailsafeElement.of(json1.toString(), json1.toString()),
                      FailsafeElement.of(json2.toString(), json2.toString())))
              .withCoder(FailsafeElementCoder.of(StringUtf8Coder.of(), StringUtf8Coder.of())));
  constructAndRunPipeline(jsonRecords);
  verifyRecordCountinTable("Table1", 2);
  verifyDataInTable1(1, 23);
  verifyDataInTable1(2, 23);
}
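The getChangeEvent helper is defined elsewhere in the test class and is not shown above. A minimal sketch of what such a helper might build is given below; the metadata key names are illustrative assumptions, not taken from the actual class.

// Hypothetical sketch of a change-event builder like getChangeEvent.
// The metadata field names are assumptions for illustration only.
private JSONObject getChangeEvent(String tableName, String changeType, String timestamp) {
  JSONObject json = new JSONObject();
  json.put("_metadata_table", tableName);        // assumed key: source table name
  json.put("_metadata_change_type", changeType); // assumed key: INSERT/UPDATE/DELETE
  json.put("_metadata_timestamp", timestamp);    // assumed key: used for event ordering
  return json;
}

The test then adds the column keys with mixed casing ("ID"/"iD", "dAtA"/"DaTa") to confirm that event processing matches the target columns case-insensitively.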
Use of com.google.cloud.teleport.v2.values.FailsafeElement in project DataflowTemplates by GoogleCloudPlatform.
From the class SpannerStreamingWriteIntegrationTest, method canUpdateWithDisorderedAndDuplicatedEvents:
// @Test
public void canUpdateWithDisorderedAndDuplicatedEvents() throws Exception {
  JSONObject json1 = getChangeEventForTable1("1", "10", "INSERT", "1");
  JSONObject json2 = getChangeEventForTable1("1", "20", "UPDATE", "3");
  PCollection<FailsafeElement<String, String>> jsonRecords =
      testPipeline.apply(
          Create.of(Arrays.asList(
                  FailsafeElement.of(json2.toString(), json2.toString()),
                  FailsafeElement.of(json1.toString(), json1.toString()),
                  FailsafeElement.of(json2.toString(), json2.toString()),
                  FailsafeElement.of(json1.toString(), json1.toString()),
                  FailsafeElement.of(json2.toString(), json2.toString()),
                  FailsafeElement.of(json2.toString(), json2.toString()),
                  FailsafeElement.of(json1.toString(), json1.toString())))
              .withCoder(FailsafeElementCoder.of(StringUtf8Coder.of(), StringUtf8Coder.of())));
  constructAndRunPipeline(jsonRecords);
  verifyRecordCountinTable("Table1", 1);
  verifyDataInTable1(1, 20);
}
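The events above are deliberately duplicated and out of order, so only the newest change per primary key should survive. FailsafeElement itself simply carries the original payload alongside the current (possibly transformed) payload, which is what lets a failed or rejected record be dead-lettered with its source intact. A minimal sketch of that idea, assuming a Guava dependency for the stack trace helper (illustrative only, not the test's actual failure path):

// Illustrative sketch (not from the test class): tag a record as failed while keeping
// its original payload, so it can be routed to a dead-letter output later.
static FailsafeElement<String, String> markFailed(String originalJson, Exception error) {
  FailsafeElement<String, String> element = FailsafeElement.of(originalJson, originalJson);
  element.setErrorMessage(error.getMessage());
  element.setStacktrace(Throwables.getStackTraceAsString(error)); // com.google.common.base.Throwables
  return element;
}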
Use of com.google.cloud.teleport.v2.values.FailsafeElement in project DataflowTemplates by GoogleCloudPlatform.
From the class DataStreamToMongoDB, method run:
/**
* Runs the pipeline with the supplied options.
*
* @param options The execution parameters to the pipeline.
* @return The result of the pipeline execution.
*/
public static PipelineResult run(Options options) {
  /*
   * Stages:
   *   1) Ingest and Normalize Data to FailsafeElement with JSON Strings
   *   2) Push the data to MongoDB
   */
  Pipeline pipeline = Pipeline.create(options);

  /*
   * Stage 1: Ingest and Normalize Data to FailsafeElement with JSON Strings
   *   a) Read DataStream data from GCS into JSON String FailsafeElements (datastreamJsonRecords)
   */
  PCollection<FailsafeElement<String, String>> datastreamJsonRecords =
      pipeline.apply(
          new DataStreamIO(
                  options.getStreamName(),
                  options.getInputFilePattern(),
                  options.getInputFileFormat(),
                  options.getInputSubscription(),
                  options.getRfcStartDateTime())
              .withFileReadConcurrency(options.getFileReadConcurrency()));
  PCollection<FailsafeElement<String, String>> jsonRecords =
      PCollectionList.of(datastreamJsonRecords).apply(Flatten.pCollections());

  /*
   * Stage 2: Push the data to MongoDB.
   *   1. Convert the JSON payload to a BSON Document.
   *   2. Remove the metadata fields.
   *   3. Insert the data into the MongoDB collection.
   */
  jsonRecords
      .apply(
          "jsonToDocuments",
          MapElements.via(
              new SimpleFunction<FailsafeElement<String, String>, Document>() {
                @Override
                public Document apply(FailsafeElement<String, String> jsonString) {
                  String s = jsonString.getOriginalPayload();
                  Document doc = Document.parse(s);
                  return removeTableRowFields(doc, MAPPER_IGNORE_FIELDS);
                }
              }))
      .apply(
          "Write To MongoDB",
          MongoDbIO.write()
              .withUri(options.getMongoDBUri())
              .withDatabase(options.getDatabase())
              .withCollection(options.getCollection()));

  // Execute the pipeline and return the result.
  return pipeline.run();
}
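The removeTableRowFields helper and the MAPPER_IGNORE_FIELDS constant are defined elsewhere in DataStreamToMongoDB and are not shown here. A plausible sketch of the helper, assuming MAPPER_IGNORE_FIELDS is a Set<String> of DataStream metadata field names (the signature is an assumption):

// Sketch only: copy the parsed BSON document while dropping the metadata fields,
// so only the source row data is written to MongoDB.
private static Document removeTableRowFields(Document doc, Set<String> ignoreFields) {
  Document cleaned = new Document();
  for (Map.Entry<String, Object> entry : doc.entrySet()) {
    if (!ignoreFields.contains(entry.getKey())) {
      cleaned.append(entry.getKey(), entry.getValue());
    }
  }
  return cleaned;
}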
Use of com.google.cloud.teleport.v2.values.FailsafeElement in project DataflowTemplates by GoogleCloudPlatform.
From the class DataStreamToSQL, method run:
/**
* Runs the pipeline with the supplied options.
*
* @param options The execution parameters to the pipeline.
* @return The result of the pipeline execution.
*/
public static PipelineResult run(Options options) {
  /*
   * Stages:
   *   1) Ingest and Normalize Data to FailsafeElement with JSON Strings
   *   2) Write JSON Strings to SQL DML Objects
   *   3) Filter stale rows using stateful PK transform
   *   4) Write DML statements to SQL Database via jdbc
   */
  Pipeline pipeline = Pipeline.create(options);
  CdcJdbcIO.DataSourceConfiguration dataSourceConfiguration = getDataSourceConfiguration(options);
  validateOptions(options, dataSourceConfiguration);
  Map<String, String> schemaMap = parseSchemaMap(options.getSchemaMap());

  /*
   * Stage 1: Ingest and Normalize Data to FailsafeElement with JSON Strings
   *   a) Read DataStream data from GCS into JSON String FailsafeElements (datastreamJsonRecords)
   */
  PCollection<FailsafeElement<String, String>> datastreamJsonRecords =
      pipeline.apply(
          new DataStreamIO(
                  options.getStreamName(),
                  options.getInputFilePattern(),
                  options.getInputFileFormat(),
                  options.getGcsPubSubSubscription(),
                  options.getRfcStartDateTime())
              .withLowercaseSourceColumns()
              .withRenameColumnValue("_metadata_row_id", "rowid")
              .withHashRowId());

  /*
   * Stage 2: Write JSON Strings to SQL DML Objects
   *   a) Convert JSON String FailsafeElements to keyed DML objects (dmlStatements)
   * Stage 3: Filter stale rows using stateful PK transform
   */
  PCollection<KV<String, DmlInfo>> dmlStatements =
      datastreamJsonRecords
          .apply("Format to DML", CreateDml.of(dataSourceConfiguration).withSchemaMap(schemaMap))
          .apply("DML Stateful Processing", ProcessDml.statefulOrderByPK());

  /*
   * Stage 4: Write Inserts to CloudSQL
   */
  dmlStatements.apply(
      "Write to SQL",
      CdcJdbcIO.<KV<String, DmlInfo>>write()
          .withDataSourceConfiguration(dataSourceConfiguration)
          .withStatementFormatter(
              new CdcJdbcIO.StatementFormatter<KV<String, DmlInfo>>() {
                public String formatStatement(KV<String, DmlInfo> element) {
                  LOG.debug("Executing SQL: {}", element.getValue().getDmlSql());
                  return element.getValue().getDmlSql();
                }
              }));

  // Execute the pipeline and return the result.
  return pipeline.run();
}
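ProcessDml.statefulOrderByPK is the stage that filters stale rows: for each primary key it keeps only events newer than anything already processed, so out-of-order or replayed CDC records do not overwrite fresher data. As a rough illustration of the idea (a simplified stand-in keyed by primary key with a string sort key, not the template's actual implementation):

// Conceptual sketch of a "stateful order by PK" filter: remember the highest sort key
// seen per primary key and drop anything older or duplicated.
static class DropStaleEventsFn extends DoFn<KV<String, String>, KV<String, String>> {

  @StateId("maxSortKey")
  private final StateSpec<ValueState<String>> maxSortKeySpec =
      StateSpecs.value(StringUtf8Coder.of());

  @ProcessElement
  public void processElement(
      ProcessContext context, @StateId("maxSortKey") ValueState<String> maxSortKey) {
    String incomingSortKey = context.element().getValue();
    String currentMax = maxSortKey.read();
    if (currentMax == null || incomingSortKey.compareTo(currentMax) > 0) {
      maxSortKey.write(incomingSortKey);
      context.output(context.element()); // newer than anything seen for this key
    }
    // Otherwise the event is stale or a duplicate and is dropped.
  }
}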
Use of com.google.cloud.teleport.v2.values.FailsafeElement in project DataflowTemplates by GoogleCloudPlatform.
From the class DataStreamToPostgres, method run:
/**
* Runs the pipeline with the supplied options.
*
* @param options The execution parameters to the pipeline.
* @return The result of the pipeline execution.
*/
public static PipelineResult run(Options options) {
  /*
   * Stages:
   *   1) Ingest and Normalize Data to FailsafeElement with JSON Strings
   *   2) Write JSON Strings to Postgres DML Objects
   *   3) Filter stale rows using stateful PK transform
   *   4) Write DML statements to Postgres
   */
  Pipeline pipeline = Pipeline.create(options);
  String jdbcDriverConnectionString =
      String.format(
          "jdbc:postgresql://%s:%s/%s",
          options.getDatabaseHost(), options.getDatabasePort(), options.getDatabaseName());
  CdcJdbcIO.DataSourceConfiguration dataSourceConfiguration =
      CdcJdbcIO.DataSourceConfiguration.create("org.postgresql.Driver", jdbcDriverConnectionString)
          .withUsername(options.getDatabaseUser())
          .withPassword(options.getDatabasePassword())
          .withMaxIdleConnections(new Integer(0));
  validateOptions(options, dataSourceConfiguration);

  /*
   * Stage 1: Ingest and Normalize Data to FailsafeElement with JSON Strings
   *   a) Read DataStream data from GCS into JSON String FailsafeElements (datastreamJsonRecords)
   */
  PCollection<FailsafeElement<String, String>> datastreamJsonRecords =
      pipeline.apply(
          new DataStreamIO(
                  options.getStreamName(),
                  options.getInputFilePattern(),
                  options.getInputFileFormat(),
                  options.getGcsPubSubSubscription(),
                  options.getRfcStartDateTime())
              .withLowercaseSourceColumns()
              .withRenameColumnValue("_metadata_row_id", "rowid")
              .withHashRowId());

  /*
   * Stage 2: Write JSON Strings to Postgres DML Objects
   *   a) Convert JSON String FailsafeElements to DML objects (dmlStatements)
   * Stage 3: Filter stale rows using stateful PK transform
   */
  PCollection<DmlInfo> dmlStatements =
      datastreamJsonRecords
          .apply("Format to Postgres DML", CreateDml.createDmlObjects(dataSourceConfiguration))
          .apply("DML Stateful Processing", ProcessDml.statefulOrderByPK());

  /*
   * Stage 4: Write Inserts to CloudSQL
   */
  dmlStatements.apply(
      "Write to Postgres",
      CdcJdbcIO.<DmlInfo>write()
          .withDataSourceConfiguration(dataSourceConfiguration)
          .withStatementFormatter(
              new CdcJdbcIO.StatementFormatter<DmlInfo>() {
                public String formatStatement(DmlInfo element) {
                  return element.getDmlSql();
                }
              }));

  // Execute the pipeline and return the result.
  return pipeline.run();
}
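Because Datastream can deliver the same change more than once and out of order, the DML produced by CreateDml is typically written so that a replay is harmless. As an illustration of the kind of statement a DmlInfo might carry for Postgres (the table and column names below are assumptions, not taken from the template):

// Illustration only: an idempotent Postgres upsert of the sort the generated DML may resemble.
String exampleDml =
    "INSERT INTO my_schema.my_table (id, name, updated_at)"
        + " VALUES ('1', 'alice', '2024-01-01T00:00:00Z')"
        + " ON CONFLICT (id) DO UPDATE SET name = EXCLUDED.name, updated_at = EXCLUDED.updated_at";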