use of com.google.cloud.teleport.v2.io.CdcJdbcIO in project DataflowTemplates by GoogleCloudPlatform.
the class DataStreamToSQL method run.
/**
* Runs the pipeline with the supplied options.
*
* @param options The execution parameters to the pipeline.
* @return The result of the pipeline execution.
*/
public static PipelineResult run(Options options) {
/*
* Stages:
* 1) Ingest and Normalize Data to FailsafeElement with JSON Strings
* 2) Write JSON Strings to SQL DML Objects
* 3) Filter stale rows using stateful PK transform
* 4) Write DML statements to SQL Database via jdbc
*/
Pipeline pipeline = Pipeline.create(options);
CdcJdbcIO.DataSourceConfiguration dataSourceConfiguration = getDataSourceConfiguration(options);
validateOptions(options, dataSourceConfiguration);
Map<String, String> schemaMap = parseSchemaMap(options.getSchemaMap());
/*
* Stage 1: Ingest and Normalize Data to FailsafeElement with JSON Strings
* a) Read DataStream data from GCS into JSON String FailsafeElements (datastreamJsonRecords)
*/
PCollection<FailsafeElement<String, String>> datastreamJsonRecords = pipeline.apply(new DataStreamIO(options.getStreamName(), options.getInputFilePattern(), options.getInputFileFormat(), options.getGcsPubSubSubscription(), options.getRfcStartDateTime()).withLowercaseSourceColumns().withHashColumnValue("_metadata_row_id", "rowid"));
/*
* Stage 2: Write JSON Strings to SQL Insert Strings
* a) Convert JSON String FailsafeElements to TableRow's (tableRowRecords)
* Stage 3) Filter stale rows using stateful PK transform
*/
PCollection<DmlInfo> dmlStatements = datastreamJsonRecords.apply("Format to DML", CreateDml.of(dataSourceConfiguration).withSchemaMap(schemaMap)).apply("DML Stateful Processing", ProcessDml.statefulOrderByPK());
/*
* Stage 4: Write Inserts to CloudSQL
*/
dmlStatements.apply("Write to SQL", CdcJdbcIO.<DmlInfo>write().withDataSourceConfiguration(dataSourceConfiguration).withStatementFormatter(new CdcJdbcIO.StatementFormatter<DmlInfo>() {
public String formatStatement(DmlInfo element) {
return element.getDmlSql();
}
}));
// Execute the pipeline and return the result.
return pipeline.run();
}
use of com.google.cloud.teleport.v2.io.CdcJdbcIO in project DataflowTemplates by GoogleCloudPlatform.
the class DataStreamToPostgres method run.
/**
* Runs the pipeline with the supplied options.
*
* @param options The execution parameters to the pipeline.
* @return The result of the pipeline execution.
*/
public static PipelineResult run(Options options) {
/*
* Stages:
* 1) Ingest and Normalize Data to FailsafeElement with JSON Strings
* 2) Write JSON Strings to Postgres DML Objects
* 3) Filter stale rows using stateful PK transform
* 4) Write DML statements to Postgres
*/
Pipeline pipeline = Pipeline.create(options);
String jdbcDriverConnectionString = String.format("jdbc:postgresql://%s:%s/%s", options.getDatabaseHost(), options.getDatabasePort(), options.getDatabaseName());
CdcJdbcIO.DataSourceConfiguration dataSourceConfiguration = CdcJdbcIO.DataSourceConfiguration.create("org.postgresql.Driver", jdbcDriverConnectionString).withUsername(options.getDatabaseUser()).withPassword(options.getDatabasePassword()).withMaxIdleConnections(new Integer(0));
validateOptions(options, dataSourceConfiguration);
/*
* Stage 1: Ingest and Normalize Data to FailsafeElement with JSON Strings
* a) Read DataStream data from GCS into JSON String FailsafeElements (datastreamJsonRecords)
*/
PCollection<FailsafeElement<String, String>> datastreamJsonRecords = pipeline.apply(new DataStreamIO(options.getStreamName(), options.getInputFilePattern(), options.getInputFileFormat(), options.getGcsPubSubSubscription(), options.getRfcStartDateTime()).withLowercaseSourceColumns().withHashColumnValue("_metadata_row_id", "rowid"));
/*
* Stage 2: Write JSON Strings to Postgres Insert Strings
* a) Convert JSON String FailsafeElements to TableRow's (tableRowRecords)
* Stage 3) Filter stale rows using stateful PK transform
*/
PCollection<DmlInfo> dmlStatements = datastreamJsonRecords.apply("Format to Postgres DML", CreateDml.createDmlObjects(dataSourceConfiguration)).apply("DML Stateful Processing", ProcessDml.statefulOrderByPK());
/*
* Stage 4: Write Inserts to CloudSQL
*/
dmlStatements.apply("Write to Postgres", CdcJdbcIO.<DmlInfo>write().withDataSourceConfiguration(dataSourceConfiguration).withStatementFormatter(new CdcJdbcIO.StatementFormatter<DmlInfo>() {
public String formatStatement(DmlInfo element) {
return element.getDmlSql();
}
}));
// Execute the pipeline and return the result.
return pipeline.run();
}
Aggregations