Use of com.google.cloud.teleport.v2.io.DynamicJdbcIO in project DataflowTemplates by GoogleCloudPlatform.
The class DataplexJdbcIngestion, method buildGcsPipeline.
@VisibleForTesting
static void buildGcsPipeline(
    Pipeline pipeline,
    DataplexJdbcIngestionOptions options,
    DynamicDataSourceConfiguration dataSourceConfig,
    String targetRootPath) {
  List<String> existingFiles = StorageUtils.getFilesInDirectory(targetRootPath);
  // Auto inferring beam schema
  Schema beamSchema =
      Schemas.jdbcSchemaToBeamSchema(dataSourceConfig.buildDatasource(), options.getQuery());
  // Convert to Avro Schema
  org.apache.avro.Schema avroSchema = AvroUtils.toAvroSchema(beamSchema);
  // Read from JdbcIO and convert ResultSet to Beam Row
  PCollection<Row> resultRows =
      pipeline.apply(
          "Read from JdbcIO",
          DynamicJdbcIO.<Row>read()
              .withDataSourceConfiguration(dataSourceConfig)
              .withQuery(options.getQuery())
              .withCoder(RowCoder.of(beamSchema))
              .withRowMapper(BeamSchemaUtil.of(beamSchema)));
  // Convert Beam Row to GenericRecord
  PCollection<GenericRecord> genericRecords =
      resultRows
          .apply("convert to GenericRecord", ParDo.of(new BeamRowToGenericRecordFn(avroSchema)))
          .setCoder(AvroCoder.of(avroSchema));
  // Apply the write disposition against the files already present in the target directory
  if (options.getParitionColumn() == null || options.getPartitioningScheme() == null) {
    if (shouldSkipUnpartitionedTable(options, targetRootPath, existingFiles)) {
      return;
    }
  } else {
    genericRecords =
        applyPartitionedWriteDispositionFilter(
            genericRecords, options, targetRootPath, avroSchema, existingFiles);
  }
  // Write to GCS bucket
  PCollection<PartitionMetadata> metadata =
      genericRecords.apply(
          "Write to GCS",
          new GenericRecordsToGcsPartitioned(
              targetRootPath,
              Schemas.serialize(avroSchema),
              options.getParitionColumn(),
              options.getPartitioningScheme(),
              options.getFileFormat()));
}
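For context, below is a minimal sketch of how the pipeline and dataSourceConfig arguments might be assembled before calling buildGcsPipeline. It mirrors the DynamicDataSourceConfiguration pattern used by the other templates on this page; the option getter names (getDriverClassName, getConnectionUrl, getKMSEncryptionKey, getDriverJars) and the targetRootPath value are assumptions, not taken from DataplexJdbcIngestionOptions itself.

// Sketch only: getter names on DataplexJdbcIngestionOptions are assumed, following the
// JdbcToPubsubOptions pattern shown below; targetRootPath is a placeholder value.
DynamicJdbcIO.DynamicDataSourceConfiguration dataSourceConfig =
    DynamicJdbcIO.DynamicDataSourceConfiguration.create(
            options.getDriverClassName(),
            maybeDecrypt(options.getConnectionUrl(), options.getKMSEncryptionKey()))
        .withDriverJars(options.getDriverJars());
Pipeline pipeline = Pipeline.create(options);
String targetRootPath = "gs://my-bucket/dataplex/target";
buildGcsPipeline(pipeline, options, dataSourceConfig, targetRootPath);
pipeline.run();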
Use of com.google.cloud.teleport.v2.io.DynamicJdbcIO in project DataflowTemplates by GoogleCloudPlatform.
The class JdbcToPubsub, method run.
/**
 * Runs a pipeline which reads messages from JdbcIO and writes them to Pub/Sub.
*
* @param options The execution options.
* @return The pipeline result.
*/
public static PipelineResult run(JdbcToPubsubOptions options) {
  // Create the pipeline
  Pipeline pipeline = Pipeline.create(options);
  LOG.info("Starting Jdbc-To-PubSub Pipeline.");
  /*
   * Steps:
   *  1) Read data from a Jdbc Table
   *  2) Write to Pub/Sub topic
   */
  DynamicJdbcIO.DynamicDataSourceConfiguration dataSourceConfiguration =
      DynamicJdbcIO.DynamicDataSourceConfiguration.create(
              options.getDriverClassName(),
              maybeDecrypt(options.getConnectionUrl(), options.getKMSEncryptionKey()))
          .withDriverJars(options.getDriverJars());
  if (options.getUsername() != null) {
    dataSourceConfiguration =
        dataSourceConfiguration.withUsername(
            maybeDecrypt(options.getUsername(), options.getKMSEncryptionKey()));
  }
  if (options.getPassword() != null) {
    dataSourceConfiguration =
        dataSourceConfiguration.withPassword(
            maybeDecrypt(options.getPassword(), options.getKMSEncryptionKey()));
  }
  if (options.getConnectionProperties() != null) {
    dataSourceConfiguration =
        dataSourceConfiguration.withConnectionProperties(options.getConnectionProperties());
  }
  PCollection<String> jdbcData =
      pipeline.apply(
          "readFromJdbc",
          DynamicJdbcIO.<String>read()
              .withDataSourceConfiguration(dataSourceConfiguration)
              .withQuery(options.getQuery())
              .withCoder(StringUtf8Coder.of())
              .withRowMapper(new ResultSetToJSONString()));
  jdbcData.apply("writeSuccessMessages", PubsubIO.writeStrings().to(options.getOutputTopic()));
  return pipeline.run();
}
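The template's main entry point is not part of this excerpt. As a rough sketch, run(...) would typically be invoked from a main method that parses the options with Beam's PipelineOptionsFactory; the actual main method of JdbcToPubsub may differ.

// Sketch only: a conventional Beam entry point, not necessarily the template's actual main.
public static void main(String[] args) {
  JdbcToPubsubOptions options =
      PipelineOptionsFactory.fromArgs(args).withValidation().as(JdbcToPubsubOptions.class);
  run(options);
}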
Use of com.google.cloud.teleport.v2.io.DynamicJdbcIO in project DataflowTemplates by GoogleCloudPlatform.
The class PubsubToJdbc, method run.
/**
 * Runs a pipeline which reads messages from Pub/Sub and writes them to JdbcIO.
*
* @param options The execution options.
* @return The pipeline result.
*/
public static PipelineResult run(PubsubToJdbcOptions options) {
  // Create the pipeline
  Pipeline pipeline = Pipeline.create(options);
  LOG.info("Starting Pubsub-to-Jdbc Pipeline.");
  /*
   * Steps:
   *  1) Read data from a Pub/Sub subscription
   *  2) Write to Jdbc Table
   *  3) Write errors to deadletter topic
   */
  PCollection<String> pubsubData =
      pipeline.apply(
          "readFromPubSubSubscription",
          PubsubIO.readStrings().fromSubscription(options.getInputSubscription()));
  DynamicJdbcIO.DynamicDataSourceConfiguration dataSourceConfiguration =
      DynamicJdbcIO.DynamicDataSourceConfiguration.create(
              options.getDriverClassName(),
              maybeDecrypt(options.getConnectionUrl(), options.getKMSEncryptionKey()))
          .withDriverJars(options.getDriverJars());
  if (options.getUsername() != null) {
    dataSourceConfiguration =
        dataSourceConfiguration.withUsername(
            maybeDecrypt(options.getUsername(), options.getKMSEncryptionKey()));
  }
  if (options.getPassword() != null) {
    dataSourceConfiguration =
        dataSourceConfiguration.withPassword(
            maybeDecrypt(options.getPassword(), options.getKMSEncryptionKey()));
  }
  if (options.getConnectionProperties() != null) {
    dataSourceConfiguration =
        dataSourceConfiguration.withConnectionProperties(options.getConnectionProperties());
  }
  PCollection<FailsafeElement<String, String>> errors =
      pubsubData
          .apply(
              "writeToJdbc",
              DynamicJdbcIO.<String>write()
                  .withDataSourceConfiguration(dataSourceConfiguration)
                  .withStatement(options.getStatement())
                  .withPreparedStatementSetter(
                      new MapJsonStringToQuery(getKeyOrder(options.getStatement()))))
          .setCoder(FAILSAFE_ELEMENT_CODER);
  errors.apply(
      "WriteFailedRecords",
      ErrorConverters.WriteStringMessageErrorsToPubSub.newBuilder()
          .setErrorRecordsTopic(options.getOutputDeadletterTopic())
          .build());
  return pipeline.run();
}
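MapJsonStringToQuery and getKeyOrder are project helpers that are not shown in this excerpt. For illustration only, a simplified, hypothetical prepared-statement setter of the same shape is sketched below: it binds values from a JSON payload to the statement placeholders in a fixed key order. It assumes the setter implements Beam's JdbcIO.PreparedStatementSetter interface and that org.json is on the classpath; it is not the project's actual implementation.

// Hypothetical illustration only; not the actual MapJsonStringToQuery from the project.
// Requires org.apache.beam.sdk.io.jdbc.JdbcIO, java.sql.PreparedStatement, org.json.JSONObject.
static class JsonToStatementSetter implements JdbcIO.PreparedStatementSetter<String> {

  private final List<String> keyOrder;

  JsonToStatementSetter(List<String> keyOrder) {
    this.keyOrder = keyOrder;
  }

  @Override
  public void setParameters(String element, PreparedStatement statement) throws Exception {
    JSONObject json = new JSONObject(element);
    // Bind each expected key, in order, to the corresponding "?" placeholder (1-indexed).
    for (int i = 0; i < keyOrder.size(); i++) {
      statement.setObject(i + 1, json.opt(keyOrder.get(i)));
    }
  }
}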
Use of com.google.cloud.teleport.v2.io.DynamicJdbcIO in project DataflowTemplates by GoogleCloudPlatform.
The class DynamicJdbcIOTest, method testReadRows.
@Test
@Category(NeedsRunner.class)
public void testReadRows() throws Exception {
  DynamicDataSourceConfiguration dataSourceConfig =
      DynamicJdbcIO.DynamicDataSourceConfiguration.create(
          "org.apache.derby.jdbc.ClientDriver",
          maybeDecrypt("jdbc:derby://localhost:" + port + "/target/beam", null));
  String query = "select name, id from " + readTableName;
  org.apache.beam.sdk.schemas.Schema beamSchema =
      Schemas.jdbcSchemaToBeamSchema(dataSourceConfig.buildDatasource(), query);
  PCollection<Row> resultRows =
      pipeline.apply(
          DynamicJdbcIO.<Row>read()
              .withDataSourceConfiguration(dataSourceConfig)
              .withQuery(query)
              .withCoder(RowCoder.of(beamSchema))
              .withRowMapper(BeamSchemaUtil.of(beamSchema)));
  PAssert.thatSingleton(resultRows.apply("Count", Count.globally()))
      .isEqualTo((long) EXPECTED_ROW_COUNT);
  Schema schema =
      Schema.builder()
          .addField("name", Schema.FieldType.STRING)
          .addField("id", Schema.FieldType.INT32)
          .build();
  List<Row> expectedList = Lists.newArrayListWithExpectedSize(EXPECTED_ROW_COUNT);
  for (int i = 0; i < EXPECTED_ROW_COUNT; i++) {
    Row row = Row.withSchema(schema).addValues(TEST_ROW_SUFFIX + "-" + i, i).build();
    expectedList.add(row);
  }
  PAssert.that(resultRows).containsInAnyOrder(expectedList);
  pipeline.run();
}
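The test relies on fixture fields defined elsewhere in DynamicJdbcIOTest: the TestPipeline rule, the Derby port, readTableName, and the row-count constants. A minimal sketch of what those declarations typically look like in a Beam test class follows; the concrete values are placeholders, and the embedded Derby setup of the real test is not shown here.

// Sketch only: placeholder values; the real test starts an embedded Derby server and
// populates readTableName with EXPECTED_ROW_COUNT rows in its setup methods.
@Rule public final transient TestPipeline pipeline = TestPipeline.create();

private static final int EXPECTED_ROW_COUNT = 10;
private static final String TEST_ROW_SUFFIX = "row";
private static int port;
private static String readTableName;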