Use of com.google.cloud.teleport.v2.options.DataplexJdbcIngestionOptions in project DataflowTemplates by GoogleCloudPlatform.
Class DataplexJdbcIngestion, method main.
/**
* Main entry point for pipeline execution.
*
* @param args Command line arguments to the pipeline.
*/
public static void main(String[] args) throws IOException {
  DataplexJdbcIngestionOptions options =
      PipelineOptionsFactory.fromArgs(args).withValidation().as(DataplexJdbcIngestionOptions.class);

  Pipeline pipeline = Pipeline.create(options);

  DataplexClient dataplexClient = DefaultDataplexClient.withDefaultClient(options.getGcpCredential());
  String assetName = options.getOutputAsset();
  GoogleCloudDataplexV1Asset asset = resolveAsset(assetName, dataplexClient);

  DynamicDataSourceConfiguration dataSourceConfig = configDataSource(options);

  String assetType = asset.getResourceSpec().getType();
  if (DataplexAssetResourceSpec.BIGQUERY_DATASET.name().equals(assetType)) {
    buildBigQueryPipeline(pipeline, options, dataSourceConfig);
  } else if (DataplexAssetResourceSpec.STORAGE_BUCKET.name().equals(assetType)) {
    String targetRootPath =
        "gs://" + asset.getResourceSpec().getName() + "/" + options.getOutputTable();
    buildGcsPipeline(pipeline, options, dataSourceConfig, targetRootPath);
  } else {
    throw new IllegalArgumentException(
        String.format(
            "Asset %s is of type %s. Only %s and %s are supported.",
            assetName,
            assetType,
            DataplexAssetResourceSpec.BIGQUERY_DATASET.name(),
            DataplexAssetResourceSpec.STORAGE_BUCKET.name()));
  }

  pipeline.run();
}
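The options object above is populated entirely from command-line flags: Beam's PipelineOptionsFactory maps a flag such as --query to the interface getter getQuery(), and withValidation() fails fast when a required option is missing. The stand-alone sketch below illustrates that pattern with a hypothetical, trimmed-down options interface; JdbcIngestionOptionsSketch is illustrative only and is not the real DataplexJdbcIngestionOptions from the template.

import org.apache.beam.sdk.options.Description;
import org.apache.beam.sdk.options.PipelineOptions;
import org.apache.beam.sdk.options.PipelineOptionsFactory;
import org.apache.beam.sdk.options.Validation;

public class OptionsParsingSketch {

  // Hypothetical, trimmed-down options interface; NOT the real DataplexJdbcIngestionOptions.
  public interface JdbcIngestionOptionsSketch extends PipelineOptions {
    @Description("JDBC query used to read the source table")
    @Validation.Required
    String getQuery();

    void setQuery(String value);

    @Description("Dataplex asset the output is written to")
    @Validation.Required
    String getOutputAsset();

    void setOutputAsset(String value);
  }

  public static void main(String[] args) {
    // Example invocation: --query="SELECT * FROM orders" --outputAsset=<asset name>
    JdbcIngestionOptionsSketch options =
        PipelineOptionsFactory.fromArgs(args)
            .withValidation()
            .as(JdbcIngestionOptionsSketch.class);
    System.out.println("query=" + options.getQuery() + ", asset=" + options.getOutputAsset());
  }
}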
Use of com.google.cloud.teleport.v2.options.DataplexJdbcIngestionOptions in project DataflowTemplates by GoogleCloudPlatform.
Class DataplexJdbcIngestion, method buildGcsPipeline.
@VisibleForTesting
static void buildGcsPipeline(
    Pipeline pipeline,
    DataplexJdbcIngestionOptions options,
    DynamicDataSourceConfiguration dataSourceConfig,
    String targetRootPath) {
  // Automatically infer the Beam schema from the JDBC source
  Schema beamSchema =
      Schemas.jdbcSchemaToBeamSchema(dataSourceConfig.buildDatasource(), options.getQuery());
  // Convert it to an Avro schema
  org.apache.avro.Schema avroSchema = AvroUtils.toAvroSchema(beamSchema);

  // Read from JdbcIO and map each ResultSet row to a Beam Row
  PCollection<Row> resultRows =
      pipeline.apply(
          "Read from JdbcIO",
          DynamicJdbcIO.<Row>read()
              .withDataSourceConfiguration(dataSourceConfig)
              .withQuery(options.getQuery())
              .withCoder(RowCoder.of(beamSchema))
              .withRowMapper(BeamSchemaUtil.of(beamSchema)));

  // Convert each Beam Row to an Avro GenericRecord
  PCollection<GenericRecord> genericRecords =
      resultRows
          .apply("convert to GenericRecord", ParDo.of(new BeamRowToGenericRecordFn(avroSchema)))
          .setCoder(AvroCoder.of(avroSchema));

  // Write the records to the GCS bucket, partitioned per the configured scheme
  PCollection<PartitionMetadata> metadata =
      genericRecords.apply(
          "Write to GCS",
          new GenericRecordsToGcsPartitioned(
              targetRootPath,
              Schemas.serialize(avroSchema),
              options.getParitionColumn(),
              options.getPartitioningScheme(),
              options.getFileFormat()));
}
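The "convert to GenericRecord" step uses BeamRowToGenericRecordFn, which is defined elsewhere in the template and not shown here. As a rough illustration of what such a conversion can look like, the hypothetical DoFn below relies on Beam's AvroUtils.toGenericRecord(Row, Schema) helper; the actual class in DataflowTemplates may be implemented differently.

import org.apache.avro.generic.GenericRecord;
import org.apache.beam.sdk.schemas.utils.AvroUtils;
import org.apache.beam.sdk.transforms.DoFn;
import org.apache.beam.sdk.values.Row;

// Hypothetical Row-to-GenericRecord DoFn; NOT the BeamRowToGenericRecordFn from DataflowTemplates.
public class RowToGenericRecordSketchFn extends DoFn<Row, GenericRecord> {

  // Carry the schema as its JSON string so the DoFn stays serializable, and re-parse it on each worker.
  private final String avroSchemaJson;
  private transient org.apache.avro.Schema avroSchema;

  public RowToGenericRecordSketchFn(org.apache.avro.Schema avroSchema) {
    this.avroSchemaJson = avroSchema.toString();
  }

  @Setup
  public void setup() {
    avroSchema = new org.apache.avro.Schema.Parser().parse(avroSchemaJson);
  }

  @ProcessElement
  public void processElement(@Element Row row, OutputReceiver<GenericRecord> out) {
    // AvroUtils.toGenericRecord converts a Beam Row into a GenericRecord matching the given Avro schema.
    out.output(AvroUtils.toGenericRecord(row, avroSchema));
  }
}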