Use of com.google.cloud.teleport.v2.transforms.DeleteBigQueryDataFn.Options in project DataflowTemplates by GoogleCloudPlatform.
The class DataplexJdbcIngestion, method main.
/**
* Main entry point for pipeline execution.
*
* @param args Command line arguments to the pipeline.
*/
public static void main(String[] args) throws IOException {
  DataplexJdbcIngestionOptions options =
      PipelineOptionsFactory.fromArgs(args).withValidation().as(DataplexJdbcIngestionOptions.class);

  Pipeline pipeline = Pipeline.create(options);

  DataplexClient dataplexClient = DefaultDataplexClient.withDefaultClient(options.getGcpCredential());
  String assetName = options.getOutputAsset();
  GoogleCloudDataplexV1Asset asset = resolveAsset(assetName, dataplexClient);
  DynamicDataSourceConfiguration dataSourceConfig = configDataSource(options);

  String assetType = asset.getResourceSpec().getType();
  if (DataplexAssetResourceSpec.BIGQUERY_DATASET.name().equals(assetType)) {
    buildBigQueryPipeline(pipeline, options, dataSourceConfig);
  } else if (DataplexAssetResourceSpec.STORAGE_BUCKET.name().equals(assetType)) {
    String targetRootPath =
        "gs://" + asset.getResourceSpec().getName() + "/" + options.getOutputTable();
    buildGcsPipeline(pipeline, options, dataSourceConfig, targetRootPath);
  } else {
    throw new IllegalArgumentException(
        String.format(
            "Asset %s is of type %s. Only %s and %s are supported.",
            assetName,
            assetType,
            DataplexAssetResourceSpec.BIGQUERY_DATASET.name(),
            DataplexAssetResourceSpec.STORAGE_BUCKET.name()));
  }

  pipeline.run();
}
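The snippet above depends on DataplexJdbcIngestionOptions, which is defined elsewhere in the template. As a rough sketch only (the real interface in DataflowTemplates is larger and its annotations may differ), a Beam options interface covering the two getters used in main could be declared along these lines; the getter names simply mirror options.getOutputAsset() and options.getOutputTable() and are not the actual definition:

import org.apache.beam.sdk.options.Description;
import org.apache.beam.sdk.options.PipelineOptions;
import org.apache.beam.sdk.options.Validation;

// Hypothetical sketch of an options interface in the shape of DataplexJdbcIngestionOptions.
public interface DataplexJdbcIngestionOptionsSketch extends PipelineOptions {

  @Description("Dataplex output asset that results are written to.")
  @Validation.Required
  String getOutputAsset();

  void setOutputAsset(String outputAsset);

  @Description("Destination table or entity name under the output asset.")
  @Validation.Required
  String getOutputTable();

  void setOutputTable(String outputTable);
}

Options declared this way are bound from command-line flags such as --outputAsset=... and --outputTable=... by PipelineOptionsFactory.fromArgs(args), and withValidation() enforces the @Validation.Required annotations before the pipeline is built.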
Use of com.google.cloud.teleport.v2.transforms.DeleteBigQueryDataFn.Options in project DataflowTemplates by GoogleCloudPlatform.
The class JdbcToPubsub, method main.
/**
* Main entry point for pipeline execution.
*
* @param args Command line arguments to the pipeline.
*/
public static void main(String[] args) {
  JdbcToPubsubOptions options =
      PipelineOptionsFactory.fromArgs(args).withValidation().as(JdbcToPubsubOptions.class);
  run(options);
}
Use of com.google.cloud.teleport.v2.transforms.DeleteBigQueryDataFn.Options in project DataflowTemplates by GoogleCloudPlatform.
The class PubsubToJdbc, method main.
/**
* Main entry point for pipeline execution.
*
* @param args Command line arguments to the pipeline.
*/
public static void main(String[] args) {
  PubsubToJdbcOptions options =
      PipelineOptionsFactory.fromArgs(args).withValidation().as(PubsubToJdbcOptions.class);
  run(options);
}
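Both JdbcToPubsub and PubsubToJdbc delegate the actual pipeline construction to a run(options) method that this listing does not show. Purely as an illustration of the general shape such a method takes in Beam (not the actual DataflowTemplates implementation; the option getters used below are hypothetical), a Pub/Sub-to-JDBC style run method might look like:

import org.apache.beam.sdk.Pipeline;
import org.apache.beam.sdk.PipelineResult;
import org.apache.beam.sdk.io.gcp.pubsub.PubsubIO;
import org.apache.beam.sdk.io.jdbc.JdbcIO;

// Generic sketch only; the real PubsubToJdbc.run() adds template-specific options,
// schema handling and error handling. The getters on 'options' are assumptions.
public static PipelineResult run(PubsubToJdbcOptions options) {
  Pipeline pipeline = Pipeline.create(options);

  pipeline
      .apply(
          "ReadPubsubMessages",
          PubsubIO.readStrings().fromSubscription(options.getInputSubscription()))
      .apply(
          "WriteToJdbc",
          JdbcIO.<String>write()
              .withDataSourceConfiguration(
                  JdbcIO.DataSourceConfiguration.create(
                      options.getDriverClassName(), options.getConnectionUrl()))
              .withStatement(options.getStatement())
              .withPreparedStatementSetter(
                  (element, statement) -> statement.setString(1, element)));

  return pipeline.run();
}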
Use of com.google.cloud.teleport.v2.transforms.DeleteBigQueryDataFn.Options in project DataflowTemplates by GoogleCloudPlatform.
The class DeleteBigQueryDataFn, method processElement.
@ProcessElement
public void processElement(
    @Element KV<BigQueryTable, BigQueryTablePartition> input, PipelineOptions options) {
  BigQueryTable t = input.getKey();
  BigQueryTablePartition p = input.getValue();

  if (t.isPartitioned() && p == null) {
    throw new IllegalStateException(
        String.format("No partition to delete provided for a partitioned table %s.", t.getTableName()));
  }
  if (!t.isPartitioned() && p != null) {
    throw new IllegalStateException(
        String.format(
            "Got unexpected partition %s to delete for a non-partitioned table %s.",
            p.getPartitionName(), t.getTableName()));
  }

  if (!options.as(Options.class).getDeleteSourceData()) {
    if (t.isPartitioned()) {
      LOG.info(
          "Skipping source BigQuery data deletion for partition {}${}.",
          t.getTableName(), p.getPartitionName());
    } else {
      LOG.info("Skipping source BigQuery data deletion for table {}.", t.getTableName());
    }
    return;
  }

  if (t.isPartitioned()) {
    deletePartition(t, p);
  } else {
    deleteTable(t);
  }
}
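The options.as(Options.class).getDeleteSourceData() call is what ties this DoFn to DeleteBigQueryDataFn.Options, the interface this page aggregates. A minimal sketch of such an options interface, assuming a single boolean flag that defaults to keeping the source data (the actual declaration in DataflowTemplates may use a different description or default):

import org.apache.beam.sdk.options.Default;
import org.apache.beam.sdk.options.Description;
import org.apache.beam.sdk.options.PipelineOptions;

// Sketch of an Options interface in the shape of DeleteBigQueryDataFn.Options;
// the real declaration may differ in wording and default value.
public interface Options extends PipelineOptions {

  @Description("Whether to delete the source BigQuery data after it has been processed.")
  @Default.Boolean(false)
  Boolean getDeleteSourceData();

  void setDeleteSourceData(Boolean deleteSourceData);
}

Because any PipelineOptions instance can be viewed as any registered options interface through options.as(Options.class), the DoFn can read the flag at processing time without holding a reference to the concrete options class used to launch the pipeline.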
Use of com.google.cloud.teleport.v2.transforms.DeleteBigQueryDataFn.Options in project DataflowTemplates by GoogleCloudPlatform.
The class FileFormatConversionTest, method testParquetToAvroE2E.
/**
* Tests if the Parquet to Avro pipeline transforms data correctly and stores it in an Avro file.
*/
@Test
public void testParquetToAvroE2E() {
  FileFormatConversionOptions options =
      PipelineOptionsFactory.create().as(FileFormatConversionOptions.class);
  final String tempDir = temporaryFolder.getRoot().getAbsolutePath() + "/";

  options.setInputFileFormat(PARQUET);
  options.setOutputFileFormat(AVRO);
  options.setInputFileSpec(PARQUET_FILE_PATH);
  options.setOutputBucket(tempDir);
  options.setSchema(SCHEMA_FILE_PATH);

  Schema schema = SchemaUtils.getAvroSchema(SCHEMA_FILE_PATH);
  GenericRecord genericRecords = new GenericData.Record(schema);
  genericRecords.put("id", "007");
  genericRecords.put("state", "CA");
  genericRecords.put("price", 26.23);

  mainPipeline.apply(
      "TestParquetToAvro",
      FileFormatConversionFactory.FileFormat.newBuilder()
          .setOptions(options)
          .setInputFileFormat(PARQUET)
          .setOutputFileFormat(AVRO)
          .build());
  mainPipeline.run();

  PCollection<GenericRecord> readAvroFile =
      readPipeline.apply(
          "ReadAvroFile",
          AvroConverters.ReadAvroFile.newBuilder()
              .withInputFileSpec(tempDir + "*")
              .withSchema(SCHEMA_FILE_PATH)
              .build());
  PAssert.that(readAvroFile).containsInAnyOrder(genericRecords);
  readPipeline.run();
}
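The mainPipeline, readPipeline and temporaryFolder fields used above are JUnit rules declared elsewhere in the test class. A typical declaration for this kind of two-pipeline Beam test is sketched below; the exact field setup in FileFormatConversionTest may differ:

import org.apache.beam.sdk.testing.TestPipeline;
import org.junit.Rule;
import org.junit.rules.TemporaryFolder;

// Sketch of the rule setup assumed by the test above, not the actual test class.
public class FileFormatConversionTestSketch {

  // One pipeline performs the conversion, a second one reads the output back for PAssert.
  @Rule public final transient TestPipeline mainPipeline = TestPipeline.create();
  @Rule public final transient TestPipeline readPipeline = TestPipeline.create();

  // Scratch directory used as the output "bucket" for the converted files.
  @Rule public TemporaryFolder temporaryFolder = new TemporaryFolder();
}

PAssert registers its assertion on readPipeline, so the final readPipeline.run() is what actually verifies the converted records.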