
Example 6 with Options

Use of com.google.cloud.teleport.v2.transforms.DeleteBigQueryDataFn.Options in project DataflowTemplates by GoogleCloudPlatform.

From class DataplexJdbcIngestion, method main.

/**
 * Main entry point for pipeline execution.
 *
 * @param args Command line arguments to the pipeline.
 */
public static void main(String[] args) throws IOException {
    DataplexJdbcIngestionOptions options = PipelineOptionsFactory.fromArgs(args).withValidation().as(DataplexJdbcIngestionOptions.class);
    Pipeline pipeline = Pipeline.create(options);
    DataplexClient dataplexClient = DefaultDataplexClient.withDefaultClient(options.getGcpCredential());
    String assetName = options.getOutputAsset();
    GoogleCloudDataplexV1Asset asset = resolveAsset(assetName, dataplexClient);
    DynamicDataSourceConfiguration dataSourceConfig = configDataSource(options);
    String assetType = asset.getResourceSpec().getType();
    if (DataplexAssetResourceSpec.BIGQUERY_DATASET.name().equals(assetType)) {
        buildBigQueryPipeline(pipeline, options, dataSourceConfig);
    } else if (DataplexAssetResourceSpec.STORAGE_BUCKET.name().equals(assetType)) {
        String targetRootPath = "gs://" + asset.getResourceSpec().getName() + "/" + options.getOutputTable();
        buildGcsPipeline(pipeline, options, dataSourceConfig, targetRootPath);
    } else {
        throw new IllegalArgumentException(String.format("Asset %s is of type %s. Only %s and %s are supported.", assetName, assetType, DataplexAssetResourceSpec.BIGQUERY_DATASET.name(), DataplexAssetResourceSpec.STORAGE_BUCKET.name()));
    }
    pipeline.run();
}
Also used: DataplexJdbcIngestionOptions (com.google.cloud.teleport.v2.options.DataplexJdbcIngestionOptions), DefaultDataplexClient (com.google.cloud.teleport.v2.clients.DefaultDataplexClient), DataplexClient (com.google.cloud.teleport.v2.clients.DataplexClient), DynamicDataSourceConfiguration (com.google.cloud.teleport.v2.io.DynamicJdbcIO.DynamicDataSourceConfiguration), GoogleCloudDataplexV1Asset (com.google.api.services.dataplex.v1.model.GoogleCloudDataplexV1Asset), Pipeline (org.apache.beam.sdk.Pipeline)
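The options object above is produced with Beam's standard PipelineOptionsFactory.fromArgs(args).withValidation().as(...) pattern. Below is a minimal sketch of the kind of PipelineOptions sub-interface that pattern consumes; the interface name and getters are illustrative, not the actual DataplexJdbcIngestionOptions definition.

import org.apache.beam.sdk.options.Description;
import org.apache.beam.sdk.options.PipelineOptions;
import org.apache.beam.sdk.options.Validation;

/** Illustrative options interface; the names here are assumptions, not the real template's. */
public interface ExampleIngestionOptions extends PipelineOptions {

    @Description("Dataplex output asset name, resolved via DataplexClient at startup.")
    @Validation.Required
    String getOutputAsset();

    void setOutputAsset(String value);

    @Description("Output table name, appended to the bucket path for GCS targets.")
    String getOutputTable();

    void setOutputTable(String value);
}

Because the factory is called with withValidation(), any getter annotated @Validation.Required must be supplied on the command line, so a misconfigured launch fails before the pipeline graph is built.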

Example 7 with Options

Use of com.google.cloud.teleport.v2.transforms.DeleteBigQueryDataFn.Options in project DataflowTemplates by GoogleCloudPlatform.

From class JdbcToPubsub, method main.

/**
 * Main entry point for pipeline execution.
 *
 * @param args Command line arguments to the pipeline.
 */
public static void main(String[] args) {
    JdbcToPubsubOptions options = PipelineOptionsFactory.fromArgs(args).withValidation().as(JdbcToPubsubOptions.class);
    run(options);
}
Also used: JdbcToPubsubOptions (com.google.cloud.teleport.v2.options.JdbcToPubsubOptions)
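Here main only parses the options and delegates to run. A hedged sketch of what such a run method typically contains follows, written against stock Beam JdbcIO and PubsubIO rather than the template's internal DynamicJdbcIO wrapper; the option getters (getDriverClassName, getConnectionUrl, getSqlQuery, getOutputTopic) are assumed names.

import org.apache.beam.sdk.Pipeline;
import org.apache.beam.sdk.PipelineResult;
import org.apache.beam.sdk.coders.StringUtf8Coder;
import org.apache.beam.sdk.io.gcp.pubsub.PubsubIO;
import org.apache.beam.sdk.io.jdbc.JdbcIO;

public static PipelineResult run(JdbcToPubsubOptions options) {
    Pipeline pipeline = Pipeline.create(options);
    pipeline
        .apply("ReadFromJdbc",
            JdbcIO.<String>read()
                .withDataSourceConfiguration(JdbcIO.DataSourceConfiguration.create(
                    options.getDriverClassName(), options.getConnectionUrl()))
                .withQuery(options.getSqlQuery())
                // Simplification: emit only the first column as a string. The real
                // template serializes whole rows (e.g. as JSON) before publishing.
                .withRowMapper((JdbcIO.RowMapper<String>) rs -> rs.getString(1))
                .withCoder(StringUtf8Coder.of()))
        .apply("WriteToPubsub", PubsubIO.writeStrings().to(options.getOutputTopic()));
    return pipeline.run();
}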

Example 8 with Options

Use of com.google.cloud.teleport.v2.transforms.DeleteBigQueryDataFn.Options in project DataflowTemplates by GoogleCloudPlatform.

From class PubsubToJdbc, method main.

/**
 * Main entry point for pipeline execution.
 *
 * @param args Command line arguments to the pipeline.
 */
public static void main(String[] args) {
    PubsubToJdbcOptions options = PipelineOptionsFactory.fromArgs(args).withValidation().as(PubsubToJdbcOptions.class);
    run(options);
}
Also used: PubsubToJdbcOptions (com.google.cloud.teleport.v2.options.PubsubToJdbcOptions)
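This template is the mirror image of the previous one. A sketch of the reverse direction, again using stock Beam connectors instead of the template's internals; the getters (getInputSubscription, getDriverClassName, getConnectionUrl, getStatement) are assumed names.

import org.apache.beam.sdk.Pipeline;
import org.apache.beam.sdk.PipelineResult;
import org.apache.beam.sdk.io.gcp.pubsub.PubsubIO;
import org.apache.beam.sdk.io.jdbc.JdbcIO;

public static PipelineResult run(PubsubToJdbcOptions options) {
    Pipeline pipeline = Pipeline.create(options);
    pipeline
        .apply("ReadFromPubsub",
            PubsubIO.readStrings().fromSubscription(options.getInputSubscription()))
        .apply("WriteToJdbc",
            JdbcIO.<String>write()
                .withDataSourceConfiguration(JdbcIO.DataSourceConfiguration.create(
                    options.getDriverClassName(), options.getConnectionUrl()))
                // Simplification: a single-parameter insert statement; the real
                // template binds one parameter per field of the incoming message.
                .withStatement(options.getStatement())
                .withPreparedStatementSetter(
                    (JdbcIO.PreparedStatementSetter<String>)
                        (element, stmt) -> stmt.setString(1, element)));
    return pipeline.run();
}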

Example 9 with Options

Use of com.google.cloud.teleport.v2.transforms.DeleteBigQueryDataFn.Options in project DataflowTemplates by GoogleCloudPlatform.

From class DeleteBigQueryDataFn, method processElement.

@ProcessElement
public void processElement(@Element KV<BigQueryTable, BigQueryTablePartition> input, PipelineOptions options) {
    BigQueryTable t = input.getKey();
    BigQueryTablePartition p = input.getValue();
    if (t.isPartitioned() && p == null) {
        throw new IllegalStateException(String.format("No partition to delete provided for a partitioned table %s.", t.getTableName()));
    }
    if (!t.isPartitioned() && p != null) {
        throw new IllegalStateException(String.format("Got unexpected partition %s to delete for a non-partitioned table %s.", p.getPartitionName(), t.getTableName()));
    }
    if (!options.as(Options.class).getDeleteSourceData()) {
        if (t.isPartitioned()) {
            LOG.info("Skipping source BigQuery data deletion for partition {}${}.", t.getTableName(), p.getPartitionName());
        } else {
            LOG.info("Skipping source BigQuery data deletion for table {}.", t.getTableName());
        }
        return;
    }
    if (t.isPartitioned()) {
        deletePartition(t, p);
    } else {
        deleteTable(t);
    }
}
Also used: BigQueryTablePartition (com.google.cloud.teleport.v2.values.BigQueryTablePartition), BigQueryTable (com.google.cloud.teleport.v2.values.BigQueryTable)
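This is the one example that exercises DeleteBigQueryDataFn.Options directly: options.as(Options.class) views the generic PipelineOptions the DoFn receives as the nested interface in order to read the flag. A sketch of the likely shape of that interface follows; the @Description text and the default value are assumptions.

import org.apache.beam.sdk.options.Default;
import org.apache.beam.sdk.options.Description;
import org.apache.beam.sdk.options.PipelineOptions;

/** Sketch of DeleteBigQueryDataFn.Options; annotation details are assumed. */
public interface Options extends PipelineOptions {

    @Description("Whether to delete the source BigQuery data after it has been copied.")
    @Default.Boolean(false)
    Boolean getDeleteSourceData();

    void setDeleteSourceData(Boolean value);
}

Because Beam lets any PipelineOptions be viewed as any PipelineOptions sub-interface via as(), the DoFn can read this flag from whatever options object the enclosing pipeline was launched with, without the pipeline's own options interface having to extend Options.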

Example 10 with Options

Use of com.google.cloud.teleport.v2.transforms.DeleteBigQueryDataFn.Options in project DataflowTemplates by GoogleCloudPlatform.

From class FileFormatConversionTest, method testParquetToAvroE2E.

/**
 * Tests if the Parquet to Avro pipeline transforms data correctly and stores it in an Avro file.
 */
@Test
public void testParquetToAvroE2E() {
    FileFormatConversionOptions options = PipelineOptionsFactory.create().as(FileFormatConversionOptions.class);
    final String tempDir = temporaryFolder.getRoot().getAbsolutePath() + "/";
    options.setInputFileFormat(PARQUET);
    options.setOutputFileFormat(AVRO);
    options.setInputFileSpec(PARQUET_FILE_PATH);
    options.setOutputBucket(tempDir);
    options.setSchema(SCHEMA_FILE_PATH);
    Schema schema = SchemaUtils.getAvroSchema(SCHEMA_FILE_PATH);
    GenericRecord genericRecords = new GenericData.Record(schema);
    genericRecords.put("id", "007");
    genericRecords.put("state", "CA");
    genericRecords.put("price", 26.23);
    mainPipeline.apply("TestParquetToAvro", FileFormatConversionFactory.FileFormat.newBuilder().setOptions(options).setInputFileFormat(PARQUET).setOutputFileFormat(AVRO).build());
    mainPipeline.run();
    PCollection<GenericRecord> readAvroFile = readPipeline.apply("ReadAvroFile", AvroConverters.ReadAvroFile.newBuilder().withInputFileSpec(tempDir + "*").withSchema(SCHEMA_FILE_PATH).build());
    PAssert.that(readAvroFile).containsInAnyOrder(genericRecords);
    readPipeline.run();
}
Also used: FileFormatConversionOptions (com.google.cloud.teleport.v2.templates.FileFormatConversion.FileFormatConversionOptions), Schema (org.apache.avro.Schema), GenericRecord (org.apache.avro.generic.GenericRecord), Test (org.junit.Test)
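The test drives two pipelines: mainPipeline performs the conversion, and readPipeline reads the result back once the first has finished, with PAssert checking the round-tripped record. A sketch of the JUnit fixture fields such a test class would declare (the PARQUET_FILE_PATH and SCHEMA_FILE_PATH constants are elided):

import org.apache.beam.sdk.testing.TestPipeline;
import org.junit.Rule;
import org.junit.rules.TemporaryFolder;

// Beam convention: TestPipeline rules are transient so the test class stays serializable.
@Rule public final transient TestPipeline mainPipeline = TestPipeline.create();
@Rule public final transient TestPipeline readPipeline = TestPipeline.create();

// Provides the scratch directory used as the conversion's output bucket.
@Rule public TemporaryFolder temporaryFolder = new TemporaryFolder();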

Aggregations

Test (org.junit.Test): 63
PCollectionTuple (org.apache.beam.sdk.values.PCollectionTuple): 25
FailsafeElement (com.google.cloud.teleport.v2.values.FailsafeElement): 20
Pipeline (org.apache.beam.sdk.Pipeline): 19
CoderRegistry (org.apache.beam.sdk.coders.CoderRegistry): 19
BigQueryTable (com.google.cloud.teleport.v2.values.BigQueryTable): 15
GenericRecord (org.apache.avro.generic.GenericRecord): 12
Category (org.junit.experimental.categories.Category): 12
Filter (com.google.cloud.teleport.v2.utils.BigQueryMetadataLoader.Filter): 10
BigQueryTablePartition (com.google.cloud.teleport.v2.values.BigQueryTablePartition): 10
PubSubToElasticsearchOptions (com.google.cloud.teleport.v2.elasticsearch.options.PubSubToElasticsearchOptions): 9
TableRow (com.google.api.services.bigquery.model.TableRow): 8
DataplexClient (com.google.cloud.teleport.v2.clients.DataplexClient): 8
FileFormatConversionOptions (com.google.cloud.teleport.v2.templates.FileFormatConversion.FileFormatConversionOptions): 8
KV (org.apache.beam.sdk.values.KV): 8
ArrayList (java.util.ArrayList): 7
ElasticsearchWriteOptions (com.google.cloud.teleport.v2.elasticsearch.options.ElasticsearchWriteOptions): 6
GCSToElasticsearchOptions (com.google.cloud.teleport.v2.elasticsearch.options.GCSToElasticsearchOptions): 6
FileFormatConversionOptions (com.google.cloud.teleport.v2.templates.DataplexFileFormatConversion.FileFormatConversionOptions): 6
PubSubProtoToBigQueryOptions (com.google.cloud.teleport.v2.templates.PubsubProtoToBigQuery.PubSubProtoToBigQueryOptions): 6