
Example 1 with DataStreamIO

Use of com.google.cloud.teleport.v2.cdc.sources.DataStreamIO in project DataflowTemplates by GoogleCloudPlatform.

From the class DataStreamToSQL, method run.

/**
 * Runs the pipeline with the supplied options.
 *
 * @param options The execution parameters to the pipeline.
 * @return The result of the pipeline execution.
 */
public static PipelineResult run(Options options) {
    /*
     * Stages:
     *   1) Ingest and Normalize Data to FailsafeElement with JSON Strings
     *   2) Write JSON Strings to SQL DML Objects
     *   3) Filter stale rows using stateful PK transform
     *   4) Write DML statements to SQL Database via jdbc
     */
    Pipeline pipeline = Pipeline.create(options);
    CdcJdbcIO.DataSourceConfiguration dataSourceConfiguration = getDataSourceConfiguration(options);
    validateOptions(options, dataSourceConfiguration);
    Map<String, String> schemaMap = parseSchemaMap(options.getSchemaMap());
    /*
     * Stage 1: Ingest and Normalize Data to FailsafeElement with JSON Strings
     *   a) Read DataStream data from GCS into JSON String FailsafeElements (datastreamJsonRecords)
     */
    PCollection<FailsafeElement<String, String>> datastreamJsonRecords = pipeline.apply(
        new DataStreamIO(
                options.getStreamName(), options.getInputFilePattern(), options.getInputFileFormat(),
                options.getGcsPubSubSubscription(), options.getRfcStartDateTime())
            .withLowercaseSourceColumns()
            .withHashColumnValue("_metadata_row_id", "rowid"));
    /*
     * Stage 2: Write JSON Strings to SQL Insert Strings
     *   a) Convert JSON String FailsafeElements to TableRows (tableRowRecords)
     * Stage 3: Filter stale rows using stateful PK transform
     */
    PCollection<DmlInfo> dmlStatements = datastreamJsonRecords
        .apply("Format to DML", CreateDml.of(dataSourceConfiguration).withSchemaMap(schemaMap))
        .apply("DML Stateful Processing", ProcessDml.statefulOrderByPK());
    /*
     * Stage 4: Write Inserts to CloudSQL
     */
    dmlStatements.apply("Write to SQL",
        CdcJdbcIO.<DmlInfo>write()
            .withDataSourceConfiguration(dataSourceConfiguration)
            .withStatementFormatter(new CdcJdbcIO.StatementFormatter<DmlInfo>() {
                public String formatStatement(DmlInfo element) {
                    return element.getDmlSql();
                }
            }));
    // Execute the pipeline and return the result.
    return pipeline.run();
}
Also used : DataStreamIO(com.google.cloud.teleport.v2.cdc.sources.DataStreamIO) DmlInfo(com.google.cloud.teleport.v2.values.DmlInfo) CdcJdbcIO(com.google.cloud.teleport.v2.io.CdcJdbcIO) Pipeline(org.apache.beam.sdk.Pipeline) FailsafeElement(com.google.cloud.teleport.v2.values.FailsafeElement)
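For context, a minimal sketch of how a template like this is typically launched from a main method with Beam's PipelineOptionsFactory. The actual entry point and Options interface of DataStreamToSQL are not shown on this page, so the names below (DataStreamToSQL.Options, the launcher class) are assumptions, not the template's real code.

import org.apache.beam.sdk.PipelineResult;
import org.apache.beam.sdk.options.PipelineOptionsFactory;
import org.apache.beam.sdk.options.StreamingOptions;

public final class DataStreamToSQLLauncher {

    public static void main(String[] args) {
        // Parse --streamName, --inputFilePattern, --inputFileFormat, etc. into the
        // template's Options interface (assumed here to be DataStreamToSQL.Options).
        DataStreamToSQL.Options options =
            PipelineOptionsFactory.fromArgs(args).withValidation().as(DataStreamToSQL.Options.class);

        // Assumption: a CDC pipeline of this kind runs in streaming mode.
        options.as(StreamingOptions.class).setStreaming(true);

        // Delegate to the run(Options) method shown above.
        PipelineResult result = DataStreamToSQL.run(options);
        // Optionally block until the pipeline terminates.
        result.waitUntilFinish();
    }
}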

Example 2 with DataStreamIO

Use of com.google.cloud.teleport.v2.cdc.sources.DataStreamIO in project DataflowTemplates by GoogleCloudPlatform.

From the class DataStreamToSpanner, method run.

/**
 * Runs the pipeline with the supplied options.
 *
 * @param options The execution parameters to the pipeline.
 * @return The result of the pipeline execution.
 */
public static PipelineResult run(Options options) {
    /*
     * Stages:
     *   1) Ingest and Normalize Data to FailsafeElement with JSON Strings
     *   2) Write JSON Strings to Cloud Spanner
     *   3) Write Failures to GCS Dead Letter Queue
     */
    Pipeline pipeline = Pipeline.create(options);
    DeadLetterQueueManager dlqManager = buildDlqManager(options);
    /*
     * Stage 1: Ingest/Normalize Data to FailsafeElement with JSON Strings and
     * read Cloud Spanner information schema.
     *   a) Prepare spanner config and process information schema
     *   b) Read DataStream data from GCS into JSON String FailsafeElements
     *   c) Reconsume Dead Letter Queue data from GCS into JSON String FailsafeElements
     *   d) Flatten DataStream and DLQ Streams
     */
    // Prepare Spanner config
    SpannerConfig spannerConfig = ExposedSpannerConfig.create()
        .withHost(ValueProvider.StaticValueProvider.of(options.getSpannerHost()))
        .withInstanceId(ValueProvider.StaticValueProvider.of(options.getInstanceId()))
        .withDatabaseId(ValueProvider.StaticValueProvider.of(options.getDatabaseId()));
    /* Process information schema
     * 1) Read information schema from destination Cloud Spanner database
     * 2) Check if shadow tables are present and create if necessary
     * 3) Return new information schema
     */
    PCollection<Ddl> ddl = pipeline.apply("Process Information Schema",
        new ProcessInformationSchema(
            spannerConfig,
            options.getShouldCreateShadowTables(),
            options.getShadowTablePrefix(),
            options.getDatastreamSourceType()));
    PCollectionView<Ddl> ddlView = ddl.apply("Cloud Spanner DDL as view", View.asSingleton());
    PCollection<FailsafeElement<String, String>> datastreamJsonRecords = pipeline.apply(
        new DataStreamIO(
                options.getStreamName(), options.getInputFilePattern(), options.getInputFileFormat(),
                options.getGcsPubSubSubscription(), options.getRfcStartDateTime())
            .withFileReadConcurrency(options.getFileReadConcurrency()));
    // Elements sent to the Dead Letter Queue are reconsumed here.
    // The DLQManager, created from the PipelineOptions, is responsible for
    // building the pieces of the DLQ.
    PCollectionTuple reconsumedElements = dlqManager.getReconsumerDataTransform(
        pipeline.apply(dlqManager.dlqReconsumer(options.getDlqRetryMinutes())));
    PCollection<FailsafeElement<String, String>> dlqJsonRecords = reconsumedElements
        .get(DeadLetterQueueManager.RETRYABLE_ERRORS)
        .setCoder(FailsafeElementCoder.of(StringUtf8Coder.of(), StringUtf8Coder.of()));
    PCollection<FailsafeElement<String, String>> jsonRecords = PCollectionList.of(datastreamJsonRecords)
        .and(dlqJsonRecords)
        .apply(Flatten.pCollections())
        .apply("Reshuffle", Reshuffle.viaRandomKey());
    /*
     * Stage 2: Write records to Cloud Spanner
     */
    SpannerTransactionWriter.Result spannerWriteResults = jsonRecords.apply(
        "Write events to Cloud Spanner",
        new SpannerTransactionWriter(
            spannerConfig, ddlView, options.getShadowTablePrefix(), options.getDatastreamSourceType()));
    /*
     * Stage 3: Write failures to GCS Dead Letter Queue
     * a) Retryable errors are written to retry GCS Dead letter queue
     * b) Severe errors are written to severe GCS Dead letter queue
     */
    spannerWriteResults.retryableErrors()
        .apply("DLQ: Write retryable Failures to GCS", MapElements.via(new StringDeadLetterQueueSanitizer()))
        .setCoder(StringUtf8Coder.of())
        .apply("Write To DLQ", DLQWriteTransform.WriteDLQ.newBuilder()
            .withDlqDirectory(dlqManager.getRetryDlqDirectoryWithDateTime())
            .withTmpDirectory(dlqManager.getRetryDlqDirectory() + "tmp/")
            .build());
    PCollection<FailsafeElement<String, String>> dlqErrorRecords = reconsumedElements
        .get(DeadLetterQueueManager.PERMANENT_ERRORS)
        .setCoder(FailsafeElementCoder.of(StringUtf8Coder.of(), StringUtf8Coder.of()));
    PCollection<FailsafeElement<String, String>> permanentErrors = PCollectionList.of(dlqErrorRecords)
        .and(spannerWriteResults.permanentErrors())
        .apply(Flatten.pCollections())
        .apply("Reshuffle", Reshuffle.viaRandomKey());
    permanentErrors
        .apply("DLQ: Write Severe errors to GCS", MapElements.via(new StringDeadLetterQueueSanitizer()))
        .setCoder(StringUtf8Coder.of())
        .apply("Write To DLQ", DLQWriteTransform.WriteDLQ.newBuilder()
            .withDlqDirectory(dlqManager.getSevereDlqDirectoryWithDateTime())
            .withTmpDirectory(dlqManager.getSevereDlqDirectory() + "tmp/")
            .build());
    // Execute the pipeline and return the result.
    return pipeline.run();
}
Also used : SpannerConfig(org.apache.beam.sdk.io.gcp.spanner.SpannerConfig) ExposedSpannerConfig(org.apache.beam.sdk.io.gcp.spanner.ExposedSpannerConfig) DeadLetterQueueManager(com.google.cloud.teleport.v2.cdc.dlq.DeadLetterQueueManager) Ddl(com.google.cloud.teleport.v2.templates.spanner.ddl.Ddl) Pipeline(org.apache.beam.sdk.Pipeline) FailsafeElement(com.google.cloud.teleport.v2.values.FailsafeElement) DataStreamIO(com.google.cloud.teleport.v2.cdc.sources.DataStreamIO) ProcessInformationSchema(com.google.cloud.teleport.v2.templates.spanner.ProcessInformationSchema) PCollectionTuple(org.apache.beam.sdk.values.PCollectionTuple) StringDeadLetterQueueSanitizer(com.google.cloud.teleport.v2.cdc.dlq.StringDeadLetterQueueSanitizer)
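The flatten-and-reshuffle step used above (merging the Datastream and reconsumed DLQ streams before the Spanner write) relies only on core Beam primitives. A minimal, self-contained sketch of that pattern with plain String elements, independent of the template classes; all names here are illustrative.

import org.apache.beam.sdk.Pipeline;
import org.apache.beam.sdk.coders.StringUtf8Coder;
import org.apache.beam.sdk.options.PipelineOptionsFactory;
import org.apache.beam.sdk.transforms.Create;
import org.apache.beam.sdk.transforms.Flatten;
import org.apache.beam.sdk.transforms.Reshuffle;
import org.apache.beam.sdk.values.PCollection;
import org.apache.beam.sdk.values.PCollectionList;

public final class FlattenAndReshuffleSketch {

    public static void main(String[] args) {
        Pipeline pipeline = Pipeline.create(PipelineOptionsFactory.fromArgs(args).create());

        // Stand-ins for the Datastream records and the reconsumed DLQ records.
        PCollection<String> primary = pipeline.apply("Primary", Create.of("a", "b")).setCoder(StringUtf8Coder.of());
        PCollection<String> retried = pipeline.apply("DLQ", Create.of("c")).setCoder(StringUtf8Coder.of());

        // Flatten both sources into one stream, then reshuffle to break fusion and
        // redistribute elements before an expensive write stage.
        PCollection<String> merged = PCollectionList.of(primary).and(retried)
            .apply(Flatten.pCollections())
            .apply("Reshuffle", Reshuffle.viaRandomKey());

        pipeline.run();
    }
}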

Example 3 with DataStreamIO

Use of com.google.cloud.teleport.v2.cdc.sources.DataStreamIO in project DataflowTemplates by GoogleCloudPlatform.

From the class DataStreamToMongoDB, method run.

/**
 * Runs the pipeline with the supplied options.
 *
 * @param options The execution parameters to the pipeline.
 * @return The result of the pipeline execution.
 */
public static PipelineResult run(Options options) {
    /*
     * Stages:
     *   1) Ingest and Normalize Data to FailsafeElement with JSON Strings
     *   2) Push the data to MongoDB
     */
    Pipeline pipeline = Pipeline.create(options);
    /*
     * Stage 1: Ingest and Normalize Data to FailsafeElement with JSON Strings
     *   a) Read DataStream data from GCS into JSON String FailsafeElements (datastreamJsonRecords)
     */
    PCollection<FailsafeElement<String, String>> datastreamJsonRecords = pipeline.apply(
        new DataStreamIO(
                options.getStreamName(), options.getInputFilePattern(), options.getInputFileFormat(),
                options.getInputSubscription(), options.getRfcStartDateTime())
            .withFileReadConcurrency(options.getFileReadConcurrency()));
    PCollection<FailsafeElement<String, String>> jsonRecords =
        PCollectionList.of(datastreamJsonRecords).apply(Flatten.pCollections());
    /**
     * Performs the following steps:
     * 1. Converts JSON to BSON documents.
     * 2. Removes the metadata fields.
     * 3. Inserts the data into MongoDB collections.
     */
    jsonRecords.apply("jsonToDocuments", MapElements.via(new SimpleFunction<FailsafeElement<String, String>, Document>() {

        @Override
        public Document apply(FailsafeElement<String, String> jsonString) {
            String s = jsonString.getOriginalPayload();
            Document doc = Document.parse(s);
            return removeTableRowFields(doc, MAPPER_IGNORE_FIELDS);
        }
    })).apply("Write To MongoDB",
        MongoDbIO.write()
            .withUri(options.getMongoDBUri())
            .withDatabase(options.getDatabase())
            .withCollection(options.getCollection()));
    // Execute the pipeline and return the result.
    return pipeline.run();
}
Also used : DataStreamIO(com.google.cloud.teleport.v2.cdc.sources.DataStreamIO) Document(org.bson.Document) Pipeline(org.apache.beam.sdk.Pipeline) FailsafeElement(com.google.cloud.teleport.v2.values.FailsafeElement)
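The helper removeTableRowFields and the MAPPER_IGNORE_FIELDS constant are referenced above but not shown on this page. A plausible sketch of what such a helper could look like, built on org.bson.Document; the field names and class name are hypothetical and purely for illustration.

import java.util.Set;
import org.bson.Document;

final class DocumentCleaner {

    // Hypothetical set of Datastream metadata keys to strip before writing to MongoDB.
    static final Set<String> MAPPER_IGNORE_FIELDS = Set.of("_metadata_stream", "_metadata_timestamp");

    static Document removeTableRowFields(Document doc, Set<String> ignoreFields) {
        // Copy to avoid mutating the parsed document in place, then drop the metadata keys.
        Document cleaned = new Document(doc);
        for (String field : ignoreFields) {
            cleaned.remove(field);
        }
        return cleaned;
    }

    public static void main(String[] args) {
        Document doc = Document.parse("{\"id\": 1, \"_metadata_stream\": \"s\"}");
        // Prints the document with the metadata key removed.
        System.out.println(removeTableRowFields(doc, MAPPER_IGNORE_FIELDS).toJson());
    }
}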

Example 4 with DataStreamIO

Use of com.google.cloud.teleport.v2.cdc.sources.DataStreamIO in project DataflowTemplates by GoogleCloudPlatform.

From the class DataStreamToPostgres, method run.

/**
 * Runs the pipeline with the supplied options.
 *
 * @param options The execution parameters to the pipeline.
 * @return The result of the pipeline execution.
 */
public static PipelineResult run(Options options) {
    /*
     * Stages:
     *   1) Ingest and Normalize Data to FailsafeElement with JSON Strings
     *   2) Write JSON Strings to Postgres DML Objects
     *   3) Filter stale rows using stateful PK transform
     *   4) Write DML statements to Postgres
     */
    Pipeline pipeline = Pipeline.create(options);
    String jdbcDriverConnectionString = String.format(
        "jdbc:postgresql://%s:%s/%s",
        options.getDatabaseHost(), options.getDatabasePort(), options.getDatabaseName());
    CdcJdbcIO.DataSourceConfiguration dataSourceConfiguration = CdcJdbcIO.DataSourceConfiguration
        .create("org.postgresql.Driver", jdbcDriverConnectionString)
        .withUsername(options.getDatabaseUser())
        .withPassword(options.getDatabasePassword())
        .withMaxIdleConnections(new Integer(0));
    validateOptions(options, dataSourceConfiguration);
    /*
     * Stage 1: Ingest and Normalize Data to FailsafeElement with JSON Strings
     *   a) Read DataStream data from GCS into JSON String FailsafeElements (datastreamJsonRecords)
     */
    PCollection<FailsafeElement<String, String>> datastreamJsonRecords = pipeline.apply(
        new DataStreamIO(
                options.getStreamName(), options.getInputFilePattern(), options.getInputFileFormat(),
                options.getGcsPubSubSubscription(), options.getRfcStartDateTime())
            .withLowercaseSourceColumns()
            .withHashColumnValue("_metadata_row_id", "rowid"));
    /*
     * Stage 2: Write JSON Strings to Postgres Insert Strings
     *   a) Convert JSON String FailsafeElements to TableRows (tableRowRecords)
     * Stage 3: Filter stale rows using stateful PK transform
     */
    PCollection<DmlInfo> dmlStatements = datastreamJsonRecords
        .apply("Format to Postgres DML", CreateDml.createDmlObjects(dataSourceConfiguration))
        .apply("DML Stateful Processing", ProcessDml.statefulOrderByPK());
    /*
     * Stage 4: Write Inserts to Postgres
     */
    dmlStatements.apply("Write to Postgres",
        CdcJdbcIO.<DmlInfo>write()
            .withDataSourceConfiguration(dataSourceConfiguration)
            .withStatementFormatter(new CdcJdbcIO.StatementFormatter<DmlInfo>() {
                public String formatStatement(DmlInfo element) {
                    return element.getDmlSql();
                }
            }));
    // Execute the pipeline and return the result.
    return pipeline.run();
}
Also used : DataStreamIO(com.google.cloud.teleport.v2.cdc.sources.DataStreamIO) DmlInfo(com.google.cloud.teleport.v2.values.DmlInfo) CdcJdbcIO(com.google.cloud.teleport.v2.io.CdcJdbcIO) Pipeline(org.apache.beam.sdk.Pipeline) FailsafeElement(com.google.cloud.teleport.v2.values.FailsafeElement)
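Both JDBC examples use an anonymous CdcJdbcIO.StatementFormatter. If the formatter is reused across templates, it can be lifted into a named class. A minimal sketch, assuming StatementFormatter<T> is a serializable interface whose only required method is formatStatement(T), as the anonymous classes above suggest; the class name is illustrative.

import com.google.cloud.teleport.v2.io.CdcJdbcIO;
import com.google.cloud.teleport.v2.values.DmlInfo;

class DmlInfoStatementFormatter implements CdcJdbcIO.StatementFormatter<DmlInfo> {

    @Override
    public String formatStatement(DmlInfo element) {
        // DmlInfo already carries the fully rendered SQL statement.
        return element.getDmlSql();
    }
}

It would then be passed as .withStatementFormatter(new DmlInfoStatementFormatter()) in place of the anonymous class.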

Example 5 with DataStreamIO

Use of com.google.cloud.teleport.v2.cdc.sources.DataStreamIO in project DataflowTemplates by GoogleCloudPlatform.

From the class DataStreamToBigQuery, method run.

/**
 * Runs the pipeline with the supplied options.
 *
 * @param options The execution parameters to the pipeline.
 * @return The result of the pipeline execution.
 */
public static PipelineResult run(Options options) {
    /*
     * Stages:
     *   1) Ingest and Normalize Data to FailsafeElement with JSON Strings
     *   2) Write JSON Strings to TableRow Collection
     *       - Optionally apply a UDF
     *   3) BigQuery Output of TableRow Data
     *     a) Map New Columns & Write to Staging Tables
     *     b) Map New Columns & Merge Staging to Target Table
     *   4) Write Failures to GCS Dead Letter Queue
     */
    Pipeline pipeline = Pipeline.create(options);
    DeadLetterQueueManager dlqManager = buildDlqManager(options);
    String bigqueryProjectId = getBigQueryProjectId(options);
    String dlqDirectory = dlqManager.getRetryDlqDirectoryWithDateTime();
    String tempDlqDir = dlqManager.getRetryDlqDirectory() + "tmp/";
    InputUDFToTableRow<String> failsafeTableRowTransformer = new InputUDFToTableRow<String>(
        options.getJavascriptTextTransformGcsPath(),
        options.getJavascriptTextTransformFunctionName(),
        options.getPythonTextTransformGcsPath(),
        options.getPythonTextTransformFunctionName(),
        options.getRuntimeRetries(),
        FAILSAFE_ELEMENT_CODER);
    /*
     * Stage 1: Ingest and Normalize Data to FailsafeElement with JSON Strings
     *   a) Read DataStream data from GCS into JSON String FailsafeElements (datastreamJsonRecords)
     *   b) Reconsume Dead Letter Queue data from GCS into JSON String FailsafeElements
     *     (dlqJsonRecords)
     *   c) Flatten DataStream and DLQ Streams (jsonRecords)
     */
    PCollection<FailsafeElement<String, String>> datastreamJsonRecords = pipeline.apply(
        new DataStreamIO(
                options.getStreamName(), options.getInputFilePattern(), options.getInputFileFormat(),
                options.getGcsPubSubSubscription(), options.getRfcStartDateTime())
            .withFileReadConcurrency(options.getFileReadConcurrency()));
    // Elements sent to the Dead Letter Queue are reconsumed here.
    // The DLQManager, created from the PipelineOptions, is responsible for
    // building the pieces of the DLQ.
    PCollection<FailsafeElement<String, String>> dlqJsonRecords = pipeline
        .apply("DLQ Consumer/reader", dlqManager.dlqReconsumer(options.getDlqRetryMinutes()))
        .apply("DLQ Consumer/cleaner", ParDo.of(new DoFn<String, FailsafeElement<String, String>>() {
            @ProcessElement
            public void process(@Element String input, OutputReceiver<FailsafeElement<String, String>> receiver) {
                receiver.output(FailsafeElement.of(input, input));
            }
        }))
        .setCoder(FAILSAFE_ELEMENT_CODER);
    PCollection<FailsafeElement<String, String>> jsonRecords = PCollectionList.of(datastreamJsonRecords)
        .and(dlqJsonRecords)
        .apply("Merge Datastream & DLQ", Flatten.pCollections());
    /*
     * Stage 2: Write JSON Strings to TableRow PCollectionTuple
     *   a) Optionally apply a Javascript or Python UDF
     *   b) Convert JSON String FailsafeElements to TableRows (tableRowRecords)
     */
    PCollectionTuple tableRowRecords = jsonRecords.apply("UDF to TableRow/udf", failsafeTableRowTransformer);
    PCollection<TableRow> shuffledTableRows = tableRowRecords
        .get(failsafeTableRowTransformer.transformOut)
        .apply("UDF to TableRow/ReShuffle", Reshuffle.<TableRow>viaRandomKey().withNumBuckets(100));
    /*
     * Stage 3: BigQuery Output of TableRow Data
     *   a) Map New Columns & Write to Staging Tables (writeResult)
     *   b) Map New Columns & Merge Staging to Target Table (null)
     *
     *   failsafe: writeResult.getFailedInsertsWithErr()
     */
    // TODO(beam 2.23): InsertRetryPolicy should be CDC compliant
    Set<String> fieldsToIgnore = getFieldsToIgnore(options.getIgnoreFields());
    WriteResult writeResult = shuffledTableRows
        .apply("Map to Staging Tables",
            new DataStreamMapper(
                    options.as(GcpOptions.class),
                    options.getOutputProjectId(),
                    options.getOutputStagingDatasetTemplate(),
                    options.getOutputStagingTableNameTemplate())
                .withDataStreamRootUrl(options.getDataStreamRootUrl())
                .withDefaultSchema(BigQueryDefaultSchemas.DATASTREAM_METADATA_SCHEMA)
                .withDayPartitioning(true)
                .withIgnoreFields(fieldsToIgnore))
        .apply("Write Successful Records",
            BigQueryIO.<KV<TableId, TableRow>>write()
                .to(new BigQueryDynamicConverters().bigQueryDynamicDestination())
                .withFormatFunction(element -> removeTableRowFields(element.getValue(), fieldsToIgnore))
                .withFormatRecordOnFailureFunction(element -> element.getValue())
                .withoutValidation()
                .ignoreInsertIds()
                .withCreateDisposition(CreateDisposition.CREATE_NEVER)
                .withWriteDisposition(WriteDisposition.WRITE_APPEND)
                .withExtendedErrorInfo()
                .withMethod(BigQueryIO.Write.Method.STREAMING_INSERTS)
                .withFailedInsertRetryPolicy(InsertRetryPolicy.retryTransientErrors()));
    if (options.getApplyMerge()) {
        shuffledTableRows
            .apply("Map To Replica Tables",
                new DataStreamMapper(
                        options.as(GcpOptions.class),
                        options.getOutputProjectId(),
                        options.getOutputDatasetTemplate(),
                        options.getOutputTableNameTemplate())
                    .withDataStreamRootUrl(options.getDataStreamRootUrl())
                    .withDefaultSchema(BigQueryDefaultSchemas.DATASTREAM_METADATA_SCHEMA)
                    .withIgnoreFields(fieldsToIgnore))
            .apply("BigQuery Merge/Build MergeInfo",
                new MergeInfoMapper(
                    bigqueryProjectId,
                    options.getOutputStagingDatasetTemplate(),
                    options.getOutputStagingTableNameTemplate(),
                    options.getOutputDatasetTemplate(),
                    options.getOutputTableNameTemplate()))
            .apply("BigQuery Merge/Merge into Replica Tables",
                BigQueryMerger.of(MergeConfiguration.bigQueryConfiguration()
                    .withMergeWindowDuration(Duration.standardMinutes(options.getMergeFrequencyMinutes()))));
    }
    /*
     * Stage 4: Write Failures to GCS Dead Letter Queue
     */
    PCollection<String> udfDlqJson = PCollectionList
        .of(tableRowRecords.get(failsafeTableRowTransformer.udfDeadletterOut))
        .and(tableRowRecords.get(failsafeTableRowTransformer.transformDeadletterOut))
        .apply("UDF Failures/Flatten", Flatten.pCollections())
        .apply("UDF Failures/Sanitize", MapElements.via(new StringDeadLetterQueueSanitizer()));
    PCollection<String> bqWriteDlqJson = writeResult.getFailedInsertsWithErr()
        .apply("BigQuery Failures", MapElements.via(new BigQueryDeadLetterQueueSanitizer()));
    PCollectionList.of(udfDlqJson)
        .and(bqWriteDlqJson)
        .apply("Write To DLQ/Flatten", Flatten.pCollections())
        .apply("Write To DLQ/Writer", DLQWriteTransform.WriteDLQ.newBuilder()
            .withDlqDirectory(dlqDirectory)
            .withTmpDirectory(tempDlqDir)
            .build());
    // Execute the pipeline and return the result.
    return pipeline.run();
}
Also used : TableId(com.google.cloud.bigquery.TableId) KV(org.apache.beam.sdk.values.KV) DataStreamIO(com.google.cloud.teleport.v2.cdc.sources.DataStreamIO) PipelineResult(org.apache.beam.sdk.PipelineResult) Default(org.apache.beam.sdk.options.Default) InsertRetryPolicy(org.apache.beam.sdk.io.gcp.bigquery.InsertRetryPolicy) Duration(org.joda.time.Duration) LoggerFactory(org.slf4j.LoggerFactory) BigQueryOptions(org.apache.beam.sdk.io.gcp.bigquery.BigQueryOptions) PipelineOptionsFactory(org.apache.beam.sdk.options.PipelineOptionsFactory) DLQWriteTransform(com.google.cloud.teleport.v2.transforms.DLQWriteTransform) InputUDFOptions(com.google.cloud.teleport.v2.transforms.UDFTextTransformer.InputUDFOptions) StringUtf8Coder(org.apache.beam.sdk.coders.StringUtf8Coder) Description(org.apache.beam.sdk.options.Description) HashSet(java.util.HashSet) PCollectionList(org.apache.beam.sdk.values.PCollectionList) DataStreamMapper(com.google.cloud.teleport.v2.cdc.mappers.DataStreamMapper) FailsafeElementCoder(com.google.cloud.teleport.v2.coders.FailsafeElementCoder) BigQueryDefaultSchemas(com.google.cloud.teleport.v2.cdc.mappers.BigQueryDefaultSchemas) TupleTag(org.apache.beam.sdk.values.TupleTag) TableRow(com.google.api.services.bigquery.model.TableRow) PCollectionTuple(org.apache.beam.sdk.values.PCollectionTuple) BigQueryMerger(com.google.cloud.teleport.v2.cdc.merge.BigQueryMerger) Pipeline(org.apache.beam.sdk.Pipeline) Splitter(com.google.common.base.Splitter) PipelineOptions(org.apache.beam.sdk.options.PipelineOptions) Flatten(org.apache.beam.sdk.transforms.Flatten) DoFn(org.apache.beam.sdk.transforms.DoFn) MapElements(org.apache.beam.sdk.transforms.MapElements) Reshuffle(org.apache.beam.sdk.transforms.Reshuffle) DeadLetterQueueManager(com.google.cloud.teleport.v2.cdc.dlq.DeadLetterQueueManager) Logger(org.slf4j.Logger) MergeInfoMapper(com.google.cloud.teleport.v2.cdc.mappers.MergeInfoMapper) GcpOptions(org.apache.beam.sdk.extensions.gcp.options.GcpOptions) BigQueryIO(org.apache.beam.sdk.io.gcp.bigquery.BigQueryIO) StreamingOptions(org.apache.beam.sdk.options.StreamingOptions) Set(java.util.Set) CreateDisposition(org.apache.beam.sdk.io.gcp.bigquery.BigQueryIO.Write.CreateDisposition) DataflowPipelineOptions(org.apache.beam.runners.dataflow.options.DataflowPipelineOptions) WriteResult(org.apache.beam.sdk.io.gcp.bigquery.WriteResult) PCollection(org.apache.beam.sdk.values.PCollection) ParDo(org.apache.beam.sdk.transforms.ParDo) WriteDisposition(org.apache.beam.sdk.io.gcp.bigquery.BigQueryIO.Write.WriteDisposition) StringDeadLetterQueueSanitizer(com.google.cloud.teleport.v2.cdc.dlq.StringDeadLetterQueueSanitizer) FailsafeElement(com.google.cloud.teleport.v2.values.FailsafeElement) Pattern(java.util.regex.Pattern) InputUDFToTableRow(com.google.cloud.teleport.v2.transforms.UDFTextTransformer.InputUDFToTableRow) MergeConfiguration(com.google.cloud.teleport.v2.cdc.merge.MergeConfiguration)
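The body of getFieldsToIgnore(options.getIgnoreFields()) is not shown on this page. Given that com.google.common.base.Splitter and java.util.HashSet appear in the imports, a plausible sketch of such a helper follows; it is an assumption for illustration, not the template's actual implementation.

import com.google.common.base.Splitter;
import java.util.HashSet;
import java.util.Set;

final class IgnoreFieldsParser {

    // Turns a comma-separated option value such as "_metadata_stream,_metadata_schema"
    // into the Set<String> consumed by withIgnoreFields(...) and removeTableRowFields(...).
    static Set<String> getFieldsToIgnore(String commaSeparatedFields) {
        if (commaSeparatedFields == null || commaSeparatedFields.isEmpty()) {
            return new HashSet<>();
        }
        return new HashSet<>(
            Splitter.on(',').trimResults().omitEmptyStrings().splitToList(commaSeparatedFields));
    }
}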

Aggregations

DataStreamIO (com.google.cloud.teleport.v2.cdc.sources.DataStreamIO): 5
FailsafeElement (com.google.cloud.teleport.v2.values.FailsafeElement): 5
Pipeline (org.apache.beam.sdk.Pipeline): 5
DeadLetterQueueManager (com.google.cloud.teleport.v2.cdc.dlq.DeadLetterQueueManager): 2
StringDeadLetterQueueSanitizer (com.google.cloud.teleport.v2.cdc.dlq.StringDeadLetterQueueSanitizer): 2
CdcJdbcIO (com.google.cloud.teleport.v2.io.CdcJdbcIO): 2
DmlInfo (com.google.cloud.teleport.v2.values.DmlInfo): 2
TableRow (com.google.api.services.bigquery.model.TableRow): 1
TableId (com.google.cloud.bigquery.TableId): 1
BigQueryDefaultSchemas (com.google.cloud.teleport.v2.cdc.mappers.BigQueryDefaultSchemas): 1
DataStreamMapper (com.google.cloud.teleport.v2.cdc.mappers.DataStreamMapper): 1
MergeInfoMapper (com.google.cloud.teleport.v2.cdc.mappers.MergeInfoMapper): 1
BigQueryMerger (com.google.cloud.teleport.v2.cdc.merge.BigQueryMerger): 1
MergeConfiguration (com.google.cloud.teleport.v2.cdc.merge.MergeConfiguration): 1
FailsafeElementCoder (com.google.cloud.teleport.v2.coders.FailsafeElementCoder): 1
ProcessInformationSchema (com.google.cloud.teleport.v2.templates.spanner.ProcessInformationSchema): 1
Ddl (com.google.cloud.teleport.v2.templates.spanner.ddl.Ddl): 1
DLQWriteTransform (com.google.cloud.teleport.v2.transforms.DLQWriteTransform): 1
InputUDFOptions (com.google.cloud.teleport.v2.transforms.UDFTextTransformer.InputUDFOptions): 1
InputUDFToTableRow (com.google.cloud.teleport.v2.transforms.UDFTextTransformer.InputUDFToTableRow): 1