
Example 76 with Options

use of com.google.cloud.teleport.v2.transforms.DeleteBigQueryDataFn.Options in project DataflowTemplates by GoogleCloudPlatform.

the class DataStreamToBigQuery method run.

/**
 * Runs the pipeline with the supplied options.
 *
 * @param options The execution parameters to the pipeline.
 * @return The result of the pipeline execution.
 */
public static PipelineResult run(Options options) {
    /*
     * Stages:
     *   1) Ingest and Normalize Data to FailsafeElement with JSON Strings
     *   2) Write JSON Strings to TableRow Collection
     *       - Optionally apply a UDF
     *   3) BigQuery Output of TableRow Data
     *     a) Map New Columns & Write to Staging Tables
     *     b) Map New Columns & Merge Staging to Target Table
     *   4) Write Failures to GCS Dead Letter Queue
     */
    Pipeline pipeline = Pipeline.create(options);
    DeadLetterQueueManager dlqManager = buildDlqManager(options);
    String bigqueryProjectId = getBigQueryProjectId(options);
    String dlqDirectory = dlqManager.getRetryDlqDirectoryWithDateTime();
    String tempDlqDir = dlqManager.getRetryDlqDirectory() + "tmp/";
    InputUDFToTableRow<String> failsafeTableRowTransformer =
        new InputUDFToTableRow<String>(
            options.getJavascriptTextTransformGcsPath(),
            options.getJavascriptTextTransformFunctionName(),
            options.getPythonTextTransformGcsPath(),
            options.getPythonTextTransformFunctionName(),
            options.getRuntimeRetries(),
            FAILSAFE_ELEMENT_CODER);
    StatefulRowCleaner statefulCleaner = StatefulRowCleaner.of();
    /*
     * Stage 1: Ingest and Normalize Data to FailsafeElement with JSON Strings
     *   a) Read DataStream data from GCS into JSON String FailsafeElements (datastreamJsonRecords)
     *   b) Reconsume Dead Letter Queue data from GCS into JSON String FailsafeElements
     *     (dlqJsonRecords)
     *   c) Flatten DataStream and DLQ Streams (jsonRecords)
     */
    PCollection<FailsafeElement<String, String>> datastreamJsonRecords =
        pipeline.apply(
            new DataStreamIO(
                    options.getStreamName(),
                    options.getInputFilePattern(),
                    options.getInputFileFormat(),
                    options.getGcsPubSubSubscription(),
                    options.getRfcStartDateTime())
                .withFileReadConcurrency(options.getFileReadConcurrency()));
    // Elements previously sent to the Dead Letter Queue are reconsumed here.
    // The DLQManager is built from the pipeline options and is responsible for
    // assembling the individual pieces of the DLQ.
    PCollection<FailsafeElement<String, String>> dlqJsonRecords =
        pipeline
            .apply("DLQ Consumer/reader", dlqManager.dlqReconsumer(options.getDlqRetryMinutes()))
            .apply(
                "DLQ Consumer/cleaner",
                ParDo.of(
                    new DoFn<String, FailsafeElement<String, String>>() {
                      @ProcessElement
                      public void process(
                          @Element String input,
                          OutputReceiver<FailsafeElement<String, String>> receiver) {
                        receiver.output(FailsafeElement.of(input, input));
                      }
                    }))
            .setCoder(FAILSAFE_ELEMENT_CODER);
    PCollection<FailsafeElement<String, String>> jsonRecords =
        PCollectionList.of(datastreamJsonRecords)
            .and(dlqJsonRecords)
            .apply("Merge Datastream & DLQ", Flatten.pCollections());
    /*
     * Stage 2: Write JSON Strings to TableRow PCollectionTuple
     *   a) Optionally apply a Javascript or Python UDF
     *   b) Convert JSON String FailsafeElements to TableRow's (tableRowRecords)
     */
    PCollectionTuple tableRowRecords =
        jsonRecords.apply("UDF to TableRow/udf", failsafeTableRowTransformer);
    PCollectionTuple cleanedRows =
        tableRowRecords
            .get(failsafeTableRowTransformer.transformOut)
            .apply("UDF to TableRow/Oracle Cleaner", statefulCleaner);
    PCollection<TableRow> shuffledTableRows =
        cleanedRows
            .get(statefulCleaner.successTag)
            .apply(
                "UDF to TableRow/ReShuffle",
                Reshuffle.<TableRow>viaRandomKey().withNumBuckets(100));
    /*
     * Stage 3: BigQuery Output of TableRow Data
     *   a) Map New Columns & Write to Staging Tables (writeResult)
     *   b) Map New Columns & Merge Staging to Target Table (null)
     *
     *   failsafe: writeResult.getFailedInsertsWithErr()
     */
    // TODO(beam 2.23): InsertRetryPolicy should be CDC compliant
    Set<String> fieldsToIgnore = getFieldsToIgnore(options.getIgnoreFields());
    WriteResult writeResult =
        shuffledTableRows
            .apply(
                "Map to Staging Tables",
                new DataStreamMapper(
                        options.as(GcpOptions.class),
                        options.getOutputProjectId(),
                        options.getOutputStagingDatasetTemplate(),
                        options.getOutputStagingTableNameTemplate())
                    .withDataStreamRootUrl(options.getDataStreamRootUrl())
                    .withDefaultSchema(BigQueryDefaultSchemas.DATASTREAM_METADATA_SCHEMA)
                    .withDayPartitioning(true)
                    .withIgnoreFields(fieldsToIgnore))
            .apply(
                "Write Successful Records",
                BigQueryIO.<KV<TableId, TableRow>>write()
                    .to(new BigQueryDynamicConverters().bigQueryDynamicDestination())
                    .withFormatFunction(
                        element -> removeTableRowFields(element.getValue(), fieldsToIgnore))
                    .withFormatRecordOnFailureFunction(element -> element.getValue())
                    .withoutValidation()
                    .ignoreInsertIds()
                    .withCreateDisposition(CreateDisposition.CREATE_NEVER)
                    .withWriteDisposition(WriteDisposition.WRITE_APPEND)
                    .withExtendedErrorInfo()
                    .withMethod(BigQueryIO.Write.Method.STREAMING_INSERTS)
                    .withFailedInsertRetryPolicy(InsertRetryPolicy.retryTransientErrors()));
    if (options.getApplyMerge()) {
        shuffledTableRows
            .apply(
                "Map To Replica Tables",
                new DataStreamMapper(
                        options.as(GcpOptions.class),
                        options.getOutputProjectId(),
                        options.getOutputDatasetTemplate(),
                        options.getOutputTableNameTemplate())
                    .withDataStreamRootUrl(options.getDataStreamRootUrl())
                    .withDefaultSchema(BigQueryDefaultSchemas.DATASTREAM_METADATA_SCHEMA)
                    .withIgnoreFields(fieldsToIgnore))
            .apply(
                "BigQuery Merge/Build MergeInfo",
                new MergeInfoMapper(
                    bigqueryProjectId,
                    options.getOutputStagingDatasetTemplate(),
                    options.getOutputStagingTableNameTemplate(),
                    options.getOutputDatasetTemplate(),
                    options.getOutputTableNameTemplate()))
            .apply(
                "BigQuery Merge/Merge into Replica Tables",
                BigQueryMerger.of(
                    MergeConfiguration.bigQueryConfiguration()
                        .withMergeWindowDuration(
                            Duration.standardMinutes(options.getMergeFrequencyMinutes()))));
    }
    /*
     * Stage 4: Write Failures to GCS Dead Letter Queue
     */
    PCollection<String> udfDlqJson =
        PCollectionList.of(tableRowRecords.get(failsafeTableRowTransformer.udfDeadletterOut))
            .and(tableRowRecords.get(failsafeTableRowTransformer.transformDeadletterOut))
            .apply("Transform Failures/Flatten", Flatten.pCollections())
            .apply(
                "Transform Failures/Sanitize",
                MapElements.via(new StringDeadLetterQueueSanitizer()));
    PCollection<String> rowCleanerJson =
        cleanedRows
            .get(statefulCleaner.failureTag)
            .apply(
                "Transform Failures/Oracle Cleaner Failures",
                MapElements.via(new RowCleanerDeadLetterQueueSanitizer()));
    PCollection<String> bqWriteDlqJson =
        writeResult
            .getFailedInsertsWithErr()
            .apply("BigQuery Failures", MapElements.via(new BigQueryDeadLetterQueueSanitizer()));
    PCollectionList.of(udfDlqJson)
        .and(rowCleanerJson)
        .and(bqWriteDlqJson)
        .apply("Write To DLQ/Flatten", Flatten.pCollections())
        .apply(
            "Write To DLQ/Writer",
            DLQWriteTransform.WriteDLQ.newBuilder()
                .withDlqDirectory(dlqDirectory)
                .withTmpDirectory(tempDlqDir)
                .setIncludePaneInfo(true)
                .build());
    // Execute the pipeline and return the result.
    return pipeline.run();
}
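
The template is normally launched from a main() entry point that parses the Options from command-line arguments before handing them to run(). A minimal launcher sketch, assuming the template's Options interface extends StreamingOptions (as the imports below suggest); the real entry point may differ:

public static void main(String[] args) {
    // Parse and validate pipeline options from the command line.
    Options options = PipelineOptionsFactory.fromArgs(args).withValidation().as(Options.class);
    // Assumption: this template runs as a streaming pipeline, so streaming mode is enabled here.
    options.setStreaming(true);
    // Build and launch the pipeline defined in run().
    run(options);
}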
Also used : TableId(com.google.cloud.bigquery.TableId) PipelineResult(org.apache.beam.sdk.PipelineResult) InsertRetryPolicy(org.apache.beam.sdk.io.gcp.bigquery.InsertRetryPolicy) LoggerFactory(org.slf4j.LoggerFactory) DLQWriteTransform(com.google.cloud.teleport.v2.transforms.DLQWriteTransform) InputUDFOptions(com.google.cloud.teleport.v2.transforms.UDFTextTransformer.InputUDFOptions) Description(org.apache.beam.sdk.options.Description) PCollectionList(org.apache.beam.sdk.values.PCollectionList) TableRow(com.google.api.services.bigquery.model.TableRow) PCollectionTuple(org.apache.beam.sdk.values.PCollectionTuple) BigQueryMerger(com.google.cloud.teleport.v2.cdc.merge.BigQueryMerger) Splitter(com.google.common.base.Splitter) Flatten(org.apache.beam.sdk.transforms.Flatten) MapElements(org.apache.beam.sdk.transforms.MapElements) DeadLetterQueueManager(com.google.cloud.teleport.v2.cdc.dlq.DeadLetterQueueManager) MergeInfoMapper(com.google.cloud.teleport.v2.cdc.mappers.MergeInfoMapper) GcpOptions(org.apache.beam.sdk.extensions.gcp.options.GcpOptions) StreamingOptions(org.apache.beam.sdk.options.StreamingOptions) Set(java.util.Set) CreateDisposition(org.apache.beam.sdk.io.gcp.bigquery.BigQueryIO.Write.CreateDisposition) RowCleanerDeadLetterQueueSanitizer(com.google.cloud.teleport.v2.transforms.StatefulRowCleaner.RowCleanerDeadLetterQueueSanitizer) ParDo(org.apache.beam.sdk.transforms.ParDo) StringDeadLetterQueueSanitizer(com.google.cloud.teleport.v2.cdc.dlq.StringDeadLetterQueueSanitizer) FailsafeElement(com.google.cloud.teleport.v2.values.FailsafeElement) Pattern(java.util.regex.Pattern) MergeConfiguration(com.google.cloud.teleport.v2.cdc.merge.MergeConfiguration) KV(org.apache.beam.sdk.values.KV) DataStreamIO(com.google.cloud.teleport.v2.cdc.sources.DataStreamIO) Default(org.apache.beam.sdk.options.Default) Duration(org.joda.time.Duration) StatefulRowCleaner(com.google.cloud.teleport.v2.transforms.StatefulRowCleaner) BigQueryOptions(org.apache.beam.sdk.io.gcp.bigquery.BigQueryOptions) PipelineOptionsFactory(org.apache.beam.sdk.options.PipelineOptionsFactory) StringUtf8Coder(org.apache.beam.sdk.coders.StringUtf8Coder) HashSet(java.util.HashSet) DataStreamMapper(com.google.cloud.teleport.v2.cdc.mappers.DataStreamMapper) FailsafeElementCoder(com.google.cloud.teleport.v2.coders.FailsafeElementCoder) BigQueryDefaultSchemas(com.google.cloud.teleport.v2.cdc.mappers.BigQueryDefaultSchemas) TupleTag(org.apache.beam.sdk.values.TupleTag) Pipeline(org.apache.beam.sdk.Pipeline) PipelineOptions(org.apache.beam.sdk.options.PipelineOptions) DoFn(org.apache.beam.sdk.transforms.DoFn) Reshuffle(org.apache.beam.sdk.transforms.Reshuffle) Logger(org.slf4j.Logger) BigQueryIO(org.apache.beam.sdk.io.gcp.bigquery.BigQueryIO) DataflowPipelineOptions(org.apache.beam.runners.dataflow.options.DataflowPipelineOptions) WriteResult(org.apache.beam.sdk.io.gcp.bigquery.WriteResult) PCollection(org.apache.beam.sdk.values.PCollection) WriteDisposition(org.apache.beam.sdk.io.gcp.bigquery.BigQueryIO.Write.WriteDisposition) InputUDFToTableRow(com.google.cloud.teleport.v2.transforms.UDFTextTransformer.InputUDFToTableRow)

Example 77 with Options

use of com.google.cloud.teleport.v2.transforms.DeleteBigQueryDataFn.Options in project DataflowTemplates by GoogleCloudPlatform.

the class DataplexBigQueryToGcsTest method testE2E_withTargetStrategyFail_andEnforceSamePartitionKeyEnabled_throwsException.

/**
 * Tests that the pipeline throws an exception if {@code writeDisposition = FAIL}, {@code
 * enforceSamePartitionKey = true}, and one of the target files exists, when processing a
 * partitioned table.
 *
 * <p>This is a special case because depending on the {@code enforceSamePartitionKey} param the
 * generated file path can be different (for partitioned tables only!), so this verifies that
 * {@link com.google.cloud.teleport.v2.utils.DataplexBigQueryToGcsFilter
 * DataplexBigQueryToGcsFilter} can find such files correctly.
 */
@Test
public void testE2E_withTargetStrategyFail_andEnforceSamePartitionKeyEnabled_throwsException() throws Exception {
    options.setFileFormat(FileFormatOptions.PARQUET);
    options.setWriteDisposition(WriteDispositionOptions.FAIL);
    options.setEnforceSamePartitionKey(true);
    writeOutputFile("partitioned_table/ts=p2", "output-partitioned_table-p2.parquet", "Test data");
    when(bqMock.query(any())).then(invocation -> {
        Iterable<FieldValueList> result = null;
        QueryJobConfiguration q = (QueryJobConfiguration) invocation.getArguments()[0];
        if (TABLE_QUERY_PATTERN.matcher(q.getQuery()).find()) {
            result = Collections.singletonList(fields("partitioned_table", "0", "ts"));
        } else if (PARTITION_QUERY_PATTERN.matcher(q.getQuery()).find()) {
            result = Arrays.asList(fields("p1", "0"), fields("p2", "0"));
        }
        when(tableResultMock.iterateAll()).thenReturn(result);
        return tableResultMock;
    });
    try {
        DataplexBigQueryToGcs.buildPipeline(options, metadataLoader, outDir.getAbsolutePath(), DatasetId.of(PROJECT, DATASET));
        fail("Expected a WriteDispositionException");
    } catch (Exception e) {
        assertThat(e).hasCauseThat().hasCauseThat().isInstanceOf(WriteDispositionException.class);
        assertThat(e).hasCauseThat().hasCauseThat().hasMessageThat().contains("Target File partitioned_table/ts=p2/output-partitioned_table-p2.parquet exists for" + " partitioned_table$p2.");
    }
}
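
The try/catch plus fail() pattern above works; on JUnit 4.13 or later the same check can be expressed with org.junit.Assert.assertThrows, which removes the manual fail() call. A sketch under that assumption (the JUnit version in use is not confirmed by this snippet):

Exception e =
    assertThrows(
        Exception.class,
        () ->
            DataplexBigQueryToGcs.buildPipeline(
                options, metadataLoader, outDir.getAbsolutePath(), DatasetId.of(PROJECT, DATASET)));
// The WriteDispositionException is wrapped twice, hence the two hasCauseThat() calls.
assertThat(e).hasCauseThat().hasCauseThat().isInstanceOf(WriteDispositionException.class);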
Also used : FieldValueList(com.google.cloud.bigquery.FieldValueList) WriteDispositionException(com.google.cloud.teleport.v2.utils.WriteDisposition.WriteDispositionException) QueryJobConfiguration(com.google.cloud.bigquery.QueryJobConfiguration) FileNotFoundException(java.io.FileNotFoundException) IOException(java.io.IOException) Test(org.junit.Test)

Example 78 with Options

use of com.google.cloud.teleport.v2.transforms.DeleteBigQueryDataFn.Options in project DataflowTemplates by GoogleCloudPlatform.

the class DataplexFileFormatConversionTest method testAssetWithEntityJsonToParquetFailOnExistingFilesE2E.

/**
 * Tests JSON to Parquet conversion for an asset with entity when one of the files already exists
 * and the existing file behaviour is FAIL.
 */
@Test(expected = RuntimeException.class)
@Category(NeedsRunner.class)
public void testAssetWithEntityJsonToParquetFailOnExistingFilesE2E() throws IOException {
    // setup Dataplex client to return entity 2
    DataplexClient dataplex = mock(DataplexClient.class);
    when(dataplex.getCloudStorageEntities(asset2.getName())).thenReturn(ImmutableList.of(entity2));
    when(dataplex.getPartitions(entity2.getName())).thenReturn(ImmutableList.of());
    when(dataplex.getAsset(outputAsset.getName())).thenReturn(outputAsset);
    // setup options to fail on existing files
    FileFormatConversionOptions options = PipelineOptionsFactory.create().as(FileFormatConversionOptions.class);
    options.setInputAssetOrEntitiesList(asset2.getName());
    options.setOutputFileFormat(FileFormatOptions.PARQUET);
    options.setOutputAsset(outputAsset.getName());
    options.setWriteDisposition(WriteDispositionOptions.FAIL);
    // simulate the 1.json -> 1.parquet conversion already happened
    copyFileToOutputBucket("entity2.existing/1.parquet", "entity2/1.parquet");
    // simulate the 2.json -> 2.parquet conversion already happened
    copyFileToOutputBucket("entity2.existing/1.parquet", "entity2/2.parquet");
    // run the pipeline; the job should fail because 1.parquet already exists
    DataplexFileFormatConversion.run(mainPipeline, options, dataplex, DataplexFileFormatConversionTest::outputPathProvider).waitUntilFinish();
}
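
The three-line Dataplex mock wiring above is repeated across several of these tests. A hypothetical helper (not part of the original test class) could centralize it; the names mirror the fields already used in the tests:

// Hypothetical helper: a DataplexClient mock that serves entity2 under asset2 with no
// partitions and resolves the output asset.
private DataplexClient mockDataplexForEntity2() throws IOException {
    DataplexClient dataplex = mock(DataplexClient.class);
    when(dataplex.getCloudStorageEntities(asset2.getName())).thenReturn(ImmutableList.of(entity2));
    when(dataplex.getPartitions(entity2.getName())).thenReturn(ImmutableList.of());
    when(dataplex.getAsset(outputAsset.getName())).thenReturn(outputAsset);
    return dataplex;
}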
Also used : DataplexClient(com.google.cloud.teleport.v2.clients.DataplexClient) FileFormatConversionOptions(com.google.cloud.teleport.v2.templates.DataplexFileFormatConversion.FileFormatConversionOptions) Category(org.junit.experimental.categories.Category) Test(org.junit.Test)

Example 79 with Options

use of com.google.cloud.teleport.v2.transforms.DeleteBigQueryDataFn.Options in project DataflowTemplates by GoogleCloudPlatform.

the class DataplexFileFormatConversionTest method testAssetWithEntityJsonToParquetSkipExistingFilesE2E.

/**
 * Tests JSON to Parquet conversion for an asset with entity when one of the files already exists
 * and the existing file behaviour is SKIP.
 */
@Test
@Category(NeedsRunner.class)
public void testAssetWithEntityJsonToParquetSkipExistingFilesE2E() throws IOException {
    // setup Dataplex client to return entity 2
    DataplexClient dataplex = mock(DataplexClient.class);
    when(dataplex.getCloudStorageEntities(asset2.getName())).thenReturn(ImmutableList.of(entity2));
    when(dataplex.getPartitions(entity2.getName())).thenReturn(ImmutableList.of());
    when(dataplex.getAsset(outputAsset.getName())).thenReturn(outputAsset);
    // setup options to skip existing files
    FileFormatConversionOptions options = PipelineOptionsFactory.create().as(FileFormatConversionOptions.class);
    options.setInputAssetOrEntitiesList(asset2.getName());
    options.setOutputFileFormat(FileFormatOptions.PARQUET);
    options.setOutputAsset(outputAsset.getName());
    options.setWriteDisposition(WriteDispositionOptions.SKIP);
    // simulate the 1.json -> 1.parquet conversion already happened
    copyFileToOutputBucket("entity2.existing/1.parquet", "entity2/1.parquet");
    // run the pipeline; only the 2.json -> 2.parquet conversion should happen
    DataplexFileFormatConversion.run(mainPipeline, options, dataplex, DataplexFileFormatConversionTest::outputPathProvider);
    // read the conversion results
    PCollection<GenericRecord> readParquetFile =
        readPipeline.apply(
            "ReadParquetFile",
            ParquetConverters.ReadParquetFile.newBuilder()
                .withInputFileSpec(temporaryFolder.getRoot().getAbsolutePath() + "/**/*.parquet")
                .withSerializedSchema(EXPECT_SERIALIZED_AVRO_SCHEMA)
                .build());
    // expect old 1.parquet (from entity2.existing) and newly converted 2.parquet (from entity2)
    ImmutableList.Builder<GenericRecord> expected = ImmutableList.builder();
    Record record = new Record(EXPECTED_AVRO_SCHEMA);
    record.put("Word", "abc.existing");
    record.put("Number", 1);
    expected.add(record);
    record = new Record(EXPECTED_AVRO_SCHEMA);
    record.put("Word", "def");
    record.put("Number", 2);
    expected.add(record);
    record = new Record(EXPECTED_AVRO_SCHEMA);
    record.put("Word", "ghi");
    record.put("Number", 3);
    expected.add(record);
    PAssert.that(readParquetFile).containsInAnyOrder(expected.build());
    readPipeline.run();
}
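
The expected-record construction above is repetitive; a hypothetical helper that builds one GenericRecord per (Word, Number) pair would shorten it. The field names are taken from the records above, and EXPECTED_AVRO_SCHEMA is assumed to be the static schema already used in this test class:

// Hypothetical helper: builds a record with the two fields asserted in these tests.
private static GenericRecord wordRecord(String word, int number) {
    Record record = new Record(EXPECTED_AVRO_SCHEMA);
    record.put("Word", word);
    record.put("Number", number);
    return record;
}

// Usage: the expected list from the test above, expressed with the helper.
ImmutableList<GenericRecord> expected =
    ImmutableList.of(wordRecord("abc.existing", 1), wordRecord("def", 2), wordRecord("ghi", 3));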
Also used : DataplexClient(com.google.cloud.teleport.v2.clients.DataplexClient) ImmutableList(com.google.common.collect.ImmutableList) FileFormatConversionOptions(com.google.cloud.teleport.v2.templates.DataplexFileFormatConversion.FileFormatConversionOptions) Record(org.apache.avro.generic.GenericData.Record) GenericRecord(org.apache.avro.generic.GenericRecord) Category(org.junit.experimental.categories.Category) Test(org.junit.Test)

Example 80 with Options

use of com.google.cloud.teleport.v2.transforms.DeleteBigQueryDataFn.Options in project DataflowTemplates by GoogleCloudPlatform.

the class DataplexFileFormatConversionTest method testAssetWithEntityJsonToGzippedParquetE2E.

/**
 * Tests JSON to Parquet conversion for an asset with entity using non-default compression.
 */
@Test
@Category(NeedsRunner.class)
public void testAssetWithEntityJsonToGzippedParquetE2E() throws IOException {
    DataplexClient dataplex = mock(DataplexClient.class);
    when(dataplex.getCloudStorageEntities(asset2.getName())).thenReturn(ImmutableList.of(entity2));
    when(dataplex.getPartitions(entity2.getName())).thenReturn(ImmutableList.of());
    when(dataplex.getAsset(outputAsset.getName())).thenReturn(outputAsset);
    FileFormatConversionOptions options = PipelineOptionsFactory.create().as(FileFormatConversionOptions.class);
    options.setInputAssetOrEntitiesList(asset2.getName());
    options.setOutputFileFormat(FileFormatOptions.PARQUET);
    options.setOutputAsset(outputAsset.getName());
    options.setOutputFileCompression(DataplexCompression.GZIP);
    DataplexFileFormatConversion.run(mainPipeline, options, dataplex, DataplexFileFormatConversionTest::outputPathProvider);
    PCollection<GenericRecord> readParquetFile =
        readPipeline.apply(
            "ReadParquetFile",
            ParquetConverters.ReadParquetFile.newBuilder()
                .withInputFileSpec(temporaryFolder.getRoot().getAbsolutePath() + "/**/*.parquet")
                .withSerializedSchema(EXPECT_SERIALIZED_AVRO_SCHEMA)
                .build());
    PAssert.that(readParquetFile).containsInAnyOrder(EXPECTED_GENERIC_RECORDS);
    readPipeline.run();
}
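
When the same options are passed on the command line instead of being set programmatically, Beam derives flag names from the getters on FileFormatConversionOptions. A sketch under that assumption; the flag spellings are inferred from the setters above and are not confirmed by this snippet:

// Assumed flag names follow Beam's convention of matching the option getter names.
FileFormatConversionOptions options =
    PipelineOptionsFactory.fromArgs(
            "--inputAssetOrEntitiesList=" + asset2.getName(),
            "--outputFileFormat=PARQUET",
            "--outputAsset=" + outputAsset.getName(),
            "--outputFileCompression=GZIP")
        .as(FileFormatConversionOptions.class);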
Also used : DataplexClient(com.google.cloud.teleport.v2.clients.DataplexClient) FileFormatConversionOptions(com.google.cloud.teleport.v2.templates.DataplexFileFormatConversion.FileFormatConversionOptions) GenericRecord(org.apache.avro.generic.GenericRecord) Category(org.junit.experimental.categories.Category) Test(org.junit.Test)

Aggregations

Test (org.junit.Test): 63
PCollectionTuple (org.apache.beam.sdk.values.PCollectionTuple): 25
FailsafeElement (com.google.cloud.teleport.v2.values.FailsafeElement): 20
Pipeline (org.apache.beam.sdk.Pipeline): 19
CoderRegistry (org.apache.beam.sdk.coders.CoderRegistry): 19
BigQueryTable (com.google.cloud.teleport.v2.values.BigQueryTable): 15
GenericRecord (org.apache.avro.generic.GenericRecord): 12
Category (org.junit.experimental.categories.Category): 12
Filter (com.google.cloud.teleport.v2.utils.BigQueryMetadataLoader.Filter): 10
BigQueryTablePartition (com.google.cloud.teleport.v2.values.BigQueryTablePartition): 10
PubSubToElasticsearchOptions (com.google.cloud.teleport.v2.elasticsearch.options.PubSubToElasticsearchOptions): 9
TableRow (com.google.api.services.bigquery.model.TableRow): 8
DataplexClient (com.google.cloud.teleport.v2.clients.DataplexClient): 8
FileFormatConversionOptions (com.google.cloud.teleport.v2.templates.FileFormatConversion.FileFormatConversionOptions): 8
KV (org.apache.beam.sdk.values.KV): 8
ArrayList (java.util.ArrayList): 7
ElasticsearchWriteOptions (com.google.cloud.teleport.v2.elasticsearch.options.ElasticsearchWriteOptions): 6
GCSToElasticsearchOptions (com.google.cloud.teleport.v2.elasticsearch.options.GCSToElasticsearchOptions): 6
FileFormatConversionOptions (com.google.cloud.teleport.v2.templates.DataplexFileFormatConversion.FileFormatConversionOptions): 6
PubSubProtoToBigQueryOptions (com.google.cloud.teleport.v2.templates.PubsubProtoToBigQuery.PubSubProtoToBigQueryOptions): 6