Search in sources :

Example 1 with DataplexBigQueryToGcsFilter

use of com.google.cloud.teleport.v2.utils.DataplexBigQueryToGcsFilter in project DataflowTemplates by GoogleCloudPlatform.

the class DataplexBigQueryToGcs method buildPipeline.

/**
 * Assembles the BigQuery-to-GCS export pipeline for a single dataset.
 *
 * <p>Lists the files already present under {@code targetRootPath}, loads the dataset's table
 * metadata through a {@link DataplexBigQueryToGcsFilter} built from the options and that file
 * list, and then either wires the export transforms for the loaded tables or, when no table was
 * loaded, applies a single {@link NoopTransform} to the pipeline.
 *
 * @param options The execution parameters to the pipeline.
 * @param metadataLoader Loader used to fetch the table metadata of {@code datasetId}.
 * @param targetRootPath Directory whose existing files are listed and handed to the filter; it is
 *     also forwarded to the export transforms.
 * @param datasetId The BigQuery dataset whose table metadata is loaded.
 * @return The resulting pipeline.
 * @throws ExecutionException propagated from the calls made while building the pipeline.
 * @throws InterruptedException propagated from the calls made while building the pipeline.
 */
@VisibleForTesting
static Pipeline buildPipeline(DataplexBigQueryToGcsOptions options, BigQueryMetadataLoader metadataLoader, String targetRootPath, DatasetId datasetId) throws ExecutionException, InterruptedException {
    Pipeline exportPipeline = Pipeline.create(options);
    List<String> filesAlreadyInTarget = StorageUtils.getFilesInDirectory(targetRootPath);

    LOG.info("Loading BigQuery metadata...");
    DataplexBigQueryToGcsFilter tableFilter = new DataplexBigQueryToGcsFilter(options, filesAlreadyInTarget);
    List<BigQueryTable> loadedTables = metadataLoader.loadDatasetMetadata(datasetId, tableFilter);
    LOG.info("Loaded {} table(s).", loadedTables.size());

    // With no tables to export, still return a pipeline that has one (no-op) step applied.
    if (loadedTables.isEmpty()) {
        exportPipeline.apply("Nothing to export", new NoopTransform());
        return exportPipeline;
    }

    transformPipeline(exportPipeline, loadedTables, options, targetRootPath, null, null);
    return exportPipeline;
}
Also used : NoopTransform(com.google.cloud.teleport.v2.transforms.NoopTransform) BigQueryTable(com.google.cloud.teleport.v2.values.BigQueryTable) DataplexBigQueryToGcsFilter(com.google.cloud.teleport.v2.utils.DataplexBigQueryToGcsFilter) Pipeline(org.apache.beam.sdk.Pipeline) VisibleForTesting(com.google.common.annotations.VisibleForTesting)

Example 2 with DataplexBigQueryToGcsFilter

use of com.google.cloud.teleport.v2.utils.DataplexBigQueryToGcsFilter in project DataflowTemplates by GoogleCloudPlatform.

the class DataplexBigQueryToGcsFilterTest method test_whenPartitionedTableHasNoPartitions_filterExcludesTable.

/**
 * With neither a table-name list nor a modification-time cutoff configured, a partitioned table
 * passed together with an empty partition list must be reported as skipped.
 */
@Test
public void test_whenPartitionedTableHasNoPartitions_filterExcludesTable() {
    // Clear both filter options so only the empty partition list can trigger the skip.
    options.setTables(null);
    options.setExportDataModifiedBeforeDateTime(null);

    Filter filter = new DataplexBigQueryToGcsFilter(options, new ArrayList<String>());
    boolean skipped = filter.shouldSkipPartitionedTable(table(), Collections.emptyList());

    assertThat(skipped).isTrue();
}
Also used : Filter(com.google.cloud.teleport.v2.utils.BigQueryMetadataLoader.Filter) Test(org.junit.Test)

Example 3 with DataplexBigQueryToGcsFilter

use of com.google.cloud.teleport.v2.utils.DataplexBigQueryToGcsFilter in project DataflowTemplates by GoogleCloudPlatform.

the class DataplexBigQueryToGcsFilterTest method test_whenNoFilterOptions_filterAcceptsAllTablesAndPartitions.

/**
 * With neither a table-name list nor a modification-time cutoff configured, the filter must not
 * skip an unpartitioned table, a partitioned table that has a partition, or the partition itself.
 */
@Test
public void test_whenNoFilterOptions_filterAcceptsAllTablesAndPartitions() {
    BigQueryTable.Builder tableBuilder = table();
    BigQueryTablePartition singlePartition = partition().build();

    // Clear both filter options.
    options.setTables(null);
    options.setExportDataModifiedBeforeDateTime(null);
    Filter filter = new DataplexBigQueryToGcsFilter(options, new ArrayList<String>());

    boolean skipsUnpartitioned = filter.shouldSkipUnpartitionedTable(tableBuilder);
    assertThat(skipsUnpartitioned).isFalse();

    boolean skipsPartitioned = filter.shouldSkipPartitionedTable(tableBuilder, Collections.singletonList(singlePartition));
    assertThat(skipsPartitioned).isFalse();

    boolean skipsPartition = filter.shouldSkipPartition(tableBuilder, singlePartition);
    assertThat(skipsPartition).isFalse();
}
Also used : BigQueryTablePartition(com.google.cloud.teleport.v2.values.BigQueryTablePartition) Filter(com.google.cloud.teleport.v2.utils.BigQueryMetadataLoader.Filter) BigQueryTable(com.google.cloud.teleport.v2.values.BigQueryTable) Test(org.junit.Test)

Example 4 with DataplexBigQueryToGcsFilter

use of com.google.cloud.teleport.v2.utils.DataplexBigQueryToGcsFilter in project DataflowTemplates by GoogleCloudPlatform.

the class DataplexBigQueryToGcsTest method testE2E_withTargetStrategyFail_andEnforceSamePartitionKeyEnabled_throwsException.

/**
 * Verifies that building the pipeline fails when {@code writeDisposition = FAIL}, {@code
 * enforceSamePartitionKey = true}, and a target file for one partition of a partitioned table
 * already exists.
 *
 * <p>This case is special: depending on the {@code enforceSamePartitionKey} param the generated
 * file path can differ (for partitioned tables only!), so the test checks that {@link
 * com.google.cloud.teleport.v2.utils.DataplexBigQueryToGcsFilter DataplexBigQueryToGcsFilter}
 * still locates such files correctly.
 */
@Test
public void testE2E_withTargetStrategyFail_andEnforceSamePartitionKeyEnabled_throwsException() throws Exception {
    options.setFileFormat(FileFormatOptions.PARQUET);
    options.setWriteDisposition(WriteDispositionOptions.FAIL);
    options.setEnforceSamePartitionKey(true);

    // Pre-create the output file for partition p2 so that it collides with the export target.
    writeOutputFile("partitioned_table/ts=p2", "output-partitioned_table-p2.parquet", "Test data");

    // Answer metadata queries: one partitioned table for the table query, partitions p1 and p2 for
    // the partition query; any other query yields a null row set.
    when(bqMock.query(any())).then(invocation -> {
        QueryJobConfiguration queryConfig = (QueryJobConfiguration) invocation.getArguments()[0];
        String sql = queryConfig.getQuery();
        Iterable<FieldValueList> rows;
        if (TABLE_QUERY_PATTERN.matcher(sql).find()) {
            rows = Collections.singletonList(fields("partitioned_table", "0", "ts"));
        } else if (PARTITION_QUERY_PATTERN.matcher(sql).find()) {
            rows = Arrays.asList(fields("p1", "0"), fields("p2", "0"));
        } else {
            rows = null;
        }
        when(tableResultMock.iterateAll()).thenReturn(rows);
        return tableResultMock;
    });

    String expectedMessage = "Target File partitioned_table/ts=p2/output-partitioned_table-p2.parquet exists for" + " partitioned_table$p2.";
    try {
        DataplexBigQueryToGcs.buildPipeline(options, metadataLoader, outDir.getAbsolutePath(), DatasetId.of(PROJECT, DATASET));
        fail("Expected a WriteDispositionException");
    } catch (Exception thrown) {
        // The WriteDispositionException is expected two cause-levels below the thrown exception.
        assertThat(thrown).hasCauseThat().hasCauseThat().isInstanceOf(WriteDispositionException.class);
        assertThat(thrown).hasCauseThat().hasCauseThat().hasMessageThat().contains(expectedMessage);
    }
}
Also used : FieldValueList(com.google.cloud.bigquery.FieldValueList) WriteDispositionException(com.google.cloud.teleport.v2.utils.WriteDisposition.WriteDispositionException) QueryJobConfiguration(com.google.cloud.bigquery.QueryJobConfiguration) FileNotFoundException(java.io.FileNotFoundException) IOException(java.io.IOException) Test(org.junit.Test)

Example 5 with DataplexBigQueryToGcsFilter

use of com.google.cloud.teleport.v2.utils.DataplexBigQueryToGcsFilter in project DataflowTemplates by GoogleCloudPlatform.

the class DataplexBigQueryToGcsFilterTest method test_whenTablesSet_filterExcludesTablesByName.

/**
 * With {@code tables} set to two table names, the filter must skip a table whose name is not in
 * that list (both as an unpartitioned and as a partitioned table) while keeping the listed ones.
 * Individual partitions are never skipped on account of the table name.
 */
@Test
public void test_whenTablesSet_filterExcludesTablesByName() {
    BigQueryTable.Builder firstIncluded = table().setTableName("includedTable1");
    BigQueryTable.Builder secondIncluded = table().setTableName("includedTable2");
    BigQueryTable.Builder notListed = table().setTableName("excludedTable");
    BigQueryTablePartition singlePartition = partition().build();

    options.setTables("includedTable1,includedTable2");
    options.setExportDataModifiedBeforeDateTime(null);
    Filter filter = new DataplexBigQueryToGcsFilter(options, new ArrayList<String>());

    // Unpartitioned tables: only the unlisted name is skipped.
    assertThat(filter.shouldSkipUnpartitionedTable(firstIncluded)).isFalse();
    assertThat(filter.shouldSkipUnpartitionedTable(secondIncluded)).isFalse();
    assertThat(filter.shouldSkipUnpartitionedTable(notListed)).isTrue();

    // Partitioned tables: same outcome as for unpartitioned ones.
    assertThat(filter.shouldSkipPartitionedTable(firstIncluded, Collections.singletonList(singlePartition))).isFalse();
    assertThat(filter.shouldSkipPartitionedTable(secondIncluded, Collections.singletonList(singlePartition))).isFalse();
    assertThat(filter.shouldSkipPartitionedTable(notListed, Collections.singletonList(singlePartition))).isTrue();

    // Partitions: never skipped because of the table name, not even for the unlisted table.
    assertThat(filter.shouldSkipPartition(firstIncluded, singlePartition)).isFalse();
    assertThat(filter.shouldSkipPartition(secondIncluded, singlePartition)).isFalse();
    assertThat(filter.shouldSkipPartition(notListed, singlePartition)).isFalse();
}
Also used : BigQueryTablePartition(com.google.cloud.teleport.v2.values.BigQueryTablePartition) Filter(com.google.cloud.teleport.v2.utils.BigQueryMetadataLoader.Filter) BigQueryTable(com.google.cloud.teleport.v2.values.BigQueryTable) Test(org.junit.Test)

Aggregations

Test (org.junit.Test): 10; Filter (com.google.cloud.teleport.v2.utils.BigQueryMetadataLoader.Filter): 9; BigQueryTable (com.google.cloud.teleport.v2.values.BigQueryTable): 9; BigQueryTablePartition (com.google.cloud.teleport.v2.values.BigQueryTablePartition): 5; FieldValueList (com.google.cloud.bigquery.FieldValueList): 1; QueryJobConfiguration (com.google.cloud.bigquery.QueryJobConfiguration): 1; NoopTransform (com.google.cloud.teleport.v2.transforms.NoopTransform): 1; DataplexBigQueryToGcsFilter (com.google.cloud.teleport.v2.utils.DataplexBigQueryToGcsFilter): 1; WriteDispositionException (com.google.cloud.teleport.v2.utils.WriteDisposition.WriteDispositionException): 1; VisibleForTesting (com.google.common.annotations.VisibleForTesting): 1; FileNotFoundException (java.io.FileNotFoundException): 1; IOException (java.io.IOException): 1; Pipeline (org.apache.beam.sdk.Pipeline): 1