Search in sources:

Example 1 with WriteDispositionException

use of com.google.cloud.teleport.v2.utils.WriteDisposition.WriteDispositionException in project DataflowTemplates by GoogleCloudPlatform.

the class DataplexFileFormatConversion method run.

/**
 * Runs the pipeline to completion with the specified options.
 *
 * @return The pipeline result.
 */
public static PipelineResult run(
        Pipeline pipeline,
        FileFormatConversionOptions options,
        DataplexClient dataplex,
        OutputPathProvider outputPathProvider)
        throws IOException {
    boolean isInputAsset = ASSET_PATTERN.matcher(options.getInputAssetOrEntitiesList()).matches();
    if (!isInputAsset && !ENTITIES_PATTERN.matcher(options.getInputAssetOrEntitiesList()).matches()) {
        throw new IllegalArgumentException("Either input asset or input entities list must be provided");
    }
    GoogleCloudDataplexV1Asset outputAsset = dataplex.getAsset(options.getOutputAsset());
    if (outputAsset == null
            || outputAsset.getResourceSpec() == null
            || !DataplexAssetResourceSpec.STORAGE_BUCKET.name()
                .equals(outputAsset.getResourceSpec().getType())
            || outputAsset.getResourceSpec().getName() == null) {
        throw new IllegalArgumentException(
            "Output asset must be an existing asset with resource spec name being a GCS bucket and"
                + " resource spec type of "
                + DataplexAssetResourceSpec.STORAGE_BUCKET.name());
    }
    }
    String outputBucket = outputAsset.getResourceSpec().getName();
    Predicate<String> inputFilesFilter;
    switch (options.getWriteDisposition()) {
        case OVERWRITE:
            inputFilesFilter = inputFilePath -> true;
            break;
        case FAIL:
            Set<String> outputFilePaths = getAllOutputFilePaths(outputBucket);
            inputFilesFilter = inputFilePath -> {
                if (outputFilePaths.contains(
                        inputFilePathToOutputFilePath(
                            outputPathProvider,
                            inputFilePath,
                            outputBucket,
                            options.getOutputFileFormat()))) {
                    throw new WriteDispositionException(
                        String.format(
                            "The file %s already exists in the output asset bucket: %s",
                            inputFilePath, outputBucket));
                }
                return true;
            };
            break;
        case SKIP:
            outputFilePaths = getAllOutputFilePaths(outputBucket);
            inputFilesFilter =
                inputFilePath ->
                    !outputFilePaths.contains(
                        inputFilePathToOutputFilePath(
                            outputPathProvider,
                            inputFilePath,
                            outputBucket,
                            options.getOutputFileFormat()));
            break;
        default:
            throw new UnsupportedOperationException("Unsupported existing file behaviour: " + options.getWriteDisposition());
    }
    ImmutableList<GoogleCloudDataplexV1Entity> entities =
        isInputAsset
            ? dataplex.getCloudStorageEntities(options.getInputAssetOrEntitiesList())
            : dataplex.getEntities(
                Splitter.on(',').trimResults().splitToList(options.getInputAssetOrEntitiesList()));
    boolean convertingFiles = false;
    for (GoogleCloudDataplexV1Entity entity : entities) {
        ImmutableList<GoogleCloudDataplexV1Partition> partitions = dataplex.getPartitions(entity.getName());
        if (partitions.isEmpty()) {
            String outputPath = outputPathProvider.outputPathFrom(entity.getDataPath(), outputBucket);
            Iterator<String> inputFilePaths =
                getFilesFromFilePattern(entityToFileSpec(entity)).filter(inputFilesFilter).iterator();
            convertingFiles = inputFilePaths.hasNext();
            inputFilePaths.forEachRemaining(
                inputFilePath ->
                    pipeline.apply(
                        "Convert " + shortenDataplexName(entity.getName()),
                        new ConvertFiles(entity, inputFilePath, options, outputPath)));
        } else {
            for (GoogleCloudDataplexV1Partition partition : partitions) {
                String outputPath = outputPathProvider.outputPathFrom(partition.getLocation(), outputBucket);
                Iterator<String> inputFilePaths =
                    getFilesFromFilePattern(partitionToFileSpec(partition))
                        .filter(inputFilesFilter)
                        .iterator();
                convertingFiles = inputFilePaths.hasNext();
                inputFilePaths.forEachRemaining(
                    inputFilePath ->
                        pipeline.apply(
                            "Convert " + shortenDataplexName(partition.getName()),
                            new ConvertFiles(entity, inputFilePath, options, outputPath)));
            }
        }
    }
    if (!convertingFiles) {
        pipeline.apply("Nothing to convert", new NoopTransform());
    }
    return pipeline.run();
}
Also used: NoopTransform (com.google.cloud.teleport.v2.transforms.NoopTransform), GoogleCloudDataplexV1Partition (com.google.api.services.dataplex.v1.model.GoogleCloudDataplexV1Partition), WriteDispositionException (com.google.cloud.teleport.v2.utils.WriteDisposition.WriteDispositionException), GoogleCloudDataplexV1Asset (com.google.api.services.dataplex.v1.model.GoogleCloudDataplexV1Asset), GoogleCloudDataplexV1Entity (com.google.api.services.dataplex.v1.model.GoogleCloudDataplexV1Entity)
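
The three write dispositions reduce to a Predicate<String> over input file paths, which makes the behavior easy to exercise in isolation. Below is a minimal, self-contained sketch of the same pattern; the exception class, enum, and helper names are stand-ins for illustration, not the template's actual types.

import java.util.Set;
import java.util.function.Predicate;

public class WriteDispositionFilterSketch {

    // Stand-in for com.google.cloud.teleport.v2.utils.WriteDisposition.WriteDispositionException.
    static class WriteDispositionException extends RuntimeException {
        WriteDispositionException(String message) {
            super(message);
        }
    }

    enum Disposition { OVERWRITE, FAIL, SKIP }

    // Builds the per-disposition filter, mirroring the switch in run() above.
    static Predicate<String> filterFor(Disposition disposition, Set<String> existingOutputs) {
        switch (disposition) {
            case OVERWRITE:
                // Convert everything; existing outputs get replaced.
                return path -> true;
            case FAIL:
                // Fail fast on the first input whose target already exists.
                return path -> {
                    if (existingOutputs.contains(path)) {
                        throw new WriteDispositionException("Target file already exists: " + path);
                    }
                    return true;
                };
            case SKIP:
                // Silently drop inputs whose target already exists.
                return path -> !existingOutputs.contains(path);
            default:
                throw new UnsupportedOperationException("Unsupported disposition: " + disposition);
        }
    }

    public static void main(String[] args) {
        Predicate<String> skip = filterFor(Disposition.SKIP, Set.of("gs://bucket/a.parquet"));
        System.out.println(skip.test("gs://bucket/a.parquet")); // false: already exists, skipped
        System.out.println(skip.test("gs://bucket/b.parquet")); // true: converted
    }
}

Note that in run() the FAIL predicate keys on the translated output path (via inputFilePathToOutputFilePath), not the raw input path as in this sketch; the comparison key is the only real difference.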

Example 2 with WriteDispositionException

use of com.google.cloud.teleport.v2.utils.WriteDisposition.WriteDispositionException in project DataflowTemplates by GoogleCloudPlatform.

the class DataplexBigQueryToGcsTest method testE2E_withTargetStrategyFail_andEnforceSamePartitionKeyEnabled_throwsException.

/**
 * Tests that the pipeline throws an exception if {@code writeDisposition = FAIL}, {@code
 * enforceSamePartitionKey = true}, and one of the target files exist, when processing a
 * partitioned table.
 *
 * <p>This is a special case because depending on the {@code enforceSamePartitionKey} param the
 * generated file path can be different (for partitioned tables only!), so this verifies that
 * {@link com.google.cloud.teleport.v2.utils.DataplexBigQueryToGcsFilter
 * DataplexBigQueryToGcsFilter} can find such files correctly.
 */
@Test
public void testE2E_withTargetStrategyFail_andEnforceSamePartitionKeyEnabled_throwsException() throws Exception {
    options.setFileFormat(FileFormatOptions.PARQUET);
    options.setWriteDisposition(WriteDispositionOptions.FAIL);
    options.setEnforceSamePartitionKey(true);
    writeOutputFile("partitioned_table/ts=p2", "output-partitioned_table-p2.parquet", "Test data");
    when(bqMock.query(any())).then(invocation -> {
        Iterable<FieldValueList> result = null;
        QueryJobConfiguration q = (QueryJobConfiguration) invocation.getArguments()[0];
        if (TABLE_QUERY_PATTERN.matcher(q.getQuery()).find()) {
            result = Collections.singletonList(fields("partitioned_table", "0", "ts"));
        } else if (PARTITION_QUERY_PATTERN.matcher(q.getQuery()).find()) {
            result = Arrays.asList(fields("p1", "0"), fields("p2", "0"));
        }
        when(tableResultMock.iterateAll()).thenReturn(result);
        return tableResultMock;
    });
    try {
        DataplexBigQueryToGcs.buildPipeline(options, metadataLoader, outDir.getAbsolutePath(), DatasetId.of(PROJECT, DATASET));
        fail("Expected a WriteDispositionException");
    } catch (Exception e) {
        assertThat(e).hasCauseThat().hasCauseThat().isInstanceOf(WriteDispositionException.class);
        assertThat(e).hasCauseThat().hasCauseThat().hasMessageThat().contains("Target File partitioned_table/ts=p2/output-partitioned_table-p2.parquet exists for" + " partitioned_table$p2.");
    }
}
Also used: FieldValueList (com.google.cloud.bigquery.FieldValueList), WriteDispositionException (com.google.cloud.teleport.v2.utils.WriteDisposition.WriteDispositionException), QueryJobConfiguration (com.google.cloud.bigquery.QueryJobConfiguration), FileNotFoundException (java.io.FileNotFoundException), IOException (java.io.IOException), Test (org.junit.Test)
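
The double hasCauseThat() chain reflects how the failure surfaces here: by the time buildPipeline propagates it, the WriteDispositionException is wrapped two causes deep. A plain-Java sketch of the same unwrapping, with illustrative wrapper messages rather than the actual runner exceptions:

public class NestedCauseSketch {

    // Stand-in for com.google.cloud.teleport.v2.utils.WriteDisposition.WriteDispositionException.
    static class WriteDispositionException extends RuntimeException {
        WriteDispositionException(String message) {
            super(message);
        }
    }

    public static void main(String[] args) {
        // Simulate the nesting the test above expects: two wrapper layers
        // around the original exception.
        Exception e =
            new RuntimeException("outer wrapper",
                new RuntimeException("inner wrapper",
                    new WriteDispositionException("Target File ... exists")));
        Throwable root = e.getCause().getCause();
        // Equivalent to assertThat(e).hasCauseThat().hasCauseThat().isInstanceOf(...).
        System.out.println(root instanceof WriteDispositionException); // true
    }
}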

Example 3 with WriteDispositionException

use of com.google.cloud.teleport.v2.utils.JdbcIngestionWriteDisposition.WriteDispositionException in project DataflowTemplates by GoogleCloudPlatform.

the class DataplexJdbcIngestionFilterTest method testFailIfTargetFileExists.

@Test
public void testFailIfTargetFileExists() {
    String targetRootPath = temporaryFolder.getRoot().getAbsolutePath();
    PCollectionTuple result =
        mainPipeline
            .apply(
                Create.<GenericRecord>of(record11, record12, record21)
                    .withCoder(AvroCoder.of(SCHEMA)))
            .apply(
                new DataplexJdbcIngestionFilter(
                    targetRootPath,
                    SERIALIZED_SCHEMA,
                    PARTITION_COLUMN_NAME,
                    PartitioningSchema.MONTHLY,
                    FileFormatOptions.AVRO.getFileSuffix(),
                    WriteDispositionOptions.WRITE_EMPTY,
                    StorageUtils.getFilesInDirectory(targetRootPath),
                    FILTERED_RECORDS_OUT,
                    EXISTING_TARGET_FILES_OUT));
    try {
        mainPipeline.run();
        fail("Expected a WriteDispositionException.");
    } catch (Exception e) {
        assertThat(e).hasCauseThat().isInstanceOf(WriteDispositionException.class);
    }
}
Also used: PCollectionTuple (org.apache.beam.sdk.values.PCollectionTuple), WriteDispositionException (com.google.cloud.teleport.v2.utils.JdbcIngestionWriteDisposition.WriteDispositionException), IOException (java.io.IOException), Test (org.junit.Test)
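
DataplexJdbcIngestionFilter emits to two tagged outputs (FILTERED_RECORDS_OUT and EXISTING_TARGET_FILES_OUT above), which is why the transform yields a PCollectionTuple. A minimal sketch of that Beam multi-output pattern, using illustrative tag names and paths rather than the template's actual ones:

import java.util.Set;
import org.apache.beam.sdk.Pipeline;
import org.apache.beam.sdk.options.PipelineOptionsFactory;
import org.apache.beam.sdk.transforms.Create;
import org.apache.beam.sdk.transforms.DoFn;
import org.apache.beam.sdk.transforms.ParDo;
import org.apache.beam.sdk.values.PCollectionTuple;
import org.apache.beam.sdk.values.TupleTag;
import org.apache.beam.sdk.values.TupleTagList;

public class FilterTupleSketch {

    // Anonymous subclasses so Beam can reify the element type.
    static final TupleTag<String> NEW_RECORDS = new TupleTag<String>() {};
    static final TupleTag<String> EXISTING_TARGETS = new TupleTag<String>() {};

    public static void main(String[] args) {
        Set<String> existing = Set.of("gs://bucket/2021/12/out.avro");
        Pipeline p = Pipeline.create(PipelineOptionsFactory.create());

        PCollectionTuple outputs =
            p.apply(Create.of("gs://bucket/2021/12/out.avro", "gs://bucket/2022/01/out.avro"))
                .apply(
                    ParDo.of(
                            new DoFn<String, String>() {
                                @ProcessElement
                                public void processElement(
                                        @Element String path, MultiOutputReceiver out) {
                                    // Route each element to exactly one of the two outputs.
                                    if (existing.contains(path)) {
                                        out.get(EXISTING_TARGETS).output(path);
                                    } else {
                                        out.get(NEW_RECORDS).output(path);
                                    }
                                }
                            })
                        .withOutputTags(NEW_RECORDS, TupleTagList.of(EXISTING_TARGETS)));

        outputs.get(NEW_RECORDS); // records safe to write
        outputs.get(EXISTING_TARGETS); // records whose target file already exists
        p.run().waitUntilFinish();
    }
}

In the test above, WRITE_EMPTY makes an existing target fatal: instead of the record simply landing on the "existing" side, the pipeline raises the WriteDispositionException asserted in the catch block.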

Aggregations

WriteDispositionException (com.google.cloud.teleport.v2.utils.WriteDisposition.WriteDispositionException): 2
IOException (java.io.IOException): 2
Test (org.junit.Test): 2
GoogleCloudDataplexV1Asset (com.google.api.services.dataplex.v1.model.GoogleCloudDataplexV1Asset): 1
GoogleCloudDataplexV1Entity (com.google.api.services.dataplex.v1.model.GoogleCloudDataplexV1Entity): 1
GoogleCloudDataplexV1Partition (com.google.api.services.dataplex.v1.model.GoogleCloudDataplexV1Partition): 1
FieldValueList (com.google.cloud.bigquery.FieldValueList): 1
QueryJobConfiguration (com.google.cloud.bigquery.QueryJobConfiguration): 1
NoopTransform (com.google.cloud.teleport.v2.transforms.NoopTransform): 1
WriteDispositionException (com.google.cloud.teleport.v2.utils.JdbcIngestionWriteDisposition.WriteDispositionException): 1
FileNotFoundException (java.io.FileNotFoundException): 1
PCollectionTuple (org.apache.beam.sdk.values.PCollectionTuple): 1