use of com.google.api.services.dataplex.v1.model.GoogleCloudDataplexV1Asset in project DataflowTemplates by GoogleCloudPlatform.
the class DataplexFileFormatConversion method run.
/**
 * Runs the pipeline to completion with the specified options.
 *
 * <p>The method validates the input (asset or entities list) and the output asset, derives a
 * per-file filter from the configured write disposition, and then applies one {@code
 * ConvertFiles} transform for every input file that passes the filter. Entities without
 * partitions are read from the entity data path; otherwise each partition location is read. If no
 * file at all was selected for conversion, a single no-op transform is applied so that the
 * pipeline is not empty.
 *
 * @param pipeline the pipeline the conversion transforms are applied to
 * @param options conversion options: input asset or entities list, output asset, output file
 *     format and write disposition are read here
 * @param dataplex client used to look up the output asset, entities and partitions
 * @param outputPathProvider maps an input location plus the output bucket to an output path
 * @return The pipeline result.
 * @throws IllegalArgumentException if the input matches neither the asset nor the entities
 *     pattern, or if the output asset is missing, has no resource spec / name, or is not of the
 *     storage bucket type
 * @throws UnsupportedOperationException if the write disposition is not one of the handled values
 * @throws IOException declared by the signature (NOTE(review): presumably raised by the Dataplex
 *     lookups or the file listing helpers; confirm against those helpers)
 */
public static PipelineResult run(Pipeline pipeline, FileFormatConversionOptions options, DataplexClient dataplex, OutputPathProvider outputPathProvider) throws IOException {
  boolean isInputAsset = ASSET_PATTERN.matcher(options.getInputAssetOrEntitiesList()).matches();
  if (!isInputAsset && !ENTITIES_PATTERN.matcher(options.getInputAssetOrEntitiesList()).matches()) {
    throw new IllegalArgumentException("Either input asset or input entities list must be provided");
  }

  GoogleCloudDataplexV1Asset outputAsset = dataplex.getAsset(options.getOutputAsset());
  if (outputAsset == null || outputAsset.getResourceSpec() == null || !DataplexAssetResourceSpec.STORAGE_BUCKET.name().equals(outputAsset.getResourceSpec().getType()) || outputAsset.getResourceSpec().getName() == null) {
    throw new IllegalArgumentException("Output asset must be an existing asset with resource spec name being a GCS bucket and" + " resource spec type of " + DataplexAssetResourceSpec.STORAGE_BUCKET.name());
  }
  String outputBucket = outputAsset.getResourceSpec().getName();

  // Each case gets its own brace-delimited scope so that the set captured by the lambda is a
  // plainly effectively-final local instead of one variable shared across switch labels.
  Predicate<String> inputFilesFilter;
  switch (options.getWriteDisposition()) {
    case OVERWRITE:
      inputFilesFilter = inputFilePath -> true;
      break;
    case FAIL: {
      Set<String> existingOutputPaths = getAllOutputFilePaths(outputBucket);
      // The check runs lazily, i.e. when the file stream below is actually consumed.
      inputFilesFilter = inputFilePath -> {
        if (existingOutputPaths.contains(inputFilePathToOutputFilePath(outputPathProvider, inputFilePath, outputBucket, options.getOutputFileFormat()))) {
          throw new WriteDispositionException(String.format("The file %s already exists in the output asset bucket: %s", inputFilePath, outputBucket));
        }
        return true;
      };
      break;
    }
    case SKIP: {
      Set<String> existingOutputPaths = getAllOutputFilePaths(outputBucket);
      inputFilesFilter = inputFilePath -> !existingOutputPaths.contains(inputFilePathToOutputFilePath(outputPathProvider, inputFilePath, outputBucket, options.getOutputFileFormat()));
      break;
    }
    default:
      throw new UnsupportedOperationException("Unsupported existing file behaviour: " + options.getWriteDisposition());
  }

  ImmutableList<GoogleCloudDataplexV1Entity> entities = isInputAsset ? dataplex.getCloudStorageEntities(options.getInputAssetOrEntitiesList()) : dataplex.getEntities(Splitter.on(',').trimResults().splitToList(options.getInputAssetOrEntitiesList()));

  boolean convertingFiles = false;
  for (GoogleCloudDataplexV1Entity entity : entities) {
    ImmutableList<GoogleCloudDataplexV1Partition> partitions = dataplex.getPartitions(entity.getName());
    if (partitions.isEmpty()) {
      String outputPath = outputPathProvider.outputPathFrom(entity.getDataPath(), outputBucket);
      Iterator<String> inputFilePaths = getFilesFromFilePattern(entityToFileSpec(entity)).filter(inputFilesFilter).iterator();
      // Accumulate rather than overwrite: a later entity with no files must not erase the fact
      // that an earlier one already scheduled conversions.
      convertingFiles |= inputFilePaths.hasNext();
      inputFilePaths.forEachRemaining(inputFilePath -> pipeline.apply("Convert " + shortenDataplexName(entity.getName()), new ConvertFiles(entity, inputFilePath, options, outputPath)));
    } else {
      for (GoogleCloudDataplexV1Partition partition : partitions) {
        String outputPath = outputPathProvider.outputPathFrom(partition.getLocation(), outputBucket);
        Iterator<String> inputFilePaths = getFilesFromFilePattern(partitionToFileSpec(partition)).filter(inputFilesFilter).iterator();
        // Same accumulation as above, per partition.
        convertingFiles |= inputFilePaths.hasNext();
        inputFilePaths.forEachRemaining(inputFilePath -> pipeline.apply("Convert " + shortenDataplexName(partition.getName()), new ConvertFiles(entity, inputFilePath, options, outputPath)));
      }
    }
  }

  if (!convertingFiles) {
    // Nothing was scheduled anywhere; keep the pipeline non-empty.
    pipeline.apply("Nothing to convert", new NoopTransform());
  }
  return pipeline.run();
}
use of com.google.api.services.dataplex.v1.model.GoogleCloudDataplexV1Asset in project DataflowTemplates by GoogleCloudPlatform.
the class DataplexJdbcIngestion method main.
/**
 * Main entry point for pipeline execution.
 *
 * <p>Parses and validates the options, resolves the output Dataplex asset, and builds either the
 * BigQuery or the GCS variant of the pipeline depending on the asset's resource spec type. For a
 * storage bucket asset the target root path is {@code gs://<resource name>/<output table>}.
 *
 * @param args Command line arguments to the pipeline.
 * @throws IllegalArgumentException if the asset type is neither a BigQuery dataset nor a storage
 *     bucket
 * @throws IOException declared by the signature (NOTE(review): presumably from resolving the
 *     asset through the Dataplex client; confirm)
 */
public static void main(String[] args) throws IOException {
  DataplexJdbcIngestionOptions options = PipelineOptionsFactory.fromArgs(args).withValidation().as(DataplexJdbcIngestionOptions.class);
  Pipeline pipeline = Pipeline.create(options);
  DataplexClient dataplexClient = DefaultDataplexClient.withDefaultClient(options.getGcpCredential());
  String assetName = options.getOutputAsset();
  GoogleCloudDataplexV1Asset asset = resolveAsset(assetName, dataplexClient);
  DynamicDataSourceConfiguration dataSourceConfig = configDataSource(options);
  String assetType = asset.getResourceSpec().getType();
  if (DataplexAssetResourceSpec.BIGQUERY_DATASET.name().equals(assetType)) {
    buildBigQueryPipeline(pipeline, options, dataSourceConfig);
  } else if (DataplexAssetResourceSpec.STORAGE_BUCKET.name().equals(assetType)) {
    String targetRootPath = "gs://" + asset.getResourceSpec().getName() + "/" + options.getOutputTable();
    buildGcsPipeline(pipeline, options, dataSourceConfig, targetRootPath);
  } else {
    // Values are passed as format arguments, never as part of the format string, so a '%' in
    // the asset name or type cannot break formatting.
    throw new IllegalArgumentException(String.format("Asset %s is of type %s. Only %s and %s supported.", assetName, assetType, DataplexAssetResourceSpec.BIGQUERY_DATASET.name(), DataplexAssetResourceSpec.STORAGE_BUCKET.name()));
  }
  pipeline.run();
}
use of com.google.api.services.dataplex.v1.model.GoogleCloudDataplexV1Asset in project DataflowTemplates by GoogleCloudPlatform.
the class DataplexJdbcIngestion method resolveAsset.
/**
 * Resolves a Dataplex asset and verifies that it carries the fields callers rely on.
 *
 * <p>The asset is fetched through {@code dataplexClient} and then checked, in order, for: being
 * present at all, having a resource spec, having a resource spec type, and having a resource
 * spec name. Each failed check is reported through {@code checkNotNull} with a message naming
 * what is missing. The resolved type and name are logged at info level.
 *
 * @param assetName Asset name from which the Dataplex asset will be resolved.
 * @param dataplexClient Dataplex client to connect to Dataplex via asset name.
 * @return The resolved asset
 * @throws IOException declared by the signature (NOTE(review): presumably from the client
 *     lookup; confirm)
 */
private static GoogleCloudDataplexV1Asset resolveAsset(String assetName, DataplexClient dataplexClient) throws IOException {
  LOG.info("Resolving asset: {}", assetName);
  GoogleCloudDataplexV1Asset asset = dataplexClient.getAsset(assetName);
  // Guard the asset itself so a missing asset fails with a message instead of a bare
  // dereference failure on the next line.
  checkNotNull(asset, "Asset not found: " + assetName);
  checkNotNull(asset.getResourceSpec(), "Asset has no ResourceSpec.");

  String assetType = asset.getResourceSpec().getType();
  checkNotNull(assetType, "Asset has no type.");
  LOG.info("Resolved resource type: {}", assetType);

  String resourceName = asset.getResourceSpec().getName();
  checkNotNull(resourceName, "Asset has no resource name.");
  LOG.info("Resolved resource name: {}", resourceName);
  return asset;
}
use of com.google.api.services.dataplex.v1.model.GoogleCloudDataplexV1Asset in project DataflowTemplates by GoogleCloudPlatform.
the class DefaultDataplexClient method shouldSkipCreatingMetadata.
/**
 * Determines if we should skip creating metadata under {@code assetName}.
 *
 * <p>Currently, we skip creating metadata if discovery is enabled on either the zone or asset.
 * Trying to create metadata manually when this is enabled can lead to undefined behavior.
 *
 * <p>A missing discovery spec, or a discovery spec whose enabled flag is unset, is treated as
 * "discovery not enabled" rather than causing a failure. The asset is checked first; the zone is
 * only looked up when the asset itself does not have discovery enabled.
 *
 * @param assetName name of the asset to check
 * @return true if we should skip metadata creation, false otherwise
 * @throws IOException declared by the signature (NOTE(review): presumably from the asset / zone
 *     lookups; confirm)
 */
private boolean shouldSkipCreatingMetadata(String assetName) throws IOException {
  GoogleCloudDataplexV1Asset asset = getAsset(assetName);
  // Boolean.TRUE.equals(...) tolerates a null flag; the explicit null check tolerates an absent
  // discovery spec.
  if (asset.getDiscoverySpec() != null && Boolean.TRUE.equals(asset.getDiscoverySpec().getEnabled())) {
    LOG.warn("Automatic discovery enabled for asset `{}`.", assetName);
    return true;
  }

  String zoneName = getZoneFromAsset(assetName);
  GoogleCloudDataplexV1Zone zone = getZone(zoneName);
  if (zone.getDiscoverySpec() != null && Boolean.TRUE.equals(zone.getDiscoverySpec().getEnabled())) {
    LOG.warn("Automatic discovery enabled for zone `{}`", zoneName);
    return true;
  }
  return false;
}
use of com.google.api.services.dataplex.v1.model.GoogleCloudDataplexV1Asset in project DataflowTemplates by GoogleCloudPlatform.
the class DataplexBigQueryToGcs method resolveAsset.
/**
 * Resolves a Dataplex asset name into the corresponding resource spec name, verifying that the
 * asset is of the correct type.
 *
 * <p>The asset is fetched through {@code dataplex}; it must exist, have a resource spec whose
 * type equals {@code expectedType.name()}, and have a resource spec name. Presence checks are
 * reported through {@code checkNotNull}; a type mismatch is reported as an {@link
 * IllegalArgumentException}.
 *
 * @param dataplex client used to fetch the asset
 * @param assetName name of the asset to resolve
 * @param expectedType resource spec type the asset is required to have
 * @return the resource spec name of the asset
 * @throws IllegalArgumentException if the asset's resource spec type differs from {@code
 *     expectedType}
 * @throws IOException declared by the signature (NOTE(review): presumably from the client
 *     lookup; confirm)
 */
private static String resolveAsset(DataplexClient dataplex, String assetName, DataplexAssetResourceSpec expectedType) throws IOException {
  LOG.info("Resolving asset: {}", assetName);
  GoogleCloudDataplexV1Asset asset = dataplex.getAsset(assetName);
  // Guard the asset itself so a missing asset fails with a message instead of a bare
  // dereference failure on the next line.
  checkNotNull(asset, "Asset not found: " + assetName);
  checkNotNull(asset.getResourceSpec(), "Asset has no ResourceSpec.");

  String type = asset.getResourceSpec().getType();
  if (!expectedType.name().equals(type)) {
    throw new IllegalArgumentException(String.format("Asset %s is of type %s, expected: %s.", assetName, type, expectedType.name()));
  }

  String resourceName = asset.getResourceSpec().getName();
  checkNotNull(resourceName, "Asset has no resource name.");
  LOG.info("Resolved resource name: {}", resourceName);
  return resourceName;
}
Aggregations