Use of com.google.api.services.dataplex.v1.model.GoogleCloudDataplexV1Entity in project DataflowTemplates by GoogleCloudPlatform.
The class DataplexFileFormatConversion, method run().
/**
* Runs the pipeline to completion with the specified options.
*
* @return The pipeline result.
*/
public static PipelineResult run(
    Pipeline pipeline, FileFormatConversionOptions options,
    DataplexClient dataplex, OutputPathProvider outputPathProvider) throws IOException {
  boolean isInputAsset = ASSET_PATTERN.matcher(options.getInputAssetOrEntitiesList()).matches();
  if (!isInputAsset
      && !ENTITIES_PATTERN.matcher(options.getInputAssetOrEntitiesList()).matches()) {
    throw new IllegalArgumentException(
        "Either input asset or input entities list must be provided");
  }

  GoogleCloudDataplexV1Asset outputAsset = dataplex.getAsset(options.getOutputAsset());
  if (outputAsset == null
      || outputAsset.getResourceSpec() == null
      || !DataplexAssetResourceSpec.STORAGE_BUCKET.name()
          .equals(outputAsset.getResourceSpec().getType())
      || outputAsset.getResourceSpec().getName() == null) {
    throw new IllegalArgumentException(
        "Output asset must be an existing asset with resource spec name being a GCS bucket and"
            + " resource spec type of " + DataplexAssetResourceSpec.STORAGE_BUCKET.name());
  }
  String outputBucket = outputAsset.getResourceSpec().getName();

  // Decide how to treat input files whose converted output already exists in the bucket.
  Predicate<String> inputFilesFilter;
  switch (options.getWriteDisposition()) {
    case OVERWRITE:
      inputFilesFilter = inputFilePath -> true;
      break;
    case FAIL: {
      // Per-case braces scope the set so each lambda captures an effectively final variable.
      Set<String> outputFilePaths = getAllOutputFilePaths(outputBucket);
      inputFilesFilter = inputFilePath -> {
        if (outputFilePaths.contains(inputFilePathToOutputFilePath(
            outputPathProvider, inputFilePath, outputBucket, options.getOutputFileFormat()))) {
          throw new WriteDispositionException(String.format(
              "The file %s already exists in the output asset bucket: %s",
              inputFilePath, outputBucket));
        }
        return true;
      };
      break;
    }
    case SKIP: {
      Set<String> outputFilePaths = getAllOutputFilePaths(outputBucket);
      inputFilesFilter = inputFilePath ->
          !outputFilePaths.contains(inputFilePathToOutputFilePath(
              outputPathProvider, inputFilePath, outputBucket, options.getOutputFileFormat()));
      break;
    }
    default:
      throw new UnsupportedOperationException(
          "Unsupported existing file behaviour: " + options.getWriteDisposition());
  }

  ImmutableList<GoogleCloudDataplexV1Entity> entities =
      isInputAsset
          ? dataplex.getCloudStorageEntities(options.getInputAssetOrEntitiesList())
          : dataplex.getEntities(
              Splitter.on(',').trimResults().splitToList(options.getInputAssetOrEntitiesList()));

  boolean convertingFiles = false;
  for (GoogleCloudDataplexV1Entity entity : entities) {
    ImmutableList<GoogleCloudDataplexV1Partition> partitions =
        dataplex.getPartitions(entity.getName());
    if (partitions.isEmpty()) {
      // Non-partitioned entity: convert the files under the entity's data path.
      String outputPath = outputPathProvider.outputPathFrom(entity.getDataPath(), outputBucket);
      Iterator<String> inputFilePaths =
          getFilesFromFilePattern(entityToFileSpec(entity)).filter(inputFilesFilter).iterator();
      convertingFiles = inputFilePaths.hasNext();
      inputFilePaths.forEachRemaining(inputFilePath ->
          pipeline.apply(
              "Convert " + shortenDataplexName(entity.getName()),
              new ConvertFiles(entity, inputFilePath, options, outputPath)));
    } else {
      // Partitioned entity: convert the files under each partition's location.
      for (GoogleCloudDataplexV1Partition partition : partitions) {
        String outputPath =
            outputPathProvider.outputPathFrom(partition.getLocation(), outputBucket);
        Iterator<String> inputFilePaths =
            getFilesFromFilePattern(partitionToFileSpec(partition))
                .filter(inputFilesFilter).iterator();
        convertingFiles = inputFilePaths.hasNext();
        inputFilePaths.forEachRemaining(inputFilePath ->
            pipeline.apply(
                "Convert " + shortenDataplexName(partition.getName()),
                new ConvertFiles(entity, inputFilePath, options, outputPath)));
      }
    }
  }

  if (!convertingFiles) {
    // Register a no-op transform so the submitted job is still valid when every
    // input file was filtered out by the write disposition.
    pipeline.apply("Nothing to convert", new NoopTransform());
  }
  return pipeline.run();
}
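For orientation, here is a minimal sketch of how this entry point might be driven from a template main method. The PipelineOptionsFactory and Pipeline calls are standard Beam; the client factory stub and the lambda supplied as the OutputPathProvider are hypothetical stand-ins for the template's actual wiring.

import org.apache.beam.sdk.Pipeline;
import org.apache.beam.sdk.PipelineResult;
import org.apache.beam.sdk.options.PipelineOptionsFactory;

public final class RunConversionExample {
  public static void main(String[] args) throws Exception {
    FileFormatConversionOptions options =
        PipelineOptionsFactory.fromArgs(args).withValidation()
            .as(FileFormatConversionOptions.class);
    Pipeline pipeline = Pipeline.create(options);
    DataplexClient dataplex = newDataplexClient(); // hypothetical factory, see below
    // Assumes OutputPathProvider is a functional interface whose single method matches
    // outputPathFrom(dataPath, outputBucket) as invoked inside run().
    PipelineResult result = DataplexFileFormatConversion.run(
        pipeline, options, dataplex,
        (dataPath, bucket) -> "gs://" + bucket + "/converted"); // hypothetical provider
    result.waitUntilFinish();
  }

  private static DataplexClient newDataplexClient() {
    // Placeholder: construct the project's DataplexClient implementation here.
    throw new UnsupportedOperationException("not shown in this excerpt");
  }
}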
Use of com.google.api.services.dataplex.v1.model.GoogleCloudDataplexV1Entity in project DataflowTemplates by GoogleCloudPlatform.
The class DefaultDataplexClient, method getEntities().
@Override
public ImmutableList<GoogleCloudDataplexV1Entity> getEntities(List<String> entityNames)
    throws IOException {
  Entities entities = client.projects().locations().lakes().zones().entities();
  ImmutableList.Builder<GoogleCloudDataplexV1Entity> result = ImmutableList.builder();
  for (String entityName : entityNames) {
    result.add(
        entities.get(entityName).setView(GetEntityRequestEntityView.FULL.name()).execute());
  }
  return result.build();
}
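A usage sketch for this method: entity names are passed as fully qualified Dataplex resource names. The project, location, lake, zone, and entity IDs below are placeholders.

// Placeholder resource names; substitute real project/location/lake/zone/entity IDs.
ImmutableList<GoogleCloudDataplexV1Entity> entities =
    dataplexClient.getEntities(ImmutableList.of(
        "projects/my-project/locations/us-central1/lakes/my-lake/zones/my-zone/entities/orders",
        "projects/my-project/locations/us-central1/lakes/my-lake/zones/my-zone/entities/users"));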
Use of com.google.api.services.dataplex.v1.model.GoogleCloudDataplexV1Entity in project DataflowTemplates by GoogleCloudPlatform.
The class DefaultDataplexClient, method getEntitiesUnderAssetStream().
/**
* Gets a stream of all entities under {@code assetName}.
*/
private Stream<GoogleCloudDataplexV1Entity> getEntitiesUnderAssetStream(String assetName)
    throws IOException {
  Entities entities = client.projects().locations().lakes().zones().entities();
  String zoneName = getZoneFromAsset(assetName);
  GoogleCloudDataplexV1ListEntitiesResponse response = entities.list(zoneName).execute();
  Stream<GoogleCloudDataplexV1Entity> result = getEntitiesUnderAssetForPage(response, assetName);
  // The result of the list is paginated, with the default page size being 10.
  while (response.getNextPageToken() != null) {
    response = entities.list(zoneName).setPageToken(response.getNextPageToken()).execute();
    result = Stream.concat(result, getEntitiesUnderAssetForPage(response, assetName));
  }
  return result;
}
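The per-page helper getEntitiesUnderAssetForPage is referenced above but not included in this excerpt. A plausible reconstruction, under the assumption that each listed entity reports its owning asset through getAsset() in a form comparable to assetName:

// Hypothetical reconstruction of the helper: keep only the entities on this page
// that belong to the requested asset.
private static Stream<GoogleCloudDataplexV1Entity> getEntitiesUnderAssetForPage(
    GoogleCloudDataplexV1ListEntitiesResponse response, String assetName) {
  List<GoogleCloudDataplexV1Entity> pageEntities = response.getEntities();
  return pageEntities == null
      ? Stream.empty()
      : pageEntities.stream().filter(entity -> assetName.equals(entity.getAsset()));
}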
Use of com.google.api.services.dataplex.v1.model.GoogleCloudDataplexV1Entity in project DataflowTemplates by GoogleCloudPlatform.
The class DefaultDataplexClient, method updateEntitiesUnderAsset().
/**
* Handles just the updating of entities. Each entity is logged after it is updated.
*/
private Map<EntityMetadata, GoogleCloudDataplexV1Entity> updateEntitiesUnderAsset(
    String assetName, Map<EntityMetadata, GoogleCloudDataplexV1Entity> metadataToEntity)
    throws IOException {
  Map<EntityMetadata, GoogleCloudDataplexV1Entity> updatedMetadataToEntity = new HashMap<>();
  for (Map.Entry<EntityMetadata, GoogleCloudDataplexV1Entity> entry : metadataToEntity.entrySet()) {
    EntityMetadata metadata = entry.getKey();
    GoogleCloudDataplexV1Entity existing = entry.getValue();
    metadata.updateDataplexEntity(existing);
    GoogleCloudDataplexV1Entity updated =
        client.projects().locations().lakes().zones().entities()
            .update(existing.getName(), existing.setAsset(assetName))
            .execute();
    LOG.info("Updated entity with name '{}' that points to data path '{}'",
        updated.getName(), metadata.dataPath());
    updatedMetadataToEntity.put(metadata, updated);
  }
  return updatedMetadataToEntity;
}
Use of com.google.api.services.dataplex.v1.model.GoogleCloudDataplexV1Entity in project DataflowTemplates by GoogleCloudPlatform.
The class DefaultDataplexClient, method createEntitiesUnderAsset().
/**
* Handles just the creation of entities. Each entity is logged after creation.
*/
private Map<EntityMetadata, GoogleCloudDataplexV1Entity> createEntitiesUnderAsset(
    String assetName, List<EntityMetadata> metadata) throws IOException {
  Map<EntityMetadata, GoogleCloudDataplexV1Entity> metadataToEntity = new HashMap<>();
  for (EntityMetadata m : metadata) {
    GoogleCloudDataplexV1Entity entity =
        client.projects().locations().lakes().zones().entities()
            .create(assetName, m.toDataplexEntity().setAsset(assetName))
            .execute();
    LOG.info("Created entity with name '{}' pointing to '{}'", entity.getName(), m.dataPath());
    metadataToEntity.put(m, entity);
  }
  return metadataToEntity;
}
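Taken together, the two private helpers above suggest a create-or-update flow for metadata under an asset. The following is a hedged sketch of how they might be composed, assuming existing entities are matched to incoming metadata by data path; the method name and the matching rule are assumptions, not the actual DefaultDataplexClient API.

// Hypothetical composition of createEntitiesUnderAsset and updateEntitiesUnderAsset.
public Map<EntityMetadata, GoogleCloudDataplexV1Entity> createOrUpdateEntities(
    String assetName, List<EntityMetadata> allMetadata) throws IOException {
  // Index the entities already registered under the asset by their data path.
  Map<String, GoogleCloudDataplexV1Entity> existingByDataPath =
      getEntitiesUnderAssetStream(assetName)
          .collect(Collectors.toMap(GoogleCloudDataplexV1Entity::getDataPath, e -> e));

  // Split the incoming metadata into entities to create and entities to update.
  List<EntityMetadata> toCreate = new ArrayList<>();
  Map<EntityMetadata, GoogleCloudDataplexV1Entity> toUpdate = new HashMap<>();
  for (EntityMetadata metadata : allMetadata) {
    GoogleCloudDataplexV1Entity existing = existingByDataPath.get(metadata.dataPath());
    if (existing == null) {
      toCreate.add(metadata);
    } else {
      toUpdate.put(metadata, existing);
    }
  }

  Map<EntityMetadata, GoogleCloudDataplexV1Entity> result = new HashMap<>();
  result.putAll(createEntitiesUnderAsset(assetName, toCreate));
  result.putAll(updateEntitiesUnderAsset(assetName, toUpdate));
  return result;
}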