Example 1 with Partitions

use of com.google.api.services.dataplex.v1.CloudDataplex.Projects.Locations.Lakes.Zones.Entities.Partitions in project DataflowTemplates by GoogleCloudPlatform.

the class DataplexFileFormatConversion method run.

/**
 * Runs the pipeline to completion with the specified options.
 *
 * @return The pipeline result.
 */
public static PipelineResult run(Pipeline pipeline, FileFormatConversionOptions options, DataplexClient dataplex, OutputPathProvider outputPathProvider) throws IOException {
    boolean isInputAsset = ASSET_PATTERN.matcher(options.getInputAssetOrEntitiesList()).matches();
    if (!isInputAsset && !ENTITIES_PATTERN.matcher(options.getInputAssetOrEntitiesList()).matches()) {
        throw new IllegalArgumentException("Either input asset or input entities list must be provided");
    }
    GoogleCloudDataplexV1Asset outputAsset = dataplex.getAsset(options.getOutputAsset());
    if (outputAsset == null || outputAsset.getResourceSpec() == null || !DataplexAssetResourceSpec.STORAGE_BUCKET.name().equals(outputAsset.getResourceSpec().getType()) || outputAsset.getResourceSpec().getName() == null) {
        throw new IllegalArgumentException("Output asset must be an existing asset with resource spec name being a GCS bucket and" + " resource spec type of " + DataplexAssetResourceSpec.STORAGE_BUCKET.name());
    }
    String outputBucket = outputAsset.getResourceSpec().getName();
    Predicate<String> inputFilesFilter;
    switch(options.getWriteDisposition()) {
        case OVERWRITE:
            inputFilesFilter = inputFilePath -> true;
            break;
        case FAIL:
            Set<String> outputFilePaths = getAllOutputFilePaths(outputBucket);
            inputFilesFilter = inputFilePath -> {
                if (outputFilePaths.contains(inputFilePathToOutputFilePath(outputPathProvider, inputFilePath, outputBucket, options.getOutputFileFormat()))) {
                    throw new WriteDispositionException(String.format("The file %s already exists in the output asset bucket: %s", inputFilePath, outputBucket));
                }
                return true;
            };
            break;
        case SKIP:
            // Use a separate local here so the set captured by the FAIL lambda above stays effectively final.
            Set<String> existingOutputFilePaths = getAllOutputFilePaths(outputBucket);
            inputFilesFilter = inputFilePath -> !existingOutputFilePaths.contains(inputFilePathToOutputFilePath(outputPathProvider, inputFilePath, outputBucket, options.getOutputFileFormat()));
            break;
        default:
            throw new UnsupportedOperationException("Unsupported existing file behaviour: " + options.getWriteDisposition());
    }
    ImmutableList<GoogleCloudDataplexV1Entity> entities = isInputAsset ? dataplex.getCloudStorageEntities(options.getInputAssetOrEntitiesList()) : dataplex.getEntities(Splitter.on(',').trimResults().splitToList(options.getInputAssetOrEntitiesList()));
    boolean convertingFiles = false;
    for (GoogleCloudDataplexV1Entity entity : entities) {
        ImmutableList<GoogleCloudDataplexV1Partition> partitions = dataplex.getPartitions(entity.getName());
        if (partitions.isEmpty()) {
            String outputPath = outputPathProvider.outputPathFrom(entity.getDataPath(), outputBucket);
            Iterator<String> inputFilePaths = getFilesFromFilePattern(entityToFileSpec(entity)).filter(inputFilesFilter).iterator();
            convertingFiles |= inputFilePaths.hasNext();
            inputFilePaths.forEachRemaining(inputFilePath -> pipeline.apply("Convert " + shortenDataplexName(entity.getName()), new ConvertFiles(entity, inputFilePath, options, outputPath)));
        } else {
            for (GoogleCloudDataplexV1Partition partition : partitions) {
                String outputPath = outputPathProvider.outputPathFrom(partition.getLocation(), outputBucket);
                Iterator<String> inputFilePaths = getFilesFromFilePattern(partitionToFileSpec(partition)).filter(inputFilesFilter).iterator();
                convertingFiles |= inputFilePaths.hasNext();
                inputFilePaths.forEachRemaining(inputFilePath -> pipeline.apply("Convert " + shortenDataplexName(partition.getName()), new ConvertFiles(entity, inputFilePath, options, outputPath)));
            }
        }
    }
    if (!convertingFiles) {
        pipeline.apply("Nothing to convert", new NoopTransform());
    }
    return pipeline.run();
}
Also used : NoopTransform(com.google.cloud.teleport.v2.transforms.NoopTransform) GoogleCloudDataplexV1Partition(com.google.api.services.dataplex.v1.model.GoogleCloudDataplexV1Partition) WriteDispositionException(com.google.cloud.teleport.v2.utils.WriteDisposition.WriteDispositionException) GoogleCloudDataplexV1Asset(com.google.api.services.dataplex.v1.model.GoogleCloudDataplexV1Asset) GoogleCloudDataplexV1Entity(com.google.api.services.dataplex.v1.model.GoogleCloudDataplexV1Entity)
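
For context, a minimal sketch of how run() might be wired up from a template entry point, assuming standard Beam option parsing. buildCloudDataplex() is a hypothetical helper for constructing an authenticated CloudDataplex client, and the inline lambda (which assumes OutputPathProvider is a single-method interface) is a placeholder rather than the template's real output-path logic; DefaultDataplexClient.withClient(...) is shown in Example 4 below.

public static void main(String[] args) throws IOException {
    FileFormatConversionOptions options =
        PipelineOptionsFactory.fromArgs(args).withValidation().as(FileFormatConversionOptions.class);
    Pipeline pipeline = Pipeline.create(options);
    // buildCloudDataplex() is a hypothetical helper that builds an authenticated CloudDataplex client.
    DataplexClient dataplex = DefaultDataplexClient.withClient(buildCloudDataplex());
    // Placeholder output path provider: nest everything under a fixed prefix in the output bucket.
    PipelineResult result =
        DataplexFileFormatConversion.run(
            pipeline,
            options,
            dataplex,
            (dataPath, outputBucket) -> "gs://" + outputBucket + "/converted");
    result.waitUntilFinish();
}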

Example 2 with Partitions

use of com.google.api.services.dataplex.v1.CloudDataplex.Projects.Locations.Lakes.Zones.Entities.Partitions in project DataflowTemplates by GoogleCloudPlatform.

the class DefaultDataplexClient method createPartitions.

/**
 * Handles creation of partitions. Each partition is logged after being created.
 */
private void createPartitions(ImmutableMap<String, ImmutableList<PartitionMetadata>> entityNameToPartition) throws IOException {
    for (Map.Entry<String, ImmutableList<PartitionMetadata>> entry : entityNameToPartition.entrySet()) {
        ImmutableList<GoogleCloudDataplexV1Partition> partitions = entry.getValue().stream().map(PartitionMetadata::toDataplexPartition).collect(toImmutableList());
        for (GoogleCloudDataplexV1Partition partition : partitions) {
            GoogleCloudDataplexV1Partition result = client.projects().locations().lakes().zones().entities().partitions().create(entry.getKey(), partition).execute();
            LOG.info("Created partition '{}' under entity '{}'", result.getName(), entry.getKey());
        }
    }
}
Also used : ImmutableList(com.google.common.collect.ImmutableList) ImmutableList.toImmutableList(com.google.common.collect.ImmutableList.toImmutableList) GoogleCloudDataplexV1Partition(com.google.api.services.dataplex.v1.model.GoogleCloudDataplexV1Partition) HashMap(java.util.HashMap) Collectors.toMap(java.util.stream.Collectors.toMap) Map(java.util.Map) ImmutableMap(com.google.common.collect.ImmutableMap)
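
As a rough illustration of the underlying API call used in the loop above, the sketch below creates a single partition directly with the generated client. The entity name, location, and partition key values are placeholders, and the setters are assumed to follow the generated model's standard field naming.

// Placeholder partition; a real partition's location and key values come from discovered metadata.
GoogleCloudDataplexV1Partition partition =
    new GoogleCloudDataplexV1Partition()
        .setLocation("gs://example-bucket/orders/year=2022/month=01")
        .setValues(ImmutableList.of("2022", "01"));
GoogleCloudDataplexV1Partition created =
    client.projects().locations().lakes().zones().entities().partitions()
        .create("projects/my-project/locations/us-central1/lakes/my-lake/zones/my-zone/entities/orders", partition)
        .execute();
LOG.info("Created partition '{}'", created.getName());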

Example 3 with Partitions

use of com.google.api.services.dataplex.v1.CloudDataplex.Projects.Locations.Lakes.Zones.Entities.Partitions in project DataflowTemplates by GoogleCloudPlatform.

the class DefaultDataplexClient method getPartitions.

@Override
public ImmutableList<GoogleCloudDataplexV1Partition> getPartitions(String entityName) throws IOException {
    ImmutableList.Builder<GoogleCloudDataplexV1Partition> result = ImmutableList.builder();
    Partitions partitions = client.projects().locations().lakes().zones().entities().partitions();
    GoogleCloudDataplexV1ListPartitionsResponse response = partitions.list(entityName).execute();
    if (response.getPartitions() == null) {
        return ImmutableList.of();
    }
    result.addAll(response.getPartitions());
    // the result of the list is paginated with the default page size being 10
    while (response.getNextPageToken() != null) {
        response = partitions.list(entityName).setPageToken(response.getNextPageToken()).execute();
        result.addAll(response.getPartitions());
    }
    return result.build();
}
Also used : Partitions(com.google.api.services.dataplex.v1.CloudDataplex.Projects.Locations.Lakes.Zones.Entities.Partitions) ImmutableList(com.google.common.collect.ImmutableList) ImmutableList.toImmutableList(com.google.common.collect.ImmutableList.toImmutableList) GoogleCloudDataplexV1ListPartitionsResponse(com.google.api.services.dataplex.v1.model.GoogleCloudDataplexV1ListPartitionsResponse) GoogleCloudDataplexV1Partition(com.google.api.services.dataplex.v1.model.GoogleCloudDataplexV1Partition)
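
A short usage sketch for getPartitions, assuming the caller already has a CloudDataplex instance (cloudDataplex) and that the entity name below, which is a placeholder, exists in the zone.

DataplexClient dataplex = DefaultDataplexClient.withClient(cloudDataplex);
ImmutableList<GoogleCloudDataplexV1Partition> partitions =
    dataplex.getPartitions("projects/my-project/locations/us-central1/lakes/my-lake/zones/my-zone/entities/orders");
for (GoogleCloudDataplexV1Partition partition : partitions) {
    // Each partition carries, among other fields, its resource name and Cloud Storage location.
    LOG.info("Partition '{}' is stored at '{}'", partition.getName(), partition.getLocation());
}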

Example 4 with Partitions

use of com.google.api.services.dataplex.v1.CloudDataplex.Projects.Locations.Lakes.Zones.Entities.Partitions in project DataflowTemplates by GoogleCloudPlatform.

the class DefaultDataplexClientTest method testGetPartitionsByEntityName.

@Test
public void testGetPartitionsByEntityName() throws IOException {
    CloudDataplex cloudDataplexClient = mock(CloudDataplex.class, Answers.RETURNS_DEEP_STUBS);
    Partitions partitions = mock(Partitions.class, Answers.RETURNS_DEEP_STUBS);
    when(cloudDataplexClient.projects().locations().lakes().zones().entities().partitions()).thenReturn(partitions);
    GoogleCloudDataplexV1Partition partition1 = new GoogleCloudDataplexV1Partition().setName("partition1");
    GoogleCloudDataplexV1Partition partition2 = new GoogleCloudDataplexV1Partition().setName("partition2");
    GoogleCloudDataplexV1Partition partition3 = new GoogleCloudDataplexV1Partition().setName("partition3");
    GoogleCloudDataplexV1ListPartitionsResponse response1 = new GoogleCloudDataplexV1ListPartitionsResponse().setPartitions(ImmutableList.of(partition1, partition2)).setNextPageToken(PAGE_TOKEN);
    GoogleCloudDataplexV1ListPartitionsResponse response2 = new GoogleCloudDataplexV1ListPartitionsResponse().setPartitions(ImmutableList.of(partition3));
    when(partitions.list("entity0").execute()).thenReturn(response1);
    when(partitions.list("entity0").setPageToken(eq(PAGE_TOKEN)).execute()).thenReturn(response2);
    assertEquals(ImmutableList.of(partition1, partition2, partition3), DefaultDataplexClient.withClient(cloudDataplexClient).getPartitions("entity0"));
}
Also used : CloudDataplex(com.google.api.services.dataplex.v1.CloudDataplex) Partitions(com.google.api.services.dataplex.v1.CloudDataplex.Projects.Locations.Lakes.Zones.Entities.Partitions) GoogleCloudDataplexV1ListPartitionsResponse(com.google.api.services.dataplex.v1.model.GoogleCloudDataplexV1ListPartitionsResponse) GoogleCloudDataplexV1Partition(com.google.api.services.dataplex.v1.model.GoogleCloudDataplexV1Partition) Test(org.junit.Test)

Example 5 with Partitions

use of com.google.api.services.dataplex.v1.CloudDataplex.Projects.Locations.Lakes.Zones.Entities.Partitions in project DataflowTemplates by GoogleCloudPlatform.

the class DefaultDataplexClientTest method testCreateEntitiesWhenDiscoveryEnabled.

@Test
public void testCreateEntitiesWhenDiscoveryEnabled() throws IOException {
    CloudDataplex dataplex = mock(CloudDataplex.class, Answers.RETURNS_DEEP_STUBS);
    Zones zones = getZones(dataplex);
    Assets assets = getAssets(dataplex);
    // Don't care about the order they're checked, so set up for two calls.
    when(assets.get(any()).execute()).thenReturn(createAsset(ENABLED_ASSET_DISCOVERY_SPEC)).thenReturn(createAsset(DISABLED_ASSET_DISCOVERY_SPEC));
    when(zones.get(any()).execute()).thenReturn(createZone(ENABLED_ZONE_DISCOVERY_SPEC)).thenReturn(createZone(DISABLED_ZONE_DISCOVERY_SPEC));
    DataplexClient client = DefaultDataplexClient.withClient(dataplex);
    client.createMetadata(ASSET_NAME1, ImmutableList.of(UNUSED_METADATA), CreateBehavior.UPDATE_IF_EXISTS);
    client.createMetadata(ASSET_NAME1, ImmutableList.of(UNUSED_METADATA), CreateBehavior.UPDATE_IF_EXISTS);
    verify(assets, atLeastOnce()).get(ASSET_NAME1);
    verify(zones, atLeastOnce()).get(ZONE_NAME);
    // Enough to know nothing was done with partitions
    verify(zones, never()).entities();
}
Also used : CloudDataplex(com.google.api.services.dataplex.v1.CloudDataplex) Assets(com.google.api.services.dataplex.v1.CloudDataplex.Projects.Locations.Lakes.Zones.Assets) Zones(com.google.api.services.dataplex.v1.CloudDataplex.Projects.Locations.Lakes.Zones) Test(org.junit.Test)

Aggregations

GoogleCloudDataplexV1Partition (com.google.api.services.dataplex.v1.model.GoogleCloudDataplexV1Partition): 4
CloudDataplex (com.google.api.services.dataplex.v1.CloudDataplex): 2
Partitions (com.google.api.services.dataplex.v1.CloudDataplex.Projects.Locations.Lakes.Zones.Entities.Partitions): 2
GoogleCloudDataplexV1ListPartitionsResponse (com.google.api.services.dataplex.v1.model.GoogleCloudDataplexV1ListPartitionsResponse): 2
ImmutableList (com.google.common.collect.ImmutableList): 2
ImmutableList.toImmutableList (com.google.common.collect.ImmutableList.toImmutableList): 2
Test (org.junit.Test): 2
Zones (com.google.api.services.dataplex.v1.CloudDataplex.Projects.Locations.Lakes.Zones): 1
Assets (com.google.api.services.dataplex.v1.CloudDataplex.Projects.Locations.Lakes.Zones.Assets): 1
GoogleCloudDataplexV1Asset (com.google.api.services.dataplex.v1.model.GoogleCloudDataplexV1Asset): 1
GoogleCloudDataplexV1Entity (com.google.api.services.dataplex.v1.model.GoogleCloudDataplexV1Entity): 1
NoopTransform (com.google.cloud.teleport.v2.transforms.NoopTransform): 1
WriteDispositionException (com.google.cloud.teleport.v2.utils.WriteDisposition.WriteDispositionException): 1
ImmutableMap (com.google.common.collect.ImmutableMap): 1
HashMap (java.util.HashMap): 1
Map (java.util.Map): 1
Collectors.toMap (java.util.stream.Collectors.toMap): 1