Search in sources :

Example 1 with TaggedField

Use of com.google.privacy.dlp.v2.PrivacyMetric.KMapEstimationConfig.TaggedField in the project java-docs-samples by GoogleCloudPlatform.

From the class RiskAnalysis, method calculateKMap.

// [END dlp_l_diversity]
// [START dlp_k_map]
/**
 * Calculate k-map risk estimation for an attribute relative to quasi-identifiers in a BigQuery
 * table.
 *
 * <p>Creates a DLP risk analysis job, waits up to one minute for a Pub/Sub notification carrying
 * the job's name, then fetches the job and prints its state and the k-map estimation histogram to
 * standard output. Any failure raised inside the method body (including the argument check) is
 * reported on standard output instead of being propagated.
 *
 * @param projectId The Google Cloud Platform project ID to run the API call under.
 * @param datasetId The BigQuery dataset to analyze.
 * @param tableId The BigQuery table to analyze.
 * @param quasiIds A set of column names that form a composite key ('quasi-identifiers').
 * @param infoTypes The infoTypes corresponding to each quasi-id column; must have the same size
 *     as {@code quasiIds}.
 * @param regionCode An ISO-3166-1 region code specifying the k-map distribution region
 * @param topicId The name of the Pub/Sub topic to notify once the job completes
 * @param subscriptionId The name of the Pub/Sub subscription to use when listening for job
 *     completion status.
 * @throws Exception kept in the signature for callers; failures inside the body are caught and
 *     printed.
 */
private static void calculateKMap(String projectId, String datasetId, String tableId, List<String> quasiIds, List<InfoType> infoTypes, String regionCode, String topicId, String subscriptionId) throws Exception {
    // Instantiates a client
    try (DlpServiceClient dlpServiceClient = DlpServiceClient.create()) {
        if (quasiIds.size() != infoTypes.size()) {
            throw new IllegalArgumentException("The numbers of quasi-IDs and infoTypes must be equal!");
        }
        // Pair every quasi-identifier column with the infoType at the same position.
        Iterator<String> quasiIdsIterator = quasiIds.iterator();
        Iterator<InfoType> infoTypesIterator = infoTypes.iterator();
        List<TaggedField> taggedFields = new ArrayList<>(quasiIds.size());
        // Both iterators must still have an element; '&&' guarantees next() is never called on
        // an exhausted iterator.
        while (quasiIdsIterator.hasNext() && infoTypesIterator.hasNext()) {
            FieldId fieldId = FieldId.newBuilder().setName(quasiIdsIterator.next()).build();
            taggedFields.add(TaggedField.newBuilder().setField(fieldId).setInfoType(infoTypesIterator.next()).build());
        }
        KMapEstimationConfig kmapConfig = KMapEstimationConfig.newBuilder().addAllQuasiIds(taggedFields).setRegionCode(regionCode).build();
        BigQueryTable bigQueryTable = BigQueryTable.newBuilder().setProjectId(projectId).setDatasetId(datasetId).setTableId(tableId).build();
        PrivacyMetric privacyMetric = PrivacyMetric.newBuilder().setKMapEstimationConfig(kmapConfig).build();
        String topicName = String.format("projects/%s/topics/%s", projectId, topicId);
        PublishToPubSub publishToPubSub = PublishToPubSub.newBuilder().setTopic(topicName).build();
        // Create action to publish job status notifications over Google Cloud Pub/Sub
        Action action = Action.newBuilder().setPubSub(publishToPubSub).build();
        RiskAnalysisJobConfig riskAnalysisJobConfig = RiskAnalysisJobConfig.newBuilder().setSourceTable(bigQueryTable).setPrivacyMetric(privacyMetric).addActions(action).build();
        CreateDlpJobRequest createDlpJobRequest = CreateDlpJobRequest.newBuilder().setParent(ProjectName.of(projectId).toString()).setRiskJob(riskAnalysisJobConfig).build();
        DlpJob dlpJob = dlpServiceClient.createDlpJob(createDlpJobRequest);
        String dlpJobName = dlpJob.getName();
        final SettableApiFuture<Boolean> done = SettableApiFuture.create();
        // Set up a Pub/Sub subscriber to listen on the job completion status
        Subscriber subscriber = Subscriber.newBuilder(ProjectSubscriptionName.newBuilder().setProject(projectId).setSubscription(subscriptionId).build(), (pubsubMessage, ackReplyConsumer) -> {
            // Comparing from the known non-null job name avoids a NullPointerException when a
            // message carries no "DlpJobName" attribute. Messages for other jobs are not
            // acknowledged here.
            if (dlpJobName.equals(pubsubMessage.getAttributesMap().get("DlpJobName"))) {
                // notify job completion
                done.set(true);
                ackReplyConsumer.ack();
            }
        }).build();
        subscriber.startAsync();
        // For long jobs, consider using a truly asynchronous execution model such as Cloud Functions
        try {
            done.get(1, TimeUnit.MINUTES);
            // Wait for the job to become available
            Thread.sleep(500);
        } catch (TimeoutException e) {
            System.out.println("Unable to verify job completion.");
        } finally {
            // Always release the subscriber so its background resources do not outlive this call.
            subscriber.stopAsync();
        }
        // retrieve completed job status
        DlpJob completedJob = dlpServiceClient.getDlpJob(GetDlpJobRequest.newBuilder().setName(dlpJobName).build());
        System.out.println("Job status: " + completedJob.getState());
        AnalyzeDataSourceRiskDetails riskDetails = completedJob.getRiskDetails();
        KMapEstimationResult kmapResult = riskDetails.getKMapEstimationResult();
        for (KMapEstimationHistogramBucket result : kmapResult.getKMapEstimationHistogramList()) {
            System.out.printf("\tAnonymity range: [%d, %d]\n", result.getMinAnonymity(), result.getMaxAnonymity());
            System.out.printf("\tSize: %d\n", result.getBucketSize());
            for (KMapEstimationQuasiIdValues valueBucket : result.getBucketValuesList()) {
                // Each value's text form is "<type>: <value>"; keep only the part after the colon.
                String quasiIdValues = valueBucket.getQuasiIdsValuesList().stream().map(v -> {
                    String s = v.toString();
                    return s.substring(s.indexOf(':') + 1).trim();
                }).collect(Collectors.joining(", "));
                System.out.printf("\tValues: {%s}\n", quasiIdValues);
                System.out.printf("\tEstimated k-map anonymity: %d\n", valueBucket.getEstimatedAnonymity());
            }
        }
    } catch (InterruptedException e) {
        // Restore the interrupt flag so callers can still observe the interruption.
        Thread.currentThread().interrupt();
        System.out.println("Error in calculateKMap: " + e.getMessage());
    } catch (Exception e) {
        System.out.println("Error in calculateKMap: " + e.getMessage());
    }
}
Also used : Arrays(java.util.Arrays) TimeoutException(java.util.concurrent.TimeoutException) Subscriber(com.google.cloud.pubsub.v1.Subscriber) DefaultParser(org.apache.commons.cli.DefaultParser) KMapEstimationHistogramBucket(com.google.privacy.dlp.v2.AnalyzeDataSourceRiskDetails.KMapEstimationResult.KMapEstimationHistogramBucket) LDiversityEquivalenceClass(com.google.privacy.dlp.v2.AnalyzeDataSourceRiskDetails.LDiversityResult.LDiversityEquivalenceClass) ValueFrequency(com.google.privacy.dlp.v2.ValueFrequency) LDiversityConfig(com.google.privacy.dlp.v2.PrivacyMetric.LDiversityConfig) NumericalStatsConfig(com.google.privacy.dlp.v2.PrivacyMetric.NumericalStatsConfig) Action(com.google.privacy.dlp.v2.Action) KMapEstimationConfig(com.google.privacy.dlp.v2.PrivacyMetric.KMapEstimationConfig) CategoricalStatsHistogramBucket(com.google.privacy.dlp.v2.AnalyzeDataSourceRiskDetails.CategoricalStatsResult.CategoricalStatsHistogramBucket) KAnonymityEquivalenceClass(com.google.privacy.dlp.v2.AnalyzeDataSourceRiskDetails.KAnonymityResult.KAnonymityEquivalenceClass) Value(com.google.privacy.dlp.v2.Value) TaggedField(com.google.privacy.dlp.v2.PrivacyMetric.KMapEstimationConfig.TaggedField) RiskAnalysisJobConfig(com.google.privacy.dlp.v2.RiskAnalysisJobConfig) Collectors(java.util.stream.Collectors) SettableApiFuture(com.google.api.core.SettableApiFuture) List(java.util.List) KAnonymityResult(com.google.privacy.dlp.v2.AnalyzeDataSourceRiskDetails.KAnonymityResult) ParseException(org.apache.commons.cli.ParseException) BigQueryTable(com.google.privacy.dlp.v2.BigQueryTable) ProjectSubscriptionName(com.google.pubsub.v1.ProjectSubscriptionName) AnalyzeDataSourceRiskDetails(com.google.privacy.dlp.v2.AnalyzeDataSourceRiskDetails) Options(org.apache.commons.cli.Options) HelpFormatter(org.apache.commons.cli.HelpFormatter) CategoricalStatsConfig(com.google.privacy.dlp.v2.PrivacyMetric.CategoricalStatsConfig) ArrayList(java.util.ArrayList) ServiceOptions(com.google.cloud.ServiceOptions) 
CommandLine(org.apache.commons.cli.CommandLine) FieldId(com.google.privacy.dlp.v2.FieldId) ProjectTopicName(com.google.pubsub.v1.ProjectTopicName) Option(org.apache.commons.cli.Option) DlpServiceClient(com.google.cloud.dlp.v2.DlpServiceClient) Iterator(java.util.Iterator) CreateDlpJobRequest(com.google.privacy.dlp.v2.CreateDlpJobRequest) CommandLineParser(org.apache.commons.cli.CommandLineParser) KMapEstimationResult(com.google.privacy.dlp.v2.AnalyzeDataSourceRiskDetails.KMapEstimationResult) KMapEstimationQuasiIdValues(com.google.privacy.dlp.v2.AnalyzeDataSourceRiskDetails.KMapEstimationResult.KMapEstimationQuasiIdValues) InfoType(com.google.privacy.dlp.v2.InfoType) LDiversityResult(com.google.privacy.dlp.v2.AnalyzeDataSourceRiskDetails.LDiversityResult) KAnonymityConfig(com.google.privacy.dlp.v2.PrivacyMetric.KAnonymityConfig) TimeUnit(java.util.concurrent.TimeUnit) PublishToPubSub(com.google.privacy.dlp.v2.Action.PublishToPubSub) ProjectName(com.google.privacy.dlp.v2.ProjectName) GetDlpJobRequest(com.google.privacy.dlp.v2.GetDlpJobRequest) LDiversityHistogramBucket(com.google.privacy.dlp.v2.AnalyzeDataSourceRiskDetails.LDiversityResult.LDiversityHistogramBucket) KAnonymityHistogramBucket(com.google.privacy.dlp.v2.AnalyzeDataSourceRiskDetails.KAnonymityResult.KAnonymityHistogramBucket) PrivacyMetric(com.google.privacy.dlp.v2.PrivacyMetric) OptionGroup(org.apache.commons.cli.OptionGroup) DlpJob(com.google.privacy.dlp.v2.DlpJob) Collections(java.util.Collections) Action(com.google.privacy.dlp.v2.Action) ArrayList(java.util.ArrayList) TaggedField(com.google.privacy.dlp.v2.PrivacyMetric.KMapEstimationConfig.TaggedField) PrivacyMetric(com.google.privacy.dlp.v2.PrivacyMetric) PublishToPubSub(com.google.privacy.dlp.v2.Action.PublishToPubSub) Subscriber(com.google.cloud.pubsub.v1.Subscriber) AnalyzeDataSourceRiskDetails(com.google.privacy.dlp.v2.AnalyzeDataSourceRiskDetails) InfoType(com.google.privacy.dlp.v2.InfoType) 
TimeoutException(java.util.concurrent.TimeoutException) KMapEstimationQuasiIdValues(com.google.privacy.dlp.v2.AnalyzeDataSourceRiskDetails.KMapEstimationResult.KMapEstimationQuasiIdValues) KMapEstimationConfig(com.google.privacy.dlp.v2.PrivacyMetric.KMapEstimationConfig) RiskAnalysisJobConfig(com.google.privacy.dlp.v2.RiskAnalysisJobConfig) CreateDlpJobRequest(com.google.privacy.dlp.v2.CreateDlpJobRequest) KMapEstimationHistogramBucket(com.google.privacy.dlp.v2.AnalyzeDataSourceRiskDetails.KMapEstimationResult.KMapEstimationHistogramBucket) TimeoutException(java.util.concurrent.TimeoutException) ParseException(org.apache.commons.cli.ParseException) DlpServiceClient(com.google.cloud.dlp.v2.DlpServiceClient) BigQueryTable(com.google.privacy.dlp.v2.BigQueryTable) DlpJob(com.google.privacy.dlp.v2.DlpJob) KMapEstimationResult(com.google.privacy.dlp.v2.AnalyzeDataSourceRiskDetails.KMapEstimationResult)

Aggregations

SettableApiFuture (com.google.api.core.SettableApiFuture)1 ServiceOptions (com.google.cloud.ServiceOptions)1 DlpServiceClient (com.google.cloud.dlp.v2.DlpServiceClient)1 Subscriber (com.google.cloud.pubsub.v1.Subscriber)1 Action (com.google.privacy.dlp.v2.Action)1 PublishToPubSub (com.google.privacy.dlp.v2.Action.PublishToPubSub)1 AnalyzeDataSourceRiskDetails (com.google.privacy.dlp.v2.AnalyzeDataSourceRiskDetails)1 CategoricalStatsHistogramBucket (com.google.privacy.dlp.v2.AnalyzeDataSourceRiskDetails.CategoricalStatsResult.CategoricalStatsHistogramBucket)1 KAnonymityResult (com.google.privacy.dlp.v2.AnalyzeDataSourceRiskDetails.KAnonymityResult)1 KAnonymityEquivalenceClass (com.google.privacy.dlp.v2.AnalyzeDataSourceRiskDetails.KAnonymityResult.KAnonymityEquivalenceClass)1 KAnonymityHistogramBucket (com.google.privacy.dlp.v2.AnalyzeDataSourceRiskDetails.KAnonymityResult.KAnonymityHistogramBucket)1 KMapEstimationResult (com.google.privacy.dlp.v2.AnalyzeDataSourceRiskDetails.KMapEstimationResult)1 KMapEstimationHistogramBucket (com.google.privacy.dlp.v2.AnalyzeDataSourceRiskDetails.KMapEstimationResult.KMapEstimationHistogramBucket)1 KMapEstimationQuasiIdValues (com.google.privacy.dlp.v2.AnalyzeDataSourceRiskDetails.KMapEstimationResult.KMapEstimationQuasiIdValues)1 LDiversityResult (com.google.privacy.dlp.v2.AnalyzeDataSourceRiskDetails.LDiversityResult)1 LDiversityEquivalenceClass (com.google.privacy.dlp.v2.AnalyzeDataSourceRiskDetails.LDiversityResult.LDiversityEquivalenceClass)1 LDiversityHistogramBucket (com.google.privacy.dlp.v2.AnalyzeDataSourceRiskDetails.LDiversityResult.LDiversityHistogramBucket)1 BigQueryTable (com.google.privacy.dlp.v2.BigQueryTable)1 CreateDlpJobRequest (com.google.privacy.dlp.v2.CreateDlpJobRequest)1 DlpJob (com.google.privacy.dlp.v2.DlpJob)1