Search in sources :

Example 6 with Table

use of com.google.privacy.dlp.v2.Table in project java-docs-samples by GoogleCloudPlatform.

the class RiskAnalysis method calculateLDiversity.

// [END dlp_k_anonymity]
// [START dlp_l_diversity]
/**
 * Calculate l-diversity for an attribute relative to quasi-identifiers in a BigQuery table.
 *
 * <p>Creates a DLP risk-analysis job, waits up to one minute for a Pub/Sub notification that
 * carries the job's name, then fetches the job and prints every equivalence class together with
 * its top sensitive values. Failures raised inside the method are caught and reported on standard
 * output instead of being propagated.
 *
 * @param projectId The Google Cloud Platform project ID to run the API call under.
 * @param datasetId The BigQuery dataset to analyze.
 * @param tableId The BigQuery table to analyze.
 * @param sensitiveAttribute The name of the attribute to compare the quasi-ID against
 * @param quasiIds A set of column names that form a composite key ('quasi-identifiers').
 * @param topicId The name of the Pub/Sub topic to notify once the job completes
 * @param subscriptionId The name of the Pub/Sub subscription to use when listening for job
 *     completion status.
 * @throws Exception declared for compatibility with existing callers; errors that occur inside
 *     the method body are caught and printed.
 */
private static void calculateLDiversity(String projectId, String datasetId, String tableId, String sensitiveAttribute, List<String> quasiIds, String topicId, String subscriptionId) throws Exception {
    // Instantiates a client
    try (DlpServiceClient dlpServiceClient = DlpServiceClient.create()) {
        FieldId sensitiveAttributeField = FieldId.newBuilder().setName(sensitiveAttribute).build();
        List<FieldId> quasiIdFields = quasiIds.stream().map(columnName -> FieldId.newBuilder().setName(columnName).build()).collect(Collectors.toList());
        LDiversityConfig ldiversityConfig = LDiversityConfig.newBuilder().addAllQuasiIds(quasiIdFields).setSensitiveAttribute(sensitiveAttributeField).build();
        BigQueryTable bigQueryTable = BigQueryTable.newBuilder().setProjectId(projectId).setDatasetId(datasetId).setTableId(tableId).build();
        PrivacyMetric privacyMetric = PrivacyMetric.newBuilder().setLDiversityConfig(ldiversityConfig).build();
        String topicName = String.format("projects/%s/topics/%s", projectId, topicId);
        PublishToPubSub publishToPubSub = PublishToPubSub.newBuilder().setTopic(topicName).build();
        // Create action to publish job status notifications over Google Cloud Pub/Sub
        Action action = Action.newBuilder().setPubSub(publishToPubSub).build();
        RiskAnalysisJobConfig riskAnalysisJobConfig = RiskAnalysisJobConfig.newBuilder().setSourceTable(bigQueryTable).setPrivacyMetric(privacyMetric).addActions(action).build();
        CreateDlpJobRequest createDlpJobRequest = CreateDlpJobRequest.newBuilder().setParent(ProjectName.of(projectId).toString()).setRiskJob(riskAnalysisJobConfig).build();
        DlpJob dlpJob = dlpServiceClient.createDlpJob(createDlpJobRequest);
        String dlpJobName = dlpJob.getName();
        final SettableApiFuture<Boolean> done = SettableApiFuture.create();
        // Set up a Pub/Sub subscriber to listen on the job completion status
        Subscriber subscriber = Subscriber.newBuilder(ProjectSubscriptionName.newBuilder().setProject(projectId).setSubscription(subscriptionId).build(), (pubsubMessage, ackReplyConsumer) -> {
            // Compare from the known non-null side: a message may lack the "DlpJobName"
            // attribute, in which case the map lookup yields null.
            if (dlpJobName.equals(pubsubMessage.getAttributesMap().get("DlpJobName"))) {
                // notify job completion
                done.set(true);
                ackReplyConsumer.ack();
            } else {
                // Not our job: release the message so it can be redelivered elsewhere
                ackReplyConsumer.nack();
            }
        }).build();
        subscriber.startAsync();
        // For long jobs, consider using a truly asynchronous execution model such as Cloud Functions
        try {
            done.get(1, TimeUnit.MINUTES);
            // Wait for the job to become available
            Thread.sleep(500);
        } catch (TimeoutException e) {
            System.out.println("Unable to verify job completion.");
        } finally {
            // The subscriber is only needed while waiting; stop it so it does not keep pulling
            subscriber.stopAsync();
        }
        // retrieve completed job status
        DlpJob completedJob = dlpServiceClient.getDlpJob(GetDlpJobRequest.newBuilder().setName(dlpJobName).build());
        System.out.println("Job status: " + completedJob.getState());
        AnalyzeDataSourceRiskDetails riskDetails = completedJob.getRiskDetails();
        LDiversityResult ldiversityResult = riskDetails.getLDiversityResult();
        for (LDiversityHistogramBucket result : ldiversityResult.getSensitiveValueFrequencyHistogramBucketsList()) {
            for (LDiversityEquivalenceClass bucket : result.getBucketValuesList()) {
                List<String> quasiIdValues = bucket.getQuasiIdsValuesList().stream().map(Value::toString).collect(Collectors.toList());
                System.out.println("\tQuasi-ID values: " + String.join(", ", quasiIdValues));
                System.out.println("\tClass size: " + bucket.getEquivalenceClassSize());
                for (ValueFrequency valueFrequency : bucket.getTopSensitiveValuesList()) {
                    System.out.printf("\t\tSensitive value %s occurs %d time(s).\n", valueFrequency.getValue().toString(), valueFrequency.getCount());
                }
            }
        }
    } catch (InterruptedException e) {
        // Preserve the interrupt for the caller's thread before reporting
        Thread.currentThread().interrupt();
        System.out.println("Error in calculateLDiversity: " + e.getMessage());
    } catch (Exception e) {
        System.out.println("Error in calculateLDiversity: " + e.getMessage());
    }
}
Also used : Arrays(java.util.Arrays) TimeoutException(java.util.concurrent.TimeoutException) Subscriber(com.google.cloud.pubsub.v1.Subscriber) DefaultParser(org.apache.commons.cli.DefaultParser) KMapEstimationHistogramBucket(com.google.privacy.dlp.v2.AnalyzeDataSourceRiskDetails.KMapEstimationResult.KMapEstimationHistogramBucket) LDiversityEquivalenceClass(com.google.privacy.dlp.v2.AnalyzeDataSourceRiskDetails.LDiversityResult.LDiversityEquivalenceClass) ValueFrequency(com.google.privacy.dlp.v2.ValueFrequency) LDiversityConfig(com.google.privacy.dlp.v2.PrivacyMetric.LDiversityConfig) NumericalStatsConfig(com.google.privacy.dlp.v2.PrivacyMetric.NumericalStatsConfig) Action(com.google.privacy.dlp.v2.Action) KMapEstimationConfig(com.google.privacy.dlp.v2.PrivacyMetric.KMapEstimationConfig) CategoricalStatsHistogramBucket(com.google.privacy.dlp.v2.AnalyzeDataSourceRiskDetails.CategoricalStatsResult.CategoricalStatsHistogramBucket) KAnonymityEquivalenceClass(com.google.privacy.dlp.v2.AnalyzeDataSourceRiskDetails.KAnonymityResult.KAnonymityEquivalenceClass) Value(com.google.privacy.dlp.v2.Value) TaggedField(com.google.privacy.dlp.v2.PrivacyMetric.KMapEstimationConfig.TaggedField) RiskAnalysisJobConfig(com.google.privacy.dlp.v2.RiskAnalysisJobConfig) Collectors(java.util.stream.Collectors) SettableApiFuture(com.google.api.core.SettableApiFuture) List(java.util.List) KAnonymityResult(com.google.privacy.dlp.v2.AnalyzeDataSourceRiskDetails.KAnonymityResult) ParseException(org.apache.commons.cli.ParseException) BigQueryTable(com.google.privacy.dlp.v2.BigQueryTable) ProjectSubscriptionName(com.google.pubsub.v1.ProjectSubscriptionName) AnalyzeDataSourceRiskDetails(com.google.privacy.dlp.v2.AnalyzeDataSourceRiskDetails) Options(org.apache.commons.cli.Options) HelpFormatter(org.apache.commons.cli.HelpFormatter) CategoricalStatsConfig(com.google.privacy.dlp.v2.PrivacyMetric.CategoricalStatsConfig) ArrayList(java.util.ArrayList) ServiceOptions(com.google.cloud.ServiceOptions) 
CommandLine(org.apache.commons.cli.CommandLine) FieldId(com.google.privacy.dlp.v2.FieldId) ProjectTopicName(com.google.pubsub.v1.ProjectTopicName) Option(org.apache.commons.cli.Option) DlpServiceClient(com.google.cloud.dlp.v2.DlpServiceClient) Iterator(java.util.Iterator) CreateDlpJobRequest(com.google.privacy.dlp.v2.CreateDlpJobRequest) CommandLineParser(org.apache.commons.cli.CommandLineParser) KMapEstimationResult(com.google.privacy.dlp.v2.AnalyzeDataSourceRiskDetails.KMapEstimationResult) KMapEstimationQuasiIdValues(com.google.privacy.dlp.v2.AnalyzeDataSourceRiskDetails.KMapEstimationResult.KMapEstimationQuasiIdValues) InfoType(com.google.privacy.dlp.v2.InfoType) LDiversityResult(com.google.privacy.dlp.v2.AnalyzeDataSourceRiskDetails.LDiversityResult) KAnonymityConfig(com.google.privacy.dlp.v2.PrivacyMetric.KAnonymityConfig) TimeUnit(java.util.concurrent.TimeUnit) PublishToPubSub(com.google.privacy.dlp.v2.Action.PublishToPubSub) ProjectName(com.google.privacy.dlp.v2.ProjectName) GetDlpJobRequest(com.google.privacy.dlp.v2.GetDlpJobRequest) LDiversityHistogramBucket(com.google.privacy.dlp.v2.AnalyzeDataSourceRiskDetails.LDiversityResult.LDiversityHistogramBucket) KAnonymityHistogramBucket(com.google.privacy.dlp.v2.AnalyzeDataSourceRiskDetails.KAnonymityResult.KAnonymityHistogramBucket) PrivacyMetric(com.google.privacy.dlp.v2.PrivacyMetric) OptionGroup(org.apache.commons.cli.OptionGroup) DlpJob(com.google.privacy.dlp.v2.DlpJob) Collections(java.util.Collections) LDiversityHistogramBucket(com.google.privacy.dlp.v2.AnalyzeDataSourceRiskDetails.LDiversityResult.LDiversityHistogramBucket) LDiversityConfig(com.google.privacy.dlp.v2.PrivacyMetric.LDiversityConfig) Action(com.google.privacy.dlp.v2.Action) RiskAnalysisJobConfig(com.google.privacy.dlp.v2.RiskAnalysisJobConfig) PrivacyMetric(com.google.privacy.dlp.v2.PrivacyMetric) CreateDlpJobRequest(com.google.privacy.dlp.v2.CreateDlpJobRequest) TimeoutException(java.util.concurrent.TimeoutException) 
ParseException(org.apache.commons.cli.ParseException) PublishToPubSub(com.google.privacy.dlp.v2.Action.PublishToPubSub) Subscriber(com.google.cloud.pubsub.v1.Subscriber) DlpServiceClient(com.google.cloud.dlp.v2.DlpServiceClient) BigQueryTable(com.google.privacy.dlp.v2.BigQueryTable) ValueFrequency(com.google.privacy.dlp.v2.ValueFrequency) FieldId(com.google.privacy.dlp.v2.FieldId) LDiversityResult(com.google.privacy.dlp.v2.AnalyzeDataSourceRiskDetails.LDiversityResult) DlpJob(com.google.privacy.dlp.v2.DlpJob) AnalyzeDataSourceRiskDetails(com.google.privacy.dlp.v2.AnalyzeDataSourceRiskDetails) LDiversityEquivalenceClass(com.google.privacy.dlp.v2.AnalyzeDataSourceRiskDetails.LDiversityResult.LDiversityEquivalenceClass) TimeoutException(java.util.concurrent.TimeoutException)

Example 7 with Table

use of com.google.privacy.dlp.v2.Table in project java-docs-samples by GoogleCloudPlatform.

the class RiskAnalysis method calculateKMap.

// [END dlp_l_diversity]
// [START dlp_k_map]
/**
 * Calculate k-map risk estimation for an attribute relative to quasi-identifiers in a BigQuery
 * table.
 *
 * <p>Pairs each quasi-identifier column with the infoType at the same position, creates a DLP
 * risk-analysis job, waits up to one minute for a Pub/Sub notification that carries the job's
 * name, then fetches the job and prints the k-map estimation histogram. Failures raised inside
 * the method (including a size mismatch between {@code quasiIds} and {@code infoTypes}) are
 * caught and reported on standard output instead of being propagated.
 *
 * @param projectId The Google Cloud Platform project ID to run the API call under.
 * @param datasetId The BigQuery dataset to analyze.
 * @param tableId The BigQuery table to analyze.
 * @param quasiIds A set of column names that form a composite key ('quasi-identifiers').
 * @param infoTypes The infoTypes corresponding to each quasi-id column
 * @param regionCode An ISO-3166-1 region code specifying the k-map distribution region
 * @param topicId The name of the Pub/Sub topic to notify once the job completes
 * @param subscriptionId The name of the Pub/Sub subscription to use when listening for job
 *     completion status.
 * @throws Exception declared for compatibility with existing callers; errors that occur inside
 *     the method body are caught and printed.
 */
private static void calculateKMap(String projectId, String datasetId, String tableId, List<String> quasiIds, List<InfoType> infoTypes, String regionCode, String topicId, String subscriptionId) throws Exception {
    // Instantiates a client
    try (DlpServiceClient dlpServiceClient = DlpServiceClient.create()) {
        // Validate before pairing so the loop below can rely on equal lengths
        if (quasiIds.size() != infoTypes.size()) {
            throw new IllegalArgumentException("The numbers of quasi-IDs and infoTypes must be equal!");
        }
        List<TaggedField> taggedFields = new ArrayList<>(quasiIds.size());
        for (int i = 0; i < quasiIds.size(); i++) {
            FieldId field = FieldId.newBuilder().setName(quasiIds.get(i)).build();
            taggedFields.add(TaggedField.newBuilder().setField(field).setInfoType(infoTypes.get(i)).build());
        }
        KMapEstimationConfig kmapConfig = KMapEstimationConfig.newBuilder().addAllQuasiIds(taggedFields).setRegionCode(regionCode).build();
        BigQueryTable bigQueryTable = BigQueryTable.newBuilder().setProjectId(projectId).setDatasetId(datasetId).setTableId(tableId).build();
        PrivacyMetric privacyMetric = PrivacyMetric.newBuilder().setKMapEstimationConfig(kmapConfig).build();
        String topicName = String.format("projects/%s/topics/%s", projectId, topicId);
        PublishToPubSub publishToPubSub = PublishToPubSub.newBuilder().setTopic(topicName).build();
        // Create action to publish job status notifications over Google Cloud Pub/Sub
        Action action = Action.newBuilder().setPubSub(publishToPubSub).build();
        RiskAnalysisJobConfig riskAnalysisJobConfig = RiskAnalysisJobConfig.newBuilder().setSourceTable(bigQueryTable).setPrivacyMetric(privacyMetric).addActions(action).build();
        CreateDlpJobRequest createDlpJobRequest = CreateDlpJobRequest.newBuilder().setParent(ProjectName.of(projectId).toString()).setRiskJob(riskAnalysisJobConfig).build();
        DlpJob dlpJob = dlpServiceClient.createDlpJob(createDlpJobRequest);
        String dlpJobName = dlpJob.getName();
        final SettableApiFuture<Boolean> done = SettableApiFuture.create();
        // Set up a Pub/Sub subscriber to listen on the job completion status
        Subscriber subscriber = Subscriber.newBuilder(ProjectSubscriptionName.newBuilder().setProject(projectId).setSubscription(subscriptionId).build(), (pubsubMessage, ackReplyConsumer) -> {
            // Compare from the known non-null side: a message may lack the "DlpJobName"
            // attribute, in which case the map lookup yields null.
            if (dlpJobName.equals(pubsubMessage.getAttributesMap().get("DlpJobName"))) {
                // notify job completion
                done.set(true);
                ackReplyConsumer.ack();
            } else {
                // Not our job: release the message so it can be redelivered elsewhere
                ackReplyConsumer.nack();
            }
        }).build();
        subscriber.startAsync();
        // For long jobs, consider using a truly asynchronous execution model such as Cloud Functions
        try {
            done.get(1, TimeUnit.MINUTES);
            // Wait for the job to become available
            Thread.sleep(500);
        } catch (TimeoutException e) {
            System.out.println("Unable to verify job completion.");
        } finally {
            // The subscriber is only needed while waiting; stop it so it does not keep pulling
            subscriber.stopAsync();
        }
        // retrieve completed job status
        DlpJob completedJob = dlpServiceClient.getDlpJob(GetDlpJobRequest.newBuilder().setName(dlpJobName).build());
        System.out.println("Job status: " + completedJob.getState());
        AnalyzeDataSourceRiskDetails riskDetails = completedJob.getRiskDetails();
        KMapEstimationResult kmapResult = riskDetails.getKMapEstimationResult();
        for (KMapEstimationHistogramBucket result : kmapResult.getKMapEstimationHistogramList()) {
            System.out.printf("\tAnonymity range: [%d, %d]\n", result.getMinAnonymity(), result.getMaxAnonymity());
            System.out.printf("\tSize: %d\n", result.getBucketSize());
            for (KMapEstimationQuasiIdValues valueBucket : result.getBucketValuesList()) {
                // Keep only the text after the first ':' of each value's string form
                String quasiIdValues = valueBucket.getQuasiIdsValuesList().stream().map(v -> {
                    String s = v.toString();
                    return s.substring(s.indexOf(':') + 1).trim();
                }).collect(Collectors.joining(", "));
                System.out.printf("\tValues: {%s}\n", quasiIdValues);
                System.out.printf("\tEstimated k-map anonymity: %d\n", valueBucket.getEstimatedAnonymity());
            }
        }
    } catch (InterruptedException e) {
        // Preserve the interrupt for the caller's thread before reporting
        Thread.currentThread().interrupt();
        System.out.println("Error in calculateKMap: " + e.getMessage());
    } catch (Exception e) {
        System.out.println("Error in calculateKMap: " + e.getMessage());
    }
}
Also used : Arrays(java.util.Arrays) TimeoutException(java.util.concurrent.TimeoutException) Subscriber(com.google.cloud.pubsub.v1.Subscriber) DefaultParser(org.apache.commons.cli.DefaultParser) KMapEstimationHistogramBucket(com.google.privacy.dlp.v2.AnalyzeDataSourceRiskDetails.KMapEstimationResult.KMapEstimationHistogramBucket) LDiversityEquivalenceClass(com.google.privacy.dlp.v2.AnalyzeDataSourceRiskDetails.LDiversityResult.LDiversityEquivalenceClass) ValueFrequency(com.google.privacy.dlp.v2.ValueFrequency) LDiversityConfig(com.google.privacy.dlp.v2.PrivacyMetric.LDiversityConfig) NumericalStatsConfig(com.google.privacy.dlp.v2.PrivacyMetric.NumericalStatsConfig) Action(com.google.privacy.dlp.v2.Action) KMapEstimationConfig(com.google.privacy.dlp.v2.PrivacyMetric.KMapEstimationConfig) CategoricalStatsHistogramBucket(com.google.privacy.dlp.v2.AnalyzeDataSourceRiskDetails.CategoricalStatsResult.CategoricalStatsHistogramBucket) KAnonymityEquivalenceClass(com.google.privacy.dlp.v2.AnalyzeDataSourceRiskDetails.KAnonymityResult.KAnonymityEquivalenceClass) Value(com.google.privacy.dlp.v2.Value) TaggedField(com.google.privacy.dlp.v2.PrivacyMetric.KMapEstimationConfig.TaggedField) RiskAnalysisJobConfig(com.google.privacy.dlp.v2.RiskAnalysisJobConfig) Collectors(java.util.stream.Collectors) SettableApiFuture(com.google.api.core.SettableApiFuture) List(java.util.List) KAnonymityResult(com.google.privacy.dlp.v2.AnalyzeDataSourceRiskDetails.KAnonymityResult) ParseException(org.apache.commons.cli.ParseException) BigQueryTable(com.google.privacy.dlp.v2.BigQueryTable) ProjectSubscriptionName(com.google.pubsub.v1.ProjectSubscriptionName) AnalyzeDataSourceRiskDetails(com.google.privacy.dlp.v2.AnalyzeDataSourceRiskDetails) Options(org.apache.commons.cli.Options) HelpFormatter(org.apache.commons.cli.HelpFormatter) CategoricalStatsConfig(com.google.privacy.dlp.v2.PrivacyMetric.CategoricalStatsConfig) ArrayList(java.util.ArrayList) ServiceOptions(com.google.cloud.ServiceOptions) 
CommandLine(org.apache.commons.cli.CommandLine) FieldId(com.google.privacy.dlp.v2.FieldId) ProjectTopicName(com.google.pubsub.v1.ProjectTopicName) Option(org.apache.commons.cli.Option) DlpServiceClient(com.google.cloud.dlp.v2.DlpServiceClient) Iterator(java.util.Iterator) CreateDlpJobRequest(com.google.privacy.dlp.v2.CreateDlpJobRequest) CommandLineParser(org.apache.commons.cli.CommandLineParser) KMapEstimationResult(com.google.privacy.dlp.v2.AnalyzeDataSourceRiskDetails.KMapEstimationResult) KMapEstimationQuasiIdValues(com.google.privacy.dlp.v2.AnalyzeDataSourceRiskDetails.KMapEstimationResult.KMapEstimationQuasiIdValues) InfoType(com.google.privacy.dlp.v2.InfoType) LDiversityResult(com.google.privacy.dlp.v2.AnalyzeDataSourceRiskDetails.LDiversityResult) KAnonymityConfig(com.google.privacy.dlp.v2.PrivacyMetric.KAnonymityConfig) TimeUnit(java.util.concurrent.TimeUnit) PublishToPubSub(com.google.privacy.dlp.v2.Action.PublishToPubSub) ProjectName(com.google.privacy.dlp.v2.ProjectName) GetDlpJobRequest(com.google.privacy.dlp.v2.GetDlpJobRequest) LDiversityHistogramBucket(com.google.privacy.dlp.v2.AnalyzeDataSourceRiskDetails.LDiversityResult.LDiversityHistogramBucket) KAnonymityHistogramBucket(com.google.privacy.dlp.v2.AnalyzeDataSourceRiskDetails.KAnonymityResult.KAnonymityHistogramBucket) PrivacyMetric(com.google.privacy.dlp.v2.PrivacyMetric) OptionGroup(org.apache.commons.cli.OptionGroup) DlpJob(com.google.privacy.dlp.v2.DlpJob) Collections(java.util.Collections) Action(com.google.privacy.dlp.v2.Action) ArrayList(java.util.ArrayList) TaggedField(com.google.privacy.dlp.v2.PrivacyMetric.KMapEstimationConfig.TaggedField) PrivacyMetric(com.google.privacy.dlp.v2.PrivacyMetric) PublishToPubSub(com.google.privacy.dlp.v2.Action.PublishToPubSub) Subscriber(com.google.cloud.pubsub.v1.Subscriber) AnalyzeDataSourceRiskDetails(com.google.privacy.dlp.v2.AnalyzeDataSourceRiskDetails) InfoType(com.google.privacy.dlp.v2.InfoType) 
TimeoutException(java.util.concurrent.TimeoutException) KMapEstimationQuasiIdValues(com.google.privacy.dlp.v2.AnalyzeDataSourceRiskDetails.KMapEstimationResult.KMapEstimationQuasiIdValues) KMapEstimationConfig(com.google.privacy.dlp.v2.PrivacyMetric.KMapEstimationConfig) RiskAnalysisJobConfig(com.google.privacy.dlp.v2.RiskAnalysisJobConfig) CreateDlpJobRequest(com.google.privacy.dlp.v2.CreateDlpJobRequest) KMapEstimationHistogramBucket(com.google.privacy.dlp.v2.AnalyzeDataSourceRiskDetails.KMapEstimationResult.KMapEstimationHistogramBucket) TimeoutException(java.util.concurrent.TimeoutException) ParseException(org.apache.commons.cli.ParseException) DlpServiceClient(com.google.cloud.dlp.v2.DlpServiceClient) BigQueryTable(com.google.privacy.dlp.v2.BigQueryTable) DlpJob(com.google.privacy.dlp.v2.DlpJob) KMapEstimationResult(com.google.privacy.dlp.v2.AnalyzeDataSourceRiskDetails.KMapEstimationResult)

Example 8 with Table

use of com.google.privacy.dlp.v2.Table in project java-docs-samples by GoogleCloudPlatform.

the class RiskAnalysis method calculateKAnonymity.

// [END dlp_categorical_stats]
// [START dlp_k_anonymity]
/**
 * Calculate k-anonymity for quasi-identifiers in a BigQuery table using the DLP API.
 *
 * <p>Creates a DLP risk-analysis job, waits up to one minute for a Pub/Sub notification that
 * carries the job's name, then fetches the job and prints each histogram bucket with its
 * equivalence classes. Failures raised inside the method are caught and reported on standard
 * output instead of being propagated.
 *
 * @param projectId The Google Cloud Platform project ID to run the API call under.
 * @param datasetId The BigQuery dataset to analyze.
 * @param tableId The BigQuery table to analyze.
 * @param quasiIds The names of columns that form a composite key ('quasi-identifiers').
 * @param topicId The name of the Pub/Sub topic to notify once the job completes
 * @param subscriptionId The name of the Pub/Sub subscription to use when listening for job
 *     completion status.
 * @throws Exception declared for compatibility with existing callers; errors that occur inside
 *     the method body are caught and printed.
 */
private static void calculateKAnonymity(String projectId, String datasetId, String tableId, List<String> quasiIds, String topicId, String subscriptionId) throws Exception {
    // Instantiates a client
    try (DlpServiceClient dlpServiceClient = DlpServiceClient.create()) {
        List<FieldId> quasiIdFields = quasiIds.stream().map(columnName -> FieldId.newBuilder().setName(columnName).build()).collect(Collectors.toList());
        KAnonymityConfig kanonymityConfig = KAnonymityConfig.newBuilder().addAllQuasiIds(quasiIdFields).build();
        BigQueryTable bigQueryTable = BigQueryTable.newBuilder().setProjectId(projectId).setDatasetId(datasetId).setTableId(tableId).build();
        PrivacyMetric privacyMetric = PrivacyMetric.newBuilder().setKAnonymityConfig(kanonymityConfig).build();
        String topicName = String.format("projects/%s/topics/%s", projectId, topicId);
        PublishToPubSub publishToPubSub = PublishToPubSub.newBuilder().setTopic(topicName).build();
        // Create action to publish job status notifications over Google Cloud Pub/Sub
        Action action = Action.newBuilder().setPubSub(publishToPubSub).build();
        RiskAnalysisJobConfig riskAnalysisJobConfig = RiskAnalysisJobConfig.newBuilder().setSourceTable(bigQueryTable).setPrivacyMetric(privacyMetric).addActions(action).build();
        CreateDlpJobRequest createDlpJobRequest = CreateDlpJobRequest.newBuilder().setParent(ProjectName.of(projectId).toString()).setRiskJob(riskAnalysisJobConfig).build();
        DlpJob dlpJob = dlpServiceClient.createDlpJob(createDlpJobRequest);
        String dlpJobName = dlpJob.getName();
        final SettableApiFuture<Boolean> done = SettableApiFuture.create();
        // Set up a Pub/Sub subscriber to listen on the job completion status
        Subscriber subscriber = Subscriber.newBuilder(ProjectSubscriptionName.newBuilder().setProject(projectId).setSubscription(subscriptionId).build(), (pubsubMessage, ackReplyConsumer) -> {
            // Compare from the known non-null side: a message may lack the "DlpJobName"
            // attribute, in which case the map lookup yields null.
            if (dlpJobName.equals(pubsubMessage.getAttributesMap().get("DlpJobName"))) {
                // notify job completion
                done.set(true);
                ackReplyConsumer.ack();
            } else {
                // Not our job: release the message so it can be redelivered elsewhere
                ackReplyConsumer.nack();
            }
        }).build();
        subscriber.startAsync();
        // For long jobs, consider using a truly asynchronous execution model such as Cloud Functions
        try {
            done.get(1, TimeUnit.MINUTES);
            // Wait for the job to become available
            Thread.sleep(500);
        } catch (TimeoutException e) {
            System.out.println("Unable to verify job completion.");
        } finally {
            // The subscriber is only needed while waiting; stop it so it does not keep pulling
            subscriber.stopAsync();
        }
        // Retrieve completed job status
        DlpJob completedJob = dlpServiceClient.getDlpJob(GetDlpJobRequest.newBuilder().setName(dlpJobName).build());
        System.out.println("Job status: " + completedJob.getState());
        AnalyzeDataSourceRiskDetails riskDetails = completedJob.getRiskDetails();
        KAnonymityResult kanonymityResult = riskDetails.getKAnonymityResult();
        for (KAnonymityHistogramBucket result : kanonymityResult.getEquivalenceClassHistogramBucketsList()) {
            System.out.printf("Bucket size range: [%d, %d]\n", result.getEquivalenceClassSizeLowerBound(), result.getEquivalenceClassSizeUpperBound());
            for (KAnonymityEquivalenceClass bucket : result.getBucketValuesList()) {
                List<String> quasiIdValues = bucket.getQuasiIdsValuesList().stream().map(v -> v.toString()).collect(Collectors.toList());
                System.out.println("\tQuasi-ID values: " + String.join(", ", quasiIdValues));
                System.out.println("\tClass size: " + bucket.getEquivalenceClassSize());
            }
        }
    } catch (InterruptedException e) {
        // Preserve the interrupt for the caller's thread before reporting
        Thread.currentThread().interrupt();
        System.out.println("Error in calculateKAnonymity: " + e.getMessage());
    } catch (Exception e) {
        System.out.println("Error in calculateKAnonymity: " + e.getMessage());
    }
}
Also used : Arrays(java.util.Arrays) TimeoutException(java.util.concurrent.TimeoutException) Subscriber(com.google.cloud.pubsub.v1.Subscriber) DefaultParser(org.apache.commons.cli.DefaultParser) KMapEstimationHistogramBucket(com.google.privacy.dlp.v2.AnalyzeDataSourceRiskDetails.KMapEstimationResult.KMapEstimationHistogramBucket) LDiversityEquivalenceClass(com.google.privacy.dlp.v2.AnalyzeDataSourceRiskDetails.LDiversityResult.LDiversityEquivalenceClass) ValueFrequency(com.google.privacy.dlp.v2.ValueFrequency) LDiversityConfig(com.google.privacy.dlp.v2.PrivacyMetric.LDiversityConfig) NumericalStatsConfig(com.google.privacy.dlp.v2.PrivacyMetric.NumericalStatsConfig) Action(com.google.privacy.dlp.v2.Action) KMapEstimationConfig(com.google.privacy.dlp.v2.PrivacyMetric.KMapEstimationConfig) CategoricalStatsHistogramBucket(com.google.privacy.dlp.v2.AnalyzeDataSourceRiskDetails.CategoricalStatsResult.CategoricalStatsHistogramBucket) KAnonymityEquivalenceClass(com.google.privacy.dlp.v2.AnalyzeDataSourceRiskDetails.KAnonymityResult.KAnonymityEquivalenceClass) Value(com.google.privacy.dlp.v2.Value) TaggedField(com.google.privacy.dlp.v2.PrivacyMetric.KMapEstimationConfig.TaggedField) RiskAnalysisJobConfig(com.google.privacy.dlp.v2.RiskAnalysisJobConfig) Collectors(java.util.stream.Collectors) SettableApiFuture(com.google.api.core.SettableApiFuture) List(java.util.List) KAnonymityResult(com.google.privacy.dlp.v2.AnalyzeDataSourceRiskDetails.KAnonymityResult) ParseException(org.apache.commons.cli.ParseException) BigQueryTable(com.google.privacy.dlp.v2.BigQueryTable) ProjectSubscriptionName(com.google.pubsub.v1.ProjectSubscriptionName) AnalyzeDataSourceRiskDetails(com.google.privacy.dlp.v2.AnalyzeDataSourceRiskDetails) Options(org.apache.commons.cli.Options) HelpFormatter(org.apache.commons.cli.HelpFormatter) CategoricalStatsConfig(com.google.privacy.dlp.v2.PrivacyMetric.CategoricalStatsConfig) ArrayList(java.util.ArrayList) ServiceOptions(com.google.cloud.ServiceOptions) 
CommandLine(org.apache.commons.cli.CommandLine) FieldId(com.google.privacy.dlp.v2.FieldId) ProjectTopicName(com.google.pubsub.v1.ProjectTopicName) Option(org.apache.commons.cli.Option) DlpServiceClient(com.google.cloud.dlp.v2.DlpServiceClient) Iterator(java.util.Iterator) CreateDlpJobRequest(com.google.privacy.dlp.v2.CreateDlpJobRequest) CommandLineParser(org.apache.commons.cli.CommandLineParser) KMapEstimationResult(com.google.privacy.dlp.v2.AnalyzeDataSourceRiskDetails.KMapEstimationResult) KMapEstimationQuasiIdValues(com.google.privacy.dlp.v2.AnalyzeDataSourceRiskDetails.KMapEstimationResult.KMapEstimationQuasiIdValues) InfoType(com.google.privacy.dlp.v2.InfoType) LDiversityResult(com.google.privacy.dlp.v2.AnalyzeDataSourceRiskDetails.LDiversityResult) KAnonymityConfig(com.google.privacy.dlp.v2.PrivacyMetric.KAnonymityConfig) TimeUnit(java.util.concurrent.TimeUnit) PublishToPubSub(com.google.privacy.dlp.v2.Action.PublishToPubSub) ProjectName(com.google.privacy.dlp.v2.ProjectName) GetDlpJobRequest(com.google.privacy.dlp.v2.GetDlpJobRequest) LDiversityHistogramBucket(com.google.privacy.dlp.v2.AnalyzeDataSourceRiskDetails.LDiversityResult.LDiversityHistogramBucket) KAnonymityHistogramBucket(com.google.privacy.dlp.v2.AnalyzeDataSourceRiskDetails.KAnonymityResult.KAnonymityHistogramBucket) PrivacyMetric(com.google.privacy.dlp.v2.PrivacyMetric) OptionGroup(org.apache.commons.cli.OptionGroup) DlpJob(com.google.privacy.dlp.v2.DlpJob) Collections(java.util.Collections) KAnonymityHistogramBucket(com.google.privacy.dlp.v2.AnalyzeDataSourceRiskDetails.KAnonymityResult.KAnonymityHistogramBucket) Action(com.google.privacy.dlp.v2.Action) RiskAnalysisJobConfig(com.google.privacy.dlp.v2.RiskAnalysisJobConfig) PrivacyMetric(com.google.privacy.dlp.v2.PrivacyMetric) CreateDlpJobRequest(com.google.privacy.dlp.v2.CreateDlpJobRequest) TimeoutException(java.util.concurrent.TimeoutException) ParseException(org.apache.commons.cli.ParseException) 
KAnonymityEquivalenceClass(com.google.privacy.dlp.v2.AnalyzeDataSourceRiskDetails.KAnonymityResult.KAnonymityEquivalenceClass) KAnonymityConfig(com.google.privacy.dlp.v2.PrivacyMetric.KAnonymityConfig) PublishToPubSub(com.google.privacy.dlp.v2.Action.PublishToPubSub) Subscriber(com.google.cloud.pubsub.v1.Subscriber) DlpServiceClient(com.google.cloud.dlp.v2.DlpServiceClient) BigQueryTable(com.google.privacy.dlp.v2.BigQueryTable) FieldId(com.google.privacy.dlp.v2.FieldId) DlpJob(com.google.privacy.dlp.v2.DlpJob) AnalyzeDataSourceRiskDetails(com.google.privacy.dlp.v2.AnalyzeDataSourceRiskDetails) TimeoutException(java.util.concurrent.TimeoutException) KAnonymityResult(com.google.privacy.dlp.v2.AnalyzeDataSourceRiskDetails.KAnonymityResult)

Example 9 with Table

use of com.google.privacy.dlp.v2.Table in project java-docs-samples by GoogleCloudPlatform.

the class RiskAnalysis method numericalStatsAnalysis.

// [START dlp_numerical_stats]
/**
 * Calculate numerical statistics for a column in a BigQuery table using the DLP API.
 *
 * <p>Creates a risk-analysis job, waits (up to one minute) for a Pub/Sub notification carrying
 * the job's name, then fetches the job and prints its state, the value range and the distinct
 * quantile values. Any failure is reported on standard output rather than propagated.
 *
 * @param projectId The Google Cloud Platform project ID to run the API call under.
 * @param datasetId The BigQuery dataset to analyze.
 * @param tableId The BigQuery table to analyze.
 * @param columnName The name of the column to analyze, which must contain only numerical data.
 * @param topicId The name of the Pub/Sub topic to notify once the job completes
 * @param subscriptionId The name of the Pub/Sub subscription to use when listening for job
 *     completion status.
 */
private static void numericalStatsAnalysis(String projectId, String datasetId, String tableId, String columnName, String topicId, String subscriptionId) throws Exception {
    // Instantiates a client
    try (DlpServiceClient dlpServiceClient = DlpServiceClient.create()) {
        BigQueryTable bigQueryTable = BigQueryTable.newBuilder().setTableId(tableId).setDatasetId(datasetId).setProjectId(projectId).build();
        FieldId fieldId = FieldId.newBuilder().setName(columnName).build();
        NumericalStatsConfig numericalStatsConfig = NumericalStatsConfig.newBuilder().setField(fieldId).build();
        PrivacyMetric privacyMetric = PrivacyMetric.newBuilder().setNumericalStatsConfig(numericalStatsConfig).build();
        String topicName = String.format("projects/%s/topics/%s", projectId, topicId);
        PublishToPubSub publishToPubSub = PublishToPubSub.newBuilder().setTopic(topicName).build();
        // Create action to publish job status notifications over Google Cloud Pub/Sub
        Action action = Action.newBuilder().setPubSub(publishToPubSub).build();
        RiskAnalysisJobConfig riskAnalysisJobConfig = RiskAnalysisJobConfig.newBuilder().setSourceTable(bigQueryTable).setPrivacyMetric(privacyMetric).addActions(action).build();
        CreateDlpJobRequest createDlpJobRequest = CreateDlpJobRequest.newBuilder().setParent(ProjectName.of(projectId).toString()).setRiskJob(riskAnalysisJobConfig).build();
        DlpJob dlpJob = dlpServiceClient.createDlpJob(createDlpJobRequest);
        String dlpJobName = dlpJob.getName();
        final SettableApiFuture<Boolean> done = SettableApiFuture.create();
        // Set up a Pub/Sub subscriber to listen on the job completion status
        Subscriber subscriber = Subscriber.newBuilder(ProjectSubscriptionName.newBuilder().setProject(projectId).setSubscription(subscriptionId).build(), (pubsubMessage, ackReplyConsumer) -> {
            // Compare from the known non-null side: a message may carry attributes
            // without a "DlpJobName" entry, in which case the lookup returns null.
            if (pubsubMessage.getAttributesCount() > 0 && dlpJobName.equals(pubsubMessage.getAttributesMap().get("DlpJobName"))) {
                // Ack before signalling so the ack is issued before the waiting thread
                // gets a chance to stop the subscriber.
                ackReplyConsumer.ack();
                // notify job completion
                done.set(true);
            }
        }).build();
        subscriber.startAsync();
        // For long jobs, consider using a truly asynchronous execution model such as Cloud Functions
        try {
            done.get(1, TimeUnit.MINUTES);
            // Wait for the job to become available
            Thread.sleep(500);
        } catch (TimeoutException e) {
            System.out.println("Unable to verify job completion.");
        } finally {
            // Always release the subscriber's background resources, whether or not
            // the completion notification arrived.
            subscriber.stopAsync();
        }
        // Retrieve completed job status
        DlpJob completedJob = dlpServiceClient.getDlpJob(GetDlpJobRequest.newBuilder().setName(dlpJobName).build());
        System.out.println("Job status: " + completedJob.getState());
        AnalyzeDataSourceRiskDetails riskDetails = completedJob.getRiskDetails();
        AnalyzeDataSourceRiskDetails.NumericalStatsResult result = riskDetails.getNumericalStatsResult();
        System.out.printf("Value range : [%.3f, %.3f]\n", result.getMinValue().getFloatValue(), result.getMaxValue().getFloatValue());
        // The n-th entry of the quantile list is reported as the n-th percentile, so the
        // counter advances on every entry even when a repeated value is not printed.
        int percent = 1;
        Double lastValue = null;
        for (Value quantileValue : result.getQuantileValuesList()) {
            Double currentValue = quantileValue.getFloatValue();
            if (lastValue == null || !lastValue.equals(currentValue)) {
                System.out.printf("Value at %d %% quantile : %.3f%n", percent, currentValue);
            }
            lastValue = currentValue;
            percent++;
        }
    } catch (InterruptedException e) {
        // Restore the interrupt flag so callers can observe the interruption.
        Thread.currentThread().interrupt();
        System.out.println("Error in numericalStatsAnalysis: " + e.getMessage());
    } catch (Exception e) {
        // Sample code: report the failure on the console instead of propagating it.
        System.out.println("Error in numericalStatsAnalysis: " + e.getMessage());
    }
}
Also used : Arrays(java.util.Arrays) TimeoutException(java.util.concurrent.TimeoutException) Subscriber(com.google.cloud.pubsub.v1.Subscriber) DefaultParser(org.apache.commons.cli.DefaultParser) KMapEstimationHistogramBucket(com.google.privacy.dlp.v2.AnalyzeDataSourceRiskDetails.KMapEstimationResult.KMapEstimationHistogramBucket) LDiversityEquivalenceClass(com.google.privacy.dlp.v2.AnalyzeDataSourceRiskDetails.LDiversityResult.LDiversityEquivalenceClass) ValueFrequency(com.google.privacy.dlp.v2.ValueFrequency) LDiversityConfig(com.google.privacy.dlp.v2.PrivacyMetric.LDiversityConfig) NumericalStatsConfig(com.google.privacy.dlp.v2.PrivacyMetric.NumericalStatsConfig) Action(com.google.privacy.dlp.v2.Action) KMapEstimationConfig(com.google.privacy.dlp.v2.PrivacyMetric.KMapEstimationConfig) CategoricalStatsHistogramBucket(com.google.privacy.dlp.v2.AnalyzeDataSourceRiskDetails.CategoricalStatsResult.CategoricalStatsHistogramBucket) KAnonymityEquivalenceClass(com.google.privacy.dlp.v2.AnalyzeDataSourceRiskDetails.KAnonymityResult.KAnonymityEquivalenceClass) Value(com.google.privacy.dlp.v2.Value) TaggedField(com.google.privacy.dlp.v2.PrivacyMetric.KMapEstimationConfig.TaggedField) RiskAnalysisJobConfig(com.google.privacy.dlp.v2.RiskAnalysisJobConfig) Collectors(java.util.stream.Collectors) SettableApiFuture(com.google.api.core.SettableApiFuture) List(java.util.List) KAnonymityResult(com.google.privacy.dlp.v2.AnalyzeDataSourceRiskDetails.KAnonymityResult) ParseException(org.apache.commons.cli.ParseException) BigQueryTable(com.google.privacy.dlp.v2.BigQueryTable) ProjectSubscriptionName(com.google.pubsub.v1.ProjectSubscriptionName) AnalyzeDataSourceRiskDetails(com.google.privacy.dlp.v2.AnalyzeDataSourceRiskDetails) Options(org.apache.commons.cli.Options) HelpFormatter(org.apache.commons.cli.HelpFormatter) CategoricalStatsConfig(com.google.privacy.dlp.v2.PrivacyMetric.CategoricalStatsConfig) ArrayList(java.util.ArrayList) ServiceOptions(com.google.cloud.ServiceOptions) 
CommandLine(org.apache.commons.cli.CommandLine) FieldId(com.google.privacy.dlp.v2.FieldId) ProjectTopicName(com.google.pubsub.v1.ProjectTopicName) Option(org.apache.commons.cli.Option) DlpServiceClient(com.google.cloud.dlp.v2.DlpServiceClient) Iterator(java.util.Iterator) CreateDlpJobRequest(com.google.privacy.dlp.v2.CreateDlpJobRequest) CommandLineParser(org.apache.commons.cli.CommandLineParser) KMapEstimationResult(com.google.privacy.dlp.v2.AnalyzeDataSourceRiskDetails.KMapEstimationResult) KMapEstimationQuasiIdValues(com.google.privacy.dlp.v2.AnalyzeDataSourceRiskDetails.KMapEstimationResult.KMapEstimationQuasiIdValues) InfoType(com.google.privacy.dlp.v2.InfoType) LDiversityResult(com.google.privacy.dlp.v2.AnalyzeDataSourceRiskDetails.LDiversityResult) KAnonymityConfig(com.google.privacy.dlp.v2.PrivacyMetric.KAnonymityConfig) TimeUnit(java.util.concurrent.TimeUnit) PublishToPubSub(com.google.privacy.dlp.v2.Action.PublishToPubSub) ProjectName(com.google.privacy.dlp.v2.ProjectName) GetDlpJobRequest(com.google.privacy.dlp.v2.GetDlpJobRequest) LDiversityHistogramBucket(com.google.privacy.dlp.v2.AnalyzeDataSourceRiskDetails.LDiversityResult.LDiversityHistogramBucket) KAnonymityHistogramBucket(com.google.privacy.dlp.v2.AnalyzeDataSourceRiskDetails.KAnonymityResult.KAnonymityHistogramBucket) PrivacyMetric(com.google.privacy.dlp.v2.PrivacyMetric) OptionGroup(org.apache.commons.cli.OptionGroup) DlpJob(com.google.privacy.dlp.v2.DlpJob) Collections(java.util.Collections) Action(com.google.privacy.dlp.v2.Action) RiskAnalysisJobConfig(com.google.privacy.dlp.v2.RiskAnalysisJobConfig) PrivacyMetric(com.google.privacy.dlp.v2.PrivacyMetric) CreateDlpJobRequest(com.google.privacy.dlp.v2.CreateDlpJobRequest) TimeoutException(java.util.concurrent.TimeoutException) ParseException(org.apache.commons.cli.ParseException) PublishToPubSub(com.google.privacy.dlp.v2.Action.PublishToPubSub) Subscriber(com.google.cloud.pubsub.v1.Subscriber) 
DlpServiceClient(com.google.cloud.dlp.v2.DlpServiceClient) BigQueryTable(com.google.privacy.dlp.v2.BigQueryTable) FieldId(com.google.privacy.dlp.v2.FieldId) Value(com.google.privacy.dlp.v2.Value) DlpJob(com.google.privacy.dlp.v2.DlpJob) AnalyzeDataSourceRiskDetails(com.google.privacy.dlp.v2.AnalyzeDataSourceRiskDetails) NumericalStatsConfig(com.google.privacy.dlp.v2.PrivacyMetric.NumericalStatsConfig) TimeoutException(java.util.concurrent.TimeoutException)

Aggregations

ServiceOptions (com.google.cloud.ServiceOptions)8 InfoType (com.google.privacy.dlp.v2.InfoType)8 DlpServiceClient (com.google.cloud.dlp.v2.DlpServiceClient)7 ProjectName (com.google.privacy.dlp.v2.ProjectName)7 ArrayList (java.util.ArrayList)7 SettableApiFuture (com.google.api.core.SettableApiFuture)6 Subscriber (com.google.cloud.pubsub.v1.Subscriber)6 Action (com.google.privacy.dlp.v2.Action)6 BigQueryTable (com.google.privacy.dlp.v2.BigQueryTable)6 CreateDlpJobRequest (com.google.privacy.dlp.v2.CreateDlpJobRequest)6 DlpJob (com.google.privacy.dlp.v2.DlpJob)6 FieldId (com.google.privacy.dlp.v2.FieldId)6 GetDlpJobRequest (com.google.privacy.dlp.v2.GetDlpJobRequest)6 Value (com.google.privacy.dlp.v2.Value)6 ProjectSubscriptionName (com.google.pubsub.v1.ProjectSubscriptionName)6 ProjectTopicName (com.google.pubsub.v1.ProjectTopicName)6 Arrays (java.util.Arrays)6 Collections (java.util.Collections)6 CommandLine (org.apache.commons.cli.CommandLine)6 CommandLineParser (org.apache.commons.cli.CommandLineParser)6