Search in sources :

Example 6 with BigQueryTable

use of com.google.privacy.dlp.v2.BigQueryTable in project java-docs-samples by GoogleCloudPlatform.

the class RiskAnalysis method numericalStatsAnalysis.

// [START dlp_numerical_stats]
/**
 * Calculate numerical statistics for a column in a BigQuery table using the DLP API.
 *
 * @param projectId The Google Cloud Platform project ID to run the API call under.
 * @param datasetId The BigQuery dataset to analyze.
 * @param tableId The BigQuery table to analyze.
 * @param columnName The name of the column to analyze, which must contain only numerical data.
 * @param topicId The name of the Pub/Sub topic to notify once the job completes
 * @param subscriptionId The name of the Pub/Sub subscription to use when listening for job
 *     completion status.
 */
private static void numericalStatsAnalysis(String projectId, String datasetId, String tableId, String columnName, String topicId, String subscriptionId) throws Exception {
    // Instantiates a client
    try (DlpServiceClient dlpServiceClient = DlpServiceClient.create()) {
        BigQueryTable bigQueryTable = BigQueryTable.newBuilder().setTableId(tableId).setDatasetId(datasetId).setProjectId(projectId).build();
        FieldId fieldId = FieldId.newBuilder().setName(columnName).build();
        NumericalStatsConfig numericalStatsConfig = NumericalStatsConfig.newBuilder().setField(fieldId).build();
        PrivacyMetric privacyMetric = PrivacyMetric.newBuilder().setNumericalStatsConfig(numericalStatsConfig).build();
        String topicName = String.format("projects/%s/topics/%s", projectId, topicId);
        PublishToPubSub publishToPubSub = PublishToPubSub.newBuilder().setTopic(topicName).build();
        // Create action to publish job status notifications over Google Cloud Pub/Sub
        Action action = Action.newBuilder().setPubSub(publishToPubSub).build();
        RiskAnalysisJobConfig riskAnalysisJobConfig = RiskAnalysisJobConfig.newBuilder().setSourceTable(bigQueryTable).setPrivacyMetric(privacyMetric).addActions(action).build();
        CreateDlpJobRequest createDlpJobRequest = CreateDlpJobRequest.newBuilder().setParent(ProjectName.of(projectId).toString()).setRiskJob(riskAnalysisJobConfig).build();
        DlpJob dlpJob = dlpServiceClient.createDlpJob(createDlpJobRequest);
        String dlpJobName = dlpJob.getName();
        final SettableApiFuture<Boolean> done = SettableApiFuture.create();
        // Set up a Pub/Sub subscriber to listen on the job completion status
        Subscriber subscriber = Subscriber.newBuilder(ProjectSubscriptionName.newBuilder().setProject(projectId).setSubscription(subscriptionId).build(), (pubsubMessage, ackReplyConsumer) -> {
            if (pubsubMessage.getAttributesCount() > 0 && pubsubMessage.getAttributesMap().get("DlpJobName").equals(dlpJobName)) {
                // notify job completion
                done.set(true);
                ackReplyConsumer.ack();
            }
        }).build();
        subscriber.startAsync();
        // For long jobs, consider using a truly asynchronous execution model such as Cloud Functions
        try {
            done.get(1, TimeUnit.MINUTES);
            // Wait for the job to become available
            Thread.sleep(500);
        } catch (TimeoutException e) {
            System.out.println("Unable to verify job completion.");
        }
        // Retrieve completed job status
        DlpJob completedJob = dlpServiceClient.getDlpJob(GetDlpJobRequest.newBuilder().setName(dlpJobName).build());
        System.out.println("Job status: " + completedJob.getState());
        AnalyzeDataSourceRiskDetails riskDetails = completedJob.getRiskDetails();
        AnalyzeDataSourceRiskDetails.NumericalStatsResult result = riskDetails.getNumericalStatsResult();
        System.out.printf("Value range : [%.3f, %.3f]\n", result.getMinValue().getFloatValue(), result.getMaxValue().getFloatValue());
        int percent = 1;
        Double lastValue = null;
        for (Value quantileValue : result.getQuantileValuesList()) {
            Double currentValue = quantileValue.getFloatValue();
            if (lastValue == null || !lastValue.equals(currentValue)) {
                System.out.printf("Value at %s %% quantile : %.3f", percent, currentValue);
            }
            lastValue = currentValue;
        }
    } catch (Exception e) {
        System.out.println("Error in categoricalStatsAnalysis: " + e.getMessage());
    }
}
Also used : Arrays(java.util.Arrays) TimeoutException(java.util.concurrent.TimeoutException) Subscriber(com.google.cloud.pubsub.v1.Subscriber) DefaultParser(org.apache.commons.cli.DefaultParser) KMapEstimationHistogramBucket(com.google.privacy.dlp.v2.AnalyzeDataSourceRiskDetails.KMapEstimationResult.KMapEstimationHistogramBucket) LDiversityEquivalenceClass(com.google.privacy.dlp.v2.AnalyzeDataSourceRiskDetails.LDiversityResult.LDiversityEquivalenceClass) ValueFrequency(com.google.privacy.dlp.v2.ValueFrequency) LDiversityConfig(com.google.privacy.dlp.v2.PrivacyMetric.LDiversityConfig) NumericalStatsConfig(com.google.privacy.dlp.v2.PrivacyMetric.NumericalStatsConfig) Action(com.google.privacy.dlp.v2.Action) KMapEstimationConfig(com.google.privacy.dlp.v2.PrivacyMetric.KMapEstimationConfig) CategoricalStatsHistogramBucket(com.google.privacy.dlp.v2.AnalyzeDataSourceRiskDetails.CategoricalStatsResult.CategoricalStatsHistogramBucket) KAnonymityEquivalenceClass(com.google.privacy.dlp.v2.AnalyzeDataSourceRiskDetails.KAnonymityResult.KAnonymityEquivalenceClass) Value(com.google.privacy.dlp.v2.Value) TaggedField(com.google.privacy.dlp.v2.PrivacyMetric.KMapEstimationConfig.TaggedField) RiskAnalysisJobConfig(com.google.privacy.dlp.v2.RiskAnalysisJobConfig) Collectors(java.util.stream.Collectors) SettableApiFuture(com.google.api.core.SettableApiFuture) List(java.util.List) KAnonymityResult(com.google.privacy.dlp.v2.AnalyzeDataSourceRiskDetails.KAnonymityResult) ParseException(org.apache.commons.cli.ParseException) BigQueryTable(com.google.privacy.dlp.v2.BigQueryTable) ProjectSubscriptionName(com.google.pubsub.v1.ProjectSubscriptionName) AnalyzeDataSourceRiskDetails(com.google.privacy.dlp.v2.AnalyzeDataSourceRiskDetails) Options(org.apache.commons.cli.Options) HelpFormatter(org.apache.commons.cli.HelpFormatter) CategoricalStatsConfig(com.google.privacy.dlp.v2.PrivacyMetric.CategoricalStatsConfig) ArrayList(java.util.ArrayList) ServiceOptions(com.google.cloud.ServiceOptions) CommandLine(org.apache.commons.cli.CommandLine) FieldId(com.google.privacy.dlp.v2.FieldId) ProjectTopicName(com.google.pubsub.v1.ProjectTopicName) Option(org.apache.commons.cli.Option) DlpServiceClient(com.google.cloud.dlp.v2.DlpServiceClient) Iterator(java.util.Iterator) CreateDlpJobRequest(com.google.privacy.dlp.v2.CreateDlpJobRequest) CommandLineParser(org.apache.commons.cli.CommandLineParser) KMapEstimationResult(com.google.privacy.dlp.v2.AnalyzeDataSourceRiskDetails.KMapEstimationResult) KMapEstimationQuasiIdValues(com.google.privacy.dlp.v2.AnalyzeDataSourceRiskDetails.KMapEstimationResult.KMapEstimationQuasiIdValues) InfoType(com.google.privacy.dlp.v2.InfoType) LDiversityResult(com.google.privacy.dlp.v2.AnalyzeDataSourceRiskDetails.LDiversityResult) KAnonymityConfig(com.google.privacy.dlp.v2.PrivacyMetric.KAnonymityConfig) TimeUnit(java.util.concurrent.TimeUnit) PublishToPubSub(com.google.privacy.dlp.v2.Action.PublishToPubSub) ProjectName(com.google.privacy.dlp.v2.ProjectName) GetDlpJobRequest(com.google.privacy.dlp.v2.GetDlpJobRequest) LDiversityHistogramBucket(com.google.privacy.dlp.v2.AnalyzeDataSourceRiskDetails.LDiversityResult.LDiversityHistogramBucket) KAnonymityHistogramBucket(com.google.privacy.dlp.v2.AnalyzeDataSourceRiskDetails.KAnonymityResult.KAnonymityHistogramBucket) PrivacyMetric(com.google.privacy.dlp.v2.PrivacyMetric) OptionGroup(org.apache.commons.cli.OptionGroup) DlpJob(com.google.privacy.dlp.v2.DlpJob) Collections(java.util.Collections) Action(com.google.privacy.dlp.v2.Action) RiskAnalysisJobConfig(com.google.privacy.dlp.v2.RiskAnalysisJobConfig) PrivacyMetric(com.google.privacy.dlp.v2.PrivacyMetric) CreateDlpJobRequest(com.google.privacy.dlp.v2.CreateDlpJobRequest) TimeoutException(java.util.concurrent.TimeoutException) ParseException(org.apache.commons.cli.ParseException) PublishToPubSub(com.google.privacy.dlp.v2.Action.PublishToPubSub) Subscriber(com.google.cloud.pubsub.v1.Subscriber) DlpServiceClient(com.google.cloud.dlp.v2.DlpServiceClient) BigQueryTable(com.google.privacy.dlp.v2.BigQueryTable) FieldId(com.google.privacy.dlp.v2.FieldId) Value(com.google.privacy.dlp.v2.Value) DlpJob(com.google.privacy.dlp.v2.DlpJob) AnalyzeDataSourceRiskDetails(com.google.privacy.dlp.v2.AnalyzeDataSourceRiskDetails) NumericalStatsConfig(com.google.privacy.dlp.v2.PrivacyMetric.NumericalStatsConfig) TimeoutException(java.util.concurrent.TimeoutException)

Aggregations

SettableApiFuture (com.google.api.core.SettableApiFuture)6 ServiceOptions (com.google.cloud.ServiceOptions)6 DlpServiceClient (com.google.cloud.dlp.v2.DlpServiceClient)6 Subscriber (com.google.cloud.pubsub.v1.Subscriber)6 Action (com.google.privacy.dlp.v2.Action)6 BigQueryTable (com.google.privacy.dlp.v2.BigQueryTable)6 CreateDlpJobRequest (com.google.privacy.dlp.v2.CreateDlpJobRequest)6 DlpJob (com.google.privacy.dlp.v2.DlpJob)6 GetDlpJobRequest (com.google.privacy.dlp.v2.GetDlpJobRequest)6 InfoType (com.google.privacy.dlp.v2.InfoType)6 ProjectName (com.google.privacy.dlp.v2.ProjectName)6 ProjectSubscriptionName (com.google.pubsub.v1.ProjectSubscriptionName)6 ProjectTopicName (com.google.pubsub.v1.ProjectTopicName)6 ArrayList (java.util.ArrayList)6 Collections (java.util.Collections)6 PublishToPubSub (com.google.privacy.dlp.v2.Action.PublishToPubSub)5 AnalyzeDataSourceRiskDetails (com.google.privacy.dlp.v2.AnalyzeDataSourceRiskDetails)5 CategoricalStatsHistogramBucket (com.google.privacy.dlp.v2.AnalyzeDataSourceRiskDetails.CategoricalStatsResult.CategoricalStatsHistogramBucket)5 KAnonymityResult (com.google.privacy.dlp.v2.AnalyzeDataSourceRiskDetails.KAnonymityResult)5 KAnonymityEquivalenceClass (com.google.privacy.dlp.v2.AnalyzeDataSourceRiskDetails.KAnonymityResult.KAnonymityEquivalenceClass)5