Search in sources :

Example 6 with Subscriber

use of com.google.cloud.pubsub.v1.Subscriber in project spring-cloud-gcp by spring-cloud.

the class PubSubTemplate method pull.

/**
 * Pulls messages synchronously, on demand, using the pull request in argument.
 *
 * <p>This method acknowledges all received messages.
 * @param pullRequest pull request containing the subscription name
 * @return the list of {@link PubsubMessage} containing the headers and payload
 */
private List<PubsubMessage> pull(PullRequest pullRequest, RetrySettings retrySettings) {
    Assert.notNull(pullRequest, "The pull request cannot be null.");
    try {
        SubscriberStub subscriber = this.subscriberFactory.createSubscriberStub(retrySettings);
        Assert.notNull(subscriber, "A SubscriberStub is needed to execute the pull request.");
        PullResponse pullResponse = subscriber.pullCallable().call(pullRequest);
        // Ack received messages.
        if (pullResponse.getReceivedMessagesCount() > 0) {
            List<String> ackIds = pullResponse.getReceivedMessagesList().stream().map(ReceivedMessage::getAckId).collect(Collectors.toList());
            AcknowledgeRequest acknowledgeRequest = AcknowledgeRequest.newBuilder().setSubscription(pullRequest.getSubscription()).addAllAckIds(ackIds).build();
            subscriber.acknowledgeCallable().call(acknowledgeRequest);
        }
        return pullResponse.getReceivedMessagesList().stream().map(ReceivedMessage::getMessage).collect(Collectors.toList());
    } catch (Exception ioe) {
        throw new PubSubException("Error pulling messages from subscription " + pullRequest.getSubscription() + ".", ioe);
    }
}
Also used : PullResponse(com.google.pubsub.v1.PullResponse) AcknowledgeRequest(com.google.pubsub.v1.AcknowledgeRequest) SubscriberStub(com.google.cloud.pubsub.v1.stub.SubscriberStub) ByteString(com.google.protobuf.ByteString)

Example 7 with Subscriber

use of com.google.cloud.pubsub.v1.Subscriber in project java-docs-samples by GoogleCloudPlatform.

the class Inspect method inspectDatastore.

// [END dlp_inspect_gcs]
// [START dlp_inspect_datastore]
/**
 * Inspect a Datastore kind
 *
 * @param projectId The project ID containing the target Datastore
 * @param namespaceId The ID namespace of the Datastore document to inspect
 * @param kind The kind of the Datastore entity to inspect
 * @param minLikelihood The minimum likelihood required before returning a match
 * @param infoTypes The infoTypes of information to match
 * @param maxFindings max number of findings
 * @param topicId Google Cloud Pub/Sub topic to notify job status updates
 * @param subscriptionId Google Cloud Pub/Sub subscription to above topic to receive status
 *     updates
 */
private static void inspectDatastore(String projectId, String namespaceId, String kind, Likelihood minLikelihood, List<InfoType> infoTypes, int maxFindings, String topicId, String subscriptionId) {
    // Instantiates a client
    try (DlpServiceClient dlpServiceClient = DlpServiceClient.create()) {
        // Reference to the Datastore namespace
        PartitionId partitionId = PartitionId.newBuilder().setProjectId(projectId).setNamespaceId(namespaceId).build();
        // Reference to the Datastore kind
        KindExpression kindExpression = KindExpression.newBuilder().setName(kind).build();
        DatastoreOptions datastoreOptions = DatastoreOptions.newBuilder().setKind(kindExpression).setPartitionId(partitionId).build();
        // Construct Datastore configuration to be inspected
        StorageConfig storageConfig = StorageConfig.newBuilder().setDatastoreOptions(datastoreOptions).build();
        FindingLimits findingLimits = FindingLimits.newBuilder().setMaxFindingsPerRequest(maxFindings).build();
        InspectConfig inspectConfig = InspectConfig.newBuilder().addAllInfoTypes(infoTypes).setMinLikelihood(minLikelihood).setLimits(findingLimits).build();
        String pubSubTopic = String.format("projects/%s/topics/%s", projectId, topicId);
        Action.PublishToPubSub publishToPubSub = Action.PublishToPubSub.newBuilder().setTopic(pubSubTopic).build();
        Action action = Action.newBuilder().setPubSub(publishToPubSub).build();
        InspectJobConfig inspectJobConfig = InspectJobConfig.newBuilder().setStorageConfig(storageConfig).setInspectConfig(inspectConfig).addActions(action).build();
        // Asynchronously submit an inspect job, and wait on results
        CreateDlpJobRequest createDlpJobRequest = CreateDlpJobRequest.newBuilder().setParent(ProjectName.of(projectId).toString()).setInspectJob(inspectJobConfig).build();
        DlpJob dlpJob = dlpServiceClient.createDlpJob(createDlpJobRequest);
        System.out.println("Job created with ID:" + dlpJob.getName());
        final SettableApiFuture<Boolean> done = SettableApiFuture.create();
        // Set up a Pub/Sub subscriber to listen on the job completion status
        Subscriber subscriber = Subscriber.newBuilder(ProjectSubscriptionName.of(projectId, subscriptionId), (pubsubMessage, ackReplyConsumer) -> {
            if (pubsubMessage.getAttributesCount() > 0 && pubsubMessage.getAttributesMap().get("DlpJobName").equals(dlpJob.getName())) {
                // notify job completion
                done.set(true);
                ackReplyConsumer.ack();
            }
        }).build();
        subscriber.startAsync();
        // For long jobs, consider using a truly asynchronous execution model such as Cloud Functions
        try {
            done.get(1, TimeUnit.MINUTES);
            // Wait for the job to become available
            Thread.sleep(500);
        } catch (Exception e) {
            System.out.println("Unable to verify job completion.");
        }
        DlpJob completedJob = dlpServiceClient.getDlpJob(GetDlpJobRequest.newBuilder().setName(dlpJob.getName()).build());
        System.out.println("Job status: " + completedJob.getState());
        InspectDataSourceDetails inspectDataSourceDetails = completedJob.getInspectDetails();
        InspectDataSourceDetails.Result result = inspectDataSourceDetails.getResult();
        if (result.getInfoTypeStatsCount() > 0) {
            System.out.println("Findings: ");
            for (InfoTypeStats infoTypeStat : result.getInfoTypeStatsList()) {
                System.out.print("\tInfo type: " + infoTypeStat.getInfoType().getName());
                System.out.println("\tCount: " + infoTypeStat.getCount());
            }
        } else {
            System.out.println("No findings.");
        }
    } catch (Exception e) {
        System.out.println("inspectDatastore Problems: " + e.getMessage());
    }
}
Also used : ByteContentItem(com.google.privacy.dlp.v2.ByteContentItem) InspectResult(com.google.privacy.dlp.v2.InspectResult) KindExpression(com.google.privacy.dlp.v2.KindExpression) Options(org.apache.commons.cli.Options) Likelihood(com.google.privacy.dlp.v2.Likelihood) Subscriber(com.google.cloud.pubsub.v1.Subscriber) BigQueryOptions(com.google.privacy.dlp.v2.BigQueryOptions) HelpFormatter(org.apache.commons.cli.HelpFormatter) MimetypesFileTypeMap(javax.activation.MimetypesFileTypeMap) CloudStorageOptions(com.google.privacy.dlp.v2.CloudStorageOptions) ArrayList(java.util.ArrayList) DefaultParser(org.apache.commons.cli.DefaultParser) InspectDataSourceDetails(com.google.privacy.dlp.v2.InspectDataSourceDetails) FindingLimits(com.google.privacy.dlp.v2.InspectConfig.FindingLimits) InspectContentResponse(com.google.privacy.dlp.v2.InspectContentResponse) ServiceOptions(com.google.cloud.ServiceOptions) URLConnection(java.net.URLConnection) StorageConfig(com.google.privacy.dlp.v2.StorageConfig) CommandLine(org.apache.commons.cli.CommandLine) PartitionId(com.google.privacy.dlp.v2.PartitionId) Action(com.google.privacy.dlp.v2.Action) DatastoreOptions(com.google.privacy.dlp.v2.DatastoreOptions) ProjectTopicName(com.google.pubsub.v1.ProjectTopicName) Option(org.apache.commons.cli.Option) InspectJobConfig(com.google.privacy.dlp.v2.InspectJobConfig) DlpServiceClient(com.google.cloud.dlp.v2.DlpServiceClient) Finding(com.google.privacy.dlp.v2.Finding) CreateDlpJobRequest(com.google.privacy.dlp.v2.CreateDlpJobRequest) Files(java.nio.file.Files) CommandLineParser(org.apache.commons.cli.CommandLineParser) ContentItem(com.google.privacy.dlp.v2.ContentItem) InfoType(com.google.privacy.dlp.v2.InfoType) InfoTypeStats(com.google.privacy.dlp.v2.InfoTypeStats) SettableApiFuture(com.google.api.core.SettableApiFuture) ByteString(com.google.protobuf.ByteString) TimeUnit(java.util.concurrent.TimeUnit) InspectConfig(com.google.privacy.dlp.v2.InspectConfig) List(java.util.List) ProjectName(com.google.privacy.dlp.v2.ProjectName) GetDlpJobRequest(com.google.privacy.dlp.v2.GetDlpJobRequest) Paths(java.nio.file.Paths) ParseException(org.apache.commons.cli.ParseException) BigQueryTable(com.google.privacy.dlp.v2.BigQueryTable) ProjectSubscriptionName(com.google.pubsub.v1.ProjectSubscriptionName) OptionGroup(org.apache.commons.cli.OptionGroup) DlpJob(com.google.privacy.dlp.v2.DlpJob) InspectContentRequest(com.google.privacy.dlp.v2.InspectContentRequest) Collections(java.util.Collections) Action(com.google.privacy.dlp.v2.Action) FindingLimits(com.google.privacy.dlp.v2.InspectConfig.FindingLimits) StorageConfig(com.google.privacy.dlp.v2.StorageConfig) InspectDataSourceDetails(com.google.privacy.dlp.v2.InspectDataSourceDetails) ByteString(com.google.protobuf.ByteString) PartitionId(com.google.privacy.dlp.v2.PartitionId) InspectConfig(com.google.privacy.dlp.v2.InspectConfig) CreateDlpJobRequest(com.google.privacy.dlp.v2.CreateDlpJobRequest) ParseException(org.apache.commons.cli.ParseException) InfoTypeStats(com.google.privacy.dlp.v2.InfoTypeStats) Subscriber(com.google.cloud.pubsub.v1.Subscriber) DlpServiceClient(com.google.cloud.dlp.v2.DlpServiceClient) DlpJob(com.google.privacy.dlp.v2.DlpJob) DatastoreOptions(com.google.privacy.dlp.v2.DatastoreOptions) KindExpression(com.google.privacy.dlp.v2.KindExpression) InspectJobConfig(com.google.privacy.dlp.v2.InspectJobConfig)

Example 8 with Subscriber

use of com.google.cloud.pubsub.v1.Subscriber in project java-docs-samples by GoogleCloudPlatform.

the class Inspect method inspectBigquery.

// [END dlp_inspect_datastore]
// [START dlp_inspect_bigquery]
/**
 * Inspect a BigQuery table
 *
 * @param projectId The project ID to run the API call under
 * @param datasetId The ID of the dataset to inspect, e.g. 'my_dataset'
 * @param tableId The ID of the table to inspect, e.g. 'my_table'
 * @param minLikelihood The minimum likelihood required before returning a match
 * @param infoTypes The infoTypes of information to match
 * @param maxFindings The maximum number of findings to report (0 = server maximum)
 * @param topicId Topic ID for pubsub.
 * @param subscriptionId Subscription ID for pubsub.
 */
private static void inspectBigquery(String projectId, String datasetId, String tableId, Likelihood minLikelihood, List<InfoType> infoTypes, int maxFindings, String topicId, String subscriptionId) {
    // Instantiates a client
    try (DlpServiceClient dlpServiceClient = DlpServiceClient.create()) {
        // Reference to the BigQuery table
        BigQueryTable tableReference = BigQueryTable.newBuilder().setProjectId(projectId).setDatasetId(datasetId).setTableId(tableId).build();
        BigQueryOptions bigQueryOptions = BigQueryOptions.newBuilder().setTableReference(tableReference).build();
        // Construct BigQuery configuration to be inspected
        StorageConfig storageConfig = StorageConfig.newBuilder().setBigQueryOptions(bigQueryOptions).build();
        FindingLimits findingLimits = FindingLimits.newBuilder().setMaxFindingsPerRequest(maxFindings).build();
        InspectConfig inspectConfig = InspectConfig.newBuilder().addAllInfoTypes(infoTypes).setMinLikelihood(minLikelihood).setLimits(findingLimits).build();
        ProjectTopicName topic = ProjectTopicName.of(projectId, topicId);
        Action.PublishToPubSub publishToPubSub = Action.PublishToPubSub.newBuilder().setTopic(topic.toString()).build();
        Action action = Action.newBuilder().setPubSub(publishToPubSub).build();
        InspectJobConfig inspectJobConfig = InspectJobConfig.newBuilder().setStorageConfig(storageConfig).setInspectConfig(inspectConfig).addActions(action).build();
        // Asynchronously submit an inspect job, and wait on results
        CreateDlpJobRequest createDlpJobRequest = CreateDlpJobRequest.newBuilder().setParent(ProjectName.of(projectId).toString()).setInspectJob(inspectJobConfig).build();
        DlpJob dlpJob = dlpServiceClient.createDlpJob(createDlpJobRequest);
        System.out.println("Job created with ID:" + dlpJob.getName());
        // Wait for job completion semi-synchronously
        // For long jobs, consider using a truly asynchronous execution model such as Cloud Functions
        final SettableApiFuture<Boolean> done = SettableApiFuture.create();
        // Set up a Pub/Sub subscriber to listen on the job completion status
        Subscriber subscriber = Subscriber.newBuilder(ProjectSubscriptionName.of(projectId, subscriptionId), (pubsubMessage, ackReplyConsumer) -> {
            if (pubsubMessage.getAttributesCount() > 0 && pubsubMessage.getAttributesMap().get("DlpJobName").equals(dlpJob.getName())) {
                // notify job completion
                done.set(true);
                ackReplyConsumer.ack();
            }
        }).build();
        subscriber.startAsync();
        try {
            done.get(1, TimeUnit.MINUTES);
            // Wait for the job to become available
            Thread.sleep(500);
        } catch (Exception e) {
            System.out.println("Unable to verify job completion.");
        }
        DlpJob completedJob = dlpServiceClient.getDlpJob(GetDlpJobRequest.newBuilder().setName(dlpJob.getName()).build());
        System.out.println("Job status: " + completedJob.getState());
        InspectDataSourceDetails inspectDataSourceDetails = completedJob.getInspectDetails();
        InspectDataSourceDetails.Result result = inspectDataSourceDetails.getResult();
        if (result.getInfoTypeStatsCount() > 0) {
            System.out.println("Findings: ");
            for (InfoTypeStats infoTypeStat : result.getInfoTypeStatsList()) {
                System.out.print("\tInfo type: " + infoTypeStat.getInfoType().getName());
                System.out.println("\tCount: " + infoTypeStat.getCount());
            }
        } else {
            System.out.println("No findings.");
        }
    } catch (Exception e) {
        System.out.println("inspectBigquery Problems: " + e.getMessage());
    }
}
Also used : ByteContentItem(com.google.privacy.dlp.v2.ByteContentItem) InspectResult(com.google.privacy.dlp.v2.InspectResult) KindExpression(com.google.privacy.dlp.v2.KindExpression) Options(org.apache.commons.cli.Options) Likelihood(com.google.privacy.dlp.v2.Likelihood) Subscriber(com.google.cloud.pubsub.v1.Subscriber) BigQueryOptions(com.google.privacy.dlp.v2.BigQueryOptions) HelpFormatter(org.apache.commons.cli.HelpFormatter) MimetypesFileTypeMap(javax.activation.MimetypesFileTypeMap) CloudStorageOptions(com.google.privacy.dlp.v2.CloudStorageOptions) ArrayList(java.util.ArrayList) DefaultParser(org.apache.commons.cli.DefaultParser) InspectDataSourceDetails(com.google.privacy.dlp.v2.InspectDataSourceDetails) FindingLimits(com.google.privacy.dlp.v2.InspectConfig.FindingLimits) InspectContentResponse(com.google.privacy.dlp.v2.InspectContentResponse) ServiceOptions(com.google.cloud.ServiceOptions) URLConnection(java.net.URLConnection) StorageConfig(com.google.privacy.dlp.v2.StorageConfig) CommandLine(org.apache.commons.cli.CommandLine) PartitionId(com.google.privacy.dlp.v2.PartitionId) Action(com.google.privacy.dlp.v2.Action) DatastoreOptions(com.google.privacy.dlp.v2.DatastoreOptions) ProjectTopicName(com.google.pubsub.v1.ProjectTopicName) Option(org.apache.commons.cli.Option) InspectJobConfig(com.google.privacy.dlp.v2.InspectJobConfig) DlpServiceClient(com.google.cloud.dlp.v2.DlpServiceClient) Finding(com.google.privacy.dlp.v2.Finding) CreateDlpJobRequest(com.google.privacy.dlp.v2.CreateDlpJobRequest) Files(java.nio.file.Files) CommandLineParser(org.apache.commons.cli.CommandLineParser) ContentItem(com.google.privacy.dlp.v2.ContentItem) InfoType(com.google.privacy.dlp.v2.InfoType) InfoTypeStats(com.google.privacy.dlp.v2.InfoTypeStats) SettableApiFuture(com.google.api.core.SettableApiFuture) ByteString(com.google.protobuf.ByteString) TimeUnit(java.util.concurrent.TimeUnit) InspectConfig(com.google.privacy.dlp.v2.InspectConfig) List(java.util.List) ProjectName(com.google.privacy.dlp.v2.ProjectName) GetDlpJobRequest(com.google.privacy.dlp.v2.GetDlpJobRequest) Paths(java.nio.file.Paths) ParseException(org.apache.commons.cli.ParseException) BigQueryTable(com.google.privacy.dlp.v2.BigQueryTable) ProjectSubscriptionName(com.google.pubsub.v1.ProjectSubscriptionName) OptionGroup(org.apache.commons.cli.OptionGroup) DlpJob(com.google.privacy.dlp.v2.DlpJob) InspectContentRequest(com.google.privacy.dlp.v2.InspectContentRequest) Collections(java.util.Collections) Action(com.google.privacy.dlp.v2.Action) FindingLimits(com.google.privacy.dlp.v2.InspectConfig.FindingLimits) StorageConfig(com.google.privacy.dlp.v2.StorageConfig) InspectDataSourceDetails(com.google.privacy.dlp.v2.InspectDataSourceDetails) BigQueryOptions(com.google.privacy.dlp.v2.BigQueryOptions) InspectConfig(com.google.privacy.dlp.v2.InspectConfig) CreateDlpJobRequest(com.google.privacy.dlp.v2.CreateDlpJobRequest) ParseException(org.apache.commons.cli.ParseException) InfoTypeStats(com.google.privacy.dlp.v2.InfoTypeStats) Subscriber(com.google.cloud.pubsub.v1.Subscriber) DlpServiceClient(com.google.cloud.dlp.v2.DlpServiceClient) BigQueryTable(com.google.privacy.dlp.v2.BigQueryTable) DlpJob(com.google.privacy.dlp.v2.DlpJob) ProjectTopicName(com.google.pubsub.v1.ProjectTopicName) InspectJobConfig(com.google.privacy.dlp.v2.InspectJobConfig)

Example 9 with Subscriber

use of com.google.cloud.pubsub.v1.Subscriber in project java-docs-samples by GoogleCloudPlatform.

the class RiskAnalysis method categoricalStatsAnalysis.

// [END dlp_numerical_stats]
// [START dlp_categorical_stats]
/**
 * Calculate categorical statistics for a column in a BigQuery table using the DLP API.
 *
 * @param projectId The Google Cloud Platform project ID to run the API call under.
 * @param datasetId The BigQuery dataset to analyze.
 * @param tableId The BigQuery table to analyze.
 * @param columnName The name of the column to analyze, which need not contain numerical data.
 * @param topicId The name of the Pub/Sub topic to notify once the job completes
 * @param subscriptionId The name of the Pub/Sub subscription to use when listening for job
 *     completion status.
 */
private static void categoricalStatsAnalysis(String projectId, String datasetId, String tableId, String columnName, String topicId, String subscriptionId) {
    // Instantiates a client
    try (DlpServiceClient dlpServiceClient = DlpServiceClient.create()) {
        FieldId fieldId = FieldId.newBuilder().setName(columnName).build();
        CategoricalStatsConfig categoricalStatsConfig = CategoricalStatsConfig.newBuilder().setField(fieldId).build();
        BigQueryTable bigQueryTable = BigQueryTable.newBuilder().setProjectId(projectId).setDatasetId(datasetId).setTableId(tableId).build();
        PrivacyMetric privacyMetric = PrivacyMetric.newBuilder().setCategoricalStatsConfig(categoricalStatsConfig).build();
        ProjectTopicName topicName = ProjectTopicName.of(projectId, topicId);
        PublishToPubSub publishToPubSub = PublishToPubSub.newBuilder().setTopic(topicName.toString()).build();
        // Create action to publish job status notifications over Google Cloud Pub/Sub
        Action action = Action.newBuilder().setPubSub(publishToPubSub).build();
        RiskAnalysisJobConfig riskAnalysisJobConfig = RiskAnalysisJobConfig.newBuilder().setSourceTable(bigQueryTable).setPrivacyMetric(privacyMetric).addActions(action).build();
        CreateDlpJobRequest createDlpJobRequest = CreateDlpJobRequest.newBuilder().setParent(ProjectName.of(projectId).toString()).setRiskJob(riskAnalysisJobConfig).build();
        DlpJob dlpJob = dlpServiceClient.createDlpJob(createDlpJobRequest);
        String dlpJobName = dlpJob.getName();
        final SettableApiFuture<Boolean> done = SettableApiFuture.create();
        // Set up a Pub/Sub subscriber to listen on the job completion status
        Subscriber subscriber = Subscriber.newBuilder(ProjectSubscriptionName.newBuilder().setProject(projectId).setSubscription(subscriptionId).build(), (pubsubMessage, ackReplyConsumer) -> {
            if (pubsubMessage.getAttributesCount() > 0 && pubsubMessage.getAttributesMap().get("DlpJobName").equals(dlpJobName)) {
                // notify job completion
                done.set(true);
                ackReplyConsumer.ack();
            }
        }).build();
        subscriber.startAsync();
        // For long jobs, consider using a truly asynchronous execution model such as Cloud Functions
        try {
            done.get(1, TimeUnit.MINUTES);
            // Wait for the job to become available
            Thread.sleep(500);
        } catch (TimeoutException e) {
            System.out.println("Unable to verify job completion.");
        }
        // Retrieve completed job status
        DlpJob completedJob = dlpServiceClient.getDlpJob(GetDlpJobRequest.newBuilder().setName(dlpJobName).build());
        System.out.println("Job status: " + completedJob.getState());
        AnalyzeDataSourceRiskDetails riskDetails = completedJob.getRiskDetails();
        AnalyzeDataSourceRiskDetails.CategoricalStatsResult result = riskDetails.getCategoricalStatsResult();
        for (CategoricalStatsHistogramBucket bucket : result.getValueFrequencyHistogramBucketsList()) {
            System.out.printf("Most common value occurs %d time(s).\n", bucket.getValueFrequencyUpperBound());
            System.out.printf("Least common value occurs %d time(s).\n", bucket.getValueFrequencyLowerBound());
            for (ValueFrequency valueFrequency : bucket.getBucketValuesList()) {
                System.out.printf("Value %s occurs %d time(s).\n", valueFrequency.getValue().toString(), valueFrequency.getCount());
            }
        }
    } catch (Exception e) {
        System.out.println("Error in categoricalStatsAnalysis: " + e.getMessage());
    }
}
Also used : Arrays(java.util.Arrays) TimeoutException(java.util.concurrent.TimeoutException) Subscriber(com.google.cloud.pubsub.v1.Subscriber) DefaultParser(org.apache.commons.cli.DefaultParser) KMapEstimationHistogramBucket(com.google.privacy.dlp.v2.AnalyzeDataSourceRiskDetails.KMapEstimationResult.KMapEstimationHistogramBucket) LDiversityEquivalenceClass(com.google.privacy.dlp.v2.AnalyzeDataSourceRiskDetails.LDiversityResult.LDiversityEquivalenceClass) ValueFrequency(com.google.privacy.dlp.v2.ValueFrequency) LDiversityConfig(com.google.privacy.dlp.v2.PrivacyMetric.LDiversityConfig) NumericalStatsConfig(com.google.privacy.dlp.v2.PrivacyMetric.NumericalStatsConfig) Action(com.google.privacy.dlp.v2.Action) KMapEstimationConfig(com.google.privacy.dlp.v2.PrivacyMetric.KMapEstimationConfig) CategoricalStatsHistogramBucket(com.google.privacy.dlp.v2.AnalyzeDataSourceRiskDetails.CategoricalStatsResult.CategoricalStatsHistogramBucket) KAnonymityEquivalenceClass(com.google.privacy.dlp.v2.AnalyzeDataSourceRiskDetails.KAnonymityResult.KAnonymityEquivalenceClass) Value(com.google.privacy.dlp.v2.Value) TaggedField(com.google.privacy.dlp.v2.PrivacyMetric.KMapEstimationConfig.TaggedField) RiskAnalysisJobConfig(com.google.privacy.dlp.v2.RiskAnalysisJobConfig) Collectors(java.util.stream.Collectors) SettableApiFuture(com.google.api.core.SettableApiFuture) List(java.util.List) KAnonymityResult(com.google.privacy.dlp.v2.AnalyzeDataSourceRiskDetails.KAnonymityResult) ParseException(org.apache.commons.cli.ParseException) BigQueryTable(com.google.privacy.dlp.v2.BigQueryTable) ProjectSubscriptionName(com.google.pubsub.v1.ProjectSubscriptionName) AnalyzeDataSourceRiskDetails(com.google.privacy.dlp.v2.AnalyzeDataSourceRiskDetails) Options(org.apache.commons.cli.Options) HelpFormatter(org.apache.commons.cli.HelpFormatter) CategoricalStatsConfig(com.google.privacy.dlp.v2.PrivacyMetric.CategoricalStatsConfig) ArrayList(java.util.ArrayList) ServiceOptions(com.google.cloud.ServiceOptions) CommandLine(org.apache.commons.cli.CommandLine) FieldId(com.google.privacy.dlp.v2.FieldId) ProjectTopicName(com.google.pubsub.v1.ProjectTopicName) Option(org.apache.commons.cli.Option) DlpServiceClient(com.google.cloud.dlp.v2.DlpServiceClient) Iterator(java.util.Iterator) CreateDlpJobRequest(com.google.privacy.dlp.v2.CreateDlpJobRequest) CommandLineParser(org.apache.commons.cli.CommandLineParser) KMapEstimationResult(com.google.privacy.dlp.v2.AnalyzeDataSourceRiskDetails.KMapEstimationResult) KMapEstimationQuasiIdValues(com.google.privacy.dlp.v2.AnalyzeDataSourceRiskDetails.KMapEstimationResult.KMapEstimationQuasiIdValues) InfoType(com.google.privacy.dlp.v2.InfoType) LDiversityResult(com.google.privacy.dlp.v2.AnalyzeDataSourceRiskDetails.LDiversityResult) KAnonymityConfig(com.google.privacy.dlp.v2.PrivacyMetric.KAnonymityConfig) TimeUnit(java.util.concurrent.TimeUnit) PublishToPubSub(com.google.privacy.dlp.v2.Action.PublishToPubSub) ProjectName(com.google.privacy.dlp.v2.ProjectName) GetDlpJobRequest(com.google.privacy.dlp.v2.GetDlpJobRequest) LDiversityHistogramBucket(com.google.privacy.dlp.v2.AnalyzeDataSourceRiskDetails.LDiversityResult.LDiversityHistogramBucket) KAnonymityHistogramBucket(com.google.privacy.dlp.v2.AnalyzeDataSourceRiskDetails.KAnonymityResult.KAnonymityHistogramBucket) PrivacyMetric(com.google.privacy.dlp.v2.PrivacyMetric) OptionGroup(org.apache.commons.cli.OptionGroup) DlpJob(com.google.privacy.dlp.v2.DlpJob) Collections(java.util.Collections) CategoricalStatsConfig(com.google.privacy.dlp.v2.PrivacyMetric.CategoricalStatsConfig) Action(com.google.privacy.dlp.v2.Action) CategoricalStatsHistogramBucket(com.google.privacy.dlp.v2.AnalyzeDataSourceRiskDetails.CategoricalStatsResult.CategoricalStatsHistogramBucket) RiskAnalysisJobConfig(com.google.privacy.dlp.v2.RiskAnalysisJobConfig) PrivacyMetric(com.google.privacy.dlp.v2.PrivacyMetric) CreateDlpJobRequest(com.google.privacy.dlp.v2.CreateDlpJobRequest) TimeoutException(java.util.concurrent.TimeoutException) ParseException(org.apache.commons.cli.ParseException) PublishToPubSub(com.google.privacy.dlp.v2.Action.PublishToPubSub) Subscriber(com.google.cloud.pubsub.v1.Subscriber) DlpServiceClient(com.google.cloud.dlp.v2.DlpServiceClient) BigQueryTable(com.google.privacy.dlp.v2.BigQueryTable) ValueFrequency(com.google.privacy.dlp.v2.ValueFrequency) FieldId(com.google.privacy.dlp.v2.FieldId) DlpJob(com.google.privacy.dlp.v2.DlpJob) ProjectTopicName(com.google.pubsub.v1.ProjectTopicName) AnalyzeDataSourceRiskDetails(com.google.privacy.dlp.v2.AnalyzeDataSourceRiskDetails) TimeoutException(java.util.concurrent.TimeoutException)

Example 10 with Subscriber

use of com.google.cloud.pubsub.v1.Subscriber in project java-docs-samples by GoogleCloudPlatform.

the class RiskAnalysis method calculateLDiversity.

// [END dlp_k_anonymity]
// [START dlp_l_diversity]
/**
 * Calculate l-diversity for an attribute relative to quasi-identifiers in a BigQuery table.
 *
 * @param projectId The Google Cloud Platform project ID to run the API call under.
 * @param datasetId The BigQuery dataset to analyze.
 * @param tableId The BigQuery table to analyze.
 * @param sensitiveAttribute The name of the attribute to compare the quasi-ID against
 * @param quasiIds A set of column names that form a composite key ('quasi-identifiers').
 * @param topicId The name of the Pub/Sub topic to notify once the job completes
 * @param subscriptionId The name of the Pub/Sub subscription to use when listening for job
 *     completion status.
 */
private static void calculateLDiversity(String projectId, String datasetId, String tableId, String sensitiveAttribute, List<String> quasiIds, String topicId, String subscriptionId) throws Exception {
    // Instantiates a client
    try (DlpServiceClient dlpServiceClient = DlpServiceClient.create()) {
        FieldId sensitiveAttributeField = FieldId.newBuilder().setName(sensitiveAttribute).build();
        List<FieldId> quasiIdFields = quasiIds.stream().map(columnName -> FieldId.newBuilder().setName(columnName).build()).collect(Collectors.toList());
        LDiversityConfig ldiversityConfig = LDiversityConfig.newBuilder().addAllQuasiIds(quasiIdFields).setSensitiveAttribute(sensitiveAttributeField).build();
        BigQueryTable bigQueryTable = BigQueryTable.newBuilder().setProjectId(projectId).setDatasetId(datasetId).setTableId(tableId).build();
        PrivacyMetric privacyMetric = PrivacyMetric.newBuilder().setLDiversityConfig(ldiversityConfig).build();
        String topicName = String.format("projects/%s/topics/%s", projectId, topicId);
        PublishToPubSub publishToPubSub = PublishToPubSub.newBuilder().setTopic(topicName).build();
        // Create action to publish job status notifications over Google Cloud Pub/Sub
        Action action = Action.newBuilder().setPubSub(publishToPubSub).build();
        RiskAnalysisJobConfig riskAnalysisJobConfig = RiskAnalysisJobConfig.newBuilder().setSourceTable(bigQueryTable).setPrivacyMetric(privacyMetric).addActions(action).build();
        CreateDlpJobRequest createDlpJobRequest = CreateDlpJobRequest.newBuilder().setParent(ProjectName.of(projectId).toString()).setRiskJob(riskAnalysisJobConfig).build();
        DlpJob dlpJob = dlpServiceClient.createDlpJob(createDlpJobRequest);
        String dlpJobName = dlpJob.getName();
        final SettableApiFuture<Boolean> done = SettableApiFuture.create();
        // Set up a Pub/Sub subscriber to listen on the job completion status
        Subscriber subscriber = Subscriber.newBuilder(ProjectSubscriptionName.newBuilder().setProject(projectId).setSubscription(subscriptionId).build(), (pubsubMessage, ackReplyConsumer) -> {
            if (pubsubMessage.getAttributesCount() > 0 && pubsubMessage.getAttributesMap().get("DlpJobName").equals(dlpJobName)) {
                // notify job completion
                done.set(true);
                ackReplyConsumer.ack();
            }
        }).build();
        subscriber.startAsync();
        // For long jobs, consider using a truly asynchronous execution model such as Cloud Functions
        try {
            done.get(1, TimeUnit.MINUTES);
            // Wait for the job to become available
            Thread.sleep(500);
        } catch (TimeoutException e) {
            System.out.println("Unable to verify job completion.");
        }
        // retrieve completed job status
        DlpJob completedJob = dlpServiceClient.getDlpJob(GetDlpJobRequest.newBuilder().setName(dlpJobName).build());
        System.out.println("Job status: " + completedJob.getState());
        AnalyzeDataSourceRiskDetails riskDetails = completedJob.getRiskDetails();
        LDiversityResult ldiversityResult = riskDetails.getLDiversityResult();
        for (LDiversityHistogramBucket result : ldiversityResult.getSensitiveValueFrequencyHistogramBucketsList()) {
            for (LDiversityEquivalenceClass bucket : result.getBucketValuesList()) {
                List<String> quasiIdValues = bucket.getQuasiIdsValuesList().stream().map(Value::toString).collect(Collectors.toList());
                System.out.println("\tQuasi-ID values: " + String.join(", ", quasiIdValues));
                System.out.println("\tClass size: " + bucket.getEquivalenceClassSize());
                for (ValueFrequency valueFrequency : bucket.getTopSensitiveValuesList()) {
                    System.out.printf("\t\tSensitive value %s occurs %d time(s).\n", valueFrequency.getValue().toString(), valueFrequency.getCount());
                }
            }
        }
    } catch (Exception e) {
        System.out.println("Error in calculateLDiversity: " + e.getMessage());
    }
}
Also used : Arrays(java.util.Arrays) TimeoutException(java.util.concurrent.TimeoutException) Subscriber(com.google.cloud.pubsub.v1.Subscriber) DefaultParser(org.apache.commons.cli.DefaultParser) KMapEstimationHistogramBucket(com.google.privacy.dlp.v2.AnalyzeDataSourceRiskDetails.KMapEstimationResult.KMapEstimationHistogramBucket) LDiversityEquivalenceClass(com.google.privacy.dlp.v2.AnalyzeDataSourceRiskDetails.LDiversityResult.LDiversityEquivalenceClass) ValueFrequency(com.google.privacy.dlp.v2.ValueFrequency) LDiversityConfig(com.google.privacy.dlp.v2.PrivacyMetric.LDiversityConfig) NumericalStatsConfig(com.google.privacy.dlp.v2.PrivacyMetric.NumericalStatsConfig) Action(com.google.privacy.dlp.v2.Action) KMapEstimationConfig(com.google.privacy.dlp.v2.PrivacyMetric.KMapEstimationConfig) CategoricalStatsHistogramBucket(com.google.privacy.dlp.v2.AnalyzeDataSourceRiskDetails.CategoricalStatsResult.CategoricalStatsHistogramBucket) KAnonymityEquivalenceClass(com.google.privacy.dlp.v2.AnalyzeDataSourceRiskDetails.KAnonymityResult.KAnonymityEquivalenceClass) Value(com.google.privacy.dlp.v2.Value) TaggedField(com.google.privacy.dlp.v2.PrivacyMetric.KMapEstimationConfig.TaggedField) RiskAnalysisJobConfig(com.google.privacy.dlp.v2.RiskAnalysisJobConfig) Collectors(java.util.stream.Collectors) SettableApiFuture(com.google.api.core.SettableApiFuture) List(java.util.List) KAnonymityResult(com.google.privacy.dlp.v2.AnalyzeDataSourceRiskDetails.KAnonymityResult) ParseException(org.apache.commons.cli.ParseException) BigQueryTable(com.google.privacy.dlp.v2.BigQueryTable) ProjectSubscriptionName(com.google.pubsub.v1.ProjectSubscriptionName) AnalyzeDataSourceRiskDetails(com.google.privacy.dlp.v2.AnalyzeDataSourceRiskDetails) Options(org.apache.commons.cli.Options) HelpFormatter(org.apache.commons.cli.HelpFormatter) CategoricalStatsConfig(com.google.privacy.dlp.v2.PrivacyMetric.CategoricalStatsConfig) ArrayList(java.util.ArrayList) ServiceOptions(com.google.cloud.ServiceOptions) CommandLine(org.apache.commons.cli.CommandLine) FieldId(com.google.privacy.dlp.v2.FieldId) ProjectTopicName(com.google.pubsub.v1.ProjectTopicName) Option(org.apache.commons.cli.Option) DlpServiceClient(com.google.cloud.dlp.v2.DlpServiceClient) Iterator(java.util.Iterator) CreateDlpJobRequest(com.google.privacy.dlp.v2.CreateDlpJobRequest) CommandLineParser(org.apache.commons.cli.CommandLineParser) KMapEstimationResult(com.google.privacy.dlp.v2.AnalyzeDataSourceRiskDetails.KMapEstimationResult) KMapEstimationQuasiIdValues(com.google.privacy.dlp.v2.AnalyzeDataSourceRiskDetails.KMapEstimationResult.KMapEstimationQuasiIdValues) InfoType(com.google.privacy.dlp.v2.InfoType) LDiversityResult(com.google.privacy.dlp.v2.AnalyzeDataSourceRiskDetails.LDiversityResult) KAnonymityConfig(com.google.privacy.dlp.v2.PrivacyMetric.KAnonymityConfig) TimeUnit(java.util.concurrent.TimeUnit) PublishToPubSub(com.google.privacy.dlp.v2.Action.PublishToPubSub) ProjectName(com.google.privacy.dlp.v2.ProjectName) GetDlpJobRequest(com.google.privacy.dlp.v2.GetDlpJobRequest) LDiversityHistogramBucket(com.google.privacy.dlp.v2.AnalyzeDataSourceRiskDetails.LDiversityResult.LDiversityHistogramBucket) KAnonymityHistogramBucket(com.google.privacy.dlp.v2.AnalyzeDataSourceRiskDetails.KAnonymityResult.KAnonymityHistogramBucket) PrivacyMetric(com.google.privacy.dlp.v2.PrivacyMetric) OptionGroup(org.apache.commons.cli.OptionGroup) DlpJob(com.google.privacy.dlp.v2.DlpJob) Collections(java.util.Collections) LDiversityHistogramBucket(com.google.privacy.dlp.v2.AnalyzeDataSourceRiskDetails.LDiversityResult.LDiversityHistogramBucket) LDiversityConfig(com.google.privacy.dlp.v2.PrivacyMetric.LDiversityConfig) Action(com.google.privacy.dlp.v2.Action) RiskAnalysisJobConfig(com.google.privacy.dlp.v2.RiskAnalysisJobConfig) PrivacyMetric(com.google.privacy.dlp.v2.PrivacyMetric) CreateDlpJobRequest(com.google.privacy.dlp.v2.CreateDlpJobRequest) TimeoutException(java.util.concurrent.TimeoutException) ParseException(org.apache.commons.cli.ParseException) PublishToPubSub(com.google.privacy.dlp.v2.Action.PublishToPubSub) Subscriber(com.google.cloud.pubsub.v1.Subscriber) DlpServiceClient(com.google.cloud.dlp.v2.DlpServiceClient) BigQueryTable(com.google.privacy.dlp.v2.BigQueryTable) ValueFrequency(com.google.privacy.dlp.v2.ValueFrequency) FieldId(com.google.privacy.dlp.v2.FieldId) LDiversityResult(com.google.privacy.dlp.v2.AnalyzeDataSourceRiskDetails.LDiversityResult) DlpJob(com.google.privacy.dlp.v2.DlpJob) AnalyzeDataSourceRiskDetails(com.google.privacy.dlp.v2.AnalyzeDataSourceRiskDetails) LDiversityEquivalenceClass(com.google.privacy.dlp.v2.AnalyzeDataSourceRiskDetails.LDiversityResult.LDiversityEquivalenceClass) TimeoutException(java.util.concurrent.TimeoutException)

Aggregations

Subscriber (com.google.cloud.pubsub.v1.Subscriber)15 ProjectSubscriptionName (com.google.pubsub.v1.ProjectSubscriptionName)11 SettableApiFuture (com.google.api.core.SettableApiFuture)8 ServiceOptions (com.google.cloud.ServiceOptions)8 DlpServiceClient (com.google.cloud.dlp.v2.DlpServiceClient)8 Action (com.google.privacy.dlp.v2.Action)8 BigQueryTable (com.google.privacy.dlp.v2.BigQueryTable)8 CreateDlpJobRequest (com.google.privacy.dlp.v2.CreateDlpJobRequest)8 DlpJob (com.google.privacy.dlp.v2.DlpJob)8 GetDlpJobRequest (com.google.privacy.dlp.v2.GetDlpJobRequest)8 InfoType (com.google.privacy.dlp.v2.InfoType)8 ProjectName (com.google.privacy.dlp.v2.ProjectName)8 ProjectTopicName (com.google.pubsub.v1.ProjectTopicName)8 ArrayList (java.util.ArrayList)8 Collections (java.util.Collections)8 List (java.util.List)8 TimeUnit (java.util.concurrent.TimeUnit)8 CommandLine (org.apache.commons.cli.CommandLine)8 CommandLineParser (org.apache.commons.cli.CommandLineParser)8 DefaultParser (org.apache.commons.cli.DefaultParser)8