Use of com.google.privacy.dlp.v2.PrivacyMetric.KAnonymityConfig in the project java-docs-samples by GoogleCloudPlatform.
From the class RiskAnalysis, method calculateKAnonymity.
// [END dlp_categorical_stats]
// [START dlp_k_anonymity]
/**
 * Calculate k-anonymity for quasi-identifiers in a BigQuery table using the DLP API.
 *
 * <p>Creates a risk analysis job, waits up to one minute for a Pub/Sub notification carrying the
 * job's name, then fetches the job and prints its state and the equivalence-class histogram.
 *
 * @param projectId The Google Cloud Platform project ID to run the API call under.
 * @param datasetId The BigQuery dataset to analyze.
 * @param tableId The BigQuery table to analyze.
 * @param quasiIds The names of columns that form a composite key ('quasi-identifiers').
 * @param topicId The name of the Pub/Sub topic to notify once the job completes.
 * @param subscriptionId The name of the Pub/Sub subscription to use when listening for job
 *     completion status.
 * @throws Exception declared for callers' compatibility; failures raised inside the method are
 *     caught and reported on standard output rather than propagated.
 */
private static void calculateKAnonymity(String projectId, String datasetId, String tableId, List<String> quasiIds, String topicId, String subscriptionId) throws Exception {
  // Instantiates a client
  try (DlpServiceClient dlpServiceClient = DlpServiceClient.create()) {
    List<FieldId> quasiIdFields =
        quasiIds.stream()
            .map(columnName -> FieldId.newBuilder().setName(columnName).build())
            .collect(Collectors.toList());
    KAnonymityConfig kanonymityConfig =
        KAnonymityConfig.newBuilder().addAllQuasiIds(quasiIdFields).build();
    BigQueryTable bigQueryTable =
        BigQueryTable.newBuilder()
            .setProjectId(projectId)
            .setDatasetId(datasetId)
            .setTableId(tableId)
            .build();
    PrivacyMetric privacyMetric =
        PrivacyMetric.newBuilder().setKAnonymityConfig(kanonymityConfig).build();

    String topicName = String.format("projects/%s/topics/%s", projectId, topicId);
    PublishToPubSub publishToPubSub = PublishToPubSub.newBuilder().setTopic(topicName).build();
    // Create action to publish job status notifications over Google Cloud Pub/Sub
    Action action = Action.newBuilder().setPubSub(publishToPubSub).build();

    RiskAnalysisJobConfig riskAnalysisJobConfig =
        RiskAnalysisJobConfig.newBuilder()
            .setSourceTable(bigQueryTable)
            .setPrivacyMetric(privacyMetric)
            .addActions(action)
            .build();
    CreateDlpJobRequest createDlpJobRequest =
        CreateDlpJobRequest.newBuilder()
            .setParent(ProjectName.of(projectId).toString())
            .setRiskJob(riskAnalysisJobConfig)
            .build();
    DlpJob dlpJob = dlpServiceClient.createDlpJob(createDlpJobRequest);
    String dlpJobName = dlpJob.getName();

    final SettableApiFuture<Boolean> done = SettableApiFuture.create();
    // Set up a Pub/Sub subscriber to listen on the job completion status
    Subscriber subscriber =
        Subscriber.newBuilder(
                ProjectSubscriptionName.newBuilder()
                    .setProject(projectId)
                    .setSubscription(subscriptionId)
                    .build(),
                (pubsubMessage, ackReplyConsumer) -> {
                  // Compare from the known non-null side: a message without the
                  // "DlpJobName" attribute yields null here and must not throw.
                  String notifiedJobName = pubsubMessage.getAttributesMap().get("DlpJobName");
                  if (dlpJobName.equals(notifiedJobName)) {
                    // notify job completion
                    done.set(true);
                    ackReplyConsumer.ack();
                  } else {
                    // Not our job: release the message instead of holding it unacknowledged.
                    ackReplyConsumer.nack();
                  }
                })
            .build();
    subscriber.startAsync();

    // For long jobs, consider using a truly asynchronous execution model such as Cloud Functions
    try {
      done.get(1, TimeUnit.MINUTES);
      // Wait for the job to become available
      Thread.sleep(500);
    } catch (TimeoutException e) {
      System.out.println("Unable to verify job completion.");
    } finally {
      // Always stop the subscriber so its background resources are released.
      subscriber.stopAsync();
    }

    // Retrieve completed job status
    DlpJob completedJob =
        dlpServiceClient.getDlpJob(GetDlpJobRequest.newBuilder().setName(dlpJobName).build());
    System.out.println("Job status: " + completedJob.getState());

    AnalyzeDataSourceRiskDetails riskDetails = completedJob.getRiskDetails();
    KAnonymityResult kanonymityResult = riskDetails.getKAnonymityResult();
    for (KAnonymityHistogramBucket result :
        kanonymityResult.getEquivalenceClassHistogramBucketsList()) {
      System.out.printf(
          "Bucket size range: [%d, %d]\n",
          result.getEquivalenceClassSizeLowerBound(), result.getEquivalenceClassSizeUpperBound());
      for (KAnonymityEquivalenceClass bucket : result.getBucketValuesList()) {
        List<String> quasiIdValues =
            bucket.getQuasiIdsValuesList().stream()
                .map(v -> v.toString())
                .collect(Collectors.toList());
        System.out.println("\tQuasi-ID values: " + String.join(", ", quasiIdValues));
        System.out.println("\tClass size: " + bucket.getEquivalenceClassSize());
      }
    }
  } catch (InterruptedException e) {
    // Restore the interrupt flag so callers can still observe the interruption.
    Thread.currentThread().interrupt();
    System.out.println("Error in calculateKAnonymity: " + e.getMessage());
  } catch (Exception e) {
    System.out.println("Error in calculateKAnonymity: " + e.getMessage());
  }
}
Aggregations