use of com.google.privacy.dlp.v2.DlpJobName in project java-docs-samples by GoogleCloudPlatform.
the class Inspect method inspectDatastore.
// [END dlp_inspect_gcs]
// [START dlp_inspect_datastore]
/**
 * Inspect a Datastore kind.
 *
 * <p>Creates a DLP inspect job for the given Datastore kind, waits up to one minute for a Pub/Sub
 * message whose {@code DlpJobName} attribute matches the created job, then fetches the job and
 * prints its state and the per-infoType finding counts. Any failure is reported on standard
 * output instead of being thrown.
 *
 * @param projectId The project ID containing the target Datastore
 * @param namespaceId The ID namespace of the Datastore document to inspect
 * @param kind The kind of the Datastore entity to inspect
 * @param minLikelihood The minimum likelihood required before returning a match
 * @param infoTypes The infoTypes of information to match
 * @param maxFindings max number of findings (applied as the per-request findings limit)
 * @param topicId Google Cloud Pub/Sub topic to notify job status updates
 * @param subscriptionId Google Cloud Pub/Sub subscription to above topic to receive status
 *     updates
 */
private static void inspectDatastore(String projectId, String namespaceId, String kind, Likelihood minLikelihood, List<InfoType> infoTypes, int maxFindings, String topicId, String subscriptionId) {
  // Instantiates a client
  try (DlpServiceClient dlpServiceClient = DlpServiceClient.create()) {
    // Reference to the Datastore namespace
    PartitionId partitionId = PartitionId.newBuilder().setProjectId(projectId).setNamespaceId(namespaceId).build();
    // Reference to the Datastore kind
    KindExpression kindExpression = KindExpression.newBuilder().setName(kind).build();
    DatastoreOptions datastoreOptions = DatastoreOptions.newBuilder().setKind(kindExpression).setPartitionId(partitionId).build();
    // Construct Datastore configuration to be inspected
    StorageConfig storageConfig = StorageConfig.newBuilder().setDatastoreOptions(datastoreOptions).build();
    FindingLimits findingLimits = FindingLimits.newBuilder().setMaxFindingsPerRequest(maxFindings).build();
    InspectConfig inspectConfig = InspectConfig.newBuilder().addAllInfoTypes(infoTypes).setMinLikelihood(minLikelihood).setLimits(findingLimits).build();
    // Publish job status notifications to the given Pub/Sub topic
    String pubSubTopic = String.format("projects/%s/topics/%s", projectId, topicId);
    Action.PublishToPubSub publishToPubSub = Action.PublishToPubSub.newBuilder().setTopic(pubSubTopic).build();
    Action action = Action.newBuilder().setPubSub(publishToPubSub).build();
    InspectJobConfig inspectJobConfig = InspectJobConfig.newBuilder().setStorageConfig(storageConfig).setInspectConfig(inspectConfig).addActions(action).build();
    // Asynchronously submit an inspect job, and wait on results
    CreateDlpJobRequest createDlpJobRequest = CreateDlpJobRequest.newBuilder().setParent(ProjectName.of(projectId).toString()).setInspectJob(inspectJobConfig).build();
    DlpJob dlpJob = dlpServiceClient.createDlpJob(createDlpJobRequest);
    String dlpJobName = dlpJob.getName();
    System.out.println("Job created with ID:" + dlpJobName);
    final SettableApiFuture<Boolean> done = SettableApiFuture.create();
    // Set up a Pub/Sub subscriber to listen on the job completion status
    Subscriber subscriber = Subscriber.newBuilder(ProjectSubscriptionName.of(projectId, subscriptionId), (pubsubMessage, ackReplyConsumer) -> {
      // Compare from the known job name so a message that lacks the "DlpJobName" attribute
      // (map lookup returns null) is treated as a non-match instead of throwing.
      if (dlpJobName.equals(pubsubMessage.getAttributesMap().get("DlpJobName"))) {
        // Ack before signalling completion so the ack is issued before the waiting thread
        // starts shutting the subscriber down.
        ackReplyConsumer.ack();
        // notify job completion
        done.set(true);
      } else {
        // Not our job: release the message rather than holding it unacknowledged.
        ackReplyConsumer.nack();
      }
    }).build();
    subscriber.startAsync();
    // For long jobs, consider using a truly asynchronous execution model such as Cloud Functions
    try {
      done.get(1, TimeUnit.MINUTES);
      // Wait for the job to become available
      Thread.sleep(500);
    } catch (Exception e) {
      System.out.println("Unable to verify job completion.");
    } finally {
      // The subscriber is only needed while waiting; stop it so it does not keep running
      // after this method returns.
      subscriber.stopAsync();
    }
    // Fetch the job even after a timeout so its current state is still reported
    DlpJob completedJob = dlpServiceClient.getDlpJob(GetDlpJobRequest.newBuilder().setName(dlpJobName).build());
    System.out.println("Job status: " + completedJob.getState());
    InspectDataSourceDetails inspectDataSourceDetails = completedJob.getInspectDetails();
    InspectDataSourceDetails.Result result = inspectDataSourceDetails.getResult();
    if (result.getInfoTypeStatsCount() > 0) {
      System.out.println("Findings: ");
      for (InfoTypeStats infoTypeStat : result.getInfoTypeStatsList()) {
        System.out.print("\tInfo type: " + infoTypeStat.getInfoType().getName());
        System.out.println("\tCount: " + infoTypeStat.getCount());
      }
    } else {
      System.out.println("No findings.");
    }
  } catch (Exception e) {
    System.out.println("inspectDatastore Problems: " + e.getMessage());
  }
}
use of com.google.privacy.dlp.v2.DlpJobName in project java-docs-samples by GoogleCloudPlatform.
the class Inspect method inspectBigquery.
// [END dlp_inspect_datastore]
// [START dlp_inspect_bigquery]
/**
 * Inspect a BigQuery table.
 *
 * <p>Creates a DLP inspect job for the given table, waits up to one minute for a Pub/Sub message
 * whose {@code DlpJobName} attribute matches the created job, then fetches the job and prints
 * its state and the per-infoType finding counts. Any failure is reported on standard output
 * instead of being thrown.
 *
 * @param projectId The project ID to run the API call under
 * @param datasetId The ID of the dataset to inspect, e.g. 'my_dataset'
 * @param tableId The ID of the table to inspect, e.g. 'my_table'
 * @param minLikelihood The minimum likelihood required before returning a match
 * @param infoTypes The infoTypes of information to match
 * @param maxFindings The maximum number of findings to report (0 = server maximum)
 * @param topicId Topic ID for pubsub.
 * @param subscriptionId Subscription ID for pubsub.
 */
private static void inspectBigquery(String projectId, String datasetId, String tableId, Likelihood minLikelihood, List<InfoType> infoTypes, int maxFindings, String topicId, String subscriptionId) {
  // Instantiates a client
  try (DlpServiceClient dlpServiceClient = DlpServiceClient.create()) {
    // Reference to the BigQuery table
    BigQueryTable tableReference = BigQueryTable.newBuilder().setProjectId(projectId).setDatasetId(datasetId).setTableId(tableId).build();
    BigQueryOptions bigQueryOptions = BigQueryOptions.newBuilder().setTableReference(tableReference).build();
    // Construct BigQuery configuration to be inspected
    StorageConfig storageConfig = StorageConfig.newBuilder().setBigQueryOptions(bigQueryOptions).build();
    FindingLimits findingLimits = FindingLimits.newBuilder().setMaxFindingsPerRequest(maxFindings).build();
    InspectConfig inspectConfig = InspectConfig.newBuilder().addAllInfoTypes(infoTypes).setMinLikelihood(minLikelihood).setLimits(findingLimits).build();
    // Publish job status notifications to the given Pub/Sub topic
    ProjectTopicName topic = ProjectTopicName.of(projectId, topicId);
    Action.PublishToPubSub publishToPubSub = Action.PublishToPubSub.newBuilder().setTopic(topic.toString()).build();
    Action action = Action.newBuilder().setPubSub(publishToPubSub).build();
    InspectJobConfig inspectJobConfig = InspectJobConfig.newBuilder().setStorageConfig(storageConfig).setInspectConfig(inspectConfig).addActions(action).build();
    // Asynchronously submit an inspect job, and wait on results
    CreateDlpJobRequest createDlpJobRequest = CreateDlpJobRequest.newBuilder().setParent(ProjectName.of(projectId).toString()).setInspectJob(inspectJobConfig).build();
    DlpJob dlpJob = dlpServiceClient.createDlpJob(createDlpJobRequest);
    String dlpJobName = dlpJob.getName();
    System.out.println("Job created with ID:" + dlpJobName);
    // Wait for job completion semi-synchronously
    // For long jobs, consider using a truly asynchronous execution model such as Cloud Functions
    final SettableApiFuture<Boolean> done = SettableApiFuture.create();
    // Set up a Pub/Sub subscriber to listen on the job completion status
    Subscriber subscriber = Subscriber.newBuilder(ProjectSubscriptionName.of(projectId, subscriptionId), (pubsubMessage, ackReplyConsumer) -> {
      // Compare from the known job name so a message that lacks the "DlpJobName" attribute
      // (map lookup returns null) is treated as a non-match instead of throwing.
      if (dlpJobName.equals(pubsubMessage.getAttributesMap().get("DlpJobName"))) {
        // Ack before signalling completion so the ack is issued before the waiting thread
        // starts shutting the subscriber down.
        ackReplyConsumer.ack();
        // notify job completion
        done.set(true);
      } else {
        // Not our job: release the message rather than holding it unacknowledged.
        ackReplyConsumer.nack();
      }
    }).build();
    subscriber.startAsync();
    try {
      done.get(1, TimeUnit.MINUTES);
      // Wait for the job to become available
      Thread.sleep(500);
    } catch (Exception e) {
      System.out.println("Unable to verify job completion.");
    } finally {
      // The subscriber is only needed while waiting; stop it so it does not keep running
      // after this method returns.
      subscriber.stopAsync();
    }
    // Fetch the job even after a timeout so its current state is still reported
    DlpJob completedJob = dlpServiceClient.getDlpJob(GetDlpJobRequest.newBuilder().setName(dlpJobName).build());
    System.out.println("Job status: " + completedJob.getState());
    InspectDataSourceDetails inspectDataSourceDetails = completedJob.getInspectDetails();
    InspectDataSourceDetails.Result result = inspectDataSourceDetails.getResult();
    if (result.getInfoTypeStatsCount() > 0) {
      System.out.println("Findings: ");
      for (InfoTypeStats infoTypeStat : result.getInfoTypeStatsList()) {
        System.out.print("\tInfo type: " + infoTypeStat.getInfoType().getName());
        System.out.println("\tCount: " + infoTypeStat.getCount());
      }
    } else {
      System.out.println("No findings.");
    }
  } catch (Exception e) {
    System.out.println("inspectBigquery Problems: " + e.getMessage());
  }
}
use of com.google.privacy.dlp.v2.DlpJobName in project java-docs-samples by GoogleCloudPlatform.
the class RiskAnalysis method categoricalStatsAnalysis.
// [END dlp_numerical_stats]
// [START dlp_categorical_stats]
/**
 * Calculate categorical statistics for a column in a BigQuery table using the DLP API.
 *
 * <p>Creates a DLP risk analysis job, waits up to one minute for a Pub/Sub message whose
 * {@code DlpJobName} attribute matches the created job, then fetches the job and prints the
 * value-frequency histogram buckets. Any failure is reported on standard output instead of
 * being thrown.
 *
 * @param projectId The Google Cloud Platform project ID to run the API call under.
 * @param datasetId The BigQuery dataset to analyze.
 * @param tableId The BigQuery table to analyze.
 * @param columnName The name of the column to analyze, which need not contain numerical data.
 * @param topicId The name of the Pub/Sub topic to notify once the job completes
 * @param subscriptionId The name of the Pub/Sub subscription to use when listening for job
 *     completion status.
 */
private static void categoricalStatsAnalysis(String projectId, String datasetId, String tableId, String columnName, String topicId, String subscriptionId) {
  // Instantiates a client
  try (DlpServiceClient dlpServiceClient = DlpServiceClient.create()) {
    FieldId fieldId = FieldId.newBuilder().setName(columnName).build();
    CategoricalStatsConfig categoricalStatsConfig = CategoricalStatsConfig.newBuilder().setField(fieldId).build();
    BigQueryTable bigQueryTable = BigQueryTable.newBuilder().setProjectId(projectId).setDatasetId(datasetId).setTableId(tableId).build();
    PrivacyMetric privacyMetric = PrivacyMetric.newBuilder().setCategoricalStatsConfig(categoricalStatsConfig).build();
    ProjectTopicName topicName = ProjectTopicName.of(projectId, topicId);
    PublishToPubSub publishToPubSub = PublishToPubSub.newBuilder().setTopic(topicName.toString()).build();
    // Create action to publish job status notifications over Google Cloud Pub/Sub
    Action action = Action.newBuilder().setPubSub(publishToPubSub).build();
    RiskAnalysisJobConfig riskAnalysisJobConfig = RiskAnalysisJobConfig.newBuilder().setSourceTable(bigQueryTable).setPrivacyMetric(privacyMetric).addActions(action).build();
    CreateDlpJobRequest createDlpJobRequest = CreateDlpJobRequest.newBuilder().setParent(ProjectName.of(projectId).toString()).setRiskJob(riskAnalysisJobConfig).build();
    DlpJob dlpJob = dlpServiceClient.createDlpJob(createDlpJobRequest);
    String dlpJobName = dlpJob.getName();
    final SettableApiFuture<Boolean> done = SettableApiFuture.create();
    // Set up a Pub/Sub subscriber to listen on the job completion status
    Subscriber subscriber = Subscriber.newBuilder(ProjectSubscriptionName.newBuilder().setProject(projectId).setSubscription(subscriptionId).build(), (pubsubMessage, ackReplyConsumer) -> {
      // Compare from the known job name so a message that lacks the "DlpJobName" attribute
      // (map lookup returns null) is treated as a non-match instead of throwing.
      if (dlpJobName.equals(pubsubMessage.getAttributesMap().get("DlpJobName"))) {
        // Ack before signalling completion so the ack is issued before the waiting thread
        // starts shutting the subscriber down.
        ackReplyConsumer.ack();
        // notify job completion
        done.set(true);
      } else {
        // Not our job: release the message rather than holding it unacknowledged.
        ackReplyConsumer.nack();
      }
    }).build();
    subscriber.startAsync();
    // For long jobs, consider using a truly asynchronous execution model such as Cloud Functions
    try {
      done.get(1, TimeUnit.MINUTES);
      // Wait for the job to become available
      Thread.sleep(500);
    } catch (TimeoutException e) {
      System.out.println("Unable to verify job completion.");
    } finally {
      // Runs on timeout and on any other exception from the wait, so the subscriber never
      // outlives this method.
      subscriber.stopAsync();
    }
    // Retrieve completed job status
    DlpJob completedJob = dlpServiceClient.getDlpJob(GetDlpJobRequest.newBuilder().setName(dlpJobName).build());
    System.out.println("Job status: " + completedJob.getState());
    AnalyzeDataSourceRiskDetails riskDetails = completedJob.getRiskDetails();
    AnalyzeDataSourceRiskDetails.CategoricalStatsResult result = riskDetails.getCategoricalStatsResult();
    for (CategoricalStatsHistogramBucket bucket : result.getValueFrequencyHistogramBucketsList()) {
      System.out.printf("Most common value occurs %d time(s).\n", bucket.getValueFrequencyUpperBound());
      System.out.printf("Least common value occurs %d time(s).\n", bucket.getValueFrequencyLowerBound());
      for (ValueFrequency valueFrequency : bucket.getBucketValuesList()) {
        System.out.printf("Value %s occurs %d time(s).\n", valueFrequency.getValue().toString(), valueFrequency.getCount());
      }
    }
  } catch (Exception e) {
    System.out.println("Error in categoricalStatsAnalysis: " + e.getMessage());
  }
}
use of com.google.privacy.dlp.v2.DlpJobName in project java-docs-samples by GoogleCloudPlatform.
the class RiskAnalysis method calculateLDiversity.
// [END dlp_k_anonymity]
// [START dlp_l_diversity]
/**
 * Calculate l-diversity for an attribute relative to quasi-identifiers in a BigQuery table.
 *
 * <p>Creates a DLP risk analysis job, waits up to one minute for a Pub/Sub message whose
 * {@code DlpJobName} attribute matches the created job, then fetches the job and prints each
 * equivalence class with its top sensitive values. Exceptions raised inside the method body are
 * caught and reported on standard output.
 *
 * @param projectId The Google Cloud Platform project ID to run the API call under.
 * @param datasetId The BigQuery dataset to analyze.
 * @param tableId The BigQuery table to analyze.
 * @param sensitiveAttribute The name of the attribute to compare the quasi-ID against
 * @param quasiIds A set of column names that form a composite key ('quasi-identifiers').
 * @param topicId The name of the Pub/Sub topic to notify once the job completes
 * @param subscriptionId The name of the Pub/Sub subscription to use when listening for job
 *     completion status.
 */
private static void calculateLDiversity(String projectId, String datasetId, String tableId, String sensitiveAttribute, List<String> quasiIds, String topicId, String subscriptionId) throws Exception {
  // Instantiates a client
  try (DlpServiceClient dlpServiceClient = DlpServiceClient.create()) {
    FieldId sensitiveAttributeField = FieldId.newBuilder().setName(sensitiveAttribute).build();
    List<FieldId> quasiIdFields = quasiIds.stream().map(columnName -> FieldId.newBuilder().setName(columnName).build()).collect(Collectors.toList());
    LDiversityConfig ldiversityConfig = LDiversityConfig.newBuilder().addAllQuasiIds(quasiIdFields).setSensitiveAttribute(sensitiveAttributeField).build();
    BigQueryTable bigQueryTable = BigQueryTable.newBuilder().setProjectId(projectId).setDatasetId(datasetId).setTableId(tableId).build();
    PrivacyMetric privacyMetric = PrivacyMetric.newBuilder().setLDiversityConfig(ldiversityConfig).build();
    String topicName = String.format("projects/%s/topics/%s", projectId, topicId);
    PublishToPubSub publishToPubSub = PublishToPubSub.newBuilder().setTopic(topicName).build();
    // Create action to publish job status notifications over Google Cloud Pub/Sub
    Action action = Action.newBuilder().setPubSub(publishToPubSub).build();
    RiskAnalysisJobConfig riskAnalysisJobConfig = RiskAnalysisJobConfig.newBuilder().setSourceTable(bigQueryTable).setPrivacyMetric(privacyMetric).addActions(action).build();
    CreateDlpJobRequest createDlpJobRequest = CreateDlpJobRequest.newBuilder().setParent(ProjectName.of(projectId).toString()).setRiskJob(riskAnalysisJobConfig).build();
    DlpJob dlpJob = dlpServiceClient.createDlpJob(createDlpJobRequest);
    String dlpJobName = dlpJob.getName();
    final SettableApiFuture<Boolean> done = SettableApiFuture.create();
    // Set up a Pub/Sub subscriber to listen on the job completion status
    Subscriber subscriber = Subscriber.newBuilder(ProjectSubscriptionName.newBuilder().setProject(projectId).setSubscription(subscriptionId).build(), (pubsubMessage, ackReplyConsumer) -> {
      // Compare from the known job name so a message that lacks the "DlpJobName" attribute
      // (map lookup returns null) is treated as a non-match instead of throwing.
      if (dlpJobName.equals(pubsubMessage.getAttributesMap().get("DlpJobName"))) {
        // Ack before signalling completion so the ack is issued before the waiting thread
        // starts shutting the subscriber down.
        ackReplyConsumer.ack();
        // notify job completion
        done.set(true);
      } else {
        // Not our job: release the message rather than holding it unacknowledged.
        ackReplyConsumer.nack();
      }
    }).build();
    subscriber.startAsync();
    // For long jobs, consider using a truly asynchronous execution model such as Cloud Functions
    try {
      done.get(1, TimeUnit.MINUTES);
      // Wait for the job to become available
      Thread.sleep(500);
    } catch (TimeoutException e) {
      System.out.println("Unable to verify job completion.");
    } finally {
      // Runs on timeout and on any other exception from the wait, so the subscriber never
      // outlives this method.
      subscriber.stopAsync();
    }
    // retrieve completed job status
    DlpJob completedJob = dlpServiceClient.getDlpJob(GetDlpJobRequest.newBuilder().setName(dlpJobName).build());
    System.out.println("Job status: " + completedJob.getState());
    AnalyzeDataSourceRiskDetails riskDetails = completedJob.getRiskDetails();
    LDiversityResult ldiversityResult = riskDetails.getLDiversityResult();
    for (LDiversityHistogramBucket result : ldiversityResult.getSensitiveValueFrequencyHistogramBucketsList()) {
      for (LDiversityEquivalenceClass bucket : result.getBucketValuesList()) {
        List<String> quasiIdValues = bucket.getQuasiIdsValuesList().stream().map(Value::toString).collect(Collectors.toList());
        System.out.println("\tQuasi-ID values: " + String.join(", ", quasiIdValues));
        System.out.println("\tClass size: " + bucket.getEquivalenceClassSize());
        for (ValueFrequency valueFrequency : bucket.getTopSensitiveValuesList()) {
          System.out.printf("\t\tSensitive value %s occurs %d time(s).\n", valueFrequency.getValue().toString(), valueFrequency.getCount());
        }
      }
    }
  } catch (Exception e) {
    System.out.println("Error in calculateLDiversity: " + e.getMessage());
  }
}
use of com.google.privacy.dlp.v2.DlpJobName in project java-docs-samples by GoogleCloudPlatform.
the class RiskAnalysis method calculateKMap.
// [END dlp_l_diversity]
// [START dlp_k_map]
/**
 * Calculate k-map risk estimation for an attribute relative to quasi-identifiers in a BigQuery
 * table.
 *
 * <p>Pairs each quasi-identifier column with the infoType at the same position, creates a DLP
 * risk analysis job, waits up to one minute for a Pub/Sub message whose {@code DlpJobName}
 * attribute matches the created job, then fetches the job and prints the k-map estimation
 * histogram. Exceptions raised inside the method body (including the size-mismatch check) are
 * caught and reported on standard output.
 *
 * @param projectId The Google Cloud Platform project ID to run the API call under.
 * @param datasetId The BigQuery dataset to analyze.
 * @param tableId The BigQuery table to analyze.
 * @param quasiIds A set of column names that form a composite key ('quasi-identifiers').
 * @param infoTypes The infoTypes corresponding to each quasi-id column
 * @param regionCode An ISO-3166-1 region code specifying the k-map distribution region
 * @param topicId The name of the Pub/Sub topic to notify once the job completes
 * @param subscriptionId The name of the Pub/Sub subscription to use when listening for job
 *     completion status.
 */
private static void calculateKMap(String projectId, String datasetId, String tableId, List<String> quasiIds, List<InfoType> infoTypes, String regionCode, String topicId, String subscriptionId) throws Exception {
  // Instantiates a client
  try (DlpServiceClient dlpServiceClient = DlpServiceClient.create()) {
    // Validate before doing any work: the two lists are consumed pairwise below
    if (quasiIds.size() != infoTypes.size()) {
      throw new IllegalArgumentException("The numbers of quasi-IDs and infoTypes must be equal!");
    }
    Iterator<String> quasiIdsIterator = quasiIds.iterator();
    Iterator<InfoType> infoTypesIterator = infoTypes.iterator();
    List<TaggedField> taggedFields = new ArrayList<>();
    // Advance both iterators together; requiring both to have a next element means next()
    // can never be called on an exhausted iterator.
    while (quasiIdsIterator.hasNext() && infoTypesIterator.hasNext()) {
      taggedFields.add(TaggedField.newBuilder().setField(FieldId.newBuilder().setName(quasiIdsIterator.next()).build()).setInfoType(infoTypesIterator.next()).build());
    }
    KMapEstimationConfig kmapConfig = KMapEstimationConfig.newBuilder().addAllQuasiIds(taggedFields).setRegionCode(regionCode).build();
    BigQueryTable bigQueryTable = BigQueryTable.newBuilder().setProjectId(projectId).setDatasetId(datasetId).setTableId(tableId).build();
    PrivacyMetric privacyMetric = PrivacyMetric.newBuilder().setKMapEstimationConfig(kmapConfig).build();
    String topicName = String.format("projects/%s/topics/%s", projectId, topicId);
    PublishToPubSub publishToPubSub = PublishToPubSub.newBuilder().setTopic(topicName).build();
    // Create action to publish job status notifications over Google Cloud Pub/Sub
    Action action = Action.newBuilder().setPubSub(publishToPubSub).build();
    RiskAnalysisJobConfig riskAnalysisJobConfig = RiskAnalysisJobConfig.newBuilder().setSourceTable(bigQueryTable).setPrivacyMetric(privacyMetric).addActions(action).build();
    CreateDlpJobRequest createDlpJobRequest = CreateDlpJobRequest.newBuilder().setParent(ProjectName.of(projectId).toString()).setRiskJob(riskAnalysisJobConfig).build();
    DlpJob dlpJob = dlpServiceClient.createDlpJob(createDlpJobRequest);
    String dlpJobName = dlpJob.getName();
    final SettableApiFuture<Boolean> done = SettableApiFuture.create();
    // Set up a Pub/Sub subscriber to listen on the job completion status
    Subscriber subscriber = Subscriber.newBuilder(ProjectSubscriptionName.newBuilder().setProject(projectId).setSubscription(subscriptionId).build(), (pubsubMessage, ackReplyConsumer) -> {
      // Compare from the known job name so a message that lacks the "DlpJobName" attribute
      // (map lookup returns null) is treated as a non-match instead of throwing.
      if (dlpJobName.equals(pubsubMessage.getAttributesMap().get("DlpJobName"))) {
        // Ack before signalling completion so the ack is issued before the waiting thread
        // starts shutting the subscriber down.
        ackReplyConsumer.ack();
        // notify job completion
        done.set(true);
      } else {
        // Not our job: release the message rather than holding it unacknowledged.
        ackReplyConsumer.nack();
      }
    }).build();
    subscriber.startAsync();
    // For long jobs, consider using a truly asynchronous execution model such as Cloud Functions
    try {
      done.get(1, TimeUnit.MINUTES);
      // Wait for the job to become available
      Thread.sleep(500);
    } catch (TimeoutException e) {
      System.out.println("Unable to verify job completion.");
    } finally {
      // Runs on timeout and on any other exception from the wait, so the subscriber never
      // outlives this method.
      subscriber.stopAsync();
    }
    // retrieve completed job status
    DlpJob completedJob = dlpServiceClient.getDlpJob(GetDlpJobRequest.newBuilder().setName(dlpJobName).build());
    System.out.println("Job status: " + completedJob.getState());
    AnalyzeDataSourceRiskDetails riskDetails = completedJob.getRiskDetails();
    KMapEstimationResult kmapResult = riskDetails.getKMapEstimationResult();
    for (KMapEstimationHistogramBucket result : kmapResult.getKMapEstimationHistogramList()) {
      System.out.printf("\tAnonymity range: [%d, %d]\n", result.getMinAnonymity(), result.getMaxAnonymity());
      System.out.printf("\tSize: %d\n", result.getBucketSize());
      for (KMapEstimationQuasiIdValues valueBucket : result.getBucketValuesList()) {
        // Keep only the text after the first ':' of each value's string form
        String quasiIdValues = valueBucket.getQuasiIdsValuesList().stream().map(v -> {
          String s = v.toString();
          return s.substring(s.indexOf(':') + 1).trim();
        }).collect(Collectors.joining(", "));
        System.out.printf("\tValues: {%s}\n", quasiIdValues);
        System.out.printf("\tEstimated k-map anonymity: %d\n", valueBucket.getEstimatedAnonymity());
      }
    }
  } catch (Exception e) {
    System.out.println("Error in calculateKMap: " + e.getMessage());
  }
}
Aggregations