use of in project bq-pii-classifier by GoogleCloudPlatform.
the class AutoDlpResultsScannerImpl method listParents.
public // return: List("project.dataset")
List<String> listParents(String project) throws NonRetryableApplicationException, InterruptedException {
String queryTemplate = "SELECT DISTINCT " + "CONCAT(column_profile.dataset_project_id, '.', column_profile.dataset_id) AS dataset " + "FROM %s.%s.%s r " + "INNER JOIN %s.INFORMATION_SCHEMA.SCHEMATA s ON s.schema_name = r.column_profile.dataset_id " + "WHERE r.column_profile.dataset_project_id = '%s'";
String formattedQuery = String.format(queryTemplate, hostProject, hostDataset, dlpFindingsTable, project, project);
// Create a job ID so that we can safely retry.
Job queryJob = bqService.submitJob(formattedQuery);
TableResult result = bqService.waitAndGetJobResults(queryJob);
List<String> projectDatasets = new ArrayList<>();
// Construct a mapping between field names and DLP infotypes
for (FieldValueList row : result.iterateAll()) {
if (row.get("dataset").isNull()) {
throw new NonRetryableApplicationException("processProjects query returned rows with null 'dataset' field.");
String datasetSpec = row.get("dataset").getStringValue();
return projectDatasets;
use of in project bq-pii-classifier by GoogleCloudPlatform.
the class AutoDlpResultsScannerImpl method listChildren.
public // List all dlp job IDs for tables in a dataset/project that have DLP findings within the latest inspection run
List<String> listChildren(String project, String dataset) throws InterruptedException, NonRetryableApplicationException {
String queryTemplate = "SELECT DISTINCT CONCAT(column_profile.dataset_project_id, '.', column_profile.dataset_id, '.', column_profile.table_id) AS table FROM %s.%s.%s WHERE column_profile.dataset_project_id = '%s' AND column_profile.dataset_id = '%s'";
String formattedQuery = String.format(queryTemplate, hostProject, hostDataset, dlpFindingsTable, project, dataset);
// Create a job ID so that we can safely retry.
Job queryJob = bqService.submitJob(formattedQuery);
TableResult result = bqService.waitAndGetJobResults(queryJob);
// Construct a mapping between field names and DLP infotypes
List<String> datasetTables = new ArrayList<>();
for (FieldValueList row : result.iterateAll()) {
if (row.get("table").isNull()) {
throw new NonRetryableApplicationException("processDatasets query returned rows with null 'job_name' field.");
String tableSpec = row.get("table").getStringValue();
return datasetTables;
use of in project bq-pii-classifier by GoogleCloudPlatform.
the class DispatcherTest method testWithInput.
// @Test
// public void testDispatcher_withDatasets () throws IOException {
// String jsonPayLoad = "{\"tablesInclude\":\"\""
// + ",\"tablesExclude\":\"p1.d1.t1\""
// + ",\"datasetsInclude\":\"p1.d1, p1.d2\""
// + ",\"datasetsExclude\":\"\""
// + ",\"projectsInclude\":\"p2\"" // should have no effect
// + "}";
// List<String> expectedOutput = Lists.newArrayList("p1.d1.t2", "p1.d2.t1", "p1.d2.t2");
// List<String> actualOutput = testWithInput(jsonPayLoad);
// assertEquals(expectedOutput, actualOutput);
// }
// @Test
// public void testDispatcher_withProjects () throws IOException {
// String jsonPayLoad = "{\"tablesInclude\":\"\""
// + ",\"tablesExclude\":\"p1.d2.t1\""
// + ",\"datasetsInclude\":\"\""
// + ",\"datasetsExclude\":\"p1.d1\""
// + ",\"projectsInclude\":\"p1, p2\"" // takes effect here: expected output contains p2 tables (tables/datasets includes are empty)
// + "}";
// List<String> expectedOutput = Lists.newArrayList("p1.d2.t2", "p2.d1.t1", "p2.d1.t2");
// List<String> actualOutput = testWithInput(jsonPayLoad);
// assertEquals(expectedOutput, actualOutput);
// }
/**
 * Test helper: runs the dispatcher for the given scope and returns the entity keys of the
 * successfully published messages.
 *
 * NOTE(review): as shown here the helper looks incomplete — see the notes on the lines below,
 * and the method's closing brace is not visible in this excerpt.
 *
 * @param bigQueryScope scope passed to {@code function.execute}
 * @return entity key of every {@code Operation} found in the results' success messages
 */
private List<String> testWithInput(BigQueryScope bigQueryScope) throws IOException, NonRetryableApplicationException, InterruptedException {
// NOTE(review): the only visible declaration of `function` is the commented-out line below,
// so `function` must be a field defined elsewhere or this does not compile — confirm.
// Dispatcher function = new Dispatcher(envMock, bqServiceMock, cloudTasksServiceMock);
PubSubPublishResults results = function.execute(bigQueryScope, "");
// NOTE(review): this mock is created and stubbed after execute() has already run and is never
// passed to `function` here, so the stub cannot affect `results` as written — confirm intent.
PubSubServiceImpl pubSubServiceMock = mock(PubSubServiceImpl.class);
// Stubbed publish result: two successes (p1.d1.t1, p1.d1.t2) and one failure.
lenient().when(pubSubServiceMock.publishTableOperationRequests(anyString(), anyString(), any())).thenReturn(new PubSubPublishResults(Arrays.asList(new SuccessPubSubMessage(new Operation("p1.d1.t1", "runId", "trackingId"), "publishedMessageId"), new SuccessPubSubMessage(new Operation("p1.d1.t2", "runId", "trackingId"), "publishedMessageId")), Arrays.asList(new FailedPubSubMessage(new Operation("", "", ""), new Exception("test fail message")))));
// Map each success message to the entity key of its Operation payload.
return results.getSuccessMessages().stream().map(x -> ((Operation) x.getMsg()).getEntityKey()).collect(Collectors.toList());
use of in project bq-pii-classifier by GoogleCloudPlatform.
the class InspectorController method receiveMessage.
/**
 * HTTP POST entry point ("/") that receives a Pub/Sub event, parses its payload into an
 * {@code Operation} and runs the {@code Inspector} for it.
 *
 * Returns 200 OK with a success message when the inspector completes; any exception is
 * delegated to {@code ControllerExceptionHelper.handleException}, which builds the response.
 *
 * NOTE(review): this excerpt is truncated — the closing brace of the null-check block and the
 * body of the {@code finally} clause are not visible here.
 *
 * @param requestBody the Pub/Sub event envelope deserialized from the request body
 * @return the HTTP response for the Pub/Sub push request
 */
@RequestMapping(value = "/", method = RequestMethod.POST)
public ResponseEntity receiveMessage(@RequestBody PubSubEvent requestBody) {
// Placeholder tracking id used for logging until the real one is parsed from the payload.
String trackingId = "0000000000000-z";
// Declared outside the try block so the finally clause can reach them.
DlpService dlpService = null;
BigQueryService bqService = null;
try {
// Reject requests without a body or without a Pub/Sub message.
if (requestBody == null || requestBody.getMessage() == null) {
String msg = "Bad Request: invalid message format";
logger.logSevereWithTracker(trackingId, msg);
throw new NonRetryableApplicationException("Request body or message is Null.");
// Decode the message data into a JSON string.
String requestJsonString = requestBody.getMessage().dataToUtf8String();
// Remove any escape characters (e.g. added by Terraform).
// NOTE(review): this strips every backslash, including escapes inside JSON string values — confirm that is acceptable.
requestJsonString = requestJsonString.replace("\\", "");
logger.logInfoWithTracker(trackingId, String.format("Received payload: %s", requestJsonString));
Operation operation = gson.fromJson(requestJsonString, Operation.class);
// From here on, log with the tracking id carried by the request.
trackingId = operation.getTrackingId();
logger.logInfoWithTracker(trackingId, String.format("Parsed Request: %s", operation.toString()));
// Services are created per request and assigned to the variables visible to the finally clause.
dlpService = new DlpServiceImpl();
bqService = new BigQueryServiceImpl();
Inspector inspector = new Inspector(environment.toConfig(), dlpService, bqService, new GCSPersistentSetImpl(environment.getGcsFlagsBucket()), "inspector-flags");
inspector.execute(operation, trackingId, requestBody.getMessage().getMessageId());
return new ResponseEntity("Process completed successfully.", HttpStatus.OK);
} catch (Exception e) {
// All failures, including the NonRetryableApplicationException thrown above, are handled by the shared helper.
return ControllerExceptionHelper.handleException(e, logger, trackingId);
} finally {
// Only act on the DLP service if it was actually created (remainder not visible in this excerpt).
if (dlpService != null) {
use of in project bq-pii-classifier by GoogleCloudPlatform.
the class StandardDlpResultsScannerImpl method listChildren.
public // We return the jobName and not the table spec because this column is clustered and the Tagger can utilize that for lookups
List<String> listChildren(String project, String dataset) throws InterruptedException, NonRetryableApplicationException {
// dlp job names start with unix timestamp. Max() will get us the latest job
String queryTemplate = "SELECT DISTINCT\n" + "l.record_location.record_key.big_query_key.table_reference.table_id,\n" + "MAX(job_name) AS latest_job_name\n" + "FROM \n" + "`%s.%s.%s`, UNNEST(location.content_locations) l\n" + "WHERE l.record_location.record_key.big_query_key.table_reference.project_id = '%s'\n" + "AND l.record_location.record_key.big_query_key.table_reference.dataset_id = '%s'\n" + "GROUP BY 1\n" + "ORDER BY 1,2 DESC\n";
String formattedQuery = String.format(queryTemplate, hostProject, hostDataset, dlpFindingsTable, project, dataset);
// Create a job ID so that we can safely retry.
Job queryJob = bqService.submitJob(formattedQuery);
TableResult result = bqService.waitAndGetJobResults(queryJob);
// Construct a mapping between field names and DLP infotypes
List<String> datasetTablesDlpJobs = new ArrayList<>();
for (FieldValueList row : result.iterateAll()) {
if (row.get("latest_job_name").isNull()) {
throw new NonRetryableApplicationException("processDatasets query returned rows with null 'latest_job_name' field.");
String jobName = row.get("latest_job_name").getStringValue();
return datasetTablesDlpJobs;