Search in sources:

Example 1 with NonRetryableApplicationException

use of com.google.cloud.pso.bq_pii_classifier.entities.NonRetryableApplicationException in project bq-pii-classifier by GoogleCloudPlatform.

In class AutoDlpResultsScannerImpl, method listParents:

/**
 * Lists the distinct datasets of {@code project} that appear in the DLP findings table and
 * that also have a matching entry in the project's {@code INFORMATION_SCHEMA.SCHEMATA}.
 *
 * @param project id of the project whose datasets should be listed
 * @return dataset specs in the form {@code "project.dataset"}
 * @throws NonRetryableApplicationException if a result row has a null {@code dataset} field
 * @throws InterruptedException if thrown by the underlying BigQuery service calls
 */
@Override
public List<String> listParents(String project) throws NonRetryableApplicationException, InterruptedException {
    final List<String> datasetSpecs = new ArrayList<>();

    // The target project is substituted twice: once for the INFORMATION_SCHEMA join
    // and once for the dataset_project_id filter.
    final String sql = String.format(
            "SELECT DISTINCT "
                    + "CONCAT(column_profile.dataset_project_id, '.', column_profile.dataset_id) AS dataset "
                    + "FROM %s.%s.%s r "
                    + "INNER JOIN %s.INFORMATION_SCHEMA.SCHEMATA s ON s.schema_name = r.column_profile.dataset_id "
                    + "WHERE r.column_profile.dataset_project_id = '%s'",
            hostProject, hostDataset, dlpFindingsTable, project, project);

    // Submit the query and block until its result set is available.
    final TableResult rows = bqService.waitAndGetJobResults(bqService.submitJob(sql));

    // Collect one "project.dataset" spec per row; a null value aborts the whole listing.
    for (FieldValueList row : rows.iterateAll()) {
        if (!row.get("dataset").isNull()) {
            datasetSpecs.add(row.get("dataset").getStringValue());
            continue;
        }
        throw new NonRetryableApplicationException("processProjects query returned rows with null 'dataset' field.");
    }
    return datasetSpecs;
}
Also used : TableResult(com.google.cloud.bigquery.TableResult) ArrayList(java.util.ArrayList) NonRetryableApplicationException(com.google.cloud.pso.bq_pii_classifier.entities.NonRetryableApplicationException) FieldValueList(com.google.cloud.bigquery.FieldValueList) Job(com.google.cloud.bigquery.Job)

Example 2 with NonRetryableApplicationException

use of com.google.cloud.pso.bq_pii_classifier.entities.NonRetryableApplicationException in project bq-pii-classifier by GoogleCloudPlatform.

In class AutoDlpResultsScannerImpl, method listChildren:

/**
 * Lists the distinct tables of the given dataset that appear in the DLP findings table.
 *
 * @param project id of the project that owns the dataset
 * @param dataset id of the dataset whose tables should be listed
 * @return table specs in the form {@code "project.dataset.table"}
 * @throws InterruptedException if thrown by the underlying BigQuery service calls
 * @throws NonRetryableApplicationException if a result row has a null {@code table} field
 */
@Override
public List<String> listChildren(String project, String dataset) throws InterruptedException, NonRetryableApplicationException {
    String queryTemplate = "SELECT DISTINCT CONCAT(column_profile.dataset_project_id, '.', column_profile.dataset_id, '.', column_profile.table_id) AS table FROM %s.%s.%s WHERE column_profile.dataset_project_id = '%s' AND column_profile.dataset_id = '%s'";
    String formattedQuery = String.format(queryTemplate, hostProject, hostDataset, dlpFindingsTable, project, dataset);

    // Submit the query and block until its result set is available.
    Job queryJob = bqService.submitJob(formattedQuery);
    TableResult result = bqService.waitAndGetJobResults(queryJob);

    // Collect one "project.dataset.table" spec per result row.
    List<String> datasetTables = new ArrayList<>();
    for (FieldValueList row : result.iterateAll()) {
        if (row.get("table").isNull()) {
            // The message names the column that was actually checked ('table').
            throw new NonRetryableApplicationException("processDatasets query returned rows with null 'table' field.");
        }
        String tableSpec = row.get("table").getStringValue();
        datasetTables.add(tableSpec);
    }
    return datasetTables;
}
Also used : TableResult(com.google.cloud.bigquery.TableResult) ArrayList(java.util.ArrayList) NonRetryableApplicationException(com.google.cloud.pso.bq_pii_classifier.entities.NonRetryableApplicationException) FieldValueList(com.google.cloud.bigquery.FieldValueList) Job(com.google.cloud.bigquery.Job)

Example 3 with NonRetryableApplicationException

use of com.google.cloud.pso.bq_pii_classifier.entities.NonRetryableApplicationException in project bq-pii-classifier by GoogleCloudPlatform.

In class DispatcherTest, method testWithInput:

// @Test
// public void testDispatcher_withDatasets () throws IOException {
// 
// String jsonPayLoad = "{\"tablesInclude\":\"\""
// + ",\"tablesExclude\":\"p1.d1.t1\""
// + ",\"datasetsInclude\":\"p1.d1, p1.d2\""
// + ",\"datasetsExclude\":\"\""
// + ",\"projectsInclude\":\"p2\"" // should have no effect
// + "}";
// 
// List<String> expectedOutput = Lists.newArrayList("p1.d1.t2", "p1.d2.t1", "p1.d2.t2");
// List<String> actualOutput = testWithInput(jsonPayLoad);
// 
// assertEquals(expectedOutput, actualOutput);
// }
// 
// @Test
// public void testDispatcher_withProjects () throws IOException {
// 
// String jsonPayLoad = "{\"tablesInclude\":\"\""
// + ",\"tablesExclude\":\"p1.d2.t1\""
// + ",\"datasetsInclude\":\"\""
// + ",\"datasetsExclude\":\"p1.d1\""
// + ",\"projectsInclude\":\"p1, p2\"" // should have no effect
// + "}";
// 
// List<String> expectedOutput = Lists.newArrayList("p1.d2.t2", "p2.d1.t1", "p2.d1.t2");
// List<String> actualOutput = testWithInput(jsonPayLoad);
// 
// assertEquals(expectedOutput, actualOutput);
// }
/**
 * Executes {@code function} for the given scope and returns the entity keys of the messages
 * reported as successfully published.
 *
 * @param bigQueryScope scope passed to {@code function.execute}
 * @return entity key of every {@link Operation} in the success messages of the result
 */
private List<String> testWithInput(BigQueryScope bigQueryScope) throws IOException, NonRetryableApplicationException, InterruptedException {
    // Dispatcher function = new Dispatcher(envMock, bqServiceMock, cloudTasksServiceMock);
    PubSubPublishResults results = function.execute(bigQueryScope, "");

    // NOTE(review): this mock is created and stubbed after execute() has already run and is
    // not handed to `function` anywhere in this method, so the stubbing below does not appear
    // to influence `results` - confirm whether it was meant to be set up beforehand.
    PubSubServiceImpl pubSubServiceMock = mock(PubSubServiceImpl.class);

    // Canned publish outcome: two successful messages and one failure.
    SuccessPubSubMessage firstSuccess =
            new SuccessPubSubMessage(new Operation("p1.d1.t1", "runId", "trackingId"), "publishedMessageId");
    SuccessPubSubMessage secondSuccess =
            new SuccessPubSubMessage(new Operation("p1.d1.t2", "runId", "trackingId"), "publishedMessageId");
    FailedPubSubMessage failure =
            new FailedPubSubMessage(new Operation("", "", ""), new Exception("test fail message"));
    PubSubPublishResults cannedResults =
            new PubSubPublishResults(Arrays.asList(firstSuccess, secondSuccess), Arrays.asList(failure));

    lenient()
            .when(pubSubServiceMock.publishTableOperationRequests(anyString(), anyString(), any()))
            .thenReturn(cannedResults);

    // Each success message carries an Operation; only its entity key is returned.
    return results.getSuccessMessages().stream()
            .map(published -> (Operation) published.getMsg())
            .map(Operation::getEntityKey)
            .collect(Collectors.toList());
}
Also used : ArgumentMatchers.any(org.mockito.ArgumentMatchers.any) Arrays(java.util.Arrays) DispatcherType(com.google.cloud.pso.bq_pii_classifier.entities.DispatcherType) Mock(org.mockito.Mock) RunWith(org.junit.runner.RunWith) PubSubServiceImpl(com.google.cloud.pso.bq_pii_classifier.services.pubsub.PubSubServiceImpl) SolutionMode(com.google.cloud.pso.bq_pii_classifier.entities.SolutionMode) PubSubPublishResults(com.google.cloud.pso.bq_pii_classifier.services.pubsub.PubSubPublishResults) Mockito.lenient(org.mockito.Mockito.lenient) ArrayList(java.util.ArrayList) StandardDlpResultsScannerImpl(com.google.cloud.pso.bq_pii_classifier.services.scan.StandardDlpResultsScannerImpl) Lists(com.google.common.collect.Lists) Before(org.junit.Before) InjectMocks(org.mockito.InjectMocks) BigQueryServiceImpl(com.google.cloud.pso.bq_pii_classifier.services.bq.BigQueryServiceImpl) NonRetryableApplicationException(com.google.cloud.pso.bq_pii_classifier.entities.NonRetryableApplicationException) FailedPubSubMessage(com.google.cloud.pso.bq_pii_classifier.services.pubsub.FailedPubSubMessage) Test(org.junit.Test) IOException(java.io.IOException) Collectors(java.util.stream.Collectors) Operation(com.google.cloud.pso.bq_pii_classifier.entities.Operation) List(java.util.List) SuccessPubSubMessage(com.google.cloud.pso.bq_pii_classifier.services.pubsub.SuccessPubSubMessage) MockitoJUnitRunner(org.mockito.junit.MockitoJUnitRunner) Assert.assertEquals(org.junit.Assert.assertEquals) ArgumentMatchers.anyString(org.mockito.ArgumentMatchers.anyString) Mockito.mock(org.mockito.Mockito.mock) PubSubPublishResults(com.google.cloud.pso.bq_pii_classifier.services.pubsub.PubSubPublishResults) FailedPubSubMessage(com.google.cloud.pso.bq_pii_classifier.services.pubsub.FailedPubSubMessage) Operation(com.google.cloud.pso.bq_pii_classifier.entities.Operation) SuccessPubSubMessage(com.google.cloud.pso.bq_pii_classifier.services.pubsub.SuccessPubSubMessage) 
NonRetryableApplicationException(com.google.cloud.pso.bq_pii_classifier.entities.NonRetryableApplicationException) IOException(java.io.IOException) PubSubServiceImpl(com.google.cloud.pso.bq_pii_classifier.services.pubsub.PubSubServiceImpl)

Example 4 with NonRetryableApplicationException

use of com.google.cloud.pso.bq_pii_classifier.entities.NonRetryableApplicationException in project bq-pii-classifier by GoogleCloudPlatform.

In class InspectorController, method receiveMessage:

/**
 * Handles a POST to {@code "/"} carrying a PubSub event: parses the message payload into an
 * {@link Operation} and runs the {@link Inspector} for it.
 *
 * @param requestBody the PubSub event; a null body or null message is rejected
 * @return {@code 200 OK} when processing completes; otherwise whatever
 *     {@code ControllerExceptionHelper.handleException} returns for the caught exception
 */
@RequestMapping(value = "/", method = RequestMethod.POST)
public ResponseEntity receiveMessage(@RequestBody PubSubEvent requestBody) {
    // Placeholder tracking id used for logging until the real one is parsed from the payload.
    String trackingId = "0000000000000-z";
    // Declared outside the try block so that it can be shut down in finally.
    DlpService dlpService = null;
    try {
        if (requestBody == null || requestBody.getMessage() == null) {
            String msg = "Bad Request: invalid message format";
            logger.logSevereWithTracker(trackingId, msg);
            throw new NonRetryableApplicationException("Request body or message is Null.");
        }
        String requestJsonString = requestBody.getMessage().dataToUtf8String();
        // remove any escape characters (e.g. from Terraform)
        requestJsonString = requestJsonString.replace("\\", "");
        logger.logInfoWithTracker(trackingId, String.format("Received payload: %s", requestJsonString));
        Operation operation = gson.fromJson(requestJsonString, Operation.class);
        if (operation == null) {
            // Gson returns null for an empty payload; fail explicitly instead of hitting a
            // NullPointerException on the getTrackingId() call below.
            throw new NonRetryableApplicationException("Request payload is empty; no Operation could be parsed.");
        }
        trackingId = operation.getTrackingId();
        logger.logInfoWithTracker(trackingId, String.format("Parsed Request: %s", operation.toString()));
        dlpService = new DlpServiceImpl();
        BigQueryService bqService = new BigQueryServiceImpl();
        Inspector inspector = new Inspector(environment.toConfig(), dlpService, bqService, new GCSPersistentSetImpl(environment.getGcsFlagsBucket()), "inspector-flags");
        inspector.execute(operation, trackingId, requestBody.getMessage().getMessageId());
        return new ResponseEntity("Process completed successfully.", HttpStatus.OK);
    } catch (Exception e) {
        // Boundary catch: every failure is translated into an HTTP response by the helper.
        return ControllerExceptionHelper.handleException(e, logger, trackingId);
    } finally {
        // Shut the DLP service down on every path on which it was created.
        if (dlpService != null) {
            dlpService.shutDown();
        }
    }
}
Also used : DlpServiceImpl(com.google.cloud.pso.bq_pii_classifier.services.dlp.DlpServiceImpl) ResponseEntity(org.springframework.http.ResponseEntity) BigQueryServiceImpl(com.google.cloud.pso.bq_pii_classifier.services.bq.BigQueryServiceImpl) BigQueryService(com.google.cloud.pso.bq_pii_classifier.services.bq.BigQueryService) NonRetryableApplicationException(com.google.cloud.pso.bq_pii_classifier.entities.NonRetryableApplicationException) Inspector(com.google.cloud.pso.bq_pii_classifier.functions.inspector.Inspector) Operation(com.google.cloud.pso.bq_pii_classifier.entities.Operation) GCSPersistentSetImpl(com.google.cloud.pso.bq_pii_classifier.services.set.GCSPersistentSetImpl) DlpService(com.google.cloud.pso.bq_pii_classifier.services.dlp.DlpService) NonRetryableApplicationException(com.google.cloud.pso.bq_pii_classifier.entities.NonRetryableApplicationException) RequestMapping(org.springframework.web.bind.annotation.RequestMapping)

Example 5 with NonRetryableApplicationException

use of com.google.cloud.pso.bq_pii_classifier.entities.NonRetryableApplicationException in project bq-pii-classifier by GoogleCloudPlatform.

In class StandardDlpResultsScannerImpl, method listChildren:

/**
 * Lists, for every table of the given dataset found in the DLP findings table, the name of
 * its latest DLP job.
 *
 * <p>We return the job name and not the table spec because this column is clustered and the
 * Tagger can utilize that for lookups.
 *
 * @param project id of the project that owns the dataset
 * @param dataset id of the dataset whose tables are inspected
 * @return one {@code latest_job_name} value per table returned by the query
 * @throws InterruptedException if thrown by the underlying BigQuery service calls
 * @throws NonRetryableApplicationException if a result row has a null {@code latest_job_name}
 */
@Override
public List<String> listChildren(String project, String dataset) throws InterruptedException, NonRetryableApplicationException {
    final List<String> latestJobNames = new ArrayList<>();

    // dlp job names start with unix timestamp. Max() will get us the latest job
    final String sql = String.format(
            "SELECT DISTINCT\n"
                    + "l.record_location.record_key.big_query_key.table_reference.table_id,\n"
                    + "MAX(job_name) AS latest_job_name\n"
                    + "FROM \n"
                    + "`%s.%s.%s`, UNNEST(location.content_locations) l\n"
                    + "WHERE l.record_location.record_key.big_query_key.table_reference.project_id = '%s'\n"
                    + "AND l.record_location.record_key.big_query_key.table_reference.dataset_id  = '%s'\n"
                    + "GROUP BY 1\n"
                    + "ORDER BY 1,2 DESC\n",
            hostProject, hostDataset, dlpFindingsTable, project, dataset);

    // Submit the query and block until its result set is available.
    final TableResult rows = bqService.waitAndGetJobResults(bqService.submitJob(sql));

    // Collect the job name of each row; a null value aborts the whole listing.
    for (FieldValueList row : rows.iterateAll()) {
        if (!row.get("latest_job_name").isNull()) {
            latestJobNames.add(row.get("latest_job_name").getStringValue());
            continue;
        }
        throw new NonRetryableApplicationException("processDatasets query returned rows with null 'latest_job_name' field.");
    }
    return latestJobNames;
}
Also used : TableResult(com.google.cloud.bigquery.TableResult) ArrayList(java.util.ArrayList) NonRetryableApplicationException(com.google.cloud.pso.bq_pii_classifier.entities.NonRetryableApplicationException) FieldValueList(com.google.cloud.bigquery.FieldValueList) Job(com.google.cloud.bigquery.Job)

Aggregations

NonRetryableApplicationException (com.google.cloud.pso.bq_pii_classifier.entities.NonRetryableApplicationException)13 FieldValueList (com.google.cloud.bigquery.FieldValueList)6 Job (com.google.cloud.bigquery.Job)6 TableResult (com.google.cloud.bigquery.TableResult)6 BigQueryServiceImpl (com.google.cloud.pso.bq_pii_classifier.services.bq.BigQueryServiceImpl)5 ArrayList (java.util.ArrayList)5 Operation (com.google.cloud.pso.bq_pii_classifier.entities.Operation)4 GCSPersistentSetImpl (com.google.cloud.pso.bq_pii_classifier.services.set.GCSPersistentSetImpl)4 ResponseEntity (org.springframework.http.ResponseEntity)4 RequestMapping (org.springframework.web.bind.annotation.RequestMapping)4 PubSubPublishResults (com.google.cloud.pso.bq_pii_classifier.services.pubsub.PubSubPublishResults)3 PubSubServiceImpl (com.google.cloud.pso.bq_pii_classifier.services.pubsub.PubSubServiceImpl)3 TablePolicyTags (com.google.cloud.pso.bq_pii_classifier.entities.TablePolicyTags)2 TableSpec (com.google.cloud.pso.bq_pii_classifier.entities.TableSpec)2 BigQueryScope (com.google.cloud.pso.bq_pii_classifier.functions.dispatcher.BigQueryScope)2 Dispatcher (com.google.cloud.pso.bq_pii_classifier.functions.dispatcher.Dispatcher)2 BigQueryService (com.google.cloud.pso.bq_pii_classifier.services.bq.BigQueryService)2 InvalidProtocolBufferException (com.google.protobuf.InvalidProtocolBufferException)2 HashMap (java.util.HashMap)2 DispatcherType (com.google.cloud.pso.bq_pii_classifier.entities.DispatcherType)1