Search in sources :

Example 1 with SuccessPubSubMessage

use of com.google.cloud.pso.bq_pii_classifier.services.pubsub.SuccessPubSubMessage in project bq-pii-classifier by GoogleCloudPlatform.

the class DispatcherTest method testWithInput.

// @Test
// public void testDispatcher_withDatasets () throws IOException {
// 
// String jsonPayLoad = "{\"tablesInclude\":\"\""
// + ",\"tablesExclude\":\"p1.d1.t1\""
// + ",\"datasetsInclude\":\"p1.d1, p1.d2\""
// + ",\"datasetsExclude\":\"\""
// + ",\"projectsInclude\":\"p2\"" // should have no effect
// + "}";
// 
// List<String> expectedOutput = Lists.newArrayList("p1.d1.t2", "p1.d2.t1", "p1.d2.t2");
// List<String> actualOutput = testWithInput(jsonPayLoad);
// 
// assertEquals(expectedOutput, actualOutput);
// }
// 
// @Test
// public void testDispatcher_withProjects () throws IOException {
// 
// String jsonPayLoad = "{\"tablesInclude\":\"\""
// + ",\"tablesExclude\":\"p1.d2.t1\""
// + ",\"datasetsInclude\":\"\""
// + ",\"datasetsExclude\":\"p1.d1\""
// + ",\"projectsInclude\":\"p1, p2\"" // drives the scope here: tablesInclude and datasetsInclude are empty
// + "}";
// 
// List<String> expectedOutput = Lists.newArrayList("p1.d2.t2", "p2.d1.t1", "p2.d1.t2");
// List<String> actualOutput = testWithInput(jsonPayLoad);
// 
// assertEquals(expectedOutput, actualOutput);
// }
/**
 * Runs {@code function.execute} for the given scope (with an empty PubSub message ID) and
 * returns the entity keys of the messages reported as successfully published.
 *
 * @param bigQueryScope the include/exclude scope handed to the dispatcher
 * @return entity key of every success message in the returned publish results, in stream order
 * @throws IOException declared by {@code execute}
 * @throws NonRetryableApplicationException declared by {@code execute}
 * @throws InterruptedException declared by {@code execute}
 */
private List<String> testWithInput(BigQueryScope bigQueryScope) throws IOException, NonRetryableApplicationException, InterruptedException {
    // `function` is declared outside this method (presumably the Dispatcher under test).
    PubSubPublishResults publishResults = function.execute(bigQueryScope, "");

    // NOTE(review): this stub is configured AFTER execute() has already run, and on a mock
    // created locally that is not handed to `function` anywhere in this method. As written it
    // does not appear to influence `publishResults` -- confirm whether the stubbing was meant
    // to happen before the call on the mock that is injected into `function`.
    PubSubServiceImpl stubbedPubSub = mock(PubSubServiceImpl.class);
    lenient()
            .when(stubbedPubSub.publishTableOperationRequests(anyString(), anyString(), any()))
            .thenReturn(new PubSubPublishResults(
                    Arrays.asList(
                            new SuccessPubSubMessage(new Operation("p1.d1.t1", "runId", "trackingId"), "publishedMessageId"),
                            new SuccessPubSubMessage(new Operation("p1.d1.t2", "runId", "trackingId"), "publishedMessageId")),
                    Arrays.asList(
                            new FailedPubSubMessage(new Operation("", "", ""), new Exception("test fail message")))));

    // Each success message wraps an Operation; its entity key is what the tests compare.
    return publishResults.getSuccessMessages().stream()
            .map(published -> (Operation) published.getMsg())
            .map(Operation::getEntityKey)
            .collect(Collectors.toList());
}
Also used : ArgumentMatchers.any(org.mockito.ArgumentMatchers.any) Arrays(java.util.Arrays) DispatcherType(com.google.cloud.pso.bq_pii_classifier.entities.DispatcherType) Mock(org.mockito.Mock) RunWith(org.junit.runner.RunWith) PubSubServiceImpl(com.google.cloud.pso.bq_pii_classifier.services.pubsub.PubSubServiceImpl) SolutionMode(com.google.cloud.pso.bq_pii_classifier.entities.SolutionMode) PubSubPublishResults(com.google.cloud.pso.bq_pii_classifier.services.pubsub.PubSubPublishResults) Mockito.lenient(org.mockito.Mockito.lenient) ArrayList(java.util.ArrayList) StandardDlpResultsScannerImpl(com.google.cloud.pso.bq_pii_classifier.services.scan.StandardDlpResultsScannerImpl) Lists(com.google.common.collect.Lists) Before(org.junit.Before) InjectMocks(org.mockito.InjectMocks) BigQueryServiceImpl(com.google.cloud.pso.bq_pii_classifier.services.bq.BigQueryServiceImpl) NonRetryableApplicationException(com.google.cloud.pso.bq_pii_classifier.entities.NonRetryableApplicationException) FailedPubSubMessage(com.google.cloud.pso.bq_pii_classifier.services.pubsub.FailedPubSubMessage) Test(org.junit.Test) IOException(java.io.IOException) Collectors(java.util.stream.Collectors) Operation(com.google.cloud.pso.bq_pii_classifier.entities.Operation) List(java.util.List) SuccessPubSubMessage(com.google.cloud.pso.bq_pii_classifier.services.pubsub.SuccessPubSubMessage) MockitoJUnitRunner(org.mockito.junit.MockitoJUnitRunner) Assert.assertEquals(org.junit.Assert.assertEquals) ArgumentMatchers.anyString(org.mockito.ArgumentMatchers.anyString) Mockito.mock(org.mockito.Mockito.mock) PubSubPublishResults(com.google.cloud.pso.bq_pii_classifier.services.pubsub.PubSubPublishResults) FailedPubSubMessage(com.google.cloud.pso.bq_pii_classifier.services.pubsub.FailedPubSubMessage) Operation(com.google.cloud.pso.bq_pii_classifier.entities.Operation) SuccessPubSubMessage(com.google.cloud.pso.bq_pii_classifier.services.pubsub.SuccessPubSubMessage) 
NonRetryableApplicationException(com.google.cloud.pso.bq_pii_classifier.entities.NonRetryableApplicationException) IOException(java.io.IOException) PubSubServiceImpl(com.google.cloud.pso.bq_pii_classifier.services.pubsub.PubSubServiceImpl)

Example 2 with SuccessPubSubMessage

use of com.google.cloud.pso.bq_pii_classifier.services.pubsub.SuccessPubSubMessage in project bq-pii-classifier by GoogleCloudPlatform.

the class Dispatcher method execute.

/**
 * Resolves the tables in scope, publishes one request per table to the configured output
 * topic, logs the outcome of every publish attempt, and returns the publish results.
 *
 * <p>Scope resolution is bottom-up (TABLES &gt; DATASETS &gt; PROJECTS); the first non-empty
 * include list wins and the other include lists are ignored:
 * <ul>
 *   <li>table include list set: only those tables, passing the table exclude list along;</li>
 *   <li>else dataset include list set: tables of those datasets, passing the dataset and
 *       table exclude lists along;</li>
 *   <li>else project include list set: datasets and tables of those projects, passing the
 *       dataset and table exclude lists along.</li>
 * </ul>
 *
 * @param bqScope include/exclude lists describing which BigQuery resources to process
 * @param pubSubMessageId ID of the triggering PubSub message, used as a de-duplication key
 * @return the successful and failed publish attempts as reported by the PubSub service
 * @throws NonRetryableApplicationException if this message ID was already recorded as
 *         processed, or if all three include lists are empty
 * @throws IOException declared; raised by collaborators not visible in this method
 * @throws InterruptedException declared; raised by collaborators not visible in this method
 */
public PubSubPublishResults execute(BigQueryScope bqScope, String pubSubMessageId) throws IOException, NonRetryableApplicationException, InterruptedException {
    // De-duplication guard: avoid re-running the dispatcher (and the whole downstream process)
    // when PubSub unexpectedly re-sends a message. This is an extra measure against unnecessary
    // cost. Per the original notes, the set is backed by simple flag files in GCS named after
    // the PubSub message ID.
    String flagFileName = String.format("%s/%s", persistentSetObjectPrefix, pubSubMessageId);
    if (persistentSet.contains(flagFileName)) {
        // Non-retryable so that the message gets ACKed and PubSub stops retrying.
        throw new NonRetryableApplicationException(String.format("PubSub message ID '%s' has been processed before by the dispatcher. The message should be ACK to PubSub to stop retries. Please investigate further why the message was retried in the first place.", pubSubMessageId));
    }
    logger.logInfoWithTracker(runId, String.format("Persisting processing key for PubSub message ID %s", pubSubMessageId));
    persistentSet.add(flagFileName);

    // Work out which tables to publish a request for; the narrowest non-empty include list wins.
    List<JsonMessage> pubSubMessagesToPublish;
    if (!bqScope.getTableIncludeList().isEmpty()) {
        pubSubMessagesToPublish = processTables(
                bqScope.getTableIncludeList(),
                bqScope.getTableExcludeList());
    } else if (!bqScope.getDatasetIncludeList().isEmpty()) {
        pubSubMessagesToPublish = processDatasets(
                bqScope.getDatasetIncludeList(),
                bqScope.getDatasetExcludeList(),
                bqScope.getTableExcludeList(),
                config.getDataRegionId());
    } else if (!bqScope.getProjectIncludeList().isEmpty()) {
        pubSubMessagesToPublish = processProjects(
                bqScope.getProjectIncludeList(),
                bqScope.getDatasetExcludeList(),
                bqScope.getTableExcludeList(),
                config.getDataRegionId());
    } else {
        throw new NonRetryableApplicationException("At least one of of the following params must be not empty [tableIncludeList, datasetIncludeList, projectIncludeList]");
    }

    // Publish the collected requests to the output topic.
    PubSubPublishResults publishResults = pubSubService.publishTableOperationRequests(
            config.getProjectId(),
            config.getOutputTopic(),
            pubSubMessagesToPublish);

    // Failed publishes are only logged as warnings; they do not abort the run.
    for (FailedPubSubMessage failed : publishResults.getFailedMessages()) {
        logger.logWarnWithTracker(runId, String.format("Failed to publish this messages %s", failed.toString()));
    }

    // Log the tracking ID of every dispatched request so that the progress of this run can be
    // followed and requests that fail in later stages (i.e. Tagger) can be detected.
    for (SuccessPubSubMessage published : publishResults.getSuccessMessages()) {
        Operation dispatched = (Operation) published.getMsg();
        boolean logWithTableSpec = config.getDispatcherType().equals(DispatcherType.INSPECTION)
                || config.getSolutionMode().equals(SolutionMode.AUTO_DLP);
        if (logWithTableSpec) {
            // Inspection dispatcher and Auto DLP mode: the entity key is parsed as a table spec
            // and logged next to the tracking ID.
            TableSpec tableSpec = TableSpec.fromSqlString(dispatched.getEntityKey());
            logger.logSuccessDispatcherTrackingId(runId, dispatched.getTrackingId(), tableSpec);
        } else {
            // Any other dispatcher type / solution mode: only the tracking ID is logged.
            logger.logSuccessDispatcherTrackingId(runId, dispatched.getTrackingId());
        }
    }

    logger.logFunctionEnd(runId);
    return publishResults;
}
Also used : PubSubPublishResults(com.google.cloud.pso.bq_pii_classifier.services.pubsub.PubSubPublishResults) FailedPubSubMessage(com.google.cloud.pso.bq_pii_classifier.services.pubsub.FailedPubSubMessage) SuccessPubSubMessage(com.google.cloud.pso.bq_pii_classifier.services.pubsub.SuccessPubSubMessage)

Aggregations

FailedPubSubMessage (com.google.cloud.pso.bq_pii_classifier.services.pubsub.FailedPubSubMessage)2 PubSubPublishResults (com.google.cloud.pso.bq_pii_classifier.services.pubsub.PubSubPublishResults)2 SuccessPubSubMessage (com.google.cloud.pso.bq_pii_classifier.services.pubsub.SuccessPubSubMessage)2 DispatcherType (com.google.cloud.pso.bq_pii_classifier.entities.DispatcherType)1 NonRetryableApplicationException (com.google.cloud.pso.bq_pii_classifier.entities.NonRetryableApplicationException)1 Operation (com.google.cloud.pso.bq_pii_classifier.entities.Operation)1 SolutionMode (com.google.cloud.pso.bq_pii_classifier.entities.SolutionMode)1 BigQueryServiceImpl (com.google.cloud.pso.bq_pii_classifier.services.bq.BigQueryServiceImpl)1 PubSubServiceImpl (com.google.cloud.pso.bq_pii_classifier.services.pubsub.PubSubServiceImpl)1 StandardDlpResultsScannerImpl (com.google.cloud.pso.bq_pii_classifier.services.scan.StandardDlpResultsScannerImpl)1 Lists (com.google.common.collect.Lists)1 IOException (java.io.IOException)1 ArrayList (java.util.ArrayList)1 Arrays (java.util.Arrays)1 List (java.util.List)1 Collectors (java.util.stream.Collectors)1 Assert.assertEquals (org.junit.Assert.assertEquals)1 Before (org.junit.Before)1 Test (org.junit.Test)1 RunWith (org.junit.runner.RunWith)1