use of com.google.cloud.pso.bq_pii_classifier.services.pubsub.SuccessPubSubMessage in project bq-pii-classifier by GoogleCloudPlatform.
the class DispatcherTest method testWithInput.
// @Test
// public void testDispatcher_withDatasets () throws IOException {
//
// String jsonPayLoad = "{\"tablesInclude\":\"\""
// + ",\"tablesExclude\":\"p1.d1.t1\""
// + ",\"datasetsInclude\":\"p1.d1, p1.d2\""
// + ",\"datasetsExclude\":\"\""
// + ",\"projectsInclude\":\"p2\"" // should have no effect
// + "}";
//
// List<String> expectedOutput = Lists.newArrayList("p1.d1.t2", "p1.d2.t1", "p1.d2.t2");
// List<String> actualOutput = testWithInput(jsonPayLoad);
//
// assertEquals(expectedOutput, actualOutput);
// }
//
// @Test
// public void testDispatcher_withProjects () throws IOException {
//
// String jsonPayLoad = "{\"tablesInclude\":\"\""
// + ",\"tablesExclude\":\"p1.d2.t1\""
// + ",\"datasetsInclude\":\"\""
// + ",\"datasetsExclude\":\"p1.d1\""
// + ",\"projectsInclude\":\"p1, p2\"" // takes effect here: tablesInclude and datasetsInclude are both empty
// + "}";
//
// List<String> expectedOutput = Lists.newArrayList("p1.d2.t2", "p2.d1.t1", "p2.d1.t2");
// List<String> actualOutput = testWithInput(jsonPayLoad);
//
// assertEquals(expectedOutput, actualOutput);
// }
/**
 * Runs the dispatcher under test with the given scope and an empty PubSub message ID, then
 * returns the entity keys of the messages reported as successfully published.
 *
 * @param bigQueryScope scope passed straight through to {@code function.execute}
 * @return the entity key of every {@link Operation} found in the success messages, in the
 *     order they appear in the publish results
 * @throws IOException propagated from {@code function.execute}
 * @throws NonRetryableApplicationException propagated from {@code function.execute}
 * @throws InterruptedException propagated from {@code function.execute}
 */
private List<String> testWithInput(BigQueryScope bigQueryScope) throws IOException, NonRetryableApplicationException, InterruptedException {
    // `function` is not declared in this method; presumably it is the Dispatcher instance
    // built elsewhere in this test class.
    PubSubPublishResults dispatched = function.execute(bigQueryScope, "");

    // NOTE(review): this mock is created and stubbed only after execute() has returned, and it is
    // never handed to `function` inside this method, so it cannot affect `dispatched`.
    // Confirm whether it was meant to be wired into the dispatcher before the call.
    PubSubServiceImpl pubSubServiceMock = mock(PubSubServiceImpl.class);

    // Canned publish outcome: two successes and one failure.
    SuccessPubSubMessage firstSuccess =
            new SuccessPubSubMessage(new Operation("p1.d1.t1", "runId", "trackingId"), "publishedMessageId");
    SuccessPubSubMessage secondSuccess =
            new SuccessPubSubMessage(new Operation("p1.d1.t2", "runId", "trackingId"), "publishedMessageId");
    FailedPubSubMessage failure =
            new FailedPubSubMessage(new Operation("", "", ""), new Exception("test fail message"));
    PubSubPublishResults cannedResults =
            new PubSubPublishResults(Arrays.asList(firstSuccess, secondSuccess), Arrays.asList(failure));

    lenient()
            .when(pubSubServiceMock.publishTableOperationRequests(anyString(), anyString(), any()))
            .thenReturn(cannedResults);

    // Each success message carries an Operation; its entity key is what callers compare against.
    return dispatched.getSuccessMessages().stream()
            .map(published -> (Operation) published.getMsg())
            .map(Operation::getEntityKey)
            .collect(Collectors.toList());
}
use of com.google.cloud.pso.bq_pii_classifier.services.pubsub.SuccessPubSubMessage in project bq-pii-classifier by GoogleCloudPlatform.
the class Dispatcher method execute.
/**
 * Resolves the requested BigQuery scope into per-table requests, publishes them to the
 * configured output topic, and logs the outcome of every publish attempt.
 *
 * @param bqScope include/exclude lists for tables, datasets and projects
 * @param pubSubMessageId ID of the triggering PubSub message, used as a de-duplication key
 * @return the publish results, containing both the successful and the failed messages
 * @throws NonRetryableApplicationException if {@code pubSubMessageId} is already recorded in the
 *     persistent set, or if all three include lists are empty
 * @throws IOException declared by this method; presumably raised by the resource-listing or
 *     publishing calls (not visible here)
 * @throws InterruptedException declared by this method; presumably raised by the
 *     resource-listing or publishing calls (not visible here)
 */
public PubSubPublishResults execute(BigQueryScope bqScope, String pubSubMessageId) throws IOException, NonRetryableApplicationException, InterruptedException {
    // De-duplication guard: if PubSub unexpectedly re-sends a message, do not re-run the
    // dispatcher (and the whole downstream process). This is an extra measure against
    // unnecessary cost. Processed message IDs are tracked through `persistentSet`, keyed by
    // "<prefix>/<pubSubMessageId>" (per the original comment: simple flag files in GCS).
    String processedKey = String.format("%s/%s", persistentSetObjectPrefix, pubSubMessageId);
    if (persistentSet.contains(processedKey)) {
        // Non-retryable, so the caller can ACK the message and stop the retries.
        throw new NonRetryableApplicationException(String.format("PubSub message ID '%s' has been processed before by the dispatcher. The message should be ACK to PubSub to stop retries. Please investigate further why the message was retried in the first place.", pubSubMessageId));
    }
    logger.logInfoWithTracker(runId, String.format("Persisting processing key for PubSub message ID %s", pubSubMessageId));
    // NOTE(review): the key is persisted before any work is done, so a failure further down
    // leaves it in place and a redelivery of the same message ID would presumably be rejected
    // by the guard above. Confirm this is the intended trade-off.
    persistentSet.add(processedKey);

    // Scope resolution is bottom-up, TABLES > DATASETS > PROJECTS: the most specific non-empty
    // INCLUDE list wins and the other INCLUDE lists are ignored.
    //   TABLES_INCLUDE given   -> only these tables, minus TABLES_EXCLUDE
    //   DATASETS_INCLUDE given -> tables in these datasets, minus DATASETS_EXCLUDE and
    //                             TABLES_EXCLUDE
    //   PROJECTS_INCLUDE given -> datasets and tables in these projects, minus
    //                             DATASETS_EXCLUDE and TABLES_EXCLUDE
    List<JsonMessage> requests;
    if (!bqScope.getTableIncludeList().isEmpty()) {
        requests = processTables(
                bqScope.getTableIncludeList(),
                bqScope.getTableExcludeList());
    } else if (!bqScope.getDatasetIncludeList().isEmpty()) {
        requests = processDatasets(
                bqScope.getDatasetIncludeList(),
                bqScope.getDatasetExcludeList(),
                bqScope.getTableExcludeList(),
                config.getDataRegionId());
    } else if (!bqScope.getProjectIncludeList().isEmpty()) {
        requests = processProjects(
                bqScope.getProjectIncludeList(),
                bqScope.getDatasetExcludeList(),
                bqScope.getTableExcludeList(),
                config.getDataRegionId());
    } else {
        throw new NonRetryableApplicationException("At least one of of the following params must be not empty [tableIncludeList, datasetIncludeList, projectIncludeList]");
    }

    // Publish one request per resolved table to the configured output topic.
    PubSubPublishResults outcome = pubSubService.publishTableOperationRequests(
            config.getProjectId(),
            config.getOutputTopic(),
            requests);

    // Failures are only logged as warnings; they do not abort the run.
    for (FailedPubSubMessage failed : outcome.getFailedMessages()) {
        logger.logWarnWithTracker(runId, String.format("Failed to publish this messages %s", failed.toString()));
    }

    // Log every dispatched tracking ID so that messages which fail in later stages
    // (i.e. Tagger) can be detected within this runId.
    for (SuccessPubSubMessage published : outcome.getSuccessMessages()) {
        Operation operation = (Operation) published.getMsg();

        boolean logWithTableSpec = config.getDispatcherType().equals(DispatcherType.INSPECTION)
                || config.getSolutionMode().equals(SolutionMode.AUTO_DLP);
        if (logWithTableSpec) {
            // Inspection dispatcher, or Auto DLP mode: the entity key is parsed as a table
            // spec and logged alongside the tracking ID.
            TableSpec tableSpec = TableSpec.fromSqlString(operation.getEntityKey());
            logger.logSuccessDispatcherTrackingId(runId, operation.getTrackingId(), tableSpec);
        } else {
            // Remaining case (neither an INSPECTION dispatcher nor AUTO_DLP mode): only the
            // tracking ID is logged, without a table spec.
            logger.logSuccessDispatcherTrackingId(runId, operation.getTrackingId());
        }
    }

    logger.logFunctionEnd(runId);
    return outcome;
}
Aggregations