use of com.google.cloud.pso.bq_pii_classifier.services.pubsub.PubSubPublishResults in project bq-pii-classifier by GoogleCloudPlatform.
the class DispatcherTest method testWithInput.
// @Test
// public void testDispatcher_withDatasets () throws IOException {
//
// String jsonPayLoad = "{\"tablesInclude\":\"\""
// + ",\"tablesExclude\":\"p1.d1.t1\""
// + ",\"datasetsInclude\":\"p1.d1, p1.d2\""
// + ",\"datasetsExclude\":\"\""
// + ",\"projectsInclude\":\"p2\"" // should have no effect
// + "}";
//
// List<String> expectedOutput = Lists.newArrayList("p1.d1.t2", "p1.d2.t1", "p1.d2.t2");
// List<String> actualOutput = testWithInput(jsonPayLoad);
//
// assertEquals(expectedOutput, actualOutput);
// }
//
// @Test
// public void testDispatcher_withProjects () throws IOException {
//
// String jsonPayLoad = "{\"tablesInclude\":\"\""
// + ",\"tablesExclude\":\"p1.d2.t1\""
// + ",\"datasetsInclude\":\"\""
// + ",\"datasetsExclude\":\"p1.d1\""
// + ",\"projectsInclude\":\"p1, p2\"" // takes effect here: no table or dataset include list is set
// + "}";
//
// List<String> expectedOutput = Lists.newArrayList("p1.d2.t2", "p2.d1.t1", "p2.d1.t2");
// List<String> actualOutput = testWithInput(jsonPayLoad);
//
// assertEquals(expectedOutput, actualOutput);
// }
/**
 * Test helper: runs the dispatcher under test for the given scope and returns the entity keys
 * of the messages it reports as successfully published.
 *
 * NOTE(review): the PubSub mock below is created and stubbed only after {@code function.execute}
 * has already run, and it is not handed to {@code function} anywhere in this method, so the
 * stubbing does not appear to influence the returned list. Confirm how {@code function} is wired
 * (its construction is commented out here) before relying on this helper.
 *
 * @param bigQueryScope the include/exclude scope passed to the dispatcher
 * @return entity keys of the successfully published operations, in the order reported
 */
private List<String> testWithInput(BigQueryScope bigQueryScope) throws IOException, NonRetryableApplicationException, InterruptedException {
    // Dispatcher function = new Dispatcher(envMock, bqServiceMock, cloudTasksServiceMock);
    PubSubPublishResults dispatchOutcome = function.execute(bigQueryScope, "");

    // Canned publish outcome: two successful operations and one failed one.
    PubSubServiceImpl pubSubServiceMock = mock(PubSubServiceImpl.class);
    lenient()
            .when(pubSubServiceMock.publishTableOperationRequests(anyString(), anyString(), any()))
            .thenReturn(new PubSubPublishResults(
                    Arrays.asList(
                            new SuccessPubSubMessage(new Operation("p1.d1.t1", "runId", "trackingId"), "publishedMessageId"),
                            new SuccessPubSubMessage(new Operation("p1.d1.t2", "runId", "trackingId"), "publishedMessageId")),
                    Arrays.asList(
                            new FailedPubSubMessage(new Operation("", "", ""), new Exception("test fail message")))));

    // Reduce each successful message to the entity key of the operation it carries.
    return dispatchOutcome.getSuccessMessages()
            .stream()
            .map(published -> (Operation) published.getMsg())
            .map(operation -> operation.getEntityKey())
            .collect(Collectors.toList());
}
use of com.google.cloud.pso.bq_pii_classifier.services.pubsub.PubSubPublishResults in project bq-pii-classifier by GoogleCloudPlatform.
the class Dispatcher method execute.
/**
 * Resolves the BigQuery scope into a list of per-table requests, publishes them to the configured
 * output topic and logs a tracking entry for every published request.
 *
 * @param bqScope include/exclude lists for tables, datasets and projects
 * @param pubSubMessageId ID of the triggering PubSub message; used as a de-duplication key
 * @return the publish results (successful and failed messages) as returned by the PubSub service
 * @throws NonRetryableApplicationException if this message ID was already recorded as processed,
 *     or if all three include lists are empty
 * @throws IOException declared; the visible code does not show which collaborator raises it
 * @throws InterruptedException declared; the visible code does not show which collaborator raises it
 */
public PubSubPublishResults execute(BigQueryScope bqScope, String pubSubMessageId) throws IOException, NonRetryableApplicationException, InterruptedException {
    /*
     * Duplicate-delivery guard: if this pubSubMessageId was handled before, do not re-run the
     * dispatcher (and the whole downstream process) when PubSub unexpectedly re-sends the message.
     * This is an extra cost-saving measure, implemented by keeping simple flag files (named after
     * the pubSubMessageId) in the persistent set.
     */
    final String processedMarker = String.format("%s/%s", persistentSetObjectPrefix, pubSubMessageId);
    if (persistentSet.contains(processedMarker)) {
        // Surface as non-retryable so the caller can log it and ACK the message.
        throw new NonRetryableApplicationException(String.format(
                "PubSub message ID '%s' has been processed before by the dispatcher. The message should be ACK to PubSub to stop retries. Please investigate further why the message was retried in the first place.",
                pubSubMessageId));
    }
    logger.logInfoWithTracker(runId, String.format("Persisting processing key for PubSub message ID %s", pubSubMessageId));
    persistentSet.add(processedMarker);

    /*
     * Scope resolution works bottom-up, TABLES > DATASETS > PROJECTS: the most specific non-empty
     * INCLUDE list wins and every other INCLUDE list is ignored.
     *
     *   TABLES_INCLUDE given   -> only these tables, minus TABLES_EXCLUDE
     *   DATASETS_INCLUDE given -> tables of these datasets, minus DATASETS_EXCLUDE and TABLES_EXCLUDE
     *   PROJECTS_INCLUDE given -> datasets and tables of these projects, minus DATASETS_EXCLUDE
     *                             and TABLES_EXCLUDE
     */
    final List<JsonMessage> pubSubMessagesToPublish;
    if (!bqScope.getTableIncludeList().isEmpty()) {
        pubSubMessagesToPublish = processTables(
                bqScope.getTableIncludeList(),
                bqScope.getTableExcludeList());
    } else if (!bqScope.getDatasetIncludeList().isEmpty()) {
        pubSubMessagesToPublish = processDatasets(
                bqScope.getDatasetIncludeList(),
                bqScope.getDatasetExcludeList(),
                bqScope.getTableExcludeList(),
                config.getDataRegionId());
    } else if (!bqScope.getProjectIncludeList().isEmpty()) {
        pubSubMessagesToPublish = processProjects(
                bqScope.getProjectIncludeList(),
                bqScope.getDatasetExcludeList(),
                bqScope.getTableExcludeList(),
                config.getDataRegionId());
    } else {
        throw new NonRetryableApplicationException("At least one of of the following params must be not empty [tableIncludeList, datasetIncludeList, projectIncludeList]");
    }

    // Hand the resolved requests over to PubSub.
    PubSubPublishResults publishResults = pubSubService.publishTableOperationRequests(
            config.getProjectId(),
            config.getOutputTopic(),
            pubSubMessagesToPublish);

    // Failed publishes are only reported as warnings; they do not abort the run.
    for (FailedPubSubMessage failed : publishResults.getFailedMessages()) {
        logger.logWarnWithTracker(runId, String.format("Failed to publish this messages %s", failed.toString()));
    }

    // Record every dispatched tracking ID so that requests of this runId which fail in later
    // stages (i.e. Tagger) can be detected and the progress of the run can be followed.
    for (SuccessPubSubMessage published : publishResults.getSuccessMessages()) {
        Operation dispatched = (Operation) published.getMsg();
        boolean logWithTableSpec = config.getDispatcherType().equals(DispatcherType.INSPECTION)
                || config.getSolutionMode().equals(SolutionMode.AUTO_DLP);
        if (logWithTableSpec) {
            // Inspection dispatcher (Standard mode) and Auto DLP mode: the entity key is a table
            // spec, which is logged alongside the tracking ID.
            TableSpec tableSpec = TableSpec.fromSqlString(dispatched.getEntityKey());
            logger.logSuccessDispatcherTrackingId(runId, dispatched.getTrackingId(), tableSpec);
        } else {
            // Tagging dispatcher in Standard mode: only the tracking ID is logged.
            logger.logSuccessDispatcherTrackingId(runId, dispatched.getTrackingId());
        }
    }

    logger.logFunctionEnd(runId);
    return publishResults;
}
use of com.google.cloud.pso.bq_pii_classifier.services.pubsub.PubSubPublishResults in project bq-pii-classifier by GoogleCloudPlatform.
the class TaggingDispatcherController method receiveMessage.
/**
 * HTTP entry point of the tagging dispatcher: parses the request payload into a
 * {@link BigQueryScope}, runs a {@link Dispatcher} configured with a DLP-results scanner and
 * reports the outcome in the response body.
 *
 * Every exception is caught, logged via {@code logNonRetryableExceptions} and folded into the
 * response text; the method always answers with HTTP 200.
 * NOTE(review): presumably the unconditional 200 is what acknowledges the push message and stops
 * redelivery - confirm against the subscription configuration.
 *
 * @param requestBody the PubSub push envelope; a null body or null message is reported as an error
 * @return a 200 response whose body describes the publishing result or the error
 */
@RequestMapping(value = "/", method = RequestMethod.POST)
public ResponseEntity receiveMessage(@RequestBody PubSubEvent requestBody) {
    String runId = TrackingHelper.generateTaggingRunId();
    String state = "";
    try {
        if (requestBody == null || requestBody.getMessage() == null) {
            String msg = "Bad Request: invalid message format";
            logger.logSevereWithTracker(runId, msg);
            throw new NonRetryableApplicationException("Request body or message is Null.");
        }
        String requestJsonString = requestBody.getMessage().dataToUtf8String();
        // remove any escape characters (e.g. from Terraform)
        requestJsonString = requestJsonString.replace("\\", "");
        logger.logInfoWithTracker(runId, String.format("Received payload: %s", requestJsonString));
        BigQueryScope bqScope = gson.fromJson(requestJsonString, BigQueryScope.class);
        // Gson yields null for an empty payload; fail with a clear message instead of letting
        // the toString() call below raise an uninformative NullPointerException.
        if (bqScope == null) {
            throw new NonRetryableApplicationException("Request payload is empty or could not be parsed into a BigQueryScope.");
        }
        logger.logInfoWithTracker(runId, String.format("Parsed JSON input %s ", bqScope.toString()));
        // Pick the scanner that reads DLP findings for the active solution mode.
        Scanner dlpResultsScanner;
        if (environment.getIsAutoDlpMode()) {
            dlpResultsScanner = new AutoDlpResultsScannerImpl(
                    environment.getProjectId(),
                    environment.getSolutionDataset(),
                    environment.getDlpTableAuto(),
                    new BigQueryServiceImpl());
        } else {
            dlpResultsScanner = new StandardDlpResultsScannerImpl(
                    environment.getProjectId(),
                    environment.getSolutionDataset(),
                    environment.getDlpTableStandard(),
                    environment.getLoggingTable(),
                    new BigQueryServiceImpl());
        }
        Dispatcher dispatcher = new Dispatcher(
                environment.toConfig(),
                new BigQueryServiceImpl(),
                new PubSubServiceImpl(),
                dlpResultsScanner,
                new GCSPersistentSetImpl(environment.getGcsFlagsBucket()),
                "tagging-dispatcher-flags",
                runId);
        // The PubSub message ID lets the dispatcher reject a message it has already processed.
        PubSubPublishResults results = dispatcher.execute(bqScope, requestBody.getMessage().getMessageId());
        state = String.format("Publishing results: %s SUCCESS MESSAGES and %s FAILED MESSAGES", results.getSuccessMessages().size(), results.getFailedMessages().size());
        logger.logInfoWithTracker(runId, state);
    } catch (Exception e) {
        // Controller boundary: nothing propagates to the framework; the error becomes the state.
        logger.logNonRetryableExceptions(runId, e);
        state = String.format("ERROR '%s'", e.getMessage());
    }
    return new ResponseEntity<>(String.format("Process completed with state = %s", state), HttpStatus.OK);
}
use of com.google.cloud.pso.bq_pii_classifier.services.pubsub.PubSubPublishResults in project bq-pii-classifier by GoogleCloudPlatform.
the class InspectionDispatcherController method receiveMessage.
/**
 * HTTP entry point of the inspection dispatcher: parses the request payload into a
 * {@link BigQueryScope}, runs a {@link Dispatcher} configured with a BigQuery scanner and
 * reports the outcome in the response body.
 *
 * Every exception is caught, logged via {@code logNonRetryableExceptions} and folded into the
 * response text; the method always answers with HTTP 200.
 * NOTE(review): presumably the unconditional 200 is what acknowledges the push message and stops
 * redelivery - confirm against the subscription configuration.
 *
 * @param requestBody the PubSub push envelope; a null body or null message is reported as an error
 * @return a 200 response whose body describes the publishing result or the error
 */
@RequestMapping(value = "/", method = RequestMethod.POST)
public ResponseEntity receiveMessage(@RequestBody PubSubEvent requestBody) {
    String runId = TrackingHelper.generateInspectionRunId();
    String state = "";
    try {
        if (requestBody == null || requestBody.getMessage() == null) {
            String msg = "Bad Request: invalid message format";
            logger.logSevereWithTracker(runId, msg);
            throw new NonRetryableApplicationException("Request body or message is Null.");
        }
        String requestJsonString = requestBody.getMessage().dataToUtf8String();
        // remove any escape characters (e.g. from Terraform)
        requestJsonString = requestJsonString.replace("\\", "");
        logger.logInfoWithTracker(runId, String.format("Received payload: %s", requestJsonString));
        BigQueryScope bqScope = gson.fromJson(requestJsonString, BigQueryScope.class);
        // Gson yields null for an empty payload; fail with a clear message instead of letting
        // the toString() call below raise an uninformative NullPointerException.
        if (bqScope == null) {
            throw new NonRetryableApplicationException("Request payload is empty or could not be parsed into a BigQueryScope.");
        }
        logger.logInfoWithTracker(runId, String.format("Parsed JSON input %s ", bqScope.toString()));
        Dispatcher dispatcher = new Dispatcher(
                environment.toConfig(),
                new BigQueryServiceImpl(),
                new PubSubServiceImpl(),
                new BigQueryScannerImpl(),
                new GCSPersistentSetImpl(environment.getGcsFlagsBucket()),
                "inspection-dispatcher-flags",
                runId);
        // The PubSub message ID lets the dispatcher reject a message it has already processed.
        PubSubPublishResults results = dispatcher.execute(bqScope, requestBody.getMessage().getMessageId());
        state = String.format("Publishing results: %s SUCCESS MESSAGES and %s FAILED MESSAGES", results.getSuccessMessages().size(), results.getFailedMessages().size());
        logger.logInfoWithTracker(runId, state);
    } catch (Exception e) {
        // Controller boundary: nothing propagates to the framework; the error becomes the state.
        logger.logNonRetryableExceptions(runId, e);
        state = String.format("ERROR '%s'", e.getMessage());
    }
    return new ResponseEntity<>(String.format("Process completed with state = %s", state), HttpStatus.OK);
}
Aggregations