Use of bio.terra.cloudres.google.bigquery.BigQueryCow in the terra-workspace-manager project by DataBiosphere: the CreateTableCopyJobsStep class, doStep method.
/**
 * Launches one BigQuery copy job per table in the source dataset. A running map of
 * table ID -> job ID is kept in the flight working map; only tables absent from that
 * map get a new job. After each job submission the step requests a rerun so the map
 * is checkpointed incrementally.
 *
 * <p>On retry, jobs are created only for tables without an entry. Jobs are built with
 * WRITE_TRUNCATE so a re-run cannot duplicate data.
 */
@Override
public StepResult doStep(FlightContext flightContext) throws InterruptedException, RetryException {
  final FlightMap flightWorkingMap = flightContext.getWorkingMap();
  final CloningInstructions cloningInstructions =
      flightContext.getInputParameters().get(ControlledResourceKeys.CLONING_INSTRUCTIONS, CloningInstructions.class);
  // Nothing to copy unless the caller asked for a full resource clone.
  if (CloningInstructions.COPY_RESOURCE != cloningInstructions) {
    return StepResult.getStepResultSuccess();
  }

  // Gather and persist the clone inputs for downstream steps.
  final DatasetCloneInputs sourceInputs = getSourceInputs();
  flightWorkingMap.put(ControlledResourceKeys.SOURCE_CLONE_INPUTS, sourceInputs);
  final DatasetCloneInputs destinationInputs = getDestinationInputs(flightContext);
  flightWorkingMap.put(ControlledResourceKeys.DESTINATION_CLONE_INPUTS, destinationInputs);

  final BigQueryCow bigQueryCow = crlService.createWsmSaBigQueryCow();
  // TODO(jaycarlton): remove usage of this client when it's all in CRL PF-942
  final Bigquery bigQueryClient = crlService.createWsmSaNakedBigQueryClient();

  try {
    // Enumerate every table in the source dataset.
    final TableList sourceTables =
        bigQueryCow.tables().list(sourceInputs.getProjectId(), sourceInputs.getDatasetName()).execute();

    // Resume (or initialize) the table-ID -> job-ID bookkeeping map.
    Map<String, String> jobIdByTableId =
        flightWorkingMap.get(ControlledResourceKeys.TABLE_TO_JOB_ID_MAP, new TypeReference<Map<String, String>>() {});
    if (jobIdByTableId == null) {
      jobIdByTableId = new HashMap<>();
    }
    List<Tables> tables = sourceTables.getTables();
    if (tables == null) {
      tables = Collections.emptyList();
    }

    // Submit a copy job for the first table that doesn't have one yet, then rerun.
    for (Tables table : tables) {
      final String tableId = table.getId();
      if (tableId == null || jobIdByTableId.containsKey(tableId)) {
        continue;
      }
      checkStreamingBuffer(sourceInputs, bigQueryCow, table);
      final Job inputJob = buildTableCopyJob(sourceInputs, destinationInputs, table);
      // bill the job to the destination project
      final Job submittedJob = bigQueryClient.jobs().insert(destinationInputs.getProjectId(), inputJob).execute();
      // Record the new job so a retry won't resubmit this table.
      jobIdByTableId.put(tableId, submittedJob.getId());
      flightWorkingMap.put(ControlledResourceKeys.TABLE_TO_JOB_ID_MAP, jobIdByTableId);
      return new StepResult(StepStatus.STEP_RESULT_RERUN);
    }

    // Every table has a job; persist the map (even when empty) and finish.
    flightWorkingMap.put(ControlledResourceKeys.TABLE_TO_JOB_ID_MAP, jobIdByTableId);
    return StepResult.getStepResultSuccess();
  } catch (IOException e) {
    // BigQuery API failures are usually transient; let Stairway retry the step.
    return new StepResult(StepStatus.STEP_RESULT_FAILURE_RETRY, e);
  }
}
Use of bio.terra.cloudres.google.bigquery.BigQueryCow in the terra-workspace-manager project by DataBiosphere: the RetrieveBigQueryDatasetCloudAttributesStep class, doStep method.
/**
 * Determines the location for the destination dataset and stores it in the working map
 * under {@code ControlledResourceKeys.LOCATION}.
 *
 * <p>If the caller supplied a location in the input parameters, that value is used
 * directly. Otherwise the location is read from the source dataset via the BigQuery API.
 *
 * @param flightContext flight context carrying input parameters and the working map
 * @return success with the location stored in the working map, or a retryable failure
 *     if the BigQuery lookup fails
 */
@Override
public StepResult doStep(FlightContext flightContext) throws InterruptedException, RetryException {
  final String suppliedLocation = flightContext.getInputParameters().get(ControlledResourceKeys.LOCATION, String.class);
  if (!Strings.isNullOrEmpty(suppliedLocation)) {
    flightContext.getWorkingMap().put(ControlledResourceKeys.LOCATION, suppliedLocation);
    // we can stop here as we don't need the original location
    return StepResult.getStepResultSuccess();
  }
  // Since no location was specified, we need to find the original one
  // from the source dataset.
  final String projectId = gcpCloudContextService.getRequiredGcpProject(datasetResource.getWorkspaceId());
  final BigQueryCow bigQueryCow = crlService.createWsmSaBigQueryCow();
  try {
    final Dataset dataset = bigQueryCow.datasets().get(projectId, datasetResource.getDatasetName()).execute();
    final String sourceLocation = dataset.getLocation();
    flightContext.getWorkingMap().put(ControlledResourceKeys.LOCATION, sourceLocation);
    return StepResult.getStepResultSuccess();
  } catch (IOException e) {
    // An IOException from the BigQuery client is typically transient (network blip,
    // 5xx from the service), so mark the failure retryable — consistent with how the
    // other BigQuery clone steps handle IOException — rather than failing the flight.
    return new StepResult(StepStatus.STEP_RESULT_FAILURE_RETRY, e);
  }
}
Use of bio.terra.cloudres.google.bigquery.BigQueryCow in the terra-cli project by DataBiosphere: the ExternalBQDatasets class, grantAccess method.
/**
 * Grant a given user or group access to a dataset. This method uses SA credentials that have
 * permissions on the external (to WSM) project.
 *
 * @param datasetRef project ID + dataset ID of the dataset to modify
 * @param memberEmail email address of the user or group being granted access
 * @param memberType whether {@code memberEmail} names a USER or a GROUP
 * @param role BigQuery dataset role to grant (e.g. "READER", "WRITER")
 * @throws IOException if the BigQuery get/update call fails
 */
private static void grantAccess(DatasetReference datasetRef, String memberEmail, IamMemberType memberType, String role) throws IOException {
  BigQueryCow bigQuery = getBQCow();
  Dataset datasetToUpdate = bigQuery.datasets().get(datasetRef.getProjectId(), datasetRef.getDatasetId()).execute();
  // getAccess() may return null when the dataset carries no explicit ACL entries;
  // guard against an NPE on the add() below by starting from an empty list.
  List<Dataset.Access> accessToUpdate = datasetToUpdate.getAccess();
  if (accessToUpdate == null) {
    accessToUpdate = new ArrayList<>();
  }
  Dataset.Access newAccess = new Dataset.Access().setRole(role);
  if (memberType.equals(IamMemberType.USER)) {
    newAccess.setUserByEmail(memberEmail);
  } else {
    newAccess.setGroupByEmail(memberEmail);
  }
  accessToUpdate.add(newAccess);
  datasetToUpdate.setAccess(accessToUpdate);
  // Write the full dataset resource back with the expanded access list.
  bigQuery.datasets().update(datasetRef.getProjectId(), datasetRef.getDatasetId(), datasetToUpdate).execute();
}
Use of bio.terra.cloudres.google.bigquery.BigQueryCow in the terra-cli project by DataBiosphere: the ExternalBQDatasets class, grantReadAccessToTable method.
/**
 * Adds a binding for the BigQuery dataViewer role on a single table, granting read
 * access to the given group.
 *
 * @param projectId project containing the table
 * @param datasetId dataset containing the table
 * @param tableId table whose IAM policy is updated
 * @param groupEmail email of the group receiving dataViewer
 * @throws IOException if the IAM get/set calls fail
 */
public static void grantReadAccessToTable(String projectId, String datasetId, String tableId, String groupEmail) throws IOException {
  BigQueryCow bigQuery = getBQCow();
  // Fetch the current policy so the existing bindings (and etag) are preserved.
  Policy currentPolicy =
      bigQuery.tables().getIamPolicy(projectId, datasetId, tableId, new GetIamPolicyRequest()).execute();
  List<Binding> bindings = currentPolicy.getBindings();
  if (bindings == null) {
    bindings = new ArrayList<>();
  }
  Binding viewerBinding =
      new Binding().setRole("roles/bigquery.dataViewer").setMembers(ImmutableList.of("group:" + groupEmail));
  bindings.add(viewerBinding);
  SetIamPolicyRequest setRequest = new SetIamPolicyRequest().setPolicy(currentPolicy.setBindings(bindings));
  bigQuery.tables().setIamPolicy(projectId, datasetId, tableId, setRequest).execute();
  System.out.println("Grant dataViewer access to table " + tableId + " for group email: " + groupEmail);
}
Use of bio.terra.cloudres.google.bigquery.BigQueryCow in the terra-workspace-manager project by DataBiosphere: the ControlledResourceServiceTest class, deleteBqDatasetDo method.
@Test
@DisabledIfEnvironmentVariable(named = "TEST_ENV", matches = BUFFER_SERVICE_DISABLED_ENVS_REG_EX)
void deleteBqDatasetDo() throws Exception {
  // Create a controlled BigQuery dataset to delete.
  String bqDatasetId = ControlledResourceFixtures.uniqueDatasetId();
  String datasetLocation = "us-central1";
  ApiGcpBigQueryDatasetCreationParameters createParams =
      new ApiGcpBigQueryDatasetCreationParameters().datasetId(bqDatasetId).location(datasetLocation);
  ControlledBigQueryDatasetResource datasetResource =
      ControlledResourceFixtures.makeDefaultControlledBigQueryBuilder(workspace.getWorkspaceId())
          .datasetName(bqDatasetId)
          .build();
  ControlledBigQueryDatasetResource created =
      controlledResourceService
          .createControlledResourceSync(datasetResource, null, user.getAuthenticatedRequest(), createParams)
          .castByEnum(WsmResourceType.CONTROLLED_GCP_BIG_QUERY_DATASET);
  assertEquals(datasetResource, created);

  // Test idempotency of delete by retrying steps once.
  Map<String, StepStatus> doStepRetries = new HashMap<>();
  doStepRetries.put(DeleteMetadataStep.class.getName(), StepStatus.STEP_RESULT_FAILURE_RETRY);
  doStepRetries.put(DeleteBigQueryDatasetStep.class.getName(), StepStatus.STEP_RESULT_FAILURE_RETRY);
  // Do not test lastStepFailure, as this flight has no undo steps, only dismal failure.
  jobService.setFlightDebugInfoForTest(FlightDebugInfo.newBuilder().doStepFailures(doStepRetries).build());
  controlledResourceService.deleteControlledResourceSync(
      datasetResource.getWorkspaceId(), datasetResource.getResourceId(), user.getAuthenticatedRequest());

  // The cloud dataset should be gone: a direct GET must 404.
  BigQueryCow bqCow = crlService.createWsmSaBigQueryCow();
  GoogleJsonResponseException notFound =
      assertThrows(
          GoogleJsonResponseException.class,
          () -> bqCow.datasets().get(projectId, datasetResource.getDatasetName()).execute());
  assertEquals(HttpStatus.NOT_FOUND.value(), notFound.getStatusCode());

  // The WSM metadata should be gone too.
  assertThrows(
      ResourceNotFoundException.class,
      () -> controlledResourceService.getControlledResource(
          workspace.getWorkspaceId(), datasetResource.getResourceId(), user.getAuthenticatedRequest()));
}
Aggregations