Search in sources :

Example 1 with CloneControlledGcpBigQueryDatasetResult

Use of bio.terra.workspace.model.CloneControlledGcpBigQueryDatasetResult in the project terra-workspace-manager by DataBiosphere.

From the class ControlledBigQueryDatasetLifecycle, method testCloneBigQueryDataset:

/**
 * Clones {@code sourceDataset} into the destination workspace through the cloning user's API
 * client, polls until the asynchronous clone job is no longer running, and then verifies the
 * cloned resource's metadata and a sample of the table data queried from the destination project.
 *
 * @param sourceDataset the controlled BigQuery dataset resource to clone
 * @param cloningUser the test user handed to {@code ClientTestUtils.getGcpBigQueryClient} to build
 *     the BigQuery client used for the content checks
 * @param cloningUserResourceApi the controlled GCP resource API client used to submit the clone
 *     request and fetch its result
 * @throws Exception propagated from the API client, the polling helper, or the BigQuery queries
 */
private void testCloneBigQueryDataset(GcpBigQueryDatasetResource sourceDataset, TestUserSpecification cloningUser, ControlledGcpResourceApi cloningUserResourceApi) throws Exception {
    // This generated name only feeds the description below; the request itself passes null for
    // destinationDatasetName.
    final String generatedCloneName = ("clone_" + UUID.randomUUID().toString()).replace('-', '_');
    final String cloneDescription = "Clone of " + generatedCloneName;
    final String cloneJobId = UUID.randomUUID().toString();

    // Build the clone request that will be submitted as the cloning user.
    final CloneControlledGcpBigQueryDatasetRequest request =
        new CloneControlledGcpBigQueryDatasetRequest()
            .cloningInstructions(CloningInstructionsEnum.RESOURCE)
            .description(cloneDescription)
            .location(null) // keep same
            .destinationWorkspaceId(getDestinationWorkspaceId())
            .name("MyClonedDataset")
            .jobControl(new JobControl().id(cloneJobId))
            .destinationDatasetName(null); // keep same

    final ResourceMetadata sourceMetadata = sourceDataset.getMetadata();
    logger.info(
        "Cloning BigQuery dataset\n\tname: {}\n\tresource ID: {}\n\tworkspace: {}\n\t"
            + "projectID: {}\ninto destination \n\tname: {}\n\tworkspace: {}\n\tprojectID: {}",
        sourceMetadata.getName(),
        sourceMetadata.getResourceId(),
        sourceMetadata.getWorkspaceId(),
        getSourceProjectId(),
        sourceMetadata.getName(),
        getDestinationWorkspaceId(),
        getDestinationProjectId());

    // Submit the clone request, then poll every five seconds for the asynchronous result.
    final CloneControlledGcpBigQueryDatasetResult submittedResult =
        cloningUserResourceApi.cloneBigQueryDataset(
            request, sourceMetadata.getWorkspaceId(), sourceMetadata.getResourceId());
    final CloneControlledGcpBigQueryDatasetResult finishedResult =
        ClientTestUtils.pollWhileRunning(
            submittedResult,
            () ->
                cloningUserResourceApi.getCloneBigQueryDatasetResult(
                    request.getDestinationWorkspaceId(), request.getJobControl().getId()),
            CloneControlledGcpBigQueryDatasetResult::getJobReport,
            Duration.ofSeconds(5));
    ClientTestUtils.assertJobSuccess(
        "clone BigQuery dataset", finishedResult.getJobReport(), finishedResult.getErrorReport());

    // The result wraps the clone description, which in turn wraps the cloned resource.
    final ClonedControlledGcpBigQueryDataset cloneDetails = finishedResult.getDataset();
    assertEquals(sourceMetadata.getWorkspaceId(), cloneDetails.getSourceWorkspaceId());
    assertEquals(sourceMetadata.getResourceId(), cloneDetails.getSourceResourceId());
    assertEquals(CloningInstructionsEnum.RESOURCE, cloneDetails.getEffectiveCloningInstructions());

    // The clone's metadata should mirror the source's.
    final GcpBigQueryDatasetResource clonedResource = cloneDetails.getDataset();
    final ResourceMetadata clonedMetadata = clonedResource.getMetadata();
    assertEquals(sourceMetadata.getCloningInstructions(), clonedMetadata.getCloningInstructions());
    assertEquals(sourceMetadata.getCloudPlatform(), clonedMetadata.getCloudPlatform());
    assertEquals(sourceMetadata.getResourceType(), clonedMetadata.getResourceType());
    assertEquals(sourceMetadata.getStewardshipType(), clonedMetadata.getStewardshipType());
    assertEquals(
        sourceMetadata.getControlledResourceMetadata().getManagedBy(),
        clonedMetadata.getControlledResourceMetadata().getManagedBy());
    assertEquals(
        sourceMetadata.getControlledResourceMetadata().getAccessScope(),
        clonedMetadata.getControlledResourceMetadata().getAccessScope());

    // Same dataset ID, but in a different project.
    assertNotEquals(
        sourceDataset.getAttributes().getProjectId(),
        clonedResource.getAttributes().getProjectId());
    assertEquals(
        sourceDataset.getAttributes().getDatasetId(),
        clonedResource.getAttributes().getDatasetId());
    // Note: this compares the source dataset's resource name with its own dataset ID.
    assertEquals(sourceDataset.getMetadata().getName(), sourceDataset.getAttributes().getDatasetId());

    // Compare dataset contents by querying the clone in the destination project.
    final BigQuery bigQuery =
        ClientTestUtils.getGcpBigQueryClient(cloningUser, getDestinationProjectId());
    final String clonedDatasetId = clonedResource.getAttributes().getDatasetId();
    // Both queries below use this table as their destination, with WRITE_TRUNCATE.
    final TableId resultsTable =
        TableId.of(getDestinationProjectId(), clonedDatasetId, "results_table");

    final String employeeSql =
        "SELECT * FROM `" + getDestinationProjectId() + "." + clonedDatasetId + ".employee`;";
    final QueryJobConfiguration employeeQuery =
        QueryJobConfiguration.newBuilder(employeeSql)
            .setDestinationTable(resultsTable)
            .setWriteDisposition(WriteDisposition.WRITE_TRUNCATE)
            .build();
    final TableResult employeeRows = bigQuery.query(employeeQuery);
    final long employeeRowCount =
        StreamSupport.stream(employeeRows.getValues().spliterator(), false).count();
    assertThat(employeeRowCount, is(greaterThanOrEqualTo(2L)));

    final String departmentSql =
        "SELECT * FROM `" + getDestinationProjectId() + "." + clonedDatasetId + ".department` "
            + "WHERE department_id = 201;";
    final QueryJobConfiguration departmentQuery =
        QueryJobConfiguration.newBuilder(departmentSql)
            .setDestinationTable(resultsTable)
            .setWriteDisposition(WriteDisposition.WRITE_TRUNCATE)
            .build();
    final TableResult departmentRows = bigQuery.query(departmentQuery);
    final FieldValueList departmentRow =
        StreamSupport.stream(departmentRows.getValues().spliterator(), false)
            .findFirst()
            .orElseThrow(() -> new RuntimeException("Can't find expected result row"));

    // Assert data matches the expected values from BqDatasetUtils.populateBigQueryDataset
    assertEquals("ocean", departmentRow.get("name").getStringValue());
    assertEquals(101, departmentRow.get("manager_id").getLongValue());
}
Also used : TableId(com.google.cloud.bigquery.TableId) BigQuery(com.google.cloud.bigquery.BigQuery) JobControl(bio.terra.workspace.model.JobControl) CloneControlledGcpBigQueryDatasetResult(bio.terra.workspace.model.CloneControlledGcpBigQueryDatasetResult) ClonedControlledGcpBigQueryDataset(bio.terra.workspace.model.ClonedControlledGcpBigQueryDataset) ResourceMetadata(bio.terra.workspace.model.ResourceMetadata) GcpBigQueryDatasetResource(bio.terra.workspace.model.GcpBigQueryDatasetResource) CloneControlledGcpBigQueryDatasetRequest(bio.terra.workspace.model.CloneControlledGcpBigQueryDatasetRequest) TableResult(com.google.cloud.bigquery.TableResult) FieldValueList(com.google.cloud.bigquery.FieldValueList) FieldValue(com.google.cloud.bigquery.FieldValue) QueryJobConfiguration(com.google.cloud.bigquery.QueryJobConfiguration)

Aggregations

CloneControlledGcpBigQueryDatasetRequest (bio.terra.workspace.model.CloneControlledGcpBigQueryDatasetRequest): 1; CloneControlledGcpBigQueryDatasetResult (bio.terra.workspace.model.CloneControlledGcpBigQueryDatasetResult): 1; ClonedControlledGcpBigQueryDataset (bio.terra.workspace.model.ClonedControlledGcpBigQueryDataset): 1; GcpBigQueryDatasetResource (bio.terra.workspace.model.GcpBigQueryDatasetResource): 1; JobControl (bio.terra.workspace.model.JobControl): 1; ResourceMetadata (bio.terra.workspace.model.ResourceMetadata): 1; BigQuery (com.google.cloud.bigquery.BigQuery): 1; FieldValue (com.google.cloud.bigquery.FieldValue): 1; FieldValueList (com.google.cloud.bigquery.FieldValueList): 1; QueryJobConfiguration (com.google.cloud.bigquery.QueryJobConfiguration): 1; TableId (com.google.cloud.bigquery.TableId): 1; TableResult (com.google.cloud.bigquery.TableResult): 1