Search in sources :

Example 16 with IngestRequestModel

use of bio.terra.model.IngestRequestModel in project jade-data-repo by DataBiosphere.

the class SnapshotConnectedTest method loadData.

/**
 * Stages a classpath resource as a blob in the ingest bucket, ingests it into the given
 * dataset table, and deletes the staged blob afterwards (even when staging or ingest fails).
 *
 * @param datasetId id of the dataset receiving the rows
 * @param tableName name of the target table
 * @param resourcePath classpath location of the data file; also used as the blob name suffix
 * @param format format of the data file; for CSV the request is told to skip 1 leading row
 * @throws Exception if the resource cannot be read or the staging/ingest calls fail
 */
private void loadData(String datasetId, String tableName, String resourcePath, IngestRequestModel.FormatEnum format) throws Exception {
    String bucket = testConfig.getIngestbucket();
    // A random prefix keeps staged blob names unique across calls.
    String blobName = UUID.randomUUID() + "-" + resourcePath;
    BlobInfo stagingBlob = BlobInfo.newBuilder(bucket, blobName).build();
    byte[] data = IOUtils.toByteArray(jsonLoader.getClassLoader().getResource(resourcePath));

    String gsPath = "gs://" + stagingBlob.getBucket() + "/" + stagingBlob.getName();
    IngestRequestModel ingestRequest = new IngestRequestModel()
        .table(tableName)
        .format(format)
        .path(gsPath);
    boolean isCsv = format.equals(IngestRequestModel.FormatEnum.CSV);
    if (isCsv) {
        ingestRequest.csvSkipLeadingRows(1);
    }

    try {
        storage.create(stagingBlob, data);
        connectedOperations.ingestTableSuccess(datasetId, ingestRequest);
    } finally {
        // Always remove the staged blob, whether or not the ingest succeeded.
        storage.delete(stagingBlob.getBlobId());
    }
}
Also used : CoreMatchers.containsString(org.hamcrest.CoreMatchers.containsString) BlobInfo(com.google.cloud.storage.BlobInfo) IngestRequestModel(bio.terra.model.IngestRequestModel)

Example 17 with IngestRequestModel

use of bio.terra.model.IngestRequestModel in project jade-data-repo by DataBiosphere.

the class SnapshotConnectedTest method testExcludeLockedFromSnapshotFileLookups.

/**
 * Checks that snapshot file lookups (by DRS id and by path) answer NOT_FOUND while the
 * snapshot metadata row is exclusively locked by an in-flight delete.
 *
 * <p>Flow: create a dataset, ingest a file and one table row referencing it, create a
 * snapshot, confirm both lookups succeed, then start a snapshot delete with the
 * lock-conflict "stop" fault enabled so the flight hangs while holding the lock. The
 * lookups are repeated during the hang, the hang is released, and the delete, dataset
 * cleanup and NOT_FOUND checks follow.
 *
 * @throws Exception if any setup, request, or wait step fails
 */
@Test
public void testExcludeLockedFromSnapshotFileLookups() throws Exception {
    // create a dataset
    DatasetSummaryModel datasetRefSummary = createTestDataset("simple-with-filerefs-dataset.json");
    // ingest a file
    URI sourceUri = new URI("gs", "jade-testdata", "/fileloadprofiletest/1KBfile.txt", null, null);
    String targetFilePath = "/mm/" + Names.randomizeName("testdir") + "/testExcludeLockedFromSnapshotFileLookups.txt";
    FileLoadModel fileLoadModel = new FileLoadModel()
        .sourcePath(sourceUri.toString())
        .description("testExcludeLockedFromSnapshotFileLookups")
        .mimeType("text/plain")
        .targetPath(targetFilePath)
        .profileId(billingProfile.getId());
    FileModel fileModel = connectedOperations.ingestFileSuccess(datasetRefSummary.getId(), fileLoadModel);
    // generate a JSON file with the fileref
    String jsonLine = "{\"name\":\"name1\", \"file_ref\":\"" + fileModel.getFileId() + "\"}\n";
    // load a JSON file that contains the table rows to load into the test bucket
    String jsonFileName = "this-better-pass.json";
    String dirInCloud = "scratch/testExcludeLockedFromSnapshotFileLookups/" + UUID.randomUUID();
    // single source of truth for the blob name, the scratch-file bookkeeping and the gs:// path
    String scratchFilePath = dirInCloud + "/" + jsonFileName;
    BlobInfo ingestTableBlob = BlobInfo.newBuilder(testConfig.getIngestbucket(), scratchFilePath).build();
    Storage storage = StorageOptions.getDefaultInstance().getService();
    storage.create(ingestTableBlob, jsonLine.getBytes(StandardCharsets.UTF_8));
    // make sure the JSON file gets cleaned up on test teardown
    connectedOperations.addScratchFile(scratchFilePath);
    // ingest the tabular data from the JSON file we just generated
    String gsPath = "gs://" + testConfig.getIngestbucket() + "/" + scratchFilePath;
    IngestRequestModel ingestRequest1 = new IngestRequestModel()
        .format(IngestRequestModel.FormatEnum.JSON)
        .table("tableA")
        .path(gsPath);
    connectedOperations.ingestTableSuccess(datasetRefSummary.getId(), ingestRequest1);
    // create a snapshot
    SnapshotSummaryModel snapshotSummary = connectedOperations.createSnapshot(datasetRefSummary, "simple-with-filerefs-snapshot.json", "");
    // check that the snapshot metadata row is unlocked
    String exclusiveLock = snapshotDao.getExclusiveLockState(UUID.fromString(snapshotSummary.getId()));
    assertNull("snapshot row is unlocked", exclusiveLock);
    String fileUri = getFileRefIdFromSnapshot(snapshotSummary);
    DrsId drsId = drsIdService.fromUri(fileUri);
    DRSObject drsObject = connectedOperations.drsGetObjectSuccess(drsId.toDrsObjectId(), false);
    String filePath = drsObject.getAliases().get(0);
    // lookup the snapshot file by DRS id, make sure it's returned (lookupSnapshotFileSuccess will already check)
    FileModel fsObjById = connectedOperations.lookupSnapshotFileSuccess(snapshotSummary.getId(), drsId.getFsObjectId());
    // expected value (what we loaded) goes first so a failure message reads correctly
    assertEquals("Retrieve snapshot file by DRS id matches desc", fileLoadModel.getDescription(), fsObjById.getDescription());
    // lookup the snapshot file by DRS path and check that it's found
    FileModel fsObjByPath = connectedOperations.lookupSnapshotFileByPathSuccess(snapshotSummary.getId(), filePath, 0);
    assertEquals("Retrieve snapshot file by path matches desc", fileLoadModel.getDescription(), fsObjByPath.getDescription());
    assertThat("Retrieve snapshot file objects match", fsObjById, CoreMatchers.equalTo(fsObjByPath));
    // now the snapshot exists....let's get it locked!
    // NO ASSERTS inside the block below where hang is enabled to reduce chance of failing before disabling the hang
    MvcResult deleteResult;
    MockHttpServletResponse failedGetSnapshotByIdResponse;
    MockHttpServletResponse failedGetSnapshotByPathResponse;
    // ====================================================
    // enable hang in DeleteSnapshotPrimaryDataStep
    configService.setFault(ConfigEnum.SNAPSHOT_DELETE_LOCK_CONFLICT_STOP_FAULT.name(), true);
    try {
        // kick off a request to delete the snapshot. this should hang before unlocking the snapshot object.
        // note: asserts are below outside the hang block
        deleteResult = mvc.perform(delete("/api/repository/v1/snapshots/" + snapshotSummary.getId())).andReturn();
        // give the flight time to launch and get to the hang
        TimeUnit.SECONDS.sleep(5);
        // check that the snapshot metadata row has an exclusive lock
        exclusiveLock = snapshotDao.getExclusiveLockState(UUID.fromString(snapshotSummary.getId()));
        // lookup the snapshot file by id and check that it's NOT found
        failedGetSnapshotByIdResponse = connectedOperations.lookupSnapshotFileRaw(snapshotSummary.getId(), drsId.getFsObjectId());
        // lookup the snapshot file by path and check that it's NOT found
        failedGetSnapshotByPathResponse = connectedOperations.lookupSnapshotFileByPathRaw(snapshotSummary.getId(), filePath, 0);
    } finally {
        // disable hang in DeleteSnapshotPrimaryDataStep; done in finally so an exception thrown
        // while the hang is enabled cannot leave the delete flight stuck
        configService.setFault(ConfigEnum.SNAPSHOT_DELETE_LOCK_CONFLICT_CONTINUE_FAULT.name(), true);
    }
    // ====================================================
    // check that the snapshot metadata row has an exclusive lock after kicking off the delete
    assertNotNull("snapshot row is exclusively locked", exclusiveLock);
    assertEquals("Snapshot file NOT found by DRS id lookup", HttpStatus.NOT_FOUND, HttpStatus.valueOf(failedGetSnapshotByIdResponse.getStatus()));
    assertEquals("Snapshot file NOT found by path lookup", HttpStatus.NOT_FOUND, HttpStatus.valueOf(failedGetSnapshotByPathResponse.getStatus()));
    // check the response from the snapshot delete request
    MockHttpServletResponse deleteResponse = connectedOperations.validateJobModelAndWait(deleteResult);
    DeleteResponseModel deleteResponseModel = connectedOperations.handleSuccessCase(deleteResponse, DeleteResponseModel.class);
    assertEquals("Snapshot delete returned successfully", DeleteResponseModel.ObjectStateEnum.DELETED, deleteResponseModel.getObjectState());
    // delete the dataset and check that it succeeds
    connectedOperations.deleteTestDataset(datasetRefSummary.getId());
    // remove the file from the connectedoperation bookkeeping list
    connectedOperations.removeFile(datasetRefSummary.getId(), fileModel.getFileId());
    // try to fetch the snapshot again and confirm nothing is returned
    connectedOperations.getSnapshotExpectError(snapshotSummary.getId(), HttpStatus.NOT_FOUND);
    // try to fetch the dataset again and confirm nothing is returned
    connectedOperations.getDatasetExpectError(datasetRefSummary.getId(), HttpStatus.NOT_FOUND);
}
Also used : SnapshotSummaryModel(bio.terra.model.SnapshotSummaryModel) CoreMatchers.containsString(org.hamcrest.CoreMatchers.containsString) BlobInfo(com.google.cloud.storage.BlobInfo) FileLoadModel(bio.terra.model.FileLoadModel) IngestRequestModel(bio.terra.model.IngestRequestModel) MvcResult(org.springframework.test.web.servlet.MvcResult) URI(java.net.URI) FileModel(bio.terra.model.FileModel) Storage(com.google.cloud.storage.Storage) DrsId(bio.terra.service.filedata.DrsId) DatasetSummaryModel(bio.terra.model.DatasetSummaryModel) DRSObject(bio.terra.model.DRSObject) MockHttpServletResponse(org.springframework.mock.web.MockHttpServletResponse) DeleteResponseModel(bio.terra.model.DeleteResponseModel) SpringBootTest(org.springframework.boot.test.context.SpringBootTest) Test(org.junit.Test)

Example 18 with IngestRequestModel

use of bio.terra.model.IngestRequestModel in project jade-data-repo by DataBiosphere.

the class AccessTest method fileAclTest.

/**
 * Ingests a file and a table row referencing it, exposes the row through a snapshot shared
 * with the reader, then checks that the reader can read the underlying blob through its
 * DRS access URI while the discoverer cannot.
 *
 * @throws Exception if any fixture call, upload, or query fails
 */
@Test
public void fileAclTest() throws Exception {
    makeAclTestDataset();
    dataRepoFixtures.addDatasetPolicyMember(steward(), datasetSummaryModel.getId(), IamRole.CUSTODIAN, custodian().getEmail());

    // Ingest a file into the dataset
    String ingestBucketUri = "gs://" + testConfiguration.getIngestbucket();
    String sourceFileUri = ingestBucketUri + "/files/File Design Notes.pdf";
    FileModel ingestedFile = dataRepoFixtures.ingestFile(steward(), datasetSummaryModel.getId(), profileId, sourceFileUri, "/foo/bar");

    // Upload a one-row JSON file for the 'file' table whose file_ref points at the ingested file
    String rowJson = String.format("{\"file_id\":\"foo\",\"file_ref\":\"%s\"}", ingestedFile.getFileId());
    String scratchPath = "scratch/file" + UUID.randomUUID() + ".json";
    BlobId scratchBlobId = BlobId.of(testConfiguration.getIngestbucket(), scratchPath);
    BlobInfo scratchBlob = BlobInfo.newBuilder(scratchBlobId).build();
    Storage storage = StorageOptions.getDefaultInstance().getService();
    try (WriteChannel channel = storage.writer(scratchBlob)) {
        channel.write(ByteBuffer.wrap(rowJson.getBytes(StandardCharsets.UTF_8)));
    }

    // Ingest that row and confirm exactly one row landed
    IngestRequestModel ingestRequest = dataRepoFixtures.buildSimpleIngest("file", scratchPath);
    IngestResponseModel ingestResponse = dataRepoFixtures.ingestJsonData(steward(), datasetSummaryModel.getId(), ingestRequest);
    assertThat("1 Row was ingested", ingestResponse.getRowCount(), equalTo(1L));

    // Create a snapshot exposing the one row and grant read access to our reader.
    SnapshotSummaryModel snapshotSummary = dataRepoFixtures.createSnapshot(custodian(), datasetSummaryModel, "file-acl-test-snapshot.json");
    snapshotIds.add(snapshotSummary.getId());
    SnapshotModel snapshot = dataRepoFixtures.getSnapshot(custodian(), snapshotSummary.getId());
    dataRepoFixtures.addSnapshotPolicyMember(custodian(), snapshot.getId(), IamRole.READER, reader().getEmail());

    AuthenticatedUserRequest readerRequest = new AuthenticatedUserRequest()
        .email(reader().getEmail())
        .token(Optional.of(readerToken));
    boolean readerAuthorized = iamService.isAuthorized(readerRequest, IamResourceType.DATASNAPSHOT, snapshot.getId(), IamAction.READ_DATA);
    assertTrue("correctly added reader", readerAuthorized);

    // The reader does not have permission to make queries in any project,
    // so we have to use the custodian to look up the DRS id.
    BigQuery custodianBigQuery = BigQueryFixtures.getBigQuery(snapshot.getDataProject(), custodianToken);
    // Return value intentionally unused, as in the original test.
    BigQueryFixtures.hasAccess(custodianBigQuery, snapshot.getDataProject(), snapshot.getName());

    // Read the DRS id from the file ref column in the 'file' table.
    String drsObjectId = BigQueryFixtures.queryForDrsId(custodianBigQuery, snapshot, "file", "file_ref");

    // Use DRS API to lookup the file by DRS ID (pulled out of the URI).
    DRSObject drsObject = dataRepoFixtures.drsGetObject(reader(), drsObjectId);
    String gsUri = TestUtils.validateDrsAccessMethods(drsObject.getAccessMethods());

    // Split the URI on '/' into at most 4 parts; index 2 is used as the bucket and index 3 as
    // the blob name (assumes the form gs://<bucket>/<blob>).
    String[] uriParts = gsUri.split("/", 4);
    BlobId fileBlobId = BlobId.of(uriParts[2], uriParts[3]);

    // Try to read the file of the gs path as reader and discoverer
    Storage readerStorage = getStorage(readerToken);
    assertTrue("Reader can read some bytes of the file", canReadBlobRetry(readerStorage, fileBlobId));
    Storage discovererStorage = getStorage(discovererToken);
    assertFalse("Discoverer can not read the file", canReadBlob(discovererStorage, fileBlobId));
}
Also used : SnapshotSummaryModel(bio.terra.model.SnapshotSummaryModel) BigQuery(com.google.cloud.bigquery.BigQuery) BlobInfo(com.google.cloud.storage.BlobInfo) IngestRequestModel(bio.terra.model.IngestRequestModel) SnapshotModel(bio.terra.model.SnapshotModel) IngestResponseModel(bio.terra.model.IngestResponseModel) FileModel(bio.terra.model.FileModel) Storage(com.google.cloud.storage.Storage) WriteChannel(com.google.cloud.WriteChannel) DRSObject(bio.terra.model.DRSObject) BlobId(com.google.cloud.storage.BlobId) SpringBootTest(org.springframework.boot.test.context.SpringBootTest) Test(org.junit.Test)

Example 19 with IngestRequestModel

use of bio.terra.model.IngestRequestModel in project jade-data-repo by DataBiosphere.

the class AccessTest method checkShared.

/**
 * Checks BigQuery visibility before and after sharing: the custodian cannot see the dataset's
 * BigQuery dataset until added as custodian, and the reader cannot see the snapshot's BigQuery
 * dataset until added as reader.
 *
 * @throws Exception if any fixture call fails
 */
@Test
public void checkShared() throws Exception {
    makeIngestTestDataset();

    // Load the participant and sample tables as the steward.
    IngestRequestModel participantIngest = dataRepoFixtures.buildSimpleIngest("participant", "ingest-test/ingest-test-participant.json");
    dataRepoFixtures.ingestJsonData(steward(), datasetId, participantIngest);
    IngestRequestModel sampleIngest = dataRepoFixtures.buildSimpleIngest("sample", "ingest-test/ingest-test-sample.json");
    dataRepoFixtures.ingestJsonData(steward(), datasetId, sampleIngest);

    DatasetModel dataset = dataRepoFixtures.getDataset(steward(), datasetId);
    String datasetProject = dataset.getDataProject();
    String bqDatasetName = "datarepo_" + dataset.getName();
    BigQuery custodianBigQuery = BigQueryFixtures.getBigQuery(datasetProject, custodianToken);

    // Before sharing, the existence check is expected to fail with IllegalStateException.
    try {
        BigQueryFixtures.datasetExists(custodianBigQuery, datasetProject, bqDatasetName);
        fail("custodian shouldn't be able to access bq dataset before it is shared with them");
    } catch (IllegalStateException expected) {
        String expectedMessage = "existence check failed for " + bqDatasetName;
        assertThat("checking message for pdao exception error", expected.getMessage(), equalTo(expectedMessage));
    }

    // Share the dataset with the custodian and verify access.
    dataRepoFixtures.addDatasetPolicyMember(steward(), datasetId, IamRole.CUSTODIAN, custodian().getEmail());
    DataRepoResponse<EnumerateDatasetModel> datasetEnumeration = dataRepoFixtures.enumerateDatasetsRaw(custodian());
    assertThat("Custodian is authorized to enumerate datasets", datasetEnumeration.getStatusCode(), equalTo(HttpStatus.OK));
    boolean custodianHasAccess = BigQueryFixtures.hasAccess(custodianBigQuery, datasetProject, bqDatasetName);
    assertThat("custodian can access the bq snapshot after it has been shared", custodianHasAccess, equalTo(true));

    // Create a snapshot as custodian; the reader must not see it yet.
    SnapshotSummaryModel snapshotSummary = dataRepoFixtures.createSnapshot(custodian(), datasetSummaryModel, "ingest-test-snapshot.json");
    SnapshotModel snapshot = dataRepoFixtures.getSnapshot(custodian(), snapshotSummary.getId());
    String snapshotProject = snapshot.getDataProject();
    BigQuery readerBigQuery = BigQueryFixtures.getBigQuery(snapshotProject, readerToken);
    try {
        BigQueryFixtures.datasetExists(readerBigQuery, snapshotProject, snapshot.getName());
        fail("reader shouldn't be able to access bq dataset before it is shared with them");
    } catch (IllegalStateException expected) {
        String expectedMessage = "existence check failed for ".concat(snapshotSummary.getName());
        assertThat("checking message for exception error", expected.getMessage(), equalTo(expectedMessage));
    }

    // Share the snapshot with the reader and verify both IAM and BigQuery access.
    dataRepoFixtures.addSnapshotPolicyMember(custodian(), snapshotSummary.getId(), IamRole.READER, reader().getEmail());
    AuthenticatedUserRequest readerRequest = new AuthenticatedUserRequest()
        .email(reader().getEmail())
        .token(Optional.of(readerToken));
    boolean readerAuthorized = iamService.isAuthorized(readerRequest, IamResourceType.DATASNAPSHOT, snapshotSummary.getId(), IamAction.READ_DATA);
    assertThat("correctly added reader", readerAuthorized, equalTo(true));
    boolean readerHasAccess = BigQueryFixtures.hasAccess(readerBigQuery, snapshotProject, snapshot.getName());
    assertThat("reader can access the snapshot after it has been shared", readerHasAccess, equalTo(true));
}
Also used : BigQuery(com.google.cloud.bigquery.BigQuery) SnapshotSummaryModel(bio.terra.model.SnapshotSummaryModel) IngestRequestModel(bio.terra.model.IngestRequestModel) EnumerateDatasetModel(bio.terra.model.EnumerateDatasetModel) DatasetModel(bio.terra.model.DatasetModel) SnapshotModel(bio.terra.model.SnapshotModel) EnumerateDatasetModel(bio.terra.model.EnumerateDatasetModel) SpringBootTest(org.springframework.boot.test.context.SpringBootTest) Test(org.junit.Test)

Example 20 with IngestRequestModel

use of bio.terra.model.IngestRequestModel in project jade-data-repo by DataBiosphere.

the class AccessTest method checkCustodianPermissions.

/**
 * Checks that a custodian cannot see the dataset's BigQuery dataset until added to the
 * custodian policy, and afterwards can query one of its tables and gets 7 rows back.
 *
 * @throws Exception if any fixture call or query fails
 */
@Test
public void checkCustodianPermissions() throws Exception {
    makeIngestTestDataset();

    // Load the participant and sample tables as the steward.
    IngestRequestModel participantIngest = dataRepoFixtures.buildSimpleIngest("participant", "ingest-test/ingest-test-participant.json");
    dataRepoFixtures.ingestJsonData(steward(), datasetId, participantIngest);
    IngestRequestModel sampleIngest = dataRepoFixtures.buildSimpleIngest("sample", "ingest-test/ingest-test-sample.json");
    dataRepoFixtures.ingestJsonData(steward(), datasetId, sampleIngest);

    DatasetModel dataset = dataRepoFixtures.getDataset(steward(), datasetId);
    String datasetProject = dataset.getDataProject();
    String bqDatasetName = PdaoConstant.PDAO_PREFIX + dataset.getName();
    BigQuery custodianBigQuery = BigQueryFixtures.getBigQuery(datasetProject, custodianToken);

    // Before sharing, the existence check is expected to fail with IllegalStateException.
    try {
        BigQueryFixtures.datasetExists(custodianBigQuery, datasetProject, bqDatasetName);
        fail("custodian shouldn't be able to access bq dataset before it is shared with them");
    } catch (IllegalStateException expected) {
        String expectedMessage = "existence check failed for " + bqDatasetName;
        assertThat("checking message for pdao exception error", expected.getMessage(), equalTo(expectedMessage));
    }

    // Share the dataset with the custodian and verify access.
    dataRepoFixtures.addDatasetPolicyMember(steward(), datasetId, IamRole.CUSTODIAN, custodian().getEmail());
    DataRepoResponse<EnumerateDatasetModel> datasetEnumeration = dataRepoFixtures.enumerateDatasetsRaw(custodian());
    assertThat("Custodian is authorized to enumerate datasets", datasetEnumeration.getStatusCode(), equalTo(HttpStatus.OK));
    boolean custodianHasAccess = BigQueryFixtures.hasAccess(custodianBigQuery, datasetProject, bqDatasetName);
    assertTrue("custodian can access the bq snapshot after it has been shared", custodianHasAccess);

    // Build a table ref for the schema's table at index 1 (the original comment says this is
    // the "sample" table; that depends on the dataset's schema order) and query it.
    String queriedTableName = dataset.getSchema().getTables().get(1).getName();
    String tableRef = BigQueryFixtures.makeTableRef(dataset, queriedTableName);
    String sql = String.format("SELECT * FROM %s LIMIT %s", tableRef, 1000);
    TableResult results = BigQueryFixtures.query(sql, custodianBigQuery);
    Assert.assertEquals(7, results.getTotalRows());
}
Also used : BigQuery(com.google.cloud.bigquery.BigQuery) TableResult(com.google.cloud.bigquery.TableResult) IngestRequestModel(bio.terra.model.IngestRequestModel) EnumerateDatasetModel(bio.terra.model.EnumerateDatasetModel) DatasetModel(bio.terra.model.DatasetModel) EnumerateDatasetModel(bio.terra.model.EnumerateDatasetModel) SpringBootTest(org.springframework.boot.test.context.SpringBootTest) Test(org.junit.Test)

Aggregations

- IngestRequestModel (bio.terra.model.IngestRequestModel): 37
- Test (org.junit.Test): 27
- SpringBootTest (org.springframework.boot.test.context.SpringBootTest): 27
- IngestResponseModel (bio.terra.model.IngestResponseModel): 16
- BlobInfo (com.google.cloud.storage.BlobInfo): 11
- DatasetSummaryModel (bio.terra.model.DatasetSummaryModel): 10
- SnapshotSummaryModel (bio.terra.model.SnapshotSummaryModel): 10
- CoreMatchers.containsString (org.hamcrest.CoreMatchers.containsString): 9
- JobModel (bio.terra.model.JobModel): 8
- Storage (com.google.cloud.storage.Storage): 8
- MockHttpServletResponse (org.springframework.mock.web.MockHttpServletResponse): 7
- MvcResult (org.springframework.test.web.servlet.MvcResult): 7
- SnapshotModel (bio.terra.model.SnapshotModel): 6
- Dataset (bio.terra.service.dataset.Dataset): 6
- ErrorModel (bio.terra.model.ErrorModel): 4
- FileModel (bio.terra.model.FileModel): 4
- DatasetTable (bio.terra.service.dataset.DatasetTable): 4
- BigQuery (com.google.cloud.bigquery.BigQuery): 4
- Blob (com.google.cloud.storage.Blob): 4
- DRSObject (bio.terra.model.DRSObject): 3