Use of bio.terra.model.FileLoadModel in project jade-data-repo by DataBiosphere.
Class IngestFilePrimaryDataStep, method doStep.
@Override
public StepResult doStep(FlightContext context) {
  FlightMap inputParameters = context.getInputParameters();
  FileLoadModel fileLoadModel =
      inputParameters.get(JobMapKeys.REQUEST.getKeyName(), FileLoadModel.class);
  FlightMap workingMap = context.getWorkingMap();
  String fileId = workingMap.get(FileMapKeys.FILE_ID, String.class);
  Boolean loadComplete = workingMap.get(FileMapKeys.LOAD_COMPLETED, Boolean.class);
  if (loadComplete == null || !loadComplete) {
    // The bucket has been selected for this file. In the single-file load case, the info
    // is stored in the working map. In the bulk load case, the info is stored in the
    // input parameters.
    GoogleBucketResource bucketResource =
        inputParameters.get(FileMapKeys.BUCKET_INFO, GoogleBucketResource.class);
    if (bucketResource == null) {
      bucketResource = workingMap.get(FileMapKeys.BUCKET_INFO, GoogleBucketResource.class);
    }
    FSFileInfo fsFileInfo;
    if (configService.testInsertFault(ConfigEnum.LOAD_SKIP_FILE_LOAD)) {
      // Fault insertion for testing: skip the actual GCS copy and fabricate
      // file metadata with recognizable sentinel values.
      fsFileInfo =
          new FSFileInfo()
              .fileId(fileId)
              .bucketResourceId(bucketResource.getResourceId().toString())
              .checksumCrc32c(null)
              .checksumMd5("baaaaaad")
              .createdDate(Instant.now().toString())
              .gspath("gs://path")
              .size(100L);
    } else {
      fsFileInfo = gcsPdao.copyFile(dataset, fileLoadModel, fileId, bucketResource);
    }
    workingMap.put(FileMapKeys.FILE_INFO, fsFileInfo);
  }
  return StepResult.getStepResultSuccess();
}
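The LOAD_SKIP_FILE_LOAD branch above fabricates file metadata instead of copying bytes. A minimal sketch of how a connected test might arm that fault, assuming configService here is the same configuration service the tests below drive; the setFault(name, true) call mirrors their FILE_INGEST_LOCK_CONFLICT_STOP_FAULT usage and is not taken from IngestFilePrimaryDataStep itself:

// Hypothetical test setup (not from the source): arm the skip-load fault so
// IngestFilePrimaryDataStep returns the stubbed FSFileInfo instead of calling
// gcsPdao.copyFile. A subsequent ingest will carry the sentinel values above
// ("baaaaaad" md5, gs://path, size 100).
configService.setFault(ConfigEnum.LOAD_SKIP_FILE_LOAD.name(), true);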
Use of bio.terra.model.FileLoadModel in project jade-data-repo by DataBiosphere.
Class DatasetConnectedTest, method testSharedLockFileIngest.
@Test
public void testSharedLockFileIngest() throws Exception {
  // NO ASSERTS inside the block below where the hang is enabled, to reduce the chance
  // of failing before disabling the hang
  // ====================================================
  // enable hang in IngestFileIdStep
  configService.setFault(ConfigEnum.FILE_INGEST_LOCK_CONFLICT_STOP_FAULT.name(), true);

  // try to ingest a file
  URI sourceUri = new URI("gs", "jade-testdata", "/fileloadprofiletest/1KBfile.txt", null, null);
  String targetPath1 = "/mm/" + Names.randomizeName("testdir") + "/testfile1.txt";
  FileLoadModel fileLoadModel1 = new FileLoadModel()
      .sourcePath(sourceUri.toString())
      .description("file 1")
      .mimeType("text/plain")
      .targetPath(targetPath1)
      .profileId(billingProfile.getId());
  MvcResult result1 = mvc.perform(
          post("/api/repository/v1/datasets/" + summaryModel.getId() + "/files")
              .contentType(MediaType.APPLICATION_JSON)
              .content(TestUtils.mapToJson(fileLoadModel1)))
      .andReturn();
  // give the flight time to launch
  TimeUnit.SECONDS.sleep(5);

  // check that the dataset metadata row has a shared lock
  // note: asserts are below, outside the hang block
  UUID datasetId = UUID.fromString(summaryModel.getId());
  String exclusiveLock1 = datasetDao.getExclusiveLock(datasetId);
  String[] sharedLocks1 = datasetDao.getSharedLocks(datasetId);

  // try to ingest a separate file
  String targetPath2 = "/mm/" + Names.randomizeName("testdir") + "/testfile2.txt";
  FileLoadModel fileLoadModel2 = new FileLoadModel()
      .sourcePath(sourceUri.toString())
      .description("file 2")
      .mimeType("text/plain")
      .targetPath(targetPath2)
      .profileId(billingProfile.getId());
  MvcResult result2 = mvc.perform(
          post("/api/repository/v1/datasets/" + summaryModel.getId() + "/files")
              .contentType(MediaType.APPLICATION_JSON)
              .content(TestUtils.mapToJson(fileLoadModel2)))
      .andReturn();
  // give the flight time to launch
  TimeUnit.SECONDS.sleep(5);

  // check that the dataset metadata row has two shared locks
  // note: asserts are below, outside the hang block
  String exclusiveLock2 = datasetDao.getExclusiveLock(datasetId);
  String[] sharedLocks2 = datasetDao.getSharedLocks(datasetId);

  // try to delete the dataset; this should fail with a lock exception
  // note: asserts are below, outside the hang block
  MvcResult result3 = mvc.perform(delete("/api/repository/v1/datasets/" + summaryModel.getId()))
      .andReturn();
  // give the flight time to launch
  TimeUnit.SECONDS.sleep(5);

  // disable hang in IngestFileIdStep
  configService.setFault(ConfigEnum.FILE_INGEST_LOCK_CONFLICT_CONTINUE_FAULT.name(), true);
  // ====================================================

  // check that the dataset metadata row had a shared lock during the first ingest request
  assertNull("dataset row has no exclusive lock", exclusiveLock1);
  assertNotNull("dataset row has a shared lock taken out", sharedLocks1);
  assertEquals("dataset row has one shared lock", 1, sharedLocks1.length);

  // check that the dataset metadata row had two shared locks while both ingests were running
  assertNull("dataset row has no exclusive lock", exclusiveLock2);
  assertNotNull("dataset row has a shared lock taken out", sharedLocks2);
  assertEquals("dataset row has two shared locks", 2, sharedLocks2.length);

  // check the response from the first ingest request
  MockHttpServletResponse response1 = connectedOperations.validateJobModelAndWait(result1);
  FileModel fileModel1 = connectedOperations.handleSuccessCase(response1, FileModel.class);
  assertEquals("file description 1 correct", fileLoadModel1.getDescription(), fileModel1.getDescription());

  // check the response from the second ingest request
  MockHttpServletResponse response2 = connectedOperations.validateJobModelAndWait(result2);
  FileModel fileModel2 = connectedOperations.handleSuccessCase(response2, FileModel.class);
  assertEquals("file description 2 correct", fileLoadModel2.getDescription(), fileModel2.getDescription());

  // check the response from the delete request; confirm it failed with a lock exception
  MockHttpServletResponse response3 = connectedOperations.validateJobModelAndWait(result3);
  ErrorModel errorModel3 = connectedOperations.handleFailureCase(response3, HttpStatus.INTERNAL_SERVER_ERROR);
  assertThat("delete failed on lock exception", errorModel3.getMessage(), startsWith("Failed to lock the dataset"));

  // delete the dataset again and check that it succeeds now that there are no outstanding locks
  connectedOperations.deleteTestDataset(summaryModel.getId());

  // try to fetch the dataset again and confirm nothing is returned
  connectedOperations.getDatasetExpectError(summaryModel.getId(), HttpStatus.NOT_FOUND);
}
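The fixed five-second sleeps above trade determinism for simplicity. A hypothetical polling helper (not in the project) that waits for the expected lock count instead, built only on the datasetDao calls this test already uses:

// Hypothetical helper: poll until the dataset shows the expected number of
// shared locks, or fail after ~30 seconds. Avoids over- or under-sleeping.
private String[] waitForSharedLocks(UUID datasetId, int expectedCount) throws InterruptedException {
  for (int i = 0; i < 30; i++) {
    String[] sharedLocks = datasetDao.getSharedLocks(datasetId);
    if (sharedLocks.length >= expectedCount) {
      return sharedLocks;
    }
    TimeUnit.SECONDS.sleep(1);
  }
  throw new IllegalStateException("shared lock count never reached " + expectedCount);
}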
Use of bio.terra.model.FileLoadModel in project jade-data-repo by DataBiosphere.
Class DatasetConnectedTest, method testSharedLockFileDelete.
@Test
public void testSharedLockFileDelete() throws Exception {
  // ingest two files
  URI sourceUri = new URI("gs", "jade-testdata", "/fileloadprofiletest/1KBfile.txt", null, null);
  String targetPath1 = "/mm/" + Names.randomizeName("testdir") + "/testfile1.txt";
  FileLoadModel fileLoadModel1 = new FileLoadModel()
      .sourcePath(sourceUri.toString())
      .description("file 1")
      .mimeType("text/plain")
      .targetPath(targetPath1)
      .profileId(billingProfile.getId());
  FileModel fileModel1 = connectedOperations.ingestFileSuccess(summaryModel.getId(), fileLoadModel1);

  String targetPath2 = "/mm/" + Names.randomizeName("testdir") + "/testfile2.txt";
  FileLoadModel fileLoadModel2 = new FileLoadModel()
      .sourcePath(sourceUri.toString())
      .description("file 2")
      .mimeType("text/plain")
      .targetPath(targetPath2)
      .profileId(billingProfile.getId());
  FileModel fileModel2 = connectedOperations.ingestFileSuccess(summaryModel.getId(), fileLoadModel2);

  // NO ASSERTS inside the block below where the hang is enabled, to reduce the chance
  // of failing before disabling the hang
  // ====================================================
  // enable hang in DeleteFileLookupStep
  configService.setFault(ConfigEnum.FILE_DELETE_LOCK_CONFLICT_STOP_FAULT.name(), true);

  // try to delete the first file
  MvcResult result1 = mvc.perform(
          delete("/api/repository/v1/datasets/" + summaryModel.getId() + "/files/" + fileModel1.getFileId()))
      .andReturn();
  // give the flight time to launch
  TimeUnit.SECONDS.sleep(5);

  // check that the dataset metadata row has a shared lock
  // note: asserts are below, outside the hang block
  UUID datasetId = UUID.fromString(summaryModel.getId());
  String exclusiveLock1 = datasetDao.getExclusiveLock(datasetId);
  String[] sharedLocks1 = datasetDao.getSharedLocks(datasetId);

  // try to delete the second file
  MvcResult result2 = mvc.perform(
          delete("/api/repository/v1/datasets/" + summaryModel.getId() + "/files/" + fileModel2.getFileId()))
      .andReturn();
  // give the flight time to launch
  TimeUnit.SECONDS.sleep(5);

  // check that the dataset metadata row has two shared locks
  // note: asserts are below, outside the hang block
  String exclusiveLock2 = datasetDao.getExclusiveLock(datasetId);
  String[] sharedLocks2 = datasetDao.getSharedLocks(datasetId);

  // try to delete the dataset; this should fail with a lock exception
  // note: asserts are below, outside the hang block
  MvcResult result3 = mvc.perform(delete("/api/repository/v1/datasets/" + summaryModel.getId()))
      .andReturn();
  // give the flight time to launch
  TimeUnit.SECONDS.sleep(5);

  // disable hang in DeleteFileLookupStep
  configService.setFault(ConfigEnum.FILE_DELETE_LOCK_CONFLICT_CONTINUE_FAULT.name(), true);
  // ====================================================

  // check that the dataset metadata row had a shared lock during the first file delete
  assertNull("dataset row has no exclusive lock", exclusiveLock1);
  assertNotNull("dataset row has a shared lock taken out", sharedLocks1);
  assertEquals("dataset row has one shared lock", 1, sharedLocks1.length);

  // check that the dataset metadata row had two shared locks while both file deletes were running
  assertNull("dataset row has no exclusive lock", exclusiveLock2);
  assertNotNull("dataset row has a shared lock taken out", sharedLocks2);
  assertEquals("dataset row has two shared locks", 2, sharedLocks2.length);

  // check the response from the first delete file request
  MockHttpServletResponse response1 = connectedOperations.validateJobModelAndWait(result1);
  assertEquals(HttpStatus.OK.value(), response1.getStatus()); // expected value goes first
  connectedOperations.checkDeleteResponse(response1);
  connectedOperations.removeFile(summaryModel.getId(), fileModel1.getFileId());

  // check the response from the second delete file request
  MockHttpServletResponse response2 = connectedOperations.validateJobModelAndWait(result2);
  assertEquals(HttpStatus.OK.value(), response2.getStatus()); // expected value goes first
  connectedOperations.checkDeleteResponse(response2);
  connectedOperations.removeFile(summaryModel.getId(), fileModel2.getFileId());

  // check that the dataset delete launched while the shared locks were held failed with a lock exception
  MockHttpServletResponse response3 = connectedOperations.validateJobModelAndWait(result3);
  ErrorModel errorModel3 = connectedOperations.handleFailureCase(response3, HttpStatus.INTERNAL_SERVER_ERROR);
  assertThat("delete failed on lock exception", errorModel3.getMessage(), startsWith("Failed to lock the dataset"));

  // delete the dataset again and check that it succeeds now that there are no outstanding locks
  connectedOperations.deleteTestDataset(summaryModel.getId());

  // try to fetch the dataset again and confirm nothing is returned
  connectedOperations.getDatasetExpectError(summaryModel.getId(), HttpStatus.NOT_FOUND);
}
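Both lock tests repeat the same launch-then-wait sequence before snapshotting lock state. A hypothetical extraction of that pattern (the helper name is ours, not the project's; the request parameter is Spring's MockHttpServletRequestBuilder, which both post(...) and delete(...) return):

// Hypothetical helper: submit a request, give its flight time to launch, and
// return the MvcResult for later validation with validateJobModelAndWait.
private MvcResult launchFlight(MockHttpServletRequestBuilder request) throws Exception {
  MvcResult result = mvc.perform(request).andReturn();
  TimeUnit.SECONDS.sleep(5); // give the flight time to launch
  return result;
}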
Use of bio.terra.model.FileLoadModel in project jade-data-repo by DataBiosphere.
Class EncodeFileTest, method loadFiles.
private String loadFiles(String datasetId, boolean insertBadId, boolean insertBadRow) throws Exception {
  // Open the source data from the bucket and open target data in the bucket.
  // Read one line at a time and unpack it into a POJO, ingest the files
  // (substituting the file ids), then generate JSON and write the line to scratch.
  String targetPath = "scratch/file" + UUID.randomUUID().toString() + ".json";

  // For a bigger test, use encodetest/file.json (1000+ files).
  // For normal testing, use encodetest/file_small.json (10 files).
  Blob sourceBlob = storage.get(BlobId.of(testConfig.getIngestbucket(), "encodetest/file_small.json"));
  assertNotNull("source blob not null", sourceBlob);

  BlobInfo targetBlobInfo = BlobInfo.newBuilder(BlobId.of(testConfig.getIngestbucket(), targetPath)).build();
  try (WriteChannel writer = storage.writer(targetBlobInfo);
      BufferedReader reader = new BufferedReader(Channels.newReader(sourceBlob.reader(), "UTF-8"))) {
    boolean badIdInserted = false;
    boolean badRowInserted = false;
    String line = null;
    while ((line = reader.readLine()) != null) {
      EncodeFileIn encodeFileIn = TestUtils.mapFromJson(line, EncodeFileIn.class);
      String bamFileId = null;
      String bamiFileId = null;
      if (encodeFileIn.getFile_gs_path() != null) {
        FileLoadModel fileLoadModel = makeFileLoadModel(encodeFileIn.getFile_gs_path());
        FileModel bamFile = connectedOperations.ingestFileSuccess(datasetId, fileLoadModel);
        // Fault insertion on request: corrupt one id if asked to do so.
        if (insertBadId && !badIdInserted) {
          bamFileId = bamFile.getFileId() + ID_GARBAGE;
          badIdInserted = true;
        } else {
          bamFileId = bamFile.getFileId();
        }
      }
      if (encodeFileIn.getFile_index_gs_path() != null) {
        FileLoadModel fileLoadModel = makeFileLoadModel(encodeFileIn.getFile_index_gs_path());
        FileModel bamiFile = connectedOperations.ingestFileSuccess(datasetId, fileLoadModel);
        bamiFileId = bamiFile.getFileId();
      }
      EncodeFileOut encodeFileOut = new EncodeFileOut(encodeFileIn, bamFileId, bamiFileId);
      String fileLine;
      if (insertBadRow && !badRowInserted) {
        // Write one deliberately malformed JSON row, mirroring the bad-id pattern above.
        fileLine = "{\"fribbitz\";\"ABCDEFG\"}\n";
        badRowInserted = true; // without this flag set, every row would be corrupted
      } else {
        fileLine = TestUtils.mapToJson(encodeFileOut) + "\n";
      }
      writer.write(ByteBuffer.wrap(fileLine.getBytes("UTF-8")));
    }
  }
  return targetPath;
}
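makeFileLoadModel is called above but not included in this listing. A minimal sketch, assuming it follows the same builder pattern as FileOperationTest.makeFileLoad below; the description, mime type, target-path derivation, and the profileId fixture field are all assumptions:

// Hypothetical sketch of the missing helper: build a FileLoadModel for one
// gs: source path. The real implementation may differ.
private FileLoadModel makeFileLoadModel(String gspath) {
  URI uri = URI.create(gspath); // e.g. gs://bucket/encodetest/foo.bam
  return new FileLoadModel()
      .sourcePath(gspath)
      .description("encode file") // assumption
      .mimeType("application/octet-stream") // assumption
      .targetPath(uri.getPath()) // assumption: reuse the source object path
      .profileId(profileId); // assumption: a billing profile id test fixture
}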
Use of bio.terra.model.FileLoadModel in project jade-data-repo by DataBiosphere.
Class FileOperationTest, method makeFileLoad.
private FileLoadModel makeFileLoad(String profileId) {
  String targetDir = Names.randomizeName("dir");
  String uri = "gs://" + testConfig.getIngestbucket() + "/files/" + testPdfFile;
  String targetPath = "/dd/files/" + targetDir + "/" + testPdfFile;
  return new FileLoadModel()
      .sourcePath(uri)
      .description(testDescription)
      .mimeType(testMimeType)
      .targetPath(targetPath)
      .profileId(profileId);
}
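An example of how this helper slots into the ingest flow, assembled from the patterns earlier in this listing rather than from FileOperationTest itself (billingProfile, summaryModel, and connectedOperations are the fixtures used in the DatasetConnectedTest snippets above):

// Usage sketch: build the request model, ingest it, and confirm the
// description round-trips, as the lock tests above do.
FileLoadModel fileLoadModel = makeFileLoad(billingProfile.getId());
FileModel fileModel = connectedOperations.ingestFileSuccess(summaryModel.getId(), fileLoadModel);
assertEquals("file description correct", fileLoadModel.getDescription(), fileModel.getDescription());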