Use of bio.terra.model.BulkLoadArrayResultModel in project jade-data-repo by DataBiosphere.
From the class IngestBulkArrayResponseStep, method makeLoadResult:
private BulkLoadArrayResultModel makeLoadResult(UUID loadId, FlightContext context) {
    // Get the summary stats and fill in our specific information
    BulkLoadResultModel summary = loadService.makeBulkLoadResult(loadId);
    summary.loadTag(loadTag).jobId(context.getFlightId());
    // Get the file load results
    List<BulkLoadFileResultModel> fileResults = loadService.makeBulkLoadFileArray(loadId);
    return new BulkLoadArrayResultModel()
        .loadSummary(summary)
        .loadFileResults(fileResults);
}
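For reference, a caller receiving this model can cross-check the per-file results against the roll-up summary. A minimal sketch, assuming the swagger-generated accessors getLoadSummary(), getLoadFileResults(), getState(), and getSucceededFiles() (standard for these generated models, but not shown in this excerpt):

BulkLoadArrayResultModel result = makeLoadResult(loadId, context);
// Count per-file successes and compare against the summary counts
long succeeded =
    result.getLoadFileResults().stream()
        .filter(fr -> fr.getState() == BulkLoadFileState.SUCCEEDED)
        .count();
assertThat(
    "per-file results agree with the summary",
    (int) succeeded,
    equalTo(result.getLoadSummary().getSucceededFiles()));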
Use of bio.terra.model.BulkLoadArrayResultModel in project jade-data-repo by DataBiosphere.
From the class EncodeFixture, method loadFiles:
private String loadFiles(
        String datasetId, String profileId, TestConfiguration.User user, Storage storage)
        throws Exception {
    // Open target data in bucket: build a random scratch path for the load file
    String rndSuffix = UUID.randomUUID().toString() + ".json";
    String loadData = "scratch/lf_loaddata" + rndSuffix;

    // Open the source data from the bucket.
    // For a bigger test use encodetest/file.json (1000+ files);
    // for normal testing use encodetest/file_small.json (10 files).
    Blob sourceBlob =
        storage.get(BlobId.of(testConfiguration.getIngestbucket(), "encodetest/file_small.json"));

    // Read one line at a time - unpack into POJO
    List<BulkLoadFileModel> loadArray = new ArrayList<>();
    List<EncodeFileIn> inArray = new ArrayList<>();
    try (BufferedReader reader =
            new BufferedReader(Channels.newReader(sourceBlob.reader(), "UTF-8"))) {
        String line = null;
        while ((line = reader.readLine()) != null) {
            EncodeFileIn encodeFileIn = TestUtils.mapFromJson(line, EncodeFileIn.class);
            inArray.add(encodeFileIn);
            if (encodeFileIn.getFile_gs_path() != null) {
                loadArray.add(makeFileModel(encodeFileIn.getFile_gs_path()));
            }
            if (encodeFileIn.getFile_index_gs_path() != null) {
                loadArray.add(makeFileModel(encodeFileIn.getFile_index_gs_path()));
            }
        }
    }

    // Ingest the files
    BulkLoadArrayRequestModel loadRequest =
        new BulkLoadArrayRequestModel()
            .loadArray(loadArray)
            .maxFailedFileLoads(0)
            .profileId(profileId)
            .loadTag("encodeFixture");
    BulkLoadArrayResultModel loadResult = dataRepoFixtures.bulkLoadArray(user, datasetId, loadRequest);
    Map<String, BulkLoadFileResultModel> resultMap = new HashMap<>();
    for (BulkLoadFileResultModel fileResult : loadResult.getLoadFileResults()) {
        resultMap.put(fileResult.getSourcePath(), fileResult);
    }

    // Substitute the file ids, generate JSON, and write each line to scratch
    try (GcsChannelWriter writer =
            new GcsChannelWriter(storage, testConfiguration.getIngestbucket(), loadData)) {
        for (EncodeFileIn encodeFileIn : inArray) {
            BulkLoadFileResultModel resultModel = resultMap.get(encodeFileIn.getFile_gs_path());
            String bamFileId = (resultModel == null) ? null : resultModel.getFileId();
            resultModel = resultMap.get(encodeFileIn.getFile_index_gs_path());
            String bamiFileId = (resultModel == null) ? null : resultModel.getFileId();
            EncodeFileOut encodeFileOut = new EncodeFileOut(encodeFileIn, bamFileId, bamiFileId);
            String fileLine = TestUtils.mapToJson(encodeFileOut) + "\n";
            writer.write(fileLine);
        }
    }
    return loadData;
}
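The makeFileModel helper called in the loop is not included in this excerpt. A minimal sketch of what it plausibly does, mapping a gs:// source path to a repo target path; the target-path convention, mimeType, and description values here are assumptions:

private BulkLoadFileModel makeFileModel(String gsPath) {
    // Strip "gs://<bucket>" and reuse the object path as the repo target path (assumed convention)
    String targetPath = gsPath.substring(gsPath.indexOf('/', "gs://".length()));
    return new BulkLoadFileModel()
        .sourcePath(gsPath)
        .targetPath(targetPath)
        .mimeType("application/octet-stream")
        .description("encode file");
}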
Use of bio.terra.model.BulkLoadArrayResultModel in project jade-data-repo by DataBiosphere.
From the class FileOperationTest, method arrayMultiFileLoadSuccessTest:
// -- array bulk load --
@Test
public void arrayMultiFileLoadSuccessTest() throws Exception {
    int fileCount = 10;

    // Test copying load_history data in chunks
    ConfigModel loadHistoryChunkSize =
        configService.getConfig(ConfigEnum.LOAD_HISTORY_COPY_CHUNK_SIZE.name());
    loadHistoryChunkSize.setParameter(new ConfigParameterModel().value("2"));
    ConfigGroupModel configGroupModel =
        new ConfigGroupModel()
            .label("FileOperationTestMultiFileLoad")
            .addGroupItem(loadHistoryChunkSize);
    configService.setConfig(configGroupModel);

    BulkLoadArrayRequestModel arrayLoad =
        makeSuccessArrayLoad("arrayMultiFileLoadSuccessTest", 0, fileCount);
    BulkLoadArrayResultModel result =
        connectedOperations.ingestArraySuccess(datasetSummary.getId(), arrayLoad);
    checkLoadSummary(result.getLoadSummary(), arrayLoad.getLoadTag(), fileCount, fileCount, 0, 0);

    Map<String, String> fileIdMap = new HashMap<>();
    for (BulkLoadFileResultModel fileResult : result.getLoadFileResults()) {
        checkFileResultSuccess(fileResult);
        fileIdMap.put(fileResult.getTargetPath(), fileResult.getFileId());
    }

    // Query the BigQuery datarepo_load_history table - it should reflect all files loaded above
    String columnToQuery = "file_id";
    TableResult queryLoadHistoryTableResult = queryLoadHistoryTable(columnToQuery);
    ArrayList<String> ids = new ArrayList<>();
    queryLoadHistoryTableResult.iterateAll()
        .forEach(r -> ids.add(r.get(columnToQuery).getStringValue()));
    assertThat(
        "number of files in datarepo_load_history table matches the load summary",
        ids.size(),
        equalTo(fileCount));
    for (String bqFileId : ids) {
        assertTrue(
            "fileIdMap contains the file_id from datarepo_load_history",
            fileIdMap.containsValue(bqFileId));
    }

    // Retry the successful load to make sure it still succeeds and does nothing
    BulkLoadArrayResultModel result2 =
        connectedOperations.ingestArraySuccess(datasetSummary.getId(), arrayLoad);
    checkLoadSummary(result2.getLoadSummary(), arrayLoad.getLoadTag(), fileCount, fileCount, 0, 0);
    for (BulkLoadFileResultModel fileResult : result.getLoadFileResults()) {
        checkFileResultSuccess(fileResult);
        assertThat(
            "file id matches the one from the first load",
            fileResult.getFileId(),
            equalTo(fileIdMap.get(fileResult.getTargetPath())));
    }
}
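The makeSuccessArrayLoad helper used above is also not shown in this excerpt. A plausible reconstruction under stated assumptions: the sourceBucket and profileModel fixture fields and the source/target path conventions are hypothetical; only the request-model setters come from the snippets above:

private BulkLoadArrayRequestModel makeSuccessArrayLoad(String tagBase, int startIndex, int fileCount) {
    String loadTag = tagBase + UUID.randomUUID().toString();
    List<BulkLoadFileModel> loadArray = new ArrayList<>();
    for (int i = startIndex; i < startIndex + fileCount; i++) {
        loadArray.add(
            new BulkLoadFileModel()
                .sourcePath("gs://" + sourceBucket + "/files/file" + i + ".txt") // hypothetical bucket field
                .targetPath("/" + loadTag + "/file" + i + ".txt")); // hypothetical target convention
    }
    return new BulkLoadArrayRequestModel()
        .profileId(profileModel.getId()) // billing profile fixture, assumed
        .loadTag(loadTag)
        .maxFailedFileLoads(0)
        .loadArray(loadArray);
}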
Use of bio.terra.model.BulkLoadArrayResultModel in project jade-data-repo by DataBiosphere.
From the class FileOperationTest, method arrayMultiFileLoadDoubleSuccessTest:
@Test
public void arrayMultiFileLoadDoubleSuccessTest() throws Exception {
    int fileCount = 8;
    int totalFileCount = fileCount * 2;
    BulkLoadArrayRequestModel arrayLoad1 = makeSuccessArrayLoad("arrayMultiDoubleSuccess", 0, fileCount);
    BulkLoadArrayRequestModel arrayLoad2 = makeSuccessArrayLoad("arrayMultiDoubleSuccess", fileCount, fileCount);
    String loadTag1 = arrayLoad1.getLoadTag();
    String loadTag2 = arrayLoad2.getLoadTag();
    String datasetId = datasetSummary.getId();

    // Launch both loads before waiting, so the two jobs run concurrently
    MvcResult result1 = connectedOperations.ingestArrayRaw(datasetId, arrayLoad1);
    MvcResult result2 = connectedOperations.ingestArrayRaw(datasetId, arrayLoad2);
    MockHttpServletResponse response1 = connectedOperations.validateJobModelAndWait(result1);
    MockHttpServletResponse response2 = connectedOperations.validateJobModelAndWait(result2);
    BulkLoadArrayResultModel resultModel1 =
        connectedOperations.handleSuccessCase(response1, BulkLoadArrayResultModel.class);
    BulkLoadArrayResultModel resultModel2 =
        connectedOperations.handleSuccessCase(response2, BulkLoadArrayResultModel.class);
    checkLoadSummary(resultModel1.getLoadSummary(), loadTag1, fileCount, fileCount, 0, 0);
    checkLoadSummary(resultModel2.getLoadSummary(), loadTag2, fileCount, fileCount, 0, 0);

    List<String> fileIds = new ArrayList<>();
    for (BulkLoadFileResultModel fileResult : resultModel1.getLoadFileResults()) {
        checkFileResultSuccess(fileResult);
        fileIds.add(fileResult.getFileId());
    }
    for (BulkLoadFileResultModel fileResult : resultModel2.getLoadFileResults()) {
        checkFileResultSuccess(fileResult);
        fileIds.add(fileResult.getFileId());
    }

    // Query the BigQuery datarepo_load_history table - it should reflect all files loaded above
    String columnToQuery = "file_id";
    TableResult queryLoadHistoryTableResult = queryLoadHistoryTable(columnToQuery);
    ArrayList<String> bqFileIds = new ArrayList<>();
    queryLoadHistoryTableResult.iterateAll()
        .forEach(r -> bqFileIds.add(r.get(columnToQuery).getStringValue()));
    assertThat(
        "number of files in datarepo_load_history table matches the load summaries",
        totalFileCount,
        equalTo(bqFileIds.size()));
    for (String bqFileId : bqFileIds) {
        assertTrue(
            "fileIds contains the file_id from datarepo_load_history",
            fileIds.contains(bqFileId));
    }
}
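checkLoadSummary and checkFileResultSuccess are assertion helpers not included in this excerpt. A sketch of the checks they plausibly make, assuming the standard swagger-generated getters on the summary and file-result models:

private void checkLoadSummary(
        BulkLoadResultModel summary, String loadTag, int total, int succeeded, int failed, int notTried) {
    assertThat("load tag matches", summary.getLoadTag(), equalTo(loadTag));
    assertThat("total file count matches", summary.getTotalFiles(), equalTo(total));
    assertThat("succeeded file count matches", summary.getSucceededFiles(), equalTo(succeeded));
    assertThat("failed file count matches", summary.getFailedFiles(), equalTo(failed));
    assertThat("not-tried file count matches", summary.getNotTriedFiles(), equalTo(notTried));
}

private void checkFileResultSuccess(BulkLoadFileResultModel fileResult) {
    assertThat("file state is SUCCEEDED", fileResult.getState(), equalTo(BulkLoadFileState.SUCCEEDED));
    assertNotNull("file id is filled in", fileResult.getFileId());
    assertNull("no error is reported", fileResult.getError());
}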
Use of bio.terra.model.BulkLoadArrayResultModel in project jade-data-repo by DataBiosphere.
From the class IngestBulkArrayResponseStep, method doStep:
@Override
public StepResult doStep(FlightContext context) {
    FlightMap workingMap = context.getWorkingMap();
    String loadIdString = workingMap.get(LoadMapKeys.LOAD_ID, String.class);
    UUID loadId = UUID.fromString(loadIdString);
    BulkLoadArrayResultModel result = makeLoadResult(loadId, context);
    workingMap.put(JobMapKeys.RESPONSE.getKeyName(), result);
    return StepResult.getStepResultSuccess();
}
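The response stored under JobMapKeys.RESPONSE is what the job layer later hands back to the API caller. A minimal sketch of retrieving it from a completed flight using the standard Stairway FlightState API; this is illustrative, not the project's actual JobService code:

// flightId is the id returned when the flight was submitted
FlightState flightState = stairway.getFlightState(flightId);
FlightMap resultMap =
    flightState.getResultMap().orElseThrow(IllegalStateException::new);
BulkLoadArrayResultModel response =
    resultMap.get(JobMapKeys.RESPONSE.getKeyName(), BulkLoadArrayResultModel.class);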