Use of bio.terra.model.BulkLoadFileModel in project jade-data-repo by DataBiosphere.
From class FileLoadTest, method makeBulkFileLoad:
/**
 * Builds a bulk-load request by writing a newline-delimited JSON control file (one
 * serialized {@code BulkLoadFileModel} per line) to the test ingest bucket, then
 * pointing the request at that file's gs:// path.
 *
 * @param tagBase   prefix for the generated load tag (a randomized test id is appended)
 * @param fileCount total number of file entries to write; expected to be a multiple of
 *                  {@code goodFileSource.length}, otherwise the remainder is silently dropped
 * @return a request with the given load tag, zero tolerated failures, and the control file path
 */
private BulkLoadRequestModel makeBulkFileLoad(String tagBase, int fileCount) {
    String testId = Names.randomizeName("test");
    String loadTag = tagBase + testId;
    // String concatenation invokes UUID.toString() implicitly; no explicit call needed.
    String targetPath = "scratch/loadtest" + UUID.randomUUID() + ".json";
    // track the file so it gets cleaned up
    connectedOperations.addScratchFile(targetPath);
    String gspath = "gs://" + testConfig.getIngestbucket() + "/" + targetPath;
    Storage storage = StorageOptions.getDefaultInstance().getService();
    try (GcsChannelWriter writer = new GcsChannelWriter(storage, testConfig.getIngestbucket(), targetPath)) {
        // goodFileSource.length is 20; cycle through the source files until fileCount
        // entries (rounded down to a whole number of passes) have been written.
        int repeats = fileCount / goodFileSource.length;
        for (int r = 0; r < repeats; r++) {
            for (int i = 0; i < goodFileSource.length; i++) {
                BulkLoadFileModel fileModel = getFileModel(i, r, testId);
                String fileLine = objectMapper.writeValueAsString(fileModel) + "\n";
                writer.write(fileLine);
            }
        }
    } catch (IOException ex) {
        // Include the underlying cause so a write failure is diagnosable from the test log
        // (previously the exception detail was swallowed).
        fail("Failed to write load file '" + targetPath + "' to bucket '"
            + testConfig.getIngestbucket() + "': " + ex.getMessage());
    }
    BulkLoadRequestModel loadRequest =
        new BulkLoadRequestModel()
            .profileId(profileModel.getId())
            .loadTag(loadTag)
            .maxFailedFileLoads(0)
            .loadControlFile(gspath);
    return loadRequest;
}
Use of bio.terra.model.BulkLoadFileModel in project jade-data-repo by DataBiosphere.
From class FileOperationTest, method arrayMultiFileLoadFailRetryTest:
/**
 * Loads an array of three files where the middle one is intentionally bad, verifies the
 * per-file results and the BigQuery datarepo_load_history rows (succeeded rows have a
 * file_id and no error; failed rows have an error and no file_id), then fixes the bad
 * entry and retries the same load tag, expecting all three files to succeed.
 */
@Test
public void arrayMultiFileLoadFailRetryTest() throws Exception {
    String testId = Names.randomizeName("test");
    String loadTag = "arrayMultiFileLoadFileRetryTest" + testId;
    // Tolerate up to 2 failures so the whole load completes despite the bad file.
    BulkLoadArrayRequestModel arrayLoad = new BulkLoadArrayRequestModel().profileId(profileModel.getId()).loadTag(loadTag).maxFailedFileLoads(2);
    arrayLoad.addLoadArrayItem(getFileModel(true, 2, testId));
    arrayLoad.addLoadArrayItem(getFileModel(false, 3, testId));
    arrayLoad.addLoadArrayItem(getFileModel(true, 4, testId));
    BulkLoadArrayResultModel result = connectedOperations.ingestArraySuccess(datasetSummary.getId(), arrayLoad);
    // 3 total, 2 succeeded, 1 failed, 0 not tried.
    checkLoadSummary(result.getLoadSummary(), loadTag, 3, 2, 1, 0);
    // Index per-file results by target path so they can be matched to the input models.
    Map<String, BulkLoadFileResultModel> resultMap = new HashMap<>();
    for (BulkLoadFileResultModel fileResult : result.getLoadFileResults()) {
        resultMap.put(fileResult.getTargetPath(), fileResult);
    }
    // Query Big Query datarepo_load_history table - assert correctly reflects different
    // bulk load file states
    String columnsToQuery = "state, file_id, error";
    TableResult queryLoadHistoryTableResult = queryLoadHistoryTable(columnsToQuery);
    for (FieldValueList item : queryLoadHistoryTableResult.getValues()) {
        String state = item.get(0).getStringValue();
        assertTrue("state should either be succeeded or failed.", state.equals(BulkLoadFileState.SUCCEEDED.toString()) || state.equals(BulkLoadFileState.FAILED.toString()));
        if (state.equals(BulkLoadFileState.SUCCEEDED.toString())) {
            assertTrue("file_id should have value", item.get(1).getStringValue().length() > 0);
            assertTrue("Error column should be empty", item.get(2).getStringValue().length() == 0);
        } else if (state.equals(BulkLoadFileState.FAILED.toString())) {
            assertTrue("file_id should NOT have value", item.get(1).getStringValue().length() == 0);
            assertTrue("Error column should have value", item.get(2).getStringValue().length() > 0);
        }
    }
    // Removed unused local variable 'FieldValueList curr_result' (dead code).
    List<BulkLoadFileModel> loadArray = arrayLoad.getLoadArray();
    BulkLoadFileResultModel fileResult = resultMap.get(loadArray.get(0).getTargetPath());
    checkFileResultSuccess(fileResult);
    fileResult = resultMap.get(loadArray.get(1).getTargetPath());
    checkFileResultFailed(fileResult);
    fileResult = resultMap.get(loadArray.get(2).getTargetPath());
    checkFileResultSuccess(fileResult);
    // fix the bad file and retry load
    loadArray.set(1, getFileModel(true, 3, testId));
    BulkLoadArrayResultModel result2 = connectedOperations.ingestArraySuccess(datasetSummary.getId(), arrayLoad);
    // On retry everything should succeed: 3 total, 3 succeeded, 0 failed, 0 not tried.
    checkLoadSummary(result2.getLoadSummary(), loadTag, 3, 3, 0, 0);
}
Use of bio.terra.model.BulkLoadFileModel in project jade-data-repo by DataBiosphere.
From class FileTest, method longFileLoadTest:
// The purpose of this test is to have a long-running workload that completes successfully
// while we delete pods and have them recover.
// Marked ignore for normal testing.
@Ignore
@Test
public void longFileLoadTest() throws Exception {
    // TODO: want this to run about 5 minutes on 2 DRmanager instances. The speed of loads is when they are
    // not local is about 2.5GB/minutes. With a fixed size of 1GB, each instance should do 2.5 files per minute,
    // so two instances should do 5 files per minute. To run 5 minutes we should run 25 files.
    // (There are 25 files in the directory, so if we need more we should do a reuse scheme like the fileLoadTest)
    final int filesToLoad = 25;
    String loadTag = Names.randomizeName("longtest");
    // Tolerate failures on every file: do not stop if there is a failure.
    BulkLoadArrayRequestModel arrayLoad =
        new BulkLoadArrayRequestModel()
            .profileId(profileId)
            .loadTag(loadTag)
            .maxFailedFileLoads(filesToLoad);
    logger.info("longFileLoadTest loading " + filesToLoad + " files into dataset id " + datasetId);
    // Build one load item per 1GB source file in the scale-test bucket.
    for (int fileIndex = 0; fileIndex < filesToLoad; fileIndex++) {
        String tailPath = String.format("/fileloadscaletest/file1GB-%02d.txt", fileIndex);
        BulkLoadFileModel loadItem =
            new BulkLoadFileModel()
                .mimeType("application/binary")
                .description("bulk load file " + fileIndex)
                .sourcePath("gs://jade-testdata-uswestregion" + tailPath)
                .targetPath("/" + loadTag + tailPath);
        arrayLoad.addLoadArrayItem(loadItem);
    }
    BulkLoadArrayResultModel result = dataRepoFixtures.bulkLoadArray(steward(), datasetId, arrayLoad);
    // Log the outcome counts; success/failure tallies are inspected manually for this test.
    BulkLoadResultModel loadSummary = result.getLoadSummary();
    logger.info("Total files : " + loadSummary.getTotalFiles());
    logger.info("Succeeded files: " + loadSummary.getSucceededFiles());
    logger.info("Failed files : " + loadSummary.getFailedFiles());
    logger.info("Not Tried files: " + loadSummary.getNotTriedFiles());
}
Aggregations