Usage of bio.terra.model.BulkLoadFileModel in the DataBiosphere project jade-data-repo: class EncodeFixture, method loadFiles.
/**
 * Reads the encode source control file from the ingest bucket, bulk-loads the
 * referenced files into the dataset, and writes a scratch ingest file in which
 * the gs paths are replaced by the file ids returned from the load.
 *
 * @param datasetId dataset receiving the bulk load
 * @param profileId billing profile used for the load request
 * @param user      test user performing the load
 * @param storage   GCS client used for reading the source and writing the scratch file
 * @return path (within the ingest bucket) of the generated scratch load file
 */
private String loadFiles(String datasetId, String profileId, TestConfiguration.User user, Storage storage) throws Exception {
    String scratchPath = "scratch/lf_loaddata" + UUID.randomUUID().toString() + ".json";

    // For a bigger test use encodetest/file.json (1000+ files)
    // For normal testing encodetest/file_small.json (10 files)
    Blob sourceBlob =
        storage.get(BlobId.of(testConfiguration.getIngestbucket(), "encodetest/file_small.json"));

    // Read one line at a time, unpack into a pojo, and collect the file load models
    List<BulkLoadFileModel> loadModels = new ArrayList<>();
    List<EncodeFileIn> encodeInputs = new ArrayList<>();
    try (BufferedReader reader =
            new BufferedReader(Channels.newReader(sourceBlob.reader(), "UTF-8"))) {
        for (String line = reader.readLine(); line != null; line = reader.readLine()) {
            EncodeFileIn encodeFileIn = TestUtils.mapFromJson(line, EncodeFileIn.class);
            encodeInputs.add(encodeFileIn);
            if (encodeFileIn.getFile_gs_path() != null) {
                loadModels.add(makeFileModel(encodeFileIn.getFile_gs_path()));
            }
            if (encodeFileIn.getFile_index_gs_path() != null) {
                loadModels.add(makeFileModel(encodeFileIn.getFile_index_gs_path()));
            }
        }
    }

    // Ingest the files in one bulk array load
    BulkLoadArrayRequestModel loadRequest =
        new BulkLoadArrayRequestModel()
            .loadArray(loadModels)
            .maxFailedFileLoads(0)
            .profileId(profileId)
            .loadTag("encodeFixture");
    BulkLoadArrayResultModel loadResult = dataRepoFixtures.bulkLoadArray(user, datasetId, loadRequest);

    // Index load results by source path so each input line can look up its file ids
    Map<String, BulkLoadFileResultModel> resultsBySourcePath = new HashMap<>();
    for (BulkLoadFileResultModel fileResult : loadResult.getLoadFileResults()) {
        resultsBySourcePath.put(fileResult.getSourcePath(), fileResult);
    }

    // Generate JSON with the file ids substituted and write each line to scratch
    try (GcsChannelWriter writer =
            new GcsChannelWriter(storage, testConfiguration.getIngestbucket(), scratchPath)) {
        for (EncodeFileIn encodeFileIn : encodeInputs) {
            String bamFileId = lookupFileId(resultsBySourcePath, encodeFileIn.getFile_gs_path());
            String bamiFileId = lookupFileId(resultsBySourcePath, encodeFileIn.getFile_index_gs_path());
            EncodeFileOut encodeFileOut = new EncodeFileOut(encodeFileIn, bamFileId, bamiFileId);
            writer.write(TestUtils.mapToJson(encodeFileOut) + "\n");
        }
    }
    return scratchPath;
}

// Returns the loaded file id for the given source path, or null when no result exists.
private static String lookupFileId(Map<String, BulkLoadFileResultModel> results, String sourcePath) {
    BulkLoadFileResultModel result = results.get(sourcePath);
    return (result == null) ? null : result.getFileId();
}
Usage of bio.terra.model.BulkLoadFileModel in the DataBiosphere project jade-data-repo: class FileOperationTest, method getFileModel.
/**
 * Builds a bulk-load file model for the test file at the given index, using
 * either a good or a bad source path depending on the getGood flag.
 *
 * @param getGood true to use the good source path, false to use the bad one
 * @param index   index into the test file arrays; must be within fileTarget bounds
 * @param testId  unique test id prefixed onto the target path
 */
private BulkLoadFileModel getFileModel(boolean getGood, int index, String testId) {
    assertTrue("test bug: file index not in range", index < fileTarget.length);
    String sourcePath = getGood ? goodFileSource[index] : badFileSource[index];
    return new BulkLoadFileModel()
        .mimeType("application/binary")
        .description("bulk load file " + index)
        .sourcePath(sourcePath)
        .targetPath(testId + fileTarget[index]);
}
Usage of bio.terra.model.BulkLoadFileModel in the DataBiosphere project jade-data-repo: class IngestPopulateFileStateFromFileStep, method doStep.
@Override
public StepResult doStep(FlightContext context) {
    // Parse the load control file line-by-line into BulkLoadFileModels and
    // populate the load table in batches. Unknown JSON properties are ignored
    // so file ingestion works with extra key-value pairs on each line.
    ObjectMapper objectMapper =
        new ObjectMapper()
            .registerModule(new Jdk8Module())
            .registerModule(new JavaTimeModule())
            .disable(DeserializationFeature.FAIL_ON_UNKNOWN_PROPERTIES);
    FlightMap inputParameters = context.getInputParameters();
    BulkLoadRequestModel loadRequest =
        inputParameters.get(JobMapKeys.REQUEST.getKeyName(), BulkLoadRequestModel.class);
    FlightMap workingMap = context.getWorkingMap();
    UUID loadId = UUID.fromString(workingMap.get(LoadMapKeys.LOAD_ID, String.class));
    Storage storage = StorageOptions.getDefaultInstance().getService();
    List<String> errorDetails = new ArrayList<>();
    try (BufferedReader reader = new GcsBufferedReader(storage, loadRequest.getLoadControlFile())) {
        long lineCount = 0;
        List<BulkLoadFileModel> fileList = new ArrayList<>();
        for (String line = reader.readLine(); line != null; line = reader.readLine()) {
            lineCount++;
            try {
                BulkLoadFileModel loadFile = objectMapper.readValue(line, BulkLoadFileModel.class);
                fileList.add(loadFile);
            } catch (IOException ex) {
                // Collect parse failures; bail out early once too many accumulate
                errorDetails.add("Format error at line " + lineCount + ": " + ex.getMessage());
                if (errorDetails.size() > maxBadLines) {
                    throw new BulkLoadControlFileException(
                        "More than " + maxBadLines + " bad lines in the control file", errorDetails);
                }
            }
            // Keep this check and load out of the inner try; it should only catch objectMapper failures.
            // BUGFIX: was `> batchSize`, which flushed only after the batch held
            // batchSize + 1 entries; `>=` keeps batches at exactly batchSize.
            if (fileList.size() >= batchSize) {
                loadService.populateFiles(loadId, fileList);
                fileList.clear();
            }
        }
        // If there are errors in the load file, don't do the load
        if (errorDetails.size() > 0) {
            throw new BulkLoadControlFileException(
                "There were " + errorDetails.size() + " bad lines in the control file", errorDetails);
        }
        // Flush any remaining partial batch
        if (fileList.size() > 0) {
            loadService.populateFiles(loadId, fileList);
        }
    } catch (IOException ex) {
        throw new BulkLoadControlFileException("Failure accessing the load control file", ex);
    }
    return StepResult.getStepResultSuccess();
}
Usage of bio.terra.model.BulkLoadFileModel in the DataBiosphere project jade-data-repo: class FileOperationTest, method makeBulkFileLoad.
/**
 * Writes a bulk-load control file to the ingest bucket and returns a load
 * request pointing at it. The control file contains badLines unparseable
 * lines followed by one JSON file model per entry in validPattern.
 *
 * @param tagBase      prefix for the generated load tag
 * @param startIndex   first index into the test file arrays
 * @param badLines     number of intentionally malformed lines to write first
 * @param addExtraKeys when true, inject an extra key-value pair into each JSON line
 * @param validPattern per-file flag: true selects a good source path, false a bad one
 */
private BulkLoadRequestModel makeBulkFileLoad(String tagBase, int startIndex, int badLines, boolean addExtraKeys, boolean[] validPattern) {
    int modelCount = validPattern.length;
    String testId = Names.randomizeName("test");
    String loadTag = tagBase + testId;
    String controlPath = "scratch/controlfile" + UUID.randomUUID().toString() + ".json";
    // track the file so it gets cleaned up
    connectedOperations.addScratchFile(controlPath);
    String controlFileUri = "gs://" + testConfig.getIngestbucket() + "/" + controlPath;
    Storage storage = StorageOptions.getDefaultInstance().getService();
    try (GcsChannelWriter writer = new GcsChannelWriter(storage, testConfig.getIngestbucket(), controlPath)) {
        for (int i = 0; i < badLines; i++) {
            writer.write("bad line: " + loadTag + "\n");
        }
        for (int i = 0; i < modelCount; i++) {
            BulkLoadFileModel model = getFileModel(validPattern[i], startIndex + i, testId);
            String jsonLine = objectMapper.writeValueAsString(model) + "\n";
            if (addExtraKeys) {
                // Inject extra key-value pairs into file lines
                jsonLine = jsonLine.replaceFirst("^\\{", "{\"customKey\":\"customValue\",");
                logger.info("Added extra keys: " + jsonLine);
            }
            writer.write(jsonLine);
        }
    } catch (IOException ex) {
        fail("Failed to write load file '" + controlPath + "' to bucket '" + testConfig.getIngestbucket() + "'");
    }
    return new BulkLoadRequestModel()
        .profileId(profileModel.getId())
        .loadTag(loadTag)
        .maxFailedFileLoads(0)
        .loadControlFile(controlFileUri);
}
Usage of bio.terra.model.BulkLoadFileModel in the DataBiosphere project jade-data-repo: class FileLoadTest, method getFileModel.
/**
 * Builds a bulk-load file model for the test file at the given index, with
 * the repeat counter appended to both source and target paths so repeated
 * loads of the same file get distinct paths.
 *
 * @param index  index into the test file arrays; must be within fileTarget bounds
 * @param repeat counter appended to source and target paths
 * @param testId unique test id prefixed onto the target path
 */
private BulkLoadFileModel getFileModel(int index, int repeat, String testId) {
    assertTrue("test bug: file index not in range", index < fileTarget.length);
    String sourcePath = goodFileSource[index] + repeat;
    return new BulkLoadFileModel()
        .mimeType("application/binary")
        .description("bulk load file " + index)
        .sourcePath(sourcePath)
        .targetPath(testId + fileTarget[index] + repeat);
}
Aggregations