Use of bio.terra.service.load.LoadFile in the jade-data-repo project by DataBiosphere.
Class IngestDriverStep, method getLoadCandidates.
/**
 * Fetches the load candidates for a load and reconciles the "running" entries
 * recorded in the database with the flight state Stairway reports for each one.
 *
 * <p>For every load the database lists as running:
 * <ul>
 *   <li>RUNNING, WAITING, READY or QUEUED flights are kept in the running list;</li>
 *   <li>ERROR or FATAL flights are recorded as failed through the load service
 *       and added to the failure count;</li>
 *   <li>SUCCESS flights are recorded as succeeded, using the file id and file
 *       info taken from the flight's result map.</li>
 * </ul>
 * A status outside those listed matches no case: such a load is neither kept
 * in the running list nor updated in the database.
 *
 * @param context flight context, used to reach Stairway
 * @param loadId identifier of the load being driven
 * @param concurrentLoads passed straight through to {@code loadService.findCandidates}
 * @return the candidates object with its failed-load count and running-load
 *         list replaced by the reconciled values
 * @throws DatabaseOperationException declared by this method; presumably raised
 *         by the load service or Stairway calls
 * @throws InterruptedException declared by this method; presumably raised by
 *         the Stairway calls
 */
private LoadCandidates getLoadCandidates(FlightContext context, UUID loadId, int concurrentLoads) throws DatabaseOperationException, InterruptedException {
    // The database view comes first; it may be stale with respect to flights
    // that finished since it was last updated.
    LoadCandidates candidates = loadService.findCandidates(loadId, concurrentLoads);
    logger.debug("Candidates from db: failedLoads={} runningLoads={} candidateFiles={}",
        candidates.getFailedLoads(),
        candidates.getRunningLoads().size(),
        candidates.getCandidateFiles().size());

    int failedSoFar = candidates.getFailedLoads();
    List<LoadFile> stillRunning = new LinkedList<>();

    for (LoadFile tracked : candidates.getRunningLoads()) {
        // Stairway holds the current status of the flight behind this load.
        FlightState state = context.getStairway().getFlightState(tracked.getFlightId());

        switch (state.getFlightStatus()) {
            case SUCCESS: {
                // A successful flight must carry its outputs; without them the
                // file cannot be recorded as loaded.
                FlightMap results = state.getResultMap()
                    .orElseThrow(() -> new FileSystemCorruptException("no result map in flight state"));
                String fileId = results.get(FileMapKeys.FILE_ID, String.class);
                FSFileInfo fileInfo = results.get(FileMapKeys.FILE_INFO, FSFileInfo.class);
                loadService.setLoadFileSucceeded(loadId, tracked.getTargetPath(), fileId, fileInfo);
                break;
            }

            case ERROR:
            case FATAL: {
                // Record the flight's exception text, or a placeholder when
                // the flight carries no exception.
                String error = state.getException()
                    .map(Object::toString)
                    .orElse("unknown error");
                loadService.setLoadFileFailed(loadId, tracked.getTargetPath(), error);
                failedSoFar++;
                break;
            }

            case RUNNING:
            case WAITING:
            case READY:
            case QUEUED:
                // Not finished yet: it still counts as a running load.
                stillRunning.add(tracked);
                break;
        }
    }

    // Replace the database snapshot values with the reconciled ones.
    candidates.failedLoads(failedSoFar).runningLoads(stillRunning);
    logger.debug("Candidates resolved: failedLoads={} runningLoads={} candidateFiles={}",
        candidates.getFailedLoads(),
        candidates.getRunningLoads().size(),
        candidates.getCandidateFiles().size());
    return candidates;
}
Use of bio.terra.service.load.LoadFile in the jade-data-repo project by DataBiosphere.
Class IngestDriverStep, method launchLoads.
/**
 * Launches a file-ingest worker flight for each of the first {@code launchCount}
 * entries of {@code loadFiles}.
 *
 * <p>For each file, the load is first marked as running in the load service
 * with a newly created flight id, and only then is the flight submitted to the
 * Stairway queue under that id.
 *
 * @param context flight context, used to reach Stairway
 * @param launchCount number of entries to launch, taken from the front of
 *        {@code loadFiles}; the list is indexed directly, so the caller is
 *        expected to keep this within the list size
 * @param loadFiles candidate files to launch
 * @param profileId profile id placed in each file load request
 * @param loadId identifier of the load the files belong to
 * @param bucketInfo bucket resource passed to each worker flight
 * @throws DatabaseOperationException declared by this method; presumably raised
 *         by the load service or Stairway calls
 * @throws StairwayExecutionException declared by this method; presumably raised
 *         when submitting the flight
 * @throws InterruptedException declared by this method; presumably raised by
 *         the Stairway calls
 */
private void launchLoads(FlightContext context, int launchCount, List<LoadFile> loadFiles, String profileId, UUID loadId, GoogleBucketResource bucketInfo) throws DatabaseOperationException, StairwayExecutionException, InterruptedException {
    Stairway stairway = context.getStairway();

    int next = 0;
    while (next < launchCount) {
        LoadFile pending = loadFiles.get(next);

        // Describe the single-file load the worker flight will perform.
        FileLoadModel request = new FileLoadModel()
            .sourcePath(pending.getSourcePath())
            .targetPath(pending.getTargetPath())
            .mimeType(pending.getMimeType())
            .profileId(profileId)
            .loadTag(loadTag)
            .description(pending.getDescription());

        FlightMap workerInputs = new FlightMap();
        workerInputs.put(FileMapKeys.DATASET_ID, datasetId);
        workerInputs.put(FileMapKeys.REQUEST, request);
        workerInputs.put(FileMapKeys.BUCKET_INFO, bucketInfo);

        String workerFlightId = stairway.createFlightId();
        loadService.setLoadFileRunning(loadId, pending.getTargetPath(), workerFlightId);

        // NOTE: between the call above and the submit below, the load_file table
        // says the flight is RUNNING although it has not been launched yet. A
        // failure inside this window leaves an "orphan" load: marked running but
        // never started. The launch-orphan check at the beginning of the do()
        // method handles that case.
        // submitToQueue is used so the file loaders are spread across multiple
        // instances of datarepo.
        stairway.submitToQueue(workerFlightId, FileIngestWorkerFlight.class, workerInputs);

        next++;
    }
}
Aggregations