Search in sources :

Example 1 with LoadCandidates

use of bio.terra.service.load.LoadCandidates in project jade-data-repo by DataBiosphere.

the class IngestDriverStep method doStep.

/**
 * Drives a bulk file load: repeatedly inspects the state of the load identified by the
 * LOAD_ID entry in the working map, launches further file loads while there is spare
 * capacity, and waits for running loads to finish.
 *
 * <p>The loop ends when nothing is running and no candidate files remain, or when the
 * failed-load count exceeds {@code maxFailedFileLoads} (after waiting for all running
 * loads to finish).
 *
 * @param context flight context supplying the working map
 * @return success when the loop ends normally; a retryable failure wrapping the exception
 *     when a {@code DatabaseOperationException} or {@code StairwayExecutionException} is thrown
 * @throws InterruptedException propagated from the helpers this step calls
 */
@Override
public StepResult doStep(FlightContext context) throws InterruptedException {
    // Pull the inputs for this step out of the working map.
    FlightMap inputs = context.getWorkingMap();
    UUID loadId = UUID.fromString(inputs.get(LoadMapKeys.LOAD_ID, String.class));
    GoogleBucketResource bucketResource = inputs.get(FileMapKeys.BUCKET_INFO, GoogleBucketResource.class);

    try {
        // Launch orphans are loads in the RUNNING state that never got recorded by stairway.
        checkForOrphans(context, loadId);

        while (true) {
            // Capacity is recomputed on every pass: active pods times the per-pod file limit.
            int activePods = kubeService.getActivePodCount();
            int filesPerPod = configurationService.getParameterValue(ConfigEnum.LOAD_CONCURRENT_FILES);
            int capacity = activePods * filesPerPod;

            // Current picture of running, failed and not-yet-started loads.
            LoadCandidates snapshot = getLoadCandidates(context, loadId, capacity);
            int running = snapshot.getRunningLoads().size();
            int pending = snapshot.getCandidateFiles().size();

            // Nothing in flight and nothing left to start: the load is finished.
            if (running == 0 && pending == 0) {
                break;
            }

            // Too many failures: start nothing new, just let the running loads drain.
            if (snapshot.getFailedLoads() > maxFailedFileLoads) {
                waitForAll(context, loadId, capacity);
                break;
            }

            // Fill the free slots, but never ask for more loads than there are candidates.
            if (running < capacity) {
                int launchCount = Math.min(capacity - running, pending);
                launchLoads(context, launchCount, snapshot.getCandidateFiles(), profileId, loadId, bucketResource);
                running += launchCount;
            }

            // Block until the running count falls below what we believe is now running.
            waitForAny(context, loadId, capacity, running);
        }
    } catch (DatabaseOperationException | StairwayExecutionException ex) {
        return new StepResult(StepStatus.STEP_RESULT_FAILURE_RETRY, ex);
    }
    return StepResult.getStepResultSuccess();
}
Also used : GoogleBucketResource(bio.terra.service.resourcemanagement.google.GoogleBucketResource) DatabaseOperationException(bio.terra.stairway.exception.DatabaseOperationException) LoadCandidates(bio.terra.service.load.LoadCandidates) StairwayExecutionException(bio.terra.stairway.exception.StairwayExecutionException) FlightMap(bio.terra.stairway.FlightMap) UUID(java.util.UUID) StepResult(bio.terra.stairway.StepResult)

Example 2 with LoadCandidates

use of bio.terra.service.load.LoadCandidates in project jade-data-repo by DataBiosphere.

the class IngestDriverStep method waitForAny.

/**
 * Polls until fewer loads are running than {@code originallyRunning}.
 *
 * <p>Each pass calls {@code waiting()} first and then re-reads the load state through
 * {@code getLoadCandidates}.
 *
 * @param context flight context passed through to {@code getLoadCandidates}
 * @param loadId id of the load being driven
 * @param concurrentLoads concurrency limit passed through to {@code getLoadCandidates}
 * @param originallyRunning running-load count to drop below before returning
 */
private void waitForAny(FlightContext context, UUID loadId, int concurrentLoads, int originallyRunning) throws DatabaseOperationException, InterruptedException {
    // NOTE(review): with originallyRunning <= 0 the exit condition can never be met;
    // callers presumably always pass a positive count - confirm.
    int stillRunning;
    do {
        waiting();
        stillRunning = getLoadCandidates(context, loadId, concurrentLoads).getRunningLoads().size();
    } while (stillRunning >= originallyRunning);
}
Also used : LoadCandidates(bio.terra.service.load.LoadCandidates)

Example 3 with LoadCandidates

use of bio.terra.service.load.LoadCandidates in project jade-data-repo by DataBiosphere.

the class IngestDriverStep method getLoadCandidates.

/**
 * Fetches the load candidates from the load service and reconciles the loads it lists as
 * running against the flight state reported by Stairway.
 *
 * <p>Loads whose flight is still RUNNING, WAITING, READY or QUEUED stay in the running list.
 * Loads whose flight ended in ERROR or FATAL are recorded as failed and added to the failure
 * count. Loads whose flight ended in SUCCESS are recorded as succeeded using the file id and
 * file info from the flight's result map. The returned candidates carry the adjusted failure
 * count and running list.
 *
 * @param context flight context used to reach Stairway
 * @param loadId id of the load being driven
 * @param concurrentLoads concurrency limit passed to {@code loadService.findCandidates}
 * @return the candidates, updated with the reconciled failure count and running loads
 * @throws FileSystemCorruptException if a successful flight has no result map
 */
private LoadCandidates getLoadCandidates(FlightContext context, UUID loadId, int concurrentLoads) throws DatabaseOperationException, InterruptedException {
    // Start from what the database believes about this load.
    LoadCandidates candidates = loadService.findCandidates(loadId, concurrentLoads);
    logger.debug("Candidates from db: failedLoads={}  runningLoads={}  candidateFiles={}", candidates.getFailedLoads(), candidates.getRunningLoads().size(), candidates.getCandidateFiles().size());

    int failed = candidates.getFailedLoads();
    List<LoadFile> stillActive = new LinkedList<>();

    // Ask Stairway for the actual state of every load the database lists as running.
    for (LoadFile loadFile : candidates.getRunningLoads()) {
        FlightState state = context.getStairway().getFlightState(loadFile.getFlightId());
        // A status not listed below matches no case: the load is left out of the
        // running list and no database update is made for it.
        switch (state.getFlightStatus()) {
            case SUCCESS:
                {
                    // A finished flight must hand back its result map.
                    FlightMap results = state.getResultMap()
                        .orElseThrow(() -> new FileSystemCorruptException("no result map in flight state"));
                    String fileId = results.get(FileMapKeys.FILE_ID, String.class);
                    FSFileInfo fileInfo = results.get(FileMapKeys.FILE_INFO, FSFileInfo.class);
                    loadService.setLoadFileSucceeded(loadId, loadFile.getTargetPath(), fileId, fileInfo);
                    break;
                }
            case ERROR:
            case FATAL:
                {
                    // Record the flight's exception text when it has one.
                    String error = state.getException().map(Object::toString).orElse("unknown error");
                    loadService.setLoadFileFailed(loadId, loadFile.getTargetPath(), error);
                    failed++;
                    break;
                }
            case RUNNING:
            case WAITING:
            case READY:
            case QUEUED:
                // Not finished yet, so it still counts as running.
                stillActive.add(loadFile);
                break;
        }
    }

    // Replace the database view with the reconciled failure count and running list.
    candidates.failedLoads(failed).runningLoads(stillActive);
    logger.debug("Candidates resolved: failedLoads={}  runningLoads={}  candidateFiles={}", candidates.getFailedLoads(), candidates.getRunningLoads().size(), candidates.getCandidateFiles().size());
    return candidates;
}
Also used : FlightState(bio.terra.stairway.FlightState) FSFileInfo(bio.terra.service.filedata.FSFileInfo) LoadCandidates(bio.terra.service.load.LoadCandidates) FileSystemCorruptException(bio.terra.service.filedata.exception.FileSystemCorruptException) LoadFile(bio.terra.service.load.LoadFile) FlightMap(bio.terra.stairway.FlightMap) LinkedList(java.util.LinkedList)

Example 4 with LoadCandidates

use of bio.terra.service.load.LoadCandidates in project jade-data-repo by DataBiosphere.

the class IngestDriverStep method waitForAll.

/**
 * Polls until no loads are reported as running.
 *
 * <p>Each pass calls {@code waiting()} first and then re-reads the load state through
 * {@code getLoadCandidates}, so there is always at least one wait before returning.
 *
 * @param context flight context passed through to {@code getLoadCandidates}
 * @param loadId id of the load being driven
 * @param concurrentLoads concurrency limit passed through to {@code getLoadCandidates}
 */
private void waitForAll(FlightContext context, UUID loadId, int concurrentLoads) throws DatabaseOperationException, InterruptedException {
    int stillRunning;
    do {
        waiting();
        stillRunning = getLoadCandidates(context, loadId, concurrentLoads).getRunningLoads().size();
    } while (stillRunning != 0);
}
Also used : LoadCandidates(bio.terra.service.load.LoadCandidates)

Aggregations

LoadCandidates (bio.terra.service.load.LoadCandidates)4 FlightMap (bio.terra.stairway.FlightMap)2 FSFileInfo (bio.terra.service.filedata.FSFileInfo)1 FileSystemCorruptException (bio.terra.service.filedata.exception.FileSystemCorruptException)1 LoadFile (bio.terra.service.load.LoadFile)1 GoogleBucketResource (bio.terra.service.resourcemanagement.google.GoogleBucketResource)1 FlightState (bio.terra.stairway.FlightState)1 StepResult (bio.terra.stairway.StepResult)1 DatabaseOperationException (bio.terra.stairway.exception.DatabaseOperationException)1 StairwayExecutionException (bio.terra.stairway.exception.StairwayExecutionException)1 LinkedList (java.util.LinkedList)1 UUID (java.util.UUID)1