use of pipelite.executor.cmd.CmdRunner in project pipelite by enasequence.
the class AbstractLsfExecutor method recoverJobs.
/**
* Attempt to recover missing job results. This will only ever be done once and if it fails the
* job is considered failed.
*/
private static void recoverJobs(CmdRunner cmdRunner, Map<String, LsfDescribeJobsCache.RequestContext> requestMap, List<JobResult> jobResults) {
log.atInfo().log("Recovering LSF job results.");
AtomicInteger remainingCount = new AtomicInteger();
AtomicInteger attemptedCount = new AtomicInteger();
AtomicInteger recoveredCount = new AtomicInteger();
ZonedDateTime start = ZonedDateTime.now();
ZonedDateTime until = start.plus(JOB_RECOVERY_TIMEOUT);
ExecutorService executorService = Executors.newFixedThreadPool(JOB_RECOVERY_PARALLELISM);
try {
jobResults.stream().filter(r -> r.jobId != null && r.result == null).forEach(r -> {
attemptedCount.incrementAndGet();
remainingCount.incrementAndGet();
executorService.submit(() -> {
try {
// Attempt to recover missing job result using bhist.
if (recoverJobUsingBhist(cmdRunner, r)) {
recoveredCount.incrementAndGet();
} else {
// Attempt to recover missing job result using output file.
if (recoverJobUsingOutFile(cmdRunner, r, requestMap)) {
recoveredCount.incrementAndGet();
}
}
} finally {
remainingCount.decrementAndGet();
}
});
});
try {
while (remainingCount.get() > 0) {
Time.waitUntil(JOB_RECOVERY_POLL_FREQUENCY, until);
}
} catch (PipeliteTimeoutException ex) {
log.atWarning().log("LSF job recovery timeout exceeded.");
}
} finally {
executorService.shutdownNow();
}
log.atInfo().log("Finished recovering LSF job results in " + (Duration.between(ZonedDateTime.now(), start).abs().toMillis() / 1000) + " seconds. Recovered " + recoveredCount.get() + " out of " + attemptedCount.get() + " jobs.");
}
use of pipelite.executor.cmd.CmdRunner in project pipelite by enasequence.
the class CmdExecutor method execute.
@Override
public void execute(StageExecutorRequest request, StageExecutorResultCallback resultCallback) {
CmdRunner cmdRunner = CmdRunner.create(getExecutorParams());
StageExecutorResult result = cmdRunner.execute(cmd);
if (getExecutorParams().getPermanentErrors().contains(Ints.tryParse(result.getAttribute(EXIT_CODE)))) {
result.setPermanentError();
}
resultCallback.accept(result);
}
Aggregations