Use of io.mantisrx.server.master.scheduler.WorkerUnscheduleable in project mantis by Netflix.
From the class SchedulingService, method schedulingResultHandler.
/**
 * Handles the result of one scheduling iteration.
 *
 * <p>In order, this callback:
 * <ol>
 *   <li>records the time of the callback;</li>
 *   <li>logs and counts every exception carried by the result;</li>
 *   <li>launches the tasks assigned to each VM and records per-worker timing metrics;</li>
 *   <li>routes a {@code WorkerUnscheduleable} event for every request listed in the result's
 *       failures;</li>
 *   <li>updates the scheduler counters and gauges;</li>
 *   <li>requests the current VM states, handing over a callback that refreshes the scheduling
 *       state only when the last refresh is older than {@code vmCurrentStatesCheckInterval};</li>
 *   <li>publishes job manager and Fenzo worker metrics.</li>
 * </ol>
 *
 * <p>Any {@code Exception} thrown while handling the result is caught, logged and counted in
 * {@code schedulingCallbackExceptions}; it is not rethrown.
 *
 * @param schedulingResult the scheduling iteration result to process
 */
private void schedulingResultHandler(SchedulingResult schedulingResult) {
    try {
        lastSchedulingResultCallback.set(System.currentTimeMillis());

        for (Exception exc : schedulingResult.getExceptions()) {
            logger.error("Scheduling result got exception: {}", exc.getMessage(), exc);
            schedulingResultExceptions.increment();
        }

        SchedulerCounters.getInstance().incrementResourceAllocationTrials(schedulingResult.getNumAllocations());

        // The result map may be null; treat that as "no VM received an assignment".
        final Map<String, VMAssignmentResult> assignmentResultMap = schedulingResult.getResultMap();
        final int assignmentResultSize = (assignmentResultMap == null) ? 0 : assignmentResultMap.size();
        int workersLaunched = 0;
        if (assignmentResultMap != null) {
            // One timestamp for the whole batch so every worker is measured against the same instant.
            final long now = System.currentTimeMillis();
            // Only the per-VM results are needed; the map keys are never read.
            for (VMAssignmentResult vmResult : assignmentResultMap.values()) {
                launchTasks(vmResult.getTasksAssigned(), vmResult.getLeasesUsed());
                for (TaskAssignmentResult r : vmResult.getTasksAssigned()) {
                    final ScheduleRequest request = (ScheduleRequest) r.getRequest();
                    final Optional<Long> acceptedAt = workerRegistry.getAcceptedAt(request.getWorkerId());
                    acceptedAt.ifPresent(acceptedAtTime -> workerAcceptedToLaunchedDistMs.recordValue(now - acceptedAtTime));
                    perWorkerSchedulingTimeMs.increment(now - request.getReadyAt());
                }
                workersLaunched += vmResult.getTasksAssigned().size();
            }
        }

        // for workers that didn't get scheduled, rate limit them
        final Map<TaskRequest, List<TaskAssignmentResult>> failures = schedulingResult.getFailures();
        for (Map.Entry<TaskRequest, List<TaskAssignmentResult>> entry : failures.entrySet()) {
            final ScheduleRequest req = (ScheduleRequest) entry.getKey();
            boolean success = jobMessageRouter.routeWorkerEvent(new WorkerUnscheduleable(req.getWorkerId(), req.getStageNum()));
            if (!success) {
                logger.warn("Failed to route {} WorkerUnscheduleable event", req.getWorkerId());
                if (logger.isTraceEnabled()) {
                    logger.trace("Unscheduleable worker {} assignmentresults {}", req.getWorkerId(), entry.getValue());
                }
            }
        }

        // Requests seen this iteration = those launched + those that failed to be placed.
        final int unscheduledWorkers = failures.size();
        final int requestedWorkers = workersLaunched + unscheduledWorkers;

        numWorkersLaunched.increment(workersLaunched);
        numResourceOffersReceived.increment(schedulingResult.getLeasesAdded());
        numResourceAllocations.increment(schedulingResult.getNumAllocations());
        numResourceOffersRejected.increment(schedulingResult.getLeasesRejected());
        workersToLaunch.set(requestedWorkers);
        pendingWorkers.set(unscheduledWorkers);
        schedulerRunMillis.set(schedulingResult.getRuntime());
        totalActiveAgents.set(schedulingResult.getTotalVMsCount());
        numAgentsUsed.increment(assignmentResultSize);
        final int idleVMsCount = schedulingResult.getIdleVMsCount();
        idleAgents.set(idleVMsCount);

        SchedulerCounters.getInstance().endIteration(requestedWorkers, workersLaunched, assignmentResultSize, schedulingResult.getLeasesRejected());
        // Log the iteration summary only every 10th iteration, and only when there was work requested.
        if (requestedWorkers > 0 && SchedulerCounters.getInstance().getCounter().getIterationNumber() % 10 == 0) {
            logger.info("Scheduling iteration result: {}", SchedulerCounters.getInstance().toJsonString());
        }
        // Log the idle machine count only when it differs from the last value logged.
        if (idleVMsCount != idleMachinesCount.get()) {
            logger.info("Idle machines: {}", idleVMsCount);
            idleMachinesCount.set(idleVMsCount);
        }

        try {
            taskSchedulingService.requestVmCurrentStates(vmCurrentStates -> {
                // Skip the refresh unless the previous one is older than the configured interval.
                if (lastVmCurrentStatesCheckDone.get() < (System.currentTimeMillis() - vmCurrentStatesCheckInterval)) {
                    schedulingState.setVMCurrentState(vmCurrentStates);
                    verifyAndReportResUsageMetrics(vmCurrentStates);
                    lastVmCurrentStatesCheckDone.set(System.currentTimeMillis());
                }
            });
        } catch (final TaskQueueException e) {
            // A failed state request is logged and otherwise ignored; metrics are still published below.
            logger.warn("got exception requesting VM states from Fenzo", e);
        }
        publishJobManagerAndFenzoWorkerMetrics();
    } catch (final Exception e) {
        logger.error("unexpected exception in scheduling result callback", e);
        schedulingCallbackExceptions.increment();
    }
}
Aggregations