Use of org.apache.mesos.v1.scheduler.Protos.Call.Reconcile.Task in project Singularity by HubSpot.
From the class SingularityScheduler, method deleteScheduledTasks.
private void deleteScheduledTasks(final Collection<SingularityPendingTask> scheduledTasks, SingularityPendingRequest pendingRequest) {
  // Keep only the pending tasks that belong to the request and deploy being rescheduled.
  List<SingularityPendingTask> tasksForDeploy = scheduledTasks
      .stream()
      .filter(task -> pendingRequest.getRequestId().equals(task.getPendingTaskId().getRequestId()))
      .filter(task -> pendingRequest.getDeployId().equals(task.getPendingTaskId().getDeployId()))
      .collect(Collectors.toList());

  for (SingularityPendingTask task : tasksForDeploy) {
    LOG.debug("Deleting pending task {} in order to reschedule {}", task.getPendingTaskId().getId(), pendingRequest);
    taskManager.deletePendingTask(task.getPendingTaskId());
  }
}
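To see the selection logic in isolation, here is a minimal, self-contained sketch of the same two-stage stream filter; the PendingTaskId record and the literal ids are hypothetical stand-ins for the Singularity types, not part of the project.

import java.util.List;
import java.util.stream.Collectors;

public class PendingTaskFilterSketch {
  // Hypothetical stand-in for the id fields of SingularityPendingTask.
  record PendingTaskId(String requestId, String deployId, String id) {}

  public static void main(String[] args) {
    List<PendingTaskId> scheduled = List.of(
        new PendingTaskId("my-request", "deploy-1", "task-a"),
        new PendingTaskId("my-request", "deploy-2", "task-b"),
        new PendingTaskId("other-request", "deploy-1", "task-c"));

    // Same two-stage filter as deleteScheduledTasks: match request id, then deploy id.
    List<PendingTaskId> tasksForDeploy = scheduled
        .stream()
        .filter(t -> "my-request".equals(t.requestId()))
        .filter(t -> "deploy-1".equals(t.deployId()))
        .collect(Collectors.toList());

    tasksForDeploy.forEach(t -> System.out.println("would delete " + t.id())); // prints task-a
  }
}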
Use of org.apache.mesos.v1.scheduler.Protos.Call.Reconcile.Task in project Singularity by HubSpot.
From the class SingularityMesosTaskBuilder, method prepareEnvironment.
private void prepareEnvironment(final SingularityTaskRequest task, SingularityTaskId taskId, CommandInfo.Builder commandBuilder, final SingularityOfferHolder offerHolder, final Optional<long[]> ports) {
  Map<String, Object> envVars = new HashMap<>();
  // Placement and identity variables derived from the offer and the task id.
  envVars.put("INSTANCE_NO", task.getPendingTask().getPendingTaskId().getInstanceNo());
  envVars.put("TASK_HOST", offerHolder.getHostname());
  envVars.put("TASK_RACK_ID", offerHolder.getRackId());
  envVars.put("AVAILABILITY_ZONE", offerHolder.getRackId());
  envVars.put("TASK_REQUEST_ID", task.getPendingTask().getPendingTaskId().getRequestId());
  envVars.put("TASK_DEPLOY_ID", taskId.getDeployId());
  envVars.put("TASK_ID", taskId.getId());
  envVars.put("ESTIMATED_INSTANCE_COUNT", task.getRequest().getInstancesSafe());

  // Deploy-wide environment variables, with task id placeholders filled in.
  for (Entry<String, String> envEntry : task.getDeploy().getEnv().orElse(Collections.<String, String>emptyMap()).entrySet()) {
    envVars.put(envEntry.getKey(), fillInTaskIdValues(envEntry.getValue(), offerHolder, taskId));
  }

  // Per-instance overrides keyed by instance number, if configured for this instance.
  if (task.getDeploy().getTaskEnv().isPresent()
      && task.getDeploy().getTaskEnv().get().containsKey(taskId.getInstanceNo())
      && !task.getDeploy().getTaskEnv().get().get(taskId.getInstanceNo()).isEmpty()) {
    for (Entry<String, String> envEntry : task.getDeploy().getTaskEnv().get().get(taskId.getInstanceNo()).entrySet()) {
      envVars.put(envEntry.getKey(), fillInTaskIdValues(envEntry.getValue(), offerHolder, taskId));
    }
  }

  // Expose each allocated host port as PORT0..PORTn; PORT aliases the first one.
  if (ports.isPresent()) {
    for (int portNum = 0; portNum < ports.get().length; portNum++) {
      if (portNum == 0) {
        envVars.put("PORT", ports.get()[portNum]);
      }
      envVars.put(String.format("PORT%s", portNum), ports.get()[portNum]);
    }
  }

  // Resource overrides attached to the pending task, if any.
  if (task.getPendingTask().getResources().isPresent()) {
    Resources override = task.getPendingTask().getResources().get();
    if (override.getCpus() != 0) {
      envVars.put("DEPLOY_CPUS", ((long) override.getCpus()));
    }
    if (override.getMemoryMb() != 0) {
      envVars.put("DEPLOY_MEM", ((long) override.getMemoryMb()));
    }
  }

  for (Entry entry : task.getPendingTask().getEnvOverrides().entrySet()) {
    envVars.put(entry.getKey().toString(), entry.getValue());
  }

  // Set this last so it cannot be overridden by the user
  if (task.getPendingTask().getUser().isPresent()) {
    envVars.put("STARTED_BY_USER", task.getPendingTask().getUser().get());
  }

  Environment.Builder envBldr = Environment.newBuilder();
  for (Entry entry : envVars.entrySet()) {
    setEnv(envBldr, entry.getKey().toString(), entry.getValue());
  }
  commandBuilder.setEnvironment(envBldr.build());
}
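The setEnv helper is not shown in this snippet. Below is a plausible minimal sketch, under the assumption that it simply translates each map entry into a Mesos v1 protobuf Environment.Variable; the method body is an illustration, not Singularity's implementation.

import org.apache.mesos.v1.Protos.Environment;

// A minimal sketch of what a setEnv helper could look like; the real
// Singularity implementation is not shown in the snippet above.
final class EnvSketch {
  static void setEnv(Environment.Builder builder, String name, Object value) {
    if (value == null) {
      return; // protobuf setters reject nulls, so skip absent values
    }
    builder.addVariables(
        Environment.Variable.newBuilder()
            .setName(name)
            .setValue(value.toString()) // Mesos environment values are plain strings
            .build());
  }
}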
Use of org.apache.mesos.v1.scheduler.Protos.Call.Reconcile.Task in project Singularity by HubSpot.
From the class SingularityMesosTaskBuilder, method buildTask.
public SingularityMesosTaskHolder buildTask(SingularityOfferHolder offerHolder, List<Resource> availableResources, SingularityTaskRequest taskRequest, Resources desiredTaskResources, Resources desiredExecutorResources) {
  final String sanitizedRackId = offerHolder.getSanitizedRackId();
  final String sanitizedHost = offerHolder.getSanitizedHost();
  final SingularityTaskId taskId = new SingularityTaskId(
      taskRequest.getPendingTask().getPendingTaskId().getRequestId(),
      taskRequest.getDeploy().getId(),
      System.currentTimeMillis(),
      taskRequest.getPendingTask().getPendingTaskId().getInstanceNo(),
      sanitizedHost,
      sanitizedRackId);
  final TaskInfo.Builder bldr = TaskInfo.newBuilder().setTaskId(TaskID.newBuilder().setValue(taskId.toString()));

  // Allocate host ports from the offer if the task requests them or the container maps literal host ports.
  Optional<long[]> ports = Optional.empty();
  Optional<Resource> portsResource = Optional.empty();
  final Optional<SingularityContainerInfo> containerInfo = taskRequest.getDeploy().getContainerInfo();
  if (desiredTaskResources.getNumPorts() > 0 || hasLiteralPortMapping(containerInfo)) {
    List<Long> requestedPorts = new ArrayList<>();
    if (hasLiteralPortMapping(containerInfo)) {
      requestedPorts.addAll(containerInfo.get().getDocker().get().getLiteralHostPorts());
    }
    portsResource = Optional.of(MesosUtils.getPortsResource(desiredTaskResources.getNumPorts(), availableResources, requestedPorts));
    ports = Optional.of(MesosUtils.getPorts(portsResource.get(), desiredTaskResources.getNumPorts()));
  }

  if (containerInfo.isPresent()) {
    prepareContainerInfo(offerHolder, taskId, bldr, containerInfo.get(), ports);
  }

  // Use a custom executor when one is configured; otherwise run the plain command.
  if (taskRequest.getDeploy().getCustomExecutorCmd().isPresent()) {
    prepareCustomExecutor(bldr, taskId, taskRequest, offerHolder, ports, desiredExecutorResources);
  } else {
    prepareCommand(bldr, taskId, taskRequest, offerHolder, ports);
  }

  if (portsResource.isPresent()) {
    bldr.addResources(portsResource.get());
  }

  Optional<String> requiredRole = taskRequest.getRequest().getRequiredRole();
  bldr.addResources(MesosUtils.getCpuResource(desiredTaskResources.getCpus(), requiredRole));
  bldr.addResources(MesosUtils.getMemoryResource(desiredTaskResources.getMemoryMb(), requiredRole));
  bldr.addResources(MesosUtils.getDiskResource(desiredTaskResources.getDiskMb(), requiredRole));
  bldr.setAgentId(offerHolder.getOffers().get(0).getAgentId());
  bldr.setName(taskRequest.getRequest().getId());

  final Builder labelsBuilder = Labels.newBuilder();
  // apply request-specific labels, if any
  if (taskRequest.getDeploy().getMesosLabels().isPresent() && !taskRequest.getDeploy().getMesosLabels().get().isEmpty()) {
    for (SingularityMesosTaskLabel label : taskRequest.getDeploy().getMesosLabels().get()) {
      org.apache.mesos.v1.Protos.Label.Builder labelBuilder = Label.newBuilder();
      labelBuilder.setKey(label.getKey());
      if (label.getValue().isPresent()) {
        labelBuilder.setValue(label.getValue().get());
      }
      labelsBuilder.addLabels(labelBuilder.build());
    }
  }

  // apply task-specific labels, if any
  final int taskInstanceNo = taskRequest.getPendingTask().getPendingTaskId().getInstanceNo();
  if (taskRequest.getDeploy().getMesosTaskLabels().isPresent()
      && taskRequest.getDeploy().getMesosTaskLabels().get().containsKey(taskInstanceNo)
      && !taskRequest.getDeploy().getMesosTaskLabels().get().get(taskInstanceNo).isEmpty()) {
    for (SingularityMesosTaskLabel label : taskRequest.getDeploy().getMesosTaskLabels().get().get(taskInstanceNo)) {
      org.apache.mesos.v1.Protos.Label.Builder labelBuilder = Label.newBuilder();
      labelBuilder.setKey(label.getKey());
      if (label.getValue().isPresent()) {
        labelBuilder.setValue(label.getValue().get());
      }
      labelsBuilder.addLabels(labelBuilder.build());
    }
  }

  bldr.setLabels(labelsBuilder);
  TaskInfo task = bldr.build();
  return new SingularityMesosTaskHolder(
      new SingularityTask(
          taskRequest,
          taskId,
          offerHolder.getOffers().stream().map(o -> mesosProtosUtils.offerFromProtos(o)).collect(Collectors.toList()),
          mesosProtosUtils.taskFromProtos(task),
          Optional.of(offerHolder.getRackId())),
      task);
}
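MesosUtils is Singularity's own helper and its bodies are not shown here. As a point of reference, this is a plausible sketch of how a role-aware scalar resource can be built with the Mesos v1 protobufs; the `scalar` helper and the "singularity" role are illustrative assumptions, and the real getCpuResource may differ.

import java.util.Optional;
import org.apache.mesos.v1.Protos.Resource;
import org.apache.mesos.v1.Protos.Value;

// A minimal sketch of a role-aware scalar resource using the Mesos v1 protobufs;
// MesosUtils.getCpuResource in Singularity may be implemented differently.
final class ResourceSketch {
  static Resource scalar(String name, double amount, Optional<String> role) {
    Resource.Builder builder = Resource.newBuilder()
        .setName(name) // e.g. "cpus", "mem", "disk"
        .setType(Value.Type.SCALAR)
        .setScalar(Value.Scalar.newBuilder().setValue(amount));
    role.ifPresent(builder::setRole); // claim the resource for a specific role
    return builder.build();
  }

  public static void main(String[] args) {
    Resource cpus = scalar("cpus", 1.5, Optional.of("singularity")); // hypothetical role
    System.out.println(cpus);
  }
}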
Use of org.apache.mesos.v1.scheduler.Protos.Call.Reconcile.Task in project Singularity by HubSpot.
From the class SingularityMesosOfferScheduler, method checkOffers.
Collection<SingularityOfferHolder> checkOffers(final Map<String, Offer> offers, long start) {
  if (offers.isEmpty()) {
    LOG.debug("No offers to check");
    return Collections.emptyList();
  }

  final List<SingularityTaskRequestHolder> sortedTaskRequestHolders = getSortedDueTaskRequests();
  final int numDueTasks = sortedTaskRequestHolders.size();

  // Group offers by agent so each agent is evaluated as a single offer holder.
  final Map<String, SingularityOfferHolder> offerHolders = offers.values()
      .stream()
      .collect(Collectors.groupingBy(o -> o.getAgentId().getValue()))
      .entrySet()
      .stream()
      .filter(e -> e.getValue().size() > 0)
      .map(e -> {
        List<Offer> offersList = e.getValue();
        String agentId = e.getKey();
        return new SingularityOfferHolder(
            offersList,
            numDueTasks,
            agentAndRackHelper.getRackIdOrDefault(offersList.get(0)),
            agentId,
            offersList.get(0).getHostname(),
            agentAndRackHelper.getTextAttributes(offersList.get(0)),
            agentAndRackHelper.getReservedAgentAttributes(offersList.get(0)));
      })
      .collect(Collectors.toMap(SingularityOfferHolder::getAgentId, Function.identity()));

  if (sortedTaskRequestHolders.isEmpty()) {
    return offerHolders.values();
  }

  final AtomicInteger tasksScheduled = new AtomicInteger(0);
  Map<String, RequestUtilization> requestUtilizations = usageManager.getRequestUtilizations(false);
  List<SingularityTaskId> activeTaskIds = taskManager.getActiveTaskIds();

  Map<String, SingularityAgentUsageWithId> currentUsages = usageManager.getAllCurrentAgentUsage();
  List<CompletableFuture<Void>> currentUsagesFutures = new ArrayList<>();
  for (SingularityOfferHolder offerHolder : offerHolders.values()) {
    currentUsagesFutures.add(runAsync(() -> {
      String agentId = offerHolder.getAgentId();
      Optional<SingularityAgentUsageWithId> maybeUsage = Optional.ofNullable(currentUsages.get(agentId));
      if (configuration.isReCheckMetricsForLargeNewTaskCount() && maybeUsage.isPresent()) {
        // If many tasks started since the last usage sample, the cached metrics may be stale.
        long newTaskCount = taskManager.getActiveTaskIds()
            .stream()
            .filter(t -> t.getStartedAt() > maybeUsage.get().getTimestamp() && t.getSanitizedHost().equals(offerHolder.getSanitizedHost()))
            .count();
        if (newTaskCount >= maybeUsage.get().getNumTasks() / 2) {
          try {
            MesosAgentMetricsSnapshotObject metricsSnapshot = usageHelper.getMetricsSnapshot(offerHolder.getHostname());
            if (metricsSnapshot.getSystemLoad5Min() / metricsSnapshot.getSystemCpusTotal() > mesosConfiguration.getRecheckMetricsLoad1Threshold()
                || metricsSnapshot.getSystemLoad1Min() / metricsSnapshot.getSystemCpusTotal() > mesosConfiguration.getRecheckMetricsLoad5Threshold()) {
              // Come back to this agent after we have collected more metrics
              LOG.info("Skipping evaluation of {} until new metrics are collected. Current load is load1: {}, load5: {}", offerHolder.getHostname(), metricsSnapshot.getSystemLoad1Min(), metricsSnapshot.getSystemLoad5Min());
              currentUsages.remove(agentId);
            }
          } catch (Throwable t) {
            LOG.warn("Could not check metrics for host {}, skipping", offerHolder.getHostname());
            currentUsages.remove(agentId);
          }
        }
      }
    }));
  }
  CompletableFutures.allOf(currentUsagesFutures).join();

  // Score each agent's usage in parallel before matching tasks to offers.
  List<CompletableFuture<Void>> usagesWithScoresFutures = new ArrayList<>();
  Map<String, SingularityAgentUsageWithCalculatedScores> currentUsagesById = new ConcurrentHashMap<>();
  for (SingularityAgentUsageWithId usage : currentUsages.values()) {
    if (offerHolders.containsKey(usage.getAgentId())) {
      usagesWithScoresFutures.add(runAsync(() ->
          currentUsagesById.put(
              usage.getAgentId(),
              new SingularityAgentUsageWithCalculatedScores(
                  usage,
                  mesosConfiguration.getScoreUsingSystemLoad(),
                  getMaxProbableUsageForAgent(activeTaskIds, requestUtilizations, offerHolders.get(usage.getAgentId()).getSanitizedHost()),
                  mesosConfiguration.getLoad5OverloadedThreshold(),
                  mesosConfiguration.getLoad1OverloadedThreshold(),
                  usage.getTimestamp()))));
    }
  }
  CompletableFutures.allOf(usagesWithScoresFutures).join();

  long startCheck = System.currentTimeMillis();
  LOG.debug("Found agent usages and scores after {}ms", startCheck - start);

  Map<SingularityDeployKey, Optional<SingularityDeployStatistics>> deployStatsCache = new ConcurrentHashMap<>();
  Set<String> overloadedHosts = Sets.newConcurrentHashSet();
  AtomicInteger noMatches = new AtomicInteger();

  // We spend much of the offer check loop waiting on request-level locks. Wait for the locks in parallel, but ensure that
  // actual offer checks are done serially so a single offer is not overcommitted.
  ReentrantLock offerCheckTempLock = new ReentrantLock(false);
  CompletableFutures.allOf(
      sortedTaskRequestHolders
          .stream()
          .collect(Collectors.groupingBy(t -> t.getTaskRequest().getRequest().getId()))
          .entrySet()
          .stream()
          .map(entry -> runAsync(() -> {
            lock.tryRunWithRequestLock(() -> {
              offerCheckTempLock.lock();
              try {
                long startRequest = System.currentTimeMillis();
                int evaluated = 0;
                for (SingularityTaskRequestHolder taskRequestHolder : entry.getValue()) {
                  long now = System.currentTimeMillis();
                  boolean isOfferLoopTakingTooLong = now - startCheck > mesosConfiguration.getOfferLoopTimeoutMillis();
                  boolean isRequestInOfferLoopTakingTooLong = (now - startRequest > mesosConfiguration.getOfferLoopRequestTimeoutMillis() && evaluated > 1);
                  if (isOfferLoopTakingTooLong || isRequestInOfferLoopTakingTooLong) {
                    LOG.warn("{} is holding the offer lock for too long, skipping remaining {} tasks for scheduling", taskRequestHolder.getTaskRequest().getRequest().getId(), entry.getValue().size() - evaluated);
                    break;
                  }
                  evaluated++;

                  List<SingularityTaskId> activeTaskIdsForRequest = leaderCache.getActiveTaskIdsForRequest(taskRequestHolder.getTaskRequest().getRequest().getId());
                  if (isTooManyInstancesForRequest(taskRequestHolder.getTaskRequest(), activeTaskIdsForRequest)) {
                    LOG.debug("Skipping pending task {}, too many instances already running", taskRequestHolder.getTaskRequest().getPendingTask().getPendingTaskId());
                    continue;
                  }

                  // Score each offer for this task, stopping early once a good-enough match is found.
                  Map<String, Double> scorePerOffer = new ConcurrentHashMap<>();
                  for (SingularityOfferHolder offerHolder : offerHolders.values()) {
                    if (!isOfferFull(offerHolder)) {
                      if (calculateScore(requestUtilizations, currentUsagesById, taskRequestHolder, scorePerOffer, activeTaskIdsForRequest, offerHolder, deployStatsCache, overloadedHosts) > mesosConfiguration.getGoodEnoughScoreThreshold()) {
                        break;
                      }
                    }
                  }

                  if (!scorePerOffer.isEmpty()) {
                    SingularityOfferHolder bestOffer = offerHolders.get(Collections.max(scorePerOffer.entrySet(), Map.Entry.comparingByValue()).getKey());
                    LOG.info("Best offer {}/1 is on {}", scorePerOffer.get(bestOffer.getAgentId()), bestOffer.getSanitizedHost());
                    acceptTask(bestOffer, taskRequestHolder);
                    tasksScheduled.getAndIncrement();
                    updateAgentUsageScores(taskRequestHolder, currentUsagesById, bestOffer.getAgentId(), requestUtilizations);
                  } else {
                    noMatches.getAndIncrement();
                  }
                }
              } finally {
                offerCheckTempLock.unlock();
              }
            }, entry.getKey(), String.format("%s#%s", getClass().getSimpleName(), "checkOffers"), mesosConfiguration.getOfferLoopRequestTimeoutMillis(), TimeUnit.MILLISECONDS);
          }))
          .collect(Collectors.toList()))
      .join();

  LOG.info("{} tasks scheduled, {} tasks remaining after examining {} offers ({} overloaded hosts, {} had no offer matches)", tasksScheduled, numDueTasks - tasksScheduled.get(), offers.size(), overloadedHosts.size(), noMatches.get());
  return offerHolders.values();
}
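The core of the matching step is the best-offer selection over the per-offer score map. Here is a minimal, self-contained sketch of that pattern using only the JDK; the agent ids and score values are made up for illustration.

import java.util.Collections;
import java.util.Map;

public class BestOfferSketch {
  public static void main(String[] args) {
    // Hypothetical agent-id -> score values; scores in checkOffers fall in [0, 1].
    Map<String, Double> scorePerOffer = Map.of(
        "agent-1", 0.42,
        "agent-2", 0.87,
        "agent-3", 0.15);

    // Same selection as checkOffers: the entry with the highest score wins.
    String bestAgentId = Collections.max(scorePerOffer.entrySet(), Map.Entry.comparingByValue()).getKey();

    System.out.println("Best offer " + scorePerOffer.get(bestAgentId) + "/1 is on " + bestAgentId); // agent-2
  }
}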
Use of org.apache.mesos.v1.scheduler.Protos.Call.Reconcile.Task in project Singularity by HubSpot.
From the class SingularityMesosSchedulerClient, method accept.
/**
 * Sent by the scheduler when it accepts offer(s) sent by the master. The ACCEPT request includes the type of
 * operations (e.g., launch task, launch task group, reserve resources, create volumes) that the scheduler wants to
 * perform on the offers. Note that until the scheduler replies (accepts or declines) to an offer, the offer's
 * resources are considered allocated to the offer's role and to the framework.
 *
 * @param offerIds ids of the offers being accepted
 * @param offerOperations operations to perform on the accepted offers
 */
public void accept(List<OfferID> offerIds, List<Offer.Operation> offerOperations) {
  Builder accept = build()
      .setAccept(Accept.newBuilder()
          .addAllOfferIds(offerIds)
          .addAllOperations(offerOperations));
  sendCall(accept, Type.ACCEPT);
}
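A minimal usage sketch, assuming a client instance and an already-built TaskInfo (e.g. from SingularityMesosTaskBuilder above); the `launch` helper and its argument names are illustrative, not part of Singularity.

import java.util.List;
import org.apache.mesos.v1.Protos.Offer;
import org.apache.mesos.v1.Protos.OfferID;
import org.apache.mesos.v1.Protos.TaskInfo;

// A minimal sketch of calling accept with a LAUNCH operation; `client` and
// `taskInfo` are assumed to exist rather than constructed here.
final class AcceptSketch {
  static void launch(SingularityMesosSchedulerClient client, OfferID offerId, TaskInfo taskInfo) {
    Offer.Operation launch = Offer.Operation.newBuilder()
        .setType(Offer.Operation.Type.LAUNCH)
        .setLaunch(Offer.Operation.Launch.newBuilder().addTaskInfos(taskInfo))
        .build();
    client.accept(List.of(offerId), List.of(launch));
  }
}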