use of com.hubspot.singularity.SingularityTaskUsage in project Singularity by HubSpot.
the class SingularityUsagePoller method updateRequestUtilization.
/**
 * Folds one task's usage samples into the running {@link RequestUtilization} for its request.
 *
 * <p>The samples (a synthetic start-of-task entry from {@code copyUsages}, the stored past
 * usages and {@code latestUsage}) are sorted by timestamp and walked as consecutive
 * (older, newer) pairs. For each pair the cpu rate is the cpu-seconds delta divided by the
 * timestamp delta; memory and disk are read from the newer sample. Min/max values already
 * recorded for the request are carried forward and widened, and reserved resources are added
 * once per pair so that used and reserved totals stay comparable.
 *
 * @param utilizationPerRequestId aggregate per request id; the entry for this task's request
 *        is created if missing and (re)stored before returning
 * @param pastTaskUsages previously stored usage samples for the task
 * @param latestUsage the sample just collected for the task
 * @param task id of the task the samples belong to
 * @param memoryMbReservedForTask memory reserved for the task, in MB
 * @param cpuReservedForTask cpus reserved for the task
 * @param diskMbReservedForTask disk reserved for the task, in MB
 */
private void updateRequestUtilization(Map<String, RequestUtilization> utilizationPerRequestId, List<SingularityTaskUsage> pastTaskUsages, SingularityTaskUsage latestUsage, SingularityTaskId task, double memoryMbReservedForTask, double cpuReservedForTask, double diskMbReservedForTask) {
  String requestId = task.getRequestId();
  RequestUtilization requestUtilization = utilizationPerRequestId.getOrDefault(requestId, new RequestUtilization(requestId, task.getDeployId()));

  // Sentinel extremes, used when this is the first task seen for the request.
  long curMaxMemBytesUsed = 0;
  long curMinMemBytesUsed = Long.MAX_VALUE;
  double curMaxCpuUsed = 0;
  double curMinCpuUsed = Double.MAX_VALUE;
  long curMaxDiskBytesUsed = 0;
  long curMinDiskBytesUsed = Long.MAX_VALUE;

  // Otherwise continue from the extremes already recorded for the request.
  if (utilizationPerRequestId.containsKey(requestId)) {
    curMaxMemBytesUsed = requestUtilization.getMaxMemBytesUsed();
    curMinMemBytesUsed = requestUtilization.getMinMemBytesUsed();
    curMaxCpuUsed = requestUtilization.getMaxCpuUsed();
    curMinCpuUsed = requestUtilization.getMinCpuUsed();
    curMaxDiskBytesUsed = requestUtilization.getMaxDiskBytesUsed();
    curMinDiskBytesUsed = requestUtilization.getMinDiskBytesUsed();
  }

  List<SingularityTaskUsage> pastTaskUsagesCopy = copyUsages(pastTaskUsages, latestUsage, task);
  pastTaskUsagesCopy.sort(Comparator.comparingDouble(SingularityTaskUsage::getTimestamp));

  // Number of consecutive (older, newer) sample pairs, i.e. measured intervals.
  int numTasks = pastTaskUsagesCopy.size() - 1;
  int numCpuOverages = 0;

  for (int i = 0; i < numTasks; i++) {
    SingularityTaskUsage olderUsage = pastTaskUsagesCopy.get(i);
    SingularityTaskUsage newerUsage = pastTaskUsagesCopy.get(i + 1);

    // NOTE(review): two samples with the same timestamp make the divisor zero here - confirm
    // whether that can happen and how it should be handled.
    double cpusUsed = (newerUsage.getCpuSeconds() - olderUsage.getCpuSeconds()) / (newerUsage.getTimestamp() - olderUsage.getTimestamp());

    curMaxCpuUsed = Math.max(cpusUsed, curMaxCpuUsed);
    curMinCpuUsed = Math.min(cpusUsed, curMinCpuUsed);
    curMaxMemBytesUsed = Math.max(newerUsage.getMemoryTotalBytes(), curMaxMemBytesUsed);
    curMinMemBytesUsed = Math.min(newerUsage.getMemoryTotalBytes(), curMinMemBytesUsed);
    curMaxDiskBytesUsed = Math.max(newerUsage.getDiskTotalBytes(), curMaxDiskBytesUsed);
    curMinDiskBytesUsed = Math.min(newerUsage.getDiskTotalBytes(), curMinDiskBytesUsed);

    if (cpusUsed > cpuReservedForTask) {
      numCpuOverages++;
    }

    requestUtilization
        .addCpuUsed(cpusUsed)
        .addMemBytesUsed(newerUsage.getMemoryTotalBytes())
        .addDiskBytesUsed(newerUsage.getDiskTotalBytes())
        .incrementTaskCount();
  }

  // NOTE(review): the divisor is the sample count (intervals + 1) while overages are counted
  // per interval - confirm this is the intended definition of the burst rating.
  double cpuBurstRating = pastTaskUsagesCopy.size() > 0 ? numCpuOverages / (double) pastTaskUsagesCopy.size() : 1;

  requestUtilization
      .addMemBytesReserved((long) (memoryMbReservedForTask * SingularitySlaveUsage.BYTES_PER_MEGABYTE * numTasks))
      .addCpuReserved(cpuReservedForTask * numTasks)
      // Cast the whole product (as for memory) so fractional MB are not truncated before scaling.
      .addDiskBytesReserved((long) (diskMbReservedForTask * SingularitySlaveUsage.BYTES_PER_MEGABYTE * numTasks))
      .setMaxCpuUsed(curMaxCpuUsed)
      .setMinCpuUsed(curMinCpuUsed)
      .setMaxMemBytesUsed(curMaxMemBytesUsed)
      .setMinMemBytesUsed(curMinMemBytesUsed)
      .setMaxDiskBytesUsed(curMaxDiskBytesUsed)
      .setMinDiskBytesUsed(curMinDiskBytesUsed)
      .setCpuBurstRating(cpuBurstRating);

  utilizationPerRequestId.put(requestId, requestUtilization);
}
use of com.hubspot.singularity.SingularityTaskUsage in project Singularity by HubSpot.
the class SingularityUsagePoller method copyUsages.
/**
 * Builds a new list of usage samples for a task without modifying {@code pastTaskUsages}.
 *
 * <p>The returned list holds, in this order: a synthetic all-zero sample stamped with the
 * task's start time converted from milliseconds to seconds, every entry of
 * {@code pastTaskUsages}, and finally {@code latestUsage}.
 *
 * @param pastTaskUsages previously stored usage samples for the task
 * @param latestUsage the most recently collected sample
 * @param task id of the task, used for its start time
 * @return a fresh mutable list containing the samples described above
 */
private List<SingularityTaskUsage> copyUsages(List<SingularityTaskUsage> pastTaskUsages, SingularityTaskUsage latestUsage, SingularityTaskId task) {
  // Zero-usage baseline at task start, so the oldest real sample has something to be compared against.
  SingularityTaskUsage usageAtStart = new SingularityTaskUsage(0, TimeUnit.MILLISECONDS.toSeconds(task.getStartedAt()), 0, 0);

  List<SingularityTaskUsage> combined = new ArrayList<>();
  combined.add(usageAtStart);
  combined.addAll(pastTaskUsages);
  combined.add(latestUsage);
  return combined;
}
use of com.hubspot.singularity.SingularityTaskUsage in project Singularity by HubSpot.
the class SingularityUsagePoller method collectSlaveUage.
/**
 * Collects and stores resource usage for a single slave and the tasks running on it.
 *
 * <p>Fetches per-task usage and the slave metrics snapshot from {@code mesosClient}, then for
 * each reported task: stores the latest sample, updates the per-request aggregate (when the
 * task and its resources can be found), derives the task's current cpu rate and stores it as
 * the task's current usage, and, when shuffling is enabled, records the task as a shuffle
 * candidate. Finally a {@link SingularitySlaveUsage} is built and saved, the slave is added to
 * {@code overLoadedHosts} when its selected system load exceeds its cpu count, and the
 * cluster-wide totals are incremented.
 *
 * <p>Any {@link Throwable} raised along the way is logged and handed to
 * {@code exceptionNotifier}; it is not propagated to the caller.
 *
 * @param slave the slave to poll
 * @param now timestamp recorded on the usage objects created here
 * @param utilizationPerRequestId per-request aggregate, updated in place
 * @param overLoadedHosts receives this slave's usage and shuffle candidates when overloaded
 * @param totalMemBytesUsed running total, incremented when slave memory and cpu totals are known
 * @param totalMemBytesAvailable running total, incremented when slave memory and cpu totals are known
 * @param totalCpuUsed running total, incremented when slave memory and cpu totals are known
 * @param totalCpuAvailable running total, incremented when slave memory and cpu totals are known
 * @param totalDiskBytesUsed running total, incremented only when the slave's disk total is also known
 * @param totalDiskBytesAvailable running total, incremented only when the slave's disk total is also known
 */
private void collectSlaveUage(SingularitySlave slave, long now, Map<String, RequestUtilization> utilizationPerRequestId, Map<SingularitySlaveUsage, List<TaskIdWithUsage>> overLoadedHosts, AtomicLong totalMemBytesUsed, AtomicLong totalMemBytesAvailable, AtomicDouble totalCpuUsed, AtomicDouble totalCpuAvailable, AtomicLong totalDiskBytesUsed, AtomicLong totalDiskBytesAvailable) {
  Map<ResourceUsageType, Number> longRunningTasksUsage = new HashMap<>();
  longRunningTasksUsage.put(ResourceUsageType.MEMORY_BYTES_USED, 0);
  longRunningTasksUsage.put(ResourceUsageType.CPU_USED, 0);
  longRunningTasksUsage.put(ResourceUsageType.DISK_BYTES_USED, 0);

  Optional<Long> memoryMbTotal = Optional.absent();
  Optional<Double> cpusTotal = Optional.absent();
  Optional<Long> diskMbTotal = Optional.absent();

  long memoryMbReservedOnSlave = 0;
  double cpuReservedOnSlave = 0;
  long diskMbReservedOnSlave = 0;

  long memoryBytesUsedOnSlave = 0;
  double cpusUsedOnSlave = 0;
  // Despite the name, this accumulates getDiskTotalBytes(), i.e. bytes.
  long diskMbUsedOnSlave = 0;

  try {
    List<MesosTaskMonitorObject> allTaskUsage = mesosClient.getSlaveResourceUsage(slave.getHost());
    MesosSlaveMetricsSnapshotObject slaveMetricsSnapshot = mesosClient.getSlaveMetricsSnapshot(slave.getHost());

    // System-level metrics default to zero when no snapshot is available.
    double systemMemTotalBytes = 0;
    double systemMemFreeBytes = 0;
    double systemLoad1Min = 0;
    double systemLoad5Min = 0;
    double systemLoad15Min = 0;
    double slaveDiskUsed = 0;
    double slaveDiskTotal = 0;
    double systemCpusTotal = 0;
    if (slaveMetricsSnapshot != null) {
      systemMemTotalBytes = slaveMetricsSnapshot.getSystemMemTotalBytes();
      systemMemFreeBytes = slaveMetricsSnapshot.getSystemMemFreeBytes();
      systemLoad1Min = slaveMetricsSnapshot.getSystemLoad1Min();
      systemLoad5Min = slaveMetricsSnapshot.getSystemLoad5Min();
      systemLoad15Min = slaveMetricsSnapshot.getSystemLoad15Min();
      slaveDiskUsed = slaveMetricsSnapshot.getSlaveDiskUsed();
      slaveDiskTotal = slaveMetricsSnapshot.getSlaveDiskTotal();
      systemCpusTotal = slaveMetricsSnapshot.getSystemCpusTotal();
    }

    // Pick the load average selected in configuration; the 5 minute value is the fallback.
    double systemLoad;
    switch (configuration.getMesosConfiguration().getScoreUsingSystemLoad()) {
      case LOAD_1:
        systemLoad = systemLoad1Min;
        break;
      case LOAD_15:
        systemLoad = systemLoad15Min;
        break;
      case LOAD_5:
      default:
        systemLoad = systemLoad5Min;
        break;
    }

    // Overloaded means the selected load average exceeds the number of cpus.
    boolean slaveOverloaded = systemCpusTotal > 0 && systemLoad / systemCpusTotal > 1.0;
    List<TaskIdWithUsage> possibleTasksToShuffle = new ArrayList<>();

    for (MesosTaskMonitorObject taskUsage : allTaskUsage) {
      String taskId = taskUsage.getSource();
      SingularityTaskId task;
      try {
        task = SingularityTaskId.valueOf(taskId);
      } catch (InvalidSingularityTaskIdException e) {
        // Pass the exception as the trailing argument so the parse failure reason is logged too.
        LOG.error("Couldn't get SingularityTaskId for {}", taskUsage, e);
        continue;
      }

      SingularityTaskUsage latestUsage = getUsage(taskUsage);
      // Read the history before clearing/saving so it does not include latestUsage.
      List<SingularityTaskUsage> pastTaskUsages = usageManager.getTaskUsage(taskId);

      clearOldUsage(taskId);
      usageManager.saveSpecificTaskUsage(taskId, latestUsage);

      Optional<SingularityTask> maybeTask = taskManager.getTask(task);
      Optional<Resources> maybeResources = Optional.absent();
      if (maybeTask.isPresent()) {
        // Resources on the pending task take precedence over those on the deploy.
        maybeResources = maybeTask.get().getTaskRequest().getPendingTask().getResources().or(maybeTask.get().getTaskRequest().getDeploy().getResources());
        if (maybeResources.isPresent()) {
          Resources taskResources = maybeResources.get();
          double memoryMbReservedForTask = taskResources.getMemoryMb();
          double cpuReservedForTask = taskResources.getCpus();
          double diskMbReservedForTask = taskResources.getDiskMb();

          memoryMbReservedOnSlave += memoryMbReservedForTask;
          cpuReservedOnSlave += cpuReservedForTask;
          diskMbReservedOnSlave += diskMbReservedForTask;

          updateRequestUtilization(utilizationPerRequestId, pastTaskUsages, latestUsage, task, memoryMbReservedForTask, cpuReservedForTask, diskMbReservedForTask);
        }
      }

      memoryBytesUsedOnSlave += latestUsage.getMemoryTotalBytes();
      diskMbUsedOnSlave += latestUsage.getDiskTotalBytes();

      SingularityTaskCurrentUsage currentUsage = null;
      if (pastTaskUsages.isEmpty()) {
        // No earlier sample: average cpu since the TASK_STARTING update, if one exists.
        Optional<SingularityTaskHistoryUpdate> maybeStartingUpdate = taskManager.getTaskHistoryUpdate(task, ExtendedTaskState.TASK_STARTING);
        if (maybeStartingUpdate.isPresent()) {
          long startTimestampSeconds = TimeUnit.MILLISECONDS.toSeconds(maybeStartingUpdate.get().getTimestamp());
          // NOTE(review): divisor is zero if the sample timestamp equals the start second - confirm handling.
          double usedCpusSinceStart = latestUsage.getCpuSeconds() / (latestUsage.getTimestamp() - startTimestampSeconds);

          if (isLongRunning(task) || isConsideredLongRunning(task)) {
            updateLongRunningTasksUsage(longRunningTasksUsage, latestUsage.getMemoryTotalBytes(), usedCpusSinceStart, latestUsage.getDiskTotalBytes());
          }

          currentUsage = new SingularityTaskCurrentUsage(latestUsage.getMemoryTotalBytes(), now, usedCpusSinceStart, latestUsage.getDiskTotalBytes());
          usageManager.saveCurrentTaskUsage(taskId, currentUsage);

          cpusUsedOnSlave += usedCpusSinceStart;
        }
      } else {
        // Cpu rate between the last stored sample and the one just collected.
        SingularityTaskUsage lastUsage = pastTaskUsages.get(pastTaskUsages.size() - 1);
        // NOTE(review): divisor is zero if both samples carry the same timestamp - confirm handling.
        double taskCpusUsed = ((latestUsage.getCpuSeconds() - lastUsage.getCpuSeconds()) / (latestUsage.getTimestamp() - lastUsage.getTimestamp()));

        if (isLongRunning(task) || isConsideredLongRunning(task)) {
          updateLongRunningTasksUsage(longRunningTasksUsage, latestUsage.getMemoryTotalBytes(), taskCpusUsed, latestUsage.getDiskTotalBytes());
        }

        currentUsage = new SingularityTaskCurrentUsage(latestUsage.getMemoryTotalBytes(), now, taskCpusUsed, latestUsage.getDiskTotalBytes());
        usageManager.saveCurrentTaskUsage(taskId, currentUsage);

        cpusUsedOnSlave += taskCpusUsed;
      }

      // Shuffle candidates: long running tasks using cpu, not excluded by configuration,
      // not already being cleaned for a shuffle, and with known resources.
      if (configuration.isShuffleTasksForOverloadedSlaves() && currentUsage != null && currentUsage.getCpusUsed() > 0) {
        if (isLongRunning(task) && !configuration.getDoNotShuffleRequests().contains(task.getRequestId())) {
          Optional<SingularityTaskHistoryUpdate> maybeCleanupUpdate = taskManager.getTaskHistoryUpdate(task, ExtendedTaskState.TASK_CLEANING);
          if (maybeCleanupUpdate.isPresent() && isTaskAlreadyCleanedUpForShuffle(maybeCleanupUpdate.get())) {
            LOG.trace("Task {} already being cleaned up to spread cpu usage, skipping", taskId);
          } else {
            if (maybeResources.isPresent()) {
              possibleTasksToShuffle.add(new TaskIdWithUsage(task, maybeResources.get(), currentUsage));
            }
          }
        }
      }
    }

    if (!slave.getResources().isPresent() || !slave.getResources().get().getMemoryMegaBytes().isPresent() || !slave.getResources().get().getNumCpus().isPresent()) {
      LOG.debug("Could not find slave or resources for slave {}", slave.getId());
    } else {
      memoryMbTotal = Optional.of(slave.getResources().get().getMemoryMegaBytes().get().longValue());
      cpusTotal = Optional.of(slave.getResources().get().getNumCpus().get().doubleValue());
      // The guard above does not cover disk space, so only read it when it is reported;
      // an unconditional get() would throw and discard the whole sample for this slave.
      if (slave.getResources().get().getDiskSpace().isPresent()) {
        diskMbTotal = Optional.of(slave.getResources().get().getDiskSpace().get());
      }
    }

    SingularitySlaveUsage slaveUsage = new SingularitySlaveUsage(cpusUsedOnSlave, cpuReservedOnSlave, cpusTotal, memoryBytesUsedOnSlave, memoryMbReservedOnSlave, memoryMbTotal, diskMbUsedOnSlave, diskMbReservedOnSlave, diskMbTotal, longRunningTasksUsage, allTaskUsage.size(), now, systemMemTotalBytes, systemMemFreeBytes, systemCpusTotal, systemLoad1Min, systemLoad5Min, systemLoad15Min, slaveDiskUsed, slaveDiskTotal);

    if (slaveOverloaded) {
      overLoadedHosts.put(slaveUsage, possibleTasksToShuffle);
    }

    // Drop the first stored timestamp when adding this sample would exceed the retention limit.
    List<Long> slaveTimestamps = usageManager.getSlaveUsageTimestamps(slave.getId());
    if (slaveTimestamps.size() + 1 > configuration.getNumUsageToKeep()) {
      usageManager.deleteSpecificSlaveUsage(slave.getId(), slaveTimestamps.get(0));
    }

    if (slaveUsage.getMemoryBytesTotal().isPresent() && slaveUsage.getCpusTotal().isPresent()) {
      totalMemBytesUsed.getAndAdd(slaveUsage.getMemoryBytesUsed());
      totalCpuUsed.getAndAdd(slaveUsage.getCpusUsed());

      totalMemBytesAvailable.getAndAdd(slaveUsage.getMemoryBytesTotal().get());
      totalCpuAvailable.getAndAdd(slaveUsage.getCpusTotal().get());

      // Count disk used and available together, and only when the total is known, so the
      // two disk totals stay comparable and the absent case does not throw.
      if (slaveUsage.getDiskBytesTotal().isPresent()) {
        totalDiskBytesUsed.getAndAdd(slaveUsage.getDiskBytesUsed());
        totalDiskBytesAvailable.getAndAdd(slaveUsage.getDiskBytesTotal().get());
      }
    }

    LOG.debug("Saving slave {} usage {}", slave.getHost(), slaveUsage);
    usageManager.saveSpecificSlaveUsageAndSetCurrent(slave.getId(), slaveUsage);
  } catch (Throwable t) {
    // Best effort: a failure for one slave is reported but must not escape to the caller.
    String message = String.format("Could not get slave usage for host %s", slave.getHost());
    LOG.error(message, t);
    exceptionNotifier.notify(message, t);
  }
}
use of com.hubspot.singularity.SingularityTaskUsage in project Singularity by HubSpot.
the class SingularityUsageTest method testUsagePollerSimple.
/**
 * Runs the usage poller once with no slaves, then once with a single task reporting usage,
 * and checks the slave usage and task usage that end up stored in {@code usageManager}.
 */
@Test
public void testUsagePollerSimple() {
  // works with no slaves
  usagePoller.runActionOnPoll();
  cleaner.runActionOnPoll();

  // Launch a single-instance request so exactly one task is active.
  initRequest();
  initFirstDeploy();
  saveAndSchedule(request.toBuilder().setInstances(Optional.of(1)));
  resourceOffers(1);

  SingularityTask firstTask = taskManager.getActiveTasks().get(0);
  String hostname = firstTask.getHostname();

  // Monitor values 2, 5, 100 are read back below as cpu seconds, timestamp and memory bytes.
  MesosTaskMonitorObject usage = getTaskMonitor(firstTask.getTaskId().getId(), 2, 5, 100);
  mesosClient.setSlaveResourceUsage(hostname, Collections.singletonList(usage));

  usagePoller.runActionOnPoll();

  String slaveId = firstTask.getAgentId().getValue();
  List<String> slaves = usageManager.getSlavesWithUsage();

  Assert.assertEquals(1, slaves.size());
  // Expected value first, so a failure message reports expected/actual the right way round.
  Assert.assertEquals(slaveId, slaves.get(0));

  Assert.assertEquals(0, usageManager.getSlaveUsage(slaveId).get(0).getCpusUsed(), 0);
  Assert.assertEquals(100, usageManager.getSlaveUsage(slaveId).get(0).getMemoryBytesUsed());

  SingularityTaskUsage first = usageManager.getTaskUsage(firstTask.getTaskId().getId()).get(0);

  Assert.assertEquals(2, first.getCpuSeconds(), 0);
  Assert.assertEquals(100, first.getMemoryTotalBytes(), 0);
  Assert.assertEquals(5, first.getTimestamp(), 0);
}
Aggregations