use of com.hubspot.singularity.SingularityAgentUsage in project Singularity by HubSpot.
the class SingularityUsageHelper method collectAgentUsage.
/**
 * Polls a single agent for per-task resource usage and host metrics, persists what it finds, and
 * folds the agent's numbers into the supplied cluster-wide accumulators.
 *
 * <p>For every task reported by the agent that belongs to this framework, the latest usage sample
 * is saved, reserved resources are summed, request utilization is updated through
 * {@code updateRequestUtilization}, and a current cpu rate is derived from the previous sample
 * (or from the TASK_STARTING update when no previous sample exists). Tasks with a positive cpu rate
 * that pass {@code isEligibleForShuffle} and are not already being cleaned up for a shuffle become
 * shuffle candidates.
 *
 * <p>Any {@link Throwable} raised while processing the agent is logged and handed to the exception
 * notifier; nothing is rethrown.
 *
 * @param agent agent to poll; its host is used for the Mesos calls and its id for the saved usage
 * @param now timestamp stored on the resulting {@code SingularityAgentUsage}
 * @param utilizationPerRequestId forwarded to {@code updateRequestUtilization}
 * @param previousUtilizations looked up by request id; the match is forwarded to
 *     {@code updateRequestUtilization}
 * @param overLoadedHosts receives this agent's usage mapped to its shuffle candidates when the
 *     agent is overloaded for cpu or exceeds the configured memory utilization threshold
 * @param totalMemBytesUsed accumulator incremented by this agent's used memory
 * @param totalMemBytesAvailable accumulator incremented by this agent's total memory
 * @param totalCpuUsed accumulator incremented by this agent's used cpus
 * @param totalCpuAvailable accumulator incremented by this agent's total cpus
 * @param totalDiskBytesUsed accumulator incremented by this agent's used disk
 * @param totalDiskBytesAvailable accumulator incremented by this agent's total disk
 * @param useShortTimeout forwarded to {@code mesosClient.getAgentResourceUsage}
 */
public void collectAgentUsage(SingularityAgent agent, long now, Map<String, RequestUtilization> utilizationPerRequestId, Map<String, RequestUtilization> previousUtilizations, Map<SingularityAgentUsage, List<TaskIdWithUsage>> overLoadedHosts, AtomicLong totalMemBytesUsed, AtomicLong totalMemBytesAvailable, AtomicDouble totalCpuUsed, AtomicDouble totalCpuAvailable, AtomicLong totalDiskBytesUsed, AtomicLong totalDiskBytesAvailable, boolean useShortTimeout) {
  Optional<Long> memoryMbTotal = Optional.empty();
  Optional<Double> cpusTotal = Optional.empty();
  Optional<Long> diskMbTotal = Optional.empty();
  long memoryMbReserved = 0;
  double cpuReserved = 0;
  long diskMbReserved = 0;
  long memoryBytesUsed = 0;
  double cpusUsed = 0;
  // Accumulates getDiskTotalBytes() of every task, i.e. bytes rather than megabytes.
  long diskBytesUsed = 0;
  try {
    List<MesosTaskMonitorObject> allTaskUsage = mesosClient.getAgentResourceUsage(agent.getHost(), useShortTimeout);
    MesosAgentMetricsSnapshotObject agentMetricsSnapshot = mesosClient.getAgentMetricsSnapshot(agent.getHost());
    // Host-level metrics default to 0 when no snapshot is available.
    double systemMemTotalBytes = 0;
    double systemMemFreeBytes = 0;
    double systemLoad1Min = 0;
    double systemLoad5Min = 0;
    double systemLoad15Min = 0;
    double diskUsed = 0;
    double diskTotal = 0;
    double systemCpusTotal = 0;
    if (agentMetricsSnapshot != null) {
      systemMemTotalBytes = agentMetricsSnapshot.getSystemMemTotalBytes();
      systemMemFreeBytes = agentMetricsSnapshot.getSystemMemFreeBytes();
      systemLoad1Min = agentMetricsSnapshot.getSystemLoad1Min();
      systemLoad5Min = agentMetricsSnapshot.getSystemLoad5Min();
      systemLoad15Min = agentMetricsSnapshot.getSystemLoad15Min();
      diskUsed = agentMetricsSnapshot.getDiskUsed();
      diskTotal = agentMetricsSnapshot.getDiskTotal();
      systemCpusTotal = agentMetricsSnapshot.getSystemCpusTotal();
    }
    // Pick the load average the configuration asks for; the 5 minute value is the fallback.
    double systemLoad;
    switch (configuration.getMesosConfiguration().getScoreUsingSystemLoad()) {
      case LOAD_1:
        systemLoad = systemLoad1Min;
        break;
      case LOAD_15:
        systemLoad = systemLoad15Min;
        break;
      case LOAD_5:
      default:
        systemLoad = systemLoad5Min;
        break;
    }
    boolean overloadedForCpu = systemCpusTotal > 0 && systemLoad / systemCpusTotal > 1.0;
    // With no snapshot both memory values are 0, the division yields NaN, and the comparison is false.
    boolean experiencingHighMemUsage = ((systemMemTotalBytes - systemMemFreeBytes) / systemMemTotalBytes) > configuration.getShuffleTasksWhenAgentMemoryUtilizationPercentageExceeds();
    List<TaskIdWithUsage> possibleTasksToShuffle = new ArrayList<>();
    Set<String> shuffleBlacklist = new HashSet<>(shuffleConfigurationManager.getShuffleBlocklist());
    for (MesosTaskMonitorObject taskUsage : allTaskUsage) {
      if (!taskUsage.getFrameworkId().equals(configuration.getMesosConfiguration().getFrameworkId())) {
        LOG.info("Skipping task {} from other framework {}", taskUsage.getSource(), taskUsage.getFrameworkId());
        continue;
      }
      String taskId = taskUsage.getSource();
      SingularityTaskId task;
      try {
        task = SingularityTaskId.valueOf(taskId);
      } catch (InvalidSingularityTaskIdException e) {
        LOG.warn("Couldn't get SingularityTaskId for {}", taskUsage);
        continue;
      }
      SingularityTaskUsage latestUsage = getUsage(taskUsage);
      // Previously stored samples are read before the latest sample is saved.
      List<SingularityTaskUsage> pastTaskUsages = usageManager.getTaskUsage(task);
      usageManager.saveSpecificTaskUsage(task, latestUsage);
      Optional<SingularityTask> maybeTask = taskManager.getTask(task);
      Optional<Resources> maybeResources = Optional.empty();
      if (maybeTask.isPresent()) {
        // Resources on the pending task take precedence over the deploy's resources.
        maybeResources = maybeTask.get().getTaskRequest().getPendingTask().getResources().isPresent() ? maybeTask.get().getTaskRequest().getPendingTask().getResources() : maybeTask.get().getTaskRequest().getDeploy().getResources();
        if (maybeResources.isPresent()) {
          Resources taskResources = maybeResources.get();
          double memoryMbReservedForTask = taskResources.getMemoryMb();
          double cpuReservedForTask = taskResources.getCpus();
          double diskMbReservedForTask = taskResources.getDiskMb();
          memoryMbReserved += memoryMbReservedForTask;
          cpuReserved += cpuReservedForTask;
          diskMbReserved += diskMbReservedForTask;
          updateRequestUtilization(utilizationPerRequestId, previousUtilizations.get(maybeTask.get().getTaskRequest().getRequest().getId()), pastTaskUsages, latestUsage, task, memoryMbReservedForTask, cpuReservedForTask, diskMbReservedForTask);
        }
      }
      memoryBytesUsed += latestUsage.getMemoryTotalBytes();
      diskBytesUsed += latestUsage.getDiskTotalBytes();
      SingularityTaskCurrentUsage currentUsage = null;
      if (pastTaskUsages.isEmpty()) {
        // No earlier sample: measure the cpu rate from the TASK_STARTING update instead.
        Optional<SingularityTaskHistoryUpdate> maybeStartingUpdate = taskManager.getTaskHistoryUpdate(task, ExtendedTaskState.TASK_STARTING);
        if (maybeStartingUpdate.isPresent()) {
          long startTimestamp = maybeStartingUpdate.get().getTimestamp();
          long secondsSinceStart = TimeUnit.MILLISECONDS.toSeconds(latestUsage.getTimestamp() - startTimestamp);
          // toSeconds truncates, so a sample taken less than a second after start would divide by
          // zero and push Infinity/NaN into the agent and cluster cpu totals. Skip it this round.
          if (secondsSinceStart > 0) {
            double usedCpusSinceStart = latestUsage.getCpuSeconds() / secondsSinceStart;
            currentUsage = new SingularityTaskCurrentUsage(latestUsage.getMemoryTotalBytes(), (long) taskUsage.getStatistics().getTimestamp() * 1000, usedCpusSinceStart, latestUsage.getDiskTotalBytes());
            cpusUsed += usedCpusSinceStart;
          }
        }
      } else {
        SingularityTaskUsage lastUsage = pastTaskUsages.get(pastTaskUsages.size() - 1);
        long secondsSinceLastUsage = TimeUnit.MILLISECONDS.toSeconds(latestUsage.getTimestamp() - lastUsage.getTimestamp());
        // Same guard as above: a repeated or sub-second sample gives no usable interval.
        if (secondsSinceLastUsage > 0) {
          double taskCpusUsed = (latestUsage.getCpuSeconds() - lastUsage.getCpuSeconds()) / secondsSinceLastUsage;
          currentUsage = new SingularityTaskCurrentUsage(latestUsage.getMemoryTotalBytes(), (long) taskUsage.getStatistics().getTimestamp() * 1000, taskCpusUsed, latestUsage.getDiskTotalBytes());
          cpusUsed += taskCpusUsed;
        }
      }
      if (currentUsage != null && currentUsage.getCpusUsed() > 0) {
        if (isEligibleForShuffle(task, shuffleBlacklist)) {
          Optional<SingularityTaskHistoryUpdate> maybeCleanupUpdate = taskManager.getTaskHistoryUpdate(task, ExtendedTaskState.TASK_CLEANING);
          if (maybeCleanupUpdate.isPresent() && isTaskAlreadyCleanedUpForShuffle(maybeCleanupUpdate.get())) {
            LOG.trace("Task {} already being cleaned up to spread cpu or mem usage, skipping", taskId);
          } else {
            if (maybeResources.isPresent()) {
              possibleTasksToShuffle.add(new TaskIdWithUsage(task, maybeResources.get(), currentUsage));
            }
          }
        }
      }
    }
    // Disk is checked alongside memory and cpu so the getDiskSpace().get() below cannot throw;
    // a missing disk value is treated like a missing memory or cpu value.
    if (!agent.getResources().isPresent() || !agent.getResources().get().getMemoryMegaBytes().isPresent() || !agent.getResources().get().getNumCpus().isPresent() || !agent.getResources().get().getDiskSpace().isPresent()) {
      LOG.debug("Could not find agent or resources for agent {}", agent.getId());
    } else {
      memoryMbTotal = Optional.of(agent.getResources().get().getMemoryMegaBytes().get().longValue());
      cpusTotal = Optional.of(agent.getResources().get().getNumCpus().get().doubleValue());
      diskMbTotal = Optional.of(agent.getResources().get().getDiskSpace().get());
    }
    SingularityAgentUsage agentUsage = new SingularityAgentUsage(cpusUsed, cpuReserved, cpusTotal, memoryBytesUsed, memoryMbReserved, memoryMbTotal, diskBytesUsed, diskMbReserved, diskMbTotal, allTaskUsage.size(), now, systemMemTotalBytes, systemMemFreeBytes, systemCpusTotal, systemLoad1Min, systemLoad5Min, systemLoad15Min, diskUsed, diskTotal);
    if (overloadedForCpu || experiencingHighMemUsage) {
      overLoadedHosts.put(agentUsage, possibleTasksToShuffle);
    }
    // Only agents with known totals contribute to the cluster-wide numbers; the disk total is
    // checked too because it is unwrapped below.
    if (agentUsage.getMemoryBytesTotal().isPresent() && agentUsage.getCpusTotal().isPresent() && agentUsage.getDiskBytesTotal().isPresent()) {
      totalMemBytesUsed.getAndAdd((long) agentUsage.getMemoryBytesUsed());
      totalCpuUsed.getAndAdd(agentUsage.getCpusUsed());
      totalDiskBytesUsed.getAndAdd((long) agentUsage.getDiskBytesUsed());
      totalMemBytesAvailable.getAndAdd(agentUsage.getMemoryBytesTotal().get());
      totalCpuAvailable.getAndAdd(agentUsage.getCpusTotal().get());
      totalDiskBytesAvailable.getAndAdd(agentUsage.getDiskBytesTotal().get());
    }
    LOG.debug("Saving agent {} usage {}", agent.getHost(), agentUsage);
    usageManager.saveCurrentAgentUsage(new SingularityAgentUsageWithId(agentUsage, agent.getId()));
  } catch (Throwable t) {
    // Deliberately broad: a failure on one agent is reported and swallowed here.
    String message = String.format("Could not get agent usage for host %s", agent.getHost());
    LOG.error(message, t);
    exceptionNotifier.notify(message, t);
  }
}
use of com.hubspot.singularity.SingularityAgentUsage in project Singularity by HubSpot.
the class SingularityUsageTest method itDoesNotShuffleBlacklistedTasks.
/**
 * Runs the usage poller against a host reporting high load and checks that tasks of a request
 * placed on the shuffle blocklist receive no cleanup.
 */
@Test
public void itDoesNotShuffleBlacklistedTasks() {
  final String host = "host1";
  try {
    // Enable shuffling, make new tasks immediately eligible, and blocklist the request under test.
    configuration.setShuffleTasksForOverloadedAgents(true);
    configuration.setMinutesBeforeNewTaskEligibleForShuffle(0);
    shuffleCfgManager.addToShuffleBlocklist(requestId);

    // Three instances of the blocklisted request.
    initRequest();
    initFirstDeployWithResources(configuration.getMesosConfiguration().getDefaultCpus(), configuration.getMesosConfiguration().getDefaultMemory());
    saveAndSchedule(requestManager.getRequest(requestId).get().getRequest().toBuilder().setInstances(Optional.of(3)));
    resourceOffers(1);

    SingularityAgentUsage overloadedUsage = new SingularityAgentUsage(15, 10, Optional.of(10.0), 1, 1, Optional.of(30L), 1, 1, Optional.of(1024L), 1, System.currentTimeMillis(), 200000, 30000, 10, 15, 15, 15, 0, 107374182);
    usageManager.saveCurrentAgentUsage(new SingularityAgentUsageWithId(overloadedUsage, host));

    SingularityTaskId firstTask = taskManager.getActiveTaskIds().get(0);
    SingularityTaskId secondTask = taskManager.getActiveTaskIds().get(1);
    SingularityTaskId thirdTask = taskManager.getActiveTaskIds().get(2);

    for (SingularityTaskId started : Arrays.asList(firstTask, secondTask, thirdTask)) {
      statusUpdate(taskManager.getTask(started).get(), TaskState.TASK_STARTING, Optional.of(started.getStartedAt()));
    }

    // Each sample is stamped 5 seconds after the task's start (whole seconds).
    long firstSampleSeconds = TimeUnit.MILLISECONDS.toSeconds(firstTask.getStartedAt()) + 5;
    long secondSampleSeconds = TimeUnit.MILLISECONDS.toSeconds(secondTask.getStartedAt()) + 5;
    long thirdSampleSeconds = TimeUnit.MILLISECONDS.toSeconds(thirdTask.getStartedAt()) + 5;

    // 15 cpu-seconds over that window: about 3 cpus
    MesosTaskMonitorObject firstSample = getTaskMonitor(firstTask.getId(), 15, firstSampleSeconds, 1024);
    // 10 cpu-seconds: about 2 cpus
    MesosTaskMonitorObject secondSample = getTaskMonitor(secondTask.getId(), 10, secondSampleSeconds, 1024);
    // 5 cpu-seconds: about 1 cpu
    MesosTaskMonitorObject thirdSample = getTaskMonitor(thirdTask.getId(), 5, thirdSampleSeconds, 1024);

    mesosClient.setAgentResourceUsage(host, Arrays.asList(firstSample, secondSample, thirdSample));
    mesosClient.setAgentMetricsSnapshot(host, new MesosAgentMetricsSnapshotObject(0, 0, 0, 10.0, 0, 0, 0, 0, 0, 0, 0, 0, 10.0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 15, 0, 0, 15, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 15, 200000, 0, 30000, 0, 0, 0, 15, 0, 0, 0, 0));

    usagePoller.runActionOnPoll();

    // The first task's request is blocklisted, so no cleanup is expected for it.
    Assertions.assertFalse(taskManager.getTaskCleanup(firstTask.getId()).isPresent());
    // The second task belongs to the same request and must be left alone as well.
    Assertions.assertFalse(taskManager.getTaskCleanup(secondTask.getId()).isPresent());
  } finally {
    // Undo the settings this test is known to have changed.
    configuration.setShuffleTasksForOverloadedAgents(false);
    shuffleCfgManager.removeFromShuffleBlocklist(requestId);
  }
}
use of com.hubspot.singularity.SingularityAgentUsage in project Singularity by HubSpot.
the class SingularityUsageTest method itCreatesTaskCleanupsWhenAMachineIsOverloadedOnMemory.
/**
 * Runs the usage poller against a host whose memory utilization is above the configured 0.90
 * threshold and checks which of three tasks from one request gets a memory-rebalance cleanup.
 */
@Test
public void itCreatesTaskCleanupsWhenAMachineIsOverloadedOnMemory() {
  final String host = "host1";
  try {
    configuration.setShuffleTasksForOverloadedAgents(true);
    configuration.setMinutesBeforeNewTaskEligibleForShuffle(0);
    configuration.setShuffleTasksWhenAgentMemoryUtilizationPercentageExceeds(0.90);

    // Three instances of the same request.
    initRequest();
    initFirstDeployWithResources(configuration.getMesosConfiguration().getDefaultCpus(), configuration.getMesosConfiguration().getDefaultMemory());
    saveAndSchedule(requestManager.getRequest(requestId).get().getRequest().toBuilder().setInstances(Optional.of(3)));
    resourceOffers(1);

    SingularityAgentUsage highMemoryUsage = new SingularityAgentUsage(10, 10, Optional.of(10.0), 1, 1, Optional.of(30L), 1, 1, Optional.of(1024L), 1, System.currentTimeMillis(), 200000, 10000, 10, 10, 10, 10, 0, 107374182);
    usageManager.saveCurrentAgentUsage(new SingularityAgentUsageWithId(highMemoryUsage, host));

    SingularityTaskId firstTask = taskManager.getActiveTaskIds().get(0);
    SingularityTaskId secondTask = taskManager.getActiveTaskIds().get(1);
    SingularityTaskId thirdTask = taskManager.getActiveTaskIds().get(2);

    for (SingularityTaskId started : Arrays.asList(firstTask, secondTask, thirdTask)) {
      statusUpdate(taskManager.getTask(started).get(), TaskState.TASK_STARTING, Optional.of(started.getStartedAt()));
    }

    // Each sample is stamped 5 seconds after the task's start (whole seconds).
    long firstSampleSeconds = TimeUnit.MILLISECONDS.toSeconds(firstTask.getStartedAt()) + 5;
    long secondSampleSeconds = TimeUnit.MILLISECONDS.toSeconds(secondTask.getStartedAt()) + 5;
    long thirdSampleSeconds = TimeUnit.MILLISECONDS.toSeconds(thirdTask.getStartedAt()) + 5;

    // Largest memory figure of the three (95000)
    MesosTaskMonitorObject firstSample = getTaskMonitor(firstTask.getId(), 2, firstSampleSeconds, 95000);
    // Middle memory figure (63333)
    MesosTaskMonitorObject secondSample = getTaskMonitor(secondTask.getId(), 5, secondSampleSeconds, 63333);
    // Smallest memory figure (31667)
    MesosTaskMonitorObject thirdSample = getTaskMonitor(thirdTask.getId(), 5, thirdSampleSeconds, 31667);

    mesosClient.setAgentResourceUsage(host, Arrays.asList(firstSample, secondSample, thirdSample));
    mesosClient.setAgentMetricsSnapshot(host, new MesosAgentMetricsSnapshotObject(0, 0, 0, 10.0, 0, 0, 0, 0, 0, 0, 0, 0, 10.0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 10, 0, 0, 10, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 10, 200000, 0, 10000, 0, 0, 0, 10, 0, 0, 0, 0));

    usagePoller.runActionOnPoll();

    // The task with the most memory is expected to stay put.
    Assertions.assertFalse(taskManager.getTaskCleanup(firstTask.getId()).isPresent());
    // The task with the least memory is expected to be moved to rebalance memory.
    Assertions.assertEquals(TaskCleanupType.REBALANCE_MEMORY_USAGE, taskManager.getTaskCleanup(thirdTask.getId()).get().getCleanupType());
    // The middle task shares a request with the third task and is expected to stay put.
    Assertions.assertFalse(taskManager.getTaskCleanup(secondTask.getId()).isPresent());
  } finally {
    configuration.setShuffleTasksForOverloadedAgents(false);
  }
}
use of com.hubspot.singularity.SingularityAgentUsage in project Singularity by HubSpot.
the class SingularityUsageTest method itWillShuffleMultipleTasksIfNecessaryForMemoryShuffle.
@Test
public void itWillShuffleMultipleTasksIfNecessaryForMemoryShuffle() {
try {
configuration.setShuffleTasksForOverloadedAgents(true);
configuration.setMinutesBeforeNewTaskEligibleForShuffle(0);
configuration.setMaxTasksToShufflePerHost(2);
configuration.setMaxTasksToShuffleTotal(5);
configuration.setShuffleTasksWhenAgentMemoryUtilizationPercentageExceeds(0.90);
String t1id = "test-request-1";
String t2id = "test-request-2";
String t3id = "test-request-3";
scheduleTask(t1id, 1, 10);
scheduleTask(t2id, 1, 10);
scheduleTask(t3id, 1, 10);
sms.resourceOffers(ImmutableList.of(createOffer(10, 100000, 100000, "agent1", "host1"))).join();
System.out.println(taskManager.getActiveTaskIds());
Map<String, Map<String, SingularityTaskId>> taskIdMap = getTaskIdMapByHostByRequest();
SingularityTaskId task1 = taskIdMap.get("host1").get(t1id);
SingularityTaskId task2 = taskIdMap.get("host1").get(t2id);
SingularityTaskId task3 = taskIdMap.get("host1").get(t3id);
startTask(task1);
startTask(task2);
startTask(task3);
// not actually necessary to trigger shuffle, but worth leaving in case that changes
SingularityAgentUsage highMemUsage = new SingularityAgentUsage(1, 10, Optional.of(10.0), 1, 1, Optional.of(30L), 1, 1, Optional.of(1024L), 1, System.currentTimeMillis(), 100000, 1000, 10, 10, 10, 10, 0, 107374182);
usageManager.saveCurrentAgentUsage(new SingularityAgentUsageWithId(highMemUsage, "host1"));
MesosTaskMonitorObject t1u1 = getTaskMonitor(task1.getId(), 2, TimeUnit.MILLISECONDS.toSeconds(task1.getStartedAt()) + 5, 89000);
MesosTaskMonitorObject t2u1 = getTaskMonitor(task2.getId(), 2, TimeUnit.MILLISECONDS.toSeconds(task2.getStartedAt()) + 5, 9000);
MesosTaskMonitorObject t3u1 = getTaskMonitor(task3.getId(), 2, TimeUnit.MILLISECONDS.toSeconds(task3.getStartedAt()) + 5, 1000);
mesosClient.setAgentResourceUsage("host1", Arrays.asList(t1u1, t2u1, t3u1));
mesosClient.setAgentMetricsSnapshot("host1", new MesosAgentMetricsSnapshotObject(0, 0, 0, 10.0, 0, 0, 0, 0, 0, 0, 0, 0, 10.0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 10, 0, 0, 10, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 10, 100000, 0, 1000, 0, 0, 0, 10, 0, 0, 0, 0));
usagePoller.runActionOnPoll();
System.out.println(taskManager.getCleanupTaskIds().toString());
// First task is not cleaned up, due to relatively high utilization.
Assertions.assertFalse(taskManager.getTaskCleanup(task1.getId()).isPresent());
// Second task is cleaned up, due to relatively low utilization.
Assertions.assertEquals(TaskCleanupType.REBALANCE_MEMORY_USAGE, taskManager.getTaskCleanup(task2.getId()).get().getCleanupType());
// Third task is also cleaned up, in order to reach desired memory utilization.
Assertions.assertEquals(TaskCleanupType.REBALANCE_MEMORY_USAGE, taskManager.getTaskCleanup(task3.getId()).get().getCleanupType());
} finally {
configuration.setShuffleTasksForOverloadedAgents(false);
}
}
use of com.hubspot.singularity.SingularityAgentUsage in project Singularity by HubSpot.
the class SingularityMesosOfferSchedulerTest method itAccountsForMaxHistoricalTaskUsage.
/**
 * Polls usage twice for a single running task, scales the request to three instances, and checks
 * both the recorded request cpu utilization and that each of two offers accepts exactly one task.
 */
@Test
public void itAccountsForMaxHistoricalTaskUsage() {
  final String polledHost = "host1";

  initRequest();
  double reservedCpus = 2;
  double reservedMemoryMb = 1000;
  initFirstDeployWithResources(reservedCpus, reservedMemoryMb);
  saveAndSchedule(requestManager.getRequest(requestId).get().getRequest().toBuilder().setInstances(Optional.of(1)));
  resourceOffers(3);

  SingularityTaskId runningTask = taskManager.getActiveTaskIds().get(0);
  String runningTaskId = runningTask.getId();

  // First sample: 10 cpu-seconds at offset 5 (original note: 2 cpus used)
  MesosTaskMonitorObject firstSample = getTaskMonitor(runningTaskId, 10, getTimestampSeconds(runningTask, 5), 1000);
  mesosClient.setAgentResourceUsage(polledHost, Collections.singletonList(firstSample));
  usagePoller.runActionOnPoll();

  // Second sample: 11 cpu-seconds at offset 6 (original note: 1 cpus used)
  MesosTaskMonitorObject secondSample = getTaskMonitor(runningTaskId, 11, getTimestampSeconds(runningTask, 6), 1000);
  mesosClient.setAgentResourceUsage(polledHost, Collections.singletonList(secondSample));
  usagePoller.runActionOnPoll();

  // Give all three hosts the same small usage record.
  SingularityAgentUsage lightUsage = new SingularityAgentUsage(0.1, 0.1, Optional.of(10.0), 1, 1, Optional.of(30L), 1, 1, Optional.of(1024L), 1, System.currentTimeMillis(), 1, 30000, 10, 0, 0, 0, 0, 107374182);
  usageManager.saveCurrentAgentUsage(new SingularityAgentUsageWithId(lightUsage, "host1"));
  usageManager.saveCurrentAgentUsage(new SingularityAgentUsageWithId(lightUsage, "host2"));
  usageManager.saveCurrentAgentUsage(new SingularityAgentUsageWithId(lightUsage, "host3"));

  requestResource.scale(requestId, new SingularityScaleRequest(Optional.of(3), Optional.empty(), Optional.empty(), Optional.empty(), Optional.empty(), Optional.empty(), Optional.empty(), Optional.empty()), SingularityUser.DEFAULT_USER);

  Assertions.assertEquals(3.0, usageManager.getRequestUtilizations().get(requestId).getCpuUsed(), 0.001);

  Offer offerFromHost2 = createOffer(6, 30000, 107374182, "host2", "host2");
  agentAndRackManager.checkOffer(offerFromHost2);
  Offer offerFromHost3 = createOffer(6, 30000, 107374182, "host3", "host3");
  agentAndRackManager.checkOffer(offerFromHost3);

  singularityScheduler.drainPendingQueue();

  Collection<SingularityOfferHolder> holders = offerScheduler.checkOffers(ImmutableMap.of(offerFromHost2.getId().getValue(), offerFromHost2, offerFromHost3.getId().getValue(), offerFromHost3), System.currentTimeMillis());
  Assertions.assertEquals(2, holders.size());

  // Each offer has room for both new tasks, yet should take only one: accepting a task is
  // expected to lower that offer's score for the next check.
  holders.forEach(holder -> Assertions.assertEquals(1, holder.getAcceptedTasks().size()));
}
Aggregations