Use of com.hubspot.singularity.SingularityRack in the project Singularity by HubSpot.
From the class SingularitySlaveAndRackManager, method loadSlavesAndRacksFromMaster.
/**
 * Reconciles the slaves and racks stored as {@code ACTIVE} with the slaves listed in the given master state.
 *
 * <p>For every slave in {@code state}: a slave already stored as active has its resources re-saved when they
 * differ from what the master reports; any other slave is passed through {@code check(...)} and counted when
 * the result is {@code CheckResult.NEW}. Racks are handled the same way, keyed by the rack id derived from the
 * slave attributes. Active slaves and racks that the master did not report are moved to
 * {@code MISSING_ON_STARTUP} (when {@code isStartup}) or {@code DEAD} (otherwise).
 *
 * @param state     master state whose slave list is reconciled against the stored machines
 * @param isStartup selects the state applied to stored machines absent from {@code state}
 */
public void loadSlavesAndRacksFromMaster(MesosMasterStateObject state, boolean isStartup) {
  final Map<String, SingularitySlave> activeSlavesById = slaveManager.getObjectsByIdForState(MachineState.ACTIVE);
  final Map<String, SingularityRack> activeRacksById = rackManager.getObjectsByIdForState(MachineState.ACTIVE);
  // Working copy: racks seen in the master state are removed, what remains is missing.
  final Map<String, SingularityRack> remainingActiveRacks = Maps.newHashMap(activeRacksById);

  int newSlaveCount = 0;
  int newRackCount = 0;

  for (MesosMasterSlaveObject reportedSlave : state.getSlaves()) {
    final String slaveId = reportedSlave.getId();
    final String rackId = slaveAndRackHelper.getRackId(reportedSlave.getAttributes());
    final Map<String, String> textAttributes = slaveAndRackHelper.getTextAttributes(reportedSlave.getAttributes());
    final String host = slaveAndRackHelper.getMaybeTruncatedHost(reportedSlave.getHostname());

    if (!activeSlavesById.containsKey(slaveId)) {
      final SingularitySlave candidate = new SingularitySlave(slaveId, host, rackId, textAttributes, Optional.of(reportedSlave.getResources()));
      if (check(candidate, slaveManager) == CheckResult.NEW) {
        newSlaveCount++;
      }
    } else {
      final SingularitySlave knownSlave = activeSlavesById.get(slaveId);
      // Stale when no resources are stored yet, or the stored ones differ from the master's report.
      final boolean resourcesAreStale = knownSlave != null
          && !(knownSlave.getResources().isPresent() && knownSlave.getResources().get().equals(reportedSlave.getResources()));
      if (resourcesAreStale) {
        LOG.trace("Found updated resources ({}) for slave {}", reportedSlave.getResources(), knownSlave);
        slaveManager.saveObject(knownSlave.withResources(reportedSlave.getResources()));
      }
      // Whatever is left in this map after the loop was not reported by the master.
      activeSlavesById.remove(slaveId);
    }

    if (activeRacksById.containsKey(rackId)) {
      remainingActiveRacks.remove(rackId);
    } else if (check(new SingularityRack(rackId), rackManager) == CheckResult.NEW) {
      newRackCount++;
    }
  }

  final MachineState stateForMissing = isStartup ? MachineState.MISSING_ON_STARTUP : MachineState.DEAD;

  for (SingularitySlave missingSlave : activeSlavesById.values()) {
    slaveManager.changeState(missingSlave, stateForMissing, Optional.absent(), Optional.absent());
  }

  for (SingularityRack missingRack : remainingActiveRacks.values()) {
    rackManager.changeState(missingRack, stateForMissing, Optional.absent(), Optional.absent());
  }

  LOG.info("Found {} new racks ({} missing) and {} new slaves ({} missing)", newRackCount, remainingActiveRacks.size(), newSlaveCount, activeSlavesById.size());
}
Use of com.hubspot.singularity.SingularityRack in the project Singularity by HubSpot.
From the class SingularitySlaveAndRackManager, method checkOffer.
/**
 * Runs the slave and rack described by an offer through {@code check(...)}.
 *
 * <p>When the slave check yields {@code CheckResult.NEW} and the slave's host is reported inactive by
 * {@code inactiveSlaveManager}, the slave is moved to {@code STARTING_DECOMMISSION}; a new slave on any
 * other host is only logged. The rack is checked afterwards and logged when new.
 *
 * @param offer the offer whose agent id, rack id, host and text attributes describe the slave
 * @return the result of checking the slave (the rack's result is not returned)
 */
public CheckResult checkOffer(Offer offer) {
  final String slaveId = offer.getAgentId().getValue();
  final String rackId = slaveAndRackHelper.getRackIdOrDefault(offer);
  final String host = slaveAndRackHelper.getMaybeTruncatedHost(offer);
  final Map<String, String> textAttributes = slaveAndRackHelper.getTextAttributes(offer);

  final SingularitySlave offeredSlave = new SingularitySlave(slaveId, host, rackId, textAttributes, Optional.absent());
  final CheckResult slaveResult = check(offeredSlave, slaveManager);
  final boolean slaveIsNew = slaveResult == CheckResult.NEW;

  // The inactive-host lookup is only performed for new slaves (short-circuit on slaveIsNew).
  if (slaveIsNew && inactiveSlaveManager.isInactive(offeredSlave.getHost())) {
    LOG.info("Slave {} on inactive host {} attempted to rejoin. Marking as decommissioned.", offeredSlave, host);
    final String reason = String.format("Slave %s on inactive host %s attempted to rejoin cluster.", slaveId, host);
    slaveManager.changeState(offeredSlave, MachineState.STARTING_DECOMMISSION, Optional.of(reason), Optional.absent());
  } else if (slaveIsNew) {
    LOG.info("Offer revealed a new slave {}", offeredSlave);
  }

  final SingularityRack offeredRack = new SingularityRack(rackId);
  final boolean rackIsNew = check(offeredRack, rackManager) == CheckResult.NEW;
  if (rackIsNew) {
    LOG.info("Offer revealed a new rack {}", offeredRack);
  }

  return slaveResult;
}
Use of com.hubspot.singularity.SingularityRack in the project Singularity by HubSpot.
From the class SingularityScheduler, method checkForDecomissions.
/**
 * Processes slaves and racks currently in {@code STARTING_DECOMMISSION}.
 *
 * <p>Every active task found on such a slave or rack is handed to {@code cleanupTaskDueToDecomission(...)},
 * which receives the shared request-to-user map and the set of already-matched task ids. A machine with no
 * active task is recorded as {@code DECOMMISSIONED}; the state recorded for the others is whatever
 * {@code getDefaultMap(...)} assigned (not visible here). Each request collected in the map is then added to
 * the pending queue when it has an in-use deploy, and finally the recorded states are applied via
 * {@code changeState(...)}.
 */
@Timed
public void checkForDecomissions() {
  final long start = System.currentTimeMillis();

  final Map<String, Optional<String>> requestIdsToUserToReschedule = Maps.newHashMap();
  final Set<SingularityTaskId> matchingTaskIds = Sets.newHashSet();

  final Collection<SingularityTaskId> activeTaskIds = leaderCache.getActiveTaskIds();

  final Map<SingularitySlave, MachineState> slaves = getDefaultMap(slaveManager.getObjectsFiltered(MachineState.STARTING_DECOMMISSION));

  for (SingularitySlave slave : slaves.keySet()) {
    boolean foundTask = false;

    for (SingularityTask activeTask : taskManager.getTasksOnSlave(activeTaskIds, slave)) {
      cleanupTaskDueToDecomission(requestIdsToUserToReschedule, matchingTaskIds, activeTask, slave);
      foundTask = true;
    }

    if (!foundTask) {
      // Overwrites the value of an existing key only, so iterating keySet() stays valid.
      slaves.put(slave, MachineState.DECOMMISSIONED);
    }
  }

  final Map<SingularityRack, MachineState> racks = getDefaultMap(rackManager.getObjectsFiltered(MachineState.STARTING_DECOMMISSION));

  for (SingularityRack rack : racks.keySet()) {
    final String sanitizedRackId = JavaUtils.getReplaceHyphensWithUnderscores(rack.getId());

    boolean foundTask = false;

    for (SingularityTaskId activeTaskId : activeTaskIds) {
      if (!sanitizedRackId.equals(activeTaskId.getSanitizedRackId())) {
        continue;
      }

      // Any active task on the rack, even one already matched through its slave, keeps the rack
      // from being recorded as DECOMMISSIONED.
      foundTask = true;

      if (matchingTaskIds.contains(activeTaskId)) {
        continue;
      }

      final Optional<SingularityTask> maybeTask = taskManager.getTask(activeTaskId);
      if (!maybeTask.isPresent()) {
        // Calling get() on an absent Optional would throw and abort the whole check before any
        // state change or reschedule is applied; skip just this task instead.
        LOG.warn("Could not find task data for active task {} on decomissioning rack {}, skipping cleanup", activeTaskId, rack);
        continue;
      }

      cleanupTaskDueToDecomission(requestIdsToUserToReschedule, matchingTaskIds, maybeTask.get(), rack);
    }

    if (!foundTask) {
      racks.put(rack, MachineState.DECOMMISSIONED);
    }
  }

  for (Entry<String, Optional<String>> requestIdAndUser : requestIdsToUserToReschedule.entrySet()) {
    final String requestId = requestIdAndUser.getKey();

    LOG.trace("Rescheduling request {} due to decomissions", requestId);

    Optional<String> maybeDeployId = deployManager.getInUseDeployId(requestId);

    if (maybeDeployId.isPresent()) {
      requestManager.addToPendingQueue(new SingularityPendingRequest(requestId, maybeDeployId.get(), start, requestIdAndUser.getValue(), PendingType.DECOMISSIONED_SLAVE_OR_RACK, Optional.<Boolean>absent(), Optional.<String>absent()));
    } else {
      LOG.warn("Not rescheduling a request ({}) because of no active deploy", requestId);
    }
  }

  changeState(slaves, slaveManager);
  changeState(racks, rackManager);

  if (slaves.isEmpty() && racks.isEmpty() && requestIdsToUserToReschedule.isEmpty() && matchingTaskIds.isEmpty()) {
    LOG.trace("Decomission check found nothing");
  } else {
    LOG.info("Found {} decomissioning slaves, {} decomissioning racks, rescheduling {} requests and scheduling {} tasks for cleanup in {}", slaves.size(), racks.size(), requestIdsToUserToReschedule.size(), matchingTaskIds.size(), JavaUtils.duration(start));
  }
}
Use of com.hubspot.singularity.SingularityRack in the project Singularity by HubSpot.
From the class StateManager, method generateState.
/**
 * Assembles a {@link SingularityState} snapshot from the task, request, rack, slave and deploy managers.
 *
 * <p>Counting rules visible in this method:
 * <ul>
 *   <li>active tasks exclude launching tasks;</li>
 *   <li>{@code DEPLOYING_TO_UNPAUSE} requests are counted as active, {@code DELETED} requests are not counted;</li>
 *   <li>racks and slaves in {@code DECOMMISSIONED}, {@code STARTING_DECOMMISSION} or {@code DECOMMISSIONING}
 *       all count as decommissioning; {@code MISSING_ON_STARTUP} and any state not listed count as unknown;</li>
 *   <li>the oldest deploy / deploy step are the largest ages, in milliseconds, among pending deploys
 *       (a step is only considered when progress is present and the step is not complete).</li>
 * </ul>
 *
 * @param includeRequestIds when {@code false}, the over- and under-provisioned request id lists are passed
 *                          as {@code null}; their sizes are always passed
 * @return the assembled state
 */
public SingularityState generateState(boolean includeRequestIds) {
  // ---- tasks ----
  final int launchingTasks = taskManager.getNumLaunchingTasks();
  final int activeTasks = taskManager.getNumActiveTasks() - launchingTasks;
  final int scheduledTasks = taskManager.getNumScheduledTasks();
  final int cleaningTasks = taskManager.getNumCleanupTasks();
  final int lbCleanupTasks = taskManager.getNumLbCleanupTasks();
  final int lbCleanupRequests = requestManager.getNumLbCleanupRequests();

  final SingularityScheduledTasksInfo scheduledTasksInfo =
      SingularityScheduledTasksInfo.getInfo(taskManager.getPendingTasks(), singularityConfiguration.getDeltaAfterWhichTasksAreLateMillis());

  // ---- requests ----
  final List<String> overProvisionedRequestIds = new ArrayList<>();
  final Set<String> possiblyUnderProvisionedRequestIds = new HashSet<>();

  final List<SingularityRequestWithState> requests = requestManager.getRequests();
  final Map<String, Long> numInstances = getNumTasks(requests);

  int numActiveRequests = 0;
  int numPausedRequests = 0;
  int cooldownRequests = 0;
  int numFinishedRequests = 0;

  for (SingularityRequestWithState request : requests) {
    switch (request.getState()) {
      case DEPLOYING_TO_UNPAUSE:
      case ACTIVE:
        numActiveRequests++;
        break;
      case FINISHED:
        numFinishedRequests++;
        break;
      case PAUSED:
        numPausedRequests++;
        break;
      case SYSTEM_COOLDOWN:
        cooldownRequests++;
        break;
      case DELETED:
        // Deleted requests contribute to no counter.
        break;
    }

    updatePossiblyUnderProvisionedAndOverProvisionedIds(request, numInstances, overProvisionedRequestIds, possiblyUnderProvisionedRequestIds);
  }

  filterForPendingRequests(possiblyUnderProvisionedRequestIds);

  final List<String> underProvisionedRequestIds = getUnderProvisionedRequestIds(possiblyUnderProvisionedRequestIds);

  final int pendingRequests = requestManager.getSizeOfPendingQueue();
  final int cleaningRequests = requestManager.getSizeOfCleanupQueue();

  // ---- racks ----
  final List<SingularityRack> racks = rackManager.getObjects();

  int activeRacks = 0;
  int deadRacks = 0;
  int decommissioningRacks = 0;
  int unknownRacks = 0;

  for (SingularityRack currentRack : racks) {
    switch (currentRack.getCurrentState().getState()) {
      case ACTIVE:
        activeRacks++;
        break;
      case DEAD:
        deadRacks++;
        break;
      case DECOMMISSIONED:
      case STARTING_DECOMMISSION:
      case DECOMMISSIONING:
        decommissioningRacks++;
        break;
      case MISSING_ON_STARTUP:
      default:
        unknownRacks++;
        break;
    }
  }

  // ---- slaves ----
  final List<SingularitySlave> slaves = slaveManager.getObjects();

  int activeSlaves = 0;
  int deadSlaves = 0;
  int decommissioningSlaves = 0;
  int unknownSlaves = 0;

  for (SingularitySlave currentSlave : slaves) {
    switch (currentSlave.getCurrentState().getState()) {
      case ACTIVE:
        activeSlaves++;
        break;
      case DEAD:
        deadSlaves++;
        break;
      case DECOMMISSIONED:
      case STARTING_DECOMMISSION:
      case DECOMMISSIONING:
        decommissioningSlaves++;
        break;
      case MISSING_ON_STARTUP:
      default:
        unknownSlaves++;
        break;
    }
  }

  final List<SingularityHostState> states = getHostStates();

  // ---- deploys ----
  int numDeploys = 0;
  long oldestDeploy = 0;
  long oldestDeployStep = 0;
  final List<SingularityDeployMarker> activeDeploys = new ArrayList<>();
  final long referenceTime = System.currentTimeMillis();

  for (SingularityPendingDeploy deploy : deployManager.getPendingDeploys()) {
    activeDeploys.add(deploy.getDeployMarker());

    if (deploy.getDeployProgress().isPresent() && !deploy.getDeployProgress().get().isStepComplete()) {
      oldestDeployStep = Math.max(oldestDeployStep, referenceTime - deploy.getDeployProgress().get().getTimestamp());
    }

    oldestDeploy = Math.max(oldestDeploy, referenceTime - deploy.getDeployMarker().getTimestamp());
    numDeploys++;
  }

  final Optional<Boolean> authDatastoreHealthy = authDatastore.isHealthy();
  final Optional<Double> minimumPriorityLevel = getMinimumPriorityLevel();

  // The id lists are only exposed on request; their sizes are always reported.
  List<String> reportedOverProvisionedIds = null;
  List<String> reportedUnderProvisionedIds = null;
  if (includeRequestIds) {
    reportedOverProvisionedIds = overProvisionedRequestIds;
    reportedUnderProvisionedIds = underProvisionedRequestIds;
  }

  return new SingularityState(
      activeTasks,
      launchingTasks,
      numActiveRequests,
      cooldownRequests,
      numPausedRequests,
      scheduledTasks,
      pendingRequests,
      lbCleanupTasks,
      lbCleanupRequests,
      cleaningRequests,
      activeSlaves,
      deadSlaves,
      decommissioningSlaves,
      activeRacks,
      deadRacks,
      decommissioningRacks,
      cleaningTasks,
      states,
      oldestDeploy,
      numDeploys,
      oldestDeployStep,
      activeDeploys,
      scheduledTasksInfo.getNumLateTasks(),
      scheduledTasksInfo.getNumFutureTasks(),
      scheduledTasksInfo.getMaxTaskLag(),
      System.currentTimeMillis(),
      reportedOverProvisionedIds,
      reportedUnderProvisionedIds,
      overProvisionedRequestIds.size(),
      underProvisionedRequestIds.size(),
      numFinishedRequests,
      unknownRacks,
      unknownSlaves,
      authDatastoreHealthy,
      minimumPriorityLevel,
      statusUpdateDeltaAvg.get());
}
Aggregations