use of com.yahoo.vespa.hosted.provision.NodeRepository in project vespa by vespa-engine.
The class OperatorChangeApplicationMaintainer, method nodesNeedingMaintenance:
@Override
protected List<Node> nodesNeedingMaintenance() {
    // Maintenance window: everything since the previous run, up to now
    Instant windowEnd = clock.instant();
    Instant windowStart = previousRun;
    previousRun = windowEnd;

    // Return allocated nodes whose state was changed manually (by an operator) within the window
    return nodeRepository().getNodes().stream()
            .filter(node -> node.allocation().isPresent())
            .filter(node -> hasManualStateChangeSince(windowStart, node))
            .collect(Collectors.toList());
}
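The method keeps a sliding window: each run inspects only the interval since the previous run, so an operator-made state change is picked up once rather than repeatedly. Below is a minimal, self-contained sketch of that windowing pattern; the class and method names are hypothetical and not part of the Vespa API.

import java.time.Clock;
import java.time.Instant;
import java.util.List;
import java.util.function.BiPredicate;
import java.util.stream.Collectors;

// Hypothetical, simplified illustration of the same windowing pattern:
// each run inspects only the interval since the previous run.
class WindowedScanner<T> {

    private final Clock clock;
    private Instant previousRun;

    WindowedScanner(Clock clock) {
        this.clock = clock;
        this.previousRun = clock.instant();
    }

    // Returns the items changed since the previous run, then advances the window
    List<T> changedSinceLastRun(List<T> items, BiPredicate<T, Instant> changedSince) {
        Instant windowStart = previousRun;
        previousRun = clock.instant();
        return items.stream()
                .filter(item -> changedSince.test(item, windowStart))
                .collect(Collectors.toList());
    }
}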
use of com.yahoo.vespa.hosted.provision.NodeRepository in project vespa by vespa-engine.
The class MetricsReporter, method updateDockerMetrics:
private void updateDockerMetrics(List<Node> nodes) {
    // Total and free capacity across all docker hosts
    DockerHostCapacity capacity = new DockerHostCapacity(nodes);
    metric.set("hostedVespa.docker.totalCapacityCpu", capacity.getCapacityTotal().getCpu(), null);
    metric.set("hostedVespa.docker.totalCapacityMem", capacity.getCapacityTotal().getMemory(), null);
    metric.set("hostedVespa.docker.totalCapacityDisk", capacity.getCapacityTotal().getDisk(), null);
    metric.set("hostedVespa.docker.freeCapacityCpu", capacity.getFreeCapacityTotal().getCpu(), null);
    metric.set("hostedVespa.docker.freeCapacityMem", capacity.getFreeCapacityTotal().getMemory(), null);
    metric.set("hostedVespa.docker.freeCapacityDisk", capacity.getFreeCapacityTotal().getDisk(), null);

    // Per-flavor metrics for docker container flavors
    List<Flavor> dockerFlavors = nodeRepository().getAvailableFlavors().getFlavors().stream()
            .filter(f -> f.getType().equals(Flavor.Type.DOCKER_CONTAINER))
            .collect(Collectors.toList());
    for (Flavor flavor : dockerFlavors) {
        Metric.Context context = getContextAt("flavor", flavor.name());
        metric.set("hostedVespa.docker.freeCapacityFlavor", capacity.freeCapacityInFlavorEquivalence(flavor), context);
        metric.set("hostedVespa.docker.idealHeadroomFlavor", flavor.getIdealHeadroom(), context);
        metric.set("hostedVespa.docker.hostsAvailableFlavor", capacity.getNofHostsAvailableFor(flavor), context);
    }
}
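Each per-flavor data point is tagged with a "flavor" dimension through Metric.Context, so the same metric name can be broken down by flavor. A rough sketch of that filter-then-report-per-dimension shape, using stand-in types (none of these names come from Vespa), could look like this:

import java.util.List;
import java.util.stream.Collectors;

// Hypothetical stand-in types illustrating "filter by type, then report one metric per item,
// tagged with a dimension" -- a simplified shape of the method above.
class FlavorMetricsSketch {

    enum Type { BARE_METAL, DOCKER_CONTAINER }

    static class Flavor {
        final String name;
        final Type type;
        final double freeCapacity;
        Flavor(String name, Type type, double freeCapacity) {
            this.name = name; this.type = type; this.freeCapacity = freeCapacity;
        }
    }

    interface MetricSink {
        void set(String key, double value, String flavorDimension);
    }

    static void reportDockerFlavors(List<Flavor> flavors, MetricSink sink) {
        List<Flavor> dockerFlavors = flavors.stream()
                .filter(f -> f.type == Type.DOCKER_CONTAINER)
                .collect(Collectors.toList());
        for (Flavor flavor : dockerFlavors) {
            // One data point per flavor, tagged with the flavor name as the dimension
            sink.set("docker.freeCapacityFlavor", flavor.freeCapacity, flavor.name);
        }
    }
}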
use of com.yahoo.vespa.hosted.provision.NodeRepository in project vespa by vespa-engine.
The class NodeRetirer, method retireAllocated:
void retireAllocated() {
    List<Node> allNodes = nodeRepository().getNodes(NodeType.tenant);
    List<ApplicationId> activeApplications = getActiveApplicationIds(allNodes);
    Map<Flavor, Map<Node.State, Long>> numSpareNodesByFlavorByState = getNumberOfNodesByFlavorByNodeState(allNodes);
    flavorSpareChecker.updateReadyAndActiveCountsByFlavor(numSpareNodesByFlavorByState);

    // Get all the nodes that we could retire along with their deployments
    Map<Deployment, Set<Node>> nodesToRetireByDeployment = new HashMap<>();
    for (ApplicationId applicationId : activeApplications) {
        Map<ClusterSpec.Id, Set<Node>> nodesByCluster = getNodesBelongingToApplication(allNodes, applicationId).stream()
                .collect(Collectors.groupingBy(
                        node -> node.allocation().get().membership().cluster().id(),
                        Collectors.toSet()));
        Map<ClusterSpec.Id, Set<Node>> retireableNodesByCluster = nodesByCluster.entrySet().stream()
                .collect(Collectors.toMap(Map.Entry::getKey, entry -> filterRetireableNodes(entry.getValue())));
        if (retireableNodesByCluster.values().stream().mapToInt(Set::size).sum() == 0)
            continue;

        Optional<Deployment> deployment = deployer.deployFromLocalActive(applicationId);
        // No local active deployment here: this will be done at another config server
        if (!deployment.isPresent())
            continue;

        // Only retire nodes whose flavor has enough spare capacity, and cap the number of
        // simultaneous retirements per cluster
        Set<Node> replaceableNodes = retireableNodesByCluster.entrySet().stream()
                .flatMap(entry -> entry.getValue().stream()
                        .filter(node -> flavorSpareChecker.canRetireAllocatedNodeWithFlavor(node.flavor()))
                        .limit(getNumberNodesAllowToRetireForCluster(nodesByCluster.get(entry.getKey()),
                                                                     MAX_SIMULTANEOUS_RETIRES_PER_CLUSTER)))
                .collect(Collectors.toSet());
        if (!replaceableNodes.isEmpty())
            nodesToRetireByDeployment.put(deployment.get(), replaceableNodes);
    }

    nodesToRetireByDeployment.forEach(((deployment, nodes) -> {
        ApplicationId app = nodes.iterator().next().allocation().get().owner();
        Set<Node> nodesToRetire;

        // Re-read each node under the application lock (it may have changed since the candidate list
        // was built) and mark it with wantToRetire and wantToDeprovision
        try (Mutex lock = nodeRepository().lock(app)) {
            nodesToRetire = nodes.stream()
                    .map(node -> nodeRepository().getNode(node.hostname())
                            .filter(upToDateNode -> node.state() == Node.State.active)
                            .filter(upToDateNode -> node.allocation().get().owner().equals(upToDateNode.allocation().get().owner())))
                    .flatMap(node -> node.map(Stream::of).orElseGet(Stream::empty))
                    .collect(Collectors.toSet());

            nodesToRetire.forEach(node -> retirementPolicy.shouldRetire(node).ifPresent(reason -> {
                log.info("Setting wantToRetire and wantToDeprovision for host " + node.hostname() +
                         " with flavor " + node.flavor().name() +
                         " allocated to " + node.allocation().get().owner() + ". Reason: " + reason);
                Node updatedNode = node.with(node.status().withWantToRetire(true).withWantToDeprovision(true));
                nodeRepository().write(updatedNode);
            }));
        }

        // Redeployment takes a while, so do it outside of the application lock
        if (!nodesToRetire.isEmpty()) {
            try {
                deployment.activate();
            } catch (Exception e) {
                log.log(LogLevel.INFO, "Failed to redeploy " + app.serializedForm() +
                        ", will be redeployed later by application maintainer", e);
            }
        }
    }));
}
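The candidate nodes are collected without holding any lock, so each one is re-read under the application lock and its state and owner verified before wantToRetire and wantToDeprovision are written; the slow redeployment then runs outside the lock. A standalone sketch of that lock / re-read / verify / write pattern, with an in-memory map standing in for the node repository and all names hypothetical:

import java.util.Map;
import java.util.Optional;
import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.locks.ReentrantLock;

// Hypothetical sketch: take a lock, re-read the current record, verify it is still in the
// expected state, and only then write the updated copy.
class LockedUpdateSketch {

    static class Record {
        final String hostname;
        final boolean active;
        final boolean wantToRetire;
        Record(String hostname, boolean active, boolean wantToRetire) {
            this.hostname = hostname; this.active = active; this.wantToRetire = wantToRetire;
        }
        Record withWantToRetire(boolean value) { return new Record(hostname, active, value); }
    }

    private final Map<String, Record> repository = new ConcurrentHashMap<>();
    private final ReentrantLock lock = new ReentrantLock();

    // Re-read the record under the lock and only write if it is still active
    void markForRetirement(String hostname) {
        lock.lock();
        try {
            Optional.ofNullable(repository.get(hostname))
                    .filter(current -> current.active)               // verify state has not changed
                    .map(current -> current.withWantToRetire(true))  // build the updated copy
                    .ifPresent(updated -> repository.put(hostname, updated));
        } finally {
            lock.unlock();
        }
    }
}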