Search in sources :

Example 1 with Deployment

Use of Deployment in project vespa by vespa-engine.

the class ApplicationMaintainer method deployWithLock.

/** Redeploy this application. A lock will be taken for the duration of the deployment activation. */
final void deployWithLock(ApplicationId application) {
    // Lock is acquired with a low timeout to reduce the chance of colliding with an external deployment.
    // The lock is held via try-with-resources, so it is released when the try block exits.
    try (Mutex lock = nodeRepository().lock(application, Duration.ofSeconds(1))) {
        // became inactive since deployment was requested
        // NOTE(review): the statement guarded by this check is missing from the excerpt
        // (presumably an early return) - confirm against the original source.
        if (!isActive(application))
        Optional<Deployment> deployment = deployer.deployFromLocalActive(application);
        // this will be done at another config server
        // NOTE(review): the statement guarded by this check, and whatever is done with the
        // deployment afterwards, are missing from the excerpt - confirm against the original source.
        if (!deployment.isPresent())
    } catch (RuntimeException e) {
        // RuntimeExceptions thrown inside the try block are caught here and logged at WARNING level.
        log.log(Level.WARNING, "Exception on maintenance redeploy", e);
        // NOTE(review): the closing braces of the catch block and the method are missing from the excerpt.
Also used: Deployment, Mutex

Example 2 with Deployment

Use of Deployment in project vespa by vespa-engine.

the class RetiredExpirer method maintain.

// Groups allocated, retired nodes by the application owning them and, for each such application,
// calls deployer.deployFromLocalActive and marks a list of nodes as removable in the node repository.
// NOTE(review): this excerpt is truncated in several places (flagged inline) and is not compilable
// as shown - confirm details against the original source.
protected void maintain() {
    // NOTE(review): the argument list of getNodes(...) is missing from the excerpt.
    List<Node> activeNodes = nodeRepository().getNodes(;
    // Keeps nodes whose allocation is present and whose membership is retired, keyed by allocation owner.
    // NOTE(review): the start of this expression (the stream source and the opening of the first
    // filter call) is missing from the excerpt.
    Map<ApplicationId, List<Node>> retiredNodesByApplication = -> node.allocation().isPresent()).filter(node -> node.allocation().get().membership().retired()).collect(Collectors.groupingBy(node -> node.allocation().get().owner()));
    for (Map.Entry<ApplicationId, List<Node>> entry : retiredNodesByApplication.entrySet()) {
        ApplicationId application = entry.getKey();
        List<Node> retiredNodes = entry.getValue();
        try {
            Optional<Deployment> deployment = deployer.deployFromLocalActive(application);
            // this will be done at another config server
            // NOTE(review): the statement guarded by this check is missing from the excerpt.
            if (!deployment.isPresent())
            // NOTE(review): the expression computing nodesToRemove is missing from the excerpt.
            List<Node> nodesToRemove =;
            // NOTE(review): the body and closing brace of this block are missing from the excerpt.
            if (nodesToRemove.isEmpty()) {
            nodeRepository().setRemovable(application, nodesToRemove);
            // NOTE(review): the expression building nodeList is truncated; only the tail of what
            // looks like a joining(", ") call remains.
            String nodeList =", "));
            // NOTE(review): the call receiving the message below (presumably a log call) is missing.
  "Redeployed " + application + " to deactivate retired nodes: " + nodeList);
        } catch (RuntimeException e) {
            // RuntimeExceptions from the try block are caught per application and logged at WARNING level.
            // NOTE(review): the expression building nodeList is truncated here as well.
            String nodeList =", "));
            log.log(Level.WARNING, "Exception trying to deactivate retired nodes from " + application + ": " + nodeList, e);
            // NOTE(review): the closing braces of the catch block, the loop and the method are missing.
Also used: OrchestrationException, Deployer, ApplicationId, Deployment, NodeType, Orchestrator, Node, Instant (java.time.Instant), Collectors, Level (java.util.logging.Level), NodeRepository, List (java.util.List), History, HostName, Duration (java.time.Duration), Map (java.util.Map), Clock (java.time.Clock), Optional (java.util.Optional), Node, Deployment, List (java.util.List), ApplicationId, Map (java.util.Map)

Example 3 with Deployment

Use of Deployment in project vespa by vespa-engine.

the class NodeFailer method failActive.

/**
 * Called when a node should be moved to the failed state: Do that if it seems safe,
 * which is when the node repo has available capacity to replace the node (and all its tenant nodes if host).
 * Otherwise not replacing the node ensures (by Orchestrator check) that no further action will be taken.
 *
 * @return whether node was successfully failed
 */
private boolean failActive(Node node, String reason) {
    // Request a deployment for the application owning this node, passing a 30 minute duration.
    // NOTE(review): node.allocation().get() is called without a presence check - callers
    // presumably only pass allocated nodes; confirm.
    Optional<Deployment> deployment = deployer.deployFromLocalActive(node.allocation().get().owner(), Duration.ofMinutes(30));
    // this will be done at another config server
    if (!deployment.isPresent())
        return false;
    // The owning application's lock is held for the rest of the method via try-with-resources.
    try (Mutex lock = nodeRepository().lock(node.allocation().get().owner())) {
        // If the active node that we are trying to fail is of type host, we need to successfully fail all
        // the children nodes running on it before we fail the host
        boolean allTenantNodesFailedOutSuccessfully = true;
        String reasonForChildFailure = "Failing due to parent host " + node.hostname() + " failure: " + reason;
        for (Node failingTenantNode : nodeRepository().getChildNodes(node.hostname())) {
            // NOTE(review): the right-hand side of this state comparison is missing from the excerpt.
            if (failingTenantNode.state() == {
                // Child nodes matching the state check are failed through a recursive call; any
                // unsuccessful result clears the flag.
                allTenantNodesFailedOutSuccessfully &= failActive(failingTenantNode, reasonForChildFailure);
            } else {
                // Other child nodes are failed directly in the node repository.
                nodeRepository().fail(failingTenantNode.hostname(), Agent.system, reasonForChildFailure);
        // NOTE(review): the closing braces of the else branch and the for loop are missing from the excerpt.
        if (!allTenantNodesFailedOutSuccessfully)
            return false;
        // Rebind node to the value returned by fail(...), so later uses see the result of that call.
        node = nodeRepository().fail(node.hostname(), Agent.system, reason);
        try {
            // NOTE(review): a statement appears to be missing before this return (presumably the
            // redeployment that the catch block below refers to) - confirm against the original source.
            return true;
        } catch (RuntimeException e) {
            // The expected reason for deployment to fail here is that there is no capacity available to redeploy.
            // In that case we should leave the node in the active state to avoid failing additional nodes.
            nodeRepository().reactivate(node.hostname(), Agent.system, "Failed to redeploy after being failed by NodeFailer");
            log.log(Level.WARNING, "Attempted to fail " + node + " for " + node.allocation().get().owner() + ", but redeploying without the node failed", e);
            return false;
            // NOTE(review): the closing braces of the catch block, the lock block and the method are missing.
Also used: Node, Deployment, Mutex

Example 4 with Deployment

Use of Deployment in project vespa by vespa-engine.

the class NodeRetirer method retireAllocated.

// Collects, per active application, the allocated tenant nodes that may be retired, then sets
// wantToRetire and wantToDeprovision on the nodes the retirement policy selects.
// NOTE(review): this excerpt is truncated in several places (flagged inline) and is not compilable
// as shown - confirm details against the original source.
void retireAllocated() {
    List<Node> allNodes = nodeRepository().getNodes(NodeType.tenant);
    List<ApplicationId> activeApplications = getActiveApplicationIds(allNodes);
    // NOTE(review): numSpareNodesByFlavorByState is not referenced again in the visible excerpt.
    Map<Flavor, Map<Node.State, Long>> numSpareNodesByFlavorByState = getNumberOfNodesByFlavorByNodeState(allNodes);
    // Get all the nodes that we could retire along with their deployments
    Map<Deployment, Set<Node>> nodesToRetireByDeployment = new HashMap<>();
    for (ApplicationId applicationId : activeApplications) {
        // Nodes of this application grouped by the id of the cluster they are members of.
        Map<ClusterSpec.Id, Set<Node>> nodesByCluster = getNodesBelongingToApplication(allNodes, applicationId).stream().collect(Collectors.groupingBy(node -> node.allocation().get().membership().cluster().id(), Collectors.toSet()));
        // Same cluster keys, with each node set narrowed by filterRetireableNodes.
        Map<ClusterSpec.Id, Set<Node>> retireableNodesByCluster = nodesByCluster.entrySet().stream().collect(Collectors.toMap(Map.Entry::getKey, entry -> filterRetireableNodes(entry.getValue())));
        // NOTE(review): the statement guarded by this "nothing retireable" check is missing from the excerpt.
        if (retireableNodesByCluster.values().stream().mapToInt(Set::size).sum() == 0)
        Optional<Deployment> deployment = deployer.deployFromLocalActive(applicationId);
        // this will be done at another config server
        // NOTE(review): the statement guarded by this check is missing from the excerpt.
        if (!deployment.isPresent())
        // Per cluster: keep nodes whose flavor passes the spare check, capped by the per-cluster
        // limit computed from the cluster's full node set and MAX_SIMULTANEOUS_RETIRES_PER_CLUSTER.
        Set<Node> replaceableNodes = retireableNodesByCluster.entrySet().stream().flatMap(entry -> entry.getValue().stream().filter(node -> flavorSpareChecker.canRetireAllocatedNodeWithFlavor(node.flavor())).limit(getNumberNodesAllowToRetireForCluster(nodesByCluster.get(entry.getKey()), MAX_SIMULTANEOUS_RETIRES_PER_CLUSTER))).collect(Collectors.toSet());
        if (!replaceableNodes.isEmpty())
            nodesToRetireByDeployment.put(deployment.get(), replaceableNodes);
    // NOTE(review): the closing brace of the for loop is missing from the excerpt.
    nodesToRetireByDeployment.forEach(((deployment, nodes) -> {
        // The owning application is read from the first node in the set.
        ApplicationId app = nodes.iterator().next().allocation().get().owner();
        Set<Node> nodesToRetire;
        // NOTE(review): the beginning of the following comment is missing from the excerpt.
        // that may have changed) with wantToRetire and wantToDeprovision.
        try (Mutex lock = nodeRepository().lock(app)) {
            // NOTE(review): this expression is truncated (stream source, a comparison operand and the
            // tail of the final flatMap are missing); the visible part re-reads each node by hostname
            // and compares allocation owners.
            nodesToRetire = -> nodeRepository().getNode(node.hostname()).filter(upToDateNode -> node.state() == -> node.allocation().get().owner().equals(upToDateNode.allocation().get().owner()))).flatMap(node ->;
            nodesToRetire.forEach(node -> retirementPolicy.shouldRetire(node).ifPresent(reason -> {
                // NOTE(review): the call receiving the message below (presumably a log call) is missing.
      "Setting wantToRetire and wantToDeprovision for host " + node.hostname() + " with flavor " + node.flavor().name() + " allocated to " + node.allocation().get().owner() + ". Reason: " + reason);
                Node updatedNode = node.with(node.status().withWantToRetire(true).withWantToDeprovision(true));
                // NOTE(review): the statement that persists updatedNode, and the closing of the lambda
                // and the lock block, are missing from the excerpt.
        // This takes a while, so do it outside of the application lock
        if (!nodesToRetire.isEmpty()) {
            try {
                // NOTE(review): the body of this try block (presumably the redeployment) is missing.
            } catch (Exception e) {
                log.log(LogLevel.INFO, "Failed to redeploy " + app.serializedForm() + ", will be redeployed later by application maintainer", e);
                // NOTE(review): the closing braces of the catch block, the if, the lambda and the method are missing.
Also used: Deployer, FlavorSpareChecker, RetirementPolicy, Iterator (java.util.Iterator), ApplicationId, Deployment, NodeType, Collection (java.util.Collection), ClusterSpec, Set (java.util.Set), HashMap (java.util.HashMap), Node, Logger (java.util.logging.Logger), Collectors, NodeRepository, Mutex, List (java.util.List), Stream, Agent, Flavor, Duration (java.time.Duration), Map (java.util.Map), LogLevel, Optional (java.util.Optional), Set (java.util.Set), HashMap (java.util.HashMap), Node, Deployment, Mutex, Flavor, Stream, ApplicationId, ApplicationId, HashMap (java.util.HashMap), Map (java.util.Map)


Deployment ( Mutex ( Node ( ApplicationId ( Deployer ( NodeType ( NodeRepository ( Duration (java.time.Duration)2 List (java.util.List)2 Map (java.util.Map)2 Optional (java.util.Optional)2 Collectors ( ClusterSpec ( Flavor ( LogLevel ( HostName ( RetirementPolicy ( Agent ( History ( FlavorSpareChecker (