Search in sources :

Example 1 with Deployment

use of com.yahoo.config.provision.Deployment in project vespa by vespa-engine.

the class ApplicationMaintainer method deployWithLock.

/**
 * Redeploys the given application while holding its node repository lock.
 * The lock is held until the deployment activation has completed (or failed).
 * Runtime exceptions are logged as warnings and not propagated.
 */
final void deployWithLock(ApplicationId application) {
    // A short lock timeout reduces the chance of colliding with an external deployment.
    try (Mutex lock = nodeRepository().lock(application, Duration.ofSeconds(1))) {
        // Skip applications which became inactive after this redeployment was requested.
        if (isActive(application)) {
            // An absent deployment means the redeployment will be done at another config server.
            deployer.deployFromLocalActive(application).ifPresent(deployment -> deployment.activate());
        }
    } catch (RuntimeException e) {
        log.log(Level.WARNING, "Exception on maintenance redeploy", e);
    }
}
Also used : Deployment(com.yahoo.config.provision.Deployment) Mutex(com.yahoo.transaction.Mutex)

Example 2 with Deployment

use of com.yahoo.config.provision.Deployment in project vespa by vespa-engine.

the class RetiredExpirer method maintain.

/**
 * Finds the active nodes which are retired, grouped by owning application, and for each
 * application redeploys it once the retired nodes that can be removed are marked removable.
 * A failure for one application is logged and does not stop processing of the others.
 */
@Override
protected void maintain() {
    // Active nodes which have an allocation marked as retired, keyed by the owning application
    Map<ApplicationId, List<Node>> retiredByOwner = nodeRepository().getNodes(Node.State.active).stream()
            .filter(node -> node.allocation().isPresent())
            .filter(node -> node.allocation().get().membership().retired())
            .collect(Collectors.groupingBy(node -> node.allocation().get().owner()));

    retiredByOwner.forEach((application, retiredNodes) -> {
        try {
            Optional<Deployment> deployment = deployer.deployFromLocalActive(application);
            // No local deployment: this will be done at another config server
            if (!deployment.isPresent()) {
                return;
            }

            List<Node> removable = retiredNodes.stream()
                    .filter(this::canRemove)
                    .collect(Collectors.toList());
            // Nothing can be removed yet, so there is no reason to redeploy
            if (removable.isEmpty()) {
                return;
            }

            nodeRepository().setRemovable(application, removable);
            deployment.get().activate();
            log.info("Redeployed " + application + " to deactivate retired nodes: "
                     + removable.stream().map(Node::hostname).collect(Collectors.joining(", ")));
        } catch (RuntimeException e) {
            // Report all retired nodes of the application, not only the removable ones
            log.log(Level.WARNING,
                    "Exception trying to deactivate retired nodes from " + application + ": "
                    + retiredNodes.stream().map(Node::hostname).collect(Collectors.joining(", ")),
                    e);
        }
    });
}
Also used : OrchestrationException(com.yahoo.vespa.orchestrator.OrchestrationException) Deployer(com.yahoo.config.provision.Deployer) ApplicationId(com.yahoo.config.provision.ApplicationId) Deployment(com.yahoo.config.provision.Deployment) NodeType(com.yahoo.config.provision.NodeType) Orchestrator(com.yahoo.vespa.orchestrator.Orchestrator) Node(com.yahoo.vespa.hosted.provision.Node) Instant(java.time.Instant) Collectors(java.util.stream.Collectors) Level(java.util.logging.Level) NodeRepository(com.yahoo.vespa.hosted.provision.NodeRepository) List(java.util.List) History(com.yahoo.vespa.hosted.provision.node.History) HostName(com.yahoo.vespa.applicationmodel.HostName) Duration(java.time.Duration) Map(java.util.Map) Clock(java.time.Clock) Optional(java.util.Optional) Node(com.yahoo.vespa.hosted.provision.Node) Deployment(com.yahoo.config.provision.Deployment) List(java.util.List) ApplicationId(com.yahoo.config.provision.ApplicationId) Map(java.util.Map)

Example 3 with Deployment

use of com.yahoo.config.provision.Deployment in project vespa by vespa-engine.

the class NodeFailer method failActive.

/**
 * Tries to move an active node to the failed state and redeploy its application without it.
 * Any child nodes of the node are failed first: active children recursively through this
 * method, other children directly in the node repository. If a child could not be failed,
 * or the redeployment throws, the node is not left failed (in the latter case it is reactivated).
 *
 * @param node the active node to fail
 * @param reason the reason recorded for failing the node
 * @return true if the node was failed and its application redeployed, false otherwise
 */
private boolean failActive(Node node, String reason) {
    Optional<Deployment> deployment = deployer.deployFromLocalActive(node.allocation().get().owner(), Duration.ofMinutes(30));
    // Without a local deployment, this will be done at another config server
    if (!deployment.isPresent()) {
        return false;
    }

    try (Mutex lock = nodeRepository().lock(node.allocation().get().owner())) {
        // If the node is a host, all the child nodes running on it must be failed
        // successfully before the host itself is failed
        String childReason = "Failing due to parent host " + node.hostname() + " failure: " + reason;
        boolean everyChildFailed = true;
        for (Node child : nodeRepository().getChildNodes(node.hostname())) {
            if (child.state() != Node.State.active) {
                nodeRepository().fail(child.hostname(), Agent.system, childReason);
                continue;
            }
            // Every active child is attempted, also after an earlier child could not be failed
            boolean childFailed = failActive(child, childReason);
            if (!childFailed) {
                everyChildFailed = false;
            }
        }
        if (!everyChildFailed) {
            return false;
        }

        node = nodeRepository().fail(node.hostname(), Agent.system, reason);
        try {
            deployment.get().activate();
        } catch (RuntimeException e) {
            // The expected cause is that there is no capacity available to redeploy without the node.
            // Put the node back in the active state to avoid failing additional nodes.
            nodeRepository().reactivate(node.hostname(), Agent.system, "Failed to redeploy after being failed by NodeFailer");
            log.log(Level.WARNING, "Attempted to fail " + node + " for " + node.allocation().get().owner() + ", but redeploying without the node failed", e);
            return false;
        }
        return true;
    }
}
Also used : Node(com.yahoo.vespa.hosted.provision.Node) Deployment(com.yahoo.config.provision.Deployment) Mutex(com.yahoo.transaction.Mutex)

Example 4 with Deployment

use of com.yahoo.config.provision.Deployment in project vespa by vespa-engine.

the class NodeRetirer method retireAllocated.

/**
 * Selects allocated tenant nodes that can be retired, marks them with wantToRetire and
 * wantToDeprovision, and redeploys the applications owning them.
 *
 * Candidates are first collected per application without holding any lock. Each candidate
 * is then re-read from the node repository under the application lock and is only marked
 * if it is still active and still owned by the same application. The redeployment itself
 * is done after the lock is released; a failed redeployment is only logged.
 */
void retireAllocated() {
    List<Node> allNodes = nodeRepository().getNodes(NodeType.tenant);
    List<ApplicationId> activeApplications = getActiveApplicationIds(allNodes);
    Map<Flavor, Map<Node.State, Long>> numSpareNodesByFlavorByState = getNumberOfNodesByFlavorByNodeState(allNodes);
    flavorSpareChecker.updateReadyAndActiveCountsByFlavor(numSpareNodesByFlavorByState);

    // Get all the nodes that we could retire along with their deployments
    Map<Deployment, Set<Node>> nodesToRetireByDeployment = new HashMap<>();
    for (ApplicationId applicationId : activeApplications) {
        Map<ClusterSpec.Id, Set<Node>> nodesByCluster = getNodesBelongingToApplication(allNodes, applicationId).stream()
                .collect(Collectors.groupingBy(
                        node -> node.allocation().get().membership().cluster().id(),
                        Collectors.toSet()));
        Map<ClusterSpec.Id, Set<Node>> retireableNodesByCluster = nodesByCluster.entrySet().stream()
                .collect(Collectors.toMap(
                        Map.Entry::getKey,
                        entry -> filterRetireableNodes(entry.getValue())));
        // Nothing retireable in any cluster of this application: avoid creating a deployment
        if (retireableNodesByCluster.values().stream().mapToInt(Set::size).sum() == 0)
            continue;

        Optional<Deployment> deployment = deployer.deployFromLocalActive(applicationId);
        // this will be done at another config server
        if (!deployment.isPresent())
            continue;

        // Per cluster: keep the nodes whose flavor has a spare, capped by the per-cluster retire limit
        Set<Node> replaceableNodes = retireableNodesByCluster.entrySet().stream()
                .flatMap(entry -> entry.getValue().stream()
                        .filter(node -> flavorSpareChecker.canRetireAllocatedNodeWithFlavor(node.flavor()))
                        .limit(getNumberNodesAllowToRetireForCluster(nodesByCluster.get(entry.getKey()), MAX_SIMULTANEOUS_RETIRES_PER_CLUSTER)))
                .collect(Collectors.toSet());
        if (!replaceableNodes.isEmpty())
            nodesToRetireByDeployment.put(deployment.get(), replaceableNodes);
    }

    nodesToRetireByDeployment.forEach((deployment, nodes) -> {
        // Only non-empty node sets are put in the map, so there is always a first element
        ApplicationId app = nodes.iterator().next().allocation().get().owner();
        Set<Node> nodesToRetire;

        // While under the application lock, get the up-to-date node and make sure that its state
        // and owner have not changed since the candidates were collected. Mutate the up-to-date
        // node (so as to not overwrite other fields that may have changed) with wantToRetire
        // and wantToDeprovision.
        try (Mutex lock = nodeRepository().lock(app)) {
            nodesToRetire = nodes.stream()
                    .map(node -> nodeRepository().getNode(node.hostname())
                            // Check the freshly read node, not the snapshot taken before the lock
                            .filter(upToDateNode -> upToDateNode.state() == Node.State.active)
                            .filter(upToDateNode -> upToDateNode.allocation().isPresent())
                            .filter(upToDateNode -> node.allocation().get().owner().equals(upToDateNode.allocation().get().owner())))
                    .flatMap(node -> node.map(Stream::of).orElseGet(Stream::empty))
                    .collect(Collectors.toSet());

            nodesToRetire.forEach(node -> retirementPolicy.shouldRetire(node).ifPresent(reason -> {
                log.info("Setting wantToRetire and wantToDeprovision for host " + node.hostname() + " with flavor " + node.flavor().name() + " allocated to " + node.allocation().get().owner() + ". Reason: " + reason);
                Node updatedNode = node.with(node.status().withWantToRetire(true).withWantToDeprovision(true));
                nodeRepository().write(updatedNode);
            }));
        }

        // This takes a while, so do it outside of the application lock
        if (!nodesToRetire.isEmpty()) {
            try {
                deployment.activate();
            } catch (Exception e) {
                log.log(LogLevel.INFO, "Failed to redeploy " + app.serializedForm() + ", will be redeployed later by application maintainer", e);
            }
        }
    });
}
Also used : Deployer(com.yahoo.config.provision.Deployer) FlavorSpareChecker(com.yahoo.vespa.hosted.provision.provisioning.FlavorSpareChecker) RetirementPolicy(com.yahoo.vespa.hosted.provision.maintenance.retire.RetirementPolicy) Iterator(java.util.Iterator) ApplicationId(com.yahoo.config.provision.ApplicationId) Deployment(com.yahoo.config.provision.Deployment) NodeType(com.yahoo.config.provision.NodeType) Collection(java.util.Collection) ClusterSpec(com.yahoo.config.provision.ClusterSpec) Set(java.util.Set) HashMap(java.util.HashMap) Node(com.yahoo.vespa.hosted.provision.Node) Logger(java.util.logging.Logger) Collectors(java.util.stream.Collectors) NodeRepository(com.yahoo.vespa.hosted.provision.NodeRepository) Mutex(com.yahoo.transaction.Mutex) List(java.util.List) Stream(java.util.stream.Stream) Agent(com.yahoo.vespa.hosted.provision.node.Agent) Flavor(com.yahoo.config.provision.Flavor) Duration(java.time.Duration) Map(java.util.Map) LogLevel(com.yahoo.log.LogLevel) Optional(java.util.Optional) Set(java.util.Set) HashMap(java.util.HashMap) Node(com.yahoo.vespa.hosted.provision.Node) Deployment(com.yahoo.config.provision.Deployment) Mutex(com.yahoo.transaction.Mutex) Flavor(com.yahoo.config.provision.Flavor) Stream(java.util.stream.Stream) ApplicationId(com.yahoo.config.provision.ApplicationId) ApplicationId(com.yahoo.config.provision.ApplicationId) HashMap(java.util.HashMap) Map(java.util.Map)

Aggregations

Deployment (com.yahoo.config.provision.Deployment)4 Mutex (com.yahoo.transaction.Mutex)3 Node (com.yahoo.vespa.hosted.provision.Node)3 ApplicationId (com.yahoo.config.provision.ApplicationId)2 Deployer (com.yahoo.config.provision.Deployer)2 NodeType (com.yahoo.config.provision.NodeType)2 NodeRepository (com.yahoo.vespa.hosted.provision.NodeRepository)2 Duration (java.time.Duration)2 List (java.util.List)2 Map (java.util.Map)2 Optional (java.util.Optional)2 Collectors (java.util.stream.Collectors)2 ClusterSpec (com.yahoo.config.provision.ClusterSpec)1 Flavor (com.yahoo.config.provision.Flavor)1 LogLevel (com.yahoo.log.LogLevel)1 HostName (com.yahoo.vespa.applicationmodel.HostName)1 RetirementPolicy (com.yahoo.vespa.hosted.provision.maintenance.retire.RetirementPolicy)1 Agent (com.yahoo.vespa.hosted.provision.node.Agent)1 History (com.yahoo.vespa.hosted.provision.node.History)1 FlavorSpareChecker (com.yahoo.vespa.hosted.provision.provisioning.FlavorSpareChecker)1