Search in sources:

Example 6 with Mutex

use of com.yahoo.transaction.Mutex in project vespa by vespa-engine.

In the class NodeRepository, the method setRemovable:

/**
 * Marks each of the given nodes' allocations as removable (eligible to move from active
 * to inactive) and persists the updated nodes, all under the application lock.
 *
 * @param application the application the nodes belong to
 * @param nodes the nodes to make removable. These nodes MUST be in the active state.
 */
public void setRemovable(ApplicationId application, List<Node> nodes) {
    try (Mutex lock = lock(application)) {
        List<Node> updatedNodes = new ArrayList<>();
        for (Node current : nodes)
            updatedNodes.add(current.with(current.allocation().get().removable()));
        write(updatedNodes);
    }
}
Also used : Arrays(java.util.Arrays) ApplicationId(com.yahoo.config.provision.ApplicationId) Inject(com.google.inject.Inject) NodeRepositoryConfig(com.yahoo.config.provisioning.NodeRepositoryConfig) UnaryOperator(java.util.function.UnaryOperator) NameResolver(com.yahoo.vespa.hosted.provision.persistence.NameResolver) TreeSet(java.util.TreeSet) Curator(com.yahoo.vespa.curator.Curator) ArrayList(java.util.ArrayList) HashSet(java.util.HashSet) NodeListFilter(com.yahoo.vespa.hosted.provision.node.filter.NodeListFilter) NestedTransaction(com.yahoo.transaction.NestedTransaction) Duration(java.time.Duration) Map(java.util.Map) CuratorDatabaseClient(com.yahoo.vespa.hosted.provision.persistence.CuratorDatabaseClient) NodeAcl(com.yahoo.vespa.hosted.provision.node.NodeAcl) NodeFilter(com.yahoo.vespa.hosted.provision.node.filter.NodeFilter) NodeFlavors(com.yahoo.config.provision.NodeFlavors) ImmutableSet(com.google.common.collect.ImmutableSet) NotFoundException(com.yahoo.vespa.hosted.provision.restapi.v2.NotFoundException) NodeType(com.yahoo.config.provision.NodeType) Set(java.util.Set) DnsNameResolver(com.yahoo.vespa.hosted.provision.persistence.DnsNameResolver) StateFilter(com.yahoo.vespa.hosted.provision.node.filter.StateFilter) Collectors(java.util.stream.Collectors) PeriodicApplicationMaintainer(com.yahoo.vespa.hosted.provision.maintenance.PeriodicApplicationMaintainer) Mutex(com.yahoo.transaction.Mutex) List(java.util.List) Agent(com.yahoo.vespa.hosted.provision.node.Agent) Flavor(com.yahoo.config.provision.Flavor) DockerImage(com.yahoo.config.provision.DockerImage) Zone(com.yahoo.config.provision.Zone) AbstractComponent(com.yahoo.component.AbstractComponent) Clock(java.time.Clock) Optional(java.util.Optional) Comparator(java.util.Comparator) Collections(java.util.Collections) ListMap(com.yahoo.collections.ListMap) Mutex(com.yahoo.transaction.Mutex)

Example 7 with Mutex

use of com.yahoo.transaction.Mutex in project vespa by vespa-engine.

In the class NodeFailer, the method failActive:

/**
 * Called when a node should be moved to the failed state: Do that if it seems safe,
 * which is when the node repo has available capacity to replace the node (and all its tenant nodes if host).
 * Otherwise not replacing the node ensures (by Orchestrator check) that no further action will be taken.
 *
 * @param node the active node to fail; callers must ensure it has an allocation
 * @param reason human-readable reason recorded with the state change
 * @return whether node was successfully failed
 */
private boolean failActive(Node node, String reason) {
    Optional<Deployment> deployment = deployer.deployFromLocalActive(node.allocation().get().owner(), Duration.ofMinutes(30));
    // this will be done at another config server
    if (!deployment.isPresent())
        return false;
    try (Mutex lock = nodeRepository().lock(node.allocation().get().owner())) {
        // If the active node that we are trying to fail is of type host, we need to successfully fail all
        // the children nodes running on it before we fail the host
        boolean allTenantNodesFailedOutSuccessfully = true;
        String reasonForChildFailure = "Failing due to parent host " + node.hostname() + " failure: " + reason;
        for (Node failingTenantNode : nodeRepository().getChildNodes(node.hostname())) {
            if (failingTenantNode.state() == Node.State.active) {
                // Recurse: an active child must itself be safely failed (with redeploy) first.
                // NOTE(review): this re-enters lock() for the child's owner while holding this
                // application's lock — presumably the lock is reentrant or owners differ; verify.
                allTenantNodesFailedOutSuccessfully &= failActive(failingTenantNode, reasonForChildFailure);
            } else {
                // Non-active children can be failed directly without a redeploy check
                nodeRepository().fail(failingTenantNode.hostname(), Agent.system, reasonForChildFailure);
            }
        }
        if (!allTenantNodesFailedOutSuccessfully)
            return false;
        // Capture the updated node so the reactivate/log below refer to the failed version
        node = nodeRepository().fail(node.hostname(), Agent.system, reason);
        try {
            deployment.get().activate();
            return true;
        } catch (RuntimeException e) {
            // The expected reason for deployment to fail here is that there is no capacity available to redeploy.
            // In that case we should leave the node in the active state to avoid failing additional nodes.
            nodeRepository().reactivate(node.hostname(), Agent.system, "Failed to redeploy after being failed by NodeFailer");
            log.log(Level.WARNING, "Attempted to fail " + node + " for " + node.allocation().get().owner() + ", but redeploying without the node failed", e);
            return false;
        }
    }
}
Also used : Node(com.yahoo.vespa.hosted.provision.Node) Deployment(com.yahoo.config.provision.Deployment) Mutex(com.yahoo.transaction.Mutex)

Example 8 with Mutex

use of com.yahoo.transaction.Mutex in project vespa by vespa-engine.

In the class NodeFailer, the method clearDownRecord:

/**
 * Clears a node's "down" history record by writing it back as up, under the owning
 * application's lock. No-op if the node has no down event or is no longer active.
 *
 * @param node the node whose down record should be cleared; must have an allocation
 */
private void clearDownRecord(Node node) {
    if (!node.history().event(History.Event.Type.down).isPresent())
        return;
    try (Mutex lock = nodeRepository().lock(node.allocation().get().owner())) {
        // Re-read inside the lock; the node may have changed — or left the active state
        // entirely — since the caller's snapshot, so guard instead of calling get() blindly
        Optional<Node> current = nodeRepository().getNode(node.hostname(), Node.State.active);
        current.ifPresent(n -> nodeRepository().write(n.up()));
    }
}
Also used : Mutex(com.yahoo.transaction.Mutex)

Example 9 with Mutex

use of com.yahoo.transaction.Mutex in project vespa by vespa-engine.

In the class GroupPreparer, the method prepare:

/**
 * Ensure sufficient nodes are reserved or active for the given application, group and cluster.
 *
 * @param application        the application we are allocating to
 * @param cluster            the cluster and group we are allocating to
 * @param requestedNodes     a specification of the requested nodes
 * @param surplusActiveNodes currently active nodes which are available to be assigned to this group.
 *                           This method will remove from this list if it finds it needs additional nodes
 * @param highestIndex       the current highest node index among all active nodes in this cluster.
 *                           This method will increase this number when it allocates new nodes to the cluster.
 * @param spareCount         The number of spare docker hosts we want when dynamically allocate docker containers
 * @return the list of nodes this cluster group will have allocated if activated
 */
// Note: This operation may make persisted changes to the set of reserved and inactive nodes,
// but it may not change the set of active nodes, as the active nodes must stay in sync with the
// active config model which is changed on activate
public List<Node> prepare(ApplicationId application, ClusterSpec cluster, NodeSpec requestedNodes, List<Node> surplusActiveNodes, MutableInteger highestIndex, int spareCount) {
    try (Mutex applicationLock = nodeRepository.lock(application)) {
        // Also hold the unallocated-pool lock so ready nodes cannot be grabbed by others concurrently
        try (Mutex unallocatedLock = nodeRepository.lockUnallocated()) {
            // Build the prioritized candidate set: this application's nodes, surplus nodes,
            // ready nodes, and potential new docker nodes
            NodePrioritizer candidates = new NodePrioritizer(nodeRepository.getNodes(), application, cluster, requestedNodes, nodeRepository.getAvailableFlavors(), spareCount, nodeRepository.nameResolver());
            candidates.addApplicationNodes();
            candidates.addSurplusNodes(surplusActiveNodes);
            candidates.addReadyNodes();
            candidates.addNewDockerNodes();
            // Offer the candidates, in priority order, to the allocation
            NodeAllocation nodeAllocation = new NodeAllocation(application, cluster, requestedNodes, highestIndex, nodeRepository);
            nodeAllocation.offer(candidates.prioritize());
            if (!nodeAllocation.fullfilled())
                throw new OutOfCapacityException("Could not satisfy " + requestedNodes + " for " + cluster + outOfCapacityDetails(nodeAllocation));
            // Refresh the reservation of nodes this application already holds in reserved
            nodeRepository.reserve(nodeRepository.getNodes(application, Node.State.reserved));
            // Persist the new allocation and report the resulting node set
            nodeRepository.reserve(nodeAllocation.reservableNodes());
            nodeRepository.addDockerNodes(nodeAllocation.newNodes());
            surplusActiveNodes.removeAll(nodeAllocation.surplusNodes());
            return nodeAllocation.finalNodes(surplusActiveNodes);
        }
    }
}
Also used : OutOfCapacityException(com.yahoo.config.provision.OutOfCapacityException) Mutex(com.yahoo.transaction.Mutex)

Example 10 with Mutex

use of com.yahoo.transaction.Mutex in project vespa by vespa-engine.

In the class Activator, the method activate:

/**
 * Adds operations to the given transaction which activate nodes for an application.
 * Nothing takes effect until the transaction is committed.
 * <p>
 * Pre condition: The application has a possibly empty set of nodes in each of reserved and active.
 * <p>
 * Post condition: Nodes in reserved which are present in <code>hosts</code> are moved to active.
 * Nodes in active which are not present in <code>hosts</code> are moved to inactive.
 *
 * @param application the application to allocate nodes for
 * @param hosts the hosts to make the set of active nodes of this
 * @param transaction Transaction with operations to commit together with any operations done within the repository.
 */
public void activate(ApplicationId application, Collection<HostSpec> hosts, NestedTransaction transaction) {
    try (Mutex lock = nodeRepository.lock(application)) {
        Set<String> requestedHostnames = hosts.stream().map(HostSpec::hostname).collect(Collectors.toSet());
        List<Node> reservedNodes = nodeRepository.getNodes(application, Node.State.reserved);
        List<Node> reservedToActivate = retainHostsInList(requestedHostnames, reservedNodes);
        List<Node> activeNodes = nodeRepository.getNodes(application, Node.State.active);
        List<Node> continuedActive = retainHostsInList(requestedHostnames, activeNodes);
        // Everything that will be active afterwards: surviving actives plus newly activated reserved
        List<Node> allActive = new ArrayList<>(continuedActive);
        allActive.addAll(reservedToActivate);
        if (!containsAll(requestedHostnames, allActive))
            throw new IllegalArgumentException("Activation of " + application + " failed. " + "Could not find all requested hosts." + "\nRequested: " + hosts + "\nReserved: " + toHostNames(reservedNodes) + "\nActive: " + toHostNames(activeNodes) + "\nThis might happen if the time from reserving host to activation takes " + "longer time than reservation expiry (the hosts will then no longer be reserved)");
        // Active nodes no longer requested are deactivated; unretire them first,
        // since only active nodes can be retired
        List<Node> activeToRemove = removeHostsFromList(requestedHostnames, activeNodes).stream().map(Node::unretire).collect(Collectors.toList());
        nodeRepository.deactivate(activeToRemove, transaction);
        // Update continued-active nodes with any changes from the host specs, then activate reserved
        nodeRepository.activate(updateFrom(hosts, continuedActive), transaction);
        nodeRepository.activate(reservedToActivate, transaction);
    }
}
Also used : Node(com.yahoo.vespa.hosted.provision.Node) ArrayList(java.util.ArrayList) Mutex(com.yahoo.transaction.Mutex)

Aggregations

Mutex (com.yahoo.transaction.Mutex)11 Node (com.yahoo.vespa.hosted.provision.Node)5 ApplicationId (com.yahoo.config.provision.ApplicationId)4 Deployment (com.yahoo.config.provision.Deployment)4 List (java.util.List)4 Map (java.util.Map)4 Flavor (com.yahoo.config.provision.Flavor)3 NodeType (com.yahoo.config.provision.NodeType)3 Agent (com.yahoo.vespa.hosted.provision.node.Agent)3 Duration (java.time.Duration)3 ArrayList (java.util.ArrayList)3 Optional (java.util.Optional)3 Set (java.util.Set)3 Collectors (java.util.stream.Collectors)3 ListMap (com.yahoo.collections.ListMap)2 ClusterSpec (com.yahoo.config.provision.ClusterSpec)2 Deployer (com.yahoo.config.provision.Deployer)2 LogLevel (com.yahoo.log.LogLevel)2 NodeRepository (com.yahoo.vespa.hosted.provision.NodeRepository)2 RetirementPolicy (com.yahoo.vespa.hosted.provision.maintenance.retire.RetirementPolicy)2