Use of com.yahoo.transaction.Mutex in project vespa by vespa-engine.
Class NodeRepository, method setRemovable.
/**
 * Sets a list of nodes to have their allocation removable (active to inactive) in the node repository.
 *
 * @param application the application the nodes belong to
 * @param nodes the nodes to make removable. These nodes MUST be in the active state.
 */
public void setRemovable(ApplicationId application, List<Node> nodes) {
    try (Mutex lock = lock(application)) {
        List<Node> removableNodes = nodes.stream()
                .map(node -> node.with(node.allocation().get().removable()))
                .collect(Collectors.toList());
        write(removableNodes);
    }
}
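The per-application Mutex above guards a read-modify-write of the nodes' allocations. Below is a minimal caller-side sketch, assuming a NodeRepository reference and an ApplicationId are in scope; the selection of retired nodes via allocation().get().membership().retired() is an assumption made for illustration and is not part of the snippet above.

// Hedged sketch: mark the application's retired active nodes removable.
// The retired() membership check is an assumption about the Allocation API;
// any other selection of active nodes would be passed in the same way.
List<Node> retiredActive = nodeRepository.getNodes(application, Node.State.active).stream()
        .filter(node -> node.allocation().get().membership().retired())
        .collect(Collectors.toList());
nodeRepository.setRemovable(application, retiredActive);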
Use of com.yahoo.transaction.Mutex in project vespa by vespa-engine.
Class NodeFailer, method failActive.
/**
 * Called when a node should be moved to the failed state: do that if it seems safe,
 * which is when the node repo has available capacity to replace the node (and all its tenant nodes, if it is a host).
 * Otherwise, not replacing the node ensures (via the Orchestrator check) that no further action will be taken.
 *
 * @return whether the node was successfully failed
 */
private boolean failActive(Node node, String reason) {
    Optional<Deployment> deployment =
            deployer.deployFromLocalActive(node.allocation().get().owner(), Duration.ofMinutes(30));
    if (!deployment.isPresent())
        return false; // This will be done at another config server
    try (Mutex lock = nodeRepository().lock(node.allocation().get().owner())) {
        // If the active node that we are trying to fail is of type host, we need to successfully fail all
        // the child nodes running on it before we fail the host
        boolean allTenantNodesFailedOutSuccessfully = true;
        String reasonForChildFailure = "Failing due to parent host " + node.hostname() + " failure: " + reason;
        for (Node failingTenantNode : nodeRepository().getChildNodes(node.hostname())) {
            if (failingTenantNode.state() == Node.State.active) {
                allTenantNodesFailedOutSuccessfully &= failActive(failingTenantNode, reasonForChildFailure);
            } else {
                nodeRepository().fail(failingTenantNode.hostname(), Agent.system, reasonForChildFailure);
            }
        }
        if (!allTenantNodesFailedOutSuccessfully)
            return false;
        node = nodeRepository().fail(node.hostname(), Agent.system, reason);
        try {
            deployment.get().activate();
            return true;
        } catch (RuntimeException e) {
            // The expected reason for deployment to fail here is that there is no capacity available to redeploy.
            // In that case we should leave the node in the active state to avoid failing additional nodes.
            nodeRepository().reactivate(node.hostname(), Agent.system,
                                        "Failed to redeploy after being failed by NodeFailer");
            log.log(Level.WARNING, "Attempted to fail " + node + " for " + node.allocation().get().owner() +
                                   ", but redeploying without the node failed", e);
            return false;
        }
    }
}
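For context, a simplified, hypothetical sketch of how a maintenance loop might invoke failActive is shown below. The downTimeLimit value and the wasDownLongerThan helper are placeholders rather than NodeFailer fields, and the getNodes(NodeType, State) overload is assumed for illustration.

// Hypothetical caller sketch: try to fail active tenant nodes that have been
// reported down for longer than a grace period. downTimeLimit and
// wasDownLongerThan are placeholders for illustration only.
Duration downTimeLimit = Duration.ofMinutes(60);
for (Node node : nodeRepository().getNodes(NodeType.tenant, Node.State.active)) {
    if (wasDownLongerThan(downTimeLimit, node) &&
        !failActive(node, "Node has been down longer than " + downTimeLimit)) {
        log.info("Could not fail " + node.hostname() + ": no free capacity to replace it");
    }
}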
Use of com.yahoo.transaction.Mutex in project vespa by vespa-engine.
Class NodeFailer, method clearDownRecord.
private void clearDownRecord(Node node) {
    if (!node.history().event(History.Event.Type.down).isPresent())
        return;
    try (Mutex lock = nodeRepository().lock(node.allocation().get().owner())) {
        // Re-read the node inside the lock so we operate on the latest persisted state
        node = nodeRepository().getNode(node.hostname(), Node.State.active).get();
        nodeRepository().write(node.up());
    }
}
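This snippet shows the recurring pattern in this section: take the per-application Mutex, re-read the node inside the lock, then write the modified copy. A minimal, generalized sketch of that pattern follows; mutateSomehow is a placeholder, and the isPresent() guard is an addition to cope with the node leaving the active state while we waited for the lock.

// Generalized lock/re-read/write sketch; mutateSomehow is a placeholder.
try (Mutex lock = nodeRepository().lock(node.allocation().get().owner())) {
    Optional<Node> fresh = nodeRepository().getNode(node.hostname(), Node.State.active);
    if (!fresh.isPresent()) return; // the node changed state while we waited for the lock
    nodeRepository().write(mutateSomehow(fresh.get()));
}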
Use of com.yahoo.transaction.Mutex in project vespa by vespa-engine.
Class GroupPreparer, method prepare.
/**
 * Ensures sufficient nodes are reserved or active for the given application, group and cluster.
 *
 * @param application the application we are allocating to
 * @param cluster the cluster and group we are allocating to
 * @param requestedNodes a specification of the requested nodes
 * @param surplusActiveNodes currently active nodes which are available to be assigned to this group.
 *                           This method will remove nodes from this list if it finds it needs additional nodes.
 * @param highestIndex the current highest node index among all active nodes in this cluster.
 *                     This method will increase this number when it allocates new nodes to the cluster.
 * @param spareCount the number of spare docker hosts we want when dynamically allocating docker containers
 * @return the list of nodes this cluster group will have allocated if activated
 */
// Note: This operation may make persisted changes to the set of reserved and inactive nodes,
// but it may not change the set of active nodes, as the active nodes must stay in sync with the
// active config model which is changed on activate
public List<Node> prepare(ApplicationId application, ClusterSpec cluster, NodeSpec requestedNodes,
                          List<Node> surplusActiveNodes, MutableInteger highestIndex, int spareCount) {
    try (Mutex lock = nodeRepository.lock(application)) {
        // Lock ready pool to ensure that ready nodes are not simultaneously grabbed by others
        try (Mutex readyLock = nodeRepository.lockUnallocated()) {
            // Create a prioritized set of nodes
            NodePrioritizer prioritizer = new NodePrioritizer(nodeRepository.getNodes(), application, cluster,
                                                              requestedNodes, nodeRepository.getAvailableFlavors(),
                                                              spareCount, nodeRepository.nameResolver());
            prioritizer.addApplicationNodes();
            prioritizer.addSurplusNodes(surplusActiveNodes);
            prioritizer.addReadyNodes();
            prioritizer.addNewDockerNodes();

            // Allocate from the prioritized list
            NodeAllocation allocation = new NodeAllocation(application, cluster, requestedNodes, highestIndex, nodeRepository);
            allocation.offer(prioritizer.prioritize());
            if (!allocation.fullfilled())
                throw new OutOfCapacityException("Could not satisfy " + requestedNodes + " for " + cluster +
                                                 outOfCapacityDetails(allocation));

            // Extend reservation for already reserved nodes
            nodeRepository.reserve(nodeRepository.getNodes(application, Node.State.reserved));

            // Carry out and return allocation
            nodeRepository.reserve(allocation.reservableNodes());
            nodeRepository.addDockerNodes(allocation.newNodes());
            surplusActiveNodes.removeAll(allocation.surplusNodes());
            return allocation.finalNodes(surplusActiveNodes);
        }
    }
}
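The two nested try-with-resources blocks encode a lock ordering: the per-application lock is always taken before the global lock over unallocated (ready) nodes. A stripped-down sketch of just that ordering, with the allocation steps collapsed into a placeholder doAllocation call, might look like this:

// Lock-ordering sketch: application lock first, then the ready/unallocated lock, so two
// concurrent prepare calls cannot acquire the locks in opposite order and deadlock.
// doAllocation is a placeholder for the prioritize/offer/reserve steps shown above.
try (Mutex applicationLock = nodeRepository.lock(application)) {
    try (Mutex readyLock = nodeRepository.lockUnallocated()) {
        return doAllocation(application, cluster, requestedNodes, surplusActiveNodes, highestIndex, spareCount);
    }
}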
Use of com.yahoo.transaction.Mutex in project vespa by vespa-engine.
Class Activator, method activate.
/**
 * Adds operations to activate nodes for an application to the given transaction.
 * The operations are not effective until the transaction is committed.
 * <p>
 * Pre condition: The application has a possibly empty set of nodes in each of reserved and active.
 * <p>
 * Post condition: Nodes in reserved which are present in <code>hosts</code> are moved to active.
 * Nodes in active which are not present in <code>hosts</code> are moved to inactive.
 *
 * @param transaction the transaction with operations to commit together with any operations done within the repository
 * @param application the application to allocate nodes for
 * @param hosts the hosts to make the set of active nodes of this application
 */
public void activate(ApplicationId application, Collection<HostSpec> hosts, NestedTransaction transaction) {
    try (Mutex lock = nodeRepository.lock(application)) {
        Set<String> hostnames = hosts.stream().map(HostSpec::hostname).collect(Collectors.toSet());
        List<Node> reserved = nodeRepository.getNodes(application, Node.State.reserved);
        List<Node> reservedToActivate = retainHostsInList(hostnames, reserved);
        List<Node> active = nodeRepository.getNodes(application, Node.State.active);
        List<Node> continuedActive = retainHostsInList(hostnames, active);
        List<Node> allActive = new ArrayList<>(continuedActive);
        allActive.addAll(reservedToActivate);
        if (!containsAll(hostnames, allActive))
            throw new IllegalArgumentException("Activation of " + application + " failed. " +
                                               "Could not find all requested hosts." +
                                               "\nRequested: " + hosts +
                                               "\nReserved: " + toHostNames(reserved) +
                                               "\nActive: " + toHostNames(active) +
                                               "\nThis might happen if the time from reserving host to activation takes " +
                                               "longer time than reservation expiry (the hosts will then no longer be reserved)");

        List<Node> activeToRemove = removeHostsFromList(hostnames, active);
        activeToRemove = activeToRemove.stream().map(Node::unretire).collect(Collectors.toList()); // only active nodes can be retired
        nodeRepository.deactivate(activeToRemove, transaction);
        nodeRepository.activate(updateFrom(hosts, continuedActive), transaction); // update active with any changes
        nodeRepository.activate(reservedToActivate, transaction);
    }
}
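Since the operations are only added to the NestedTransaction, nothing takes effect until the caller commits. A minimal caller-side sketch, assuming the standard com.yahoo.transaction.NestedTransaction constructor and commit():

// Sketch: activation becomes effective only when the surrounding transaction commits.
NestedTransaction transaction = new NestedTransaction();
activator.activate(application, hosts, transaction);
// ... other operations that must commit atomically with activation can be added here ...
transaction.commit();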