Use of com.yahoo.vespa.orchestrator.OrchestrationException in project vespa by vespa-engine.
The class RetiredExpirer, method canRemove.
/**
 * Checks if the node can be removed:
 * if the node is a docker host, it will only be removed if it has no children,
 * or all its children are parked or failed.
 * Otherwise, a removal is allowed if either of these are true:
 * - The node has been in state {@link History.Event.Type#retired} for longer than {@link #retiredExpiry}
 * - Orchestrator allows it
 *
 * @param node the retired node being considered for removal
 * @return true if the node may be removed now
 */
private boolean canRemove(Node node) {
    if (node.type().isDockerHost()) {
        // A host with no children also passes allMatch (vacuously true), which covers the
        // "no children" case from the javadoc.
        return nodeRepository().getChildNodes(node.hostname()).stream()
                .allMatch(child -> child.state() == Node.State.parked || child.state() == Node.State.failed);
    }

    Optional<Instant> timeOfRetiredEvent = node.history().event(History.Event.Type.retired).map(History.Event::at);
    // The instant at which the retirement period expires (renamed from "retireAfter": it is a point
    // in time, not a duration). Fixed typo in the flag name: was "shouldRetireNowBecauseExpried".
    Optional<Instant> expiresAt = timeOfRetiredEvent.map(retiredEvent -> retiredEvent.plus(retiredExpiry));
    boolean shouldRemoveNowBecauseExpired = expiresAt.map(time -> time.isBefore(clock.instant())).orElse(false);
    if (shouldRemoveNowBecauseExpired) {
        return true;
    }

    try {
        // Not yet expired: removal is still allowed if the orchestrator grants permission.
        orchestrator.acquirePermissionToRemove(new HostName(node.hostname()));
        return true;
    } catch (OrchestrationException e) {
        log.info("Did not get permission to remove retired " + node + ": " + e.getMessage());
        return false;
    }
}
Use of com.yahoo.vespa.orchestrator.OrchestrationException in project vespa by vespa-engine.
The class RetiredExpirerTest, method ensure_early_inactivation.
// Verifies that retired nodes are inactivated *before* the retirement period expires
// when (and only when) the orchestrator grants permission, and that all remaining
// retired nodes are inactivated once the period does expire.
@Test
public void ensure_early_inactivation() throws OrchestrationException {
createReadyNodes(7, nodeRepository, nodeFlavors);
createHostNodes(4, nodeRepository, nodeFlavors);
ApplicationId applicationId = ApplicationId.from(TenantName.from("foo"), ApplicationName.from("bar"), InstanceName.from("fuz"));
// Allocate content cluster of sizes 7 -> 2 -> 3:
// Should end up with 3 nodes in the cluster (one previously retired), and 4 retired
ClusterSpec cluster = ClusterSpec.request(ClusterSpec.Type.content, ClusterSpec.Id.from("test"), Version.fromString("6.42"), false);
int wantedNodes;
activate(applicationId, cluster, wantedNodes = 7, 1, provisioner);
activate(applicationId, cluster, wantedNodes = 2, 1, provisioner);
activate(applicationId, cluster, wantedNodes = 3, 1, provisioner);
// All 7 originally-ready nodes are still active: 3 wanted + 4 retired-but-not-yet-inactivated.
assertEquals(7, nodeRepository.getNodes(applicationId, Node.State.active).size());
assertEquals(0, nodeRepository.getNodes(applicationId, Node.State.inactive).size());
// Cause inactivation of retired nodes
MockDeployer deployer = new MockDeployer(provisioner, Collections.singletonMap(applicationId, new MockDeployer.ApplicationContext(applicationId, cluster, Capacity.fromNodeCount(wantedNodes, Optional.of("default"), false), 1)));
// Allow the 1st and 3rd retired nodes permission to inactivate
// Consecutive stubbing: call 1 succeeds, call 2 throws, call 3 succeeds, call 4 throws.
// Mockito repeats the last stub (the throw) for any further calls.
doNothing().doThrow(new OrchestrationException("Permission not granted 1")).doNothing().doThrow(new OrchestrationException("Permission not granted 2")).when(orchestrator).acquirePermissionToRemove(any());
RetiredExpirer retiredExpirer = createRetiredExpirer(deployer);
retiredExpirer.run();
// 2 of the 4 retired nodes were granted permission, so 2 were inactivated in 1 redeployment.
assertEquals(5, nodeRepository.getNodes(applicationId, Node.State.active).size());
assertEquals(2, nodeRepository.getNodes(applicationId, Node.State.inactive).size());
assertEquals(1, deployer.redeployments);
verify(orchestrator, times(4)).acquirePermissionToRemove(any());
// Running it again has no effect
// (the remaining 2 retired nodes are asked again — hence 4 -> 6 invocations — but both are denied)
retiredExpirer.run();
assertEquals(5, nodeRepository.getNodes(applicationId, Node.State.active).size());
assertEquals(2, nodeRepository.getNodes(applicationId, Node.State.inactive).size());
assertEquals(1, deployer.redeployments);
verify(orchestrator, times(6)).acquirePermissionToRemove(any());
// Past the expiry the remaining retired nodes are inactivated without consulting the
// orchestrator at all (invocation count stays at 6).
clock.advance(RETIRED_EXPIRATION.plusMinutes(1));
retiredExpirer.run();
assertEquals(3, nodeRepository.getNodes(applicationId, Node.State.active).size());
assertEquals(4, nodeRepository.getNodes(applicationId, Node.State.inactive).size());
assertEquals(2, deployer.redeployments);
verify(orchestrator, times(6)).acquirePermissionToRemove(any());
// inactivated nodes are not retired
for (Node node : nodeRepository.getNodes(applicationId, Node.State.inactive)) assertFalse(node.allocation().get().membership().retired());
}
Use of com.yahoo.vespa.orchestrator.OrchestrationException in project vespa by vespa-engine.
The class HostResource, method patch.
/**
 * Patches the host identified by {@code hostNameString}. If the request carries a state,
 * it is parsed as a {@code HostStatus} and applied via the orchestrator; otherwise the
 * call is a no-op. Always returns an "ok" response on success.
 */
@Override
public PatchHostResponse patch(String hostNameString, PatchHostRequest request) {
    HostName hostName = new HostName(hostNameString);

    String requestedState = request.state;
    if (requestedState != null) {
        HostStatus newStatus;
        try {
            newStatus = HostStatus.valueOf(requestedState);
        } catch (IllegalArgumentException ignored) {
            // The string does not name any HostStatus constant — client error.
            throw new BadRequestException("Bad state in request: '" + requestedState + "'");
        }

        try {
            orchestrator.setNodeStatus(hostName, newStatus);
        } catch (HostNameNotFoundException e) {
            log.log(LogLevel.INFO, "Host not found: " + hostName);
            throw new NotFoundException(e);
        } catch (OrchestrationException e) {
            String message = "Failed to set " + hostName + " to " + newStatus + ": " + e.getMessage();
            log.log(LogLevel.INFO, message, e);
            throw new InternalServerErrorException(message);
        }
    }

    PatchHostResponse response = new PatchHostResponse();
    response.description = "ok";
    return response;
}
Use of com.yahoo.vespa.orchestrator.OrchestrationException in project vespa by vespa-engine.
The class HostResourceTest, method patch_handles_exception_in_orchestrator.
// An OrchestrationException from the orchestrator must surface as an
// InternalServerErrorException (HTTP 500) from the patch endpoint.
@Test(expected = InternalServerErrorException.class)
public void patch_handles_exception_in_orchestrator() throws OrchestrationException {
    String hostNameString = "hostname";

    // Stub the orchestrator to fail when asked to set this host to NO_REMARKS.
    Orchestrator failingOrchestrator = mock(Orchestrator.class);
    doThrow(new OrchestrationException("error"))
            .when(failingOrchestrator)
            .setNodeStatus(new HostName(hostNameString), HostStatus.NO_REMARKS);

    PatchHostRequest request = new PatchHostRequest();
    request.state = "NO_REMARKS";

    new HostResource(failingOrchestrator, uriInfo).patch(hostNameString, request);
}
Aggregations