Use of com.yahoo.vespa.hosted.node.admin.configserver.orchestrator.OrchestratorException in project vespa by vespa-engine.
From the class RunInContainerTest, method testGetContainersToRunAPi:
@Ignore
@Test
public void testGetContainersToRunAPi() throws IOException, InterruptedException {
    doThrow(new OrchestratorException("Cannot suspend because...")).when(orchestratorMock).suspend(parentHostname);
    when(nodeRepositoryMock.getContainersToRun(eq(parentHostname))).thenReturn(Collections.emptyList());
    waitForJdiscContainerToServe();

    assertTrue("The initial resume command should fail because it needs to converge first",
            verifyWithRetries("resume", false));
    doNothing().when(orchestratorMock).resume(parentHostname);
    assertTrue(verifyWithRetries("resume", true));

    doThrow(new OrchestratorException("Cannot suspend because...")).when(orchestratorMock)
            .suspend(parentHostname, Collections.singletonList(parentHostname));
    assertTrue("Should fail because orchestrator does not allow node-admin to suspend",
            verifyWithRetries("suspend/node-admin", false));

    // The orchestrator changes its mind and allows node-admin to suspend
    doNothing().when(orchestratorMock).suspend(parentHostname, Collections.singletonList(parentHostname));
    assertTrue(verifyWithRetries("suspend/node-admin", true));

    // Now try to suspend everything; this should be trivial since there are no active containers whose services need stopping
    assertTrue(verifyWithRetries("suspend", false));
    assertTrue(verifyWithRetries("suspend", true));

    // Back to resume
    assertTrue(verifyWithRetries("resume", false));
    assertTrue(verifyWithRetries("resume", true));

    // Try the same, but with an active container running on this host
    when(nodeRepositoryMock.getContainersToRun(eq(parentHostname))).thenReturn(Collections.singletonList(
            new ContainerNodeSpec.Builder()
                    .hostname("host1.test.yahoo.com")
                    .wantedDockerImage(new DockerImage("dockerImage"))
                    .nodeState(Node.State.active)
                    .nodeType("tenant")
                    .nodeFlavor("docker")
                    .build()));
    doThrow(new OrchestratorException("Cannot suspend because...")).when(orchestratorMock)
            .suspend("localhost.test.yahoo.com", Arrays.asList("host1.test.yahoo.com", parentHostname));

    // Initially, suspending is denied because all the node agents must be frozen first
    assertTrue(verifyWithRetries("suspend/node-admin", false));
    // At this point they should be frozen, but the orchestrator still does not allow suspending the container or node-admin
    assertTrue(verifyWithRetries("suspend/node-admin", false));

    doNothing().when(orchestratorMock)
            .suspend("localhost.test.yahoo.com", Arrays.asList("host1.test.yahoo.com", parentHostname));
    // The orchestrator successfully suspends everything
    assertTrue(verifyWithRetries("suspend/node-admin", true));

    // Allow stopping services on active nodes
    doNothing().when(dockerOperationsMock).trySuspendNode(eq(new ContainerName("host1")));
    doNothing().when(dockerOperationsMock).stopServicesOnNode(eq(new ContainerName("host1")));
    assertTrue(verifyWithRetries("suspend", false));
    assertTrue(verifyWithRetries("suspend", true));
}
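The test drives node-admin's REST API through a verifyWithRetries helper that is not shown in this snippet. A minimal sketch of what such a helper could look like, assuming a plain HTTP GET against the locally running jdisc container and a fixed retry budget; the path layout, the port field, the retry count, and the "HTTP 200 means success" rule are assumptions for illustration, not taken from the Vespa source:

// Hypothetical helper, for illustration only: repeatedly calls the REST endpoint for the given
// command and returns true once the call's success/failure matches the expected outcome.
// Requires java.io.IOException, java.net.HttpURLConnection, java.net.URL.
private boolean verifyWithRetries(String command, boolean expectedSuccess) throws InterruptedException {
    for (int attempt = 0; attempt < 100; attempt++) {
        try {
            URL url = new URL("http://localhost:" + port + "/rest/" + command);  // port is an assumed test field
            HttpURLConnection connection = (HttpURLConnection) url.openConnection();
            boolean success = connection.getResponseCode() == 200;  // assumed success criterion
            connection.disconnect();
            if (success == expectedSuccess) return true;
        } catch (IOException ignored) {
            // The container may not be serving yet; fall through and retry
        }
        Thread.sleep(100);
    }
    return false;
}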
Use of com.yahoo.vespa.hosted.node.admin.configserver.orchestrator.OrchestratorException in project vespa by vespa-engine.
From the class NodeAdminStateUpdaterImpl, method tick:
void tick() {
    State wantedStateCopy;
    synchronized (monitor) {
        while (!workToDoNow) {
            Duration timeSinceLastConverge = Duration.between(lastTick, clock.instant());
            long remainder = nodeAdminConvergeStateInterval.minus(timeSinceLastConverge).toMillis();
            if (remainder > 0) {
                try {
                    monitor.wait(remainder);
                } catch (InterruptedException e) {
                    log.info("Interrupted, but ignoring this: NodeAdminStateUpdater");
                }
            } else {
                break;
            }
        }
        lastTick = clock.instant();
        workToDoNow = false;

        // wantedState may change asynchronously, so we grab a copy of it here
        wantedStateCopy = this.wantedState;
    }

    try {
        convergeState(wantedStateCopy);
    } catch (OrchestratorException | ConvergenceException | HttpException e) {
        log.info("Unable to converge to " + wantedStateCopy + ": " + e.getMessage());
    } catch (Exception e) {
        log.log(LogLevel.ERROR, "Error while trying to converge to " + wantedStateCopy, e);
    }

    if (wantedStateCopy != RESUMED && currentState == TRANSITIONING) {
        Duration subsystemFreezeDuration = nodeAdmin.subsystemFreezeDuration();
        if (subsystemFreezeDuration.compareTo(FREEZE_CONVERGENCE_TIMEOUT) > 0) {
            // We have spent too much time trying to freeze and node admin is still not frozen.
            // To avoid node agents stalling for too long, we'll force unfrozen ticks now.
            log.info("Timed out trying to freeze, will force unfreezed ticks");
            nodeAdmin.setFrozen(false);
        }
    }

    fetchContainersToRunFromNodeRepository();
}
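The monitor.wait/workToDoNow pattern implies that tick() is called repeatedly from a dedicated thread, and that callers which change the wanted state set workToDoNow and notify the monitor so the sleeping tick wakes up before the full converge interval has elapsed. A minimal sketch of that driver and wake-up, assuming a simple loop thread and a signalWorkToBeDone-style method; the names loopThread, terminated, and signalWorkToBeDone are assumptions for illustration, not necessarily the ones used in the Vespa source:

// Hypothetical driver loop: tick() blocks internally (monitor.wait) until either the converge
// interval elapses or work is signalled, so the outer loop can simply keep calling it.
// Requires java.util.concurrent.atomic.AtomicBoolean for the assumed terminated flag.
private final AtomicBoolean terminated = new AtomicBoolean(false);
private final Thread loopThread = new Thread(() -> {
    while (!terminated.get()) {
        tick();
    }
}, "node-admin-state-updater");

// Hypothetical wake-up used by callers that change wantedState: mark that there is work to do now
// and notify the monitor so tick() returns from wait() immediately instead of waiting out the interval.
private void signalWorkToBeDone() {
    synchronized (monitor) {
        workToDoNow = true;
        monitor.notifyAll();
    }
}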