Search in sources :

Example 1 with OrchestratorException

use of com.yahoo.vespa.hosted.node.admin.configserver.orchestrator.OrchestratorException in project vespa by vespa-engine.

the class RunInContainerTest method testGetContainersToRunAPi.

/**
 * Drives the "resume", "suspend/node-admin" and "suspend" commands through
 * {@code verifyWithRetries} while the orchestrator mock alternates between
 * rejecting and accepting suspension; first with no containers reported by the
 * node repository mock, then with a single active tenant container.
 *
 * The statements are strictly sequential: each stubbing change is followed by
 * the assertions that depend on it.
 */
@Ignore
@Test
public void testGetContainersToRunAPi() throws IOException, InterruptedException {
    final String suspendDenied = "Cannot suspend because...";
    final String tenantHostname = "host1.test.yahoo.com";

    // Phase 1: the node repository reports no containers for this host.
    doThrow(new OrchestratorException(suspendDenied))
            .when(orchestratorMock).suspend(parentHostname);
    when(nodeRepositoryMock.getContainersToRun(eq(parentHostname)))
            .thenReturn(Collections.emptyList());
    waitForJdiscContainerToServe();

    assertTrue(
            "The initial resume command should fail because it needs to converge first",
            verifyWithRetries("resume", false));
    doNothing().when(orchestratorMock).resume(parentHostname);
    assertTrue(verifyWithRetries("resume", true));

    // The orchestrator refuses to let node-admin itself be suspended ...
    doThrow(new OrchestratorException(suspendDenied))
            .when(orchestratorMock).suspend(parentHostname, Collections.singletonList(parentHostname));
    assertTrue(
            "Should fail because orchestrator does not allow node-admin to suspend",
            verifyWithRetries("suspend/node-admin", false));

    // ... and then changes its mind and permits it.
    doNothing()
            .when(orchestratorMock).suspend(parentHostname, Collections.singletonList(parentHostname));
    assertTrue(verifyWithRetries("suspend/node-admin", true));

    // Suspending everything should be trivial: there are no active containers whose services must be stopped.
    assertTrue(verifyWithRetries("suspend", false));
    assertTrue(verifyWithRetries("suspend", true));

    // Return to the resumed state.
    assertTrue(verifyWithRetries("resume", false));
    assertTrue(verifyWithRetries("resume", true));

    // Phase 2: repeat the exercise with one active container on this host.
    final ContainerNodeSpec activeTenantNode = new ContainerNodeSpec.Builder()
            .hostname(tenantHostname)
            .wantedDockerImage(new DockerImage("dockerImage"))
            .nodeState(Node.State.active)
            .nodeType("tenant")
            .nodeFlavor("docker")
            .build();
    when(nodeRepositoryMock.getContainersToRun(eq(parentHostname)))
            .thenReturn(Collections.singletonList(activeTenantNode));
    doThrow(new OrchestratorException(suspendDenied))
            .when(orchestratorMock).suspend("localhost.test.yahoo.com", Arrays.asList(tenantHostname, parentHostname));

    // First attempt is denied because all the node-agents have to be frozen first.
    assertTrue(verifyWithRetries("suspend/node-admin", false));
    // Now they should be frozen, but the orchestrator still rejects suspending the container and node-admin.
    assertTrue(verifyWithRetries("suspend/node-admin", false));

    doNothing()
            .when(orchestratorMock).suspend("localhost.test.yahoo.com", Arrays.asList(tenantHostname, parentHostname));
    // The orchestrator now accepts suspension of everything.
    assertTrue(verifyWithRetries("suspend/node-admin", true));

    // Permit suspending the node and stopping services in the active container.
    final ContainerName tenantContainer = new ContainerName("host1");
    doNothing().when(dockerOperationsMock).trySuspendNode(eq(tenantContainer));
    doNothing().when(dockerOperationsMock).stopServicesOnNode(eq(tenantContainer));
    assertTrue(verifyWithRetries("suspend", false));
    assertTrue(verifyWithRetries("suspend", true));
}
Also used : ContainerName(com.yahoo.vespa.hosted.dockerapi.ContainerName) HttpClientBuilder(org.apache.http.impl.client.HttpClientBuilder) OrchestratorException(com.yahoo.vespa.hosted.node.admin.configserver.orchestrator.OrchestratorException) DockerImage(com.yahoo.vespa.hosted.dockerapi.DockerImage) Ignore(org.junit.Ignore) Test(org.junit.Test)

Example 2 with OrchestratorException

use of com.yahoo.vespa.hosted.node.admin.configserver.orchestrator.OrchestratorException in project vespa by vespa-engine.

the class NodeAdminStateUpdaterImpl method tick.

/**
 * Runs one convergence tick.
 *
 * Waits on {@code monitor} until either {@code workToDoNow} is set or
 * {@code nodeAdminConvergeStateInterval} has elapsed since {@code lastTick},
 * then attempts to converge to a snapshot of {@code wantedState}. If node admin
 * has been trying to freeze for longer than {@code FREEZE_CONVERGENCE_TIMEOUT}
 * it is forcibly unfrozen. Finally the list of containers to run is refreshed
 * from the node repository.
 */
void tick() {
    final State targetState;
    synchronized (monitor) {
        while (!workToDoNow) {
            Duration elapsed = Duration.between(lastTick, clock.instant());
            long millisLeft = nodeAdminConvergeStateInterval.minus(elapsed).toMillis();
            if (millisLeft <= 0) {
                // The converge interval has passed; tick even without explicit work.
                break;
            }
            try {
                monitor.wait(millisLeft);
            } catch (InterruptedException e) {
                // Deliberately ignored: the loop re-checks the conditions and keeps waiting.
                log.info("Interrupted, but ignoring this: NodeAdminStateUpdater");
            }
        }
        lastTick = clock.instant();
        workToDoNow = false;
        // wantedState can be changed asynchronously, so take a snapshot while holding the monitor
        targetState = this.wantedState;
    }

    try {
        convergeState(targetState);
    } catch (OrchestratorException | ConvergenceException | HttpException e) {
        // These failures are logged at info level with the message only.
        log.info("Unable to converge to " + targetState + ": " + e.getMessage());
    } catch (Exception e) {
        log.log(LogLevel.ERROR, "Error while trying to converge to " + targetState, e);
    }

    // Short-circuit evaluation ensures the freeze duration is only queried while
    // a non-resumed state is wanted and we are still transitioning.
    boolean freezeTimedOut = targetState != RESUMED
            && currentState == TRANSITIONING
            && nodeAdmin.subsystemFreezeDuration().compareTo(FREEZE_CONVERGENCE_TIMEOUT) > 0;
    if (freezeTimedOut) {
        // Too much time has been spent trying to freeze and node admin is still not frozen.
        // Force unfrozen ticks so the node agents do not stall for too long.
        log.info("Timed out trying to freeze, will force unfreezed ticks");
        nodeAdmin.setFrozen(false);
    }

    fetchContainersToRunFromNodeRepository();
}
Also used : OrchestratorException(com.yahoo.vespa.hosted.node.admin.configserver.orchestrator.OrchestratorException) Duration(java.time.Duration) HttpException(com.yahoo.vespa.hosted.node.admin.configserver.HttpException) HttpException(com.yahoo.vespa.hosted.node.admin.configserver.HttpException) LockInterruptException(com.yahoo.concurrent.classlock.LockInterruptException) OrchestratorException(com.yahoo.vespa.hosted.node.admin.configserver.orchestrator.OrchestratorException)

Aggregations

OrchestratorException (com.yahoo.vespa.hosted.node.admin.configserver.orchestrator.OrchestratorException)2 LockInterruptException (com.yahoo.concurrent.classlock.LockInterruptException)1 ContainerName (com.yahoo.vespa.hosted.dockerapi.ContainerName)1 DockerImage (com.yahoo.vespa.hosted.dockerapi.DockerImage)1 HttpException (com.yahoo.vespa.hosted.node.admin.configserver.HttpException)1 Duration (java.time.Duration)1 HttpClientBuilder (org.apache.http.impl.client.HttpClientBuilder)1 Ignore (org.junit.Ignore)1 Test (org.junit.Test)1