use of com.yahoo.vespa.hosted.node.admin.ContainerNodeSpec in project vespa by vespa-engine.
the class RealNodeRepositoryTest method testGetContainersToRunApi.
@Test
public void testGetContainersToRunApi() throws InterruptedException {
waitForJdiscContainerToServe();
NodeRepository nodeRepositoryApi = new RealNodeRepository(configServerApi);
String dockerHostHostname = "dockerhost1.yahoo.com";
final List<ContainerNodeSpec> containersToRun = nodeRepositoryApi.getContainersToRun(dockerHostHostname);
assertThat(containersToRun.size(), is(1));
final ContainerNodeSpec nodeSpec = containersToRun.get(0);
assertThat(nodeSpec.hostname, is("host4.yahoo.com"));
assertThat(nodeSpec.wantedDockerImage.get(), is(new DockerImage("docker-registry.domain.tld:8080/dist/vespa:6.42.0")));
assertThat(nodeSpec.nodeState, is(Node.State.active));
assertThat(nodeSpec.wantedRestartGeneration.get(), is(0L));
assertThat(nodeSpec.currentRestartGeneration.get(), is(0L));
assertThat(nodeSpec.minCpuCores, is(0.2));
assertThat(nodeSpec.minMainMemoryAvailableGb, is(0.5));
assertThat(nodeSpec.minDiskAvailableGb, is(100.0));
}
use of com.yahoo.vespa.hosted.node.admin.ContainerNodeSpec in project vespa by vespa-engine.
the class DockerFailTest method dockerFailTest.
@Test
public void dockerFailTest() throws Exception {
try (DockerTester dockerTester = new DockerTester()) {
ContainerNodeSpec containerNodeSpec = new ContainerNodeSpec.Builder().hostname("host1.test.yahoo.com").wantedDockerImage(new DockerImage("dockerImage")).nodeState(Node.State.active).nodeType("tenant").nodeFlavor("docker").wantedRestartGeneration(1L).currentRestartGeneration(1L).minCpuCores(1).minMainMemoryAvailableGb(1).minDiskAvailableGb(1).build();
dockerTester.addContainerNodeSpec(containerNodeSpec);
// Wait for node admin to be notified with node repo state and the docker container has been started
while (dockerTester.nodeAdmin.getListOfHosts().size() == 0) {
Thread.sleep(10);
}
dockerTester.callOrderVerifier.assertInOrder(1200, "createContainerCommand with DockerImage { imageId=dockerImage }, HostName: host1.test.yahoo.com, ContainerName { name=host1 }", "executeInContainerAsRoot with ContainerName { name=host1 }, args: [" + DockerTester.NODE_PROGRAM + ", resume]");
dockerTester.dockerMock.deleteContainer(new ContainerName("host1"));
dockerTester.callOrderVerifier.assertInOrder("deleteContainer with ContainerName { name=host1 }", "createContainerCommand with DockerImage { imageId=dockerImage }, HostName: host1.test.yahoo.com, ContainerName { name=host1 }", "executeInContainerAsRoot with ContainerName { name=host1 }, args: [" + DockerTester.NODE_PROGRAM + ", resume]");
}
}
use of com.yahoo.vespa.hosted.node.admin.ContainerNodeSpec in project vespa by vespa-engine.
the class NodeAdminStateUpdaterImpl method updateHardwareDivergence.
private void updateHardwareDivergence(StorageMaintainer maintainer) {
if (currentState != RESUMED)
return;
try {
ContainerNodeSpec nodeSpec = nodeRepository.getContainerNodeSpec(dockerHostHostName).orElseThrow(() -> new RuntimeException("Failed to get host's node spec from node-repo"));
String hardwareDivergence = maintainer.getHardwareDivergence(nodeSpec);
// Only update hardware divergence if there is a change.
if (!nodeSpec.hardwareDivergence.orElse("null").equals(hardwareDivergence)) {
NodeAttributes nodeAttributes = new NodeAttributes().withHardwareDivergence(hardwareDivergence);
nodeRepository.updateNodeAttributes(dockerHostHostName, nodeAttributes);
}
} catch (RuntimeException e) {
log.log(Level.WARNING, "Failed to report hardware divergence", e);
}
}
use of com.yahoo.vespa.hosted.node.admin.ContainerNodeSpec in project vespa by vespa-engine.
the class NodeAgentImpl method converge.
// Public for testing
void converge() {
final Optional<ContainerNodeSpec> nodeSpecOptional = nodeRepository.getContainerNodeSpec(hostname);
// We just removed the node from node repo, so this is expected until NodeAdmin stop this NodeAgent
if (!nodeSpecOptional.isPresent() && expectNodeNotInNodeRepo)
return;
final ContainerNodeSpec nodeSpec = nodeSpecOptional.orElseThrow(() -> new IllegalStateException(String.format("Node '%s' missing from node repository.", hostname)));
expectNodeNotInNodeRepo = false;
Optional<Container> container = getContainer();
if (!nodeSpec.equals(lastNodeSpec)) {
// will change and we will be reporting duplicate metrics.
if (container.map(c -> c.state.isRunning()).orElse(false)) {
storageMaintainer.writeMetricsConfig(containerName, nodeSpec);
}
addDebugMessage("Loading new node spec: " + nodeSpec.toString());
lastNodeSpec = nodeSpec;
}
switch(nodeSpec.nodeState) {
case ready:
case reserved:
case parked:
case failed:
removeContainerIfNeededUpdateContainerState(nodeSpec, container);
updateNodeRepoWithCurrentAttributes(nodeSpec);
break;
case active:
storageMaintainer.handleCoreDumpsForContainer(containerName, nodeSpec, false);
storageMaintainer.getDiskUsageFor(containerName).map(diskUsage -> (double) diskUsage / BYTES_IN_GB / nodeSpec.minDiskAvailableGb).filter(diskUtil -> diskUtil >= 0.8).ifPresent(diskUtil -> storageMaintainer.removeOldFilesFromNode(containerName));
scheduleDownLoadIfNeeded(nodeSpec);
if (isDownloadingImage()) {
addDebugMessage("Waiting for image to download " + imageBeingDownloaded.asString());
return;
}
container = removeContainerIfNeededUpdateContainerState(nodeSpec, container);
if (!container.isPresent()) {
storageMaintainer.handleCoreDumpsForContainer(containerName, nodeSpec, false);
containerState = STARTING;
startContainer(nodeSpec);
containerState = UNKNOWN;
}
runLocalResumeScriptIfNeeded(nodeSpec);
// Because it's more important to stop a bad release from rolling out in prod,
// we put the resume call last. So if we fail after updating the node repo attributes
// but before resume, the app may go through the tenant pipeline but will halt in prod.
//
// Note that this problem exists only because there are 2 different mechanisms
// that should really be parts of a single mechanism:
// - The content of node repo is used to determine whether a new Vespa+application
// has been successfully rolled out.
// - Slobrok and internal orchestrator state is used to determine whether
// to allow upgrade (suspend).
updateNodeRepoWithCurrentAttributes(nodeSpec);
logger.info("Call resume against Orchestrator");
orchestrator.resume(hostname);
break;
case inactive:
removeContainerIfNeededUpdateContainerState(nodeSpec, container);
updateNodeRepoWithCurrentAttributes(nodeSpec);
break;
case provisioned:
nodeRepository.markAsDirty(hostname);
break;
case dirty:
removeContainerIfNeededUpdateContainerState(nodeSpec, container);
logger.info("State is " + nodeSpec.nodeState + ", will delete application storage and mark node as ready");
storageMaintainer.cleanupNodeStorage(containerName, nodeSpec);
updateNodeRepoWithCurrentAttributes(nodeSpec);
nodeRepository.markNodeAvailableForNewAllocation(hostname);
expectNodeNotInNodeRepo = true;
break;
default:
throw new RuntimeException("UNKNOWN STATE " + nodeSpec.nodeState.name());
}
}
use of com.yahoo.vespa.hosted.node.admin.ContainerNodeSpec in project vespa by vespa-engine.
the class NodeAgentImpl method updateNodeRepoWithCurrentAttributes.
private void updateNodeRepoWithCurrentAttributes(final ContainerNodeSpec nodeSpec) {
final NodeAttributes currentNodeAttributes = new NodeAttributes().withRestartGeneration(nodeSpec.currentRestartGeneration.orElse(null)).withRebootGeneration(nodeSpec.currentRebootGeneration.orElse(0L)).withDockerImage(nodeSpec.currentDockerImage.orElse(new DockerImage(""))).withVespaVersion(nodeSpec.vespaVersion.orElse(""));
final NodeAttributes wantedNodeAttributes = new NodeAttributes().withRestartGeneration(nodeSpec.wantedRestartGeneration.orElse(null)).withRebootGeneration(nodeSpec.wantedRebootGeneration.orElse(0L)).withDockerImage(nodeSpec.wantedDockerImage.filter(node -> containerState == UNKNOWN).orElse(new DockerImage(""))).withVespaVersion(nodeSpec.wantedVespaVersion.filter(node -> containerState == UNKNOWN).orElse(""));
publishStateToNodeRepoIfChanged(currentNodeAttributes, wantedNodeAttributes);
}
Aggregations