Search in sources :

Example 91 with Node

use of com.yahoo.vespa.hosted.provision.Node in project vespa by vespa-engine.

the class Preparer method prepare.

/**
 * Makes sure enough nodes are reserved or active for the given application and cluster,
 * preparing each of the wanted groups in turn.
 *
 * @param application    the application the nodes are prepared for
 * @param cluster        the cluster the nodes are prepared for
 * @param requestedNodes the nodes requested for the whole cluster; each group is prepared
 *                       with {@code requestedNodes.fraction(wantedGroups)}
 * @param wantedGroups   the number of groups to prepare; groups are indexed from 0
 * @return the list of nodes this cluster will have allocated if activated
 */
// Note: This operation may persist changes to the set of reserved and inactive nodes.
// It may not change the set of active nodes, since those must stay in sync with the
// active config model, which only changes on activate.
public List<Node> prepare(ApplicationId application, ClusterSpec cluster, NodeSpec requestedNodes, int wantedGroups) {
    List<Node> surplus = findNodesInRemovableGroups(application, cluster, wantedGroups);
    // Shared, mutable counter handed to every group preparation
    MutableInteger indexCounter = new MutableInteger(findHighestIndex(application, cluster));
    List<Node> result = new ArrayList<>();

    for (int group = 0; group < wantedGroups; group++) {
        ClusterSpec groupSpec = cluster.with(Optional.of(ClusterSpec.Group.from(group)));
        replace(result,
                groupPreparer.prepare(application, groupSpec, requestedNodes.fraction(wantedGroups), surplus, indexCounter, spareCount));
    }

    // Whatever is still in the surplus list after all groups are prepared is moved and then retired
    moveToActiveGroup(surplus, wantedGroups, cluster.group());
    List<Node> retired = retire(surplus);
    replace(result, retired);
    return result;
}
Also used : Node(com.yahoo.vespa.hosted.provision.Node) MutableInteger(com.yahoo.lang.MutableInteger) ArrayList(java.util.ArrayList) ClusterSpec(com.yahoo.config.provision.ClusterSpec)

Example 92 with Node

use of com.yahoo.vespa.hosted.provision.Node in project vespa by vespa-engine.

the class NodeAclResponse method toSlime.

/**
 * Writes the ACLs of the node with the given hostname into the given Slime object,
 * as a "trustedNodes" array followed by a "trustedNetworks" array.
 *
 * @param hostname the hostname of the node to look up
 * @param object   the Slime object the two arrays are added to
 * @throws NotFoundException if neither {@code getNode} nor {@code getConfigNode} finds the hostname
 */
private void toSlime(String hostname, Cursor object) {
    // Try getNode first and only fall back to getConfigNode when that comes up empty
    Node node = nodeRepository.getNode(hostname)
            .orElseGet(() -> nodeRepository.getConfigNode(hostname)
                    .orElseThrow(() -> new NotFoundException("No node with hostname '" + hostname + "'")));

    Cursor trustedNodes = object.setArray("trustedNodes");
    nodeRepository.getNodeAcls(node, aclsForChildren)
            .forEach(acl -> toSlime(acl, trustedNodes));

    Cursor trustedNetworks = object.setArray("trustedNetworks");
    nodeRepository.getNodeAcls(node, aclsForChildren)
            .forEach(acl -> toSlime(acl.trustedNetworks(), acl.node(), trustedNetworks));
}
Also used : Node(com.yahoo.vespa.hosted.provision.Node) Cursor(com.yahoo.slime.Cursor)

Example 93 with Node

use of com.yahoo.vespa.hosted.provision.Node in project vespa by vespa-engine.

the class NodesResponse method toSlime.

/**
 * Serializes the given node into the given Slime object.
 *
 * The "url" field is always written. All other fields are written only when
 * {@code allFields} is true. Fields are written in a fixed order, and optional values
 * (parent hostname, allocation, Vespa version, hardware failure/divergence) are left out
 * when absent.
 *
 * @param node      the node to serialize
 * @param allFields whether to write every field or just the url
 * @param object    the Slime object to write into
 */
private void toSlime(Node node, boolean allFields, Cursor object) {
    object.setString("url", nodeParentUrl + node.hostname());
    if (!allFields) {
        return;
    }

    // Identity
    object.setString("id", node.id());
    object.setString("state", NodeStateSerializer.wireNameOf(node.state()));
    // NOTE(review): "type" is set twice, here and again after "hostname". Which of the two
    // values ends up in the response depends on how Slime treats a repeated field name.
    // Confirm and drop the redundant call.
    object.setString("type", node.type().name());
    object.setString("hostname", node.hostname());
    object.setString("type", toString(node.type()));
    node.parentHostname().ifPresent(parent -> object.setString("parentHostname", parent));
    object.setString("openStackId", node.openStackId());

    // Flavor
    object.setString("flavor", node.flavor().name());
    object.setString("canonicalFlavor", node.flavor().canonicalName());
    object.setDouble("minDiskAvailableGb", node.flavor().getMinDiskAvailableGb());
    object.setDouble("minMainMemoryAvailableGb", node.flavor().getMinMainMemoryAvailableGb());
    String description = node.flavor().getDescription();
    if (description != null && !description.isEmpty()) {
        object.setString("description", description);
    }
    object.setDouble("minCpuCores", node.flavor().getMinCpuCores());
    if (node.flavor().cost() > 0) {
        object.setLong("cost", node.flavor().cost());
    }
    object.setBool("fastDisk", node.flavor().hasFastDisk());
    object.setString("environment", node.flavor().getType().name());

    // Allocation, only for nodes that have one
    node.allocation().ifPresent(allocation -> {
        toSlime(allocation.owner(), object.setObject("owner"));
        toSlime(allocation.membership(), object.setObject("membership"));
        object.setLong("restartGeneration", allocation.restartGeneration().wanted());
        object.setLong("currentRestartGeneration", allocation.restartGeneration().current());
        object.setString("wantedDockerImage", nodeRepository.dockerImage().withTag(allocation.membership().cluster().vespaVersion()).asString());
        object.setString("wantedVespaVersion", allocation.membership().cluster().vespaVersion().toFullString());
        try {
            boolean allowedDown = orchestrator.getNodeStatus(new HostName(node.hostname())) == HostStatus.ALLOWED_TO_BE_DOWN;
            object.setBool("allowedToBeDown", allowedDown);
        } catch (HostNameNotFoundException ignored) {
            // Deliberately ignored: "allowedToBeDown" is simply left out when the lookup fails
        }
    });

    // Status
    object.setLong("rebootGeneration", node.status().reboot().wanted());
    object.setLong("currentRebootGeneration", node.status().reboot().current());
    node.status().vespaVersion()
            .filter(vespaVersion -> !vespaVersion.isEmpty())
            .ifPresent(vespaVersion -> {
                object.setString("vespaVersion", vespaVersion.toFullString());
                object.setString("currentDockerImage", nodeRepository.dockerImage().withTag(vespaVersion).asString());
                // TODO: Remove these when they are no longer read
                object.setString("hostedVersion", vespaVersion.toFullString());
                object.setString("convergedStateVersion", vespaVersion.toFullString());
            });
    object.setLong("failCount", node.status().failCount());
    object.setBool("hardwareFailure", node.status().hardwareFailureDescription().isPresent());
    node.status().hardwareFailureDescription()
            .ifPresent(description_ -> object.setString("hardwareFailureDescription", description_));
    object.setBool("wantToRetire", node.status().wantToRetire());
    object.setBool("wantToDeprovision", node.status().wantToDeprovision());

    // History and addresses
    toSlime(node.history(), object.setArray("history"));
    ipAddressesToSlime(node.ipAddresses(), object.setArray("ipAddresses"));
    ipAddressesToSlime(node.additionalIpAddresses(), object.setArray("additionalIpAddresses"));
    node.status().hardwareDivergence()
            .ifPresent(divergence -> object.setString("hardwareDivergence", divergence));
}
Also used : NodeFilter(com.yahoo.vespa.hosted.provision.node.filter.NodeFilter) HostNameNotFoundException(com.yahoo.vespa.orchestrator.HostNameNotFoundException) OutputStream(java.io.OutputStream) Cursor(com.yahoo.slime.Cursor) ApplicationId(com.yahoo.config.provision.ApplicationId) ClusterMembership(com.yahoo.config.provision.ClusterMembership) NodeType(com.yahoo.config.provision.NodeType) Slime(com.yahoo.slime.Slime) HttpRequest(com.yahoo.container.jdisc.HttpRequest) Set(java.util.Set) IOException(java.io.IOException) Orchestrator(com.yahoo.vespa.orchestrator.Orchestrator) Node(com.yahoo.vespa.hosted.provision.Node) NodeRepository(com.yahoo.vespa.hosted.provision.NodeRepository) List(java.util.List) History(com.yahoo.vespa.hosted.provision.node.History) HostName(com.yahoo.vespa.applicationmodel.HostName) SlimeUtils(com.yahoo.vespa.config.SlimeUtils) HostStatus(com.yahoo.vespa.orchestrator.status.HostStatus) URI(java.net.URI) HttpResponse(com.yahoo.container.jdisc.HttpResponse) HostName(com.yahoo.vespa.applicationmodel.HostName) HostNameNotFoundException(com.yahoo.vespa.orchestrator.HostNameNotFoundException)

Example 94 with Node

use of com.yahoo.vespa.hosted.provision.Node in project vespa by vespa-engine.

the class ServiceMonitorStub method getAllApplicationInstances.

/**
 * Builds one {@link ApplicationInstance} per entry in {@code apps}.
 *
 * Each instance holds a single service cluster, named after the application's first
 * cluster context, containing one service instance per active node of the application.
 *
 * @return a new map from application instance reference to application instance
 */
@Override
public Map<ApplicationInstanceReference, ApplicationInstance> getAllApplicationInstances() {
    Map<ApplicationInstanceReference, ApplicationInstance> instancesByReference = new HashMap<>();

    for (Map.Entry<ApplicationId, MockDeployer.ApplicationContext> entry : apps.entrySet()) {
        ApplicationId applicationId = entry.getKey();
        MockDeployer.ApplicationContext context = entry.getValue();

        // One service instance per active node, all with the same fixed config id
        Set<ServiceInstance> instancesOnActiveNodes = new HashSet<>();
        for (Node activeNode : nodeRepository.getNodes(context.id(), Node.State.active)) {
            String hostname = activeNode.hostname();
            ServiceInstance instance = new ServiceInstance(new ConfigId("configid"), new HostName(hostname), getHostStatus(hostname));
            instancesOnActiveNodes.add(instance);
        }

        // Only the first cluster context is used to name the single service cluster
        ClusterId clusterId = new ClusterId(context.clusterContexts().get(0).cluster().id().value());
        Set<ServiceCluster> clusters = new HashSet<>();
        clusters.add(new ServiceCluster(clusterId, new ServiceType("serviceType"), instancesOnActiveNodes));

        TenantId tenant = new TenantId(applicationId.tenant().value());
        ApplicationInstanceId instanceId = new ApplicationInstanceId(applicationId.application().value());
        instancesByReference.put(new ApplicationInstanceReference(tenant, instanceId),
                                 new ApplicationInstance(tenant, instanceId, clusters));
    }

    return instancesByReference;
}
Also used : HashMap(java.util.HashMap) ServiceCluster(com.yahoo.vespa.applicationmodel.ServiceCluster) ClusterId(com.yahoo.vespa.applicationmodel.ClusterId) Node(com.yahoo.vespa.hosted.provision.Node) ServiceInstance(com.yahoo.vespa.applicationmodel.ServiceInstance) ApplicationInstanceId(com.yahoo.vespa.applicationmodel.ApplicationInstanceId) TenantId(com.yahoo.vespa.applicationmodel.TenantId) ApplicationInstance(com.yahoo.vespa.applicationmodel.ApplicationInstance) ServiceType(com.yahoo.vespa.applicationmodel.ServiceType) ApplicationInstanceReference(com.yahoo.vespa.applicationmodel.ApplicationInstanceReference) ConfigId(com.yahoo.vespa.applicationmodel.ConfigId) ApplicationId(com.yahoo.config.provision.ApplicationId) HashMap(java.util.HashMap) Map(java.util.Map) HostName(com.yahoo.vespa.applicationmodel.HostName) HashSet(java.util.HashSet)

Example 95 with Node

use of com.yahoo.vespa.hosted.provision.Node in project vespa by vespa-engine.

the class NodeAgentImpl method converge.

/**
 * Runs one convergence pass: fetches this host's node spec from the node repository and
 * acts on it according to the node's state.
 *
 * Returns without doing anything when the spec is missing and that is expected
 * (expectNodeNotInNodeRepo is set). In the active state it also returns early while an
 * image download is in progress.
 *
 * @throws IllegalStateException if the node is unexpectedly missing from the node repository
 * @throws RuntimeException if the node is in a state this method does not handle
 */
// Package-private (not public, despite what an earlier comment said) so tests can call it directly
void converge() {
    final Optional<ContainerNodeSpec> nodeSpecOptional = nodeRepository.getContainerNodeSpec(hostname);
    // We just removed the node from the node repo, so this is expected until NodeAdmin stops this NodeAgent
    if (!nodeSpecOptional.isPresent() && expectNodeNotInNodeRepo)
        return;
    final ContainerNodeSpec nodeSpec = nodeSpecOptional.orElseThrow(() -> new IllegalStateException(String.format("Node '%s' missing from node repository.", hostname)));
    // The node was found, so a missing node is no longer expected on later passes
    expectNodeNotInNodeRepo = false;
    Optional<Container> container = getContainer();
    if (!nodeSpec.equals(lastNodeSpec)) {
        // The node spec changed since the last pass. If the container is running, rewrite its
        // metrics config; per the original (truncated) note, the metric dimensions
        // will change and we will be reporting duplicate metrics.
        if (container.map(c -> c.state.isRunning()).orElse(false)) {
            storageMaintainer.writeMetricsConfig(containerName, nodeSpec);
        }
        addDebugMessage("Loading new node spec: " + nodeSpec.toString());
        lastNodeSpec = nodeSpec;
    }
    switch(nodeSpec.nodeState) {
        // ready, reserved, parked and failed share one branch: remove the container if needed,
        // then report current attributes to the node repo
        case ready:
        case reserved:
        case parked:
        case failed:
            removeContainerIfNeededUpdateContainerState(nodeSpec, container);
            updateNodeRepoWithCurrentAttributes(nodeSpec);
            break;
        case active:
            storageMaintainer.handleCoreDumpsForContainer(containerName, nodeSpec, false);
            // Remove old files once disk usage reaches 80% of the spec's minDiskAvailableGb
            storageMaintainer.getDiskUsageFor(containerName).map(diskUsage -> (double) diskUsage / BYTES_IN_GB / nodeSpec.minDiskAvailableGb).filter(diskUtil -> diskUtil >= 0.8).ifPresent(diskUtil -> storageMaintainer.removeOldFilesFromNode(containerName));
            scheduleDownLoadIfNeeded(nodeSpec);
            // Nothing more can be done this pass while the image is still downloading
            if (isDownloadingImage()) {
                addDebugMessage("Waiting for image to download " + imageBeingDownloaded.asString());
                return;
            }
            container = removeContainerIfNeededUpdateContainerState(nodeSpec, container);
            // No container left (or none existed): start a new one
            if (!container.isPresent()) {
                storageMaintainer.handleCoreDumpsForContainer(containerName, nodeSpec, false);
                containerState = STARTING;
                startContainer(nodeSpec);
                containerState = UNKNOWN;
            }
            runLocalResumeScriptIfNeeded(nodeSpec);
            // Because it's more important to stop a bad release from rolling out in prod,
            // we put the resume call last. So if we fail after updating the node repo attributes
            // but before resume, the app may go through the tenant pipeline but will halt in prod.
            //
            // Note that this problem exists only because there are 2 different mechanisms
            // that should really be parts of a single mechanism:
            // - The content of node repo is used to determine whether a new Vespa+application
            // has been successfully rolled out.
            // - Slobrok and internal orchestrator state is used to determine whether
            // to allow upgrade (suspend).
            updateNodeRepoWithCurrentAttributes(nodeSpec);
            logger.info("Call resume against Orchestrator");
            orchestrator.resume(hostname);
            break;
        // Same handling as the ready/reserved/parked/failed branch above
        case inactive:
            removeContainerIfNeededUpdateContainerState(nodeSpec, container);
            updateNodeRepoWithCurrentAttributes(nodeSpec);
            break;
        case provisioned:
            nodeRepository.markAsDirty(hostname);
            break;
        case dirty:
            removeContainerIfNeededUpdateContainerState(nodeSpec, container);
            logger.info("State is " + nodeSpec.nodeState + ", will delete application storage and mark node as ready");
            storageMaintainer.cleanupNodeStorage(containerName, nodeSpec);
            updateNodeRepoWithCurrentAttributes(nodeSpec);
            nodeRepository.markNodeAvailableForNewAllocation(hostname);
            // From here on a missing node spec is expected; see the check at the top of this method
            expectNodeNotInNodeRepo = true;
            break;
        default:
            throw new RuntimeException("UNKNOWN STATE " + nodeSpec.nodeState.name());
    }
}
Also used : DockerException(com.yahoo.vespa.hosted.dockerapi.DockerException) ContainerName(com.yahoo.vespa.hosted.dockerapi.ContainerName) Dimensions(com.yahoo.vespa.hosted.dockerapi.metrics.Dimensions) StorageMaintainer(com.yahoo.vespa.hosted.node.admin.maintenance.StorageMaintainer) Environment(com.yahoo.vespa.hosted.node.admin.component.Environment) Date(java.util.Date) PromptContainerData(com.yahoo.vespa.hosted.node.admin.containerdata.PromptContainerData) ThreadFactoryFactory(com.yahoo.concurrent.ThreadFactoryFactory) ProcessResult(com.yahoo.vespa.hosted.dockerapi.ProcessResult) SimpleDateFormat(java.text.SimpleDateFormat) AtomicBoolean(java.util.concurrent.atomic.AtomicBoolean) Node(com.yahoo.vespa.hosted.provision.Node) ArrayList(java.util.ArrayList) LinkedHashMap(java.util.LinkedHashMap) MetricReceiverWrapper(com.yahoo.vespa.hosted.dockerapi.metrics.MetricReceiverWrapper) Future(java.util.concurrent.Future) Duration(java.time.Duration) Map(java.util.Map) ContainerData(com.yahoo.vespa.hosted.node.admin.containerdata.ContainerData) ScheduledExecutorService(java.util.concurrent.ScheduledExecutorService) LinkedList(java.util.LinkedList) PrefixLogger(com.yahoo.vespa.hosted.node.admin.util.PrefixLogger) ConfigServerContainerData(com.yahoo.vespa.hosted.node.admin.containerdata.ConfigServerContainerData) NodeType(com.yahoo.config.provision.NodeType) DockerOperations(com.yahoo.vespa.hosted.node.admin.docker.DockerOperations) Container(com.yahoo.vespa.hosted.dockerapi.Container) JsonProcessingException(com.fasterxml.jackson.core.JsonProcessingException) DimensionMetrics(com.yahoo.vespa.hosted.dockerapi.metrics.DimensionMetrics) NodeRepository(com.yahoo.vespa.hosted.node.admin.configserver.noderepository.NodeRepository) Instant(java.time.Instant) DockerExecTimeoutException(com.yahoo.vespa.hosted.dockerapi.DockerExecTimeoutException) Executors(java.util.concurrent.Executors) MotdContainerData(com.yahoo.vespa.hosted.node.admin.containerdata.MotdContainerData) 
UncheckedIOException(java.io.UncheckedIOException) TimeUnit(java.util.concurrent.TimeUnit) Consumer(java.util.function.Consumer) List(java.util.List) ContainerNodeSpec(com.yahoo.vespa.hosted.node.admin.ContainerNodeSpec) STARTING(com.yahoo.vespa.hosted.node.admin.nodeagent.NodeAgentImpl.ContainerState.STARTING) DockerImage(com.yahoo.vespa.hosted.dockerapi.DockerImage) Docker(com.yahoo.vespa.hosted.dockerapi.Docker) ABSENT(com.yahoo.vespa.hosted.node.admin.nodeagent.NodeAgentImpl.ContainerState.ABSENT) Orchestrator(com.yahoo.vespa.hosted.node.admin.configserver.orchestrator.Orchestrator) UNKNOWN(com.yahoo.vespa.hosted.node.admin.nodeagent.NodeAgentImpl.ContainerState.UNKNOWN) Clock(java.time.Clock) Optional(java.util.Optional) OrchestratorException(com.yahoo.vespa.hosted.node.admin.configserver.orchestrator.OrchestratorException) ContainerResources(com.yahoo.vespa.hosted.dockerapi.ContainerResources) Container(com.yahoo.vespa.hosted.dockerapi.Container) ContainerNodeSpec(com.yahoo.vespa.hosted.node.admin.ContainerNodeSpec)

Aggregations

Node (com.yahoo.vespa.hosted.provision.Node)121 Test (org.junit.Test)67 ApplicationId (com.yahoo.config.provision.ApplicationId)40 ClusterSpec (com.yahoo.config.provision.ClusterSpec)33 List (java.util.List)26 ArrayList (java.util.ArrayList)23 Zone (com.yahoo.config.provision.Zone)22 Flavor (com.yahoo.config.provision.Flavor)21 HashSet (java.util.HashSet)19 Collectors (java.util.stream.Collectors)19 Optional (java.util.Optional)18 NodeRepository (com.yahoo.vespa.hosted.provision.NodeRepository)16 Duration (java.time.Duration)16 HostSpec (com.yahoo.config.provision.HostSpec)15 NodeType (com.yahoo.config.provision.NodeType)15 Agent (com.yahoo.vespa.hosted.provision.node.Agent)13 Map (java.util.Map)13 HashMap (java.util.HashMap)12 Collections (java.util.Collections)11 Set (java.util.Set)11