Search in sources:

Example 1 with HostStatus

Use of com.yahoo.vespa.orchestrator.status.HostStatus in project vespa by vespa-engine.

The class MetricsReporter, method updateNodeMetrics.

/**
 * Publishes the metrics of a single node.
 *
 * <p>Allocated nodes get a context carrying tenant, application and cluster dimensions and
 * additionally report restart generations and wanted Vespa version; unallocated nodes get a
 * context with state and host only. All nodes report reboot generations, retirement and hardware
 * flags, the orchestrator's "allowed to be down" flag (when the orchestrator knows the host) and
 * service counts.
 *
 * @param node the node to report metrics for
 * @param servicesByHost service instances keyed by host name; a host without an entry is
 *                       reported with {@code numberOfServices} 0 and no per-status counts
 */
private void updateNodeMetrics(Node node, Map<HostName, List<ServiceInstance>> servicesByHost) {
    Optional<Allocation> maybeAllocation = node.allocation();
    Metric.Context context;
    if (!maybeAllocation.isPresent()) {
        context = getContextAt("state", node.state().name(), "host", node.hostname());
    } else {
        Allocation owned = maybeAllocation.get();
        ApplicationId owner = owned.owner();
        context = getContextAt(
                "state", node.state().name(),
                "host", node.hostname(),
                "tenantName", owner.tenant().value(),
                "applicationId", owner.serializedForm().replace(':', '.'),
                "app", toApp(owner),
                "clustertype", owned.membership().cluster().type().name(),
                "clusterid", owned.membership().cluster().id().value());

        // Restart generations: a restart is pending while current lags behind wanted.
        long restartWanted = owned.restartGeneration().wanted();
        metric.set("wantedRestartGeneration", restartWanted, context);
        long restartCurrent = owned.restartGeneration().current();
        metric.set("currentRestartGeneration", restartCurrent, context);
        boolean restartPending = restartCurrent < restartWanted;
        metric.set("wantToRestart", restartPending ? 1 : 0, context);

        // Version convergence: 0 only when the node reports exactly the wanted version.
        Version targetVersion = owned.membership().cluster().vespaVersion();
        double targetVersionNumber = getVersionAsNumber(targetVersion);
        metric.set("wantedVespaVersion", targetVersionNumber, context);
        Optional<Version> reportedVersion = node.status().vespaVersion();
        boolean onTargetVersion = reportedVersion.filter(v -> v.equals(targetVersion)).isPresent();
        metric.set("wantToChangeVespaVersion", onTargetVersion ? 0 : 1, context);
    }

    // Node repo checks for !isEmpty(), so the same check is applied here.
    Optional<Version> nonEmptyVersion = node.status().vespaVersion().filter(v -> !v.isEmpty());
    if (nonEmptyVersion.isPresent()) {
        double currentVersionNumber = getVersionAsNumber(nonEmptyVersion.get());
        metric.set("currentVespaVersion", currentVersionNumber, context);
    }

    // Reboot generations: a reboot is pending while current lags behind wanted.
    long rebootWanted = node.status().reboot().wanted();
    metric.set("wantedRebootGeneration", rebootWanted, context);
    long rebootCurrent = node.status().reboot().current();
    metric.set("currentRebootGeneration", rebootCurrent, context);
    boolean rebootPending = rebootCurrent < rebootWanted;
    metric.set("wantToReboot", rebootPending ? 1 : 0, context);

    metric.set("wantToRetire", node.status().wantToRetire() ? 1 : 0, context);
    metric.set("wantToDeprovision", node.status().wantToDeprovision() ? 1 : 0, context);
    metric.set("hardwareFailure", node.status().hardwareFailureDescription().isPresent() ? 1 : 0, context);
    metric.set("hardwareDivergence", node.status().hardwareDivergence().isPresent() ? 1 : 0, context);

    try {
        HostStatus orchestratorStatus = orchestrator.getNodeStatus(new HostName(node.hostname()));
        boolean mayBeDown = HostStatus.ALLOWED_TO_BE_DOWN == orchestratorStatus;
        metric.set("allowedToBeDown", mayBeDown ? 1 : 0, context);
    } catch (HostNameNotFoundException ignored) {
        // Deliberately ignored: "allowedToBeDown" is simply not reported for this node.
    }

    long numberOfServices = 0;
    HostName serviceHost = new HostName(node.hostname());
    List<ServiceInstance> hostServices = servicesByHost.get(serviceHost);
    if (hostServices != null) {
        Map<ServiceStatus, Long> countByStatus = hostServices.stream()
                .collect(Collectors.groupingBy(ServiceInstance::serviceStatus, Collectors.counting()));
        for (Long count : countByStatus.values()) {
            numberOfServices += count;
        }

        metric.set("numberOfServicesUp", countByStatus.getOrDefault(ServiceStatus.UP, 0L), context);
        metric.set("numberOfServicesNotChecked", countByStatus.getOrDefault(ServiceStatus.NOT_CHECKED, 0L), context);
        long downCount = countByStatus.getOrDefault(ServiceStatus.DOWN, 0L);
        metric.set("numberOfServicesDown", downCount, context);
        metric.set("someServicesDown", (downCount > 0 ? 1 : 0), context);

        boolean failerConsidersBad = NodeFailer.badNode(hostServices);
        metric.set("nodeFailerBadNode", (failerConsidersBad ? 1 : 0), context);

        boolean hasDownEvent = node.history().event(History.Event.Type.down).isPresent();
        metric.set("downInNodeRepo", (hasDownEvent ? 1 : 0), context);
    }
    metric.set("numberOfServices", numberOfServices, context);
}
Also used : ServiceInstance(com.yahoo.vespa.applicationmodel.ServiceInstance) Allocation(com.yahoo.vespa.hosted.provision.node.Allocation) Version(com.yahoo.component.Version) ServiceStatus(com.yahoo.vespa.applicationmodel.ServiceStatus) Metric(com.yahoo.jdisc.Metric) HostStatus(com.yahoo.vespa.orchestrator.status.HostStatus) ApplicationId(com.yahoo.config.provision.ApplicationId) HostName(com.yahoo.vespa.applicationmodel.HostName) HostNameNotFoundException(com.yahoo.vespa.orchestrator.HostNameNotFoundException)

Example 2 with HostStatus

Use of com.yahoo.vespa.orchestrator.status.HostStatus in project vespa by vespa-engine.

The class ApplicationApiImplTest, method verifyUpConditionWith.

/**
 * Builds an application with one storage-node service instance on "host1", using the given host
 * and service status, and asserts that the up-storage-node list of the application API contains
 * that host exactly when {@code expectUp} is true.
 *
 * @param hostStatus status the host is created with
 * @param serviceStatus status the single service instance is created with
 * @param expectUp whether the host is expected among the up storage nodes
 */
private void verifyUpConditionWith(HostStatus hostStatus, ServiceStatus serviceStatus, boolean expectUp) {
    HostName host = modelUtils.createNode("host1", hostStatus);

    ApplicationInstance application = modelUtils.createApplicationInstance(
            Arrays.asList(
                    modelUtils.createServiceCluster(
                            "cluster-1",
                            VespaModelUtil.STORAGENODE_SERVICE_TYPE,
                            Arrays.asList(
                                    modelUtils.createServiceInstance("config-id-1", host, serviceStatus)))));
    ApplicationApiImpl api = modelUtils.createApplicationApiImpl(application, host);

    List<HostName> expectedHosts;
    if (expectUp) {
        expectedHosts = Arrays.asList(host);
    } else {
        expectedHosts = new ArrayList<>();
    }

    List<HostName> actualHosts = api.getUpStorageNodesInGroupInClusterOrder()
            .stream()
            .map(node -> node.hostName())
            .collect(Collectors.toList());

    assertEquals(expectedHosts, actualHosts);
}
Also used : ServiceType(com.yahoo.vespa.applicationmodel.ServiceType) Arrays(java.util.Arrays) List(java.util.List) HostName(com.yahoo.vespa.applicationmodel.HostName) HostStatus(com.yahoo.vespa.orchestrator.status.HostStatus) Test(org.junit.Test) ApplicationInstance(com.yahoo.vespa.applicationmodel.ApplicationInstance) ServiceStatus(com.yahoo.vespa.applicationmodel.ServiceStatus) Collectors(java.util.stream.Collectors) Assert.assertEquals(org.junit.Assert.assertEquals) ArrayList(java.util.ArrayList) ApplicationInstance(com.yahoo.vespa.applicationmodel.ApplicationInstance) HostName(com.yahoo.vespa.applicationmodel.HostName)

Example 3 with HostStatus

Use of com.yahoo.vespa.orchestrator.status.HostStatus in project vespa by vespa-engine.

The class OrchestratorImpl, method resume.

/**
 * Resumes the given host: after a convergence delay, releases the host's suspension grant unless
 * the host already has status {@code NO_REMARKS} or the application instance status is something
 * other than {@code NO_REMARKS}.
 *
 * @param hostName the host to resume
 * @throws HostStateChangeDeniedException declared by this method; raised by callees not shown here
 * @throws HostNameNotFoundException declared by this method; raised by callees not shown here
 */
@Override
public void resume(HostName hostName) throws HostStateChangeDeniedException, HostNameNotFoundException {
    /*
     * A host that has been ALLOWED_TO_BE_DOWN may recently have had its services stopped (and,
     * presumably, started again). Service monitoring may not yet have noticed, and may therefore
     * report services as up although they have not finished initializing and are not ready to
     * serve. Reporting both host and services as up in that situation causes a race where
     * services on other hosts may be stopped prematurely. Delaying here gives service monitoring
     * time to catch up. The delay is deliberately done before the lock is taken, since the host
     * status service's locking does not support anything like condition variables or
     * Object.wait() and the lock should not be held while waiting.
     */
    sleep(serviceMonitorConvergenceLatencySeconds, TimeUnit.SECONDS);

    ApplicationInstance application = getApplicationInstance(hostName);
    try (MutableStatusRegistry registry = statusService.lockApplicationInstance_forCurrentThreadOnly(application.reference())) {
        final HostStatus hostState = registry.getHostStatus(hostName);
        // Nothing to do for a host that is already NO_REMARKS.
        if (hostState != HostStatus.NO_REMARKS) {
            ApplicationInstanceStatus applicationState = statusService
                    .forApplicationInstance(application.reference())
                    .getApplicationInstanceStatus();
            if (ApplicationInstanceStatus.NO_REMARKS == applicationState) {
                policy.releaseSuspensionGrant(application, hostName, registry);
            }
        }
    }
}
Also used : ApplicationInstance(com.yahoo.vespa.applicationmodel.ApplicationInstance) MutableStatusRegistry(com.yahoo.vespa.orchestrator.status.MutableStatusRegistry) ApplicationInstanceStatus(com.yahoo.vespa.orchestrator.status.ApplicationInstanceStatus) HostStatus(com.yahoo.vespa.orchestrator.status.HostStatus)

Example 4 with HostStatus

Use of com.yahoo.vespa.orchestrator.status.HostStatus in project vespa by vespa-engine.

The class OrchestratorImpl, method getHost.

/**
 * Builds a {@code Host} for the given host name from the application instance it belongs to,
 * its node status, and the service instances of that application located on the host.
 *
 * @param hostName the host to look up
 * @return the host with its status, application instance reference and service instances
 * @throws HostNameNotFoundException declared by this method; raised by callees not shown here
 */
@Override
public Host getHost(HostName hostName) throws HostNameNotFoundException {
    ApplicationInstance owner = getApplicationInstance(hostName);

    // Flatten all clusters of the application and keep only the instances on this host.
    List<ServiceInstance> servicesOnHost = owner.serviceClusters()
            .stream()
            .flatMap(serviceCluster -> serviceCluster.serviceInstances().stream())
            .filter(instance -> hostName.equals(instance.hostName()))
            .collect(Collectors.toList());

    HostStatus status = getNodeStatus(owner.reference(), hostName);
    return new Host(hostName, status, owner.reference(), servicesOnHost);
}
Also used : ApplicationId(com.yahoo.config.provision.ApplicationId) Inject(com.google.inject.Inject) HostedVespaClusterPolicy(com.yahoo.vespa.orchestrator.policy.HostedVespaClusterPolicy) HashMap(java.util.HashMap) ClusterControllerClientFactory(com.yahoo.vespa.orchestrator.controller.ClusterControllerClientFactory) ApplicationInstance(com.yahoo.vespa.applicationmodel.ApplicationInstance) ClusterId(com.yahoo.vespa.applicationmodel.ClusterId) ApplicationApi(com.yahoo.vespa.orchestrator.model.ApplicationApi) MutableStatusRegistry(com.yahoo.vespa.orchestrator.status.MutableStatusRegistry) ApplicationApiImpl(com.yahoo.vespa.orchestrator.model.ApplicationApiImpl) BatchHostStateChangeDeniedException(com.yahoo.vespa.orchestrator.policy.BatchHostStateChangeDeniedException) HostName(com.yahoo.vespa.applicationmodel.HostName) HostStateChangeDeniedException(com.yahoo.vespa.orchestrator.policy.HostStateChangeDeniedException) Map(java.util.Map) LogLevel(com.yahoo.log.LogLevel) ClusterControllerStateResponse(com.yahoo.vespa.orchestrator.controller.ClusterControllerStateResponse) ApplicationInstanceReference(com.yahoo.vespa.applicationmodel.ApplicationInstanceReference) ServiceInstance(com.yahoo.vespa.applicationmodel.ServiceInstance) ClusterControllerNodeState(com.yahoo.vespa.orchestrator.controller.ClusterControllerNodeState) StatusService(com.yahoo.vespa.orchestrator.status.StatusService) Policy(com.yahoo.vespa.orchestrator.policy.Policy) OrchestratorConfig(com.yahoo.vespa.orchestrator.config.OrchestratorConfig) ServiceCluster(com.yahoo.vespa.applicationmodel.ServiceCluster) VespaModelUtil(com.yahoo.vespa.orchestrator.model.VespaModelUtil) Set(java.util.Set) IOException(java.io.IOException) Logger(java.util.logging.Logger) ApplicationInstanceStatus(com.yahoo.vespa.orchestrator.status.ApplicationInstanceStatus) Collectors(java.util.stream.Collectors) TimeUnit(java.util.concurrent.TimeUnit) ClusterControllerClient(com.yahoo.vespa.orchestrator.controller.ClusterControllerClient) 
List(java.util.List) NodeGroup(com.yahoo.vespa.orchestrator.model.NodeGroup) HostedVespaPolicy(com.yahoo.vespa.orchestrator.policy.HostedVespaPolicy) HostStatus(com.yahoo.vespa.orchestrator.status.HostStatus) ApplicationInstance(com.yahoo.vespa.applicationmodel.ApplicationInstance) ServiceInstance(com.yahoo.vespa.applicationmodel.ServiceInstance) HostStatus(com.yahoo.vespa.orchestrator.status.HostStatus)

Example 5 with HostStatus

Use of com.yahoo.vespa.orchestrator.status.HostStatus in project vespa by vespa-engine.

The class HostResource, method patch.

/**
 * Applies a patch request to a host. When the request carries a state, that state is parsed
 * with {@code HostStatus.valueOf} and set on the host through the orchestrator; a request
 * without a state changes nothing.
 *
 * @param hostNameString name of the host to patch
 * @param request the patch request; only its {@code state} field is read here
 * @return a response whose description is {@code "ok"}
 * @throws BadRequestException if the requested state is not a valid {@code HostStatus} name
 * @throws NotFoundException if the orchestrator reports the host name as not found
 * @throws InternalServerErrorException if the orchestrator fails to set the status for any other
 *         orchestration reason
 */
@Override
public PatchHostResponse patch(String hostNameString, PatchHostRequest request) {
    HostName hostName = new HostName(hostNameString);
    if (request.state != null) {
        HostStatus state;
        try {
            state = HostStatus.valueOf(request.state);
        } catch (IllegalArgumentException e) {
            // Keep the original exception as cause so the failure can be traced.
            throw new BadRequestException("Bad state in request: '" + request.state + "'", e);
        }
        try {
            orchestrator.setNodeStatus(hostName, state);
        } catch (HostNameNotFoundException e) {
            log.log(LogLevel.INFO, "Host not found: " + hostName);
            throw new NotFoundException(e);
        } catch (OrchestrationException e) {
            String message = "Failed to set " + hostName + " to " + state + ": " + e.getMessage();
            log.log(LogLevel.INFO, message, e);
            // Keep the cause here too; the message alone loses the original stack trace.
            throw new InternalServerErrorException(message, e);
        }
    }
    PatchHostResponse response = new PatchHostResponse();
    response.description = "ok";
    return response;
}
Also used : PatchHostResponse(com.yahoo.vespa.orchestrator.restapi.wire.PatchHostResponse) OrchestrationException(com.yahoo.vespa.orchestrator.OrchestrationException) BadRequestException(javax.ws.rs.BadRequestException) HostNameNotFoundException(com.yahoo.vespa.orchestrator.HostNameNotFoundException) NotFoundException(javax.ws.rs.NotFoundException) InternalServerErrorException(javax.ws.rs.InternalServerErrorException) HostStatus(com.yahoo.vespa.orchestrator.status.HostStatus) HostName(com.yahoo.vespa.applicationmodel.HostName) HostNameNotFoundException(com.yahoo.vespa.orchestrator.HostNameNotFoundException)

Aggregations

HostStatus (com.yahoo.vespa.orchestrator.status.HostStatus)6 HostName (com.yahoo.vespa.applicationmodel.HostName)5 ApplicationInstance (com.yahoo.vespa.applicationmodel.ApplicationInstance)4 ApplicationId (com.yahoo.config.provision.ApplicationId)2 ApplicationInstanceReference (com.yahoo.vespa.applicationmodel.ApplicationInstanceReference)2 ServiceInstance (com.yahoo.vespa.applicationmodel.ServiceInstance)2 ServiceStatus (com.yahoo.vespa.applicationmodel.ServiceStatus)2 HostNameNotFoundException (com.yahoo.vespa.orchestrator.HostNameNotFoundException)2 ApplicationInstanceStatus (com.yahoo.vespa.orchestrator.status.ApplicationInstanceStatus)2 MutableStatusRegistry (com.yahoo.vespa.orchestrator.status.MutableStatusRegistry)2 List (java.util.List)2 Collectors (java.util.stream.Collectors)2 Inject (com.google.inject.Inject)1 Version (com.yahoo.component.Version)1 Metric (com.yahoo.jdisc.Metric)1 LogLevel (com.yahoo.log.LogLevel)1 ClusterId (com.yahoo.vespa.applicationmodel.ClusterId)1 ServiceCluster (com.yahoo.vespa.applicationmodel.ServiceCluster)1 ServiceType (com.yahoo.vespa.applicationmodel.ServiceType)1 Allocation (com.yahoo.vespa.hosted.provision.node.Allocation)1