Search in sources :

Example 1 with ContainerNodeSpec

use of com.yahoo.vespa.hosted.node.admin.ContainerNodeSpec in project vespa by vespa-engine.

In the class NodeAgentImpl, the method updateContainerNodeMetrics:

/**
 * Reads the Docker statistics for this node's container and pushes CPU, memory, disk and
 * per-interface network metrics to the container.
 *
 * Returns without doing anything when no node spec has been recorded yet, when the container
 * state is anything other than UNKNOWN, or when no container statistics are available.
 */
@SuppressWarnings("unchecked")
public void updateContainerNodeMetrics() {
    final ContainerNodeSpec spec = lastNodeSpec;
    if (spec == null) {
        return;
    }
    if (containerState != UNKNOWN) {
        return;
    }

    Optional<Docker.ContainerStats> maybeStats = dockerOperations.getContainerStats(containerName);
    if (!maybeStats.isPresent()) {
        return;
    }

    // Dimensions attached to the system metrics; the same builder is reused for the network metrics below
    Dimensions.Builder dimensionsBuilder = new Dimensions.Builder()
            .add("host", hostname)
            .add("role", "tenants")
            .add("state", spec.nodeState.toString())
            .add("parentHostname", environment.getParentHostHostname());
    spec.allowedToBeDown.ifPresent(allowed -> {
        String orchestratorState = allowed ? "ALLOWED_TO_BE_DOWN" : "NO_REMARKS";
        dimensionsBuilder.add("orchestratorState", orchestratorState);
    });
    Dimensions dimensions = dimensionsBuilder.build();

    Docker.ContainerStats stats = maybeStats.get();
    final String application = MetricReceiverWrapper.APPLICATION_NODE;

    // Raw CPU counters as reported in the container statistics
    final Map<?, ?> cpuUsage = (Map<?, ?>) stats.getCpuStats().get("cpu_usage");
    final int hostCpuCores = ((List<Number>) cpuUsage.get("percpu_usage")).size();
    final long containerKernelCpuTime = ((Number) cpuUsage.get("usage_in_kernelmode")).longValue();
    final long containerTotalCpuTime = ((Number) cpuUsage.get("total_usage")).longValue();
    final long systemTotalCpuTime = ((Number) stats.getCpuStats().get("system_cpu_usage")).longValue();

    // Raw memory counters
    final long memoryLimitBytes = ((Number) stats.getMemoryStats().get("limit")).longValue();
    final long memoryUsageBytes = ((Number) stats.getMemoryStats().get("usage")).longValue();
    final long memoryCacheBytes = ((Number) ((Map) stats.getMemoryStats().get("stats")).get("cache")).longValue();

    // Disk limit comes from the node spec, disk usage from the storage maintainer
    final long diskLimitBytes = (long) (spec.minDiskAvailableGb * BYTES_IN_GB);
    final Optional<Long> diskUsedBytes = storageMaintainer.getDiskUsageFor(containerName);

    lastCpuMetric.updateCpuDeltas(systemTotalCpuTime, containerTotalCpuTime, containerKernelCpuTime);

    // Ratio of CPU cores allocated to this container to total number of CPU cores on this host
    final double allocatedCpuRatio = spec.minCpuCores / hostCpuCores;
    double cpuUsageRatioOfAllocated = lastCpuMetric.getCpuUsageRatio() / allocatedCpuRatio;
    double cpuKernelUsageRatioOfAllocated = lastCpuMetric.getCpuKernelUsageRatio() / allocatedCpuRatio;

    // Cache is subtracted from the reported usage before computing utilization
    long memoryUsedBytes = memoryUsageBytes - memoryCacheBytes;
    double memoryUsageRatio = (double) memoryUsedBytes / memoryLimitBytes;

    List<DimensionMetrics> metrics = new ArrayList<>();

    DimensionMetrics.Builder systemMetrics = new DimensionMetrics.Builder(application, dimensions)
            .withMetric("mem.limit", memoryLimitBytes)
            .withMetric("mem.used", memoryUsedBytes)
            .withMetric("mem.util", 100 * memoryUsageRatio)
            .withMetric("cpu.util", 100 * cpuUsageRatioOfAllocated)
            .withMetric("cpu.sys.util", 100 * cpuKernelUsageRatioOfAllocated)
            .withMetric("disk.limit", diskLimitBytes);
    // Disk usage metrics are only reported when a usage figure is available
    diskUsedBytes.ifPresent(used -> {
        systemMetrics.withMetric("disk.used", used);
        systemMetrics.withMetric("disk.util", 100 * ((double) used / diskLimitBytes));
    });
    metrics.add(systemMetrics.build());

    // One set of metrics per network interface, tagged with an additional "interface" dimension
    stats.getNetworks().forEach((interfaceName, interfaceStats) -> {
        Dimensions interfaceDimensions = dimensionsBuilder.add("interface", interfaceName).build();
        Map<String, Number> counters = (Map<String, Number>) interfaceStats;
        DimensionMetrics.Builder interfaceMetrics = new DimensionMetrics.Builder(application, interfaceDimensions)
                .withMetric("net.in.bytes", counters.get("rx_bytes").longValue())
                .withMetric("net.in.errors", counters.get("rx_errors").longValue())
                .withMetric("net.in.dropped", counters.get("rx_dropped").longValue())
                .withMetric("net.out.bytes", counters.get("tx_bytes").longValue())
                .withMetric("net.out.errors", counters.get("tx_errors").longValue())
                .withMetric("net.out.dropped", counters.get("tx_dropped").longValue());
        metrics.add(interfaceMetrics.build());
    });

    pushMetricsToContainer(metrics);
}
Also used : ArrayList(java.util.ArrayList) Dimensions(com.yahoo.vespa.hosted.dockerapi.metrics.Dimensions) DimensionMetrics(com.yahoo.vespa.hosted.dockerapi.metrics.DimensionMetrics) Docker(com.yahoo.vespa.hosted.dockerapi.Docker) ArrayList(java.util.ArrayList) LinkedList(java.util.LinkedList) List(java.util.List) LinkedHashMap(java.util.LinkedHashMap) Map(java.util.Map) ContainerNodeSpec(com.yahoo.vespa.hosted.node.admin.ContainerNodeSpec)

Example 2 with ContainerNodeSpec

use of com.yahoo.vespa.hosted.node.admin.ContainerNodeSpec in project vespa by vespa-engine.

In the class RealNodeRepository, the method getContainersToRun:

/**
 * Fetches the nodes whose parent host is {@code baseHostName} from the config server and
 * converts each of them to a {@link ContainerNodeSpec}.
 *
 * Nodes whose conversion fails with an IllegalArgumentException or NullPointerException are
 * logged as a warning and left out of the result.
 *
 * @param baseHostName hostname inserted as the parentHost query parameter
 * @return the node specs that could be created, in the order the nodes were returned
 */
@Override
public List<ContainerNodeSpec> getContainersToRun(String baseHostName) {
    final String requestPath = "/nodes/v2/node/?parentHost=" + baseHostName + "&recursive=true";
    final GetNodesResponse response = configServerApi.get(requestPath, GetNodesResponse.class);

    final List<ContainerNodeSpec> specs = new ArrayList<>(response.nodes.size());
    for (GetNodesResponse.Node candidate : response.nodes) {
        try {
            specs.add(createContainerNodeSpec(candidate));
        } catch (IllegalArgumentException | NullPointerException e) {
            // Skip the offending node but keep processing the rest of the response
            NODE_ADMIN_LOGGER.warning("Bad node received from node repo when requesting children of the " + baseHostName + " host: " + candidate, e);
        }
    }
    return specs;
}
Also used : ArrayList(java.util.ArrayList) GetNodesResponse(com.yahoo.vespa.hosted.node.admin.configserver.noderepository.bindings.GetNodesResponse) ContainerNodeSpec(com.yahoo.vespa.hosted.node.admin.ContainerNodeSpec)

Example 3 with ContainerNodeSpec

use of com.yahoo.vespa.hosted.node.admin.ContainerNodeSpec in project vespa by vespa-engine.

In the class StorageMaintainerTest, the method testMaintenanceThrottlingAfterSuccessfulMaintenance:

/**
 * Walks through a sequence of StorageMaintainer calls (removeOldFilesFromNode,
 * handleCoreDumpsForContainer, cleanupNodeStorage) and, after each one, checks the expected
 * count via verifyProcessExecuterCalled. The count is presumably the cumulative number of
 * process executer invocations; it stays unchanged where a call is expected to be throttled.
 */
@Test
public void testMaintenanceThrottlingAfterSuccessfulMaintenance() {
    String hostname = "node-123.us-north-3.test.yahoo.com";
    ContainerName containerName = ContainerName.fromHostname(hostname);
    ContainerNodeSpec nodeSpec = new ContainerNodeSpec.Builder().hostname(hostname).nodeState(Node.State.ready).nodeType("tenants").nodeFlavor("docker").minCpuCores(1).minMainMemoryAvailableGb(1).minDiskAvailableGb(1).build();
    // Stub the process executer so that every exec call returns the pair (0, "")
    try {
        when(processExecuter.exec(any(String[].class))).thenReturn(new Pair<>(0, ""));
    } catch (IOException ignored) {
        // exec declares IOException; the call above is only stubbing setup
        // (presumably on a mock), so there is nothing to handle here
    }
    // First call runs the job: expected count goes to 1
    storageMaintainer.removeOldFilesFromNode(containerName);
    verifyProcessExecuterCalled(1);
    // Will not actually run maintenance job until an hour passes
    storageMaintainer.removeOldFilesFromNode(containerName);
    verifyProcessExecuterCalled(1);
    // Coredump handler has its own throttler
    storageMaintainer.handleCoreDumpsForContainer(containerName, nodeSpec, false);
    verifyProcessExecuterCalled(2);
    // After 61 minutes both jobs are expected to run again, once each
    clock.advance(Duration.ofMinutes(61));
    storageMaintainer.removeOldFilesFromNode(containerName);
    verifyProcessExecuterCalled(3);
    storageMaintainer.handleCoreDumpsForContainer(containerName, nodeSpec, false);
    verifyProcessExecuterCalled(4);
    // Repeating the call with false right away leaves the expected count unchanged
    storageMaintainer.handleCoreDumpsForContainer(containerName, nodeSpec, false);
    verifyProcessExecuterCalled(4);
    // With the last argument set to true the expected count increases on every call
    storageMaintainer.handleCoreDumpsForContainer(containerName, nodeSpec, true);
    verifyProcessExecuterCalled(5);
    storageMaintainer.handleCoreDumpsForContainer(containerName, nodeSpec, true);
    verifyProcessExecuterCalled(6);
    // ...while a following call with false does not increase it
    storageMaintainer.handleCoreDumpsForContainer(containerName, nodeSpec, false);
    verifyProcessExecuterCalled(6);
    // cleanupNodeStorage is unthrottled and it should reset previous times
    storageMaintainer.cleanupNodeStorage(containerName, nodeSpec);
    verifyProcessExecuterCalled(7);
    storageMaintainer.cleanupNodeStorage(containerName, nodeSpec);
    verifyProcessExecuterCalled(8);
    // Coredump handling is expected to run again immediately after the cleanup
    storageMaintainer.handleCoreDumpsForContainer(containerName, nodeSpec, false);
    verifyProcessExecuterCalled(9);
}
Also used : ContainerName(com.yahoo.vespa.hosted.dockerapi.ContainerName) IOException(java.io.IOException) ContainerNodeSpec(com.yahoo.vespa.hosted.node.admin.ContainerNodeSpec) Test(org.junit.Test)

Example 4 with ContainerNodeSpec

use of com.yahoo.vespa.hosted.node.admin.ContainerNodeSpec in project vespa by vespa-engine.

In the class NodeAgentImplTest, the method testRestartDeadContainerAfterNodeAdminRestart:

/**
 * Runs a single tick of a node agent for an active node whose current and wanted Docker
 * images are the same, and expects exactly one container removal, one creation and one start.
 */
@Test
public void testRestartDeadContainerAfterNodeAdminRestart() throws IOException {
    // Active node that is already on the wanted Docker image
    final ContainerNodeSpec activeSpec = nodeSpecBuilder
            .currentDockerImage(dockerImage)
            .wantedDockerImage(dockerImage)
            .nodeState(Node.State.active)
            .vespaVersion(vespaVersion)
            .build();

    NodeAgentImpl agent = makeNodeAgent(dockerImage, false);

    // Stub the collaborators consulted during the tick
    when(nodeRepository.getContainerNodeSpec(eq(hostName))).thenReturn(Optional.of(activeSpec));
    when(pathResolver.getApplicationStoragePathForNodeAdmin()).thenReturn(Files.createTempDirectory("foo"));
    when(pathResolver.getApplicationStoragePathForHost()).thenReturn(Files.createTempDirectory("bar"));
    when(storageMaintainer.getDiskUsageFor(eq(containerName))).thenReturn(Optional.of(201326592000L));

    agent.tick();

    // The container is expected to be removed, re-created and started exactly once each
    verify(dockerOperations, times(1)).removeContainer(any(), any());
    verify(dockerOperations, times(1)).createContainer(eq(containerName), eq(activeSpec));
    verify(dockerOperations, times(1)).startContainer(eq(containerName), eq(activeSpec));
}
Also used : ContainerNodeSpec(com.yahoo.vespa.hosted.node.admin.ContainerNodeSpec) Test(org.junit.Test)

Example 5 with ContainerNodeSpec

use of com.yahoo.vespa.hosted.node.admin.ContainerNodeSpec in project vespa by vespa-engine.

In the class NodeAgentImplTest, the method testGetRelevantMetricsForReadyNode:

@Test
public void testGetRelevantMetricsForReadyNode() {
    final ContainerNodeSpec nodeSpec = nodeSpecBuilder.nodeState(Node.State.ready).build();
    NodeAgentImpl nodeAgent = makeNodeAgent(null, false);
    when(nodeRepository.getContainerNodeSpec(eq(hostName))).thenReturn(Optional.of(nodeSpec));
    when(dockerOperations.getContainerStats(eq(containerName))).thenReturn(Optional.empty());
    // Run the converge loop once to initialize lastNodeSpec
    nodeAgent.converge();
    nodeAgent.updateContainerNodeMetrics();
    Set<Map<String, Object>> actualMetrics = metricReceiver.getAllMetricsRaw();
    assertEquals(Collections.emptySet(), actualMetrics);
}
Also used : Map(java.util.Map) ContainerNodeSpec(com.yahoo.vespa.hosted.node.admin.ContainerNodeSpec) Test(org.junit.Test)

Aggregations

ContainerNodeSpec (com.yahoo.vespa.hosted.node.admin.ContainerNodeSpec): 30; Test (org.junit.Test): 22; InOrder (org.mockito.InOrder): 8; ContainerName (com.yahoo.vespa.hosted.dockerapi.ContainerName): 7; DockerImage (com.yahoo.vespa.hosted.dockerapi.DockerImage): 7; Docker (com.yahoo.vespa.hosted.dockerapi.Docker): 4; DockerException (com.yahoo.vespa.hosted.dockerapi.DockerException): 4; ArrayList (java.util.ArrayList): 4; DimensionMetrics (com.yahoo.vespa.hosted.dockerapi.metrics.DimensionMetrics): 3; Dimensions (com.yahoo.vespa.hosted.dockerapi.metrics.Dimensions): 3; Node (com.yahoo.vespa.hosted.provision.Node): 3; IOException (java.io.IOException): 3; LinkedHashMap (java.util.LinkedHashMap): 3; Map (java.util.Map): 3; JsonProcessingException (com.fasterxml.jackson.core.JsonProcessingException): 2; ThreadFactoryFactory (com.yahoo.concurrent.ThreadFactoryFactory): 2; NodeType (com.yahoo.config.provision.NodeType): 2; Container (com.yahoo.vespa.hosted.dockerapi.Container): 2; ContainerResources (com.yahoo.vespa.hosted.dockerapi.ContainerResources): 2; DockerExecTimeoutException (com.yahoo.vespa.hosted.dockerapi.DockerExecTimeoutException): 2