use of com.yahoo.vespa.hosted.provision.maintenance.JobControl in project vespa by vespa-engine.
the class MetricsReporterTest method test_registered_metric.
@Test
public void test_registered_metric() throws Exception {
NodeFlavors nodeFlavors = FlavorConfigBuilder.createDummies("default");
Curator curator = new MockCurator();
NodeRepository nodeRepository = new NodeRepository(nodeFlavors, curator, Clock.systemUTC(), Zone.defaultZone(), new MockNameResolver().mockAnyLookup(), new DockerImage("docker-registry.domain.tld:8080/dist/vespa"), true);
Node node = nodeRepository.createNode("openStackId", "hostname", Optional.empty(), nodeFlavors.getFlavorOrThrow("default"), NodeType.tenant);
nodeRepository.addNodes(Collections.singletonList(node));
Node hostNode = nodeRepository.createNode("openStackId2", "parent", Optional.empty(), nodeFlavors.getFlavorOrThrow("default"), NodeType.proxy);
nodeRepository.addNodes(Collections.singletonList(hostNode));
Map<String, Number> expectedMetrics = new HashMap<>();
expectedMetrics.put("hostedVespa.provisionedHosts", 1L);
expectedMetrics.put("hostedVespa.parkedHosts", 0L);
expectedMetrics.put("hostedVespa.readyHosts", 0L);
expectedMetrics.put("hostedVespa.reservedHosts", 0L);
expectedMetrics.put("hostedVespa.activeHosts", 0L);
expectedMetrics.put("hostedVespa.inactiveHosts", 0L);
expectedMetrics.put("hostedVespa.dirtyHosts", 0L);
expectedMetrics.put("hostedVespa.failedHosts", 0L);
expectedMetrics.put("hostedVespa.docker.totalCapacityDisk", 0.0);
expectedMetrics.put("hostedVespa.docker.totalCapacityMem", 0.0);
expectedMetrics.put("hostedVespa.docker.totalCapacityCpu", 0.0);
expectedMetrics.put("hostedVespa.docker.freeCapacityDisk", 0.0);
expectedMetrics.put("hostedVespa.docker.freeCapacityMem", 0.0);
expectedMetrics.put("hostedVespa.docker.freeCapacityCpu", 0.0);
expectedMetrics.put("wantedRebootGeneration", 0L);
expectedMetrics.put("currentRebootGeneration", 0L);
expectedMetrics.put("wantToReboot", 0);
expectedMetrics.put("wantToRetire", 0);
expectedMetrics.put("wantToDeprovision", 0);
expectedMetrics.put("hardwareFailure", 0);
expectedMetrics.put("hardwareDivergence", 0);
expectedMetrics.put("allowedToBeDown", 0);
expectedMetrics.put("numberOfServices", 0L);
Orchestrator orchestrator = mock(Orchestrator.class);
ServiceMonitor serviceMonitor = mock(ServiceMonitor.class);
when(orchestrator.getNodeStatus(any())).thenReturn(HostStatus.NO_REMARKS);
ServiceModel serviceModel = mock(ServiceModel.class);
when(serviceMonitor.getServiceModelSnapshot()).thenReturn(serviceModel);
when(serviceModel.getServiceInstancesByHostName()).thenReturn(Collections.emptyMap());
TestMetric metric = new TestMetric();
MetricsReporter metricsReporter = new MetricsReporter(nodeRepository, metric, orchestrator, serviceMonitor, Duration.ofMinutes(1), new JobControl(nodeRepository.database()));
metricsReporter.maintain();
assertEquals(expectedMetrics, metric.values);
}
use of com.yahoo.vespa.hosted.provision.maintenance.JobControl in project vespa by vespa-engine.
the class MultigroupProvisioningTest method test_provisioning_of_multiple_groups_after_flavor_migration_and_exiration.
@Test
public void test_provisioning_of_multiple_groups_after_flavor_migration_and_exiration() {
ProvisioningTester tester = new ProvisioningTester(new Zone(Environment.prod, RegionName.from("us-east")));
ApplicationId application1 = tester.makeApplicationId();
tester.makeReadyNodes(10, "small");
tester.makeReadyNodes(10, "large");
deploy(application1, 8, 1, "small", tester);
deploy(application1, 8, 1, "large", tester);
// Expire small nodes
tester.advanceTime(Duration.ofDays(7));
MockDeployer deployer = new MockDeployer(tester.provisioner(), Collections.singletonMap(application1, new MockDeployer.ApplicationContext(application1, cluster(), Capacity.fromNodeCount(8, Optional.of("large"), false), 1)));
new RetiredExpirer(tester.nodeRepository(), tester.orchestrator(), deployer, tester.clock(), Duration.ofDays(30), Duration.ofHours(12), new JobControl(tester.nodeRepository().database())).run();
assertEquals(8, tester.getNodes(application1, Node.State.inactive).flavor("small").size());
deploy(application1, 8, 8, "large", tester);
}
use of com.yahoo.vespa.hosted.provision.maintenance.JobControl in project vespa by vespa-engine.
the class NodeTypeProvisioningTest method retire_proxy.
@Test
public void retire_proxy() {
MockDeployer deployer = new MockDeployer(tester.provisioner(), Collections.singletonMap(application, new MockDeployer.ApplicationContext(application, clusterSpec, capacity, 1)));
RetiredExpirer retiredExpirer = new RetiredExpirer(tester.nodeRepository(), tester.orchestrator(), deployer, tester.clock(), Duration.ofDays(30), Duration.ofMinutes(10), new JobControl(tester.nodeRepository().database()));
{
// Deploy
List<HostSpec> hosts = deployProxies(application, tester);
assertEquals("Reserved all proxies", 11, hosts.size());
tester.activate(application, new HashSet<>(hosts));
List<Node> nodes = tester.nodeRepository().getNodes(NodeType.proxy, Node.State.active);
assertEquals("Activated all proxies", 11, nodes.size());
}
Node nodeToRetire = tester.nodeRepository().getNodes(NodeType.proxy, Node.State.active).get(5);
{
// Pick out a node and retire it
tester.nodeRepository().write(nodeToRetire.with(nodeToRetire.status().withWantToRetire(true)));
List<HostSpec> hosts = deployProxies(application, tester);
assertEquals(11, hosts.size());
tester.activate(application, new HashSet<>(hosts));
List<Node> nodes = tester.nodeRepository().getNodes(NodeType.proxy, Node.State.active);
assertEquals(11, nodes.size());
// Verify that wantToRetire has been propagated
assertTrue(tester.nodeRepository().getNode(nodeToRetire.hostname()).flatMap(Node::allocation).map(allocation -> allocation.membership().retired()).orElseThrow(RuntimeException::new));
}
{
// Redeploying while the node is still retiring has no effect
List<HostSpec> hosts = deployProxies(application, tester);
assertEquals(11, hosts.size());
tester.activate(application, new HashSet<>(hosts));
List<Node> nodes = tester.nodeRepository().getNodes(NodeType.proxy, Node.State.active);
assertEquals(11, nodes.size());
// Verify that the node is still marked as retired
assertTrue(tester.nodeRepository().getNode(nodeToRetire.hostname()).flatMap(Node::allocation).map(allocation -> allocation.membership().retired()).orElseThrow(RuntimeException::new));
}
{
tester.advanceTime(Duration.ofMinutes(11));
retiredExpirer.run();
List<HostSpec> hosts = deployProxies(application, tester);
assertEquals(10, hosts.size());
tester.activate(application, new HashSet<>(hosts));
List<Node> nodes = tester.nodeRepository().getNodes(NodeType.proxy, Node.State.active);
assertEquals(10, nodes.size());
// Verify that the node is now inactive
assertEquals(Node.State.inactive, tester.nodeRepository().getNode(nodeToRetire.hostname()).orElseThrow(RuntimeException::new).state());
}
}
use of com.yahoo.vespa.hosted.provision.maintenance.JobControl in project vespa by vespa-engine.
the class ProvisioningTest method application_deployment_extends_existing_reservations_on_deploy.
@Test
public void application_deployment_extends_existing_reservations_on_deploy() {
ProvisioningTester tester = new ProvisioningTester(new Zone(Environment.prod, RegionName.from("us-east")));
ApplicationId application = tester.makeApplicationId();
tester.makeReadyNodes(2, "default");
// Deploy fails with out of capacity
try {
prepare(application, 2, 0, 2, 0, "default", tester);
fail("Expected exception");
} catch (OutOfCapacityException ignored) {
}
assertEquals("Reserved a subset of required nodes", 2, tester.getNodes(application, Node.State.reserved).size());
// Enough nodes become available
tester.makeReadyNodes(2, "default");
// Deploy is retried after a few minutes
tester.clock().advance(Duration.ofMinutes(2));
SystemState state = prepare(application, 2, 0, 2, 0, "default", tester);
List<Node> reserved = tester.getNodes(application, Node.State.reserved).asList();
assertEquals("Reserved required nodes", 4, reserved.size());
assertTrue("Time of event is updated for all nodes", reserved.stream().allMatch(n -> n.history().event(History.Event.Type.reserved).get().at().equals(tester.clock().instant())));
// Over 10 minutes pass since first reservation. First set of reserved nodes are not expired
tester.clock().advance(Duration.ofMinutes(8).plus(Duration.ofSeconds(1)));
ReservationExpirer expirer = new ReservationExpirer(tester.nodeRepository(), tester.clock(), Duration.ofMinutes(10), new JobControl(tester.nodeRepository().database()));
expirer.run();
assertEquals("Nodes remain reserved", 4, tester.getNodes(application, Node.State.reserved).size());
tester.activate(application, state.allHosts);
assertEquals(4, tester.getNodes(application, Node.State.active).size());
}
use of com.yahoo.vespa.hosted.provision.maintenance.JobControl in project vespa by vespa-engine.
the class NodeTypeProvisioningTest method retire_multiple_proxy_simultaneously.
@Test
public void retire_multiple_proxy_simultaneously() {
MockDeployer deployer = new MockDeployer(tester.provisioner(), Collections.singletonMap(application, new MockDeployer.ApplicationContext(application, clusterSpec, capacity, 1)));
RetiredExpirer retiredExpirer = new RetiredExpirer(tester.nodeRepository(), tester.orchestrator(), deployer, tester.clock(), Duration.ofDays(30), Duration.ofMinutes(10), new JobControl(tester.nodeRepository().database()));
final int numNodesToRetire = 5;
{
// Deploy
List<HostSpec> hosts = deployProxies(application, tester);
assertEquals("Reserved all proxies", 11, hosts.size());
tester.activate(application, new HashSet<>(hosts));
List<Node> nodes = tester.nodeRepository().getNodes(NodeType.proxy, Node.State.active);
assertEquals("Activated all proxies", 11, nodes.size());
}
List<Node> nodesToRetire = tester.nodeRepository().getNodes(NodeType.proxy, Node.State.active).subList(3, 3 + numNodesToRetire);
String currentyRetiringHostname;
{
nodesToRetire.forEach(nodeToRetire -> tester.nodeRepository().write(nodeToRetire.with(nodeToRetire.status().withWantToRetire(true))));
List<HostSpec> hosts = deployProxies(application, tester);
assertEquals(11, hosts.size());
tester.activate(application, new HashSet<>(hosts));
List<Node> nodes = tester.nodeRepository().getNodes(NodeType.proxy, Node.State.active);
assertEquals(11, nodes.size());
// Verify that wantToRetire has been propagated
List<Node> nodesCurrentlyRetiring = nodes.stream().filter(node -> node.allocation().get().membership().retired()).collect(Collectors.toList());
assertEquals(1, nodesCurrentlyRetiring.size());
// The retiring node should be one of the nodes we marked for retirement
currentyRetiringHostname = nodesCurrentlyRetiring.get(0).hostname();
assertTrue(nodesToRetire.stream().map(Node::hostname).filter(hostname -> hostname.equals(currentyRetiringHostname)).count() == 1);
}
{
// Redeploying while the node is still retiring has no effect
List<HostSpec> hosts = deployProxies(application, tester);
assertEquals(11, hosts.size());
tester.activate(application, new HashSet<>(hosts));
List<Node> nodes = tester.nodeRepository().getNodes(NodeType.proxy, Node.State.active);
assertEquals(11, nodes.size());
// Verify that wantToRetire has been propagated
List<Node> nodesCurrentlyRetiring = nodes.stream().filter(node -> node.allocation().get().membership().retired()).collect(Collectors.toList());
assertEquals(1, nodesCurrentlyRetiring.size());
// The node that started retiring is still the only one retiring
assertEquals(currentyRetiringHostname, nodesCurrentlyRetiring.get(0).hostname());
}
{
tester.advanceTime(Duration.ofMinutes(11));
retiredExpirer.run();
List<HostSpec> hosts = deployProxies(application, tester);
assertEquals(10, hosts.size());
tester.activate(application, new HashSet<>(hosts));
List<Node> nodes = tester.nodeRepository().getNodes(NodeType.proxy, Node.State.active);
assertEquals(10, nodes.size());
// Verify the node we previously set to retire has finished retiring
assertEquals(Node.State.inactive, tester.nodeRepository().getNode(currentyRetiringHostname).orElseThrow(RuntimeException::new).state());
// Verify that a node is currently retiring
List<Node> nodesCurrentlyRetiring = nodes.stream().filter(node -> node.allocation().get().membership().retired()).collect(Collectors.toList());
assertEquals(1, nodesCurrentlyRetiring.size());
// This node is different from the one that was retiring previously
String newRetiringHostname = nodesCurrentlyRetiring.get(0).hostname();
assertNotEquals(currentyRetiringHostname, newRetiringHostname);
// ... but is one of the nodes that were put to wantToRetire earlier
assertTrue(nodesToRetire.stream().map(Node::hostname).filter(hostname -> hostname.equals(newRetiringHostname)).count() == 1);
}
for (int i = 0; i < 10; i++) {
tester.advanceTime(Duration.ofMinutes(11));
retiredExpirer.run();
List<HostSpec> hosts = deployProxies(application, tester);
tester.activate(application, new HashSet<>(hosts));
}
// After a long time, all currently active proxy nodes are not marked with wantToRetire or as retired
long numRetiredActiveProxyNodes = tester.nodeRepository().getNodes(NodeType.proxy, Node.State.active).stream().filter(node -> !node.status().wantToRetire()).filter(node -> !node.allocation().get().membership().retired()).count();
assertEquals(11 - numNodesToRetire, numRetiredActiveProxyNodes);
// All the nodes that were marked with wantToRetire earlier are now inactive
assertEquals(nodesToRetire.stream().map(Node::hostname).collect(Collectors.toSet()), tester.nodeRepository().getNodes(Node.State.inactive).stream().map(Node::hostname).collect(Collectors.toSet()));
}
Aggregations