use of com.spotify.helios.agent.AgentMain in project helios by spotify.
the class MultiplePortJobTest method test.
/**
 * Checks port allocation for two jobs deployed to one agent that was started with a two port
 * wide {@code --port-range}.
 *
 * <p>Both jobs expose "foo" (left to the agent to allocate) and "bar" (a static mapping). The
 * test asserts that the dynamic ports lie inside the reserved range, that the static mappings are
 * reported as requested, that the two jobs did not get the same dynamic port, and that the
 * reported ports are unchanged after a container is killed and after the agent is restarted.
 *
 * @throws Exception if any setup, deployment or polling call fails
 */
@Test
public void test() throws Exception {
  startDefaultMaster();

  // 'Reserve' a 2 port wide range of ports.
  final Range<Integer> portRange = temporaryPorts.localPortRange("agent1", 2);

  // Start an agent using the aforementioned 2 port wide range.
  final AgentMain agent1 = startDefaultAgent(
      testHost(),
      "--port-range=" + portRange.lowerEndpoint() + ":" + portRange.upperEndpoint());

  try (final DockerClient dockerClient = getNewDockerClient()) {
    final HeliosClient client = defaultClient();

    awaitHostStatus(client, testHost(), UP, LONG_WAIT_SECONDS, SECONDS);

    // For both jobs:
    // foo is a mapping of 4711 -> A port dynamically allocated by the agent's PortAllocator
    // bar is a mapping of 4712 -> A static port randomly selected by temporaryPorts
    final Map<String, PortMapping> ports1 =
        ImmutableMap.of("foo", PortMapping.of(4711), "bar", staticMapping1);
    final Map<String, PortMapping> ports2 =
        ImmutableMap.of("foo", PortMapping.of(4711), "bar", staticMapping2);

    final JobId jobId1 =
        createJob(testJobName + 1, testJobVersion, BUSYBOX, IDLE_COMMAND, EMPTY_ENV, ports1);
    assertNotNull(jobId1);
    deployJob(jobId1, testHost());
    final TaskStatus firstTaskStatus1 =
        awaitJobState(client, testHost(), jobId1, RUNNING, LONG_WAIT_SECONDS, SECONDS);

    final JobId jobId2 =
        createJob(testJobName + 2, testJobVersion, BUSYBOX, IDLE_COMMAND, EMPTY_ENV, ports2);
    assertNotNull(jobId2);
    deployJob(jobId2, testHost());
    final TaskStatus firstTaskStatus2 =
        awaitJobState(client, testHost(), jobId2, RUNNING, LONG_WAIT_SECONDS, SECONDS);

    // Verify we allocated dynamic ports from within the specified range.
    assertTrue(portRange.contains(firstTaskStatus1.getPorts().get("foo").getExternalPort()));
    assertTrue(portRange.contains(firstTaskStatus2.getPorts().get("foo").getExternalPort()));

    // Verify we allocated the static ports we asked for.
    assertEquals(staticMapping1, firstTaskStatus1.getPorts().get("bar"));
    assertEquals(staticMapping2, firstTaskStatus2.getPorts().get("bar"));

    // Verify we didn't allocate the same dynamic port to both jobs.
    assertNotEquals(firstTaskStatus1.getPorts().get("foo"),
                    firstTaskStatus2.getPorts().get("foo"));

    // TODO (dano): the supervisor should report the allocated ports at all times

    // Verify that port allocation is kept across container restarts
    dockerClient.killContainer(firstTaskStatus1.getContainerId());
    final TaskStatus restartedTaskStatus1 = awaitRestartedTask(client, jobId1, firstTaskStatus1);
    assertEquals(firstTaskStatus1.getPorts(), restartedTaskStatus1.getPorts());

    // Verify that port allocation is kept across agent restarts.
    // Note that the replacement agent is started without the --port-range argument.
    agent1.stopAsync().awaitTerminated();
    dockerClient.killContainer(firstTaskStatus2.getContainerId());
    startDefaultAgent(testHost());
    final TaskStatus restartedTaskStatus2 = awaitRestartedTask(client, jobId2, firstTaskStatus2);
    assertEquals(firstTaskStatus2.getPorts(), restartedTaskStatus2.getPorts());
  }
}

/**
 * Polls the test host's status until the task for {@code jobId} is RUNNING in a container whose
 * id differs from the one recorded in {@code previous}.
 *
 * @param client   client used to query the host status
 * @param jobId    job whose task status is inspected
 * @param previous task status captured before the container was killed
 * @return the first task status observed that satisfies the condition above
 * @throws Exception if the polling call fails
 */
private TaskStatus awaitRestartedTask(final HeliosClient client, final JobId jobId,
                                      final TaskStatus previous) throws Exception {
  return Polling.await(LONG_WAIT_SECONDS, SECONDS, new Callable<TaskStatus>() {
    @Override
    public TaskStatus call() throws Exception {
      final HostStatus hostStatus = client.hostStatus(testHost()).get();
      final TaskStatus taskStatus = hostStatus.getStatuses().get(jobId);
      // A null result presumably tells Polling.await to try again -- confirm against Polling.
      return (taskStatus != null
              && taskStatus.getState() == RUNNING
              && !Objects.equals(taskStatus.getContainerId(), previous.getContainerId()))
             ? taskStatus : null;
    }
  });
}
use of com.spotify.helios.agent.AgentMain in project helios by spotify.
the class SystemTestBase method startAgent.
/**
 * Creates an agent from the given command line arguments, starts it, waits until it is running
 * and records it in {@code services}.
 *
 * @param args command line arguments handed to the {@link AgentMain} constructor
 * @return the started agent
 * @throws Exception if the agent cannot be constructed or started
 */
protected AgentMain startAgent(final String... args) throws Exception {
  final AgentMain agent = new AgentMain(args);

  // Kick off startup, then block until the service reports that it is running.
  agent.startAsync();
  agent.awaitRunning();

  // Only an agent that came up successfully is recorded.
  services.add(agent);
  return agent;
}
use of com.spotify.helios.agent.AgentMain in project helios by spotify.
the class ZooKeeperRestoreTest method verifyAgentPushesTaskStateAfterRestore.
/**
 * Checks that an agent reports a job as RUNNING again after ZooKeeper has been restored from a
 * backup that was taken before the agent started the job.
 *
 * @throws Exception if any agent, client or ZooKeeper call fails
 */
@Test
public void verifyAgentPushesTaskStateAfterRestore() throws Exception {
  final String host = testHost();

  // Bring an agent up once so that the host gets registered, then shut it down again.
  final AgentMain registeringAgent = startDefaultAgent(host);
  awaitHostStatus(client, host, UP, LONG_WAIT_SECONDS, SECONDS);
  registeringAgent.stopAsync().awaitTerminated();

  // Deploy the job while no agent is running.
  final JobDeployResponse deployResponse =
      client.deploy(Deployment.of(fooJob.getId(), START), host).get();
  assertEquals(JobDeployResponse.Status.OK, deployResponse.getStatus());

  // Snapshot ZooKeeper at this point.
  zkc.backup(backupDir);

  // Start a fresh agent and wait until it reports the job as running.
  startDefaultAgent(host);
  awaitHostStatus(client, host, UP, LONG_WAIT_SECONDS, SECONDS);
  awaitJobState(client, host, fooJob.getId(), RUNNING, LONG_WAIT_SECONDS, SECONDS);

  // Roll ZooKeeper back to the snapshot, which erases the task state.
  zkc.stop();
  zkc.restore(backupDir);
  zkc.start();

  // The agent has to report the job as running once more.
  awaitJobState(client, host, fooJob.getId(), RUNNING, LONG_WAIT_SECONDS, SECONDS);

  // Finally remove the task status node.
  zkc.curatorWithSuperAuth().delete().forPath(Paths.statusHostJob(host, fooJob.getId()));
}
use of com.spotify.helios.agent.AgentMain in project helios by spotify.
the class DeregisterTest method testDeregister.
/**
 * Checks that a host can be deregistered after its agent has been stopped, and that a job which
 * was deployed to that host can be deleted afterwards.
 *
 * @throws Exception if any master, agent or client call fails
 */
@Test
public void testDeregister() throws Exception {
  startDefaultMaster();

  final String host = testHost();
  final AgentMain runningAgent = startDefaultAgent(host);
  final HeliosClient client = defaultClient();

  // Define and create a job exposing two ports.
  final Job job = Job.newBuilder()
      .setName(testJobName)
      .setVersion(testJobVersion)
      .setImage(BUSYBOX)
      .setCommand(IDLE_COMMAND)
      .setPorts(ImmutableMap.of("foo", PortMapping.of(4711),
                                "bar", PortMapping.of(4712, ports.localPort("bar"))))
      .build();
  final JobId jobId = job.getId();
  final CreateJobResponse createResponse = client.createJob(job).get();
  assertEquals(CreateJobResponse.Status.OK, createResponse.getStatus());

  // The agent has to be registered and up before anything can be deployed to it.
  awaitHostRegistered(client, host, LONG_WAIT_SECONDS, SECONDS);
  awaitHostStatus(client, host, UP, LONG_WAIT_SECONDS, SECONDS);

  // Deploy the job on the agent.
  final JobDeployResponse deployResponse = client.deploy(Deployment.of(jobId, START), host).get();
  assertEquals(JobDeployResponse.Status.OK, deployResponse.getStatus());

  // Wait until the job is running.
  awaitJobState(client, host, jobId, RUNNING, LONG_WAIT_SECONDS, SECONDS);

  // Shut the agent down.
  runningAgent.stopAsync().awaitTerminated();

  // Deregister the host.
  final HostDeregisterResponse deregisterResponse = client.deregisterHost(host).get();
  assertEquals(HostDeregisterResponse.Status.OK, deregisterResponse.getStatus());

  // The job must now be removable.
  final JobDeleteResponse deleteResponse = client.deleteJob(jobId).get();
  assertEquals(JobDeleteResponse.Status.OK, deleteResponse.getStatus());
}
use of com.spotify.helios.agent.AgentMain in project helios by spotify.
the class AgentRestartTest method test.
/**
 * Restarts the agent repeatedly around a single deployed job and checks the task and container
 * state after each restart.
 *
 * <p>Scenarios, in order: restart with the container left running; restart after the container
 * was killed; restart after the container was killed and removed; restart after the goal was set
 * to STOP; restart after the goal was set back to START; restart after the job was undeployed.
 *
 * <p>The Docker client is opened in a try-with-resources statement so that it is closed even
 * when an assertion fails part way through.
 *
 * @throws Exception if any master, agent, client or Docker call fails
 */
@Test
public void test() throws Exception {
  startDefaultMaster();

  try (final DockerClient dockerClient = getNewDockerClient()) {
    final HeliosClient client = defaultClient();

    final AgentMain agent1 = startDefaultAgent(testHost());

    // Create a job
    final Job job = Job.newBuilder()
        .setName(testJobName)
        .setVersion(testJobVersion)
        .setImage(BUSYBOX)
        .setCommand(IDLE_COMMAND)
        .setCreatingUser(TEST_USER)
        .build();
    final JobId jobId = job.getId();
    final CreateJobResponse created = client.createJob(job).get();
    assertEquals(CreateJobResponse.Status.OK, created.getStatus());

    // Wait for agent to come up
    awaitHostRegistered(client, testHost(), LONG_WAIT_SECONDS, SECONDS);
    awaitHostStatus(client, testHost(), UP, LONG_WAIT_SECONDS, SECONDS);

    // Deploy the job on the agent
    final Deployment deployment = Deployment.of(jobId, START);
    final JobDeployResponse deployed = client.deploy(deployment, testHost()).get();
    assertEquals(JobDeployResponse.Status.OK, deployed.getStatus());

    // Wait for the job to run
    final TaskStatus firstTaskStatus =
        awaitJobState(client, testHost(), jobId, RUNNING, LONG_WAIT_SECONDS, SECONDS);
    assertJobEquals(job, firstTaskStatus.getJob());
    assertEquals(1, listContainers(dockerClient, testTag).size());
    assertTrue(dockerClient.inspectContainer(firstTaskStatus.getContainerId()).state().running());

    // Stop the agent
    agent1.stopAsync().awaitTerminated();
    awaitHostStatus(client, testHost(), DOWN, LONG_WAIT_SECONDS, SECONDS);

    // Start the agent again
    final AgentMain agent2 = startDefaultAgent(testHost());
    awaitHostStatus(client, testHost(), UP, LONG_WAIT_SECONDS, SECONDS);

    // Wait for a while and make sure that the same container is still running
    Thread.sleep(5000);
    final HostStatus hostStatus = client.hostStatus(testHost()).get();
    final TaskStatus taskStatus = hostStatus.getStatuses().get(jobId);
    // NOTE(review): firstTaskStatus was obtained by waiting for RUNNING, so the PULLING_IMAGE
    // branch looks unreachable -- confirm before simplifying.
    if (firstTaskStatus.getState() == PULLING_IMAGE) {
      final State state = taskStatus.getState();
      assertTrue(state == RUNNING || state == PULLING_IMAGE);
    } else {
      assertEquals(RUNNING, taskStatus.getState());
    }
    assertEquals(firstTaskStatus.getContainerId(), taskStatus.getContainerId());
    assertEquals(1, listContainers(dockerClient, testTag).size());
    assertTrue(dockerClient.inspectContainer(firstTaskStatus.getContainerId()).state().running());

    // Stop the agent
    agent2.stopAsync().awaitTerminated();
    awaitHostStatus(client, testHost(), DOWN, LONG_WAIT_SECONDS, SECONDS);

    // Kill the container
    dockerClient.killContainer(firstTaskStatus.getContainerId());
    assertEquals(0, listContainers(dockerClient, testTag).size());

    // Start the agent again
    final AgentMain agent3 = startDefaultAgent(testHost());
    awaitHostStatus(client, testHost(), UP, LONG_WAIT_SECONDS, SECONDS);

    // Wait for the job to be restarted in a new container
    final TaskStatus secondTaskStatus = Polling.await(
        LONG_WAIT_SECONDS, SECONDS, new Callable<TaskStatus>() {
          @Override
          public TaskStatus call() throws Exception {
            final HostStatus hostStatus = client.hostStatus(testHost()).get();
            final TaskStatus taskStatus = hostStatus.getStatuses().get(jobId);
            return (taskStatus != null
                    && taskStatus.getContainerId() != null
                    && taskStatus.getState() == RUNNING
                    && !taskStatus.getContainerId().equals(firstTaskStatus.getContainerId()))
                   ? taskStatus : null;
          }
        });
    assertEquals(1, listContainers(dockerClient, testTag).size());
    assertTrue(dockerClient.inspectContainer(secondTaskStatus.getContainerId()).state().running());

    // Stop the agent
    agent3.stopAsync().awaitTerminated();
    awaitHostStatus(client, testHost(), DOWN, LONG_WAIT_SECONDS, SECONDS);

    // Kill and destroy the container
    dockerClient.killContainer(secondTaskStatus.getContainerId());
    removeContainer(dockerClient, secondTaskStatus.getContainerId());

    // Start the agent again
    final AgentMain agent4 = startDefaultAgent(testHost());
    awaitHostStatus(client, testHost(), UP, LONG_WAIT_SECONDS, SECONDS);

    // Wait for the task to be restarted in a new container
    final TaskStatus thirdTaskStatus = Polling.await(
        LONG_WAIT_SECONDS, SECONDS, new Callable<TaskStatus>() {
          @Override
          public TaskStatus call() throws Exception {
            final HostStatus hostStatus = client.hostStatus(testHost()).get();
            final TaskStatus taskStatus = hostStatus.getStatuses().get(jobId);
            return (taskStatus != null
                    && taskStatus.getContainerId() != null
                    && taskStatus.getState() == RUNNING
                    && !taskStatus.getContainerId().equals(secondTaskStatus.getContainerId()))
                   ? taskStatus : null;
          }
        });
    assertEquals(1, listContainers(dockerClient, testTag).size());
    assertTrue(dockerClient.inspectContainer(thirdTaskStatus.getContainerId()).state().running());

    // Stop the agent
    agent4.stopAsync().awaitTerminated();
    awaitHostStatus(client, testHost(), DOWN, LONG_WAIT_SECONDS, SECONDS);

    // Stop the job
    final SetGoalResponse stopped =
        client.setGoal(Deployment.of(jobId, STOP), testHost()).get();
    assertEquals(SetGoalResponse.Status.OK, stopped.getStatus());

    // Start the agent again
    final AgentMain agent5 = startDefaultAgent(testHost());
    awaitHostStatus(client, testHost(), UP, LONG_WAIT_SECONDS, SECONDS);

    // Verify that the task is stopped
    awaitJobState(client, testHost(), jobId, STOPPED, LONG_WAIT_SECONDS, SECONDS);
    assertEquals(0, listContainers(dockerClient, testTag).size());

    // Stop the agent
    agent5.stopAsync().awaitTerminated();
    awaitHostStatus(client, testHost(), DOWN, LONG_WAIT_SECONDS, SECONDS);

    // Start the job
    final SetGoalResponse started =
        client.setGoal(Deployment.of(jobId, START), testHost()).get();
    assertEquals(SetGoalResponse.Status.OK, started.getStatus());

    // Start the agent again
    final AgentMain agent6 = startDefaultAgent(testHost());
    awaitHostStatus(client, testHost(), UP, LONG_WAIT_SECONDS, SECONDS);

    // Verify that the task is started
    awaitJobState(client, testHost(), jobId, RUNNING, LONG_WAIT_SECONDS, SECONDS);
    assertEquals(1, listContainers(dockerClient, testTag).size());

    // Stop the agent
    agent6.stopAsync().awaitTerminated();
    awaitHostStatus(client, testHost(), DOWN, LONG_WAIT_SECONDS, SECONDS);

    // Undeploy the job
    final JobUndeployResponse undeployed = client.undeploy(jobId, testHost()).get();
    assertEquals(JobUndeployResponse.Status.OK, undeployed.getStatus());

    // Start the agent again
    startDefaultAgent(testHost());
    awaitHostStatus(client, testHost(), UP, LONG_WAIT_SECONDS, SECONDS);

    // Wait for the task to get removed
    awaitTaskGone(client, testHost(), jobId, LONG_WAIT_SECONDS, SECONDS);
    assertEquals(0, listContainers(dockerClient, testTag).size());
  }
}
Aggregations