Use of com.spotify.helios.agent.AgentMain in the project helios by spotify.
From the class DeploymentGroupTest, method testRollingUpdateCoordination.
/**
 * Verifies that a rolling update across many agents involves every master of a
 * multi-master setup.
 *
 * <p>The default master is stopped and three masters are started in its place, twenty
 * labelled agents are started, and an asynchronous rolling update is launched with a
 * parallelism equal to the number of agents. Once the job is running on the last agent
 * that was started, the deployer master recorded on every deployment is collected and the
 * number of distinct values is compared with the number of masters.
 *
 * @throws Exception if any of the steps fails
 */
@Test
public void testRollingUpdateCoordination() throws Exception {
  // Swap the default master for a set of three masters.
  master.stopAsync().awaitTerminated();
  final Map<String, MasterMain> masterSet = startDefaultMasters(3);

  // Start twenty labelled agents; the linked map keeps them in start order.
  final Map<String, AgentMain> agentsByHost = Maps.newLinkedHashMap();
  for (int index = 0; index < 20; index++) {
    final String hostName = TEST_HOST + index;
    final AgentMain agent = startDefaultAgent(hostName, "--labels", TEST_LABEL);
    agentsByHost.put(hostName, agent);
  }
  final Set<String> hostNames = agentsByHost.keySet();

  // Create the deployment group and the job, then roll out to all agents at once.
  cli("create-deployment-group", "--json", TEST_GROUP, TEST_LABEL);
  final JobId rolledOutJob = createJob(testJobName, testJobVersion, BUSYBOX, IDLE_COMMAND);
  final String parallelism = String.valueOf(agentsByHost.size());
  cli("rolling-update", "--async", "--par", parallelism, testJobNameAndVersion, TEST_GROUP);

  // Block until the job is running on the agent that was started last.
  final String lastHost = getLast(hostNames);
  awaitTaskState(rolledOutJob, lastHost, TaskStatus.State.RUNNING);

  // Gather the distinct deployer masters recorded across all hosts' deployments.
  final Set<String> deployers = Sets.newHashSet();
  final Map<String, HostStatus> statusByHost =
      defaultClient().hostStatuses(Lists.newArrayList(hostNames)).get();
  for (final HostStatus hostStatus : statusByHost.values()) {
    for (final Deployment hostDeployment : hostStatus.getJobs().values()) {
      deployers.add(hostDeployment.getDeployerMaster());
    }
  }

  // Every master must have deployed at least once.
  assertEquals(masterSet.size(), deployers.size());
}
Use of com.spotify.helios.agent.AgentMain in the project helios by spotify.
From the class DeploymentGroupTest, method testRemovingAgentTagUndeploysJob.
/**
 * Drives a deployment group that selects on the label foo=bar through a series of agent
 * changes and waits for the expected group status and task state after each one: a new
 * agent joining, an agent restarted with labels that still match, the same agent restarted
 * with labels that no longer match (after which the job is awaited as undeployed from it),
 * that agent regaining the matching label, and finally a host being deregistered.
 *
 * <p>The steps are strictly order dependent; each wait assumes the state left behind by the
 * previous step.
 *
 * @throws Exception if any of the steps fails
 */
@Test
public void testRemovingAgentTagUndeploysJob() throws Exception {
final HeliosClient client = defaultClient();
// Host names of the five agents used below, all derived from testHost().
final String oldHost = testHost();
final String deregisterHost = testHost() + "2";
final String unchangedHost = testHost() + "3";
final String newHost = testHost() + "4";
final String anotherNewHost = testHost() + "5";
// Start three agents labelled foo=bar and wait for each one to be up with that label.
// oldAgent is deliberately non-final: it is reassigned each time the agent is restarted.
@SuppressWarnings("VariableDeclarationUsageDistance") AgentMain oldAgent = startDefaultAgent(oldHost, "--labels", "foo=bar");
awaitUpWithLabels(oldHost, "foo", "bar");
// Handle kept so this agent can be stopped before its host is deregistered at the end.
final AgentMain deregisterAgent = startDefaultAgent(deregisterHost, "--labels", "foo=bar");
awaitUpWithLabels(deregisterHost, "foo", "bar");
// This agent is never restarted or stopped in this method, so its handle is not kept.
startDefaultAgent(unchangedHost, "--labels", "foo=bar");
awaitUpWithLabels(unchangedHost, "foo", "bar");
// Create the group selecting foo=bar, create the job and start an asynchronous rolling
// update; the job must reach RUNNING on all three agents and the group must reach DONE.
cli("create-deployment-group", "--json", TEST_GROUP, "foo=bar");
final JobId jobId = createJob(testJobName, testJobVersion, BUSYBOX, IDLE_COMMAND);
cli("rolling-update", "--async", testJobNameAndVersion, TEST_GROUP);
awaitTaskState(jobId, oldHost, TaskStatus.State.RUNNING);
awaitTaskState(jobId, deregisterHost, TaskStatus.State.RUNNING);
awaitTaskState(jobId, unchangedHost, TaskStatus.State.RUNNING);
awaitDeploymentGroupStatus(client, TEST_GROUP, DeploymentGroupStatus.State.DONE);
// Rollout should be complete and on its second iteration at this point.
// Start another agent and wait for it to have the job deployed to it.
startDefaultAgent(newHost, "--labels", "foo=bar");
awaitUpWithLabels(newHost, "foo", "bar");
// The group is expected to go to ROLLING_OUT and then back to DONE for the new host.
awaitDeploymentGroupStatus(client, TEST_GROUP, DeploymentGroupStatus.State.ROLLING_OUT);
awaitDeploymentGroupStatus(client, TEST_GROUP, DeploymentGroupStatus.State.DONE);
awaitTaskState(jobId, newHost, TaskStatus.State.RUNNING);
// Restart the old agent with labels that still match the deployment group
// The job should not be undeployed.
stopAgent(oldAgent);
// Reassign the handle so the next stopAgent call stops the restarted instance.
oldAgent = startDefaultAgent(oldHost, "--labels", "foo=bar", "another=label");
awaitUpWithLabels(oldHost, "foo", "bar", "another", "label");
awaitTaskState(jobId, oldHost, TaskStatus.State.RUNNING);
// Restart the old agent with labels that do not match the deployment group.
stopAgent(oldAgent);
oldAgent = startDefaultAgent(oldHost, "--labels", "foo=notbar");
awaitUpWithLabels(oldHost, "foo", "notbar");
// ...which should trigger a rolling update
awaitDeploymentGroupStatus(client, TEST_GROUP, DeploymentGroupStatus.State.ROLLING_OUT);
// Start yet another agent in order to trigger another rolling update.
// Note: unlike the other agents, this one is not awaited with awaitUpWithLabels; only the
// group status waits below follow it.
startDefaultAgent(anotherNewHost, "--labels", "foo=bar");
// Wait for the rolling update(s) to finish.
awaitDeploymentGroupStatus(client, TEST_GROUP, DeploymentGroupStatus.State.ROLLING_OUT);
awaitDeploymentGroupStatus(client, TEST_GROUP, DeploymentGroupStatus.State.DONE);
// ...which should remove the job.
awaitUndeployed(oldHost, jobId);
// Restart the old agent with labels that match the deployment group (again)
// The job should be deployed.
stopAgent(oldAgent);
// The returned handle is not kept: this instance is not stopped again in this method.
startDefaultAgent(oldHost, "--labels", "foo=bar");
awaitDeploymentGroupStatus(client, TEST_GROUP, DeploymentGroupStatus.State.ROLLING_OUT);
awaitDeploymentGroupStatus(client, TEST_GROUP, DeploymentGroupStatus.State.DONE);
awaitTaskState(jobId, oldHost, TaskStatus.State.RUNNING);
// Deregister an agent
// The agent is stopped first, then its host is deregistered through the client and the
// response status is checked.
stopAgent(deregisterAgent);
final HostDeregisterResponse deregisterResponse = client.deregisterHost(deregisterHost).get();
assertEquals(HostDeregisterResponse.Status.OK, deregisterResponse.getStatus());
// Make sure we 'undeploy' from the now non-existent agent.
awaitDeploymentGroupStatus(client, TEST_GROUP, DeploymentGroupStatus.State.ROLLING_OUT);
awaitDeploymentGroupStatus(client, TEST_GROUP, DeploymentGroupStatus.State.DONE);
}
Use of com.spotify.helios.agent.AgentMain in the project helios by spotify.
From the class AgentZooKeeperDownTolerationTest, method test.
/**
 * Checks that an agent keeps its job's container running, and replaces it when it is
 * killed, while ZooKeeper is stopped, and that the task status reflects the latest
 * container once ZooKeeper is started again.
 *
 * <p>Sequence: deploy a job and wait for it to run; stop ZooKeeper; verify the container
 * survives both the outage and an agent restart; kill the container with the agent running
 * and wait for a replacement; stop the agent, kill the replacement, start the agent and
 * wait for another replacement; finally start ZooKeeper and wait until the host is UP and
 * the task status carries the id of the last container.
 *
 * @throws Exception if any of the steps fails
 */
@Test
public void test() throws Exception {
  startDefaultMaster();
  final DockerClient dockerClient = getNewDockerClient();
  final HeliosClient client = defaultClient();
  final AgentMain agent1 = startDefaultAgent(testHost());

  // Create a job
  final Job job = Job.newBuilder()
      .setName(testJobName)
      .setVersion(testJobVersion)
      .setImage(BUSYBOX)
      .setCommand(IDLE_COMMAND)
      .setCreatingUser(TEST_USER)
      .build();
  final JobId jobId = job.getId();
  final CreateJobResponse created = client.createJob(job).get();
  assertEquals(CreateJobResponse.Status.OK, created.getStatus());

  // Wait for agent to come up
  awaitHostRegistered(client, testHost(), LONG_WAIT_SECONDS, SECONDS);
  awaitHostStatus(client, testHost(), UP, LONG_WAIT_SECONDS, SECONDS);

  // Deploy the job on the agent
  final Deployment deployment = Deployment.of(jobId, START);
  final JobDeployResponse deployed = client.deploy(deployment, testHost()).get();
  assertEquals(JobDeployResponse.Status.OK, deployed.getStatus());

  // Wait for the job to run
  final TaskStatus firstTaskStatus =
      awaitJobState(client, testHost(), jobId, RUNNING, LONG_WAIT_SECONDS, SECONDS);
  assertJobEquals(job, firstTaskStatus.getJob());
  assertNotNull(dockerClient.inspectContainer(firstTaskStatus.getContainerId()));

  // Stop zookeeper
  zk().stop();

  // Wait for a while and make sure that the container is still running
  Thread.sleep(5000);
  assertTrue(dockerClient.inspectContainer(firstTaskStatus.getContainerId()).state().running());

  // Stop the agent
  agent1.stopAsync().awaitTerminated();

  // Start the agent again
  final AgentMain agent2 = startDefaultAgent(testHost());

  // Wait for a while and make sure that the same container is still running
  Thread.sleep(5000);
  assertTrue(dockerClient.inspectContainer(firstTaskStatus.getContainerId()).state().running());

  // Kill the container
  dockerClient.killContainer(firstTaskStatus.getContainerId());
  assertFalse(dockerClient.inspectContainer(firstTaskStatus.getContainerId()).state().running());

  // Wait for a while and make sure that a new container was spawned
  final String firstRestartedContainerId = awaitSingleContainerId(dockerClient);

  // Stop the agent
  agent2.stopAsync().awaitTerminated();

  // Kill the container
  dockerClient.killContainer(firstRestartedContainerId);
  assertFalse(dockerClient.inspectContainer(firstRestartedContainerId).state().running());

  // Start the agent again
  startDefaultAgent(testHost());

  // Wait for a while and make sure that a new container was spawned
  final String secondRestartedContainerId = awaitSingleContainerId(dockerClient);
  assertTrue(dockerClient.inspectContainer(secondRestartedContainerId).state().running());

  // Start zookeeper
  zk().start();

  // Verify that the agent is listed as up
  awaitHostStatus(client, testHost(), UP, LONG_WAIT_SECONDS, SECONDS);

  // Wait for the new container id to be reflected in the task status
  Polling.await(LONG_WAIT_SECONDS, SECONDS, new Callable<TaskStatus>() {
    @Override
    public TaskStatus call() throws Exception {
      final JobStatus jobStatus = client.jobStatus(jobId).get();
      final TaskStatus taskStatus = jobStatus.getTaskStatuses().get(testHost());
      return taskStatus != null
             && Objects.equals(taskStatus.getContainerId(), secondRestartedContainerId)
             ? taskStatus : null;
    }
  });
}

/**
 * Polls with {@code Polling.await}, bounded by {@code LONG_WAIT_SECONDS}, until
 * {@code listContainers(dockerClient, testTag)} yields exactly one container, and returns
 * that container's id.
 *
 * <p>Shared by both "a replacement container was spawned" checks in {@link #test()}, which
 * previously each carried an identical anonymous {@code Callable}.
 *
 * @param dockerClient the client used to list containers
 * @return the id of the single listed container
 * @throws Exception propagated from {@code Polling.await} or from listing containers
 */
private String awaitSingleContainerId(final DockerClient dockerClient) throws Exception {
  return Polling.await(LONG_WAIT_SECONDS, SECONDS, new Callable<String>() {
    @Override
    public String call() throws Exception {
      final List<Container> containers = listContainers(dockerClient, testTag);
      // Returning null presumably makes Polling.await try again (helper not shown here).
      return containers.size() == 1 ? containers.get(0).id() : null;
    }
  });
}
Aggregations