use of com.spotify.helios.servicescommon.coordination.ZooKeeperOperation in project helios by spotify.
the class DeploymentGroupTest method testUpdateFailedManualDeploymentGroupHosts.
// Verifies that a deployment group whose last rolling update was MANUAL and ended up FAILED
// does not get a new rolling update triggered just because its hosts changed.
@Test
public void testUpdateFailedManualDeploymentGroupHosts() throws Exception {
  final ZooKeeperClient spiedClient = spy(this.client);
  final ZooKeeperMasterModel model = spy(newMasterModel(spiedClient));

  // Stub the job lookup so that a real deployment group can be added for it.
  final Job job = Job.newBuilder()
      .setCommand(ImmutableList.of("COMMAND"))
      .setImage("IMAGE")
      .setName("JOB_NAME")
      .setVersion("VERSION")
      .build();
  doReturn(job).when(model).getJob(job.getId());

  // Add a real deployment group whose rolling update reason is MANUAL.
  final RolloutOptions rolloutOptions = RolloutOptions.newBuilder().build();
  final DeploymentGroup group = DeploymentGroup.newBuilder()
      .setName(GROUP_NAME)
      .setHostSelectors(ImmutableList.of(HostSelector.parse("role=melmac")))
      .setJobId(job.getId())
      .setRolloutOptions(rolloutOptions)
      .setRollingUpdateReason(MANUAL)
      .build();
  model.addDeploymentGroup(group);
  final String groupName = group.getName();

  // Seed the group with a single host...
  spiedClient.setData(
      Paths.statusDeploymentGroupHosts(groupName),
      Json.asBytes(ImmutableList.of("host1")));

  // ...and with a FAILED status.
  final DeploymentGroupStatus failedStatus =
      DeploymentGroupStatus.newBuilder().setState(FAILED).build();
  spiedClient.setData(Paths.statusDeploymentGroup(groupName), failedStatus.toJsonBytes());

  // Make the replacement host report itself as UP.
  final HostStatus upStatus = mock(HostStatus.class);
  doReturn(HostStatus.Status.UP).when(upStatus).getStatus();
  doReturn(upStatus).when(model).getHostStatus("host2");

  // Replace host1 with host2.
  model.updateDeploymentGroupHosts(groupName, ImmutableList.of("host2"));

  // The last rolling update was manual and failed, so the group must NOT be rewritten with
  // a HOSTS_CHANGED reason: that would trigger a new rolling update.
  final DeploymentGroup hostsChangedGroup =
      group.toBuilder().setRollingUpdateReason(HOSTS_CHANGED).build();
  final ZooKeeperOperation unexpectedOp =
      set(Paths.configDeploymentGroup(groupName), hostsChangedGroup);

  // Two transactions are expected in total (presumably one from adding the group and one
  // from the host update -- confirm); only the most recently captured one is inspected.
  verify(spiedClient, times(2)).transaction(opCaptor.capture());
  assertThat(opCaptor.getValue(), not(hasItem(unexpectedOp)));
}
use of com.spotify.helios.servicescommon.coordination.ZooKeeperOperation in project helios by spotify.
the class DeploymentGroupTest method testUpdateFailedHostsChangedDeploymentGroupHosts.
// Verifies that a deployment group whose last rolling update was caused by HOSTS_CHANGED and
// ended up FAILED does start a new rolling update when its hosts change again.
@Test
public void testUpdateFailedHostsChangedDeploymentGroupHosts() throws Exception {
  final ZooKeeperClient spiedClient = spy(this.client);
  final ZooKeeperMasterModel model = spy(newMasterModel(spiedClient));

  // Stub the job lookup so that a real deployment group can be added for it.
  final Job job = Job.newBuilder()
      .setCommand(ImmutableList.of("COMMAND"))
      .setImage("IMAGE")
      .setName("JOB_NAME")
      .setVersion("VERSION")
      .build();
  doReturn(job).when(model).getJob(job.getId());

  // Add a real deployment group whose rolling update reason is HOSTS_CHANGED.
  final RolloutOptions rolloutOptions = RolloutOptions.newBuilder().build();
  final DeploymentGroup group = DeploymentGroup.newBuilder()
      .setName(GROUP_NAME)
      .setHostSelectors(ImmutableList.of(HostSelector.parse("role=melmac")))
      .setJobId(job.getId())
      .setRolloutOptions(rolloutOptions)
      .setRollingUpdateReason(HOSTS_CHANGED)
      .build();
  model.addDeploymentGroup(group);
  final String groupName = group.getName();

  // Seed the group with a single host...
  spiedClient.setData(
      Paths.statusDeploymentGroupHosts(groupName),
      Json.asBytes(ImmutableList.of("host1")));

  // ...and with a FAILED status.
  final DeploymentGroupStatus failedStatus =
      DeploymentGroupStatus.newBuilder().setState(FAILED).build();
  spiedClient.setData(Paths.statusDeploymentGroup(groupName), failedStatus.toJsonBytes());

  // Make the replacement host report itself as UP.
  final HostStatus upStatus = mock(HostStatus.class);
  doReturn(HostStatus.Status.UP).when(upStatus).getStatus();
  doReturn(upStatus).when(model).getHostStatus("host2");

  // Replace host1 with host2.
  model.updateDeploymentGroupHosts(groupName, ImmutableList.of("host2"));

  // The unchanged deployment group is expected to be written again. The write itself is a
  // no-op, but its presence shows that a rolling update was triggered.
  final ZooKeeperOperation expectedOp = set(Paths.configDeploymentGroup(groupName), group);

  // Two transactions are expected in total (presumably one from adding the group and one
  // from the host update -- confirm); only the most recently captured one is inspected.
  verify(spiedClient, times(2)).transaction(opCaptor.capture());
  assertThat(opCaptor.getValue(), hasItem(expectedOp));
}
use of com.spotify.helios.servicescommon.coordination.ZooKeeperOperation in project helios by spotify.
the class ZooKeeperMasterModel method removeJob.
/**
 * Deletes a job from ZooKeeper. Ensures that job is not currently running anywhere.
 *
 * <p>The job's config nodes are removed in a single transaction. Afterwards the job's history
 * is deleted on a best-effort basis: a failure there is only logged.
 *
 * @param id the id of the job to remove
 * @param token the token that is passed to {@code verifyToken} together with the job
 * @return the job that was removed
 * @throws JobDoesNotExistException if the job cannot be looked up, or if ZooKeeper reports a
 *     missing node while the removal transaction runs
 * @throws JobStillDeployedException if ZooKeeper reports a non-empty node while the removal
 *     transaction runs
 * @throws TokenVerificationException presumably raised by {@code verifyToken} when the token
 *     does not match the job (its implementation is not visible here)
 */
@Override
public Job removeJob(final JobId id, final String token)
    throws JobDoesNotExistException, JobStillDeployedException, TokenVerificationException {
  log.info("removing job: id={}", id);
  final ZooKeeperClient client = provider.get("removeJob");
  final Job job = getJob(client, id);
  if (job == null) {
    throw new JobDoesNotExistException(id);
  }
  verifyToken(token, job);

  // TODO (dano): handle retry failures
  try {
    final ImmutableList.Builder<ZooKeeperOperation> operations = ImmutableList.builder();
    final UUID jobCreationOperationId = getJobCreation(client, id);
    if (jobCreationOperationId != null) {
      operations.add(delete(Paths.configJobCreation(id, jobCreationOperationId)));
    }
    operations.add(
        delete(Paths.configJobHosts(id)),
        delete(Paths.configJobRefShort(id)),
        delete(Paths.configJob(id)),
        // Write a fresh random value to the jobs root node so that its version is bumped on
        // every job change down the tree. Effectively, make it that version == cVersion.
        // The charset is given explicitly so the bytes do not depend on the platform default.
        set(Paths.configJobs(),
            UUID.randomUUID().toString().getBytes(java.nio.charset.StandardCharsets.UTF_8)));
    client.transaction(operations.build());
  } catch (final NoNodeException e) {
    throw new JobDoesNotExistException(id);
  } catch (final NotEmptyException e) {
    throw new JobStillDeployedException(id, listJobHosts(client, id));
  } catch (final KeeperException e) {
    throw new HeliosRuntimeException("removing job " + id + " failed", e);
  }

  // Delete job history on a best effort basis
  try {
    client.deleteRecursive(Paths.historyJob(id));
  } catch (NoNodeException ignored) {
    // There's no history for this job
  } catch (KeeperException e) {
    log.warn("error removing job history for job {}", id, e);
  }
  return job;
}
use of com.spotify.helios.servicescommon.coordination.ZooKeeperOperation in project helios by spotify.
the class ZooKeeperMasterModel method rollingUpdateStep.
/**
 * Performs one rolling-update step for every deployment group returned by
 * {@code getDeploymentGroupTasks}.
 *
 * <p>For each group, the rollout task at the group's current task index is processed. If that
 * yields any ZooKeeper operations, they are committed in a single transaction that is guarded
 * by a version check on the group's tasks node, and the resulting events are emitted only
 * after the transaction succeeded.
 *
 * <p>Groups are handled independently: any failure while reading or processing one group's
 * tasks (including a task index that does not match the task list) is logged and the loop
 * moves on to the next group.
 */
@Override
public void rollingUpdateStep() {
  final ZooKeeperClient client = provider.get("rollingUpdateStep");
  final Map<String, VersionedValue<DeploymentGroupTasks>> tasksMap =
      getDeploymentGroupTasks(client);

  for (final Map.Entry<String, VersionedValue<DeploymentGroupTasks>> entry
      : tasksMap.entrySet()) {
    final String deploymentGroupName = entry.getKey();
    final VersionedValue<DeploymentGroupTasks> versionedTasks = entry.getValue();

    try {
      // Everything that reads this group's task data lives inside the try block, so that a
      // broken record for one group cannot abort the step for the remaining groups.
      final DeploymentGroupTasks tasks = versionedTasks.value();
      final int taskIndex = tasks.getTaskIndex();
      final RolloutTask task = tasks.getRolloutTasks().get(taskIndex);

      log.info("rolling-update step on deployment-group {}. Doing taskIndex {} of {}: {}. ",
          deploymentGroupName, taskIndex, tasks.getRolloutTasks().size(), task);

      final RollingUpdateOpFactory opFactory =
          new RollingUpdateOpFactory(tasks, DEPLOYMENT_GROUP_EVENT_FACTORY);
      final RollingUpdateOp op =
          processRollingUpdateTask(client, opFactory, task, tasks.getDeploymentGroup());

      if (!op.operations().isEmpty()) {
        final List<ZooKeeperOperation> ops = Lists.newArrayList();
        // The version check makes the transaction fail if the tasks node was modified since
        // it was read.
        ops.add(check(Paths.statusDeploymentGroupTasks(deploymentGroupName),
            versionedTasks.version()));
        ops.addAll(op.operations());
        log.info("rolling-update step on deployment-group: name={}, zookeeper operations={}",
            deploymentGroupName, ops);
        try {
          client.transaction(ops);
          emitEvents(deploymentGroupEventTopic, op.events());
        } catch (BadVersionException e) {
          // some other master beat us in processing this rolling update step. not exceptional.
          // ideally we would check the path in the exception, but curator doesn't provide a path
          // for exceptions thrown as part of a transaction.
          log.info("rolling-update step on deployment-group was processed by another master"
              + ": name={}, zookeeper operations={}", deploymentGroupName, ops);
        } catch (KeeperException e) {
          log.error("rolling-update on deployment-group {} failed", deploymentGroupName, e);
        }
      }
    } catch (final Exception e) {
      log.error("error processing rolling update step for {}", deploymentGroupName, e);
    }
  }
}
use of com.spotify.helios.servicescommon.coordination.ZooKeeperOperation in project helios by spotify.
the class ZooKeeperRegistrarUtil method deregisterHost.
/**
 * Removes a host's registration from ZooKeeper.
 *
 * <p>Delete operations are collected for the jobs deployed to the host (together with the
 * matching job-to-host nodes and the per-host job history), the host's status subtree, its
 * port allocations, its id node and finally its config root. All of them are then submitted
 * through a single {@code client.transaction} call. Within every subtree the listing is
 * reversed before the deletes are queued, and each parent node is queued after its children.
 *
 * @param client the ZooKeeper client used for the lookups and for the transaction
 * @param host the name of the host to deregister
 * @throws HostNotFoundException if the host's config node does not exist, or if a
 *     {@code NoNodeException} is raised along the way
 * @throws HostStillInUseException declared by the signature; nothing in this method body
 *     throws it explicitly
 */
public static void deregisterHost(final ZooKeeperClient client, final String host)
    throws HostNotFoundException, HostStillInUseException {
  log.info("deregistering host: {}", host);

  // TODO (dano): handle retry failures
  try {
    final String hostConfigPath = Paths.configHost(host);
    final boolean hostExists = client.exists(hostConfigPath) != null;
    if (!hostExists) {
      throw new HostNotFoundException("host [" + host + "] does not exist");
    }

    final List<ZooKeeperOperation> ops = Lists.newArrayList();

    // Everything belonging to the jobs deployed on this host.
    final List<String> deployedJobs = safeGetChildren(client, Paths.configHostJobs(host));
    for (final String jobName : deployedJobs) {
      final JobId jobId = JobId.fromString(jobName);

      // The host's view of the job.
      final List<String> hostJobNodes =
          safeListRecursive(client, Paths.configHostJob(host, jobId));
      for (final String hostJobNode : reverse(hostJobNodes)) {
        ops.add(delete(hostJobNode));
      }

      // The job's view of the host, if there is one.
      final String jobHostPath = Paths.configJobHost(jobId, host);
      if (client.exists(jobHostPath) != null) {
        ops.add(delete(jobHostPath));
      }

      // The history of this job on this host.
      final List<String> historyNodes =
          safeListRecursive(client, Paths.historyJobHost(jobId, host));
      for (final String historyNode : reverse(historyNodes)) {
        ops.add(delete(historyNode));
      }
    }
    ops.add(delete(Paths.configHostJobs(host)));

    // The host status subtree.
    final List<String> statusNodes = safeListRecursive(client, Paths.statusHost(host));
    for (final String statusNode : reverse(statusNodes)) {
      ops.add(delete(statusNode));
    }

    // The port allocations, followed by their parent node.
    final List<String> allocatedPorts = safeGetChildren(client, Paths.configHostPorts(host));
    for (final String allocatedPort : allocatedPorts) {
      ops.add(delete(Paths.configHostPort(host, Integer.valueOf(allocatedPort))));
    }
    ops.add(delete(Paths.configHostPorts(host)));

    // The host id, if it is present.
    final String hostIdPath = Paths.configHostId(host);
    if (client.exists(hostIdPath) != null) {
      ops.add(delete(hostIdPath));
    }

    // Last of all, the host config root itself.
    ops.add(delete(hostConfigPath));

    client.transaction(ops);
  } catch (NoNodeException e) {
    throw new HostNotFoundException(host);
  } catch (KeeperException e) {
    throw new HeliosRuntimeException(e);
  }
}
Aggregations