use of com.spotify.helios.common.descriptors.DeploymentGroupTasks in project helios by spotify.
the class ZooKeeperMasterModel method getDeploymentGroupTasks.
/**
 * Loads the rollout tasks of every deployment group found under the deployment-group-tasks
 * status folder.
 *
 * <p>Each entry pairs the parsed {@link DeploymentGroupTasks} with the version of the node it
 * was read from. Nodes with no data, and nodes that disappear between listing the folder and
 * reading them, are skipped with a debug log.
 *
 * @param client the ZooKeeper client used to list and read the nodes
 * @return the tasks keyed by deployment group name; an empty map if the folder does not exist
 * @throws HeliosRuntimeException if listing, reading or parsing fails with a
 *     {@link KeeperException} or {@link IOException}
 */
private Map<String, VersionedValue<DeploymentGroupTasks>> getDeploymentGroupTasks(final ZooKeeperClient client) {
  final String tasksRoot = Paths.statusDeploymentGroupTasks();
  try {
    final List<String> groupNames;
    try {
      groupNames = client.getChildren(tasksRoot);
    } catch (NoNodeException e) {
      // The folder has not been created yet, so there is nothing to report.
      return Collections.emptyMap();
    }

    final Map<String, VersionedValue<DeploymentGroupTasks>> tasksByGroup = Maps.newHashMap();
    for (final String groupName : groupNames) {
      final String tasksPath = Paths.statusDeploymentGroupTasks(groupName);
      try {
        final Node tasksNode = client.getNode(tasksPath);
        final byte[] payload = tasksNode.getBytes();
        final int nodeVersion = tasksNode.getStat().getVersion();

        if (data(payload)) {
          final DeploymentGroupTasks parsed = parse(payload, DeploymentGroupTasks.class);
          tasksByGroup.put(groupName, VersionedValue.of(parsed, nodeVersion));
          continue;
        }

        // This can happen because of ensurePath creates an empty node
        log.debug("Ignoring empty deployment group tasks {}", groupName);
      } catch (NoNodeException e) {
        // The deployment group was deleted before we had a chance to read it; skip it.
        log.debug("Ignoring deleted deployment group tasks {}", groupName);
      }
    }
    return tasksByGroup;
  } catch (KeeperException | IOException e) {
    throw new HeliosRuntimeException("getting deployment group tasks failed", e);
  }
}

/** Returns whether the node payload holds any bytes. */
private static boolean data(final byte[] payload) {
  return payload.length != 0;
}
use of com.spotify.helios.common.descriptors.DeploymentGroupTasks in project helios by spotify.
the class ZooKeeperMasterModel method rollingUpdateStep.
/**
 * Runs one rolling-update step for every deployment group that currently has rollout tasks.
 *
 * <p>For each group the current task is processed into a {@link RollingUpdateOp}. If that op
 * carries ZooKeeper operations, they are committed in a single transaction guarded by a
 * version check on the group's tasks node, and the op's events are emitted once the
 * transaction succeeds.
 *
 * <p>Failures are isolated per deployment group: any exception raised while handling one group
 * is logged and the loop moves on to the next group.
 */
@Override
public void rollingUpdateStep() {
  final ZooKeeperClient client = provider.get("rollingUpdateStep");
  final Map<String, VersionedValue<DeploymentGroupTasks>> tasksMap = getDeploymentGroupTasks(client);
  for (final Map.Entry<String, VersionedValue<DeploymentGroupTasks>> entry : tasksMap.entrySet()) {
    final String deploymentGroupName = entry.getKey();
    final VersionedValue<DeploymentGroupTasks> versionedTasks = entry.getValue();
    try {
      // Everything that dereferences the stored tasks stays inside this try. A stale or
      // out-of-range task index must only fail this group, not abort the whole loop and starve
      // the remaining deployment groups.
      final DeploymentGroupTasks tasks = versionedTasks.value();
      final int taskIndex = tasks.getTaskIndex();
      final List<RolloutTask> rolloutTasks = tasks.getRolloutTasks();
      final RolloutTask task = rolloutTasks.get(taskIndex);
      log.info("rolling-update step on deployment-group {}. Doing taskIndex {} of {}: {}. ",
          deploymentGroupName, taskIndex, rolloutTasks.size(), task);

      final RollingUpdateOpFactory opFactory = new RollingUpdateOpFactory(tasks, DEPLOYMENT_GROUP_EVENT_FACTORY);
      final RollingUpdateOp op = processRollingUpdateTask(client, opFactory, task, tasks.getDeploymentGroup());
      if (!op.operations().isEmpty()) {
        final List<ZooKeeperOperation> ops = Lists.newArrayList();
        // The version check goes first so the transaction fails if the tasks node changed
        // since it was read.
        ops.add(check(Paths.statusDeploymentGroupTasks(deploymentGroupName), versionedTasks.version()));
        ops.addAll(op.operations());
        log.info("rolling-update step on deployment-group: name={}, zookeeper operations={}",
            deploymentGroupName, ops);
        try {
          client.transaction(ops);
          // Events are only emitted once the transaction has been committed.
          emitEvents(deploymentGroupEventTopic, op.events());
        } catch (BadVersionException e) {
          // some other master beat us in processing this rolling update step. not exceptional.
          // ideally we would check the path in the exception, but curator doesn't provide a path
          // for exceptions thrown as part of a transaction.
          log.info("rolling-update step on deployment-group was processed by another master"
              + ": name={}, zookeeper operations={}", deploymentGroupName, ops);
        } catch (KeeperException e) {
          log.error("rolling-update on deployment-group {} failed. {}", deploymentGroupName, e.getMessage(), e);
        }
      }
    } catch (final Exception e) {
      log.error("error processing rolling update step for {}", deploymentGroupName, e);
    }
  }
}
use of com.spotify.helios.common.descriptors.DeploymentGroupTasks in project helios by spotify.
the class RollingUpdateOpFactory method start.
/**
 * Builds the ZooKeeper operations and events that start a rolling update for the given
 * deployment group.
 *
 * <p>With no rollout tasks the group goes straight to {@code DONE} and its tasks node is
 * deleted. Otherwise the tasks node is set to the rollout tasks at index 0 and the group is
 * marked {@code ROLLING_OUT}.
 *
 * @param deploymentGroup the deployment group whose rolling update is starting
 * @param client the ZooKeeper client used to ensure the tasks folder exists and to probe the
 *     group's tasks node
 * @return the operations to commit and the events to emit
 * @throws KeeperException if a ZooKeeper call made through {@code client} fails
 */
public RollingUpdateOp start(final DeploymentGroup deploymentGroup, final ZooKeeperClient client) throws KeeperException {
  client.ensurePath(Paths.statusDeploymentGroupTasks());

  final List<ZooKeeperOperation> ops = Lists.newArrayList();
  final List<Map<String, Object>> events = Lists.newArrayList();
  final List<RolloutTask> rolloutTasks = tasks.getRolloutTasks();

  events.add(eventFactory.rollingUpdateStarted(deploymentGroup));

  final String groupName = deploymentGroup.getName();
  final String tasksPath = Paths.statusDeploymentGroupTasks(groupName);

  if (client.exists(tasksPath) == null) {
    // Create the tasks path if it doesn't already exist, because the delete/set below assumes
    // the node is present. If the tasks path is created or deleted before the transaction is
    // committed, the transaction will fail. That occasionally surfaces as a user-visible
    // error, but it is better than ending up with inconsistent state.
    ops.add(create(tasksPath));
  }

  final DeploymentGroupStatus status;
  if (!rolloutTasks.isEmpty()) {
    final DeploymentGroupTasks initialTasks = DeploymentGroupTasks.newBuilder()
        .setRolloutTasks(rolloutTasks)
        .setTaskIndex(0)
        .setDeploymentGroup(deploymentGroup)
        .build();
    status = DeploymentGroupStatus.newBuilder().setState(ROLLING_OUT).build();
    ops.add(set(tasksPath, initialTasks));
  } else {
    // Nothing to roll out: finish immediately and drop the tasks node.
    status = DeploymentGroupStatus.newBuilder().setState(DONE).build();
    ops.add(delete(tasksPath));
    events.add(eventFactory.rollingUpdateDone(deploymentGroup));
  }

  // NOTE: If the DG was removed, this set() causes the transaction to fail, because removing
  // the DG removes this node. It's *important* that there's an operation that causes the
  // transaction to fail if the DG was removed, or we'll end up with inconsistent state.
  ops.add(set(Paths.statusDeploymentGroup(groupName), status));

  return new RollingUpdateOp(ImmutableList.copyOf(ops), ImmutableList.copyOf(events));
}
use of com.spotify.helios.common.descriptors.DeploymentGroupTasks in project helios by spotify.
the class RollingUpdateOpFactoryTest method testErrorWhenIgnoreFailuresIsTrue.
/**
 * Verifies that an error on a deployment group whose rollout options ignore failures, raised
 * while the last rollout task is current, yields a status update to {@code DONE} with no error
 * and a delete of the group's tasks node.
 */
@Test
public void testErrorWhenIgnoreFailuresIsTrue() {
  final RolloutOptions ignoringFailures = RolloutOptions.newBuilder()
      .setIgnoreFailures(true)
      .build();
  final DeploymentGroup group = DeploymentGroup.newBuilder()
      .setName("ignore_failure_group")
      .setRolloutOptions(ignoringFailures)
      .setRollingUpdateReason(MANUAL)
      .build();

  // Index 2 makes the AWAIT_RUNNING task the current one.
  final DeploymentGroupTasks groupTasks = DeploymentGroupTasks.newBuilder()
      .setTaskIndex(2)
      .setRolloutTasks(ImmutableList.of(
          RolloutTask.of(RolloutTask.Action.UNDEPLOY_OLD_JOBS, "host1"),
          RolloutTask.of(RolloutTask.Action.DEPLOY_NEW_JOB, "host1"),
          RolloutTask.of(RolloutTask.Action.AWAIT_RUNNING, "host1")))
      .setDeploymentGroup(group)
      .build();

  final RollingUpdateOpFactory factory = new RollingUpdateOpFactory(groupTasks, eventFactory);
  final RollingUpdateOp result = factory.error(
      "something went wrong",
      "host1",
      RollingUpdateError.TIMED_OUT_WAITING_FOR_JOB_TO_REACH_RUNNING);

  final DeploymentGroupStatus doneWithoutError = DeploymentGroupStatus.newBuilder()
      .setState(DeploymentGroupStatus.State.DONE)
      .setError(null)
      .build();

  assertThat(result.operations(), containsInAnyOrder(
      new SetData("/status/deployment-groups/ignore_failure_group", doneWithoutError.toJsonBytes()),
      new Delete("/status/deployment-group-tasks/ignore_failure_group")));
}
use of com.spotify.helios.common.descriptors.DeploymentGroupTasks in project helios by spotify.
the class RollingUpdateOpFactoryTest method testStartHostsChanged.
/**
 * Verifies that starting a rolling update with pending rollout tasks, when the group's tasks
 * node does not exist yet, produces the create/set/set operations and a single
 * rollingUpdateStarted event.
 */
@Test
public void testStartHostsChanged() throws Exception {
  // Rollout tasks the factory is seeded with.
  final ArrayList<RolloutTask> rolloutTasks = Lists.newArrayList(
      RolloutTask.of(RolloutTask.Action.UNDEPLOY_OLD_JOBS, "host1"),
      RolloutTask.of(RolloutTask.Action.DEPLOY_NEW_JOB, "host1"),
      RolloutTask.of(RolloutTask.Action.AWAIT_RUNNING, "host1"));
  final DeploymentGroupTasks seededTasks = DeploymentGroupTasks.newBuilder()
      .setTaskIndex(0)
      .setRolloutTasks(rolloutTasks)
      .setDeploymentGroup(HOSTS_CHANGED_DEPLOYMENT_GROUP)
      .build();
  final RollingUpdateOpFactory factory = new RollingUpdateOpFactory(seededTasks, eventFactory);

  // A null Stat from exists() means the tasks node is not there yet.
  final ZooKeeperClient zkClient = mock(ZooKeeperClient.class);
  when(zkClient.exists(anyString())).thenReturn(null);

  final RollingUpdateOp result = factory.start(HOSTS_CHANGED_DEPLOYMENT_GROUP, zkClient);

  // Three ZK operations should return:
  // * create tasks node
  // * set the task index to 0
  // * another to set the status to ROLLING_OUT
  final DeploymentGroupTasks expectedTasks = DeploymentGroupTasks.newBuilder()
      .setRolloutTasks(rolloutTasks)
      .setTaskIndex(0)
      .setDeploymentGroup(HOSTS_CHANGED_DEPLOYMENT_GROUP)
      .build();
  final DeploymentGroupStatus expectedStatus = DeploymentGroupStatus.newBuilder()
      .setState(DeploymentGroupStatus.State.ROLLING_OUT)
      .build();
  assertEquals(
      ImmutableSet.of(
          new CreateEmpty("/status/deployment-group-tasks/my_group"),
          new SetData("/status/deployment-group-tasks/my_group", expectedTasks.toJsonBytes()),
          new SetData("/status/deployment-groups/my_group", expectedStatus.toJsonBytes())),
      ImmutableSet.copyOf(result.operations()));

  // Exactly one event should return: rollingUpdateStarted
  assertEquals(1, result.events().size());
  verify(eventFactory).rollingUpdateStarted(HOSTS_CHANGED_DEPLOYMENT_GROUP);
}
Aggregations