use of com.spotify.helios.common.descriptors.RolloutTask in project helios by spotify.
the class ZooKeeperMasterModel method rollingUpdateStep.
/**
 * Advances every in-flight rolling update by one step.
 *
 * <p>For each deployment group that currently has a tasks node, the task at the stored task
 * index is processed and the resulting ZooKeeper operations are committed in one transaction.
 * The transaction is prefixed with a version check on the group's tasks node, so it fails with
 * a {@code BadVersionException} if the node changed after it was read (for example because
 * another master already processed this step). Events are emitted only after the transaction
 * has been committed.
 *
 * <p>A failure while handling one deployment group is logged and does not prevent the
 * remaining groups from being processed.
 */
@Override
public void rollingUpdateStep() {
  final ZooKeeperClient client = provider.get("rollingUpdateStep");
  final Map<String, VersionedValue<DeploymentGroupTasks>> tasksMap = getDeploymentGroupTasks(client);
  for (final Map.Entry<String, VersionedValue<DeploymentGroupTasks>> entry : tasksMap.entrySet()) {
    final String deploymentGroupName = entry.getKey();
    final VersionedValue<DeploymentGroupTasks> versionedTasks = entry.getValue();
    try {
      // Everything that reads the stored tasks happens inside the try: a missing value or an
      // out-of-range task index must only fail this deployment group, not abort the whole loop.
      final DeploymentGroupTasks tasks = versionedTasks.value();
      final int taskIndex = tasks.getTaskIndex();
      final RolloutTask task = tasks.getRolloutTasks().get(taskIndex);
      log.info("rolling-update step on deployment-group {}. Doing taskIndex {} of {}: {}. ", deploymentGroupName, taskIndex, tasks.getRolloutTasks().size(), task);
      final RollingUpdateOpFactory opFactory = new RollingUpdateOpFactory(tasks, DEPLOYMENT_GROUP_EVENT_FACTORY);
      final RollingUpdateOp op = processRollingUpdateTask(client, opFactory, task, tasks.getDeploymentGroup());
      if (!op.operations().isEmpty()) {
        final List<ZooKeeperOperation> ops = Lists.newArrayList();
        // Guard the transaction with the version of the tasks node that was read above.
        ops.add(check(Paths.statusDeploymentGroupTasks(deploymentGroupName), versionedTasks.version()));
        ops.addAll(op.operations());
        log.info("rolling-update step on deployment-group: name={}, zookeeper operations={}", deploymentGroupName, ops);
        try {
          client.transaction(ops);
          emitEvents(deploymentGroupEventTopic, op.events());
        } catch (BadVersionException e) {
          // some other master beat us in processing this rolling update step. not exceptional.
          // ideally we would check the path in the exception, but curator doesn't provide a path
          // for exceptions thrown as part of a transaction.
          log.info("rolling-update step on deployment-group was processed by another master" + ": name={}, zookeeper operations={}", deploymentGroupName, ops);
        } catch (KeeperException e) {
          log.error("rolling-update on deployment-group {} failed", deploymentGroupName, e);
        }
      }
    } catch (final Exception e) {
      log.error("error processing rolling update step for {}", deploymentGroupName, e);
    }
  }
}
use of com.spotify.helios.common.descriptors.RolloutTask in project helios by spotify.
the class RollingUpdateOpFactory method start.
/**
 * Builds the ZooKeeper operations and events that start a rolling update for the given
 * deployment group, using the rollout tasks this factory was created with.
 *
 * <p>If there are no rollout tasks the group goes straight to {@code DONE} and its tasks node
 * is deleted; otherwise the tasks are stored with task index 0 and the group is marked
 * {@code ROLLING_OUT}.
 *
 * @param deploymentGroup the deployment group whose rolling update is starting
 * @param client ZooKeeper client used to ensure the parent path and probe the tasks node
 * @return the operations to commit and the events to emit
 * @throws KeeperException if a ZooKeeper call made here fails
 */
public RollingUpdateOp start(final DeploymentGroup deploymentGroup, final ZooKeeperClient client) throws KeeperException {
  client.ensurePath(Paths.statusDeploymentGroupTasks());

  final List<ZooKeeperOperation> zkOps = Lists.newArrayList();
  final List<Map<String, Object>> emitted = Lists.newArrayList();
  final List<RolloutTask> plannedTasks = tasks.getRolloutTasks();

  emitted.add(eventFactory.rollingUpdateStarted(deploymentGroup));

  final String groupName = deploymentGroup.getName();
  if (client.exists(Paths.statusDeploymentGroupTasks(groupName)) == null) {
    // The delete/set below assumes the tasks node exists, so create it first when it is
    // missing. If the node is created or deleted by someone else before the transaction is
    // committed, the transaction fails. That occasionally surfaces as a user-visible error,
    // which is preferable to ending up with inconsistent state.
    zkOps.add(create(Paths.statusDeploymentGroupTasks(groupName)));
  }

  final DeploymentGroupStatus status;
  if (!plannedTasks.isEmpty()) {
    // There is work to do: persist the task list, starting at the first task.
    final DeploymentGroupTasks initialTasks = DeploymentGroupTasks.newBuilder()
        .setRolloutTasks(plannedTasks)
        .setTaskIndex(0)
        .setDeploymentGroup(deploymentGroup)
        .build();
    status = DeploymentGroupStatus.newBuilder().setState(ROLLING_OUT).build();
    zkOps.add(set(Paths.statusDeploymentGroupTasks(groupName), initialTasks));
  } else {
    // Nothing to roll out: finish immediately and remove the tasks node.
    status = DeploymentGroupStatus.newBuilder().setState(DONE).build();
    zkOps.add(delete(Paths.statusDeploymentGroupTasks(groupName)));
    emitted.add(eventFactory.rollingUpdateDone(deploymentGroup));
  }

  // NOTE: If the DG was removed, this set() causes the transaction to fail, because removing
  // the DG removes this node. It's *important* that there is an operation that makes the
  // transaction fail if the DG was removed, or we'll end up with inconsistent state.
  zkOps.add(set(Paths.statusDeploymentGroup(groupName), status));

  return new RollingUpdateOp(ImmutableList.copyOf(zkOps), ImmutableList.copyOf(emitted));
}
use of com.spotify.helios.common.descriptors.RolloutTask in project helios by spotify.
the class RollingUpdateOpFactory method nextTask.
/**
 * Builds the operations and events that move the rolling update past the current task.
 *
 * <p>The supplied operations are kept as-is and followed by the bookkeeping operations: when
 * the current task is not the last one the stored task index is incremented; when it is the
 * last one the tasks node is deleted and the group status is set to {@code DONE}.
 *
 * @param operations ZooKeeper operations produced by the task that just ran (may be empty)
 * @return the combined operations to commit and the events to emit
 */
public RollingUpdateOp nextTask(final List<ZooKeeperOperation> operations) {
  final List<ZooKeeperOperation> combinedOps = Lists.newArrayList(operations);
  final List<Map<String, Object>> emitted = Lists.newArrayList();

  final int currentIndex = tasks.getTaskIndex();
  final RolloutTask currentTask = tasks.getRolloutTasks().get(currentIndex);
  final int nextIndex = currentIndex + 1;
  final String groupName = deploymentGroup.getName();

  if (nextIndex != tasks.getRolloutTasks().size()) {
    // More tasks remain: just advance the stored task index.
    combinedOps.add(set(Paths.statusDeploymentGroupTasks(groupName), tasks.toBuilder().setTaskIndex(nextIndex).build()));
    // Only report the task as succeeded if it produced operations; with no operations
    // the task was effectively a no-op.
    if (!operations.isEmpty()) {
      emitted.add(eventFactory.rollingUpdateTaskSucceeded(deploymentGroup, currentTask));
    }
    return new RollingUpdateOp(ImmutableList.copyOf(combinedOps), ImmutableList.copyOf(emitted));
  }

  // That was the last task: delete the tasks node, mark the group DONE and signal completion.
  final DeploymentGroupStatus doneStatus = DeploymentGroupStatus.newBuilder().setState(DONE).build();
  combinedOps.add(delete(Paths.statusDeploymentGroupTasks(groupName)));
  combinedOps.add(set(Paths.statusDeploymentGroup(groupName), doneStatus));
  emitted.add(eventFactory.rollingUpdateDone(deploymentGroup));
  return new RollingUpdateOp(ImmutableList.copyOf(combinedOps), ImmutableList.copyOf(emitted));
}
use of com.spotify.helios.common.descriptors.RolloutTask in project helios by spotify.
the class ZooKeeperMasterModel method getInitRollingUpdateOps.
/**
 * Plans the rollout tasks for a deployment group and returns the operations that start the
 * rolling update.
 *
 * <p>Undeploy tasks are planned first, followed by update tasks. A host that appears in both
 * lists is only updated, and only hosts for which {@code checkHostUp} returns true are planned.
 *
 * @param deploymentGroup the deployment group being rolled out
 * @param updateHosts hosts the job should be deployed or updated on
 * @param undeployHosts hosts the job should be removed from
 * @param zooKeeperClient client used for the host-up checks and for starting the rollout
 * @return the operations and events produced by {@code RollingUpdateOpFactory.start}
 * @throws KeeperException if starting the rolling update fails in ZooKeeper
 */
private RollingUpdateOp getInitRollingUpdateOps(final DeploymentGroup deploymentGroup, final List<String> updateHosts, final List<String> undeployHosts, final ZooKeeperClient zooKeeperClient) throws KeeperException {
  // Hosts to update win over hosts to undeploy, so that buggy logic in the caller can never
  // make us update a host and then remove the job from it again.
  final List<String> toUpdate = new ArrayList<>(updateHosts);
  final List<String> toUndeploy = new ArrayList<>(undeployHosts);
  toUndeploy.removeAll(toUpdate);

  // Only hosts that are UP take part in the rollout.
  final List<String> upHostsToUndeploy = new ArrayList<>();
  for (final String host : toUndeploy) {
    if (checkHostUp(zooKeeperClient, host)) {
      upHostsToUndeploy.add(host);
    }
  }
  final List<String> upHostsToDeploy = new ArrayList<>();
  for (final String host : toUpdate) {
    if (checkHostUp(zooKeeperClient, host)) {
      upHostsToDeploy.add(host);
    }
  }

  final List<RolloutTask> plannedTasks = new ArrayList<>();
  plannedTasks.addAll(RollingUndeployPlanner.of(deploymentGroup).plan(upHostsToUndeploy));
  plannedTasks.addAll(RollingUpdatePlanner.of(deploymentGroup).plan(upHostsToDeploy));
  log.info("generated rolloutTasks for deployment-group name={} updateHosts={} undeployHosts={}: {}", deploymentGroup.getName(), updateHosts, undeployHosts, plannedTasks);

  final DeploymentGroupTasks initialTasks = DeploymentGroupTasks.newBuilder()
      .setRolloutTasks(plannedTasks)
      .setTaskIndex(0)
      .setDeploymentGroup(deploymentGroup)
      .build();
  final RollingUpdateOpFactory opFactory = new RollingUpdateOpFactory(initialTasks, DEPLOYMENT_GROUP_EVENT_FACTORY);
  return opFactory.start(deploymentGroup, zooKeeperClient);
}
use of com.spotify.helios.common.descriptors.RolloutTask in project helios by spotify.
the class RollingUpdateOpFactory method error.
/**
 * Builds the operations and events that abort the rolling update with a failure.
 *
 * <p>The tasks node is deleted and the group status is set to {@code FAILED} with the error
 * message. Two events are produced: a task-failed event for the current task, and a
 * rolling-update-failed event that wraps it.
 *
 * @param msg description of what went wrong
 * @param host host the failure relates to; when null or empty the message is used unprefixed
 * @param errorCode error code passed to the task-failed event
 * @param metadata additional data passed to the task-failed event
 * @return the operations to commit and the events to emit
 */
public RollingUpdateOp error(final String msg, final String host, final RollingUpdateError errorCode, final Map<String, Object> metadata) {
  final List<ZooKeeperOperation> zkOps = Lists.newArrayList();

  // Prefix the message with the host when one is known.
  final String fullMessage;
  if (isNullOrEmpty(host)) {
    fullMessage = msg;
  } else {
    fullMessage = host + ": " + msg;
  }

  final DeploymentGroupStatus failedStatus = DeploymentGroupStatus.newBuilder()
      .setState(FAILED)
      .setError(fullMessage)
      .build();

  // Drop the remaining tasks and record the FAILED state.
  final String groupName = deploymentGroup.getName();
  zkOps.add(delete(Paths.statusDeploymentGroupTasks(groupName)));
  zkOps.add(set(Paths.statusDeploymentGroup(groupName), failedStatus));

  final RolloutTask failedTask = tasks.getRolloutTasks().get(tasks.getTaskIndex());

  // Emit the failed-task event first, then the overall FAILED event that wraps it.
  final List<Map<String, Object>> emitted = Lists.newArrayList();
  final Map<String, Object> taskFailedEvent = eventFactory.rollingUpdateTaskFailed(deploymentGroup, failedTask, fullMessage, errorCode, metadata);
  emitted.add(taskFailedEvent);
  emitted.add(eventFactory.rollingUpdateFailed(deploymentGroup, taskFailedEvent));

  return new RollingUpdateOp(ImmutableList.copyOf(zkOps), ImmutableList.copyOf(emitted));
}
Aggregations