Search in sources :

Example 1 with RollingUpdateOp

use of com.spotify.helios.rollingupdate.RollingUpdateOp in project helios by spotify.

the class ZooKeeperMasterModel method rollingUpdateStep.

/**
 * Runs one rolling-update step for every deployment group returned by
 * {@code getDeploymentGroupTasks}.
 *
 * <p>For each group the task at the group's current task index is processed. If that yields any
 * ZooKeeper operations, they are committed in a single transaction that is guarded by a version
 * check on the group's tasks node; the resulting events are emitted only after the transaction
 * succeeds.
 *
 * <p>Failures are isolated per group: any exception raised while handling one group (including
 * a task index that does not match the task list) is logged and the loop moves on to the next
 * group.
 */
@Override
public void rollingUpdateStep() {
    final ZooKeeperClient client = provider.get("rollingUpdateStep");
    final Map<String, VersionedValue<DeploymentGroupTasks>> tasksMap = getDeploymentGroupTasks(client);
    for (final Map.Entry<String, VersionedValue<DeploymentGroupTasks>> entry : tasksMap.entrySet()) {
        final String deploymentGroupName = entry.getKey();
        final VersionedValue<DeploymentGroupTasks> versionedTasks = entry.getValue();
        try {
            // Everything that dereferences the stored tasks lives inside the try so that a
            // malformed entry for one group cannot abort the step for all remaining groups.
            final DeploymentGroupTasks tasks = versionedTasks.value();
            final int taskIndex = tasks.getTaskIndex();
            final List<RolloutTask> rolloutTasks = tasks.getRolloutTasks();
            final RolloutTask task = rolloutTasks.get(taskIndex);
            log.info("rolling-update step on deployment-group {}. Doing taskIndex {} of {}: {}. ", deploymentGroupName, taskIndex, rolloutTasks.size(), task);
            final RollingUpdateOpFactory opFactory = new RollingUpdateOpFactory(tasks, DEPLOYMENT_GROUP_EVENT_FACTORY);
            final RollingUpdateOp op = processRollingUpdateTask(client, opFactory, task, tasks.getDeploymentGroup());
            if (!op.operations().isEmpty()) {
                final List<ZooKeeperOperation> ops = Lists.newArrayList();
                // Version check first: the transaction fails if the tasks node changed since we read it.
                ops.add(check(Paths.statusDeploymentGroupTasks(deploymentGroupName), versionedTasks.version()));
                ops.addAll(op.operations());
                log.info("rolling-update step on deployment-group: name={}, zookeeper operations={}", deploymentGroupName, ops);
                try {
                    client.transaction(ops);
                    emitEvents(deploymentGroupEventTopic, op.events());
                } catch (BadVersionException e) {
                    // some other master beat us in processing this rolling update step. not exceptional.
                    // ideally we would check the path in the exception, but curator doesn't provide a path
                    // for exceptions thrown as part of a transaction.
                    log.info("rolling-update step on deployment-group was processed by another master" + ": name={}, zookeeper operations={}", deploymentGroupName, ops);
                } catch (KeeperException e) {
                    log.error("rolling-update on deployment-group {} failed", deploymentGroupName, e);
                }
            }
        } catch (final Exception e) {
            log.error("error processing rolling update step for {}", deploymentGroupName, e);
        }
    }
}
Also used : VersionedValue(com.spotify.helios.servicescommon.VersionedValue) ZooKeeperOperation(com.spotify.helios.servicescommon.coordination.ZooKeeperOperation) DeploymentGroupTasks(com.spotify.helios.common.descriptors.DeploymentGroupTasks) RolloutTask(com.spotify.helios.common.descriptors.RolloutTask) BadVersionException(org.apache.zookeeper.KeeperException.BadVersionException) HeliosRuntimeException(com.spotify.helios.common.HeliosRuntimeException) JsonParseException(com.fasterxml.jackson.core.JsonParseException) NotEmptyException(org.apache.zookeeper.KeeperException.NotEmptyException) JsonMappingException(com.fasterxml.jackson.databind.JsonMappingException) BadVersionException(org.apache.zookeeper.KeeperException.BadVersionException) KeeperException(org.apache.zookeeper.KeeperException) IOException(java.io.IOException) NoNodeException(org.apache.zookeeper.KeeperException.NoNodeException) NodeExistsException(org.apache.zookeeper.KeeperException.NodeExistsException) RollingUpdateOpFactory(com.spotify.helios.rollingupdate.RollingUpdateOpFactory) RollingUpdateOp(com.spotify.helios.rollingupdate.RollingUpdateOp) ZooKeeperClient(com.spotify.helios.servicescommon.coordination.ZooKeeperClient) Map(java.util.Map) ImmutableMap(com.google.common.collect.ImmutableMap) Collections.emptyMap(java.util.Collections.emptyMap) KeeperException(org.apache.zookeeper.KeeperException)

Example 2 with RollingUpdateOp

use of com.spotify.helios.rollingupdate.RollingUpdateOp in project helios by spotify.

the class ZooKeeperMasterModel method updateDeploymentGroupHosts.

/**
 * Stores a new host list for a deployment group and, when the group is configured for it,
 * starts a rolling update in the same ZooKeeper transaction.
 *
 * <p>Returns without writing anything when {@code allowHostChange} rejects the group's current
 * status, or when neither the host list nor the derived removed-hosts list differs from what is
 * already stored.
 *
 * @param groupName name of the deployment group
 * @param hosts the new list of hosts for the group
 * @throws DeploymentGroupDoesNotExistException if ZooKeeper reports a missing node
 * @throws HeliosRuntimeException on any other ZooKeeper or I/O failure
 */
@Override
public void updateDeploymentGroupHosts(final String groupName, final List<String> hosts) throws DeploymentGroupDoesNotExistException {
    log.debug("updating deployment-group hosts: name={}", groupName);
    final ZooKeeperClient zk = provider.get("updateDeploymentGroupHosts");
    try {
        final DeploymentGroupStatus groupStatus = getDeploymentGroupStatus(groupName);
        if (!allowHostChange(groupStatus)) {
            return;
        }

        // Groups created before the removed-hosts node was introduced may not have it yet.
        zk.ensurePathAndSetData(Paths.statusDeploymentGroupRemovedHosts(groupName), Json.asBytesUnchecked(emptyList()));

        final List<String> storedHosts = getHosts(zk, Paths.statusDeploymentGroupHosts(groupName));
        final List<String> storedRemovedHosts = getHosts(zk, Paths.statusDeploymentGroupRemovedHosts(groupName));
        final List<String> newRemovedHosts = removedHosts(storedHosts, hosts, storedRemovedHosts);

        final boolean nothingChanged = hosts.equals(storedHosts) && newRemovedHosts.equals(storedRemovedHosts);
        if (nothingChanged) {
            return;
        }
        log.info("for deployment-group name={}, curHosts={}, new hosts={}, " + "previouslyRemovedHosts={}, derived removedHosts={}", groupName, storedHosts, hosts, storedRemovedHosts, newRemovedHosts);

        final List<ZooKeeperOperation> transactionOps = Lists.newArrayList();
        transactionOps.add(set(Paths.statusDeploymentGroupHosts(groupName), Json.asBytes(hosts)));
        transactionOps.add(set(Paths.statusDeploymentGroupRemovedHosts(groupName), Json.asBytes(newRemovedHosts)));

        final Node groupNode = zk.getNode(Paths.configDeploymentGroup(groupName));
        final Integer groupConfigVersion = groupNode.getStat().getVersion();
        DeploymentGroup group = Json.read(groupNode.getBytes(), DeploymentGroup.class);

        List<Map<String, Object>> pendingEvents = ImmutableList.of();
        final boolean startRollingUpdate = group.getJobId() != null && updateOnHostChange(group, groupStatus);
        if (startRollingUpdate) {
            group = group.toBuilder().setRollingUpdateReason(HOSTS_CHANGED).build();
            // Abort the transaction if someone else modified the deployment group config meanwhile.
            transactionOps.add(check(Paths.configDeploymentGroup(groupName), groupConfigVersion));
            // NOTE: removing a deployment group removes this config node, so this set() makes the
            // transaction fail if the group was deleted. Having at least one operation that fails
            // in that case is *important*; without it we would end up with inconsistent state.
            transactionOps.add(set(Paths.configDeploymentGroup(group.getName()), group));
            final RollingUpdateOp initOp = getInitRollingUpdateOps(group, hosts, newRemovedHosts, zk);
            transactionOps.addAll(initOp.operations());
            pendingEvents = initOp.events();
        }

        log.info("starting zookeeper transaction for updateDeploymentGroupHosts on deployment-group: " + "name={} jobId={} operations={}", groupName, group.getJobId(), transactionOps);
        zk.transaction(transactionOps);
        // Events are published only once the transaction has gone through.
        emitEvents(deploymentGroupEventTopic, pendingEvents);
    } catch (BadVersionException e) {
        // Another master already handled this host update; that is expected, not an error.
        // Ideally the path in the exception would be checked, but curator does not supply a path
        // for exceptions thrown from within a transaction.
        log.info("zookeeper transaction for updateDeploymentGroupHosts on deployment-group was " + "processed by another master: name={}", groupName);
    } catch (NoNodeException e) {
        throw new DeploymentGroupDoesNotExistException(groupName, e);
    } catch (KeeperException | IOException e) {
        throw new HeliosRuntimeException("updating deployment group hosts failed", e);
    }
}
Also used : NoNodeException(org.apache.zookeeper.KeeperException.NoNodeException) ZooKeeperOperation(com.spotify.helios.servicescommon.coordination.ZooKeeperOperation) Node(com.spotify.helios.servicescommon.coordination.Node) HeliosRuntimeException(com.spotify.helios.common.HeliosRuntimeException) DeploymentGroupStatus(com.spotify.helios.common.descriptors.DeploymentGroupStatus) BadVersionException(org.apache.zookeeper.KeeperException.BadVersionException) IOException(java.io.IOException) RollingUpdateOp(com.spotify.helios.rollingupdate.RollingUpdateOp) ZooKeeperClient(com.spotify.helios.servicescommon.coordination.ZooKeeperClient) Map(java.util.Map) ImmutableMap(com.google.common.collect.ImmutableMap) Collections.emptyMap(java.util.Collections.emptyMap) DeploymentGroup(com.spotify.helios.common.descriptors.DeploymentGroup) KeeperException(org.apache.zookeeper.KeeperException)

Example 3 with RollingUpdateOp

use of com.spotify.helios.rollingupdate.RollingUpdateOp in project helios by spotify.

the class ZooKeeperMasterModel method rollingUpdate.

/**
 * Initiates a manually-triggered rolling update of a deployment group to the given job.
 *
 * <p>A copy of the deployment group is built with the new job id, the rollout options and the
 * {@code MANUAL} rolling-update reason. That copy and the initial rolling-update operations are
 * written in a single ZooKeeper transaction; events are emitted only after the transaction
 * succeeds.
 *
 * @param deploymentGroup the deployment group to update; must not be null
 * @param jobId the job to roll out
 * @param options rollout options stored on the deployment group
 * @throws DeploymentGroupDoesNotExistException if ZooKeeper reports a missing node
 * @throws JobDoesNotExistException if {@code getJob(jobId)} returns null
 * @throws HeliosRuntimeException on any other ZooKeeper failure
 */
@Override
public void rollingUpdate(final DeploymentGroup deploymentGroup, final JobId jobId, final RolloutOptions options) throws DeploymentGroupDoesNotExistException, JobDoesNotExistException {
    checkNotNull(deploymentGroup, "deploymentGroup");
    log.info("preparing to initiate rolling-update on deployment-group: name={}, jobId={}", deploymentGroup.getName(), jobId);
    final DeploymentGroup updated = deploymentGroup.toBuilder().setJobId(jobId).setRolloutOptions(options).setRollingUpdateReason(MANUAL).build();
    if (getJob(jobId) == null) {
        throw new JobDoesNotExistException(jobId);
    }
    final List<ZooKeeperOperation> operations = Lists.newArrayList();
    final ZooKeeperClient client = provider.get("rollingUpdate");
    operations.add(set(Paths.configDeploymentGroup(updated.getName()), updated));
    try {
        final RollingUpdateOp op = getInitRollingUpdateOps(updated, client);
        operations.addAll(op.operations());
        log.info("starting zookeeper transaction for rolling-update on " + "deployment-group name={} jobId={}. List of operations: {}", deploymentGroup.getName(), jobId, operations);
        client.transaction(operations);
        emitEvents(deploymentGroupEventTopic, op.events());
        log.info("initiated rolling-update on deployment-group: name={}, jobId={}", deploymentGroup.getName(), jobId);
    } catch (final NoNodeException e) {
        // Keep the ZooKeeper exception as the cause so the failing path/stack is not lost,
        // matching how updateDeploymentGroupHosts reports the same condition.
        throw new DeploymentGroupDoesNotExistException(deploymentGroup.getName(), e);
    } catch (final KeeperException e) {
        throw new HeliosRuntimeException("rolling-update on deployment-group " + deploymentGroup.getName() + " failed", e);
    }
}
Also used : RollingUpdateOp(com.spotify.helios.rollingupdate.RollingUpdateOp) NoNodeException(org.apache.zookeeper.KeeperException.NoNodeException) ZooKeeperOperation(com.spotify.helios.servicescommon.coordination.ZooKeeperOperation) ZooKeeperClient(com.spotify.helios.servicescommon.coordination.ZooKeeperClient) HeliosRuntimeException(com.spotify.helios.common.HeliosRuntimeException) DeploymentGroup(com.spotify.helios.common.descriptors.DeploymentGroup) KeeperException(org.apache.zookeeper.KeeperException)

Example 4 with RollingUpdateOp

use of com.spotify.helios.rollingupdate.RollingUpdateOp in project helios by spotify.

the class ZooKeeperMasterModel method getInitRollingUpdateOps.

/**
 * Builds the operation that starts a rolling update covering both hosts to undeploy from and
 * hosts to deploy to.
 *
 * <p>Only hosts for which {@code checkHostUp} returns true are planned. Undeploy tasks are
 * placed ahead of update tasks, and the task index of the resulting task list starts at 0.
 *
 * @param deploymentGroup the deployment group being rolled out
 * @param updateHosts hosts that should receive the group's job
 * @param undeployHosts hosts the job should be removed from
 * @param zooKeeperClient client used for the host-up checks and to start the rollout
 * @return the operation produced by {@code RollingUpdateOpFactory.start}
 * @throws KeeperException declared for callers; propagated from the underlying calls
 */
private RollingUpdateOp getInitRollingUpdateOps(final DeploymentGroup deploymentGroup, final List<String> updateHosts, final List<String> undeployHosts, final ZooKeeperClient zooKeeperClient) throws KeeperException {
    // A host present in both lists is treated as an update target only. This guards against
    // updating a host and then removing the job from it (because of buggy logic in the caller).
    final List<String> hostsToUpdate = new ArrayList<>(updateHosts);

    // Only hosts that are UP are of interest; undeploy candidates are checked first.
    final List<String> liveUndeployTargets = undeployHosts.stream()
        .filter(host -> !hostsToUpdate.contains(host))
        .filter(host -> checkHostUp(zooKeeperClient, host))
        .collect(Collectors.toList());
    final List<String> liveUpdateTargets = hostsToUpdate.stream()
        .filter(host -> checkHostUp(zooKeeperClient, host))
        .collect(Collectors.toList());

    final List<RolloutTask> plannedTasks = new ArrayList<>();
    plannedTasks.addAll(RollingUndeployPlanner.of(deploymentGroup).plan(liveUndeployTargets));
    plannedTasks.addAll(RollingUpdatePlanner.of(deploymentGroup).plan(liveUpdateTargets));
    log.info("generated rolloutTasks for deployment-group name={} " + "updateHosts={} undeployHosts={}: {}", deploymentGroup.getName(), updateHosts, undeployHosts, plannedTasks);

    final DeploymentGroupTasks groupTasks = DeploymentGroupTasks.newBuilder()
        .setRolloutTasks(plannedTasks)
        .setTaskIndex(0)
        .setDeploymentGroup(deploymentGroup)
        .build();
    final RollingUpdateOpFactory factory = new RollingUpdateOpFactory(groupTasks, DEPLOYMENT_GROUP_EVENT_FACTORY);
    return factory.start(deploymentGroup, zooKeeperClient);
}
Also used : Descriptor.parse(com.spotify.helios.common.descriptors.Descriptor.parse) HeliosRuntimeException(com.spotify.helios.common.HeliosRuntimeException) ZooKeeperClient(com.spotify.helios.servicescommon.coordination.ZooKeeperClient) LoggerFactory(org.slf4j.LoggerFactory) Stat(org.apache.zookeeper.data.Stat) ThrottleState(com.spotify.helios.common.descriptors.ThrottleState) FAILED(com.spotify.helios.common.descriptors.DeploymentGroupStatus.State.FAILED) ZooKeeperOperations.create(com.spotify.helios.servicescommon.coordination.ZooKeeperOperations.create) Collections.singletonList(java.util.Collections.singletonList) Optional.fromNullable(com.google.common.base.Optional.fromNullable) Json(com.spotify.helios.common.Json) RolloutOptions(com.spotify.helios.common.descriptors.RolloutOptions) RollingUndeployPlanner(com.spotify.helios.rollingupdate.RollingUndeployPlanner) ZooKeeperOperations.set(com.spotify.helios.servicescommon.coordination.ZooKeeperOperations.set) VersionedValue(com.spotify.helios.servicescommon.VersionedValue) Map(java.util.Map) Deployment(com.spotify.helios.common.descriptors.Deployment) TypeReference(com.fasterxml.jackson.core.type.TypeReference) JsonParseException(com.fasterxml.jackson.core.JsonParseException) HostInfo(com.spotify.helios.common.descriptors.HostInfo) Function(com.google.common.base.Function) ImmutableSet(com.google.common.collect.ImmutableSet) ImmutableMap(com.google.common.collect.ImmutableMap) Collections.emptyList(java.util.Collections.emptyList) DeploymentGroup(com.spotify.helios.common.descriptors.DeploymentGroup) EventSender(com.spotify.helios.servicescommon.EventSender) Set(java.util.Set) PortMapping(com.spotify.helios.common.descriptors.PortMapping) MILLISECONDS(java.util.concurrent.TimeUnit.MILLISECONDS) RollingUpdateOp(com.spotify.helios.rollingupdate.RollingUpdateOp) UUID(java.util.UUID) Collectors(java.util.stream.Collectors) Sets(com.google.common.collect.Sets) Objects(java.util.Objects) 
Nullable(org.jetbrains.annotations.Nullable) HOSTS_CHANGED(com.spotify.helios.common.descriptors.DeploymentGroup.RollingUpdateReason.HOSTS_CHANGED) TaskStatusEvent(com.spotify.helios.common.descriptors.TaskStatusEvent) List(java.util.List) Lists.newArrayList(com.google.common.collect.Lists.newArrayList) NotEmptyException(org.apache.zookeeper.KeeperException.NotEmptyException) MANUAL(com.spotify.helios.common.descriptors.DeploymentGroup.RollingUpdateReason.MANUAL) UP(com.spotify.helios.common.descriptors.HostStatus.Status.UP) JsonMappingException(com.fasterxml.jackson.databind.JsonMappingException) RollingUpdateOpFactory(com.spotify.helios.rollingupdate.RollingUpdateOpFactory) Lists.reverse(com.google.common.collect.Lists.reverse) RollingUpdatePlanner(com.spotify.helios.rollingupdate.RollingUpdatePlanner) ZooKeeperOperations.delete(com.spotify.helios.servicescommon.coordination.ZooKeeperOperations.delete) Joiner(com.google.common.base.Joiner) JobId(com.spotify.helios.common.descriptors.JobId) RollingUpdateError(com.spotify.helios.rollingupdate.RollingUpdateError) Goal(com.spotify.helios.common.descriptors.Goal) Strings.isNullOrEmpty(com.google.common.base.Strings.isNullOrEmpty) Paths(com.spotify.helios.servicescommon.coordination.Paths) ZooKeeperRegistrarUtil(com.spotify.helios.servicescommon.ZooKeeperRegistrarUtil) ArrayList(java.util.ArrayList) ROLLING_OUT(com.spotify.helios.common.descriptors.DeploymentGroupStatus.State.ROLLING_OUT) Strings(com.google.common.base.Strings) Lists(com.google.common.collect.Lists) ImmutableList(com.google.common.collect.ImmutableList) DeploymentGroupTasks(com.spotify.helios.common.descriptors.DeploymentGroupTasks) HostStatus(com.spotify.helios.common.descriptors.HostStatus) DOWN(com.spotify.helios.common.descriptors.HostStatus.Status.DOWN) OpResult(org.apache.zookeeper.OpResult) Task(com.spotify.helios.common.descriptors.Task) TaskStatus(com.spotify.helios.common.descriptors.TaskStatus) 
BadVersionException(org.apache.zookeeper.KeeperException.BadVersionException) Collections.emptyMap(java.util.Collections.emptyMap) Logger(org.slf4j.Logger) Job(com.spotify.helios.common.descriptors.Job) KeeperException(org.apache.zookeeper.KeeperException) Preconditions.checkNotNull(com.google.common.base.Preconditions.checkNotNull) MoreObjects(com.google.common.base.MoreObjects) Throwables(com.google.common.base.Throwables) IOException(java.io.IOException) ZooKeeperOperation(com.spotify.helios.servicescommon.coordination.ZooKeeperOperation) Maps(com.google.common.collect.Maps) DeploymentGroupEventFactory(com.spotify.helios.rollingupdate.DeploymentGroupEventFactory) RolloutTask(com.spotify.helios.common.descriptors.RolloutTask) Node(com.spotify.helios.servicescommon.coordination.Node) Ordering(com.google.common.collect.Ordering) NoNodeException(org.apache.zookeeper.KeeperException.NoNodeException) JobStatus(com.spotify.helios.common.descriptors.JobStatus) ZooKeeperClientProvider(com.spotify.helios.servicescommon.coordination.ZooKeeperClientProvider) DeploymentGroupStatus(com.spotify.helios.common.descriptors.DeploymentGroupStatus) ZooKeeperOperations.check(com.spotify.helios.servicescommon.coordination.ZooKeeperOperations.check) Preconditions(com.google.common.base.Preconditions) AgentInfo(com.spotify.helios.common.descriptors.AgentInfo) Comparator(java.util.Comparator) NodeExistsException(org.apache.zookeeper.KeeperException.NodeExistsException) Collections(java.util.Collections) Lists.newArrayList(com.google.common.collect.Lists.newArrayList) ArrayList(java.util.ArrayList) DeploymentGroupTasks(com.spotify.helios.common.descriptors.DeploymentGroupTasks) RolloutTask(com.spotify.helios.common.descriptors.RolloutTask) RollingUpdateOpFactory(com.spotify.helios.rollingupdate.RollingUpdateOpFactory)

Aggregations

HeliosRuntimeException (com.spotify.helios.common.HeliosRuntimeException)4 ImmutableMap (com.google.common.collect.ImmutableMap)3 DeploymentGroup (com.spotify.helios.common.descriptors.DeploymentGroup)3 RollingUpdateOp (com.spotify.helios.rollingupdate.RollingUpdateOp)3 ZooKeeperClient (com.spotify.helios.servicescommon.coordination.ZooKeeperClient)3 ZooKeeperOperation (com.spotify.helios.servicescommon.coordination.ZooKeeperOperation)3 KeeperException (org.apache.zookeeper.KeeperException)3 NoNodeException (org.apache.zookeeper.KeeperException.NoNodeException)3 JsonParseException (com.fasterxml.jackson.core.JsonParseException)2 JsonMappingException (com.fasterxml.jackson.databind.JsonMappingException)2 DeploymentGroupStatus (com.spotify.helios.common.descriptors.DeploymentGroupStatus)2 DeploymentGroupTasks (com.spotify.helios.common.descriptors.DeploymentGroupTasks)2 IOException (java.io.IOException)2 Collections.emptyMap (java.util.Collections.emptyMap)2 Map (java.util.Map)2 BadVersionException (org.apache.zookeeper.KeeperException.BadVersionException)2 TypeReference (com.fasterxml.jackson.core.type.TypeReference)1 Function (com.google.common.base.Function)1 Joiner (com.google.common.base.Joiner)1 MoreObjects (com.google.common.base.MoreObjects)1