Search in sources:

Example 1 with DeploymentGroupStatus

use of com.spotify.helios.common.descriptors.DeploymentGroupStatus in project helios by spotify.

the class ZooKeeperMasterModel method updateDeploymentGroupHosts.

/**
 * Stores a new host list for a deployment group and, when the group has a job and
 * {@code updateOnHostChange} permits it, starts a rolling update in the same ZooKeeper
 * transaction.
 *
 * <p>Nothing is written when {@code allowHostChange} rejects the current status, or when both the
 * host list and the derived removed-hosts list are unchanged. A {@link BadVersionException} from
 * the transaction is logged and otherwise ignored.
 *
 * @param groupName name of the deployment group being updated
 * @param hosts the new list of hosts for the group
 * @throws DeploymentGroupDoesNotExistException if ZooKeeper reports a missing node
 * @throws HeliosRuntimeException on any other ZooKeeper or serialization failure
 */
@Override
public void updateDeploymentGroupHosts(final String groupName, final List<String> hosts) throws DeploymentGroupDoesNotExistException {
    log.debug("updating deployment-group hosts: name={}", groupName);
    final ZooKeeperClient client = provider.get("updateDeploymentGroupHosts");
    try {
        final DeploymentGroupStatus status = getDeploymentGroupStatus(groupName);
        if (!allowHostChange(status)) {
            return;
        }

        final String hostsPath = Paths.statusDeploymentGroupHosts(groupName);
        final String removedHostsPath = Paths.statusDeploymentGroupRemovedHosts(groupName);

        // The removed-hosts node may be missing for deployment groups that were created before
        // that node was introduced.
        client.ensurePathAndSetData(removedHostsPath, Json.asBytesUnchecked(emptyList()));

        final List<String> currentHosts = getHosts(client, hostsPath);
        final List<String> oldRemovedHosts = getHosts(client, removedHostsPath);
        final List<String> newRemovedHosts = removedHosts(currentHosts, hosts, oldRemovedHosts);

        final boolean hostsUnchanged = hosts.equals(currentHosts);
        if (hostsUnchanged && newRemovedHosts.equals(oldRemovedHosts)) {
            return;
        }

        log.info(
            "for deployment-group name={}, curHosts={}, new hosts={}, "
                + "previouslyRemovedHosts={}, derived removedHosts={}",
            groupName, currentHosts, hosts, oldRemovedHosts, newRemovedHosts);

        final List<ZooKeeperOperation> operations = Lists.newArrayList();
        operations.add(set(hostsPath, Json.asBytes(hosts)));
        operations.add(set(removedHostsPath, Json.asBytes(newRemovedHosts)));

        final Node groupNode = client.getNode(Paths.configDeploymentGroup(groupName));
        final Integer groupVersion = groupNode.getStat().getVersion();
        DeploymentGroup group = Json.read(groupNode.getBytes(), DeploymentGroup.class);

        List<Map<String, Object>> events = ImmutableList.of();
        final boolean startRollingUpdate =
            group.getJobId() != null && updateOnHostChange(group, status);
        if (startRollingUpdate) {
            group = group.toBuilder().setRollingUpdateReason(HOSTS_CHANGED).build();

            // Make the transaction fail if the deployment group was modified elsewhere.
            operations.add(check(Paths.configDeploymentGroup(groupName), groupVersion));

            // NOTE: If the DG was removed, this set() causes the transaction to fail, because
            // removing the DG removes this node. It's *important* that there's an operation that
            // causes the transaction to fail if the DG was removed, or we'll end up with
            // inconsistent state.
            operations.add(set(Paths.configDeploymentGroup(group.getName()), group));

            final RollingUpdateOp rollingUpdate =
                getInitRollingUpdateOps(group, hosts, newRemovedHosts, client);
            operations.addAll(rollingUpdate.operations());
            events = rollingUpdate.events();
        }

        log.info(
            "starting zookeeper transaction for updateDeploymentGroupHosts on deployment-group: "
                + "name={} jobId={} operations={}",
            groupName, group.getJobId(), operations);
        client.transaction(operations);
        emitEvents(deploymentGroupEventTopic, events);
    } catch (BadVersionException e) {
        // Another master got to this host update first; that is expected, not an error.
        // Ideally we would inspect the path in the exception, but curator doesn't provide a path
        // for exceptions thrown as part of a transaction.
        log.info(
            "zookeeper transaction for updateDeploymentGroupHosts on deployment-group was "
                + "processed by another master: name={}",
            groupName);
    } catch (NoNodeException e) {
        throw new DeploymentGroupDoesNotExistException(groupName, e);
    } catch (KeeperException | IOException e) {
        throw new HeliosRuntimeException("updating deployment group hosts failed", e);
    }
}
Also used : NoNodeException(org.apache.zookeeper.KeeperException.NoNodeException) ZooKeeperOperation(com.spotify.helios.servicescommon.coordination.ZooKeeperOperation) Node(com.spotify.helios.servicescommon.coordination.Node) HeliosRuntimeException(com.spotify.helios.common.HeliosRuntimeException) DeploymentGroupStatus(com.spotify.helios.common.descriptors.DeploymentGroupStatus) BadVersionException(org.apache.zookeeper.KeeperException.BadVersionException) IOException(java.io.IOException) RollingUpdateOp(com.spotify.helios.rollingupdate.RollingUpdateOp) ZooKeeperClient(com.spotify.helios.servicescommon.coordination.ZooKeeperClient) Map(java.util.Map) ImmutableMap(com.google.common.collect.ImmutableMap) Collections.emptyMap(java.util.Collections.emptyMap) DeploymentGroup(com.spotify.helios.common.descriptors.DeploymentGroup) KeeperException(org.apache.zookeeper.KeeperException)

Example 2 with DeploymentGroupStatus

use of com.spotify.helios.common.descriptors.DeploymentGroupStatus in project helios by spotify.

the class DeploymentGroupResource method getDeploymentGroupStatus.

/**
 * Returns the status of a deployment group together with per-host deployment information.
 *
 * <p>Only hosts whose status is {@code UP} are included in the per-host list. For each such host
 * the first job deployed under this group name (if any) is reported with its task state.
 *
 * @param name the deployment group name taken from the request path
 * @return a 200 response carrying a {@link DeploymentGroupStatusResponse}, or a 404 response when
 *         the model throws {@link DeploymentGroupDoesNotExistException}
 */
@GET
@Path("/{name}/status")
@Produces(APPLICATION_JSON)
@Timed
@ExceptionMetered
public Response getDeploymentGroupStatus(@PathParam("name") @Valid final String name) {
    try {
        final DeploymentGroup deploymentGroup = model.getDeploymentGroup(name);
        final DeploymentGroupStatus deploymentGroupStatus = model.getDeploymentGroupStatus(name);
        final List<String> hosts = model.getDeploymentGroupHosts(name);

        final List<DeploymentGroupStatusResponse.HostStatus> hostResults = Lists.newArrayList();
        for (final String host : hosts) {
            final HostStatus hostStatus = model.getHostStatus(host);
            // Hosts that are unknown or not UP are left out of the response entirely.
            if (hostStatus == null || !hostStatus.getStatus().equals(HostStatus.Status.UP)) {
                continue;
            }

            JobId groupJobId = null;
            TaskStatus.State taskState = null;
            for (final Map.Entry<JobId, Deployment> job : hostStatus.getJobs().entrySet()) {
                if (!name.equals(job.getValue().getDeploymentGroupName())) {
                    continue;
                }
                // Report the first job that belongs to this deployment group.
                groupJobId = job.getKey();
                final TaskStatus taskStatus = hostStatus.getStatuses().get(groupJobId);
                if (taskStatus != null) {
                    taskState = taskStatus.getState();
                }
                break;
            }
            hostResults.add(new DeploymentGroupStatusResponse.HostStatus(host, groupJobId, taskState));
        }

        // A missing status record is reported as IDLE with an empty error message.
        final DeploymentGroupStatusResponse.Status status;
        final String error;
        if (deploymentGroupStatus == null) {
            status = DeploymentGroupStatusResponse.Status.IDLE;
            error = "";
        } else {
            final DeploymentGroupStatus.State groupState = deploymentGroupStatus.getState();
            if (groupState == DeploymentGroupStatus.State.FAILED) {
                status = DeploymentGroupStatusResponse.Status.FAILED;
            } else if (groupState == DeploymentGroupStatus.State.ROLLING_OUT) {
                status = DeploymentGroupStatusResponse.Status.ROLLING_OUT;
            } else {
                status = DeploymentGroupStatusResponse.Status.ACTIVE;
            }
            error = deploymentGroupStatus.getError();
        }

        final DeploymentGroupStatusResponse body = new DeploymentGroupStatusResponse(
            deploymentGroup, status, error, hostResults, deploymentGroupStatus);
        return Response.ok(body).build();
    } catch (final DeploymentGroupDoesNotExistException e) {
        return Response.status(Response.Status.NOT_FOUND).build();
    }
}
Also used : Deployment(com.spotify.helios.common.descriptors.Deployment) DeploymentGroupStatus(com.spotify.helios.common.descriptors.DeploymentGroupStatus) DeploymentGroupDoesNotExistException(com.spotify.helios.master.DeploymentGroupDoesNotExistException) TaskStatus(com.spotify.helios.common.descriptors.TaskStatus) DeploymentGroupStatusResponse(com.spotify.helios.common.protocol.DeploymentGroupStatusResponse) HostStatus(com.spotify.helios.common.descriptors.HostStatus) Map(java.util.Map) DeploymentGroup(com.spotify.helios.common.descriptors.DeploymentGroup) JobId(com.spotify.helios.common.descriptors.JobId) Path(javax.ws.rs.Path) Produces(javax.ws.rs.Produces) Timed(com.codahale.metrics.annotation.Timed) GET(javax.ws.rs.GET) ExceptionMetered(com.codahale.metrics.annotation.ExceptionMetered)

Example 3 with DeploymentGroupStatus

use of com.spotify.helios.common.descriptors.DeploymentGroupStatus in project helios by spotify.

the class RollingUpdateOpFactory method start.

/**
 * Builds the ZooKeeper operations and events that begin a rolling update for a deployment group.
 *
 * <p>With no rollout tasks the group goes straight to {@code DONE} and its tasks node is deleted;
 * otherwise the tasks node is written with task index 0 and the group is marked
 * {@code ROLLING_OUT}.
 *
 * @param deploymentGroup the deployment group being rolled out
 * @param client ZooKeeper client used to ensure and probe the tasks paths
 * @return the operations to commit and the events to emit
 * @throws KeeperException if ensuring the base path or checking for the tasks node fails
 */
public RollingUpdateOp start(final DeploymentGroup deploymentGroup, final ZooKeeperClient client) throws KeeperException {
    client.ensurePath(Paths.statusDeploymentGroupTasks());

    final List<ZooKeeperOperation> operations = Lists.newArrayList();
    final List<Map<String, Object>> emitted = Lists.newArrayList();
    final List<RolloutTask> rolloutTasks = tasks.getRolloutTasks();
    emitted.add(eventFactory.rollingUpdateStarted(deploymentGroup));

    final String tasksPath = Paths.statusDeploymentGroupTasks(deploymentGroup.getName());
    if (client.exists(tasksPath) == null) {
        // The delete/set below assumes the tasks node is present, so create it when missing. If
        // the node is created or deleted by someone else before the transaction commits, the
        // transaction fails. That occasionally surfaces as a user-visible error, which is
        // preferable to inconsistent state.
        operations.add(create(tasksPath));
    }

    final DeploymentGroupStatus status;
    if (!rolloutTasks.isEmpty()) {
        final DeploymentGroupTasks groupTasks = DeploymentGroupTasks.newBuilder()
            .setRolloutTasks(rolloutTasks)
            .setTaskIndex(0)
            .setDeploymentGroup(deploymentGroup)
            .build();
        status = DeploymentGroupStatus.newBuilder().setState(ROLLING_OUT).build();
        operations.add(set(tasksPath, groupTasks));
    } else {
        // Nothing to roll out: finish immediately.
        status = DeploymentGroupStatus.newBuilder().setState(DONE).build();
        operations.add(delete(tasksPath));
        emitted.add(eventFactory.rollingUpdateDone(deploymentGroup));
    }

    // NOTE: If the DG was removed, this set() causes the transaction to fail, because removing
    // the DG removes this node. It's *important* that there's an operation that causes the
    // transaction to fail if the DG was removed, or we'll end up with inconsistent state.
    operations.add(set(Paths.statusDeploymentGroup(deploymentGroup.getName()), status));

    return new RollingUpdateOp(ImmutableList.copyOf(operations), ImmutableList.copyOf(emitted));
}
Also used : Stat(org.apache.zookeeper.data.Stat) ZooKeeperOperation(com.spotify.helios.servicescommon.coordination.ZooKeeperOperation) DeploymentGroupTasks(com.spotify.helios.common.descriptors.DeploymentGroupTasks) RolloutTask(com.spotify.helios.common.descriptors.RolloutTask) DeploymentGroupStatus(com.spotify.helios.common.descriptors.DeploymentGroupStatus) Map(java.util.Map)

Example 4 with DeploymentGroupStatus

use of com.spotify.helios.common.descriptors.DeploymentGroupStatus in project helios by spotify.

the class RollingUpdateOpFactory method nextTask.

/**
 * Builds the operations and events that move the rolling update past the current task.
 *
 * <p>If the current task is the last one, the tasks node is deleted, the group status is set to
 * {@code DONE} and a "done" event is produced. Otherwise the stored task index is incremented.
 *
 * @param operations ZooKeeper operations produced by the current task; they are placed ahead of
 *                   the bookkeeping operations added here
 * @return the combined operations and the events to emit
 */
public RollingUpdateOp nextTask(final List<ZooKeeperOperation> operations) {
    final List<ZooKeeperOperation> combinedOps = Lists.newArrayList(operations);
    final List<Map<String, Object>> emitted = Lists.newArrayList();
    final RolloutTask currentTask = tasks.getRolloutTasks().get(tasks.getTaskIndex());

    final int nextIndex = tasks.getTaskIndex() + 1;
    final boolean wasLastTask = nextIndex == tasks.getRolloutTasks().size();

    if (!wasLastTask) {
        // More tasks remain: persist the advanced task index.
        combinedOps.add(set(
            Paths.statusDeploymentGroupTasks(deploymentGroup.getName()),
            tasks.toBuilder().setTaskIndex(nextIndex).build()));
        // Only report success when the task produced ZK operations; with none,
        // the task was effectively a no-op.
        if (!operations.isEmpty()) {
            emitted.add(eventFactory.rollingUpdateTaskSucceeded(deploymentGroup, currentTask));
        }
    } else {
        // All tasks finished: remove the tasks node and mark the group DONE.
        final DeploymentGroupStatus doneStatus =
            DeploymentGroupStatus.newBuilder().setState(DONE).build();
        combinedOps.add(delete(Paths.statusDeploymentGroupTasks(deploymentGroup.getName())));
        combinedOps.add(set(Paths.statusDeploymentGroup(deploymentGroup.getName()), doneStatus));
        // Signal that the rolling update is DONE.
        emitted.add(eventFactory.rollingUpdateDone(deploymentGroup));
    }

    return new RollingUpdateOp(ImmutableList.copyOf(combinedOps), ImmutableList.copyOf(emitted));
}
Also used : ZooKeeperOperation(com.spotify.helios.servicescommon.coordination.ZooKeeperOperation) RolloutTask(com.spotify.helios.common.descriptors.RolloutTask) DeploymentGroupStatus(com.spotify.helios.common.descriptors.DeploymentGroupStatus) Map(java.util.Map)

Example 5 with DeploymentGroupStatus

use of com.spotify.helios.common.descriptors.DeploymentGroupStatus in project helios by spotify.

the class DeploymentGroupTest method testStopDeploymentGroup.

// Exercises stopDeploymentGroup across every combination of the three flags and:
// * verifies that the state in ZK is correct after running stop
// * verifies that the expected exception is thrown when the DG does not exist or when a
// race condition occurs
@Theory
public void testStopDeploymentGroup(@TestedOn(ints = { 0, 1 }) final int dgExistsInt, @TestedOn(ints = { 0, 1 }) final int tasksExistInt, @TestedOn(ints = { 0, 1 }) final int tasksExistWhenCommittingInt) throws Exception {
    final boolean groupPresent = dgExistsInt != 0;
    final boolean tasksReported = tasksExistInt != 0;
    final boolean tasksPresentAtCommit = tasksExistWhenCommittingInt != 0;

    // Simulating the race in stopDeploymentGroup requires some mocking. This relies on the
    // implementation calling client.exists() to check whether tasks are present.
    final ZooKeeperClient spiedClient = spy(this.client);
    Stat reportedStat = null;
    if (tasksReported) {
        reportedStat = mock(Stat.class);
    }
    when(spiedClient.exists(Paths.statusDeploymentGroupTasks(GROUP_NAME))).thenReturn(reportedStat);

    final ZooKeeperMasterModel model = newMasterModel(spiedClient);

    if (groupPresent) {
        model.addDeploymentGroup(DeploymentGroup.newBuilder().setName(GROUP_NAME).build());
    }

    if (tasksPresentAtCommit) {
        spiedClient.ensurePath(Paths.statusDeploymentGroupTasks());
        spiedClient.create(Paths.statusDeploymentGroupTasks(GROUP_NAME));
    }

    // A missing group takes precedence; otherwise a mismatch between what exists() reported and
    // what is actually there at commit time is the race being tested.
    if (groupPresent) {
        if (tasksReported != tasksPresentAtCommit) {
            exception.expect(HeliosRuntimeException.class);
        }
    } else {
        exception.expect(DeploymentGroupDoesNotExistException.class);
    }

    model.stopDeploymentGroup(GROUP_NAME);

    // Verify that the state in ZK is correct:
    // * tasks are not present
    // * the status is set to FAILED
    //
    // The existence check must go through the un-spied client, whose exists() method is not
    // stubbed.
    assertNull(this.client.exists(Paths.statusDeploymentGroupTasks(GROUP_NAME)));
    final DeploymentGroupStatus finalStatus = model.getDeploymentGroupStatus(GROUP_NAME);
    assertEquals(FAILED, finalStatus.getState());
}
Also used : ZooKeeperClient(com.spotify.helios.servicescommon.coordination.ZooKeeperClient) DefaultZooKeeperClient(com.spotify.helios.servicescommon.coordination.DefaultZooKeeperClient) HeliosRuntimeException(com.spotify.helios.common.HeliosRuntimeException) DeploymentGroupStatus(com.spotify.helios.common.descriptors.DeploymentGroupStatus) DeploymentGroup(com.spotify.helios.common.descriptors.DeploymentGroup) Theory(org.junit.experimental.theories.Theory)

Aggregations

DeploymentGroupStatus (com.spotify.helios.common.descriptors.DeploymentGroupStatus): 7; ZooKeeperOperation (com.spotify.helios.servicescommon.coordination.ZooKeeperOperation): 5; Map (java.util.Map): 5; HeliosRuntimeException (com.spotify.helios.common.HeliosRuntimeException): 3; DeploymentGroup (com.spotify.helios.common.descriptors.DeploymentGroup): 3; RolloutTask (com.spotify.helios.common.descriptors.RolloutTask): 3; ZooKeeperClient (com.spotify.helios.servicescommon.coordination.ZooKeeperClient): 3; KeeperException (org.apache.zookeeper.KeeperException): 2; NoNodeException (org.apache.zookeeper.KeeperException.NoNodeException): 2; Stat (org.apache.zookeeper.data.Stat): 2; ExceptionMetered (com.codahale.metrics.annotation.ExceptionMetered): 1; Timed (com.codahale.metrics.annotation.Timed): 1; ImmutableMap (com.google.common.collect.ImmutableMap): 1; Deployment (com.spotify.helios.common.descriptors.Deployment): 1; DeploymentGroupTasks (com.spotify.helios.common.descriptors.DeploymentGroupTasks): 1; HostStatus (com.spotify.helios.common.descriptors.HostStatus): 1; JobId (com.spotify.helios.common.descriptors.JobId): 1; TaskStatus (com.spotify.helios.common.descriptors.TaskStatus): 1; DeploymentGroupStatusResponse (com.spotify.helios.common.protocol.DeploymentGroupStatusResponse): 1; DeploymentGroupDoesNotExistException (com.spotify.helios.master.DeploymentGroupDoesNotExistException): 1