Search in sources :

Example 1 with Node

use of com.spotify.helios.servicescommon.coordination.Node in project helios by spotify.

the class ZooKeeperMasterModel method updateDeploymentGroupHosts.

/**
 * Records a new host list for a deployment group and, when applicable, kicks off a rolling
 * update in the same ZooKeeper transaction.
 *
 * <p>The method returns without writing anything when {@code allowHostChange(status)} is false,
 * or when both the host list and the derived removed-host list are unchanged from what is
 * currently stored.
 *
 * <p>A {@link BadVersionException} raised by the transaction is logged and swallowed; it is
 * treated as "another master already handled this update".
 *
 * @param groupName name of the deployment group whose hosts are being updated
 * @param hosts     the new list of hosts for the group
 * @throws DeploymentGroupDoesNotExistException if a {@link NoNodeException} is raised while
 *                                              reading or writing the group's nodes
 * @throws HeliosRuntimeException               on any other {@link KeeperException} or
 *                                              {@link IOException}
 */
@Override
public void updateDeploymentGroupHosts(final String groupName, final List<String> hosts) throws DeploymentGroupDoesNotExistException {
    log.debug("updating deployment-group hosts: name={}", groupName);
    final ZooKeeperClient client = provider.get("updateDeploymentGroupHosts");
    try {
        final DeploymentGroupStatus status = getDeploymentGroupStatus(groupName);
        if (!allowHostChange(status)) {
            return;
        }
        // statusDeploymentGroupRemovedHosts may not exist for deployment groups created before it was
        // introduced.
        // NOTE(review): presumably ensurePathAndSetData only writes the empty list when the node is
        // missing; confirm against ZooKeeperClient.
        client.ensurePathAndSetData(Paths.statusDeploymentGroupRemovedHosts(groupName), Json.asBytesUnchecked(emptyList()));
        final List<String> curHosts = getHosts(client, Paths.statusDeploymentGroupHosts(groupName));
        final List<String> previouslyRemovedHosts = getHosts(client, Paths.statusDeploymentGroupRemovedHosts(groupName));
        final List<String> removedHosts = removedHosts(curHosts, hosts, previouslyRemovedHosts);
        // Nothing to write if neither the host list nor the removed-host list would change.
        if (hosts.equals(curHosts) && removedHosts.equals(previouslyRemovedHosts)) {
            return;
        }
        log.info("for deployment-group name={}, curHosts={}, new hosts={}, " + "previouslyRemovedHosts={}, derived removedHosts={}", groupName, curHosts, hosts, previouslyRemovedHosts, removedHosts);
        // All writes below are collected and committed together in a single transaction.
        final List<ZooKeeperOperation> ops = Lists.newArrayList();
        ops.add(set(Paths.statusDeploymentGroupHosts(groupName), Json.asBytes(hosts)));
        ops.add(set(Paths.statusDeploymentGroupRemovedHosts(groupName), Json.asBytes(removedHosts)));
        // Read the group config together with its version so the transaction can verify that the
        // config has not changed in the meantime.
        final Node dgn = client.getNode(Paths.configDeploymentGroup(groupName));
        final Integer deploymentGroupVersion = dgn.getStat().getVersion();
        DeploymentGroup deploymentGroup = Json.read(dgn.getBytes(), DeploymentGroup.class);
        // Stays empty unless a rolling update is initiated below.
        List<Map<String, Object>> events = ImmutableList.of();
        if (deploymentGroup.getJobId() != null && updateOnHostChange(deploymentGroup, status)) {
            deploymentGroup = deploymentGroup.toBuilder().setRollingUpdateReason(HOSTS_CHANGED).build();
            // Fail transaction if the deployment group has been updated elsewhere.
            ops.add(check(Paths.configDeploymentGroup(groupName), deploymentGroupVersion));
            // NOTE: If the DG was removed, this set() causes the transaction to fail, because
            // removing the DG removes this node. It's *important* that there's an operation that
            // causes the transaction to fail if the DG was removed or we'll end up with
            // inconsistent state.
            ops.add(set(Paths.configDeploymentGroup(deploymentGroup.getName()), deploymentGroup));
            final RollingUpdateOp op = getInitRollingUpdateOps(deploymentGroup, hosts, removedHosts, client);
            ops.addAll(op.operations());
            events = op.events();
        }
        log.info("starting zookeeper transaction for updateDeploymentGroupHosts on deployment-group: " + "name={} jobId={} operations={}", groupName, deploymentGroup.getJobId(), ops);
        client.transaction(ops);
        // Events are emitted only after the transaction call returned without throwing.
        emitEvents(deploymentGroupEventTopic, events);
    } catch (BadVersionException e) {
        // some other master beat us in processing this host update. not exceptional.
        // ideally we would check the path in the exception, but curator doesn't provide a path
        // for exceptions thrown as part of a transaction.
        log.info("zookeeper transaction for updateDeploymentGroupHosts on deployment-group was " + "processed by another master: name={}", groupName);
    } catch (NoNodeException e) {
        // A missing node is reported to the caller as "deployment group does not exist".
        throw new DeploymentGroupDoesNotExistException(groupName, e);
    } catch (KeeperException | IOException e) {
        throw new HeliosRuntimeException("updating deployment group hosts failed", e);
    }
}
Also used : NoNodeException(org.apache.zookeeper.KeeperException.NoNodeException) ZooKeeperOperation(com.spotify.helios.servicescommon.coordination.ZooKeeperOperation) Node(com.spotify.helios.servicescommon.coordination.Node) HeliosRuntimeException(com.spotify.helios.common.HeliosRuntimeException) DeploymentGroupStatus(com.spotify.helios.common.descriptors.DeploymentGroupStatus) BadVersionException(org.apache.zookeeper.KeeperException.BadVersionException) IOException(java.io.IOException) RollingUpdateOp(com.spotify.helios.rollingupdate.RollingUpdateOp) ZooKeeperClient(com.spotify.helios.servicescommon.coordination.ZooKeeperClient) Map(java.util.Map) ImmutableMap(com.google.common.collect.ImmutableMap) Collections.emptyMap(java.util.Collections.emptyMap) DeploymentGroup(com.spotify.helios.common.descriptors.DeploymentGroup) KeeperException(org.apache.zookeeper.KeeperException)

Example 2 with Node

use of com.spotify.helios.servicescommon.coordination.Node in project helios by spotify.

the class ZooKeeperMasterModel method getDeploymentGroupTasks.

/**
 * Loads the stored tasks of every deployment group, keyed by group name and paired with the
 * version of the ZooKeeper node they were read from.
 *
 * <p>Groups whose node is empty, or whose node disappears between listing and reading, are
 * skipped. If the parent folder does not exist an empty map is returned.
 *
 * @param client the ZooKeeper client used for all reads
 * @return a map from deployment group name to its versioned tasks
 * @throws HeliosRuntimeException on any other {@link KeeperException} or {@link IOException}
 */
private Map<String, VersionedValue<DeploymentGroupTasks>> getDeploymentGroupTasks(final ZooKeeperClient client) {
    final String tasksRoot = Paths.statusDeploymentGroupTasks();
    try {
        final List<String> groupNames;
        try {
            groupNames = client.getChildren(tasksRoot);
        } catch (NoNodeException e) {
            // No tasks folder at all: nothing to report.
            return Collections.emptyMap();
        }

        final Map<String, VersionedValue<DeploymentGroupTasks>> tasksByGroup = Maps.newHashMap();
        for (final String groupName : groupNames) {
            final String taskPath = Paths.statusDeploymentGroupTasks(groupName);
            try {
                final Node taskNode = client.getNode(taskPath);
                final byte[] payload = taskNode.getBytes();
                final int nodeVersion = taskNode.getStat().getVersion();
                if (payload.length == 0) {
                    // This can happen because ensurePath creates an empty node.
                    log.debug("Ignoring empty deployment group tasks {}", groupName);
                    continue;
                }
                final DeploymentGroupTasks tasks = parse(payload, DeploymentGroupTasks.class);
                tasksByGroup.put(groupName, VersionedValue.of(tasks, nodeVersion));
            } catch (NoNodeException e) {
                // The deployment group was deleted before we had a chance to read it; skip it.
                log.debug("Ignoring deleted deployment group tasks {}", groupName);
            }
        }
        return tasksByGroup;
    } catch (KeeperException | IOException e) {
        throw new HeliosRuntimeException("getting deployment group tasks failed", e);
    }
}
Also used : VersionedValue(com.spotify.helios.servicescommon.VersionedValue) NoNodeException(org.apache.zookeeper.KeeperException.NoNodeException) Node(com.spotify.helios.servicescommon.coordination.Node) HeliosRuntimeException(com.spotify.helios.common.HeliosRuntimeException) DeploymentGroupTasks(com.spotify.helios.common.descriptors.DeploymentGroupTasks) IOException(java.io.IOException) KeeperException(org.apache.zookeeper.KeeperException)

Example 3 with Node

use of com.spotify.helios.servicescommon.coordination.Node in project helios by spotify.

the class ZooKeeperMasterModel method getDeploymentGroupStatus.

/**
 * Reads the stored status of a deployment group.
 *
 * @param name the deployment group name
 * @return the parsed status, or {@code null} when the group lookup yields {@code null}, the
 *         status node is missing, or the status node holds no data
 * @throws DeploymentGroupDoesNotExistException declared by the interface; may be raised by the
 *                                              deployment group lookup
 * @throws HeliosRuntimeException               on any other {@link KeeperException} or
 *                                              {@link IOException} while reading the status
 */
@Override
public DeploymentGroupStatus getDeploymentGroupStatus(final String name) throws DeploymentGroupDoesNotExistException {
    log.debug("getting deployment group status: {}", name);
    final ZooKeeperClient zk = provider.get("getDeploymentGroupStatus");

    // Without a deployment group there is no status to report.
    if (getDeploymentGroup(zk, name) == null) {
        return null;
    }

    try {
        final byte[] raw = zk.getNode(Paths.statusDeploymentGroup(name)).getBytes();
        // An empty node carries no status.
        return raw.length == 0 ? null : Json.read(raw, DeploymentGroupStatus.class);
    } catch (NoNodeException e) {
        return null;
    } catch (KeeperException | IOException e) {
        throw new HeliosRuntimeException("getting deployment group status " + name + " failed", e);
    }
}
Also used : NoNodeException(org.apache.zookeeper.KeeperException.NoNodeException) ZooKeeperClient(com.spotify.helios.servicescommon.coordination.ZooKeeperClient) Node(com.spotify.helios.servicescommon.coordination.Node) HeliosRuntimeException(com.spotify.helios.common.HeliosRuntimeException) IOException(java.io.IOException) DeploymentGroup(com.spotify.helios.common.descriptors.DeploymentGroup) KeeperException(org.apache.zookeeper.KeeperException)

Example 4 with Node

use of com.spotify.helios.servicescommon.coordination.Node in project helios by spotify.

the class ZooKeeperMasterModel method rollingUpdateMarkUndeployed.

/**
 * Builds the rolling-update step that removes {@code host} from the deployment group's stored
 * list of removed hosts (hosts still waiting to be undeployed).
 *
 * <p>If the host is no longer in the stored list, the step simply advances to the next task.
 * Otherwise the returned op carries a version check plus the write of the shortened list.
 *
 * @param client          the ZooKeeper client used to read the removed-hosts node
 * @param opFactory       factory producing the resulting {@link RollingUpdateOp}
 * @param deploymentGroup the deployment group being rolled
 * @param host            the host to mark as undeployed
 * @return the next-task op on success, or an error op with
 *         {@code UNABLE_TO_MARK_HOST_UNDEPLOYED} if the removed-hosts node cannot be read or
 *         parsed
 */
private RollingUpdateOp rollingUpdateMarkUndeployed(final ZooKeeperClient client, final RollingUpdateOpFactory opFactory, final DeploymentGroup deploymentGroup, final String host) {
    // Same node is read, checked and written; compute its path once.
    final String removedHostsPath = Paths.statusDeploymentGroupRemovedHosts(deploymentGroup.getName());
    try {
        final Node node = client.getNode(removedHostsPath);
        final int version = node.getStat().getVersion();
        final List<String> hostsToUndeploy = Json.read(node.getBytes(), STRING_LIST_TYPE);
        if (!hostsToUndeploy.remove(host)) {
            // Something already removed this host. Don't bother trying to update the removed hosts.
            return opFactory.nextTask();
        }
        /*
      The below check() avoids the following race:

      1. Master A reads list of hosts X and Y
      2. Master B reads list of hosts X and Y
      3. Master A removes host X
      4. Master B removes host Y
      5. Master A commits list with Y and without X
      6. Master B commits list with X and without Y

      We would end up thinking we'd successfully removed both hosts, but in fact would have only
      removed host Y.

      The check() will cause this RollingUpdateOp's ZK transaction to fail without incrementing the
      task index, so this RollingUpdateOp will be retried by the next master that gets to it.
      */
        return opFactory.nextTask(ImmutableList.of(check(removedHostsPath, version), set(removedHostsPath, Json.asBytes(hostsToUndeploy))));
    } catch (KeeperException | IOException e) {
        // Keep the root cause visible: the error op below only carries a fixed message, so
        // without this log line the underlying exception would be lost.
        log.warn("unable to mark host undeployed after removal from deployment group: name={} host={}", deploymentGroup.getName(), host, e);
        return opFactory.error("unable to mark host undeployed after removal from deployment group", host, RollingUpdateError.UNABLE_TO_MARK_HOST_UNDEPLOYED);
    }
}
Also used : Node(com.spotify.helios.servicescommon.coordination.Node) IOException(java.io.IOException) KeeperException(org.apache.zookeeper.KeeperException)

Aggregations

Node (com.spotify.helios.servicescommon.coordination.Node)4 IOException (java.io.IOException)4 KeeperException (org.apache.zookeeper.KeeperException)4 HeliosRuntimeException (com.spotify.helios.common.HeliosRuntimeException)3 NoNodeException (org.apache.zookeeper.KeeperException.NoNodeException)3 DeploymentGroup (com.spotify.helios.common.descriptors.DeploymentGroup)2 ZooKeeperClient (com.spotify.helios.servicescommon.coordination.ZooKeeperClient)2 ImmutableMap (com.google.common.collect.ImmutableMap)1 DeploymentGroupStatus (com.spotify.helios.common.descriptors.DeploymentGroupStatus)1 DeploymentGroupTasks (com.spotify.helios.common.descriptors.DeploymentGroupTasks)1 RollingUpdateOp (com.spotify.helios.rollingupdate.RollingUpdateOp)1 VersionedValue (com.spotify.helios.servicescommon.VersionedValue)1 ZooKeeperOperation (com.spotify.helios.servicescommon.coordination.ZooKeeperOperation)1 Collections.emptyMap (java.util.Collections.emptyMap)1 Map (java.util.Map)1 BadVersionException (org.apache.zookeeper.KeeperException.BadVersionException)1