Search in sources :

Example 6 with ZooKeeperClient

use of com.spotify.helios.servicescommon.coordination.ZooKeeperClient in project helios by spotify.

the class ZooKeeperMasterModel method updateDeploymentGroupHosts.

/**
 * Records a new list of hosts for the deployment group {@code groupName} and, when applicable,
 * kicks off a rolling update caused by the host change.
 *
 * <p>The host list and the derived removed-hosts list are written in a single ZooKeeper
 * transaction. If the group has a job and {@code updateOnHostChange} agrees, the same
 * transaction also rewrites the group's config with the {@code HOSTS_CHANGED} reason and
 * includes the operations returned by {@code getInitRollingUpdateOps}.
 *
 * <p>The method returns without writing the host lists when {@code allowHostChange} rejects the
 * current status, or when neither the host list nor the removed-hosts list would change.
 *
 * @param groupName name of the deployment group whose hosts are being updated
 * @param hosts     the new list of hosts for the group
 * @throws DeploymentGroupDoesNotExistException if a {@code NoNodeException} is raised while
 *                                              reading or updating the group
 * @throws HeliosRuntimeException               if any other {@code KeeperException} or an
 *                                              {@code IOException} occurs
 */
@Override
public void updateDeploymentGroupHosts(final String groupName, final List<String> hosts) throws DeploymentGroupDoesNotExistException {
    log.debug("updating deployment-group hosts: name={}", groupName);
    final ZooKeeperClient client = provider.get("updateDeploymentGroupHosts");
    try {
        final DeploymentGroupStatus status = getDeploymentGroupStatus(groupName);
        // Bail out before touching ZooKeeper when the current status does not permit a host change.
        if (!allowHostChange(status)) {
            return;
        }
        // statusDeploymentGroupRemovedHosts may not exist for deployment groups created before it was
        // introduced.
        client.ensurePathAndSetData(Paths.statusDeploymentGroupRemovedHosts(groupName), Json.asBytesUnchecked(emptyList()));
        // Load what is currently stored so the new removed-hosts list can be derived from it.
        final List<String> curHosts = getHosts(client, Paths.statusDeploymentGroupHosts(groupName));
        final List<String> previouslyRemovedHosts = getHosts(client, Paths.statusDeploymentGroupRemovedHosts(groupName));
        final List<String> removedHosts = removedHosts(curHosts, hosts, previouslyRemovedHosts);
        // Nothing to do when both the host list and the removed-hosts list are unchanged.
        if (hosts.equals(curHosts) && removedHosts.equals(previouslyRemovedHosts)) {
            return;
        }
        log.info("for deployment-group name={}, curHosts={}, new hosts={}, " + "previouslyRemovedHosts={}, derived removedHosts={}", groupName, curHosts, hosts, previouslyRemovedHosts, removedHosts);
        // Collect every write into one list so they are submitted as a single transaction below.
        final List<ZooKeeperOperation> ops = Lists.newArrayList();
        ops.add(set(Paths.statusDeploymentGroupHosts(groupName), Json.asBytes(hosts)));
        ops.add(set(Paths.statusDeploymentGroupRemovedHosts(groupName), Json.asBytes(removedHosts)));
        // Read the group's config node along with its version; the version feeds the check() below.
        final Node dgn = client.getNode(Paths.configDeploymentGroup(groupName));
        final Integer deploymentGroupVersion = dgn.getStat().getVersion();
        DeploymentGroup deploymentGroup = Json.read(dgn.getBytes(), DeploymentGroup.class);
        // Stays empty unless a rolling update is initiated in the branch below.
        List<Map<String, Object>> events = ImmutableList.of();
        // A rolling update is only initiated for groups that have a job and for which
        // updateOnHostChange(...) returns true.
        if (deploymentGroup.getJobId() != null && updateOnHostChange(deploymentGroup, status)) {
            deploymentGroup = deploymentGroup.toBuilder().setRollingUpdateReason(HOSTS_CHANGED).build();
            // Fail transaction if the deployment group has been updated elsewhere.
            ops.add(check(Paths.configDeploymentGroup(groupName), deploymentGroupVersion));
            // NOTE: If the DG was removed this set() causes the transaction to fail, because
            // removing the DG removes this node. It's *important* that there's an operation that
            // causes the transaction to fail if the DG was removed or we'll end up with
            // inconsistent state.
            ops.add(set(Paths.configDeploymentGroup(deploymentGroup.getName()), deploymentGroup));
            final RollingUpdateOp op = getInitRollingUpdateOps(deploymentGroup, hosts, removedHosts, client);
            ops.addAll(op.operations());
            events = op.events();
        }
        log.info("starting zookeeper transaction for updateDeploymentGroupHosts on deployment-group: " + "name={} jobId={} operations={}", groupName, deploymentGroup.getJobId(), ops);
        client.transaction(ops);
        // Reached only when transaction(...) did not throw, so events follow a submitted transaction.
        emitEvents(deploymentGroupEventTopic, events);
    } catch (BadVersionException e) {
        // some other master beat us in processing this host update. not exceptional.
        // ideally we would check the path in the exception, but curator doesn't provide a path
        // for exceptions thrown as part of a transaction.
        log.info("zookeeper transaction for updateDeploymentGroupHosts on deployment-group was " + "processed by another master: name={}", groupName);
    } catch (NoNodeException e) {
        // A missing node is reported to the caller as a missing deployment group.
        throw new DeploymentGroupDoesNotExistException(groupName, e);
    } catch (KeeperException | IOException e) {
        throw new HeliosRuntimeException("updating deployment group hosts failed", e);
    }
}
Also used : NoNodeException(org.apache.zookeeper.KeeperException.NoNodeException) ZooKeeperOperation(com.spotify.helios.servicescommon.coordination.ZooKeeperOperation) Node(com.spotify.helios.servicescommon.coordination.Node) HeliosRuntimeException(com.spotify.helios.common.HeliosRuntimeException) DeploymentGroupStatus(com.spotify.helios.common.descriptors.DeploymentGroupStatus) BadVersionException(org.apache.zookeeper.KeeperException.BadVersionException) IOException(java.io.IOException) RollingUpdateOp(com.spotify.helios.rollingupdate.RollingUpdateOp) ZooKeeperClient(com.spotify.helios.servicescommon.coordination.ZooKeeperClient) Map(java.util.Map) ImmutableMap(com.google.common.collect.ImmutableMap) Collections.emptyMap(java.util.Collections.emptyMap) DeploymentGroup(com.spotify.helios.common.descriptors.DeploymentGroup) KeeperException(org.apache.zookeeper.KeeperException)

Example 7 with ZooKeeperClient

use of com.spotify.helios.servicescommon.coordination.ZooKeeperClient in project helios by spotify.

the class ZooKeeperMasterModel method getRunningMasters.

/**
 * Lists the host names of the masters that are currently running.
 *
 * <p>Every child of the master status node is inspected; a master is included only when its
 * "up" node exists.
 *
 * @return an immutable list of the masters found to be up
 * @throws HeliosRuntimeException if a ZooKeeper call fails
 */
@Override
public List<String> getRunningMasters() {
    final ZooKeeperClient zk = provider.get("getRunningMasters");
    final ImmutableList.Builder<String> running = ImmutableList.builder();
    try {
        for (final String candidate : zk.getChildren(Paths.statusMaster())) {
            // Skip masters that are registered but have no "up" node.
            if (zk.exists(Paths.statusMasterUp(candidate)) == null) {
                continue;
            }
            running.add(candidate);
        }
    } catch (KeeperException e) {
        throw new HeliosRuntimeException("listing masters failed", e);
    }
    return running.build();
}
Also used : ZooKeeperClient(com.spotify.helios.servicescommon.coordination.ZooKeeperClient) ImmutableList(com.google.common.collect.ImmutableList) HeliosRuntimeException(com.spotify.helios.common.HeliosRuntimeException) KeeperException(org.apache.zookeeper.KeeperException)

Example 8 with ZooKeeperClient

use of com.spotify.helios.servicescommon.coordination.ZooKeeperClient in project helios by spotify.

the class ZooKeeperMasterModel method getDeployment.

/**
 * Looks up how {@code jobId} is currently deployed on {@code host}.
 *
 * @param host  the host to look at
 * @param jobId the job whose deployment is requested
 * @return the deployment built from the task stored for this host/job pair, or {@code null}
 *         when no such node exists
 * @throws HeliosRuntimeException if reading or parsing the stored task fails
 */
@Override
public Deployment getDeployment(final String host, final JobId jobId) {
    final String taskPath = Paths.configHostJob(host, jobId);
    final ZooKeeperClient zk = provider.get("getDeployment");
    try {
        final Task stored = parse(zk.getData(taskPath), Task.class);
        final Deployment deployment = Deployment.of(
            jobId,
            stored.getGoal(),
            stored.getDeployerUser(),
            stored.getDeployerMaster(),
            stored.getDeploymentGroupName());
        return deployment;
    } catch (KeeperException.NoNodeException e) {
        // The job is not deployed on this host.
        return null;
    } catch (KeeperException | IOException e) {
        throw new HeliosRuntimeException("getting deployment failed", e);
    }
}
Also used : Task(com.spotify.helios.common.descriptors.Task) RolloutTask(com.spotify.helios.common.descriptors.RolloutTask) ZooKeeperClient(com.spotify.helios.servicescommon.coordination.ZooKeeperClient) NoNodeException(org.apache.zookeeper.KeeperException.NoNodeException) HeliosRuntimeException(com.spotify.helios.common.HeliosRuntimeException) IOException(java.io.IOException) KeeperException(org.apache.zookeeper.KeeperException)

Example 9 with ZooKeeperClient

use of com.spotify.helios.servicescommon.coordination.ZooKeeperClient in project helios by spotify.

the class ZooKeeperMasterModel method getDeploymentGroups.

/**
 * Collects every known deployment group, keyed by the name stored in its descriptor.
 *
 * <p>If the deployment-groups folder does not exist, an empty map is returned. A group that
 * disappears between listing the folder and reading its node is skipped.
 *
 * @return a mutable {@link Map} from deployment group name to {@link DeploymentGroup}
 * @throws HeliosRuntimeException if a ZooKeeper call or descriptor parsing fails
 */
@Override
public Map<String, DeploymentGroup> getDeploymentGroups() {
    log.debug("getting deployment groups");
    final String root = Paths.configDeploymentGroups();
    final ZooKeeperClient zk = provider.get("getDeploymentGroups");
    final Map<String, DeploymentGroup> groupsByName = Maps.newHashMap();
    try {
        final List<String> children;
        try {
            children = zk.getChildren(root);
        } catch (NoNodeException e) {
            // No folder means no groups; hand back the still-empty map.
            return groupsByName;
        }
        for (final String child : children) {
            try {
                final DeploymentGroup group =
                    parse(zk.getData(Paths.configDeploymentGroup(child)), DeploymentGroup.class);
                groupsByName.put(group.getName(), group);
            } catch (NoNodeException e) {
                // The group was deleted after the folder was listed but before its node was read.
                log.debug("Ignoring deleted deployment group {}", child);
            }
        }
    } catch (KeeperException | IOException e) {
        throw new HeliosRuntimeException("getting deployment groups failed", e);
    }
    return groupsByName;
}
Also used : NoNodeException(org.apache.zookeeper.KeeperException.NoNodeException) ZooKeeperClient(com.spotify.helios.servicescommon.coordination.ZooKeeperClient) HeliosRuntimeException(com.spotify.helios.common.HeliosRuntimeException) IOException(java.io.IOException) DeploymentGroup(com.spotify.helios.common.descriptors.DeploymentGroup) KeeperException(org.apache.zookeeper.KeeperException)

Example 10 with ZooKeeperClient

use of com.spotify.helios.servicescommon.coordination.ZooKeeperClient in project helios by spotify.

the class ZooKeeperMasterModel method removeJob.

/**
 * Deletes a job from ZooKeeper.  Ensures that job is not currently running anywhere.
 *
 * <p>The job's config nodes are removed in a single transaction. Afterwards the job's history
 * is deleted on a best-effort basis: a failure there is logged and does not fail the call.
 *
 * @param id    the id of the job to remove
 * @param token the token checked against the job by {@code verifyToken} before anything is
 *              deleted
 * @return the job as it was read before removal
 * @throws JobDoesNotExistException   if the job cannot be found, or a node vanishes while the
 *                                    removal transaction runs
 * @throws JobStillDeployedException  if the transaction fails with a {@code NotEmptyException}
 * @throws TokenVerificationException presumably raised by {@code verifyToken} on a token
 *                                    mismatch; confirm against that helper
 * @throws HeliosRuntimeException     if the removal transaction fails for any other reason
 */
@Override
public Job removeJob(final JobId id, final String token) throws JobDoesNotExistException, JobStillDeployedException, TokenVerificationException {
    log.info("removing job: id={}", id);
    final ZooKeeperClient client = provider.get("removeJob");
    final Job job = getJob(client, id);
    if (job == null) {
        throw new JobDoesNotExistException(id);
    }
    verifyToken(token, job);
    // TODO (dano): handle retry failures
    try {
        final ImmutableList.Builder<ZooKeeperOperation> operations = ImmutableList.builder();
        final UUID jobCreationOperationId = getJobCreation(client, id);
        if (jobCreationOperationId != null) {
            operations.add(delete(Paths.configJobCreation(id, jobCreationOperationId)));
        }
        operations.add(
            delete(Paths.configJobHosts(id)),
            delete(Paths.configJobRefShort(id)),
            delete(Paths.configJob(id)),
            // Write a fresh random value to the jobs node so that its version changes whenever
            // there is a change down the tree. Effectively, make it that version == cVersion.
            // The charset is pinned so the stored bytes do not depend on the JVM's default.
            set(Paths.configJobs(),
                UUID.randomUUID().toString().getBytes(java.nio.charset.StandardCharsets.UTF_8)));
        client.transaction(operations.build());
    } catch (final NoNodeException e) {
        throw new JobDoesNotExistException(id);
    } catch (final NotEmptyException e) {
        // A non-empty node is reported as the job still being deployed on the listed hosts.
        throw new JobStillDeployedException(id, listJobHosts(client, id));
    } catch (final KeeperException e) {
        throw new HeliosRuntimeException("removing job " + id + " failed", e);
    }
    // Delete job history on a best effort basis
    try {
        client.deleteRecursive(Paths.historyJob(id));
    } catch (NoNodeException ignored) {
        // There's no history for this job
    } catch (KeeperException e) {
        log.warn("error removing job history for job {}", id, e);
    }
    return job;
}
Also used : NoNodeException(org.apache.zookeeper.KeeperException.NoNodeException) ZooKeeperClient(com.spotify.helios.servicescommon.coordination.ZooKeeperClient) ZooKeeperOperation(com.spotify.helios.servicescommon.coordination.ZooKeeperOperation) ImmutableList(com.google.common.collect.ImmutableList) HeliosRuntimeException(com.spotify.helios.common.HeliosRuntimeException) NotEmptyException(org.apache.zookeeper.KeeperException.NotEmptyException) Job(com.spotify.helios.common.descriptors.Job) UUID(java.util.UUID) KeeperException(org.apache.zookeeper.KeeperException)

Aggregations

ZooKeeperClient (com.spotify.helios.servicescommon.coordination.ZooKeeperClient)43 HeliosRuntimeException (com.spotify.helios.common.HeliosRuntimeException)20 KeeperException (org.apache.zookeeper.KeeperException)18 NoNodeException (org.apache.zookeeper.KeeperException.NoNodeException)15 DefaultZooKeeperClient (com.spotify.helios.servicescommon.coordination.DefaultZooKeeperClient)13 Job (com.spotify.helios.common.descriptors.Job)12 Test (org.junit.Test)12 DeploymentGroup (com.spotify.helios.common.descriptors.DeploymentGroup)11 ZooKeeperOperation (com.spotify.helios.servicescommon.coordination.ZooKeeperOperation)11 IOException (java.io.IOException)10 RolloutTask (com.spotify.helios.common.descriptors.RolloutTask)8 DeploymentGroupTasks (com.spotify.helios.common.descriptors.DeploymentGroupTasks)6 JobId (com.spotify.helios.common.descriptors.JobId)6 Deployment (com.spotify.helios.common.descriptors.Deployment)5 CuratorFramework (org.apache.curator.framework.CuratorFramework)5 ExponentialBackoffRetry (org.apache.curator.retry.ExponentialBackoffRetry)5 DeploymentGroupStatus (com.spotify.helios.common.descriptors.DeploymentGroupStatus)4 SetData (com.spotify.helios.servicescommon.coordination.SetData)4 RetryPolicy (org.apache.curator.RetryPolicy)4 BadVersionException (org.apache.zookeeper.KeeperException.BadVersionException)4