Search in sources :

Example 1 with HeliosRuntimeException

use of com.spotify.helios.common.HeliosRuntimeException in project helios by spotify.

the class MasterService method setupZookeeperClient.

/**
 * Creates and starts a ZooKeeper client for the master, and builds the registrar service
 * that uses it.
 *
 * <p>If a master password is configured, digest credentials are attached to the connection.
 * If ACLs are enabled, an ACL provider is created from the master and agent credentials and
 * the ACLs of the root node ("/") are compared with the desired ones and updated when they
 * differ. A failure while reading or updating the root ACLs is logged and does not abort
 * the setup.
 *
 * @param config The service configuration.
 * @return A started zookeeper client.
 * @throws HeliosRuntimeException if a password is set without a username, or if ACLs are
 *     enabled while the master username/password or agent username/digest is missing.
 */
private ZooKeeperClient setupZookeeperClient(final MasterConfig config) {
    ACLProvider aclProvider = null;
    List<AuthInfo> authorization = null;
    final String masterUser = config.getZookeeperAclMasterUser();
    final String masterPassword = config.getZooKeeperAclMasterPassword();
    final String agentUser = config.getZookeeperAclAgentUser();
    final String agentDigest = config.getZooKeeperAclAgentDigest();
    if (!isNullOrEmpty(masterPassword)) {
        if (isNullOrEmpty(masterUser)) {
            throw new HeliosRuntimeException("Master username must be set if a password is set");
        }
        // Encode with an explicit charset so the credential bytes do not depend on the
        // JVM's platform-default encoding (relevant for non-ASCII usernames/passwords).
        final byte[] credentials = String.format("%s:%s", masterUser, masterPassword)
            .getBytes(java.nio.charset.StandardCharsets.UTF_8);
        authorization = Lists.newArrayList(new AuthInfo("digest", credentials));
    }
    if (config.isZooKeeperEnableAcls()) {
        if (isNullOrEmpty(masterUser) || isNullOrEmpty(masterPassword)) {
            throw new HeliosRuntimeException("ZooKeeper ACLs enabled but master username and/or password not set");
        }
        if (isNullOrEmpty(agentUser) || isNullOrEmpty(agentDigest)) {
            throw new HeliosRuntimeException("ZooKeeper ACLs enabled but agent username and/or digest not set");
        }
        aclProvider = heliosAclProvider(masterUser, digest(masterUser, masterPassword), agentUser, agentDigest);
    }
    final RetryPolicy zooKeeperRetryPolicy = new ExponentialBackoffRetry(1000, 3);
    final CuratorFramework curator = curatorClientFactory.newClient(
        config.getZooKeeperConnectionString(),
        config.getZooKeeperSessionTimeoutMillis(),
        config.getZooKeeperConnectionTimeoutMillis(),
        zooKeeperRetryPolicy,
        aclProvider,
        authorization);
    final ZooKeeperClient client = new DefaultZooKeeperClient(curator, config.getZooKeeperClusterId());
    client.start();
    zkRegistrar = ZooKeeperRegistrarService.newBuilder()
        .setZooKeeperClient(client)
        .setZooKeeperRegistrar(new MasterZooKeeperRegistrar(config.getName()))
        .build();
    // The root node ACLs are reconciled here because this is where the ACL provider is
    // available.
    if (aclProvider != null) {
        // Best effort: any failure below is only logged, and the client is still returned.
        try {
            final List<ACL> curAcls = client.getAcl("/");
            final List<ACL> wantedAcls = aclProvider.getAclForPath("/");
            // Compare as sets so that ordering differences do not trigger an update.
            if (!Sets.newHashSet(curAcls).equals(Sets.newHashSet(wantedAcls))) {
                log.info("Current ACL's on the zookeeper root node differ from desired, updating: {} -> {}", curAcls, wantedAcls);
                client.getCuratorFramework().setACL().withACL(wantedAcls).forPath("/");
            }
        } catch (Exception e) {
            log.error("Failed to get/set ACLs on the zookeeper root node", e);
        }
    }
    return client;
}
Also used : ACLProvider(org.apache.curator.framework.api.ACLProvider) AuthInfo(org.apache.curator.framework.AuthInfo) ExponentialBackoffRetry(org.apache.curator.retry.ExponentialBackoffRetry) HeliosRuntimeException(com.spotify.helios.common.HeliosRuntimeException) ACL(org.apache.zookeeper.data.ACL) DefaultZooKeeperClient(com.spotify.helios.servicescommon.coordination.DefaultZooKeeperClient) HeliosRuntimeException(com.spotify.helios.common.HeliosRuntimeException) ConfigurationException(io.dropwizard.configuration.ConfigurationException) IOException(java.io.IOException) CuratorFramework(org.apache.curator.framework.CuratorFramework) ZooKeeperClient(com.spotify.helios.servicescommon.coordination.ZooKeeperClient) DefaultZooKeeperClient(com.spotify.helios.servicescommon.coordination.DefaultZooKeeperClient) RetryPolicy(org.apache.curator.RetryPolicy)

Example 2 with HeliosRuntimeException

use of com.spotify.helios.common.HeliosRuntimeException in project helios by spotify.

the class ZooKeeperMasterModel method removeJob.

/**
 * Deletes a job from ZooKeeper.  Ensures that job is not currently running anywhere.
 *
 * <p>The job's config nodes are removed in a single transaction. Afterwards the job history
 * is deleted on a best-effort basis: a failure there is logged and does not fail the call.
 *
 * @param id The id of the job to remove.
 * @param token The token that is verified against the job before removal.
 * @return The job that was removed.
 * @throws JobDoesNotExistException if the job is not found, or a node is missing while
 *     running the removal transaction.
 * @throws JobStillDeployedException if the transaction fails because a node is not empty.
 * @throws TokenVerificationException declared for the token check done by {@code verifyToken}.
 */
@Override
public Job removeJob(final JobId id, final String token) throws JobDoesNotExistException, JobStillDeployedException, TokenVerificationException {
    log.info("removing job: id={}", id);
    final ZooKeeperClient client = provider.get("removeJob");
    final Job job = getJob(client, id);
    if (job == null) {
        throw new JobDoesNotExistException(id);
    }
    verifyToken(token, job);
    // TODO (dano): handle retry failures
    try {
        final ImmutableList.Builder<ZooKeeperOperation> operations = ImmutableList.builder();
        final UUID jobCreationOperationId = getJobCreation(client, id);
        if (jobCreationOperationId != null) {
            operations.add(delete(Paths.configJobCreation(id, jobCreationOperationId)));
        }
        // Write a fresh random value to the jobs node in the same transaction.
        // NOTE(review): the original comment indicates this is done so the node's data
        // version changes together with its children (version == cVersion) - confirm.
        // The charset is explicit so the bytes do not depend on the JVM default encoding.
        final byte[] jobsNodeData = UUID.randomUUID().toString()
            .getBytes(java.nio.charset.StandardCharsets.UTF_8);
        operations.add(
            delete(Paths.configJobHosts(id)),
            delete(Paths.configJobRefShort(id)),
            delete(Paths.configJob(id)),
            set(Paths.configJobs(), jobsNodeData));
        client.transaction(operations.build());
    } catch (final NoNodeException e) {
        throw new JobDoesNotExistException(id);
    } catch (final NotEmptyException e) {
        throw new JobStillDeployedException(id, listJobHosts(client, id));
    } catch (final KeeperException e) {
        throw new HeliosRuntimeException("removing job " + id + " failed", e);
    }
    // Delete job history on a best effort basis
    try {
        client.deleteRecursive(Paths.historyJob(id));
    } catch (NoNodeException ignored) {
        // There's no history for this job
    } catch (KeeperException e) {
        log.warn("error removing job history for job {}", id, e);
    }
    return job;
}
Also used : NoNodeException(org.apache.zookeeper.KeeperException.NoNodeException) ZooKeeperClient(com.spotify.helios.servicescommon.coordination.ZooKeeperClient) ZooKeeperOperation(com.spotify.helios.servicescommon.coordination.ZooKeeperOperation) ImmutableList(com.google.common.collect.ImmutableList) HeliosRuntimeException(com.spotify.helios.common.HeliosRuntimeException) NotEmptyException(org.apache.zookeeper.KeeperException.NotEmptyException) Job(com.spotify.helios.common.descriptors.Job) UUID(java.util.UUID) KeeperException(org.apache.zookeeper.KeeperException)

Example 3 with HeliosRuntimeException

use of com.spotify.helios.common.HeliosRuntimeException in project helios by spotify.

the class ZooKeeperMasterModel method getTaskStatuses.

/**
 * Collects the task status of every job listed for {@code host}.
 *
 * <p>Jobs whose status is {@code null} or whose lookup throws a
 * {@link HeliosRuntimeException} are left out of the result.
 *
 * @param client The ZooKeeper client to read with.
 * @param host The host whose jobs are inspected.
 * @return A mutable map from job id to task status for the jobs that have one.
 */
private Map<JobId, TaskStatus> getTaskStatuses(final ZooKeeperClient client, final String host) {
    final Map<JobId, TaskStatus> result = Maps.newHashMap();
    for (final JobId jobId : listHostJobs(client, host)) {
        TaskStatus taskStatus = null;
        try {
            taskStatus = getTaskStatus(client, host, jobId);
        } catch (HeliosRuntimeException ignored) {
            // Deliberately skipped: one unreadable task status must not prevent returning
            // the information that is available for the other jobs.
        }
        if (taskStatus == null) {
            log.debug("Task {} status missing for host {}", jobId, host);
            continue;
        }
        result.put(jobId, taskStatus);
    }
    return result;
}
Also used : HeliosRuntimeException(com.spotify.helios.common.HeliosRuntimeException) TaskStatus(com.spotify.helios.common.descriptors.TaskStatus) JobId(com.spotify.helios.common.descriptors.JobId)

Example 4 with HeliosRuntimeException

use of com.spotify.helios.common.HeliosRuntimeException in project helios by spotify.

the class ZooKeeperMasterModel method getDeployment.

/**
 * Looks up the deployment of {@code jobId} on {@code host}.
 *
 * @param host The host to look at.
 * @param jobId The job whose deployment is requested.
 * @return The deployment built from the stored task, or {@code null} when the node for this
 *     host/job pair does not exist.
 * @throws HeliosRuntimeException if reading the node or parsing its data fails.
 */
@Override
public Deployment getDeployment(final String host, final JobId jobId) {
    final String hostJobPath = Paths.configHostJob(host, jobId);
    final ZooKeeperClient zk = provider.get("getDeployment");
    try {
        final Task storedTask = parse(zk.getData(hostJobPath), Task.class);
        return Deployment.of(
            jobId,
            storedTask.getGoal(),
            storedTask.getDeployerUser(),
            storedTask.getDeployerMaster(),
            storedTask.getDeploymentGroupName());
    } catch (KeeperException.NoNodeException e) {
        // No node for this host/job pair: report "no deployment" instead of failing.
        return null;
    } catch (KeeperException e) {
        throw new HeliosRuntimeException("getting deployment failed", e);
    } catch (IOException e) {
        throw new HeliosRuntimeException("getting deployment failed", e);
    }
}
Also used : Task(com.spotify.helios.common.descriptors.Task) RolloutTask(com.spotify.helios.common.descriptors.RolloutTask) ZooKeeperClient(com.spotify.helios.servicescommon.coordination.ZooKeeperClient) NoNodeException(org.apache.zookeeper.KeeperException.NoNodeException) HeliosRuntimeException(com.spotify.helios.common.HeliosRuntimeException) IOException(java.io.IOException) KeeperException(org.apache.zookeeper.KeeperException)

Example 5 with HeliosRuntimeException

use of com.spotify.helios.common.HeliosRuntimeException in project helios by spotify.

the class ZooKeeperMasterModel method getDeploymentGroupTasks.

/**
 * Reads the tasks of every deployment group together with the version of the node they
 * were read from.
 *
 * <p>Groups whose node is empty, or whose node disappears between listing and reading, are
 * skipped.
 *
 * @param client The ZooKeeper client to read with.
 * @return A map from deployment group name to its versioned tasks; an empty map when the
 *     parent node does not exist.
 * @throws HeliosRuntimeException if ZooKeeper access or parsing fails for any other reason.
 */
private Map<String, VersionedValue<DeploymentGroupTasks>> getDeploymentGroupTasks(final ZooKeeperClient client) {
    final String tasksRoot = Paths.statusDeploymentGroupTasks();
    try {
        final List<String> groupNames;
        try {
            groupNames = client.getChildren(tasksRoot);
        } catch (NoNodeException e) {
            return Collections.emptyMap();
        }
        final Map<String, VersionedValue<DeploymentGroupTasks>> tasksByGroup = Maps.newHashMap();
        for (final String groupName : groupNames) {
            final String groupPath = Paths.statusDeploymentGroupTasks(groupName);
            try {
                final Node groupNode = client.getNode(groupPath);
                final byte[] payload = groupNode.getBytes();
                final int nodeVersion = groupNode.getStat().getVersion();
                if (payload.length == 0) {
                    // This can happen because of ensurePath creates an empty node
                    log.debug("Ignoring empty deployment group tasks {}", groupName);
                    continue;
                }
                final DeploymentGroupTasks tasks = parse(payload, DeploymentGroupTasks.class);
                tasksByGroup.put(groupName, VersionedValue.of(tasks, nodeVersion));
            } catch (NoNodeException e) {
                // Ignore, the deployment group was deleted before we had a chance to read it.
                log.debug("Ignoring deleted deployment group tasks {}", groupName);
            }
        }
        return tasksByGroup;
    } catch (KeeperException | IOException e) {
        throw new HeliosRuntimeException("getting deployment group tasks failed", e);
    }
}
Also used : VersionedValue(com.spotify.helios.servicescommon.VersionedValue) NoNodeException(org.apache.zookeeper.KeeperException.NoNodeException) Node(com.spotify.helios.servicescommon.coordination.Node) HeliosRuntimeException(com.spotify.helios.common.HeliosRuntimeException) DeploymentGroupTasks(com.spotify.helios.common.descriptors.DeploymentGroupTasks) IOException(java.io.IOException) KeeperException(org.apache.zookeeper.KeeperException)

Aggregations

HeliosRuntimeException (com.spotify.helios.common.HeliosRuntimeException): 27; KeeperException (org.apache.zookeeper.KeeperException): 23; NoNodeException (org.apache.zookeeper.KeeperException.NoNodeException): 20; ZooKeeperClient (com.spotify.helios.servicescommon.coordination.ZooKeeperClient): 16; JobId (com.spotify.helios.common.descriptors.JobId): 10; IOException (java.io.IOException): 10; ZooKeeperOperation (com.spotify.helios.servicescommon.coordination.ZooKeeperOperation): 9; Job (com.spotify.helios.common.descriptors.Job): 7; RolloutTask (com.spotify.helios.common.descriptors.RolloutTask): 5; Task (com.spotify.helios.common.descriptors.Task): 5; Deployment (com.spotify.helios.common.descriptors.Deployment): 4; DeploymentGroup (com.spotify.helios.common.descriptors.DeploymentGroup): 4; UUID (java.util.UUID): 4; NodeExistsException (org.apache.zookeeper.KeeperException.NodeExistsException): 4; ImmutableList (com.google.common.collect.ImmutableList): 3; Node (com.spotify.helios.servicescommon.coordination.Node): 3; DeploymentGroupStatus (com.spotify.helios.common.descriptors.DeploymentGroupStatus): 2; HostNotFoundException (com.spotify.helios.master.HostNotFoundException): 2; RollingUpdateOp (com.spotify.helios.rollingupdate.RollingUpdateOp): 2; DefaultZooKeeperClient (com.spotify.helios.servicescommon.coordination.DefaultZooKeeperClient): 2