Search in sources :

Example 11 with HeliosRuntimeException

use of com.spotify.helios.common.HeliosRuntimeException in project helios by spotify.

the class ZooKeeperMasterModel method getDeployOperations.

private List<ZooKeeperOperation> getDeployOperations(final ZooKeeperClient client, final String host, final Deployment deployment, final String token) throws JobDoesNotExistException, JobAlreadyDeployedException, TokenVerificationException, HostNotFoundException, JobPortAllocationConflictException {
    assertHostExists(client, host);
    final JobId id = deployment.getJobId();
    final Job job = getJob(id);
    if (job == null) {
        throw new JobDoesNotExistException(id);
    }
    verifyToken(token, job);
    final UUID operationId = UUID.randomUUID();
    final String jobPath = Paths.configJob(id);
    final String taskPath = Paths.configHostJob(host, id);
    final String taskCreationPath = Paths.configHostJobCreation(host, id, operationId);
    final List<Integer> staticPorts = staticPorts(job);
    final Map<String, byte[]> portNodes = Maps.newHashMap();
    final byte[] idJson = id.toJsonBytes();
    for (final int port : staticPorts) {
        final String path = Paths.configHostPort(host, port);
        portNodes.put(path, idJson);
    }
    final Task task = new Task(job, deployment.getGoal(), deployment.getDeployerUser(), deployment.getDeployerMaster(), deployment.getDeploymentGroupName());
    final List<ZooKeeperOperation> operations = Lists.newArrayList(check(jobPath), create(portNodes), create(Paths.configJobHost(id, host)));
    // Attempt to read a task here.
    try {
        client.getNode(taskPath);
        // if we get here the node exists already
        throw new JobAlreadyDeployedException(host, id);
    } catch (NoNodeException e) {
        // if the real reason of the failure is that the job is already deployed.
        for (final int port : staticPorts) {
            checkForPortConflicts(client, host, port, id);
        }
        operations.add(create(taskPath, task));
        operations.add(create(taskCreationPath));
    } catch (KeeperException e) {
        throw new HeliosRuntimeException("reading existing task description failed", e);
    }
    return ImmutableList.copyOf(operations);
}
Also used : Task(com.spotify.helios.common.descriptors.Task) RolloutTask(com.spotify.helios.common.descriptors.RolloutTask) NoNodeException(org.apache.zookeeper.KeeperException.NoNodeException) ZooKeeperOperation(com.spotify.helios.servicescommon.coordination.ZooKeeperOperation) HeliosRuntimeException(com.spotify.helios.common.HeliosRuntimeException) Job(com.spotify.helios.common.descriptors.Job) UUID(java.util.UUID) JobId(com.spotify.helios.common.descriptors.JobId) KeeperException(org.apache.zookeeper.KeeperException)

Example 12 with HeliosRuntimeException

use of com.spotify.helios.common.HeliosRuntimeException in project helios by spotify.

the class ZooKeeperMasterModel method updateDeploymentGroupHosts.

@Override
public void updateDeploymentGroupHosts(final String groupName, final List<String> hosts) throws DeploymentGroupDoesNotExistException {
    log.debug("updating deployment-group hosts: name={}", groupName);
    final ZooKeeperClient client = provider.get("updateDeploymentGroupHosts");
    try {
        final DeploymentGroupStatus status = getDeploymentGroupStatus(groupName);
        if (!allowHostChange(status)) {
            return;
        }
        // statusDeploymentGroupRemovedHosts may not exist for deployment groups created before it was
        // introduced.
        client.ensurePathAndSetData(Paths.statusDeploymentGroupRemovedHosts(groupName), Json.asBytesUnchecked(emptyList()));
        final List<String> curHosts = getHosts(client, Paths.statusDeploymentGroupHosts(groupName));
        final List<String> previouslyRemovedHosts = getHosts(client, Paths.statusDeploymentGroupRemovedHosts(groupName));
        final List<String> removedHosts = removedHosts(curHosts, hosts, previouslyRemovedHosts);
        if (hosts.equals(curHosts) && removedHosts.equals(previouslyRemovedHosts)) {
            return;
        }
        log.info("for deployment-group name={}, curHosts={}, new hosts={}, " + "previouslyRemovedHosts={}, derived removedHosts={}", groupName, curHosts, hosts, previouslyRemovedHosts, removedHosts);
        final List<ZooKeeperOperation> ops = Lists.newArrayList();
        ops.add(set(Paths.statusDeploymentGroupHosts(groupName), Json.asBytes(hosts)));
        ops.add(set(Paths.statusDeploymentGroupRemovedHosts(groupName), Json.asBytes(removedHosts)));
        final Node dgn = client.getNode(Paths.configDeploymentGroup(groupName));
        final Integer deploymentGroupVersion = dgn.getStat().getVersion();
        DeploymentGroup deploymentGroup = Json.read(dgn.getBytes(), DeploymentGroup.class);
        List<Map<String, Object>> events = ImmutableList.of();
        if (deploymentGroup.getJobId() != null && updateOnHostChange(deploymentGroup, status)) {
            deploymentGroup = deploymentGroup.toBuilder().setRollingUpdateReason(HOSTS_CHANGED).build();
            // Fail transaction if the deployment group has been updated elsewhere.
            ops.add(check(Paths.configDeploymentGroup(groupName), deploymentGroupVersion));
            // NOTE: If the DG was removed this set() cause the transaction to fail, because
            // removing the DG removes this node. It's *important* that there's an operation that
            // causes the transaction to fail if the DG was removed or we'll end up with
            // inconsistent state.
            ops.add(set(Paths.configDeploymentGroup(deploymentGroup.getName()), deploymentGroup));
            final RollingUpdateOp op = getInitRollingUpdateOps(deploymentGroup, hosts, removedHosts, client);
            ops.addAll(op.operations());
            events = op.events();
        }
        log.info("starting zookeeper transaction for updateDeploymentGroupHosts on deployment-group: " + "name={} jobId={} operations={}", groupName, deploymentGroup.getJobId(), ops);
        client.transaction(ops);
        emitEvents(deploymentGroupEventTopic, events);
    } catch (BadVersionException e) {
        // some other master beat us in processing this host update. not exceptional.
        // ideally we would check the path in the exception, but curator doesn't provide a path
        // for exceptions thrown as part of a transaction.
        log.info("zookeeper transaction for updateDeploymentGroupHosts on deployment-group was " + "processed by another master: name={}", groupName);
    } catch (NoNodeException e) {
        throw new DeploymentGroupDoesNotExistException(groupName, e);
    } catch (KeeperException | IOException e) {
        throw new HeliosRuntimeException("updating deployment group hosts failed", e);
    }
}
Also used : NoNodeException(org.apache.zookeeper.KeeperException.NoNodeException) ZooKeeperOperation(com.spotify.helios.servicescommon.coordination.ZooKeeperOperation) Node(com.spotify.helios.servicescommon.coordination.Node) HeliosRuntimeException(com.spotify.helios.common.HeliosRuntimeException) DeploymentGroupStatus(com.spotify.helios.common.descriptors.DeploymentGroupStatus) BadVersionException(org.apache.zookeeper.KeeperException.BadVersionException) IOException(java.io.IOException) RollingUpdateOp(com.spotify.helios.rollingupdate.RollingUpdateOp) ZooKeeperClient(com.spotify.helios.servicescommon.coordination.ZooKeeperClient) Map(java.util.Map) ImmutableMap(com.google.common.collect.ImmutableMap) Collections.emptyMap(java.util.Collections.emptyMap) DeploymentGroup(com.spotify.helios.common.descriptors.DeploymentGroup) KeeperException(org.apache.zookeeper.KeeperException)

Example 13 with HeliosRuntimeException

use of com.spotify.helios.common.HeliosRuntimeException in project helios by spotify.

the class ZooKeeperMasterModel method rollingUpdate.

@Override
public void rollingUpdate(final DeploymentGroup deploymentGroup, final JobId jobId, final RolloutOptions options) throws DeploymentGroupDoesNotExistException, JobDoesNotExistException {
    checkNotNull(deploymentGroup, "deploymentGroup");
    log.info("preparing to initiate rolling-update on deployment-group: name={}, jobId={}", deploymentGroup.getName(), jobId);
    final DeploymentGroup updated = deploymentGroup.toBuilder().setJobId(jobId).setRolloutOptions(options).setRollingUpdateReason(MANUAL).build();
    if (getJob(jobId) == null) {
        throw new JobDoesNotExistException(jobId);
    }
    final List<ZooKeeperOperation> operations = Lists.newArrayList();
    final ZooKeeperClient client = provider.get("rollingUpdate");
    operations.add(set(Paths.configDeploymentGroup(updated.getName()), updated));
    try {
        final RollingUpdateOp op = getInitRollingUpdateOps(updated, client);
        operations.addAll(op.operations());
        log.info("starting zookeeper transaction for rolling-update on " + "deployment-group name={} jobId={}. List of operations: {}", deploymentGroup.getName(), jobId, operations);
        client.transaction(operations);
        emitEvents(deploymentGroupEventTopic, op.events());
        log.info("initiated rolling-update on deployment-group: name={}, jobId={}", deploymentGroup.getName(), jobId);
    } catch (final NoNodeException e) {
        throw new DeploymentGroupDoesNotExistException(deploymentGroup.getName());
    } catch (final KeeperException e) {
        throw new HeliosRuntimeException("rolling-update on deployment-group " + deploymentGroup.getName() + " failed", e);
    }
}
Also used : RollingUpdateOp(com.spotify.helios.rollingupdate.RollingUpdateOp) NoNodeException(org.apache.zookeeper.KeeperException.NoNodeException) ZooKeeperOperation(com.spotify.helios.servicescommon.coordination.ZooKeeperOperation) ZooKeeperClient(com.spotify.helios.servicescommon.coordination.ZooKeeperClient) HeliosRuntimeException(com.spotify.helios.common.HeliosRuntimeException) DeploymentGroup(com.spotify.helios.common.descriptors.DeploymentGroup) KeeperException(org.apache.zookeeper.KeeperException)

Example 14 with HeliosRuntimeException

use of com.spotify.helios.common.HeliosRuntimeException in project helios by spotify.

the class AgentService method setupZookeeperClient.

/**
   * Create a Zookeeper client and create the control and state nodes if needed.
   *
   * @param config The service configuration.
   * @return A zookeeper client.
   */
private ZooKeeperClient setupZookeeperClient(final AgentConfig config, final String id, final CountDownLatch zkRegistrationSignal) {
    ACLProvider aclProvider = null;
    List<AuthInfo> authorization = null;
    final String agentUser = config.getZookeeperAclAgentUser();
    final String agentPassword = config.getZooKeeperAclAgentPassword();
    final String masterUser = config.getZookeeperAclMasterUser();
    final String masterDigest = config.getZooKeeperAclMasterDigest();
    if (!isNullOrEmpty(agentPassword)) {
        if (isNullOrEmpty(agentUser)) {
            throw new HeliosRuntimeException("Agent username must be set if a password is set");
        }
        authorization = Lists.newArrayList(new AuthInfo("digest", String.format("%s:%s", agentUser, agentPassword).getBytes()));
    }
    if (config.isZooKeeperEnableAcls()) {
        if (isNullOrEmpty(agentUser) || isNullOrEmpty(agentPassword)) {
            throw new HeliosRuntimeException("ZooKeeper ACLs enabled but agent username and/or password not set");
        }
        if (isNullOrEmpty(masterUser) || isNullOrEmpty(masterDigest)) {
            throw new HeliosRuntimeException("ZooKeeper ACLs enabled but master username and/or digest not set");
        }
        aclProvider = heliosAclProvider(masterUser, masterDigest, agentUser, digest(agentUser, agentPassword));
    }
    final RetryPolicy zooKeeperRetryPolicy = new ExponentialBackoffRetry(1000, 3);
    final CuratorFramework curator = new CuratorClientFactoryImpl().newClient(config.getZooKeeperConnectionString(), config.getZooKeeperSessionTimeoutMillis(), config.getZooKeeperConnectionTimeoutMillis(), zooKeeperRetryPolicy, aclProvider, authorization);
    final ZooKeeperClient client = new DefaultZooKeeperClient(curator, config.getZooKeeperClusterId());
    client.start();
    // Register the agent
    final AgentZooKeeperRegistrar agentZooKeeperRegistrar = new AgentZooKeeperRegistrar(config.getName(), id, config.getZooKeeperRegistrationTtlMinutes(), new SystemClock());
    zkRegistrar = ZooKeeperRegistrarService.newBuilder().setZooKeeperClient(client).setZooKeeperRegistrar(agentZooKeeperRegistrar).setZkRegistrationSignal(zkRegistrationSignal).build();
    return client;
}
Also used : ACLProvider(org.apache.curator.framework.api.ACLProvider) AuthInfo(org.apache.curator.framework.AuthInfo) SystemClock(com.spotify.helios.common.SystemClock) ExponentialBackoffRetry(org.apache.curator.retry.ExponentialBackoffRetry) CuratorClientFactoryImpl(com.spotify.helios.servicescommon.coordination.CuratorClientFactoryImpl) HeliosRuntimeException(com.spotify.helios.common.HeliosRuntimeException) DefaultZooKeeperClient(com.spotify.helios.servicescommon.coordination.DefaultZooKeeperClient) CuratorFramework(org.apache.curator.framework.CuratorFramework) ZooKeeperClient(com.spotify.helios.servicescommon.coordination.ZooKeeperClient) DefaultZooKeeperClient(com.spotify.helios.servicescommon.coordination.DefaultZooKeeperClient) RetryPolicy(org.apache.curator.RetryPolicy)

Example 15 with HeliosRuntimeException

use of com.spotify.helios.common.HeliosRuntimeException in project helios by spotify.

the class ZooKeeperMasterModel method updateDeployment.

/**
   * Used to update the existing deployment of a job.
   */
@Override
public void updateDeployment(final String host, final Deployment deployment, final String token) throws HostNotFoundException, JobNotDeployedException, TokenVerificationException {
    log.info("updating deployment {}: {}", deployment, host);
    final ZooKeeperClient client = provider.get("updateDeployment");
    final JobId jobId = deployment.getJobId();
    final Job job = getJob(client, jobId);
    final Deployment existingDeployment = getDeployment(host, jobId);
    if (job == null) {
        throw new JobNotDeployedException(host, jobId);
    }
    verifyToken(token, job);
    assertHostExists(client, host);
    assertTaskExists(client, host, deployment.getJobId());
    final String path = Paths.configHostJob(host, jobId);
    final Task task = new Task(job, deployment.getGoal(), existingDeployment.getDeployerUser(), existingDeployment.getDeployerMaster(), existingDeployment.getDeploymentGroupName());
    try {
        client.setData(path, task.toJsonBytes());
    } catch (Exception e) {
        throw new HeliosRuntimeException("updating deployment " + deployment + " on host " + host + " failed", e);
    }
}
Also used : Task(com.spotify.helios.common.descriptors.Task) RolloutTask(com.spotify.helios.common.descriptors.RolloutTask) ZooKeeperClient(com.spotify.helios.servicescommon.coordination.ZooKeeperClient) HeliosRuntimeException(com.spotify.helios.common.HeliosRuntimeException) Deployment(com.spotify.helios.common.descriptors.Deployment) Job(com.spotify.helios.common.descriptors.Job) JobId(com.spotify.helios.common.descriptors.JobId) HeliosRuntimeException(com.spotify.helios.common.HeliosRuntimeException) JsonParseException(com.fasterxml.jackson.core.JsonParseException) NotEmptyException(org.apache.zookeeper.KeeperException.NotEmptyException) JsonMappingException(com.fasterxml.jackson.databind.JsonMappingException) BadVersionException(org.apache.zookeeper.KeeperException.BadVersionException) KeeperException(org.apache.zookeeper.KeeperException) IOException(java.io.IOException) NoNodeException(org.apache.zookeeper.KeeperException.NoNodeException) NodeExistsException(org.apache.zookeeper.KeeperException.NodeExistsException)

Aggregations

HeliosRuntimeException (com.spotify.helios.common.HeliosRuntimeException)27 KeeperException (org.apache.zookeeper.KeeperException)23 NoNodeException (org.apache.zookeeper.KeeperException.NoNodeException)20 ZooKeeperClient (com.spotify.helios.servicescommon.coordination.ZooKeeperClient)16 JobId (com.spotify.helios.common.descriptors.JobId)10 IOException (java.io.IOException)10 ZooKeeperOperation (com.spotify.helios.servicescommon.coordination.ZooKeeperOperation)9 Job (com.spotify.helios.common.descriptors.Job)7 RolloutTask (com.spotify.helios.common.descriptors.RolloutTask)5 Task (com.spotify.helios.common.descriptors.Task)5 Deployment (com.spotify.helios.common.descriptors.Deployment)4 DeploymentGroup (com.spotify.helios.common.descriptors.DeploymentGroup)4 UUID (java.util.UUID)4 NodeExistsException (org.apache.zookeeper.KeeperException.NodeExistsException)4 ImmutableList (com.google.common.collect.ImmutableList)3 Node (com.spotify.helios.servicescommon.coordination.Node)3 DeploymentGroupStatus (com.spotify.helios.common.descriptors.DeploymentGroupStatus)2 HostNotFoundException (com.spotify.helios.master.HostNotFoundException)2 RollingUpdateOp (com.spotify.helios.rollingupdate.RollingUpdateOp)2 DefaultZooKeeperClient (com.spotify.helios.servicescommon.coordination.DefaultZooKeeperClient)2