Search in sources :

Example 11 with SingularityPendingRequest

use of com.hubspot.singularity.SingularityPendingRequest in project Singularity by HubSpot.

the class SingularityMesosStatusUpdateHandler method unsafeProcessStatusUpdate.

/**
 * Processes a single Mesos task status update for the given task.
 *
 * <p>The update is handled in one of three ways:
 * <ul>
 *   <li><b>Recovery update</b> (as decided by {@code isRecoveryStatusUpdate}): tries to reactivate the
 *       task, or asks for it to be killed / cleaned up when it cannot be reactivated.</li>
 *   <li><b>Duplicate or ignorable update</b> (as decided by {@code isDuplicateOrIgnorableStatusUpdate}):
 *       only the new status holder is saved.</li>
 *   <li><b>Regular update</b>: lost-task bookkeeping, health check / new-task-check scheduling for
 *       non-terminal states, task history persistence, and completed-task handling for terminal states.</li>
 * </ul>
 *
 * @param status    the raw Mesos task status
 * @param taskIdObj the Singularity task id the status refers to
 * @return {@code KILL_TASK} when a recovered task cannot be reactivated, {@code IGNORED} for
 *         duplicate/ignorable updates, {@code DONE} otherwise
 */
private StatusUpdateResult unsafeProcessStatusUpdate(Protos.TaskStatus status, SingularityTaskId taskIdObj) {
    final String taskId = status.getTaskId().getValue();
    // Default to "now"; prefer the timestamp carried by the status when it has one.
    long timestamp = System.currentTimeMillis();
    if (status.hasTimestamp()) {
        // The status timestamp is scaled by 1000 (presumably seconds -> milliseconds; TODO confirm against the Mesos proto).
        timestamp = (long) (status.getTimestamp() * 1000);
    }
    long now = System.currentTimeMillis();
    // Difference between local receipt time and the update's own timestamp; recorded in statusUpdateDeltas below.
    long delta = now - timestamp;
    LOG.debug("Update: task {} is now {} ({}) at {} (delta: {})", taskId, status.getState(), status.getMessage(), timestamp, JavaUtils.durationFromMillis(delta));
    statusUpdateDeltas.update(delta);
    final SingularityTaskStatusHolder newTaskStatusHolder = new SingularityTaskStatusHolder(taskIdObj, Optional.of(mesosProtosUtils.taskStatusFromProtos(status)), System.currentTimeMillis(), serverId, Optional.<String>empty());
    final Optional<SingularityTaskStatusHolder> previousTaskStatusHolder = taskManager.getLastActiveTaskStatus(taskIdObj);
    final ExtendedTaskState taskState = MesosUtils.fromTaskState(status.getState());
    // Surface resource-mismatch task errors loudly; the exception is created only to attach a stack trace to the log entry and is never thrown.
    if (taskState == ExtendedTaskState.TASK_ERROR && status.getMessage() != null && status.getMessage().contains(RESOURCE_MISMATCH_ERR)) {
        LOG.error("Possible duplicate resource allocation", new IllegalStateException(String.format("Duplicate resource allocation for %s: %s", taskId, status.getMessage())));
    }
    if (isRecoveryStatusUpdate(previousTaskStatusHolder, status.getReason(), taskState, newTaskStatusHolder)) {
        LOG.info("Found recovery status update with reason {} for task {}", status.getReason(), taskId);
        final Optional<SingularityTaskHistory> maybeTaskHistory = taskManager.getTaskHistory(taskIdObj);
        // Without a task history that has a last update there is nothing to reactivate: ask for the task to be killed.
        if (!maybeTaskHistory.isPresent() || !maybeTaskHistory.get().getLastTaskUpdate().isPresent()) {
            LOG.warn("Task {} not found to recover, it may have already been persisted. Triggering a kill via mesos", taskIdObj);
            return StatusUpdateResult.KILL_TASK;
        } else if (status.getReason() == Reason.REASON_AGENT_REREGISTERED) {
            // The agent came back: check whether a load balancer REMOVE was already recorded for this task.
            Optional<SingularityLoadBalancerUpdate> maybeLbUpdate = taskManager.getLoadBalancerState(taskIdObj, LoadBalancerRequestType.REMOVE);
            if (maybeLbUpdate.isPresent()) {
                LOG.info("LB removal for recovered task {} was already started. Attempting to clear and start as new task", taskId);
                boolean canRecoverLbState = true;
                // If the stored LB update is in progress, re-check the live state through the LB client.
                if (maybeLbUpdate.get().getLoadBalancerState().isInProgress()) {
                    try {
                        if (lbClient.getState(maybeLbUpdate.get().getLoadBalancerRequestId()).getLoadBalancerState().isInProgress()) {
                            // We don't want to block here and wait for LB removal to finish in case it is stuck. Mark this task for cleaning
                            canRecoverLbState = false;
                        }
                    } catch (Exception e) {
                        // Failure to verify is treated the same as "cannot recover".
                        LOG.warn("Could not verify LB state for {}", taskId, e);
                        canRecoverLbState = false;
                    }
                }
                // Reactivate only when the LB state is recoverable and the task belongs to the request's currently active deploy.
                if (canRecoverLbState && deployManager.getActiveDeployId(taskIdObj.getRequestId()).map(d -> d.equals(taskIdObj.getDeployId())).orElse(false) && taskManager.reactivateTask(taskIdObj, taskState, newTaskStatusHolder, Optional.ofNullable(status.getMessage()), status.hasReason() ? Optional.of(status.getReason().name()) : Optional.empty())) {
                    Optional<SingularityTask> maybeTask = taskManager.getTask(taskIdObj);
                    Optional<SingularityRequestWithState> maybeRequest = requestManager.getRequest(taskIdObj.getRequestId());
                    if (maybeTask.isPresent() && maybeRequest.isPresent() && maybeRequest.get().getState().isRunnable()) {
                        LOG.info("Task {} can be recovered. Clearing LB state and enqueuing check as new task", taskId);
                        taskManager.clearLoadBalancerHistory(taskIdObj);
                        newTaskChecker.enqueueCheckWithDelay(maybeTask.get(), 0, healthchecker);
                        requestManager.addToPendingQueue(new SingularityPendingRequest(taskIdObj.getRequestId(), taskIdObj.getDeployId(), now, Optional.empty(), PendingType.TASK_RECOVERED, Optional.empty(), Optional.of(String.format("Agent %s recovered", status.getAgentId().getValue()))));
                        return StatusUpdateResult.DONE;
                    }
                    // NOTE(review): when reactivateTask succeeded but the task data is missing or the request is not
                    // runnable, control falls through to the generic path below, which calls reactivateTask a second
                    // time - confirm this is intended.
                } else {
                    // Cannot reactivate: schedule a cleanup for the task and add a TASK_RECOVERED pending request.
                    LOG.info("Could not recover task {}, will clean up", taskId);
                    taskManager.createTaskCleanup(new SingularityTaskCleanup(Optional.empty(), TaskCleanupType.DECOMISSIONING, System.currentTimeMillis(), taskIdObj, Optional.of("Agent re-registered after load balancer removal started. Task cannot be reactivated."), Optional.empty(), Optional.empty()));
                    requestManager.addToPendingQueue(new SingularityPendingRequest(taskIdObj.getRequestId(), taskIdObj.getDeployId(), now, Optional.empty(), PendingType.TASK_RECOVERED, Optional.empty(), Optional.of(String.format("Agent %s recovered", status.getAgentId().getValue()))));
                    return StatusUpdateResult.DONE;
                }
            }
        }
        // Check tasks with no lb component or not yet removed from LB
        boolean reactivated = deployManager.getActiveDeployId(taskIdObj.getRequestId()).map(d -> d.equals(taskIdObj.getDeployId())).orElse(false) && taskManager.reactivateTask(taskIdObj, taskState, newTaskStatusHolder, Optional.ofNullable(status.getMessage()), status.hasReason() ? Optional.of(status.getReason().name()) : Optional.empty());
        // A TASK_RECOVERED pending request is added regardless of whether reactivation succeeded.
        requestManager.addToPendingQueue(new SingularityPendingRequest(taskIdObj.getRequestId(), taskIdObj.getDeployId(), now, Optional.empty(), PendingType.TASK_RECOVERED, Optional.empty(), Optional.of(String.format("Agent %s recovered", status.getAgentId().getValue()))));
        if (reactivated) {
            return StatusUpdateResult.DONE;
        } else {
            return StatusUpdateResult.KILL_TASK;
        }
    } else if (isDuplicateOrIgnorableStatusUpdate(previousTaskStatusHolder, newTaskStatusHolder)) {
        // Nothing to process, but the latest status holder is still saved.
        LOG.trace("Ignoring status update {} to {}", taskState, taskIdObj);
        saveNewTaskStatusHolder(taskIdObj, newTaskStatusHolder, taskState);
        return StatusUpdateResult.IGNORED;
    }
    final Optional<SingularityTask> task = taskManager.getTask(taskIdObj);
    if (status.getState() == TaskState.TASK_LOST) {
        // Reasons treated here as a Mesos-side failure rather than a failure of the task itself.
        boolean isMesosFailure = status.getReason() == Reason.REASON_INVALID_OFFERS || status.getReason() == Reason.REASON_AGENT_REMOVED || status.getReason() == Reason.REASON_AGENT_RESTARTED || status.getReason() == Reason.REASON_AGENT_UNKNOWN || status.getReason() == Reason.REASON_MASTER_DISCONNECTED || status.getReason() == Reason.REASON_AGENT_DISCONNECTED;
        RequestType requestType = task.isPresent() ? task.get().getTaskRequest().getRequest().getRequestType() : null;
        // Only non-long-running request types are relaunched from here; a missing task (null type) is never relaunchable.
        boolean isRelaunchable = requestType != null && !requestType.isLongRunning();
        if (isMesosFailure && isRelaunchable) {
            // NOTE(review): `task` is the Optional wrapper, so the log line prints the Optional rather than the task itself.
            LOG.info("Relaunching lost task {}", task);
            relaunchTask(task.get());
        }
        lostTasksMeter.mark();
        if (configuration.getDisasterDetection().isEnabled()) {
            taskLostReasons.add(status.getReason());
        }
    }
    if (!taskState.isDone()) {
        if (task.isPresent()) {
            final Optional<SingularityPendingDeploy> pendingDeploy = deployManager.getPendingDeploy(taskIdObj.getRequestId());
            Optional<SingularityRequestWithState> requestWithState = Optional.empty();
            if (taskState == ExtendedTaskState.TASK_RUNNING) {
                requestWithState = requestManager.getRequest(taskIdObj.getRequestId());
                healthchecker.enqueueHealthcheck(task.get(), pendingDeploy, requestWithState);
            }
            // The new-task check is enqueued only when the task does not belong to the request's pending deploy.
            if (!pendingDeploy.isPresent() || !pendingDeploy.get().getDeployMarker().getDeployId().equals(taskIdObj.getDeployId())) {
                // Fetch the request only if the RUNNING branch above did not already return one.
                if (!requestWithState.isPresent()) {
                    requestWithState = requestManager.getRequest(taskIdObj.getRequestId());
                }
                newTaskChecker.enqueueNewTaskCheck(task.get(), requestWithState, healthchecker);
            }
        } else {
            // Active state reported for a task with no stored task data: notify and log, then continue processing.
            final String message = String.format("Task %s is active but is missing task data", taskId);
            exceptionNotifier.notify(message);
            LOG.error(message);
        }
    }
    final Optional<String> statusMessage = getStatusMessage(status, task);
    // The history update uses the update's own timestamp (see top of method), not the local receipt time.
    final SingularityTaskHistoryUpdate taskUpdate = new SingularityTaskHistoryUpdate(taskIdObj, timestamp, taskState, statusMessage, status.hasReason() ? Optional.of(status.getReason().name()) : Optional.<String>empty());
    final SingularityCreateResult taskHistoryUpdateCreateResult = taskManager.saveTaskHistoryUpdate(taskUpdate);
    logSupport.checkDirectoryAndContainerId(taskIdObj);
    if (taskState.isDone()) {
        // Terminal state: cancel outstanding checks, delete the killed record, then run completed-task handling.
        healthchecker.cancelHealthcheck(taskId);
        newTaskChecker.cancelNewTaskCheck(taskId);
        taskManager.deleteKilledRecord(taskIdObj);
        handleCompletedTaskState(status, taskIdObj, taskState, taskHistoryUpdateCreateResult, task, timestamp);
    }
    saveNewTaskStatusHolder(taskIdObj, newTaskStatusHolder, taskState);
    return StatusUpdateResult.DONE;
}
Also used : Optional(java.util.Optional) SingularityPendingRequest(com.hubspot.singularity.SingularityPendingRequest) SingularityCreateResult(com.hubspot.singularity.SingularityCreateResult) InvalidSingularityTaskIdException(com.hubspot.singularity.InvalidSingularityTaskIdException) SingularityTranscoderException(com.hubspot.singularity.data.transcoders.SingularityTranscoderException) ExtendedTaskState(com.hubspot.singularity.ExtendedTaskState) SingularityTask(com.hubspot.singularity.SingularityTask) SingularityTaskHistoryUpdate(com.hubspot.singularity.SingularityTaskHistoryUpdate) SingularityPendingDeploy(com.hubspot.singularity.SingularityPendingDeploy) SingularityTaskHistory(com.hubspot.singularity.SingularityTaskHistory) SingularityRequestWithState(com.hubspot.singularity.SingularityRequestWithState) SingularityTaskCleanup(com.hubspot.singularity.SingularityTaskCleanup) SingularityTaskStatusHolder(com.hubspot.singularity.SingularityTaskStatusHolder) RequestType(com.hubspot.singularity.RequestType) LoadBalancerRequestType(com.hubspot.singularity.LoadBalancerRequestType)

Example 12 with SingularityPendingRequest

use of com.hubspot.singularity.SingularityPendingRequest in project Singularity by HubSpot.

the class SingularityMesosStatusUpdateHandler method relaunchTask.

/**
 * Adds a {@code RETRY} pending request for the given task to the pending queue.
 *
 * <p>The new pending request takes its request id and deploy id from the task's task request, and
 * copies the user, run id, command line arguments, skip-healthchecks flag, message, resources,
 * S3 uploader files, run-as-user override, env overrides, extra artifacts and action id from the
 * task's original pending task. {@code runAt} is taken from the original pending task id's
 * {@code nextRunAt}; the timestamp is the current time.
 *
 * @param task the task to relaunch
 */
private void relaunchTask(SingularityTask task) {
    final SingularityPendingTask originalPendingTask = task.getTaskRequest().getPendingTask();
    requestManager.addToPendingQueue(
        new SingularityPendingRequestBuilder()
            .setRequestId(task.getTaskRequest().getRequest().getId())
            .setDeployId(task.getTaskRequest().getDeploy().getId())
            .setPendingType(PendingType.RETRY)
            .setUser(originalPendingTask.getUser())
            .setRunId(originalPendingTask.getRunId())
            .setCmdLineArgsList(originalPendingTask.getCmdLineArgsList())
            .setSkipHealthchecks(originalPendingTask.getSkipHealthchecks())
            .setMessage(originalPendingTask.getMessage())
            .setResources(originalPendingTask.getResources())
            .setS3UploaderAdditionalFiles(originalPendingTask.getS3UploaderAdditionalFiles())
            .setRunAsUserOverride(originalPendingTask.getRunAsUserOverride())
            .setEnvOverrides(originalPendingTask.getEnvOverrides())
            .setExtraArtifacts(originalPendingTask.getExtraArtifacts())
            .setActionId(originalPendingTask.getActionId())
            .setRunAt(originalPendingTask.getPendingTaskId().getNextRunAt())
            .setTimestamp(System.currentTimeMillis())
            .build()
    );
}
Also used : SingularityPendingRequestBuilder(com.hubspot.singularity.SingularityPendingRequestBuilder) SingularityPendingRequest(com.hubspot.singularity.SingularityPendingRequest) SingularityPendingTask(com.hubspot.singularity.SingularityPendingTask)

Example 13 with SingularityPendingRequest

use of com.hubspot.singularity.SingularityPendingRequest in project Singularity by HubSpot.

the class DeployResource method deploy.

/**
 * Validates a deploy request and registers it as the pending deploy for its request.
 *
 * <p>Steps, in order: check that the deploy action is enabled; require a deploy object and a
 * request id; authorize the user for WRITE on the request and the deploy; validate an optional
 * updated request; check scale and (unless unpausing or paused deploys are allowed) that the
 * request is not paused; validate the deploy; create the deploy marker, progress and pending
 * deploy; then, under the request lock, create the deploy and pending deploy records. If a
 * pending deploy already exists the call fails with a conflict (after re-pausing a request that
 * was unpaused for this deploy). Otherwise the deploy is saved and, when the request is
 * deployable, a {@code NEW_DEPLOY} pending request is queued.
 *
 * @param deployRequest the deploy submission, optionally carrying an updated request definition
 * @param user          the user performing the deploy
 * @return the request parent built by {@code fillEntireRequest} for the (possibly updated) request
 */
public SingularityRequestParent deploy(SingularityDeployRequest deployRequest, SingularityUser user) {
    validator.checkActionEnabled(SingularityAction.DEPLOY);

    final SingularityDeploy submittedDeploy = deployRequest.getDeploy();
    checkNotNullBadRequest(submittedDeploy, "DeployRequest must have a deploy object");
    final Optional<String> deployUser = user.getEmail();
    final String requestId = checkNotNullBadRequest(submittedDeploy.getRequestId(), "DeployRequest must have a non-null requestId");

    final SingularityRequestWithState requestWithState = fetchRequestWithState(requestId, user);
    authorizationHelper.checkForAuthorization(requestWithState.getRequest(), user, SingularityAuthorizationScope.WRITE);
    final SingularityRequest existingRequest = requestWithState.getRequest();
    authorizationHelper.checkForAuthorization(existingRequest, submittedDeploy, user, SingularityAuthorizationScope.WRITE);

    // An updated request, when supplied, must pass the authorized-changes check and validation.
    Optional<SingularityRequest> updatedValidatedRequest = Optional.empty();
    if (deployRequest.getUpdatedRequest().isPresent()) {
        authorizationHelper.checkForAuthorizedChanges(deployRequest.getUpdatedRequest().get(), requestWithState.getRequest(), user);
        updatedValidatedRequest = Optional.of(validator.checkSingularityRequest(deployRequest.getUpdatedRequest().get(), Optional.of(requestWithState.getRequest()), Optional.<SingularityDeploy>empty(), Optional.of(submittedDeploy)));
    }
    // From here on, work with the updated request when there is one, otherwise the stored request.
    final SingularityRequest effectiveRequest = updatedValidatedRequest.orElse(existingRequest);

    validator.checkScale(effectiveRequest, Optional.of(taskManager.getActiveTaskIdsForRequest(effectiveRequest.getId()).size()));
    if (!(deployRequest.isUnpauseOnSuccessfulDeploy() || configuration.isAllowDeployOfPausedRequests())) {
        checkConflict(requestWithState.getState() != RequestState.PAUSED, "Request %s is paused. Unable to deploy (it must be manually unpaused first)", requestWithState.getRequest().getId());
    }

    final SingularityDeploy validatedDeploy = validator.checkDeploy(effectiveRequest, submittedDeploy);
    final long now = System.currentTimeMillis();
    final SingularityDeployMarker deployMarker = new SingularityDeployMarker(requestId, validatedDeploy.getId(), now, deployUser, deployRequest.getMessage());

    final SingularityDeployProgress deployProgress;
    if (!effectiveRequest.isLongRunning()) {
        deployProgress = SingularityDeployProgress.forNonLongRunning();
    } else {
        // With canary deploys enabled the first step targets at most one instance group.
        final boolean canaryEnabled = validatedDeploy.getCanaryDeploySettings().isEnableCanaryDeploy();
        int firstTargetInstances;
        if (canaryEnabled) {
            firstTargetInstances = Math.min(validatedDeploy.getCanaryDeploySettings().getInstanceGroupSize(), effectiveRequest.getInstancesSafe());
        } else {
            firstTargetInstances = effectiveRequest.getInstancesSafe();
        }
        deployProgress = SingularityDeployProgress.forNewDeploy(firstTargetInstances, validatedDeploy.getCanaryDeploySettings().isEnableCanaryDeploy());
    }
    final SingularityPendingDeploy pendingDeployObj = new SingularityPendingDeploy(deployMarker, DeployState.WAITING, deployProgress, updatedValidatedRequest);

    final boolean deployToUnpause = requestWithState.getState() == RequestState.PAUSED && deployRequest.isUnpauseOnSuccessfulDeploy();
    if (deployToUnpause) {
        requestManager.deployToUnpause(effectiveRequest, now, deployUser, deployRequest.getMessage());
    }

    final AtomicBoolean deployAlreadyInProgress = new AtomicBoolean(deployManager.pendingDeployInProgress(requestId));
    // Short circuit outside lock so we don't wait too long
    if (!deployAlreadyInProgress.get()) {
        // This can cause a conflict if run outside the lock, causing the pending deploy to be checked before deploy data is saved
        schedulerLock.runWithRequestLock(() -> {
            deployManager.createDeployIfNotExists(effectiveRequest, deployMarker, validatedDeploy);
            final boolean pendingDeployExisted = deployManager.createPendingDeploy(pendingDeployObj) == SingularityCreateResult.EXISTED;
            deployAlreadyInProgress.set(pendingDeployExisted);
            if (!pendingDeployExisted) {
                deployManager.saveDeploy(effectiveRequest, deployMarker, validatedDeploy);
            }
        }, requestId, "submitNewDeploy");
    }

    // Undo the unpause performed above when this deploy lost out to one already in progress.
    if (deployAlreadyInProgress.get() && deployToUnpause) {
        requestManager.pause(effectiveRequest, now, deployUser, Optional.empty());
    }
    checkConflict(!deployAlreadyInProgress.get(), "Pending deploy already in progress for %s - cancel it or wait for it to complete (%s)", requestId, deployManager.getPendingDeploy(requestId).orElse(null));

    deployManager.saveDeploy(effectiveRequest, deployMarker, validatedDeploy);

    // Queue the deploy unless the request is paused and paused deploys are allowed.
    if (effectiveRequest.isDeployable() && (requestWithState.getState() != RequestState.PAUSED || !configuration.isAllowDeployOfPausedRequests())) {
        requestManager.addToPendingQueue(new SingularityPendingRequest(requestId, deployMarker.getDeployId(), now, deployUser, PendingType.NEW_DEPLOY, deployRequest.getDeploy().getSkipHealthchecksOnDeploy(), deployRequest.getMessage()));
    }
    return fillEntireRequest(requestWithState, Optional.of(effectiveRequest));
}
Also used : SingularityPendingRequest(com.hubspot.singularity.SingularityPendingRequest) SingularityDeployMarker(com.hubspot.singularity.SingularityDeployMarker) SingularityRequest(com.hubspot.singularity.SingularityRequest) SingularityDeployProgress(com.hubspot.singularity.SingularityDeployProgress) SingularityDeploy(com.hubspot.singularity.SingularityDeploy) AtomicBoolean(java.util.concurrent.atomic.AtomicBoolean) SingularityPendingDeploy(com.hubspot.singularity.SingularityPendingDeploy) SingularityRequestWithState(com.hubspot.singularity.SingularityRequestWithState)

Example 14 with SingularityPendingRequest

use of com.hubspot.singularity.SingularityPendingRequest in project Singularity by HubSpot.

the class RequestResource method scheduleImmediately.

/**
 * Queues an immediate run of the given request.
 *
 * <p>The user must be authorized for WRITE / EXEC on the request and the request must not be
 * paused. Active task ids are counted only for scheduled requests and for one-off requests that
 * declare an instance count; pending task ids are counted only for the latter. The counts are
 * passed to {@code validator.checkRunNowRequest}, which produces the pending request to queue.
 * A conflict is raised when the pending queue reports that the request already existed.
 *
 * @param user          the user requesting the run
 * @param requestId     id of the request to run
 * @param runNowRequest optional run-now parameters; may be {@code null}
 * @param minimalReturn whether to build the response via {@code minimalFromRequestWithState}
 *                      instead of {@code fromSingularityRequestParent}
 * @return the pending request together with its parent request data
 */
public SingularityPendingRequestParent scheduleImmediately(SingularityUser user, String requestId, SingularityRunNowRequest runNowRequest, boolean minimalReturn) {
    final SingularityRequestWithState requestWithState = fetchRequestWithState(requestId, user);
    authorizationHelper.checkForAuthorization(requestWithState.getRequest(), user, SingularityAuthorizationScope.WRITE, SingularityUserFacingAction.EXEC);
    checkConflict(requestWithState.getState() != RequestState.PAUSED, "Request %s is paused. Unable to run now (it must be manually unpaused first)", requestWithState.getRequest().getId());

    // Check these to avoid unnecessary calls to taskManager
    final boolean isOneoffWithInstances = requestWithState.getRequest().isOneOff() && requestWithState.getRequest().getInstances().isPresent();
    final boolean needsActiveCount = requestWithState.getRequest().isScheduled() || isOneoffWithInstances;
    final int activeTasks = needsActiveCount ? taskManager.getActiveTaskIdsForRequest(requestId).size() : 0;
    final int pendingTasks = isOneoffWithInstances ? taskManager.getPendingTaskIdsForRequest(requestId).size() : 0;

    final SingularityPendingRequest pendingRequest = validator.checkRunNowRequest(
        getAndCheckDeployId(requestId),
        user.getEmail(),
        requestWithState.getRequest(),
        Optional.ofNullable(runNowRequest),
        activeTasks,
        pendingTasks
    );

    final SingularityCreateResult queueResult = requestManager.addToPendingQueue(pendingRequest);
    checkConflict(queueResult != SingularityCreateResult.EXISTED, "%s is already pending, please try again soon", requestId);

    return minimalReturn
        ? SingularityPendingRequestParent.minimalFromRequestWithState(requestWithState, pendingRequest)
        : SingularityPendingRequestParent.fromSingularityRequestParent(fillEntireRequest(requestWithState), pendingRequest);
}
Also used : SingularityRunNowRequest(com.hubspot.singularity.api.SingularityRunNowRequest) SingularityPendingRequest(com.hubspot.singularity.SingularityPendingRequest) SingularityRequestWithState(com.hubspot.singularity.SingularityRequestWithState) SingularityCreateResult(com.hubspot.singularity.SingularityCreateResult)

Example 15 with SingularityPendingRequest

use of com.hubspot.singularity.SingularityPendingRequest in project Singularity by HubSpot.

the class SingularityValidator method checkRunNowRequest.

/**
 * Validates a run-now request and builds the pending request for it.
 *
 * <p>Scheduled requests yield {@code PendingType.IMMEDIATE} and are rejected with a conflict when
 * any task is active. One-off requests yield {@code PendingType.ONEOFF}; when the request declares
 * an instance count, active plus pending tasks must be below it. Any other request is rejected as
 * a bad request, as is a {@code runAt} more than {@code maxRunNowTaskLaunchDelay} days from now.
 *
 * @param deployId           deploy id to run
 * @param userEmail          email of the requesting user, if known
 * @param request            the request to run
 * @param maybeRunNowRequest optional run-now parameters, completed by {@code fillRunNowRequest}
 * @param activeTasks        number of currently active tasks for the request
 * @param pendingTasks       number of currently pending tasks for the request
 * @return the pending request to add to the pending queue
 */
public SingularityPendingRequest checkRunNowRequest(String deployId, Optional<String> userEmail, SingularityRequest request, Optional<SingularityRunNowRequest> maybeRunNowRequest, Integer activeTasks, Integer pendingTasks) {
    final SingularityRunNowRequest runNowRequest = fillRunNowRequest(maybeRunNowRequest);

    final PendingType pendingType;
    if (request.isScheduled()) {
        pendingType = PendingType.IMMEDIATE;
        final boolean nothingRunning = activeTasks == 0;
        checkConflict(nothingRunning, "Cannot request immediate run of a scheduled job which is currently running (%s)", activeTasks);
    } else if (!request.isOneOff()) {
        throw badRequest("Can not request an immediate run of a non-scheduled / always running request (%s)", request);
    } else {
        pendingType = PendingType.ONEOFF;
        if (request.getInstances().isPresent()) {
            final boolean belowInstanceLimit = activeTasks + pendingTasks < request.getInstances().get();
            checkRateLimited(belowInstanceLimit, "No more than %s tasks allowed to run concurrently for request %s (%s active, %s pending)", request.getInstances().get(), request, activeTasks, pendingTasks);
        }
    }

    // Reject launch times further out than the configured maximum delay (in days).
    if (runNowRequest.getRunAt().isPresent()) {
        if (runNowRequest.getRunAt().get() > (System.currentTimeMillis() + TimeUnit.DAYS.toMillis(maxRunNowTaskLaunchDelay))) {
            throw badRequest("Task launch delay can be at most %d days from now.", maxRunNowTaskLaunchDelay);
        }
    }

    return new SingularityPendingRequest(
        request.getId(),
        deployId,
        System.currentTimeMillis(),
        userEmail,
        pendingType,
        runNowRequest.getCommandLineArgs(),
        Optional.of(getRunId(runNowRequest.getRunId())),
        runNowRequest.getSkipHealthchecks(),
        runNowRequest.getMessage(),
        Optional.empty(),
        runNowRequest.getResources(),
        runNowRequest.getS3UploaderAdditionalFiles(),
        runNowRequest.getRunAsUserOverride(),
        runNowRequest.getEnvOverrides(),
        runNowRequest.getRequiredAgentAttributeOverrides(),
        runNowRequest.getAllowedAgentAttributeOverrides(),
        runNowRequest.getExtraArtifacts(),
        runNowRequest.getRunAt()
    );
}
Also used : SingularityRunNowRequest(com.hubspot.singularity.api.SingularityRunNowRequest) PendingType(com.hubspot.singularity.SingularityPendingRequest.PendingType) SingularityPendingRequest(com.hubspot.singularity.SingularityPendingRequest)

Aggregations

SingularityPendingRequest (com.hubspot.singularity.SingularityPendingRequest): 53, Test (org.junit.jupiter.api.Test): 18, SingularityRequest (com.hubspot.singularity.SingularityRequest): 12, SingularityRequestWithState (com.hubspot.singularity.SingularityRequestWithState): 11, SingularityTask (com.hubspot.singularity.SingularityTask): 9, SingularityTaskId (com.hubspot.singularity.SingularityTaskId): 9, SingularityCreateResult (com.hubspot.singularity.SingularityCreateResult): 8, SingularityTaskCleanup (com.hubspot.singularity.SingularityTaskCleanup): 8, SingularityRunNowRequest (com.hubspot.singularity.api.SingularityRunNowRequest): 8, ArrayList (java.util.ArrayList): 8, List (java.util.List): 8, SingularityDeploy (com.hubspot.singularity.SingularityDeploy): 7, SingularityPendingDeploy (com.hubspot.singularity.SingularityPendingDeploy): 7, SingularityPendingTask (com.hubspot.singularity.SingularityPendingTask): 7, SingularityRequestDeployState (com.hubspot.singularity.SingularityRequestDeployState): 7, Resources (com.hubspot.mesos.Resources): 6, SingularityDeployKey (com.hubspot.singularity.SingularityDeployKey): 6, SingularityDeployProgress (com.hubspot.singularity.SingularityDeployProgress): 6, PendingType (com.hubspot.singularity.SingularityPendingRequest.PendingType): 6, SingularityRequestBuilder (com.hubspot.singularity.SingularityRequestBuilder): 6