Search in sources :

Example 6 with Status

use of alluxio.job.wire.Status in project alluxio by Alluxio.

the class PlanInfo method setStatus.

/**
 * Sets the status of a job.
 *
 * A job can only move from one status to another if the job hasn't already finished. If a job
 * is finished and the caller tries to change the status, this method is a no-op.
 *
 * When the new status differs from the previous one, the transition is logged, the last status
 * change time is refreshed, the status change callback (if one is set) is invoked while the lock
 * on this object is still held, and the per-status counters are adjusted.
 *
 * @param status the job status
 */
public void setStatus(Status status) {
    synchronized (this) {
        // this is synchronized to serialize all setStatus calls.
        if (mStatus.isFinished()) {
            return;
        }
        Status oldStatus = mStatus;
        mStatus = status;
        if (status != oldStatus) {
            // status changed
            if (status.isFinished()) {
                if (status.equals(Status.COMPLETED)) {
                    // for completed jobs; the arguments must line up with the two placeholders,
                    // otherwise the status names would be printed as the Id and Config
                    LOG.debug("Job completed, Id={} Config={}", getId(), getJobConfig());
                } else {
                    // for failed and cancelled jobs
                    LOG.info("Job status changed from {} to {}, Id={} Config={} Error={}", oldStatus.name(), status.name(), getId(), getJobConfig(), getErrorMessage());
                }
            }
            if (status.equals(Status.FAILED) && (getErrorType().isEmpty() || getErrorMessage().isEmpty())) {
                LOG.warn("Job set to failed without given an error type or message, Id={} Config={}", getId(), getJobConfig());
            }
            mLastStatusChangeMs = CommonUtils.getCurrentMs();
            if (mStatusChangeCallback != null) {
                mStatusChangeCallback.accept(this);
            }
            // move this job from the old status' counter to the new one
            Metrics.counter(oldStatus).dec();
            Metrics.counter(status).inc();
        }
    }
}
Also used : Status(alluxio.job.wire.Status)

Example 7 with Status

use of alluxio.job.wire.Status in project alluxio by Alluxio.

the class StressJobServiceBench method runNoop.

/**
 * Submits a {@code NoopPlanConfig} job through the job master client and waits, for at most
 * {@code 30 * Constants.SECOND_MS}, until the job reports one of the statuses COMPLETED,
 * CANCELED or FAILED.
 *
 * @throws IOException if the job ended with status FAILED; the message is the job's error message
 * @throws InterruptedException declared for the wait; presumably raised when the waiting thread
 *         is interrupted (depends on CommonUtils.waitFor)
 * @throws TimeoutException declared for the wait; presumably raised when the job does not reach
 *         one of the awaited statuses in time (depends on CommonUtils.waitFor)
 */
private void runNoop() throws IOException, InterruptedException, TimeoutException {
    long jobId = mJobMasterClient.run(new NoopPlanConfig());
    // TODO(jianjian): refactor JobTestUtils
    ImmutableSet<Status> awaited = ImmutableSet.of(Status.COMPLETED, Status.CANCELED, Status.FAILED);
    final AtomicReference<JobInfo> lastSeen = new AtomicReference<>();
    String description = String.format("job %d to be one of status %s", jobId, Arrays.toString(awaited.toArray()));
    CommonUtils.waitFor(description, () -> {
        try {
            JobInfo polled = mJobMasterClient.getJobStatus(jobId);
            if (!awaited.contains(polled.getStatus())) {
                // not in an awaited status yet, keep polling
                return false;
            }
            // remember the info that satisfied the wait so it can be inspected afterwards
            lastSeen.set(polled);
            return true;
        } catch (IOException e) {
            throw Throwables.propagate(e);
        }
    }, WaitForOptions.defaults().setTimeoutMs(30 * Constants.SECOND_MS));
    JobInfo finalInfo = lastSeen.get();
    boolean failed = finalInfo.getStatus().equals(Status.FAILED);
    if (failed) {
        throw new IOException(finalInfo.getErrorMessage());
    }
}
Also used : Status(alluxio.job.wire.Status) URIStatus(alluxio.client.file.URIStatus) NoopPlanConfig(alluxio.job.plan.NoopPlanConfig) JobInfo(alluxio.job.wire.JobInfo) AtomicReference(java.util.concurrent.atomic.AtomicReference) IOException(java.io.IOException)

Example 8 with Status

use of alluxio.job.wire.Status in project alluxio by Alluxio.

the class PlanCoordinator method updateStatus.

/**
 * Recomputes the status of the job from the statuses of its tasks. Once every task is
 * completed, the join method of the definition is run and the job is marked as completed.
 *
 * The scan stops at the first FAILED or CANCELED task it encounters.
 */
private synchronized void updateStatus() {
    int completedCount = 0;
    List<TaskInfo> tasks = mPlanInfo.getTaskInfoList();
    JobConfig config = mPlanInfo.getJobConfig();
    Preconditions.checkNotNull(config);
    FileSystem fileSystem = mJobServerContext.getFileSystem();
    for (TaskInfo task : tasks) {
        Status taskStatus = task.getStatus();
        switch (taskStatus) {
            case FAILED:
                // hand the task's error over to the job and stop scanning
                setJobAsFailed(task.getErrorType(), "Task execution failed: " + task.getErrorMessage());
                return;
            case CANCELED:
                // a job already marked FAILED is left untouched
                if (mPlanInfo.getStatus() != Status.FAILED) {
                    mPlanInfo.setStatus(Status.CANCELED);
                    DistributedCmdMetrics.incrementForAllConfigsCancelStatus(config);
                }
                return;
            case RUNNING:
                // only mark the job RUNNING when it is neither FAILED nor CANCELED
                if (!(mPlanInfo.getStatus() == Status.FAILED || mPlanInfo.getStatus() == Status.CANCELED)) {
                    mPlanInfo.setStatus(Status.RUNNING);
                }
                break;
            case COMPLETED:
                completedCount++;
                break;
            case CREATED:
                // nothing to do for tasks that have not started
                break;
            default:
                throw new IllegalArgumentException("Unsupported status " + task.getStatus());
        }
    }
    if (completedCount != tasks.size()) {
        // some tasks are still pending or running
        return;
    }
    if (mPlanInfo.getStatus() == Status.COMPLETED) {
        // the job was already completed earlier, do not join again
        return;
    }
    // every task completed, so run join
    try {
        // Join before changing the status, so a join failure never leaves the job COMPLETED
        mPlanInfo.setResult(join(tasks));
        mPlanInfo.setStatus(Status.COMPLETED);
        // Count the job as complete now that all of its tasks are completed.
        DistributedCmdMetrics.incrementForAllConfigsCompleteStatus(config, fileSystem, new CountingRetry(5));
    } catch (Exception e) {
        LOG.warn("Job error when joining tasks Job Id={} Config={}", mPlanInfo.getId(), mPlanInfo.getJobConfig(), e);
        setJobAsFailed(ErrorUtils.getErrorType(e), e.getMessage());
    }
}
Also used : TaskInfo(alluxio.job.wire.TaskInfo) Status(alluxio.job.wire.Status) CountingRetry(alluxio.retry.CountingRetry) FileSystem(alluxio.client.file.FileSystem) BatchedJobConfig(alluxio.job.plan.BatchedJobConfig) JobConfig(alluxio.job.JobConfig) JobDoesNotExistException(alluxio.exception.JobDoesNotExistException)

Aggregations

Status (alluxio.job.wire.Status)8 IOException (java.io.IOException)3 Constants (alluxio.Constants)2 FileSystem (alluxio.client.file.FileSystem)2 URIStatus (alluxio.client.file.URIStatus)2 PropertyKey (alluxio.conf.PropertyKey)2 JobInfo (alluxio.job.wire.JobInfo)2 TaskInfo (alluxio.job.wire.TaskInfo)2 LocalAlluxioJobCluster (alluxio.master.LocalAlluxioJobCluster)2 LocalAlluxioClusterResource (alluxio.testutils.LocalAlluxioClusterResource)2 Assert.assertEquals (org.junit.Assert.assertEquals)2 Assert.assertFalse (org.junit.Assert.assertFalse)2 Assert.assertTrue (org.junit.Assert.assertTrue)2 Before (org.junit.Before)2 ClassRule (org.junit.ClassRule)2 Rule (org.junit.Rule)2 Test (org.junit.Test)2 TestRule (org.junit.rules.TestRule)2 AlluxioURI (alluxio.AlluxioURI)1 FileSystemShell (alluxio.cli.fs.FileSystemShell)1