Search in sources :

Example 6 with TaskInfo

use of alluxio.job.wire.TaskInfo in project alluxio by Alluxio.

the class PlanInfo method addTask.

/**
 * Adds a new task to this plan, recorded with the {@code CREATED} status.
 *
 * <p>The task is stored only if no task is mapped to {@code taskId} yet; an existing mapping is
 * left untouched and causes the state check at the end of this method to fail.
 *
 * @param taskId the task id
 * @param workerInfo the worker info; its address is stored on the new task
 * @param args the arguments
 */
public void addTask(long taskId, WorkerInfo workerInfo, Object args) {
    TaskInfo newTask = new TaskInfo(mId, taskId, Status.CREATED, workerInfo.getAddress(), args);
    TaskInfo existing = mTaskIdToInfo.putIfAbsent(taskId, newTask);
    // A non-null previous mapping means this task id was already registered.
    String duplicateMessage = String.format("JobId %d cannot add duplicate taskId %d", mId, taskId);
    Preconditions.checkState(existing == null, duplicateMessage);
}
Also used : TaskInfo(alluxio.job.wire.TaskInfo)

Example 7 with TaskInfo

use of alluxio.job.wire.TaskInfo in project alluxio by Alluxio.

the class CommandHandlingExecutor method heartbeat.

/**
 * Runs one heartbeat cycle.
 *
 * <p>Recomputes worker health and throttles or unthrottles the task executor manager
 * accordingly, then sends the health report together with the pending task updates to the
 * master client and schedules a handler for every command returned. If the heartbeat call
 * fails, the task updates are handed back to the task executor manager and no commands are
 * processed.
 */
@Override
public void heartbeat() {
    mHealthReporter.compute();
    if (!mHealthReporter.isHealthy()) {
        mTaskExecutorManager.throttle();
    } else {
        mTaskExecutorManager.unthrottle();
    }

    JobWorkerHealth health = new JobWorkerHealth(
        JobWorkerIdRegistry.getWorkerId(),
        mHealthReporter.getCpuLoadAverage(),
        mTaskExecutorManager.getTaskExecutorPoolSize(),
        mTaskExecutorManager.getNumActiveTasks(),
        mTaskExecutorManager.unfinishedTasks(),
        mWorkerNetAddress.getHost());

    // Take the pending updates out of the manager and convert them to their proto form.
    List<TaskInfo> pendingUpdates = mTaskExecutorManager.getAndClearTaskUpdates();
    List<JobInfo> protoUpdates =
        pendingUpdates.stream().map(TaskInfo::toProto).collect(Collectors.toList());

    List<alluxio.grpc.JobCommand> received;
    try {
        received = mMasterClient.heartbeat(health, protoUpdates);
    } catch (AlluxioException | IOException e) {
        // Put the updates back so that the next heartbeat can report them.
        mTaskExecutorManager.restoreTaskUpdates(pendingUpdates);
        // TODO(yupeng) better error handling
        LOG.error("Failed to heartbeat", e);
        return;
    }

    received.forEach(command -> mCommandHandlingService.execute(new CommandHandler(command)));
}
Also used : TaskInfo(alluxio.job.wire.TaskInfo) JobInfo(alluxio.grpc.JobInfo) JobCommand(alluxio.grpc.JobCommand) JobWorkerHealth(alluxio.job.wire.JobWorkerHealth) IOException(java.io.IOException) AlluxioException(alluxio.exception.AlluxioException)

Example 8 with TaskInfo

use of alluxio.job.wire.TaskInfo in project alluxio by Alluxio.

the class TaskExecutorManager method notifyTaskCompletion.

/**
 * Notifies the completion of the task.
 *
 * <p>Marks the task as {@code COMPLETED}, stores its result and passes it to
 * {@code finishTask}. If no unfinished task is registered under the given ids, the
 * notification is logged as a warning and otherwise ignored.
 *
 * @param jobId the job id
 * @param taskId the task id
 * @param result the task execution result
 */
public synchronized void notifyTaskCompletion(long jobId, long taskId, Serializable result) {
    Pair<Long, Long> id = new Pair<>(jobId, taskId);
    TaskInfo taskInfo = mUnfinishedTasks.get(id);
    if (taskInfo == null) {
        // No entry to update: without this guard the call below would fail with a bare
        // NullPointerException. NOTE(review): presumably the task was already finished or
        // cancelled by the time this notification arrived - confirm against callers.
        LOG.warn("Ignoring completion of task {} for job {}: task is not registered as unfinished.",
            taskId, jobId);
        return;
    }
    taskInfo.setStatus(Status.COMPLETED);
    taskInfo.setResult(result);
    finishTask(id);
    LOG.info("Task {} for job {} completed.", taskId, jobId);
}
Also used : TaskInfo(alluxio.job.wire.TaskInfo) Pair(alluxio.collections.Pair)

Example 9 with TaskInfo

use of alluxio.job.wire.TaskInfo in project alluxio by Alluxio.

the class TaskExecutorManager method notifyTaskRunning.

/**
 * Notifies the start of the task.
 *
 * <p>Marks the task as {@code RUNNING}. If no unfinished task is registered under the given
 * ids, the notification is logged as a warning and otherwise ignored.
 *
 * @param jobId the job id
 * @param taskId the task id
 */
public synchronized void notifyTaskRunning(long jobId, long taskId) {
    Pair<Long, Long> id = new Pair<>(jobId, taskId);
    TaskInfo taskInfo = mUnfinishedTasks.get(id);
    if (taskInfo == null) {
        // No entry to update: without this guard the call below would fail with a bare
        // NullPointerException. NOTE(review): presumably the task was already finished or
        // cancelled before it reported running - confirm against callers.
        LOG.warn("Ignoring start of task {} for job {}: task is not registered as unfinished.",
            taskId, jobId);
        return;
    }
    taskInfo.setStatus(Status.RUNNING);
    LOG.info("Task {} for job {} started", taskId, jobId);
}
Also used : TaskInfo(alluxio.job.wire.TaskInfo) Pair(alluxio.collections.Pair)

Example 10 with TaskInfo

use of alluxio.job.wire.TaskInfo in project alluxio by Alluxio.

the class TaskExecutorManager method notifyTaskFailure.

/**
 * Notifies the failure of the task.
 *
 * <p>Marks the task as {@code FAILED}, records the error type and (when available) an error
 * message, and passes the task to {@code finishTask}. The error message is the full stack
 * trace when the {@code DEBUG} property is enabled, and the throwable's message otherwise.
 * If no unfinished task is registered under the given ids, the failure is logged as a warning
 * and otherwise ignored.
 *
 * @param jobId the job id
 * @param taskId the task id
 * @param t the thrown exception
 */
public synchronized void notifyTaskFailure(long jobId, long taskId, Throwable t) {
    Pair<Long, Long> id = new Pair<>(jobId, taskId);
    TaskInfo taskInfo = mUnfinishedTasks.get(id);
    if (taskInfo == null) {
        // No entry to update: without this guard the call below would fail with a bare
        // NullPointerException and the original failure cause would be lost. The throwable is
        // passed to the logger so the cause is still reported.
        LOG.warn("Ignoring failure of task {} for job {}: task is not registered as unfinished.",
            taskId, jobId, t);
        return;
    }
    taskInfo.setStatus(Status.FAILED);
    // Rendered once and reused for both the debug error message and the sampled log line.
    String stackTrace = Throwables.getStackTraceAsString(t);
    String errorMessage;
    if (ServerConfiguration.getBoolean(PropertyKey.DEBUG)) {
        errorMessage = stackTrace;
    } else {
        errorMessage = t.getMessage();
    }
    taskInfo.setErrorType(ErrorUtils.getErrorType(t));
    // A throwable may carry no message; in that case the task's error message is left unset.
    if (errorMessage != null) {
        taskInfo.setErrorMessage(errorMessage);
    }
    finishTask(id);
    LOG.info("Task {} for job {} failed: {}", taskId, jobId, errorMessage);
    SAMPLING_LOGGER.info("Stack trace for taskId: {} jobId: {} : {}", taskId, jobId, stackTrace);
}
Also used : TaskInfo(alluxio.job.wire.TaskInfo) Pair(alluxio.collections.Pair)

Aggregations

TaskInfo (alluxio.job.wire.TaskInfo)15 Pair (alluxio.collections.Pair)5 JobConfig (alluxio.job.JobConfig)2 BatchedJobConfig (alluxio.job.plan.BatchedJobConfig)2 Status (alluxio.job.wire.Status)2 ArrayList (java.util.ArrayList)2 FileSystem (alluxio.client.file.FileSystem)1 AlluxioException (alluxio.exception.AlluxioException)1 JobDoesNotExistException (alluxio.exception.JobDoesNotExistException)1 JobCommand (alluxio.grpc.JobCommand)1 JobInfo (alluxio.grpc.JobInfo)1 MasterWorkerInfo (alluxio.job.MasterWorkerInfo)1 JobInfo (alluxio.job.wire.JobInfo)1 JobWorkerHealth (alluxio.job.wire.JobWorkerHealth)1 PlanCoordinator (alluxio.master.job.plan.PlanCoordinator)1 LockResource (alluxio.resource.LockResource)1 CountingRetry (alluxio.retry.CountingRetry)1 WorkerInfo (alluxio.wire.WorkerInfo)1 IOException (java.io.IOException)1 Serializable (java.io.Serializable)1