Search in sources :

Example 1 with JobDoesNotExistException

use of alluxio.exception.JobDoesNotExistException in project alluxio by Alluxio.

the class TaskExecutor method run.

/**
 * Runs a single task: deserializes the job config and optional task arguments carried by the
 * run-task command, resolves the plan definition for that config, executes the task and
 * notifies the task executor manager of the result.
 *
 * <p>Every failure path reports the error through {@code fail(...)} and then stops; an
 * {@link InterruptedException} only restores the interrupt flag and stops.
 */
@Override
public void run() {
    JobConfig jobConfig = null;
    Serializable taskArgs = null;
    try {
        jobConfig = (JobConfig) SerializationUtils.deserialize(mRunTaskCommand.getJobConfig().toByteArray());
        if (mRunTaskCommand.hasTaskArgs()) {
            taskArgs = SerializationUtils.deserialize(mRunTaskCommand.getTaskArgs().toByteArray());
        }
    } catch (IOException | ClassNotFoundException e) {
        // jobConfig is null here if its own deserialization failed, non-null if only the task
        // arguments could not be read.
        fail(e, jobConfig, null);
        // Stop here: continuing would look up a definition with a missing/partial input and
        // could report the task as failed (or even running) a second time.
        return;
    }
    PlanDefinition<JobConfig, Serializable, Serializable> definition;
    try {
        definition = PlanDefinitionRegistry.INSTANCE.getJobDefinition(jobConfig);
    } catch (JobDoesNotExistException e) {
        LOG.error("The job definition for config {} does not exist.", jobConfig.getName());
        fail(e, jobConfig, taskArgs);
        return;
    }
    mTaskExecutorManager.notifyTaskRunning(mJobId, mTaskId);
    Serializable result;
    try {
        result = definition.runTask(jobConfig, taskArgs, mContext);
    } catch (InterruptedException e) {
        // Cleanup around the interruption should already have been handled by a different thread
        Thread.currentThread().interrupt();
        return;
    } catch (Throwable t) {
        fail(t, jobConfig, taskArgs);
        return;
    }
    mTaskExecutorManager.notifyTaskCompletion(mJobId, mTaskId, result);
}
Also used : Serializable(java.io.Serializable) JobDoesNotExistException(alluxio.exception.JobDoesNotExistException) IOException(java.io.IOException) JobConfig(alluxio.job.JobConfig)

Example 2 with JobDoesNotExistException

use of alluxio.exception.JobDoesNotExistException in project alluxio by Alluxio.

the class PlanCoordinator method start.

/**
 * Starts the plan: resolves the plan definition for the job config, asks it to select
 * executors among a shuffled copy of the known workers, and submits one run-task command per
 * selected (worker, arguments) pair.
 *
 * <p>If executor selection throws, the job is marked as failed via {@code setJobAsFailed} and
 * the method returns without submitting anything.
 *
 * @throws JobDoesNotExistException if no plan definition exists for the job config; the plan
 *         info is marked {@code FAILED} before the exception is rethrown
 */
private synchronized void start() throws JobDoesNotExistException {
    // get the job definition
    LOG.info("Starting job Id={} Config={}", mPlanInfo.getId(), mPlanInfo.getJobConfig());
    PlanDefinition<JobConfig, ?, ?> definition;
    try {
        definition = PlanDefinitionRegistry.INSTANCE.getJobDefinition(mPlanInfo.getJobConfig());
    } catch (JobDoesNotExistException e) {
        LOG.info("Exception when getting jobDefinition from jobConfig: ", e);
        mPlanInfo.setErrorType(ErrorUtils.getErrorType(e));
        mPlanInfo.setErrorMessage(e.getMessage());
        DistributedCmdMetrics.incrementForAllConfigsFailStatus(mPlanInfo.getJobConfig());
        mPlanInfo.setStatus(Status.FAILED);
        throw e;
    }
    SelectExecutorsContext context = new SelectExecutorsContext(mPlanInfo.getId(), mJobServerContext);
    Set<? extends Pair<WorkerInfo, ?>> taskAddressToArgs;
    // Shuffle a private copy so the member list keeps its order.
    ArrayList<WorkerInfo> workersInfoListCopy = Lists.newArrayList(mWorkersInfoList);
    Collections.shuffle(workersInfoListCopy);
    try {
        taskAddressToArgs = definition.selectExecutors(mPlanInfo.getJobConfig(), workersInfoListCopy, context);
    } catch (Exception e) {
        // Short summary at WARN, full stack trace at INFO.
        LOG.warn("Failed to select executor. {}", e.toString());
        LOG.info("Exception: ", e);
        setJobAsFailed(ErrorUtils.getErrorType(e), e.getMessage());
        return;
    }
    if (taskAddressToArgs.isEmpty()) {
        LOG.warn("No executor was selected.");
        // The loop below is a no-op in this case; only the status is refreshed.
        updateStatus();
    }
    for (Pair<WorkerInfo, ?> pair : taskAddressToArgs) {
        WorkerInfo worker = pair.getFirst();
        LOG.debug("Selected executor {} with parameters {}.", worker, pair.getSecond());
        // The next task id is the number of tasks registered so far.
        int taskId = mTaskIdToWorkerInfo.size();
        // create task
        mPlanInfo.addTask(taskId, worker, pair.getSecond());
        // submit commands
        JobConfig config;
        if (mPlanInfo.getJobConfig() instanceof BatchedJobConfig) {
            // Send a batched config of the same job type with an empty set instead of the
            // plan's own config. NOTE(review): presumably the per-task work travels in the
            // task arguments - confirm.
            BatchedJobConfig planConfig = (BatchedJobConfig) mPlanInfo.getJobConfig();
            config = new BatchedJobConfig(planConfig.getJobType(), new HashSet<>());
        } else {
            config = mPlanInfo.getJobConfig();
        }
        mCommandManager.submitRunTaskCommand(mPlanInfo.getId(), taskId, config, pair.getSecond(), worker.getId());
        mTaskIdToWorkerInfo.put((long) taskId, worker);
        mWorkerIdToTaskIds.putIfAbsent(worker.getId(), Lists.newArrayList());
        mWorkerIdToTaskIds.get(worker.getId()).add((long) taskId);
    }
}
Also used : JobDoesNotExistException(alluxio.exception.JobDoesNotExistException) BatchedJobConfig(alluxio.job.plan.BatchedJobConfig) WorkerInfo(alluxio.wire.WorkerInfo) SelectExecutorsContext(alluxio.job.SelectExecutorsContext) BatchedJobConfig(alluxio.job.plan.BatchedJobConfig) JobConfig(alluxio.job.JobConfig) JobDoesNotExistException(alluxio.exception.JobDoesNotExistException) HashSet(java.util.HashSet)

Example 3 with JobDoesNotExistException

use of alluxio.exception.JobDoesNotExistException in project alluxio by Alluxio.

the class WorkflowTracker method getStatus.

/**
 * Builds the status of the workflow registered under the given job id.
 *
 * <p>When {@code verbose} is set, the status of each child job is fetched in ascending
 * child-id order and attached to the result; children the job master has no information for
 * are logged and skipped. Otherwise the list of child job infos is empty.
 *
 * @param jobId the id of the job
 * @param verbose whether the output should include the child job infos
 * @return null if the job id isn't known by the workflow tracker, the WorkflowInfo otherwise
 */
public WorkflowInfo getStatus(long jobId, boolean verbose) {
    final WorkflowExecution execution = mWorkflows.get(jobId);
    if (execution == null) {
        return null;
    }
    // The child ids are snapshotted and sorted regardless of the verbose flag.
    final ArrayList<Long> sortedChildIds = Lists.newArrayList(mChildren.get(jobId).iterator());
    Collections.sort(sortedChildIds);
    final List<JobInfo> childInfos = Lists.newArrayList();
    if (verbose) {
        for (long childId : sortedChildIds) {
            try {
                JobInfo childInfo = mJobMaster.getStatus(childId);
                childInfos.add(childInfo);
            } catch (JobDoesNotExistException e) {
                LOG.info(String.format("No job info on child job id %s. Skipping", childId));
            }
        }
    }
    return new WorkflowInfo(
        jobId,
        execution.getName(),
        execution.getStatus(),
        execution.getLastUpdated(),
        execution.getErrorType(),
        execution.getErrorMessage(),
        childInfos);
}
Also used : JobDoesNotExistException(alluxio.exception.JobDoesNotExistException) JobInfo(alluxio.job.wire.JobInfo) WorkflowInfo(alluxio.job.wire.WorkflowInfo) WorkflowExecution(alluxio.job.workflow.WorkflowExecution)

Example 4 with JobDoesNotExistException

use of alluxio.exception.JobDoesNotExistException in project alluxio by Alluxio.

the class JobMaster method cancel.

/**
 * Cancels a job.
 *
 * <p>The id is first looked up among the plan coordinators; if none is found, the workflow
 * tracker is asked to cancel it. The audit context is marked as succeeded on either successful
 * path and left untouched when the method throws.
 *
 * @param jobId the id of the job
 * @throws JobDoesNotExistException when the job does not exist
 */
public void cancel(long jobId) throws JobDoesNotExistException {
    try (JobMasterAuditContext auditContext = createAuditContext("cancel")) {
        auditContext.setJobId(jobId);
        PlanCoordinator planCoordinator = mPlanTracker.getCoordinator(jobId);
        if (planCoordinator != null) {
            planCoordinator.cancel();
        } else if (!mWorkflowTracker.cancel(jobId)) {
            // Neither a plan nor a workflow is registered under this id.
            throw new JobDoesNotExistException(ExceptionMessage.JOB_DOES_NOT_EXIST.getMessage(jobId));
        }
        // Reached for both a cancelled plan and a cancelled workflow, so a successful workflow
        // cancellation is no longer left unmarked in the audit context.
        auditContext.setSucceeded(true);
    }
}
Also used : JobDoesNotExistException(alluxio.exception.JobDoesNotExistException) PlanCoordinator(alluxio.master.job.plan.PlanCoordinator)

Example 5 with JobDoesNotExistException

use of alluxio.exception.JobDoesNotExistException in project alluxio by Alluxio.

the class ReplicationChecker method check.

/**
 * Checks the replication level of every block of the given inodes and starts evict or
 * replicate jobs through the handler for blocks that are out of range.
 *
 * <p>Inodes that already have an active job are skipped. The scan stops early, returning what
 * has been processed so far, when the number of active jobs reaches {@code mMaxActiveJobs},
 * when the block master is unavailable, or when the job service rejects a request.
 *
 * @param inodes ids of the inodes to check
 * @param handler handler used to start the evict / replicate jobs
 * @param mode whether to look for over-replicated ({@code EVICT}) or under-replicated
 *        ({@code REPLICATE}) blocks
 * @return ids of the inodes for which at least one job was started
 * @throws InterruptedException if the current thread's interrupt flag is set between inodes
 */
private Set<Long> check(Set<Long> inodes, ReplicationHandler handler, Mode mode) throws InterruptedException {
    Set<Long> processedFileIds = new HashSet<>();
    for (long inodeId : inodes) {
        if (mActiveJobToInodeID.size() >= mMaxActiveJobs) {
            return processedFileIds;
        }
        if (mActiveJobToInodeID.containsValue(inodeId)) {
            continue;
        }
        // (uri, block id, number of replicas to add or remove) for each out-of-range block.
        Set<Triple<AlluxioURI, Long, Integer>> requests = new HashSet<>();
        // Throw if interrupted.
        if (Thread.interrupted()) {
            throw new InterruptedException("ReplicationChecker interrupted.");
        }
        // NOTE(review): lockFullInodePath presumably locks the entire path rather than just the
        // inode file; since this access is read-only, a narrower lock may suffice - confirm.
        try (LockedInodePath inodePath = mInodeTree.lockFullInodePath(inodeId, LockPattern.READ)) {
            InodeFile file = inodePath.getInodeFile();
            for (long blockId : file.getBlockIds()) {
                BlockInfo blockInfo = null;
                try {
                    blockInfo = mBlockMaster.getBlockInfo(blockId);
                } catch (BlockInfoException e) {
                    // Intentionally ignored: cannot find this block in Alluxio from BlockMaster,
                    // possibly persisted in UFS. It is treated as having zero replicas below.
                } catch (UnavailableException e) {
                    // The block master is not available, wait for the next heartbeat
                    LOG.warn("The block master is not available: {}", e.toString());
                    return processedFileIds;
                }
                int currentReplicas = (blockInfo == null) ? 0 : blockInfo.getLocations().size();
                switch(mode) {
                    case EVICT:
                        int maxReplicas = file.getReplicationMax();
                        // A file waiting to be persisted may keep up to its durable replica count.
                        if (file.getPersistenceState() == PersistenceState.TO_BE_PERSISTED && file.getReplicationDurable() > maxReplicas) {
                            maxReplicas = file.getReplicationDurable();
                        }
                        if (currentReplicas > maxReplicas) {
                            requests.add(new ImmutableTriple<>(inodePath.getUri(), blockId, currentReplicas - maxReplicas));
                        }
                        break;
                    case REPLICATE:
                        int minReplicas = file.getReplicationMin();
                        // A file waiting to be persisted needs at least its durable replica count.
                        if (file.getPersistenceState() == PersistenceState.TO_BE_PERSISTED && file.getReplicationDurable() > minReplicas) {
                            minReplicas = file.getReplicationDurable();
                        }
                        if (currentReplicas < minReplicas) {
                            // if this file is not persisted and block master thinks it is lost, no effort made
                            if (!file.isPersisted() && mBlockMaster.isBlockLost(blockId)) {
                                // Moves on to the next block id (continues the enclosing for loop).
                                continue;
                            }
                            requests.add(new ImmutableTriple<>(inodePath.getUri(), blockId, minReplicas - currentReplicas));
                        }
                        break;
                    default:
                        LOG.warn("Unexpected replication mode {}.", mode);
                }
            }
        } catch (FileDoesNotExistException e) {
            LOG.warn("Failed to check replication level for inode id {} : {}", inodeId, e.toString());
        }
        for (Triple<AlluxioURI, Long, Integer> entry : requests) {
            AlluxioURI uri = entry.getLeft();
            long blockId = entry.getMiddle();
            int numReplicas = entry.getRight();
            try {
                long jobId;
                switch(mode) {
                    case EVICT:
                        jobId = handler.evict(uri, blockId, numReplicas);
                        break;
                    case REPLICATE:
                        jobId = handler.replicate(uri, blockId, numReplicas);
                        break;
                    default:
                        // String.format needs %s; the SLF4J-style {} was never substituted.
                        throw new RuntimeException(String.format("Unexpected replication mode %s.", mode));
                }
                processedFileIds.add(inodeId);
                mActiveJobToInodeID.put(jobId, inodeId);
            } catch (JobDoesNotExistException | ResourceExhaustedException e) {
                LOG.warn("The job service is busy, will retry later. {}", e.toString());
                return processedFileIds;
            } catch (UnavailableException e) {
                LOG.warn("Unable to complete the replication check: {}, will retry later.", e.toString());
                return processedFileIds;
            } catch (Exception e) {
                // Any other failure (including the unexpected-mode RuntimeException above) is
                // logged and the remaining requests are still attempted.
                SAMPLING_LOG.warn("Unexpected exception encountered when starting a {} job (uri={}," + " block ID={}, num replicas={}) : {}", mode, uri, blockId, numReplicas, e.toString());
                LOG.debug("Job service unexpected exception: ", e);
            }
        }
    }
    return processedFileIds;
}
Also used : FileDoesNotExistException(alluxio.exception.FileDoesNotExistException) JobDoesNotExistException(alluxio.exception.JobDoesNotExistException) UnavailableException(alluxio.exception.status.UnavailableException) BlockInfoException(alluxio.exception.BlockInfoException) InodeFile(alluxio.master.file.meta.InodeFile) JobDoesNotExistException(alluxio.exception.JobDoesNotExistException) ResourceExhaustedException(alluxio.exception.status.ResourceExhaustedException) BlockInfoException(alluxio.exception.BlockInfoException) IOException(java.io.IOException) FileDoesNotExistException(alluxio.exception.FileDoesNotExistException) UnavailableException(alluxio.exception.status.UnavailableException) Triple(org.apache.commons.lang3.tuple.Triple) ImmutableTriple(org.apache.commons.lang3.tuple.ImmutableTriple) LockedInodePath(alluxio.master.file.meta.LockedInodePath) ResourceExhaustedException(alluxio.exception.status.ResourceExhaustedException) BlockInfo(alluxio.wire.BlockInfo) HashSet(java.util.HashSet) AlluxioURI(alluxio.AlluxioURI)

Aggregations

JobDoesNotExistException (alluxio.exception.JobDoesNotExistException)9 JobConfig (alluxio.job.JobConfig)3 JobInfo (alluxio.job.wire.JobInfo)3 ResourceExhaustedException (alluxio.exception.status.ResourceExhaustedException)2 WorkflowExecution (alluxio.job.workflow.WorkflowExecution)2 IOException (java.io.IOException)2 HashSet (java.util.HashSet)2 AtomicReference (java.util.concurrent.atomic.AtomicReference)2 AlluxioURI (alluxio.AlluxioURI)1 FileSystemContext (alluxio.client.file.FileSystemContext)1 ConcurrentHashSet (alluxio.collections.ConcurrentHashSet)1 BlockInfoException (alluxio.exception.BlockInfoException)1 FileDoesNotExistException (alluxio.exception.FileDoesNotExistException)1 UnavailableException (alluxio.exception.status.UnavailableException)1 HeartbeatContext (alluxio.heartbeat.HeartbeatContext)1 JobServerContext (alluxio.job.JobServerContext)1 SelectExecutorsContext (alluxio.job.SelectExecutorsContext)1 BatchedJobConfig (alluxio.job.plan.BatchedJobConfig)1 PlanConfig (alluxio.job.plan.PlanConfig)1 WorkflowInfo (alluxio.job.wire.WorkflowInfo)1