use of alluxio.exception.JobDoesNotExistException in project alluxio by Alluxio.
the class TaskExecutor method run.
/**
 * Runs a single task: deserializes the job config and optional task arguments from the
 * run-task command, resolves the plan definition for that config, executes the task, and
 * reports running/completion to the task executor manager. Any failure is routed through
 * {@code fail(...)}.
 */
@Override
public void run() {
  JobConfig jobConfig = null;
  Serializable taskArgs = null;
  try {
    jobConfig = (JobConfig) SerializationUtils.deserialize(
        mRunTaskCommand.getJobConfig().toByteArray());
    if (mRunTaskCommand.hasTaskArgs()) {
      taskArgs = SerializationUtils.deserialize(mRunTaskCommand.getTaskArgs().toByteArray());
    }
  } catch (IOException | ClassNotFoundException e) {
    fail(e, jobConfig, null);
    // The failure has been reported; stop here. Falling through would look up a definition
    // with a possibly-null jobConfig (and dereference it in the error log below), or run the
    // task with arguments that failed to deserialize, after already reporting a failure.
    return;
  }

  PlanDefinition<JobConfig, Serializable, Serializable> definition;
  try {
    definition = PlanDefinitionRegistry.INSTANCE.getJobDefinition(jobConfig);
  } catch (JobDoesNotExistException e) {
    LOG.error("The job definition for config {} does not exist.", jobConfig.getName());
    fail(e, jobConfig, taskArgs);
    return;
  }

  mTaskExecutorManager.notifyTaskRunning(mJobId, mTaskId);
  Serializable result;
  try {
    result = definition.runTask(jobConfig, taskArgs, mContext);
  } catch (InterruptedException e) {
    // Cleanup around the interruption should already have been handled by a different thread
    Thread.currentThread().interrupt();
    return;
  } catch (Throwable t) {
    fail(t, jobConfig, taskArgs);
    return;
  }
  mTaskExecutorManager.notifyTaskCompletion(mJobId, mTaskId, result);
}
use of alluxio.exception.JobDoesNotExistException in project alluxio by Alluxio.
the class PlanCoordinator method start.
/**
 * Starts the plan: resolves the plan definition for the plan's job config, asks it to select
 * executors from a shuffled copy of the known workers, and submits one run-task command per
 * selected (worker, args) pair while recording the task-to-worker bookkeeping.
 *
 * <p>If executor selection throws, the job is marked failed via {@code setJobAsFailed} and the
 * method returns normally. If no executor is selected, {@code updateStatus()} is invoked and
 * no task is submitted.
 *
 * @throws JobDoesNotExistException if the registry has no definition for the job config; the
 *         plan info is marked {@code FAILED} before the exception is rethrown
 */
private synchronized void start() throws JobDoesNotExistException {
  JobConfig planJobConfig = mPlanInfo.getJobConfig();
  // get the job definition
  LOG.info("Starting job Id={} Config={}", mPlanInfo.getId(), planJobConfig);
  PlanDefinition<JobConfig, ?, ?> definition;
  try {
    definition = PlanDefinitionRegistry.INSTANCE.getJobDefinition(planJobConfig);
  } catch (JobDoesNotExistException e) {
    LOG.info("Exception when getting jobDefinition from jobConfig: ", e);
    mPlanInfo.setErrorType(ErrorUtils.getErrorType(e));
    mPlanInfo.setErrorMessage(e.getMessage());
    DistributedCmdMetrics.incrementForAllConfigsFailStatus(planJobConfig);
    mPlanInfo.setStatus(Status.FAILED);
    throw e;
  }

  SelectExecutorsContext context =
      new SelectExecutorsContext(mPlanInfo.getId(), mJobServerContext);
  // Shuffle a private copy so the shared worker list is left untouched.
  ArrayList<WorkerInfo> shuffledWorkers = Lists.newArrayList(mWorkersInfoList);
  Collections.shuffle(shuffledWorkers);

  Set<? extends Pair<WorkerInfo, ?>> taskAddressToArgs;
  try {
    taskAddressToArgs = definition.selectExecutors(planJobConfig, shuffledWorkers, context);
  } catch (Exception e) {
    LOG.warn("Failed to select executor. {}", e.toString());
    LOG.info("Exception: ", e);
    setJobAsFailed(ErrorUtils.getErrorType(e), e.getMessage());
    return;
  }

  if (taskAddressToArgs.isEmpty()) {
    LOG.warn("No executor was selected.");
    updateStatus();
  }

  for (Pair<WorkerInfo, ?> pair : taskAddressToArgs) {
    WorkerInfo worker = pair.getFirst();
    LOG.debug("Selected executor {} with parameters {}.", worker, pair.getSecond());
    // The next task id is the number of tasks registered so far.
    int taskId = mTaskIdToWorkerInfo.size();
    // create task
    mPlanInfo.addTask(taskId, worker, pair.getSecond());

    // submit commands
    JobConfig config;
    if (planJobConfig instanceof BatchedJobConfig) {
      // Send a batched config of the same job type with an empty set instead of the plan's
      // full config. NOTE(review): presumably the per-task args carry the actual work items.
      BatchedJobConfig planConfig = (BatchedJobConfig) planJobConfig;
      config = new BatchedJobConfig(planConfig.getJobType(), new HashSet<>());
    } else {
      config = planJobConfig;
    }
    mCommandManager.submitRunTaskCommand(
        mPlanInfo.getId(), taskId, config, pair.getSecond(), worker.getId());

    mTaskIdToWorkerInfo.put((long) taskId, worker);
    mWorkerIdToTaskIds
        .computeIfAbsent(worker.getId(), id -> Lists.newArrayList())
        .add((long) taskId);
  }
}
use of alluxio.exception.JobDoesNotExistException in project alluxio by Alluxio.
the class WorkflowTracker method getStatus.
/**
 * Gets information of the given job id.
 *
 * <p>When {@code verbose} is set, the status of every child job is looked up in ascending
 * child-id order; children the job master no longer knows about are skipped. Otherwise the
 * returned info carries an empty child list.
 *
 * @param jobId the id of the job
 * @param verbose whether the output should be verbose
 * @return null if the job id isn't known by the workflow tracker. WorkflowInfo otherwise
 */
public WorkflowInfo getStatus(long jobId, boolean verbose) {
  WorkflowExecution workflowExecution = mWorkflows.get(jobId);
  if (workflowExecution == null) {
    return null;
  }

  List<JobInfo> jobInfos = Lists.newArrayList();
  if (verbose) {
    // Only copy and sort the children when they are actually reported.
    ArrayList<Long> children = Lists.newArrayList(mChildren.get(jobId).iterator());
    Collections.sort(children);
    for (long child : children) {
      try {
        jobInfos.add(mJobMaster.getStatus(child));
      } catch (JobDoesNotExistException e) {
        LOG.info("No job info on child job id {}. Skipping", child);
      }
    }
  }

  return new WorkflowInfo(jobId, workflowExecution.getName(), workflowExecution.getStatus(),
      workflowExecution.getLastUpdated(), workflowExecution.getErrorType(),
      workflowExecution.getErrorMessage(), jobInfos);
}
use of alluxio.exception.JobDoesNotExistException in project alluxio by Alluxio.
the class JobMaster method cancel.
/**
 * Cancels a job.
 *
 * <p>The id is first resolved against the plan tracker; if no plan coordinator exists for it,
 * the cancellation is delegated to the workflow tracker.
 *
 * @param jobId the id of the job
 * @throws JobDoesNotExistException when the job does not exist
 */
public void cancel(long jobId) throws JobDoesNotExistException {
  try (JobMasterAuditContext auditContext = createAuditContext("cancel")) {
    auditContext.setJobId(jobId);
    PlanCoordinator planCoordinator = mPlanTracker.getCoordinator(jobId);
    if (planCoordinator != null) {
      planCoordinator.cancel();
    } else if (!mWorkflowTracker.cancel(jobId)) {
      throw new JobDoesNotExistException(ExceptionMessage.JOB_DOES_NOT_EXIST.getMessage(jobId));
    }
    // Reached for both plan and workflow cancellations; previously the workflow path returned
    // early and was never marked as succeeded on the audit context.
    auditContext.setSucceeded(true);
  }
}
use of alluxio.exception.JobDoesNotExistException in project alluxio by Alluxio.
the class ReplicationChecker method check.
/**
 * Checks the replication level of every block of the given inodes and, for blocks that are
 * out of bounds for the given mode, asks the handler to start an evict or replicate job.
 *
 * <p>The scan stops early and returns what has been processed so far when the number of
 * active jobs reaches {@code mMaxActiveJobs}, when the block master is unavailable, or when
 * the handler reports that the job cannot be started right now. Inodes that already have an
 * active job are skipped.
 *
 * @param inodes ids of the inodes to check
 * @param handler handler used to start evict/replicate jobs
 * @param mode whether to look for over-replicated (EVICT) or under-replicated (REPLICATE)
 *        blocks
 * @return ids of the inodes for which at least one job was started
 * @throws InterruptedException if the calling thread was interrupted
 */
private Set<Long> check(Set<Long> inodes, ReplicationHandler handler, Mode mode)
    throws InterruptedException {
  Set<Long> processedFileIds = new HashSet<>();
  for (long inodeId : inodes) {
    if (mActiveJobToInodeID.size() >= mMaxActiveJobs) {
      return processedFileIds;
    }
    if (mActiveJobToInodeID.containsValue(inodeId)) {
      continue;
    }
    Set<Triple<AlluxioURI, Long, Integer>> requests = new HashSet<>();
    // Throw if interrupted.
    if (Thread.interrupted()) {
      throw new InterruptedException("ReplicationChecker interrupted.");
    }
    // Read-lock the full inode path; this access is read-only.
    try (LockedInodePath inodePath = mInodeTree.lockFullInodePath(inodeId, LockPattern.READ)) {
      InodeFile file = inodePath.getInodeFile();
      for (long blockId : file.getBlockIds()) {
        BlockInfo blockInfo = null;
        try {
          blockInfo = mBlockMaster.getBlockInfo(blockId);
        } catch (BlockInfoException e) {
          // Cannot find this block in Alluxio from BlockMaster, possibly persisted in UFS
        } catch (UnavailableException e) {
          // The block master is not available, wait for the next heartbeat
          LOG.warn("The block master is not available: {}", e.toString());
          return processedFileIds;
        }
        // A block unknown to the block master counts as having zero replicas.
        int currentReplicas = (blockInfo == null) ? 0 : blockInfo.getLocations().size();
        switch (mode) {
          case EVICT:
            int maxReplicas = file.getReplicationMax();
            // While the file is still to be persisted, the durable replica count takes
            // precedence over a smaller maximum.
            if (file.getPersistenceState() == PersistenceState.TO_BE_PERSISTED
                && file.getReplicationDurable() > maxReplicas) {
              maxReplicas = file.getReplicationDurable();
            }
            if (currentReplicas > maxReplicas) {
              requests.add(new ImmutableTriple<>(inodePath.getUri(), blockId,
                  currentReplicas - maxReplicas));
            }
            break;
          case REPLICATE:
            int minReplicas = file.getReplicationMin();
            // While the file is still to be persisted, the durable replica count takes
            // precedence over a smaller minimum.
            if (file.getPersistenceState() == PersistenceState.TO_BE_PERSISTED
                && file.getReplicationDurable() > minReplicas) {
              minReplicas = file.getReplicationDurable();
            }
            if (currentReplicas < minReplicas) {
              // if this file is not persisted and block master thinks it is lost, no effort made
              if (!file.isPersisted() && mBlockMaster.isBlockLost(blockId)) {
                // Skips to the next block id of the enclosing for loop.
                continue;
              }
              requests.add(new ImmutableTriple<>(inodePath.getUri(), blockId,
                  minReplicas - currentReplicas));
            }
            break;
          default:
            LOG.warn("Unexpected replication mode {}.", mode);
        }
      }
    } catch (FileDoesNotExistException e) {
      LOG.warn("Failed to check replication level for inode id {} : {}", inodeId, e.toString());
    }
    for (Triple<AlluxioURI, Long, Integer> entry : requests) {
      AlluxioURI uri = entry.getLeft();
      long blockId = entry.getMiddle();
      int numReplicas = entry.getRight();
      try {
        long jobId;
        switch (mode) {
          case EVICT:
            jobId = handler.evict(uri, blockId, numReplicas);
            break;
          case REPLICATE:
            jobId = handler.replicate(uri, blockId, numReplicas);
            break;
          default:
            // String.format needs %s; the SLF4J-style {} placeholder was never substituted.
            throw new RuntimeException(String.format("Unexpected replication mode %s.", mode));
        }
        processedFileIds.add(inodeId);
        mActiveJobToInodeID.put(jobId, inodeId);
      } catch (JobDoesNotExistException | ResourceExhaustedException e) {
        LOG.warn("The job service is busy, will retry later. {}", e.toString());
        return processedFileIds;
      } catch (UnavailableException e) {
        LOG.warn("Unable to complete the replication check: {}, will retry later.", e.toString());
        return processedFileIds;
      } catch (Exception e) {
        // Any other failure is logged and the remaining requests are still attempted.
        SAMPLING_LOG.warn("Unexpected exception encountered when starting a {} job (uri={},"
            + " block ID={}, num replicas={}) : {}", mode, uri, blockId, numReplicas, e.toString());
        LOG.debug("Job service unexpected exception: ", e);
      }
    }
  }
  return processedFileIds;
}
Aggregations