Use of alluxio.exception.status.ResourceExhaustedException in project alluxio by Alluxio.
The class WorkflowTrackerTest, method testCleanup:
@Test
public void testCleanup() throws Exception {
  SleepJobConfig jobConfig = new SleepJobConfig(1);
  mPlanTracker.run(jobConfig, mCommandManager, mMockJobServerContext, mWorkers, 1);
  jobConfig = new SleepJobConfig(1);
  mPlanTracker.run(jobConfig, mCommandManager, mMockJobServerContext, mWorkers, 2);
  jobConfig = new SleepJobConfig(1);
  mPlanTracker.run(jobConfig, mCommandManager, mMockJobServerContext, mWorkers, 3);

  doAnswer(invocation -> {
    PlanConfig config = invocation.getArgument(0, PlanConfig.class);
    long jobId = invocation.getArgument(1, Long.class);
    mPlanTracker.run(config, mCommandManager, mMockJobServerContext, mWorkers, jobId);
    return null;
  }).when(mMockJobMaster).run(any(PlanConfig.class), any(Long.class));

  ArrayList<JobConfig> jobs = Lists.newArrayList();
  SleepJobConfig child1 = new SleepJobConfig(1);
  SleepJobConfig child2 = new SleepJobConfig(2);
  jobs.add(child1);
  jobs.add(child2);
  CompositeConfig config = new CompositeConfig(jobs, false);

  mWorkflowTracker.run(config, 0);

  try {
    mPlanTracker.run(new SleepJobConfig(1), mCommandManager, mMockJobServerContext, mWorkers, 4);
    fail();
  } catch (ResourceExhaustedException e) {
    // Should fail
  }

  mPlanTracker.coordinators().stream()
      .filter(coordinator -> coordinator.getJobId() == 100)
      .findFirst().get()
      .setJobAsFailed("TestError", "failed");

  mPlanTracker.run(new SleepJobConfig(1), mCommandManager, mMockJobServerContext, mWorkers, 4);

  assertNotNull(mWorkflowTracker.getStatus(0, true));

  try {
    mPlanTracker.run(new SleepJobConfig(1), mCommandManager, mMockJobServerContext, mWorkers, 5);
    fail();
  } catch (ResourceExhaustedException e) {
    // Should fail
  }

  mPlanTracker.coordinators().stream()
      .filter(coordinator -> coordinator.getJobId() == 101)
      .findFirst().get()
      .setJobAsFailed("TestError", "failed");

  mPlanTracker.run(new SleepJobConfig(1), mCommandManager, mMockJobServerContext, mWorkers, 5);

  assertNull(mWorkflowTracker.getStatus(100, true));
}
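In this test the plan tracker is configured with a small job capacity, so once the long-running sleep jobs fill it up, any further submission fails with ResourceExhaustedException until an existing job completes or is marked failed. Below is a minimal sketch of that submit-and-back-off pattern; it is intentionally generic, and the CapacityAwareSubmitter class, the Submitter interface, and the RETRY_SLEEP_MS constant are hypothetical, not part of Alluxio:

import alluxio.exception.status.ResourceExhaustedException;

public final class CapacityAwareSubmitter {
  private static final long RETRY_SLEEP_MS = 100; // hypothetical back-off interval

  /** Anything that submits work and may be rejected when the tracker is at capacity. */
  public interface Submitter {
    void submit() throws Exception;
  }

  /**
   * Retries a submission a bounded number of times, backing off whenever the
   * tracker reports that its capacity is exhausted.
   *
   * @return true if the submission eventually succeeded
   */
  public static boolean submitWithRetry(Submitter submitter, int maxAttempts) throws Exception {
    for (int attempt = 0; attempt < maxAttempts; attempt++) {
      try {
        submitter.submit();
        return true;
      } catch (ResourceExhaustedException e) {
        // Tracker is full; wait for a running job to finish (or be failed) and retry.
        Thread.sleep(RETRY_SLEEP_MS);
      }
    }
    return false;
  }
}

With a pattern like this, the submissions that the test expects to fail outright would instead succeed once setJobAsFailed frees a slot.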
Use of alluxio.exception.status.ResourceExhaustedException in project alluxio by Alluxio.
The class ReplicationChecker, method check:
private Set<Long> check(Set<Long> inodes, ReplicationHandler handler, Mode mode)
    throws InterruptedException {
  Set<Long> processedFileIds = new HashSet<>();
  for (long inodeId : inodes) {
    if (mActiveJobToInodeID.size() >= mMaxActiveJobs) {
      return processedFileIds;
    }
    if (mActiveJobToInodeID.containsValue(inodeId)) {
      continue;
    }
    Set<Triple<AlluxioURI, Long, Integer>> requests = new HashSet<>();
    // Throw if interrupted.
    if (Thread.interrupted()) {
      throw new InterruptedException("ReplicationChecker interrupted.");
    }
    // lockFullInodePath locks the entire path; this access is read-only and ideally
    // would lock just the inode file.
    try (LockedInodePath inodePath = mInodeTree.lockFullInodePath(inodeId, LockPattern.READ)) {
      InodeFile file = inodePath.getInodeFile();
      for (long blockId : file.getBlockIds()) {
        BlockInfo blockInfo = null;
        try {
          blockInfo = mBlockMaster.getBlockInfo(blockId);
        } catch (BlockInfoException e) {
          // Cannot find this block in Alluxio from BlockMaster, possibly persisted in UFS
        } catch (UnavailableException e) {
          // The block master is not available, wait for the next heartbeat
          LOG.warn("The block master is not available: {}", e.toString());
          return processedFileIds;
        }
        int currentReplicas = (blockInfo == null) ? 0 : blockInfo.getLocations().size();
        switch (mode) {
          case EVICT:
            int maxReplicas = file.getReplicationMax();
            if (file.getPersistenceState() == PersistenceState.TO_BE_PERSISTED
                && file.getReplicationDurable() > maxReplicas) {
              maxReplicas = file.getReplicationDurable();
            }
            if (currentReplicas > maxReplicas) {
              requests.add(new ImmutableTriple<>(inodePath.getUri(), blockId,
                  currentReplicas - maxReplicas));
            }
            break;
          case REPLICATE:
            int minReplicas = file.getReplicationMin();
            if (file.getPersistenceState() == PersistenceState.TO_BE_PERSISTED
                && file.getReplicationDurable() > minReplicas) {
              minReplicas = file.getReplicationDurable();
            }
            if (currentReplicas < minReplicas) {
              // if this file is not persisted and block master thinks it is lost, no effort made
              if (!file.isPersisted() && mBlockMaster.isBlockLost(blockId)) {
                continue;
              }
              requests.add(new ImmutableTriple<>(inodePath.getUri(), blockId,
                  minReplicas - currentReplicas));
            }
            break;
          default:
            LOG.warn("Unexpected replication mode {}.", mode);
        }
      }
    } catch (FileDoesNotExistException e) {
      LOG.warn("Failed to check replication level for inode id {} : {}", inodeId, e.toString());
    }
    for (Triple<AlluxioURI, Long, Integer> entry : requests) {
      AlluxioURI uri = entry.getLeft();
      long blockId = entry.getMiddle();
      int numReplicas = entry.getRight();
      try {
        long jobId;
        switch (mode) {
          case EVICT:
            jobId = handler.evict(uri, blockId, numReplicas);
            break;
          case REPLICATE:
            jobId = handler.replicate(uri, blockId, numReplicas);
            break;
          default:
            throw new RuntimeException(String.format("Unexpected replication mode %s.", mode));
        }
        processedFileIds.add(inodeId);
        mActiveJobToInodeID.put(jobId, inodeId);
      } catch (JobDoesNotExistException | ResourceExhaustedException e) {
        LOG.warn("The job service is busy, will retry later. {}", e.toString());
        return processedFileIds;
      } catch (UnavailableException e) {
        LOG.warn("Unable to complete the replication check: {}, will retry later.", e.toString());
        return processedFileIds;
      } catch (Exception e) {
        SAMPLING_LOG.warn("Unexpected exception encountered when starting a {} job (uri={},"
            + " block ID={}, num replicas={}) : {}", mode, uri, blockId, numReplicas, e.toString());
        LOG.debug("Job service unexpected exception: ", e);
      }
    }
  }
  return processedFileIds;
}
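check() treats ResourceExhaustedException (and JobDoesNotExistException) from the handler as "the job service is busy": it stops scheduling, returns the file ids processed so far, and leaves the rest for the next heartbeat. The stub below illustrates the handler side of that contract; CappedHandler is hypothetical, does not implement the real ReplicationHandler interface, and only mirrors the replicate signature used above:

import alluxio.AlluxioURI;
import alluxio.exception.status.ResourceExhaustedException;

/** Hypothetical capped handler: accepts jobs until a fixed limit, then rejects them. */
public final class CappedHandler {
  private final int mMaxJobs;  // assumed cap on concurrently tracked jobs
  private long mNextJobId = 1;
  private int mInFlight = 0;   // never decremented here; completion handling is omitted

  public CappedHandler(int maxJobs) {
    mMaxJobs = maxJobs;
  }

  public synchronized long replicate(AlluxioURI uri, long blockId, int numReplicas)
      throws ResourceExhaustedException {
    if (mInFlight >= mMaxJobs) {
      // check() catches this, logs "The job service is busy, will retry later",
      // and returns the partially processed set of file ids.
      throw new ResourceExhaustedException("job service is busy");
    }
    mInFlight++;
    return mNextJobId++;
  }
}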
Use of alluxio.exception.status.ResourceExhaustedException in project alluxio by Alluxio.
The class WorkflowTracker, method next:
private synchronized void next(long jobId) {
  WorkflowExecution workflowExecution = mWorkflows.get(jobId);
  mChildren.putIfAbsent(jobId, new ConcurrentHashSet<>());
  Set<JobConfig> childJobConfigs = workflowExecution.next();
  if (childJobConfigs.isEmpty()) {
    done(jobId);
    return;
  }
  ConcurrentHashSet<Long> childJobIds = new ConcurrentHashSet<>();
  for (int i = 0; i < childJobConfigs.size(); i++) {
    childJobIds.add(mJobMaster.getNewJobId());
  }
  mWaitingOn.put(jobId, childJobIds);
  mChildren.get(jobId).addAll(childJobIds);
  for (Long childJobId : childJobIds) {
    mParentWorkflow.put(childJobId, jobId);
  }
  Iterator<Long> childJobIdsIter = childJobIds.iterator();
  Iterator<JobConfig> childJobConfigsIter = childJobConfigs.iterator();
  while (childJobIdsIter.hasNext() && childJobConfigsIter.hasNext()) {
    Long childJobId = childJobIdsIter.next();
    JobConfig childJobConfig = childJobConfigsIter.next();
    try {
      mJobMaster.run(childJobConfig, childJobId);
    } catch (JobDoesNotExistException | ResourceExhaustedException e) {
      LOG.warn(e.getMessage());
      final String errorType = ErrorUtils.getErrorType(e);
      workflowExecution.stop(Status.FAILED, errorType, e.getMessage());
      stop(jobId, Status.FAILED, errorType, e.getMessage());
    }
  }
}
Use of alluxio.exception.status.ResourceExhaustedException in project alluxio by Alluxio.
The class LocalPageStore, method put:
@Override
public void put(PageId pageId, byte[] page) throws ResourceExhaustedException, IOException {
  Path p = getFilePath(pageId);
  try {
    if (!Files.exists(p)) {
      Path parent = Preconditions.checkNotNull(p.getParent(),
          "parent of cache file should not be null");
      Files.createDirectories(parent);
      Files.createFile(p);
    }
    // extra try to ensure output stream is closed
    try (FileOutputStream fos = new FileOutputStream(p.toFile(), false)) {
      fos.write(page);
    }
  } catch (Exception e) {
    Files.deleteIfExists(p);
    if (e.getMessage() != null && e.getMessage().contains(ERROR_NO_SPACE_LEFT)) {
      throw new ResourceExhaustedException(
          String.format("%s is full, configured with %d bytes", getRoot(pageId), mCapacity), e);
    }
    throw new IOException("Failed to write file " + p + " for page " + pageId, e);
  }
}
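put() maps a "no space left on device" failure onto ResourceExhaustedException, so callers can distinguish a full cache directory from other I/O errors. A minimal caller sketch follows, assuming the package paths alluxio.client.file.cache.PageId and alluxio.client.file.cache.store.LocalPageStore; the PutWithEviction class and the evictOne callback are hypothetical and stand in for whatever eviction policy the cache uses:

import java.io.IOException;

import alluxio.client.file.cache.PageId;
import alluxio.client.file.cache.store.LocalPageStore;
import alluxio.exception.status.ResourceExhaustedException;

/** Hypothetical helper: on a capacity error, free one page and retry the write once. */
public final class PutWithEviction {
  public static void putOrEvict(LocalPageStore store, PageId id, byte[] page, Runnable evictOne)
      throws IOException {
    try {
      store.put(id, page);
    } catch (ResourceExhaustedException e) {
      // The cache directory is full; evict something (policy not shown) and retry once.
      evictOne.run();
      store.put(id, page);
    }
  }
}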