Search in sources :

Example 6 with ResourceExhaustedException

use of alluxio.exception.status.ResourceExhaustedException in project alluxio by Alluxio.

the class WorkflowTrackerTest method testCleanup.

/**
 * Starts three sleep plans (job IDs 1 to 3) and a two-child composite workflow (ID 0) whose
 * children are routed from the mocked job master into the plan tracker, then verifies that
 * further plans are rejected with {@link ResourceExhaustedException} until an existing job
 * (IDs 100 and 101) has been marked as failed.
 */
@Test
public void testCleanup() throws Exception {
    // Start three independent sleep plans with job IDs 1, 2 and 3.
    for (long planId = 1; planId <= 3; planId++) {
        mPlanTracker.run(new SleepJobConfig(1), mCommandManager, mMockJobServerContext, mWorkers, planId);
    }

    // Forward every run request received by the mocked job master to the plan tracker.
    doAnswer(invocation -> {
        PlanConfig forwardedConfig = invocation.getArgument(0, PlanConfig.class);
        long forwardedId = invocation.getArgument(1, Long.class);
        mPlanTracker.run(forwardedConfig, mCommandManager, mMockJobServerContext, mWorkers, forwardedId);
        return null;
    }).when(mMockJobMaster).run(any(PlanConfig.class), any(Long.class));

    // Run a composite workflow (ID 0) made of two sleep jobs.
    ArrayList<JobConfig> children = Lists.newArrayList();
    children.add(new SleepJobConfig(1));
    children.add(new SleepJobConfig(2));
    CompositeConfig workflowConfig = new CompositeConfig(children, false);
    mWorkflowTracker.run(workflowConfig, 0);

    // Job 4 must be rejected at this point.
    try {
        mPlanTracker.run(new SleepJobConfig(1), mCommandManager, mMockJobServerContext, mWorkers, 4);
        fail();
    } catch (ResourceExhaustedException expected) {
        // Should fail
    }

    // After job 100 is marked failed, job 4 is accepted and workflow 0 is still tracked.
    mPlanTracker.coordinators().stream()
        .filter(candidate -> candidate.getJobId() == 100)
        .findFirst()
        .get()
        .setJobAsFailed("TestError", "failed");
    mPlanTracker.run(new SleepJobConfig(1), mCommandManager, mMockJobServerContext, mWorkers, 4);
    assertNotNull(mWorkflowTracker.getStatus(0, true));

    // Job 5 must be rejected again.
    try {
        mPlanTracker.run(new SleepJobConfig(1), mCommandManager, mMockJobServerContext, mWorkers, 5);
        fail();
    } catch (ResourceExhaustedException expected) {
        // Should fail
    }

    // After job 101 is marked failed, job 5 is accepted and no status is reported for ID 100.
    mPlanTracker.coordinators().stream()
        .filter(candidate -> candidate.getJobId() == 101)
        .findFirst()
        .get()
        .setJobAsFailed("TestError", "failed");
    mPlanTracker.run(new SleepJobConfig(1), mCommandManager, mMockJobServerContext, mWorkers, 5);
    assertNull(mWorkflowTracker.getStatus(100, true));
}
Also used : TestPlanConfig(alluxio.job.TestPlanConfig) JobServerContext(alluxio.job.JobServerContext) Status(alluxio.job.wire.Status) ArrayList(java.util.ArrayList) CommandManager(alluxio.master.job.command.CommandManager) WorkflowInfo(alluxio.job.wire.WorkflowInfo) Lists(com.google.common.collect.Lists) ResourceExhaustedException(alluxio.exception.status.ResourceExhaustedException) WorkerInfo(alluxio.wire.WorkerInfo) Mockito.doAnswer(org.mockito.Mockito.doAnswer) Assert.fail(org.junit.Assert.fail) SleepJobConfig(alluxio.job.SleepJobConfig) Before(org.junit.Before) Assert.assertNotNull(org.junit.Assert.assertNotNull) CompositeConfig(alluxio.job.workflow.composite.CompositeConfig) JobMaster(alluxio.master.job.JobMaster) Test(org.junit.Test) Mockito.when(org.mockito.Mockito.when) Mockito.verify(org.mockito.Mockito.verify) PlanInfo(alluxio.job.plan.meta.PlanInfo) Mockito.never(org.mockito.Mockito.never) Assert.assertNull(org.junit.Assert.assertNull) JobConfig(alluxio.job.JobConfig) PlanTracker(alluxio.master.job.plan.PlanTracker) PlanConfig(alluxio.job.plan.PlanConfig) Mockito.any(org.mockito.Mockito.any) Assert.assertEquals(org.junit.Assert.assertEquals) Mockito.mock(org.mockito.Mockito.mock) ResourceExhaustedException(alluxio.exception.status.ResourceExhaustedException) SleepJobConfig(alluxio.job.SleepJobConfig) TestPlanConfig(alluxio.job.TestPlanConfig) PlanConfig(alluxio.job.plan.PlanConfig) CompositeConfig(alluxio.job.workflow.composite.CompositeConfig) SleepJobConfig(alluxio.job.SleepJobConfig) JobConfig(alluxio.job.JobConfig) Test(org.junit.Test)

Example 7 with ResourceExhaustedException

use of alluxio.exception.status.ResourceExhaustedException in project alluxio by Alluxio.

the class ReplicationChecker method check.

/**
 * Checks the replication level of every block of the given inodes and submits evict or
 * replicate jobs through the handler for blocks that are over or under their target.
 *
 * <p>Processing stops early, returning what has been processed so far, when the number of
 * active jobs reaches {@code mMaxActiveJobs}, when the block master is unavailable, or when
 * the job service rejects a request. Inodes that already have an active job are skipped.
 *
 * @param inodes IDs of the inodes to check
 * @param handler handler used to submit the evict/replicate jobs
 * @param mode whether to look for over-replicated (EVICT) or under-replicated (REPLICATE) blocks
 * @return IDs of the inodes for which at least one job was submitted
 * @throws InterruptedException if the current thread's interrupt flag is set
 */
private Set<Long> check(Set<Long> inodes, ReplicationHandler handler, Mode mode) throws InterruptedException {
    Set<Long> processedFileIds = new HashSet<>();
    for (long inodeId : inodes) {
        // Do not schedule more work once the active-job limit is reached.
        if (mActiveJobToInodeID.size() >= mMaxActiveJobs) {
            return processedFileIds;
        }
        // Skip inodes that already have a job in flight.
        if (mActiveJobToInodeID.containsValue(inodeId)) {
            continue;
        }
        // Each request is (file URI, block ID, number of replicas to add or remove).
        Set<Triple<AlluxioURI, Long, Integer>> requests = new HashSet<>();
        // Throw if interrupted.
        if (Thread.interrupted()) {
            throw new InterruptedException("ReplicationChecker interrupted.");
        }
        // The inode path is locked with the READ pattern; the file is only read below.
        try (LockedInodePath inodePath = mInodeTree.lockFullInodePath(inodeId, LockPattern.READ)) {
            InodeFile file = inodePath.getInodeFile();
            for (long blockId : file.getBlockIds()) {
                BlockInfo blockInfo = null;
                try {
                    blockInfo = mBlockMaster.getBlockInfo(blockId);
                } catch (BlockInfoException e) {
                    // Cannot find this block in Alluxio from BlockMaster, possibly persisted in UFS
                } catch (UnavailableException e) {
                    // The block master is not available, wait for the next heartbeat
                    LOG.warn("The block master is not available: {}", e.toString());
                    return processedFileIds;
                }
                // A block unknown to the block master counts as having zero replicas.
                int currentReplicas = (blockInfo == null) ? 0 : blockInfo.getLocations().size();
                switch(mode) {
                    case EVICT:
                        int maxReplicas = file.getReplicationMax();
                        // For files still to be persisted, the durable replica count raises the ceiling.
                        if (file.getPersistenceState() == PersistenceState.TO_BE_PERSISTED && file.getReplicationDurable() > maxReplicas) {
                            maxReplicas = file.getReplicationDurable();
                        }
                        if (currentReplicas > maxReplicas) {
                            requests.add(new ImmutableTriple<>(inodePath.getUri(), blockId, currentReplicas - maxReplicas));
                        }
                        break;
                    case REPLICATE:
                        int minReplicas = file.getReplicationMin();
                        // For files still to be persisted, the durable replica count raises the floor.
                        if (file.getPersistenceState() == PersistenceState.TO_BE_PERSISTED && file.getReplicationDurable() > minReplicas) {
                            minReplicas = file.getReplicationDurable();
                        }
                        if (currentReplicas < minReplicas) {
                            // if this file is not persisted and block master thinks it is lost, no effort made
                            if (!file.isPersisted() && mBlockMaster.isBlockLost(blockId)) {
                                continue;
                            }
                            requests.add(new ImmutableTriple<>(inodePath.getUri(), blockId, minReplicas - currentReplicas));
                        }
                        break;
                    default:
                        LOG.warn("Unexpected replication mode {}.", mode);
                }
            }
        } catch (FileDoesNotExistException e) {
            LOG.warn("Failed to check replication level for inode id {} : {}", inodeId, e.toString());
        }
        // Submit one job per collected request.
        for (Triple<AlluxioURI, Long, Integer> entry : requests) {
            AlluxioURI uri = entry.getLeft();
            long blockId = entry.getMiddle();
            int numReplicas = entry.getRight();
            try {
                long jobId;
                switch(mode) {
                    case EVICT:
                        jobId = handler.evict(uri, blockId, numReplicas);
                        break;
                    case REPLICATE:
                        jobId = handler.replicate(uri, blockId, numReplicas);
                        break;
                    default:
                        // String.format needs %s; the SLF4J-style {} placeholder is not substituted.
                        throw new RuntimeException(String.format("Unexpected replication mode %s.", mode));
                }
                processedFileIds.add(inodeId);
                mActiveJobToInodeID.put(jobId, inodeId);
            } catch (JobDoesNotExistException | ResourceExhaustedException e) {
                LOG.warn("The job service is busy, will retry later. {}", e.toString());
                return processedFileIds;
            } catch (UnavailableException e) {
                LOG.warn("Unable to complete the replication check: {}, will retry later.", e.toString());
                return processedFileIds;
            } catch (Exception e) {
                // Any other failure is logged and the remaining requests are still attempted.
                SAMPLING_LOG.warn("Unexpected exception encountered when starting a {} job (uri={}," + " block ID={}, num replicas={}) : {}", mode, uri, blockId, numReplicas, e.toString());
                LOG.debug("Job service unexpected exception: ", e);
            }
        }
    }
    return processedFileIds;
}
Also used : FileDoesNotExistException(alluxio.exception.FileDoesNotExistException) JobDoesNotExistException(alluxio.exception.JobDoesNotExistException) UnavailableException(alluxio.exception.status.UnavailableException) BlockInfoException(alluxio.exception.BlockInfoException) InodeFile(alluxio.master.file.meta.InodeFile) JobDoesNotExistException(alluxio.exception.JobDoesNotExistException) ResourceExhaustedException(alluxio.exception.status.ResourceExhaustedException) BlockInfoException(alluxio.exception.BlockInfoException) IOException(java.io.IOException) FileDoesNotExistException(alluxio.exception.FileDoesNotExistException) UnavailableException(alluxio.exception.status.UnavailableException) Triple(org.apache.commons.lang3.tuple.Triple) ImmutableTriple(org.apache.commons.lang3.tuple.ImmutableTriple) LockedInodePath(alluxio.master.file.meta.LockedInodePath) ResourceExhaustedException(alluxio.exception.status.ResourceExhaustedException) BlockInfo(alluxio.wire.BlockInfo) HashSet(java.util.HashSet) AlluxioURI(alluxio.AlluxioURI)

Example 8 with ResourceExhaustedException

use of alluxio.exception.status.ResourceExhaustedException in project alluxio by Alluxio.

the class WorkflowTracker method next.

/**
 * Advances the workflow with the given ID: asks its execution for the next set of child job
 * configs, marks the workflow done when there are none, and otherwise reserves a new job ID
 * per child, records the parent/child bookkeeping and submits each child to the job master.
 *
 * @param jobId the ID of the workflow to advance
 */
private synchronized void next(long jobId) {
    WorkflowExecution execution = mWorkflows.get(jobId);
    mChildren.putIfAbsent(jobId, new ConcurrentHashSet<>());

    Set<JobConfig> pendingConfigs = execution.next();
    if (pendingConfigs.isEmpty()) {
        // Nothing left to run for this workflow.
        done(jobId);
        return;
    }

    // Reserve one fresh job ID for every child config.
    ConcurrentHashSet<Long> reservedIds = new ConcurrentHashSet<>();
    for (int remaining = pendingConfigs.size(); remaining > 0; remaining--) {
        reservedIds.add(mJobMaster.getNewJobId());
    }

    // Record what the workflow is waiting on and link each child back to its parent.
    mWaitingOn.put(jobId, reservedIds);
    mChildren.get(jobId).addAll(reservedIds);
    reservedIds.forEach(reservedId -> mParentWorkflow.put(reservedId, jobId));

    // Pair each config with a reserved ID and submit it; stop when either side runs out.
    Iterator<Long> idCursor = reservedIds.iterator();
    for (JobConfig childConfig : pendingConfigs) {
        if (!idCursor.hasNext()) {
            break;
        }
        Long childId = idCursor.next();
        try {
            mJobMaster.run(childConfig, childId);
        } catch (JobDoesNotExistException | ResourceExhaustedException e) {
            // A child that cannot be started fails the whole workflow.
            final String failureMessage = e.getMessage();
            LOG.warn(failureMessage);
            final String failureType = ErrorUtils.getErrorType(e);
            execution.stop(Status.FAILED, failureType, failureMessage);
            stop(jobId, Status.FAILED, failureType, failureMessage);
        }
    }
}
Also used : JobDoesNotExistException(alluxio.exception.JobDoesNotExistException) JobConfig(alluxio.job.JobConfig) ResourceExhaustedException(alluxio.exception.status.ResourceExhaustedException) ConcurrentHashSet(alluxio.collections.ConcurrentHashSet) WorkflowExecution(alluxio.job.workflow.WorkflowExecution)

Example 9 with ResourceExhaustedException

use of alluxio.exception.status.ResourceExhaustedException in project alluxio by Alluxio.

the class LocalPageStore method put.

/**
 * Writes the given page bytes to the file that backs {@code pageId}, creating the parent
 * directories and the file first when the file does not exist yet. On any failure the file
 * is deleted (best effort) before the error is reported.
 *
 * @param pageId identifier of the page; it determines the target file path
 * @param page the bytes to write; existing file content is overwritten
 * @throws ResourceExhaustedException if the failure message contains
 *         {@code ERROR_NO_SPACE_LEFT}
 * @throws IOException on any other failure; the original exception is attached as the cause
 */
@Override
public void put(PageId pageId, byte[] page) throws ResourceExhaustedException, IOException {
    Path p = getFilePath(pageId);
    try {
        if (!Files.exists(p)) {
            Path parent = Preconditions.checkNotNull(p.getParent(), "parent of cache file should not be null");
            Files.createDirectories(parent);
            Files.createFile(p);
        }
        // extra try to ensure output stream is closed
        try (FileOutputStream fos = new FileOutputStream(p.toFile(), false)) {
            fos.write(page);
        }
    } catch (Exception e) {
        // Best-effort cleanup: a failure here must not hide the original error.
        try {
            Files.deleteIfExists(p);
        } catch (IOException | RuntimeException cleanupFailure) {
            e.addSuppressed(cleanupFailure);
        }
        // getMessage() may be null, e.g. for exceptions created without a message.
        String message = e.getMessage();
        if (message != null && message.contains(ERROR_NO_SPACE_LEFT)) {
            throw new ResourceExhaustedException(String.format("%s is full, configured with %d bytes", getRoot(pageId), mCapacity), e);
        }
        // Keep the original exception as the cause so the failure can be diagnosed.
        throw new IOException("Failed to write file " + p + " for page " + pageId, e);
    }
}
Also used : Path(java.nio.file.Path) ResourceExhaustedException(alluxio.exception.status.ResourceExhaustedException) FileOutputStream(java.io.FileOutputStream) IOException(java.io.IOException) PageNotFoundException(alluxio.exception.PageNotFoundException) IOException(java.io.IOException) ResourceExhaustedException(alluxio.exception.status.ResourceExhaustedException)

Aggregations

ResourceExhaustedException (alluxio.exception.status.ResourceExhaustedException)9 IOException (java.io.IOException)5 PageNotFoundException (alluxio.exception.PageNotFoundException)3 JobConfig (alluxio.job.JobConfig)3 Test (org.junit.Test)3 JobDoesNotExistException (alluxio.exception.JobDoesNotExistException)2 JobServerContext (alluxio.job.JobServerContext)2 SleepJobConfig (alluxio.job.SleepJobConfig)2 TestPlanConfig (alluxio.job.TestPlanConfig)2 CommandManager (alluxio.master.job.command.CommandManager)2 AlluxioURI (alluxio.AlluxioURI)1 PageId (alluxio.client.file.cache.PageId)1 LocalPageStore (alluxio.client.file.cache.store.LocalPageStore)1 CacheScope (alluxio.client.quota.CacheScope)1 ConcurrentHashSet (alluxio.collections.ConcurrentHashSet)1 BlockInfoException (alluxio.exception.BlockInfoException)1 FileDoesNotExistException (alluxio.exception.FileDoesNotExistException)1 UnavailableException (alluxio.exception.status.UnavailableException)1 PlanConfig (alluxio.job.plan.PlanConfig)1 PlanInfo (alluxio.job.plan.meta.PlanInfo)1