
Example 1 with AllocationShape

Use of org.nd4j.jita.allocator.impl.AllocationShape in project nd4j by deeplearning4j.

The class CudaWorkspace, method alloc.

@Override
public PagedPointer alloc(long requiredMemory, MemoryKind kind, DataBuffer.Type type, boolean initialize) {
    long numElements = requiredMemory / Nd4j.sizeOfDataType(type);
    // workspace is disabled: allocate outside the workspace and track the pointer as an external allocation
    if (!isUsed.get()) {
        if (disabledCounter.incrementAndGet() % 10 == 0)
            log.warn("Worskpace was turned off, and wasn't enabled after {} allocations", disabledCounter.get());
        if (kind == MemoryKind.DEVICE) {
            PagedPointer pointer = new PagedPointer(memoryManager.allocate(requiredMemory, MemoryKind.DEVICE, initialize), numElements);
            externalAllocations.add(new PointersPair(null, pointer));
            return pointer;
        } else {
            PagedPointer pointer = new PagedPointer(memoryManager.allocate(requiredMemory, MemoryKind.HOST, initialize), numElements);
            externalAllocations.add(new PointersPair(pointer, null));
            return pointer;
        }
    }
    // pad requiredMemory up to the next multiple of 8 bytes (exact for the 4- and 8-byte element types used here)
    long div = requiredMemory % 8;
    if (div != 0)
        requiredMemory += div;
    // "trimmer" mode: under the ENDOFBUFFER_REACHED reset policy, allocations that no longer fit the initial block are kept outside the cyclic buffer and tracked as pinned
    boolean trimmer = (workspaceConfiguration.getPolicyReset() == ResetPolicy.ENDOFBUFFER_REACHED && requiredMemory + cycleAllocations.get() > initialBlockSize.get() && initialBlockSize.get() > 0 && kind == MemoryKind.DEVICE) || trimmedMode.get();
    if (trimmer && workspaceConfiguration.getPolicySpill() == SpillPolicy.REALLOCATE && !trimmedMode.get()) {
        trimmedMode.set(true);
        trimmedStep.set(stepsCount.get());
    }
    if (kind == MemoryKind.DEVICE) {
        // the request fits into the current device buffer: just advance the device offset
        if (deviceOffset.get() + requiredMemory <= currentSize.get() && !trimmer) {
            cycleAllocations.addAndGet(requiredMemory);
            long prevOffset = deviceOffset.getAndAdd(requiredMemory);
            if (workspaceConfiguration.getPolicyMirroring() == MirroringPolicy.HOST_ONLY)
                return null;
            PagedPointer ptr = workspace.getDevicePointer().withOffset(prevOffset, numElements);
            if (isDebug.get())
                log.info("Workspace [{}] device_{}: alloc array of {} bytes, capacity of {} elements; prevOffset: {}; newOffset: {}; size: {}; address: {}", id, Nd4j.getAffinityManager().getDeviceForCurrentThread(), requiredMemory, numElements, prevOffset, deviceOffset.get(), currentSize.get(), ptr.address());
            if (initialize) {
                // CudaContext context = AtomicAllocator.getInstance().getMemoryHandler().getCudaContext();
                CudaContext context = (CudaContext) AtomicAllocator.getInstance().getDeviceContext().getContext();
                int ret = NativeOpsHolder.getInstance().getDeviceNativeOps().memsetAsync(ptr, 0, requiredMemory, 0, context.getSpecialStream());
                if (ret == 0)
                    throw new ND4JIllegalStateException("memset failed device_" + Nd4j.getAffinityManager().getDeviceForCurrentThread());
                context.syncSpecialStream();
            }
            return ptr;
        } else {
            // spill
            if (workspaceConfiguration.getPolicyReset() == ResetPolicy.ENDOFBUFFER_REACHED && currentSize.get() > 0 && !trimmer) {
                // log.info("End of space reached. Current offset: {}; requiredMemory: {}", deviceOffset.get(), requiredMemory);
                reset();
                resetPlanned.set(true);
                return alloc(requiredMemory, kind, type, initialize);
            }
            if (!trimmer)
                spilledAllocationsSize.addAndGet(requiredMemory);
            else
                pinnedAllocationsSize.addAndGet(requiredMemory);
            if (isDebug.get()) {
                log.info("Workspace [{}] device_{}: spilled DEVICE array of {} bytes, capacity of {} elements", id, Nd4j.getAffinityManager().getDeviceForCurrentThread(), requiredMemory, numElements);
            }
            // Nd4j.getWorkspaceManager().printAllocationStatisticsForCurrentThread();
            AllocationShape shape = new AllocationShape(requiredMemory / Nd4j.sizeOfDataType(type), Nd4j.sizeOfDataType(type), type);
            cycleAllocations.addAndGet(requiredMemory);
            if (workspaceConfiguration.getPolicyMirroring() == MirroringPolicy.HOST_ONLY)
                return null;
            switch(workspaceConfiguration.getPolicySpill()) {
                case REALLOCATE:
                case EXTERNAL:
                    if (!trimmer) {
                        externalCount.incrementAndGet();
                        // 
                        // AtomicAllocator.getInstance().getMemoryHandler().getMemoryProvider().malloc(shape, null, AllocationStatus.DEVICE).getDevicePointer()
                        PagedPointer pointer = new PagedPointer(memoryManager.allocate(requiredMemory, MemoryKind.DEVICE, initialize), numElements);
                        // pointer.setLeaked(true);
                        pointer.isLeaked();
                        externalAllocations.add(new PointersPair(null, pointer));
                        return pointer;
                    } else {
                        pinnedCount.incrementAndGet();
                        // 
                        // AtomicAllocator.getInstance().getMemoryHandler().getMemoryProvider().malloc(shape, null, AllocationStatus.DEVICE).getDevicePointer()
                        PagedPointer pointer = new PagedPointer(memoryManager.allocate(requiredMemory, MemoryKind.DEVICE, initialize), numElements);
                        // pointer.setLeaked(true);
                        pointer.isLeaked();
                        pinnedAllocations.add(new PointersPair(stepsCount.get(), requiredMemory, null, pointer));
                        return pointer;
                    }
                case FAIL:
                default:
                    {
                        throw new ND4JIllegalStateException("Can't allocate memory: Workspace is full");
                    }
            }
        }
    } else if (kind == MemoryKind.HOST) {
        // the request fits into the current host buffer: just advance the host offset
        if (hostOffset.get() + requiredMemory <= currentSize.get() && !trimmer) {
            long prevOffset = hostOffset.getAndAdd(requiredMemory);
            PagedPointer ptr = workspace.getHostPointer().withOffset(prevOffset, numElements);
            if (initialize)
                Pointer.memset(ptr, 0, requiredMemory);
            return ptr;
        } else {
            // log.info("Spilled HOST array of {} bytes, capacity of {} elements", requiredMemory, numElements);
            AllocationShape shape = new AllocationShape(requiredMemory / Nd4j.sizeOfDataType(type), Nd4j.sizeOfDataType(type), type);
            switch(workspaceConfiguration.getPolicySpill()) {
                case REALLOCATE:
                case EXTERNAL:
                    if (!trimmer) {
                        // memoryManager.allocate(requiredMemory, MemoryKind.HOST, true)
                        // AtomicAllocator.getInstance().getMemoryHandler().getMemoryProvider().malloc(shape, null, AllocationStatus.DEVICE).getDevicePointer()
                        PagedPointer pointer = new PagedPointer(memoryManager.allocate(requiredMemory, MemoryKind.HOST, initialize), numElements);
                        // pointer.setLeaked(true);
                        externalAllocations.add(new PointersPair(pointer, null));
                        return pointer;
                    } else {
                        // AtomicAllocator.getInstance().getMemoryHandler().getMemoryProvider().malloc(shape, null, AllocationStatus.DEVICE).getDevicePointer()
                        PagedPointer pointer = new PagedPointer(memoryManager.allocate(requiredMemory, MemoryKind.HOST, initialize), numElements);
                        // pointer.setLeaked(true);
                        pointer.isLeaked();
                        pinnedAllocations.add(new PointersPair(stepsCount.get(), 0L, pointer, null));
                        return pointer;
                    }
                case FAIL:
                default:
                    {
                        throw new ND4JIllegalStateException("Can't allocate memory: Workspace is full");
                    }
            }
        }
    } else
        throw new ND4JIllegalStateException("Unknown MemoryKind was passed in: " + kind);
// throw new ND4JIllegalStateException("Shouldn't ever reach this line");
}
Also used: AllocationShape (org.nd4j.jita.allocator.impl.AllocationShape), PointersPair (org.nd4j.linalg.api.memory.pointers.PointersPair), CudaContext (org.nd4j.linalg.jcublas.context.CudaContext), ND4JIllegalStateException (org.nd4j.linalg.exception.ND4JIllegalStateException), PagedPointer (org.nd4j.linalg.api.memory.pointers.PagedPointer)
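
In both spill branches above, the AllocationShape is built straight from the requested byte count and the data type. A minimal sketch of that pattern, extracted into a hypothetical helper (the class and method names below are not part of nd4j; only new AllocationShape(length, elementSize, type) and Nd4j.sizeOfDataType(type) come from the code above):

import org.nd4j.jita.allocator.impl.AllocationShape;
import org.nd4j.linalg.api.buffer.DataBuffer;
import org.nd4j.linalg.factory.Nd4j;

public class ShapeFromBytes {

    // Builds an AllocationShape the same way the spill branches in CudaWorkspace.alloc() do:
    // the byte count divided by the element width gives the length, and the element width
    // and data type are carried along unchanged.
    static AllocationShape shapeFor(long requiredMemory, DataBuffer.Type type) {
        return new AllocationShape(requiredMemory / Nd4j.sizeOfDataType(type), Nd4j.sizeOfDataType(type), type);
    }
}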

Example 2 with AllocationShape

Use of org.nd4j.jita.allocator.impl.AllocationShape in project nd4j by deeplearning4j.

The class CudaFullCachingProvider, method free.

/**
 * This method frees the specific chunk of memory described by the AllocationPoint passed in.
 *
 * PLEASE NOTE: This method may ignore the free request and keep the released memory chunk for future reuse.
 *
 * @param point the AllocationPoint describing the memory chunk to release
 */
@Override
public void free(AllocationPoint point) {
    if (point.getAllocationStatus() == AllocationStatus.DEVICE) {
        if (point.isConstant())
            return;
        AllocationShape shape = point.getShape();
        int deviceId = point.getDeviceId();
        long address = point.getDevicePointer().address();
        long reqMemory = AllocationUtils.getRequiredMemory(shape);
        // chunk is too large to cache, or the amount already cached for this device exceeds the configured cap: actually release the memory
        if (reqMemory > CudaEnvironment.getInstance().getConfiguration().getMaximumDeviceCacheableLength() || deviceCachedAmount.get(deviceId).get() >= CudaEnvironment.getInstance().getConfiguration().getMaximumHostCache()) {
            // log.info("DEVICE_{} memory purging: {} bytes; MS: {}; MT: {}", deviceId, reqMemory, MAX_GPU_ALLOCATION, MAX_GPU_CACHE);
            super.free(point);
            return;
        }
        // log.info("Saving HOST memory into cache...");
        ensureDeviceCacheHolder(deviceId, shape);
        CacheHolder cache = deviceCache.get(deviceId).get(shape);
        if (point.getDeviceId() != deviceId)
            throw new RuntimeException("deviceId changed!");
        // memory chunks < threshold will be cached no matter what
        if (reqMemory <= FORCED_CACHE_THRESHOLD) {
            cache.put(new CudaPointer(point.getDevicePointer().address()));
            return;
        } else {
            long cacheEntries = cache.size();
            long cacheHeight = deviceCache.get(deviceId).size();
            // total memory allocated within this bucket
            long cacheDepth = cacheEntries * reqMemory;
            // if (cacheDepth < MAX_CACHED_MEMORY / cacheHeight) {
            cache.put(new CudaPointer(point.getDevicePointer().address()));
            return;
        // } else {
        // super.free(point);
        // }
        }
    }
    super.free(point);
}
Also used: AllocationShape (org.nd4j.jita.allocator.impl.AllocationShape), AllocationPoint (org.nd4j.jita.allocator.impl.AllocationPoint), CudaPointer (org.nd4j.jita.allocator.pointers.CudaPointer)
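
Whether a released chunk is kept or actually freed comes down to two checks in the method above: the chunk must not exceed the configured cacheable length, and the amount already cached for the device must still be under its cap. The predicate below is a simplified restatement of that decision; the parameter names are stand-ins for the configuration and counter reads above, not nd4j API:

    // Simplified restatement of the cache-or-free decision in CudaFullCachingProvider.free():
    // keep the chunk only if it is small enough and the device cache still has room.
    static boolean shouldCache(long reqMemory, long maxCacheableLength, long cachedAmount, long cacheCapacity) {
        if (reqMemory > maxCacheableLength || cachedAmount >= cacheCapacity)
            return false; // too large, or the cache is full: really free the chunk
        return true;      // otherwise keep it in the per-shape cache for reuse
    }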

Example 3 with AllocationShape

Use of org.nd4j.jita.allocator.impl.AllocationShape in project nd4j by deeplearning4j.

The class AllocationUtilsTest, method testGetRequiredMemory2.

@Test
public void testGetRequiredMemory2() throws Exception {
    AllocationShape shape = new AllocationShape();
    shape.setOffset(0);
    shape.setLength(10);
    shape.setStride(1);
    shape.setDataType(DataBuffer.Type.FLOAT);
    assertEquals(40, AllocationUtils.getRequiredMemory(shape));
}
Also used: AllocationShape (org.nd4j.jita.allocator.impl.AllocationShape), Test (org.junit.Test)

Example 4 with AllocationShape

Use of org.nd4j.jita.allocator.impl.AllocationShape in project nd4j by deeplearning4j.

The class AllocationUtilsTest, method testGetRequiredMemory3.

@Test
public void testGetRequiredMemory3() throws Exception {
    AllocationShape shape = new AllocationShape();
    shape.setOffset(0);
    shape.setLength(10);
    shape.setStride(2);
    shape.setDataType(DataBuffer.Type.FLOAT);
    assertEquals(80, AllocationUtils.getRequiredMemory(shape));
}
Also used: AllocationShape (org.nd4j.jita.allocator.impl.AllocationShape), Test (org.junit.Test)
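
Taken together, the two expected values above (10 * 1 * 4 = 40 and 10 * 2 * 4 = 80) are consistent with getRequiredMemory multiplying length, stride, and the element width. The sketch below states that inferred formula; it is a reconstruction from the test expectations, not the actual body of AllocationUtils.getRequiredMemory:

    // Inferred from the two tests above (both use offset 0, so offset's effect is not covered here):
    // 10 * 1 * 4 = 40 bytes and 10 * 2 * 4 = 80 bytes.
    static long requiredMemorySketch(long length, long stride, long elementSize) {
        return length * stride * elementSize;
    }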

Example 5 with AllocationShape

Use of org.nd4j.jita.allocator.impl.AllocationShape in project nd4j by deeplearning4j.

The class CudaDirectProviderTest, method mallocDevice.

@Test
public void mallocDevice() throws Exception {
    CudaDirectProvider provider = new CudaDirectProvider();
    AllocationShape shape = new AllocationShape(300000, 4, DataBuffer.Type.FLOAT);
    AllocationPoint point = new AllocationPoint();
    point.setShape(shape);
    point.setPointers(provider.malloc(shape, point, AllocationStatus.DEVICE));
    System.out.println("Allocated...");
    Thread.sleep(1000);
    point.setAllocationStatus(AllocationStatus.DEVICE);
    provider.free(point);
    System.out.println("Deallocated...");
    Thread.sleep(1000);
}
Also used: AllocationShape (org.nd4j.jita.allocator.impl.AllocationShape), AllocationPoint (org.nd4j.jita.allocator.impl.AllocationPoint), Test (org.junit.Test)
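
The same allocate/free round trip works for any other shape; the snippet below only reuses calls that appear in the test above, with an arbitrary DOUBLE shape (1024 elements of 8 bytes) standing in for the FLOAT one:

    // Same lifecycle as the test above, with an arbitrary DOUBLE shape.
    CudaDirectProvider provider = new CudaDirectProvider();
    AllocationShape shape = new AllocationShape(1024, 8, DataBuffer.Type.DOUBLE); // 1024 doubles = 8192 bytes
    AllocationPoint point = new AllocationPoint();
    point.setShape(shape);
    point.setPointers(provider.malloc(shape, point, AllocationStatus.DEVICE)); // device-side allocation
    point.setAllocationStatus(AllocationStatus.DEVICE);
    provider.free(point); // release the chunk again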

Aggregations

AllocationShape (org.nd4j.jita.allocator.impl.AllocationShape): 14 usages
Test (org.junit.Test): 6 usages
AllocationPoint (org.nd4j.jita.allocator.impl.AllocationPoint): 5 usages
CudaPointer (org.nd4j.jita.allocator.pointers.CudaPointer): 4 usages
CudaContext (org.nd4j.linalg.jcublas.context.CudaContext): 2 usages
IOException (java.io.IOException): 1 usage
MemoryWorkspace (org.nd4j.linalg.api.memory.MemoryWorkspace): 1 usage
PagedPointer (org.nd4j.linalg.api.memory.pointers.PagedPointer): 1 usage
PointersPair (org.nd4j.linalg.api.memory.pointers.PointersPair): 1 usage
INDArray (org.nd4j.linalg.api.ndarray.INDArray): 1 usage
ND4JIllegalStateException (org.nd4j.linalg.exception.ND4JIllegalStateException): 1 usage
DummyWorkspace (org.nd4j.linalg.memory.abstracts.DummyWorkspace): 1 usage