Search in sources:

Example 1 with PagedPointer

use of org.nd4j.linalg.api.memory.pointers.PagedPointer in project nd4j by deeplearning4j.

In class CudaWorkspace, method alloc:

/**
 * Allocates {@code requiredMemory} bytes from this CUDA workspace.
 * <p>
 * If the workspace is not active, memory is allocated externally via the memory manager and
 * tracked in {@code externalAllocations} for later release. Otherwise the allocation is carved
 * out of the pre-allocated workspace buffer; when the buffer is exhausted, behavior follows the
 * configured {@link SpillPolicy} (spill externally, pin, or fail).
 *
 * @param requiredMemory number of bytes requested (rounded up to 8-byte alignment internally)
 * @param kind           DEVICE or HOST memory
 * @param type           data type, used to derive the element count for the returned pointer
 * @param initialize     if true, the allocated region is zeroed
 * @return a PagedPointer into workspace or external memory, or {@code null} for DEVICE requests
 *         under {@link MirroringPolicy#HOST_ONLY}
 * @throws ND4JIllegalStateException if the workspace is full and the spill policy is FAIL,
 *                                   if the async memset fails, or if {@code kind} is unknown
 */
@Override
public PagedPointer alloc(long requiredMemory, MemoryKind kind, DataBuffer.Type type, boolean initialize) {
    long numElements = requiredMemory / Nd4j.sizeOfDataType(type);

    // Workspace disabled: allocate outside the workspace buffer and track for cleanup.
    if (!isUsed.get()) {
        // Warn periodically (not on every call) so a misconfigured loop doesn't flood the log.
        if (disabledCounter.incrementAndGet() % 10 == 0)
            log.warn("Workspace was turned off, and wasn't enabled after {} allocations", disabledCounter.get());

        if (kind == MemoryKind.DEVICE) {
            PagedPointer pointer = new PagedPointer(memoryManager.allocate(requiredMemory, MemoryKind.DEVICE, initialize), numElements);
            externalAllocations.add(new PointersPair(null, pointer));
            return pointer;
        } else {
            PagedPointer pointer = new PagedPointer(memoryManager.allocate(requiredMemory, MemoryKind.HOST, initialize), numElements);
            externalAllocations.add(new PointersPair(pointer, null));
            return pointer;
        }
    }

    // Round the request UP to the next multiple of 8 bytes. The previous code added the
    // remainder itself (requiredMemory += requiredMemory % 8), which does NOT produce
    // 8-byte alignment (e.g. 13 -> 18); adding the padding (8 - remainder) does (13 -> 16).
    long rem = requiredMemory % 8;
    if (rem != 0)
        requiredMemory += 8 - rem;

    // "Trimmed" mode: under ENDOFBUFFER_REACHED reset policy, once a cycle's DEVICE allocations
    // exceed the learned initial block size, subsequent allocations are pinned instead of spilled.
    boolean trimmer = (workspaceConfiguration.getPolicyReset() == ResetPolicy.ENDOFBUFFER_REACHED && requiredMemory + cycleAllocations.get() > initialBlockSize.get() && initialBlockSize.get() > 0 && kind == MemoryKind.DEVICE) || trimmedMode.get();

    if (trimmer && workspaceConfiguration.getPolicySpill() == SpillPolicy.REALLOCATE && !trimmedMode.get()) {
        trimmedMode.set(true);
        trimmedStep.set(stepsCount.get());
    }

    if (kind == MemoryKind.DEVICE) {
        if (deviceOffset.get() + requiredMemory <= currentSize.get() && !trimmer) {
            // Fast path: bump the device offset within the pre-allocated buffer.
            cycleAllocations.addAndGet(requiredMemory);
            long prevOffset = deviceOffset.getAndAdd(requiredMemory);

            // Under HOST_ONLY mirroring there is no device buffer to hand out.
            if (workspaceConfiguration.getPolicyMirroring() == MirroringPolicy.HOST_ONLY)
                return null;

            PagedPointer ptr = workspace.getDevicePointer().withOffset(prevOffset, numElements);

            if (isDebug.get())
                log.info("Workspace [{}] device_{}: alloc array of {} bytes, capacity of {} elements; prevOffset: {}; newOffset: {}; size: {}; address: {}", id, Nd4j.getAffinityManager().getDeviceForCurrentThread(), requiredMemory, numElements, prevOffset, deviceOffset.get(), currentSize.get(), ptr.address());

            if (initialize) {
                // Zero the region asynchronously on the special stream, then sync before returning.
                CudaContext context = (CudaContext) AtomicAllocator.getInstance().getDeviceContext().getContext();

                int ret = NativeOpsHolder.getInstance().getDeviceNativeOps().memsetAsync(ptr, 0, requiredMemory, 0, context.getSpecialStream());
                if (ret == 0)
                    throw new ND4JIllegalStateException("memset failed device_" + Nd4j.getAffinityManager().getDeviceForCurrentThread());

                context.syncSpecialStream();
            }

            return ptr;
        } else {
            // Buffer exhausted (or trimmed mode) -> spill path.
            if (workspaceConfiguration.getPolicyReset() == ResetPolicy.ENDOFBUFFER_REACHED && currentSize.get() > 0 && !trimmer) {
                // Circular workspace: wrap to the beginning of the buffer and retry.
                reset();
                resetPlanned.set(true);
                return alloc(requiredMemory, kind, type, initialize);
            }

            if (!trimmer)
                spilledAllocationsSize.addAndGet(requiredMemory);
            else
                pinnedAllocationsSize.addAndGet(requiredMemory);

            if (isDebug.get()) {
                log.info("Workspace [{}] device_{}: spilled DEVICE array of {} bytes, capacity of {} elements", id, Nd4j.getAffinityManager().getDeviceForCurrentThread(), requiredMemory, numElements);
            }

            AllocationShape shape = new AllocationShape(requiredMemory / Nd4j.sizeOfDataType(type), Nd4j.sizeOfDataType(type), type);

            cycleAllocations.addAndGet(requiredMemory);

            if (workspaceConfiguration.getPolicyMirroring() == MirroringPolicy.HOST_ONLY)
                return null;

            switch(workspaceConfiguration.getPolicySpill()) {
                case REALLOCATE:
                case EXTERNAL:
                    if (!trimmer) {
                        // Spilled allocation: released on workspace close.
                        externalCount.incrementAndGet();
                        PagedPointer pointer = new PagedPointer(memoryManager.allocate(requiredMemory, MemoryKind.DEVICE, initialize), numElements);
                        // NOTE(review): original called pointer.isLeaked() here and discarded the
                        // result — a no-op; the commented-out setLeaked(true) suggests leak-marking
                        // was intentionally disabled.
                        externalAllocations.add(new PointersPair(null, pointer));
                        return pointer;
                    } else {
                        // Pinned allocation: survives until its recording step is safely past.
                        pinnedCount.incrementAndGet();
                        PagedPointer pointer = new PagedPointer(memoryManager.allocate(requiredMemory, MemoryKind.DEVICE, initialize), numElements);
                        pinnedAllocations.add(new PointersPair(stepsCount.get(), requiredMemory, null, pointer));
                        return pointer;
                    }
                case FAIL:
                default:
                    {
                        throw new ND4JIllegalStateException("Can't allocate memory: Workspace is full");
                    }
            }
        }
    } else if (kind == MemoryKind.HOST) {
        if (hostOffset.get() + requiredMemory <= currentSize.get() && !trimmer) {
            // Fast path: bump the host offset within the pre-allocated host buffer.
            long prevOffset = hostOffset.getAndAdd(requiredMemory);

            PagedPointer ptr = workspace.getHostPointer().withOffset(prevOffset, numElements);

            if (initialize)
                Pointer.memset(ptr, 0, requiredMemory);

            return ptr;
        } else {
            AllocationShape shape = new AllocationShape(requiredMemory / Nd4j.sizeOfDataType(type), Nd4j.sizeOfDataType(type), type);

            switch(workspaceConfiguration.getPolicySpill()) {
                case REALLOCATE:
                case EXTERNAL:
                    if (!trimmer) {
                        PagedPointer pointer = new PagedPointer(memoryManager.allocate(requiredMemory, MemoryKind.HOST, initialize), numElements);
                        externalAllocations.add(new PointersPair(pointer, null));
                        return pointer;
                    } else {
                        PagedPointer pointer = new PagedPointer(memoryManager.allocate(requiredMemory, MemoryKind.HOST, initialize), numElements);
                        pinnedAllocations.add(new PointersPair(stepsCount.get(), 0L, pointer, null));
                        return pointer;
                    }
                case FAIL:
                default:
                    {
                        throw new ND4JIllegalStateException("Can't allocate memory: Workspace is full");
                    }
            }
        }
    } else
        throw new ND4JIllegalStateException("Unknown MemoryKind was passed in: " + kind);
}
Also used : AllocationShape(org.nd4j.jita.allocator.impl.AllocationShape) PointersPair(org.nd4j.linalg.api.memory.pointers.PointersPair) CudaContext(org.nd4j.linalg.jcublas.context.CudaContext) ND4JIllegalStateException(org.nd4j.linalg.exception.ND4JIllegalStateException) PagedPointer(org.nd4j.linalg.api.memory.pointers.PagedPointer)

Example 2 with PagedPointer

use of org.nd4j.linalg.api.memory.pointers.PagedPointer in project nd4j by deeplearning4j.

In class AtomicAllocator, method allocateMemory:

/**
 * This method allocates required chunk of memory in specific location
 * <p>
 * PLEASE NOTE: Do not use this method, unless you're 100% sure what you're doing
 *
 * @param requiredMemory
 * @param location
 */
/**
 * This method allocates required chunk of memory in specific location
 * <p>
 * PLEASE NOTE: Do not use this method, unless you're 100% sure what you're doing
 *
 * @param requiredMemory
 * @param location
 */
@Override
public AllocationPoint allocateMemory(DataBuffer buffer, AllocationShape requiredMemory, AllocationStatus location, boolean initialize) {
    useTracker.set(System.currentTimeMillis());

    AllocationPoint point = new AllocationPoint();

    // Unique tracking id for this allocation; used as the key in allocationsMap.
    Long allocId = objectsTracker.getAndIncrement();
    point.setObjectId(allocId);
    point.setShape(requiredMemory);

    // Register the buffer with one of the GC queues (bucket chosen at random)
    // so it can be reclaimed once the buffer becomes unreachable.
    int bucketId = RandomUtils.nextInt(0, configuration.getNumberOfGcThreads());
    GarbageBufferReference reference =
            new GarbageBufferReference((BaseDataBuffer) buffer, queueMap.get(bucketId), point);
    point.attachReference(reference);
    point.setDeviceId(-1);

    if (buffer.isAttached()) {
        // Workspace-attached buffer: carve both pointers out of the current workspace.
        long bytesRequired = AllocationUtils.getRequiredMemory(requiredMemory);

        // Workaround for init order: make sure the CUDA context exists before touching the workspace.
        getMemoryHandler().getCudaContext();
        point.setDeviceId(Nd4j.getAffinityManager().getDeviceForCurrentThread());

        CudaWorkspace workspace = (CudaWorkspace) Nd4j.getMemoryManager().getCurrentWorkspace();

        PagedPointer devicePtr = workspace.alloc(bytesRequired, MemoryKind.DEVICE, requiredMemory.getDataType(), initialize);
        PagedPointer hostPtr = workspace.alloc(bytesRequired, MemoryKind.HOST, requiredMemory.getDataType(), initialize);

        PointersPair pointers = new PointersPair();
        pointers.setHostPointer(hostPtr);

        // A null device pointer means HOST_ONLY mirroring: fall back to the host pointer.
        if (devicePtr == null) {
            pointers.setDevicePointer(hostPtr);
            point.setAllocationStatus(AllocationStatus.HOST);
        } else {
            pointers.setDevicePointer(devicePtr);
            point.setAllocationStatus(AllocationStatus.DEVICE);
        }

        point.setAttached(true);
        point.setPointers(pointers);
    } else {
        // Detached buffer: delegate to the memory handler, which decides which
        // pointers of the pair are actually populated.
        point.setPointers(memoryHandler.alloc(location, point, requiredMemory, initialize));
    }

    allocationsMap.put(allocId, point);
    return point;
}
Also used : PointersPair(org.nd4j.jita.allocator.pointers.PointersPair) AtomicLong(java.util.concurrent.atomic.AtomicLong) CudaWorkspace(org.nd4j.jita.workspace.CudaWorkspace) PagedPointer(org.nd4j.linalg.api.memory.pointers.PagedPointer) GarbageBufferReference(org.nd4j.jita.allocator.garbage.GarbageBufferReference)

Example 3 with PagedPointer

use of org.nd4j.linalg.api.memory.pointers.PagedPointer in project nd4j by deeplearning4j.

In class NativeOpExecutioner, method calculateOutputShape:

/**
 * Computes the output shape(s) of a custom op by delegating to the native shape-calculation
 * routine for the current global data type (FLOAT / DOUBLE / HALF).
 *
 * @param op custom op whose input arrays and i/t-arguments determine the output shapes
 * @return one int[] shape descriptor per output; empty list if the op has no input arguments
 * @throws RuntimeException if the native shape calculation returns null
 */
@Override
public List<int[]> calculateOutputShape(@NonNull CustomOp op) {
    val lc = op.opName().toLowerCase();
    val hash = op.opHash();
    val result = new ArrayList<int[]>();
    if (op.numInputArguments() < 1) {
        return Collections.emptyList();
    }

    // Pack input data and shape-info pointers for the native call.
    val inputBuffers = new PointerPointer<>(op.numInputArguments());
    val inputShapes = new PointerPointer<>(op.numInputArguments());
    val inputArgs = op.inputArguments();
    int cnt = 0;
    for (val in : inputArgs) {
        inputBuffers.put(cnt, in.data().addressPointer());
        inputShapes.put(cnt++, in.shapeInfoDataBuffer().addressPointer());
    }

    // iArgs is null when there are no integer arguments; the loop below is then
    // empty (iArgs1.length == 0), so the null is never dereferenced.
    val iArgs = op.numIArguments() > 0 ? new IntPointer(op.numIArguments()) : null;
    cnt = 0;
    val iArgs1 = op.iArgs();
    for (val i : iArgs1) iArgs.put(cnt++, i);

    if (Nd4j.dataType() == DataBuffer.Type.FLOAT) {
        val tArgs = op.numTArguments() > 0 ? new FloatPointer(op.numTArguments()) : null;
        val tArgs1 = op.tArgs();
        cnt = 0;
        for (val t : tArgs1) tArgs.put(cnt++, (float) t);
        val ptrptr = (Nd4jCpu.ShapeList) loop.calculateOutputShapesFloat(null, hash, inputBuffers, inputShapes, op.numInputArguments(), tArgs, op.numTArguments(), iArgs, op.numIArguments());
        if (ptrptr == null)
            throw new RuntimeException("calculateOutputShapesFloat returned null for op [" + lc + "]");
        for (int e = 0; e < ptrptr.size(); e++) result.add(getShapeFromPointer(new PagedPointer(ptrptr.at(e)).asIntPointer()));
        loop.deleteShapeList(ptrptr);
    } else if (Nd4j.dataType() == DataBuffer.Type.DOUBLE) {
        val tArgs = op.numTArguments() > 0 ? new DoublePointer(op.numTArguments()) : null;
        cnt = 0;
        val tArgs1 = op.tArgs();
        for (val t : tArgs1) tArgs.put(cnt++, t);
        val ptrptr = (Nd4jCpu.ShapeList) loop.calculateOutputShapesDouble(null, hash, inputBuffers, inputShapes, op.numInputArguments(), tArgs, op.numTArguments(), iArgs, op.numIArguments());
        if (ptrptr == null)
            throw new RuntimeException("calculateOutputShapesDouble returned null for op [" + lc + "]");
        for (int e = 0; e < ptrptr.size(); e++) result.add(getShapeFromPointer(new PagedPointer(ptrptr.at(e)).asIntPointer()));
        loop.deleteShapeList(ptrptr);
    } else if (Nd4j.dataType() == DataBuffer.Type.HALF) {
        // HALF values are converted to their 16-bit representation before the native call.
        val tArgs = op.numTArguments() > 0 ? new ShortPointer(op.numTArguments()) : null;
        cnt = 0;
        val tArgs1 = op.tArgs();
        for (val t : tArgs1) tArgs.put(cnt++, ArrayUtil.toHalf(t));
        val ptrptr = (Nd4jCpu.ShapeList) loop.calculateOutputShapesHalf(null, hash, inputBuffers, inputShapes, op.numInputArguments(), tArgs, op.numTArguments(), iArgs, op.numIArguments());
        if (ptrptr == null)
            throw new RuntimeException("calculateOutputShapesHalf returned null for op [" + lc + "]");
        // (removed) unused local: getCustomOperations().get(lc).getNumOutputs() was computed
        // here in the original and never read.
        for (int e = 0; e < ptrptr.size(); e++) result.add(getShapeFromPointer(new PagedPointer(ptrptr.at(e)).asIntPointer()));
        loop.deleteShapeList(ptrptr);
    }
    return result;
}
Also used : lombok.val(lombok.val) Nd4jCpu(org.nd4j.nativeblas.Nd4jCpu) PagedPointer(org.nd4j.linalg.api.memory.pointers.PagedPointer)

Example 4 with PagedPointer

use of org.nd4j.linalg.api.memory.pointers.PagedPointer in project nd4j by deeplearning4j.

In class CpuWorkspace, method init:

/**
 * Initializes the backing host storage for this CPU workspace, either as a
 * heap/RAM allocation or as a memory-mapped temp file, depending on the
 * configured location policy.
 */
@Override
protected void init() {
    super.init();

    if (workspaceConfiguration.getPolicyLocation() == LocationPolicy.RAM) {
        // Nothing to allocate until the workspace has learned a non-zero size.
        if (currentSize.get() <= 0)
            return;

        isInit.set(true);

        if (isDebug.get())
            log.info("Allocating [{}] workspace of {} bytes...", id, currentSize.get());

        // SAFETY_OFFSET pads the allocation beyond the learned size.
        PagedPointer hostPointer =
                new PagedPointer(memoryManager.allocate(currentSize.get() + SAFETY_OFFSET, MemoryKind.HOST, true));
        workspace.setHostPointer(hostPointer);
    } else if (workspaceConfiguration.getPolicyLocation() == LocationPolicy.MMAP) {
        // Memory-map the pre-created temp file as the workspace storage.
        final long fileLength = tempFile.length();
        mmap = NativeOpsHolder.getInstance().getDeviceNativeOps().mmapFile(null, tempFile.getAbsolutePath(), fileLength);

        if (mmap == null)
            throw new RuntimeException("MMAP failed");

        workspace.setHostPointer(new PagedPointer(mmap.get(0)));
    }
}
Also used : PagedPointer(org.nd4j.linalg.api.memory.pointers.PagedPointer)

Example 5 with PagedPointer

use of org.nd4j.linalg.api.memory.pointers.PagedPointer in project nd4j by deeplearning4j.

In class CudaWorkspace, method init:

/**
 * Initializes the host buffer (and, unless mirroring is HOST_ONLY, the device buffer)
 * backing this CUDA workspace.
 *
 * @throws ND4JIllegalStateException if MMAP location policy is requested (unsupported on CUDA)
 *                                   or if either the host or device allocation fails
 */
@Override
protected void init() {
    if (workspaceConfiguration.getPolicyLocation() == LocationPolicy.MMAP) {
        throw new ND4JIllegalStateException("CUDA do not support MMAP workspaces yet");
    }

    super.init();

    if (currentSize.get() > 0) {
        isInit.set(true);

        long bytes = currentSize.get();

        if (isDebug.get()) {
            log.info("Allocating [{}] workspace on device_{}, {} bytes...", id, Nd4j.getAffinityManager().getDeviceForCurrentThread(), bytes);
            Nd4j.getWorkspaceManager().printAllocationStatisticsForCurrentThread();
        }

        // SAFETY_OFFSET pads the allocation beyond the learned size.
        Pointer ptr = memoryManager.allocate((bytes + SAFETY_OFFSET), MemoryKind.HOST, false);
        if (ptr == null)
            throw new ND4JIllegalStateException("Can't allocate memory for workspace");

        workspace.setHostPointer(new PagedPointer(ptr));

        if (workspaceConfiguration.getPolicyMirroring() != MirroringPolicy.HOST_ONLY) {
            // Fail fast on a null device allocation, mirroring the host-side check above
            // (the original only checked the host pointer).
            Pointer devicePtr = memoryManager.allocate((bytes + SAFETY_OFFSET), MemoryKind.DEVICE, false);
            if (devicePtr == null)
                throw new ND4JIllegalStateException("Can't allocate device memory for workspace");

            workspace.setDevicePointer(new PagedPointer(devicePtr));
        }
    }
}
Also used : ND4JIllegalStateException(org.nd4j.linalg.exception.ND4JIllegalStateException) PagedPointer(org.nd4j.linalg.api.memory.pointers.PagedPointer) Pointer(org.bytedeco.javacpp.Pointer) PagedPointer(org.nd4j.linalg.api.memory.pointers.PagedPointer)

Aggregations

PagedPointer (org.nd4j.linalg.api.memory.pointers.PagedPointer)8 ND4JIllegalStateException (org.nd4j.linalg.exception.ND4JIllegalStateException)4 lombok.val (lombok.val)2 Pointer (org.bytedeco.javacpp.Pointer)2 PointersPair (org.nd4j.linalg.api.memory.pointers.PointersPair)2 ByteBuffer (java.nio.ByteBuffer)1 HashMap (java.util.HashMap)1 AtomicLong (java.util.concurrent.atomic.AtomicLong)1 BytePointer (org.bytedeco.javacpp.BytePointer)1 FlatArray (org.nd4j.graph.FlatArray)1 FlatResult (org.nd4j.graph.FlatResult)1 FlatVariable (org.nd4j.graph.FlatVariable)1 GarbageBufferReference (org.nd4j.jita.allocator.garbage.GarbageBufferReference)1 AllocationPoint (org.nd4j.jita.allocator.impl.AllocationPoint)1 AllocationShape (org.nd4j.jita.allocator.impl.AllocationShape)1 PointersPair (org.nd4j.jita.allocator.pointers.PointersPair)1 CudaWorkspace (org.nd4j.jita.workspace.CudaWorkspace)1 INDArray (org.nd4j.linalg.api.ndarray.INDArray)1 CudaContext (org.nd4j.linalg.jcublas.context.CudaContext)1 Nd4jCpu (org.nd4j.nativeblas.Nd4jCpu)1