Example 1 with PointersPair

Use of org.nd4j.jita.allocator.pointers.PointersPair in project nd4j by deeplearning4j.

In the class AtomicAllocator, the method allocateMemory:

/**
 * This method allocates the required chunk of memory in the specified location
 * <p>
 * PLEASE NOTE: Do not use this method, unless you're 100% sure what you're doing
 *
 * @param buffer         the DataBuffer this allocation will back
 * @param requiredMemory shape describing the required memory chunk
 * @param location       target allocation location (HOST or DEVICE)
 * @param initialize     whether the allocated memory should be zero-initialized
 */
@Override
public AllocationPoint allocateMemory(DataBuffer buffer, AllocationShape requiredMemory, AllocationStatus location, boolean initialize) {
    AllocationPoint point = new AllocationPoint();
    useTracker.set(System.currentTimeMillis());
    // we use these longs as tracking codes for memory tracking
    Long allocId = objectsTracker.getAndIncrement();
    // point.attachBuffer(buffer);
    point.setObjectId(allocId);
    point.setShape(requiredMemory);
    /*
        if (buffer instanceof CudaIntDataBuffer) {
            buffer.setConstant(true);
            point.setConstant(true);
        }
        */
    int numBuckets = configuration.getNumberOfGcThreads();
    int bucketId = RandomUtils.nextInt(0, numBuckets);
    GarbageBufferReference reference = new GarbageBufferReference((BaseDataBuffer) buffer, queueMap.get(bucketId), point);
    point.attachReference(reference);
    point.setDeviceId(-1);
    if (buffer.isAttached()) {
        long reqMem = AllocationUtils.getRequiredMemory(requiredMemory);
        // log.info("Allocating {} bytes from attached memory...", reqMem);
        // workaround for init order
        getMemoryHandler().getCudaContext();
        point.setDeviceId(Nd4j.getAffinityManager().getDeviceForCurrentThread());
        CudaWorkspace workspace = (CudaWorkspace) Nd4j.getMemoryManager().getCurrentWorkspace();
        PointersPair pair = new PointersPair();
        PagedPointer ptrDev = workspace.alloc(reqMem, MemoryKind.DEVICE, requiredMemory.getDataType(), initialize);
        PagedPointer ptrHost = workspace.alloc(reqMem, MemoryKind.HOST, requiredMemory.getDataType(), initialize);
        pair.setHostPointer(ptrHost);
        if (ptrDev != null) {
            pair.setDevicePointer(ptrDev);
            point.setAllocationStatus(AllocationStatus.DEVICE);
        } else {
            pair.setDevicePointer(ptrHost);
            point.setAllocationStatus(AllocationStatus.HOST);
        }
        // if (!ptrDev.isLeaked())
        point.setAttached(true);
        point.setPointers(pair);
    } else {
        // we stay naive about the PointersPair here: at this level we don't know which pointers are set; the MemoryHandler takes care of that
        PointersPair pair = memoryHandler.alloc(location, point, requiredMemory, initialize);
        point.setPointers(pair);
    }
    allocationsMap.put(allocId, point);
    return point;
}
Also used : PointersPair(org.nd4j.jita.allocator.pointers.PointersPair) AtomicLong(java.util.concurrent.atomic.AtomicLong) CudaWorkspace(org.nd4j.jita.workspace.CudaWorkspace) PagedPointer(org.nd4j.linalg.api.memory.pointers.PagedPointer) GarbageBufferReference(org.nd4j.jita.allocator.garbage.GarbageBufferReference)
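
As the javadoc warns, allocateMemory is not meant to be called by user code. A minimal sketch of the usual path instead, using only calls that appear in these examples: creating an INDArray triggers the allocation internally, and the resulting AllocationPoint can then be inspected through the allocator.

import org.nd4j.jita.allocator.impl.AllocationPoint;
import org.nd4j.jita.allocator.impl.AtomicAllocator;
import org.nd4j.jita.allocator.pointers.PointersPair;
import org.nd4j.linalg.api.ndarray.INDArray;
import org.nd4j.linalg.factory.Nd4j;

public class AllocationInspection {
    public static void main(String[] args) {
        // Creating an INDArray allocates its backing buffer through AtomicAllocator internally
        INDArray array = Nd4j.create(new float[] { 1f, 2f, 3f, 4f });
        // The AllocationPoint produced by allocateMemory(...) can be looked up afterwards
        AllocationPoint point = AtomicAllocator.getInstance().getAllocationPoint(array);
        PointersPair pair = point.getPointers();
        System.out.println("status: " + point.getAllocationStatus());
        System.out.println("host pointer:   " + pair.getHostPointer());
        System.out.println("device pointer: " + pair.getDevicePointer());
    }
}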

Example 2 with PointersPair

Use of org.nd4j.jita.allocator.pointers.PointersPair in project nd4j by deeplearning4j.

In the class CudaCachingZeroProvider, the method malloc:

/**
 * This method provides a PointersPair for the memory chunk specified by AllocationShape.
 *
 * PLEASE NOTE: This method can actually ignore the malloc request and hand out a previously cached free memory chunk of equal shape.
 *
 * @param shape shape of the desired memory chunk
 * @param point target AllocationPoint structure
 * @param location either HOST or DEVICE
 * @return pair of host/device pointers for the allocated (or cached) chunk
 */
@Override
public PointersPair malloc(AllocationShape shape, AllocationPoint point, AllocationStatus location) {
    long reqMemory = AllocationUtils.getRequiredMemory(shape);
    if (location == AllocationStatus.HOST && reqMemory < CudaEnvironment.getInstance().getConfiguration().getMaximumHostCacheableLength()) {
        CacheHolder cache = zeroCache.get(shape);
        if (cache != null) {
            Pointer pointer = cache.poll();
            if (pointer != null) {
                cacheZeroHit.incrementAndGet();
                // since this memory chunk is going to be used now, remove its size from the cached amount
                zeroCachedAmount.addAndGet(-1 * reqMemory);
                PointersPair pair = new PointersPair();
                pair.setDevicePointer(new CudaPointer(pointer.address()));
                pair.setHostPointer(new CudaPointer(pointer.address()));
                point.setAllocationStatus(AllocationStatus.HOST);
                return pair;
            }
        }
        cacheZeroMiss.incrementAndGet();
        if (CudaEnvironment.getInstance().getConfiguration().isUsePreallocation() && zeroCachedAmount.get() < CudaEnvironment.getInstance().getConfiguration().getMaximumHostCache() / 10 && reqMemory < 16 * 1024 * 1024L) {
            CachePreallocator preallocator = new CachePreallocator(shape, location, CudaEnvironment.getInstance().getConfiguration().getPreallocationCalls());
            preallocator.start();
        }
        cacheZeroMiss.incrementAndGet();
        return super.malloc(shape, point, location);
    }
    return super.malloc(shape, point, location);
}
Also used : PointersPair(org.nd4j.jita.allocator.pointers.PointersPair) Pointer(org.bytedeco.javacpp.Pointer) CudaPointer(org.nd4j.jita.allocator.pointers.CudaPointer)
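
The cache-hit branch above hands back a PointersPair whose host and device pointers wrap the same zero-copy address. A minimal sketch of that aliasing pattern in isolation; the address below is a placeholder for illustration only and must never be dereferenced.

import org.nd4j.jita.allocator.pointers.CudaPointer;
import org.nd4j.jita.allocator.pointers.PointersPair;

public class HostAliasSketch {
    public static void main(String[] args) {
        // placeholder address, illustration only - do not dereference
        long address = 0x1000L;
        PointersPair pair = new PointersPair();
        pair.setHostPointer(new CudaPointer(address));
        pair.setDevicePointer(new CudaPointer(address));
        // for HOST-cached chunks both pointers report the same address
        System.out.println(pair.getHostPointer().address() == pair.getDevicePointer().address());
    }
}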

Example 3 with PointersPair

Use of org.nd4j.jita.allocator.pointers.PointersPair in project nd4j by deeplearning4j.

In the class DelayedMemoryTest, the method testDelayedAllocation4:

@Test
public void testDelayedAllocation4() throws Exception {
    INDArray array = Nd4j.create(new float[] { 1f, 2f, 3f, 4f, 5f });
    AllocationPoint pointer = AtomicAllocator.getInstance().getAllocationPoint(array);
    PointersPair pair = pointer.getPointers();
    // pointers should be equal, device memory wasn't allocated yet
    assertEquals(pair.getDevicePointer(), pair.getHostPointer());
    assertEquals(2.0f, array.getFloat(1), 0.001f);
    assertEquals(pair.getDevicePointer(), pair.getHostPointer());
    String temp = System.getProperty("java.io.tmpdir");
    String outPath = FilenameUtils.concat(temp, "dl4jtestserialization.bin");
    try (DataOutputStream dos = new DataOutputStream(Files.newOutputStream(Paths.get(outPath)))) {
        Nd4j.write(array, dos);
    }
    INDArray in;
    try (DataInputStream dis = new DataInputStream(new FileInputStream(outPath))) {
        in = Nd4j.read(dis);
    }
    assertEquals(AtomicAllocator.getInstance().getAllocationPoint(in).getPointers().getDevicePointer(), AtomicAllocator.getInstance().getAllocationPoint(in).getPointers().getHostPointer());
    assertEquals(array, in);
}
Also used : INDArray(org.nd4j.linalg.api.ndarray.INDArray) PointersPair(org.nd4j.jita.allocator.pointers.PointersPair) DataOutputStream(java.io.DataOutputStream) AllocationPoint(org.nd4j.jita.allocator.impl.AllocationPoint) DataInputStream(java.io.DataInputStream) FileInputStream(java.io.FileInputStream) Test(org.junit.Test)
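
The two assertEquals calls on the pointer pair rely on delayed allocation: until something forces a device copy, host and device pointers refer to the same zero-copy buffer. A small helper sketch of the same check for use outside a test; the class and method names here are illustrative.

import org.nd4j.jita.allocator.impl.AtomicAllocator;
import org.nd4j.jita.allocator.pointers.PointersPair;
import org.nd4j.linalg.api.ndarray.INDArray;

public class DelayedAllocationCheck {
    // true while the array's buffer is still backed only by zero-copy host memory
    public static boolean isStillHostOnly(INDArray array) {
        PointersPair pair = AtomicAllocator.getInstance().getAllocationPoint(array).getPointers();
        return pair.getDevicePointer().equals(pair.getHostPointer());
    }
}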

Example 4 with PointersPair

Use of org.nd4j.jita.allocator.pointers.PointersPair in project nd4j by deeplearning4j.

In the class CudaZeroHandler, the method promoteObject:

/**
 * This method moves a specific object from zero-copy (host) memory to device memory.
 *
 * PLEASE NOTE: DO NOT EVER USE THIS METHOD MANUALLY, UNLESS YOU 100% HAVE TO
 *
 * @param buffer the DataBuffer to promote
 * @return false if the buffer does not reside in host memory, true otherwise
 */
@Override
public boolean promoteObject(DataBuffer buffer) {
    AllocationPoint dstPoint = AtomicAllocator.getInstance().getAllocationPoint(buffer);
    if (dstPoint.getAllocationStatus() != AllocationStatus.HOST)
        return false;
    if (configuration.getMemoryModel() == Configuration.MemoryModel.DELAYED && dstPoint.getAllocationStatus() == AllocationStatus.HOST) {
        // if we have constant buffer (aka shapeInfo or other constant stuff)
        if (buffer.isConstant()) {
            Nd4j.getConstantHandler().moveToConstantSpace(buffer);
        } else {
            PointersPair pair = memoryProvider.malloc(dstPoint.getShape(), dstPoint, AllocationStatus.DEVICE);
            if (pair != null) {
                Integer deviceId = getDeviceId();
                // log.info("Promoting object to device: [{}]", deviceId);
                dstPoint.getPointers().setDevicePointer(pair.getDevicePointer());
                dstPoint.setAllocationStatus(AllocationStatus.DEVICE);
                deviceAllocations.get(deviceId).put(dstPoint.getObjectId(), dstPoint.getObjectId());
                zeroAllocations.get(dstPoint.getBucketId()).remove(dstPoint.getObjectId());
                deviceMemoryTracker.addToAllocation(Thread.currentThread().getId(), deviceId, AllocationUtils.getRequiredMemory(dstPoint.getShape()));
                dstPoint.tickHostWrite();
            } else
                throw new RuntimeException("PewPew");
        }
    }
    return true;
}
Also used : AtomicInteger(java.util.concurrent.atomic.AtomicInteger) PointersPair(org.nd4j.jita.allocator.pointers.PointersPair) AllocationPoint(org.nd4j.jita.allocator.impl.AllocationPoint)
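
A hedged sketch of driving promoteObject from calling code. The MemoryHandler type and its import path are assumed here (promoteObject is declared @Override above, so it is inherited from the handler's interface); how the handler instance is obtained is left outside this snippet.

import org.nd4j.jita.handler.MemoryHandler; // package path assumed
import org.nd4j.linalg.api.buffer.DataBuffer;
import org.nd4j.linalg.api.ndarray.INDArray;

public class PromotionSketch {
    /**
     * Attempts to promote the array's backing buffer from zero-copy host memory to device memory.
     * promoteObject(...) itself returns false when the buffer no longer lives in HOST memory.
     */
    public static boolean tryPromote(MemoryHandler handler, INDArray array) {
        DataBuffer buffer = array.data();
        return handler.promoteObject(buffer);
    }
}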

Example 5 with PointersPair

Use of org.nd4j.jita.allocator.pointers.PointersPair in project nd4j by deeplearning4j.

In the class CudaZeroHandler, the method alloc:

/**
 * Allocate specified memory chunk on specified device/host
 *
 * @param targetMode valid arguments are DEVICE, ZERO
 * @param shape shape describing the required memory chunk
 * @return pair of host/device pointers for the allocation
 */
@Override
public PointersPair alloc(AllocationStatus targetMode, AllocationPoint point, AllocationShape shape, boolean initialize) {
    long reqMemory = AllocationUtils.getRequiredMemory(shape);
    CudaContext context = getCudaContext();
    switch(targetMode) {
        case HOST:
            {
                if (zeroUseCounter.get() + reqMemory >= configuration.getMaximumZeroAllocation()) {
                    if (reqMemory > configuration.getMaximumZeroAllocation()) {
                        throw new IllegalStateException("You can't allocate more memory than allowed by the configured value: [" + configuration.getMaximumZeroAllocation() + "]");
                    }
                    while (zeroUseCounter.get() + reqMemory >= configuration.getMaximumZeroAllocation()) {
                        try {
                            log.warn("No available [HOST] memory, sleeping for a while...");
                            log.debug("Currently used: [" + zeroUseCounter.get() + "], allocated objects: [" + zeroAllocations.get(0) + "]");
                            Nd4j.getMemoryManager().invokeGc();
                            Thread.sleep(1000);
                        } catch (Exception e) {
                            throw new RuntimeException(e);
                        }
                    }
                }
                PointersPair pair = memoryProvider.malloc(shape, point, targetMode);
                if (initialize) {
                    org.bytedeco.javacpp.Pointer.memset(pair.getHostPointer(), 0, reqMemory);
                    point.tickHostWrite();
                }
                pickupHostAllocation(point);
                return pair;
            }
        case DEVICE:
            {
                int deviceId = getDeviceId();
                PointersPair returnPair = new PointersPair();
                PointersPair tmpPair = new PointersPair();
                // if the initial memory location is device, there's a chance we don't have zero memory allocated
                if (point.getPointers() == null || point.getPointers().getHostPointer() == null) {
                    tmpPair = alloc(AllocationStatus.HOST, point, point.getShape(), initialize);
                    returnPair.setDevicePointer(tmpPair.getHostPointer());
                    returnPair.setHostPointer(tmpPair.getHostPointer());
                    point.setAllocationStatus(AllocationStatus.HOST);
                    point.setPointers(tmpPair);
                }
                /*
                if (reqMemory < configuration.getMaximumSingleHostAllocation()
                                && deviceMemoryTracker.getAllocatedSize(deviceId) + reqMemory < configuration
                                                .getMaximumDeviceAllocation()) {
*/
                if (deviceMemoryTracker.reserveAllocationIfPossible(Thread.currentThread().getId(), deviceId, reqMemory)) {
                    point.setDeviceId(deviceId);
                    PointersPair pair = memoryProvider.malloc(shape, point, targetMode);
                    if (pair != null) {
                        // log.info("PEWPEW");
                        returnPair.setDevicePointer(pair.getDevicePointer());
                        point.setAllocationStatus(AllocationStatus.DEVICE);
                        if (point.getPointers() == null)
                            throw new RuntimeException("WTF?");
                        point.getPointers().setDevicePointer(pair.getDevicePointer());
                        deviceAllocations.get(deviceId).put(point.getObjectId(), point.getObjectId());
                        val p = point.getBucketId();
                        if (p != null) {
                            val m = zeroAllocations.get(point.getBucketId());
                            // m can be null if the point comes from a workspace - such points have no bucketId
                            if (m != null)
                                m.remove(point.getObjectId());
                        }
                        deviceMemoryTracker.addToAllocation(Thread.currentThread().getId(), deviceId, reqMemory);
                        // point.tickDeviceWrite();
                        point.tickHostWrite();
                        if (!initialize) {
                            point.tickDeviceWrite();
                            point.tickHostRead();
                        } else {
                            // CudaContext ctx = AtomicAllocator.getInstance().getFlowController().prepareAction(point);
                            nativeOps.memsetAsync(pair.getDevicePointer(), 0, reqMemory, 0, context.getSpecialStream());
                            context.getSpecialStream().synchronize();
                            point.tickDeviceWrite();
                            point.tickHostRead();
                        // AtomicAllocator.getInstance().getFlowController().registerAction(ctx, point);
                        }
                    } else {
                        log.warn("Out of [DEVICE] memory, host memory will be used instead: deviceId: [{}], requested bytes: [{}]", deviceId, reqMemory);
                        // if device memory allocation failed (aka returned NULL), keep using host memory instead
                        returnPair.setDevicePointer(tmpPair.getHostPointer());
                        point.setAllocationStatus(AllocationStatus.HOST);
                        Nd4j.getMemoryManager().invokeGc();
                        try {
                            Thread.sleep(100);
                        } catch (Exception e) {
                        }
                    }
                } else {
                    log.warn("Hard limit on [DEVICE] memory hit, please consider tuning memory parameters, deviceId [{}]", deviceId);
                    Nd4j.getMemoryManager().invokeGc();
                    try {
                        Thread.sleep(100);
                    } catch (Exception e) {
                    }
                }
                return returnPair;
            }
        default:
            throw new IllegalStateException("Can't allocate memory on target [" + targetMode + "]");
    }
}
Also used : lombok.val(lombok.val) ND4JIllegalStateException(org.nd4j.linalg.exception.ND4JIllegalStateException) PointersPair(org.nd4j.jita.allocator.pointers.PointersPair) CudaContext(org.nd4j.linalg.jcublas.context.CudaContext)
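
The HOST branch above implements a simple back-pressure loop: if the request would push zero-copy usage past the configured cap, it invokes a GC pass, sleeps, and re-checks. A distilled sketch of that pattern follows; the counter and limit parameters are illustrative stand-ins, not the handler's actual fields.

import java.util.concurrent.atomic.AtomicLong;

import org.nd4j.linalg.factory.Nd4j;

public class HostBackPressureSketch {
    /**
     * Blocks until currentUsage plus reqBytes fits under maxBytes, nudging the GC along the way.
     * Mirrors the wait loop of the HOST case in CudaZeroHandler.alloc(...).
     */
    public static void waitForHeadroom(AtomicLong currentUsage, long maxBytes, long reqBytes) throws InterruptedException {
        if (reqBytes > maxBytes)
            throw new IllegalStateException("Single request of " + reqBytes + " bytes exceeds the cap of " + maxBytes);
        while (currentUsage.get() + reqBytes >= maxBytes) {
            // same call the handler uses to encourage buffer deallocation
            Nd4j.getMemoryManager().invokeGc();
            Thread.sleep(1000);
        }
    }
}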

Aggregations

PointersPair (org.nd4j.jita.allocator.pointers.PointersPair): 8
AllocationPoint (org.nd4j.jita.allocator.impl.AllocationPoint): 5
Test (org.junit.Test): 3
INDArray (org.nd4j.linalg.api.ndarray.INDArray): 3
Pointer (org.bytedeco.javacpp.Pointer): 2
CudaPointer (org.nd4j.jita.allocator.pointers.CudaPointer): 2
DataInputStream (java.io.DataInputStream): 1
DataOutputStream (java.io.DataOutputStream): 1
FileInputStream (java.io.FileInputStream): 1
AtomicInteger (java.util.concurrent.atomic.AtomicInteger): 1
AtomicLong (java.util.concurrent.atomic.AtomicLong): 1
lombok.val (lombok.val): 1
GarbageBufferReference (org.nd4j.jita.allocator.garbage.GarbageBufferReference): 1
AtomicAllocator (org.nd4j.jita.allocator.impl.AtomicAllocator): 1
CudaWorkspace (org.nd4j.jita.workspace.CudaWorkspace): 1
PagedPointer (org.nd4j.linalg.api.memory.pointers.PagedPointer): 1
ND4JIllegalStateException (org.nd4j.linalg.exception.ND4JIllegalStateException): 1
CudaContext (org.nd4j.linalg.jcublas.context.CudaContext): 1