Example 11 with AllocationPoint

Use of org.nd4j.jita.allocator.impl.AllocationPoint in project nd4j by deeplearning4j.

From the class GridExecutionerTest, method testDupLocality2.

@Test
public void testDupLocality2() throws Exception {
    INDArray array2 = Nd4j.createUninitialized(new int[] { 10, 10 }, 'c');
    // ((GridExecutioner) Nd4j.getExecutioner()).flushQueueBlocking();
    AllocationPoint point2 = AtomicAllocator.getInstance().getAllocationPoint(array2);
    assertEquals(true, point2.isActualOnDeviceSide());
    assertEquals(true, point2.isActualOnHostSide());
}
Also used : INDArray(org.nd4j.linalg.api.ndarray.INDArray) AllocationPoint(org.nd4j.jita.allocator.impl.AllocationPoint) Test(org.junit.Test)
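
The host/device flags checked above can be wrapped into a small helper. A minimal sketch (not part of the project), assuming the same imports as the test (INDArray, AtomicAllocator, AllocationPoint):

public static boolean isFullySynchronized(INDArray array) {
    // Hypothetical helper, not from nd4j: reports whether neither side holds stale data.
    AllocationPoint point = AtomicAllocator.getInstance().getAllocationPoint(array);
    return point.isActualOnDeviceSide() && point.isActualOnHostSide();
}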

Example 12 with AllocationPoint

Use of org.nd4j.jita.allocator.impl.AllocationPoint in project nd4j by deeplearning4j.

From the class GridExecutionerTest, method testDupLocality4.

@Test
public void testDupLocality4() throws Exception {
    int nIn = 8;
    int layerSize = 10;
    int nOut = 4;
    INDArray in = Nd4j.ones(1, 10).dup('c');
    AllocationPoint point1 = AtomicAllocator.getInstance().getAllocationPoint(in);
    assertEquals(true, point1.isEnqueued());
    // assertEquals(1, ((GridExecutioner) Nd4j.getExecutioner()).getQueueLength());
    INDArray out = Nd4j.zeros(1, 10).dup('c');
    AllocationPoint point1A = AtomicAllocator.getInstance().getAllocationPoint(in);
    AllocationPoint point2 = AtomicAllocator.getInstance().getAllocationPoint(out);
    assertEquals(1, ((GridExecutioner) Nd4j.getExecutioner()).getQueueLength());
    assertTrue(point1 == point1A);
    assertEquals(true, point2.isEnqueued());
    assertEquals(false, point1.isEnqueued());
    assertEquals(Nd4j.ones(1, 10), in);
    assertEquals(Nd4j.zeros(1, 10), out);
    INDArray inCopy = in.dup('c');
    AllocationPoint point3 = AtomicAllocator.getInstance().getAllocationPoint(inCopy);
    assertEquals(false, point2.isEnqueued());
    assertEquals(true, point3.isEnqueued());
    assertEquals(true, point1.isEnqueued());
}
Also used : INDArray(org.nd4j.linalg.api.ndarray.INDArray) AllocationPoint(org.nd4j.jita.allocator.impl.AllocationPoint) Test(org.junit.Test)
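
The enqueue assertions above depend on when the GridExecutioner actually flushes its op queue. A hedged sketch of forcing that flush explicitly, assuming the flushQueueBlocking() call referenced in the commented-out line of testDupLocality2 above:

public static void drainOpQueue() {
    // Hypothetical helper, not from nd4j: block until every enqueued op has executed.
    GridExecutioner executioner = (GridExecutioner) Nd4j.getExecutioner();
    executioner.flushQueueBlocking();
    // Once the queue is drained, getQueueLength() should report 0 and previously
    // enqueued AllocationPoints should no longer report isEnqueued() == true.
}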

Example 13 with AllocationPoint

Use of org.nd4j.jita.allocator.impl.AllocationPoint in project nd4j by deeplearning4j.

From the class GridExecutionerTest, method testDupLocality3.

@Test
public void testDupLocality3() throws Exception {
    INDArray array1 = Nd4j.create(new float[] { 1f, 1f, 1f, 1f, 1f });
    INDArray exp1 = Nd4j.create(new float[] { 0f, 1f, 1f, 1f, 1f });
    INDArray exp2 = Nd4j.create(new float[] { 1f, 1f, 1f, 1f, 1f });
    INDArray array2 = array1.dup();
    AllocationPoint point1 = AtomicAllocator.getInstance().getAllocationPoint(array1);
    AllocationPoint point2 = AtomicAllocator.getInstance().getAllocationPoint(array2);
    assertTrue(point1.isActualOnDeviceSide());
    assertTrue(point2.isActualOnDeviceSide());
    assertTrue(point1.isEnqueued());
    assertTrue(point2.isEnqueued());
    array1.putScalar(0, 0f);
    assertEquals(0, ((GridExecutioner) Nd4j.getExecutioner()).getQueueLength());
    assertFalse(point1.isActualOnDeviceSide());
    assertEquals(exp1, array1);
    assertEquals(exp2, array2);
}
Also used : INDArray(org.nd4j.linalg.api.ndarray.INDArray) AllocationPoint(org.nd4j.jita.allocator.impl.AllocationPoint) Test(org.junit.Test)
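
The putScalar call above is what invalidates the device-side copy of array1 without touching its dup. A hedged sketch of the same signalling done explicitly, using the tickHostWrite() call that CudaMemoryManager.memset (next example) performs after a host-side write:

public static void markHostModified(INDArray array) {
    // Hypothetical helper, not from nd4j: tell the allocator the host copy was just written,
    // so isActualOnDeviceSide() reports false until the next device synchronization.
    AllocationPoint point = AtomicAllocator.getInstance().getAllocationPoint(array);
    point.tickHostWrite();
}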

Example 14 with AllocationPoint

Use of org.nd4j.jita.allocator.impl.AllocationPoint in project nd4j by deeplearning4j.

From the class CudaMemoryManager, method memset.

@Override
public void memset(INDArray array) {
    if (array.isView()) {
        array.assign(0.0);
        // we don't want any mGRID activations here
        Nd4j.getExecutioner().commit();
        return;
    }
    // we want to be sure we have no trails left in mGRID
    Nd4j.getExecutioner().push();
    AllocationPoint point = AtomicAllocator.getInstance().getAllocationPoint(array);
    if (point.getAllocationStatus() == AllocationStatus.DEVICE) {
        CudaContext context = (CudaContext) AtomicAllocator.getInstance().getDeviceContext().getContext();
        NativeOpsHolder.getInstance().getDeviceNativeOps().memsetAsync(AtomicAllocator.getInstance().getPointer(array, context), 0, array.data().length() * Nd4j.sizeOfDataType(array.data().dataType()), 0, context.getOldStream());
        // better be safe than sorry
        context.getOldStream().synchronize();
        point.tickDeviceWrite();
    } else if (point.getAllocationStatus() == AllocationStatus.HOST) {
        Nd4j.getExecutioner().commit();
        // just casual memset
        Pointer.memset(AtomicAllocator.getInstance().getHostPointer(array), 0, array.data().length() * Nd4j.sizeOfDataType(array.data().dataType()));
        point.tickHostWrite();
    }
}
Also used : CudaContext(org.nd4j.linalg.jcublas.context.CudaContext) AllocationPoint(org.nd4j.jita.allocator.impl.AllocationPoint)
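
From the caller's side this memset is reached through the memory manager. A hedged usage sketch, assuming Nd4j.getMemoryManager() resolves to the CudaMemoryManager shown above on a CUDA backend:

public static void zeroOut(INDArray array) {
    // Zero-fill the array; for device-resident buffers this runs memsetAsync on the
    // CUDA stream, for host-resident buffers a plain Pointer.memset, as shown above.
    Nd4j.getMemoryManager().memset(array);
}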

Example 15 with AllocationPoint

Use of org.nd4j.jita.allocator.impl.AllocationPoint in project nd4j by deeplearning4j.

From the class CudaFullCachingProvider, method free.

/**
 * This method frees the specific chunk of memory described by the AllocationPoint passed in.
 *
 * PLEASE NOTE: This method may actually ignore the free request and keep the released memory chunk for future reuse.
 *
 * @param point the AllocationPoint describing the memory chunk to release
 */
@Override
public void free(AllocationPoint point) {
    if (point.getAllocationStatus() == AllocationStatus.DEVICE) {
        if (point.isConstant())
            return;
        AllocationShape shape = point.getShape();
        int deviceId = point.getDeviceId();
        long address = point.getDevicePointer().address();
        long reqMemory = AllocationUtils.getRequiredMemory(shape);
        if (reqMemory > CudaEnvironment.getInstance().getConfiguration().getMaximumDeviceCacheableLength() || deviceCachedAmount.get(deviceId).get() >= CudaEnvironment.getInstance().getConfiguration().getMaximumHostCache()) {
            // log.info("DEVICE_{} memory purging: {} bytes; MS: {}; MT: {}", deviceId, reqMemory, MAX_GPU_ALLOCATION, MAX_GPU_CACHE);
            super.free(point);
            return;
        }
        // log.info("Saving HOST memory into cache...");
        ensureDeviceCacheHolder(deviceId, shape);
        CacheHolder cache = deviceCache.get(deviceId).get(shape);
        if (point.getDeviceId() != deviceId)
            throw new RuntimeException("deviceId changed!");
        // memory chunks < threshold will be cached no matter what
        if (reqMemory <= FORCED_CACHE_THRESHOLD) {
            cache.put(new CudaPointer(point.getDevicePointer().address()));
            return;
        } else {
            long cacheEntries = cache.size();
            long cacheHeight = deviceCache.get(deviceId).size();
            // total memory allocated within this bucket
            long cacheDepth = cacheEntries * reqMemory;
            // if (cacheDepth < MAX_CACHED_MEMORY / cacheHeight) {
            cache.put(new CudaPointer(point.getDevicePointer().address()));
            return;
        // } else {
        // super.free(point);
        // }
        }
    }
    super.free(point);
}
Also used : AllocationShape(org.nd4j.jita.allocator.impl.AllocationShape) AllocationPoint(org.nd4j.jita.allocator.impl.AllocationPoint) CudaPointer(org.nd4j.jita.allocator.pointers.CudaPointer)
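
The cache-or-purge decision above hinges on the chunk size computed from the AllocationShape. A hedged sketch of just that check (not part of the provider), reusing the AllocationUtils and CudaEnvironment calls from the method:

public static boolean isDeviceCacheable(AllocationPoint point) {
    // Hypothetical helper, not from nd4j: chunks above the configured cacheable
    // length are released immediately via super.free() instead of being cached.
    long reqMemory = AllocationUtils.getRequiredMemory(point.getShape());
    return reqMemory <= CudaEnvironment.getInstance().getConfiguration()
            .getMaximumDeviceCacheableLength();
}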

Aggregations

AllocationPoint (org.nd4j.jita.allocator.impl.AllocationPoint) 67
INDArray (org.nd4j.linalg.api.ndarray.INDArray) 33
Test (org.junit.Test) 31
CudaContext (org.nd4j.linalg.jcublas.context.CudaContext) 24
CudaPointer (org.nd4j.jita.allocator.pointers.CudaPointer) 15
DataBuffer (org.nd4j.linalg.api.buffer.DataBuffer) 11
ND4JIllegalStateException (org.nd4j.linalg.exception.ND4JIllegalStateException) 11
AtomicAllocator (org.nd4j.jita.allocator.impl.AtomicAllocator) 7
BaseCudaDataBuffer (org.nd4j.linalg.jcublas.buffer.BaseCudaDataBuffer) 7
Pointer (org.bytedeco.javacpp.Pointer) 6
AllocationShape (org.nd4j.jita.allocator.impl.AllocationShape) 5
PointersPair (org.nd4j.jita.allocator.pointers.PointersPair) 5
MemoryWorkspace (org.nd4j.linalg.api.memory.MemoryWorkspace) 4
JCublasNDArray (org.nd4j.linalg.jcublas.JCublasNDArray) 3
CudaDoubleDataBuffer (org.nd4j.linalg.jcublas.buffer.CudaDoubleDataBuffer) 3
CompressedDataBuffer (org.nd4j.linalg.compression.CompressedDataBuffer) 2
DeviceLocalNDArray (org.nd4j.linalg.util.DeviceLocalNDArray) 2
DataInputStream (java.io.DataInputStream) 1
DataOutputStream (java.io.DataOutputStream) 1
FileInputStream (java.io.FileInputStream) 1