Search in sources :

Example 36 with AllocationPoint

use of org.nd4j.jita.allocator.impl.AllocationPoint in project nd4j by deeplearning4j.

the class CudaZeroHandler method pickupHostAllocation.

/**
 * Registers the given allocation point in one of the zero-allocation buckets.
 *
 * <p>A bucket id is drawn at random from {@code [0, numberOfGcThreads)}, the memory
 * required by the point's shape is added to {@code zeroUseCounter}, the bucket id is
 * stored on the point, and the point's object id is recorded in that bucket's map
 * (the object id is used as both key and value).
 *
 * @param point allocation point to register; its bucket id is overwritten by this call
 */
private void pickupHostAllocation(AllocationPoint point) {
    // The number of buckets is taken from the configured number of GC threads.
    int numBuckets = configuration.getNumberOfGcThreads();
    long bucketId = RandomUtils.nextInt(0, numBuckets);
    long reqMemory = AllocationUtils.getRequiredMemory(point.getShape());
    zeroUseCounter.addAndGet(reqMemory);
    point.setBucketId(bucketId);
    if (!zeroAllocations.containsKey(bucketId)) {
        // Parameterized message: no string is built when debug logging is disabled.
        log.debug("Creating bucketID: {}", bucketId);
        synchronized (this) {
            // Re-check under the lock so that only one thread creates the bucket map.
            if (!zeroAllocations.containsKey(bucketId)) {
                zeroAllocations.put(bucketId, new ConcurrentHashMap<Long, Long>());
            }
        }
    }
    zeroAllocations.get(bucketId).put(point.getObjectId(), point.getObjectId());
}
Also used : AtomicLong(java.util.concurrent.atomic.AtomicLong) AllocationPoint(org.nd4j.jita.allocator.impl.AllocationPoint)

Example 37 with AllocationPoint

use of org.nd4j.jita.allocator.impl.AllocationPoint in project nd4j by deeplearning4j.

the class CudaZeroHandler method memcpyDevice.

/**
 * Issues an asynchronous device-to-device copy into the device memory backing
 * {@code dstBuffer}, then marks the buffer's allocation point as written on the device.
 *
 * @param dstBuffer  destination buffer; must be a {@link BaseCudaDataBuffer}
 * @param srcPointer source pointer handed to the native copy
 * @param length     amount of data to copy, passed through to the native copy
 * @param dstOffset  offset added directly to the destination device address
 * @param context    CUDA context whose "old" stream the copy is queued on
 * @throws ND4JIllegalStateException if the native copy call returns 0
 */
@Override
public void memcpyDevice(DataBuffer dstBuffer, Pointer srcPointer, long length, long dstOffset, CudaContext context) {
    AllocationPoint point = ((BaseCudaDataBuffer) dstBuffer).getAllocationPoint();

    // Destination = device address of the buffer shifted by the requested offset.
    long dstAddress = point.getPointers().getDevicePointer().address() + dstOffset;
    Pointer dP = new CudaPointer(dstAddress);

    // A return value of 0 from the native call is treated as failure.
    boolean copyFailed = nativeOps.memcpyAsync(
            dP,
            srcPointer,
            length,
            CudaConstants.cudaMemcpyDeviceToDevice,
            context.getOldStream()) == 0;
    if (copyFailed) {
        throw new ND4JIllegalStateException("memcpyAsync failed");
    }

    point.tickDeviceWrite();
}
Also used : BaseCudaDataBuffer(org.nd4j.linalg.jcublas.buffer.BaseCudaDataBuffer) CudaPointer(org.nd4j.jita.allocator.pointers.CudaPointer) Pointer(org.bytedeco.javacpp.Pointer) ND4JIllegalStateException(org.nd4j.linalg.exception.ND4JIllegalStateException) AllocationPoint(org.nd4j.jita.allocator.impl.AllocationPoint) CudaPointer(org.nd4j.jita.allocator.pointers.CudaPointer)

Example 38 with AllocationPoint

use of org.nd4j.jita.allocator.impl.AllocationPoint in project nd4j by deeplearning4j.

the class SporadicTests method testReplicate5.

/**
 * Logs the host and device pointer addresses of an array, of each per-device replica
 * held by a {@link DeviceLocalNDArray}, and of the replica each worker thread sees.
 * The test makes no assertions; it only produces log output.
 */
@Test
public void testReplicate5() throws Exception {
    INDArray array = Nd4j.create(3, 3);

    // Addresses of the source array.
    long originalHost = AtomicAllocator.getInstance().getAllocationPoint(array).getPointers().getHostPointer().address();
    long originalDevice = AtomicAllocator.getInstance().getAllocationPoint(array).getPointers().getDevicePointer().address();
    log.error("Original: Host pt: {}; Dev pt: {}", originalHost, originalDevice);

    final DeviceLocalNDArray locals = new DeviceLocalNDArray(array);
    int numDevices = Nd4j.getAffinityManager().getNumberOfDevices();

    // Addresses of every replica, queried by explicit device index.
    for (int deviceIdx = 0; deviceIdx < numDevices; deviceIdx++) {
        long replicaHost = AtomicAllocator.getInstance().getAllocationPoint(locals.get(deviceIdx)).getPointers().getHostPointer().address();
        long replicaDevice = AtomicAllocator.getInstance().getAllocationPoint(locals.get(deviceIdx)).getPointers().getDevicePointer().address();
        log.error("deviceId: {}; Host pt: {}; Dev pt: {}", deviceIdx, replicaHost, replicaDevice);
    }

    // Addresses of the replica returned by the no-argument get() on each worker thread.
    Thread[] workers = new Thread[numDevices];
    for (int i = 0; i < workers.length; i++) {
        Thread worker = new Thread(new Runnable() {

            @Override
            public void run() {
                AllocationPoint localPoint = AtomicAllocator.getInstance().getAllocationPoint(locals.get());
                log.error("deviceId: {}; Host pt: {}; Dev pt: {}", Nd4j.getAffinityManager().getDeviceForCurrentThread(), localPoint.getPointers().getHostPointer().address(), localPoint.getPointers().getDevicePointer().address());
            }
        });
        workers[i] = worker;
        worker.start();
    }

    for (Thread worker : workers) {
        worker.join();
    }
}
Also used : INDArray(org.nd4j.linalg.api.ndarray.INDArray) DeviceLocalNDArray(org.nd4j.linalg.util.DeviceLocalNDArray) AllocationPoint(org.nd4j.jita.allocator.impl.AllocationPoint) AllocationPoint(org.nd4j.jita.allocator.impl.AllocationPoint) Test(org.junit.Test)

Example 39 with AllocationPoint

use of org.nd4j.jita.allocator.impl.AllocationPoint in project nd4j by deeplearning4j.

the class SporadicTests method testReplicate3.

/**
 * Checks that every replica held by a {@link DeviceLocalNDArray} of a 10x10 array of
 * ones sums to 10 along a dimension, both from worker threads (via the no-argument
 * {@code get()}) and from the test thread (via {@code get(deviceIndex)}).
 *
 * <p>Anything thrown inside a worker thread, including a failed assertion, is captured
 * and rethrown on the test thread after all workers have been joined. Without this,
 * such failures would never reach the test runner and the test would pass regardless.
 */
@Test
public void testReplicate3() throws Exception {
    INDArray array = Nd4j.ones(10, 10);
    INDArray exp = Nd4j.create(10).assign(10f);
    log.error("Array length: {}", array.length());
    int numDevices = Nd4j.getAffinityManager().getNumberOfDevices();
    final DeviceLocalNDArray locals = new DeviceLocalNDArray(array);

    // One slot per worker: each worker writes only its own slot, and the test thread
    // reads the slots only after join(), which makes the writes visible.
    final Throwable[] failures = new Throwable[numDevices];
    Thread[] threads = new Thread[numDevices];
    for (int t = 0; t < numDevices; t++) {
        final int workerIdx = t;
        threads[t] = new Thread(new Runnable() {

            @Override
            public void run() {
                try {
                    AllocationPoint point = AtomicAllocator.getInstance().getAllocationPoint(locals.get());
                    log.error("Point deviceId: {}; current deviceId: {}", point.getDeviceId(), Nd4j.getAffinityManager().getDeviceForCurrentThread());
                    INDArray sum = locals.get().sum(1);
                    INDArray localExp = Nd4j.create(10).assign(10f);
                    assertEquals(localExp, sum);
                } catch (Throwable e) {
                    // Thread boundary: record the failure so the test thread can report it.
                    failures[workerIdx] = e;
                }
            }
        });
        threads[t].start();
    }
    for (int t = 0; t < numDevices; t++) {
        threads[t].join();
    }

    // Surface worker failures on the test thread so the test actually fails.
    for (int t = 0; t < numDevices; t++) {
        if (failures[t] != null) {
            throw new AssertionError("Worker thread " + t + " failed", failures[t]);
        }
    }

    for (int t = 0; t < numDevices; t++) {
        AllocationPoint point = AtomicAllocator.getInstance().getAllocationPoint(locals.get(t));
        log.error("Point deviceId: {}; current deviceId: {}", point.getDeviceId(), Nd4j.getAffinityManager().getDeviceForCurrentThread());
        // NOTE(review): adds zero in place; kept as in the original - possibly meant to
        // touch exp on the current device before comparing. Confirm intent.
        exp.addi(0.0f);
        assertEquals(exp, locals.get(t).sum(0));
        log.error("Point after: {}", point.getDeviceId());
    }
}
Also used : INDArray(org.nd4j.linalg.api.ndarray.INDArray) DeviceLocalNDArray(org.nd4j.linalg.util.DeviceLocalNDArray) AllocationPoint(org.nd4j.jita.allocator.impl.AllocationPoint) AllocationPoint(org.nd4j.jita.allocator.impl.AllocationPoint) Test(org.junit.Test)

Example 40 with AllocationPoint

use of org.nd4j.jita.allocator.impl.AllocationPoint in project nd4j by deeplearning4j.

the class SporadicTests method testLocality.

/**
 * Verifies that the allocation point is reported as actual on the device side for a
 * freshly created vector, for an 'f'-order reshape of it, for a no-copy reshape of it,
 * and for a separately created 4-dimensional 'c'-order array.
 */
@Test
public void testLocality() {
    INDArray vector = Nd4j.create(new float[] { 1, 2, 3, 4, 5, 6, 7, 8, 9 });
    assertActualOnDevice(vector);

    INDArray reshaped = vector.reshape('f', 3, 3);
    assertActualOnDevice(reshaped);

    INDArray reshapedNoCopy = Shape.newShapeNoCopy(vector, new int[] { 3, 3 }, true);
    assertActualOnDevice(reshapedNoCopy);

    INDArray fourDimensional = Nd4j.create(new int[] { 3, 4, 4, 4 }, 'c');
    assertActualOnDevice(fourDimensional);
}

/** Asserts that the allocation point of {@code target} reports itself as actual on the device side. */
private void assertActualOnDevice(INDArray target) {
    AllocationPoint allocationPoint = AtomicAllocator.getInstance().getAllocationPoint(target);
    assertEquals(true, allocationPoint.isActualOnDeviceSide());
}
Also used : INDArray(org.nd4j.linalg.api.ndarray.INDArray) AllocationPoint(org.nd4j.jita.allocator.impl.AllocationPoint) Test(org.junit.Test)

Aggregations

AllocationPoint (org.nd4j.jita.allocator.impl.AllocationPoint): 67; INDArray (org.nd4j.linalg.api.ndarray.INDArray): 33; Test (org.junit.Test): 31; CudaContext (org.nd4j.linalg.jcublas.context.CudaContext): 24; CudaPointer (org.nd4j.jita.allocator.pointers.CudaPointer): 15; DataBuffer (org.nd4j.linalg.api.buffer.DataBuffer): 11; ND4JIllegalStateException (org.nd4j.linalg.exception.ND4JIllegalStateException): 11; AtomicAllocator (org.nd4j.jita.allocator.impl.AtomicAllocator): 7; BaseCudaDataBuffer (org.nd4j.linalg.jcublas.buffer.BaseCudaDataBuffer): 7; Pointer (org.bytedeco.javacpp.Pointer): 6; AllocationShape (org.nd4j.jita.allocator.impl.AllocationShape): 5; PointersPair (org.nd4j.jita.allocator.pointers.PointersPair): 5; MemoryWorkspace (org.nd4j.linalg.api.memory.MemoryWorkspace): 4; JCublasNDArray (org.nd4j.linalg.jcublas.JCublasNDArray): 3; CudaDoubleDataBuffer (org.nd4j.linalg.jcublas.buffer.CudaDoubleDataBuffer): 3; CompressedDataBuffer (org.nd4j.linalg.compression.CompressedDataBuffer): 2; DeviceLocalNDArray (org.nd4j.linalg.util.DeviceLocalNDArray): 2; DataInputStream (java.io.DataInputStream): 1; DataOutputStream (java.io.DataOutputStream): 1; FileInputStream (java.io.FileInputStream): 1