Search in sources :

Example 51 with AllocationPoint

use of org.nd4j.jita.allocator.impl.AllocationPoint in project nd4j by deeplearning4j.

In the class AveragingTests, method testMultiDeviceAveraging.

/**
 * Checks that {@code Nd4j.averageAndPropagate} averages a set of arrays and writes the
 * averaged values back into every source array.
 * <p>
 * Each of the {@code THREADS} arrays is created on its own short-lived thread and filled
 * with that thread's index, so the expected mean of every element is
 * {@code (THREADS - 1) / 2}. The test also requires that at least one of the created
 * arrays reports a non-zero device id.
 * <p>
 * This test should be run on multi-gpu system only. On single-gpu system this test will fail
 *
 * @throws Exception if joining a worker thread is interrupted or any array operation fails
 */
@Test
public void testMultiDeviceAveraging() throws Exception {
    final List<Pair<INDArray, INDArray>> pairs = new ArrayList<>();
    AtomicAllocator allocator = AtomicAllocator.getInstance();
    for (int i = 0; i < THREADS; i++) {
        final int order = i;
        Thread thread = new Thread(new Runnable() {

            @Override
            public void run() {
                pairs.add(new Pair<INDArray, INDArray>(Nd4j.valueArrayOf(LENGTH, (double) order), null));
                try {
                    Thread.sleep(100);
                } catch (InterruptedException e) {
                    // keep the interrupt visible instead of silently discarding it
                    Thread.currentThread().interrupt();
                }
            }
        });
        thread.start();
        // Joined immediately: workers run one at a time, so the plain ArrayList is never
        // accessed concurrently and join() makes the added element visible here.
        thread.join();
    }
    assertEquals(THREADS, pairs.size());

    final List<INDArray> arrays = new ArrayList<>();
    boolean sawNonZeroDevice = false;
    for (int i = 0; i < THREADS; i++) {
        INDArray array = pairs.get(i).getKey();
        AllocationPoint point = allocator.getAllocationPoint(array.data());
        if (point.getDeviceId() != 0) {
            sawNonZeroDevice = true;
        }
        arrays.add(array);
    }
    // at least one array must have landed on a device other than device 0
    assertEquals(true, sawNonZeroDevice);

    // An earlier version of this test averaged manually (addi over all arrays, then divi)
    // without propagating the result back to the source arrays.
    long time1 = System.currentTimeMillis();
    INDArray z = Nd4j.averageAndPropagate(arrays);
    long time2 = System.currentTimeMillis();
    System.out.println("Execution time: " + (time2 - time1));

    // array i holds the value i everywhere, so the mean is (0 + ... + THREADS-1) / THREADS
    final float expectedAverage = (THREADS - 1) / 2.0f;
    assertEquals(expectedAverage, z.getFloat(0), 0.01f);
    assertEquals(expectedAverage, z.getFloat(10), 0.01f);

    // propagation: every source array must now hold the averaged value in every element
    final float averaged = z.getFloat(0);
    for (int i = 0; i < THREADS; i++) {
        for (int x = 0; x < LENGTH; x++) {
            assertEquals("Failed on array [" + i + "], element [" + x + "]", averaged, arrays.get(i).getFloat(x), 0.01f);
        }
    }
}
Also used: AtomicAllocator (org.nd4j.jita.allocator.impl.AtomicAllocator), ArrayList (java.util.ArrayList), AllocationPoint (org.nd4j.jita.allocator.impl.AllocationPoint), AtomicBoolean (java.util.concurrent.atomic.AtomicBoolean), INDArray (org.nd4j.linalg.api.ndarray.INDArray), Pair (org.nd4j.linalg.primitives.Pair), Test (org.junit.Test)

Example 52 with AllocationPoint

use of org.nd4j.jita.allocator.impl.AllocationPoint in project nd4j by deeplearning4j.

In the class GridExecutionerTest, method testReverseFlow1.

// ///////////////////////////////////////////////////////////////////////
// ///////////////////////////////////////////////////////////////////////
/*
    Reverse flow tests
*/
// ///////////////////////////////////////////////////////////////////////
// ///////////////////////////////////////////////////////////////////////
/**
 * Reverse-flow check: tracks which side (host or device) the allocation point of a
 * linspace array reports as actual while the array is reshaped, has a row overwritten,
 * and the grid executioner queue is finally flushed.
 */
@Test
public void testReverseFlow1() throws Exception {
    final CudaGridExecutioner grid = (CudaGridExecutioner) Nd4j.getExecutioner();
    final INDArray replacementRow = Nd4j.create(new double[] { 5, 6 });
    final INDArray flat = Nd4j.linspace(1, 4, 4);
    final AllocationPoint allocation = AtomicAllocator.getInstance().getAllocationPoint(flat);

    // freshly created: nothing queued, data reported actual on host only
    assertEquals(0, grid.getQueueLength());
    assertEquals(true, allocation.isActualOnHostSide());
    assertEquals(false, allocation.isActualOnDeviceSide());
    System.out.println("A: --------------------------");

    // reshaping leaves the reported host/device state untouched
    final INDArray matrix = flat.reshape(2, 2);
    assertEquals(true, allocation.isActualOnHostSide());
    assertEquals(false, allocation.isActualOnDeviceSide());
    System.out.println("B: --------------------------");

    // putRow leaves one op in the queue; state is still host-side until it is flushed
    matrix.putRow(1, replacementRow);
    assertEquals(true, allocation.isActualOnHostSide());
    assertEquals(false, allocation.isActualOnDeviceSide());
    System.out.println("C: --------------------------");
    assertEquals(1, grid.getQueueLength());

    // after the blocking flush the queue is empty and the data is actual on device only
    grid.flushQueueBlocking();
    assertEquals(0, grid.getQueueLength());
    assertEquals(false, allocation.isActualOnHostSide());
    assertEquals(true, allocation.isActualOnDeviceSide());
    System.out.println("D: --------------------------");

    // second row must now carry the replacement values
    assertArrayEquals(new float[] { 1, 2, 5, 6 }, matrix.data().asFloat(), 0.1f);
}
Also used : INDArray(org.nd4j.linalg.api.ndarray.INDArray) AllocationPoint(org.nd4j.jita.allocator.impl.AllocationPoint) Test(org.junit.Test)

Example 53 with AllocationPoint

use of org.nd4j.jita.allocator.impl.AllocationPoint in project nd4j by deeplearning4j.

In the class GridExecutionerTest, method testDupLocality1.

/**
 * Checks the host/device state reported by the allocation points of a newly created
 * array and of its {@code dup()} copy.
 */
@Test
public void testDupLocality1() throws Exception {
    final AtomicAllocator atomicAllocator = AtomicAllocator.getInstance();

    // source array: expected to be reported actual on device only
    final INDArray source = Nd4j.create(new double[] { 1, 2, 3, 4, 5 });
    final AllocationPoint sourcePoint = atomicAllocator.getAllocationPoint(source);
    assertEquals(true, sourcePoint.isActualOnDeviceSide());
    assertEquals(false, sourcePoint.isActualOnHostSide());

    // duplicate: expected to be reported actual on both device and host
    final INDArray copy = source.dup();
    final AllocationPoint copyPoint = atomicAllocator.getAllocationPoint(copy);
    assertEquals(true, copyPoint.isActualOnDeviceSide());
    assertEquals(true, copyPoint.isActualOnHostSide());
}
Also used : INDArray(org.nd4j.linalg.api.ndarray.INDArray) AllocationPoint(org.nd4j.jita.allocator.impl.AllocationPoint) Test(org.junit.Test)

Example 54 with AllocationPoint

use of org.nd4j.jita.allocator.impl.AllocationPoint in project nd4j by deeplearning4j.

In the class AsynchronousFlowControllerTest, method testDependencies2.

/**
 * Registers an action that uses {@code array} as operand and {@code arrayWrite} as result,
 * then checks that the operand's allocation point reports active reads and no active write.
 */
@Test
public void testDependencies2() throws Exception {
    final INDArray arrayWrite = Nd4j.create(new float[] { 1f, 2f, 3f });
    final INDArray array = Nd4j.create(new float[] { 1f, 2f, 3f });

    // toString() is called only to synchronize, so the activeWrite caused by
    // array creation has completed before dependencies are inspected
    array.toString();

    final AllocationPoint operandPoint = allocator.getAllocationPoint(array);
    assertPointHasNoDependencies(operandPoint);

    final CudaContext cudaContext = controller.prepareAction(arrayWrite, array);
    controller.registerAction(cudaContext, arrayWrite, array);

    assertTrue(controller.hasActiveReads(operandPoint));
    assertEquals(-1, controller.hasActiveWrite(operandPoint));
}
Also used : INDArray(org.nd4j.linalg.api.ndarray.INDArray) CudaContext(org.nd4j.linalg.jcublas.context.CudaContext) AllocationPoint(org.nd4j.jita.allocator.impl.AllocationPoint) Test(org.junit.Test)

Example 55 with AllocationPoint

use of org.nd4j.jita.allocator.impl.AllocationPoint in project nd4j by deeplearning4j.

In the class AsynchronousFlowControllerTest, method testDependencies3.

/**
 * Registers an action that uses {@code array} as operand and {@code arrayWrite} as result,
 * checks the read/write dependencies recorded on both allocation points, then verifies
 * that synchronizing the operand's read lanes clears them.
 */
@Test
public void testDependencies3() throws Exception {
    final INDArray arrayWrite = Nd4j.create(new float[] { 1f, 2f, 3f });
    final INDArray array = Nd4j.create(new float[] { 1f, 2f, 3f });

    // toString() is called only to synchronize, so the activeWrite caused by
    // array creation has completed before dependencies are inspected
    array.toString();

    final AllocationPoint operandPoint = allocator.getAllocationPoint(array);
    final AllocationPoint resultPoint = allocator.getAllocationPoint(arrayWrite);
    assertPointHasNoDependencies(operandPoint);

    final CudaContext cudaContext = controller.prepareAction(arrayWrite, array);
    controller.registerAction(cudaContext, arrayWrite, array);

    // operand is being read; result is being written and has no readers
    assertTrue(controller.hasActiveReads(operandPoint));
    assertFalse(controller.hasActiveReads(resultPoint));
    assertNotEquals(-1, controller.hasActiveWrite(resultPoint));

    // after synchronizing the operand's read lanes, neither point reports dependencies
    controller.synchronizeReadLanes(operandPoint);
    assertPointHasNoDependencies(operandPoint);
    assertEquals(-1, controller.hasActiveWrite(resultPoint));
}
Also used : INDArray(org.nd4j.linalg.api.ndarray.INDArray) CudaContext(org.nd4j.linalg.jcublas.context.CudaContext) AllocationPoint(org.nd4j.jita.allocator.impl.AllocationPoint) Test(org.junit.Test)

Aggregations

AllocationPoint (org.nd4j.jita.allocator.impl.AllocationPoint) 67; INDArray (org.nd4j.linalg.api.ndarray.INDArray) 33; Test (org.junit.Test) 31; CudaContext (org.nd4j.linalg.jcublas.context.CudaContext) 24; CudaPointer (org.nd4j.jita.allocator.pointers.CudaPointer) 15; DataBuffer (org.nd4j.linalg.api.buffer.DataBuffer) 11; ND4JIllegalStateException (org.nd4j.linalg.exception.ND4JIllegalStateException) 11; AtomicAllocator (org.nd4j.jita.allocator.impl.AtomicAllocator) 7; BaseCudaDataBuffer (org.nd4j.linalg.jcublas.buffer.BaseCudaDataBuffer) 7; Pointer (org.bytedeco.javacpp.Pointer) 6; AllocationShape (org.nd4j.jita.allocator.impl.AllocationShape) 5; PointersPair (org.nd4j.jita.allocator.pointers.PointersPair) 5; MemoryWorkspace (org.nd4j.linalg.api.memory.MemoryWorkspace) 4; JCublasNDArray (org.nd4j.linalg.jcublas.JCublasNDArray) 3; CudaDoubleDataBuffer (org.nd4j.linalg.jcublas.buffer.CudaDoubleDataBuffer) 3; CompressedDataBuffer (org.nd4j.linalg.compression.CompressedDataBuffer) 2; DeviceLocalNDArray (org.nd4j.linalg.util.DeviceLocalNDArray) 2; DataInputStream (java.io.DataInputStream) 1; DataOutputStream (java.io.DataOutputStream) 1; FileInputStream (java.io.FileInputStream) 1