Example 16 with AllocationPoint

Use of org.nd4j.jita.allocator.impl.AllocationPoint in project nd4j by deeplearning4j.

From class JCublasNDArrayFactory, method average.

@Override
public INDArray average(INDArray target, INDArray[] arrays) {
    if (arrays == null || arrays.length == 0)
        throw new RuntimeException("Input arrays are missing");
    if (arrays.length == 1)
        return target.assign(arrays[0]);
    // we do averaging on GPU only if ALL devices have p2p links
    if (nativeOps.isP2PAvailable() && CudaEnvironment.getInstance().getConfiguration().isCrossDeviceAccessAllowed()) {
        Nd4j.getExecutioner().push();
        long len = target != null ? target.lengthLong() : arrays[0].lengthLong();
        AtomicAllocator allocator = AtomicAllocator.getInstance();
        CudaContext context = allocator.getFlowController().prepareAction(target, arrays);
        // two of the constructor arguments are unused on this code path
        PointerPointer extras = new PointerPointer(null, context.getOldStream(),
                        allocator.getDeviceIdPointer(), new CudaPointer(0));
        Pointer z = target == null ? null : AtomicAllocator.getInstance().getPointer(target, context);
        long[] xPointers = new long[arrays.length];
        for (int i = 0; i < arrays.length; i++) {
            if (arrays[i].elementWiseStride() != 1)
                throw new ND4JIllegalStateException("Native averaging is applicable only to continuous INDArrays");
            if (arrays[i].lengthLong() != len)
                throw new ND4JIllegalStateException("All arrays should have equal length for averaging");
            AllocationPoint point = allocator.getAllocationPoint(arrays[i]);
            xPointers[i] = point.getPointers().getDevicePointer().address();
            point.tickDeviceWrite();
        }
        CudaDoubleDataBuffer tempX = new CudaDoubleDataBuffer(arrays.length);
        allocator.memcpyBlocking(tempX, new LongPointer(xPointers), xPointers.length * 8, 0);
        PointerPointer x = new PointerPointer(AtomicAllocator.getInstance().getPointer(tempX, context));
        if (arrays[0].data().dataType() == DataBuffer.Type.DOUBLE) {
            nativeOps.averageDouble(extras, x, target == null ? null : (DoublePointer) z, arrays.length, len, true);
        } else if (arrays[0].data().dataType() == DataBuffer.Type.FLOAT) {
            nativeOps.averageFloat(extras, x, target == null ? null : (FloatPointer) z, arrays.length, len, true);
        } else {
            nativeOps.averageHalf(extras, x, target == null ? null : (ShortPointer) z, arrays.length, len, true);
        }
        allocator.getFlowController().registerAction(context, target, arrays);
        tempX.address(); // presumably just keeps tempX reachable until the native call has been issued
        return target;
    } else {
        // otherwise we do the averaging on the CPU side;
        // we expect all operations to be complete at this point
        long len = target == null ? arrays[0].lengthLong() : target.lengthLong();
        CudaContext context = (CudaContext) AtomicAllocator.getInstance().getDeviceContext().getContext();
        PointerPointer dataPointers = new PointerPointer(arrays.length);
        // two of the constructor arguments are unused on this code path
        PointerPointer extras = new PointerPointer(null, context.getOldStream(),
                        AtomicAllocator.getInstance().getDeviceIdPointer(), new CudaPointer(1));
        for (int i = 0; i < arrays.length; i++) {
            Nd4j.getCompressor().autoDecompress(arrays[i]);
            if (arrays[i].elementWiseStride() != 1)
                throw new ND4JIllegalStateException("Native averaging is applicable only to continuous INDArrays");
            if (arrays[i].lengthLong() != len)
                throw new ND4JIllegalStateException("All arrays should have equal length for averaging");
            dataPointers.put(i, AtomicAllocator.getInstance().getHostPointer(arrays[i]));
        }
        if (arrays[0].data().dataType() == DataBuffer.Type.DOUBLE) {
            nativeOps.averageDouble(extras, dataPointers, target == null ? null : (DoublePointer) AtomicAllocator.getInstance().getHostPointer(target), arrays.length, len, true);
        } else if (arrays[0].data().dataType() == DataBuffer.Type.FLOAT) {
            nativeOps.averageFloat(extras, dataPointers, target == null ? null : (FloatPointer) AtomicAllocator.getInstance().getHostPointer(target), arrays.length, len, true);
        } else {
            nativeOps.averageHalf(extras, dataPointers, target == null ? null : (ShortPointer) AtomicAllocator.getInstance().getHostPointer(target), arrays.length, len, true);
        }
        if (target != null)
            AtomicAllocator.getInstance().getAllocationPoint(target).tickHostWrite();
        // TODO: make propagation optional maybe?
        if (true) {
            for (int i = 0; i < arrays.length; i++) {
                AtomicAllocator.getInstance().getAllocationPoint(arrays[i]).tickHostWrite();
            }
        }
        return target;
    }
}
Also used : AtomicAllocator(org.nd4j.jita.allocator.impl.AtomicAllocator) CudaContext(org.nd4j.linalg.jcublas.context.CudaContext) CudaPointer(org.nd4j.jita.allocator.pointers.CudaPointer) AllocationPoint(org.nd4j.jita.allocator.impl.AllocationPoint) CudaDoubleDataBuffer(org.nd4j.linalg.jcublas.buffer.CudaDoubleDataBuffer) ND4JIllegalStateException(org.nd4j.linalg.exception.ND4JIllegalStateException)
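
For context, here is a minimal usage sketch of this averaging path. It is an illustration under assumptions (the array values are invented, and it assumes a CUDA backend is on the classpath so that Nd4j.factory() resolves to the JCublasNDArrayFactory shown above), not code taken from the project:

// average two equal-length arrays into a pre-allocated target
INDArray target = Nd4j.create(3);
INDArray[] inputs = new INDArray[] {
        Nd4j.create(new float[] { 1f, 2f, 3f }),
        Nd4j.create(new float[] { 3f, 4f, 5f })
};
// NDArrayFactory declares average(INDArray target, INDArray[] arrays); with the CUDA
// backend this dispatches to the method above and returns the populated target
INDArray mean = Nd4j.factory().average(target, inputs);
// mean == target, holding { 2f, 3f, 4f }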

Example 17 with AllocationPoint

Use of org.nd4j.jita.allocator.impl.AllocationPoint in project nd4j by deeplearning4j.

From class AsynchronousFlowControllerTest, method testDependencies4.

@Test
public void testDependencies4() throws Exception {
    INDArray arrayWrite = Nd4j.create(new float[] { 1f, 2f, 3f });
    INDArray array = Nd4j.create(new float[] { 1f, 2f, 3f });
    // toString() forces synchronization, ensuring the activeWrite caused by array creation has completed
    String arrayContents = array.toString();
    AllocationPoint point = allocator.getAllocationPoint(array);
    AllocationPoint pointWrite = allocator.getAllocationPoint(arrayWrite);
    assertPointHasNoDependencies(point);
    controller.cutTail();
    CudaContext context = controller.prepareAction(arrayWrite, array);
    controller.registerAction(context, arrayWrite, array);
    assertTrue(controller.hasActiveReads(point));
    assertFalse(controller.hasActiveReads(pointWrite));
    assertNotEquals(-1, controller.hasActiveWrite(pointWrite));
    Configuration configuration = CudaEnvironment.getInstance().getConfiguration();
    controller.sweepTail();
    assertTrue(controller.hasActiveReads(point));
    assertFalse(controller.hasActiveReads(pointWrite));
    assertNotEquals(-1, controller.hasActiveWrite(pointWrite));
    controller.sweepTail();
    assertTrue(controller.hasActiveReads(point));
    assertFalse(controller.hasActiveReads(pointWrite));
    assertNotEquals(-1, controller.hasActiveWrite(pointWrite));
    for (int i = 0; i < configuration.getCommandQueueLength(); i++) controller.sweepTail();
    assertPointHasNoDependencies(point);
    assertPointHasNoDependencies(pointWrite);
}
Also used : INDArray(org.nd4j.linalg.api.ndarray.INDArray) Configuration(org.nd4j.jita.conf.Configuration) CudaContext(org.nd4j.linalg.jcublas.context.CudaContext) AllocationPoint(org.nd4j.jita.allocator.impl.AllocationPoint) Test(org.junit.Test)
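
The assertPointHasNoDependencies(...) helper is defined elsewhere in the test class and is not part of this excerpt. Judging from the explicit assertions above, a plausible sketch of such a helper (an assumption, not the project's actual implementation) would be:

// hypothetical helper: a point with no dependencies has no queued reads and no pending
// write (hasActiveWrite is assumed to return -1 when nothing is pending, as asserted above)
protected void assertPointHasNoDependencies(AllocationPoint point) {
    assertFalse(controller.hasActiveReads(point));
    assertEquals(-1, controller.hasActiveWrite(point));
}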

Example 18 with AllocationPoint

Use of org.nd4j.jita.allocator.impl.AllocationPoint in project nd4j by deeplearning4j.

From class AsynchronousFlowControllerTest, method testDependencies1.

@Test
public void testDependencies1() throws Exception {
    INDArray array = Nd4j.create(new float[] { 1f, 2f, 3f });
    // toString() forces synchronization, ensuring the activeWrite caused by array creation has completed
    String arrayContents = array.toString();
    AllocationPoint point = allocator.getAllocationPoint(array);
    assertPointHasNoDependencies(point);
}
Also used : INDArray(org.nd4j.linalg.api.ndarray.INDArray) AllocationPoint(org.nd4j.jita.allocator.impl.AllocationPoint) Test(org.junit.Test)

Example 19 with AllocationPoint

Use of org.nd4j.jita.allocator.impl.AllocationPoint in project nd4j by deeplearning4j.

From class DelayedMemoryTest, method testDelayedAllocation4.

@Test
public void testDelayedAllocation4() throws Exception {
    INDArray array = Nd4j.create(new float[] { 1f, 2f, 3f, 4f, 5f });
    AllocationPoint pointer = AtomicAllocator.getInstance().getAllocationPoint(array);
    PointersPair pair = pointer.getPointers();
    // pointers should be equal; device memory hasn't been allocated yet
    assertEquals(pair.getDevicePointer(), pair.getHostPointer());
    assertEquals(2.0f, array.getFloat(1), 0.001f);
    assertEquals(pair.getDevicePointer(), pair.getHostPointer());
    String temp = System.getProperty("java.io.tmpdir");
    String outPath = FilenameUtils.concat(temp, "dl4jtestserialization.bin");
    try (DataOutputStream dos = new DataOutputStream(Files.newOutputStream(Paths.get(outPath)))) {
        Nd4j.write(array, dos);
    }
    INDArray in;
    try (DataInputStream dis = new DataInputStream(new FileInputStream(outPath))) {
        in = Nd4j.read(dis);
    }
    assertEquals(AtomicAllocator.getInstance().getAllocationPoint(in).getPointers().getDevicePointer(), AtomicAllocator.getInstance().getAllocationPoint(in).getPointers().getHostPointer());
    assertEquals(array, in);
}
Also used : INDArray(org.nd4j.linalg.api.ndarray.INDArray) PointersPair(org.nd4j.jita.allocator.pointers.PointersPair) DataOutputStream(java.io.DataOutputStream) AllocationPoint(org.nd4j.jita.allocator.impl.AllocationPoint) DataInputStream(java.io.DataInputStream) FileInputStream(java.io.FileInputStream) Test(org.junit.Test)

Example 20 with AllocationPoint

Use of org.nd4j.jita.allocator.impl.AllocationPoint in project nd4j by deeplearning4j.

From class DelayedMemoryTest, method testDelayedDup1.

@Test
public void testDelayedDup1() throws Exception {
    INDArray array = Nd4j.linspace(1, 1000, 1000).reshape(10, 10, 10);
    AllocationPoint pointShape = AtomicAllocator.getInstance().getAllocationPoint(array.shapeInfoDataBuffer());
    AllocationPoint pointArray = AtomicAllocator.getInstance().getAllocationPoint(array);
    assertEquals(AllocationStatus.HOST, pointArray.getAllocationStatus());
    assertEquals(AllocationStatus.HOST, pointShape.getAllocationStatus());
    float sum = array.sumNumber().floatValue();
    pointShape = AtomicAllocator.getInstance().getAllocationPoint(array.shapeInfoDataBuffer());
    pointArray = AtomicAllocator.getInstance().getAllocationPoint(array);
    assertEquals(AllocationStatus.DEVICE, pointArray.getAllocationStatus());
    assertEquals(AllocationStatus.CONSTANT, pointShape.getAllocationStatus());
    INDArray dup = array.dup();
    AllocationPoint dupShape = AtomicAllocator.getInstance().getAllocationPoint(dup.shapeInfoDataBuffer());
    AllocationPoint dupArray = AtomicAllocator.getInstance().getAllocationPoint(dup);
    assertEquals(AllocationStatus.DEVICE, dupArray.getAllocationStatus());
    assertEquals(AllocationStatus.CONSTANT, dupShape.getAllocationStatus());
}
Also used : INDArray(org.nd4j.linalg.api.ndarray.INDArray) AllocationPoint(org.nd4j.jita.allocator.impl.AllocationPoint) Test(org.junit.Test)
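
The allocation-status transitions this test relies on can be condensed into a shorter sketch. This is assumed behaviour mirrored from the assertions above (data buffers start in HOST memory, move to DEVICE after the first GPU-executed operation, and shape information ends up in CONSTANT memory), not an additional documented guarantee:

INDArray x = Nd4j.create(new float[] { 1f, 2f, 3f });
AllocationPoint p = AtomicAllocator.getInstance().getAllocationPoint(x);
assertEquals(AllocationStatus.HOST, p.getAllocationStatus());

// any GPU-executed op (here a reduction) is assumed to trigger the relocation
x.sumNumber();

p = AtomicAllocator.getInstance().getAllocationPoint(x);
assertEquals(AllocationStatus.DEVICE, p.getAllocationStatus());
assertEquals(AllocationStatus.CONSTANT,
        AtomicAllocator.getInstance().getAllocationPoint(x.shapeInfoDataBuffer()).getAllocationStatus());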

Aggregations

AllocationPoint (org.nd4j.jita.allocator.impl.AllocationPoint): 67 usages
INDArray (org.nd4j.linalg.api.ndarray.INDArray): 33 usages
Test (org.junit.Test): 31 usages
CudaContext (org.nd4j.linalg.jcublas.context.CudaContext): 24 usages
CudaPointer (org.nd4j.jita.allocator.pointers.CudaPointer): 15 usages
DataBuffer (org.nd4j.linalg.api.buffer.DataBuffer): 11 usages
ND4JIllegalStateException (org.nd4j.linalg.exception.ND4JIllegalStateException): 11 usages
AtomicAllocator (org.nd4j.jita.allocator.impl.AtomicAllocator): 7 usages
BaseCudaDataBuffer (org.nd4j.linalg.jcublas.buffer.BaseCudaDataBuffer): 7 usages
Pointer (org.bytedeco.javacpp.Pointer): 6 usages
AllocationShape (org.nd4j.jita.allocator.impl.AllocationShape): 5 usages
PointersPair (org.nd4j.jita.allocator.pointers.PointersPair): 5 usages
MemoryWorkspace (org.nd4j.linalg.api.memory.MemoryWorkspace): 4 usages
JCublasNDArray (org.nd4j.linalg.jcublas.JCublasNDArray): 3 usages
CudaDoubleDataBuffer (org.nd4j.linalg.jcublas.buffer.CudaDoubleDataBuffer): 3 usages
CompressedDataBuffer (org.nd4j.linalg.compression.CompressedDataBuffer): 2 usages
DeviceLocalNDArray (org.nd4j.linalg.util.DeviceLocalNDArray): 2 usages
DataInputStream (java.io.DataInputStream): 1 usage
DataOutputStream (java.io.DataOutputStream): 1 usage
FileInputStream (java.io.FileInputStream): 1 usage