Search in sources :

Example 26 with AllocationPoint

use of org.nd4j.jita.allocator.impl.AllocationPoint in project nd4j by deeplearning4j.

the class JCublasNDArray method leverageTo.

/**
 * Copies this array into the workspace identified by {@code id} and returns the copy.
 *
 * <p>The array is returned as is (no copy is made) when any of the following holds:
 * <ul>
 *   <li>this array is not attached;</li>
 *   <li>the workspace manager reports that no workspace with the given id exists;</li>
 *   <li>the target workspace is already the current workspace;</li>
 *   <li>the underlying data buffer's parent workspace is already the target workspace.</li>
 * </ul>
 *
 * <p>Otherwise the target workspace is made current for the duration of the copy and the
 * previously current workspace is restored afterwards, even when the copy fails with an
 * exception.
 *
 * @param id identifier of the workspace to copy this array into
 * @return this array when one of the skip conditions applies, otherwise the newly created copy
 * @throws ND4JIllegalStateException if the native memcpyAsync call returns 0
 */
@Override
public INDArray leverageTo(String id) {
    // detached arrays are left untouched
    if (!isAttached()) {
        return this;
    }
    // nothing to leverage to when the requested workspace does not exist
    if (!Nd4j.getWorkspaceManager().checkIfWorkspaceExists(id)) {
        return this;
    }
    MemoryWorkspace current = Nd4j.getMemoryManager().getCurrentWorkspace();
    MemoryWorkspace target = Nd4j.getWorkspaceManager().getWorkspaceForCurrentThread(id);
    // target workspace is already the active one
    if (current == target) {
        return this;
    }
    // the data buffer already belongs to the target workspace
    if (this.data.getParentWorkspace() == target) {
        return this;
    }
    Nd4j.getMemoryManager().setCurrentWorkspace(target);
    try {
        INDArray copy;
        if (!this.isView()) {
            Nd4j.getExecutioner().commit();
            // NOTE(review): the 'false' flag presumably skips buffer initialization - confirm against Nd4j.createBuffer
            DataBuffer buffer = Nd4j.createBuffer(this.lengthLong(), false);
            AllocationPoint pointDst = AtomicAllocator.getInstance().getAllocationPoint(buffer);
            AllocationPoint pointSrc = AtomicAllocator.getInstance().getAllocationPoint(this.data);
            CudaContext context = AtomicAllocator.getInstance().getFlowController().prepareAction(pointDst, pointSrc);
            long bytes = this.lengthLong() * Nd4j.sizeOfDataType(buffer.dataType());
            // copy from whichever side the source allocation point reports as actual
            if (pointSrc.isActualOnDeviceSide()) {
                if (NativeOpsHolder.getInstance().getDeviceNativeOps().memcpyAsync(pointDst.getDevicePointer(), pointSrc.getDevicePointer(), bytes, CudaConstants.cudaMemcpyDeviceToDevice, context.getOldStream()) == 0)
                    throw new ND4JIllegalStateException("memcpyAsync failed");
            } else {
                if (NativeOpsHolder.getInstance().getDeviceNativeOps().memcpyAsync(pointDst.getDevicePointer(), pointSrc.getHostPointer(), bytes, CudaConstants.cudaMemcpyHostToDevice, context.getOldStream()) == 0)
                    throw new ND4JIllegalStateException("memcpyAsync failed");
            }
            // wait for the async copy before the new buffer is wrapped into an array
            context.syncOldStream();
            copy = Nd4j.createArrayFromShapeBuffer(buffer, this.shapeInfoDataBuffer());
            // tag buffer as valid on device side
            pointDst.tickHostRead();
            pointDst.tickDeviceWrite();
            AtomicAllocator.getInstance().getFlowController().registerAction(context, pointDst, pointSrc);
        } else {
            // views are duplicated through the regular dup() path
            copy = this.dup(this.ordering());
            Nd4j.getExecutioner().commit();
        }
        return copy;
    } finally {
        // always restore the caller's workspace, including when the copy above throws
        Nd4j.getMemoryManager().setCurrentWorkspace(current);
    }
}
Also used : INDArray(org.nd4j.linalg.api.ndarray.INDArray) CudaContext(org.nd4j.linalg.jcublas.context.CudaContext) ND4JIllegalStateException(org.nd4j.linalg.exception.ND4JIllegalStateException) AllocationPoint(org.nd4j.jita.allocator.impl.AllocationPoint) MemoryWorkspace(org.nd4j.linalg.api.memory.MemoryWorkspace) DataBuffer(org.nd4j.linalg.api.buffer.DataBuffer)

Example 27 with AllocationPoint

use of org.nd4j.jita.allocator.impl.AllocationPoint in project nd4j by deeplearning4j.

the class JCublasNDArray method migrate.

/**
 * Pulls this INDArray into the current Workspace.
 *
 * <p>PLEASE NOTE: if there is no current Workspace, this INDArray is returned as is.
 * A view is duplicated via {@code dup(ordering())}; any other array is copied into a
 * freshly created buffer with a native memcpyAsync.
 *
 * @return this array when no workspace is current, otherwise the copy
 * @throws ND4JIllegalStateException if the native memcpyAsync call returns 0
 */
@Override
public INDArray migrate() {
    MemoryWorkspace workspace = Nd4j.getMemoryManager().getCurrentWorkspace();
    if (workspace == null) {
        return this;
    }

    // views take the plain duplication path
    if (this.isView()) {
        return this.dup(this.ordering());
    }

    Nd4j.getExecutioner().commit();

    DataBuffer freshBuffer = Nd4j.createBuffer(this.lengthLong(), false);
    AllocationPoint dstPoint = AtomicAllocator.getInstance().getAllocationPoint(freshBuffer);
    AllocationPoint srcPoint = AtomicAllocator.getInstance().getAllocationPoint(this.data);
    CudaContext ctx = AtomicAllocator.getInstance().getFlowController().prepareAction(dstPoint, srcPoint);

    long byteCount = this.lengthLong() * Nd4j.sizeOfDataType(freshBuffer.dataType());

    // the source side is chosen by what the source allocation point reports as actual
    if (!srcPoint.isActualOnDeviceSide()) {
        if (NativeOpsHolder.getInstance().getDeviceNativeOps().memcpyAsync(dstPoint.getDevicePointer(), srcPoint.getHostPointer(), byteCount, CudaConstants.cudaMemcpyHostToDevice, ctx.getOldStream()) == 0) {
            throw new ND4JIllegalStateException("memcpyAsync failed");
        }
    } else {
        if (NativeOpsHolder.getInstance().getDeviceNativeOps().memcpyAsync(dstPoint.getDevicePointer(), srcPoint.getDevicePointer(), byteCount, CudaConstants.cudaMemcpyDeviceToDevice, ctx.getOldStream()) == 0) {
            throw new ND4JIllegalStateException("memcpyAsync failed");
        }
    }
    ctx.syncOldStream();

    // align the destination's device id with the one reported by the current workspace
    if (dstPoint.getDeviceId() != Nd4j.getMemoryManager().getCurrentWorkspace().getDeviceId()) {
        dstPoint.setDeviceId(Nd4j.getMemoryManager().getCurrentWorkspace().getDeviceId());
    }

    INDArray migrated = Nd4j.createArrayFromShapeBuffer(freshBuffer, this.shapeInfoDataBuffer());

    // mark the new buffer as valid on the device side
    dstPoint.tickHostRead();
    dstPoint.tickDeviceWrite();
    AtomicAllocator.getInstance().getFlowController().registerAction(ctx, dstPoint, srcPoint);

    return migrated;
}
Also used : INDArray(org.nd4j.linalg.api.ndarray.INDArray) CudaContext(org.nd4j.linalg.jcublas.context.CudaContext) ND4JIllegalStateException(org.nd4j.linalg.exception.ND4JIllegalStateException) AllocationPoint(org.nd4j.jita.allocator.impl.AllocationPoint) MemoryWorkspace(org.nd4j.linalg.api.memory.MemoryWorkspace) DataBuffer(org.nd4j.linalg.api.buffer.DataBuffer)

Example 28 with AllocationPoint

use of org.nd4j.jita.allocator.impl.AllocationPoint in project nd4j by deeplearning4j.

the class CudaDirectProviderTest method mallocDevice.

/**
 * Allocates memory with {@link AllocationStatus#DEVICE} through a {@code CudaDirectProvider}
 * and frees it again, pausing for one second after each step.
 */
@Test
public void mallocDevice() throws Exception {
    final CudaDirectProvider directProvider = new CudaDirectProvider();

    // NOTE(review): arguments are presumably (length, element size, data type) - confirm against AllocationShape
    final AllocationShape allocShape = new AllocationShape(300000, 4, DataBuffer.Type.FLOAT);

    final AllocationPoint allocPoint = new AllocationPoint();
    allocPoint.setShape(allocShape);
    allocPoint.setPointers(directProvider.malloc(allocShape, allocPoint, AllocationStatus.DEVICE));

    System.out.println("Allocated...");
    Thread.sleep(1000);

    // the status is set on the point before it is handed to free()
    allocPoint.setAllocationStatus(AllocationStatus.DEVICE);
    directProvider.free(allocPoint);

    System.out.println("Deallocated...");
    Thread.sleep(1000);
}
Also used : AllocationShape(org.nd4j.jita.allocator.impl.AllocationShape) AllocationPoint(org.nd4j.jita.allocator.impl.AllocationPoint) Test(org.junit.Test)

Example 29 with AllocationPoint

use of org.nd4j.jita.allocator.impl.AllocationPoint in project nd4j by deeplearning4j.

the class AsynchronousFlowController method setWriteLane.

/**
 * Sets the given event as the write lane of the allocation point backing {@code array}.
 *
 * @param array array whose allocation point is looked up through the allocator
 * @param event event stored as the write lane of that allocation point
 */
protected void setWriteLane(INDArray array, cudaEvent_t event) {
    allocator.getAllocationPoint(array).setWriteLane(event);
}
Also used : AllocationPoint(org.nd4j.jita.allocator.impl.AllocationPoint)

Example 30 with AllocationPoint

use of org.nd4j.jita.allocator.impl.AllocationPoint in project nd4j by deeplearning4j.

the class AsynchronousFlowController method setReadLane.

/**
 * Adds the given event to the read lanes of the allocation point backing {@code array}.
 *
 * @param array array whose allocation point is looked up through the allocator
 * @param event event passed to {@code addReadLane} on that allocation point
 */
protected void setReadLane(INDArray array, cudaEvent_t event) {
    allocator.getAllocationPoint(array).addReadLane(event);
}
Also used : AllocationPoint(org.nd4j.jita.allocator.impl.AllocationPoint)

Aggregations

AllocationPoint (org.nd4j.jita.allocator.impl.AllocationPoint): 67; INDArray (org.nd4j.linalg.api.ndarray.INDArray): 33; Test (org.junit.Test): 31; CudaContext (org.nd4j.linalg.jcublas.context.CudaContext): 24; CudaPointer (org.nd4j.jita.allocator.pointers.CudaPointer): 15; DataBuffer (org.nd4j.linalg.api.buffer.DataBuffer): 11; ND4JIllegalStateException (org.nd4j.linalg.exception.ND4JIllegalStateException): 11; AtomicAllocator (org.nd4j.jita.allocator.impl.AtomicAllocator): 7; BaseCudaDataBuffer (org.nd4j.linalg.jcublas.buffer.BaseCudaDataBuffer): 7; Pointer (org.bytedeco.javacpp.Pointer): 6; AllocationShape (org.nd4j.jita.allocator.impl.AllocationShape): 5; PointersPair (org.nd4j.jita.allocator.pointers.PointersPair): 5; MemoryWorkspace (org.nd4j.linalg.api.memory.MemoryWorkspace): 4; JCublasNDArray (org.nd4j.linalg.jcublas.JCublasNDArray): 3; CudaDoubleDataBuffer (org.nd4j.linalg.jcublas.buffer.CudaDoubleDataBuffer): 3; CompressedDataBuffer (org.nd4j.linalg.compression.CompressedDataBuffer): 2; DeviceLocalNDArray (org.nd4j.linalg.util.DeviceLocalNDArray): 2; DataInputStream (java.io.DataInputStream): 1; DataOutputStream (java.io.DataOutputStream): 1; FileInputStream (java.io.FileInputStream): 1