Example 31 with CudaPointer

Use of org.nd4j.jita.allocator.pointers.CudaPointer in project nd4j by deeplearning4j.

The class BaseCudaDataBuffer, method reallocate:

@Override
public DataBuffer reallocate(long length) {
    // we want to be sure this array isn't used anywhere RIGHT AT THIS MOMENT
    Nd4j.getExecutioner().commit();
    AllocationPoint old = allocationPoint;
    allocationPoint = AtomicAllocator.getInstance().allocateMemory(this, new AllocationShape(length, elementSize, dataType()), false);
    trackingPoint = allocationPoint.getObjectId();
    switch(dataType()) {
        case DOUBLE:
            this.pointer = new CudaPointer(allocationPoint.getPointers().getHostPointer(), length, 0).asDoublePointer();
            indexer = DoubleIndexer.create((DoublePointer) pointer);
            break;
        case FLOAT:
            this.pointer = new CudaPointer(allocationPoint.getPointers().getHostPointer(), length, 0).asFloatPointer();
            indexer = FloatIndexer.create((FloatPointer) pointer);
            break;
        case HALF:
            this.pointer = new CudaPointer(allocationPoint.getPointers().getHostPointer(), length, 0).asShortPointer();
            indexer = ShortIndexer.create((ShortPointer) pointer);
            break;
        case INT:
            this.pointer = new CudaPointer(allocationPoint.getPointers().getHostPointer(), length, 0).asIntPointer();
            indexer = IntIndexer.create((IntPointer) pointer);
            break;
        default:
            throw new UnsupportedOperationException();
    }
    CudaContext context = (CudaContext) AtomicAllocator.getInstance().getDeviceContext().getContext();
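    // zero the freshly allocated device memory on the special stream before
    // copying the old contents over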
    NativeOpsHolder.getInstance().getDeviceNativeOps().memsetAsync(allocationPoint.getDevicePointer(), 0, length * elementSize, 0, context.getSpecialStream());
    if (old.isActualOnDeviceSide()) {
        NativeOpsHolder.getInstance().getDeviceNativeOps().memcpyAsync(allocationPoint.getDevicePointer(), old.getDevicePointer(), this.length * elementSize, CudaConstants.cudaMemcpyDeviceToDevice, context.getSpecialStream());
    } else if (old.isActualOnHostSide()) {
        NativeOpsHolder.getInstance().getDeviceNativeOps().memcpyAsync(allocationPoint.getDevicePointer(), old.getHostPointer(), this.length * elementSize, CudaConstants.cudaMemcpyHostToDevice, context.getSpecialStream());
    }
    context.getSpecialStream().synchronize();
    allocationPoint.tickDeviceWrite();
    // we're keeping pointer reference for JVM
    pointer.address();
    // this.length = length;
    if (isAttached()) {
        // nothing to free here: workspace-attached memory is managed by the workspace
    } else {
        AtomicAllocator.getInstance().freeMemory(old);
    }
    return this;
}
Also used : AllocationShape(org.nd4j.jita.allocator.impl.AllocationShape) CudaContext(org.nd4j.linalg.jcublas.context.CudaContext) AllocationPoint(org.nd4j.jita.allocator.impl.AllocationPoint) CudaPointer(org.nd4j.jita.allocator.pointers.CudaPointer)
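A minimal usage sketch (not from the project), assuming the nd4j-cuda backend is on the classpath so that Nd4j.createBuffer returns a BaseCudaDataBuffer subclass:

import org.nd4j.linalg.api.buffer.DataBuffer;
import org.nd4j.linalg.factory.Nd4j;

public class ReallocateSketch {
    public static void main(String[] args) {
        // small float buffer backed by paired host/device allocations
        DataBuffer buffer = Nd4j.createBuffer(new float[] {1f, 2f, 3f, 4f});

        // Grow the underlying allocation to 8 elements. Per the method above,
        // old contents are copied device-to-device (or host-to-device) on the
        // special stream, and the old allocation is freed unless the buffer is
        // attached to a workspace. reallocate leaves this.length untouched
        // (the assignment is commented out), so the logical length remains the
        // caller's responsibility.
        DataBuffer grown = buffer.reallocate(8);
    }
}

The commit() call at the top of reallocate flushes any queued operations first, which guarantees no kernel is still reading the old allocation when the copy starts.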

Example 32 with CudaPointer

Use of org.nd4j.jita.allocator.pointers.CudaPointer in project nd4j by deeplearning4j.

The class BaseCudaDataBuffer, method read:

@Override
public void read(DataInputStream s) {
    try {
        // log.info("Restoring CUDA databuffer");
        // skip allocationMode
        s.readUTF();
        allocationMode = AllocationMode.JAVACPP;
        int locLength = s.readInt();
        boolean reallocate = locLength != length || indexer == null;
        length = locLength;
        Type t = Type.valueOf(s.readUTF());
        // log.info("Restoring buffer ["+t+"] of length ["+ length+"]");
        if (globalType == null && Nd4j.dataType() != null) {
            globalType = Nd4j.dataType();
        }
        if (t != globalType && t != Type.INT && Nd4j.sizeOfDataType(globalType) < Nd4j.sizeOfDataType(t)) {
            log.warn("Loading a data stream with opType different from what is set globally. Expect precision loss");
            if (globalType == Type.INT)
                log.warn("Int to float/double widening UNSUPPORTED!!!");
        }
        if (t == Type.COMPRESSED) {
            type = t;
            return;
        } else if (t == Type.INT || globalType == Type.INT) {
            this.elementSize = 4;
            this.allocationPoint = AtomicAllocator.getInstance().allocateMemory(this, new AllocationShape(length, elementSize, t), false);
            this.trackingPoint = allocationPoint.getObjectId();
            // we keep int buffer's dtype after ser/de
            this.type = t;
            this.pointer = new CudaPointer(allocationPoint.getPointers().getHostPointer(), length).asIntPointer();
            indexer = IntIndexer.create((IntPointer) pointer);
            IntIndexer Iindexer = (IntIndexer) indexer;
            for (int i = 0; i < length(); i++) {
                if (t == Type.INT)
                    Iindexer.put(i, s.readInt());
                else if (t == Type.DOUBLE)
                    Iindexer.put(i, (int) s.readDouble());
                else if (t == Type.FLOAT)
                    Iindexer.put(i, (int) s.readFloat());
                else if (t == Type.HALF)
                    Iindexer.put(i, (int) toFloat((int) s.readShort()));
            }
            allocationPoint.tickHostWrite();
        } else if (globalType == Type.DOUBLE) {
            this.elementSize = 8;
            if (reallocate) {
                MemoryWorkspace workspace = Nd4j.getMemoryManager().getCurrentWorkspace();
                if (workspace instanceof DummyWorkspace) {
                    this.attached = true;
                    this.parentWorkspace = workspace;
                    workspaceGenerationId = workspace.getGenerationId();
                }
                this.allocationPoint = AtomicAllocator.getInstance().allocateMemory(this, new AllocationShape(length, elementSize, globalType), false);
                // allocationPoint.attachBuffer(this);
                this.trackingPoint = allocationPoint.getObjectId();
                this.pointer = new CudaPointer(allocationPoint.getPointers().getHostPointer(), length).asDoublePointer();
                indexer = DoubleIndexer.create((DoublePointer) pointer);
            }
            DoubleIndexer Dindexer = (DoubleIndexer) indexer;
            for (int i = 0; i < length(); i++) {
                if (t == Type.DOUBLE)
                    Dindexer.put(i, s.readDouble());
                else if (t == Type.FLOAT)
                    Dindexer.put(i, (double) s.readFloat());
                else if (t == Type.HALF)
                    Dindexer.put(i, (double) toFloat((int) s.readShort()));
            }
            allocationPoint.tickHostWrite();
        } else if (globalType == Type.FLOAT) {
            this.elementSize = 4;
            if (reallocate) {
                this.allocationPoint = AtomicAllocator.getInstance().allocateMemory(this, new AllocationShape(length, elementSize, dataType()), false);
                this.trackingPoint = allocationPoint.getObjectId();
                this.pointer = new CudaPointer(allocationPoint.getPointers().getHostPointer(), length).asFloatPointer();
                indexer = FloatIndexer.create((FloatPointer) pointer);
            }
            FloatIndexer Findexer = (FloatIndexer) indexer;
            for (int i = 0; i < length; i++) {
                if (t == Type.DOUBLE)
                    Findexer.put(i, (float) s.readDouble());
                else if (t == Type.FLOAT)
                    Findexer.put(i, s.readFloat());
                else if (t == Type.HALF) {
                    Findexer.put(i, toFloat((int) s.readShort()));
                }
            }
            allocationPoint.tickHostWrite();
        } else if (globalType == Type.HALF) {
            this.elementSize = 2;
            if (reallocate) {
                this.allocationPoint = AtomicAllocator.getInstance().allocateMemory(this, new AllocationShape(length, elementSize, dataType()), false);
                this.trackingPoint = allocationPoint.getObjectId();
                this.pointer = new CudaPointer(allocationPoint.getPointers().getHostPointer(), length).asShortPointer();
                indexer = HalfIndexer.create((ShortPointer) this.pointer);
            }
            HalfIndexer Hindexer = (HalfIndexer) indexer;
            for (int i = 0; i < length; i++) {
                if (t == Type.DOUBLE)
                    Hindexer.put(i, (float) s.readDouble());
                else if (t == Type.FLOAT)
                    Hindexer.put(i, s.readFloat());
                else if (t == Type.HALF) {
                    Hindexer.put(i, toFloat((int) s.readShort()));
                }
            }
            // for HALF & HALF2 datatype we just tag data as fresh on host
            allocationPoint.tickHostWrite();
        } else
            throw new IllegalStateException("Unknown dataType: [" + t.toString() + "]");
    /*
            this.wrappedBuffer = this.pointer.asByteBuffer();
            this.wrappedBuffer.order(ByteOrder.nativeOrder());
            */
    } catch (Exception e) {
        throw new RuntimeException(e);
    }
    // sync the restored host-side data over to the device
    AtomicAllocator.getInstance().getFlowController().synchronizeToDevice(allocationPoint);
// allocator.synchronizeHostData(this);
}
Also used : DummyWorkspace(org.nd4j.linalg.memory.abstracts.DummyWorkspace) AllocationShape(org.nd4j.jita.allocator.impl.AllocationShape) AllocationPoint(org.nd4j.jita.allocator.impl.AllocationPoint) IOException(java.io.IOException) MemoryWorkspace(org.nd4j.linalg.api.memory.MemoryWorkspace) CudaPointer(org.nd4j.jita.allocator.pointers.CudaPointer)
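A hedged round-trip sketch (not from the project): serialize a buffer and restore it through read. It assumes the write(DataOutputStream)/read(DataInputStream) pair that BaseDataBuffer exposes, and a CUDA backend on the classpath:

import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.DataInputStream;
import java.io.DataOutputStream;
import java.io.IOException;
import org.nd4j.linalg.api.buffer.DataBuffer;
import org.nd4j.linalg.factory.Nd4j;

public class BufferSerdeSketch {
    public static void main(String[] args) throws IOException {
        DataBuffer original = Nd4j.createBuffer(new float[] {1f, 2f, 3f});

        // Serialize: allocation mode (UTF), length (int), type tag (UTF),
        // then the raw elements -- the layout read() consumes above.
        ByteArrayOutputStream bos = new ByteArrayOutputStream();
        try (DataOutputStream out = new DataOutputStream(bos)) {
            original.write(out);
        }

        // Restore into another buffer; read() reallocates if the incoming
        // length or type differs, fills the host side, then syncs to device.
        DataBuffer restored = Nd4j.createBuffer(new float[3]);
        try (DataInputStream in = new DataInputStream(new ByteArrayInputStream(bos.toByteArray()))) {
            restored.read(in);
        }
    }
}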

Example 33 with CudaPointer

Use of org.nd4j.jita.allocator.pointers.CudaPointer in project nd4j by deeplearning4j.

The class BaseCudaDataBuffer, method set:

/**
 * PLEASE NOTE: length, srcOffset and dstOffset are element counts, not byte offsets.
 *
 * @param data      the source array to copy from
 * @param length    the number of elements to copy
 * @param srcOffset the element offset into the source array
 * @param dstOffset the element offset into this buffer
 */
public void set(double[] data, long length, long srcOffset, long dstOffset) {
    // TODO: make sure getPointer returns proper pointer
    if (dataType() == Type.DOUBLE) {
        DoublePointer pointer = new DoublePointer(data);
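        // note: as quoted from the project source, the pointer arithmetic below
        // advances the *source* by dstOffset; srcOffset is not used in this method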
        Pointer srcPtr = new CudaPointer(pointer.address() + (dstOffset * elementSize));
        allocator.memcpyAsync(this, srcPtr, length * elementSize, dstOffset * elementSize);
        // we're keeping pointer reference for JVM
        pointer.address();
    } else if (dataType() == Type.FLOAT) {
        FloatPointer pointer = new FloatPointer(ArrayUtil.toFloats(data));
        Pointer srcPtr = new CudaPointer(pointer.address() + (dstOffset * elementSize));
        allocator.memcpyAsync(this, srcPtr, length * elementSize, dstOffset * elementSize);
        // we're keeping pointer reference for JVM
        pointer.address();
    } else if (dataType() == Type.INT) {
        IntPointer pointer = new IntPointer(ArrayUtil.toInts(data));
        Pointer srcPtr = new CudaPointer(pointer.address() + (dstOffset * elementSize));
        allocator.memcpyAsync(this, srcPtr, length * elementSize, dstOffset * elementSize);
        // we're keeping pointer reference for JVM
        pointer.address();
    } else if (dataType() == Type.HALF) {
        ShortPointer pointer = new ShortPointer(ArrayUtil.toHalfs(data));
        Pointer srcPtr = new CudaPointer(pointer.address() + (dstOffset * elementSize));
        allocator.memcpyAsync(this, srcPtr, length * elementSize, dstOffset * elementSize);
        // we're keeping pointer reference for JVM
        pointer.address();
    }
}
Also used : CudaPointer (org.nd4j.jita.allocator.pointers.CudaPointer)

Example 34 with CudaPointer

Use of org.nd4j.jita.allocator.pointers.CudaPointer in project nd4j by deeplearning4j.

The class BaseCudaDataBuffer, method set:

/**
 * PLEASE NOTE: length, srcOffset and dstOffset are element counts, not byte offsets.
 *
 * @param data      the source array to copy from
 * @param length    the number of elements to copy
 * @param srcOffset the element offset into the source array
 * @param dstOffset the element offset into this buffer
 */
public void set(float[] data, long length, long srcOffset, long dstOffset) {
    // log.info("Set called");
    if (dataType() == Type.DOUBLE) {
        // Pointer dstPtr = dstOffset > 0 ? new Pointer(allocator.getPointer(this).address()).withByteOffset(dstOffset * 4) : new Pointer(allocator.getPointer(this).address());
        // Pointer srcPtr = srcOffset > 0 ? Pointer.to(ArrayUtil.toDoubles(data)).withByteOffset(srcOffset * elementSize) : Pointer.to(ArrayUtil.toDoubles(data));
        DoublePointer pointer = new DoublePointer(ArrayUtil.toDoubles(data));
        Pointer srcPtr = new CudaPointer(pointer.address() + (dstOffset * elementSize));
        allocator.memcpyAsync(this, srcPtr, length * elementSize, dstOffset * elementSize);
        // we're keeping pointer reference for JVM
        pointer.address();
    } else if (dataType() == Type.FLOAT) {
        // Pointer srcPtr = srcOffset > 0 ? Pointer.to(data).withByteOffset(srcOffset * elementSize) : Pointer.to(data);
        FloatPointer pointer = new FloatPointer(data);
        Pointer srcPtr = new CudaPointer(pointer.address() + (dstOffset * elementSize));
        // log.info("Memcpy params: byteLength: ["+(length * elementSize)+"], srcOffset: ["+(srcOffset * elementSize)+"], dstOffset: [" +(dstOffset* elementSize) + "]" );
        allocator.memcpyAsync(this, srcPtr, length * elementSize, dstOffset * elementSize);
        // we're keeping pointer reference for JVM
        pointer.address();
    } else if (dataType() == Type.INT) {
        // Pointer srcPtr = srcOffset > 0 ? Pointer.to(ArrayUtil.toInts(data)).withByteOffset(srcOffset * elementSize) : Pointer.to(ArrayUtil.toInts(data));
        IntPointer pointer = new IntPointer(ArrayUtil.toInts(data));
        Pointer srcPtr = new CudaPointer(pointer.address() + (dstOffset * elementSize));
        allocator.memcpyAsync(this, srcPtr, length * elementSize, dstOffset * elementSize);
        // we're keeping pointer reference for JVM
        pointer.address();
    } else if (dataType() == Type.HALF) {
        ShortPointer pointer = new ShortPointer(ArrayUtil.toHalfs(data));
        Pointer srcPtr = new CudaPointer(pointer.address() + (dstOffset * elementSize));
        allocator.memcpyAsync(this, srcPtr, length * elementSize, dstOffset * elementSize);
        // we're keeping pointer reference for JVM
        pointer.address();
    }
}
Also used : CudaPointer (org.nd4j.jita.allocator.pointers.CudaPointer)
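A minimal usage sketch (not part of the project) covering the two set overloads above; the cast assumes the nd4j-cuda backend, where Nd4j.createBuffer returns a BaseCudaDataBuffer:

import org.nd4j.linalg.factory.Nd4j;
import org.nd4j.linalg.jcublas.buffer.BaseCudaDataBuffer;

public class SetSketch {
    public static void main(String[] args) {
        // zero-filled FLOAT buffer of 8 elements
        BaseCudaDataBuffer buffer = (BaseCudaDataBuffer) Nd4j.createBuffer(new float[8]);

        float[] chunk = {10f, 20f, 30f, 40f};

        // Copy all 4 elements of chunk into the buffer starting at element 2.
        // Internally the array is wrapped in a FloatPointer, its address is
        // turned into a CudaPointer, and an async memcpy is issued; offsets
        // are element counts, converted to bytes via elementSize.
        buffer.set(chunk, chunk.length, 0, 2);
    }
}

Note the conversion cost in the non-matching branches: a double[] fed to a FLOAT buffer goes through ArrayUtil.toFloats (and similarly toInts/toHalfs), which allocates a temporary host array before the copy.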

Example 35 with CudaPointer

Use of org.nd4j.jita.allocator.pointers.CudaPointer in project nd4j by deeplearning4j.

The class BaseCudaDataBuffer, method dup:

@Override
public DataBuffer dup() {
    allocator.synchronizeHostData(this);
    DataBuffer buffer = create(this.length);
    allocator.memcpyBlocking(buffer, new CudaPointer(allocator.getHostPointer(this).address()), this.length * elementSize, 0);
    return buffer;
}
Also used : CudaPointer(org.nd4j.jita.allocator.pointers.CudaPointer) DataBuffer(org.nd4j.linalg.api.buffer.DataBuffer) BaseDataBuffer(org.nd4j.linalg.api.buffer.BaseDataBuffer)
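A short usage sketch (not from the project): dup() synchronizes the host side first, then performs a blocking copy into a freshly created buffer, so the result is fully independent of the original. Assumes a CUDA backend:

import org.nd4j.linalg.api.buffer.DataBuffer;
import org.nd4j.linalg.factory.Nd4j;

public class DupSketch {
    public static void main(String[] args) {
        DataBuffer original = Nd4j.createBuffer(new float[] {1f, 2f, 3f});

        // deep copy via memcpyBlocking from the original's host pointer
        DataBuffer copy = original.dup();

        copy.put(0, 99f);                         // mutates the copy only
        System.out.println(original.getFloat(0)); // still 1.0
    }
}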

Aggregations

CudaPointer (org.nd4j.jita.allocator.pointers.CudaPointer): 47 usages
CudaContext (org.nd4j.linalg.jcublas.context.CudaContext): 27 usages
AllocationPoint (org.nd4j.jita.allocator.impl.AllocationPoint): 20 usages
Pointer (org.bytedeco.javacpp.Pointer): 18 usages
DataBuffer (org.nd4j.linalg.api.buffer.DataBuffer): 18 usages
INDArray (org.nd4j.linalg.api.ndarray.INDArray): 15 usages
cusolverDnHandle_t (org.nd4j.jita.allocator.pointers.cuda.cusolverDnHandle_t): 12 usages
GridExecutioner (org.nd4j.linalg.api.ops.executioner.GridExecutioner): 11 usages
DoublePointer (org.bytedeco.javacpp.DoublePointer): 10 usages
FloatPointer (org.bytedeco.javacpp.FloatPointer): 10 usages
IntPointer (org.bytedeco.javacpp.IntPointer): 10 usages
CUstream_st (org.bytedeco.javacpp.cuda.CUstream_st): 10 usages
ND4JIllegalStateException (org.nd4j.linalg.exception.ND4JIllegalStateException): 10 usages
CublasPointer (org.nd4j.linalg.jcublas.CublasPointer): 10 usages
BlasException (org.nd4j.linalg.api.blas.BlasException): 8 usages
BaseCudaDataBuffer (org.nd4j.linalg.jcublas.buffer.BaseCudaDataBuffer): 7 usages
AllocationShape (org.nd4j.jita.allocator.impl.AllocationShape): 4 usages
AtomicAllocator (org.nd4j.jita.allocator.impl.AtomicAllocator): 4 usages
BaseDataBuffer (org.nd4j.linalg.api.buffer.BaseDataBuffer): 4 usages
INDArrayIndex (org.nd4j.linalg.indexing.INDArrayIndex): 4 usages