Use of org.nd4j.jita.allocator.impl.AllocationPoint in project nd4j by deeplearning4j.
The class JCublasNDArrayFactory, method convertDataEx:
@Override
public DataBuffer convertDataEx(DataBuffer.TypeEx typeSrc, DataBuffer source, DataBuffer.TypeEx typeDst) {
    int elementSize = 0;
    if (typeDst.ordinal() <= 2)
        elementSize = 1;
    else if (typeDst.ordinal() <= 5)
        elementSize = 2;
    else if (typeDst.ordinal() == 6)
        elementSize = 4;
    else if (typeDst.ordinal() == 7)
        elementSize = 8;
    else
        throw new UnsupportedOperationException("Unknown target TypeEx: " + typeDst.name());

    // flushQueue should be blocking here, because typeConversion happens on cpu side
    Nd4j.getExecutioner().commit();

    DataBuffer buffer = null;

    if (!(source instanceof CompressedDataBuffer))
        AtomicAllocator.getInstance().synchronizeHostData(source);

    if (CompressionUtils.goingToCompress(typeSrc, typeDst)) {
        // all types below 8 are compression modes
        BytePointer pointer = new BytePointer(source.length() * elementSize);
        CompressionDescriptor descriptor = new CompressionDescriptor(source, typeDst.name());
        descriptor.setCompressionType(CompressionType.LOSSY);
        descriptor.setCompressedLength(source.length() * elementSize);
        buffer = new CompressedDataBuffer(pointer, descriptor);
    } else {
        // decompression mode
        CompressedDataBuffer compressed = (CompressedDataBuffer) source;
        CompressionDescriptor descriptor = compressed.getCompressionDescriptor();
        buffer = Nd4j.createBuffer(descriptor.getNumberOfElements(), false);
        AllocationPoint point = AtomicAllocator.getInstance().getAllocationPoint(buffer);
        point.tickHostWrite();
    }

    convertDataEx(typeSrc, source, typeDst, buffer);
    return buffer;
}
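A minimal calling sketch for the method above (not from the nd4j sources; the TypeEx constants below are assumptions inferred from the elementSize branches, where ordinals 0-2 are 1-byte compressed types such as FLOAT8, ordinals 3-5 are 2-byte types, 6 is FLOAT and 7 is DOUBLE):

// Hypothetical usage: compress a FLOAT buffer into a 1-byte lossy form,
// then route back through the decompression branch.
DataBuffer source = Nd4j.createBuffer(new float[] {1f, 2f, 3f, 4f});
DataBuffer compressed = Nd4j.factory().convertDataEx(
        DataBuffer.TypeEx.FLOAT, source, DataBuffer.TypeEx.FLOAT8);   // FLOAT8 assumed
DataBuffer restored = Nd4j.factory().convertDataEx(
        DataBuffer.TypeEx.FLOAT8, compressed, DataBuffer.TypeEx.FLOAT);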
Use of org.nd4j.jita.allocator.impl.AllocationPoint in project nd4j by deeplearning4j.
The class CudaExecutioner, method exec:
@Override
public <T extends Aggregate> void exec(Batch<T> batch) {
    DataBuffer surfaceBuffer = getBuffer(batch);
    CudaContext context = (CudaContext) AtomicAllocator.getInstance().getDeviceContext().getContext();
    IntPointer pointer = (IntPointer) new CudaPointer(AtomicAllocator.getInstance().getHostPointer(surfaceBuffer)).asIntPointer();
    AllocationPoint surfacePoint = AtomicAllocator.getInstance().getAllocationPoint(surfaceBuffer);

    int maxTypes = 5;
    int maxIntArrays = batch.getSample().maxIntArrays();
    int maxArraySize = batch.getSample().maxIntArraySize();

    int indexPos = maxTypes * (Batch.getBatchLimit() * 16);
    int intArraysPos = indexPos + (batch.getSample().maxIndexArguments() * (Batch.getBatchLimit() * 16));
    int realPos = (intArraysPos + (maxIntArrays * maxArraySize * (Batch.getBatchLimit() * 16))) / (Nd4j.dataType() == DataBuffer.Type.DOUBLE ? 2 : 1);
    if (Nd4j.dataType() == DataBuffer.Type.HALF)
        realPos *= 2;
    int argsPos = (realPos + (batch.getSample().maxRealArguments() * (Batch.getBatchLimit() * 16))) / (Nd4j.dataType() == DataBuffer.Type.FLOAT ? 2 : 1);
    if (Nd4j.dataType() == DataBuffer.Type.HALF)
        argsPos /= 4;
    int shapesPos = argsPos + (batch.getSample().maxArguments() * (Batch.getBatchLimit() * 16));

    for (int i = 0; i < batch.getNumAggregates(); i++) {
        T op = batch.getAggregates().get(i);

        // put num arguments
        int idx = i * maxTypes;
        pointer.put(idx, op.getArguments().size());
        pointer.put(idx + 1, op.getShapes().size());
        pointer.put(idx + 2, op.getIndexingArguments().size());
        pointer.put(idx + 3, op.getRealArguments().size());
        pointer.put(idx + 4, op.getIntArrayArguments().size());

        // putting indexing arguments
        for (int e = 0; e < op.getIndexingArguments().size(); e++) {
            idx = indexPos + i * batch.getSample().maxIndexArguments();
            pointer.put(idx + e, op.getIndexingArguments().get(e));
        }

        // putting intArray values
        int bsize = maxIntArrays * maxArraySize;
        for (int e = 0; e < op.getIntArrayArguments().size(); e++) {
            int step = (i * bsize) + (e * maxArraySize);
            if (op.getIntArrayArguments().get(e) != null)
                for (int x = 0; x < op.getIntArrayArguments().get(e).length; x++) {
                    idx = intArraysPos + step + x;
                    pointer.put(idx, op.getIntArrayArguments().get(e)[x]);
                }
        }

        // putting real arguments
        if (Nd4j.dataType() == DataBuffer.Type.FLOAT) {
            FloatPointer realPtr = new FloatPointer(pointer);
            for (int e = 0; e < op.getRealArguments().size(); e++) {
                idx = realPos + i * op.maxRealArguments();
                realPtr.put(idx + e, op.getRealArguments().get(e).floatValue());
            }
        } else if (Nd4j.dataType() == DataBuffer.Type.DOUBLE) {
            DoublePointer dPtr = new DoublePointer(pointer);
            for (int e = 0; e < op.getRealArguments().size(); e++) {
                idx = realPos + (i * op.maxRealArguments());
                dPtr.put(idx + e, op.getRealArguments().get(e).doubleValue());
            }
        } else if (Nd4j.dataType() == DataBuffer.Type.HALF) {
            ShortPointer sPtr = new ShortPointer(pointer);
            for (int e = 0; e < op.getRealArguments().size(); e++) {
                idx = realPos + (i * op.maxRealArguments());
                sPtr.put(idx + e, BaseDataBuffer.fromFloat(op.getRealArguments().get(e).floatValue()));
            }
        }

        // putting arguments pointers
        PointerPointer ptrPtr = new PointerPointer(pointer);
        for (int e = 0; e < op.getArguments().size(); e++) {
            idx = argsPos + i * batch.getSample().maxArguments();
            if (op.getArguments().get(e) != null) {
                ptrPtr.put(idx + e, AtomicAllocator.getInstance().getPointer(op.getArguments().get(e), context));
                AtomicAllocator.getInstance().getAllocationPoint(op.getArguments().get(e)).tickDeviceWrite();
            }
        }

        // putting shape pointers
        for (int e = 0; e < op.getShapes().size(); e++) {
            idx = shapesPos + i * batch.getSample().maxShapes();
            if (op.getShapes().get(e) != null) {
                ptrPtr.put(idx + e, AtomicAllocator.getInstance().getPointer(op.getShapes().get(e), context));
                AtomicAllocator.getInstance().getAllocationPoint(op.getShapes().get(e)).tickDeviceWrite();
            }
        }
    }

    // trigger write, so getPointer request will force relocation to GPU
    surfacePoint.tickHostWrite();

    PointerPointer extraArgs = new PointerPointer(32);
    extraArgs.put(0, null);
    extraArgs.put(1, context.getOldStream());
    extraArgs.put(2, new CudaPointer(Math.min(batch.getNumAggregates(), CudaEnvironment.getInstance().getConfiguration().getMaximumGridSize())));
    extraArgs.put(3, new CudaPointer(batch.getSample().getThreadsPerInstance()));
    extraArgs.put(4, new CudaPointer(batch.getSample().getSharedMemorySize()));

    if (Nd4j.dataType() == DataBuffer.Type.FLOAT) {
        nativeOps.execAggregateBatchFloat(extraArgs, batch.getNumAggregates(), batch.opNum(), batch.getSample().maxArguments(), batch.getSample().maxShapes(), batch.getSample().maxIntArrays(), batch.getSample().maxIntArraySize(), batch.getSample().maxIndexArguments(), batch.getSample().maxRealArguments(), AtomicAllocator.getInstance().getPointer(surfaceBuffer, context));
    } else if (Nd4j.dataType() == DataBuffer.Type.DOUBLE) {
        nativeOps.execAggregateBatchDouble(extraArgs, batch.getNumAggregates(), batch.opNum(), batch.getSample().maxArguments(), batch.getSample().maxShapes(), batch.getSample().maxIntArrays(), batch.getSample().maxIntArraySize(), batch.getSample().maxIndexArguments(), batch.getSample().maxRealArguments(), AtomicAllocator.getInstance().getPointer(surfaceBuffer, context));
    } else if (Nd4j.dataType() == DataBuffer.Type.HALF) {
        nativeOps.execAggregateBatchHalf(extraArgs, batch.getNumAggregates(), batch.opNum(), batch.getSample().maxArguments(), batch.getSample().maxShapes(), batch.getSample().maxIntArrays(), batch.getSample().maxIntArraySize(), batch.getSample().maxIndexArguments(), batch.getSample().maxRealArguments(), AtomicAllocator.getInstance().getPointer(surfaceBuffer, context));
    }

    surfacePoint.tickHostWrite();
}
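The batched path exists so that thousands of small aggregates reach the GPU as one kernel launch instead of thousands. A hedged usage sketch (it assumes the AggregateAxpy(x, y, alpha) aggregate and the static Batch.getBatches(...) helper shipped in nd4j of this vintage):

// Many tiny "y += alpha * x" updates, each far too small to keep a GPU busy.
List<Aggregate> ops = new ArrayList<>();
for (int i = 0; i < 1000; i++) {
    ops.add(new AggregateAxpy(Nd4j.rand(1, 128), Nd4j.zeros(1, 128), 0.5));
}
// getBatches(...) splits the list by Batch.getBatchLimit(); each Batch is
// handed to the exec(Batch) method above and becomes a single launch.
for (Batch<Aggregate> batch : Batch.getBatches(ops)) {
    Nd4j.getExecutioner().exec(batch);
}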
Use of org.nd4j.jita.allocator.impl.AllocationPoint in project nd4j by deeplearning4j.
The class BaseCudaDataBuffer, method reallocate:
@Override
public DataBuffer reallocate(long length) {
    // we want to be sure this array isn't used anywhere RIGHT AT THIS MOMENT
    Nd4j.getExecutioner().commit();

    AllocationPoint old = allocationPoint;
    allocationPoint = AtomicAllocator.getInstance().allocateMemory(this, new AllocationShape(length, elementSize, dataType()), false);
    trackingPoint = allocationPoint.getObjectId();

    switch (dataType()) {
        case DOUBLE:
            this.pointer = new CudaPointer(allocationPoint.getPointers().getHostPointer(), length, 0).asDoublePointer();
            indexer = DoubleIndexer.create((DoublePointer) pointer);
            break;
        case FLOAT:
            this.pointer = new CudaPointer(allocationPoint.getPointers().getHostPointer(), length, 0).asFloatPointer();
            indexer = FloatIndexer.create((FloatPointer) pointer);
            break;
        case HALF:
            this.pointer = new CudaPointer(allocationPoint.getPointers().getHostPointer(), length, 0).asShortPointer();
            indexer = ShortIndexer.create((ShortPointer) pointer);
            break;
        case INT:
            this.pointer = new CudaPointer(allocationPoint.getPointers().getHostPointer(), length, 0).asIntPointer();
            indexer = IntIndexer.create((IntPointer) pointer);
            break;
        default:
            throw new UnsupportedOperationException();
    }

    CudaContext context = (CudaContext) AtomicAllocator.getInstance().getDeviceContext().getContext();
    NativeOpsHolder.getInstance().getDeviceNativeOps().memsetAsync(allocationPoint.getDevicePointer(), 0, length * elementSize, 0, context.getSpecialStream());

    if (old.isActualOnDeviceSide()) {
        NativeOpsHolder.getInstance().getDeviceNativeOps().memcpyAsync(allocationPoint.getDevicePointer(), old.getDevicePointer(), this.length * elementSize, CudaConstants.cudaMemcpyDeviceToDevice, context.getSpecialStream());
    } else if (old.isActualOnHostSide()) {
        NativeOpsHolder.getInstance().getDeviceNativeOps().memcpyAsync(allocationPoint.getDevicePointer(), old.getHostPointer(), this.length * elementSize, CudaConstants.cudaMemcpyHostToDevice, context.getSpecialStream());
    }

    context.getSpecialStream().synchronize();
    allocationPoint.tickDeviceWrite();

    // we're keeping pointer reference for JVM
    pointer.address();

    // this.length = length;
    if (isAttached()) {
        // do nothing here, that's workspaces
    } else {
        AtomicAllocator.getInstance().freeMemory(old);
    }
    return this;
}
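Seen from the caller's side, reallocate(...) grows a buffer in place while preserving its contents; a short sketch (values illustrative only):

DataBuffer buffer = Nd4j.createBuffer(new float[] {1f, 2f, 3f});
// Grow to 8 elements: memsetAsync above zeroes the new device allocation,
// then the old contents are copied in on the special stream.
buffer = buffer.reallocate(8);
// A subsequent host read should observe [1, 2, 3, 0, 0, 0, 0, 0],
// since tickDeviceWrite() marks the device copy as the fresh one.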
Use of org.nd4j.jita.allocator.impl.AllocationPoint in project nd4j by deeplearning4j.
The class BaseCudaDataBuffer, method read:
@Override
public void read(DataInputStream s) {
    try {
        // log.info("Restoring CUDA databuffer");
        // skip allocationMode
        s.readUTF();
        allocationMode = AllocationMode.JAVACPP;
        int locLength = s.readInt();
        boolean reallocate = locLength != length || indexer == null;
        length = locLength;
        Type t = Type.valueOf(s.readUTF());
        // log.info("Restoring buffer ["+t+"] of length ["+ length+"]");
        if (globalType == null && Nd4j.dataType() != null) {
            globalType = Nd4j.dataType();
        }

        if (t != globalType && t != Type.INT && Nd4j.sizeOfDataType(globalType) < Nd4j.sizeOfDataType(t)) {
            log.warn("Loading a data stream with opType different from what is set globally. Expect precision loss");
            if (globalType == Type.INT)
                log.warn("Int to float/double widening UNSUPPORTED!!!");
        }

        if (t == Type.COMPRESSED) {
            type = t;
            return;
        } else if (t == Type.INT || globalType == Type.INT) {
            this.elementSize = 4;
            this.allocationPoint = AtomicAllocator.getInstance().allocateMemory(this, new AllocationShape(length, elementSize, t), false);
            this.trackingPoint = allocationPoint.getObjectId();
            // we keep int buffer's dtype after ser/de
            this.type = t;
            this.pointer = new CudaPointer(allocationPoint.getPointers().getHostPointer(), length).asIntPointer();
            indexer = IntIndexer.create((IntPointer) pointer);
            IntIndexer Iindexer = (IntIndexer) indexer;
            for (int i = 0; i < length(); i++) {
                if (t == Type.INT)
                    Iindexer.put(i, s.readInt());
                else if (t == Type.DOUBLE)
                    Iindexer.put(i, (int) s.readDouble());
                else if (t == Type.FLOAT)
                    Iindexer.put(i, (int) s.readFloat());
                else if (t == Type.HALF)
                    Iindexer.put(i, (int) toFloat((int) s.readShort()));
            }
            allocationPoint.tickHostWrite();
        } else if (globalType == Type.DOUBLE) {
            this.elementSize = 8;
            if (reallocate) {
                MemoryWorkspace workspace = Nd4j.getMemoryManager().getCurrentWorkspace();
                if (workspace != null && (workspace instanceof DummyWorkspace)) {
                    this.attached = true;
                    this.parentWorkspace = workspace;
                    workspaceGenerationId = workspace.getGenerationId();
                }
                this.allocationPoint = AtomicAllocator.getInstance().allocateMemory(this, new AllocationShape(length, elementSize, globalType), false);
                // allocationPoint.attachBuffer(this);
                this.trackingPoint = allocationPoint.getObjectId();
                this.pointer = new CudaPointer(allocationPoint.getPointers().getHostPointer(), length).asDoublePointer();
                indexer = DoubleIndexer.create((DoublePointer) pointer);
            }
            DoubleIndexer Dindexer = (DoubleIndexer) indexer;
            for (int i = 0; i < length(); i++) {
                if (t == Type.DOUBLE)
                    Dindexer.put(i, s.readDouble());
                else if (t == Type.FLOAT)
                    Dindexer.put(i, (double) s.readFloat());
                else if (t == Type.HALF)
                    Dindexer.put(i, (double) toFloat((int) s.readShort()));
            }
            allocationPoint.tickHostWrite();
        } else if (globalType == Type.FLOAT) {
            this.elementSize = 4;
            if (reallocate) {
                this.allocationPoint = AtomicAllocator.getInstance().allocateMemory(this, new AllocationShape(length, elementSize, dataType()), false);
                this.trackingPoint = allocationPoint.getObjectId();
                this.pointer = new CudaPointer(allocationPoint.getPointers().getHostPointer(), length).asFloatPointer();
                indexer = FloatIndexer.create((FloatPointer) pointer);
            }
            FloatIndexer Findexer = (FloatIndexer) indexer;
            for (int i = 0; i < length; i++) {
                if (t == Type.DOUBLE)
                    Findexer.put(i, (float) s.readDouble());
                else if (t == Type.FLOAT)
                    Findexer.put(i, s.readFloat());
                else if (t == Type.HALF)
                    Findexer.put(i, toFloat((int) s.readShort()));
            }
            allocationPoint.tickHostWrite();
        } else if (globalType == Type.HALF) {
            this.elementSize = 2;
            if (reallocate) {
                this.allocationPoint = AtomicAllocator.getInstance().allocateMemory(this, new AllocationShape(length, elementSize, dataType()), false);
                this.trackingPoint = allocationPoint.getObjectId();
                this.pointer = new CudaPointer(allocationPoint.getPointers().getHostPointer(), length).asShortPointer();
                indexer = HalfIndexer.create((ShortPointer) this.pointer);
            }
            HalfIndexer Hindexer = (HalfIndexer) indexer;
            for (int i = 0; i < length; i++) {
                if (t == Type.DOUBLE)
                    Hindexer.put(i, (float) s.readDouble());
                else if (t == Type.FLOAT)
                    Hindexer.put(i, s.readFloat());
                else if (t == Type.HALF)
                    Hindexer.put(i, toFloat((int) s.readShort()));
            }
            // for HALF & HALF2 datatype we just tag data as fresh on host
            allocationPoint.tickHostWrite();
        } else
            throw new IllegalStateException("Unknown dataType: [" + t.toString() + "]");
        /*
        this.wrappedBuffer = this.pointer.asByteBuffer();
        this.wrappedBuffer.order(ByteOrder.nativeOrder());
        */
    } catch (Exception e) {
        throw new RuntimeException(e);
    }

    // sync, so the freshly restored host-side data is propagated to the device
    AtomicAllocator.getInstance().getFlowController().synchronizeToDevice(allocationPoint);
    // allocator.synchronizeHostData(this);
}
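A round-trip sketch that exercises this read(...) path on the CUDA backend; Nd4j.write and Nd4j.read are the public entry points, and the stream plumbing is plain java.io:

INDArray arr = Nd4j.linspace(1, 8, 8);
ByteArrayOutputStream bos = new ByteArrayOutputStream();
// write(...) serializes allocationMode, length, type and the raw values
Nd4j.write(arr, new DataOutputStream(bos));
// restoring invokes BaseCudaDataBuffer.read(...) above: allocate through
// AtomicAllocator, fill the host side, tick the host-write flag
INDArray restored = Nd4j.read(new DataInputStream(new ByteArrayInputStream(bos.toByteArray())));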
Use of org.nd4j.jita.allocator.impl.AllocationPoint in project nd4j by deeplearning4j.
The class JCublasNDArray, method unsafeDuplication:
@Override
public INDArray unsafeDuplication(boolean blocking) {
    DataBuffer rb = Nd4j.getMemoryManager().getCurrentWorkspace() == null
            ? Nd4j.getDataBufferFactory().createSame(this.data, false)
            : Nd4j.getDataBufferFactory().createSame(this.data, false, Nd4j.getMemoryManager().getCurrentWorkspace());
    INDArray ret = Nd4j.createArrayFromShapeBuffer(rb, this.shapeInfoDataBuffer());

    if (blocking)
        Nd4j.getExecutioner().push();
    // Nd4j.getExecutioner().commit();

    AtomicAllocator allocator = AtomicAllocator.getInstance();
    CudaContext context = (CudaContext) allocator.getDeviceContext().getContext();
    AllocationPoint srcPoint = allocator.getAllocationPoint(this);
    AllocationPoint dstPoint = allocator.getAllocationPoint(ret);

    int route = 0;
    if (dstPoint.getAllocationStatus() == AllocationStatus.DEVICE && srcPoint.getAllocationStatus() == AllocationStatus.DEVICE) {
        // d2d copy
        route = 1;
        NativeOpsHolder.getInstance().getDeviceNativeOps().memcpyAsync(dstPoint.getDevicePointer(), srcPoint.getDevicePointer(), this.data.length() * this.data.getElementSize(), CudaConstants.cudaMemcpyDeviceToDevice, blocking ? context.getOldStream() : context.getSpecialStream());
        dstPoint.tickDeviceWrite();
    } else if (dstPoint.getAllocationStatus() == AllocationStatus.HOST && srcPoint.getAllocationStatus() == AllocationStatus.DEVICE) {
        // d2h copy
        route = 2;
        NativeOpsHolder.getInstance().getDeviceNativeOps().memcpyAsync(dstPoint.getHostPointer(), srcPoint.getDevicePointer(), this.data.length() * this.data.getElementSize(), CudaConstants.cudaMemcpyDeviceToHost, blocking ? context.getOldStream() : context.getSpecialStream());
        dstPoint.tickHostWrite();
    } else if (dstPoint.getAllocationStatus() == AllocationStatus.DEVICE && srcPoint.getAllocationStatus() == AllocationStatus.HOST) {
        // h2d copy
        route = 3;
        NativeOpsHolder.getInstance().getDeviceNativeOps().memcpyAsync(dstPoint.getDevicePointer(), srcPoint.getHostPointer(), this.data.length() * this.data.getElementSize(), CudaConstants.cudaMemcpyHostToDevice, blocking ? context.getOldStream() : context.getSpecialStream());
        dstPoint.tickDeviceWrite();
    } else {
        // h2h copy
        route = 4;
        NativeOpsHolder.getInstance().getDeviceNativeOps().memcpyAsync(dstPoint.getHostPointer(), srcPoint.getHostPointer(), this.data.length() * this.data.getElementSize(), CudaConstants.cudaMemcpyHostToHost, blocking ? context.getOldStream() : context.getSpecialStream());
        dstPoint.tickHostWrite();
    }

    if (blocking)
        context.syncOldStream();
    else
        context.syncSpecialStream();

    /*
    long time2 = System.currentTimeMillis();
    long bytes = this.data.length() * this.data.getElementSize();
    long spent = time2 - time1;
    float bw = (1000 * bytes / spent) / 1024 / 1024.0f / 1024; //1000 / spent * bytes / 1024 / 1024 / 1024;
    log.info("Route: [{}]; Blocking: {}; {} bytes; {} ms; Bandwidth: {} GB/s", route, blocking, bytes, spent, String.format("%.2f", bw));
    */
    return ret;
}
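For contrast with the safe dup(), a usage sketch; the method is "unsafe" because the non-blocking variant returns before the copy is guaranteed to have completed:

INDArray source = Nd4j.rand(4, 4);
// Blocking: pushes queued ops, copies on the main stream, syncs before returning.
INDArray copy = source.unsafeDuplication(true);
// Non-blocking: copies on the special stream; do not touch `fast` until
// the caller has ensured synchronization.
INDArray fast = source.unsafeDuplication(false);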