Search in sources :

Example 6 with AllocationShape

use of org.nd4j.jita.allocator.impl.AllocationShape in project nd4j by deeplearning4j.

the class CudaCachingZeroProvider method free.

/**
 * Releases the memory chunk described by the given AllocationPoint.
 *
 * PLEASE NOTE: for allocations that are not in DEVICE state this method may skip the actual
 * release: the host chunk is zero-filled and kept in the per-shape cache for future reuse.
 *
 * @param point allocation point describing the chunk to release
 */
@Override
public void free(AllocationPoint point) {
    // DEVICE allocations are never cached here: delegate straight to the parent provider
    if (point.getAllocationStatus() == AllocationStatus.DEVICE) {
        super.free(point);
        return;
    }
    AllocationShape shape = point.getShape();
    long reqMemory = AllocationUtils.getRequiredMemory(shape);
    // we don't cache too big objects, and we stop caching once the host cache limit is reached
    if (reqMemory > CudaEnvironment.getInstance().getConfiguration().getMaximumHostCacheableLength() || zeroCachedAmount.get() >= CudaEnvironment.getInstance().getConfiguration().getMaximumHostCache()) {
        // log.info("HOST memory purging: {} bytes; MS: {}; MT: {}", reqMemory, MAX_SINGLE_ALLOCATION, MAX_CACHED_MEMORY);
        super.free(point);
        return;
    }
    ensureCacheHolder(shape);
    CacheHolder cache = zeroCache.get(shape);
    // Chunks up to FORCED_CACHE_THRESHOLD are cached unconditionally. The per-bucket limit that
    // used to apply to larger chunks (cacheDepth < MAX_CACHED_MEMORY / cacheHeight) is disabled,
    // so larger chunks currently take exactly the same path.
    // The chunk is zeroed before it goes into the cache.
    Pointer.memset(point.getHostPointer(), 0, reqMemory);
    cache.put(new CudaPointer(point.getHostPointer().address()));
}
Also used : AllocationShape(org.nd4j.jita.allocator.impl.AllocationShape) CudaPointer(org.nd4j.jita.allocator.pointers.CudaPointer)

Example 7 with AllocationShape

use of org.nd4j.jita.allocator.impl.AllocationShape in project nd4j by deeplearning4j.

the class BaseCudaDataBuffer method reallocate.

/**
 * Allocates a new chunk of {@code length} elements for this buffer, copies the existing
 * contents into it on the device side and releases the previous allocation (unless the
 * buffer is attached to a workspace).
 *
 * The new device memory is zero-filled first, then at most {@code min(oldLength, length)}
 * elements are copied over, so shrinking the buffer never writes past the new allocation.
 *
 * NOTE: the {@code length} field of this buffer is not updated by this method.
 *
 * @param length number of elements in the new allocation
 * @return this buffer
 * @throws UnsupportedOperationException if the buffer's data type is not DOUBLE, FLOAT, HALF or INT
 */
@Override
public DataBuffer reallocate(long length) {
    // we want to be sure this array isn't used anywhere RIGHT AT THIS MOMENT
    Nd4j.getExecutioner().commit();
    AllocationPoint old = allocationPoint;
    allocationPoint = AtomicAllocator.getInstance().allocateMemory(this, new AllocationShape(length, elementSize, dataType()), false);
    trackingPoint = allocationPoint.getObjectId();
    switch(dataType()) {
        case DOUBLE:
            this.pointer = new CudaPointer(allocationPoint.getPointers().getHostPointer(), length, 0).asDoublePointer();
            indexer = DoubleIndexer.create((DoublePointer) pointer);
            break;
        case FLOAT:
            this.pointer = new CudaPointer(allocationPoint.getPointers().getHostPointer(), length, 0).asFloatPointer();
            indexer = FloatIndexer.create((FloatPointer) pointer);
            break;
        case HALF:
            this.pointer = new CudaPointer(allocationPoint.getPointers().getHostPointer(), length, 0).asShortPointer();
            // HALF buffers are indexed through HalfIndexer everywhere else in this class (see read()),
            // so the reallocated buffer has to use the same indexer type
            indexer = HalfIndexer.create((ShortPointer) pointer);
            break;
        case INT:
            this.pointer = new CudaPointer(allocationPoint.getPointers().getHostPointer(), length, 0).asIntPointer();
            indexer = IntIndexer.create((IntPointer) pointer);
            break;
        default:
            throw new UnsupportedOperationException();
    }
    CudaContext context = (CudaContext) AtomicAllocator.getInstance().getDeviceContext().getContext();
    // zero the whole new chunk, so the tail stays defined when the buffer grows
    NativeOpsHolder.getInstance().getDeviceNativeOps().memsetAsync(allocationPoint.getDevicePointer(), 0, length * elementSize, 0, context.getSpecialStream());
    // never copy more than the new chunk can hold
    long bytesToCopy = Math.min(this.length, length) * elementSize;
    if (old.isActualOnDeviceSide()) {
        NativeOpsHolder.getInstance().getDeviceNativeOps().memcpyAsync(allocationPoint.getDevicePointer(), old.getDevicePointer(), bytesToCopy, CudaConstants.cudaMemcpyDeviceToDevice, context.getSpecialStream());
    } else if (old.isActualOnHostSide()) {
        NativeOpsHolder.getInstance().getDeviceNativeOps().memcpyAsync(allocationPoint.getDevicePointer(), old.getHostPointer(), bytesToCopy, CudaConstants.cudaMemcpyHostToDevice, context.getSpecialStream());
    }
    // wait for memset/memcpy to finish before the old chunk may be released
    context.getSpecialStream().synchronize();
    allocationPoint.tickDeviceWrite();
    // we're keeping pointer reference for JVM
    pointer.address();
    // this.length = length;
    if (!isAttached()) {
        // attached buffers belong to a workspace, which manages the old chunk itself
        AtomicAllocator.getInstance().freeMemory(old);
    }
    return this;
}
Also used : AllocationShape(org.nd4j.jita.allocator.impl.AllocationShape) CudaContext(org.nd4j.linalg.jcublas.context.CudaContext) AllocationPoint(org.nd4j.jita.allocator.impl.AllocationPoint) CudaPointer(org.nd4j.jita.allocator.pointers.CudaPointer)

Example 8 with AllocationShape

use of org.nd4j.jita.allocator.impl.AllocationShape in project nd4j by deeplearning4j.

the class BaseCudaDataBuffer method read.

/**
 * Restores this buffer from the given stream.
 *
 * The stream is consumed in this order: a UTF string (allocation mode, ignored), an int
 * element count, a UTF string naming the stored data type, and then one value per element
 * encoded in that stored type. Values are converted to the target type while reading:
 * INT when either the stored or the global type is INT, otherwise the global data type.
 * For a COMPRESSED stored type only the type is recorded and the method returns early.
 *
 * After a successful read the data is marked as written on host and synchronized to device.
 *
 * @param s stream to read the buffer from
 * @throws RuntimeException wrapping any exception raised while reading
 */
@Override
public void read(DataInputStream s) {
    try {
        // log.info("Restoring CUDA databuffer");
        // skip allocationMode
        s.readUTF();
        allocationMode = AllocationMode.JAVACPP;
        int locLength = s.readInt();
        // existing memory is reused only when the length matches and an indexer is already set up
        boolean reallocate = locLength != length || indexer == null;
        length = locLength;
        Type t = Type.valueOf(s.readUTF());
        // log.info("Restoring buffer ["+t+"] of length ["+ length+"]");
        if (globalType == null && Nd4j.dataType() != null) {
            globalType = Nd4j.dataType();
        }
        if (t != globalType && t != Type.INT && Nd4j.sizeOfDataType(globalType) < Nd4j.sizeOfDataType(t)) {
            log.warn("Loading a data stream with opType different from what is set globally. Expect precision loss");
            if (globalType == Type.INT)
                log.warn("Int to float/double widening UNSUPPORTED!!!");
        }
        if (t == Type.COMPRESSED) {
            type = t;
            return;
        } else if (t == Type.INT || globalType == Type.INT) {
            // INT target: memory is always allocated anew in this branch
            this.elementSize = 4;
            this.allocationPoint = AtomicAllocator.getInstance().allocateMemory(this, new AllocationShape(length, elementSize, t), false);
            this.trackingPoint = allocationPoint.getObjectId();
            // we keep int buffer's dtype after ser/de
            this.type = t;
            this.pointer = new CudaPointer(allocationPoint.getPointers().getHostPointer(), length).asIntPointer();
            indexer = IntIndexer.create((IntPointer) pointer);
            IntIndexer Iindexer = (IntIndexer) indexer;
            for (int i = 0; i < length(); i++) {
                if (t == Type.INT) {
                    Iindexer.put(i, s.readInt());
                } else if (t == Type.DOUBLE) {
                    Iindexer.put(i, (int) s.readDouble());
                } else if (t == Type.FLOAT) {
                    Iindexer.put(i, (int) s.readFloat());
                } else if (t == Type.HALF) {
                    Iindexer.put(i, (int) toFloat((int) s.readShort()));
                }
            }
            allocationPoint.tickHostWrite();
        } else if (globalType == Type.DOUBLE) {
            this.elementSize = 8;
            if (reallocate) {
                MemoryWorkspace workspace = Nd4j.getMemoryManager().getCurrentWorkspace();
                // NOTE(review): this marks the buffer as attached only when the current workspace
                // IS a DummyWorkspace - confirm whether a negated check was intended
                if (workspace != null && (workspace instanceof DummyWorkspace)) {
                    this.attached = true;
                    this.parentWorkspace = workspace;
                    workspaceGenerationId = workspace.getGenerationId();
                }
                this.allocationPoint = AtomicAllocator.getInstance().allocateMemory(this, new AllocationShape(length, elementSize, globalType), false);
                // allocationPoint.attachBuffer(this);
                this.trackingPoint = allocationPoint.getObjectId();
                this.pointer = new CudaPointer(allocationPoint.getPointers().getHostPointer(), length).asDoublePointer();
                indexer = DoubleIndexer.create((DoublePointer) pointer);
            }
            DoubleIndexer Dindexer = (DoubleIndexer) indexer;
            for (int i = 0; i < length(); i++) {
                if (t == Type.DOUBLE) {
                    Dindexer.put(i, s.readDouble());
                } else if (t == Type.FLOAT) {
                    Dindexer.put(i, (double) s.readFloat());
                } else if (t == Type.HALF) {
                    Dindexer.put(i, (double) toFloat((int) s.readShort()));
                }
            }
            allocationPoint.tickHostWrite();
        } else if (globalType == Type.FLOAT) {
            this.elementSize = 4;
            if (reallocate) {
                this.allocationPoint = AtomicAllocator.getInstance().allocateMemory(this, new AllocationShape(length, elementSize, dataType()), false);
                this.trackingPoint = allocationPoint.getObjectId();
                this.pointer = new CudaPointer(allocationPoint.getPointers().getHostPointer(), length).asFloatPointer();
                indexer = FloatIndexer.create((FloatPointer) pointer);
            }
            FloatIndexer Findexer = (FloatIndexer) indexer;
            for (int i = 0; i < length; i++) {
                if (t == Type.DOUBLE) {
                    Findexer.put(i, (float) s.readDouble());
                } else if (t == Type.FLOAT) {
                    Findexer.put(i, s.readFloat());
                } else if (t == Type.HALF) {
                    Findexer.put(i, toFloat((int) s.readShort()));
                }
            }
            allocationPoint.tickHostWrite();
        } else if (globalType == Type.HALF) {
            this.elementSize = 2;
            if (reallocate) {
                this.allocationPoint = AtomicAllocator.getInstance().allocateMemory(this, new AllocationShape(length, elementSize, dataType()), false);
                this.trackingPoint = allocationPoint.getObjectId();
                this.pointer = new CudaPointer(allocationPoint.getPointers().getHostPointer(), length).asShortPointer();
                indexer = HalfIndexer.create((ShortPointer) this.pointer);
            }
            HalfIndexer Hindexer = (HalfIndexer) indexer;
            for (int i = 0; i < length; i++) {
                if (t == Type.DOUBLE) {
                    Hindexer.put(i, (float) s.readDouble());
                } else if (t == Type.FLOAT) {
                    Hindexer.put(i, s.readFloat());
                } else if (t == Type.HALF) {
                    Hindexer.put(i, toFloat((int) s.readShort()));
                }
            }
            // for HALF & HALF2 datatype we just tag data as fresh on host
            allocationPoint.tickHostWrite();
        } else {
            throw new IllegalStateException("Unknown dataType: [" + t.toString() + "]");
        }
    /*
            this.wrappedBuffer = this.pointer.asByteBuffer();
            this.wrappedBuffer.order(ByteOrder.nativeOrder());
            */
    } catch (Exception e) {
        throw new RuntimeException(e);
    }
    // data was written on the host side above, so push it to the device
    AtomicAllocator.getInstance().getFlowController().synchronizeToDevice(allocationPoint);
// allocator.synchronizeHostData(this);
}
Also used : DummyWorkspace(org.nd4j.linalg.memory.abstracts.DummyWorkspace) AllocationShape(org.nd4j.jita.allocator.impl.AllocationShape) AllocationPoint(org.nd4j.jita.allocator.impl.AllocationPoint) IOException(java.io.IOException) MemoryWorkspace(org.nd4j.linalg.api.memory.MemoryWorkspace) CudaPointer(org.nd4j.jita.allocator.pointers.CudaPointer)

Example 9 with AllocationShape

use of org.nd4j.jita.allocator.impl.AllocationShape in project nd4j by deeplearning4j.

the class AllocationUtilsTest method testGetRequiredMemory1.

/**
 * A contiguous shape of 10 DOUBLE elements (stride 1, offset 0) must require 80 bytes.
 */
@Test
public void testGetRequiredMemory1() throws Exception {
    final long expectedBytes = 80;

    AllocationShape tenDoubles = new AllocationShape();
    tenDoubles.setDataType(DataBuffer.Type.DOUBLE);
    tenDoubles.setLength(10);
    tenDoubles.setStride(1);
    tenDoubles.setOffset(0);

    long actualBytes = AllocationUtils.getRequiredMemory(tenDoubles);
    assertEquals(expectedBytes, actualBytes);
}
Also used : AllocationShape(org.nd4j.jita.allocator.impl.AllocationShape) Test(org.junit.Test)

Example 10 with AllocationShape

use of org.nd4j.jita.allocator.impl.AllocationShape in project nd4j by deeplearning4j.

the class CudaFullCachingProviderTest method testPurge1.

/**
 * Creates and drops many 10x10 arrays so that their device memory ends up in the provider's
 * cache, then checks that purgeCache() empties the cache bucket for that shape.
 */
@Test
public void testPurge1() throws Exception {
    // produce plenty of garbage arrays of the same shape
    int remaining = 10000;
    while (remaining-- > 0) {
        Nd4j.create(10, 10);
    }

    INDArray probe = Nd4j.create(10, 10);
    AllocationShape shape = AtomicAllocator.getInstance().getAllocationPoint(probe).getShape();

    // two GC requests plus a pause, giving the released arrays time to be collected
    System.gc();
    System.gc();
    Thread.sleep(1000);

    CudaFullCachingProvider provider = (CudaFullCachingProvider) AtomicAllocator.getInstance().getMemoryHandler().getMemoryProvider();

    log.info("Shapes in cache: {}", provider.deviceCache.get(0).get(shape).size());
    boolean cachedBeforePurge = provider.deviceCache.get(0).get(shape).size() > 0;
    assertTrue(cachedBeforePurge);

    provider.purgeCache();

    // look the bucket up again after the purge instead of reusing an earlier reference
    boolean emptyAfterPurge = provider.deviceCache.get(0).get(shape).size() == 0;
    assertTrue(emptyAfterPurge);
}
Also used : AllocationShape(org.nd4j.jita.allocator.impl.AllocationShape) INDArray(org.nd4j.linalg.api.ndarray.INDArray) Test(org.junit.Test)

Aggregations

AllocationShape (org.nd4j.jita.allocator.impl.AllocationShape)14 Test (org.junit.Test)6 AllocationPoint (org.nd4j.jita.allocator.impl.AllocationPoint)5 CudaPointer (org.nd4j.jita.allocator.pointers.CudaPointer)4 CudaContext (org.nd4j.linalg.jcublas.context.CudaContext)2 IOException (java.io.IOException)1 MemoryWorkspace (org.nd4j.linalg.api.memory.MemoryWorkspace)1 PagedPointer (org.nd4j.linalg.api.memory.pointers.PagedPointer)1 PointersPair (org.nd4j.linalg.api.memory.pointers.PointersPair)1 INDArray (org.nd4j.linalg.api.ndarray.INDArray)1 ND4JIllegalStateException (org.nd4j.linalg.exception.ND4JIllegalStateException)1 DummyWorkspace (org.nd4j.linalg.memory.abstracts.DummyWorkspace)1