Use of org.nd4j.jita.allocator.impl.AllocationShape in project nd4j by deeplearning4j.
The class CudaCachingZeroProvider, method free.
/**
 * Releases the memory chunk described by the given AllocationPoint.
 *
 * Points whose allocation status is DEVICE are handed straight to the parent provider.
 * For any other point the host chunk is zero-filled and stored in the per-shape cache,
 * unless the chunk is larger than the configured maximum cacheable length or the cached
 * amount has already reached the configured maximum; in those cases the chunk is
 * released through the parent provider instead.
 *
 * PLEASE NOTE: This method can actually ignore free, and keep released memory chunk for future reuse.
 *
 * @param point allocation point describing the chunk to release
 */
@Override
public void free(AllocationPoint point) {
    if (point.getAllocationStatus() == AllocationStatus.DEVICE) {
        super.free(point);
        return;
    }

    AllocationShape shape = point.getShape();
    long reqMemory = AllocationUtils.getRequiredMemory(shape);

    // we don't cache too big objects, and we stop caching once the cache limit is reached
    if (reqMemory > CudaEnvironment.getInstance().getConfiguration().getMaximumHostCacheableLength()
                    || zeroCachedAmount.get() >= CudaEnvironment.getInstance().getConfiguration().getMaximumHostCache()) {
        super.free(point);
        return;
    }

    ensureCacheHolder(shape);
    CacheHolder cache = zeroCache.get(shape);

    // The per-bucket depth limit is currently disabled, so every chunk that passed the
    // checks above is cached regardless of its size: wipe it first so the next user of
    // this chunk gets zeroed memory, then park the raw address in the cache.
    Pointer.memset(point.getHostPointer(), 0, reqMemory);
    cache.put(new CudaPointer(point.getHostPointer().address()));
}
Use of org.nd4j.jita.allocator.impl.AllocationShape in project nd4j by deeplearning4j.
The class BaseCudaDataBuffer, method reallocate.
/**
 * Moves this buffer onto a newly allocated memory region sized for {@code length} elements.
 *
 * A new allocation point is requested, the host pointer and indexer are rebuilt for the
 * buffer's data type, the new device region is zero-filled, and the previous contents are
 * copied into it from whichever side (device or host) the old allocation point reports as
 * actual. The old allocation is released unless this buffer is attached.
 *
 * Note that the {@code length} field of this buffer is not updated by this method.
 *
 * @param length number of elements the new region must hold
 * @return this buffer
 * @throws UnsupportedOperationException if the data type is not DOUBLE, FLOAT, HALF or INT
 */
@Override
public DataBuffer reallocate(long length) {
    // we want to be sure this array isn't used anywhere RIGHT AT THIS MOMENT
    Nd4j.getExecutioner().commit();

    AllocationPoint old = allocationPoint;
    allocationPoint = AtomicAllocator.getInstance().allocateMemory(this, new AllocationShape(length, elementSize, dataType()), false);
    trackingPoint = allocationPoint.getObjectId();

    switch (dataType()) {
        case DOUBLE:
            this.pointer = new CudaPointer(allocationPoint.getPointers().getHostPointer(), length, 0).asDoublePointer();
            indexer = DoubleIndexer.create((DoublePointer) pointer);
            break;
        case FLOAT:
            this.pointer = new CudaPointer(allocationPoint.getPointers().getHostPointer(), length, 0).asFloatPointer();
            indexer = FloatIndexer.create((FloatPointer) pointer);
            break;
        case HALF:
            this.pointer = new CudaPointer(allocationPoint.getPointers().getHostPointer(), length, 0).asShortPointer();
            // HALF buffers are accessed through a HalfIndexer elsewhere in this class
            // (read() casts the indexer to HalfIndexer), so build the same indexer type here
            indexer = HalfIndexer.create((ShortPointer) pointer);
            break;
        case INT:
            this.pointer = new CudaPointer(allocationPoint.getPointers().getHostPointer(), length, 0).asIntPointer();
            indexer = IntIndexer.create((IntPointer) pointer);
            break;
        default:
            throw new UnsupportedOperationException();
    }

    CudaContext context = (CudaContext) AtomicAllocator.getInstance().getDeviceContext().getContext();

    // zero the whole new region first, so anything beyond the copied data is well defined
    NativeOpsHolder.getInstance().getDeviceNativeOps().memsetAsync(allocationPoint.getDevicePointer(), 0, length * elementSize, 0, context.getSpecialStream());

    // never copy more than the new region can hold: when shrinking, only the first
    // `length` elements of the old data fit
    long bytesToCopy = Math.min(this.length, length) * elementSize;

    if (old.isActualOnDeviceSide()) {
        NativeOpsHolder.getInstance().getDeviceNativeOps().memcpyAsync(allocationPoint.getDevicePointer(), old.getDevicePointer(), bytesToCopy, CudaConstants.cudaMemcpyDeviceToDevice, context.getSpecialStream());
    } else if (old.isActualOnHostSide()) {
        NativeOpsHolder.getInstance().getDeviceNativeOps().memcpyAsync(allocationPoint.getDevicePointer(), old.getHostPointer(), bytesToCopy, CudaConstants.cudaMemcpyHostToDevice, context.getSpecialStream());
    }

    // wait for the memset/memcpy queued above before the old region can be released
    context.getSpecialStream().synchronize();
    allocationPoint.tickDeviceWrite();

    // we're keeping pointer reference for JVM
    pointer.address();

    // this.length = length;

    // attached buffers are left alone here, that's workspaces
    if (!isAttached()) {
        AtomicAllocator.getInstance().freeMemory(old);
    }

    return this;
}
Use of org.nd4j.jita.allocator.impl.AllocationShape in project nd4j by deeplearning4j.
The class BaseCudaDataBuffer, method read.
/**
 * Restores this buffer from a stream.
 *
 * The stream is consumed in this order: a UTF string holding the stored allocation mode
 * (read and discarded), an int element count, a UTF string naming the stored {@code Type},
 * and then one value per element, encoded according to that stored type.
 *
 * If the stored type is COMPRESSED, only the type is recorded and the method returns
 * without reading any element data. Otherwise values are converted, element by element,
 * into an int buffer (when either the stored type or the global type is INT) or into a
 * buffer of the global data type (DOUBLE, FLOAT or HALF), the allocation point is marked
 * as written on the host side, and the flow controller's synchronizeToDevice is invoked.
 *
 * @param s stream to read the buffer from
 * @throws RuntimeException wrapping any exception raised while reading or allocating,
 *         including the IllegalStateException raised for an unsupported data type
 */
@Override
public void read(DataInputStream s) {
    try {
        // skip allocationMode
        s.readUTF();
        allocationMode = AllocationMode.JAVACPP;

        int locLength = s.readInt();
        // current memory is reused only if the length is unchanged and an indexer already exists
        boolean reallocate = locLength != length || indexer == null;
        length = locLength;

        Type t = Type.valueOf(s.readUTF());

        if (globalType == null && Nd4j.dataType() != null) {
            globalType = Nd4j.dataType();
        }

        // the stored type is wider than the global one: values will be narrowed on load
        if (t != globalType && t != Type.INT && Nd4j.sizeOfDataType(globalType) < Nd4j.sizeOfDataType(t)) {
            log.warn("Loading a data stream with opType different from what is set globally. Expect precision loss");
            if (globalType == Type.INT) {
                log.warn("Int to float/double widening UNSUPPORTED!!!");
            }
        }

        if (t == Type.COMPRESSED) {
            // no element data is read here for compressed buffers
            type = t;
            return;
        } else if (t == Type.INT || globalType == Type.INT) {
            this.elementSize = 4;
            // NOTE(review): this branch allocates unconditionally and does not consult `reallocate` - confirm intended
            this.allocationPoint = AtomicAllocator.getInstance().allocateMemory(this, new AllocationShape(length, elementSize, t), false);
            this.trackingPoint = allocationPoint.getObjectId();
            // we keep int buffer's dtype after ser/de
            this.type = t;
            this.pointer = new CudaPointer(allocationPoint.getPointers().getHostPointer(), length).asIntPointer();
            indexer = IntIndexer.create((IntPointer) pointer);

            IntIndexer intIndexer = (IntIndexer) indexer;
            for (int i = 0; i < length(); i++) {
                if (t == Type.INT) {
                    intIndexer.put(i, s.readInt());
                } else if (t == Type.DOUBLE) {
                    intIndexer.put(i, (int) s.readDouble());
                } else if (t == Type.FLOAT) {
                    intIndexer.put(i, (int) s.readFloat());
                } else if (t == Type.HALF) {
                    intIndexer.put(i, (int) toFloat((int) s.readShort()));
                }
            }

            allocationPoint.tickHostWrite();
        } else if (globalType == Type.DOUBLE) {
            this.elementSize = 8;
            if (reallocate) {
                MemoryWorkspace workspace = Nd4j.getMemoryManager().getCurrentWorkspace();
                // NOTE(review): the buffer is marked attached only when the current workspace IS a
                // DummyWorkspace, and only this DOUBLE branch does so - confirm the condition is intended
                if (workspace != null && (workspace instanceof DummyWorkspace)) {
                    this.attached = true;
                    this.parentWorkspace = workspace;
                    workspaceGenerationId = workspace.getGenerationId();
                }

                this.allocationPoint = AtomicAllocator.getInstance().allocateMemory(this, new AllocationShape(length, elementSize, globalType), false);
                this.trackingPoint = allocationPoint.getObjectId();
                this.pointer = new CudaPointer(allocationPoint.getPointers().getHostPointer(), length).asDoublePointer();
                indexer = DoubleIndexer.create((DoublePointer) pointer);
            }

            DoubleIndexer doubleIndexer = (DoubleIndexer) indexer;
            for (int i = 0; i < length(); i++) {
                if (t == Type.DOUBLE) {
                    doubleIndexer.put(i, s.readDouble());
                } else if (t == Type.FLOAT) {
                    doubleIndexer.put(i, (double) s.readFloat());
                } else if (t == Type.HALF) {
                    doubleIndexer.put(i, (double) toFloat((int) s.readShort()));
                }
            }

            allocationPoint.tickHostWrite();
        } else if (globalType == Type.FLOAT) {
            this.elementSize = 4;
            if (reallocate) {
                this.allocationPoint = AtomicAllocator.getInstance().allocateMemory(this, new AllocationShape(length, elementSize, dataType()), false);
                this.trackingPoint = allocationPoint.getObjectId();
                this.pointer = new CudaPointer(allocationPoint.getPointers().getHostPointer(), length).asFloatPointer();
                indexer = FloatIndexer.create((FloatPointer) pointer);
            }

            FloatIndexer floatIndexer = (FloatIndexer) indexer;
            for (int i = 0; i < length; i++) {
                if (t == Type.DOUBLE) {
                    floatIndexer.put(i, (float) s.readDouble());
                } else if (t == Type.FLOAT) {
                    floatIndexer.put(i, s.readFloat());
                } else if (t == Type.HALF) {
                    floatIndexer.put(i, toFloat((int) s.readShort()));
                }
            }

            allocationPoint.tickHostWrite();
        } else if (globalType == Type.HALF) {
            this.elementSize = 2;
            if (reallocate) {
                this.allocationPoint = AtomicAllocator.getInstance().allocateMemory(this, new AllocationShape(length, elementSize, dataType()), false);
                this.trackingPoint = allocationPoint.getObjectId();
                this.pointer = new CudaPointer(allocationPoint.getPointers().getHostPointer(), length).asShortPointer();
                indexer = HalfIndexer.create((ShortPointer) this.pointer);
            }

            HalfIndexer halfIndexer = (HalfIndexer) indexer;
            for (int i = 0; i < length; i++) {
                if (t == Type.DOUBLE) {
                    halfIndexer.put(i, (float) s.readDouble());
                } else if (t == Type.FLOAT) {
                    halfIndexer.put(i, s.readFloat());
                } else if (t == Type.HALF) {
                    halfIndexer.put(i, toFloat((int) s.readShort()));
                }
            }

            // for HALF & HALF2 datatype we just tag data as fresh on host
            allocationPoint.tickHostWrite();
        } else {
            throw new IllegalStateException("Unknown dataType: [" + t.toString() + "]");
        }
    } catch (Exception e) {
        throw new RuntimeException(e);
    }

    // the data was written through the host pointer above; hand the allocation point to the
    // flow controller (presumably this propagates the host copy to the device - confirm)
    AtomicAllocator.getInstance().getFlowController().synchronizeToDevice(allocationPoint);
}
Use of org.nd4j.jita.allocator.impl.AllocationShape in project nd4j by deeplearning4j.
The class AllocationUtilsTest, method testGetRequiredMemory1.
/**
 * Checks that a shape of 10 DOUBLE elements with stride 1 and offset 0
 * is reported as requiring 80 bytes.
 */
@Test
public void testGetRequiredMemory1() throws Exception {
    AllocationShape tenDoubles = new AllocationShape();
    tenDoubles.setDataType(DataBuffer.Type.DOUBLE);
    tenDoubles.setLength(10);
    tenDoubles.setStride(1);
    tenDoubles.setOffset(0);

    long requiredBytes = AllocationUtils.getRequiredMemory(tenDoubles);

    assertEquals(80, requiredBytes);
}
Use of org.nd4j.jita.allocator.impl.AllocationShape in project nd4j by deeplearning4j.
The class CudaFullCachingProviderTest, method testPurge1.
/**
 * Creates many throwaway 10x10 arrays, requests garbage collection, and then checks that
 * the device cache bucket for that shape on device 0 is non-empty before
 * {@code purgeCache()} and empty after it.
 */
@Test
public void testPurge1() throws Exception {
    // produce a large number of short-lived arrays of one and the same shape
    int remaining = 10000;
    while (remaining-- > 0) {
        Nd4j.create(10, 10);
    }

    // one more array of the same shape, used only to look up its allocation shape
    INDArray probe = Nd4j.create(10, 10);
    AllocationShape probeShape = AtomicAllocator.getInstance().getAllocationPoint(probe).getShape();

    // request collection twice and give it a second to run
    System.gc();
    System.gc();
    Thread.sleep(1000);

    CudaFullCachingProvider provider =
                    (CudaFullCachingProvider) AtomicAllocator.getInstance().getMemoryHandler().getMemoryProvider();

    // the bucket is looked up afresh each time rather than held in a local
    log.info("Shapes in cache: {}", provider.deviceCache.get(0).get(probeShape).size());
    assertTrue(provider.deviceCache.get(0).get(probeShape).size() > 0);

    provider.purgeCache();

    assertTrue(provider.deviceCache.get(0).get(probeShape).size() == 0);
}
Aggregations