Example usage of org.nd4j.jita.allocator.impl.AllocationPoint in the nd4j project (deeplearning4j): class GridExecutionerTest, method testDupLocality2.
@Test
public void testDupLocality2() throws Exception {
    // Purpose: a freshly created (uninitialized) buffer must be considered
    // up-to-date on both host and device sides, since nothing has written to it yet.
    INDArray array2 = Nd4j.createUninitialized(new int[] { 10, 10 }, 'c');
    // ((GridExecutioner) Nd4j.getExecutioner()).flushQueueBlocking();
    AllocationPoint point2 = AtomicAllocator.getInstance().getAllocationPoint(array2);
    // assertTrue/assertFalse over assertEquals(true/false, ...) — clearer intent,
    // better failure messages
    assertTrue(point2.isActualOnDeviceSide());
    assertTrue(point2.isActualOnHostSide());
}
Example usage of org.nd4j.jita.allocator.impl.AllocationPoint in the nd4j project (deeplearning4j): class GridExecutionerTest, method testDupLocality4.
@Test
public void testDupLocality4() throws Exception {
int nIn = 8;
int layerSize = 10;
int nOut = 4;
INDArray in = Nd4j.ones(1, 10).dup('c');
AllocationPoint point1 = AtomicAllocator.getInstance().getAllocationPoint(in);
assertEquals(true, point1.isEnqueued());
// assertEquals(1, ((GridExecutioner) Nd4j.getExecutioner()).getQueueLength());
INDArray out = Nd4j.zeros(1, 10).dup('c');
AllocationPoint point1A = AtomicAllocator.getInstance().getAllocationPoint(in);
AllocationPoint point2 = AtomicAllocator.getInstance().getAllocationPoint(out);
assertEquals(1, ((GridExecutioner) Nd4j.getExecutioner()).getQueueLength());
assertTrue(point1 == point1A);
assertEquals(true, point2.isEnqueued());
assertEquals(false, point1.isEnqueued());
assertEquals(Nd4j.ones(1, 10), in);
assertEquals(Nd4j.zeros(1, 10), out);
INDArray inCopy = in.dup('c');
AllocationPoint point3 = AtomicAllocator.getInstance().getAllocationPoint(inCopy);
assertEquals(false, point2.isEnqueued());
assertEquals(true, point3.isEnqueued());
assertEquals(true, point1.isEnqueued());
}
Example usage of org.nd4j.jita.allocator.impl.AllocationPoint in the nd4j project (deeplearning4j): class GridExecutionerTest, method testDupLocality3.
@Test
public void testDupLocality3() throws Exception {
    // Verifies that mutating the source array after dup() does not leak into the
    // copy, and that a host-side putScalar invalidates only the source's device view.
    INDArray src = Nd4j.create(new float[] { 1f, 1f, 1f, 1f, 1f });
    INDArray expectedAfterPut = Nd4j.create(new float[] { 0f, 1f, 1f, 1f, 1f });
    INDArray expectedUntouched = Nd4j.create(new float[] { 1f, 1f, 1f, 1f, 1f });
    INDArray copy = src.dup();
    AllocationPoint srcPoint = AtomicAllocator.getInstance().getAllocationPoint(src);
    AllocationPoint copyPoint = AtomicAllocator.getInstance().getAllocationPoint(copy);
    // right after dup(), both buffers are current on the device and still queued
    assertTrue(srcPoint.isActualOnDeviceSide());
    assertTrue(copyPoint.isActualOnDeviceSide());
    assertTrue(srcPoint.isEnqueued());
    assertTrue(copyPoint.isEnqueued());
    // host-side write: should drain the queue and stale the source's device copy
    src.putScalar(0, 0f);
    assertEquals(0, ((GridExecutioner) Nd4j.getExecutioner()).getQueueLength());
    assertFalse(srcPoint.isActualOnDeviceSide());
    // source reflects the write; the copy is unaffected
    assertEquals(expectedAfterPut, src);
    assertEquals(expectedUntouched, copy);
}
Example usage of org.nd4j.jita.allocator.impl.AllocationPoint in the nd4j project (deeplearning4j): class CudaMemoryManager, method memset.
// Zeroes the backing buffer of the given array, using the fastest path available
// for wherever the buffer currently lives (device vs host).
@Override
public void memset(INDArray array) {
    if (array.isView()) {
        // a view shares storage with its parent, so a raw memset on the whole
        // buffer would clobber unrelated data — fall back to an elementwise assign
        array.assign(0.0);
        // we don't want any mGRID activations here
        Nd4j.getExecutioner().commit();
        return;
    }
    // we want to be sure we have no trails left in mGRID
    Nd4j.getExecutioner().push();
    AllocationPoint point = AtomicAllocator.getInstance().getAllocationPoint(array);
    if (point.getAllocationStatus() == AllocationStatus.DEVICE) {
        CudaContext context = (CudaContext) AtomicAllocator.getInstance().getDeviceContext().getContext();
        // async device-side memset over the full buffer (length * element size bytes)
        NativeOpsHolder.getInstance().getDeviceNativeOps().memsetAsync(AtomicAllocator.getInstance().getPointer(array, context), 0, array.data().length() * Nd4j.sizeOfDataType(array.data().dataType()), 0, context.getOldStream());
        // better be safe than sorry: block until the async memset has completed
        context.getOldStream().synchronize();
        // mark device copy as the freshest one
        point.tickDeviceWrite();
    } else if (point.getAllocationStatus() == AllocationStatus.HOST) {
        // drain any pending ops that might still touch this buffer before we memset it
        Nd4j.getExecutioner().commit();
        // just casual memset
        Pointer.memset(AtomicAllocator.getInstance().getHostPointer(array), 0, array.data().length() * Nd4j.sizeOfDataType(array.data().dataType()));
        // mark host copy as the freshest one
        point.tickHostWrite();
    }
}
Example usage of org.nd4j.jita.allocator.impl.AllocationPoint in the nd4j project (deeplearning4j): class CudaFullCachingProvider, method free.
/**
 * Frees the specific chunk of memory described by the AllocationPoint passed in.
 *
 * PLEASE NOTE: this method may actually ignore the free and keep the released
 * device memory chunk cached for future reuse instead of returning it to the driver.
 *
 * @param point allocation descriptor for the chunk being released
 */
@Override
public void free(AllocationPoint point) {
    if (point.getAllocationStatus() == AllocationStatus.DEVICE) {
        // constant buffers are managed elsewhere — never release or cache them here
        if (point.isConstant())
            return;
        AllocationShape shape = point.getShape();
        int deviceId = point.getDeviceId();
        long reqMemory = AllocationUtils.getRequiredMemory(shape);
        // NOTE(review): the second condition bounds the DEVICE cached amount by
        // getMaximumHostCache() — this looks like a copy/paste bug; confirm whether
        // a device-cache limit was intended here.
        if (reqMemory > CudaEnvironment.getInstance().getConfiguration().getMaximumDeviceCacheableLength() || deviceCachedAmount.get(deviceId).get() >= CudaEnvironment.getInstance().getConfiguration().getMaximumHostCache()) {
            // chunk is too large to cache, or the cache is full — really release it
            // log.info("DEVICE_{} memory purging: {} bytes; MS: {}; MT: {}", deviceId, reqMemory, MAX_GPU_ALLOCATION, MAX_GPU_CACHE);
            super.free(point);
            return;
        }
        // log.info("Saving HOST memory into cache...");
        ensureDeviceCacheHolder(deviceId, shape);
        CacheHolder cache = deviceCache.get(deviceId).get(shape);
        // defensive check: the point must not have migrated devices mid-flight
        if (point.getDeviceId() != deviceId)
            throw new RuntimeException("deviceId changed!");
        // Cache the chunk for reuse. The original code had two branches
        // (below/above FORCED_CACHE_THRESHOLD) whose live code was identical —
        // the size-based eviction logic in the second branch was commented out —
        // so the duplicated put-and-return paths are collapsed into one, and the
        // unused address/cacheEntries/cacheHeight/cacheDepth locals are removed.
        cache.put(new CudaPointer(point.getDevicePointer().address()));
        return;
    }
    // host-side (or undefined) allocations take the default release path
    super.free(point);
}
Aggregations