Use of org.nd4j.jita.allocator.pointers.PointersPair in project nd4j by deeplearning4j.
The class AtomicAllocator, method allocateMemory.
/**
 * This method allocates the required chunk of memory in the specified location.
 * <p>
 * PLEASE NOTE: Do not use this method unless you're 100% sure what you're doing.
 *
 * @param buffer         the DataBuffer this allocation will back
 * @param requiredMemory shape describing the memory chunk to allocate
 * @param location       target location (HOST or DEVICE) for the allocation
 * @param initialize     whether the allocated memory should be zeroed out
 */
@Override
public AllocationPoint allocateMemory(DataBuffer buffer, AllocationShape requiredMemory, AllocationStatus location, boolean initialize) {
    AllocationPoint point = new AllocationPoint();
    useTracker.set(System.currentTimeMillis());

    // we use these longs as tracking codes for memory tracking
    Long allocId = objectsTracker.getAndIncrement();
    // point.attachBuffer(buffer);
    point.setObjectId(allocId);
    point.setShape(requiredMemory);

    /*
    if (buffer instanceof CudaIntDataBuffer) {
        buffer.setConstant(true);
        point.setConstant(true);
    }
    */

    int numBuckets = configuration.getNumberOfGcThreads();
    int bucketId = RandomUtils.nextInt(0, numBuckets);

    GarbageBufferReference reference = new GarbageBufferReference((BaseDataBuffer) buffer, queueMap.get(bucketId), point);
    point.attachReference(reference);
    point.setDeviceId(-1);

    if (buffer.isAttached()) {
        long reqMem = AllocationUtils.getRequiredMemory(requiredMemory);

        // log.info("Allocating {} bytes from attached memory...", reqMem);

        // workaround for init order
        getMemoryHandler().getCudaContext();
        point.setDeviceId(Nd4j.getAffinityManager().getDeviceForCurrentThread());

        CudaWorkspace workspace = (CudaWorkspace) Nd4j.getMemoryManager().getCurrentWorkspace();

        PointersPair pair = new PointersPair();

        PagedPointer ptrDev = workspace.alloc(reqMem, MemoryKind.DEVICE, requiredMemory.getDataType(), initialize);
        PagedPointer ptrHost = workspace.alloc(reqMem, MemoryKind.HOST, requiredMemory.getDataType(), initialize);

        pair.setHostPointer(ptrHost);

        if (ptrDev != null) {
            pair.setDevicePointer(ptrDev);
            point.setAllocationStatus(AllocationStatus.DEVICE);
        } else {
            pair.setDevicePointer(ptrHost);
            point.setAllocationStatus(AllocationStatus.HOST);
        }

        // if (!ptrDev.isLeaked())
        point.setAttached(true);

        point.setPointers(pair);
    } else {
        // we stay naive on PointersPair: at this level we just don't know which pointers
        // are set; the MemoryHandler takes care of that
        PointersPair pair = memoryHandler.alloc(location, point, requiredMemory, initialize);
        point.setPointers(pair);
    }

    allocationsMap.put(allocId, point);
    return point;
}
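In user code this method is not called directly; allocation happens implicitly when an INDArray is created. A minimal sketch of how the resulting AllocationPoint and its PointersPair can then be inspected, using only the nd4j classes already shown in this section:

// Allocation is triggered implicitly by Nd4j.create(); the tracking structures
// can then be retrieved through AtomicAllocator.
INDArray array = Nd4j.create(new float[] {1f, 2f, 3f});
AllocationPoint point = AtomicAllocator.getInstance().getAllocationPoint(array);
PointersPair pair = point.getPointers();
System.out.println("objectId: " + point.getObjectId());
System.out.println("status: " + point.getAllocationStatus());
System.out.println("host pointer: " + pair.getHostPointer());
System.out.println("device pointer: " + pair.getDevicePointer());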
Use of org.nd4j.jita.allocator.pointers.PointersPair in project nd4j by deeplearning4j.
The class CudaCachingZeroProvider, method malloc.
/**
 * This method provides a PointersPair for the memory chunk specified by the given AllocationShape.
 *
 * PLEASE NOTE: This method may ignore the malloc request and hand out a previously cached free memory chunk of equal shape instead.
 *
 * @param shape    shape of the desired memory chunk
 * @param point    target AllocationPoint structure
 * @param location either HOST or DEVICE
 * @return         PointersPair pointing to the allocated (or reused) memory chunk
 */
@Override
public PointersPair malloc(AllocationShape shape, AllocationPoint point, AllocationStatus location) {
    long reqMemory = AllocationUtils.getRequiredMemory(shape);

    if (location == AllocationStatus.HOST && reqMemory < CudaEnvironment.getInstance().getConfiguration().getMaximumHostCacheableLength()) {
        CacheHolder cache = zeroCache.get(shape);
        if (cache != null) {
            Pointer pointer = cache.poll();
            if (pointer != null) {
                cacheZeroHit.incrementAndGet();

                // since this memory chunk is going to be used now, subtract its size from the cached amount
                zeroCachedAmount.addAndGet(-1 * reqMemory);

                PointersPair pair = new PointersPair();
                pair.setDevicePointer(new CudaPointer(pointer.address()));
                pair.setHostPointer(new CudaPointer(pointer.address()));

                point.setAllocationStatus(AllocationStatus.HOST);
                return pair;
            }
        }
        cacheZeroMiss.incrementAndGet();

        if (CudaEnvironment.getInstance().getConfiguration().isUsePreallocation()
                        && zeroCachedAmount.get() < CudaEnvironment.getInstance().getConfiguration().getMaximumHostCache() / 10
                        && reqMemory < 16 * 1024 * 1024L) {
            CachePreallocator preallocator = new CachePreallocator(shape, location, CudaEnvironment.getInstance().getConfiguration().getPreallocationCalls());
            preallocator.start();
        }

        return super.malloc(shape, point, location);
    }
    return super.malloc(shape, point, location);
}
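The caching logic above boils down to a shape-keyed pool of free chunks: a malloc first tries to reuse a pooled chunk of exactly the same shape and only falls back to a real allocation on a miss. A self-contained illustrative sketch of that idea (not nd4j code; ShapeCache and all names in it are hypothetical):

import java.util.Map;
import java.util.Queue;
import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.ConcurrentLinkedQueue;
import java.util.concurrent.atomic.AtomicLong;
import java.util.function.Supplier;

class ShapeCache<K, P> {
    private final Map<K, Queue<P>> cache = new ConcurrentHashMap<>();
    private final AtomicLong hits = new AtomicLong();
    private final AtomicLong misses = new AtomicLong();

    // Try the pool first; on a miss, delegate to the real allocator.
    P acquire(K shape, Supplier<P> realMalloc) {
        Queue<P> q = cache.get(shape);
        P pooled = (q == null) ? null : q.poll();
        if (pooled != null) {
            hits.incrementAndGet();
            return pooled;
        }
        misses.incrementAndGet();
        return realMalloc.get();
    }

    // Instead of freeing, park the chunk for later reuse under the same shape key.
    void release(K shape, P chunk) {
        cache.computeIfAbsent(shape, k -> new ConcurrentLinkedQueue<>()).offer(chunk);
    }
}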
Use of org.nd4j.jita.allocator.pointers.PointersPair in project nd4j by deeplearning4j.
The class DelayedMemoryTest, method testDelayedAllocation4.
@Test
public void testDelayedAllocation4() throws Exception {
    INDArray array = Nd4j.create(new float[] {1f, 2f, 3f, 4f, 5f});

    AllocationPoint pointer = AtomicAllocator.getInstance().getAllocationPoint(array);

    PointersPair pair = pointer.getPointers();

    // pointers should be equal, device memory wasn't allocated yet
    assertEquals(pair.getDevicePointer(), pair.getHostPointer());

    assertEquals(2.0f, array.getFloat(1), 0.001f);

    assertEquals(pair.getDevicePointer(), pair.getHostPointer());

    String temp = System.getProperty("java.io.tmpdir");
    String outPath = FilenameUtils.concat(temp, "dl4jtestserialization.bin");

    try (DataOutputStream dos = new DataOutputStream(Files.newOutputStream(Paths.get(outPath)))) {
        Nd4j.write(array, dos);
    }

    INDArray in;
    try (DataInputStream dis = new DataInputStream(new FileInputStream(outPath))) {
        in = Nd4j.read(dis);
    }

    assertEquals(AtomicAllocator.getInstance().getAllocationPoint(in).getPointers().getDevicePointer(),
                 AtomicAllocator.getInstance().getAllocationPoint(in).getPointers().getHostPointer());
    assertEquals(array, in);
}
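The same round-trip can be written more compactly with Nd4j's file helpers; a short sketch, assuming the standard Nd4j binary serialization API (saveBinary/readBinary) and the array from the test above:

import java.io.File;

File tmp = new File(System.getProperty("java.io.tmpdir"), "dl4jtestserialization.bin");
Nd4j.saveBinary(array, tmp);
INDArray restored = Nd4j.readBinary(tmp);
assertEquals(array, restored);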
Use of org.nd4j.jita.allocator.pointers.PointersPair in project nd4j by deeplearning4j.
The class CudaZeroHandler, method promoteObject.
/**
 * This method moves a specific object from zero-copy memory to device memory.
 *
 * PLEASE NOTE: DO NOT EVER USE THIS METHOD MANUALLY, UNLESS YOU 100% HAVE TO.
 *
 * @return false if the buffer is not currently in HOST memory, true otherwise
 */
@Override
public boolean promoteObject(DataBuffer buffer) {
    AllocationPoint dstPoint = AtomicAllocator.getInstance().getAllocationPoint(buffer);
    if (dstPoint.getAllocationStatus() != AllocationStatus.HOST)
        return false;

    if (configuration.getMemoryModel() == Configuration.MemoryModel.DELAYED
                    && dstPoint.getAllocationStatus() == AllocationStatus.HOST) {

        // if we have a constant buffer (aka shapeInfo or other constant stuff)
        if (buffer.isConstant()) {
            Nd4j.getConstantHandler().moveToConstantSpace(buffer);
        } else {
            PointersPair pair = memoryProvider.malloc(dstPoint.getShape(), dstPoint, AllocationStatus.DEVICE);
            if (pair != null) {
                Integer deviceId = getDeviceId();
                // log.info("Promoting object to device: [{}]", deviceId);

                dstPoint.getPointers().setDevicePointer(pair.getDevicePointer());
                dstPoint.setAllocationStatus(AllocationStatus.DEVICE);

                deviceAllocations.get(deviceId).put(dstPoint.getObjectId(), dstPoint.getObjectId());
                zeroAllocations.get(dstPoint.getBucketId()).remove(dstPoint.getObjectId());
                deviceMemoryTracker.addToAllocation(Thread.currentThread().getId(), deviceId, AllocationUtils.getRequiredMemory(dstPoint.getShape()));

                dstPoint.tickHostWrite();
            } else
                throw new RuntimeException("Device allocation failed during promotion");
        }
    }
    return true;
}
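promoteObject is normally invoked by the memory handler itself; conceptually, a caller-side sketch looks like this (handler stands for a CudaZeroHandler instance and array for an existing INDArray, both assumptions of this sketch):

// Promotion only applies to buffers currently backed by zero-copy host memory.
DataBuffer buffer = array.data();
AllocationPoint p = AtomicAllocator.getInstance().getAllocationPoint(buffer);
if (p.getAllocationStatus() == AllocationStatus.HOST) {
    boolean moved = handler.promoteObject(buffer);
    // after a successful promotion the AllocationPoint reports DEVICE status
    // and its PointersPair carries a real device pointer
}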
Use of org.nd4j.jita.allocator.pointers.PointersPair in project nd4j by deeplearning4j.
The class CudaZeroHandler, method alloc.
/**
 * Allocates the specified memory chunk on the specified device/host.
 *
 * @param targetMode valid arguments are HOST, DEVICE
 * @param point      target AllocationPoint structure
 * @param shape      shape of the memory chunk to allocate
 * @param initialize whether the allocated memory should be zeroed out
 * @return           PointersPair with the host (and, if successful, device) pointers set
 */
@Override
public PointersPair alloc(AllocationStatus targetMode, AllocationPoint point, AllocationShape shape, boolean initialize) {
    long reqMemory = AllocationUtils.getRequiredMemory(shape);
    CudaContext context = getCudaContext();
    switch (targetMode) {
        case HOST: {
            if (zeroUseCounter.get() + reqMemory >= configuration.getMaximumZeroAllocation()) {
                if (reqMemory > configuration.getMaximumZeroAllocation()) {
                    throw new IllegalStateException("You can't allocate more memory than allowed with the configured value: [" + configuration.getMaximumZeroAllocation() + "]");
                }
                while (zeroUseCounter.get() + reqMemory >= configuration.getMaximumZeroAllocation()) {
                    try {
                        log.warn("No available [HOST] memory, sleeping for a while...");
                        log.debug("Currently used: [" + zeroUseCounter.get() + "], allocated objects: [" + zeroAllocations.get(0) + "]");
                        Nd4j.getMemoryManager().invokeGc();
                        Thread.sleep(1000);
                    } catch (Exception e) {
                        throw new RuntimeException(e);
                    }
                }
            }

            PointersPair pair = memoryProvider.malloc(shape, point, targetMode);

            if (initialize) {
                org.bytedeco.javacpp.Pointer.memset(pair.getHostPointer(), 0, reqMemory);
                point.tickHostWrite();
            }

            pickupHostAllocation(point);
            return pair;
        }
        case DEVICE: {
            int deviceId = getDeviceId();

            PointersPair returnPair = new PointersPair();
            PointersPair tmpPair = new PointersPair();

            // if the initial memory location is device, there's a chance we don't have zero memory allocated
            if (point.getPointers() == null || point.getPointers().getHostPointer() == null) {
                tmpPair = alloc(AllocationStatus.HOST, point, point.getShape(), initialize);

                returnPair.setDevicePointer(tmpPair.getHostPointer());
                returnPair.setHostPointer(tmpPair.getHostPointer());

                point.setAllocationStatus(AllocationStatus.HOST);
                point.setPointers(tmpPair);
            }

            /*
            if (reqMemory < configuration.getMaximumSingleHostAllocation()
                            && deviceMemoryTracker.getAllocatedSize(deviceId) + reqMemory < configuration.getMaximumDeviceAllocation()) {
            */
            if (deviceMemoryTracker.reserveAllocationIfPossible(Thread.currentThread().getId(), deviceId, reqMemory)) {
                point.setDeviceId(deviceId);
                PointersPair pair = memoryProvider.malloc(shape, point, targetMode);
                if (pair != null) {
                    // log.info("PEWPEW");
                    returnPair.setDevicePointer(pair.getDevicePointer());

                    point.setAllocationStatus(AllocationStatus.DEVICE);

                    if (point.getPointers() == null)
                        throw new RuntimeException("Pointers can't be null at this point");

                    point.getPointers().setDevicePointer(pair.getDevicePointer());

                    deviceAllocations.get(deviceId).put(point.getObjectId(), point.getObjectId());

                    val p = point.getBucketId();

                    if (p != null) {
                        val m = zeroAllocations.get(point.getBucketId());

                        // m can be null if that's a point from a workspace - there's just no bucketId for it
                        if (m != null)
                            m.remove(point.getObjectId());
                    }

                    deviceMemoryTracker.addToAllocation(Thread.currentThread().getId(), deviceId, reqMemory);

                    // point.tickDeviceWrite();
                    point.tickHostWrite();

                    if (!initialize) {
                        point.tickDeviceWrite();
                        point.tickHostRead();
                    } else {
                        // CudaContext ctx = AtomicAllocator.getInstance().getFlowController().prepareAction(point);

                        nativeOps.memsetAsync(pair.getDevicePointer(), 0, reqMemory, 0, context.getSpecialStream());
                        context.getSpecialStream().synchronize();

                        point.tickDeviceWrite();
                        point.tickHostRead();

                        // AtomicAllocator.getInstance().getFlowController().registerAction(ctx, point);
                    }
                } else {
                    log.warn("Out of [DEVICE] memory, host memory will be used instead: deviceId: [{}], requested bytes: [{}]", deviceId, reqMemory);

                    // if device memory allocation failed (aka returned NULL), keep using host memory instead
                    returnPair.setDevicePointer(tmpPair.getHostPointer());
                    point.setAllocationStatus(AllocationStatus.HOST);

                    Nd4j.getMemoryManager().invokeGc();
                    try {
                        Thread.sleep(100);
                    } catch (Exception e) {
                        // ignored: the sleep is only a back-off after a failed allocation
                    }
                }
            } else {
                log.warn("Hard limit on [DEVICE] memory hit, please consider tuning memory parameters, deviceId [{}]", deviceId);
                Nd4j.getMemoryManager().invokeGc();
                try {
                    Thread.sleep(100);
                } catch (Exception e) {
                    // ignored: the sleep is only a back-off
                }
            }
            return returnPair;
        }
        default:
            throw new IllegalStateException("Can't allocate memory on target [" + targetMode + "]");
    }
}
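Stripped of the nd4j bookkeeping, the DEVICE branch implements a "device first, host fallback" strategy: make sure a host pointer exists, reserve quota, try the device malloc, and fall back to the host pointer when either step fails. A self-contained sketch of just that control flow (QuotaTracker and all pointer values are illustrative stand-ins, not nd4j API):

import java.util.concurrent.atomic.AtomicLong;

class DeviceAllocSketch {
    // Simplified per-device quota tracker, analogous to deviceMemoryTracker above.
    static class QuotaTracker {
        private final AtomicLong used = new AtomicLong();
        private final long max;
        QuotaTracker(long max) { this.max = max; }
        boolean reserve(long bytes) {
            long cur;
            do {
                cur = used.get();
                if (cur + bytes > max) return false; // hard limit: refuse the reservation
            } while (!used.compareAndSet(cur, cur + bytes));
            return true;
        }
        void release(long bytes) { used.addAndGet(-bytes); }
    }

    static final QuotaTracker tracker = new QuotaTracker(1024);

    // Stand-in for a device malloc that returns 0 ("NULL") when out of memory.
    static long deviceMalloc(long bytes) { return bytes <= 512 ? 0xDEAD0000L : 0L; }

    // Host (zero-copy) memory is assumed to always be allocatable in this sketch.
    static long hostMalloc(long bytes) { return 0xBEEF0000L; }

    static long alloc(long bytes) {
        long hostPtr = hostMalloc(bytes);  // a host pointer always exists first
        if (!tracker.reserve(bytes))
            return hostPtr;                // quota hit -> stay on host
        long devPtr = deviceMalloc(bytes);
        if (devPtr == 0L) {                // device malloc failed
            tracker.release(bytes);
            return hostPtr;                // fall back to zero-copy host memory
        }
        return devPtr;
    }

    public static void main(String[] args) {
        System.out.println(Long.toHexString(alloc(256))); // fits on device
        System.out.println(Long.toHexString(alloc(900))); // falls back to host
    }
}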