Use of org.nd4j.jita.allocator.impl.AllocationShape in project nd4j by deeplearning4j.
The class CudaWorkspace, method alloc.
@Override
public PagedPointer alloc(long requiredMemory, MemoryKind kind, DataBuffer.Type type, boolean initialize) {
    long numElements = requiredMemory / Nd4j.sizeOfDataType(type);

    if (!isUsed.get()) {
        if (disabledCounter.incrementAndGet() % 10 == 0)
            log.warn("Workspace was turned off, and wasn't enabled after {} allocations", disabledCounter.get());

        if (kind == MemoryKind.DEVICE) {
            PagedPointer pointer = new PagedPointer(memoryManager.allocate(requiredMemory, MemoryKind.DEVICE, initialize), numElements);
            externalAllocations.add(new PointersPair(null, pointer));
            return pointer;
        } else {
            PagedPointer pointer = new PagedPointer(memoryManager.allocate(requiredMemory, MemoryKind.HOST, initialize), numElements);
            externalAllocations.add(new PointersPair(pointer, null));
            return pointer;
        }
    }

    // pad the requested size towards an 8-byte boundary so workspace offsets stay aligned
    long div = requiredMemory % 8;
    if (div != 0)
        requiredMemory += div;

    boolean trimmer = (workspaceConfiguration.getPolicyReset() == ResetPolicy.ENDOFBUFFER_REACHED
                    && requiredMemory + cycleAllocations.get() > initialBlockSize.get()
                    && initialBlockSize.get() > 0 && kind == MemoryKind.DEVICE) || trimmedMode.get();

    if (trimmer && workspaceConfiguration.getPolicySpill() == SpillPolicy.REALLOCATE && !trimmedMode.get()) {
        trimmedMode.set(true);
        trimmedStep.set(stepsCount.get());
    }

    if (kind == MemoryKind.DEVICE) {
        if (deviceOffset.get() + requiredMemory <= currentSize.get() && !trimmer) {
            cycleAllocations.addAndGet(requiredMemory);
            long prevOffset = deviceOffset.getAndAdd(requiredMemory);

            if (workspaceConfiguration.getPolicyMirroring() == MirroringPolicy.HOST_ONLY)
                return null;

            PagedPointer ptr = workspace.getDevicePointer().withOffset(prevOffset, numElements);

            if (isDebug.get())
                log.info("Workspace [{}] device_{}: alloc array of {} bytes, capacity of {} elements; prevOffset: {}; newOffset: {}; size: {}; address: {}",
                                id, Nd4j.getAffinityManager().getDeviceForCurrentThread(), requiredMemory, numElements,
                                prevOffset, deviceOffset.get(), currentSize.get(), ptr.address());

            if (initialize) {
                // CudaContext context = AtomicAllocator.getInstance().getMemoryHandler().getCudaContext();
                CudaContext context = (CudaContext) AtomicAllocator.getInstance().getDeviceContext().getContext();

                int ret = NativeOpsHolder.getInstance().getDeviceNativeOps().memsetAsync(ptr, 0, requiredMemory, 0, context.getSpecialStream());
                if (ret == 0)
                    throw new ND4JIllegalStateException("memset failed device_" + Nd4j.getAffinityManager().getDeviceForCurrentThread());

                context.syncSpecialStream();
            }

            return ptr;
        } else {
            // spill
            if (workspaceConfiguration.getPolicyReset() == ResetPolicy.ENDOFBUFFER_REACHED && currentSize.get() > 0 && !trimmer) {
                // log.info("End of space reached. Current offset: {}; requiredMemory: {}", deviceOffset.get(), requiredMemory);
                reset();
                resetPlanned.set(true);
                return alloc(requiredMemory, kind, type, initialize);
            }

            if (!trimmer)
                spilledAllocationsSize.addAndGet(requiredMemory);
            else
                pinnedAllocationsSize.addAndGet(requiredMemory);

            if (isDebug.get()) {
                log.info("Workspace [{}] device_{}: spilled DEVICE array of {} bytes, capacity of {} elements",
                                id, Nd4j.getAffinityManager().getDeviceForCurrentThread(), requiredMemory, numElements);
            }
            // Nd4j.getWorkspaceManager().printAllocationStatisticsForCurrentThread();

            AllocationShape shape = new AllocationShape(requiredMemory / Nd4j.sizeOfDataType(type), Nd4j.sizeOfDataType(type), type);

            cycleAllocations.addAndGet(requiredMemory);

            if (workspaceConfiguration.getPolicyMirroring() == MirroringPolicy.HOST_ONLY)
                return null;

            switch (workspaceConfiguration.getPolicySpill()) {
                case REALLOCATE:
                case EXTERNAL:
                    if (!trimmer) {
                        externalCount.incrementAndGet();
                        // AtomicAllocator.getInstance().getMemoryHandler().getMemoryProvider().malloc(shape, null, AllocationStatus.DEVICE).getDevicePointer()
                        PagedPointer pointer = new PagedPointer(memoryManager.allocate(requiredMemory, MemoryKind.DEVICE, initialize), numElements);
                        // pointer.setLeaked(true);
                        pointer.isLeaked();

                        externalAllocations.add(new PointersPair(null, pointer));
                        return pointer;
                    } else {
                        pinnedCount.incrementAndGet();
                        // AtomicAllocator.getInstance().getMemoryHandler().getMemoryProvider().malloc(shape, null, AllocationStatus.DEVICE).getDevicePointer()
                        PagedPointer pointer = new PagedPointer(memoryManager.allocate(requiredMemory, MemoryKind.DEVICE, initialize), numElements);
                        // pointer.setLeaked(true);
                        pointer.isLeaked();

                        pinnedAllocations.add(new PointersPair(stepsCount.get(), requiredMemory, null, pointer));
                        return pointer;
                    }
                case FAIL:
                default: {
                    throw new ND4JIllegalStateException("Can't allocate memory: Workspace is full");
                }
            }
        }
    } else if (kind == MemoryKind.HOST) {
        if (hostOffset.get() + requiredMemory <= currentSize.get() && !trimmer) {
            long prevOffset = hostOffset.getAndAdd(requiredMemory);

            PagedPointer ptr = workspace.getHostPointer().withOffset(prevOffset, numElements);

            if (initialize)
                Pointer.memset(ptr, 0, requiredMemory);

            return ptr;
        } else {
            // log.info("Spilled HOST array of {} bytes, capacity of {} elements", requiredMemory, numElements);
            AllocationShape shape = new AllocationShape(requiredMemory / Nd4j.sizeOfDataType(type), Nd4j.sizeOfDataType(type), type);

            switch (workspaceConfiguration.getPolicySpill()) {
                case REALLOCATE:
                case EXTERNAL:
                    if (!trimmer) {
                        // memoryManager.allocate(requiredMemory, MemoryKind.HOST, true)
                        // AtomicAllocator.getInstance().getMemoryHandler().getMemoryProvider().malloc(shape, null, AllocationStatus.DEVICE).getDevicePointer()
                        PagedPointer pointer = new PagedPointer(memoryManager.allocate(requiredMemory, MemoryKind.HOST, initialize), numElements);
                        // pointer.setLeaked(true);

                        externalAllocations.add(new PointersPair(pointer, null));
                        return pointer;
                    } else {
                        // AtomicAllocator.getInstance().getMemoryHandler().getMemoryProvider().malloc(shape, null, AllocationStatus.DEVICE).getDevicePointer()
                        PagedPointer pointer = new PagedPointer(memoryManager.allocate(requiredMemory, MemoryKind.HOST, initialize), numElements);
                        // pointer.setLeaked(true);
                        pointer.isLeaked();

                        pinnedAllocations.add(new PointersPair(stepsCount.get(), 0L, pointer, null));
                        return pointer;
                    }
                case FAIL:
                default: {
                    throw new ND4JIllegalStateException("Can't allocate memory: Workspace is full");
                }
            }
        }
    } else
        throw new ND4JIllegalStateException("Unknown MemoryKind was passed in: " + kind);

    // throw new ND4JIllegalStateException("Shouldn't ever reach this line");
}
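For context, a minimal caller-side sketch of how a workspace is normally activated so that array allocations are routed through alloc() above. This is not part of the CudaWorkspace snippet itself, and the workspace id "MY_WS" is an arbitrary placeholder.

// Sketch: arrays created inside an active workspace scope draw their buffers from
// the workspace (and ultimately from alloc()) instead of the regular allocator.
try (MemoryWorkspace ws = Nd4j.getWorkspaceManager().getAndActivateWorkspace("MY_WS")) {
    INDArray scoped = Nd4j.create(10, 10); // backed by workspace memory while the scope is open
}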
Use of org.nd4j.jita.allocator.impl.AllocationShape in project nd4j by deeplearning4j.
The class CudaFullCachingProvider, method free.
/**
 * This method frees the specific chunk of memory described by the given AllocationPoint.
 *
 * PLEASE NOTE: This method may actually ignore the free and keep the released memory chunk for future reuse.
 *
 * @param point the allocation to release
 */
@Override
public void free(AllocationPoint point) {
    if (point.getAllocationStatus() == AllocationStatus.DEVICE) {
        if (point.isConstant())
            return;

        AllocationShape shape = point.getShape();
        int deviceId = point.getDeviceId();
        long address = point.getDevicePointer().address();
        long reqMemory = AllocationUtils.getRequiredMemory(shape);

        if (reqMemory > CudaEnvironment.getInstance().getConfiguration().getMaximumDeviceCacheableLength()
                        || deviceCachedAmount.get(deviceId).get() >= CudaEnvironment.getInstance().getConfiguration().getMaximumHostCache()) {
            // log.info("DEVICE_{} memory purging: {} bytes; MS: {}; MT: {}", deviceId, reqMemory, MAX_GPU_ALLOCATION, MAX_GPU_CACHE);
            super.free(point);
            return;
        }
        // log.info("Saving HOST memory into cache...");

        ensureDeviceCacheHolder(deviceId, shape);

        CacheHolder cache = deviceCache.get(deviceId).get(shape);
        if (point.getDeviceId() != deviceId)
            throw new RuntimeException("deviceId changed!");

        // memory chunks < threshold will be cached no matter what
        if (reqMemory <= FORCED_CACHE_THRESHOLD) {
            cache.put(new CudaPointer(point.getDevicePointer().address()));
            return;
        } else {
            long cacheEntries = cache.size();
            long cacheHeight = deviceCache.get(deviceId).size();

            // total memory allocated within this bucket
            long cacheDepth = cacheEntries * reqMemory;

            // if (cacheDepth < MAX_CACHED_MEMORY / cacheHeight) {
            cache.put(new CudaPointer(point.getDevicePointer().address()));
            return;
            // } else {
            //     super.free(point);
            // }
        }
    }
    super.free(point);
}
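As the javadoc notes, free() may keep the released chunk for reuse instead of actually releasing it. A simplified standalone sketch of that caching decision follows; the parameter names are placeholders, not the provider's actual fields.

// Simplified sketch of the decision made in free() above; all names are placeholders.
static boolean shouldCache(long reqMemory, long maxCacheableLength, long cachedOnDevice, long maxCacheSize) {
    // chunks that are too large, or a cache that is already full, force a real free
    if (reqMemory > maxCacheableLength || cachedOnDevice >= maxCacheSize)
        return false;
    // everything else is kept in the per-shape cache for future reuse
    return true;
}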
Use of org.nd4j.jita.allocator.impl.AllocationShape in project nd4j by deeplearning4j.
The class AllocationUtilsTest, method testGetRequiredMemory2.
@Test
public void testGetRequiredMemory2() throws Exception {
    AllocationShape shape = new AllocationShape();
    shape.setOffset(0);
    shape.setLength(10);
    shape.setStride(1);
    shape.setDataType(DataBuffer.Type.FLOAT);

    assertEquals(40, AllocationUtils.getRequiredMemory(shape));
}
Use of org.nd4j.jita.allocator.impl.AllocationShape in project nd4j by deeplearning4j.
The class AllocationUtilsTest, method testGetRequiredMemory3.
@Test
public void testGetRequiredMemory3() throws Exception {
    AllocationShape shape = new AllocationShape();
    shape.setOffset(0);
    shape.setLength(10);
    shape.setStride(2);
    shape.setDataType(DataBuffer.Type.FLOAT);

    assertEquals(80, AllocationUtils.getRequiredMemory(shape));
}
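The two tests above are consistent with the required memory being length * stride * element size (10 * 1 * 4 = 40 bytes and 10 * 2 * 4 = 80 bytes). A hedged sketch of that arithmetic, not the actual AllocationUtils implementation:

// Assumed formula implied by the expected values above; placeholder method, not library code.
static long requiredMemorySketch(long length, long stride, long elementSize) {
    return length * stride * elementSize; // e.g. 10 * 1 * 4 = 40, 10 * 2 * 4 = 80
}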
Use of org.nd4j.jita.allocator.impl.AllocationShape in project nd4j by deeplearning4j.
The class CudaDirectProviderTest, method mallocDevice.
@Test
public void mallocDevice() throws Exception {
    CudaDirectProvider provider = new CudaDirectProvider();
    AllocationShape shape = new AllocationShape(300000, 4, DataBuffer.Type.FLOAT);
    AllocationPoint point = new AllocationPoint();
    point.setShape(shape);

    point.setPointers(provider.malloc(shape, point, AllocationStatus.DEVICE));

    System.out.println("Allocated...");
    Thread.sleep(1000);

    point.setAllocationStatus(AllocationStatus.DEVICE);

    provider.free(point);

    System.out.println("Deallocated...");
    Thread.sleep(1000);
}
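The shape above describes 300000 elements of 4 bytes each, roughly 1.2 MB of device memory. For comparison, a hedged sketch of a host-side counterpart of the same test, assuming AllocationStatus.HOST is the matching status constant; this is not part of the original test.

// Sketch only: same provider API, host-side allocation instead of device-side.
CudaDirectProvider provider = new CudaDirectProvider();
AllocationShape shape = new AllocationShape(300000, 4, DataBuffer.Type.FLOAT);
AllocationPoint point = new AllocationPoint();
point.setShape(shape);
point.setPointers(provider.malloc(shape, point, AllocationStatus.HOST)); // assumed HOST constant
point.setAllocationStatus(AllocationStatus.HOST);
provider.free(point);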