Use of org.nd4j.linalg.api.memory.pointers.PagedPointer in project nd4j by deeplearning4j.
The class CudaWorkspace, method alloc.
@Override
public PagedPointer alloc(long requiredMemory, MemoryKind kind, DataBuffer.Type type, boolean initialize) {
    long numElements = requiredMemory / Nd4j.sizeOfDataType(type);

    // if the workspace is disabled, fall back to plain external allocations
    if (!isUsed.get()) {
        if (disabledCounter.incrementAndGet() % 10 == 0)
            log.warn("Workspace was turned off, and wasn't enabled after {} allocations", disabledCounter.get());

        if (kind == MemoryKind.DEVICE) {
            PagedPointer pointer = new PagedPointer(memoryManager.allocate(requiredMemory, MemoryKind.DEVICE, initialize), numElements);
            externalAllocations.add(new PointersPair(null, pointer));
            return pointer;
        } else {
            PagedPointer pointer = new PagedPointer(memoryManager.allocate(requiredMemory, MemoryKind.HOST, initialize), numElements);
            externalAllocations.add(new PointersPair(pointer, null));
            return pointer;
        }
    }

    // round the request up to the next 8-byte boundary, e.g. 13 -> 16
    long div = requiredMemory % 8;
    if (div != 0)
        requiredMemory += (8 - div);

    // "trimmed" mode: a cyclic workspace whose current cycle has outgrown the initial block
    boolean trimmer = (workspaceConfiguration.getPolicyReset() == ResetPolicy.ENDOFBUFFER_REACHED
            && requiredMemory + cycleAllocations.get() > initialBlockSize.get()
            && initialBlockSize.get() > 0
            && kind == MemoryKind.DEVICE) || trimmedMode.get();

    if (trimmer && workspaceConfiguration.getPolicySpill() == SpillPolicy.REALLOCATE && !trimmedMode.get()) {
        trimmedMode.set(true);
        trimmedStep.set(stepsCount.get());
    }

    if (kind == MemoryKind.DEVICE) {
        if (deviceOffset.get() + requiredMemory <= currentSize.get() && !trimmer) {
            // fast path: serve the request from the pre-allocated buffer at the current offset
            cycleAllocations.addAndGet(requiredMemory);
            long prevOffset = deviceOffset.getAndAdd(requiredMemory);

            if (workspaceConfiguration.getPolicyMirroring() == MirroringPolicy.HOST_ONLY)
                return null;

            PagedPointer ptr = workspace.getDevicePointer().withOffset(prevOffset, numElements);

            if (isDebug.get())
                log.info("Workspace [{}] device_{}: alloc array of {} bytes, capacity of {} elements; prevOffset: {}; newOffset: {}; size: {}; address: {}",
                        id, Nd4j.getAffinityManager().getDeviceForCurrentThread(), requiredMemory, numElements,
                        prevOffset, deviceOffset.get(), currentSize.get(), ptr.address());

            if (initialize) {
                CudaContext context = (CudaContext) AtomicAllocator.getInstance().getDeviceContext().getContext();

                int ret = NativeOpsHolder.getInstance().getDeviceNativeOps().memsetAsync(ptr, 0, requiredMemory, 0, context.getSpecialStream());
                if (ret == 0)
                    throw new ND4JIllegalStateException("memset failed device_" + Nd4j.getAffinityManager().getDeviceForCurrentThread());

                context.syncSpecialStream();
            }

            return ptr;
        } else {
            // spill path: the pre-allocated buffer can't serve this request
            if (workspaceConfiguration.getPolicyReset() == ResetPolicy.ENDOFBUFFER_REACHED && currentSize.get() > 0 && !trimmer) {
                reset();
                resetPlanned.set(true);
                return alloc(requiredMemory, kind, type, initialize);
            }

            if (!trimmer)
                spilledAllocationsSize.addAndGet(requiredMemory);
            else
                pinnedAllocationsSize.addAndGet(requiredMemory);

            if (isDebug.get()) {
                log.info("Workspace [{}] device_{}: spilled DEVICE array of {} bytes, capacity of {} elements",
                        id, Nd4j.getAffinityManager().getDeviceForCurrentThread(), requiredMemory, numElements);
            }

            cycleAllocations.addAndGet(requiredMemory);

            if (workspaceConfiguration.getPolicyMirroring() == MirroringPolicy.HOST_ONLY)
                return null;

            switch (workspaceConfiguration.getPolicySpill()) {
                case REALLOCATE:
                case EXTERNAL:
                    if (!trimmer) {
                        externalCount.incrementAndGet();
                        PagedPointer pointer = new PagedPointer(memoryManager.allocate(requiredMemory, MemoryKind.DEVICE, initialize), numElements);
                        // note: isLeaked() is a getter; its result is discarded here
                        pointer.isLeaked();
                        externalAllocations.add(new PointersPair(null, pointer));
                        return pointer;
                    } else {
                        pinnedCount.incrementAndGet();
                        PagedPointer pointer = new PagedPointer(memoryManager.allocate(requiredMemory, MemoryKind.DEVICE, initialize), numElements);
                        // note: isLeaked() is a getter; its result is discarded here
                        pointer.isLeaked();
                        pinnedAllocations.add(new PointersPair(stepsCount.get(), requiredMemory, null, pointer));
                        return pointer;
                    }
                case FAIL:
                default: {
                    throw new ND4JIllegalStateException("Can't allocate memory: Workspace is full");
                }
            }
        }
    } else if (kind == MemoryKind.HOST) {
        if (hostOffset.get() + requiredMemory <= currentSize.get() && !trimmer) {
            // fast path: serve the request from the pre-allocated host buffer
            long prevOffset = hostOffset.getAndAdd(requiredMemory);

            PagedPointer ptr = workspace.getHostPointer().withOffset(prevOffset, numElements);

            if (initialize)
                Pointer.memset(ptr, 0, requiredMemory);

            return ptr;
        } else {
            // spill path for HOST allocations
            switch (workspaceConfiguration.getPolicySpill()) {
                case REALLOCATE:
                case EXTERNAL:
                    if (!trimmer) {
                        PagedPointer pointer = new PagedPointer(memoryManager.allocate(requiredMemory, MemoryKind.HOST, initialize), numElements);
                        externalAllocations.add(new PointersPair(pointer, null));
                        return pointer;
                    } else {
                        PagedPointer pointer = new PagedPointer(memoryManager.allocate(requiredMemory, MemoryKind.HOST, initialize), numElements);
                        // note: isLeaked() is a getter; its result is discarded here
                        pointer.isLeaked();
                        pinnedAllocations.add(new PointersPair(stepsCount.get(), 0L, pointer, null));
                        return pointer;
                    }
                case FAIL:
                default: {
                    throw new ND4JIllegalStateException("Can't allocate memory: Workspace is full");
                }
            }
        }
    } else {
        throw new ND4JIllegalStateException("Unknown MemoryKind was passed in: " + kind);
    }
}
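For context, a minimal usage sketch (not part of the project sources; the workspace id, sizes, and the SpillPolicy.EXTERNAL choice are illustrative assumptions): allocations made inside an activated workspace scope are served by the alloc method above, and a request that exceeds the pre-allocated buffer takes the spill branch.

import org.nd4j.linalg.api.memory.MemoryWorkspace;
import org.nd4j.linalg.api.memory.conf.WorkspaceConfiguration;
import org.nd4j.linalg.api.memory.enums.SpillPolicy;
import org.nd4j.linalg.api.ndarray.INDArray;
import org.nd4j.linalg.factory.Nd4j;

public class WorkspaceAllocSketch {
    public static void main(String[] args) {
        // illustrative configuration: 16 MB pre-allocated buffer, spilling to external memory
        WorkspaceConfiguration config = WorkspaceConfiguration.builder()
                .initialSize(16 * 1024L * 1024L)
                .policySpill(SpillPolicy.EXTERNAL)
                .build();

        try (MemoryWorkspace ws = Nd4j.getWorkspaceManager().getAndActivateWorkspace(config, "ALLOC_SKETCH")) {
            // small array: served from the workspace buffer via the fast offset path
            INDArray small = Nd4j.create(1024);
            // oversized array (8M floats = 32 MB): exceeds currentSize, so alloc() spills
            INDArray large = Nd4j.create(8 * 1024 * 1024);
        }
    }
}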
Use of org.nd4j.linalg.api.memory.pointers.PagedPointer in project nd4j by deeplearning4j.
The class AtomicAllocator, method allocateMemory.
/**
 * This method allocates the required chunk of memory in the specified location.
 * <p>
 * PLEASE NOTE: Do not use this method unless you're 100% sure what you're doing.
 *
 * @param requiredMemory shape describing the amount of memory required
 * @param location       target allocation location (host or device)
 */
@Override
public AllocationPoint allocateMemory(DataBuffer buffer, AllocationShape requiredMemory, AllocationStatus location, boolean initialize) {
    AllocationPoint point = new AllocationPoint();

    useTracker.set(System.currentTimeMillis());

    // we use these longs as tracking codes for memory tracking
    Long allocId = objectsTracker.getAndIncrement();

    point.setObjectId(allocId);
    point.setShape(requiredMemory);

    int numBuckets = configuration.getNumberOfGcThreads();
    int bucketId = RandomUtils.nextInt(0, numBuckets);

    GarbageBufferReference reference = new GarbageBufferReference((BaseDataBuffer) buffer, queueMap.get(bucketId), point);
    point.attachReference(reference);
    point.setDeviceId(-1);

    if (buffer.isAttached()) {
        long reqMem = AllocationUtils.getRequiredMemory(requiredMemory);

        // workaround for init order
        getMemoryHandler().getCudaContext();
        point.setDeviceId(Nd4j.getAffinityManager().getDeviceForCurrentThread());

        CudaWorkspace workspace = (CudaWorkspace) Nd4j.getMemoryManager().getCurrentWorkspace();

        PointersPair pair = new PointersPair();

        PagedPointer ptrDev = workspace.alloc(reqMem, MemoryKind.DEVICE, requiredMemory.getDataType(), initialize);
        PagedPointer ptrHost = workspace.alloc(reqMem, MemoryKind.HOST, requiredMemory.getDataType(), initialize);

        pair.setHostPointer(ptrHost);

        if (ptrDev != null) {
            pair.setDevicePointer(ptrDev);
            point.setAllocationStatus(AllocationStatus.DEVICE);
        } else {
            // HOST_ONLY mirroring: device allocation was skipped, so reuse the host pointer
            pair.setDevicePointer(ptrHost);
            point.setAllocationStatus(AllocationStatus.HOST);
        }

        point.setAttached(true);
        point.setPointers(pair);
    } else {
        // we stay naive on PointersPair here: at this level we don't know which
        // pointers are set, so the MemoryHandler decides
        PointersPair pair = memoryHandler.alloc(location, point, requiredMemory, initialize);
        point.setPointers(pair);
    }

    allocationsMap.put(allocId, point);

    return point;
}
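A hedged sketch of how the attached branch above is typically reached (the workspace id is an assumption, not project code): any INDArray created while a workspace is active is backed by an attached DataBuffer, so allocateMemory routes both pointers through CudaWorkspace.alloc.

import org.nd4j.linalg.api.memory.MemoryWorkspace;
import org.nd4j.linalg.api.ndarray.INDArray;
import org.nd4j.linalg.factory.Nd4j;

public class AttachedAllocationSketch {
    public static void main(String[] args) {
        try (MemoryWorkspace ws = Nd4j.getWorkspaceManager().getAndActivateWorkspace("ATTACHED_SKETCH")) {
            // buffer.isAttached() == true here, so allocateMemory() takes the workspace branch:
            // DEVICE and HOST pointers are requested from the current CudaWorkspace, and the
            // AllocationPoint ends up DEVICE- or HOST-backed depending on the mirroring policy
            INDArray attached = Nd4j.create(256);
            boolean insideScope = attached.isAttached(); // true inside the scope
        }
        // outside the scope, the workspace memory is recycled on the next cycle
    }
}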
Use of org.nd4j.linalg.api.memory.pointers.PagedPointer in project nd4j by deeplearning4j.
The class NativeOpExecutioner, method calculateOutputShape.
@Override
public List<int[]> calculateOutputShape(@NonNull CustomOp op) {
    val lc = op.opName().toLowerCase();
    val hash = op.opHash();

    val result = new ArrayList<int[]>();

    if (op.numInputArguments() < 1) {
        return Collections.emptyList();
    }

    val inputBuffers = new PointerPointer<>(op.numInputArguments());
    val inputShapes = new PointerPointer<>(op.numInputArguments());
    val inputArgs = op.inputArguments();
    int cnt = 0;
    for (val in : inputArgs) {
        inputBuffers.put(cnt, in.data().addressPointer());
        inputShapes.put(cnt++, in.shapeInfoDataBuffer().addressPointer());
    }

    val iArgs = op.numIArguments() > 0 ? new IntPointer(op.numIArguments()) : null;
    cnt = 0;
    val iArgs1 = op.iArgs();
    // safe even when iArgs is null: the loop body only runs if numIArguments() > 0
    for (val i : iArgs1)
        iArgs.put(cnt++, i);

    if (Nd4j.dataType() == DataBuffer.Type.FLOAT) {
        val tArgs = op.numTArguments() > 0 ? new FloatPointer(op.numTArguments()) : null;
        val tArgs1 = op.tArgs();
        cnt = 0;
        for (val t : tArgs1)
            tArgs.put(cnt++, (float) t);

        val ptrptr = (Nd4jCpu.ShapeList) loop.calculateOutputShapesFloat(null, hash, inputBuffers, inputShapes,
                op.numInputArguments(), tArgs, op.numTArguments(), iArgs, op.numIArguments());
        if (ptrptr == null)
            throw new RuntimeException("Output shape calculation failed for op: " + lc);

        for (int e = 0; e < ptrptr.size(); e++)
            result.add(getShapeFromPointer(new PagedPointer(ptrptr.at(e)).asIntPointer()));

        loop.deleteShapeList(ptrptr);
    } else if (Nd4j.dataType() == DataBuffer.Type.DOUBLE) {
        val tArgs = op.numTArguments() > 0 ? new DoublePointer(op.numTArguments()) : null;
        val tArgs1 = op.tArgs();
        cnt = 0;
        for (val t : tArgs1)
            tArgs.put(cnt++, t);

        val ptrptr = (Nd4jCpu.ShapeList) loop.calculateOutputShapesDouble(null, hash, inputBuffers, inputShapes,
                op.numInputArguments(), tArgs, op.numTArguments(), iArgs, op.numIArguments());
        if (ptrptr == null)
            throw new RuntimeException("Output shape calculation failed for op: " + lc);

        for (int e = 0; e < ptrptr.size(); e++)
            result.add(getShapeFromPointer(new PagedPointer(ptrptr.at(e)).asIntPointer()));

        loop.deleteShapeList(ptrptr);
    } else if (Nd4j.dataType() == DataBuffer.Type.HALF) {
        val tArgs = op.numTArguments() > 0 ? new ShortPointer(op.numTArguments()) : null;
        val tArgs1 = op.tArgs();
        cnt = 0;
        for (val t : tArgs1)
            tArgs.put(cnt++, ArrayUtil.toHalf(t));

        val ptrptr = (Nd4jCpu.ShapeList) loop.calculateOutputShapesHalf(null, hash, inputBuffers, inputShapes,
                op.numInputArguments(), tArgs, op.numTArguments(), iArgs, op.numIArguments());
        if (ptrptr == null)
            throw new RuntimeException("Output shape calculation failed for op: " + lc);

        for (int e = 0; e < ptrptr.size(); e++)
            result.add(getShapeFromPointer(new PagedPointer(ptrptr.at(e)).asIntPointer()));

        loop.deleteShapeList(ptrptr);
    }
    return result;
}
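A minimal sketch of calling this method (assuming the generic "add" op and the DynamicCustomOp builder; input shapes are illustrative): the op is built but not executed, and the native library is asked what output shape(s) it would produce.

import java.util.Arrays;
import java.util.List;
import org.nd4j.linalg.api.ndarray.INDArray;
import org.nd4j.linalg.api.ops.CustomOp;
import org.nd4j.linalg.api.ops.DynamicCustomOp;
import org.nd4j.linalg.factory.Nd4j;

public class OutputShapeSketch {
    public static void main(String[] args) {
        INDArray x = Nd4j.create(3, 4);
        INDArray y = Nd4j.create(3, 4);

        // build a custom op without executing it
        CustomOp op = DynamicCustomOp.builder("add")
                .addInputs(x, y)
                .build();

        // query the output shape(s) the op would produce
        List<int[]> shapes = Nd4j.getExecutioner().calculateOutputShape(op);
        System.out.println(Arrays.toString(shapes.get(0))); // expected: [3, 4]
    }
}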
Use of org.nd4j.linalg.api.memory.pointers.PagedPointer in project nd4j by deeplearning4j.
The class CpuWorkspace, method init.
@Override
protected void init() {
    super.init();

    if (workspaceConfiguration.getPolicyLocation() == LocationPolicy.RAM) {
        if (currentSize.get() > 0) {
            isInit.set(true);

            if (isDebug.get())
                log.info("Allocating [{}] workspace of {} bytes...", id, currentSize.get());

            workspace.setHostPointer(new PagedPointer(memoryManager.allocate(currentSize.get() + SAFETY_OFFSET, MemoryKind.HOST, true)));
        }
    } else if (workspaceConfiguration.getPolicyLocation() == LocationPolicy.MMAP) {
        long flen = tempFile.length();
        mmap = NativeOpsHolder.getInstance().getDeviceNativeOps().mmapFile(null, tempFile.getAbsolutePath(), flen);

        if (mmap == null)
            throw new RuntimeException("MMAP failed");

        workspace.setHostPointer(new PagedPointer(mmap.get(0)));
    }
}
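A hedged configuration sketch for the MMAP branch above (the size and workspace id are assumptions): with LocationPolicy.MMAP, the workspace host pointer is backed by a memory-mapped temp file instead of RAM, which is what the mmapFile call in init() sets up.

import org.nd4j.linalg.api.memory.MemoryWorkspace;
import org.nd4j.linalg.api.memory.conf.WorkspaceConfiguration;
import org.nd4j.linalg.api.memory.enums.LocationPolicy;
import org.nd4j.linalg.api.ndarray.INDArray;
import org.nd4j.linalg.factory.Nd4j;

public class MmapWorkspaceSketch {
    public static void main(String[] args) {
        // illustrative: 100 MB memory-mapped backing file
        WorkspaceConfiguration mmapConfig = WorkspaceConfiguration.builder()
                .initialSize(100 * 1024L * 1024L)
                .policyLocation(LocationPolicy.MMAP)
                .build();

        try (MemoryWorkspace ws = Nd4j.getWorkspaceManager().getAndActivateWorkspace(mmapConfig, "MMAP_SKETCH")) {
            // arrays created here live in the mapped file prepared by init() above
            INDArray arr = Nd4j.create(10_000);
        }
    }
}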
Use of org.nd4j.linalg.api.memory.pointers.PagedPointer in project nd4j by deeplearning4j.
The class CudaWorkspace, method init.
@Override
protected void init() {
    if (workspaceConfiguration.getPolicyLocation() == LocationPolicy.MMAP) {
        throw new ND4JIllegalStateException("CUDA does not support MMAP workspaces yet");
    }

    super.init();

    if (currentSize.get() > 0) {
        isInit.set(true);

        long bytes = currentSize.get();

        if (isDebug.get())
            log.info("Allocating [{}] workspace on device_{}, {} bytes...",
                    id, Nd4j.getAffinityManager().getDeviceForCurrentThread(), bytes);

        if (isDebug.get()) {
            Nd4j.getWorkspaceManager().printAllocationStatisticsForCurrentThread();
        }

        Pointer ptr = memoryManager.allocate((bytes + SAFETY_OFFSET), MemoryKind.HOST, false);
        if (ptr == null)
            throw new ND4JIllegalStateException("Can't allocate memory for workspace");

        workspace.setHostPointer(new PagedPointer(ptr));

        // the device-side buffer is skipped entirely under HOST_ONLY mirroring
        if (workspaceConfiguration.getPolicyMirroring() != MirroringPolicy.HOST_ONLY)
            workspace.setDevicePointer(new PagedPointer(memoryManager.allocate((bytes + SAFETY_OFFSET), MemoryKind.DEVICE, false)));
    }
}
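A hedged sketch for the mirroring check at the end of init() (configuration values and the workspace id are illustrative assumptions): under MirroringPolicy.HOST_ONLY the device pointer is never allocated, so the workspace consumes host memory only and device-side alloc() calls return null, as seen in the alloc method earlier.

import org.nd4j.linalg.api.memory.MemoryWorkspace;
import org.nd4j.linalg.api.memory.conf.WorkspaceConfiguration;
import org.nd4j.linalg.api.memory.enums.MirroringPolicy;
import org.nd4j.linalg.factory.Nd4j;

public class HostOnlyWorkspaceSketch {
    public static void main(String[] args) {
        WorkspaceConfiguration hostOnly = WorkspaceConfiguration.builder()
                .initialSize(8 * 1024L * 1024L)
                .policyMirroring(MirroringPolicy.HOST_ONLY) // skip the device-side buffer
                .build();

        try (MemoryWorkspace ws = Nd4j.getWorkspaceManager().getAndActivateWorkspace(hostOnly, "HOST_ONLY_SKETCH")) {
            // only the host pointer was allocated by init(); DEVICE requests return null
        }
    }
}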