Use of org.nd4j.linalg.api.memory.pointers.PagedPointer in project nd4j by deeplearning4j.
From the class CudaExecutioner, method calculateOutputShape:
@Override
public List<int[]> calculateOutputShape(@NonNull CustomOp op) {
    Nd4j.getExecutioner().commit();

    val lc = op.opName().toLowerCase();
    val hash = op.opHash();

    val result = new ArrayList<int[]>();

    val inputBuffers = new PointerPointer<>(op.inputArguments().length);
    val inputShapes = new PointerPointer<>(op.inputArguments().length);

    int cnt = 0;
    for (val in : op.inputArguments()) {
        // NOT A TYPO: shape functions work on host side only
        inputBuffers.put(cnt, in.data().addressPointer());
        inputShapes.put(cnt++, in.shapeInfoDataBuffer().addressPointer());
    }

    val iArgs = op.iArgs().length > 0 ? new IntPointer(op.iArgs().length) : null;
    cnt = 0;
    for (val i : op.iArgs())
        iArgs.put(cnt++, i);

    if (Nd4j.dataType() == DataBuffer.Type.FLOAT) {
        val tArgs = op.tArgs().length > 0 ? new FloatPointer(op.tArgs().length) : null;

        cnt = 0;
        for (val t : op.tArgs())
            tArgs.put(cnt++, (float) t);

        val ptrptr = (Nd4jCuda.ShapeList) nativeOps.calculateOutputShapesFloat(null, hash, inputBuffers, inputShapes, op.inputArguments().length, tArgs, op.tArgs().length, iArgs, op.iArgs().length);

        if (ptrptr == null)
            throw new RuntimeException("calculateOutputShapesFloat() failed for op [" + lc + "]");

        for (int e = 0; e < ptrptr.size(); e++)
            result.add(getShapeFromPointer(new PagedPointer(ptrptr.at(e)).asIntPointer()));

        nativeOps.deleteShapeList(ptrptr);
    } else if (Nd4j.dataType() == DataBuffer.Type.DOUBLE) {
        val tArgs = op.tArgs().length > 0 ? new DoublePointer(op.tArgs().length) : null;

        cnt = 0;
        for (val t : op.tArgs())
            tArgs.put(cnt++, t);

        val ptrptr = (Nd4jCuda.ShapeList) nativeOps.calculateOutputShapesDouble(null, hash, inputBuffers, inputShapes, op.inputArguments().length, tArgs, op.tArgs().length, iArgs, op.iArgs().length);

        if (ptrptr == null)
            throw new RuntimeException("calculateOutputShapesDouble() failed for op [" + lc + "]");

        for (int e = 0; e < ptrptr.size(); e++)
            result.add(getShapeFromPointer(new PagedPointer(ptrptr.at(e)).asIntPointer()));

        nativeOps.deleteShapeList(ptrptr);
    } else if (Nd4j.dataType() == DataBuffer.Type.HALF) {
        val tArgs = op.tArgs().length > 0 ? new ShortPointer(op.tArgs().length) : null;

        cnt = 0;
        for (val t : op.tArgs())
            tArgs.put(cnt++, ArrayUtil.toHalf((float) t));

        val ptrptr = (Nd4jCuda.ShapeList) nativeOps.calculateOutputShapesHalf(null, hash, inputBuffers, inputShapes, op.inputArguments().length, tArgs, op.tArgs().length, iArgs, op.iArgs().length);

        if (ptrptr == null)
            throw new RuntimeException("calculateOutputShapesHalf() failed for op [" + lc + "]");

        for (int e = 0; e < ptrptr.size(); e++)
            result.add(getShapeFromPointer(new PagedPointer(ptrptr.at(e)).asIntPointer()));

        nativeOps.deleteShapeList(ptrptr);
    }
    return result;
}
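A minimal usage sketch for the shape calculation above; it assumes nd4j's DynamicCustomOp builder API, and the op name and shapes are illustrative only:

// Sketch: ask the executioner for output shapes without executing the op.
CustomOp op = DynamicCustomOp.builder("add")
        .addInputs(Nd4j.create(3, 4), Nd4j.create(3, 4))
        .build();

List<int[]> shapes = Nd4j.getExecutioner().calculateOutputShape(op);
for (int[] shape : shapes)
    System.out.println(Arrays.toString(shape)); // expected: [3, 4]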
Use of org.nd4j.linalg.api.memory.pointers.PagedPointer in project nd4j by deeplearning4j.
From the class NativeGraphExecutioner, method executeGraph:
/**
 * This method executes the given graph and returns the results.
 *
 * @param sd            graph to execute
 * @param configuration execution configuration
 * @return output arrays produced by the graph
 */
@Override
public INDArray[] executeGraph(SameDiff sd, ExecutorConfiguration configuration) {
    Map<Integer, Node> intermediate = new HashMap<>();

    ByteBuffer buffer = convertToFlatBuffers(sd, configuration, intermediate);

    BytePointer bPtr = new BytePointer(buffer);
    log.info("Buffer length: {}", buffer.limit());

    Pointer res = NativeOpsHolder.getInstance().getDeviceNativeOps().executeFlatGraphFloat(null, bPtr);
    if (res == null)
        throw new ND4JIllegalStateException("Graph execution failed");

    // FIXME: this is BAD: the native side doesn't report the result size, so 1 MB is assumed as an upper bound
    PagedPointer pagedPointer = new PagedPointer(res, 1024 * 1024L);
    FlatResult fr = FlatResult.getRootAsFlatResult(pagedPointer.asBytePointer().asByteBuffer());

    log.info("VarMap: {}", sd.variableMap());

    INDArray[] results = new INDArray[fr.variablesLength()];

    for (int e = 0; e < fr.variablesLength(); e++) {
        FlatVariable var = fr.variables(e);
        log.info("Var received: id: [{}:{}/<{}>];", var.id().first(), var.id().second(), var.name());
        FlatArray ndarray = var.ndarray();

        INDArray val = Nd4j.createFromFlatArray(ndarray);
        results[e] = val;

        if (var.name() != null && sd.variableMap().containsKey(var.name())) {
            sd.associateArrayWithVariable(val, sd.variableMap().get(var.name()));
        } else {
            if (sd.variableMap().get(var.name()) != null) {
                sd.associateArrayWithVariable(val, sd.getVariable(var.name()));
            } else {
                throw new ND4JIllegalStateException("Unknown variable received as result: [" + var.name() + "]");
            }
        }
    }

    return results;
}
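A hedged usage sketch for executeGraph; the SameDiff ops and the ExecutorConfiguration builder fields below are assumptions based on typical nd4j usage, not taken from this source:

// Sketch: build a tiny SameDiff graph and run it through the native executioner.
SameDiff sd = SameDiff.create();
SDVariable in = sd.var("in", Nd4j.linspace(1, 4, 4));
SDVariable out = sd.tanh("out", in);

// assumption: Lombok-style builder with these fields
ExecutorConfiguration conf = ExecutorConfiguration.builder()
        .outputMode(OutputMode.IMPLICIT)
        .executionMode(ExecutionMode.SEQUENTIAL)
        .build();

INDArray[] results = new NativeGraphExecutioner().executeGraph(sd, conf);
// each result is also associated back to its SameDiff variable by name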
Use of org.nd4j.linalg.api.memory.pointers.PagedPointer in project nd4j by deeplearning4j.
From the class Nd4jWorkspace, method alloc:
public PagedPointer alloc(long requiredMemory, MemoryKind kind, DataBuffer.Type type, boolean initialize) {
    /*
        Just two options here:
        1) reqMem + hostOffset < totalSize - we just return pointer + offset
        2) go for either external spilled, or pinned allocation
     */

    // we enforce 8 byte alignment to ensure CUDA doesn't blame us
    long div = requiredMemory % 8;
    if (div != 0)
        requiredMemory += (8 - div);

    long numElements = requiredMemory / Nd4j.sizeOfDataType(type);

    // shortcut made to skip workspace
    if (!isUsed.get()) {
        if (disabledCounter.incrementAndGet() % 10 == 0)
            log.warn("Workspace was turned off, and wasn't enabled after {} allocations", disabledCounter.get());

        PagedPointer pointer = new PagedPointer(memoryManager.allocate(requiredMemory, MemoryKind.HOST, initialize), numElements);
        externalAllocations.add(new PointersPair(pointer, null));
        return pointer;
    }

    /*
        Trimmed mode is possible for the cyclic workspace mode, used in AsyncDataSetIterator, MQ, etc.
        The idea is simple: if one of the datasets coming out of the iterator is larger than expected,
        we should reallocate the workspace to match that size. So we switch to trimmed mode, all
        allocations become "pinned", and eventually the workspace is reallocated.
     */
    boolean trimmer = (workspaceConfiguration.getPolicyReset() == ResetPolicy.ENDOFBUFFER_REACHED && requiredMemory + cycleAllocations.get() > initialBlockSize.get() && initialBlockSize.get() > 0) || trimmedMode.get();

    if (trimmer && workspaceConfiguration.getPolicySpill() == SpillPolicy.REALLOCATE && !trimmedMode.get()) {
        trimmedMode.set(true);
        trimmedStep.set(stepsCount.get());
    }

    // if size is enough - allocate from workspace
    if (hostOffset.get() + requiredMemory <= currentSize.get() && !trimmer) {
        cycleAllocations.addAndGet(requiredMemory);
        long prevOffset = hostOffset.getAndAdd(requiredMemory);
        deviceOffset.set(hostOffset.get());

        PagedPointer ptr = workspace.getHostPointer().withOffset(prevOffset, numElements);

        if (isDebug.get())
            log.info("Workspace [{}]: Allocating array of {} bytes, capacity of {} elements, prevOffset: {}; currentOffset: {}; address: {}", id, requiredMemory, numElements, prevOffset, hostOffset.get(), ptr.address());

        if (initialize)
            Pointer.memset(ptr, 0, requiredMemory);

        return ptr;
    } else {
        // in case of circular mode - we just reset offsets, and start from the beginning of the workspace
        if (workspaceConfiguration.getPolicyReset() == ResetPolicy.ENDOFBUFFER_REACHED && currentSize.get() > 0 && !trimmer) {
            reset();
            resetPlanned.set(true);
            return alloc(requiredMemory, kind, type, initialize);
        }

        // updating respective counters
        if (!trimmer)
            spilledAllocationsSize.addAndGet(requiredMemory);
        else
            pinnedAllocationsSize.addAndGet(requiredMemory);

        if (isDebug.get())
            log.info("Workspace [{}]: step: {}, spilled {} bytes, capacity of {} elements", id, stepsCount.get(), requiredMemory, numElements);

        switch (workspaceConfiguration.getPolicySpill()) {
            case REALLOCATE:
            case EXTERNAL:
                cycleAllocations.addAndGet(requiredMemory);
                if (!trimmer) {
                    externalCount.incrementAndGet();
                    PagedPointer pointer = new PagedPointer(memoryManager.allocate(requiredMemory, MemoryKind.HOST, initialize), numElements);
                    externalAllocations.add(new PointersPair(pointer, null));
                    return pointer;
                } else {
                    pinnedCount.incrementAndGet();
                    PagedPointer pointer = new PagedPointer(memoryManager.allocate(requiredMemory, MemoryKind.HOST, initialize), numElements);
                    pinnedAllocations.add(new PointersPair(stepsCount.get(), requiredMemory, pointer, null));
                    return pointer;
                }
            case FAIL:
            default:
                throw new ND4JIllegalStateException("Can't allocate memory: Workspace is full");
        }
    }
}
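For reference, the 8-byte alignment at the top of alloc rounds the request up to the next multiple of 8 before any offsets are computed. A standalone sketch of the same arithmetic (the helper name is hypothetical, not nd4j API):

// Round a byte count up to the next multiple of 8, as alloc does above.
static long alignTo8(long requiredMemory) {
    long div = requiredMemory % 8;
    return div == 0 ? requiredMemory : requiredMemory + (8 - div);
}

// alignTo8(10) == 16, alignTo8(16) == 16, alignTo8(17) == 24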