Search in sources :

Example 6 with PagedPointer

use of org.nd4j.linalg.api.memory.pointers.PagedPointer in project nd4j by deeplearning4j.

The following example is from the class CudaExecutioner, method calculateOutputShape.

/**
 * Calculates the output shapes for the given custom op by delegating to the
 * native shape-inference functions. Shape functions run on the host side only,
 * so host address pointers are passed for both data and shape-info buffers.
 *
 * @param op custom op whose output shapes should be inferred; must not be null
 * @return list of shape-info int arrays, one entry per op output
 * @throws RuntimeException if the native shape calculation returns null
 */
@Override
public List<int[]> calculateOutputShape(@NonNull CustomOp op) {
    // Flush any pending device operations before reading host-side buffers.
    Nd4j.getExecutioner().commit();
    val lc = op.opName().toLowerCase();
    val hash = op.opHash();
    val result = new ArrayList<int[]>();
    val inputBuffers = new PointerPointer<>(op.inputArguments().length);
    val inputShapes = new PointerPointer<>(op.inputArguments().length);
    int cnt = 0;
    for (val in : op.inputArguments()) {
        // NOT A TYPO: shape functions work on host side only
        inputBuffers.put(cnt, in.data().addressPointer());
        inputShapes.put(cnt++, in.shapeInfoDataBuffer().addressPointer());
    }
    // iArgs stays null when there are no integer args; the loop below is then
    // a no-op over an empty array, so no NPE can occur.
    val iArgs = op.iArgs().length > 0 ? new IntPointer(op.iArgs().length) : null;
    cnt = 0;
    for (val i : op.iArgs()) iArgs.put(cnt++, i);
    if (Nd4j.dataType() == DataBuffer.Type.FLOAT) {
        val tArgs = op.tArgs().length > 0 ? new FloatPointer(op.tArgs().length) : null;
        cnt = 0;
        for (val t : op.tArgs()) tArgs.put(cnt++, (float) t);
        val ptrptr = (Nd4jCuda.ShapeList) nativeOps.calculateOutputShapesFloat(null, hash, inputBuffers, inputShapes, op.inputArguments().length, tArgs, op.tArgs().length, iArgs, op.iArgs().length);
        if (ptrptr == null)
            throw new RuntimeException();
        for (int e = 0; e < ptrptr.size(); e++) result.add(getShapeFromPointer(new PagedPointer(ptrptr.at(e)).asIntPointer()));
        // Native side owns the ShapeList; release it once shapes are copied out.
        nativeOps.deleteShapeList(ptrptr);
    } else if (Nd4j.dataType() == DataBuffer.Type.DOUBLE) {
        val tArgs = op.tArgs().length > 0 ? new DoublePointer(op.tArgs().length) : null;
        cnt = 0;
        // BUGFIX: previously cast t to float (copy-paste from the FLOAT branch),
        // silently truncating double-precision t-args before passing them natively.
        for (val t : op.tArgs()) tArgs.put(cnt++, t);
        val ptrptr = (Nd4jCuda.ShapeList) nativeOps.calculateOutputShapesDouble(null, hash, inputBuffers, inputShapes, op.inputArguments().length, tArgs, op.tArgs().length, iArgs, op.iArgs().length);
        if (ptrptr == null)
            throw new RuntimeException();
        for (int e = 0; e < ptrptr.size(); e++) result.add(getShapeFromPointer(new PagedPointer(ptrptr.at(e)).asIntPointer()));
        nativeOps.deleteShapeList(ptrptr);
    } else if (Nd4j.dataType() == DataBuffer.Type.HALF) {
        val tArgs = op.tArgs().length > 0 ? new ShortPointer(op.tArgs().length) : null;
        cnt = 0;
        // Half precision is stored as shorts; convert via ArrayUtil.toHalf.
        for (val t : op.tArgs()) tArgs.put(cnt++, ArrayUtil.toHalf((float) t));
        val ptrptr = (Nd4jCuda.ShapeList) nativeOps.calculateOutputShapesHalf(null, hash, inputBuffers, inputShapes, op.inputArguments().length, tArgs, op.tArgs().length, iArgs, op.iArgs().length);
        if (ptrptr == null)
            throw new RuntimeException();
        for (int e = 0; e < ptrptr.size(); e++) result.add(getShapeFromPointer(new PagedPointer(ptrptr.at(e)).asIntPointer()));
        nativeOps.deleteShapeList(ptrptr);
    }
    return result;
}
Also used : lombok.val(lombok.val) Nd4jCuda(org.nd4j.nativeblas.Nd4jCuda) PagedPointer(org.nd4j.linalg.api.memory.pointers.PagedPointer) AllocationPoint(org.nd4j.jita.allocator.impl.AllocationPoint)

Example 7 with PagedPointer

use of org.nd4j.linalg.api.memory.pointers.PagedPointer in project nd4j by deeplearning4j.

The following example is from the class NativeGraphExecutioner, method executeGraph.

/**
 * This method executes given graph and returns results
 *
 * @param sd
 * @return
 */
/**
 * Executes the given SameDiff graph natively and returns the resulting arrays.
 * The graph is first serialized to FlatBuffers, passed to the native executor,
 * and the flat result is deserialized back into INDArrays.
 *
 * @param sd            graph to execute
 * @param configuration executor configuration used during flat-buffer conversion
 * @return one INDArray per result variable, in the order reported by the native side
 * @throws ND4JIllegalStateException if native execution fails or an unknown
 *                                   variable name is returned
 */
@Override
public INDArray[] executeGraph(SameDiff sd, ExecutorConfiguration configuration) {
    Map<Integer, Node> intermediate = new HashMap<>();
    ByteBuffer buffer = convertToFlatBuffers(sd, configuration, intermediate);
    BytePointer bPtr = new BytePointer(buffer);
    log.info("Buffer length: {}", buffer.limit());
    Pointer res = NativeOpsHolder.getInstance().getDeviceNativeOps().executeFlatGraphFloat(null, bPtr);
    if (res == null)
        throw new ND4JIllegalStateException("Graph execution failed");
    // FIXME: this is BAD
    // NOTE(review): the 1 MB size is a hard-coded assumption about the native
    // result buffer; results larger than this would be truncated — confirm
    // against the native executeFlatGraphFloat contract.
    PagedPointer pagedPointer = new PagedPointer(res, 1024 * 1024L);
    FlatResult fr = FlatResult.getRootAsFlatResult(pagedPointer.asBytePointer().asByteBuffer());
    log.info("VarMap: {}", sd.variableMap());
    INDArray[] results = new INDArray[fr.variablesLength()];
    for (int e = 0; e < fr.variablesLength(); e++) {
        FlatVariable var = fr.variables(e);
        log.info("Var received: id: [{}:{}/<{}>];", var.id().first(), var.id().second(), var.name());
        FlatArray ndarray = var.ndarray();
        INDArray val = Nd4j.createFromFlatArray(ndarray);
        results[e] = val;
        // If the variable name maps to a known SameDiff variable, attach the
        // result array to it so downstream code can read it from the graph.
        if (var.name() != null && sd.variableMap().containsKey(var.name())) {
            // log.info("VarName: {}; Exists: {}; NDArrayInfo: {};", var.opName(), sd.variableMap().containsKey(var.opName()), sd.getVertexToArray().containsKey(var.opName()));
            // log.info("storing: {}; array: {}", var.name(), val);
            sd.associateArrayWithVariable(val, sd.variableMap().get(var.name()));
        } else {
            // log.info("Original id: {}; out: {}; out2: {}", original, sd.getVertexIdxToInfo().get(original), graph.getVariableForVertex(original));
            // NOTE(review): this re-checks the map that the branch above already
            // found the name absent from (or name is null) — a null name would
            // fall through to the exception below. Looks intentional as a
            // defensive second lookup; verify before simplifying.
            if (sd.variableMap().get(var.name()) != null) {
                sd.associateArrayWithVariable(val, sd.getVariable(var.name()));
            } else {
                throw new ND4JIllegalStateException("Unknown variable received as result: [" + var.name() + "]");
            }
        }
    }
    return results;
}
Also used : HashMap(java.util.HashMap) BytePointer(org.bytedeco.javacpp.BytePointer) BytePointer(org.bytedeco.javacpp.BytePointer) PagedPointer(org.nd4j.linalg.api.memory.pointers.PagedPointer) Pointer(org.bytedeco.javacpp.Pointer) ByteBuffer(java.nio.ByteBuffer) PagedPointer(org.nd4j.linalg.api.memory.pointers.PagedPointer) FlatArray(org.nd4j.graph.FlatArray) FlatVariable(org.nd4j.graph.FlatVariable) INDArray(org.nd4j.linalg.api.ndarray.INDArray) ND4JIllegalStateException(org.nd4j.linalg.exception.ND4JIllegalStateException) FlatResult(org.nd4j.graph.FlatResult)

Example 8 with PagedPointer

use of org.nd4j.linalg.api.memory.pointers.PagedPointer in project nd4j by deeplearning4j.

The following example is from the class Nd4jWorkspace, method alloc.

/**
 * Allocates memory from this workspace (or spills/pins it externally).
 *
 * @param requiredMemory number of bytes requested; rounded up to 8-byte alignment
 * @param kind           memory kind (host/device) requested by the caller
 * @param type           data type, used to derive the element count of the pointer
 * @param initialize     if true, the returned memory is zeroed
 * @return paged pointer to the allocated region
 * @throws ND4JIllegalStateException if the workspace is full and spill policy is FAIL
 */
public PagedPointer alloc(long requiredMemory, MemoryKind kind, DataBuffer.Type type, boolean initialize) {
    /*
            just two options here:
            1) reqMem + hostOffset < totalSize, we just return pointer + offset
            2) go for either external spilled, or pinned allocation
         */
    // we enforce 8 byte alignment to ensure CUDA doesn't blame us
    long div = requiredMemory % 8;
    if (div != 0)
        // BUGFIX: pad up to the next multiple of 8. The previous code added the
        // remainder itself (e.g. 9 -> 10), which is not 8-byte aligned; adding
        // (8 - remainder) yields a proper multiple of 8 (e.g. 9 -> 16).
        requiredMemory += (8 - div);
    long numElements = requiredMemory / Nd4j.sizeOfDataType(type);
    // shortcut made to skip workspace
    if (!isUsed.get()) {
        if (disabledCounter.incrementAndGet() % 10 == 0)
            log.warn("Workspace was turned off, and wasn't enabled after {} allocations", disabledCounter.get());
        PagedPointer pointer = new PagedPointer(memoryManager.allocate(requiredMemory, MemoryKind.HOST, initialize), numElements);
        externalAllocations.add(new PointersPair(pointer, null));
        return pointer;
    }
    /*
            Trimmed mode is possible for cyclic workspace mode. Used in AsyncDataSetIterator, MQ, etc.
            Basically idea is simple: if one of datasets coming out of iterator has size higher then expected - we should reallocate workspace to match this size.
            So, we switch to trimmed mode, and all allocations will be "pinned", and eventually workspace will be reallocated.
         */
    boolean trimmer = (workspaceConfiguration.getPolicyReset() == ResetPolicy.ENDOFBUFFER_REACHED && requiredMemory + cycleAllocations.get() > initialBlockSize.get() && initialBlockSize.get() > 0) || trimmedMode.get();
    if (trimmer && workspaceConfiguration.getPolicySpill() == SpillPolicy.REALLOCATE && !trimmedMode.get()) {
        trimmedMode.set(true);
        trimmedStep.set(stepsCount.get());
    }
    // if size is enough - allocate from workspace
    if (hostOffset.get() + requiredMemory <= currentSize.get() && !trimmer) {
        // just alignment to 8 bytes
        cycleAllocations.addAndGet(requiredMemory);
        long prevOffset = hostOffset.getAndAdd(requiredMemory);
        // keep device offset in sync with host offset for this workspace
        deviceOffset.set(hostOffset.get());
        PagedPointer ptr = workspace.getHostPointer().withOffset(prevOffset, numElements);
        if (isDebug.get())
            log.info("Workspace [{}]: Allocating array of {} bytes, capacity of {} elements, prevOffset: {}; currentOffset: {}; address: {}", id, requiredMemory, numElements, prevOffset, hostOffset.get(), ptr.address());
        if (initialize)
            Pointer.memset(ptr, 0, requiredMemory);
        return ptr;
    } else {
        // in case of circular mode - we just reset offsets, and start from the beginning of the workspace
        if (workspaceConfiguration.getPolicyReset() == ResetPolicy.ENDOFBUFFER_REACHED && currentSize.get() > 0 && !trimmer) {
            reset();
            resetPlanned.set(true);
            return alloc(requiredMemory, kind, type, initialize);
        }
        // updating respective counters
        if (!trimmer)
            spilledAllocationsSize.addAndGet(requiredMemory);
        else
            pinnedAllocationsSize.addAndGet(requiredMemory);
        if (isDebug.get())
            log.info("Workspace [{}]: step: {}, spilled  {} bytes, capacity of {} elements", id, stepsCount.get(), requiredMemory, numElements);
        switch(workspaceConfiguration.getPolicySpill()) {
            case REALLOCATE:
            case EXTERNAL:
                cycleAllocations.addAndGet(requiredMemory);
                if (!trimmer) {
                    // spilled allocation: lives outside the workspace buffer
                    externalCount.incrementAndGet();
                    PagedPointer pointer = new PagedPointer(memoryManager.allocate(requiredMemory, MemoryKind.HOST, initialize), numElements);
                    externalAllocations.add(new PointersPair(pointer, null));
                    return pointer;
                } else {
                    // pinned allocation: tracked per step so it can be released on reallocation
                    pinnedCount.incrementAndGet();
                    PagedPointer pointer = new PagedPointer(memoryManager.allocate(requiredMemory, MemoryKind.HOST, initialize), numElements);
                    pinnedAllocations.add(new PointersPair(stepsCount.get(), requiredMemory, pointer, null));
                    return pointer;
                }
            case FAIL:
            default:
                {
                    throw new ND4JIllegalStateException("Can't allocate memory: Workspace is full");
                }
        }
    }
}
Also used : PointersPair(org.nd4j.linalg.api.memory.pointers.PointersPair) ND4JIllegalStateException(org.nd4j.linalg.exception.ND4JIllegalStateException) PagedPointer(org.nd4j.linalg.api.memory.pointers.PagedPointer)

Aggregations

PagedPointer (org.nd4j.linalg.api.memory.pointers.PagedPointer)8 ND4JIllegalStateException (org.nd4j.linalg.exception.ND4JIllegalStateException)4 lombok.val (lombok.val)2 Pointer (org.bytedeco.javacpp.Pointer)2 PointersPair (org.nd4j.linalg.api.memory.pointers.PointersPair)2 ByteBuffer (java.nio.ByteBuffer)1 HashMap (java.util.HashMap)1 AtomicLong (java.util.concurrent.atomic.AtomicLong)1 BytePointer (org.bytedeco.javacpp.BytePointer)1 FlatArray (org.nd4j.graph.FlatArray)1 FlatResult (org.nd4j.graph.FlatResult)1 FlatVariable (org.nd4j.graph.FlatVariable)1 GarbageBufferReference (org.nd4j.jita.allocator.garbage.GarbageBufferReference)1 AllocationPoint (org.nd4j.jita.allocator.impl.AllocationPoint)1 AllocationShape (org.nd4j.jita.allocator.impl.AllocationShape)1 PointersPair (org.nd4j.jita.allocator.pointers.PointersPair)1 CudaWorkspace (org.nd4j.jita.workspace.CudaWorkspace)1 INDArray (org.nd4j.linalg.api.ndarray.INDArray)1 CudaContext (org.nd4j.linalg.jcublas.context.CudaContext)1 Nd4jCpu (org.nd4j.nativeblas.Nd4jCpu)1