
Example 36 with ND4JIllegalStateException

Use of org.nd4j.linalg.exception.ND4JIllegalStateException in project nd4j by deeplearning4j.

Source: class CudaZeroHandler, method relocateObject.

@Override
public synchronized void relocateObject(DataBuffer buffer) {
    AllocationPoint dstPoint = AtomicAllocator.getInstance().getAllocationPoint(buffer);
    // we don't relocate non-DEVICE buffers (i.e. HOST or CONSTANT)
    if (dstPoint.getAllocationStatus() != AllocationStatus.DEVICE)
        return;
    int deviceId = getDeviceId();
    if (dstPoint.getDeviceId() >= 0 && dstPoint.getDeviceId() == deviceId) {
        return;
    }
    // FIXME: cross-thread access, might cause problems
    if (!dstPoint.isActualOnHostSide())
        AtomicAllocator.getInstance().synchronizeHostData(buffer);
    if (!dstPoint.isActualOnHostSide())
        throw new RuntimeException("Buffer synchronization failed");
    if (buffer.isAttached() || dstPoint.isAttached()) {
        // if this buffer is Attached, we just relocate to new workspace
        MemoryWorkspace workspace = Nd4j.getMemoryManager().getCurrentWorkspace();
        if (workspace == null) {
            // if we're out of workspace, we should mark our buffer as detached, so gc will pick it up eventually
            alloc(AllocationStatus.DEVICE, dstPoint, dstPoint.getShape(), false);
            CudaContext context = getCudaContext();
            if (nativeOps.memcpyAsync(dstPoint.getDevicePointer(), dstPoint.getHostPointer(), buffer.length() * buffer.getElementSize(), 1, context.getSpecialStream()) == 0)
                throw new ND4JIllegalStateException("memcpyAsync failed");
            context.syncSpecialStream();
            // updating host pointer now
            alloc(AllocationStatus.HOST, dstPoint, dstPoint.getShape(), false);
            // marking it as detached
            dstPoint.setAttached(false);
            // marking it as proper on device
            dstPoint.tickHostRead();
            dstPoint.tickDeviceWrite();
        } else {
            // this call will automatically take care of workspaces
            // log.info("Relocating to deviceId [{}], workspace [{}]...", deviceId, workspace.getId());
            BaseCudaDataBuffer nBuffer = (BaseCudaDataBuffer) Nd4j.createBuffer(buffer.length());
            Nd4j.getMemoryManager().memcpy(nBuffer, buffer);
            dstPoint.getPointers().setDevicePointer(nBuffer.getAllocationPoint().getDevicePointer());
            dstPoint.getPointers().setHostPointer(nBuffer.getAllocationPoint().getHostPointer());
            dstPoint.setDeviceId(deviceId);
            dstPoint.tickDeviceRead();
            dstPoint.tickHostRead();
        }
        return;
    }
    if (buffer.isConstant()) {
        // we can't relocate or modify constant buffers
        throw new RuntimeException("Can't relocateObject() for constant buffer");
    } else {
        // log.info("Free relocateObject: deviceId: {}, pointer: {}", deviceId, dstPoint.getPointers().getDevicePointer().address());
        memoryProvider.free(dstPoint);
        deviceMemoryTracker.subFromAllocation(Thread.currentThread().getId(), dstPoint.getDeviceId(), AllocationUtils.getRequiredMemory(dstPoint.getShape()));
        // we replace original device pointer with new one
        alloc(AllocationStatus.DEVICE, dstPoint, dstPoint.getShape(), false);
        // log.info("Pointer after alloc: {}", dstPoint.getPointers().getDevicePointer().address());
        CudaContext context = getCudaContext();
        if (nativeOps.memcpyAsync(dstPoint.getDevicePointer(), dstPoint.getHostPointer(), buffer.length() * buffer.getElementSize(), 1, context.getSpecialStream()) == 0)
            throw new ND4JIllegalStateException("memcpyAsync failed");
        context.syncSpecialStream();
        dstPoint.tickDeviceRead();
        dstPoint.tickHostRead();
    }
}
Also used: CudaContext (org.nd4j.linalg.jcublas.context.CudaContext), ND4JIllegalStateException (org.nd4j.linalg.exception.ND4JIllegalStateException), BaseCudaDataBuffer (org.nd4j.linalg.jcublas.buffer.BaseCudaDataBuffer), AllocationPoint (org.nd4j.jita.allocator.impl.AllocationPoint), MemoryWorkspace (org.nd4j.linalg.api.memory.MemoryWorkspace)
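
The error-handling pattern above is worth isolating: a native asynchronous call reports failure with a zero return value, and the caller converts that into an ND4JIllegalStateException before synchronizing the stream. Below is a minimal sketch of that guard, assuming only the zero-on-failure convention visible in the snippet; NativeCallGuard and checkNativeResult are illustrative names, not part of the nd4j API.

import org.nd4j.linalg.exception.ND4JIllegalStateException;

public final class NativeCallGuard {

    private NativeCallGuard() {
    }

    /**
     * Converts a zero-on-failure native return code into an
     * ND4JIllegalStateException, mirroring the memcpyAsync checks above.
     */
    public static int checkNativeResult(int status, String operation) {
        if (status == 0)
            throw new ND4JIllegalStateException(operation + " failed");
        return status;
    }
}

With such a helper, both copy sites above collapse to checkNativeResult(nativeOps.memcpyAsync(...), "memcpyAsync") followed by context.syncSpecialStream().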

Example 37 with ND4JIllegalStateException

Use of org.nd4j.linalg.exception.ND4JIllegalStateException in project nd4j by deeplearning4j.

Source: class CudaWorkspace, method alloc.

@Override
public PagedPointer alloc(long requiredMemory, MemoryKind kind, DataBuffer.Type type, boolean initialize) {
    long numElements = requiredMemory / Nd4j.sizeOfDataType(type);
    if (!isUsed.get()) {
        if (disabledCounter.incrementAndGet() % 10 == 0)
            log.warn("Worskpace was turned off, and wasn't enabled after {} allocations", disabledCounter.get());
        if (kind == MemoryKind.DEVICE) {
            PagedPointer pointer = new PagedPointer(memoryManager.allocate(requiredMemory, MemoryKind.DEVICE, initialize), numElements);
            externalAllocations.add(new PointersPair(null, pointer));
            return pointer;
        } else {
            PagedPointer pointer = new PagedPointer(memoryManager.allocate(requiredMemory, MemoryKind.HOST, initialize), numElements);
            externalAllocations.add(new PointersPair(pointer, null));
            return pointer;
        }
    }
    // round requiredMemory up to the next 8-byte boundary
    long div = requiredMemory % 8;
    if (div != 0)
        requiredMemory += (8 - div);
    boolean trimmer = (workspaceConfiguration.getPolicyReset() == ResetPolicy.ENDOFBUFFER_REACHED && requiredMemory + cycleAllocations.get() > initialBlockSize.get() && initialBlockSize.get() > 0 && kind == MemoryKind.DEVICE) || trimmedMode.get();
    if (trimmer && workspaceConfiguration.getPolicySpill() == SpillPolicy.REALLOCATE && !trimmedMode.get()) {
        trimmedMode.set(true);
        trimmedStep.set(stepsCount.get());
    }
    if (kind == MemoryKind.DEVICE) {
        if (deviceOffset.get() + requiredMemory <= currentSize.get() && !trimmer) {
            cycleAllocations.addAndGet(requiredMemory);
            long prevOffset = deviceOffset.getAndAdd(requiredMemory);
            if (workspaceConfiguration.getPolicyMirroring() == MirroringPolicy.HOST_ONLY)
                return null;
            PagedPointer ptr = workspace.getDevicePointer().withOffset(prevOffset, numElements);
            if (isDebug.get())
                log.info("Workspace [{}] device_{}: alloc array of {} bytes, capacity of {} elements; prevOffset: {}; newOffset: {}; size: {}; address: {}", id, Nd4j.getAffinityManager().getDeviceForCurrentThread(), requiredMemory, numElements, prevOffset, deviceOffset.get(), currentSize.get(), ptr.address());
            if (initialize) {
                // CudaContext context = AtomicAllocator.getInstance().getMemoryHandler().getCudaContext();
                CudaContext context = (CudaContext) AtomicAllocator.getInstance().getDeviceContext().getContext();
                int ret = NativeOpsHolder.getInstance().getDeviceNativeOps().memsetAsync(ptr, 0, requiredMemory, 0, context.getSpecialStream());
                if (ret == 0)
                    throw new ND4JIllegalStateException("memset failed device_" + Nd4j.getAffinityManager().getDeviceForCurrentThread());
                context.syncSpecialStream();
            }
            return ptr;
        } else {
            // spill
            if (workspaceConfiguration.getPolicyReset() == ResetPolicy.ENDOFBUFFER_REACHED && currentSize.get() > 0 && !trimmer) {
                // log.info("End of space reached. Current offset: {}; requiredMemory: {}", deviceOffset.get(), requiredMemory);
                reset();
                resetPlanned.set(true);
                return alloc(requiredMemory, kind, type, initialize);
            }
            if (!trimmer)
                spilledAllocationsSize.addAndGet(requiredMemory);
            else
                pinnedAllocationsSize.addAndGet(requiredMemory);
            if (isDebug.get()) {
                log.info("Workspace [{}] device_{}: spilled DEVICE array of {} bytes, capacity of {} elements", id, Nd4j.getAffinityManager().getDeviceForCurrentThread(), requiredMemory, numElements);
            }
            // Nd4j.getWorkspaceManager().printAllocationStatisticsForCurrentThread();
            AllocationShape shape = new AllocationShape(requiredMemory / Nd4j.sizeOfDataType(type), Nd4j.sizeOfDataType(type), type);
            cycleAllocations.addAndGet(requiredMemory);
            if (workspaceConfiguration.getPolicyMirroring() == MirroringPolicy.HOST_ONLY)
                return null;
            switch(workspaceConfiguration.getPolicySpill()) {
                case REALLOCATE:
                case EXTERNAL:
                    if (!trimmer) {
                        externalCount.incrementAndGet();
                        // 
                        // AtomicAllocator.getInstance().getMemoryHandler().getMemoryProvider().malloc(shape, null, AllocationStatus.DEVICE).getDevicePointer()
                        PagedPointer pointer = new PagedPointer(memoryManager.allocate(requiredMemory, MemoryKind.DEVICE, initialize), numElements);
                        // pointer.setLeaked(true);
                        pointer.isLeaked(); // a plain getter call; the setLeaked(true) above remains commented out
                        externalAllocations.add(new PointersPair(null, pointer));
                        return pointer;
                    } else {
                        pinnedCount.incrementAndGet();
                        // 
                        // AtomicAllocator.getInstance().getMemoryHandler().getMemoryProvider().malloc(shape, null, AllocationStatus.DEVICE).getDevicePointer()
                        PagedPointer pointer = new PagedPointer(memoryManager.allocate(requiredMemory, MemoryKind.DEVICE, initialize), numElements);
                        // pointer.setLeaked(true);
                        pointer.isLeaked(); // a plain getter call; the setLeaked(true) above remains commented out
                        pinnedAllocations.add(new PointersPair(stepsCount.get(), requiredMemory, null, pointer));
                        return pointer;
                    }
                case FAIL:
                default:
                    {
                        throw new ND4JIllegalStateException("Can't allocate memory: Workspace is full");
                    }
            }
        }
    } else if (kind == MemoryKind.HOST) {
        if (hostOffset.get() + requiredMemory <= currentSize.get() && !trimmer) {
            long prevOffset = hostOffset.getAndAdd(requiredMemory);
            PagedPointer ptr = workspace.getHostPointer().withOffset(prevOffset, numElements);
            if (initialize)
                Pointer.memset(ptr, 0, requiredMemory);
            return ptr;
        } else {
            // log.info("Spilled HOST array of {} bytes, capacity of {} elements", requiredMemory, numElements);
            AllocationShape shape = new AllocationShape(requiredMemory / Nd4j.sizeOfDataType(type), Nd4j.sizeOfDataType(type), type);
            switch(workspaceConfiguration.getPolicySpill()) {
                case REALLOCATE:
                case EXTERNAL:
                    if (!trimmer) {
                        // memoryManager.allocate(requiredMemory, MemoryKind.HOST, true)
                        // AtomicAllocator.getInstance().getMemoryHandler().getMemoryProvider().malloc(shape, null, AllocationStatus.DEVICE).getDevicePointer()
                        PagedPointer pointer = new PagedPointer(memoryManager.allocate(requiredMemory, MemoryKind.HOST, initialize), numElements);
                        // pointer.setLeaked(true);
                        externalAllocations.add(new PointersPair(pointer, null));
                        return pointer;
                    } else {
                        // AtomicAllocator.getInstance().getMemoryHandler().getMemoryProvider().malloc(shape, null, AllocationStatus.DEVICE).getDevicePointer()
                        PagedPointer pointer = new PagedPointer(memoryManager.allocate(requiredMemory, MemoryKind.HOST, initialize), numElements);
                        // pointer.setLeaked(true);
                        pointer.isLeaked(); // a plain getter call; the setLeaked(true) above remains commented out
                        pinnedAllocations.add(new PointersPair(stepsCount.get(), 0L, pointer, null));
                        return pointer;
                    }
                case FAIL:
                default:
                    {
                        throw new ND4JIllegalStateException("Can't allocate memory: Workspace is full");
                    }
            }
        }
    } else
        throw new ND4JIllegalStateException("Unknown MemoryKind was passed in: " + kind);
// throw new ND4JIllegalStateException("Shouldn't ever reach this line");
}
Also used: AllocationShape (org.nd4j.jita.allocator.impl.AllocationShape), PointersPair (org.nd4j.linalg.api.memory.pointers.PointersPair), CudaContext (org.nd4j.linalg.jcublas.context.CudaContext), ND4JIllegalStateException (org.nd4j.linalg.exception.ND4JIllegalStateException), PagedPointer (org.nd4j.linalg.api.memory.pointers.PagedPointer)
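
Two mechanisms in alloc() deserve a standalone illustration: requests are first rounded up to an 8-byte boundary, and the fast path is a bump allocator that atomically advances an offset (deviceOffset or hostOffset) into a preallocated block, falling back to the spill policies once the block is exhausted. The sketch below models that scheme in isolation; it is a simplified sketch under those assumptions, not the nd4j implementation, and all names are illustrative.

import java.util.concurrent.atomic.AtomicLong;

public final class BumpAllocatorSketch {

    private final AtomicLong offset = new AtomicLong(0);
    private final long capacity;

    public BumpAllocatorSketch(long capacityBytes) {
        this.capacity = capacityBytes;
    }

    /** Rounds a byte count up to the next multiple of 8, as the padding step above intends. */
    static long alignTo8(long bytes) {
        long rem = bytes % 8;
        return rem == 0 ? bytes : bytes + (8 - rem);
    }

    /**
     * Returns the byte offset of the new allocation, or -1 when the request
     * would overrun the block; the real alloc() then spills or reallocates
     * according to its SpillPolicy.
     */
    public long allocate(long requiredMemory) {
        long aligned = alignTo8(requiredMemory);
        long prev = offset.getAndAdd(aligned);
        if (prev + aligned > capacity) {
            offset.getAndAdd(-aligned); // roll the reservation back; caller must spill
            return -1;
        }
        return prev;
    }

    public static void main(String[] args) {
        BumpAllocatorSketch block = new BumpAllocatorSketch(64);
        System.out.println(block.allocate(10)); // 0  (10 rounds up to 16)
        System.out.println(block.allocate(16)); // 16
        System.out.println(block.allocate(40)); // -1 (would exceed the 64-byte block)
    }
}

One design point the sketch makes visible: because the offset only moves forward, individual allocations can't be freed; the workspace instead resets the offset wholesale, which is what the ENDOFBUFFER_REACHED reset policy does above.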

Example 38 with ND4JIllegalStateException

Use of org.nd4j.linalg.exception.ND4JIllegalStateException in project nd4j by deeplearning4j.

Source: class CudaExecutioner, method invoke.

protected CudaContext invoke(Accumulation op, int[] dimension) {
    long st = profilingHookIn(op);
    checkForCompression(op);
    validateDataType(Nd4j.dataType(), op);
    if (extraz.get() == null)
        extraz.set(new PointerPointer(32));
    // dimension is ALWAYS null here.
    if (dimension == null)
        dimension = new int[] { Integer.MAX_VALUE };
    Arrays.sort(dimension);
    for (int i = 0; i < dimension.length; i++)
        if (dimension[i] >= op.x().rank() && dimension[i] != Integer.MAX_VALUE)
            throw new ND4JIllegalStateException("Op target dimension " + Arrays.toString(dimension) + " contains an element higher than the rank of op.X: [" + op.x().rank() + "]");
    CudaContext context = AtomicAllocator.getInstance().getFlowController().prepareAction(op.z(), op.x(), op.y());
    if (CudaEnvironment.getInstance().getConfiguration().isDebug())
        lastOp.set(op.opName());
    Pointer hostYShapeInfo = op.y() == null ? null : AddressRetriever.retrieveHostPointer(op.y().shapeInfoDataBuffer());
    Pointer hostZShapeInfo = op.z() == null ? null : AddressRetriever.retrieveHostPointer(op.z().shapeInfoDataBuffer());
    Pair<DataBuffer, DataBuffer> tadBuffers = tadManager.getTADOnlyShapeInfo(op.x(), dimension);
    Pointer hostTadShapeInfo = AddressRetriever.retrieveHostPointer(tadBuffers.getFirst());
    Pointer devTadShapeInfo = AtomicAllocator.getInstance().getPointer(tadBuffers.getFirst(), context);
    DataBuffer offsets = tadBuffers.getSecond();
    Pointer devTadOffsets = offsets == null ? null : AtomicAllocator.getInstance().getPointer(offsets, context);
    PointerPointer xShapeInfoHostPointer = extraz.get().put(AddressRetriever.retrieveHostPointer(op.x().shapeInfoDataBuffer()), context.getOldStream(), AtomicAllocator.getInstance().getDeviceIdPointer(), context.getBufferAllocation(), context.getBufferReduction(), context.getBufferScalar(), context.getBufferSpecial(), hostYShapeInfo, hostZShapeInfo, hostTadShapeInfo, devTadShapeInfo, devTadOffsets);
    if (op.y() != null) {
        Pair<DataBuffer, DataBuffer> yTadBuffers = tadManager.getTADOnlyShapeInfo(op.y(), dimension);
        Pointer yDevTadShapeInfo = AtomicAllocator.getInstance().getPointer(yTadBuffers.getFirst(), context);
        DataBuffer yOffsets = yTadBuffers.getSecond();
        Pointer yDevTadOffsets = yOffsets == null ? null : AtomicAllocator.getInstance().getPointer(yOffsets, context);
        xShapeInfoHostPointer.put(12, yDevTadShapeInfo);
        xShapeInfoHostPointer.put(13, yDevTadOffsets);
    }
    Pointer x = AtomicAllocator.getInstance().getPointer(op.x(), context);
    Pointer xShapeInfo = AtomicAllocator.getInstance().getPointer(op.x().shapeInfoDataBuffer(), context);
    Pointer extraArgs = op.extraArgs() != null ? AtomicAllocator.getInstance().getPointer(op.extraArgsDataBuff(), context) : null;
    int[] retShape = Shape.wholeArrayDimension(dimension) ? new int[] { 1, 1 } : ArrayUtil.removeIndex(op.x().shape(), dimension);
    // ensure vector is proper shape
    if (retShape.length == 1) {
        if (dimension[0] == 0)
            retShape = new int[] { 1, retShape[0] };
        else
            retShape = new int[] { retShape[0], 1 };
    } else if (retShape.length == 0) {
        retShape = new int[] { 1, 1 };
    }
    if (op.x().isVector() && op.x().length() == ArrayUtil.prod(retShape))
        return null;
    INDArray ret = null;
    if (Math.abs(op.zeroDouble()) <= Nd4j.EPS_THRESHOLD) {
        ret = Nd4j.zeros(retShape);
    } else {
        if (op.x().data().dataType() == DataBuffer.Type.DOUBLE)
            ret = Nd4j.valueArrayOf(retShape, op.zeroDouble());
        else if (op.x().data().dataType() == DataBuffer.Type.FLOAT)
            ret = Nd4j.valueArrayOf(retShape, op.zeroFloat());
        else if (op.x().data().dataType() == DataBuffer.Type.HALF)
            ret = Nd4j.valueArrayOf(retShape, op.zeroHalf());
    }
    op.setZ(ret);
    if (op.z().isScalar()) {
        if (op.x().data().dataType() == DataBuffer.Type.DOUBLE) {
            if (op instanceof Variance) {
                double result = nativeOps.execSummaryStatsScalarDouble(xShapeInfoHostPointer, op.opNum(), (DoublePointer) x, (IntPointer) xShapeInfo, (DoublePointer) extraArgs, ((Variance) op).isBiasCorrected());
                op.setFinalResult(result);
            } else if (op.y() != null) {
                Pointer y = AtomicAllocator.getInstance().getPointer(op.y(), context);
                Pointer yShapeInfo = AtomicAllocator.getInstance().getPointer(op.y().shapeInfoDataBuffer(), context);
                double result = nativeOps.execReduce3ScalarDouble(xShapeInfoHostPointer, op.opNum(), (DoublePointer) x, (IntPointer) xShapeInfo, (DoublePointer) extraArgs, (DoublePointer) y, (IntPointer) yShapeInfo);
                op.setFinalResult(result);
            } else {
                double result = nativeOps.execReduceScalarDouble(xShapeInfoHostPointer, op.opNum(), (DoublePointer) x, (IntPointer) xShapeInfo, (DoublePointer) extraArgs);
                op.setFinalResult(result);
            }
        } else if (op.x().data().dataType() == DataBuffer.Type.FLOAT) {
            if (op instanceof Variance) {
                float result = nativeOps.execSummaryStatsScalarFloat(xShapeInfoHostPointer, op.opNum(), (FloatPointer) x, (IntPointer) xShapeInfo, (FloatPointer) extraArgs, ((Variance) op).isBiasCorrected());
                op.setFinalResult(result);
            } else if (op.y() != null) {
                Pointer y = AtomicAllocator.getInstance().getPointer(op.y(), context);
                Pointer yShapeInfo = AtomicAllocator.getInstance().getPointer(op.y().shapeInfoDataBuffer(), context);
                float result = nativeOps.execReduce3ScalarFloat(xShapeInfoHostPointer, op.opNum(), (FloatPointer) x, (IntPointer) xShapeInfo, (FloatPointer) extraArgs, (FloatPointer) y, (IntPointer) yShapeInfo);
                op.setFinalResult(result);
            } else {
                float result = nativeOps.execReduceScalarFloat(xShapeInfoHostPointer, op.opNum(), (FloatPointer) x, (IntPointer) xShapeInfo, (FloatPointer) extraArgs);
                op.setFinalResult(result);
            }
        } else {
            if (op instanceof Variance) {
                float result = nativeOps.execSummaryStatsScalarHalf(xShapeInfoHostPointer, op.opNum(), (ShortPointer) x, (IntPointer) xShapeInfo, (ShortPointer) extraArgs, ((Variance) op).isBiasCorrected());
                op.setFinalResult(result);
            } else if (op.y() != null) {
                Pointer y = AtomicAllocator.getInstance().getPointer(op.y(), context);
                Pointer yShapeInfo = AtomicAllocator.getInstance().getPointer(op.y().shapeInfoDataBuffer(), context);
                float result = nativeOps.execReduce3ScalarHalf(xShapeInfoHostPointer, op.opNum(), (ShortPointer) x, (IntPointer) xShapeInfo, (ShortPointer) extraArgs, (ShortPointer) y, (IntPointer) yShapeInfo);
                op.setFinalResult(result);
            } else {
                float result = nativeOps.execReduceScalarHalf(xShapeInfoHostPointer, op.opNum(), (ShortPointer) x, (IntPointer) xShapeInfo, (ShortPointer) extraArgs);
                op.setFinalResult(result);
            }
        }
    } else {
        Pointer result = AtomicAllocator.getInstance().getPointer(op.z(), context);
        Pointer resultShapeInfo = AtomicAllocator.getInstance().getPointer(op.z().shapeInfoDataBuffer(), context);
        // alternative: AtomicAllocator.getInstance().getPointer(Nd4j.createBuffer(dimension), context)
        Pointer dimensionPointer = AtomicAllocator.getInstance().getPointer(AtomicAllocator.getInstance().getConstantBuffer(dimension), context);
        if (op.x().data().dataType() == DataBuffer.Type.DOUBLE) {
            if (op.y() != null) {
                Pointer y = AtomicAllocator.getInstance().getPointer(op.y(), context);
                Pointer yShapeInfo = AtomicAllocator.getInstance().getPointer(op.y().shapeInfoDataBuffer(), context);
                nativeOps.execReduce3Double(xShapeInfoHostPointer, op.opNum(), (DoublePointer) x, (IntPointer) xShapeInfo, (DoublePointer) extraArgs, (DoublePointer) y, (IntPointer) yShapeInfo, (DoublePointer) result, (IntPointer) resultShapeInfo, (IntPointer) dimensionPointer, dimension.length);
            } else {
                if (op instanceof Variance) {
                    nativeOps.execSummaryStatsDouble(xShapeInfoHostPointer, op.opNum(), (DoublePointer) x, (IntPointer) xShapeInfo, (DoublePointer) extraArgs, (DoublePointer) result, (IntPointer) resultShapeInfo, (IntPointer) dimensionPointer, dimension.length, ((Variance) op).isBiasCorrected());
                } else {
                    nativeOps.execReduceDouble(xShapeInfoHostPointer, op.opNum(), (DoublePointer) x, (IntPointer) xShapeInfo, (DoublePointer) extraArgs, (DoublePointer) result, (IntPointer) resultShapeInfo, (IntPointer) dimensionPointer, dimension.length);
                }
            }
        } else if (op.x().data().dataType() == DataBuffer.Type.FLOAT) { // float
            if (op.y() != null) {
                Pointer y = AtomicAllocator.getInstance().getPointer(op.y(), context);
                Pointer yShapeInfo = AtomicAllocator.getInstance().getPointer(op.y().shapeInfoDataBuffer(), context);
                nativeOps.execReduce3Float(xShapeInfoHostPointer, op.opNum(), (FloatPointer) x, (IntPointer) xShapeInfo, (FloatPointer) extraArgs, (FloatPointer) y, (IntPointer) yShapeInfo, (FloatPointer) result, (IntPointer) resultShapeInfo, (IntPointer) dimensionPointer, dimension.length);
            } else {
                if (op instanceof Variance) {
                    nativeOps.execSummaryStatsFloat(xShapeInfoHostPointer, op.opNum(), (FloatPointer) x, (IntPointer) xShapeInfo, (FloatPointer) extraArgs, (FloatPointer) result, (IntPointer) resultShapeInfo, (IntPointer) dimensionPointer, dimension.length, ((Variance) op).isBiasCorrected());
                } else {
                    nativeOps.execReduceFloat(xShapeInfoHostPointer, op.opNum(), (FloatPointer) x, (IntPointer) xShapeInfo, (FloatPointer) extraArgs, (FloatPointer) result, (IntPointer) resultShapeInfo, (IntPointer) dimensionPointer, dimension.length);
                }
            }
        } else { // half
            if (op.y() != null) {
                Pointer y = AtomicAllocator.getInstance().getPointer(op.y(), context);
                Pointer yShapeInfo = AtomicAllocator.getInstance().getPointer(op.y().shapeInfoDataBuffer(), context);
                nativeOps.execReduce3Half(xShapeInfoHostPointer, op.opNum(), (ShortPointer) x, (IntPointer) xShapeInfo, (ShortPointer) extraArgs, (ShortPointer) y, (IntPointer) yShapeInfo, (ShortPointer) result, (IntPointer) resultShapeInfo, (IntPointer) dimensionPointer, dimension.length);
            } else {
                if (op instanceof Variance) {
                    nativeOps.execSummaryStatsHalf(xShapeInfoHostPointer, op.opNum(), (ShortPointer) x, (IntPointer) xShapeInfo, (ShortPointer) extraArgs, (ShortPointer) result, (IntPointer) resultShapeInfo, (IntPointer) dimensionPointer, dimension.length, ((Variance) op).isBiasCorrected());
                } else {
                    nativeOps.execReduceHalf(xShapeInfoHostPointer, op.opNum(), (ShortPointer) x, (IntPointer) xShapeInfo, (ShortPointer) extraArgs, (ShortPointer) result, (IntPointer) resultShapeInfo, (IntPointer) dimensionPointer, dimension.length);
                }
            }
        }
    }
    AtomicAllocator.getInstance().registerAction(context, op.z(), op.x(), op.y());
    profilingHookOut(op, st);
    return context;
}
Also used: CudaContext (org.nd4j.linalg.jcublas.context.CudaContext), CudaPointer (org.nd4j.jita.allocator.pointers.CudaPointer), PagedPointer (org.nd4j.linalg.api.memory.pointers.PagedPointer), AllocationPoint (org.nd4j.jita.allocator.impl.AllocationPoint), Variance (org.nd4j.linalg.api.ops.impl.accum.Variance), INDArray (org.nd4j.linalg.api.ndarray.INDArray), ND4JIllegalStateException (org.nd4j.linalg.exception.ND4JIllegalStateException), DataBuffer (org.nd4j.linalg.api.buffer.DataBuffer), BaseDataBuffer (org.nd4j.linalg.api.buffer.BaseDataBuffer)
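
Both invoke() and naiveExec() open with the same guard: every requested reduction dimension must either be the Integer.MAX_VALUE marker (meaning "reduce over the whole array") or lie inside the rank of op.x(). A minimal standalone sketch of that check follows; DimensionGuard and validateDimensions are illustrative names, not nd4j API.

import java.util.Arrays;

import org.nd4j.linalg.exception.ND4JIllegalStateException;

public final class DimensionGuard {

    /**
     * Rejects any dimension outside [0, rank) unless it is the special
     * Integer.MAX_VALUE whole-array marker, matching the guard above.
     */
    public static void validateDimensions(int rank, int... dimension) {
        for (int d : dimension) {
            if (d >= rank && d != Integer.MAX_VALUE)
                throw new ND4JIllegalStateException("Op target dimension " + Arrays.toString(dimension)
                                + " contains an element higher than the rank of op.X: [" + rank + "]");
        }
    }

    public static void main(String[] args) {
        validateDimensions(3, 0, 2);              // fine
        validateDimensions(3, Integer.MAX_VALUE); // fine: whole-array reduction
        // validateDimensions(3, 5);              // would throw ND4JIllegalStateException
    }
}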

Example 39 with ND4JIllegalStateException

Use of org.nd4j.linalg.exception.ND4JIllegalStateException in project nd4j by deeplearning4j.

Source: class CudaExecutioner, method executeGraph.

@Override
public Map<String, INDArray> executeGraph(long id, Map<String, INDArray> map) {
    this.commit();
    val ptrBuffers = new PointerPointer(map.size() * 2);
    val ptrShapes = new PointerPointer(map.size() * 2);
    val ptrIndices = new IntPointer(map.size());
    int cnt = 0;
    val keySet = new ArrayList<String>(map.keySet());
    for (val key : keySet) {
        val array = map.get(key);
        ptrBuffers.put(cnt, AtomicAllocator.getInstance().getHostPointer(array));
        ptrShapes.put(cnt, AtomicAllocator.getInstance().getHostPointer(array.shapeInfoDataBuffer()));
        ptrIndices.put(cnt, cnt);
        cnt++;
    }
    val newMap = new LinkedHashMap<String, INDArray>();
    if (Nd4j.dataType() == DataBuffer.Type.FLOAT) {
        val result = (Nd4jCuda.FloatVariablesSet) nativeOps.executeStoredGraphFloat(null, id, ptrBuffers, ptrShapes, ptrIndices, map.size());
        val status = OpStatus.byNumber(result.status());
        if (status != OpStatus.ND4J_STATUS_OK)
            throw new ND4JIllegalStateException("Op execution failed: " + status);
        for (int e = 0; e < result.size(); e++) {
            val var = result.at(e);
            val nodeId = var.id();
            val index = var.index();
            val shapeInfo = var.getNDArray().shapeInfo();
            val buffer = var.getNDArray().buffer();
            val rank = shapeInfo.get(0);
            val jshape = new int[rank * 2 + 4];
            for (int i = 0; i < jshape.length; i++) {
                jshape[i] = shapeInfo.get(i);
            }
            val shapeOf = Shape.shapeOf(jshape);
            val stridesOf = Shape.stridesOf(jshape);
            val order = Shape.order(jshape);
            val array = Nd4j.create(shapeOf, stridesOf, 0, order);
            Pointer.memcpy(AtomicAllocator.getInstance().getHostPointer(array), buffer, ArrayUtil.prod(shapeOf) * Nd4j.sizeOfDataType());
            AtomicAllocator.getInstance().getAllocationPoint(array).tickHostWrite();
            newMap.put(keySet.get(nodeId), array);
        }
        nativeOps.deleteVariablesSetFloat(result);
    } else if (Nd4j.dataType() == DataBuffer.Type.DOUBLE) {
        val result = (Nd4jCuda.DoubleVariablesSet) nativeOps.executeStoredGraphDouble(null, id, ptrBuffers, ptrShapes, ptrIndices, map.size());
        val status = OpStatus.byNumber(result.status());
        if (status != OpStatus.ND4J_STATUS_OK)
            throw new ND4JIllegalStateException("Op execution failed: " + status);
        for (int e = 0; e < result.size(); e++) {
            val var = result.at(e);
            val nodeId = var.id();
            val index = var.index();
            val shapeInfo = var.getNDArray().shapeInfo();
            val buffer = var.getNDArray().buffer();
            val rank = shapeInfo.get(0);
            val jshape = new int[rank * 2 + 4];
            for (int i = 0; i < jshape.length; i++) {
                jshape[i] = shapeInfo.get(i);
            }
            val shapeOf = Shape.shapeOf(jshape);
            val stridesOf = Shape.stridesOf(jshape);
            val order = Shape.order(jshape);
            val array = Nd4j.create(shapeOf, stridesOf, 0, order);
            Pointer.memcpy(AtomicAllocator.getInstance().getHostPointer(array), buffer, ArrayUtil.prod(shapeOf) * Nd4j.sizeOfDataType());
            AtomicAllocator.getInstance().getAllocationPoint(array).tickHostWrite();
            newMap.put(keySet.get(nodeId), array);
        }
        nativeOps.deleteVariablesSetDouble(result);
    } else if (Nd4j.dataType() == DataBuffer.Type.HALF) {
        val result = (Nd4jCuda.DoubleVariablesSet) nativeOps.executeStoredGraphHalf(null, id, ptrBuffers, ptrShapes, ptrIndices, map.size()); // note: the half branch reuses DoubleVariablesSet, as in the upstream source
        val status = OpStatus.byNumber(result.status());
        if (status != OpStatus.ND4J_STATUS_OK)
            throw new ND4JIllegalStateException("Op execution failed: " + status);
        for (int e = 0; e < result.size(); e++) {
            val var = result.at(e);
            val nodeId = var.id();
            val index = var.index();
            val shapeInfo = var.getNDArray().shapeInfo();
            val buffer = var.getNDArray().buffer();
            val rank = shapeInfo.get(0);
            val jshape = new int[rank * 2 + 4];
            for (int i = 0; i < jshape.length; i++) {
                jshape[i] = shapeInfo.get(i);
            }
            val shapeOf = Shape.shapeOf(jshape);
            val stridesOf = Shape.stridesOf(jshape);
            val order = Shape.order(jshape);
            val array = Nd4j.create(shapeOf, stridesOf, 0, order);
            Pointer.memcpy(AtomicAllocator.getInstance().getHostPointer(array), buffer, ArrayUtil.prod(shapeOf) * Nd4j.sizeOfDataType());
            AtomicAllocator.getInstance().getAllocationPoint(array).tickHostWrite();
            newMap.put(keySet.get(nodeId), array);
        }
        nativeOps.deleteVariablesSetHalf(result);
    }
    return newMap;
}
Also used: lombok.val (lombok.val), Nd4jCuda (org.nd4j.nativeblas.Nd4jCuda), ND4JIllegalStateException (org.nd4j.linalg.exception.ND4JIllegalStateException), AllocationPoint (org.nd4j.jita.allocator.impl.AllocationPoint)
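
All three type branches decode the native shape-info buffer the same way: the first element is the rank, and the full descriptor is rank * 2 + 4 integers, which the code copies into jshape before handing it to Shape.shapeOf(), Shape.stridesOf() and Shape.order(). Below is a hedged sketch of that decoding, assuming the conventional nd4j layout [rank, shape..., stride..., offset, elementWiseStride, order]; the helpers are illustrative stand-ins, not the real org.nd4j.linalg.api.shape.Shape methods.

import java.util.Arrays;

public final class ShapeInfoSketch {

    // assumed layout: [rank, shape_0..shape_{r-1}, stride_0..stride_{r-1}, offset, ews, order]
    static int[] shapeOf(int[] jshape) {
        int rank = jshape[0];
        return Arrays.copyOfRange(jshape, 1, 1 + rank);
    }

    static int[] stridesOf(int[] jshape) {
        int rank = jshape[0];
        return Arrays.copyOfRange(jshape, 1 + rank, 1 + 2 * rank);
    }

    static char order(int[] jshape) {
        return (char) jshape[jshape.length - 1]; // order is stored last
    }

    public static void main(String[] args) {
        // rank-2 array of shape [2, 3] with c-order strides [3, 1]: length is 2 * 2 + 4 = 8
        int[] jshape = { 2, 2, 3, 3, 1, 0, 1, 'c' };
        System.out.println(Arrays.toString(shapeOf(jshape)));   // [2, 3]
        System.out.println(Arrays.toString(stridesOf(jshape))); // [3, 1]
        System.out.println(order(jshape));                      // c
    }
}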

Example 40 with ND4JIllegalStateException

Use of org.nd4j.linalg.exception.ND4JIllegalStateException in project nd4j by deeplearning4j.

Source: class CudaExecutioner, method naiveExec.

/**
 * @param op        the accumulation op to execute
 * @param dimension the dimension(s) to reduce over; Integer.MAX_VALUE denotes the whole array
 * @return the result array, op.z()
 */
protected INDArray naiveExec(Accumulation op, int... dimension) {
    long st = profilingHookIn(op);
    INDArray ret = op.z();
    validateDataType(Nd4j.dataType(), op);
    for (int i = 0; i < dimension.length; i++)
        if (dimension[i] >= op.x().rank() && dimension[i] != Integer.MAX_VALUE)
            throw new ND4JIllegalStateException("Op target dimension " + Arrays.toString(dimension) + " contains an element higher than the rank of op.X: [" + op.x().rank() + "]");
    CudaContext context = AtomicAllocator.getInstance().getFlowController().prepareAction(op.z(), op.x(), op.y());
    if (CudaEnvironment.getInstance().getConfiguration().isDebug())
        lastOp.set(op.opName());
    Pointer hostYShapeInfo = op.y() == null ? null : AddressRetriever.retrieveHostPointer(op.y().shapeInfoDataBuffer());
    Pointer hostZShapeInfo = op.z() == null ? null : AddressRetriever.retrieveHostPointer(op.z().shapeInfoDataBuffer());
    Pair<DataBuffer, DataBuffer> tadBuffers = tadManager.getTADOnlyShapeInfo(op.x(), dimension);
    /*
        if (op.opNum() == 3) {
            log.info("Max shape: {}", Arrays.toString(op.x().shapeInfoDataBuffer().asInt()));
            log.info("Max TAD: {}", Arrays.toString(tadBuffers.getFirst().asInt()));
            context.syncOldStream();
        }
*/
    Pointer hostTadShapeInfo = AddressRetriever.retrieveHostPointer(tadBuffers.getFirst());
    Pointer devTadShapeInfo = AtomicAllocator.getInstance().getPointer(tadBuffers.getFirst(), context);
    DataBuffer offsets = tadBuffers.getSecond();
    Pointer devTadOffsets = offsets == null ? null : AtomicAllocator.getInstance().getPointer(offsets, context);
    Pointer x = AtomicAllocator.getInstance().getPointer(op.x(), context);
    Pointer xShapeInfo = AtomicAllocator.getInstance().getPointer(op.x().shapeInfoDataBuffer(), context);
    if (extraz.get() == null)
        extraz.set(new PointerPointer(32));
    PointerPointer xShapeInfoHostPointer = extraz.get().put(AddressRetriever.retrieveHostPointer(op.x().shapeInfoDataBuffer()), context.getOldStream(), AtomicAllocator.getInstance().getDeviceIdPointer(), context.getBufferAllocation(), context.getBufferReduction(), context.getBufferScalar(), context.getBufferSpecial(), hostYShapeInfo, hostZShapeInfo, hostTadShapeInfo, devTadShapeInfo, devTadOffsets);
    Pointer yDevTadOffsets = null;
    Pointer yDevTadShapeInfo = null;
    if (op.y() != null) {
        if ((dimension.length == 1 && dimension[0] == Integer.MAX_VALUE) || op.x().tensorAlongDimension(0, dimension).lengthLong() != op.y().lengthLong()) {
            if (!op.isComplexAccumulation() && op.x().lengthLong() != op.y().lengthLong())
                throw new ND4JIllegalStateException("Op.X [" + op.x().lengthLong() + "] and Op.Y [" + op.y().lengthLong() + "] lengths should match");
            Pair<DataBuffer, DataBuffer> yTadBuffers = tadManager.getTADOnlyShapeInfo(op.y(), dimension);
            yDevTadShapeInfo = AtomicAllocator.getInstance().getPointer(yTadBuffers.getFirst(), context);
            DataBuffer yOffsets = yTadBuffers.getSecond();
            yDevTadOffsets = yOffsets == null ? null : AtomicAllocator.getInstance().getPointer(yOffsets, context);
            xShapeInfoHostPointer.put(12, yDevTadShapeInfo);
            xShapeInfoHostPointer.put(13, yDevTadOffsets);
        } else {
            // TAD vs full array code branch
            val fakeOffsets = Nd4j.getConstantHandler().getConstantBuffer(new int[] { 0, 0 });
            yDevTadOffsets = fakeOffsets == null ? null : AtomicAllocator.getInstance().getPointer(fakeOffsets, context);
            yDevTadShapeInfo = AtomicAllocator.getInstance().getPointer(op.y().shapeInfoDataBuffer(), context);
            xShapeInfoHostPointer.put(12, AtomicAllocator.getInstance().getPointer(op.y().shapeInfoDataBuffer(), context));
            xShapeInfoHostPointer.put(13, null);
        }
    }
    Pointer extraArgs = op.extraArgs() != null ? AtomicAllocator.getInstance().getPointer(op.extraArgsDataBuff(), context) : null;
    // Pointer extraArgs = op.extraArgs() != null ? AtomicAllocator.getInstance().getPointer(op.extraArgsDataBuff(), context) : 0;
    // Pointer dimensionPointer = AtomicAllocator.getInstance().getPointer(Nd4j.createBuffer(dimension), context);
    // alternative: AtomicAllocator.getInstance().getPointer(Nd4j.createBuffer(dimension), context)
    Pointer dimensionPointer = AtomicAllocator.getInstance().getPointer(AtomicAllocator.getInstance().getConstantBuffer(dimension), context);
    if (op.x().data().dataType() == DataBuffer.Type.DOUBLE) {
        if (op instanceof Variance) {
            if (ret.isScalar()) {
                double res = nativeOps.execSummaryStatsScalarDouble(xShapeInfoHostPointer, op.opNum(), (DoublePointer) x, (IntPointer) xShapeInfo, (DoublePointer) extraArgs, ((Variance) op).isBiasCorrected());
                AtomicAllocator.getInstance().registerAction(context, op.z(), op.x(), op.y());
                ret.assign(res);
                op.setFinalResult(res);
            } else {
                nativeOps.execSummaryStatsDouble(xShapeInfoHostPointer, op.opNum(), (DoublePointer) x, (IntPointer) xShapeInfo, (DoublePointer) extraArgs, (DoublePointer) AtomicAllocator.getInstance().getPointer(op.z(), context), (IntPointer) AtomicAllocator.getInstance().getPointer(op.z().shapeInfoDataBuffer(), context), (IntPointer) dimensionPointer, dimension.length, ((Variance) op).isBiasCorrected());
                AtomicAllocator.getInstance().registerAction(context, op.z(), op.x(), op.y());
            }
        } else if (op.y() != null) {
            if (op.isComplexAccumulation()) {
                val dT = new LongPointerWrapper(devTadOffsets);
                val yT = new LongPointerWrapper(yDevTadOffsets);
                nativeOps.execReduce3AllDouble(xShapeInfoHostPointer, op.opNum(), (DoublePointer) x, (IntPointer) xShapeInfo, (DoublePointer) extraArgs, (DoublePointer) AtomicAllocator.getInstance().getPointer(op.y(), context), (IntPointer) AtomicAllocator.getInstance().getPointer(op.y().shapeInfoDataBuffer(), context), (DoublePointer) AtomicAllocator.getInstance().getPointer(op.z(), context), (IntPointer) AtomicAllocator.getInstance().getPointer(op.z().shapeInfoDataBuffer(), context), (IntPointer) dimensionPointer, dimension.length, (IntPointer) devTadShapeInfo, dT, (IntPointer) yDevTadShapeInfo, yT);
                AtomicAllocator.getInstance().registerAction(context, op.z(), op.x(), op.y());
            } else if (ret.isScalar()) {
                double res = nativeOps.execReduce3ScalarDouble(xShapeInfoHostPointer, op.opNum(), (DoublePointer) x, (IntPointer) xShapeInfo, (DoublePointer) extraArgs, (DoublePointer) AtomicAllocator.getInstance().getPointer(op.y(), context), (IntPointer) AtomicAllocator.getInstance().getPointer(op.y().shapeInfoDataBuffer(), context));
                AtomicAllocator.getInstance().registerAction(context, op.z(), op.x(), op.y());
                ret.assign(res);
                op.setFinalResult(res);
            } else {
                nativeOps.execReduce3Double(xShapeInfoHostPointer, op.opNum(), (DoublePointer) x, (IntPointer) xShapeInfo, (DoublePointer) extraArgs, (DoublePointer) AtomicAllocator.getInstance().getPointer(op.y(), context), (IntPointer) AtomicAllocator.getInstance().getPointer(op.y().shapeInfoDataBuffer(), context), (DoublePointer) AtomicAllocator.getInstance().getPointer(op.z(), context), (IntPointer) AtomicAllocator.getInstance().getPointer(op.z().shapeInfoDataBuffer(), context), (IntPointer) dimensionPointer, dimension.length);
                AtomicAllocator.getInstance().registerAction(context, op.z(), op.x(), op.y());
            }
        } else {
            if (ret.isScalar()) {
                double res = nativeOps.execReduceScalarDouble(xShapeInfoHostPointer, op.opNum(), (DoublePointer) x, (IntPointer) xShapeInfo, (DoublePointer) extraArgs);
                AtomicAllocator.getInstance().registerAction(context, op.z(), op.x(), op.y());
                ret.assign(res);
                op.setFinalResult(res);
            } else {
                nativeOps.execReduceDouble(xShapeInfoHostPointer, op.opNum(), (DoublePointer) x, (IntPointer) xShapeInfo, (DoublePointer) extraArgs, (DoublePointer) AtomicAllocator.getInstance().getPointer(op.z(), context), (IntPointer) AtomicAllocator.getInstance().getPointer(op.z().shapeInfoDataBuffer(), context), (IntPointer) dimensionPointer, dimension.length);
                AtomicAllocator.getInstance().registerAction(context, op.z(), op.x(), op.y());
            }
        }
    } else if (op.x().data().dataType() == DataBuffer.Type.FLOAT) {
        if (op instanceof Variance) {
            if (ret.isScalar()) {
                float res = nativeOps.execSummaryStatsScalarFloat(xShapeInfoHostPointer, op.opNum(), (FloatPointer) x, (IntPointer) xShapeInfo, (FloatPointer) extraArgs, ((Variance) op).isBiasCorrected());
                AtomicAllocator.getInstance().registerAction(context, op.z(), op.x(), op.y());
                ret.assign(res);
                op.setFinalResult(res);
            } else {
                nativeOps.execSummaryStatsFloat(xShapeInfoHostPointer, op.opNum(), (FloatPointer) x, (IntPointer) xShapeInfo, (FloatPointer) extraArgs, (FloatPointer) AtomicAllocator.getInstance().getPointer(op.z(), context), (IntPointer) AtomicAllocator.getInstance().getPointer(op.z().shapeInfoDataBuffer(), context), (IntPointer) dimensionPointer, dimension.length, ((Variance) op).isBiasCorrected());
                AtomicAllocator.getInstance().registerAction(context, op.z(), op.x(), op.y());
            }
        } else if (op.y() != null) {
            if (op.isComplexAccumulation()) {
                nativeOps.execReduce3AllFloat(xShapeInfoHostPointer, op.opNum(), (FloatPointer) x, (IntPointer) xShapeInfo, (FloatPointer) extraArgs, (FloatPointer) AtomicAllocator.getInstance().getPointer(op.y(), context), (IntPointer) AtomicAllocator.getInstance().getPointer(op.y().shapeInfoDataBuffer(), context), (FloatPointer) AtomicAllocator.getInstance().getPointer(op.z(), context), (IntPointer) AtomicAllocator.getInstance().getPointer(op.z().shapeInfoDataBuffer(), context), (IntPointer) dimensionPointer, dimension.length, (IntPointer) devTadShapeInfo, new LongPointerWrapper(devTadOffsets), (IntPointer) yDevTadShapeInfo, new LongPointerWrapper(yDevTadOffsets));
                AtomicAllocator.getInstance().registerAction(context, op.z(), op.x(), op.y());
            } else if (ret.isScalar()) {
                float res = nativeOps.execReduce3ScalarFloat(xShapeInfoHostPointer, op.opNum(), (FloatPointer) x, (IntPointer) xShapeInfo, (FloatPointer) extraArgs, (FloatPointer) AtomicAllocator.getInstance().getPointer(op.y(), context), (IntPointer) AtomicAllocator.getInstance().getPointer(op.y().shapeInfoDataBuffer(), context));
                ret.assign(res);
                op.setFinalResult(res);
                AtomicAllocator.getInstance().registerAction(context, op.z(), op.x(), op.y());
            } else {
                nativeOps.execReduce3Float(xShapeInfoHostPointer, op.opNum(), (FloatPointer) x, (IntPointer) xShapeInfo, (FloatPointer) extraArgs, (FloatPointer) AtomicAllocator.getInstance().getPointer(op.y(), context), (IntPointer) AtomicAllocator.getInstance().getPointer(op.y().shapeInfoDataBuffer(), context), (FloatPointer) AtomicAllocator.getInstance().getPointer(op.z(), context), (IntPointer) AtomicAllocator.getInstance().getPointer(op.z().shapeInfoDataBuffer(), context), (IntPointer) dimensionPointer, dimension.length);
                AtomicAllocator.getInstance().registerAction(context, op.z(), op.x(), op.y());
            }
        } else {
            if (ret.isScalar()) {
                float res = nativeOps.execReduceScalarFloat(xShapeInfoHostPointer, op.opNum(), (FloatPointer) x, (IntPointer) xShapeInfo, (FloatPointer) extraArgs);
                AtomicAllocator.getInstance().registerAction(context, op.z(), op.x(), op.y());
                ret.assign(res);
                op.setFinalResult(res);
            } else {
                nativeOps.execReduceFloat(xShapeInfoHostPointer, op.opNum(), (FloatPointer) x, (IntPointer) xShapeInfo, (FloatPointer) extraArgs, (FloatPointer) AtomicAllocator.getInstance().getPointer(op.z(), context), (IntPointer) AtomicAllocator.getInstance().getPointer(op.z().shapeInfoDataBuffer(), context), (IntPointer) dimensionPointer, dimension.length);
                AtomicAllocator.getInstance().registerAction(context, op.z(), op.x(), op.y());
            }
        }
    } else {
        if (op instanceof Variance) {
            if (ret.isScalar()) {
                float res = nativeOps.execSummaryStatsScalarHalf(xShapeInfoHostPointer, op.opNum(), (ShortPointer) x, (IntPointer) xShapeInfo, (ShortPointer) extraArgs, ((Variance) op).isBiasCorrected());
                AtomicAllocator.getInstance().registerAction(context, op.z(), op.x(), op.y());
                ret.assign(res);
                op.setFinalResult(res);
            } else {
                nativeOps.execSummaryStatsHalf(xShapeInfoHostPointer, op.opNum(), (ShortPointer) x, (IntPointer) xShapeInfo, (ShortPointer) extraArgs, (ShortPointer) AtomicAllocator.getInstance().getPointer(op.z(), context), (IntPointer) AtomicAllocator.getInstance().getPointer(op.z().shapeInfoDataBuffer(), context), (IntPointer) dimensionPointer, dimension.length, ((Variance) op).isBiasCorrected());
                AtomicAllocator.getInstance().registerAction(context, op.z(), op.x(), op.y());
            }
        } else if (op.y() != null) {
            if (op.isComplexAccumulation()) {
                nativeOps.execReduce3AllHalf(xShapeInfoHostPointer, op.opNum(), (ShortPointer) x, (IntPointer) xShapeInfo, (ShortPointer) extraArgs, (ShortPointer) AtomicAllocator.getInstance().getPointer(op.y(), context), (IntPointer) AtomicAllocator.getInstance().getPointer(op.y().shapeInfoDataBuffer(), context), (ShortPointer) AtomicAllocator.getInstance().getPointer(op.z(), context), (IntPointer) AtomicAllocator.getInstance().getPointer(op.z().shapeInfoDataBuffer(), context), (IntPointer) dimensionPointer, dimension.length, (IntPointer) devTadShapeInfo, new LongPointerWrapper(devTadOffsets), (IntPointer) yDevTadShapeInfo, new LongPointerWrapper(yDevTadOffsets));
                AtomicAllocator.getInstance().registerAction(context, op.z(), op.x(), op.y());
            } else if (ret.isScalar()) {
                float res = nativeOps.execReduce3ScalarHalf(xShapeInfoHostPointer, op.opNum(), (ShortPointer) x, (IntPointer) xShapeInfo, (ShortPointer) extraArgs, (ShortPointer) AtomicAllocator.getInstance().getPointer(op.y(), context), (IntPointer) AtomicAllocator.getInstance().getPointer(op.y().shapeInfoDataBuffer(), context));
                AtomicAllocator.getInstance().registerAction(context, op.z(), op.x(), op.y());
                ret.assign(res);
                op.setFinalResult(res);
            } else {
                nativeOps.execReduce3Half(xShapeInfoHostPointer, op.opNum(), (ShortPointer) x, (IntPointer) xShapeInfo, (ShortPointer) extraArgs, (ShortPointer) AtomicAllocator.getInstance().getPointer(op.y(), context), (IntPointer) AtomicAllocator.getInstance().getPointer(op.y().shapeInfoDataBuffer(), context), (ShortPointer) AtomicAllocator.getInstance().getPointer(op.z(), context), (IntPointer) AtomicAllocator.getInstance().getPointer(op.z().shapeInfoDataBuffer(), context), (IntPointer) dimensionPointer, dimension.length);
                AtomicAllocator.getInstance().registerAction(context, op.z(), op.x(), op.y());
            }
        } else {
            if (ret.isScalar()) {
                float res = nativeOps.execReduceScalarHalf(xShapeInfoHostPointer, op.opNum(), (ShortPointer) x, (IntPointer) xShapeInfo, (ShortPointer) extraArgs);
                AtomicAllocator.getInstance().registerAction(context, op.z(), op.x(), op.y());
                ret.assign(res);
                op.setFinalResult(res);
            } else {
                nativeOps.execReduceHalf(xShapeInfoHostPointer, op.opNum(), (ShortPointer) x, (IntPointer) xShapeInfo, (ShortPointer) extraArgs, (ShortPointer) AtomicAllocator.getInstance().getPointer(op.z(), context), (IntPointer) AtomicAllocator.getInstance().getPointer(op.z().shapeInfoDataBuffer(), context), (IntPointer) dimensionPointer, dimension.length);
                AtomicAllocator.getInstance().registerAction(context, op.z(), op.x(), op.y());
            }
        }
    }
    profilingHookOut(op, st);
    return op.z();
}
Also used: lombok.val (lombok.val), CudaContext (org.nd4j.linalg.jcublas.context.CudaContext), CudaPointer (org.nd4j.jita.allocator.pointers.CudaPointer), PagedPointer (org.nd4j.linalg.api.memory.pointers.PagedPointer), AllocationPoint (org.nd4j.jita.allocator.impl.AllocationPoint), Variance (org.nd4j.linalg.api.ops.impl.accum.Variance), INDArray (org.nd4j.linalg.api.ndarray.INDArray), LongPointerWrapper (org.nd4j.nativeblas.LongPointerWrapper), ND4JIllegalStateException (org.nd4j.linalg.exception.ND4JIllegalStateException), DataBuffer (org.nd4j.linalg.api.buffer.DataBuffer), BaseDataBuffer (org.nd4j.linalg.api.buffer.BaseDataBuffer)
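
Beyond the shared dimension guard, naiveExec() adds a pairwise check: when op.y() is present and the op is not a complex accumulation (an all-pairs reduce3), op.x() and op.y() must have the same length. Isolated, the check looks like the sketch below; PairwiseLengthGuard and requireMatchingLengths are illustrative names.

import org.nd4j.linalg.exception.ND4JIllegalStateException;

public final class PairwiseLengthGuard {

    /**
     * Mirrors the naiveExec() check: unless the op is a complex accumulation,
     * X and Y must be the same length for a pairwise reduction.
     */
    public static void requireMatchingLengths(long xLength, long yLength, boolean complexAccumulation) {
        if (!complexAccumulation && xLength != yLength)
            throw new ND4JIllegalStateException(
                            "Op.X [" + xLength + "] and Op.Y [" + yLength + "] lengths should match");
    }

    public static void main(String[] args) {
        requireMatchingLengths(100, 100, false); // fine
        requireMatchingLengths(100, 10, true);   // fine: all-pairs reduce3 compares TADs instead
        // requireMatchingLengths(100, 10, false); // would throw ND4JIllegalStateException
    }
}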

Aggregations

ND4JIllegalStateException (org.nd4j.linalg.exception.ND4JIllegalStateException): 116 usages
lombok.val (lombok.val): 26 usages
INDArray (org.nd4j.linalg.api.ndarray.INDArray): 23 usages
CudaContext (org.nd4j.linalg.jcublas.context.CudaContext): 21 usages
AllocationPoint (org.nd4j.jita.allocator.impl.AllocationPoint): 19 usages
DataBuffer (org.nd4j.linalg.api.buffer.DataBuffer): 17 usages
CudaPointer (org.nd4j.jita.allocator.pointers.CudaPointer): 15 usages
PagedPointer (org.nd4j.linalg.api.memory.pointers.PagedPointer): 12 usages
AtomicInteger (java.util.concurrent.atomic.AtomicInteger): 8 usages
BaseDataBuffer (org.nd4j.linalg.api.buffer.BaseDataBuffer): 7 usages
IComplexNDArray (org.nd4j.linalg.api.complex.IComplexNDArray): 6 usages
Pointer (org.bytedeco.javacpp.Pointer): 5 usages
ArrayList (java.util.ArrayList): 4 usages
DifferentialFunction (org.nd4j.autodiff.functions.DifferentialFunction): 4 usages
Aeron (io.aeron.Aeron): 3 usages
FragmentAssembler (io.aeron.FragmentAssembler): 3 usages
MediaDriver (io.aeron.driver.MediaDriver): 3 usages
AtomicBoolean (java.util.concurrent.atomic.AtomicBoolean): 3 usages
Slf4j (lombok.extern.slf4j.Slf4j): 3 usages
CloseHelper (org.agrona.CloseHelper): 3 usages