
Example 36 with DataBuffer

Use of org.nd4j.linalg.api.buffer.DataBuffer in project nd4j by deeplearning4j.

From class CudaExecutioner, method invoke(Accumulation, int[]).

protected CudaContext invoke(Accumulation op, int[] dimension) {
    long st = profilingHookIn(op);
    checkForCompression(op);
    validateDataType(Nd4j.dataType(), op);
    if (extraz.get() == null)
        extraz.set(new PointerPointer(32));
    // dimension is ALWAYS null here.
    if (dimension == null)
        dimension = new int[] { Integer.MAX_VALUE };
    Arrays.sort(dimension);
    for (int i = 0; i < dimension.length; i++)
        if (dimension[i] >= op.x().rank() && dimension[i] != Integer.MAX_VALUE)
            throw new ND4JIllegalStateException("Op target dimension " + Arrays.toString(dimension) + " contains an element higher than the rank of op.X: [" + op.x().rank() + "]");
    CudaContext context = AtomicAllocator.getInstance().getFlowController().prepareAction(op.z(), op.x(), op.y());
    if (CudaEnvironment.getInstance().getConfiguration().isDebug())
        lastOp.set(op.opName());
    Pointer hostYShapeInfo = op.y() == null ? null : AddressRetriever.retrieveHostPointer(op.y().shapeInfoDataBuffer());
    Pointer hostZShapeInfo = op.z() == null ? null : AddressRetriever.retrieveHostPointer(op.z().shapeInfoDataBuffer());
    Pair<DataBuffer, DataBuffer> tadBuffers = tadManager.getTADOnlyShapeInfo(op.x(), dimension);
    Pointer hostTadShapeInfo = AddressRetriever.retrieveHostPointer(tadBuffers.getFirst());
    Pointer devTadShapeInfo = AtomicAllocator.getInstance().getPointer(tadBuffers.getFirst(), context);
    DataBuffer offsets = tadBuffers.getSecond();
    Pointer devTadOffsets = offsets == null ? null : AtomicAllocator.getInstance().getPointer(offsets, context);
    PointerPointer xShapeInfoHostPointer = extraz.get().put(AddressRetriever.retrieveHostPointer(op.x().shapeInfoDataBuffer()), context.getOldStream(), AtomicAllocator.getInstance().getDeviceIdPointer(), context.getBufferAllocation(), context.getBufferReduction(), context.getBufferScalar(), context.getBufferSpecial(), hostYShapeInfo, hostZShapeInfo, hostTadShapeInfo, devTadShapeInfo, devTadOffsets);
    if (op.y() != null) {
        Pair<DataBuffer, DataBuffer> yTadBuffers = tadManager.getTADOnlyShapeInfo(op.y(), dimension);
        Pointer yDevTadShapeInfo = AtomicAllocator.getInstance().getPointer(yTadBuffers.getFirst(), context);
        DataBuffer yOffsets = yTadBuffers.getSecond();
        Pointer yDevTadOffsets = yOffsets == null ? null : AtomicAllocator.getInstance().getPointer(yOffsets, context);
        xShapeInfoHostPointer.put(12, yDevTadShapeInfo);
        xShapeInfoHostPointer.put(13, yDevTadOffsets);
    }
    Pointer x = AtomicAllocator.getInstance().getPointer(op.x(), context);
    Pointer xShapeInfo = AtomicAllocator.getInstance().getPointer(op.x().shapeInfoDataBuffer(), context);
    Pointer extraArgs = op.extraArgs() != null ? AtomicAllocator.getInstance().getPointer(op.extraArgsDataBuff(), context) : null;
    int[] retShape = Shape.wholeArrayDimension(dimension) ? new int[] { 1, 1 } : ArrayUtil.removeIndex(op.x().shape(), dimension);
    // ensure vector is proper shape
    if (retShape.length == 1) {
        if (dimension[0] == 0)
            retShape = new int[] { 1, retShape[0] };
        else
            retShape = new int[] { retShape[0], 1 };
    } else if (retShape.length == 0) {
        retShape = new int[] { 1, 1 };
    }
    if (op.x().isVector() && op.x().length() == ArrayUtil.prod(retShape))
        return null;
    INDArray ret = null;
    if (Math.abs(op.zeroDouble()) <= Nd4j.EPS_THRESHOLD) {
        ret = Nd4j.zeros(retShape);
    } else {
        if (op.x().data().dataType() == DataBuffer.Type.DOUBLE)
            ret = Nd4j.valueArrayOf(retShape, op.zeroDouble());
        else if (op.x().data().dataType() == DataBuffer.Type.FLOAT)
            ret = Nd4j.valueArrayOf(retShape, op.zeroFloat());
        else if (op.x().data().dataType() == DataBuffer.Type.HALF)
            ret = Nd4j.valueArrayOf(retShape, op.zeroHalf());
    }
    op.setZ(ret);
    if (op.z().isScalar()) {
        if (op.x().data().dataType() == DataBuffer.Type.DOUBLE) {
            if (op instanceof Variance) {
                double result = nativeOps.execSummaryStatsScalarDouble(xShapeInfoHostPointer, op.opNum(), (DoublePointer) x, (IntPointer) xShapeInfo, (DoublePointer) extraArgs, ((Variance) op).isBiasCorrected());
                op.setFinalResult(result);
            } else if (op.y() != null) {
                Pointer y = AtomicAllocator.getInstance().getPointer(op.y(), context);
                Pointer yShapeInfo = AtomicAllocator.getInstance().getPointer(op.y().shapeInfoDataBuffer(), context);
                double result = nativeOps.execReduce3ScalarDouble(xShapeInfoHostPointer, op.opNum(), (DoublePointer) x, (IntPointer) xShapeInfo, (DoublePointer) extraArgs, (DoublePointer) y, (IntPointer) yShapeInfo);
                op.setFinalResult(result);
            } else {
                double result = nativeOps.execReduceScalarDouble(xShapeInfoHostPointer, op.opNum(), (DoublePointer) x, (IntPointer) xShapeInfo, (DoublePointer) extraArgs);
                op.setFinalResult(result);
            }
        } else if (op.x().data().dataType() == DataBuffer.Type.FLOAT) {
            if (op instanceof Variance) {
                float result = nativeOps.execSummaryStatsScalarFloat(xShapeInfoHostPointer, op.opNum(), (FloatPointer) x, (IntPointer) xShapeInfo, (FloatPointer) extraArgs, ((Variance) op).isBiasCorrected());
                op.setFinalResult(result);
            } else if (op.y() != null) {
                Pointer y = AtomicAllocator.getInstance().getPointer(op.y(), context);
                Pointer yShapeInfo = AtomicAllocator.getInstance().getPointer(op.y().shapeInfoDataBuffer(), context);
                float result = nativeOps.execReduce3ScalarFloat(xShapeInfoHostPointer, op.opNum(), (FloatPointer) x, (IntPointer) xShapeInfo, (FloatPointer) extraArgs, (FloatPointer) y, (IntPointer) yShapeInfo);
                op.setFinalResult(result);
            } else {
                float result = nativeOps.execReduceScalarFloat(xShapeInfoHostPointer, op.opNum(), (FloatPointer) x, (IntPointer) xShapeInfo, (FloatPointer) extraArgs);
                op.setFinalResult(result);
            }
        } else {
            if (op instanceof Variance) {
                float result = nativeOps.execSummaryStatsScalarHalf(xShapeInfoHostPointer, op.opNum(), (ShortPointer) x, (IntPointer) xShapeInfo, (ShortPointer) extraArgs, ((Variance) op).isBiasCorrected());
                op.setFinalResult(result);
            } else if (op.y() != null) {
                Pointer y = AtomicAllocator.getInstance().getPointer(op.y(), context);
                Pointer yShapeInfo = AtomicAllocator.getInstance().getPointer(op.y().shapeInfoDataBuffer(), context);
                float result = nativeOps.execReduce3ScalarHalf(xShapeInfoHostPointer, op.opNum(), (ShortPointer) x, (IntPointer) xShapeInfo, (ShortPointer) extraArgs, (ShortPointer) y, (IntPointer) yShapeInfo);
                op.setFinalResult(result);
            } else {
                float result = nativeOps.execReduceScalarHalf(xShapeInfoHostPointer, op.opNum(), (ShortPointer) x, (IntPointer) xShapeInfo, (ShortPointer) extraArgs);
                op.setFinalResult(result);
            }
        }
    } else {
        Pointer result = AtomicAllocator.getInstance().getPointer(op.z(), context);
        Pointer resultShapeInfo = AtomicAllocator.getInstance().getPointer(op.z().shapeInfoDataBuffer(), context);
        Pointer dimensionPointer = AtomicAllocator.getInstance().getPointer(AtomicAllocator.getInstance().getConstantBuffer(dimension), context);
        if (op.x().data().dataType() == DataBuffer.Type.DOUBLE) {
            if (op.y() != null) {
                Pointer y = AtomicAllocator.getInstance().getPointer(op.y(), context);
                Pointer yShapeInfo = AtomicAllocator.getInstance().getPointer(op.y().shapeInfoDataBuffer(), context);
                nativeOps.execReduce3Double(xShapeInfoHostPointer, op.opNum(), (DoublePointer) x, (IntPointer) xShapeInfo, (DoublePointer) extraArgs, (DoublePointer) y, (IntPointer) yShapeInfo, (DoublePointer) result, (IntPointer) resultShapeInfo, (IntPointer) dimensionPointer, dimension.length);
            } else {
                if (op instanceof Variance) {
                    nativeOps.execSummaryStatsDouble(xShapeInfoHostPointer, op.opNum(), (DoublePointer) x, (IntPointer) xShapeInfo, (DoublePointer) extraArgs, (DoublePointer) result, (IntPointer) resultShapeInfo, (IntPointer) dimensionPointer, dimension.length, ((Variance) op).isBiasCorrected());
                } else {
                    nativeOps.execReduceDouble(xShapeInfoHostPointer, op.opNum(), (DoublePointer) x, (IntPointer) xShapeInfo, (DoublePointer) extraArgs, (DoublePointer) result, (IntPointer) resultShapeInfo, (IntPointer) dimensionPointer, dimension.length);
                }
            }
        } else if (op.x().data().dataType() == DataBuffer.Type.FLOAT) { // float
            if (op.y() != null) {
                Pointer y = AtomicAllocator.getInstance().getPointer(op.y(), context);
                Pointer yShapeInfo = AtomicAllocator.getInstance().getPointer(op.y().shapeInfoDataBuffer(), context);
                nativeOps.execReduce3Float(xShapeInfoHostPointer, op.opNum(), (FloatPointer) x, (IntPointer) xShapeInfo, (FloatPointer) extraArgs, (FloatPointer) y, (IntPointer) yShapeInfo, (FloatPointer) result, (IntPointer) resultShapeInfo, (IntPointer) dimensionPointer, dimension.length);
            } else {
                if (op instanceof Variance) {
                    nativeOps.execSummaryStatsFloat(xShapeInfoHostPointer, op.opNum(), (FloatPointer) x, (IntPointer) xShapeInfo, (FloatPointer) extraArgs, (FloatPointer) result, (IntPointer) resultShapeInfo, (IntPointer) dimensionPointer, dimension.length, ((Variance) op).isBiasCorrected());
                } else {
                    nativeOps.execReduceFloat(xShapeInfoHostPointer, op.opNum(), (FloatPointer) x, (IntPointer) xShapeInfo, (FloatPointer) extraArgs, (FloatPointer) result, (IntPointer) resultShapeInfo, (IntPointer) dimensionPointer, dimension.length);
                }
            }
        } else { // half
            if (op.y() != null) {
                Pointer y = AtomicAllocator.getInstance().getPointer(op.y(), context);
                Pointer yShapeInfo = AtomicAllocator.getInstance().getPointer(op.y().shapeInfoDataBuffer(), context);
                nativeOps.execReduce3Half(xShapeInfoHostPointer, op.opNum(), (ShortPointer) x, (IntPointer) xShapeInfo, (ShortPointer) extraArgs, (ShortPointer) y, (IntPointer) yShapeInfo, (ShortPointer) result, (IntPointer) resultShapeInfo, (IntPointer) dimensionPointer, dimension.length);
            } else {
                if (op instanceof Variance) {
                    nativeOps.execSummaryStatsHalf(xShapeInfoHostPointer, op.opNum(), (ShortPointer) x, (IntPointer) xShapeInfo, (ShortPointer) extraArgs, (ShortPointer) result, (IntPointer) resultShapeInfo, (IntPointer) dimensionPointer, dimension.length, ((Variance) op).isBiasCorrected());
                } else {
                    nativeOps.execReduceHalf(xShapeInfoHostPointer, op.opNum(), (ShortPointer) x, (IntPointer) xShapeInfo, (ShortPointer) extraArgs, (ShortPointer) result, (IntPointer) resultShapeInfo, (IntPointer) dimensionPointer, dimension.length);
                }
            }
        }
    }
    AtomicAllocator.getInstance().registerAction(context, op.z(), op.x(), op.y());
    profilingHookOut(op, st);
    return context;
}
Also used : CudaContext(org.nd4j.linalg.jcublas.context.CudaContext) CudaPointer(org.nd4j.jita.allocator.pointers.CudaPointer) PagedPointer(org.nd4j.linalg.api.memory.pointers.PagedPointer) AllocationPoint(org.nd4j.jita.allocator.impl.AllocationPoint) Variance(org.nd4j.linalg.api.ops.impl.accum.Variance) INDArray(org.nd4j.linalg.api.ndarray.INDArray) ND4JIllegalStateException(org.nd4j.linalg.exception.ND4JIllegalStateException) DataBuffer(org.nd4j.linalg.api.buffer.DataBuffer) BaseDataBuffer(org.nd4j.linalg.api.buffer.BaseDataBuffer)
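
This low-level invoke path is normally reached through the high-level executioner API rather than called directly. A minimal usage sketch, assuming the nd4j 0.9.x API these examples are drawn from (Sum is one of the built-in Accumulation ops; the class name ReduceSketch is illustrative):

import org.nd4j.linalg.api.ndarray.INDArray;
import org.nd4j.linalg.api.ops.impl.accum.Sum;
import org.nd4j.linalg.factory.Nd4j;

public class ReduceSketch {
    public static void main(String[] args) {
        INDArray x = Nd4j.linspace(1, 6, 6).reshape(2, 3);
        // Reducing along dimension 0 routes into invoke(Accumulation, int[])
        // on the CUDA backend, which builds the TAD buffers seen above.
        INDArray colSums = Nd4j.getExecutioner().exec(new Sum(x), 0);
        System.out.println(colSums); // expected: [5.00, 7.00, 9.00]
    }
}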

Example 37 with DataBuffer

Use of org.nd4j.linalg.api.buffer.DataBuffer in project nd4j by deeplearning4j.

From class CudaExecutioner, method naiveExec.

/**
 * Executes the given accumulation op, reducing along the specified dimension(s).
 *
 * @param op        the accumulation (reduction) op to execute
 * @param dimension the dimension(s) to reduce over; Integer.MAX_VALUE denotes the whole array
 * @return the result array, op.z()
 */
protected INDArray naiveExec(Accumulation op, int... dimension) {
    long st = profilingHookIn(op);
    INDArray ret = op.z();
    validateDataType(Nd4j.dataType(), op);
    for (int i = 0; i < dimension.length; i++)
        if (dimension[i] >= op.x().rank() && dimension[i] != Integer.MAX_VALUE)
            throw new ND4JIllegalStateException("Op target dimension " + Arrays.toString(dimension) + " contains an element higher than the rank of op.X: [" + op.x().rank() + "]");
    CudaContext context = AtomicAllocator.getInstance().getFlowController().prepareAction(op.z(), op.x(), op.y());
    if (CudaEnvironment.getInstance().getConfiguration().isDebug())
        lastOp.set(op.opName());
    Pointer hostYShapeInfo = op.y() == null ? null : AddressRetriever.retrieveHostPointer(op.y().shapeInfoDataBuffer());
    Pointer hostZShapeInfo = op.z() == null ? null : AddressRetriever.retrieveHostPointer(op.z().shapeInfoDataBuffer());
    Pair<DataBuffer, DataBuffer> tadBuffers = tadManager.getTADOnlyShapeInfo(op.x(), dimension);
    /*
        if (op.opNum() == 3) {
            log.info("Max shape: {}", Arrays.toString(op.x().shapeInfoDataBuffer().asInt()));
            log.info("Max TAD: {}", Arrays.toString(tadBuffers.getFirst().asInt()));
            context.syncOldStream();
        }
    */
    Pointer hostTadShapeInfo = AddressRetriever.retrieveHostPointer(tadBuffers.getFirst());
    Pointer devTadShapeInfo = AtomicAllocator.getInstance().getPointer(tadBuffers.getFirst(), context);
    DataBuffer offsets = tadBuffers.getSecond();
    Pointer devTadOffsets = offsets == null ? null : AtomicAllocator.getInstance().getPointer(offsets, context);
    Pointer x = AtomicAllocator.getInstance().getPointer(op.x(), context);
    Pointer xShapeInfo = AtomicAllocator.getInstance().getPointer(op.x().shapeInfoDataBuffer(), context);
    if (extraz.get() == null)
        extraz.set(new PointerPointer(32));
    PointerPointer xShapeInfoHostPointer = extraz.get().put(AddressRetriever.retrieveHostPointer(op.x().shapeInfoDataBuffer()), context.getOldStream(), AtomicAllocator.getInstance().getDeviceIdPointer(), context.getBufferAllocation(), context.getBufferReduction(), context.getBufferScalar(), context.getBufferSpecial(), hostYShapeInfo, hostZShapeInfo, hostTadShapeInfo, devTadShapeInfo, devTadOffsets);
    Pointer yDevTadOffsets = null;
    Pointer yDevTadShapeInfo = null;
    if (op.y() != null) {
        if ((dimension.length == 1 && dimension[0] == Integer.MAX_VALUE) || op.x().tensorAlongDimension(0, dimension).lengthLong() != op.y().lengthLong()) {
            if (!op.isComplexAccumulation() && op.x().lengthLong() != op.y().lengthLong())
                throw new ND4JIllegalStateException("Op.X [" + op.x().lengthLong() + "] and Op.Y [" + op.y().lengthLong() + "] lengths should match");
            Pair<DataBuffer, DataBuffer> yTadBuffers = tadManager.getTADOnlyShapeInfo(op.y(), dimension);
            yDevTadShapeInfo = AtomicAllocator.getInstance().getPointer(yTadBuffers.getFirst(), context);
            DataBuffer yOffsets = yTadBuffers.getSecond();
            yDevTadOffsets = yOffsets == null ? null : AtomicAllocator.getInstance().getPointer(yOffsets, context);
            xShapeInfoHostPointer.put(12, yDevTadShapeInfo);
            xShapeInfoHostPointer.put(13, yDevTadOffsets);
        } else {
            // TAD vs full array code branch
            val fakeOffsets = Nd4j.getConstantHandler().getConstantBuffer(new int[] { 0, 0 });
            yDevTadOffsets = fakeOffsets == null ? null : AtomicAllocator.getInstance().getPointer(fakeOffsets, context);
            yDevTadShapeInfo = AtomicAllocator.getInstance().getPointer(op.y().shapeInfoDataBuffer(), context);
            xShapeInfoHostPointer.put(12, AtomicAllocator.getInstance().getPointer(op.y().shapeInfoDataBuffer(), context));
            xShapeInfoHostPointer.put(13, null);
        }
    }
    Pointer extraArgs = op.extraArgs() != null ? AtomicAllocator.getInstance().getPointer(op.extraArgsDataBuff(), context) : null;
    // Pointer extraArgs = op.extraArgs() != null ? AtomicAllocator.getInstance().getPointer(op.extraArgsDataBuff(), context) : 0;
    // Pointer dimensionPointer = AtomicAllocator.getInstance().getPointer(Nd4j.createBuffer(dimension), context);
    Pointer dimensionPointer = AtomicAllocator.getInstance().getPointer(AtomicAllocator.getInstance().getConstantBuffer(dimension), context);
    if (op.x().data().dataType() == DataBuffer.Type.DOUBLE) {
        if (op instanceof Variance) {
            if (ret.isScalar()) {
                double res = nativeOps.execSummaryStatsScalarDouble(xShapeInfoHostPointer, op.opNum(), (DoublePointer) x, (IntPointer) xShapeInfo, (DoublePointer) extraArgs, ((Variance) op).isBiasCorrected());
                AtomicAllocator.getInstance().registerAction(context, op.z(), op.x(), op.y());
                ret.assign(res);
                op.setFinalResult(res);
            } else {
                nativeOps.execSummaryStatsDouble(xShapeInfoHostPointer, op.opNum(), (DoublePointer) x, (IntPointer) xShapeInfo, (DoublePointer) extraArgs, (DoublePointer) AtomicAllocator.getInstance().getPointer(op.z(), context), (IntPointer) AtomicAllocator.getInstance().getPointer(op.z().shapeInfoDataBuffer(), context), (IntPointer) dimensionPointer, dimension.length, ((Variance) op).isBiasCorrected());
                AtomicAllocator.getInstance().registerAction(context, op.z(), op.x(), op.y());
            }
        } else if (op.y() != null) {
            if (op.isComplexAccumulation()) {
                val dT = new LongPointerWrapper(devTadOffsets);
                val yT = new LongPointerWrapper(yDevTadOffsets);
                nativeOps.execReduce3AllDouble(xShapeInfoHostPointer, op.opNum(), (DoublePointer) x, (IntPointer) xShapeInfo, (DoublePointer) extraArgs, (DoublePointer) AtomicAllocator.getInstance().getPointer(op.y(), context), (IntPointer) AtomicAllocator.getInstance().getPointer(op.y().shapeInfoDataBuffer(), context), (DoublePointer) AtomicAllocator.getInstance().getPointer(op.z(), context), (IntPointer) AtomicAllocator.getInstance().getPointer(op.z().shapeInfoDataBuffer(), context), (IntPointer) dimensionPointer, dimension.length, (IntPointer) devTadShapeInfo, dT, (IntPointer) yDevTadShapeInfo, yT);
                AtomicAllocator.getInstance().registerAction(context, op.z(), op.x(), op.y());
            } else if (ret.isScalar()) {
                double res = nativeOps.execReduce3ScalarDouble(xShapeInfoHostPointer, op.opNum(), (DoublePointer) x, (IntPointer) xShapeInfo, (DoublePointer) extraArgs, (DoublePointer) AtomicAllocator.getInstance().getPointer(op.y(), context), (IntPointer) AtomicAllocator.getInstance().getPointer(op.y().shapeInfoDataBuffer(), context));
                AtomicAllocator.getInstance().registerAction(context, op.z(), op.x(), op.y());
                ret.assign(res);
                op.setFinalResult(res);
            } else {
                nativeOps.execReduce3Double(xShapeInfoHostPointer, op.opNum(), (DoublePointer) x, (IntPointer) xShapeInfo, (DoublePointer) extraArgs, (DoublePointer) AtomicAllocator.getInstance().getPointer(op.y(), context), (IntPointer) AtomicAllocator.getInstance().getPointer(op.y().shapeInfoDataBuffer(), context), (DoublePointer) AtomicAllocator.getInstance().getPointer(op.z(), context), (IntPointer) AtomicAllocator.getInstance().getPointer(op.z().shapeInfoDataBuffer(), context), (IntPointer) dimensionPointer, dimension.length);
                AtomicAllocator.getInstance().registerAction(context, op.z(), op.x(), op.y());
            }
        } else {
            if (ret.isScalar()) {
                double res = nativeOps.execReduceScalarDouble(xShapeInfoHostPointer, op.opNum(), (DoublePointer) x, (IntPointer) xShapeInfo, (DoublePointer) extraArgs);
                AtomicAllocator.getInstance().registerAction(context, op.z(), op.x(), op.y());
                ret.assign(res);
                op.setFinalResult(res);
            } else {
                nativeOps.execReduceDouble(xShapeInfoHostPointer, op.opNum(), (DoublePointer) x, (IntPointer) xShapeInfo, (DoublePointer) extraArgs, (DoublePointer) AtomicAllocator.getInstance().getPointer(op.z(), context), (IntPointer) AtomicAllocator.getInstance().getPointer(op.z().shapeInfoDataBuffer(), context), (IntPointer) dimensionPointer, dimension.length);
                AtomicAllocator.getInstance().registerAction(context, op.z(), op.x(), op.y());
            }
        }
    } else if (op.x().data().dataType() == DataBuffer.Type.FLOAT) {
        if (op instanceof Variance) {
            if (ret.isScalar()) {
                float res = nativeOps.execSummaryStatsScalarFloat(xShapeInfoHostPointer, op.opNum(), (FloatPointer) x, (IntPointer) xShapeInfo, (FloatPointer) extraArgs, ((Variance) op).isBiasCorrected());
                AtomicAllocator.getInstance().registerAction(context, op.z(), op.x(), op.y());
                ret.assign(res);
                op.setFinalResult(res);
            } else {
                nativeOps.execSummaryStatsFloat(xShapeInfoHostPointer, op.opNum(), (FloatPointer) x, (IntPointer) xShapeInfo, (FloatPointer) extraArgs, (FloatPointer) AtomicAllocator.getInstance().getPointer(op.z(), context), (IntPointer) AtomicAllocator.getInstance().getPointer(op.z().shapeInfoDataBuffer(), context), (IntPointer) dimensionPointer, dimension.length, ((Variance) op).isBiasCorrected());
                AtomicAllocator.getInstance().registerAction(context, op.z(), op.x(), op.y());
            }
        } else if (op.y() != null) {
            if (op.isComplexAccumulation()) {
                nativeOps.execReduce3AllFloat(xShapeInfoHostPointer, op.opNum(), (FloatPointer) x, (IntPointer) xShapeInfo, (FloatPointer) extraArgs, (FloatPointer) AtomicAllocator.getInstance().getPointer(op.y(), context), (IntPointer) AtomicAllocator.getInstance().getPointer(op.y().shapeInfoDataBuffer(), context), (FloatPointer) AtomicAllocator.getInstance().getPointer(op.z(), context), (IntPointer) AtomicAllocator.getInstance().getPointer(op.z().shapeInfoDataBuffer(), context), (IntPointer) dimensionPointer, dimension.length, (IntPointer) devTadShapeInfo, new LongPointerWrapper(devTadOffsets), (IntPointer) yDevTadShapeInfo, new LongPointerWrapper(yDevTadOffsets));
                AtomicAllocator.getInstance().registerAction(context, op.z(), op.x(), op.y());
            } else if (ret.isScalar()) {
                float res = nativeOps.execReduce3ScalarFloat(xShapeInfoHostPointer, op.opNum(), (FloatPointer) x, (IntPointer) xShapeInfo, (FloatPointer) extraArgs, (FloatPointer) AtomicAllocator.getInstance().getPointer(op.y(), context), (IntPointer) AtomicAllocator.getInstance().getPointer(op.y().shapeInfoDataBuffer(), context));
                ret.assign(res);
                op.setFinalResult(res);
                AtomicAllocator.getInstance().registerAction(context, op.z(), op.x(), op.y());
            } else {
                nativeOps.execReduce3Float(xShapeInfoHostPointer, op.opNum(), (FloatPointer) x, (IntPointer) xShapeInfo, (FloatPointer) extraArgs, (FloatPointer) AtomicAllocator.getInstance().getPointer(op.y(), context), (IntPointer) AtomicAllocator.getInstance().getPointer(op.y().shapeInfoDataBuffer(), context), (FloatPointer) AtomicAllocator.getInstance().getPointer(op.z(), context), (IntPointer) AtomicAllocator.getInstance().getPointer(op.z().shapeInfoDataBuffer(), context), (IntPointer) dimensionPointer, dimension.length);
                AtomicAllocator.getInstance().registerAction(context, op.z(), op.x(), op.y());
            }
        } else {
            if (ret.isScalar()) {
                float res = nativeOps.execReduceScalarFloat(xShapeInfoHostPointer, op.opNum(), (FloatPointer) x, (IntPointer) xShapeInfo, (FloatPointer) extraArgs);
                AtomicAllocator.getInstance().registerAction(context, op.z(), op.x(), op.y());
                ret.assign(res);
                op.setFinalResult(res);
            } else {
                nativeOps.execReduceFloat(xShapeInfoHostPointer, op.opNum(), (FloatPointer) x, (IntPointer) xShapeInfo, (FloatPointer) extraArgs, (FloatPointer) AtomicAllocator.getInstance().getPointer(op.z(), context), (IntPointer) AtomicAllocator.getInstance().getPointer(op.z().shapeInfoDataBuffer(), context), (IntPointer) dimensionPointer, dimension.length);
                AtomicAllocator.getInstance().registerAction(context, op.z(), op.x(), op.y());
            }
        }
    } else {
        if (op instanceof Variance) {
            if (ret.isScalar()) {
                float res = nativeOps.execSummaryStatsScalarHalf(xShapeInfoHostPointer, op.opNum(), (ShortPointer) x, (IntPointer) xShapeInfo, (ShortPointer) extraArgs, ((Variance) op).isBiasCorrected());
                AtomicAllocator.getInstance().registerAction(context, op.z(), op.x(), op.y());
                ret.assign(res);
                op.setFinalResult(res);
            } else {
                nativeOps.execSummaryStatsHalf(xShapeInfoHostPointer, op.opNum(), (ShortPointer) x, (IntPointer) xShapeInfo, (ShortPointer) extraArgs, (ShortPointer) AtomicAllocator.getInstance().getPointer(op.z(), context), (IntPointer) AtomicAllocator.getInstance().getPointer(op.z().shapeInfoDataBuffer(), context), (IntPointer) dimensionPointer, dimension.length, ((Variance) op).isBiasCorrected());
                AtomicAllocator.getInstance().registerAction(context, op.z(), op.x(), op.y());
            }
        } else if (op.y() != null) {
            if (op.isComplexAccumulation()) {
                nativeOps.execReduce3AllHalf(xShapeInfoHostPointer, op.opNum(), (ShortPointer) x, (IntPointer) xShapeInfo, (ShortPointer) extraArgs, (ShortPointer) AtomicAllocator.getInstance().getPointer(op.y(), context), (IntPointer) AtomicAllocator.getInstance().getPointer(op.y().shapeInfoDataBuffer(), context), (ShortPointer) AtomicAllocator.getInstance().getPointer(op.z(), context), (IntPointer) AtomicAllocator.getInstance().getPointer(op.z().shapeInfoDataBuffer(), context), (IntPointer) dimensionPointer, dimension.length, (IntPointer) devTadShapeInfo, new LongPointerWrapper(devTadOffsets), (IntPointer) yDevTadShapeInfo, new LongPointerWrapper(yDevTadOffsets));
                AtomicAllocator.getInstance().registerAction(context, op.z(), op.x(), op.y());
            } else if (ret.isScalar()) {
                float res = nativeOps.execReduce3ScalarHalf(xShapeInfoHostPointer, op.opNum(), (ShortPointer) x, (IntPointer) xShapeInfo, (ShortPointer) extraArgs, (ShortPointer) AtomicAllocator.getInstance().getPointer(op.y(), context), (IntPointer) AtomicAllocator.getInstance().getPointer(op.y().shapeInfoDataBuffer(), context));
                AtomicAllocator.getInstance().registerAction(context, op.z(), op.x(), op.y());
                ret.assign(res);
                op.setFinalResult(res);
            } else {
                nativeOps.execReduce3Half(xShapeInfoHostPointer, op.opNum(), (ShortPointer) x, (IntPointer) xShapeInfo, (ShortPointer) extraArgs, (ShortPointer) AtomicAllocator.getInstance().getPointer(op.y(), context), (IntPointer) AtomicAllocator.getInstance().getPointer(op.y().shapeInfoDataBuffer(), context), (ShortPointer) AtomicAllocator.getInstance().getPointer(op.z(), context), (IntPointer) AtomicAllocator.getInstance().getPointer(op.z().shapeInfoDataBuffer(), context), (IntPointer) dimensionPointer, dimension.length);
                AtomicAllocator.getInstance().registerAction(context, op.z(), op.x(), op.y());
            }
        } else {
            if (ret.isScalar()) {
                float res = nativeOps.execReduceScalarHalf(xShapeInfoHostPointer, op.opNum(), (ShortPointer) x, (IntPointer) xShapeInfo, (ShortPointer) extraArgs);
                AtomicAllocator.getInstance().registerAction(context, op.z(), op.x(), op.y());
                ret.assign(res);
                op.setFinalResult(res);
            } else {
                nativeOps.execReduceHalf(xShapeInfoHostPointer, op.opNum(), (ShortPointer) x, (IntPointer) xShapeInfo, (ShortPointer) extraArgs, (ShortPointer) AtomicAllocator.getInstance().getPointer(op.z(), context), (IntPointer) AtomicAllocator.getInstance().getPointer(op.z().shapeInfoDataBuffer(), context), (IntPointer) dimensionPointer, dimension.length);
                AtomicAllocator.getInstance().registerAction(context, op.z(), op.x(), op.y());
            }
        }
    }
    profilingHookOut(op, st);
    return op.z();
}
Also used : lombok.val(lombok.val) CudaContext(org.nd4j.linalg.jcublas.context.CudaContext) CudaPointer(org.nd4j.jita.allocator.pointers.CudaPointer) PagedPointer(org.nd4j.linalg.api.memory.pointers.PagedPointer) AllocationPoint(org.nd4j.jita.allocator.impl.AllocationPoint) Variance(org.nd4j.linalg.api.ops.impl.accum.Variance) INDArray(org.nd4j.linalg.api.ndarray.INDArray) LongPointerWrapper(org.nd4j.nativeblas.LongPointerWrapper) ND4JIllegalStateException(org.nd4j.linalg.exception.ND4JIllegalStateException) DataBuffer(org.nd4j.linalg.api.buffer.DataBuffer) BaseDataBuffer(org.nd4j.linalg.api.buffer.BaseDataBuffer)
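
The op.y() != null branches above serve pairwise (reduce3) accumulations such as distance ops, where both operands are TAD-ed along the reduction dimension. A hedged sketch of how such an op reaches naiveExec, assuming the distance classes under org.nd4j.linalg.api.ops.impl.accum.distances (the class name Reduce3Sketch is illustrative):

import org.nd4j.linalg.api.ndarray.INDArray;
import org.nd4j.linalg.api.ops.impl.accum.distances.EuclideanDistance;
import org.nd4j.linalg.factory.Nd4j;

public class Reduce3Sketch {
    public static void main(String[] args) {
        INDArray x = Nd4j.create(new float[] {1f, 2f, 3f, 4f}, new int[] {2, 2});
        INDArray y = Nd4j.create(new float[] {4f, 3f, 2f, 1f}, new int[] {2, 2});
        // With both x and y set, the executioner builds TAD buffers for each
        // operand (the yTadBuffers branch above) before dispatching reduce3.
        INDArray rowDistances = Nd4j.getExecutioner().exec(new EuclideanDistance(x, y), 1);
        System.out.println(rowDistances); // sqrt(10) per row for this data
    }
}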

Example 38 with DataBuffer

Use of org.nd4j.linalg.api.buffer.DataBuffer in project nd4j by deeplearning4j.

From class CudaExecutioner, method invoke(IndexAccumulation, int[]).

protected CudaContext invoke(IndexAccumulation op, int[] dimension) {
    long st = profilingHookIn(op);
    if (dimension == null || (dimension.length == 1 && dimension[0] == Integer.MAX_VALUE)) {
        if (op.z() == op.x() || op.z() == null) {
            op.setZ(Nd4j.scalar(0.0));
        }
    }
    checkForCompression(op);
    validateDataType(Nd4j.dataType(), op);
    if (extraz.get() == null)
        extraz.set(new PointerPointer(32));
    if (CudaEnvironment.getInstance().getConfiguration().isDebug())
        lastOp.set(op.opName());
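    // Note: debug mode is unconditionally enabled on the next line in the original source, likely leftover instrumentation.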
    CudaEnvironment.getInstance().getConfiguration().enableDebug(true);
    for (int i = 0; i < dimension.length; i++)
        if (dimension[i] >= op.x().rank() && dimension[i] != Integer.MAX_VALUE)
            throw new ND4JIllegalStateException("Op target dimension " + Arrays.toString(dimension) + " contains an element higher than the rank of op.X: [" + op.x().rank() + "]");
    CudaContext context = AtomicAllocator.getInstance().getFlowController().prepareAction(op.z().isScalar() ? null : op.z(), op.x(), op.y());
    Pointer x = AtomicAllocator.getInstance().getPointer(op.x(), context);
    Pointer xShapeInfo = AtomicAllocator.getInstance().getPointer(op.x().shapeInfoDataBuffer(), context);
    Pointer extraArgs = op.extraArgs() != null ? AtomicAllocator.getInstance().getPointer(op.extraArgsDataBuff(), context) : null;
    Pointer hostYShapeInfo = op.y() == null ? null : AddressRetriever.retrieveHostPointer(op.y().shapeInfoDataBuffer());
    Pointer hostZShapeInfo = op.z() == null ? null : AddressRetriever.retrieveHostPointer(op.z().shapeInfoDataBuffer());
    int[] fdimension = dimension;
    if (fdimension == null)
        fdimension = new int[] { 0 };
    Pair<DataBuffer, DataBuffer> tadBuffers = tadManager.getTADOnlyShapeInfo(op.x(), fdimension);
    Pointer hostTadShapeInfo = AddressRetriever.retrieveHostPointer(tadBuffers.getFirst());
    Pointer devTadShapeInfo = AtomicAllocator.getInstance().getPointer(tadBuffers.getFirst(), context);
    DataBuffer offsets = tadBuffers.getSecond();
    Pointer devTadOffsets = offsets == null ? null : AtomicAllocator.getInstance().getPointer(offsets, context);
    PointerPointer xShapeInfoHostPointer = extraz.get().put(AddressRetriever.retrieveHostPointer(op.x().shapeInfoDataBuffer()), context.getOldStream(), AtomicAllocator.getInstance().getDeviceIdPointer(), context.getBufferAllocation(), context.getBufferReduction(), context.getBufferScalar(), context.getBufferSpecial(), hostYShapeInfo, hostZShapeInfo, hostTadShapeInfo, devTadShapeInfo, devTadOffsets);
    if (op.z().isScalar() || dimension == null || dimension[0] == Integer.MAX_VALUE) {
        if (op.x().data().dataType() == DataBuffer.Type.DOUBLE) {
            double result = nativeOps.execIndexReduceScalarDouble(xShapeInfoHostPointer, op.opNum(), (DoublePointer) x, (IntPointer) xShapeInfo, (DoublePointer) extraArgs);
            op.setFinalResult((int) result);
            op.z().assign(result);
        } else if (op.x().data().dataType() == DataBuffer.Type.FLOAT) {
            float result = nativeOps.execIndexReduceScalarFloat(xShapeInfoHostPointer, op.opNum(), (FloatPointer) x, (IntPointer) xShapeInfo, (FloatPointer) extraArgs);
            op.setFinalResult((int) result);
            op.z().assign(result);
        } else {
            float result = nativeOps.execIndexReduceScalarHalf(xShapeInfoHostPointer, op.opNum(), (ShortPointer) x, (IntPointer) xShapeInfo, (ShortPointer) extraArgs);
            op.setFinalResult((int) result);
            op.z().assign(result);
        }
    } else {
        Arrays.sort(dimension);
        Pointer z = AtomicAllocator.getInstance().getPointer(op.z(), context);
        Pointer zShapeInfo = AtomicAllocator.getInstance().getPointer(op.z().shapeInfoDataBuffer(), context);
        // long dimensionPointer = AtomicAllocator.getInstance().getPointer(Nd4j.createBuffer(dimension), context);
        Pointer dimensionPointer = AtomicAllocator.getInstance().getPointer(AtomicAllocator.getInstance().getConstantBuffer(dimension), context);
        if (op.x().data().dataType() == DataBuffer.Type.DOUBLE) {
            nativeOps.execIndexReduceDouble(xShapeInfoHostPointer, op.opNum(), (DoublePointer) x, (IntPointer) xShapeInfo, (DoublePointer) extraArgs, (DoublePointer) z, (IntPointer) zShapeInfo, (IntPointer) dimensionPointer, dimension.length);
        } else if (op.x().data().dataType() == DataBuffer.Type.FLOAT) {
            nativeOps.execIndexReduceFloat(xShapeInfoHostPointer, op.opNum(), (FloatPointer) x, (IntPointer) xShapeInfo, (FloatPointer) extraArgs, (FloatPointer) z, (IntPointer) zShapeInfo, (IntPointer) dimensionPointer, dimension.length);
        } else {
            nativeOps.execIndexReduceHalf(xShapeInfoHostPointer, op.opNum(), (ShortPointer) x, (IntPointer) xShapeInfo, (ShortPointer) extraArgs, (ShortPointer) z, (IntPointer) zShapeInfo, (IntPointer) dimensionPointer, dimension.length);
        }
    }
    AtomicAllocator.getInstance().registerAction(context, null, op.x(), op.y());
    profilingHookOut(op, st);
    return null;
}
Also used : CudaContext(org.nd4j.linalg.jcublas.context.CudaContext) CudaPointer(org.nd4j.jita.allocator.pointers.CudaPointer) PagedPointer(org.nd4j.linalg.api.memory.pointers.PagedPointer) AllocationPoint(org.nd4j.jita.allocator.impl.AllocationPoint) ND4JIllegalStateException(org.nd4j.linalg.exception.ND4JIllegalStateException) DataBuffer(org.nd4j.linalg.api.buffer.DataBuffer) BaseDataBuffer(org.nd4j.linalg.api.buffer.BaseDataBuffer)
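
IndexAccumulation ops (argmax/argmin style reductions) follow the same dispatch pattern. A minimal sketch, assuming IMax from org.nd4j.linalg.api.ops.impl.indexaccum (the class name ArgMaxSketch is illustrative):

import org.nd4j.linalg.api.ndarray.INDArray;
import org.nd4j.linalg.api.ops.impl.indexaccum.IMax;
import org.nd4j.linalg.factory.Nd4j;

public class ArgMaxSketch {
    public static void main(String[] args) {
        INDArray x = Nd4j.create(new float[] {3f, 9f, 1f, 7f, 2f, 8f}, new int[] {2, 3});
        // A concrete dimension takes the non-scalar branch of
        // invoke(IndexAccumulation, int[]) and writes per-row indices into z.
        INDArray argMax = Nd4j.getExecutioner().exec(new IMax(x), 1);
        System.out.println(argMax); // expected: [1.0, 2.0]
    }
}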

Example 39 with DataBuffer

Use of org.nd4j.linalg.api.buffer.DataBuffer in project nd4j by deeplearning4j.

From class CudaExecutioner, method exec(Aggregate).

@Override
public void exec(Aggregate op) {
    int numArguments = op.getArguments().size();
    int numShapeArguments = op.getShapes().size();
    int numIndexArguments = op.getIndexingArguments().size();
    int numIntArrays = op.getIntArrayArguments().size();
    int numRealArguments = op.getRealArguments().size();
    CudaContext context = (CudaContext) AtomicAllocator.getInstance().getDeviceContext().getContext();
    PointerPointer extraArgs = new PointerPointer(32);
    extraArgs.put(0, null);
    extraArgs.put(1, context.getOldStream());
    extraArgs.put(2, new CudaPointer(1));
    extraArgs.put(3, new CudaPointer(op.getThreadsPerInstance()));
    extraArgs.put(4, new CudaPointer(op.getSharedMemorySize()));
    long[] arguments = new long[numArguments];
    for (int x = 0; x < numArguments; x++) {
        arguments[x] = op.getArguments().get(x) == null ? 0 : AtomicAllocator.getInstance().getPointer(op.getArguments().get(x), context).address();
        if (op.getArguments().get(x) != null)
            AtomicAllocator.getInstance().getAllocationPoint(op.getArguments().get(x)).tickDeviceWrite();
    }
    DataBuffer tempX = AllocationUtils.getPointersBuffer(arguments);
    PointerPointer xPtr = new PointerPointer(AtomicAllocator.getInstance().getPointer(tempX, context));
    long[] shapes = new long[numShapeArguments];
    for (int x = 0; x < numShapeArguments; x++) {
        shapes[x] = op.getShapes().get(x) == null ? 0 : AtomicAllocator.getInstance().getPointer(op.getShapes().get(x), context).address();
        if (op.getShapes().get(x) != null)
            AtomicAllocator.getInstance().getAllocationPoint(op.getShapes().get(x)).tickDeviceWrite();
    }
    DataBuffer tempS = AllocationUtils.getPointersBuffer(shapes);
    PointerPointer sPtr = new PointerPointer(AtomicAllocator.getInstance().getPointer(tempS, context));
    long[] ints = new long[numIntArrays];
    for (int x = 0; x < numIntArrays; x++) {
        if (op.getIntArrayArguments().get(x) != null) {
            DataBuffer intBuf = Nd4j.getDataBufferFactory().createInt(op.getIntArrayArguments().get(x));
            ints[x] = AtomicAllocator.getInstance().getPointer(intBuf, context).address();
        }
    }
    DataBuffer tempI = AllocationUtils.getPointersBuffer(ints);
    PointerPointer iPtr = new PointerPointer(AtomicAllocator.getInstance().getPointer(tempI, context));
    int[] indexes = new int[numIndexArguments];
    for (int x = 0; x < numIndexArguments; x++) {
        indexes[x] = op.getIndexingArguments().get(x);
    }
    DataBuffer intBuffer = Nd4j.getDataBufferFactory().createInt(indexes);
    double[] reals = new double[numRealArguments];
    for (int x = 0; x < numRealArguments; x++) {
        reals[x] = op.getRealArguments().get(x).doubleValue();
    }
    INDArray realsBuffer = Nd4j.create(reals);
    if (Nd4j.dataType() == DataBuffer.Type.FLOAT) {
        nativeOps.execAggregateFloat(extraArgs, op.opNum(), xPtr, numArguments, sPtr, numShapeArguments, (IntPointer) AtomicAllocator.getInstance().getPointer(intBuffer, context), numIndexArguments, iPtr, numIntArrays, (FloatPointer) AtomicAllocator.getInstance().getPointer(realsBuffer.data(), context), numRealArguments);
    } else if (Nd4j.dataType() == DataBuffer.Type.DOUBLE) {
        nativeOps.execAggregateDouble(extraArgs, op.opNum(), xPtr, numArguments, sPtr, numShapeArguments, (IntPointer) AtomicAllocator.getInstance().getPointer(intBuffer, context), numIndexArguments, iPtr, numIntArrays, (DoublePointer) AtomicAllocator.getInstance().getPointer(realsBuffer.data(), context), numRealArguments);
    } else if (Nd4j.dataType() == DataBuffer.Type.HALF) {
        nativeOps.execAggregateHalf(extraArgs, op.opNum(), xPtr, numArguments, sPtr, numShapeArguments, (IntPointer) AtomicAllocator.getInstance().getPointer(intBuffer, context), numIndexArguments, iPtr, numIntArrays, (ShortPointer) AtomicAllocator.getInstance().getPointer(realsBuffer.data(), context), numRealArguments);
    }
}
Also used : INDArray(org.nd4j.linalg.api.ndarray.INDArray) CudaContext(org.nd4j.linalg.jcublas.context.CudaContext) AllocationPoint(org.nd4j.jita.allocator.impl.AllocationPoint) CudaPointer(org.nd4j.jita.allocator.pointers.CudaPointer) DataBuffer(org.nd4j.linalg.api.buffer.DataBuffer) BaseDataBuffer(org.nd4j.linalg.api.buffer.BaseDataBuffer)
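
Aggregate ops batch their argument arrays, shapes, index arguments, and real arguments into the pointer buffers built above before a single native call. A usage sketch, assuming AggregateAxpy, one of the Aggregate implementations present in this nd4j version (the class name AggregateSketch is illustrative):

import org.nd4j.linalg.api.ndarray.INDArray;
import org.nd4j.linalg.api.ops.aggregates.impl.AggregateAxpy;
import org.nd4j.linalg.factory.Nd4j;

public class AggregateSketch {
    public static void main(String[] args) {
        INDArray x = Nd4j.ones(10);
        INDArray y = Nd4j.zeros(10);
        // Computes y = alpha * x + y; exec(Aggregate) marshals x, y, and alpha
        // into the argument/shape/real-argument buffers shown above.
        Nd4j.getExecutioner().exec(new AggregateAxpy(x, y, 2.0f));
        System.out.println(y); // expected: all values 2.0
    }
}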

Example 40 with DataBuffer

Use of org.nd4j.linalg.api.buffer.DataBuffer in project nd4j by deeplearning4j.

From class CompressionTests, method testInt8Compression1.

@Test
public void testInt8Compression1() {
    DataBuffer buffer = Nd4j.createBuffer(new float[] { 1f, 2f, 3f, 4f, 1005f, -3.7f });
    BasicNDArrayCompressor.getInstance().setDefaultCompression("INT8");
    DataBuffer compr = BasicNDArrayCompressor.getInstance().compress(buffer);
    assertEquals(DataBuffer.Type.COMPRESSED, compr.dataType());
    DataBuffer decomp = BasicNDArrayCompressor.getInstance().decompress(compr);
    assertEquals(1.0f, decomp.getFloat(0), 0.01f);
    assertEquals(2.0f, decomp.getFloat(1), 0.01f);
    assertEquals(3.0f, decomp.getFloat(2), 0.01f);
    assertEquals(4.0f, decomp.getFloat(3), 0.01f);
    // INT8 clamps values outside [-128, 127] and truncates the fractional part:
    assertEquals(127.0f, decomp.getFloat(4), 0.01f);
    assertEquals(-3.0f, decomp.getFloat(5), 0.01f);
}
Also used : DataBuffer(org.nd4j.linalg.api.buffer.DataBuffer) Test(org.junit.Test) BaseNd4jTest(org.nd4j.linalg.BaseNd4jTest)
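
The same compressor also works at the INDArray level, which is how it is typically used. A minimal round-trip sketch using the APIs shown in the test (the class name CompressionSketch is illustrative):

import org.nd4j.linalg.api.ndarray.INDArray;
import org.nd4j.linalg.compression.BasicNDArrayCompressor;
import org.nd4j.linalg.factory.Nd4j;

public class CompressionSketch {
    public static void main(String[] args) {
        INDArray arr = Nd4j.linspace(1, 100, 100);
        BasicNDArrayCompressor compressor = BasicNDArrayCompressor.getInstance();
        compressor.setDefaultCompression("INT8");
        INDArray compressed = compressor.compress(arr);   // lossy INT8 quantization
        INDArray restored = compressor.decompress(compressed);
        System.out.println(restored.getFloat(0)); // ~1.0, within quantization error
    }
}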

Aggregations

DataBuffer (org.nd4j.linalg.api.buffer.DataBuffer): 186 uses
INDArray (org.nd4j.linalg.api.ndarray.INDArray): 79 uses
Test (org.junit.Test): 47 uses
CompressedDataBuffer (org.nd4j.linalg.compression.CompressedDataBuffer): 44 uses
CudaContext (org.nd4j.linalg.jcublas.context.CudaContext): 39 uses
CudaPointer (org.nd4j.jita.allocator.pointers.CudaPointer): 30 uses
AllocationPoint (org.nd4j.jita.allocator.impl.AllocationPoint): 25 uses
ND4JIllegalStateException (org.nd4j.linalg.exception.ND4JIllegalStateException): 23 uses
BaseDataBuffer (org.nd4j.linalg.api.buffer.BaseDataBuffer): 19 uses
Pointer (org.bytedeco.javacpp.Pointer): 18 uses
BaseNd4jTest (org.nd4j.linalg.BaseNd4jTest): 16 uses
CudaDoubleDataBuffer (org.nd4j.linalg.jcublas.buffer.CudaDoubleDataBuffer): 16 uses
IntPointer (org.bytedeco.javacpp.IntPointer): 13 uses
PagedPointer (org.nd4j.linalg.api.memory.pointers.PagedPointer): 13 uses
CudaIntDataBuffer (org.nd4j.linalg.jcublas.buffer.CudaIntDataBuffer): 13 uses
DoublePointer (org.bytedeco.javacpp.DoublePointer): 12 uses
FloatPointer (org.bytedeco.javacpp.FloatPointer): 12 uses
GridExecutioner (org.nd4j.linalg.api.ops.executioner.GridExecutioner): 12 uses
LongPointerWrapper (org.nd4j.nativeblas.LongPointerWrapper): 11 uses
CUstream_st (org.bytedeco.javacpp.cuda.CUstream_st): 10 uses