
Example 1 with CudaLongDataBuffer

Use of org.nd4j.linalg.jcublas.buffer.CudaLongDataBuffer in the nd4j project by deeplearning4j.

From class BasicTADManager, method getTADOnlyShapeInfo:

@Override
public Pair<DataBuffer, DataBuffer> getTADOnlyShapeInfo(INDArray array, int[] dimension) {
    if (dimension != null && dimension.length > 1)
        Arrays.sort(dimension);
    if (dimension == null)
        dimension = new int[] { Integer.MAX_VALUE };
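    // a single dimension of Integer.MAX_VALUE means the TAD spans the whole array (the "scalar" case below)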
    boolean isScalar = dimension == null || (dimension.length == 1 && dimension[0] == Integer.MAX_VALUE);
    // FIXME: this is fast triage, remove it later
    // dimensionLength <= 1 ? 2 : dimensionLength;
    int targetRank = isScalar ? 2 : array.rank();
    long offsetLength = 0;
    long tadLength = 1;
    if (!isScalar)
        for (int i = 0; i < dimension.length; i++) {
            tadLength *= array.shape()[dimension[i]];
        }
    if (!isScalar)
        offsetLength = array.lengthLong() / tadLength;
    else
        offsetLength = 1;
    // logger.info("Original shape info before TAD: {}", array.shapeInfoDataBuffer());
    // logger.info("dimension: {}, tadLength: {}, offsetLength for TAD: {}", Arrays.toString(dimension),tadLength, offsetLength);
    DataBuffer outputBuffer = new CudaIntDataBuffer(targetRank * 2 + 4);
    DataBuffer offsetsBuffer = new CudaLongDataBuffer(offsetLength);
    AtomicAllocator.getInstance().getAllocationPoint(outputBuffer).tickHostWrite();
    AtomicAllocator.getInstance().getAllocationPoint(offsetsBuffer).tickHostWrite();
    DataBuffer dimensionBuffer = AtomicAllocator.getInstance().getConstantBuffer(dimension);
    Pointer dimensionPointer = AtomicAllocator.getInstance().getHostPointer(dimensionBuffer);
    Pointer xShapeInfo = AddressRetriever.retrieveHostPointer(array.shapeInfoDataBuffer());
    Pointer targetPointer = AddressRetriever.retrieveHostPointer(outputBuffer);
    Pointer offsetsPointer = AddressRetriever.retrieveHostPointer(offsetsBuffer);
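    // the native call fills the TAD shape info and the per-TAD offsets through the host pointers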
    if (!isScalar)
        nativeOps.tadOnlyShapeInfo((IntPointer) xShapeInfo, (IntPointer) dimensionPointer, dimension.length, (IntPointer) targetPointer, new LongPointerWrapper(offsetsPointer));
    else {
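        // whole-array ("scalar") TAD: hand-write shape info for a 1 x 1 view in 'c' order (99 == 'c'); the single offset stays 0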
        outputBuffer.put(0, 2);
        outputBuffer.put(1, 1);
        outputBuffer.put(2, 1);
        outputBuffer.put(3, 1);
        outputBuffer.put(4, 1);
        outputBuffer.put(5, 0);
        outputBuffer.put(6, 0);
        outputBuffer.put(7, 99);
    }
    AtomicAllocator.getInstance().getAllocationPoint(outputBuffer).tickHostWrite();
    AtomicAllocator.getInstance().getAllocationPoint(offsetsBuffer).tickHostWrite();
    return new Pair<>(outputBuffer, offsetsBuffer);
}
Also used: IntPointer(org.bytedeco.javacpp.IntPointer) Pointer(org.bytedeco.javacpp.Pointer) DataBuffer(org.nd4j.linalg.api.buffer.DataBuffer) CudaDoubleDataBuffer(org.nd4j.linalg.jcublas.buffer.CudaDoubleDataBuffer) CudaIntDataBuffer(org.nd4j.linalg.jcublas.buffer.CudaIntDataBuffer) CudaLongDataBuffer(org.nd4j.linalg.jcublas.buffer.CudaLongDataBuffer) LongPointerWrapper(org.nd4j.nativeblas.LongPointerWrapper) Pair(org.nd4j.linalg.primitives.Pair)
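
For context, the following is a minimal sketch of how a caller typically obtains such a TAD shape info / offsets pair. It assumes an nd4j version from this era in which the op executioner exposes its TADManager via Nd4j.getExecutioner().getTADManager(); the array, the dimension choice and the printed output are purely illustrative.

import org.nd4j.linalg.api.buffer.DataBuffer;
import org.nd4j.linalg.api.ndarray.INDArray;
import org.nd4j.linalg.cache.TADManager;
import org.nd4j.linalg.factory.Nd4j;
import org.nd4j.linalg.primitives.Pair;

public class TadShapeInfoExample {

    public static void main(String[] args) {
        // 3 x 4 array; TADs along dimension 1 are the three rows of length 4
        INDArray array = Nd4j.linspace(1, 12, 12).reshape(3, 4);

        // assumption: the current executioner exposes its TAD manager, as in the nd4j versions this snippet comes from
        TADManager tadManager = Nd4j.getExecutioner().getTADManager();

        Pair<DataBuffer, DataBuffer> tadInfo = tadManager.getTADOnlyShapeInfo(array, new int[] { 1 });

        DataBuffer tadShapeInfo = tadInfo.getFirst();  // shape info describing a single TAD
        DataBuffer tadOffsets = tadInfo.getSecond();   // one offset into the original buffer per TAD

        System.out.println("TAD shape info: " + tadShapeInfo);
        System.out.println("TAD offsets: " + tadOffsets);
    }
}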

Aggregations

IntPointer (org.bytedeco.javacpp.IntPointer): 1
Pointer (org.bytedeco.javacpp.Pointer): 1
DataBuffer (org.nd4j.linalg.api.buffer.DataBuffer): 1
CudaDoubleDataBuffer (org.nd4j.linalg.jcublas.buffer.CudaDoubleDataBuffer): 1
CudaIntDataBuffer (org.nd4j.linalg.jcublas.buffer.CudaIntDataBuffer): 1
CudaLongDataBuffer (org.nd4j.linalg.jcublas.buffer.CudaLongDataBuffer): 1
Pair (org.nd4j.linalg.primitives.Pair): 1
LongPointerWrapper (org.nd4j.nativeblas.LongPointerWrapper): 1