Search in sources :

Example 16 with Pointer

use of org.bytedeco.javacpp.Pointer in project nd4j by deeplearning4j.

the class CudaZeroHandler method memcpySpecial.

/**
 * Special memcpy version, addressing shapeInfoDataBuffer copies.
 *
 * Copies {@code length} bytes from {@code srcPointer} to the host side of {@code dstBuffer}
 * at {@code dstOffset}. If the destination's allocation status is DEVICE, the freshly written
 * host region is then copied to the device side at the same offset.
 *
 * PLEASE NOTE: both copies are enqueued on the context's old stream, and that stream is
 * synchronized before this method returns, so the call is blocking for the caller.
 *
 * @param dstBuffer  destination buffer; cast to BaseCudaDataBuffer to reach its AllocationPoint
 * @param srcPointer source pointer, used as the source of a host-to-host copy
 * @param length     amount of data to copy, passed unchanged to memcpyAsync
 * @param dstOffset  offset added to the destination host (and device) address
 * @throws ND4JIllegalStateException if either memcpyAsync call returns 0
 */
@Override
public void memcpySpecial(DataBuffer dstBuffer, Pointer srcPointer, long length, long dstOffset) {
    CudaContext context = getCudaContext();
    AllocationPoint point = ((BaseCudaDataBuffer) dstBuffer).getAllocationPoint();

    // Stage 1: source -> host side of the destination buffer.
    Pointer dP = new CudaPointer(point.getPointers().getHostPointer().address() + dstOffset);
    if (nativeOps.memcpyAsync(dP, srcPointer, length, CudaConstants.cudaMemcpyHostToHost, context.getOldStream()) == 0)
        throw new ND4JIllegalStateException("memcpyAsync failed");

    // Stage 2: mirror the host region to the device side when the buffer lives on the device.
    if (point.getAllocationStatus() == AllocationStatus.DEVICE) {
        Pointer rDP = new CudaPointer(point.getPointers().getDevicePointer().address() + dstOffset);
        if (nativeOps.memcpyAsync(rDP, dP, length, CudaConstants.cudaMemcpyHostToDevice, context.getOldStream()) == 0)
            throw new ND4JIllegalStateException("memcpyAsync failed");
    }

    // One synchronization covers everything enqueued above; the original synced twice
    // back-to-back on the DEVICE path, which was redundant.
    context.syncOldStream();
    point.tickDeviceWrite();
}
Also used : CudaContext(org.nd4j.linalg.jcublas.context.CudaContext) BaseCudaDataBuffer(org.nd4j.linalg.jcublas.buffer.BaseCudaDataBuffer) CudaPointer(org.nd4j.jita.allocator.pointers.CudaPointer) Pointer(org.bytedeco.javacpp.Pointer) ND4JIllegalStateException(org.nd4j.linalg.exception.ND4JIllegalStateException) AllocationPoint(org.nd4j.jita.allocator.impl.AllocationPoint) CudaPointer(org.nd4j.jita.allocator.pointers.CudaPointer)

Example 17 with Pointer

use of org.bytedeco.javacpp.Pointer in project nd4j by deeplearning4j.

the class CudaZeroHandler method memcpy.

/**
 * Synchronous version of memcpy.
 *
 * Copies the whole content of {@code srcBuffer} (length * element size bytes) to the host side
 * of {@code dstBuffer}. The source is read from its device pointer when its allocation status
 * is DEVICE, otherwise from its host pointer. If the destination's allocation status is DEVICE,
 * the host copy is then mirrored to the destination's device pointer. All copies are enqueued
 * on the context's old stream, which is synchronized before returning.
 *
 * @param dstBuffer destination buffer; cast to BaseCudaDataBuffer to reach its AllocationPoint
 * @param srcBuffer source buffer; cast to BaseCudaDataBuffer to reach its AllocationPoint
 * @throws ND4JIllegalStateException if any memcpyAsync call returns 0
 */
@Override
public void memcpy(DataBuffer dstBuffer, DataBuffer srcBuffer) {
    CudaContext context = getCudaContext();
    AllocationPoint dstPoint = ((BaseCudaDataBuffer) dstBuffer).getAllocationPoint();
    AllocationPoint srcPoint = ((BaseCudaDataBuffer) srcBuffer).getAllocationPoint();

    long bytes = srcBuffer.length() * srcBuffer.getElementSize();
    boolean srcOnDevice = srcPoint.getAllocationStatus() == AllocationStatus.DEVICE;

    // The first hop always lands in the destination's HOST memory.
    Pointer dP = new CudaPointer(dstPoint.getPointers().getHostPointer().address());
    long srcAddress = srcOnDevice
                    ? srcPoint.getPointers().getDevicePointer().address()
                    : srcPoint.getPointers().getHostPointer().address();
    Pointer sP = new CudaPointer(srcAddress);

    // The copy kind must match where the pointers actually live: the original passed
    // cudaMemcpyHostToDevice for both the device->host and host->host cases.
    if (nativeOps.memcpyAsync(dP, sP, bytes,
                    srcOnDevice ? CudaConstants.cudaMemcpyDeviceToHost : CudaConstants.cudaMemcpyHostToHost,
                    context.getOldStream()) == 0) {
        throw new ND4JIllegalStateException("memcpyAsync failed");
    }

    // Second hop: mirror the host copy to the device side of the destination, if it has one.
    if (dstPoint.getAllocationStatus() == AllocationStatus.DEVICE) {
        Pointer rDP = new CudaPointer(dstPoint.getPointers().getDevicePointer().address());
        if (nativeOps.memcpyAsync(rDP, dP, bytes, CudaConstants.cudaMemcpyHostToDevice, context.getOldStream()) == 0) {
            throw new ND4JIllegalStateException("memcpyAsync failed");
        }
    }

    dstPoint.tickDeviceWrite();
    // it has to be blocking call
    context.syncOldStream();
}
Also used : CudaContext(org.nd4j.linalg.jcublas.context.CudaContext) BaseCudaDataBuffer(org.nd4j.linalg.jcublas.buffer.BaseCudaDataBuffer) CudaPointer(org.nd4j.jita.allocator.pointers.CudaPointer) Pointer(org.bytedeco.javacpp.Pointer) ND4JIllegalStateException(org.nd4j.linalg.exception.ND4JIllegalStateException) AllocationPoint(org.nd4j.jita.allocator.impl.AllocationPoint) CudaPointer(org.nd4j.jita.allocator.pointers.CudaPointer)

Example 18 with Pointer

use of org.bytedeco.javacpp.Pointer in project nd4j by deeplearning4j.

the class TestNDArrayCreation method testCreateNpy3.

/**
 * Loads the rank-3 .npy fixture from the classpath, checks its length and rank, and verifies
 * that a Pointer built from the data buffer's address reports that same address.
 */
@Test
public void testCreateNpy3() throws Exception {
    final INDArray loaded = Nd4j.createFromNpyFile(new ClassPathResource("rank3.npy").getFile());

    assertEquals(8, loaded.length());
    assertEquals(3, loaded.rank());

    // Round-trip the buffer address through the native ops layer.
    final Pointer wrapped = NativeOpsHolder.getInstance()
                    .getDeviceNativeOps()
                    .pointerForAddress(loaded.data().address());
    assertEquals(loaded.data().address(), wrapped.address());
}
Also used : INDArray(org.nd4j.linalg.api.ndarray.INDArray) FloatPointer(org.bytedeco.javacpp.FloatPointer) Pointer(org.bytedeco.javacpp.Pointer) ClassPathResource(org.nd4j.linalg.io.ClassPathResource) Test(org.junit.Test) BaseNd4jTest(org.nd4j.linalg.BaseNd4jTest)

Example 19 with Pointer

use of org.bytedeco.javacpp.Pointer in project nd4j by deeplearning4j.

the class BasicTADManager method getTADOnlyShapeInfo.

/**
 * Builds the TAD-only shape info buffer and the TAD offsets buffer for the given array and
 * dimensions.
 *
 * A null dimension array, or a single Integer.MAX_VALUE entry, is treated as the "scalar" case:
 * a fixed 8-element shape info is written by hand and a single offset slot is allocated.
 * Otherwise the dimensions are sorted in place (when there is more than one) and the native
 * tadOnlyShapeInfo routine fills both buffers.
 *
 * @param array     array whose shape info is read
 * @param dimension dimensions to build the TAD along; may be null
 * @return pair of (shape info buffer, offsets buffer)
 */
@Override
public Pair<DataBuffer, DataBuffer> getTADOnlyShapeInfo(INDArray array, int[] dimension) {
    if (dimension == null) {
        dimension = new int[] { Integer.MAX_VALUE };
    } else if (dimension.length > 1) {
        Arrays.sort(dimension);
    }

    final boolean scalarTad = dimension.length == 1 && dimension[0] == Integer.MAX_VALUE;

    // FIXME: this is fast triage, remove it later
    final int targetRank = scalarTad ? 2 : array.rank();

    // Number of offsets = total length / length of one TAD; the scalar case has exactly one.
    long offsetLength = 1;
    if (!scalarTad) {
        long tadLength = 1;
        for (int axis : dimension) {
            tadLength *= array.shape()[axis];
        }
        offsetLength = array.lengthLong() / tadLength;
    }

    final AtomicAllocator allocator = AtomicAllocator.getInstance();

    DataBuffer shapeInfoBuffer = new CudaIntDataBuffer(targetRank * 2 + 4);
    DataBuffer tadOffsetsBuffer = new CudaLongDataBuffer(offsetLength);
    allocator.getAllocationPoint(shapeInfoBuffer).tickHostWrite();
    allocator.getAllocationPoint(tadOffsetsBuffer).tickHostWrite();

    DataBuffer dimensionBuffer = allocator.getConstantBuffer(dimension);
    Pointer dimensionPointer = allocator.getHostPointer(dimensionBuffer);
    Pointer xShapeInfo = AddressRetriever.retrieveHostPointer(array.shapeInfoDataBuffer());
    Pointer targetPointer = AddressRetriever.retrieveHostPointer(shapeInfoBuffer);
    Pointer offsetsPointer = AddressRetriever.retrieveHostPointer(tadOffsetsBuffer);

    if (scalarTad) {
        // Hand-written shape info for the scalar case, written slot by slot in index order.
        final int[] scalarShapeInfo = { 2, 1, 1, 1, 1, 0, 0, 99 };
        for (int slot = 0; slot < scalarShapeInfo.length; slot++) {
            shapeInfoBuffer.put(slot, scalarShapeInfo[slot]);
        }
    } else {
        nativeOps.tadOnlyShapeInfo((IntPointer) xShapeInfo, (IntPointer) dimensionPointer, dimension.length,
                        (IntPointer) targetPointer, new LongPointerWrapper(offsetsPointer));
    }

    // Mark both buffers as host-written again now that their contents are final.
    allocator.getAllocationPoint(shapeInfoBuffer).tickHostWrite();
    allocator.getAllocationPoint(tadOffsetsBuffer).tickHostWrite();

    return new Pair<>(shapeInfoBuffer, tadOffsetsBuffer);
}
Also used : CudaLongDataBuffer(org.nd4j.linalg.jcublas.buffer.CudaLongDataBuffer) IntPointer(org.bytedeco.javacpp.IntPointer) LongPointerWrapper(org.nd4j.nativeblas.LongPointerWrapper) IntPointer(org.bytedeco.javacpp.IntPointer) Pointer(org.bytedeco.javacpp.Pointer) CudaIntDataBuffer(org.nd4j.linalg.jcublas.buffer.CudaIntDataBuffer) DataBuffer(org.nd4j.linalg.api.buffer.DataBuffer) CudaLongDataBuffer(org.nd4j.linalg.jcublas.buffer.CudaLongDataBuffer) CudaDoubleDataBuffer(org.nd4j.linalg.jcublas.buffer.CudaDoubleDataBuffer) CudaIntDataBuffer(org.nd4j.linalg.jcublas.buffer.CudaIntDataBuffer) Pair(org.nd4j.linalg.primitives.Pair)

Example 20 with Pointer

use of org.bytedeco.javacpp.Pointer in project nd4j by deeplearning4j.

the class GridExecutionerTest method testOpPointerizeScalar1.

// ///////////////////////////////////////////////////////////////////////
// ///////////////////////////////////////////////////////////////////////
/*
    Pointerize tests are checking how Ops are converted into GridPointers
*/
// ///////////////////////////////////////////////////////////////////////
// ///////////////////////////////////////////////////////////////////////
/**
 * Pointerizes a ScalarMultiplication over a 10-element array and checks every field of the
 * resulting GridPointers: op identity, X/Z data and shape pointers, strides, and the fields
 * that are expected to be unset for this op.
 */
@Test
public void testOpPointerizeScalar1() throws Exception {
    CudaGridExecutioner gridExecutioner = new CudaGridExecutioner();
    INDArray input = Nd4j.create(10);
    ScalarMultiplication scalarOp = new ScalarMultiplication(input, 10f);

    GridPointers gridPointers = gridExecutioner.pointerizeOp(scalarOp, null);

    // Op identity.
    assertEquals(scalarOp.opNum(), gridPointers.getOpNum());
    assertEquals(Op.Type.SCALAR, gridPointers.getType());

    // Expected pointers, resolved through the allocator for the current device context.
    CudaContext cudaContext = (CudaContext) AtomicAllocator.getInstance().getDeviceContext().getContext();
    Pointer expectedData = AtomicAllocator.getInstance().getPointer(input, cudaContext);
    Pointer expectedShapeInfo = AtomicAllocator.getInstance().getPointer(input.shapeInfoDataBuffer(), cudaContext);

    // Data pointers: Z is expected to equal X, and there is no Y operand.
    assertEquals(expectedData, gridPointers.getX());
    assertEquals(null, gridPointers.getY());
    assertEquals(expectedData, gridPointers.getZ());

    // Strides.
    assertEquals(1, gridPointers.getXStride());
    assertEquals(-1, gridPointers.getYStride());
    assertEquals(1, gridPointers.getZStride());

    // Shape info pointers follow the same pattern as the data pointers.
    assertEquals(expectedShapeInfo, gridPointers.getXShapeInfo());
    assertEquals(null, gridPointers.getYShapeInfo());
    assertEquals(expectedShapeInfo, gridPointers.getZShapeInfo());

    // Dimension, TAD and extra-args fields are expected to be unset.
    assertEquals(null, gridPointers.getDimensions());
    assertEquals(0, gridPointers.getDimensionsLength());
    assertEquals(null, gridPointers.getTadShape());
    assertEquals(null, gridPointers.getTadOffsets());
    assertEquals(null, gridPointers.getExtraArgs());
}
Also used : INDArray(org.nd4j.linalg.api.ndarray.INDArray) GridPointers(org.nd4j.linalg.api.ops.grid.GridPointers) CudaContext(org.nd4j.linalg.jcublas.context.CudaContext) ScalarMultiplication(org.nd4j.linalg.api.ops.impl.scalar.ScalarMultiplication) Pointer(org.bytedeco.javacpp.Pointer) Test(org.junit.Test)

Aggregations

Pointer (org.bytedeco.javacpp.Pointer)61 FloatPointer (org.bytedeco.javacpp.FloatPointer)29 DoublePointer (org.bytedeco.javacpp.DoublePointer)27 IntPointer (org.bytedeco.javacpp.IntPointer)23 CudaContext (org.nd4j.linalg.jcublas.context.CudaContext)23 INDArray (org.nd4j.linalg.api.ndarray.INDArray)21 CudaPointer (org.nd4j.jita.allocator.pointers.CudaPointer)19 BytePointer (org.bytedeco.javacpp.BytePointer)18 DataBuffer (org.nd4j.linalg.api.buffer.DataBuffer)18 ShortPointer (org.bytedeco.javacpp.ShortPointer)16 GridExecutioner (org.nd4j.linalg.api.ops.executioner.GridExecutioner)16 PointerPointer (org.bytedeco.javacpp.PointerPointer)11 ByteBuffer (java.nio.ByteBuffer)10 CUstream_st (org.bytedeco.javacpp.cuda.CUstream_st)10 org.nd4j.jita.allocator.pointers.cuda.cusolverDnHandle_t (org.nd4j.jita.allocator.pointers.cuda.cusolverDnHandle_t)10 CublasPointer (org.nd4j.linalg.jcublas.CublasPointer)10 FunctionPointer (org.bytedeco.javacpp.FunctionPointer)9 BoolPointer (org.bytedeco.javacpp.BoolPointer)8 CLongPointer (org.bytedeco.javacpp.CLongPointer)8 CharPointer (org.bytedeco.javacpp.CharPointer)8