Use of org.bytedeco.javacpp.Pointer in project nd4j by deeplearning4j: class CudaZeroHandler, method memcpySpecial.
/**
 * Special memcpy version, addressing shapeInfoDataBuffer copies
 *
 * PLEASE NOTE: Blocking H->H, Async H->D
 *
 * @param dstBuffer destination buffer; its host (and, if resident, device) memory is written
 * @param srcPointer host-side pointer to copy from
 * @param length number of bytes to copy
 * @param dstOffset byte offset into the destination buffer
 */
@Override
public void memcpySpecial(DataBuffer dstBuffer, Pointer srcPointer, long length, long dstOffset) {
    CudaContext context = getCudaContext();
    AllocationPoint point = ((BaseCudaDataBuffer) dstBuffer).getAllocationPoint();
    // First leg: stage the data into the destination's host buffer (H->H).
    Pointer dP = new CudaPointer((point.getPointers().getHostPointer().address()) + dstOffset);
    if (nativeOps.memcpyAsync(dP, srcPointer, length, CudaConstants.cudaMemcpyHostToHost, context.getOldStream()) == 0)
        throw new ND4JIllegalStateException("memcpyAsync failed");
    if (point.getAllocationStatus() == AllocationStatus.DEVICE) {
        // Second leg: relay the staged host copy to device memory (H->D),
        // queued on the same stream so it is ordered after the first leg.
        Pointer rDP = new CudaPointer(point.getPointers().getDevicePointer().address() + dstOffset);
        if (nativeOps.memcpyAsync(rDP, dP, length, CudaConstants.cudaMemcpyHostToDevice, context.getOldStream()) == 0)
            throw new ND4JIllegalStateException("memcpyAsync failed");
    }
    // One blocking sync covers both legs; the extra sync previously issued
    // inside the DEVICE branch was redundant (the stream is drained here anyway).
    context.syncOldStream();
    point.tickDeviceWrite();
}
Use of org.bytedeco.javacpp.Pointer in project nd4j by deeplearning4j: class CudaZeroHandler, method memcpy.
/**
 * Synchronous version of memcpy.
 *
 * Copies the full contents of srcBuffer into dstBuffer, staging through the
 * destination's host buffer and relaying to the device if the destination is
 * currently resident there. Blocks until the copy has completed.
 *
 * @param dstBuffer destination buffer
 * @param srcBuffer source buffer; its length and element size define the byte count
 */
@Override
public void memcpy(DataBuffer dstBuffer, DataBuffer srcBuffer) {
    CudaContext context = getCudaContext();
    AllocationPoint dstPoint = ((BaseCudaDataBuffer) dstBuffer).getAllocationPoint();
    AllocationPoint srcPoint = ((BaseCudaDataBuffer) srcBuffer).getAllocationPoint();
    // Total payload in bytes; hoisted so all three legs agree on the size.
    long numBytes = srcBuffer.length() * srcBuffer.getElementSize();
    Pointer dP = new CudaPointer(dstPoint.getPointers().getHostPointer().address());
    Pointer sP;
    if (srcPoint.getAllocationStatus() == AllocationStatus.DEVICE) {
        // Source resides on the device: this leg runs device -> host, so the
        // correct kind is cudaMemcpyDeviceToHost (was mistakenly HostToDevice).
        sP = new CudaPointer(srcPoint.getPointers().getDevicePointer().address());
        if (nativeOps.memcpyAsync(dP, sP, numBytes, CudaConstants.cudaMemcpyDeviceToHost, context.getOldStream()) == 0) {
            throw new ND4JIllegalStateException("memcpyAsync failed");
        }
    } else {
        // Source resides on the host: host -> host staging copy, mirroring the
        // HostToHost leg used by memcpySpecial (was mistakenly HostToDevice).
        sP = new CudaPointer(srcPoint.getPointers().getHostPointer().address());
        if (nativeOps.memcpyAsync(dP, sP, numBytes, CudaConstants.cudaMemcpyHostToHost, context.getOldStream()) == 0) {
            throw new ND4JIllegalStateException("memcpyAsync failed");
        }
    }
    if (dstPoint.getAllocationStatus() == AllocationStatus.DEVICE) {
        // Destination also lives on the device: relay the staged host copy (H->D),
        // ordered after the first leg because both share the same stream.
        Pointer rDP = new CudaPointer(dstPoint.getPointers().getDevicePointer().address());
        if (nativeOps.memcpyAsync(rDP, dP, numBytes, CudaConstants.cudaMemcpyHostToDevice, context.getOldStream()) == 0) {
            throw new ND4JIllegalStateException("memcpyAsync failed");
        }
    }
    dstPoint.tickDeviceWrite();
    // This method is documented as synchronous, so the sync has to be a blocking call.
    context.syncOldStream();
}
Use of org.bytedeco.javacpp.Pointer in project nd4j by deeplearning4j: class TestNDArrayCreation, method testCreateNpy3.
@Test
public void testCreateNpy3() throws Exception {
    // Load a rank-3, 8-element array from the bundled numpy file on the classpath.
    INDArray loaded = Nd4j.createFromNpyFile(new ClassPathResource("rank3.npy").getFile());
    assertEquals(8, loaded.length());
    assertEquals(3, loaded.rank());
    // Round-trip the buffer address through the native pointer helper and
    // verify it survives unchanged.
    long bufferAddress = loaded.data().address();
    Pointer nativePointer = NativeOpsHolder.getInstance().getDeviceNativeOps().pointerForAddress(bufferAddress);
    assertEquals(bufferAddress, nativePointer.address());
}
Use of org.bytedeco.javacpp.Pointer in project nd4j by deeplearning4j: class BasicTADManager, method getTADOnlyShapeInfo.
/**
 * Builds the TAD-only shape info buffer and the per-TAD offsets buffer for the
 * given array/dimension combination.
 *
 * @param array source array whose TADs are described
 * @param dimension dimensions along which TADs are taken; null or
 *                  {Integer.MAX_VALUE} means "whole array" (scalar case)
 * @return pair of (shape-info buffer, offsets buffer)
 */
@Override
public Pair<DataBuffer, DataBuffer> getTADOnlyShapeInfo(INDArray array, int[] dimension) {
    if (dimension != null && dimension.length > 1)
        Arrays.sort(dimension);
    if (dimension == null)
        dimension = new int[] { Integer.MAX_VALUE };
    // dimension is guaranteed non-null here (reassigned above), so the former
    // "dimension == null ||" clause was dead code; the MAX_VALUE sentinel alone
    // marks the whole-array (scalar TAD) case.
    boolean isScalar = dimension.length == 1 && dimension[0] == Integer.MAX_VALUE;
    // FIXME: this is fast triage, remove it later
    // dimensionLength <= 1 ? 2 : dimensionLength;
    int targetRank = isScalar ? 2 : array.rank();
    long offsetLength = 0;
    long tadLength = 1;
    // tadLength = product of the sizes along the TAD dimensions;
    // offsetLength = number of TADs = total length / tadLength.
    if (!isScalar)
        for (int i = 0; i < dimension.length; i++) {
            tadLength *= array.shape()[dimension[i]];
        }
    if (!isScalar)
        offsetLength = array.lengthLong() / tadLength;
    else
        offsetLength = 1;
    // Shape info layout: rank, shape[rank], stride[rank], ews, offset, order -> 2*rank + 4 ints.
    DataBuffer outputBuffer = new CudaIntDataBuffer(targetRank * 2 + 4);
    DataBuffer offsetsBuffer = new CudaLongDataBuffer(offsetLength);
    // Mark host side as dirty before the native call writes through host pointers.
    AtomicAllocator.getInstance().getAllocationPoint(outputBuffer).tickHostWrite();
    AtomicAllocator.getInstance().getAllocationPoint(offsetsBuffer).tickHostWrite();
    DataBuffer dimensionBuffer = AtomicAllocator.getInstance().getConstantBuffer(dimension);
    Pointer dimensionPointer = AtomicAllocator.getInstance().getHostPointer(dimensionBuffer);
    Pointer xShapeInfo = AddressRetriever.retrieveHostPointer(array.shapeInfoDataBuffer());
    Pointer targetPointer = AddressRetriever.retrieveHostPointer(outputBuffer);
    Pointer offsetsPointer = AddressRetriever.retrieveHostPointer(offsetsBuffer);
    if (!isScalar)
        nativeOps.tadOnlyShapeInfo((IntPointer) xShapeInfo, (IntPointer) dimensionPointer, dimension.length, (IntPointer) targetPointer, new LongPointerWrapper(offsetsPointer));
    else {
        // Scalar case: hand-build a rank-2 [1,1] shape info (ews=1, offset=0, order='c'=99).
        outputBuffer.put(0, 2);
        outputBuffer.put(1, 1);
        outputBuffer.put(2, 1);
        outputBuffer.put(3, 1);
        outputBuffer.put(4, 1);
        outputBuffer.put(5, 0);
        outputBuffer.put(6, 0);
        outputBuffer.put(7, 99);
    }
    // Mark host side dirty again now that the buffers hold their final contents.
    AtomicAllocator.getInstance().getAllocationPoint(outputBuffer).tickHostWrite();
    AtomicAllocator.getInstance().getAllocationPoint(offsetsBuffer).tickHostWrite();
    return new Pair<>(outputBuffer, offsetsBuffer);
}
Use of org.bytedeco.javacpp.Pointer in project nd4j by deeplearning4j: class GridExecutionerTest, method testOpPointerizeScalar1.
// ///////////////////////////////////////////////////////////////////////
// ///////////////////////////////////////////////////////////////////////
/*
Pointerize tests are checking how Ops are converted into GridPointers
*/
// ///////////////////////////////////////////////////////////////////////
// ///////////////////////////////////////////////////////////////////////
@Test
public void testOpPointerizeScalar1() throws Exception {
    CudaGridExecutioner executioner = new CudaGridExecutioner();
    INDArray input = Nd4j.create(10);
    ScalarMultiplication scalarOp = new ScalarMultiplication(input, 10f);
    GridPointers grid = executioner.pointerizeOp(scalarOp, null);
    // Op identity is carried through into the grid descriptor.
    assertEquals(scalarOp.opNum(), grid.getOpNum());
    assertEquals(Op.Type.SCALAR, grid.getType());
    CudaContext ctx = (CudaContext) AtomicAllocator.getInstance().getDeviceContext().getContext();
    Pointer expectedX = AtomicAllocator.getInstance().getPointer(input, ctx);
    Pointer expectedShape = AtomicAllocator.getInstance().getPointer(input.shapeInfoDataBuffer(), ctx);
    // In-place scalar op: x and z share the same buffer; y is unused.
    assertEquals(expectedX, grid.getX());
    assertEquals(null, grid.getY());
    assertEquals(expectedX, grid.getZ());
    assertEquals(1, grid.getXStride());
    assertEquals(-1, grid.getYStride());
    assertEquals(1, grid.getZStride());
    assertEquals(expectedShape, grid.getXShapeInfo());
    assertEquals(null, grid.getYShapeInfo());
    assertEquals(expectedShape, grid.getZShapeInfo());
    // No dimensions, TADs, or extra args for a plain scalar op.
    assertEquals(null, grid.getDimensions());
    assertEquals(0, grid.getDimensionsLength());
    assertEquals(null, grid.getTadShape());
    assertEquals(null, grid.getTadOffsets());
    assertEquals(null, grid.getExtraArgs());
}
Aggregations