Use of org.bytedeco.javacpp.cuda.CUstream_st in the nd4j project by deeplearning4j.
Class JcublasLevel2, method dgemv.
/**
 * Double-precision general matrix-vector product, delegated to cuBLAS
 * ({@code cublasDgemv_v2}) on the stream returned by {@code ctx.getOldStream()}.
 *
 * @param order  not used by this implementation
 * @param TransA transpose flag for {@code A}; translated through {@code convertTranspose}
 * @param M      first dimension argument forwarded to cuBLAS
 * @param N      second dimension argument forwarded to cuBLAS
 * @param alpha  scalar forwarded to cuBLAS wrapped in a {@code DoublePointer}
 * @param A      matrix operand
 * @param lda    leading dimension of {@code A}
 * @param X      input vector
 * @param incX   stride between consecutive elements of {@code X}
 * @param beta   scalar forwarded to cuBLAS wrapped in a {@code DoublePointer}
 * @param Y      vector that receives the result
 * @param incY   stride between consecutive elements of {@code Y}
 */
@Override
protected void dgemv(char order, char TransA, int M, int N, double alpha, INDArray A, int lda, INDArray X, int incX, double beta, INDArray Y, int incY) {
    if (Nd4j.dataType() != DataBuffer.Type.DOUBLE) {
        // Only a warning: the call still proceeds with double-precision pointers.
        logger.warn("DOUBLE gemv called");
    }

    // NOTE(review): presumably flushes work still queued in the executioner - confirm.
    Nd4j.getExecutioner().push();

    // Y is passed first, ahead of the operands A and X.
    CudaContext ctx = allocator.getFlowController().prepareAction(Y, A, X);

    CublasPointer matrixPtr = new CublasPointer(A, ctx);
    CublasPointer inputPtr = new CublasPointer(X, ctx);
    CublasPointer outputPtr = new CublasPointer(Y, ctx);

    cublasHandle_t cublas = ctx.getHandle();
    // Setting the stream and issuing the call are done under the handle's monitor
    // so that no other thread can rebind the handle's stream in between.
    synchronized (cublas) {
        cublasSetStream_v2(
                new cublasContext(cublas),
                new CUstream_st(ctx.getOldStream()));

        cublasDgemv_v2(
                new cublasContext(cublas),
                convertTranspose(TransA),
                M,
                N,
                new DoublePointer(alpha),
                (DoublePointer) matrixPtr.getDevicePointer(),
                lda,
                (DoublePointer) inputPtr.getDevicePointer(),
                incX,
                new DoublePointer(beta),
                (DoublePointer) outputPtr.getDevicePointer(),
                incY);
    }

    allocator.registerAction(ctx, Y, A, X);
    OpExecutionerUtil.checkForAny(Y);
}
Use of org.bytedeco.javacpp.cuda.CUstream_st in the nd4j project by deeplearning4j.
Class JcublasLevel1, method dscal.
/**
 * Double-precision in-place vector scaling, delegated to cuBLAS
 * ({@code cublasDscal_v2}) on the stream returned by {@code ctx.getOldStream()}.
 *
 * @param N     number of elements forwarded to cuBLAS
 * @param alpha scale factor, forwarded wrapped in a {@code DoublePointer}
 * @param X     vector that is scaled; it is both input and output
 * @param incX  stride between consecutive elements of {@code X}
 */
@Override
protected void dscal(int N, double alpha, INDArray X, int incX) {
    if (Nd4j.dataType() != DataBuffer.Type.DOUBLE) {
        // Only a warning: the call still proceeds with double-precision pointers.
        logger.warn("DOUBLE scal called");
    }

    // NOTE(review): presumably flushes work still queued in the executioner - confirm.
    Nd4j.getExecutioner().push();

    CudaContext ctx = allocator.getFlowController().prepareAction(X);
    CublasPointer vectorPtr = new CublasPointer(X, ctx);

    cublasHandle_t cublas = ctx.getHandle();
    // Stream binding and the BLAS call happen under the handle's monitor.
    synchronized (cublas) {
        cublasSetStream_v2(
                new cublasContext(cublas),
                new CUstream_st(ctx.getOldStream()));

        cublasDscal_v2(
                new cublasContext(cublas),
                N,
                new DoublePointer(alpha),
                (DoublePointer) vectorPtr.getDevicePointer(),
                incX);
    }

    allocator.registerAction(ctx, X);
    OpExecutionerUtil.checkForAny(X);
}
Use of org.bytedeco.javacpp.cuda.CUstream_st in the nd4j project by deeplearning4j.
Class JcublasLevel1, method idamax.
/**
 * Finds the index of the element of {@code X} with the largest absolute value,
 * delegated to cuBLAS ({@code cublasIdamax_v2}) on the stream returned by
 * {@code ctx.getOldStream()}.
 *
 * @param N    number of elements forwarded to cuBLAS
 * @param X    vector to search
 * @param incX stride between consecutive elements of {@code X}
 * @return the index written by cuBLAS minus one (cuBLAS reports a 1-based index)
 * @throws IllegalStateException if binding the stream or the cuBLAS call itself
 *                               reports a non-zero status
 */
@Override
protected int idamax(int N, INDArray X, int incX) {
    if (Nd4j.dataType() != DataBuffer.Type.DOUBLE)
        logger.warn("DOUBLE imax called");

    // NOTE(review): presumably flushes work still queued in the executioner - confirm.
    Nd4j.getExecutioner().push();

    // No result array for this op: the first argument is null, X is the only operand.
    CudaContext ctx = allocator.getFlowController().prepareAction(null, X);
    int ret2;

    CublasPointer xCPointer = new CublasPointer(X, ctx);

    cublasHandle_t handle = ctx.getHandle();
    // Stream binding and the BLAS call happen under the handle's monitor.
    synchronized (handle) {
        long status = cublasSetStream_v2(new cublasContext(handle), new CUstream_st(ctx.getOldStream()));
        if (status != 0)
            throw new IllegalStateException("cublasSetStream failed");

        IntPointer resultPointer = new IntPointer(new int[] { 0 });
        status = cublasIdamax_v2(new cublasContext(handle), N, (DoublePointer) xCPointer.getDevicePointer(), incX, resultPointer);
        // Without this check a failed call would leave the zero-initialised result
        // in place and the method would silently return -1.
        if (status != 0)
            throw new IllegalStateException("cublasIdamax failed with status " + status);

        ret2 = resultPointer.get();
    }

    allocator.registerAction(ctx, null, X);

    // Convert the 1-based cuBLAS index to a 0-based one.
    return ret2 - 1;
}
Use of org.bytedeco.javacpp.cuda.CUstream_st in the nd4j project by deeplearning4j.
Class JcublasLevel1, method sdot.
/**
 * Single-precision dot product of {@code X} and {@code Y}, delegated to cuBLAS
 * ({@code cublasSdot_v2}) on the stream returned by {@code ctx.getOldStream()}.
 *
 * @param N    number of elements forwarded to cuBLAS
 * @param X    first vector
 * @param incX stride between consecutive elements of {@code X}
 * @param Y    second vector; must have the same data type as {@code X}
 * @param incY stride between consecutive elements of {@code Y}
 * @return the value cuBLAS wrote to the result pointer
 * @throws IllegalStateException if binding the stream or the cuBLAS call itself
 *                               reports a non-zero status
 */
@Override
protected float sdot(int N, INDArray X, int incX, INDArray Y, int incY) {
    if (Nd4j.dataType() != DataBuffer.Type.FLOAT)
        logger.warn("FLOAT dot called");

    DataTypeValidation.assertSameDataType(X, Y);

    // NOTE(review): presumably flushes work still queued in the executioner - confirm.
    Nd4j.getExecutioner().push();

    // No result array for this op: the first argument is null, X and Y are operands.
    CudaContext ctx = allocator.getFlowController().prepareAction(null, X, Y);
    float ret;

    CublasPointer xCPointer = new CublasPointer(X, ctx);
    CublasPointer yCPointer = new CublasPointer(Y, ctx);

    cublasHandle_t handle = ctx.getHandle();
    // Stream binding and the BLAS call happen under the handle's monitor.
    synchronized (handle) {
        long result = cublasSetStream_v2(new cublasContext(handle), new CUstream_st(ctx.getOldStream()));
        if (result != 0)
            throw new IllegalStateException("cublasSetStream failed");

        FloatPointer resultPointer = new FloatPointer(0.0f);
        result = cublasSdot_v2(new cublasContext(handle), N, (FloatPointer) xCPointer.getDevicePointer(), incX, (FloatPointer) yCPointer.getDevicePointer(), incY, resultPointer);
        // The status was previously stored but never inspected, so a failed call
        // would have returned the 0.0f the result pointer was initialised with.
        if (result != 0)
            throw new IllegalStateException("cublasSdot failed with status " + result);

        ret = resultPointer.get();
    }

    allocator.registerAction(ctx, null, X, Y);

    return ret;
}
Use of org.bytedeco.javacpp.cuda.CUstream_st in the nd4j project by deeplearning4j.
Class JcublasLevel1, method sswap.
/**
 * Single-precision element swap between {@code X} and {@code Y}, delegated to
 * cuBLAS ({@code cublasSswap_v2}) on the stream returned by {@code ctx.getOldStream()}.
 *
 * @param N    number of elements forwarded to cuBLAS
 * @param X    first vector
 * @param incX stride between consecutive elements of {@code X}
 * @param Y    second vector; the only one passed to the post-call check
 * @param incY stride between consecutive elements of {@code Y}
 */
@Override
protected void sswap(int N, INDArray X, int incX, INDArray Y, int incY) {
    if (Nd4j.dataType() != DataBuffer.Type.FLOAT) {
        // Only a warning: the call still proceeds with single-precision pointers.
        logger.warn("FLOAT swap called");
    }

    // NOTE(review): presumably flushes work still queued in the executioner - confirm.
    Nd4j.getExecutioner().push();

    // Y is passed first, ahead of X.
    CudaContext ctx = allocator.getFlowController().prepareAction(Y, X);

    CublasPointer firstPtr = new CublasPointer(X, ctx);
    CublasPointer secondPtr = new CublasPointer(Y, ctx);

    cublasHandle_t cublas = ctx.getHandle();
    // Stream binding and the BLAS call happen under the handle's monitor.
    synchronized (cublas) {
        cublasSetStream_v2(
                new cublasContext(cublas),
                new CUstream_st(ctx.getOldStream()));

        cublasSswap_v2(
                new cublasContext(cublas),
                N,
                (FloatPointer) firstPtr.getDevicePointer(),
                incX,
                (FloatPointer) secondPtr.getDevicePointer(),
                incY);
    }

    allocator.registerAction(ctx, Y, X);
    OpExecutionerUtil.checkForAny(Y);
}
Aggregations