Use of org.nd4j.linalg.jcublas.CublasPointer in project nd4j by deeplearning4j.
Class JcublasLevel3, method dgemm.
/**
 * Double-precision general matrix multiply executed through cuBLAS ({@code cublasDgemm_v2}),
 * i.e. {@code C = alpha * op(A) * op(B) + beta * C}.
 *
 * <p>A warning is logged when the globally configured data type is not {@code DOUBLE}. The operand
 * data types are validated <em>before</em> the flow controller prepares the action, so a type
 * mismatch is reported before {@code prepareAction} has been called for the operands.
 *
 * @param Order  not used by this implementation
 * @param TransA transpose flag for {@code A}; translated with {@code convertTranspose} before the call
 * @param TransB transpose flag for {@code B}; translated with {@code convertTranspose} before the call
 * @param M      passed through unchanged as the {@code m} argument of {@code cublasDgemm_v2}
 * @param N      passed through unchanged as the {@code n} argument of {@code cublasDgemm_v2}
 * @param K      passed through unchanged as the {@code k} argument of {@code cublasDgemm_v2}
 * @param alpha  scalar applied to the product
 * @param A      left operand
 * @param lda    leading dimension handed to cuBLAS for {@code A}
 * @param B      right operand
 * @param ldb    leading dimension handed to cuBLAS for {@code B}
 * @param beta   scalar applied to the existing contents of {@code C}
 * @param C      result array; registered as the action's result operand
 * @param ldc    leading dimension handed to cuBLAS for {@code C}
 */
@Override
protected void dgemm(char Order, char TransA, char TransB, int M, int N, int K, double alpha, INDArray A, int lda,
        INDArray B, int ldb, double beta, INDArray C, int ldc) {
    if (Nd4j.dataType() != DataBuffer.Type.DOUBLE) {
        logger.warn("DOUBLE gemm called");
    }

    Nd4j.getExecutioner().push();

    // Validate first: if this throws, no action has been prepared that would then never be registered.
    DataTypeValidation.assertDouble(A, B, C);

    CudaContext ctx = allocator.getFlowController().prepareAction(C, A, B);

    CublasPointer cAPointer = new CublasPointer(A, ctx);
    CublasPointer cBPointer = new CublasPointer(B, ctx);
    CublasPointer cCPointer = new CublasPointer(C, ctx);

    cublasHandle_t handle = ctx.getHandle();
    // The handle is locked while its stream is set and the kernel is issued.
    synchronized (handle) {
        cublasContext cublas = new cublasContext(handle);
        cublasSetStream_v2(cublas, new CUstream_st(ctx.getOldStream()));

        cublasDgemm_v2(cublas, convertTranspose(TransA), convertTranspose(TransB), M, N, K,
                new DoublePointer(alpha), (DoublePointer) cAPointer.getDevicePointer(), lda,
                (DoublePointer) cBPointer.getDevicePointer(), ldb, new DoublePointer(beta),
                (DoublePointer) cCPointer.getDevicePointer(), ldc);
    }

    allocator.registerAction(ctx, C, A, B);
    // NOTE(review): presumably a NaN/Inf sanity check on the result - confirm against OpExecutionerUtil.
    OpExecutionerUtil.checkForAny(C);
}
Use of org.nd4j.linalg.jcublas.CublasPointer in project nd4j by deeplearning4j.
Class JcublasLevel1, method dcopy.
/**
 * Copies elements of {@code X} into {@code Y} on the device using cuBLAS ({@code cublasDcopy_v2}).
 *
 * <p>A warning is logged when the globally configured data type is not {@code DOUBLE}.
 *
 * @param N    element count handed to cuBLAS
 * @param X    source array
 * @param incX stride handed to cuBLAS for {@code X}
 * @param Y    destination array; registered as the action's result operand
 * @param incY stride handed to cuBLAS for {@code Y}
 */
@Override
protected void dcopy(int N, INDArray X, int incX, INDArray Y, int incY) {
    if (Nd4j.dataType() != DataBuffer.Type.DOUBLE) {
        logger.warn("DOUBLE copy called");
    }

    Nd4j.getExecutioner().push();

    final CudaContext context = allocator.getFlowController().prepareAction(Y, X);
    final CublasPointer source = new CublasPointer(X, context);
    final CublasPointer target = new CublasPointer(Y, context);

    final cublasHandle_t handle = context.getHandle();
    // The handle is locked while its stream is set and the copy is issued.
    synchronized (handle) {
        final cublasContext cublas = new cublasContext(handle);
        cublasSetStream_v2(cublas, new CUstream_st(context.getOldStream()));

        final DoublePointer from = (DoublePointer) source.getDevicePointer();
        final DoublePointer to = (DoublePointer) target.getDevicePointer();
        cublasDcopy_v2(cublas, N, from, incX, to, incY);
    }

    allocator.registerAction(context, Y, X);
    // NOTE(review): presumably a NaN/Inf sanity check on the result - confirm against OpExecutionerUtil.
    OpExecutionerUtil.checkForAny(Y);
}
Use of org.nd4j.linalg.jcublas.CublasPointer in project nd4j by deeplearning4j.
Class JcublasLevel1, method isamax.
/**
 * Single-precision "index of max absolute value" lookup on {@code X} via cuBLAS
 * ({@code cublasIsamax_v2}).
 *
 * <p>A warning is logged when the globally configured data type is not {@code FLOAT}. No result
 * array takes part in the action, so {@code null} is passed as the result operand to both
 * {@code prepareAction} and {@code registerAction}.
 *
 * @param N    element count handed to cuBLAS
 * @param X    array to scan
 * @param incX stride handed to cuBLAS for {@code X}
 * @return the value written by cuBLAS minus one (cuBLAS reports a 1-based position, so this is
 *         the 0-based index)
 */
@Override
protected int isamax(int N, INDArray X, int incX) {
    if (Nd4j.dataType() != DataBuffer.Type.FLOAT) {
        logger.warn("FLOAT iamax called");
    }

    Nd4j.getExecutioner().push();

    final CudaContext context = allocator.getFlowController().prepareAction(null, X);
    final CublasPointer input = new CublasPointer(X, context);

    final cublasHandle_t handle = context.getHandle();
    int oneBasedIndex;
    // The handle is locked while its stream is set, the lookup is issued and the result is read.
    synchronized (handle) {
        final cublasContext cublas = new cublasContext(handle);
        cublasSetStream_v2(cublas, new CUstream_st(context.getOldStream()));

        // Single-slot buffer, zero-initialised, that receives the result.
        final IntPointer result = new IntPointer(new int[] { 0 });
        cublasIsamax_v2(cublas, N, (FloatPointer) input.getDevicePointer(), incX, result);
        oneBasedIndex = result.get();
    }

    allocator.registerAction(context, null, X);

    return oneBasedIndex - 1;
}
Use of org.nd4j.linalg.jcublas.CublasPointer in project nd4j by deeplearning4j.
Class JcublasLevel1, method scopy.
/**
 * Copies elements of {@code X} into {@code Y} on the device using cuBLAS ({@code cublasScopy_v2}).
 *
 * <p>A warning is logged when the globally configured data type is not {@code FLOAT}.
 *
 * @param N    element count handed to cuBLAS
 * @param X    source array
 * @param incX stride handed to cuBLAS for {@code X}
 * @param Y    destination array; registered as the action's result operand
 * @param incY stride handed to cuBLAS for {@code Y}
 */
@Override
protected void scopy(int N, INDArray X, int incX, INDArray Y, int incY) {
    if (Nd4j.dataType() != DataBuffer.Type.FLOAT) {
        logger.warn("FLOAT copy called");
    }

    Nd4j.getExecutioner().push();

    final CudaContext context = allocator.getFlowController().prepareAction(Y, X);
    final CublasPointer source = new CublasPointer(X, context);
    final CublasPointer target = new CublasPointer(Y, context);

    final cublasHandle_t handle = context.getHandle();
    // The handle is locked while its stream is set and the copy is issued.
    synchronized (handle) {
        final cublasContext cublas = new cublasContext(handle);
        cublasSetStream_v2(cublas, new CUstream_st(context.getOldStream()));

        final FloatPointer from = (FloatPointer) source.getDevicePointer();
        final FloatPointer to = (FloatPointer) target.getDevicePointer();
        cublasScopy_v2(cublas, N, from, incX, to, incY);
    }

    allocator.registerAction(context, Y, X);
    // NOTE(review): presumably a NaN/Inf sanity check on the result - confirm against OpExecutionerUtil.
    OpExecutionerUtil.checkForAny(Y);
}
Use of org.nd4j.linalg.jcublas.CublasPointer in project nd4j by deeplearning4j.
Class JcublasLevel1, method sscal.
/**
 * Scales {@code X} in place by {@code alpha} on the device using cuBLAS ({@code cublasSscal_v2}).
 *
 * <p>A warning is logged when the globally configured data type is not {@code FLOAT}.
 *
 * @param N     element count handed to cuBLAS
 * @param alpha scale factor
 * @param X     array to scale; it is the only operand registered for the action
 * @param incX  stride handed to cuBLAS for {@code X}
 */
@Override
protected void sscal(int N, float alpha, INDArray X, int incX) {
    if (Nd4j.dataType() != DataBuffer.Type.FLOAT) {
        logger.warn("FLOAT scal called");
    }

    Nd4j.getExecutioner().push();

    final CudaContext context = allocator.getFlowController().prepareAction(X);
    final CublasPointer vector = new CublasPointer(X, context);

    final cublasHandle_t handle = context.getHandle();
    // The handle is locked while its stream is set and the scaling is issued.
    synchronized (handle) {
        final cublasContext cublas = new cublasContext(handle);
        cublasSetStream_v2(cublas, new CUstream_st(context.getOldStream()));

        final FloatPointer factor = new FloatPointer(alpha);
        cublasSscal_v2(cublas, N, factor, (FloatPointer) vector.getDevicePointer(), incX);
    }

    allocator.registerAction(context, X);
    // NOTE(review): presumably a NaN/Inf sanity check on the result - confirm against OpExecutionerUtil.
    OpExecutionerUtil.checkForAny(X);
}
Aggregations