use of org.nd4j.linalg.jcublas.CublasPointer in project nd4j by deeplearning4j.
the class JcublasLevel1 method dnrm2.
/**
 * Double-precision nrm2, delegated to cuBLAS through {@code cublasDnrm2_v2}.
 *
 * <p>Logs a warning when the global Nd4j data type is not DOUBLE, but still
 * performs the call.
 *
 * @param N    element count forwarded to cuBLAS
 * @param X    input array whose device pointer is handed to cuBLAS
 * @param incX stride argument forwarded to cuBLAS for {@code X}
 * @return the value cuBLAS stored in the host-side result pointer
 */
@Override
protected double dnrm2(int N, INDArray X, int incX) {
    if (Nd4j.dataType() != DataBuffer.Type.DOUBLE) {
        logger.warn("DOUBLE nrm2 called");
    }

    Nd4j.getExecutioner().push();

    // No result array here: the norm comes back as a scalar, so only X is an operand.
    CudaContext context = allocator.getFlowController().prepareAction(null, X);
    CublasPointer xPointer = new CublasPointer(X, context);
    cublasHandle_t handle = context.getHandle();

    final double norm;
    // The handle is locked while its stream is set and the routine is issued.
    synchronized (handle) {
        cublasContext blas = new cublasContext(handle);
        cublasSetStream_v2(blas, new CUstream_st(context.getOldStream()));

        DoublePointer result = new DoublePointer(0.0);
        DoublePointer xDevice = (DoublePointer) xPointer.getDevicePointer();
        cublasDnrm2_v2(blas, N, xDevice, incX, result);
        norm = result.get();
    }

    allocator.registerAction(context, null, X);
    return norm;
}
use of org.nd4j.linalg.jcublas.CublasPointer in project nd4j by deeplearning4j.
the class JcublasLevel1 method dswap.
/**
 * Double-precision swap, delegated to cuBLAS through {@code cublasDswap_v2}.
 *
 * <p>Logs a warning when the global Nd4j data type is not DOUBLE, but still
 * performs the call. After the action is registered with the allocator,
 * {@code OpExecutionerUtil.checkForAny} is run on {@code Y}.
 *
 * @param N    element count forwarded to cuBLAS
 * @param X    first array; its device pointer is handed to cuBLAS
 * @param incX stride argument forwarded to cuBLAS for {@code X}
 * @param Y    second array; its device pointer is handed to cuBLAS
 * @param incY stride argument forwarded to cuBLAS for {@code Y}
 */
@Override
protected void dswap(int N, INDArray X, int incX, INDArray Y, int incY) {
    if (Nd4j.dataType() != DataBuffer.Type.DOUBLE) {
        logger.warn("DOUBLE swap called");
    }

    Nd4j.getExecutioner().push();

    CudaContext context = allocator.getFlowController().prepareAction(Y, X);
    CublasPointer xPointer = new CublasPointer(X, context);
    CublasPointer yPointer = new CublasPointer(Y, context);
    cublasHandle_t handle = context.getHandle();

    // The handle is locked while its stream is set and the routine is issued.
    synchronized (handle) {
        cublasContext blas = new cublasContext(handle);
        cublasSetStream_v2(blas, new CUstream_st(context.getOldStream()));

        DoublePointer xDevice = (DoublePointer) xPointer.getDevicePointer();
        DoublePointer yDevice = (DoublePointer) yPointer.getDevicePointer();
        cublasDswap_v2(blas, N, xDevice, incX, yDevice, incY);
    }

    allocator.registerAction(context, Y, X);
    OpExecutionerUtil.checkForAny(Y);
}
use of org.nd4j.linalg.jcublas.CublasPointer in project nd4j by deeplearning4j.
the class JcublasLevel3 method dsymm.
/**
 * Double-precision symm, delegated to cuBLAS through {@code cublasDsymm_v2}.
 *
 * <p>Logs a warning when the global Nd4j data type is not DOUBLE, but still
 * performs the call. After the action is registered with the allocator,
 * {@code OpExecutionerUtil.checkForAny} is run on {@code C}.
 *
 * @param Order not consulted by this implementation
 * @param Side  side flag, translated with {@code convertSideMode}
 * @param Uplo  triangle flag, translated with {@code convertUplo}
 * @param M     forwarded to cuBLAS unchanged
 * @param N     forwarded to cuBLAS unchanged
 * @param alpha scalar passed to cuBLAS through a host-side pointer
 * @param A     first input array (device pointer handed to cuBLAS)
 * @param lda   leading-dimension argument for {@code A}
 * @param B     second input array (device pointer handed to cuBLAS)
 * @param ldb   leading-dimension argument for {@code B}
 * @param beta  scalar passed to cuBLAS through a host-side pointer
 * @param C     result array (device pointer handed to cuBLAS)
 * @param ldc   leading-dimension argument for {@code C}
 */
@Override
protected void dsymm(char Order, char Side, char Uplo, int M, int N, double alpha, INDArray A, int lda, INDArray B, int ldb, double beta, INDArray C, int ldc) {
    if (Nd4j.dataType() != DataBuffer.Type.DOUBLE) {
        logger.warn("DOUBLE symm called");
    }

    Nd4j.getExecutioner().push();

    CudaContext context = allocator.getFlowController().prepareAction(C, A, B);
    CublasPointer matrixA = new CublasPointer(A, context);
    CublasPointer matrixB = new CublasPointer(B, context);
    CublasPointer matrixC = new CublasPointer(C, context);
    cublasHandle_t handle = context.getHandle();

    // The handle is locked while its stream is set and the routine is issued.
    synchronized (handle) {
        cublasContext blas = new cublasContext(handle);
        cublasSetStream_v2(blas, new CUstream_st(context.getOldStream()));

        cublasDsymm_v2(
                blas,
                convertSideMode(Side),
                convertUplo(Uplo),
                M,
                N,
                new DoublePointer(alpha),
                (DoublePointer) matrixA.getDevicePointer(),
                lda,
                (DoublePointer) matrixB.getDevicePointer(),
                ldb,
                new DoublePointer(beta),
                (DoublePointer) matrixC.getDevicePointer(),
                ldc);
    }

    allocator.registerAction(context, C, A, B);
    OpExecutionerUtil.checkForAny(C);
}
use of org.nd4j.linalg.jcublas.CublasPointer in project nd4j by deeplearning4j.
the class JcublasLevel3 method ssymm.
/**
 * Single-precision symm, delegated to cuBLAS through {@code cublasSsymm_v2}.
 *
 * <p>Logs a warning when the global Nd4j data type is not FLOAT, but still
 * performs the call. After the action is registered with the allocator,
 * {@code OpExecutionerUtil.checkForAny} is run on {@code C}.
 *
 * @param Order not consulted by this implementation
 * @param Side  side flag, translated with {@code convertSideMode}
 * @param Uplo  triangle flag, translated with {@code convertUplo}
 * @param M     forwarded to cuBLAS unchanged
 * @param N     forwarded to cuBLAS unchanged
 * @param alpha scalar passed to cuBLAS through a host-side pointer
 * @param A     first input array (device pointer handed to cuBLAS)
 * @param lda   leading-dimension argument for {@code A}
 * @param B     second input array (device pointer handed to cuBLAS)
 * @param ldb   leading-dimension argument for {@code B}
 * @param beta  scalar passed to cuBLAS through a host-side pointer
 * @param C     result array (device pointer handed to cuBLAS)
 * @param ldc   leading-dimension argument for {@code C}
 */
@Override
protected void ssymm(char Order, char Side, char Uplo, int M, int N, float alpha, INDArray A, int lda, INDArray B, int ldb, float beta, INDArray C, int ldc) {
    if (Nd4j.dataType() != DataBuffer.Type.FLOAT) {
        logger.warn("FLOAT symm called");
    }

    Nd4j.getExecutioner().push();

    CudaContext context = allocator.getFlowController().prepareAction(C, A, B);
    CublasPointer matrixA = new CublasPointer(A, context);
    CublasPointer matrixB = new CublasPointer(B, context);
    CublasPointer matrixC = new CublasPointer(C, context);
    cublasHandle_t handle = context.getHandle();

    // The handle is locked while its stream is set and the routine is issued.
    synchronized (handle) {
        cublasContext blas = new cublasContext(handle);
        cublasSetStream_v2(blas, new CUstream_st(context.getOldStream()));

        cublasSsymm_v2(
                blas,
                convertSideMode(Side),
                convertUplo(Uplo),
                M,
                N,
                new FloatPointer(alpha),
                (FloatPointer) matrixA.getDevicePointer(),
                lda,
                (FloatPointer) matrixB.getDevicePointer(),
                ldb,
                new FloatPointer(beta),
                (FloatPointer) matrixC.getDevicePointer(),
                ldc);
    }

    allocator.registerAction(context, C, A, B);
    OpExecutionerUtil.checkForAny(C);
}
use of org.nd4j.linalg.jcublas.CublasPointer in project nd4j by deeplearning4j.
the class JcublasLevel3 method hgemm.
/**
 * Half-precision gemm, delegated to cuBLAS.
 *
 * <p>Two paths exist. When {@code CUDA_VERSION >= 8000} and the current device
 * architecture is 53 or 60, {@code cublasHgemm} is called with alpha/beta
 * converted to half values. Otherwise {@code cublasSgemmEx} is called with
 * float alpha/beta and data-type code {@code 2} for all three matrices.
 * After the action is registered with the allocator,
 * {@code OpExecutionerUtil.checkForAny} is run on {@code C}.
 *
 * @param Order  not consulted by this implementation
 * @param TransA transpose flag for {@code A}, translated with {@code convertTranspose}
 * @param TransB transpose flag for {@code B}, translated with {@code convertTranspose}
 * @param M      forwarded to cuBLAS unchanged
 * @param N      forwarded to cuBLAS unchanged
 * @param K      forwarded to cuBLAS unchanged
 * @param alpha  scalar multiplier supplied as float
 * @param A      first input array (device pointer handed to cuBLAS)
 * @param lda    leading-dimension argument for {@code A}
 * @param B      second input array (device pointer handed to cuBLAS)
 * @param ldb    leading-dimension argument for {@code B}
 * @param beta   scalar multiplier supplied as float
 * @param C      result array (device pointer handed to cuBLAS)
 * @param ldc    leading-dimension argument for {@code C}
 */
@Override
protected void hgemm(char Order, char TransA, char TransB, int M, int N, int K, float alpha, INDArray A, int lda, INDArray B, int ldb, float beta, INDArray C, int ldc) {
    Nd4j.getExecutioner().push();

    CudaContext context = allocator.getFlowController().prepareAction(C, A, B);
    CublasPointer matrixA = new CublasPointer(A, context);
    CublasPointer matrixB = new CublasPointer(B, context);
    CublasPointer matrixC = new CublasPointer(C, context);
    cublasHandle_t handle = context.getHandle();

    // The handle is locked while its stream is set and the routine is issued.
    synchronized (handle) {
        cublasContext blas = new cublasContext(handle);
        cublasSetStream_v2(blas, new CUstream_st(context.getOldStream()));

        int arch = CudaEnvironment.getInstance().getCurrentDeviceArchitecture();
        boolean useNativeHalfGemm = CUDA_VERSION >= 8000 && (arch == 53 || arch == 60);

        if (useNativeHalfGemm) {
            // Selected architectures only: run cublasHgemm with half-typed scalars.
            __half alphaHalf = new __half();
            __half betaHalf = new __half();
            new ShortPointer(alphaHalf).put((short) HalfIndexer.fromFloat(alpha));
            new ShortPointer(betaHalf).put((short) HalfIndexer.fromFloat(beta));

            cublasHgemm(
                    blas,
                    convertTranspose(TransA),
                    convertTranspose(TransB),
                    M,
                    N,
                    K,
                    alphaHalf,
                    new __half(matrixA.getDevicePointer()),
                    lda,
                    new __half(matrixB.getDevicePointer()),
                    ldb,
                    betaHalf,
                    new __half(matrixC.getDevicePointer()),
                    ldc);
        } else {
            // The literal 2 is the half data-type code:
            // CUDA_R_16F == 2 for CUDA 8, CUBLAS_DATA_HALF == 2 for CUDA 7.5.
            cublasSgemmEx(
                    blas,
                    convertTranspose(TransA),
                    convertTranspose(TransB),
                    M,
                    N,
                    K,
                    new FloatPointer(alpha),
                    (ShortPointer) matrixA.getDevicePointer(),
                    2,
                    lda,
                    (ShortPointer) matrixB.getDevicePointer(),
                    2,
                    ldb,
                    new FloatPointer(beta),
                    (ShortPointer) matrixC.getDevicePointer(),
                    2,
                    ldc);
        }
    }

    allocator.registerAction(context, C, A, B);
    OpExecutionerUtil.checkForAny(C);
}
Aggregations