use of org.bytedeco.javacpp.cuda.CUstream_st in project nd4j by deeplearning4j.
the class JcublasLevel1 method dnrm2.
/**
 * Double-precision {@code nrm2} executed through cuBLAS ({@code cublasDnrm2_v2}).
 *
 * Logs a warning when the globally configured data type is not DOUBLE, but
 * still performs the call.
 *
 * @param N    element count forwarded to cuBLAS
 * @param X    input array whose device pointer is handed to cuBLAS
 * @param incX stride forwarded to cuBLAS for X
 * @return the value cuBLAS stored in the single-element result pointer
 */
@Override
protected double dnrm2(int N, INDArray X, int incX) {
    if (Nd4j.dataType() != DataBuffer.Type.DOUBLE) {
        logger.warn("DOUBLE nrm2 called");
    }

    // NOTE(review): presumably flushes queued executioner work before touching cuBLAS - confirm.
    Nd4j.getExecutioner().push();

    // X is only read here, so no result array is passed to the flow controller.
    CudaContext context = allocator.getFlowController().prepareAction(null, X);
    CublasPointer xPointer = new CublasPointer(X, context);
    cublasHandle_t blasHandle = context.getHandle();

    double norm;
    // Binding the stream and issuing the call happen under the handle's monitor.
    synchronized (blasHandle) {
        cublasSetStream_v2(new cublasContext(blasHandle), new CUstream_st(context.getOldStream()));

        DoublePointer normHolder = new DoublePointer(0.0f);
        cublasDnrm2_v2(new cublasContext(blasHandle), N, (DoublePointer) xPointer.getDevicePointer(), incX,
                normHolder);
        norm = normHolder.get();
    }

    allocator.registerAction(context, null, X);
    return norm;
}
use of org.bytedeco.javacpp.cuda.CUstream_st in project nd4j by deeplearning4j.
the class JcublasLevel1 method dswap.
/**
 * Double-precision {@code swap} executed through cuBLAS ({@code cublasDswap_v2}).
 *
 * Logs a warning when the globally configured data type is not DOUBLE, but
 * still performs the call.
 *
 * @param N    element count forwarded to cuBLAS
 * @param X    first array taking part in the swap
 * @param incX stride forwarded to cuBLAS for X
 * @param Y    second array taking part in the swap; also registered as the action's result
 * @param incY stride forwarded to cuBLAS for Y
 */
@Override
protected void dswap(int N, INDArray X, int incX, INDArray Y, int incY) {
    if (Nd4j.dataType() != DataBuffer.Type.DOUBLE) {
        logger.warn("DOUBLE swap called");
    }

    // NOTE(review): presumably flushes queued executioner work before touching cuBLAS - confirm.
    Nd4j.getExecutioner().push();

    CudaContext context = allocator.getFlowController().prepareAction(Y, X);
    CublasPointer xPointer = new CublasPointer(X, context);
    CublasPointer yPointer = new CublasPointer(Y, context);
    cublasHandle_t blasHandle = context.getHandle();

    // Binding the stream and issuing the call happen under the handle's monitor.
    synchronized (blasHandle) {
        cublasSetStream_v2(new cublasContext(blasHandle), new CUstream_st(context.getOldStream()));
        cublasDswap_v2(new cublasContext(blasHandle), N,
                (DoublePointer) xPointer.getDevicePointer(), incX,
                (DoublePointer) yPointer.getDevicePointer(), incY);
    }

    allocator.registerAction(context, Y, X);

    // NOTE(review): looks like a post-op validation hook on Y - confirm what checkForAny inspects.
    OpExecutionerUtil.checkForAny(Y);
}
use of org.bytedeco.javacpp.cuda.CUstream_st in project nd4j by deeplearning4j.
the class JcublasLevel1 method dcopy.
/**
 * Double-precision {@code copy} executed through cuBLAS ({@code cublasDcopy_v2}).
 *
 * Logs a warning when the globally configured data type is not DOUBLE, but
 * still performs the call.
 *
 * @param N    element count forwarded to cuBLAS
 * @param X    source array
 * @param incX stride forwarded to cuBLAS for X
 * @param Y    destination array; registered as the action's result
 * @param incY stride forwarded to cuBLAS for Y
 */
@Override
protected void dcopy(int N, INDArray X, int incX, INDArray Y, int incY) {
    if (Nd4j.dataType() != DataBuffer.Type.DOUBLE) {
        logger.warn("DOUBLE copy called");
    }

    // NOTE(review): presumably flushes queued executioner work before touching cuBLAS - confirm.
    Nd4j.getExecutioner().push();

    CudaContext context = allocator.getFlowController().prepareAction(Y, X);
    CublasPointer sourcePointer = new CublasPointer(X, context);
    CublasPointer targetPointer = new CublasPointer(Y, context);
    cublasHandle_t blasHandle = context.getHandle();

    // Binding the stream and issuing the call happen under the handle's monitor.
    synchronized (blasHandle) {
        cublasSetStream_v2(new cublasContext(blasHandle), new CUstream_st(context.getOldStream()));
        cublasDcopy_v2(new cublasContext(blasHandle), N,
                (DoublePointer) sourcePointer.getDevicePointer(), incX,
                (DoublePointer) targetPointer.getDevicePointer(), incY);
    }

    allocator.registerAction(context, Y, X);

    // NOTE(review): looks like a post-op validation hook on Y - confirm what checkForAny inspects.
    OpExecutionerUtil.checkForAny(Y);
}
use of org.bytedeco.javacpp.cuda.CUstream_st in project nd4j by deeplearning4j.
the class JcublasLevel1 method isamax.
/**
 * Single-precision {@code iamax} executed through cuBLAS ({@code cublasIsamax_v2}).
 *
 * Logs a warning when the globally configured data type is not FLOAT, but
 * still performs the call.
 *
 * @param N    element count forwarded to cuBLAS
 * @param X    input array whose device pointer is handed to cuBLAS
 * @param incX stride forwarded to cuBLAS for X
 * @return the index reported by cuBLAS, decremented by one
 */
@Override
protected int isamax(int N, INDArray X, int incX) {
    if (Nd4j.dataType() != DataBuffer.Type.FLOAT) {
        logger.warn("FLOAT iamax called");
    }

    // NOTE(review): presumably flushes queued executioner work before touching cuBLAS - confirm.
    Nd4j.getExecutioner().push();

    // X is only read here, so no result array is passed to the flow controller.
    CudaContext context = allocator.getFlowController().prepareAction(null, X);
    CublasPointer xPointer = new CublasPointer(X, context);
    cublasHandle_t blasHandle = context.getHandle();

    int reportedIndex;
    // Binding the stream and issuing the call happen under the handle's monitor.
    synchronized (blasHandle) {
        cublasSetStream_v2(new cublasContext(blasHandle), new CUstream_st(context.getOldStream()));

        // The array form is deliberate: it yields a one-element pointer initialised to 0.
        IntPointer indexHolder = new IntPointer(new int[] { 0 });
        cublasIsamax_v2(new cublasContext(blasHandle), N, (FloatPointer) xPointer.getDevicePointer(), incX,
                indexHolder);
        reportedIndex = indexHolder.get();
    }

    allocator.registerAction(context, null, X);

    // cuBLAS documents the iamax result as 1-based; shift it to a 0-based index.
    return reportedIndex - 1;
}
use of org.bytedeco.javacpp.cuda.CUstream_st in project nd4j by deeplearning4j.
the class JcublasLevel1 method scopy.
/**
 * Single-precision {@code copy} executed through cuBLAS ({@code cublasScopy_v2}).
 *
 * Logs a warning when the globally configured data type is not FLOAT, but
 * still performs the call.
 *
 * @param N    element count forwarded to cuBLAS
 * @param X    source array
 * @param incX stride forwarded to cuBLAS for X
 * @param Y    destination array; registered as the action's result
 * @param incY stride forwarded to cuBLAS for Y
 */
@Override
protected void scopy(int N, INDArray X, int incX, INDArray Y, int incY) {
    if (Nd4j.dataType() != DataBuffer.Type.FLOAT) {
        logger.warn("FLOAT copy called");
    }

    // NOTE(review): presumably flushes queued executioner work before touching cuBLAS - confirm.
    Nd4j.getExecutioner().push();

    CudaContext context = allocator.getFlowController().prepareAction(Y, X);
    CublasPointer sourcePointer = new CublasPointer(X, context);
    CublasPointer targetPointer = new CublasPointer(Y, context);
    cublasHandle_t blasHandle = context.getHandle();

    // Binding the stream and issuing the call happen under the handle's monitor.
    synchronized (blasHandle) {
        cublasSetStream_v2(new cublasContext(blasHandle), new CUstream_st(context.getOldStream()));
        cublasScopy_v2(new cublasContext(blasHandle), N,
                (FloatPointer) sourcePointer.getDevicePointer(), incX,
                (FloatPointer) targetPointer.getDevicePointer(), incY);
    }

    allocator.registerAction(context, Y, X);

    // NOTE(review): looks like a post-op validation hook on Y - confirm what checkForAny inspects.
    OpExecutionerUtil.checkForAny(Y);
}
Aggregations