Use of org.bytedeco.javacpp.FloatPointer in project MindsEye by SimiaCryptus.
The class Hdf5Archive, method readDataSet.
/**
 * Read a data set from an HDF5 file or group as a Tensor.
 *
 * @param fileGroup   HDF5 file or group
 * @param datasetName name of the data set
 * @return the data set contents as a Tensor
 */
@Nullable
private Tensor readDataSet(@Nonnull Group fileGroup, CharSequence datasetName) {
  DataSet dataset = fileGroup.openDataSet(datasetName.toString());
  DataSpace space = dataset.getSpace();
  int nbDims = space.getSimpleExtentNdims();
  @Nonnull long[] dims = new long[nbDims];
  space.getSimpleExtentDims(dims);
  @Nullable float[] dataBuffer = null;
  @Nullable FloatPointer fp = null;
  int j = 0;
  @Nonnull DataType dataType = new DataType(PredType.NATIVE_FLOAT());
  @Nullable Tensor data = null;
  switch (nbDims) {
    case 4:
      /* 2D convolution weights */
      dataBuffer = new float[(int) (dims[0] * dims[1] * dims[2] * dims[3])];
      fp = new FloatPointer(dataBuffer);
      dataset.read(fp, dataType);
      fp.get(dataBuffer);
      data = new Tensor((int) dims[0], (int) dims[1], (int) dims[2], (int) dims[3]);
      j = 0;
      for (int i1 = 0; i1 < dims[0]; i1++)
        for (int i2 = 0; i2 < dims[1]; i2++)
          for (int i3 = 0; i3 < dims[2]; i3++)
            for (int i4 = 0; i4 < dims[3]; i4++)
              data.set(i1, i2, i3, i4, (double) dataBuffer[j++]);
      break;
    case 3:
      dataBuffer = new float[(int) (dims[0] * dims[1] * dims[2])];
      fp = new FloatPointer(dataBuffer);
      dataset.read(fp, dataType);
      fp.get(dataBuffer);
      data = new Tensor((int) dims[0], (int) dims[1], (int) dims[2]);
      j = 0;
      for (int i1 = 0; i1 < dims[0]; i1++)
        for (int i2 = 0; i2 < dims[1]; i2++)
          for (int i3 = 0; i3 < dims[2]; i3++)
            data.set(i1, i2, i3, dataBuffer[j++]);
      break;
    case 2:
      /* Dense and recurrent weights */
      dataBuffer = new float[(int) (dims[0] * dims[1])];
      fp = new FloatPointer(dataBuffer);
      dataset.read(fp, dataType);
      fp.get(dataBuffer);
      data = new Tensor((int) dims[0], (int) dims[1]);
      j = 0;
      for (int i1 = 0; i1 < dims[0]; i1++)
        for (int i2 = 0; i2 < dims[1]; i2++)
          data.set(i1, i2, dataBuffer[j++]);
      break;
    case 1:
      /* Bias */
      dataBuffer = new float[(int) dims[0]];
      fp = new FloatPointer(dataBuffer);
      dataset.read(fp, dataType);
      fp.get(dataBuffer);
      data = new Tensor((int) dims[0]);
      j = 0;
      for (int i1 = 0; i1 < dims[0]; i1++)
        data.set(i1, dataBuffer[j++]);
      break;
    default:
      throw new RuntimeException("Cannot import weights with rank " + nbDims);
  }
  space.deallocate();
  dataset.deallocate();
  return data;
}
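The same round trip repeats for every rank above: allocate a Java float[] of the right length, wrap it in a FloatPointer so the native HDF5 reader can fill off-heap memory, then copy the result back with get(). A minimal sketch of that pattern in isolation (the buffer size is illustrative; dataset and dataType stand in for the snippet's HDF5 handles):

import org.bytedeco.javacpp.FloatPointer;

// Allocate the Java-side destination.
float[] host = new float[4];
// Wrap it: JavaCPP copies the array into native (off-heap) memory.
FloatPointer fp = new FloatPointer(host);
// A native call such as dataset.read(fp, dataType) would fill fp here.
// Copy the native buffer back into the Java array.
fp.get(host);
// Release the native memory eagerly rather than waiting for GC.
fp.deallocate();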
Use of org.bytedeco.javacpp.FloatPointer in project nd4j by deeplearning4j.
The class AbstractCompressor, method compress.
/**
 * This method creates a compressed INDArray directly from a Java float array,
 * skipping the usual INDArray instantiation routines.
 *
 * @param data  source float array
 * @param shape target shape
 * @param order ordering ('c' or 'f')
 * @return the compressed INDArray
 */
@Override
public INDArray compress(float[] data, int[] shape, char order) {
  FloatPointer pointer = new FloatPointer(data);
  DataBuffer shapeInfo = Nd4j.getShapeInfoProvider().createShapeInformation(shape, order).getFirst();
  DataBuffer buffer = compressPointer(DataBuffer.TypeEx.FLOAT, pointer, data.length, 4);
  return Nd4j.createArrayFromShapeBuffer(buffer, shapeInfo);
}
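For contrast, a minimal sketch of the conventional path this method skips (assuming the standard Nd4j factory API); compress avoids materializing the intermediate array by wrapping the raw float[] in a FloatPointer and compressing the native buffer directly:

float[] data = { 1f, 2f, 3f, 4f };
// Conventional route: build a full uncompressed INDArray first...
INDArray plain = Nd4j.create(data, new int[] { 2, 2 }, 'c');
// ...and only then compress it, paying for the extra allocation and copy.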
Use of org.bytedeco.javacpp.FloatPointer in project nd4j by deeplearning4j.
The class CpuLapack, method ssyev.
// =========================
// syev EigenValue/Vectors
//
@Override
public int ssyev(char jobz, char uplo, int N, INDArray A, INDArray R) {
  FloatPointer fp = new FloatPointer(1);
  int status = LAPACKE_ssyev_work(getColumnOrder(A), (byte) jobz, (byte) uplo, N,
      (FloatPointer) A.data().addressPointer(), getLda(A),
      (FloatPointer) R.data().addressPointer(), fp, -1);
  if (status == 0) {
    int lwork = (int) fp.get();
    INDArray work = Nd4j.createArrayFromShapeBuffer(
        Nd4j.getDataBufferFactory().createFloat(lwork),
        Nd4j.getShapeInfoProvider().createShapeInformation(new int[] { 1, lwork }).getFirst());
    status = LAPACKE_ssyev(getColumnOrder(A), (byte) jobz, (byte) uplo, N,
        (FloatPointer) A.data().addressPointer(), getLda(A),
        (FloatPointer) work.data().addressPointer());
    if (status == 0) {
      R.assign(work.get(NDArrayIndex.interval(0, N)));
    }
  }
  return status;
}
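The single-element FloatPointer here is the standard LAPACK workspace-query idiom: passing lwork == -1 performs no computation and instead writes the optimal workspace length into the work argument. A hedged sketch of the idiom by itself (order, jobz, uplo, n, aPtr, lda, wPtr are placeholders for the snippet's arguments):

FloatPointer query = new FloatPointer(1);
// lwork == -1: size query only, no eigendecomposition is performed.
int status = LAPACKE_ssyev_work(order, jobz, uplo, n, aPtr, lda, wPtr, query, -1);
int lwork = (int) query.get(); // optimal workspace length reported by LAPACK
FloatPointer work = new FloatPointer(lwork);
// Second call with a properly sized workspace does the real computation.
status = LAPACKE_ssyev_work(order, jobz, uplo, n, aPtr, lda, wPtr, work, lwork);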
Use of org.bytedeco.javacpp.FloatPointer in project nd4j by deeplearning4j.
The class JcublasLapack, method spotrf.
// =========================
// CHOLESKY DECOMP
@Override
public void spotrf(byte uplo, int N, INDArray A, INDArray INFO) {
  INDArray a = A;
  if (Nd4j.dataType() != DataBuffer.Type.FLOAT)
    log.warn("FLOAT potrf called in DOUBLE environment");
  if (A.ordering() == 'c')
    a = A.dup('f');
  if (Nd4j.getExecutioner() instanceof GridExecutioner)
    ((GridExecutioner) Nd4j.getExecutioner()).flushQueue();
  // Get context for current thread
  CudaContext ctx = (CudaContext) allocator.getDeviceContext().getContext();
  // set up the solver handle for cuSolver calls
  cusolverDnHandle_t handle = ctx.getSolverHandle();
  cusolverDnContext solverDn = new cusolverDnContext(handle);
  // synchronized on the solver
  synchronized (handle) {
    int result = cusolverDnSetStream(new cusolverDnContext(handle), new CUstream_st(ctx.getOldStream()));
    if (result != 0)
      throw new BlasException("solverSetStream failed");
    // transfer the INDArray into GPU memory
    CublasPointer xAPointer = new CublasPointer(a, ctx);
    // this output indicates how much memory we'll need for the real operation
    DataBuffer worksizeBuffer = Nd4j.getDataBufferFactory().createInt(1);
    int stat = cusolverDnSpotrf_bufferSize(solverDn, uplo, N,
        (FloatPointer) xAPointer.getDevicePointer(), N,
        (IntPointer) worksizeBuffer.addressPointer()); // we intentionally use a host pointer here
    if (stat != CUSOLVER_STATUS_SUCCESS) {
      throw new BlasException("cusolverDnSpotrf_bufferSize failed", stat);
    }
    int worksize = worksizeBuffer.getInt(0);
    // Now allocate memory for the workspace
    Pointer workspace = new Workspace(worksize * Nd4j.sizeOfDataType());
    // Do the actual decomposition
    stat = cusolverDnSpotrf(solverDn, uplo, N,
        (FloatPointer) xAPointer.getDevicePointer(), N,
        new CudaPointer(workspace).asFloatPointer(), worksize,
        new CudaPointer(allocator.getPointer(INFO, ctx)).asIntPointer());
    if (stat != CUSOLVER_STATUS_SUCCESS) {
      throw new BlasException("cusolverDnSpotrf failed", stat);
    }
  }
  allocator.registerAction(ctx, a);
  allocator.registerAction(ctx, INFO);
  if (a != A)
    A.assign(a);
  // cuSolver writes only one triangle; zero out the stale entries in the other
  if (uplo == 'U') {
    A.assign(A.transpose());
    INDArrayIndex[] ix = new INDArrayIndex[2];
    for (int i = 1; i < Math.min(A.rows(), A.columns()); i++) {
      ix[0] = NDArrayIndex.point(i);
      ix[1] = NDArrayIndex.interval(0, i);
      A.put(ix, 0);
    }
  } else {
    INDArrayIndex[] ix = new INDArrayIndex[2];
    for (int i = 0; i < Math.min(A.rows(), A.columns() - 1); i++) {
      ix[0] = NDArrayIndex.point(i);
      ix[1] = NDArrayIndex.interval(i + 1, A.columns());
      A.put(ix, 0);
    }
  }
  log.info("A: {}", A);
}
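cuSOLVER's potrf writes only the requested triangle and leaves stale input values in the other, which is why the loops above explicitly zero the unused half. A hedged sanity check for the lower-triangular case (uplo == 'L'): the factor times its transpose should reproduce the input.

// After spotrf with uplo == 'L', A holds the Cholesky factor L;
// L * L^T should match the original SPD matrix up to rounding error.
INDArray L = A;
INDArray reconstructed = L.mmul(L.transpose());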
Use of org.bytedeco.javacpp.FloatPointer in project nd4j by deeplearning4j.
The class JcublasLapack, method sgesvd.
@Override
public void sgesvd(byte jobu, byte jobvt, int M, int N, INDArray A, INDArray S, INDArray U, INDArray VT, INDArray INFO) {
  if (Nd4j.dataType() != DataBuffer.Type.FLOAT)
    log.warn("FLOAT gesvd called in DOUBLE environment");
  INDArray a = A;
  INDArray u = U;
  INDArray vt = VT;
  // cuda can only handle M >= N, so if M < N we transpose the input and
  // adjust the outputs: A = U S V' implies A' = V S' U'
  boolean hadToTransposeA = false;
  if (M < N) {
    hadToTransposeA = true;
    int tmp1 = N;
    N = M;
    M = tmp1;
    a = A.transpose().dup('f');
    u = VT.dup('f');
    vt = U.dup('f');
  } else {
    // cuda requires column ('f') ordering - duplicate if needed
    if (A.ordering() == 'c')
      a = A.dup('f');
    if (U != null && U.ordering() == 'c')
      u = U.dup('f');
    if (VT != null && VT.ordering() == 'c')
      vt = VT.dup('f');
  }
  if (Nd4j.getExecutioner() instanceof GridExecutioner)
    ((GridExecutioner) Nd4j.getExecutioner()).flushQueue();
  // Get context for current thread
  CudaContext ctx = (CudaContext) allocator.getDeviceContext().getContext();
  // set up the solver handle for cuSolver calls
  cusolverDnHandle_t handle = ctx.getSolverHandle();
  cusolverDnContext solverDn = new cusolverDnContext(handle);
  // synchronized on the solver
  synchronized (handle) {
    int result = cusolverDnSetStream(new cusolverDnContext(handle), new CUstream_st(ctx.getOldStream()));
    if (result != 0)
      throw new BlasException("solverSetStream failed");
    // transfer the INDArray into GPU memory
    CublasPointer xAPointer = new CublasPointer(a, ctx);
    // this output indicates how much memory we'll need for the real operation
    DataBuffer worksizeBuffer = Nd4j.getDataBufferFactory().createInt(1);
    int stat = cusolverDnSgesvd_bufferSize(solverDn, M, N,
        (IntPointer) worksizeBuffer.addressPointer()); // we intentionally use a host pointer here
    if (stat != CUSOLVER_STATUS_SUCCESS) {
      throw new BlasException("cusolverDnSgesvd_bufferSize failed", stat);
    }
    int worksize = worksizeBuffer.getInt(0);
    Pointer workspace = new Workspace(worksize * Nd4j.sizeOfDataType());
    DataBuffer rwork = Nd4j.getDataBufferFactory().createFloat((M < N ? M : N) - 1);
    // Do the actual decomposition
    stat = cusolverDnSgesvd(solverDn, jobu, jobvt, M, N,
        (FloatPointer) xAPointer.getDevicePointer(), M,
        new CudaPointer(allocator.getPointer(S, ctx)).asFloatPointer(),
        U == null ? null : new CudaPointer(allocator.getPointer(u, ctx)).asFloatPointer(), M,
        VT == null ? null : new CudaPointer(allocator.getPointer(vt, ctx)).asFloatPointer(), N,
        new CudaPointer(workspace).asFloatPointer(), worksize,
        new CudaPointer(allocator.getPointer(rwork, ctx)).asFloatPointer(),
        new CudaPointer(allocator.getPointer(INFO, ctx)).asIntPointer());
    if (stat != CUSOLVER_STATUS_SUCCESS) {
      throw new BlasException("cusolverDnSgesvd failed", stat);
    }
  }
  allocator.registerAction(ctx, INFO);
  allocator.registerAction(ctx, S);
  if (U != null)
    allocator.registerAction(ctx, u);
  if (VT != null)
    allocator.registerAction(ctx, vt);
  // if we transposed A then swap & transpose U & V'
  if (hadToTransposeA) {
    U.assign(vt.transpose());
    VT.assign(u.transpose());
  } else {
    if (u != U)
      U.assign(u);
    if (vt != VT)
      VT.assign(vt);
  }
}
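The M < N branch relies on the transpose identity: if A = U S V', then A' = V S U', so decomposing A' yields the factors of A with U and V' swapped and transposed, which is exactly the swap performed above. A hedged check of the final factorization (assuming full U and VT were requested, and that Nd4j.diag builds a diagonal matrix from the singular-value vector):

// Reconstruct A ≈ U * diag(S) * V^T, up to floating-point error.
INDArray reconstructed = U.mmul(Nd4j.diag(S)).mmul(VT);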