Example 6 with FloatPointer

use of org.bytedeco.javacpp.FloatPointer in project MindsEye by SimiaCryptus.

the class Hdf5Archive method readDataSet.

 * Read a data set from an HDF5 file or group into a Tensor.
 * @param fileGroup   HDF5 file or group
 * @param datasetName Name of the data set
 * @return the contents of the data set as a Tensor
private Tensor readDataSet(@Nonnull Group fileGroup, CharSequence datasetName) {
    DataSet dataset = fileGroup.openDataSet(datasetName.toString());
    DataSpace space = dataset.getSpace();
    int nbDims = space.getSimpleExtentNdims();
    @Nonnull long[] dims = new long[nbDims];
    @Nullable float[] dataBuffer = null;
    @Nullable FloatPointer fp = null;
    int j = 0;
    @Nonnull DataType dataType = new DataType(PredType.NATIVE_FLOAT());
    @Nullable Tensor data = null;
    switch(nbDims) {
        case 4:
            /* 2D Convolution weights */
            dataBuffer = new float[(int) (dims[0] * dims[1] * dims[2] * dims[3])];
            fp = new FloatPointer(dataBuffer);
  , dataType);
            data = new Tensor((int) dims[0], (int) dims[1], (int) dims[2], (int) dims[3]);
            j = 0;
            for (int i1 = 0; i1 < dims[0]; i1++) for (int i2 = 0; i2 < dims[1]; i2++) for (int i3 = 0; i3 < dims[2]; i3++) for (int i4 = 0; i4 < dims[3]; i4++) data.set(i1, i2, i3, i4, (double) dataBuffer[j++]);
        case 3:
            dataBuffer = new float[(int) (dims[0] * dims[1] * dims[2])];
            fp = new FloatPointer(dataBuffer);
  , dataType);
            data = new Tensor((int) dims[0], (int) dims[1], (int) dims[2]);
            j = 0;
            for (int i1 = 0; i1 < dims[0]; i1++) for (int i2 = 0; i2 < dims[1]; i2++) for (int i3 = 0; i3 < dims[2]; i3++) data.set(i1, i2, i3, dataBuffer[j++]);
        case 2:
            /* Dense and Recurrent weights */
            dataBuffer = new float[(int) (dims[0] * dims[1])];
            fp = new FloatPointer(dataBuffer);
  , dataType);
            data = new Tensor((int) dims[0], (int) dims[1]);
            j = 0;
            for (int i1 = 0; i1 < dims[0]; i1++) for (int i2 = 0; i2 < dims[1]; i2++) data.set(i1, i2, dataBuffer[j++]);
        case 1:
            /* Bias */
            dataBuffer = new float[(int) dims[0]];
            fp = new FloatPointer(dataBuffer);
  , dataType);
            data = new Tensor((int) dims[0]);
            j = 0;
            for (int i1 = 0; i1 < dims[0]; i1++) data.set(i1, dataBuffer[j++]);
            throw new RuntimeException("Cannot import weights apply rank " + nbDims);
    return data;
Also used : Tensor(com.simiacryptus.mindseye.lang.Tensor) Nonnull(javax.annotation.Nonnull) FloatPointer(org.bytedeco.javacpp.FloatPointer) Nullable(javax.annotation.Nullable) Nullable(javax.annotation.Nullable)

Example 7 with FloatPointer

use of org.bytedeco.javacpp.FloatPointer in project nd4j by deeplearning4j.

the class AbstractCompressor method compress.

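 * This method creates a compressed INDArray from a Java float array, skipping the usual INDArray instantiation routines.
 * @param data  the float values to compress
 * @param shape the shape used to build the shape information of the result
 * @param order the ordering character used to build the shape information of the result
 * @return the INDArray created from the compressed buffer and the shape information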
public INDArray compress(float[] data, int[] shape, char order) {
    FloatPointer pointer = new FloatPointer(data);
    DataBuffer shapeInfo = Nd4j.getShapeInfoProvider().createShapeInformation(shape, order).getFirst();
    DataBuffer buffer = compressPointer(DataBuffer.TypeEx.FLOAT, pointer, data.length, 4);
    return Nd4j.createArrayFromShapeBuffer(buffer, shapeInfo);
Also used : FloatPointer(org.bytedeco.javacpp.FloatPointer) DataBuffer(org.nd4j.linalg.api.buffer.DataBuffer) CompressedDataBuffer(org.nd4j.linalg.compression.CompressedDataBuffer)

Example 8 with FloatPointer

use of org.bytedeco.javacpp.FloatPointer in project nd4j by deeplearning4j.

the class CpuLapack method ssyev.

// =========================
// syev EigenValue/Vectors
public int ssyev(char jobz, char uplo, int N, INDArray A, INDArray R) {
    FloatPointer fp = new FloatPointer(1);
    int status = LAPACKE_ssyev_work(getColumnOrder(A), (byte) jobz, (byte) uplo, N, (FloatPointer), getLda(A), (FloatPointer), fp, -1);
    if (status == 0) {
        int lwork = (int) fp.get();
        INDArray work = Nd4j.createArrayFromShapeBuffer(Nd4j.getDataBufferFactory().createFloat(lwork), Nd4j.getShapeInfoProvider().createShapeInformation(new int[] { 1, lwork }).getFirst());
        status = LAPACKE_ssyev(getColumnOrder(A), (byte) jobz, (byte) uplo, N, (FloatPointer), getLda(A), (FloatPointer);
        if (status == 0) {
            R.assign(work.get(NDArrayIndex.interval(0, N)));
    return status;
Also used : INDArray(org.nd4j.linalg.api.ndarray.INDArray) FloatPointer(org.bytedeco.javacpp.FloatPointer)

Example 9 with FloatPointer

use of org.bytedeco.javacpp.FloatPointer in project nd4j by deeplearning4j.

the class JcublasLapack method spotrf.

// =========================
public void spotrf(byte uplo, int N, INDArray A, INDArray INFO) {
    INDArray a = A;
    if (Nd4j.dataType() != DataBuffer.Type.FLOAT)
        log.warn("DOUBLE potrf called in FLOAT environment");
    if (A.ordering() == 'c')
        a = A.dup('f');
    if (Nd4j.getExecutioner() instanceof GridExecutioner)
        ((GridExecutioner) Nd4j.getExecutioner()).flushQueue();
    // Get context for current thread
    CudaContext ctx = (CudaContext) allocator.getDeviceContext().getContext();
    // setup the solver handles for cuSolver calls
    cusolverDnHandle_t handle = ctx.getSolverHandle();
    cusolverDnContext solverDn = new cusolverDnContext(handle);
    // synchronized on the solver
    synchronized (handle) {
        int result = cusolverDnSetStream(new cusolverDnContext(handle), new CUstream_st(ctx.getOldStream()));
        if (result != 0)
            throw new BlasException("solverSetStream failed");
        // transfer the INDArray into GPU memory
        CublasPointer xAPointer = new CublasPointer(a, ctx);
        // this output - indicates how much memory we'll need for the real operation
        DataBuffer worksizeBuffer = Nd4j.getDataBufferFactory().createInt(1);
        int stat = cusolverDnSpotrf_bufferSize(solverDn, uplo, N, (FloatPointer) xAPointer.getDevicePointer(), N, // we intentionally use host pointer here
        (IntPointer) worksizeBuffer.addressPointer());
        if (stat != CUSOLVER_STATUS_SUCCESS) {
            throw new BlasException("cusolverDnSpotrf_bufferSize failed", stat);
        int worksize = worksizeBuffer.getInt(0);
        // Now allocate memory for the workspace, the permutation matrix and a return code
        Pointer workspace = new Workspace(worksize * Nd4j.sizeOfDataType());
        // Do the actual decomp
        stat = cusolverDnSpotrf(solverDn, uplo, N, (FloatPointer) xAPointer.getDevicePointer(), N, new CudaPointer(workspace).asFloatPointer(), worksize, new CudaPointer(allocator.getPointer(INFO, ctx)).asIntPointer());
        if (stat != CUSOLVER_STATUS_SUCCESS) {
            throw new BlasException("cusolverDnSpotrf failed", stat);
    allocator.registerAction(ctx, a);
    allocator.registerAction(ctx, INFO);
    if (a != A)
    if (uplo == 'U') {
        INDArrayIndex[] ix = new INDArrayIndex[2];
        for (int i = 1; i < Math.min(A.rows(), A.columns()); i++) {
            ix[0] = NDArrayIndex.point(i);
            ix[1] = NDArrayIndex.interval(0, i);
            A.put(ix, 0);
    } else {
        INDArrayIndex[] ix = new INDArrayIndex[2];
        for (int i = 0; i < Math.min(A.rows(), A.columns() - 1); i++) {
            ix[0] = NDArrayIndex.point(i);
            ix[1] = NDArrayIndex.interval(i + 1, A.columns());
            A.put(ix, 0);
    }"A: {}", A);
Also used : CUstream_st(org.bytedeco.javacpp.cuda.CUstream_st) CudaContext(org.nd4j.linalg.jcublas.context.CudaContext) INDArrayIndex(org.nd4j.linalg.indexing.INDArrayIndex) CublasPointer(org.nd4j.linalg.jcublas.CublasPointer) CudaPointer(org.nd4j.jita.allocator.pointers.CudaPointer) DoublePointer(org.bytedeco.javacpp.DoublePointer) IntPointer(org.bytedeco.javacpp.IntPointer) FloatPointer(org.bytedeco.javacpp.FloatPointer) Pointer(org.bytedeco.javacpp.Pointer) org.nd4j.jita.allocator.pointers.cuda.cusolverDnHandle_t(org.nd4j.jita.allocator.pointers.cuda.cusolverDnHandle_t) GridExecutioner(org.nd4j.linalg.api.ops.executioner.GridExecutioner) BlasException(org.nd4j.linalg.api.blas.BlasException) INDArray(org.nd4j.linalg.api.ndarray.INDArray) FloatPointer(org.bytedeco.javacpp.FloatPointer) CudaPointer(org.nd4j.jita.allocator.pointers.CudaPointer) CublasPointer(org.nd4j.linalg.jcublas.CublasPointer) DataBuffer(org.nd4j.linalg.api.buffer.DataBuffer)

Example 10 with FloatPointer

use of org.bytedeco.javacpp.FloatPointer in project nd4j by deeplearning4j.

the class JcublasLapack method sgesvd.

public void sgesvd(byte jobu, byte jobvt, int M, int N, INDArray A, INDArray S, INDArray U, INDArray VT, INDArray INFO) {
    if (Nd4j.dataType() != DataBuffer.Type.FLOAT)
        log.warn("FLOAT gesvd called in DOUBLE environment");
    INDArray a = A;
    INDArray u = U;
    INDArray vt = VT;
    // we should transpose & adjust outputs if M<N
    // cuda has a limitation, but it's OK we know
    // A = U S V'
    // transpose multiply rules give us ...
    // A' = V S' U'
    boolean hadToTransposeA = false;
    if (M < N) {
        hadToTransposeA = true;
        int tmp1 = N;
        N = M;
        M = tmp1;
        a = A.transpose().dup('f');
        u = VT.dup('f');
        vt = U.dup('f');
    } else {
        // cuda requires column ordering - we'll register a warning in case
        if (A.ordering() == 'c')
            a = A.dup('f');
        if (U != null && U.ordering() == 'c')
            u = U.dup('f');
        if (VT != null && VT.ordering() == 'c')
            vt = VT.dup('f');
    if (Nd4j.getExecutioner() instanceof GridExecutioner)
        ((GridExecutioner) Nd4j.getExecutioner()).flushQueue();
    // Get context for current thread
    CudaContext ctx = (CudaContext) allocator.getDeviceContext().getContext();
    // setup the solver handles for cuSolver calls
    cusolverDnHandle_t handle = ctx.getSolverHandle();
    cusolverDnContext solverDn = new cusolverDnContext(handle);
    // synchronized on the solver
    synchronized (handle) {
        int result = cusolverDnSetStream(new cusolverDnContext(handle), new CUstream_st(ctx.getOldStream()));
        if (result != 0)
            throw new BlasException("solverSetStream failed");
        // transfer the INDArray into GPU memory
        CublasPointer xAPointer = new CublasPointer(a, ctx);
        // this output - indicates how much memory we'll need for the real operation
        DataBuffer worksizeBuffer = Nd4j.getDataBufferFactory().createInt(1);
        int stat = cusolverDnSgesvd_bufferSize(// we intentionally use host pointer here
        solverDn, // we intentionally use host pointer here
        M, // we intentionally use host pointer here
        N, // we intentionally use host pointer here
        (IntPointer) worksizeBuffer.addressPointer());
        if (stat != CUSOLVER_STATUS_SUCCESS) {
            throw new BlasException("cusolverDnSgesvd_bufferSize failed", stat);
        int worksize = worksizeBuffer.getInt(0);
        Pointer workspace = new Workspace(worksize * Nd4j.sizeOfDataType());
        DataBuffer rwork = Nd4j.getDataBufferFactory().createFloat((M < N ? M : N) - 1);
        // Do the actual decomp
        stat = cusolverDnSgesvd(solverDn, jobu, jobvt, M, N, (FloatPointer) xAPointer.getDevicePointer(), M, new CudaPointer(allocator.getPointer(S, ctx)).asFloatPointer(), U == null ? null : new CudaPointer(allocator.getPointer(u, ctx)).asFloatPointer(), M, VT == null ? null : new CudaPointer(allocator.getPointer(vt, ctx)).asFloatPointer(), N, new CudaPointer(workspace).asFloatPointer(), worksize, new CudaPointer(allocator.getPointer(rwork, ctx)).asFloatPointer(), new CudaPointer(allocator.getPointer(INFO, ctx)).asIntPointer());
        if (stat != CUSOLVER_STATUS_SUCCESS) {
            throw new BlasException("cusolverDnSgesvd failed", stat);
    allocator.registerAction(ctx, INFO);
    allocator.registerAction(ctx, S);
    if (U != null)
        allocator.registerAction(ctx, u);
    if (VT != null)
        allocator.registerAction(ctx, vt);
    // if we transposed A then swap & transpose U & V'
    if (hadToTransposeA) {
    } else {
        if (u != U)
        if (vt != VT)
Also used : CUstream_st(org.bytedeco.javacpp.cuda.CUstream_st) CudaContext(org.nd4j.linalg.jcublas.context.CudaContext) CublasPointer(org.nd4j.linalg.jcublas.CublasPointer) CudaPointer(org.nd4j.jita.allocator.pointers.CudaPointer) DoublePointer(org.bytedeco.javacpp.DoublePointer) IntPointer(org.bytedeco.javacpp.IntPointer) FloatPointer(org.bytedeco.javacpp.FloatPointer) Pointer(org.bytedeco.javacpp.Pointer) org.nd4j.jita.allocator.pointers.cuda.cusolverDnHandle_t(org.nd4j.jita.allocator.pointers.cuda.cusolverDnHandle_t) GridExecutioner(org.nd4j.linalg.api.ops.executioner.GridExecutioner) BlasException(org.nd4j.linalg.api.blas.BlasException) INDArray(org.nd4j.linalg.api.ndarray.INDArray) FloatPointer(org.bytedeco.javacpp.FloatPointer) CudaPointer(org.nd4j.jita.allocator.pointers.CudaPointer) CublasPointer(org.nd4j.linalg.jcublas.CublasPointer) DataBuffer(org.nd4j.linalg.api.buffer.DataBuffer)


