Search in sources :

Example 6 with FloatPointer

use of org.bytedeco.javacpp.FloatPointer in project MindsEye by SimiaCryptus.

the class Hdf5Archive method readDataSet.

/**
 * Reads a float data set from an HDF5 file or group into a {@link Tensor}.
 * <p>
 * Data sets of rank 1 to 4 are supported. The values are read as native floats into a flat buffer,
 * which is then copied into the tensor with the first index in the outermost loop and the last
 * index in the innermost loop. The HDF5 data space and data set handles are released even when the
 * rank is unsupported or reading fails.
 *
 * @param fileGroup   HDF5 file or group
 * @param datasetName Name of data set
 * @return tensor with one dimension per data set dimension, filled with the data set values
 * @throws RuntimeException    if the data set rank is not 1, 2, 3 or 4
 * @throws ArithmeticException if the total number of elements does not fit in an {@code int}
 */
@Nullable
private Tensor readDataSet(@Nonnull Group fileGroup, CharSequence datasetName) {
    DataSet dataset = fileGroup.openDataSet(datasetName.toString());
    try {
        DataSpace space = dataset.getSpace();
        try {
            int nbDims = space.getSimpleExtentNdims();
            @Nonnull long[] dims = new long[nbDims];
            space.getSimpleExtentDims(dims);
            // Reject unsupported ranks before touching any native read buffers.
            if (nbDims < 1 || nbDims > 4) {
                throw new RuntimeException("Cannot import weights apply rank " + nbDims);
            }
            @Nonnull float[] values = readFloatValues(dataset, dims);
            @Nonnull Tensor data;
            int j = 0;
            if (nbDims == 4) {
                /* 2D Convolution weights */
                data = new Tensor((int) dims[0], (int) dims[1], (int) dims[2], (int) dims[3]);
                for (int i1 = 0; i1 < dims[0]; i1++) {
                    for (int i2 = 0; i2 < dims[1]; i2++) {
                        for (int i3 = 0; i3 < dims[2]; i3++) {
                            for (int i4 = 0; i4 < dims[3]; i4++) {
                                data.set(i1, i2, i3, i4, (double) values[j++]);
                            }
                        }
                    }
                }
            } else if (nbDims == 3) {
                data = new Tensor((int) dims[0], (int) dims[1], (int) dims[2]);
                for (int i1 = 0; i1 < dims[0]; i1++) {
                    for (int i2 = 0; i2 < dims[1]; i2++) {
                        for (int i3 = 0; i3 < dims[2]; i3++) {
                            data.set(i1, i2, i3, values[j++]);
                        }
                    }
                }
            } else if (nbDims == 2) {
                /* Dense and Recurrent weights */
                data = new Tensor((int) dims[0], (int) dims[1]);
                for (int i1 = 0; i1 < dims[0]; i1++) {
                    for (int i2 = 0; i2 < dims[1]; i2++) {
                        data.set(i1, i2, values[j++]);
                    }
                }
            } else {
                /* Bias */
                data = new Tensor((int) dims[0]);
                for (int i1 = 0; i1 < dims[0]; i1++) {
                    data.set(i1, values[j++]);
                }
            }
            return data;
        } finally {
            // Released on every path, including the unsupported-rank and read-failure paths.
            space.deallocate();
        }
    } finally {
        dataset.deallocate();
    }
}

/**
 * Reads every element of {@code dataset} as a native float into a freshly allocated Java array.
 *
 * @param dataset open HDF5 data set to read from
 * @param dims    extent of each dimension of the data set
 * @return flat array holding the product of {@code dims} elements
 * @throws ArithmeticException if the product of {@code dims} overflows an {@code int}
 */
private static float[] readFloatValues(DataSet dataset, long[] dims) {
    // Exact arithmetic: a silently truncated count would under-size the buffer handed to the native read.
    long elementCount = 1;
    for (long dim : dims) {
        elementCount = Math.multiplyExact(elementCount, dim);
    }
    float[] values = new float[Math.toIntExact(elementCount)];
    FloatPointer fp = new FloatPointer(values);
    try {
        dataset.read(fp, new DataType(PredType.NATIVE_FLOAT()));
        // Copy the values read through the pointer back into the Java array.
        fp.get(values);
    } finally {
        fp.deallocate();
    }
    return values;
}
Also used : Tensor(com.simiacryptus.mindseye.lang.Tensor) Nonnull(javax.annotation.Nonnull) FloatPointer(org.bytedeco.javacpp.FloatPointer) Nullable(javax.annotation.Nullable) Nullable(javax.annotation.Nullable)

Example 7 with FloatPointer

use of org.bytedeco.javacpp.FloatPointer in project nd4j by deeplearning4j.

the class AbstractCompressor method compress.

/**
 * Builds a compressed INDArray directly from a Java float array, bypassing the usual INDArray
 * instantiation routines.
 *
 * @param data  values to compress
 * @param shape shape handed to the shape-information provider
 * @param order ordering character handed to the shape-information provider
 * @return array created from the compressed buffer and the generated shape information
 */
@Override
public INDArray compress(float[] data, int[] shape, char order) {
    final FloatPointer source = new FloatPointer(data);
    final DataBuffer shapeBuffer = Nd4j.getShapeInfoProvider()
            .createShapeInformation(shape, order)
            .getFirst();
    // NOTE(review): the trailing 4 is presumably the size of one float in bytes — confirm against compressPointer.
    return Nd4j.createArrayFromShapeBuffer(
            compressPointer(DataBuffer.TypeEx.FLOAT, source, data.length, 4),
            shapeBuffer);
}
Also used : FloatPointer(org.bytedeco.javacpp.FloatPointer) DataBuffer(org.nd4j.linalg.api.buffer.DataBuffer) CompressedDataBuffer(org.nd4j.linalg.compression.CompressedDataBuffer)

Example 8 with FloatPointer

use of org.bytedeco.javacpp.FloatPointer in project nd4j by deeplearning4j.

the class CpuLapack method ssyev.

// =========================
// syev EigenValue/Vectors
// 
/**
 * Single-precision symmetric eigen-solver wrapper.
 * <p>
 * First calls {@code LAPACKE_ssyev_work} with a work length of -1 (LAPACK's workspace-size query
 * convention) and a one-element pointer to receive the result. If that succeeds, an array of the
 * reported length is allocated, {@code LAPACKE_ssyev} is run with that array as its output
 * argument, and on success the first {@code N} entries are assigned to {@code R}.
 *
 * @param jobz job selector, forwarded to LAPACK as a byte
 * @param uplo triangle selector, forwarded to LAPACK as a byte
 * @param N    problem size forwarded to LAPACK; also the number of entries copied into {@code R}
 * @param A    input matrix whose data buffer is handed to LAPACK
 * @param R    receives the first {@code N} entries of the LAPACK output array
 * @return the LAPACK status code; 0 means both calls succeeded
 */
@Override
public int ssyev(char jobz, char uplo, int N, INDArray A, INDArray R) {
    final FloatPointer sizeQuery = new FloatPointer(1);
    final int queryStatus = LAPACKE_ssyev_work(getColumnOrder(A), (byte) jobz, (byte) uplo, N, (FloatPointer) A.data().addressPointer(), getLda(A), (FloatPointer) R.data().addressPointer(), sizeQuery, -1);
    if (queryStatus != 0) {
        return queryStatus;
    }

    final int lwork = (int) sizeQuery.get();
    final INDArray scratch = Nd4j.createArrayFromShapeBuffer(
            Nd4j.getDataBufferFactory().createFloat(lwork),
            Nd4j.getShapeInfoProvider().createShapeInformation(new int[] { 1, lwork }).getFirst());

    final int solveStatus = LAPACKE_ssyev(getColumnOrder(A), (byte) jobz, (byte) uplo, N, (FloatPointer) A.data().addressPointer(), getLda(A), (FloatPointer) scratch.data().addressPointer());
    if (solveStatus != 0) {
        return solveStatus;
    }

    R.assign(scratch.get(NDArrayIndex.interval(0, N)));
    return solveStatus;
}
Also used : INDArray(org.nd4j.linalg.api.ndarray.INDArray) FloatPointer(org.bytedeco.javacpp.FloatPointer)

Example 9 with FloatPointer

use of org.bytedeco.javacpp.FloatPointer in project nd4j by deeplearning4j.

the class JcublasLapack method spotrf.

// =========================
// CHOLESKY DECOMP
/**
 * Single-precision Cholesky factorisation on the GPU via cuSolver ({@code cusolverDnSpotrf}).
 * <p>
 * The input is duplicated into 'f' ordering when it is 'c' ordered, factorised in place on the
 * device, and copied back into {@code A} when a duplicate was used. Afterwards the entries on one
 * side of the diagonal of {@code A} are overwritten with zero: for {@code uplo == 'U'} the matrix
 * is first replaced by its transpose and, for each row {@code i >= 1}, columns {@code 0..i} are
 * cleared; otherwise, for each row {@code i}, columns {@code i + 1..columns} are cleared.
 *
 * @param uplo triangle selector forwarded to cuSolver; {@code 'U'} selects the transposing branch
 * @param N    matrix order forwarded to cuSolver (also used as the leading dimension)
 * @param A    matrix to factorise; overwritten with the result
 * @param INFO array whose device pointer receives the cuSolver return code
 * @throws BlasException if setting the solver stream, the buffer-size query or the factorisation fails
 */
@Override
public void spotrf(byte uplo, int N, INDArray A, INDArray INFO) {
    INDArray a = A;
    // This is the float routine, so a non-FLOAT global data type is the mismatch worth reporting.
    if (Nd4j.dataType() != DataBuffer.Type.FLOAT)
        log.warn("FLOAT potrf called in DOUBLE environment");
    if (A.ordering() == 'c')
        a = A.dup('f');
    if (Nd4j.getExecutioner() instanceof GridExecutioner)
        ((GridExecutioner) Nd4j.getExecutioner()).flushQueue();
    // Get context for current thread
    CudaContext ctx = (CudaContext) allocator.getDeviceContext().getContext();
    // setup the solver handles for cuSolver calls
    cusolverDnHandle_t handle = ctx.getSolverHandle();
    cusolverDnContext solverDn = new cusolverDnContext(handle);
    // synchronized on the solver
    synchronized (handle) {
        int result = cusolverDnSetStream(solverDn, new CUstream_st(ctx.getOldStream()));
        if (result != 0)
            throw new BlasException("solverSetStream failed");
        // transfer the INDArray into GPU memory
        CublasPointer xAPointer = new CublasPointer(a, ctx);
        // this output - indicates how much memory we'll need for the real operation
        DataBuffer worksizeBuffer = Nd4j.getDataBufferFactory().createInt(1);
        // we intentionally use host pointer for the work-size output here
        int stat = cusolverDnSpotrf_bufferSize(solverDn, uplo, N, (FloatPointer) xAPointer.getDevicePointer(), N,
                (IntPointer) worksizeBuffer.addressPointer());
        if (stat != CUSOLVER_STATUS_SUCCESS) {
            throw new BlasException("cusolverDnSpotrf_bufferSize failed", stat);
        }
        int worksize = worksizeBuffer.getInt(0);
        // Now allocate memory for the workspace, the permutation matrix and a return code
        Pointer workspace = new Workspace(worksize * Nd4j.sizeOfDataType());
        // Do the actual decomp
        stat = cusolverDnSpotrf(solverDn, uplo, N, (FloatPointer) xAPointer.getDevicePointer(), N, new CudaPointer(workspace).asFloatPointer(), worksize, new CudaPointer(allocator.getPointer(INFO, ctx)).asIntPointer());
        if (stat != CUSOLVER_STATUS_SUCCESS) {
            throw new BlasException("cusolverDnSpotrf failed", stat);
        }
    }
    allocator.registerAction(ctx, a);
    allocator.registerAction(ctx, INFO);
    // Copy the result back when the factorisation ran on an 'f'-ordered duplicate.
    if (a != A)
        A.assign(a);
    if (uplo == 'U') {
        A.assign(A.transpose());
        INDArrayIndex[] ix = new INDArrayIndex[2];
        for (int i = 1; i < Math.min(A.rows(), A.columns()); i++) {
            ix[0] = NDArrayIndex.point(i);
            ix[1] = NDArrayIndex.interval(0, i);
            A.put(ix, 0);
        }
    } else {
        INDArrayIndex[] ix = new INDArrayIndex[2];
        for (int i = 0; i < Math.min(A.rows(), A.columns() - 1); i++) {
            ix[0] = NDArrayIndex.point(i);
            ix[1] = NDArrayIndex.interval(i + 1, A.columns());
            A.put(ix, 0);
        }
    }
    // Diagnostic dump of the full matrix; kept at debug level so normal runs do not stringify it.
    log.debug("A: {}", A);
}
Also used : CUstream_st(org.bytedeco.javacpp.cuda.CUstream_st) CudaContext(org.nd4j.linalg.jcublas.context.CudaContext) INDArrayIndex(org.nd4j.linalg.indexing.INDArrayIndex) CublasPointer(org.nd4j.linalg.jcublas.CublasPointer) CudaPointer(org.nd4j.jita.allocator.pointers.CudaPointer) DoublePointer(org.bytedeco.javacpp.DoublePointer) IntPointer(org.bytedeco.javacpp.IntPointer) FloatPointer(org.bytedeco.javacpp.FloatPointer) Pointer(org.bytedeco.javacpp.Pointer) org.nd4j.jita.allocator.pointers.cuda.cusolverDnHandle_t(org.nd4j.jita.allocator.pointers.cuda.cusolverDnHandle_t) GridExecutioner(org.nd4j.linalg.api.ops.executioner.GridExecutioner) BlasException(org.nd4j.linalg.api.blas.BlasException) INDArray(org.nd4j.linalg.api.ndarray.INDArray) FloatPointer(org.bytedeco.javacpp.FloatPointer) CudaPointer(org.nd4j.jita.allocator.pointers.CudaPointer) CublasPointer(org.nd4j.linalg.jcublas.CublasPointer) DataBuffer(org.nd4j.linalg.api.buffer.DataBuffer)

Example 10 with FloatPointer

use of org.bytedeco.javacpp.FloatPointer in project nd4j by deeplearning4j.

the class JcublasLapack method sgesvd.

/**
 * Single-precision singular value decomposition on the GPU via cuSolver ({@code cusolverDnSgesvd}).
 * <p>
 * When {@code M < N} the routine works on the transpose of {@code A}: {@code M} and {@code N} are
 * swapped, the roles of {@code U} and {@code VT} are exchanged, and the results are transposed back
 * at the end. Otherwise any 'c'-ordered input is duplicated into 'f' ordering and the results are
 * copied back into the caller's arrays. {@code U} and {@code VT} may be {@code null}; a
 * {@code null} array is passed to cuSolver as a null pointer and is never written to.
 *
 * @param jobu  job selector for the left vectors, forwarded unchanged to cuSolver
 * @param jobvt job selector for the right vectors, forwarded unchanged to cuSolver
 * @param M     number of rows of {@code A}
 * @param N     number of columns of {@code A}
 * @param A     input matrix
 * @param S     array whose device pointer receives the singular values
 * @param U     receives the left vectors, or {@code null}
 * @param VT    receives the right vectors (transposed), or {@code null}
 * @param INFO  array whose device pointer receives the cuSolver return code
 * @throws BlasException if setting the solver stream, the buffer-size query or the decomposition fails
 */
@Override
public void sgesvd(byte jobu, byte jobvt, int M, int N, INDArray A, INDArray S, INDArray U, INDArray VT, INDArray INFO) {
    if (Nd4j.dataType() != DataBuffer.Type.FLOAT)
        log.warn("FLOAT gesvd called in DOUBLE environment");
    INDArray a = A;
    INDArray u = U;
    INDArray vt = VT;
    // we should transpose & adjust outputs if M<N
    // cuda has a limitation, but it's OK we know
    // A = U S V'
    // transpose multiply rules give us ...
    // A' = V S' U'
    boolean hadToTransposeA = false;
    if (M < N) {
        hadToTransposeA = true;
        int tmp1 = N;
        N = M;
        M = tmp1;
        a = A.transpose().dup('f');
        // U and VT are optional, so only duplicate the ones the caller actually supplied.
        u = VT == null ? null : VT.dup('f');
        vt = U == null ? null : U.dup('f');
        // NOTE(review): jobu/jobvt are not swapped for the transposed problem; when they differ
        // (or only one of U/VT is supplied) the flags may not match the swapped buffers — confirm.
    } else {
        // cuda requires column ordering - we'll register a warning in case
        if (A.ordering() == 'c')
            a = A.dup('f');
        if (U != null && U.ordering() == 'c')
            u = U.dup('f');
        if (VT != null && VT.ordering() == 'c')
            vt = VT.dup('f');
    }
    if (Nd4j.getExecutioner() instanceof GridExecutioner)
        ((GridExecutioner) Nd4j.getExecutioner()).flushQueue();
    // Get context for current thread
    CudaContext ctx = (CudaContext) allocator.getDeviceContext().getContext();
    // setup the solver handles for cuSolver calls
    cusolverDnHandle_t handle = ctx.getSolverHandle();
    cusolverDnContext solverDn = new cusolverDnContext(handle);
    // synchronized on the solver
    synchronized (handle) {
        int result = cusolverDnSetStream(solverDn, new CUstream_st(ctx.getOldStream()));
        if (result != 0)
            throw new BlasException("solverSetStream failed");
        // transfer the INDArray into GPU memory
        CublasPointer xAPointer = new CublasPointer(a, ctx);
        // this output - indicates how much memory we'll need for the real operation
        DataBuffer worksizeBuffer = Nd4j.getDataBufferFactory().createInt(1);
        // we intentionally use host pointer for the work-size output here
        int stat = cusolverDnSgesvd_bufferSize(solverDn, M, N,
                (IntPointer) worksizeBuffer.addressPointer());
        if (stat != CUSOLVER_STATUS_SUCCESS) {
            throw new BlasException("cusolverDnSgesvd_bufferSize failed", stat);
        }
        int worksize = worksizeBuffer.getInt(0);
        Pointer workspace = new Workspace(worksize * Nd4j.sizeOfDataType());
        DataBuffer rwork = Nd4j.getDataBufferFactory().createFloat((M < N ? M : N) - 1);
        // Do the actual decomp; the null checks are on the arrays whose pointers are actually taken.
        stat = cusolverDnSgesvd(solverDn, jobu, jobvt, M, N, (FloatPointer) xAPointer.getDevicePointer(), M, new CudaPointer(allocator.getPointer(S, ctx)).asFloatPointer(), u == null ? null : new CudaPointer(allocator.getPointer(u, ctx)).asFloatPointer(), M, vt == null ? null : new CudaPointer(allocator.getPointer(vt, ctx)).asFloatPointer(), N, new CudaPointer(workspace).asFloatPointer(), worksize, new CudaPointer(allocator.getPointer(rwork, ctx)).asFloatPointer(), new CudaPointer(allocator.getPointer(INFO, ctx)).asIntPointer());
        if (stat != CUSOLVER_STATUS_SUCCESS) {
            throw new BlasException("cusolverDnSgesvd failed", stat);
        }
    }
    allocator.registerAction(ctx, INFO);
    allocator.registerAction(ctx, S);
    if (u != null)
        allocator.registerAction(ctx, u);
    if (vt != null)
        allocator.registerAction(ctx, vt);
    // if we transposed A then swap & transpose U & V'
    if (hadToTransposeA) {
        // In this branch vt was derived from U and u from VT, so each guard also covers its target.
        if (vt != null)
            U.assign(vt.transpose());
        if (u != null)
            VT.assign(u.transpose());
    } else {
        if (u != U)
            U.assign(u);
        if (vt != VT)
            VT.assign(vt);
    }
}
Also used : CUstream_st(org.bytedeco.javacpp.cuda.CUstream_st) CudaContext(org.nd4j.linalg.jcublas.context.CudaContext) CublasPointer(org.nd4j.linalg.jcublas.CublasPointer) CudaPointer(org.nd4j.jita.allocator.pointers.CudaPointer) DoublePointer(org.bytedeco.javacpp.DoublePointer) IntPointer(org.bytedeco.javacpp.IntPointer) FloatPointer(org.bytedeco.javacpp.FloatPointer) Pointer(org.bytedeco.javacpp.Pointer) org.nd4j.jita.allocator.pointers.cuda.cusolverDnHandle_t(org.nd4j.jita.allocator.pointers.cuda.cusolverDnHandle_t) GridExecutioner(org.nd4j.linalg.api.ops.executioner.GridExecutioner) BlasException(org.nd4j.linalg.api.blas.BlasException) INDArray(org.nd4j.linalg.api.ndarray.INDArray) FloatPointer(org.bytedeco.javacpp.FloatPointer) CudaPointer(org.nd4j.jita.allocator.pointers.CudaPointer) CublasPointer(org.nd4j.linalg.jcublas.CublasPointer) DataBuffer(org.nd4j.linalg.api.buffer.DataBuffer)

Aggregations

FloatPointer (org.bytedeco.javacpp.FloatPointer)30 CudaContext (org.nd4j.linalg.jcublas.context.CudaContext)15 CublasPointer (org.nd4j.linalg.jcublas.CublasPointer)14 IntPointer (org.bytedeco.javacpp.IntPointer)11 INDArray (org.nd4j.linalg.api.ndarray.INDArray)11 DoublePointer (org.bytedeco.javacpp.DoublePointer)9 CUstream_st (org.bytedeco.javacpp.cuda.CUstream_st)9 org.nd4j.jita.allocator.pointers.cuda.cublasHandle_t (org.nd4j.jita.allocator.pointers.cuda.cublasHandle_t)9 DataBuffer (org.nd4j.linalg.api.buffer.DataBuffer)7 Pointer (org.bytedeco.javacpp.Pointer)6 BlasException (org.nd4j.linalg.api.blas.BlasException)6 BytePointer (org.bytedeco.javacpp.BytePointer)5 ShortPointer (org.bytedeco.javacpp.ShortPointer)5 CudaPointer (org.nd4j.jita.allocator.pointers.CudaPointer)5 org.nd4j.jita.allocator.pointers.cuda.cusolverDnHandle_t (org.nd4j.jita.allocator.pointers.cuda.cusolverDnHandle_t)5 GridExecutioner (org.nd4j.linalg.api.ops.executioner.GridExecutioner)5 ByteBuffer (java.nio.ByteBuffer)4 DoubleBuffer (java.nio.DoubleBuffer)4 FloatBuffer (java.nio.FloatBuffer)4 IntBuffer (java.nio.IntBuffer)4