Search in sources :

Example 11 with FloatPointer

use of org.bytedeco.javacpp.FloatPointer in project nd4j by deeplearning4j.

the class JcublasLevel2 method sgemv.

/**
 * Single-precision matrix-vector product executed through cuBLAS ({@code cublasSgemv_v2}).
 *
 * <p>A warning is logged when the global ND4J data type is not FLOAT. The call is issued on the
 * cuBLAS handle of the context prepared for {@code Y}, {@code A} and {@code X}, after that handle
 * has been bound to the context's "old" stream. The return codes of the cuBLAS calls are not
 * inspected here.
 *
 * @param order  not read by this implementation
 * @param TransA transpose flag for {@code A}, translated with {@code convertTranspose}
 * @param M      forwarded to cuBLAS as {@code m}
 * @param N      forwarded to cuBLAS as {@code n}
 * @param alpha  scalar passed to cuBLAS through a host-side {@link FloatPointer}
 * @param A      matrix operand
 * @param lda    forwarded to cuBLAS as {@code lda}
 * @param X      input vector operand
 * @param incX   forwarded to cuBLAS as {@code incx}
 * @param beta   scalar passed to cuBLAS through a host-side {@link FloatPointer}
 * @param Y      output vector operand; checked with {@code OpExecutionerUtil.checkForAny} afterwards
 * @param incY   forwarded to cuBLAS as {@code incy}
 */
@Override
protected void sgemv(char order, char TransA, int M, int N, float alpha, INDArray A, int lda, INDArray X, int incX, float beta, INDArray Y, int incY) {
    if (Nd4j.dataType() != DataBuffer.Type.FLOAT) {
        logger.warn("FLOAT gemv called");
    }

    Nd4j.getExecutioner().push();

    final CudaContext context = allocator.getFlowController().prepareAction(Y, A, X);
    final CublasPointer matrixA = new CublasPointer(A, context);
    final CublasPointer vectorX = new CublasPointer(X, context);
    final CublasPointer vectorY = new CublasPointer(Y, context);
    final cublasHandle_t cublasHandle = context.getHandle();

    // The handle is shared, so stream binding and the gemv call happen under its monitor.
    synchronized (cublasHandle) {
        cublasSetStream_v2(new cublasContext(cublasHandle), new CUstream_st(context.getOldStream()));
        cublasSgemv_v2(
                new cublasContext(cublasHandle),
                convertTranspose(TransA),
                M,
                N,
                new FloatPointer(alpha),
                (FloatPointer) matrixA.getDevicePointer(),
                lda,
                (FloatPointer) vectorX.getDevicePointer(),
                incX,
                new FloatPointer(beta),
                (FloatPointer) vectorY.getDevicePointer(),
                incY);
    }

    allocator.registerAction(context, Y, A, X);
    OpExecutionerUtil.checkForAny(Y);
}
Also used : CUstream_st(org.bytedeco.javacpp.cuda.CUstream_st) org.nd4j.jita.allocator.pointers.cuda.cublasHandle_t(org.nd4j.jita.allocator.pointers.cuda.cublasHandle_t) FloatPointer(org.bytedeco.javacpp.FloatPointer) CudaContext(org.nd4j.linalg.jcublas.context.CudaContext) CublasPointer(org.nd4j.linalg.jcublas.CublasPointer)

Example 12 with FloatPointer

use of org.bytedeco.javacpp.FloatPointer in project nd4j by deeplearning4j.

the class JcublasLevel1 method sdot.

/**
 * Single-precision dot product of {@code X} and {@code Y} executed through cuBLAS
 * ({@code cublasSdot_v2}).
 *
 * <p>A warning is logged when the global ND4J data type is not FLOAT, and both operands are
 * required to share a data type. The call is issued on the cuBLAS handle of the context prepared
 * for {@code X} and {@code Y}, after binding that handle to the context's "old" stream.
 *
 * @param N    forwarded to cuBLAS as {@code n}
 * @param X    first vector operand
 * @param incX forwarded to cuBLAS as {@code incx}
 * @param Y    second vector operand
 * @param incY forwarded to cuBLAS as {@code incy}
 * @return the value cuBLAS wrote into the host-side result pointer
 * @throws IllegalStateException if {@code cublasSetStream_v2} or {@code cublasSdot_v2} reports a
 *                               non-zero status
 */
@Override
protected float sdot(int N, INDArray X, int incX, INDArray Y, int incY) {
    if (Nd4j.dataType() != DataBuffer.Type.FLOAT) {
        logger.warn("FLOAT dot called");
    }
    DataTypeValidation.assertSameDataType(X, Y);
    Nd4j.getExecutioner().push();
    CudaContext ctx = allocator.getFlowController().prepareAction(null, X, Y);
    CublasPointer xCPointer = new CublasPointer(X, ctx);
    CublasPointer yCPointer = new CublasPointer(Y, ctx);
    cublasHandle_t handle = ctx.getHandle();
    final float ret;
    synchronized (handle) {
        long result = cublasSetStream_v2(new cublasContext(handle), new CUstream_st(ctx.getOldStream()));
        if (result != 0) {
            throw new IllegalStateException("cublasSetStream failed");
        }
        FloatPointer resultPointer = new FloatPointer(0.0f);
        result = cublasSdot_v2(new cublasContext(handle), N, (FloatPointer) xCPointer.getDevicePointer(), incX, (FloatPointer) yCPointer.getDevicePointer(), incY, resultPointer);
        // Without this check a failed call would silently hand back the 0.0f the result
        // pointer was initialised with, which is indistinguishable from a real zero product.
        if (result != 0) {
            throw new IllegalStateException("cublasSdot failed with status " + result);
        }
        ret = resultPointer.get();
    }
    allocator.registerAction(ctx, null, X, Y);
    return ret;
}
Also used : CUstream_st(org.bytedeco.javacpp.cuda.CUstream_st) org.nd4j.jita.allocator.pointers.cuda.cublasHandle_t(org.nd4j.jita.allocator.pointers.cuda.cublasHandle_t) FloatPointer(org.bytedeco.javacpp.FloatPointer) CudaContext(org.nd4j.linalg.jcublas.context.CudaContext) CublasPointer(org.nd4j.linalg.jcublas.CublasPointer)

Example 13 with FloatPointer

use of org.bytedeco.javacpp.FloatPointer in project nd4j by deeplearning4j.

the class JcublasLevel3 method ssymm.

/**
 * Single-precision symmetric matrix-matrix product executed through cuBLAS
 * ({@code cublasSsymm_v2}).
 *
 * <p>A warning is logged when the global ND4J data type is not FLOAT. The call is issued on the
 * cuBLAS handle of the context prepared for {@code C}, {@code A} and {@code B}, after that handle
 * has been bound to the context's "old" stream. The return codes of the cuBLAS calls are not
 * inspected here.
 *
 * @param Order not read by this implementation
 * @param Side  side selector, translated with {@code convertSideMode}
 * @param Uplo  triangle selector, translated with {@code convertUplo}
 * @param M     forwarded to cuBLAS as {@code m}
 * @param N     forwarded to cuBLAS as {@code n}
 * @param alpha scalar passed to cuBLAS through a host-side {@link FloatPointer}
 * @param A     first matrix operand
 * @param lda   forwarded to cuBLAS as {@code lda}
 * @param B     second matrix operand
 * @param ldb   forwarded to cuBLAS as {@code ldb}
 * @param beta  scalar passed to cuBLAS through a host-side {@link FloatPointer}
 * @param C     output matrix; checked with {@code OpExecutionerUtil.checkForAny} afterwards
 * @param ldc   forwarded to cuBLAS as {@code ldc}
 */
@Override
protected void ssymm(char Order, char Side, char Uplo, int M, int N, float alpha, INDArray A, int lda, INDArray B, int ldb, float beta, INDArray C, int ldc) {
    if (Nd4j.dataType() != DataBuffer.Type.FLOAT) {
        logger.warn("FLOAT symm called");
    }

    Nd4j.getExecutioner().push();

    final CudaContext context = allocator.getFlowController().prepareAction(C, A, B);
    final CublasPointer matrixA = new CublasPointer(A, context);
    final CublasPointer matrixB = new CublasPointer(B, context);
    final CublasPointer matrixC = new CublasPointer(C, context);
    final cublasHandle_t cublasHandle = context.getHandle();

    // The handle is shared, so stream binding and the symm call happen under its monitor.
    synchronized (cublasHandle) {
        cublasSetStream_v2(new cublasContext(cublasHandle), new CUstream_st(context.getOldStream()));
        cublasSsymm_v2(
                new cublasContext(cublasHandle),
                convertSideMode(Side),
                convertUplo(Uplo),
                M,
                N,
                new FloatPointer(alpha),
                (FloatPointer) matrixA.getDevicePointer(),
                lda,
                (FloatPointer) matrixB.getDevicePointer(),
                ldb,
                new FloatPointer(beta),
                (FloatPointer) matrixC.getDevicePointer(),
                ldc);
    }

    allocator.registerAction(context, C, A, B);
    OpExecutionerUtil.checkForAny(C);
}
Also used : org.nd4j.jita.allocator.pointers.cuda.cublasHandle_t(org.nd4j.jita.allocator.pointers.cuda.cublasHandle_t) FloatPointer(org.bytedeco.javacpp.FloatPointer) CudaContext(org.nd4j.linalg.jcublas.context.CudaContext) CublasPointer(org.nd4j.linalg.jcublas.CublasPointer)

Example 14 with FloatPointer

use of org.bytedeco.javacpp.FloatPointer in project nd4j by deeplearning4j.

the class JcublasLevel3 method hgemm.

/**
 * Half-precision matrix-matrix product executed through cuBLAS.
 *
 * <p>Two code paths exist. When {@code CUDA_VERSION >= 8000} and the current device architecture
 * is 53 or 60, {@code cublasHgemm} is called with {@code alpha}/{@code beta} converted to half
 * precision. Otherwise {@code cublasSgemmEx} is called with float {@code alpha}/{@code beta} and
 * data type code {@code 2} for all three matrices. The return codes of the cuBLAS calls are not
 * inspected here.
 *
 * @param Order  not read by this implementation
 * @param TransA transpose flag for {@code A}, translated with {@code convertTranspose}
 * @param TransB transpose flag for {@code B}, translated with {@code convertTranspose}
 * @param M      forwarded to cuBLAS as {@code m}
 * @param N      forwarded to cuBLAS as {@code n}
 * @param K      forwarded to cuBLAS as {@code k}
 * @param alpha  scalar multiplier
 * @param A      first matrix operand
 * @param lda    forwarded to cuBLAS as {@code lda}
 * @param B      second matrix operand
 * @param ldb    forwarded to cuBLAS as {@code ldb}
 * @param beta   scalar multiplier
 * @param C      output matrix; checked with {@code OpExecutionerUtil.checkForAny} afterwards
 * @param ldc    forwarded to cuBLAS as {@code ldc}
 */
@Override
protected void hgemm(char Order, char TransA, char TransB, int M, int N, int K, float alpha, INDArray A, int lda, INDArray B, int ldb, float beta, INDArray C, int ldc) {
    // A = Shape.toOffsetZero(A);
    // B = Shape.toOffsetZero(B);
    Nd4j.getExecutioner().push();

    final CudaContext context = allocator.getFlowController().prepareAction(C, A, B);
    final CublasPointer matrixA = new CublasPointer(A, context);
    final CublasPointer matrixB = new CublasPointer(B, context);
    final CublasPointer matrixC = new CublasPointer(C, context);
    final cublasHandle_t cublasHandle = context.getHandle();

    synchronized (cublasHandle) {
        cublasSetStream_v2(new cublasContext(cublasHandle), new CUstream_st(context.getOldStream()));

        final int deviceArch = CudaEnvironment.getInstance().getCurrentDeviceArchitecture();
        final boolean useNativeHalfGemm = CUDA_VERSION >= 8000 && (deviceArch == 53 || deviceArch == 60);

        if (!useNativeHalfGemm) {
            // CUDA_R_16F == 2 for CUDA 8
            // CUBLAS_DATA_HALF == 2 for CUDA 7.5
            final int halfDataType = 2;
            cublasSgemmEx(
                    new cublasContext(cublasHandle),
                    convertTranspose(TransA),
                    convertTranspose(TransB),
                    M,
                    N,
                    K,
                    new FloatPointer(alpha),
                    (ShortPointer) matrixA.getDevicePointer(),
                    halfDataType,
                    lda,
                    (ShortPointer) matrixB.getDevicePointer(),
                    halfDataType,
                    ldb,
                    new FloatPointer(beta),
                    (ShortPointer) matrixC.getDevicePointer(),
                    halfDataType,
                    ldc);
        } else {
            // on these selected archs we run with cublasHgemm
            final __half halfAlpha = new __half();
            final __half halfBeta = new __half();
            // The scalars are written into the __half holders as raw 16-bit patterns.
            new ShortPointer(halfAlpha).put((short) HalfIndexer.fromFloat(alpha));
            new ShortPointer(halfBeta).put((short) HalfIndexer.fromFloat(beta));
            cublasHgemm(
                    new cublasContext(cublasHandle),
                    convertTranspose(TransA),
                    convertTranspose(TransB),
                    M,
                    N,
                    K,
                    halfAlpha,
                    new __half(matrixA.getDevicePointer()),
                    lda,
                    new __half(matrixB.getDevicePointer()),
                    ldb,
                    halfBeta,
                    new __half(matrixC.getDevicePointer()),
                    ldc);
        }
    }

    allocator.registerAction(context, C, A, B);
    OpExecutionerUtil.checkForAny(C);
}
Also used : ShortPointer(org.bytedeco.javacpp.ShortPointer) org.nd4j.jita.allocator.pointers.cuda.cublasHandle_t(org.nd4j.jita.allocator.pointers.cuda.cublasHandle_t) FloatPointer(org.bytedeco.javacpp.FloatPointer) CudaContext(org.nd4j.linalg.jcublas.context.CudaContext) CublasPointer(org.nd4j.linalg.jcublas.CublasPointer)

Example 15 with FloatPointer

use of org.bytedeco.javacpp.FloatPointer in project bigbluebutton by bigbluebutton.

the class FFmpegFrameRecorder method recordSamples.

/**
 * Converts the given audio samples to the encoder's sample format/rate with swresample and
 * records every output frame that fills up along the way.
 *
 * <p>The element type of {@code samples[0]} (byte, short, int, float or double buffer) selects
 * the input sample format; passing more than one buffer selects the planar ("...P") variant of
 * that format, one buffer per channel.
 *
 * @param sampleRate    input sample rate; values {@code <= 0} fall back to {@code audio_c.sample_rate()}
 * @param audioChannels input channel count; values {@code <= 0} fall back to {@code audio_c.channels()}
 * @param samples       input buffers, or {@code null}; with {@code null} no input is fed to the
 *                      converter, whatever it still outputs is recorded, and the method finishes
 *                      with {@code record((AVFrame) null)}
 * @return for non-null {@code samples}, whether {@code frame.key_frame()} is non-zero; for
 *         {@code null} samples, the result of {@code record((AVFrame) null)}
 * @throws Exception if there is no audio stream, the buffer type is unsupported, or the
 *                   conversion context cannot be allocated, initialised or used
 */
public boolean recordSamples(int sampleRate, int audioChannels, Buffer... samples) throws Exception {
    if (audio_st == null) {
        throw new Exception("No audio output stream (Is audioChannels > 0 and has start() been called?)");
    }
    int ret;
    // Non-positive arguments mean "use the encoder's own setting".
    if (sampleRate <= 0) {
        sampleRate = audio_c.sample_rate();
    }
    if (audioChannels <= 0) {
        audioChannels = audio_c.channels();
    }
    // Number of remaining elements in the first buffer; the other buffers are copied with this same size.
    // NOTE(review): samples[0] is read whenever samples is non-null, so an empty varargs array
    // would fail here with an index error — confirm callers never pass zero buffers.
    int inputSize = samples != null ? samples[0].limit() - samples[0].position() : 0;
    int inputFormat = AV_SAMPLE_FMT_NONE;
    // With several buffers each one is treated as holding a single channel.
    int inputChannels = samples != null && samples.length > 1 ? 1 : audioChannels;
    int inputDepth = 0;
    int outputFormat = audio_c.sample_fmt();
    int outputChannels = samples_out.length > 1 ? 1 : audio_c.channels();
    int outputDepth = av_get_bytes_per_sample(outputFormat);
    // Each branch below picks the format and byte depth for one buffer type, then stages every
    // buffer in samples_in[i]: an existing pointer of the right type is reused when it is large
    // enough and the buffer is array-backed, otherwise a new pointer is created from the buffer.
    if (samples != null && samples[0] instanceof ByteBuffer) {
        inputFormat = samples.length > 1 ? AV_SAMPLE_FMT_U8P : AV_SAMPLE_FMT_U8;
        inputDepth = 1;
        for (int i = 0; i < samples.length; i++) {
            ByteBuffer b = (ByteBuffer) samples[i];
            if (samples_in[i] instanceof BytePointer && samples_in[i].capacity() >= inputSize && b.hasArray()) {
                ((BytePointer) samples_in[i]).position(0).put(b.array(), b.position(), inputSize);
            } else {
                samples_in[i] = new BytePointer(b);
            }
        }
    } else if (samples != null && samples[0] instanceof ShortBuffer) {
        inputFormat = samples.length > 1 ? AV_SAMPLE_FMT_S16P : AV_SAMPLE_FMT_S16;
        inputDepth = 2;
        for (int i = 0; i < samples.length; i++) {
            ShortBuffer b = (ShortBuffer) samples[i];
            if (samples_in[i] instanceof ShortPointer && samples_in[i].capacity() >= inputSize && b.hasArray()) {
                ((ShortPointer) samples_in[i]).position(0).put(b.array(), samples[i].position(), inputSize);
            } else {
                samples_in[i] = new ShortPointer(b);
            }
        }
    } else if (samples != null && samples[0] instanceof IntBuffer) {
        inputFormat = samples.length > 1 ? AV_SAMPLE_FMT_S32P : AV_SAMPLE_FMT_S32;
        inputDepth = 4;
        for (int i = 0; i < samples.length; i++) {
            IntBuffer b = (IntBuffer) samples[i];
            if (samples_in[i] instanceof IntPointer && samples_in[i].capacity() >= inputSize && b.hasArray()) {
                ((IntPointer) samples_in[i]).position(0).put(b.array(), samples[i].position(), inputSize);
            } else {
                samples_in[i] = new IntPointer(b);
            }
        }
    } else if (samples != null && samples[0] instanceof FloatBuffer) {
        inputFormat = samples.length > 1 ? AV_SAMPLE_FMT_FLTP : AV_SAMPLE_FMT_FLT;
        inputDepth = 4;
        for (int i = 0; i < samples.length; i++) {
            FloatBuffer b = (FloatBuffer) samples[i];
            if (samples_in[i] instanceof FloatPointer && samples_in[i].capacity() >= inputSize && b.hasArray()) {
                ((FloatPointer) samples_in[i]).position(0).put(b.array(), b.position(), inputSize);
            } else {
                samples_in[i] = new FloatPointer(b);
            }
        }
    } else if (samples != null && samples[0] instanceof DoubleBuffer) {
        inputFormat = samples.length > 1 ? AV_SAMPLE_FMT_DBLP : AV_SAMPLE_FMT_DBL;
        inputDepth = 8;
        for (int i = 0; i < samples.length; i++) {
            DoubleBuffer b = (DoubleBuffer) samples[i];
            if (samples_in[i] instanceof DoublePointer && samples_in[i].capacity() >= inputSize && b.hasArray()) {
                ((DoublePointer) samples_in[i]).position(0).put(b.array(), b.position(), inputSize);
            } else {
                samples_in[i] = new DoublePointer(b);
            }
        }
    } else if (samples != null) {
        throw new Exception("Audio samples Buffer has unsupported type: " + samples);
    }
    // (Re)build the conversion context on first use or whenever the input channel count,
    // format or rate differs from what the cached context was configured with.
    if (samples_convert_ctx == null || samples_channels != audioChannels || samples_format != inputFormat || samples_rate != sampleRate) {
        samples_convert_ctx = swr_alloc_set_opts(samples_convert_ctx, audio_c.channel_layout(), outputFormat, audio_c.sample_rate(), av_get_default_channel_layout(audioChannels), inputFormat, sampleRate, 0, null);
        if (samples_convert_ctx == null) {
            throw new Exception("swr_alloc_set_opts() error: Cannot allocate the conversion context.");
        } else if ((ret = swr_init(samples_convert_ctx)) < 0) {
            throw new Exception("swr_init() error " + ret + ": Cannot initialize the conversion context.");
        }
        samples_channels = audioChannels;
        samples_format = inputFormat;
        samples_rate = sampleRate;
    }
    // Rescale position/limit of every input pointer by the sample depth (presumably element
    // units to byte units, since the loop below divides by inputDepth again).
    // NOTE(review): the position() inside the limit(...) argument is read AFTER the chained
    // position(...) call has already multiplied it by inputDepth; this only gives
    // (position + inputSize) * inputDepth when the position is 0 — confirm for the branches
    // above that create a new pointer from a buffer with a non-zero position.
    for (int i = 0; samples != null && i < samples.length; i++) {
        samples_in[i].position(samples_in[i].position() * inputDepth).limit((samples_in[i].position() + inputSize) * inputDepth);
    }
    // Convert in chunks until swr_convert() produces no more output.
    while (true) {
        // Samples still available on the input side and free room on the output side.
        int inputCount = (int) Math.min(samples != null ? (samples_in[0].limit() - samples_in[0].position()) / (inputChannels * inputDepth) : 0, Integer.MAX_VALUE);
        int outputCount = (int) Math.min((samples_out[0].limit() - samples_out[0].position()) / (outputChannels * outputDepth), Integer.MAX_VALUE);
        // Cap the input at the amount (rounded up) that maps onto outputCount output samples
        // given the ratio of input to output sample rate.
        inputCount = Math.min(inputCount, (outputCount * sampleRate + audio_c.sample_rate() - 1) / audio_c.sample_rate());
        for (int i = 0; samples != null && i < samples.length; i++) {
            samples_in_ptr.put(i, samples_in[i]);
        }
        for (int i = 0; i < samples_out.length; i++) {
            samples_out_ptr.put(i, samples_out[i]);
        }
        if ((ret = swr_convert(samples_convert_ctx, samples_out_ptr, outputCount, samples_in_ptr, inputCount)) < 0) {
            throw new Exception("swr_convert() error " + ret + ": Cannot convert audio samples.");
        } else if (ret == 0) {
            // Nothing was produced: the loop is done.
            break;
        }
        // Advance past the consumed input and the ret samples just written to the output.
        for (int i = 0; samples != null && i < samples.length; i++) {
            samples_in[i].position(samples_in[i].position() + inputCount * inputChannels * inputDepth);
        }
        for (int i = 0; i < samples_out.length; i++) {
            samples_out[i].position(samples_out[i].position() + ret * outputChannels * outputDepth);
        }
        // Emit a frame once the output buffer is full, or after every conversion when there is
        // no input (samples == null).
        if (samples == null || samples_out[0].position() >= samples_out[0].limit()) {
            frame.nb_samples(audio_input_frame_size);
            avcodec_fill_audio_frame(frame, audio_c.channels(), outputFormat, samples_out[0], (int) Math.min(samples_out[0].limit(), Integer.MAX_VALUE), 0);
            for (int i = 0; i < samples_out.length; i++) {
                // position(0) also rewinds the output buffer so the next pass starts writing at its beginning.
                frame.data(i, samples_out[i].position(0));
                frame.linesize(i, (int) Math.min(samples_out[i].limit(), Integer.MAX_VALUE));
            }
            frame.quality(audio_c.global_quality());
            record(frame);
        }
    }
    return samples != null ? frame.key_frame() != 0 : record((AVFrame) null);
}
Also used : DoubleBuffer(java.nio.DoubleBuffer) BytePointer(org.bytedeco.javacpp.BytePointer) DoublePointer(org.bytedeco.javacpp.DoublePointer) FloatBuffer(java.nio.FloatBuffer) ByteBuffer(java.nio.ByteBuffer) ShortPointer(org.bytedeco.javacpp.ShortPointer) FloatPointer(org.bytedeco.javacpp.FloatPointer) IntBuffer(java.nio.IntBuffer) IntPointer(org.bytedeco.javacpp.IntPointer) ShortBuffer(java.nio.ShortBuffer)

Aggregations

FloatPointer (org.bytedeco.javacpp.FloatPointer) 30, CudaContext (org.nd4j.linalg.jcublas.context.CudaContext) 15, CublasPointer (org.nd4j.linalg.jcublas.CublasPointer) 14, IntPointer (org.bytedeco.javacpp.IntPointer) 11, INDArray (org.nd4j.linalg.api.ndarray.INDArray) 11, DoublePointer (org.bytedeco.javacpp.DoublePointer) 9, CUstream_st (org.bytedeco.javacpp.cuda.CUstream_st) 9, org.nd4j.jita.allocator.pointers.cuda.cublasHandle_t (org.nd4j.jita.allocator.pointers.cuda.cublasHandle_t) 9, DataBuffer (org.nd4j.linalg.api.buffer.DataBuffer) 7, Pointer (org.bytedeco.javacpp.Pointer) 6, BlasException (org.nd4j.linalg.api.blas.BlasException) 6, BytePointer (org.bytedeco.javacpp.BytePointer) 5, ShortPointer (org.bytedeco.javacpp.ShortPointer) 5, CudaPointer (org.nd4j.jita.allocator.pointers.CudaPointer) 5, org.nd4j.jita.allocator.pointers.cuda.cusolverDnHandle_t (org.nd4j.jita.allocator.pointers.cuda.cusolverDnHandle_t) 5, GridExecutioner (org.nd4j.linalg.api.ops.executioner.GridExecutioner) 5, ByteBuffer (java.nio.ByteBuffer) 4, DoubleBuffer (java.nio.DoubleBuffer) 4, FloatBuffer (java.nio.FloatBuffer) 4, IntBuffer (java.nio.IntBuffer) 4