Search in sources :

Example 1 with CLKernel

use of com.jogamp.opencl.CLKernel in project javacv by bytedeco.

the class ProjectiveTransformerCL method transform.

/**
 * Uploads the homographies, enqueues one of the projective-transform kernels and,
 * when more than one work-group is used along X, a follow-up reduction kernel.
 *
 * <p>Kernel selection:
 * <ul>
 *   <li>{@code subImg == null}: {@code oneKernel}, writing to {@code dstImg} or, when that is
 *       {@code null}, to {@code transImg};</li>
 *   <li>{@code subImg != null && srcDotImg == null}: {@code subKernel};</li>
 *   <li>otherwise: {@code dotKernel}.</li>
 * </ul>
 *
 * <p>The event list created for the kernel launch is now always released before returning;
 * previously it was allocated on every call and never released.
 *
 * @param srcImg     source image, first argument of every kernel
 * @param subImg     optional second image; {@code null} selects {@code oneKernel}
 * @param srcDotImg  optional third image; {@code null} (with a non-null {@code subImg}) selects {@code subKernel}
 * @param transImg   output image of {@code subKernel}, and fallback output of {@code oneKernel}
 * @param dstImg     output image; may be {@code null} for {@code oneKernel}
 * @param maskImg    mask image passed through to the kernel
 * @param parameters transformer parameters; must contain at least one element
 *                   ({@code parameters[0].size()} is read unconditionally)
 * @param inverses   per-parameter flags forwarded to {@code prepareHomographies}
 * @param inputData  provides the ROI, pyramid level and the input buffer; written to the
 *                   device first when {@code inputData.autoWrite} is set
 * @param outputData provides the output buffer; read back when {@code outputData.autoRead} is set
 */
public void transform(CLImage2d srcImg, CLImage2d subImg, CLImage2d srcDotImg, CLImage2d transImg, CLImage2d dstImg, CLImage2d maskImg, ImageTransformer.Parameters[] parameters, boolean[] inverses, InputData inputData, OutputData outputData) {
    prepareHomographies(HBuffer, inputData.pyramidLevel, parameters, inverses);
    final int dotSize = parameters[0].size();
    // With several parameter sets the work-group width equals their count; otherwise it is
    // 64 for ROIs wider than 32 pixels and 32 for narrower ones.
    final int localSize = parameters.length > 1 ? parameters.length : (inputData.roiWidth > 32 ? 64 : 32);
    final int globalSize = JavaCVCL.alignCeil(inputData.roiWidth, localSize);
    // Number of work-groups along X; each presumably yields one partial result to reduce.
    final int reduceSize = globalSize / localSize;
    // allocate buffers if necessary
    CLBuffer inputBuffer = inputData.getBuffer(context);
    CLBuffer outputBuffer = outputData.getBuffer(context, dotSize, reduceSize);
    CLEventList list = new CLEventList(1);
    try {
        // upload H (the `false` flag is presumably "non-blocking" -- confirm against JavaCVCL)
        context.writeBuffer(HBuffer, false);
        if (inputData.autoWrite) {
            inputData.writeBuffer(context);
        }
        // setup kernel
        CLKernel kernel = null;
        if (subImg == null) {
            assert parameters.length == 1;
            kernel = oneKernel.putArg(srcImg).putArg(dstImg == null ? transImg : dstImg).putArg(maskImg).putArg(HBuffer).putArg(inputBuffer).putArg(outputBuffer).rewind();
        } else if (srcDotImg == null) {
            assert parameters.length == 1;
            kernel = subKernel.putArg(srcImg).putArg(subImg).putArg(transImg).putArg(dstImg).putArg(maskImg).putArg(HBuffer).putArg(inputBuffer).putArg(outputBuffer).rewind();
        } else {
            assert parameters.length == dotSize;
            kernel = dotKernel.putArg(srcImg).putArg(subImg).putArg(srcDotImg).putArg(maskImg).putArg(HBuffer).putArg(inputBuffer).putArg(outputBuffer).rewind();
        }
        // execute program
        context.executeKernel(kernel, inputData.roiX, 0, 0, globalSize, 1, parameters.length, localSize, 1, parameters.length, list);
        if (reduceSize > 1) {
            reduceKernel.putArg(outputBuffer).rewind();
            context.executeKernel(reduceKernel, 0, reduceSize, reduceSize);
        }
        if (outputData.autoRead) {
            outputData.readBuffer(context);
        }
        // Profiling aid (must stay before the release below if re-enabled):
        // CLEvent event = list.getEvent(0);
        // System.out.println(kernel + " " + (event.getProfilingInfo(CLEvent.ProfilingCommand.END) -
        // event.getProfilingInfo(CLEvent.ProfilingCommand.START))/1000000.0);
        // long res = q.getDevice().getProfilingTimerResolution();
        // System.out.println(res);
    } finally {
        // Drop our reference to the kernel-launch event so it does not accumulate per call.
        list.release();
    }
}
Also used : CLBuffer(com.jogamp.opencl.CLBuffer) CLEventList(com.jogamp.opencl.CLEventList) CLKernel(com.jogamp.opencl.CLKernel)

Example 2 with CLKernel

use of com.jogamp.opencl.CLKernel in project ffx by mjschnie.

the class Complex3DOpenCL method run.

/**
 * Body of the OpenCL worker thread.
 *
 * <p>Selects a platform and device, creates the context, command queue, the two device
 * buffers ({@code clData}, {@code clRecip}), the FFT plan and the {@code VectorMultiply}
 * kernel, then enters a monitor-guarded loop: while {@code free} is false it services the
 * request stored in {@code mode} (if any), resets {@code mode} to {@code null}, calls
 * {@code notify()} and blocks in {@code wait()}.
 *
 * <p>When the loop exits, the queue is finished, buffers and plan are released,
 * {@code dead} is set to {@code true} and waiters are notified once more. The context is
 * released in the {@code finally} block on every exit path.
 *
 * <p>NOTE(review): {@code dead} is only set on the normal shutdown path. If set-up fails
 * (an {@link IOException} is merely logged; runtime exceptions propagate) no
 * {@code notify()} is issued -- confirm that callers cannot block forever in that case.
 */
@Override
public void run() {
    CLContext context = null;
    try {
        // Choose a platform; default to the first one listed.
        CLPlatform[] platforms = CLPlatform.listCLPlatforms();
        CLPlatform platform = platforms[0];
        // Prefer the first platform whose ICD suffix is exactly "NV".
        try {
            for (CLPlatform p : platforms) {
                if (p.getICDSuffix().equals("NV")) {
                    platform = p;
                    break;
                }
            }
        } catch (Exception e) {
        // Deliberately ignored: any failure while probing suffixes keeps the platform chosen so far.
        }
        logger.info(String.format("   Platform: %s", platform));
        // Choose a device among accelerators and GPUs; default to the first one listed.
        CLDevice[] devices = platform.listCLDevices(CLDevice.Type.ACCELERATOR, CLDevice.Type.GPU);
        CLDevice device = devices[0];
        // Prefer the first device whose vendor string starts with "NV".
        for (CLDevice dev : devices) {
            if (dev.getVendor().startsWith("NV")) {
                device = dev;
                break;
            }
        }
        logger.info(String.format("   Device:   %s", device));
        // Initialize the OpenCL Context
        context = CLContext.create(device);
        CLCommandQueue queue = device.createCommandQueue();
        // Allocate memory on the device.
        // The data buffer holds 2 * len doubles (presumably interleaved real/imaginary parts -- confirm).
        int bufferSize = len * 2;
        clData = context.createDoubleBuffer(bufferSize, Mem.READ_WRITE);
        DoubleBuffer doubleBuffer = clData.getBuffer();
        int MB = 1024 * 1024;
        logger.info(String.format("   FFT data buffer        [direct: %b, write: %b, size: %d MB]", doubleBuffer.isDirect(), !doubleBuffer.isReadOnly(), clData.getCLSize() / MB));
        // The reciprocal-space buffer holds len doubles.
        clRecip = context.createDoubleBuffer(len, Mem.READ_WRITE);
        doubleBuffer = clRecip.getBuffer();
        logger.info(String.format("   Reciprocal data buffer [direct: %b, write: %b, size: %d MB]", doubleBuffer.isDirect(), !doubleBuffer.isReadOnly(), clRecip.getCLSize() / MB));
        // Initialize the OpenCL FFT library.
        setup();
        int[] dims = { nX, nY, nZ };
        planHandle = createDefaultPlan(context, Complex3DOpenCL_DIMENSION.Complex3DOpenCL_3D, dims);
        // Initialize the reciprocal space multiply kernel from its classpath resource.
        // NOTE(review): getResource returns null when the resource is missing, which would
        // surface as a NullPointerException on the next line rather than an IOException.
        URL source = getClass().getClassLoader().getResource("ffx/numerics/fft/VectorMultiply.cl");
        InputStream input = source.openStream();
        CLProgram program = context.createProgram(input).build();
        // Get a reference to the kernel function with the name 'VectorMultiply'
        CLKernel kernel = program.createCLKernel("VectorMultiply");
        // Work-group size is capped at 128; the global size is derived from it and len via roundUp.
        int localWorkSize = Math.min(device.getMaxWorkGroupSize(), 128);
        int globalWorkSize = roundUp(localWorkSize, len);
        synchronized (this) {
            // Service loop: runs until `free` becomes true (set outside this method).
            while (!free) {
                if (mode != null) {
                    switch(mode) {
                        case RECIP:
                            // Copy the host reciprocal array into the buffer and upload it.
                            // The `true` flag is presumably the blocking flag -- confirm.
                            clRecip.getBuffer().put(recip).rewind();
                            queue.putWriteBuffer(clRecip, true);
                            break;
                        case FFT:
                            // Upload, forward transform in place (unless transferOnly), download.
                            clData.getBuffer().rewind();
                            queue.putWriteBuffer(clData, true);
                            if (!transferOnly) {
                                executeTransform(Complex3DOpenCL_DIRECTION.FORWARD, queue, clData, clData);
                                queue.finish();
                            }
                            clData.getBuffer().rewind();
                            queue.putReadBuffer(clData, true);
                            clData.getBuffer().rewind();
                            queue.finish();
                            break;
                        case CONVOLUTION:
                            // Upload, forward FFT, multiply by clRecip, backward FFT, download.
                            queue.putWriteBuffer(clData, true);
                            // Forward FFT
                            if (!transferOnly) {
                                // long time = -System.nanoTime();
                                executeTransform(Complex3DOpenCL_DIRECTION.FORWARD, queue, clData, clData);
                                // Reciprocal Space Multiply
                                kernel.rewind().putArgs(clData, clRecip).putArg(len);
                                queue.put1DRangeKernel(kernel, 0, globalWorkSize, localWorkSize);
                                // Barrier between the multiply kernel and the backward transform.
                                queue.putBarrier();
                                // Backward FFT
                                executeTransform(Complex3DOpenCL_DIRECTION.BACKWARD, queue, clData, clData);
                            // time += System.nanoTime();
                            // logger.info(String.format(" Compute Time %6.3f sec", time * 1.0e-9));
                            }
                            queue.putReadBuffer(clData, true);
                            break;
                        case IFFT:
                            // Upload, backward transform in place (unless transferOnly), download.
                            // Last case of the switch, so no break is needed.
                            queue.putWriteBuffer(clData, true);
                            if (!transferOnly) {
                                executeTransform(Complex3DOpenCL_DIRECTION.BACKWARD, queue, clData, clData);
                            }
                            queue.putReadBuffer(clData, true);
                    }
                    // Reset the mode to null and notify the calling thread.
                    mode = null;
                    notify();
                }
                // The OpenCL thread will wait until it's notified again.
                try {
                    wait();
                } catch (InterruptedException e) {
                    // The interrupt is only logged; the loop then re-checks `free` and continues.
                    logger.severe(e.toString());
                }
            }
            // Shutdown: drain the queue, free device resources, then signal termination.
            queue.finish();
            clData.release();
            clRecip.release();
            destroyPlan();
            teardown();
            dead = true;
            notify();
        }
    } catch (IOException e) {
        // Declared by the stream/program set-up calls above; logged, not rethrown.
        logger.warning(e.toString());
    } finally {
        // Release the context on every exit path, including failures.
        if (context != null) {
            context.release();
        }
    }
    logger.info(" OpenCL FFT/convolution thread is done.");
}
Also used : DoubleBuffer(java.nio.DoubleBuffer) InputStream(java.io.InputStream) CLDevice(com.jogamp.opencl.CLDevice) IOException(java.io.IOException) URL(java.net.URL) CLKernel(com.jogamp.opencl.CLKernel) CLProgram(com.jogamp.opencl.CLProgram) CLContext(com.jogamp.opencl.CLContext) CLPlatform(com.jogamp.opencl.CLPlatform) CLCommandQueue(com.jogamp.opencl.CLCommandQueue)

Example 3 with CLKernel

use of com.jogamp.opencl.CLKernel in project Glowstone by GlowstoneMC.

the class OverworldGenerator method generateChunkData.

/**
 * Builds the chunk's raw terrain and then lets a ground generator shape every column.
 *
 * <p>The per-column surface noise comes either from the "surface" octave generator (CPU) or,
 * when the server reports that graphics compute is enabled, from the {@code GenerateNoise}
 * OpenCL kernel. In both cases the value for column {@code (x, z)} is read at index
 * {@code x | (z << 4)}. Biomes present in {@code GROUND_MAP} use their mapped generator;
 * all others fall back to {@code groundGen}.
 */
@Override
public ChunkData generateChunkData(World world, Random random, int chunkX, int chunkZ, BiomeGrid biomes) {
    final ChunkData chunkData = generateRawTerrain(world, chunkX, chunkZ);
    // World-space block coordinates of the chunk origin.
    final int originX = chunkX << 4;
    final int originZ = chunkZ << 4;
    final SimplexOctaveGenerator surfaceOctaves = (SimplexOctaveGenerator) getWorldOctaves(world).get("surface");
    final int sizeX = surfaceOctaves.getSizeX();
    final int sizeZ = surfaceOctaves.getSizeZ();
    final boolean useCompute = ((GlowServer) ServerProvider.getServer()).doesUseGraphicsCompute();

    if (!useCompute) {
        // CPU path: fractal Brownian motion from the octave generator.
        final double[] surfaceNoise = surfaceOctaves.getFractalBrownianMotion(originX, originZ, 0.5D, 0.5D);
        for (int x = 0; x < sizeX; x++) {
            for (int z = 0; z < sizeZ; z++) {
                final int column = x | (z << 4);
                if (!GROUND_MAP.containsKey(biomes.getBiome(x, z))) {
                    groundGen.generateTerrainColumn(chunkData, world, random, originX + x, originZ + z, biomes.getBiome(x, z), surfaceNoise[column]);
                } else {
                    GROUND_MAP.get(biomes.getBiome(x, z)).generateTerrainColumn(chunkData, world, random, originX + x, originZ + z, biomes.getBiome(x, z), surfaceNoise[column]);
                }
            }
        }
        return chunkData;
    }

    // GPU path: compute the noise with OpenCL and read it back before use.
    CLKernel noiseGen = null;
    CLBuffer<FloatBuffer> noise = null;
    try {
        // Set up the program, the output buffer and the kernel arguments.
        CLProgram program = OpenCompute.getProgram("net/glowstone/CLRandom.cl");
        int workSize = sizeX * surfaceOctaves.getSizeY() * sizeZ;
        noise = OpenCompute.getContext().createFloatBuffer(workSize, CLMemory.Mem.WRITE_ONLY);
        noiseGen = OpenCompute.getKernel(program, "GenerateNoise");
        noiseGen
                .putArg(random.nextFloat())
                .putArg(random.nextFloat())
                .putArg(noise)
                .putArg(workSize);
        // Run the kernel, then read the result buffer back.
        OpenCompute.getQueue()
                .put1DRangeKernel(noiseGen, 0, OpenCompute.getGlobalSize(workSize), OpenCompute.getLocalSize())
                .putReadBuffer(noise, true);
        // Apply the noise column by column.
        for (int x = 0; x < sizeX; x++) {
            for (int z = 0; z < sizeZ; z++) {
                final int column = x | (z << 4);
                if (!GROUND_MAP.containsKey(biomes.getBiome(x, z))) {
                    groundGen.generateTerrainColumn(chunkData, world, random, originX + x, originZ + z, biomes.getBiome(x, z), noise.getBuffer().get(column));
                } else {
                    GROUND_MAP.get(biomes.getBiome(x, z)).generateTerrainColumn(chunkData, world, random, originX + x, originZ + z, biomes.getBiome(x, z), noise.getBuffer().get(column));
                }
            }
        }
    } finally {
        // Clean up: the buffer release is handed to the scheduler, the kernel's arguments are rewound.
        if (noise != null) {
            ServerProvider.getServer().getScheduler().runTaskAsynchronously(null, noise::release);
        }
        if (noiseGen != null) {
            noiseGen.rewind();
        }
    }
    return chunkData;
}
Also used : CLProgram(com.jogamp.opencl.CLProgram) SimplexOctaveGenerator(net.glowstone.util.noise.SimplexOctaveGenerator) FloatBuffer(java.nio.FloatBuffer) GlowServer(net.glowstone.GlowServer) CLKernel(com.jogamp.opencl.CLKernel)

Example 4 with CLKernel

use of com.jogamp.opencl.CLKernel in project Glowstone by GlowstoneMC.

the class OpenCompute method getKernel.

/**
 * Looks up, or creates and caches, the {@link CLKernel} called {@code name} in
 * {@code program}.
 *
 * <p>A cached kernel is returned only when {@code threaded} is false and one is already
 * stored for this program and name. In every other case a new kernel is created from the
 * program, stored in the cache (replacing any previous entry for that name) and returned.
 *
 * <p>NOTE(review): this method does no locking of its own; verify how the {@code kernels}
 * map is declared before calling it from several threads.
 *
 * @param program  the {@link CLProgram} that contains the kernel
 * @param name     the name of the kernel
 * @param threaded if true, always create a new {@link CLKernel} instance
 * @return the cached or newly created {@link CLKernel}
 */
public static CLKernel getKernel(CLProgram program, String name, boolean threaded) {
    // Make sure the program has a per-name cache before looking anything up.
    if (!kernels.containsKey(program)) {
        kernels.put(program, new HashMap<>());
    }
    HashMap<String, CLKernel> byName = kernels.get(program);

    if (!threaded && byName.containsKey(name)) {
        return byName.get(name);
    }

    CLKernel created = program.createCLKernel(name);
    byName.put(name, created);
    return created;
}
Also used : CLKernel(com.jogamp.opencl.CLKernel)

Example 5 with CLKernel

use of com.jogamp.opencl.CLKernel in project javacv by bytedeco.

the class JavaCVCL method buildKernels.

/**
 * Compiles an OpenCL program from one or more classpath resources and creates the
 * requested kernels from it.
 *
 * <p>{@code resourceNames} is split on {@code ':'}. A single name is opened directly;
 * otherwise every named resource is opened and the streams are concatenated, in order,
 * into one program source.
 *
 * @param compilerOptions options passed to the program build
 * @param resourceClass   class whose {@code getResourceAsStream} resolves the resource names
 * @param resourceNames   one resource name, or several separated by {@code ':'}
 * @param kernelNames     names of the kernels to create from the built program
 * @return one kernel per entry of {@code kernelNames}, in the same order
 * @throws LinkageError wrapping the {@link IOException} raised while reading the source
 */
public CLKernel[] buildKernels(String compilerOptions, Class resourceClass, String resourceNames, String... kernelNames) {
    try {
        // Resolve the program source for the chosen device.
        final String[] names = resourceNames.split(":");
        final InputStream source;
        if (names.length != 1) {
            // Several resources: chain their streams into a single one.
            Vector<InputStream> parts = new Vector<InputStream>(names.length);
            for (int i = 0; i < names.length; i++) {
                parts.addElement(resourceClass.getResourceAsStream(names[i]));
            }
            source = new SequenceInputStream(parts.elements());
        } else {
            source = resourceClass.getResourceAsStream(names[0]);
        }

        // Compile.
        CLProgram program = context.createProgram(source);
        program.build(compilerOptions);
        assert program.isExecutable();

        // Create one kernel per requested name.
        CLKernel[] built = new CLKernel[kernelNames.length];
        int next = 0;
        for (String kernelName : kernelNames) {
            built[next++] = program.createCLKernel(kernelName);
        }
        return built;
    } catch (IOException ex) {
        throw (Error) new LinkageError(ex.toString()).initCause(ex);
    }
}
Also used : CLProgram(com.jogamp.opencl.CLProgram) SequenceInputStream(java.io.SequenceInputStream) InputStream(java.io.InputStream) IOException(java.io.IOException) Vector(java.util.Vector) CLKernel(com.jogamp.opencl.CLKernel)

Aggregations

CLKernel (com.jogamp.opencl.CLKernel): 8, CLEventList (com.jogamp.opencl.CLEventList): 4, CLBuffer (com.jogamp.opencl.CLBuffer): 3, CLProgram (com.jogamp.opencl.CLProgram): 3, IOException (java.io.IOException): 2, InputStream (java.io.InputStream): 2, CLCommandQueue (com.jogamp.opencl.CLCommandQueue): 1, CLContext (com.jogamp.opencl.CLContext): 1, CLDevice (com.jogamp.opencl.CLDevice): 1, CLImage2d (com.jogamp.opencl.CLImage2d): 1, CLPlatform (com.jogamp.opencl.CLPlatform): 1, SequenceInputStream (java.io.SequenceInputStream): 1, URL (java.net.URL): 1, DoubleBuffer (java.nio.DoubleBuffer): 1, FloatBuffer (java.nio.FloatBuffer): 1, Vector (java.util.Vector): 1, GlowServer (net.glowstone.GlowServer): 1, SimplexOctaveGenerator (net.glowstone.util.noise.SimplexOctaveGenerator): 1