use of com.jogamp.opencl.CLKernel in project javacv by bytedeco.
the class ProjectiveTransformerCL method transform.
/**
 * Uploads the homographies for {@code parameters}, runs one of the three transform
 * kernels over the region of interest described by {@code inputData}, optionally
 * reduces the per-work-group results, and optionally reads the output back.
 *
 * <p>Kernel selection depends on which images are {@code null}:
 * <ul>
 *   <li>{@code subImg == null}: {@code oneKernel}, writing to {@code dstImg} or, when
 *       that is {@code null}, to {@code transImg};</li>
 *   <li>{@code srcDotImg == null}: {@code subKernel};</li>
 *   <li>otherwise: {@code dotKernel}, with one work-item layer per parameter set.</li>
 * </ul>
 *
 * @param srcImg     source image passed to every kernel
 * @param subImg     second input image; {@code null} selects {@code oneKernel}
 * @param srcDotImg  third input image; {@code null} (with non-null {@code subImg})
 *                   selects {@code subKernel}
 * @param transImg   transformed-image output
 * @param dstImg     destination image output
 * @param maskImg    mask image passed to every kernel
 * @param parameters transform parameters; must contain at least one element
 * @param inverses   per-parameter inverse flags forwarded to {@code prepareHomographies}
 * @param inputData  ROI description and input buffer provider
 * @param outputData output buffer provider
 */
public void transform(CLImage2d srcImg, CLImage2d subImg, CLImage2d srcDotImg, CLImage2d transImg, CLImage2d dstImg, CLImage2d maskImg, ImageTransformer.Parameters[] parameters, boolean[] inverses, InputData inputData, OutputData outputData) {
    prepareHomographies(HBuffer, inputData.pyramidLevel, parameters, inverses);

    final int dotSize = parameters[0].size();
    // With several parameter sets the work-group width equals their count;
    // otherwise pick 64 or 32 depending on the ROI width.
    final int localSize = parameters.length > 1 ? parameters.length : (inputData.roiWidth > 32 ? 64 : 32);
    final int globalSize = JavaCVCL.alignCeil(inputData.roiWidth, localSize);
    final int reduceSize = globalSize / localSize;

    // allocate buffers if necessary
    CLBuffer inputBuffer = inputData.getBuffer(context);
    CLBuffer outputBuffer = outputData.getBuffer(context, dotSize, reduceSize);

    // The event list owns a native OpenCL event once the kernel is enqueued, so it
    // must be released on every exit path or each call leaks an event handle.
    CLEventList list = new CLEventList(1);
    try {
        // upload H
        context.writeBuffer(HBuffer, false);
        if (inputData.autoWrite) {
            inputData.writeBuffer(context);
        }

        // setup kernel
        final CLKernel kernel;
        if (subImg == null) {
            assert parameters.length == 1;
            kernel = oneKernel.putArg(srcImg).putArg(dstImg == null ? transImg : dstImg).putArg(maskImg).putArg(HBuffer).putArg(inputBuffer).putArg(outputBuffer).rewind();
        } else if (srcDotImg == null) {
            assert parameters.length == 1;
            kernel = subKernel.putArg(srcImg).putArg(subImg).putArg(transImg).putArg(dstImg).putArg(maskImg).putArg(HBuffer).putArg(inputBuffer).putArg(outputBuffer).rewind();
        } else {
            assert parameters.length == dotSize;
            kernel = dotKernel.putArg(srcImg).putArg(subImg).putArg(srcDotImg).putArg(maskImg).putArg(HBuffer).putArg(inputBuffer).putArg(outputBuffer).rewind();
        }

        // execute program
        context.executeKernel(kernel, inputData.roiX, 0, 0, globalSize, 1, parameters.length, localSize, 1, parameters.length, list);

        // More than one work-group was launched: combine their partial results.
        if (reduceSize > 1) {
            reduceKernel.putArg(outputBuffer).rewind();
            context.executeKernel(reduceKernel, 0, reduceSize, reduceSize);
        }
        if (outputData.autoRead) {
            outputData.readBuffer(context);
        }

        // Profiling hook (disabled); must stay inside this try so the event is still alive:
        // CLEvent event = list.getEvent(0);
        // System.out.println(kernel + " " + (event.getProfilingInfo(CLEvent.ProfilingCommand.END) -
        //         event.getProfilingInfo(CLEvent.ProfilingCommand.START))/1000000.0);
    } finally {
        list.release();
    }
}
use of com.jogamp.opencl.CLKernel in project ffx by mjschnie.
the class Complex3DOpenCL method run.
/**
 * Body of the OpenCL worker thread.
 *
 * <p>Selects an OpenCL platform and device, allocates the device buffers, creates the
 * FFT plan and the "VectorMultiply" kernel, and then services requests signalled
 * through the {@code mode} field until the {@code free} flag is set. Each request is
 * handled while holding this object's monitor; when it completes, {@code mode} is
 * reset to {@code null} and a waiting thread is notified. On shutdown the buffers,
 * plan and library are torn down, {@code dead} is set and the context is released.
 *
 * <p>NOTE(review): {@code dead} is only set on the normal shutdown path. If setup
 * fails (for example an {@link IOException} while loading the kernel source), the
 * method returns without setting {@code dead} or notifying - confirm that callers
 * waiting on this object cannot hang in that case.
 */
@Override
public void run() {
CLContext context = null;
try {
// Choose a platform.
CLPlatform[] platforms = CLPlatform.listCLPlatforms();
// Default to the first platform. NOTE(review): throws if no platform is reported.
CLPlatform platform = platforms[0];
// Prefer NV
try {
for (CLPlatform p : platforms) {
if (p.getICDSuffix().equals("NV")) {
platform = p;
break;
}
}
} catch (Exception e) {
// ignore.
// Any failure while querying the ICD suffix leaves the default platform selected.
}
logger.info(String.format(" Platform: %s", platform));
// Choose a device.
CLDevice[] devices = platform.listCLDevices(CLDevice.Type.ACCELERATOR, CLDevice.Type.GPU);
// Default to the first device, then prefer one whose vendor string starts with "NV".
// NOTE(review): throws if the platform exposes no accelerator/GPU device.
CLDevice device = devices[0];
for (CLDevice dev : devices) {
if (dev.getVendor().startsWith("NV")) {
device = dev;
break;
}
}
logger.info(String.format(" Device: %s", device));
// Initialize the OpenCL Context
context = CLContext.create(device);
CLCommandQueue queue = device.createCommandQueue();
// Allocate memory on the device.
// Two doubles per element - presumably interleaved real/imaginary parts; confirm.
int bufferSize = len * 2;
clData = context.createDoubleBuffer(bufferSize, Mem.READ_WRITE);
DoubleBuffer doubleBuffer = clData.getBuffer();
int MB = 1024 * 1024;
logger.info(String.format(" FFT data buffer [direct: %b, write: %b, size: %d MB]", doubleBuffer.isDirect(), !doubleBuffer.isReadOnly(), clData.getCLSize() / MB));
clRecip = context.createDoubleBuffer(len, Mem.READ_WRITE);
doubleBuffer = clRecip.getBuffer();
logger.info(String.format(" Reciprocal data buffer [direct: %b, write: %b, size: %d MB]", doubleBuffer.isDirect(), !doubleBuffer.isReadOnly(), clRecip.getCLSize() / MB));
// Initialize the OpenCL FFT library.
setup();
int[] dims = { nX, nY, nZ };
planHandle = createDefaultPlan(context, Complex3DOpenCL_DIMENSION.Complex3DOpenCL_3D, dims);
// Initialize the Reciprocal Space Multiply Kernel
// NOTE(review): getResource returns null when the .cl file is missing from the
// classpath, which would surface as a NullPointerException on the next line.
URL source = getClass().getClassLoader().getResource("ffx/numerics/fft/VectorMultiply.cl");
InputStream input = source.openStream();
CLProgram program = context.createProgram(input).build();
// Get a reference to the kernel function with the name 'VectorMultiply'
CLKernel kernel = program.createCLKernel("VectorMultiply");
// Work-group size is capped at 128; the global size is derived from it and len via roundUp.
int localWorkSize = Math.min(device.getMaxWorkGroupSize(), 128);
int globalWorkSize = roundUp(localWorkSize, len);
// The request loop runs entirely under this object's monitor; wait() below releases
// the monitor while the thread is idle.
synchronized (this) {
while (!free) {
if (mode != null) {
switch(mode) {
case RECIP:
// Copy the reciprocal-space array into the host buffer and write it to the device.
clRecip.getBuffer().put(recip).rewind();
queue.putWriteBuffer(clRecip, true);
break;
case FFT:
// Write the data, run the forward transform in place (unless transferOnly), read it back.
clData.getBuffer().rewind();
queue.putWriteBuffer(clData, true);
if (!transferOnly) {
executeTransform(Complex3DOpenCL_DIRECTION.FORWARD, queue, clData, clData);
queue.finish();
}
clData.getBuffer().rewind();
queue.putReadBuffer(clData, true);
clData.getBuffer().rewind();
queue.finish();
break;
case CONVOLUTION:
// Forward FFT, multiply by the reciprocal buffer, backward FFT, all in place on clData.
queue.putWriteBuffer(clData, true);
// Forward FFT
if (!transferOnly) {
// long time = -System.nanoTime();
executeTransform(Complex3DOpenCL_DIRECTION.FORWARD, queue, clData, clData);
// Reciprocal Space Multiply
kernel.rewind().putArgs(clData, clRecip).putArg(len);
queue.put1DRangeKernel(kernel, 0, globalWorkSize, localWorkSize);
// Barrier enqueued between the multiply kernel and the backward transform.
queue.putBarrier();
// Backward FFT
executeTransform(Complex3DOpenCL_DIRECTION.BACKWARD, queue, clData, clData);
// time += System.nanoTime();
// logger.info(String.format(" Compute Time %6.3f sec", time * 1.0e-9));
}
queue.putReadBuffer(clData, true);
break;
case IFFT:
// Write the data, run the backward transform in place (unless transferOnly), read it back.
// No break is needed: this is the last case of the switch.
queue.putWriteBuffer(clData, true);
if (!transferOnly) {
executeTransform(Complex3DOpenCL_DIRECTION.BACKWARD, queue, clData, clData);
}
queue.putReadBuffer(clData, true);
}
// Reset the mode to null and notify the calling thread.
mode = null;
notify();
}
// The OpenCL thread will wait until it's notified again.
try {
wait();
} catch (InterruptedException e) {
// The interrupt is only logged; the loop continues and re-checks the free flag.
logger.severe(e.toString());
}
}
// free was set: drain the queue and tear everything down while still holding the monitor.
queue.finish();
clData.release();
clRecip.release();
destroyPlan();
teardown();
dead = true;
notify();
}
} catch (IOException e) {
// Raised while opening or reading the kernel source; only logged.
logger.warning(e.toString());
} finally {
// Release the context on every exit path, including setup failures.
if (context != null) {
context.release();
}
}
logger.info(" OpenCL FFT/convolution thread is done.");
}
use of com.jogamp.opencl.CLKernel in project Glowstone by GlowstoneMC.
the class OverworldGenerator method generateChunkData.
/**
 * Builds the chunk's raw terrain and then lets the per-biome ground generators shape
 * every column, driven by a surface-noise value for that column.
 *
 * <p>When the server has graphics compute enabled, the noise is produced by the
 * "GenerateNoise" OpenCL kernel; otherwise it comes from the "surface" octave
 * generator's fractal Brownian motion.
 *
 * @param world  the world being generated
 * @param random the random source handed to the ground generators (and used to seed
 *               the kernel arguments on the compute path)
 * @param chunkX the chunk X coordinate
 * @param chunkZ the chunk Z coordinate
 * @param biomes the biome grid for this chunk
 * @return the populated chunk data
 */
@Override
public ChunkData generateChunkData(World world, Random random, int chunkX, int chunkZ, BiomeGrid biomes) {
    ChunkData chunkData = generateRawTerrain(world, chunkX, chunkZ);
    int cx = chunkX << 4;
    int cz = chunkZ << 4;

    SimplexOctaveGenerator surfaceOctaves = (SimplexOctaveGenerator) getWorldOctaves(world).get("surface");
    int sizeX = surfaceOctaves.getSizeX();
    int sizeZ = surfaceOctaves.getSizeZ();

    boolean computeEnabled = ((GlowServer) ServerProvider.getServer()).doesUseGraphicsCompute();
    if (!computeEnabled) {
        // CPU path: sample the octave generator directly.
        double[] surfaceNoise = surfaceOctaves.getFractalBrownianMotion(cx, cz, 0.5D, 0.5D);
        for (int x = 0; x < sizeX; x++) {
            for (int z = 0; z < sizeZ; z++) {
                int column = x | (z << 4);
                if (GROUND_MAP.containsKey(biomes.getBiome(x, z))) {
                    GROUND_MAP.get(biomes.getBiome(x, z)).generateTerrainColumn(chunkData, world, random, cx + x, cz + z, biomes.getBiome(x, z), surfaceNoise[column]);
                } else {
                    groundGen.generateTerrainColumn(chunkData, world, random, cx + x, cz + z, biomes.getBiome(x, z), surfaceNoise[column]);
                }
            }
        }
        return chunkData;
    }

    // Compute path: fill a float buffer with noise via the OpenCL kernel.
    CLKernel noiseGen = null;
    CLBuffer<FloatBuffer> noise = null;
    try {
        // Initialize OpenCL stuff and put args
        CLProgram program = OpenCompute.getProgram("net/glowstone/CLRandom.cl");
        int workSize = sizeX * surfaceOctaves.getSizeY() * sizeZ;
        noise = OpenCompute.getContext().createFloatBuffer(workSize, CLMemory.Mem.WRITE_ONLY);
        noiseGen = OpenCompute.getKernel(program, "GenerateNoise");
        noiseGen
                .putArg(random.nextFloat())
                .putArg(random.nextFloat())
                .putArg(noise)
                .putArg(workSize);

        // Calculate noise on GPU
        OpenCompute.getQueue()
                .put1DRangeKernel(noiseGen, 0, OpenCompute.getGlobalSize(workSize), OpenCompute.getLocalSize())
                .putReadBuffer(noise, true);

        // Use noise
        for (int x = 0; x < sizeX; x++) {
            for (int z = 0; z < sizeZ; z++) {
                int column = x | (z << 4);
                if (GROUND_MAP.containsKey(biomes.getBiome(x, z))) {
                    GROUND_MAP.get(biomes.getBiome(x, z)).generateTerrainColumn(chunkData, world, random, cx + x, cz + z, biomes.getBiome(x, z), noise.getBuffer().get(column));
                } else {
                    groundGen.generateTerrainColumn(chunkData, world, random, cx + x, cz + z, biomes.getBiome(x, z), noise.getBuffer().get(column));
                }
            }
        }
    } finally {
        // Clean up: the buffer is released off-thread, the kernel's argument index is reset.
        if (noise != null) {
            ServerProvider.getServer().getScheduler().runTaskAsynchronously(null, noise::release);
        }
        if (noiseGen != null) {
            noiseGen.rewind();
        }
    }
    return chunkData;
}
use of com.jogamp.opencl.CLKernel in project Glowstone by GlowstoneMC.
the class OpenCompute method getKernel.
/**
 * Looks up, or creates and caches, the {@link CLKernel} with the given name in the
 * given {@link CLProgram}.
 *
 * <p>A newly created kernel is always stored in the cache, replacing any kernel
 * previously cached under the same program and name.
 *
 * @param program the {@link CLProgram} that contains the kernel
 * @param name the name of the kernel
 * @param threaded if true, always create a new {@link CLKernel} instance
 * @return the cached kernel when one exists and {@code threaded} is false, otherwise
 *         a freshly created {@link CLKernel}
 */
public static CLKernel getKernel(CLProgram program, String name, boolean threaded) {
    // Make sure the program has a per-name cache before consulting it.
    if (!kernels.containsKey(program)) {
        kernels.put(program, new HashMap<>());
    }
    HashMap<String, CLKernel> byName = kernels.get(program);

    // Reuse the cached kernel unless the caller asked for a fresh instance.
    if (!threaded && byName.containsKey(name)) {
        return byName.get(name);
    }

    CLKernel created = program.createCLKernel(name);
    byName.put(name, created);
    return created;
}
use of com.jogamp.opencl.CLKernel in project javacv by bytedeco.
the class JavaCVCL method buildKernels.
/**
 * Compiles an OpenCL program from one or more class-path resources and creates the
 * requested kernels from it.
 *
 * @param compilerOptions options passed to the program build
 * @param resourceClass   class whose {@code getResourceAsStream} resolves the resources
 * @param resourceNames   one resource name, or several separated by {@code ':'}; multiple
 *                        resources are concatenated in the given order
 * @param kernelNames     names of the kernels to create
 * @return the created kernels, in the same order as {@code kernelNames}
 * @throws LinkageError if reading the program source fails with an {@link IOException}
 */
public CLKernel[] buildKernels(String compilerOptions, Class resourceClass, String resourceNames, String... kernelNames) {
    // Resolve the program source: a single stream, or a concatenation of several.
    final String[] resources = resourceNames.split(":");
    final InputStream programSource;
    if (resources.length != 1) {
        Vector<InputStream> parts = new Vector<InputStream>(resources.length);
        for (int i = 0; i < resources.length; i++) {
            parts.addElement(resourceClass.getResourceAsStream(resources[i]));
        }
        programSource = new SequenceInputStream(parts.elements());
    } else {
        programSource = resourceClass.getResourceAsStream(resources[0]);
    }

    try {
        // load and compile program for the chosen device
        CLProgram program = context.createProgram(programSource);
        program.build(compilerOptions);
        assert program.isExecutable();

        // create one kernel per requested name
        final int count = kernelNames.length;
        CLKernel[] built = new CLKernel[count];
        int index = 0;
        while (index < count) {
            built[index] = program.createCLKernel(kernelNames[index]);
            index++;
        }
        return built;
    } catch (IOException ex) {
        LinkageError failure = new LinkageError(ex.toString());
        failure.initCause(ex);
        throw failure;
    }
}
Aggregations