Use of com.jogamp.opencl.CLCommandQueue in project ffx by mjschnie.
Class Complex3DOpenCL, method run.
/**
 * Body of the OpenCL worker thread.
 * <p>
 * Setup: picks an OpenCL platform (preferring one whose ICD suffix is "NV") and an
 * ACCELERATOR/GPU device (preferring a vendor name starting with "NV"), creates a
 * context and command queue, allocates the data buffer ({@code 2 * len} doubles) and
 * the reciprocal buffer ({@code len} doubles), initializes the FFT library and plan
 * for the {@code nX x nY x nZ} grid, and builds the "VectorMultiply" kernel.
 * <p>
 * Service loop: while {@code free} is false, the thread holds this object's monitor,
 * executes the operation named by {@code mode} (if any), resets {@code mode} to
 * {@code null}, calls {@code notify()}, and then {@code wait()}s for the next request.
 * <p>
 * Shutdown: once {@code free} is true the queue is finished, the buffers and plan are
 * released, {@code dead} is set and {@code notify()} is called. If setup or the loop
 * fails, {@code dead} is still set (and a waiter notified) before this method exits.
 *
 * @throws IllegalStateException if no OpenCL platform, or no GPU/accelerator device,
 *         is available.
 */
@Override
public void run() {
    CLContext context = null;
    try {
        // Choose a platform.
        CLPlatform[] platforms = CLPlatform.listCLPlatforms();
        if (platforms == null || platforms.length == 0) {
            throw new IllegalStateException("No OpenCL platform is available.");
        }
        CLPlatform platform = platforms[0];
        // Prefer NV.
        try {
            for (CLPlatform p : platforms) {
                // Constant-first comparison: a platform without a suffix must not
                // abort the search through the remaining platforms.
                if ("NV".equals(p.getICDSuffix())) {
                    platform = p;
                    break;
                }
            }
        } catch (Exception ignored) {
            // Best effort only: if the suffix cannot be queried, keep the platform
            // selected so far.
        }
        logger.info(String.format(" Platform: %s", platform));

        // Choose a device.
        CLDevice[] devices = platform.listCLDevices(CLDevice.Type.ACCELERATOR, CLDevice.Type.GPU);
        if (devices == null || devices.length == 0) {
            throw new IllegalStateException(
                    "No OpenCL GPU or accelerator device is available on platform " + platform);
        }
        CLDevice device = devices[0];
        for (CLDevice dev : devices) {
            if (dev.getVendor().startsWith("NV")) {
                device = dev;
                break;
            }
        }
        logger.info(String.format(" Device: %s", device));

        // Initialize the OpenCL Context.
        context = CLContext.create(device);
        CLCommandQueue queue = device.createCommandQueue();

        // Allocate memory on the device.
        int bufferSize = len * 2;
        clData = context.createDoubleBuffer(bufferSize, Mem.READ_WRITE);
        DoubleBuffer doubleBuffer = clData.getBuffer();
        int MB = 1024 * 1024;
        logger.info(String.format(" FFT data buffer [direct: %b, write: %b, size: %d MB]", doubleBuffer.isDirect(), !doubleBuffer.isReadOnly(), clData.getCLSize() / MB));
        clRecip = context.createDoubleBuffer(len, Mem.READ_WRITE);
        doubleBuffer = clRecip.getBuffer();
        logger.info(String.format(" Reciprocal data buffer [direct: %b, write: %b, size: %d MB]", doubleBuffer.isDirect(), !doubleBuffer.isReadOnly(), clRecip.getCLSize() / MB));

        // Initialize the OpenCL FFT library.
        setup();
        int[] dims = { nX, nY, nZ };
        planHandle = createDefaultPlan(context, Complex3DOpenCL_DIMENSION.Complex3DOpenCL_3D, dims);

        // Initialize the reciprocal space multiply kernel.
        URL source = getClass().getClassLoader().getResource("ffx/numerics/fft/VectorMultiply.cl");
        if (source == null) {
            // Route a missing resource through the IOException handler below
            // instead of failing with a NullPointerException on openStream().
            throw new IOException("OpenCL kernel source not found: ffx/numerics/fft/VectorMultiply.cl");
        }
        InputStream input = source.openStream();
        CLProgram program = context.createProgram(input).build();
        // Get a reference to the kernel function with the name 'VectorMultiply'.
        CLKernel kernel = program.createCLKernel("VectorMultiply");
        int localWorkSize = Math.min(device.getMaxWorkGroupSize(), 128);
        int globalWorkSize = roundUp(localWorkSize, len);

        synchronized (this) {
            while (!free) {
                if (mode != null) {
                    switch (mode) {
                        case RECIP:
                            // Copy the host-side reciprocal array into the buffer and upload it.
                            clRecip.getBuffer().put(recip).rewind();
                            queue.putWriteBuffer(clRecip, true);
                            break;
                        case FFT:
                            clData.getBuffer().rewind();
                            queue.putWriteBuffer(clData, true);
                            if (!transferOnly) {
                                executeTransform(Complex3DOpenCL_DIRECTION.FORWARD, queue, clData, clData);
                                queue.finish();
                            }
                            clData.getBuffer().rewind();
                            queue.putReadBuffer(clData, true);
                            clData.getBuffer().rewind();
                            queue.finish();
                            break;
                        case CONVOLUTION:
                            queue.putWriteBuffer(clData, true);
                            if (!transferOnly) {
                                // Forward FFT.
                                executeTransform(Complex3DOpenCL_DIRECTION.FORWARD, queue, clData, clData);
                                // Reciprocal space multiply.
                                kernel.rewind().putArgs(clData, clRecip).putArg(len);
                                queue.put1DRangeKernel(kernel, 0, globalWorkSize, localWorkSize);
                                queue.putBarrier();
                                // Backward FFT.
                                executeTransform(Complex3DOpenCL_DIRECTION.BACKWARD, queue, clData, clData);
                            }
                            queue.putReadBuffer(clData, true);
                            break;
                        case IFFT:
                            queue.putWriteBuffer(clData, true);
                            if (!transferOnly) {
                                executeTransform(Complex3DOpenCL_DIRECTION.BACKWARD, queue, clData, clData);
                            }
                            queue.putReadBuffer(clData, true);
                    }
                    // Reset the mode to null and notify the calling thread.
                    mode = null;
                    notify();
                }
                // The OpenCL thread will wait until it's notified again.
                try {
                    wait();
                } catch (InterruptedException e) {
                    // The interrupt status is deliberately not restored here: with it
                    // set, the next wait() would throw again immediately and this
                    // loop would spin.
                    logger.severe(e.toString());
                }
            }
            // Shutdown was requested: drain the queue and release device resources.
            queue.finish();
            clData.release();
            clRecip.release();
            destroyPlan();
            teardown();
            dead = true;
            notify();
        }
    } catch (IOException e) {
        logger.warning(e.toString());
    } finally {
        try {
            if (context != null) {
                context.release();
            }
        } finally {
            // If setup or the service loop failed, the clean-shutdown branch above
            // never ran; still mark the thread dead and wake a waiter so that code
            // waiting on 'dead' does not block forever. No-op after a clean shutdown.
            synchronized (this) {
                if (!dead) {
                    dead = true;
                    notify();
                }
            }
        }
    }
    logger.info(" OpenCL FFT/convolution thread is done.");
}
Aggregations