Use of com.simiacryptus.mindseye.lang.cudnn.CudaMemory in the MindsEye project by SimiaCryptus.
From class CudaLayerTester, method buildIrregularCudaTensor.
/**
 * Uploads a single tensor to the GPU and re-lays it out into a second, randomly padded buffer.
 *
 * <p>The tensor is first written into a buffer described with dense strides
 * ({@code channels * height * width}, {@code height * width}, {@code width}, {@code 1}). A second
 * buffer is then allocated whose channel, height and width extents are each enlarged by an
 * independent random amount in {@code [0, 4]}, and {@code cudnnTransformTensor} (scale factors
 * 1.0 and 0.0) is used to move the data from the dense descriptor into the padded-stride one.
 * The dense buffer and its descriptor are released before returning; the padded buffer and its
 * descriptor are handed to the returned list.
 *
 * <p>Missing trailing dimensions of {@code original} are treated as size 1.
 *
 * @param gpu       the cuDNN handle used for allocation, descriptor creation and the transform
 * @param precision the numeric precision of the GPU buffers
 * @param original  the tensor to upload; its dimensions are read as (width, height, channels)
 * @return a one-element tensor list wrapping the padded-stride GPU copy of {@code original}
 */
public CudaTensorList buildIrregularCudaTensor(final CudnnHandle gpu, final Precision precision, final Tensor original) {
  TensorArray sourceArray = TensorArray.create(original);
  int[] dims = original.getDimensions();
  final int rank = dims.length;
  // Dimension order is (width, height, channels); absent axes default to 1.
  int channels = rank >= 3 ? dims[2] : 1;
  int height = rank >= 2 ? dims[1] : 1;
  int width = rank >= 1 ? dims[0] : 1;
  final int listLength = 1;
  final int elementLength = sourceArray.getElements();
  MemoryType memoryType = MemoryType.Managed;

  // Stage 1: densely packed copy of the input on the device.
  final long denseBytes = (long) elementLength * listLength * precision.size;
  @Nonnull final CudaMemory densePtr = gpu.allocate(denseBytes, memoryType, false);
  @Nonnull final CudaDevice.CudaTensorDescriptor denseDescriptor = gpu.newTensorDescriptor(
      precision, listLength, channels, height, width,
      channels * height * width, height * width, width, 1);
  for (int index = 0; index < listLength; index++) {
    Tensor element = sourceArray.get(index);
    assert sourceArray != null;
    assert element != null;
    assert Arrays.equals(element.getDimensions(), sourceArray.getDimensions()) : Arrays.toString(element.getDimensions()) + " != " + Arrays.toString(sourceArray.getDimensions());
    densePtr.write(precision, element.getData(), (long) index * elementLength);
    element.freeRef();
  }
  sourceArray.freeRef();

  // Stage 2: pick random per-axis padding (each 0..4) to make the target layout non-dense.
  Random random = new Random();
  int extraChannels = random.nextInt(5);
  int extraHeight = random.nextInt(5);
  int extraWidth = random.nextInt(5);
  final int paddedChannels = channels + extraChannels;
  final int paddedHeight = height + extraHeight;
  final int paddedWidth = width + extraWidth;

  final long paddedBytes = (long) paddedChannels * paddedHeight * paddedWidth * listLength * precision.size;
  @Nonnull final CudaMemory paddedPtr = gpu.allocate(paddedBytes, memoryType, false);
  // Same logical shape as the dense descriptor, but strides computed from the padded extents.
  @Nonnull final CudaDevice.CudaTensorDescriptor paddedDescriptor = gpu.newTensorDescriptor(
      precision, listLength, channels, height, width,
      paddedHeight * paddedWidth * paddedChannels, paddedHeight * paddedWidth, paddedWidth, 1);

  // Stage 3: let cuDNN copy dense -> padded layout.
  gpu.cudnnTransformTensor(
      precision.getPointer(1.0), denseDescriptor.getPtr(), densePtr.getPtr(),
      precision.getPointer(0.0), paddedDescriptor.getPtr(), paddedPtr.getPtr());
  assert CudaDevice.isThreadDeviceId(gpu.getDeviceId());
  densePtr.dirty();
  paddedPtr.dirty();

  // The dense staging resources are no longer needed; the padded ones are passed to the result.
  denseDescriptor.freeRef();
  densePtr.freeRef();
  return CudaTensorList.wrap(CudaTensor.wrap(paddedPtr, paddedDescriptor, precision), 1, original.getDimensions(), precision);
}
Aggregations