Use of org.bytedeco.javacpp.Pointer in project deeplearning4j by deeplearning4j.
The class CudnnLocalResponseNormalizationHelper, method activate.
@Override
public INDArray activate(INDArray input, boolean training, double k, double n, double alpha, double beta) {
    if (n < CUDNN_LRN_MIN_N) {
        throw new IllegalArgumentException("Error: n < CUDNN_LRN_MIN_N (" + n + " < " + CUDNN_LRN_MIN_N + ")");
    }
    if (n > CUDNN_LRN_MAX_N) {
        throw new IllegalArgumentException("Error: n > CUDNN_LRN_MAX_N (" + n + " > " + CUDNN_LRN_MAX_N + ")");
    }
    if (k < CUDNN_LRN_MIN_K) {
        throw new IllegalArgumentException("Error: k < CUDNN_LRN_MIN_K (" + k + " < " + CUDNN_LRN_MIN_K + ")");
    }
    if (beta < CUDNN_LRN_MIN_BETA) {
        throw new IllegalArgumentException("Error: beta < CUDNN_LRN_MIN_BETA (" + beta + " < " + CUDNN_LRN_MIN_BETA + ")");
    }
    int miniBatch = input.size(0);
    int inDepth = input.size(1);
    int inH = input.size(2);
    int inW = input.size(3);
    // Describe the input and output tensors to cuDNN, including their strides
    int[] srcStride = input.stride();
    checkCudnn(cudnnSetTensor4dDescriptorEx(cudnnContext.srcTensorDesc, dataType, miniBatch, inDepth, inH, inW,
                    srcStride[0], srcStride[1], srcStride[2], srcStride[3]));
    activations = Nd4j.createUninitialized(new int[] {miniBatch, inDepth, inH, inW}, 'c');
    int[] dstStride = activations.stride();
    checkCudnn(cudnnSetTensor4dDescriptorEx(cudnnContext.dstTensorDesc, dataType, miniBatch, inDepth, inH, inW,
                    dstStride[0], dstStride[1], dstStride[2], dstStride[3]));
    checkCudnn(cudnnSetLRNDescriptor(cudnnContext.lrnDesc, (int) n, alpha, beta, k));
    Allocator allocator = AtomicAllocator.getInstance();
    CudaContext context = allocator.getFlowController().prepareActionAllWrite(input, activations);
    Pointer srcData = allocator.getPointer(input, context);
    Pointer dstData = allocator.getPointer(activations, context);
    // Flush any queued ops before cuDNN touches the buffers
    if (Nd4j.getExecutioner() instanceof GridExecutioner)
        ((GridExecutioner) Nd4j.getExecutioner()).flushQueue();
    checkCudnn(cudnnSetStream(cudnnContext, new CUstream_st(context.getOldStream())));
    // Note: this.alpha and this.beta here are the cuDNN blend scalars (typically 1 and 0),
    // not the LRN alpha/beta arguments, which were set on lrnDesc above
    checkCudnn(cudnnLRNCrossChannelForward(cudnnContext, cudnnContext.lrnDesc, CUDNN_LRN_CROSS_CHANNEL_DIM1,
                    this.alpha, cudnnContext.srcTensorDesc, srcData, this.beta, cudnnContext.dstTensorDesc, dstData));
    allocator.getFlowController().registerActionAllWrite(context, input, activations);
    return activations;
}
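For context, cross-channel LRN normalizes each channel value by a sum of squares over its n neighboring channels. The sketch below is a minimal plain-Java rendering of that formula at a single spatial position; it assumes cuDNN's convention of dividing alpha by the window size n (frameworks differ on this), so treat it as illustrative rather than a bit-exact match for cudnnLRNCrossChannelForward.

// Minimal sketch of cross-channel LRN at one spatial location
// (all channel values for a single pixel). Assumes the cuDNN
// convention of scaling the sum of squares by alpha / n.
static double[] lrnCrossChannel(double[] in, int n, double k, double alpha, double beta) {
    int channels = in.length;
    double[] out = new double[channels];
    int half = n / 2;
    for (int c = 0; c < channels; c++) {
        double sumSq = 0.0;
        // Window of up to n channels centered on c, clamped at the edges
        for (int j = Math.max(0, c - half); j <= Math.min(channels - 1, c + half); j++) {
            sumSq += in[j] * in[j];
        }
        out[c] = in[c] / Math.pow(k + (alpha / n) * sumSq, beta);
    }
    return out;
}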
Use of org.bytedeco.javacpp.Pointer in project deeplearning4j by deeplearning4j.
The class CudnnLocalResponseNormalizationHelper, method backpropGradient.
@Override
public Pair<Gradient, INDArray> backpropGradient(INDArray input, INDArray epsilon, double k, double n, double alpha, double beta) {
    if (n < CUDNN_LRN_MIN_N) {
        throw new IllegalArgumentException("Error: n < CUDNN_LRN_MIN_N (" + n + " < " + CUDNN_LRN_MIN_N + ")");
    }
    if (n > CUDNN_LRN_MAX_N) {
        throw new IllegalArgumentException("Error: n > CUDNN_LRN_MAX_N (" + n + " > " + CUDNN_LRN_MAX_N + ")");
    }
    if (k < CUDNN_LRN_MIN_K) {
        throw new IllegalArgumentException("Error: k < CUDNN_LRN_MIN_K (" + k + " < " + CUDNN_LRN_MIN_K + ")");
    }
    if (beta < CUDNN_LRN_MIN_BETA) {
        throw new IllegalArgumentException("Error: beta < CUDNN_LRN_MIN_BETA (" + beta + " < " + CUDNN_LRN_MIN_BETA + ")");
    }
    int miniBatch = input.size(0);
    int depth = input.size(1);
    int inH = input.size(2);
    int inW = input.size(3);
    // LRN has no trainable parameters, so the gradient object stays empty
    Gradient retGradient = new DefaultGradient();
    if (!Shape.strideDescendingCAscendingF(epsilon)) {
        // Views with non-standard strides are apparently not supported by cuDNN; make a dense copy
        epsilon = epsilon.dup();
    }
    int[] srcStride = input.stride();
    int[] deltaStride = epsilon.stride();
    if (Nd4j.getExecutioner() instanceof GridExecutioner)
        ((GridExecutioner) Nd4j.getExecutioner()).flushQueue();
    checkCudnn(cudnnSetTensor4dDescriptorEx(cudnnContext.srcTensorDesc, dataType, miniBatch, depth, inH, inW,
                    srcStride[0], srcStride[1], srcStride[2], srcStride[3]));
    checkCudnn(cudnnSetTensor4dDescriptorEx(cudnnContext.deltaTensorDesc, dataType, miniBatch, depth, inH, inW,
                    deltaStride[0], deltaStride[1], deltaStride[2], deltaStride[3]));
    checkCudnn(cudnnSetLRNDescriptor(cudnnContext.lrnDesc, (int) n, alpha, beta, k));
    INDArray nextEpsilon = Nd4j.createUninitialized(new int[] {miniBatch, depth, inH, inW}, 'c');
    int[] dstStride = nextEpsilon.stride();
    checkCudnn(cudnnSetTensor4dDescriptorEx(cudnnContext.dstTensorDesc, dataType, miniBatch, depth, inH, inW,
                    dstStride[0], dstStride[1], dstStride[2], dstStride[3]));
    Allocator allocator = AtomicAllocator.getInstance();
    CudaContext context = allocator.getFlowController().prepareActionAllWrite(input, epsilon, activations, nextEpsilon);
    Pointer srcData = allocator.getPointer(input, context);
    Pointer epsData = allocator.getPointer(epsilon, context);
    Pointer zData = allocator.getPointer(activations, context);
    Pointer dstData = allocator.getPointer(nextEpsilon, context);
    checkCudnn(cudnnSetStream(cudnnContext, new CUstream_st(context.getOldStream())));
    // zData holds the forward-pass activations (y); deltaTensorDesc is reused for
    // both y and dy since they share the same shape
    checkCudnn(cudnnLRNCrossChannelBackward(cudnnContext, cudnnContext.lrnDesc, CUDNN_LRN_CROSS_CHANNEL_DIM1,
                    this.alpha, cudnnContext.deltaTensorDesc, zData, cudnnContext.deltaTensorDesc, epsData,
                    cudnnContext.srcTensorDesc, srcData, this.beta, cudnnContext.dstTensorDesc, dstData));
    allocator.getFlowController().registerActionAllWrite(context, input, epsilon, activations, nextEpsilon);
    return new Pair<>(retGradient, nextEpsilon);
}
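The Shape.strideDescendingCAscendingF guard above exists because cuDNN cannot consume views with non-standard strides. A minimal sketch of the same pattern against the public Nd4j API (the shape and permutation below are arbitrary examples):

// A permuted view has non-descending strides, so copy it to a dense
// c-order buffer before handing the memory to cuDNN
INDArray epsilon = Nd4j.rand(new int[] {2, 3, 4, 5}).permute(0, 2, 3, 1);
if (!Shape.strideDescendingCAscendingF(epsilon)) {
    epsilon = epsilon.dup('c'); // dense copy with standard c-order strides
}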
Use of org.bytedeco.javacpp.Pointer in project deeplearning4j by deeplearning4j.
The class BaseStatsListener, method doInit.
private void doInit(Model model) {
    boolean backpropParamsOnly = backpropParamsOnly(model);
    //TODO support NTP
    long initTime = System.currentTimeMillis();
    StatsInitializationReport initReport = getNewInitializationReport();
    initReport.reportIDs(getSessionID(model), TYPE_ID, workerID, initTime);
    if (initConfig.collectSoftwareInfo()) {
        OperatingSystemMXBean osBean = ManagementFactory.getOperatingSystemMXBean();
        RuntimeMXBean runtime = ManagementFactory.getRuntimeMXBean();
        String arch = osBean.getArch();
        String osName = osBean.getName();
        String jvmName = runtime.getVmName();
        String jvmVersion = System.getProperty("java.version");
        String jvmSpecVersion = runtime.getSpecVersion();
        String nd4jBackendClass = Nd4j.getNDArrayFactory().getClass().getName();
        String nd4jDataTypeName = DataTypeUtil.getDtypeFromContext().name();
        String hostname = System.getenv("COMPUTERNAME"); // Windows only
        if (hostname == null || hostname.isEmpty()) {
            try {
                // Fall back to the hostname command on other platforms
                Process proc = Runtime.getRuntime().exec("hostname");
                try (InputStream stream = proc.getInputStream()) {
                    hostname = IOUtils.toString(stream);
                }
            } catch (Exception e) {
                // Ignore: hostname simply stays null/empty in the report
            }
        }
        Properties p = Nd4j.getExecutioner().getEnvironmentInformation();
        Map<String, String> envInfo = new HashMap<>();
        for (Map.Entry<Object, Object> e : p.entrySet()) {
            Object v = e.getValue();
            String value = (v == null ? "" : v.toString());
            envInfo.put(e.getKey().toString(), value);
        }
        initReport.reportSoftwareInfo(arch, osName, jvmName, jvmVersion, jvmSpecVersion, nd4jBackendClass,
                        nd4jDataTypeName, hostname, UIDProvider.getJVMUID(), envInfo);
    }
    if (initConfig.collectHardwareInfo()) {
        int availableProcessors = Runtime.getRuntime().availableProcessors();
        NativeOps nativeOps = NativeOpsHolder.getInstance().getDeviceNativeOps();
        int nDevices = nativeOps.getAvailableDevices();
        long[] deviceTotalMem = null;
        //TODO
        String[] deviceDescription = null;
        if (nDevices > 0) {
            deviceTotalMem = new long[nDevices];
            deviceDescription = new String[nDevices];
            for (int i = 0; i < nDevices; i++) {
                try {
                    Pointer p = getDevicePointer(i);
                    if (p == null) {
                        deviceTotalMem[i] = 0;
                        deviceDescription[i] = "Device(" + i + ")";
                    } else {
                        deviceTotalMem[i] = nativeOps.getDeviceTotalMemory(p);
                        deviceDescription[i] = nativeOps.getDeviceName(p);
                        if (nDevices > 1) {
                            deviceDescription[i] = deviceDescription[i] + " (" + i + ")";
                        }
                    }
                } catch (Exception e) {
                    log.debug("Error getting device info", e);
                }
            }
        }
        long jvmMaxMemory = Runtime.getRuntime().maxMemory();
        long offheapMaxMemory = Pointer.maxBytes();
        initReport.reportHardwareInfo(availableProcessors, nDevices, jvmMaxMemory, offheapMaxMemory, deviceTotalMem,
                        deviceDescription, UIDProvider.getHardwareUID());
    }
    if (initConfig.collectModelInfo()) {
        String jsonConf;
        int numLayers;
        int numParams;
        if (model instanceof MultiLayerNetwork) {
            MultiLayerNetwork net = ((MultiLayerNetwork) model);
            jsonConf = net.getLayerWiseConfigurations().toJson();
            numLayers = net.getnLayers();
            numParams = net.numParams();
        } else if (model instanceof ComputationGraph) {
            ComputationGraph cg = ((ComputationGraph) model);
            jsonConf = cg.getConfiguration().toJson();
            numLayers = cg.getNumLayers();
            numParams = cg.numParams();
        } else if (model instanceof Layer) {
            Layer l = (Layer) model;
            jsonConf = l.conf().toJson();
            numLayers = 1;
            numParams = l.numParams();
        } else {
            throw new RuntimeException("Invalid model: Expected MultiLayerNetwork, ComputationGraph, or Layer. Got: "
                            + (model == null ? null : model.getClass()));
        }
        Map<String, INDArray> paramMap = model.paramTable(backpropParamsOnly);
        String[] paramNames = new String[paramMap.size()];
        int i = 0;
        for (String s : paramMap.keySet()) {
            //Assuming sensible iteration order - LinkedHashMaps are used in MLN/CG for example
            paramNames[i++] = s;
        }
        initReport.reportModelInfo(model.getClass().getName(), jsonConf, paramNames, numLayers, numParams);
    }
    StorageMetaData meta = getNewStorageMetaData(initTime, getSessionID(model), workerID);
    router.putStorageMetaData(meta);
    //TODO error handling
    router.putStaticInfo(initReport);
}
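The hostname lookup above checks the Windows COMPUTERNAME variable and then shells out to the hostname command. A common alternative, sketched below as a hypothetical resolveHostname helper (not part of BaseStatsListener), is to ask InetAddress first and fall back to environment variables:

// Hypothetical portable hostname lookup; InetAddress is tried first,
// with environment variables as a fallback
static String resolveHostname() {
    try {
        // Usually resolves to the machine's configured host name
        return java.net.InetAddress.getLocalHost().getHostName();
    } catch (java.net.UnknownHostException e) {
        String env = System.getenv("COMPUTERNAME"); // Windows
        if (env == null || env.isEmpty())
            env = System.getenv("HOSTNAME"); // exported by some Unix shells
        return env;
    }
}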
Use of org.bytedeco.javacpp.Pointer in project nd4j by deeplearning4j.
The class CudaZeroHandler, method memcpyAsync.
/**
 * Asynchronous version of memcpy.
 *
 * PLEASE NOTE: this is a device-dependent method; if it's not supported in your
 * environment, a blocking call will be used instead.
 *
 * @param dstBuffer destination DataBuffer
 * @param srcPointer source (host) pointer
 * @param length number of bytes to copy
 * @param dstOffset byte offset into the destination buffer
 */
@Override
public void memcpyAsync(DataBuffer dstBuffer, Pointer srcPointer, long length, long dstOffset) {
    AllocationPoint point = ((BaseCudaDataBuffer) dstBuffer).getAllocationPoint();
    // we update host memory regardless of where the buffer currently lives
    Pointer dP = new CudaPointer((point.getPointers().getHostPointer().address()) + dstOffset);
    CudaContext tContext = null;
    if (dstBuffer.isConstant()) {
        // Constant buffers are copied synchronously on the host via JavaCPP's memcpy
        org.bytedeco.javacpp.Pointer dstPointer = new CudaPointer(point.getPointers().getHostPointer().address() + dstOffset, 0L);
        org.bytedeco.javacpp.Pointer srcPointerJ = new CudaPointer(srcPointer, length);
        org.bytedeco.javacpp.Pointer.memcpy(dstPointer, srcPointerJ, length);
        point.tickHostRead();
    } else {
        CudaContext context = flowController.prepareAction(point);
        tContext = context;
        // A return value of 0 from nativeOps signals failure
        if (nativeOps.memcpyAsync(dP, srcPointer, length, CudaConstants.cudaMemcpyHostToHost, context.getSpecialStream()) == 0)
            throw new IllegalStateException("MemcpyAsync H2H failed: [" + srcPointer.address() + "] -> [" + dP.address() + "]");
        flowController.commitTransfer(tContext.getSpecialStream());
        if (point.getAllocationStatus() == AllocationStatus.HOST)
            flowController.registerAction(context, point);
    }
    // if we're copying something into host memory, but the buffer lives on device,
    // we need to push an exact copy to the device as well
    if (point.getAllocationStatus() == AllocationStatus.DEVICE) {
        // TODO: this sounds wrong, and probably memcpy should check the initial direction, like relocate did before
        Pointer rDP = new CudaPointer(point.getPointers().getDevicePointer().address() + dstOffset);
        if (tContext == null)
            tContext = flowController.prepareAction(point);
        if (nativeOps.memcpyAsync(rDP, dP, length, CudaConstants.cudaMemcpyHostToDevice, tContext.getSpecialStream()) == 0)
            throw new IllegalStateException("MemcpyAsync H2D failed: [" + dP.address() + "] -> [" + rDP.address() + "]");
        flowController.commitTransfer(tContext.getSpecialStream());
        flowController.registerAction(tContext, point);
    }
    point.tickDeviceWrite();
}
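The constant-buffer branch above falls back to JavaCPP's synchronous Pointer.memcpy for a plain host-side copy. A minimal standalone sketch of that call, using only JavaCPP's public API:

import org.bytedeco.javacpp.BytePointer;
import org.bytedeco.javacpp.Pointer;

// Synchronous host-to-host copy of 16 bytes of native memory
BytePointer src = new BytePointer(16);
BytePointer dst = new BytePointer(16);
src.putString("hello");              // writes the string plus a null terminator
Pointer.memcpy(dst, src, 16);        // byte-for-byte native memcpy
System.out.println(dst.getString()); // prints "hello"
src.deallocate();                    // JavaCPP native memory is freed explicitly
dst.deallocate();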
Use of org.bytedeco.javacpp.Pointer in project nd4j by deeplearning4j.
The class CudaZeroHandler, method memcpyDevice.
@Override
public void memcpyDevice(DataBuffer dstBuffer, Pointer srcPointer, long length, long dstOffset, CudaContext context) {
    AllocationPoint point = ((BaseCudaDataBuffer) dstBuffer).getAllocationPoint();
    Pointer dP = new CudaPointer((point.getPointers().getDevicePointer().address()) + dstOffset);
    // Device-to-device copy on the context's stream; 0 signals failure
    if (nativeOps.memcpyAsync(dP, srcPointer, length, CudaConstants.cudaMemcpyDeviceToDevice, context.getOldStream()) == 0)
        throw new ND4JIllegalStateException("memcpyAsync failed");
    point.tickDeviceWrite();
}