Search in sources:

Example 31 with Pointer

use of org.bytedeco.javacpp.Pointer in project deeplearning4j by deeplearning4j.

In the class CudnnLocalResponseNormalizationHelper, the method activate:

/**
 * Runs the cuDNN cross-channel local response normalization forward pass on {@code input}.
 *
 * <p>The output array is stored in the {@code activations} field and is also returned.
 *
 * @param input    array whose dimensions 0..3 are read as (miniBatch, depth, height, width)
 * @param training not read by this method
 * @param k        LRN k value; must not be below {@code CUDNN_LRN_MIN_K}
 * @param n        LRN n value; must lie within {@code [CUDNN_LRN_MIN_N, CUDNN_LRN_MAX_N]} and is
 *                 cast to {@code int} when the LRN descriptor is configured
 * @param alpha    LRN alpha value written into the LRN descriptor
 * @param beta     LRN beta value; must not be below {@code CUDNN_LRN_MIN_BETA}
 * @return a newly created 'c'-ordered array with the same four dimensions as {@code input}
 * @throws IllegalArgumentException if {@code n}, {@code k} or {@code beta} is out of range
 */
@Override
public INDArray activate(INDArray input, boolean training, double k, double n, double alpha, double beta) {
    // Reject hyper-parameters outside the supported limits before touching any native state.
    if (n < CUDNN_LRN_MIN_N) {
        throw new IllegalArgumentException("Error: n < CUDNN_LRN_MIN_N (" + n + " < " + CUDNN_LRN_MIN_N + ")");
    } else if (n > CUDNN_LRN_MAX_N) {
        throw new IllegalArgumentException("Error: n > CUDNN_LRN_MAX_N (" + n + " > " + CUDNN_LRN_MAX_N + ")");
    } else if (k < CUDNN_LRN_MIN_K) {
        throw new IllegalArgumentException("Error: k < CUDNN_LRN_MIN_K (" + k + " < " + CUDNN_LRN_MIN_K + ")");
    } else if (beta < CUDNN_LRN_MIN_BETA) {
        throw new IllegalArgumentException("Error: beta < CUDNN_LRN_MIN_BETA (" + beta + " < " + CUDNN_LRN_MIN_BETA + ")");
    }

    final int batchSize = input.size(0);
    final int channels = input.size(1);
    final int height = input.size(2);
    final int width = input.size(3);

    // Describe the input using its own strides.
    final int[] inStride = input.stride();
    checkCudnn(cudnnSetTensor4dDescriptorEx(cudnnContext.srcTensorDesc, dataType, batchSize, channels, height, width,
            inStride[0], inStride[1], inStride[2], inStride[3]));

    // Allocate the output and describe it the same way.
    activations = Nd4j.createUninitialized(new int[] { batchSize, channels, height, width }, 'c');
    final int[] outStride = activations.stride();
    checkCudnn(cudnnSetTensor4dDescriptorEx(cudnnContext.dstTensorDesc, dataType, batchSize, channels, height, width,
            outStride[0], outStride[1], outStride[2], outStride[3]));

    // Note: the method arguments alpha/beta/k configure the LRN descriptor ...
    checkCudnn(cudnnSetLRNDescriptor(cudnnContext.lrnDesc, (int) n, alpha, beta, k));

    final Allocator memory = AtomicAllocator.getInstance();
    final CudaContext cudaCtx = memory.getFlowController().prepareActionAllWrite(input, activations);
    final Pointer inputPtr = memory.getPointer(input, cudaCtx);
    final Pointer outputPtr = memory.getPointer(activations, cudaCtx);

    // Flush any queued operations before the direct cuDNN call when a grid executioner is in use.
    if (Nd4j.getExecutioner() instanceof GridExecutioner) {
        ((GridExecutioner) Nd4j.getExecutioner()).flushQueue();
    }

    checkCudnn(cudnnSetStream(cudnnContext, new CUstream_st(cudaCtx.getOldStream())));
    // ... whereas the fields this.alpha/this.beta (distinct from the arguments) are passed to the forward call itself.
    checkCudnn(cudnnLRNCrossChannelForward(cudnnContext, cudnnContext.lrnDesc, CUDNN_LRN_CROSS_CHANNEL_DIM1,
            this.alpha, cudnnContext.srcTensorDesc, inputPtr, this.beta, cudnnContext.dstTensorDesc, outputPtr));

    memory.getFlowController().registerActionAllWrite(cudaCtx, input, activations);
    return activations;
}
Also used : AtomicAllocator(org.nd4j.jita.allocator.impl.AtomicAllocator) Allocator(org.nd4j.jita.allocator.Allocator) GridExecutioner(org.nd4j.linalg.api.ops.executioner.GridExecutioner) CudaContext(org.nd4j.linalg.jcublas.context.CudaContext) DoublePointer(org.bytedeco.javacpp.DoublePointer) FloatPointer(org.bytedeco.javacpp.FloatPointer) ShortPointer(org.bytedeco.javacpp.ShortPointer) Pointer(org.bytedeco.javacpp.Pointer)

Example 32 with Pointer

use of org.bytedeco.javacpp.Pointer in project deeplearning4j by deeplearning4j.

In the class CudnnLocalResponseNormalizationHelper, the method backpropGradient:

/**
 * Runs the cuDNN cross-channel local response normalization backward pass.
 *
 * <p>Reads the {@code activations} field in addition to the arguments, and writes the result into a
 * newly created array.
 *
 * @param input   array whose dimensions 0..3 are read as (miniBatch, depth, height, width)
 * @param epsilon incoming gradient; duplicated first when
 *                {@code Shape.strideDescendingCAscendingF(epsilon)} is false
 * @param k       LRN k value; must not be below {@code CUDNN_LRN_MIN_K}
 * @param n       LRN n value; must lie within {@code [CUDNN_LRN_MIN_N, CUDNN_LRN_MAX_N]} and is
 *                cast to {@code int} when the LRN descriptor is configured
 * @param alpha   LRN alpha value written into the LRN descriptor
 * @param beta    LRN beta value; must not be below {@code CUDNN_LRN_MIN_BETA}
 * @return a pair of a {@code DefaultGradient} to which this method adds nothing, and the newly
 *         created 'c'-ordered output array
 * @throws IllegalArgumentException if {@code n}, {@code k} or {@code beta} is out of range
 */
@Override
public Pair<Gradient, INDArray> backpropGradient(INDArray input, INDArray epsilon, double k, double n, double alpha, double beta) {
    // Reject hyper-parameters outside the supported limits before touching any native state.
    if (n < CUDNN_LRN_MIN_N) {
        throw new IllegalArgumentException("Error: n < CUDNN_LRN_MIN_N (" + n + " < " + CUDNN_LRN_MIN_N + ")");
    } else if (n > CUDNN_LRN_MAX_N) {
        throw new IllegalArgumentException("Error: n > CUDNN_LRN_MAX_N (" + n + " > " + CUDNN_LRN_MAX_N + ")");
    } else if (k < CUDNN_LRN_MIN_K) {
        throw new IllegalArgumentException("Error: k < CUDNN_LRN_MIN_K (" + k + " < " + CUDNN_LRN_MIN_K + ")");
    } else if (beta < CUDNN_LRN_MIN_BETA) {
        throw new IllegalArgumentException("Error: beta < CUDNN_LRN_MIN_BETA (" + beta + " < " + CUDNN_LRN_MIN_BETA + ")");
    }

    final int batchSize = input.size(0);
    final int channels = input.size(1);
    final int height = input.size(2);
    final int width = input.size(3);

    final Gradient emptyGradient = new DefaultGradient();

    if (!Shape.strideDescendingCAscendingF(epsilon)) {
        // apparently not supported by cuDNN
        epsilon = epsilon.dup();
    }

    final int[] inStride = input.stride();
    final int[] epsStride = epsilon.stride();

    // Flush any queued operations before the direct cuDNN calls when a grid executioner is in use.
    if (Nd4j.getExecutioner() instanceof GridExecutioner) {
        ((GridExecutioner) Nd4j.getExecutioner()).flushQueue();
    }

    checkCudnn(cudnnSetTensor4dDescriptorEx(cudnnContext.srcTensorDesc, dataType, batchSize, channels, height, width,
            inStride[0], inStride[1], inStride[2], inStride[3]));
    checkCudnn(cudnnSetTensor4dDescriptorEx(cudnnContext.deltaTensorDesc, dataType, batchSize, channels, height, width,
            epsStride[0], epsStride[1], epsStride[2], epsStride[3]));
    // Note: the method arguments alpha/beta/k configure the LRN descriptor ...
    checkCudnn(cudnnSetLRNDescriptor(cudnnContext.lrnDesc, (int) n, alpha, beta, k));

    final INDArray outEpsilon = Nd4j.createUninitialized(new int[] { batchSize, channels, height, width }, 'c');
    final int[] outStride = outEpsilon.stride();
    checkCudnn(cudnnSetTensor4dDescriptorEx(cudnnContext.dstTensorDesc, dataType, batchSize, channels, height, width,
            outStride[0], outStride[1], outStride[2], outStride[3]));

    final Allocator memory = AtomicAllocator.getInstance();
    final CudaContext cudaCtx = memory.getFlowController().prepareActionAllWrite(input, epsilon, activations, outEpsilon);
    final Pointer inputPtr = memory.getPointer(input, cudaCtx);
    final Pointer epsilonPtr = memory.getPointer(epsilon, cudaCtx);
    final Pointer activationsPtr = memory.getPointer(activations, cudaCtx);
    final Pointer outputPtr = memory.getPointer(outEpsilon, cudaCtx);

    checkCudnn(cudnnSetStream(cudnnContext, new CUstream_st(cudaCtx.getOldStream())));
    // ... whereas the fields this.alpha/this.beta (distinct from the arguments) are passed to the backward call itself.
    // NOTE(review): deltaTensorDesc is used for both the activations and the epsilon buffers - confirm their strides always match.
    checkCudnn(cudnnLRNCrossChannelBackward(cudnnContext, cudnnContext.lrnDesc, CUDNN_LRN_CROSS_CHANNEL_DIM1,
            this.alpha, cudnnContext.deltaTensorDesc, activationsPtr, cudnnContext.deltaTensorDesc, epsilonPtr,
            cudnnContext.srcTensorDesc, inputPtr, this.beta, cudnnContext.dstTensorDesc, outputPtr));

    memory.getFlowController().registerActionAllWrite(cudaCtx, input, epsilon, activations, outEpsilon);
    return new Pair<>(emptyGradient, outEpsilon);
}
Also used : AtomicAllocator(org.nd4j.jita.allocator.impl.AtomicAllocator) Allocator(org.nd4j.jita.allocator.Allocator) Gradient(org.deeplearning4j.nn.gradient.Gradient) DefaultGradient(org.deeplearning4j.nn.gradient.DefaultGradient) DefaultGradient(org.deeplearning4j.nn.gradient.DefaultGradient) GridExecutioner(org.nd4j.linalg.api.ops.executioner.GridExecutioner) INDArray(org.nd4j.linalg.api.ndarray.INDArray) CudaContext(org.nd4j.linalg.jcublas.context.CudaContext) DoublePointer(org.bytedeco.javacpp.DoublePointer) FloatPointer(org.bytedeco.javacpp.FloatPointer) ShortPointer(org.bytedeco.javacpp.ShortPointer) Pointer(org.bytedeco.javacpp.Pointer) Pair(org.deeplearning4j.berkeley.Pair)

Example 33 with Pointer

use of org.bytedeco.javacpp.Pointer in project deeplearning4j by deeplearning4j.

In the class BaseStatsListener, the method doInit:

/**
 * Builds the initialization report for {@code model} and hands it, together with the storage
 * metadata, to the router.
 *
 * <p>Depending on {@code initConfig}, the report contains up to three sections:
 * <ul>
 *   <li>software info: OS/JVM details, ND4J backend and data type, hostname, environment properties;</li>
 *   <li>hardware info: processor count, per-device memory and description, JVM and off-heap limits;</li>
 *   <li>model info: JSON configuration, parameter names, layer and parameter counts.</li>
 * </ul>
 *
 * <p>Hostname and device lookups are best effort: failures are logged at debug level and do not
 * abort the report.
 *
 * @param model the model to describe; for the model-info section it must be a
 *              {@code MultiLayerNetwork}, {@code ComputationGraph} or {@code Layer}
 * @throws RuntimeException if model info is requested and {@code model} is none of the supported types
 */
private void doInit(Model model) {
    boolean backpropParamsOnly = backpropParamsOnly(model);
    //TODO support NTP
    long initTime = System.currentTimeMillis();
    StatsInitializationReport initReport = getNewInitializationReport();
    initReport.reportIDs(getSessionID(model), TYPE_ID, workerID, initTime);

    if (initConfig.collectSoftwareInfo()) {
        OperatingSystemMXBean osBean = ManagementFactory.getOperatingSystemMXBean();
        RuntimeMXBean runtime = ManagementFactory.getRuntimeMXBean();
        String arch = osBean.getArch();
        String osName = osBean.getName();
        String jvmName = runtime.getVmName();
        String jvmVersion = System.getProperty("java.version");
        String jvmSpecVersion = runtime.getSpecVersion();
        String nd4jBackendClass = Nd4j.getNDArrayFactory().getClass().getName();
        String nd4jDataTypeName = DataTypeUtil.getDtypeFromContext().name();

        // Try the COMPUTERNAME environment variable first; otherwise fall back to the "hostname" command.
        String hostname = System.getenv("COMPUTERNAME");
        if (hostname == null || hostname.isEmpty()) {
            try {
                Process proc = Runtime.getRuntime().exec("hostname");
                try (InputStream stream = proc.getInputStream()) {
                    String output = IOUtils.toString(stream);
                    if (output != null) {
                        // The command output ends with a line terminator; strip it so the
                        // reported hostname does not carry trailing whitespace.
                        hostname = output.trim();
                    }
                }
            } catch (Exception e) {
                // Best effort only: the report is still useful without a hostname, but leave a trace.
                log.debug("Error getting hostname", e);
            }
        }

        // Copy the executioner's environment properties into a String->String map, mapping null values to "".
        Properties p = Nd4j.getExecutioner().getEnvironmentInformation();
        Map<String, String> envInfo = new HashMap<>();
        for (Map.Entry<Object, Object> e : p.entrySet()) {
            Object v = e.getValue();
            String value = (v == null ? "" : v.toString());
            envInfo.put(e.getKey().toString(), value);
        }
        initReport.reportSoftwareInfo(arch, osName, jvmName, jvmVersion, jvmSpecVersion, nd4jBackendClass, nd4jDataTypeName, hostname, UIDProvider.getJVMUID(), envInfo);
    }

    if (initConfig.collectHardwareInfo()) {
        int availableProcessors = Runtime.getRuntime().availableProcessors();
        NativeOps nativeOps = NativeOpsHolder.getInstance().getDeviceNativeOps();
        int nDevices = nativeOps.getAvailableDevices();
        long[] deviceTotalMem = null;
        //TODO
        String[] deviceDescription = null;
        if (nDevices > 0) {
            deviceTotalMem = new long[nDevices];
            deviceDescription = new String[nDevices];
            for (int i = 0; i < nDevices; i++) {
                try {
                    Pointer p = getDevicePointer(i);
                    if (p == null) {
                        deviceTotalMem[i] = 0;
                        deviceDescription[i] = "Device(" + i + ")";
                    } else {
                        deviceTotalMem[i] = nativeOps.getDeviceTotalMemory(p);
                        deviceDescription[i] = nativeOps.getDeviceName(p);
                        if (nDevices > 1) {
                            deviceDescription[i] = deviceDescription[i] + " (" + i + ")";
                        }
                    }
                } catch (Exception e) {
                    log.debug("Error getting device info", e);
                    // Do not leave a null description behind; use the same placeholder as for a missing pointer.
                    if (deviceDescription[i] == null) {
                        deviceDescription[i] = "Device(" + i + ")";
                    }
                }
            }
        }
        long jvmMaxMemory = Runtime.getRuntime().maxMemory();
        long offheapMaxMemory = Pointer.maxBytes();
        initReport.reportHardwareInfo(availableProcessors, nDevices, jvmMaxMemory, offheapMaxMemory, deviceTotalMem, deviceDescription, UIDProvider.getHardwareUID());
    }

    if (initConfig.collectModelInfo()) {
        String jsonConf;
        int numLayers;
        int numParams;
        if (model instanceof MultiLayerNetwork) {
            MultiLayerNetwork net = ((MultiLayerNetwork) model);
            jsonConf = net.getLayerWiseConfigurations().toJson();
            numLayers = net.getnLayers();
            numParams = net.numParams();
        } else if (model instanceof ComputationGraph) {
            ComputationGraph cg = ((ComputationGraph) model);
            jsonConf = cg.getConfiguration().toJson();
            numLayers = cg.getNumLayers();
            numParams = cg.numParams();
        } else if (model instanceof Layer) {
            Layer l = (Layer) model;
            jsonConf = l.conf().toJson();
            numLayers = 1;
            numParams = l.numParams();
        } else {
            // Layer is accepted above, so the message lists it as well.
            throw new RuntimeException("Invalid model: Expected MultiLayerNetwork, ComputationGraph or Layer. Got: " + (model == null ? null : model.getClass()));
        }
        Map<String, INDArray> paramMap = model.paramTable(backpropParamsOnly);
        //Assuming sensible iteration order - LinkedHashMaps are used in MLN/CG for example
        String[] paramNames = paramMap.keySet().toArray(new String[0]);
        initReport.reportModelInfo(model.getClass().getName(), jsonConf, paramNames, numLayers, numParams);
    }

    StorageMetaData meta = getNewStorageMetaData(initTime, getSessionID(model), workerID);
    router.putStorageMetaData(meta);
    //TODO error handling
    router.putStaticInfo(initReport);
}
Also used : Pointer(org.bytedeco.javacpp.Pointer) ComputationGraph(org.deeplearning4j.nn.graph.ComputationGraph) MultiLayerNetwork(org.deeplearning4j.nn.multilayer.MultiLayerNetwork) NativeOps(org.nd4j.nativeblas.NativeOps) InputStream(java.io.InputStream) RuntimeMXBean(java.lang.management.RuntimeMXBean) Layer(org.deeplearning4j.nn.api.Layer) StorageMetaData(org.deeplearning4j.api.storage.StorageMetaData) INDArray(org.nd4j.linalg.api.ndarray.INDArray) OperatingSystemMXBean(java.lang.management.OperatingSystemMXBean)

Example 34 with Pointer

use of org.bytedeco.javacpp.Pointer in project nd4j by deeplearning4j.

In the class CudaZeroHandler, the method memcpyAsync:

/**
 * Asynchronous version of memcpy.
 *
 * <p>Copies from {@code srcPointer} into the host pointer of {@code dstBuffer}'s allocation point.
 * When the allocation status is {@code DEVICE}, the freshly written host region is then copied to
 * the device pointer as well.
 *
 * <p>PLEASE NOTE: This is device-dependent method, if it's not supported in your environment,
 * blocking call will be used instead.
 *
 * @param dstBuffer  destination buffer; cast to {@code BaseCudaDataBuffer} to obtain its allocation point
 * @param srcPointer source of the copy
 * @param length     amount to copy, passed unchanged to the underlying memcpy calls
 *                   (presumably in bytes - confirm against callers)
 * @param dstOffset  offset added directly to the raw destination address
 * @throws IllegalStateException if a native {@code memcpyAsync} call returns 0
 */
@Override
public void memcpyAsync(DataBuffer dstBuffer, Pointer srcPointer, long length, long dstOffset) {
    AllocationPoint point = ((BaseCudaDataBuffer) dstBuffer).getAllocationPoint();

    // The host side is always updated, whatever the current allocation status is.
    Pointer hostTarget = new CudaPointer((point.getPointers().getHostPointer().address()) + dstOffset);

    // Stays null on the constant-buffer path, where no flow-controller action is prepared.
    CudaContext ctx = null;

    if (!dstBuffer.isConstant()) {
        ctx = flowController.prepareAction(point);
        if (nativeOps.memcpyAsync(hostTarget, srcPointer, length, CudaConstants.cudaMemcpyHostToHost, ctx.getSpecialStream()) == 0) {
            throw new IllegalStateException("MemcpyAsync H2H failed: [" + srcPointer.address() + "] -> [" + hostTarget.address() + "]");
        }
        flowController.commitTransfer(ctx.getSpecialStream());
        if (point.getAllocationStatus() == AllocationStatus.HOST) {
            flowController.registerAction(ctx, point);
        }
    } else {
        // Constant buffers are copied with a plain JavaCPP memcpy instead of the native async call.
        org.bytedeco.javacpp.Pointer dstPointer = new CudaPointer(point.getPointers().getHostPointer().address() + dstOffset, 0L);
        org.bytedeco.javacpp.Pointer srcPointerJ = new CudaPointer(srcPointer, length);
        org.bytedeco.javacpp.Pointer.memcpy(dstPointer, srcPointerJ, length);
        point.tickHostRead();
    }

    // if we're copying something into host memory, but we're on device - we need to provide exact copy to device as well
    if (point.getAllocationStatus() == AllocationStatus.DEVICE) {
        // TODO: this sounds wrong, and probably memcpy whould check initial direction, like relocate did before
        Pointer deviceTarget = new CudaPointer(point.getPointers().getDevicePointer().address() + dstOffset);
        if (ctx == null) {
            ctx = flowController.prepareAction(point);
        }
        if (nativeOps.memcpyAsync(deviceTarget, hostTarget, length, CudaConstants.cudaMemcpyHostToDevice, ctx.getSpecialStream()) == 0) {
            throw new IllegalStateException("MemcpyAsync H2D failed: [" + hostTarget.address() + "] -> [" + deviceTarget.address() + "]");
        }
        flowController.commitTransfer(ctx.getSpecialStream());
        flowController.registerAction(ctx, point);
    }

    point.tickDeviceWrite();
}
Also used : ND4JIllegalStateException(org.nd4j.linalg.exception.ND4JIllegalStateException) Pointer(org.bytedeco.javacpp.Pointer) CudaContext(org.nd4j.linalg.jcublas.context.CudaContext) BaseCudaDataBuffer(org.nd4j.linalg.jcublas.buffer.BaseCudaDataBuffer) CudaPointer(org.nd4j.jita.allocator.pointers.CudaPointer) Pointer(org.bytedeco.javacpp.Pointer) AllocationPoint(org.nd4j.jita.allocator.impl.AllocationPoint) CudaPointer(org.nd4j.jita.allocator.pointers.CudaPointer)

Example 35 with Pointer

use of org.bytedeco.javacpp.Pointer in project nd4j by deeplearning4j.

In the class CudaZeroHandler, the method memcpyDevice:

/**
 * Issues a device-to-device {@code memcpyAsync} from {@code srcPointer} into the device pointer of
 * {@code dstBuffer}'s allocation point, then marks a device write on that allocation point.
 *
 * @param dstBuffer  destination buffer; cast to {@code BaseCudaDataBuffer} to obtain its allocation point
 * @param srcPointer source of the copy
 * @param length     amount to copy, passed unchanged to the native call
 *                   (presumably in bytes - confirm against callers)
 * @param dstOffset  offset added directly to the raw destination device address
 * @param context    context whose "old" stream the copy is issued on
 * @throws ND4JIllegalStateException if the native {@code memcpyAsync} call returns 0
 */
@Override
public void memcpyDevice(DataBuffer dstBuffer, Pointer srcPointer, long length, long dstOffset, CudaContext context) {
    final AllocationPoint allocation = ((BaseCudaDataBuffer) dstBuffer).getAllocationPoint();
    final Pointer deviceTarget = new CudaPointer((allocation.getPointers().getDevicePointer().address()) + dstOffset);

    final boolean copyFailed = nativeOps.memcpyAsync(deviceTarget, srcPointer, length,
            CudaConstants.cudaMemcpyDeviceToDevice, context.getOldStream()) == 0;
    if (copyFailed) {
        throw new ND4JIllegalStateException("memcpyAsync failed");
    }

    allocation.tickDeviceWrite();
}
Also used : BaseCudaDataBuffer(org.nd4j.linalg.jcublas.buffer.BaseCudaDataBuffer) CudaPointer(org.nd4j.jita.allocator.pointers.CudaPointer) Pointer(org.bytedeco.javacpp.Pointer) ND4JIllegalStateException(org.nd4j.linalg.exception.ND4JIllegalStateException) AllocationPoint(org.nd4j.jita.allocator.impl.AllocationPoint) CudaPointer(org.nd4j.jita.allocator.pointers.CudaPointer)

Aggregations

Pointer (org.bytedeco.javacpp.Pointer): 61; FloatPointer (org.bytedeco.javacpp.FloatPointer): 29; DoublePointer (org.bytedeco.javacpp.DoublePointer): 27; IntPointer (org.bytedeco.javacpp.IntPointer): 23; CudaContext (org.nd4j.linalg.jcublas.context.CudaContext): 23; INDArray (org.nd4j.linalg.api.ndarray.INDArray): 21; CudaPointer (org.nd4j.jita.allocator.pointers.CudaPointer): 19; BytePointer (org.bytedeco.javacpp.BytePointer): 18; DataBuffer (org.nd4j.linalg.api.buffer.DataBuffer): 18; ShortPointer (org.bytedeco.javacpp.ShortPointer): 16; GridExecutioner (org.nd4j.linalg.api.ops.executioner.GridExecutioner): 16; PointerPointer (org.bytedeco.javacpp.PointerPointer): 11; ByteBuffer (java.nio.ByteBuffer): 10; CUstream_st (org.bytedeco.javacpp.cuda.CUstream_st): 10; org.nd4j.jita.allocator.pointers.cuda.cusolverDnHandle_t (org.nd4j.jita.allocator.pointers.cuda.cusolverDnHandle_t): 10; CublasPointer (org.nd4j.linalg.jcublas.CublasPointer): 10; FunctionPointer (org.bytedeco.javacpp.FunctionPointer): 9; BoolPointer (org.bytedeco.javacpp.BoolPointer): 8; CLongPointer (org.bytedeco.javacpp.CLongPointer): 8; CharPointer (org.bytedeco.javacpp.CharPointer): 8