Search in sources :

Example 1 with Result

use of com.simiacryptus.mindseye.lang.Result in project MindsEye by SimiaCryptus.

The class GramianLayer, method evalAndFree.

/**
 * Evaluates this layer for its single input and wires up the matching backward pass.
 *
 * <p>The forward value is produced by {@code getOutput}, applied to a tensor obtained from the
 * GPU handle for the input data. During back-propagation the incoming delta must have dimensions
 * {@code {1, 1, bands * bands}}, where {@code bands} is the third input dimension; the delta is
 * then converted by {@code getFeedback} and passed to the input, or simply released when the
 * input is not alive.
 *
 * @param inObj the inputs; the assertions expect one entry whose data has three dimensions
 * @return a result holding the forward output and the accumulator described above
 */
@Nullable
@Override
public Result evalAndFree(final Result... inObj) {
    assert 1 == inObj.length;
    final Result source = inObj[0];
    final TensorList inputData = source.getData();
    final int[] inputDimensions = inputData.getDimensions();
    assert 3 == inputDimensions.length;
    return new Result(CudaSystem.run(gpu -> {
        // Forward pass: the tensor reference is only needed while getOutput runs.
        final CudaTensor forwardInput = gpu.getTensor(inputData, precision, MemoryType.Device, false);
        final CudaTensorList forwardOutput = getOutput(gpu, forwardInput);
        forwardInput.freeRef();
        return forwardOutput;
    }, inputData), (@Nonnull final DeltaSet<Layer> buffer, @Nonnull final TensorList delta) -> {
        // Reject deltas whose shape does not match the expected output shape.
        final int bands = inputDimensions[2];
        final int[] expectedDimensions = { 1, 1, bands * bands };
        if (!Arrays.equals(delta.getDimensions(), expectedDimensions)) {
            final String actualText = Arrays.toString(delta.getDimensions());
            throw new AssertionError(actualText + " != " + Arrays.toString(expectedDimensions));
        }
        // Nothing upstream wants a gradient: just release the delta.
        if (!source.isAlive()) {
            delta.freeRef();
            return;
        }
        final TensorList passback = CudaSystem.run(gpu -> {
            final CudaTensor backwardInput = gpu.getTensor(inputData, precision, MemoryType.Device, false);
            final CudaTensor backwardDelta = gpu.getTensor(delta, precision, MemoryType.Device, true);
            delta.freeRef();
            final CudaTensorList result = getFeedback(gpu, backwardInput, backwardDelta);
            backwardDelta.freeRef();
            backwardInput.freeRef();
            return result;
        }, delta);
        source.accumulate(buffer, passback);
    }) {

        @Override
        public final void accumulate(DeltaSet<Layer> buffer, TensorList delta) {
            // Hand the delta straight to the accumulator supplied at construction.
            getAccumulator().accept(buffer, delta);
        }

        @Override
        protected void _free() {
            inputData.freeRef();
            for (final Result each : inObj) {
                each.freeRef();
            }
        }

        @Override
        public boolean isAlive() {
            for (final Result each : inObj) {
                if (each.isAlive()) {
                    return true;
                }
            }
            return false;
        }
    };
}
Also used : IntStream(java.util.stream.IntStream) JsonObject(com.google.gson.JsonObject) Arrays(java.util.Arrays) CudaMemory(com.simiacryptus.mindseye.lang.cudnn.CudaMemory) jcuda.jcudnn.cudnnReduceTensorDescriptor(jcuda.jcudnn.cudnnReduceTensorDescriptor) LoggerFactory(org.slf4j.LoggerFactory) jcuda.jcudnn.cudnnReduceTensorOp(jcuda.jcudnn.cudnnReduceTensorOp) Result(com.simiacryptus.mindseye.lang.Result) DataSerializer(com.simiacryptus.mindseye.lang.DataSerializer) Precision(com.simiacryptus.mindseye.lang.cudnn.Precision) CudnnHandle(com.simiacryptus.mindseye.lang.cudnn.CudnnHandle) Map(java.util.Map) Layer(com.simiacryptus.mindseye.lang.Layer) Nonnull(javax.annotation.Nonnull) Nullable(javax.annotation.Nullable) CudaResource(com.simiacryptus.mindseye.lang.cudnn.CudaResource) Logger(org.slf4j.Logger) CudaDevice(com.simiacryptus.mindseye.lang.cudnn.CudaDevice) CudaTensor(com.simiacryptus.mindseye.lang.cudnn.CudaTensor) jcuda.jcudnn.cudnnOpTensorOp(jcuda.jcudnn.cudnnOpTensorOp) CudaTensorList(com.simiacryptus.mindseye.lang.cudnn.CudaTensorList) jcuda.jcudnn.cudnnIndicesType(jcuda.jcudnn.cudnnIndicesType) jcuda.jcudnn.cudnnNanPropagation(jcuda.jcudnn.cudnnNanPropagation) jcuda.jcudnn.cudnnReduceTensorIndices(jcuda.jcudnn.cudnnReduceTensorIndices) List(java.util.List) LayerBase(com.simiacryptus.mindseye.lang.LayerBase) CudaSystem(com.simiacryptus.mindseye.lang.cudnn.CudaSystem) TensorList(com.simiacryptus.mindseye.lang.TensorList) MemoryType(com.simiacryptus.mindseye.lang.cudnn.MemoryType) DeltaSet(com.simiacryptus.mindseye.lang.DeltaSet) jcuda.jcudnn.cudnnOpTensorDescriptor(jcuda.jcudnn.cudnnOpTensorDescriptor) CudaTensor(com.simiacryptus.mindseye.lang.cudnn.CudaTensor) CudaTensorList(com.simiacryptus.mindseye.lang.cudnn.CudaTensorList) Nonnull(javax.annotation.Nonnull) DeltaSet(com.simiacryptus.mindseye.lang.DeltaSet) CudaTensorList(com.simiacryptus.mindseye.lang.cudnn.CudaTensorList) TensorList(com.simiacryptus.mindseye.lang.TensorList) 
Nullable(javax.annotation.Nullable) Result(com.simiacryptus.mindseye.lang.Result) Nullable(javax.annotation.Nullable)

Example 2 with Result

use of com.simiacryptus.mindseye.lang.Result in project MindsEye by SimiaCryptus.

The class ImgBandBiasLayer, method evalAndFree.

/**
 * Adds this layer's {@code bias} to a single input using cuDNN and returns the result together
 * with a backward-pass accumulator.
 *
 * <p>When CUDA is not enabled the call is delegated to {@code getCompatibilityLayer()}.
 *
 * @param inObj the inputs; exactly one is accepted and its data must have three dimensions
 * @return the result of the addition, with an accumulator that updates the bias delta (unless the
 *         layer is frozen) and forwards the delta to the input when the input is alive
 * @throws IllegalArgumentException if the number of inputs is not 1 or the input data does not
 *         have three dimensions
 */
@Nullable
@Override
public Result evalAndFree(@Nonnull final Result... inObj) {
    // Without CUDA, let the compatibility layer handle the evaluation.
    if (!CudaSystem.isEnabled())
        return getCompatibilityLayer().evalAndFree(inObj);
    if (inObj.length != 1) {
        throw new IllegalArgumentException("inObj.length=" + inObj.length);
    }
    Result input = inObj[0];
    final TensorList leftData = input.getData();
    @Nonnull final int[] inputDimensions = leftData.getDimensions();
    final int length = leftData.length();
    if (3 != inputDimensions.length) {
        throw new IllegalArgumentException("dimensions=" + Arrays.toString(inputDimensions));
    }
    // assert !right.isAlive();
    return new Result(CudaSystem.run(gpu -> {
        // Forward pass: output = 1.0 * input + 1.0 * bias + 0.0 * output, via a tensor ADD op.
        @Nonnull final CudaResource<cudnnOpTensorDescriptor> opDescriptor = gpu.newOpDescriptor(cudnnOpTensorOp.CUDNN_OP_TENSOR_ADD, precision);
        // Output descriptor: batch of `length` items using the input's three dimensions.
        @Nonnull final CudaDevice.CudaTensorDescriptor outputDescriptor = gpu.newTensorDescriptor(precision, length, inputDimensions[2], inputDimensions[1], inputDimensions[0], inputDimensions[2] * inputDimensions[1] * inputDimensions[0], inputDimensions[1] * inputDimensions[0], inputDimensions[0], 1);
        @Nullable final CudaTensor inputTensor = gpu.getTensor(leftData, precision, MemoryType.Device, false);
        // Copy the current bias values into freshly allocated memory.
        CudaMemory biasMem = gpu.allocate(bias.length() * precision.size, MemoryType.Device, true).write(precision, bias.getData());
        int[] biasDim = bias.getDimensions();
        // The bias is described as a batch of one.
        CudaDevice.CudaTensorDescriptor biasDescriptor = gpu.newTensorDescriptor(precision, 1, biasDim[2], biasDim[1], biasDim[0], biasDim[2] * biasDim[1] * biasDim[0], biasDim[1] * biasDim[0], biasDim[0], 1);
        // assert lPtr.size == rPtr.size;
        @Nonnull final CudaMemory outputPtr = gpu.allocate((long) precision.size * outputDescriptor.nStride * length, MemoryType.Managed.normalize(), true);
        CudaMemory inputMemory = inputTensor.getMemory(gpu);
        // The status returned by the cuDNN call is passed to CudaSystem.handle.
        CudaSystem.handle(gpu.cudnnOpTensor(opDescriptor.getPtr(), precision.getPointer(1.0), inputTensor.descriptor.getPtr(), inputMemory.getPtr(), precision.getPointer(1.0), biasDescriptor.getPtr(), biasMem.getPtr(), precision.getPointer(0.0), outputDescriptor.getPtr(), outputPtr.getPtr()));
        assert CudaDevice.isThreadDeviceId(gpu.getDeviceId());
        inputMemory.dirty();
        biasMem.dirty();
        outputPtr.dirty();
        // Release everything except outputPtr and outputDescriptor, which are wrapped below.
        inputMemory.freeRef();
        biasMem.freeRef();
        biasDescriptor.freeRef();
        inputTensor.freeRef();
        opDescriptor.freeRef();
        CudaTensor cudaTensor = CudaTensor.wrap(outputPtr, outputDescriptor, precision);
        return CudaTensorList.wrap(cudaTensor, length, inputDimensions, precision);
    }, leftData), (@Nonnull final DeltaSet<Layer> buffer, @Nonnull final TensorList delta) -> {
        // Backward pass, step 1: accumulate the bias gradient unless the layer is frozen.
        if (!isFrozen()) {
            @Nonnull double[] biasDelta = CudaSystem.run(gpu -> {
                @Nullable final CudaTensor deltaTensor = gpu.getTensor(delta, precision, MemoryType.Device, false);
                // The buffer is initialised with the current bias values; the call below passes
                // 0.0 as its second scale factor — presumably the destination scale, so the
                // initial contents would not contribute. TODO confirm against the cuDNN docs.
                CudaMemory biasMem = gpu.allocate(bias.length() * precision.size, MemoryType.Device, true).write(precision, bias.getData());
                int[] biasDim = bias.getDimensions();
                CudaDevice.CudaTensorDescriptor biasDescriptor = gpu.newTensorDescriptor(precision, 1, biasDim[2], biasDim[1], biasDim[0], biasDim[2] * biasDim[1] * biasDim[0], biasDim[1] * biasDim[0], biasDim[0], 1);
                CudaMemory deltaTensorMemory = deltaTensor.getMemory(gpu);
                // NOTE(review): unlike the forward cudnnOpTensor call, the result of this call
                // is not passed to CudaSystem.handle — confirm whether a status is being dropped.
                gpu.cudnnConvolutionBackwardBias(precision.getPointer(1.0), deltaTensor.descriptor.getPtr(), deltaTensorMemory.getPtr(), precision.getPointer(0.0), biasDescriptor.getPtr(), biasMem.getPtr());
                assert CudaDevice.isThreadDeviceId(gpu.getDeviceId());
                biasMem.dirty();
                // Read the computed values back into a host array before releasing resources.
                double[] biasV = new double[bias.length()];
                biasMem.read(precision, biasV);
                Stream.<ReferenceCounting>of(biasMem, deltaTensorMemory, deltaTensor, biasDescriptor).forEach(ReferenceCounting::freeRef);
                return biasV;
            }, delta);
            buffer.get(ImgBandBiasLayer.this, bias).addInPlace(biasDelta).freeRef();
        }
        // Backward pass, step 2: the delta is forwarded unchanged to the input, or released.
        if (input.isAlive()) {
            input.accumulate(buffer, delta);
        } else {
            delta.freeRef();
        }
    }) {

        @Override
        public final void accumulate(DeltaSet<Layer> buffer, TensorList delta) {
            // Delegate directly to the accumulator supplied to the constructor.
            getAccumulator().accept(buffer, delta);
        }

        @Override
        protected void _free() {
            leftData.freeRef();
            input.freeRef();
        }

        @Override
        public boolean isAlive() {
            // Alive as soon as any input reports being alive.
            for (@Nonnull final Result element : inObj) if (element.isAlive()) {
                return true;
            }
            return false;
        }
    };
}
Also used : JsonObject(com.google.gson.JsonObject) Arrays(java.util.Arrays) CudaMemory(com.simiacryptus.mindseye.lang.cudnn.CudaMemory) Tensor(com.simiacryptus.mindseye.lang.Tensor) Result(com.simiacryptus.mindseye.lang.Result) DataSerializer(com.simiacryptus.mindseye.lang.DataSerializer) Precision(com.simiacryptus.mindseye.lang.cudnn.Precision) Map(java.util.Map) Layer(com.simiacryptus.mindseye.lang.Layer) ReferenceCounting(com.simiacryptus.mindseye.lang.ReferenceCounting) Nonnull(javax.annotation.Nonnull) Nullable(javax.annotation.Nullable) CudaResource(com.simiacryptus.mindseye.lang.cudnn.CudaResource) Util(com.simiacryptus.util.Util) IntToDoubleFunction(java.util.function.IntToDoubleFunction) CudaDevice(com.simiacryptus.mindseye.lang.cudnn.CudaDevice) CudaTensor(com.simiacryptus.mindseye.lang.cudnn.CudaTensor) jcuda.jcudnn.cudnnOpTensorOp(jcuda.jcudnn.cudnnOpTensorOp) CudaTensorList(com.simiacryptus.mindseye.lang.cudnn.CudaTensorList) FastRandom(com.simiacryptus.util.FastRandom) List(java.util.List) LayerBase(com.simiacryptus.mindseye.lang.LayerBase) Stream(java.util.stream.Stream) CudaSystem(com.simiacryptus.mindseye.lang.cudnn.CudaSystem) TensorList(com.simiacryptus.mindseye.lang.TensorList) DoubleSupplier(java.util.function.DoubleSupplier) MemoryType(com.simiacryptus.mindseye.lang.cudnn.MemoryType) ProductInputsLayer(com.simiacryptus.mindseye.layers.java.ProductInputsLayer) DeltaSet(com.simiacryptus.mindseye.lang.DeltaSet) jcuda.jcudnn.cudnnOpTensorDescriptor(jcuda.jcudnn.cudnnOpTensorDescriptor) CudaTensor(com.simiacryptus.mindseye.lang.cudnn.CudaTensor) CudaDevice(com.simiacryptus.mindseye.lang.cudnn.CudaDevice) Nonnull(javax.annotation.Nonnull) CudaMemory(com.simiacryptus.mindseye.lang.cudnn.CudaMemory) DeltaSet(com.simiacryptus.mindseye.lang.DeltaSet) CudaTensorList(com.simiacryptus.mindseye.lang.cudnn.CudaTensorList) TensorList(com.simiacryptus.mindseye.lang.TensorList) Result(com.simiacryptus.mindseye.lang.Result) 
ReferenceCounting(com.simiacryptus.mindseye.lang.ReferenceCounting) CudaResource(com.simiacryptus.mindseye.lang.cudnn.CudaResource) Nullable(javax.annotation.Nullable) Nullable(javax.annotation.Nullable)

Example 3 with Result

use of com.simiacryptus.mindseye.lang.Result in project MindsEye by SimiaCryptus.

The class ImgLinearSubnetLayer, method evalAndFree.

/**
 * Splits a single input by band ranges, evaluates each leg's inner layer on its range, and sums
 * the leg results with a {@code SumInputsLayer}.
 *
 * <p>Each leg receives the bands {@code [fromBand, toBand)} selected by an
 * {@code ImgBandSelectLayer}. During back-propagation every leg copies its delta into a shared
 * passback tensor; once as many deltas as there are legs have been recorded, the passback is
 * forwarded to the original input.
 *
 * @param inObj the inputs; the assertions expect one entry whose data has three dimensions and
 *              whose band count equals the largest {@code toBand} among the legs
 * @return the result of summing the leg results
 */
@Nullable
@Override
public Result evalAndFree(@Nonnull final Result... inObj) {
    assert 1 == inObj.length;
    Result input = inObj[0];
    TensorList inputData = input.getData();
    @Nonnull final int[] inputDims = inputData.getDimensions();
    assert 3 == inputDims.length;
    int length = inputData.length();
    // getAsInt() throws NoSuchElementException when there are no legs.
    int maxBand = legs.stream().mapToInt(x -> x.toBand).max().getAsInt();
    assert maxBand == inputDims[2] : maxBand + " != " + inputDims[2];
    // Every band index below maxBand must be covered by exactly one leg.
    assert IntStream.range(0, maxBand).allMatch(i -> 1 == legs.stream().filter(x -> x.fromBand <= i && x.toBand > i).count());
    // Shared buffer, sized for the whole input, into which each leg writes its delta.
    CudaTensor passback = CudaSystem.run(gpu -> {
        return CudaTensor.wrap(gpu.allocate(inputData.getElements() * precision.size, MemoryType.Device, true), gpu.newTensorDescriptor(precision, length, inputDims[2], inputDims[1], inputDims[0]), precision);
    });
    try {
        // Counts how many leg deltas have arrived in the current backward pass.
        AtomicInteger counter = new AtomicInteger(0);
        SumInputsLayer sumInputsLayer = new SumInputsLayer();
        try {
            Result[] legResults = legs.stream().map(leg -> {
                // Each leg's Result takes a reference on passback and input; both are released
                // again in that Result's _free() below.
                passback.addRef();
                ImgBandSelectLayer imgBandSelectLayer = new ImgBandSelectLayer(leg.fromBand, leg.toBand);
                input.addRef();
                TensorList legData = imgBandSelectLayer.eval(input).getDataAndFree();
                imgBandSelectLayer.freeRef();
                return leg.inner.evalAndFree(new Result(legData, (DeltaSet<Layer> ctx, TensorList delta) -> {
                    int[] outputDimensions = delta.getDimensions();
                    int[] inputDimensions = inputDims;
                    // Writes into the shared passback buffer are serialized on the buffer itself.
                    synchronized (passback) {
                        CudaSystem.run(gpu -> {
                            // Descriptor with the delta's dimensions but the full input's strides.
                            @Nonnull final CudaDevice.CudaTensorDescriptor viewDescriptor = gpu.newTensorDescriptor(// 
                            precision, // 
                            length, // 
                            outputDimensions[2], // 
                            outputDimensions[1], // 
                            outputDimensions[0], // 
                            inputDimensions[2] * inputDimensions[1] * inputDimensions[0], // 
                            inputDimensions[1] * inputDimensions[0], // 
                            inputDimensions[0], 1);
                            // Byte offset of this leg's first band within the passback buffer.
                            final int byteOffset = viewDescriptor.cStride * leg.fromBand * precision.size;
                            assert delta.length() == inputData.length();
                            assert passback.getDeviceId() == gpu.getDeviceId();
                            // assert error.stream().flatMapToDouble(x-> Arrays.stream(x.getData())).allMatch(Double::isFinite);
                            @Nullable final CudaTensor deltaTensor = gpu.getTensor(delta, precision, MemoryType.Device, true);
                            @Nonnull final CudaMemory passbackBuffer = passback.getMemory(gpu);
                            CudaMemory errorPtrMemory = deltaTensor.getMemory(gpu);
                            passbackBuffer.synchronize();
                            // Copy the delta into the passback buffer at the leg's offset.
                            // NOTE(review): the result of this call is not passed to
                            // CudaSystem.handle — confirm whether a status is being dropped.
                            gpu.cudnnTransformTensor(precision.getPointer(1.0), deltaTensor.descriptor.getPtr(), errorPtrMemory.getPtr(), precision.getPointer(0.0), viewDescriptor.getPtr(), passbackBuffer.getPtr().withByteOffset(byteOffset));
                            errorPtrMemory.dirty();
                            passbackBuffer.dirty();
                            Stream.<ReferenceCounting>of(deltaTensor, viewDescriptor, passbackBuffer, errorPtrMemory).forEach(ReferenceCounting::freeRef);
                        }, passback);
                    }
                    // Once as many deltas as there are legs have been counted, reset the counter
                    // and forward the assembled passback to the original input.
                    if (counter.incrementAndGet() >= legs.size()) {
                        counter.set(0);
                        input.accumulate(ctx, CudaTensorList.create(passback, length, inputDims, precision));
                    }
                }) {

                    @Override
                    protected void _free() {
                        super._free();
                        // Release the references taken when this leg's Result was built.
                        input.freeRef();
                        passback.freeRef();
                    }
                });
            }).toArray(i -> new Result[i]);
            return sumInputsLayer.setParallel(parallel).setPrecision(precision).evalAndFree(legResults);
        } finally {
            sumInputsLayer.freeRef();
            input.freeRef();
        }
    } finally {
        passback.freeRef();
    }
}
Also used : IntStream(java.util.stream.IntStream) JsonObject(com.google.gson.JsonObject) CudaMemory(com.simiacryptus.mindseye.lang.cudnn.CudaMemory) LoggerFactory(org.slf4j.LoggerFactory) ReferenceCountingBase(com.simiacryptus.mindseye.lang.ReferenceCountingBase) Result(com.simiacryptus.mindseye.lang.Result) DataSerializer(com.simiacryptus.mindseye.lang.DataSerializer) ArrayList(java.util.ArrayList) Precision(com.simiacryptus.mindseye.lang.cudnn.Precision) AtomicInteger(java.util.concurrent.atomic.AtomicInteger) Map(java.util.Map) Layer(com.simiacryptus.mindseye.lang.Layer) ReferenceCounting(com.simiacryptus.mindseye.lang.ReferenceCounting) Nonnull(javax.annotation.Nonnull) Nullable(javax.annotation.Nullable) Logger(org.slf4j.Logger) CudaDevice(com.simiacryptus.mindseye.lang.cudnn.CudaDevice) CudaTensor(com.simiacryptus.mindseye.lang.cudnn.CudaTensor) CudaTensorList(com.simiacryptus.mindseye.lang.cudnn.CudaTensorList) JsonArray(com.google.gson.JsonArray) List(java.util.List) LayerBase(com.simiacryptus.mindseye.lang.LayerBase) Stream(java.util.stream.Stream) CudaSystem(com.simiacryptus.mindseye.lang.cudnn.CudaSystem) TensorList(com.simiacryptus.mindseye.lang.TensorList) MemoryType(com.simiacryptus.mindseye.lang.cudnn.MemoryType) DeltaSet(com.simiacryptus.mindseye.lang.DeltaSet) CudaTensor(com.simiacryptus.mindseye.lang.cudnn.CudaTensor) CudaDevice(com.simiacryptus.mindseye.lang.cudnn.CudaDevice) Nonnull(javax.annotation.Nonnull) CudaMemory(com.simiacryptus.mindseye.lang.cudnn.CudaMemory) DeltaSet(com.simiacryptus.mindseye.lang.DeltaSet) CudaTensorList(com.simiacryptus.mindseye.lang.cudnn.CudaTensorList) TensorList(com.simiacryptus.mindseye.lang.TensorList) Result(com.simiacryptus.mindseye.lang.Result) ReferenceCounting(com.simiacryptus.mindseye.lang.ReferenceCounting) AtomicInteger(java.util.concurrent.atomic.AtomicInteger) Nullable(javax.annotation.Nullable) Nullable(javax.annotation.Nullable)

Example 4 with Result

use of com.simiacryptus.mindseye.lang.Result in project MindsEye by SimiaCryptus.

The class ConvolutionLayer, method eval.

/**
 * Convolves the first input's tensors with this layer's kernel and returns the outputs together
 * with a backward-pass accumulator.
 *
 * <p>The accumulator adds a kernel gradient to the delta buffer unless the layer is frozen, and
 * back-propagates the error to the input when the input is alive.
 *
 * @param inObj the inputs; only the first entry is read, but a reference is taken on every entry
 * @return a result wrapping one output tensor per batch item
 * @throws RuntimeException if anything thrown during the forward convolution is caught; the
 *         original throwable is kept as the cause
 */
@Nonnull
@Override
public Result eval(@Nonnull final Result... inObj) {
    // Take a reference on every input; the matching freeRef calls are in _free() below.
    Arrays.stream(inObj).forEach(nnResult -> nnResult.addRef());
    final Result input = inObj[0];
    final TensorList batch = input.getData();
    batch.addRef();
    // Input dimensions are taken from the first tensor of the batch.
    @Nonnull final int[] inputDims = batch.get(0).getDimensions();
    @Nonnull final int[] kernelDims = kernel.getDimensions();
    @Nullable final double[] kernelData = ConvolutionLayer.this.kernel.getData();
    @Nonnull final ConvolutionController convolutionController = new ConvolutionController(inputDims, kernelDims, paddingX, paddingY);
    // One output tensor per batch item, sized by the controller.
    final Tensor[] output = IntStream.range(0, batch.length()).mapToObj(dataIndex -> new Tensor(convolutionController.getOutputDims())).toArray(i -> new Tensor[i]);
    try {
        final double[][] inputBuffers = batch.stream().map(x -> {
            @Nullable double[] data = x.getData();
            // NOTE(review): the forward pass calls detach() here while the backward pass calls
            // freeRef() on the same kind of stream element — confirm this is intended.
            x.detach();
            return data;
        }).toArray(i -> new double[i][]);
        final double[][] outputBuffers = Arrays.stream(output).map(x -> x.getData()).toArray(i -> new double[i][]);
        convolutionController.convolve(inputBuffers, kernelData, outputBuffers);
    } catch (@Nonnull final Throwable e) {
        throw new RuntimeException("Error mapCoords image res " + Arrays.toString(inputDims), e);
    }
    int outputLength = output.length;
    return new Result(TensorArray.wrap(output), (@Nonnull final DeltaSet<Layer> buffer, @Nonnull final TensorList error) -> {
        // Step 1: kernel gradient, skipped when the layer is frozen.
        if (!isFrozen()) {
            final double[][] inputBuffers = batch.stream().map(x -> {
                @Nullable double[] data = x.getData();
                x.freeRef();
                return data;
            }).toArray(i -> new double[i][]);
            final double[][] outputBuffers = error.stream().map(x -> {
                @Nullable double[] data = x.getData();
                x.freeRef();
                return data;
            }).toArray(i -> new double[i][]);
            @Nonnull final Tensor weightGradient = new Tensor(kernelDims);
            convolutionController.gradient(inputBuffers, weightGradient.getData(), outputBuffers);
            buffer.get(ConvolutionLayer.this, kernelData).addInPlace(weightGradient.getData()).freeRef();
            weightGradient.freeRef();
        }
        // Step 2: back-propagate the error to the input, if it is alive.
        if (input.isAlive()) {
            // One tensor per batch item with the input dimensions, to be filled by backprop.
            final Tensor[] inputBufferTensors = IntStream.range(0, outputLength).mapToObj(dataIndex -> new Tensor(inputDims)).toArray(i -> new Tensor[i]);
            final double[][] inputBuffers = Arrays.stream(inputBufferTensors).map(x -> {
                @Nullable double[] data = x.getData();
                return data;
            }).toArray(i -> new double[i][]);
            final double[][] outputBuffers = error.stream().map(x -> {
                @Nullable double[] data = x.getData();
                x.freeRef();
                return data;
            }).toArray(i -> new double[i][]);
            convolutionController.backprop(inputBuffers, kernelData, outputBuffers);
            @Nonnull TensorArray tensorArray = TensorArray.wrap(inputBufferTensors);
            input.accumulate(buffer, tensorArray);
        }
        // NOTE(review): `error` itself is never freed in this accumulator (only the tensors
        // obtained from its stream are) — verify who owns it.
    }) {

        @Override
        protected void _free() {
            // Release the references taken at the top of eval().
            Arrays.stream(inObj).forEach(nnResult -> nnResult.freeRef());
            batch.freeRef();
        }

        @Override
        public boolean isAlive() {
            // Alive when the input is alive or the kernel is not frozen.
            return input.isAlive() || !isFrozen();
        }
    };
}
Also used : IntStream(java.util.stream.IntStream) JsonObject(com.google.gson.JsonObject) Util(com.simiacryptus.util.Util) Coordinate(com.simiacryptus.mindseye.lang.Coordinate) Arrays(java.util.Arrays) Tensor(com.simiacryptus.mindseye.lang.Tensor) Result(com.simiacryptus.mindseye.lang.Result) DataSerializer(com.simiacryptus.mindseye.lang.DataSerializer) JsonElement(com.google.gson.JsonElement) List(java.util.List) LayerBase(com.simiacryptus.mindseye.lang.LayerBase) ToDoubleFunction(java.util.function.ToDoubleFunction) TensorList(com.simiacryptus.mindseye.lang.TensorList) Map(java.util.Map) DoubleSupplier(java.util.function.DoubleSupplier) Layer(com.simiacryptus.mindseye.lang.Layer) TensorArray(com.simiacryptus.mindseye.lang.TensorArray) DeltaSet(com.simiacryptus.mindseye.lang.DeltaSet) Nonnull(javax.annotation.Nonnull) Nullable(javax.annotation.Nullable) Tensor(com.simiacryptus.mindseye.lang.Tensor) Nonnull(javax.annotation.Nonnull) DeltaSet(com.simiacryptus.mindseye.lang.DeltaSet) TensorList(com.simiacryptus.mindseye.lang.TensorList) Result(com.simiacryptus.mindseye.lang.Result) TensorArray(com.simiacryptus.mindseye.lang.TensorArray) Nullable(javax.annotation.Nullable) Nonnull(javax.annotation.Nonnull)

Example 5 with Result

use of com.simiacryptus.mindseye.lang.Result in project MindsEye by SimiaCryptus.

The class ActivationLayer, method evalAndFree.

/**
 * Applies the cuDNN activation selected by {@code mode} to the first input and returns the
 * result together with a backward-pass accumulator.
 *
 * <p>When CUDA is not enabled the call is delegated to {@code getCompatibilityLayer()}. The
 * output keeps the input's dimensions. If the input data and its tensor each have a reference
 * count of one, and the input is either not alive or the mode is RELU, the input tensor is
 * reused as the output tensor instead of allocating a new one.
 *
 * @param inObj the inputs; only the first entry is read
 * @return the activation result, whose accumulator passes the activation gradient to the input
 *         when the input is alive and otherwise releases the delta
 * @throws ComponentException if anything thrown during evaluation is caught; the original
 *         throwable is kept as the cause
 */
@Nullable
@Override
public Result evalAndFree(@Nonnull final Result... inObj) {
    // Without CUDA, let the compatibility layer handle the evaluation.
    if (!CudaSystem.isEnabled())
        return getCompatibilityLayer().evalAndFree(inObj);
    // assert Arrays.stream(inObj).flatMapToDouble(input->input.data.stream().flatMapToDouble(x-> Arrays.stream(x.getData()))).allMatch(v->Double.isFinite(v));
    final Result inputResult = inObj[0];
    final TensorList inputData = inputResult.getData();
    @Nonnull final int[] inputSize = inputData.getDimensions();
    // The output has the same dimensions as the input (same array instance).
    @Nonnull final int[] outputSize = inputSize;
    final int length = inputData.length();
    final int inputDims = Tensor.length(inputSize);
    try {
        final CudaTensor outPtr = CudaSystem.run(gpu -> {
            @Nullable final CudaTensor inputTensor = gpu.getTensor(inputData, precision, MemoryType.Device, false);
            final CudaTensor outputTensor;
            // Reuse the input tensor as the output only when both ref counts are one and either
            // the input is not alive or the mode is RELU.
            if (1 == inputData.currentRefCount() && 1 == inputTensor.currentRefCount() && (!inputResult.isAlive() || mode == Mode.RELU.id)) {
                // Extra reference so the tensor survives the freeRef in the finally block below.
                inputTensor.addRef();
                outputTensor = inputTensor;
            } else {
                @Nonnull final CudaDevice.CudaTensorDescriptor outputDescriptor = gpu.newTensorDescriptor(precision, length, inputSize[2], inputSize[1], inputSize[0], inputSize[2] * inputSize[1] * inputSize[0], inputSize[1] * inputSize[0], inputSize[0], 1);
                @Nonnull final CudaMemory outputData = gpu.allocate((long) precision.size * inputDims * length, MemoryType.Managed.normalize(), true);
                outputTensor = CudaTensor.wrap(outputData, outputDescriptor, precision);
            }
            @Nonnull final CudaResource<cudnnActivationDescriptor> activationDesc = gpu.newActivationDescriptor(mode, cudnnNanPropagation.CUDNN_NOT_PROPAGATE_NAN, 0);
            try {
                CudaMemory memory = inputTensor.getMemory(gpu);
                CudaMemory tensorMemory = outputTensor.getMemory(gpu);
                CudaSystem.handle(gpu.cudnnActivationForward(activationDesc.getPtr(), precision.getPointer(1.0), inputTensor.descriptor.getPtr(), memory.getPtr(), precision.getPointer(0.0), outputTensor.descriptor.getPtr(), tensorMemory.getPtr()));
                assert CudaDevice.isThreadDeviceId(gpu.getDeviceId());
                memory.dirty();
                tensorMemory.dirty();
                tensorMemory.freeRef();
                memory.freeRef();
                return outputTensor;
            } catch (@Nonnull final Throwable e) {
                // NOTE(review): if this exception propagates out of CudaSystem.run, the outer
                // catch at the end of the method wraps it in a second ComponentException.
                throw new ComponentException("Error apply " + Arrays.toString(inputSize), e);
            } finally {
                activationDesc.freeRef();
                inputTensor.freeRef();
            }
        }, inputData);
        return new Result(CudaTensorList.create(outPtr, length, outputSize, precision), (@Nonnull final DeltaSet<Layer> buffer, @Nonnull final TensorList delta) -> {
            if (inputResult.isAlive()) {
                final TensorList data = CudaSystem.run(gpu -> {
                    @Nullable CudaTensor inputTensor = gpu.getTensor(inputData, precision, MemoryType.Device, true);
                    @Nullable CudaTensor deltaTensor = gpu.getTensor(delta, precision, MemoryType.Device, true);
                    assert length == delta.length();
                    CudaTensor localOut = outPtr.getDense(gpu);
                    // The delta list is released once its tensor has been obtained above.
                    delta.freeRef();
                    CudaTensor passbackTensor;
                    // if (sameStrides(deltaTensor.descriptor, inputTensor.descriptor)) {
                    // passbackTensor = deltaTensor;
                    // passbackTensor.addRef();
                    // }
                    // else {
                    // passbackTensor = deltaTensor.getDense(gpu);
                    // inputTensor = inputTensor.getDenseAndFree(gpu);
                    // }
                    // A new passback tensor with the input's dimensions is always allocated.
                    passbackTensor = CudaTensor.wrap(gpu.allocate((long) Tensor.length(inputSize) * length * precision.size, MemoryType.Managed.normalize(), false), gpu.newTensorDescriptor(precision, length, inputSize[2], inputSize[1], inputSize[0], inputSize[2] * inputSize[1] * inputSize[0], inputSize[1] * inputSize[0], inputSize[0], 1), precision);
                    @Nonnull final CudaResource<cudnnActivationDescriptor> activationDesc = gpu.newActivationDescriptor(mode, cudnnNanPropagation.CUDNN_NOT_PROPAGATE_NAN, 0);
                    try {
                        CudaMemory localOutMemory = localOut.getMemory(gpu);
                        CudaMemory deltaTensorMemory = deltaTensor.getMemory(gpu);
                        CudaMemory inputTensorMemory = inputTensor.getMemory(gpu);
                        CudaMemory passbackTensorMemory = passbackTensor.getMemory(gpu);
                        // Backward activation: reads the forward output, the delta and the
                        // input; writes into the passback tensor.
                        CudaSystem.handle(gpu.cudnnActivationBackward(activationDesc.getPtr(), precision.getPointer(1.0), localOut.descriptor.getPtr(), localOutMemory.getPtr(), deltaTensor.descriptor.getPtr(), deltaTensorMemory.getPtr(), inputTensor.descriptor.getPtr(), inputTensorMemory.getPtr(), precision.getPointer(0.0), passbackTensor.descriptor.getPtr(), passbackTensorMemory.getPtr()));
                        assert CudaDevice.isThreadDeviceId(gpu.getDeviceId());
                        localOutMemory.dirty();
                        deltaTensorMemory.dirty();
                        inputTensorMemory.dirty();
                        passbackTensorMemory.dirty();
                        localOutMemory.freeRef();
                        deltaTensorMemory.freeRef();
                        inputTensorMemory.freeRef();
                        passbackTensorMemory.freeRef();
                    } catch (@Nonnull final Throwable e) {
                        throw new ComponentException("Error apply " + Arrays.toString(inputSize), e);
                    } finally {
                        localOut.freeRef();
                        inputTensor.freeRef();
                        deltaTensor.freeRef();
                        activationDesc.freeRef();
                    }
                    return CudaTensorList.wrap(passbackTensor, length, inputSize, precision);
                }, delta);
                inputResult.accumulate(buffer, data);
            } else {
                // Nobody upstream consumes the gradient; just release the delta.
                delta.freeRef();
            }
        }) {

            @Override
            public final void accumulate(DeltaSet<Layer> buffer, TensorList delta) {
                // Delegate directly to the accumulator supplied to the constructor.
                getAccumulator().accept(buffer, delta);
            }

            @Override
            protected void _free() {
                inputData.freeRef();
                outPtr.freeRef();
                inputResult.freeRef();
            }

            @Override
            public boolean isAlive() {
                // Alive when the input is alive or the layer is not frozen.
                return inputResult.isAlive() || !isFrozen();
            }
        };
    } catch (@Nonnull final Throwable e) {
        throw new ComponentException("Error apply image res " + Arrays.toString(inputSize), e);
    }
}
Also used : CudaTensor(com.simiacryptus.mindseye.lang.cudnn.CudaTensor) jcuda.jcudnn.cudnnActivationDescriptor(jcuda.jcudnn.cudnnActivationDescriptor) CudaDevice(com.simiacryptus.mindseye.lang.cudnn.CudaDevice) Nonnull(javax.annotation.Nonnull) CudaMemory(com.simiacryptus.mindseye.lang.cudnn.CudaMemory) DeltaSet(com.simiacryptus.mindseye.lang.DeltaSet) CudaTensorList(com.simiacryptus.mindseye.lang.cudnn.CudaTensorList) TensorList(com.simiacryptus.mindseye.lang.TensorList) Result(com.simiacryptus.mindseye.lang.Result) ComponentException(com.simiacryptus.mindseye.lang.ComponentException) Nullable(javax.annotation.Nullable) Nullable(javax.annotation.Nullable)

Aggregations

Result (com.simiacryptus.mindseye.lang.Result)123 Nonnull (javax.annotation.Nonnull)120 Nullable (javax.annotation.Nullable)113 Layer (com.simiacryptus.mindseye.lang.Layer)101 TensorList (com.simiacryptus.mindseye.lang.TensorList)100 DeltaSet (com.simiacryptus.mindseye.lang.DeltaSet)96 Arrays (java.util.Arrays)94 Tensor (com.simiacryptus.mindseye.lang.Tensor)91 List (java.util.List)88 IntStream (java.util.stream.IntStream)80 Map (java.util.Map)77 JsonObject (com.google.gson.JsonObject)70 TensorArray (com.simiacryptus.mindseye.lang.TensorArray)70 DataSerializer (com.simiacryptus.mindseye.lang.DataSerializer)69 LayerBase (com.simiacryptus.mindseye.lang.LayerBase)65 Logger (org.slf4j.Logger)59 LoggerFactory (org.slf4j.LoggerFactory)59 ReferenceCounting (com.simiacryptus.mindseye.lang.ReferenceCounting)30 ConstantResult (com.simiacryptus.mindseye.lang.ConstantResult)25 Stream (java.util.stream.Stream)25