Search in sources :

Example 1 with Layer

use of com.simiacryptus.mindseye.lang.Layer in project MindsEye by SimiaCryptus.

the class GramianLayer method evalAndFree.

@Nullable
@Override
public Result evalAndFree(final Result... inObj) {
    assert 1 == inObj.length;
    TensorList inputData = inObj[0].getData();
    int[] inputDimensions = inputData.getDimensions();
    assert 3 == inputDimensions.length;
    return new Result(CudaSystem.run(gpu -> {
        CudaTensor tensor = gpu.getTensor(inputData, precision, MemoryType.Device, false);
        CudaTensorList output = getOutput(gpu, tensor);
        tensor.freeRef();
        return output;
    }, inputData), (@Nonnull final DeltaSet<Layer> buffer, @Nonnull final TensorList delta) -> {
        @Nonnull final int[] outputDimensions = { 1, 1, inputDimensions[2] * inputDimensions[2] };
        if (!Arrays.equals(delta.getDimensions(), outputDimensions)) {
            throw new AssertionError(Arrays.toString(delta.getDimensions()) + " != " + Arrays.toString(outputDimensions));
        }
        if (inObj[0].isAlive()) {
            final TensorList passbackTensorList = CudaSystem.run(gpu -> {
                @Nullable final CudaTensor inputTensor = gpu.getTensor(inputData, precision, MemoryType.Device, false);
                CudaTensor deltaTensor = gpu.getTensor(delta, precision, MemoryType.Device, true);
                delta.freeRef();
                CudaTensorList feedback = getFeedback(gpu, inputTensor, deltaTensor);
                deltaTensor.freeRef();
                inputTensor.freeRef();
                return feedback;
            }, delta);
            inObj[0].accumulate(buffer, passbackTensorList);
        } else {
            delta.freeRef();
        }
    }) {

        @Override
        public final void accumulate(DeltaSet<Layer> buffer, TensorList delta) {
            getAccumulator().accept(buffer, delta);
        }

        @Override
        protected void _free() {
            inputData.freeRef();
            Arrays.stream(inObj).forEach(nnResult -> nnResult.freeRef());
        }

        @Override
        public boolean isAlive() {
            return Arrays.stream(inObj).anyMatch(x -> x.isAlive());
        }
    };
}
Also used : IntStream(java.util.stream.IntStream) JsonObject(com.google.gson.JsonObject) Arrays(java.util.Arrays) CudaMemory(com.simiacryptus.mindseye.lang.cudnn.CudaMemory) jcuda.jcudnn.cudnnReduceTensorDescriptor(jcuda.jcudnn.cudnnReduceTensorDescriptor) LoggerFactory(org.slf4j.LoggerFactory) jcuda.jcudnn.cudnnReduceTensorOp(jcuda.jcudnn.cudnnReduceTensorOp) Result(com.simiacryptus.mindseye.lang.Result) DataSerializer(com.simiacryptus.mindseye.lang.DataSerializer) Precision(com.simiacryptus.mindseye.lang.cudnn.Precision) CudnnHandle(com.simiacryptus.mindseye.lang.cudnn.CudnnHandle) Map(java.util.Map) Layer(com.simiacryptus.mindseye.lang.Layer) Nonnull(javax.annotation.Nonnull) Nullable(javax.annotation.Nullable) CudaResource(com.simiacryptus.mindseye.lang.cudnn.CudaResource) Logger(org.slf4j.Logger) CudaDevice(com.simiacryptus.mindseye.lang.cudnn.CudaDevice) CudaTensor(com.simiacryptus.mindseye.lang.cudnn.CudaTensor) jcuda.jcudnn.cudnnOpTensorOp(jcuda.jcudnn.cudnnOpTensorOp) CudaTensorList(com.simiacryptus.mindseye.lang.cudnn.CudaTensorList) jcuda.jcudnn.cudnnIndicesType(jcuda.jcudnn.cudnnIndicesType) jcuda.jcudnn.cudnnNanPropagation(jcuda.jcudnn.cudnnNanPropagation) jcuda.jcudnn.cudnnReduceTensorIndices(jcuda.jcudnn.cudnnReduceTensorIndices) List(java.util.List) LayerBase(com.simiacryptus.mindseye.lang.LayerBase) CudaSystem(com.simiacryptus.mindseye.lang.cudnn.CudaSystem) TensorList(com.simiacryptus.mindseye.lang.TensorList) MemoryType(com.simiacryptus.mindseye.lang.cudnn.MemoryType) DeltaSet(com.simiacryptus.mindseye.lang.DeltaSet) jcuda.jcudnn.cudnnOpTensorDescriptor(jcuda.jcudnn.cudnnOpTensorDescriptor) CudaTensor(com.simiacryptus.mindseye.lang.cudnn.CudaTensor) CudaTensorList(com.simiacryptus.mindseye.lang.cudnn.CudaTensorList) Nonnull(javax.annotation.Nonnull) DeltaSet(com.simiacryptus.mindseye.lang.DeltaSet) CudaTensorList(com.simiacryptus.mindseye.lang.cudnn.CudaTensorList) TensorList(com.simiacryptus.mindseye.lang.TensorList) Nullable(javax.annotation.Nullable) Result(com.simiacryptus.mindseye.lang.Result) Nullable(javax.annotation.Nullable)

Example 2 with Layer

use of com.simiacryptus.mindseye.lang.Layer in project MindsEye by SimiaCryptus.

the class ImgBandBiasLayer method evalAndFree.

@Nullable
@Override
public Result evalAndFree(@Nonnull final Result... inObj) {
    if (!CudaSystem.isEnabled())
        return getCompatibilityLayer().evalAndFree(inObj);
    if (inObj.length != 1) {
        throw new IllegalArgumentException("inObj.length=" + inObj.length);
    }
    Result input = inObj[0];
    final TensorList leftData = input.getData();
    @Nonnull final int[] inputDimensions = leftData.getDimensions();
    final int length = leftData.length();
    if (3 != inputDimensions.length) {
        throw new IllegalArgumentException("dimensions=" + Arrays.toString(inputDimensions));
    }
    // assert !right.isAlive();
    return new Result(CudaSystem.run(gpu -> {
        @Nonnull final CudaResource<cudnnOpTensorDescriptor> opDescriptor = gpu.newOpDescriptor(cudnnOpTensorOp.CUDNN_OP_TENSOR_ADD, precision);
        @Nonnull final CudaDevice.CudaTensorDescriptor outputDescriptor = gpu.newTensorDescriptor(precision, length, inputDimensions[2], inputDimensions[1], inputDimensions[0], inputDimensions[2] * inputDimensions[1] * inputDimensions[0], inputDimensions[1] * inputDimensions[0], inputDimensions[0], 1);
        @Nullable final CudaTensor inputTensor = gpu.getTensor(leftData, precision, MemoryType.Device, false);
        CudaMemory biasMem = gpu.allocate(bias.length() * precision.size, MemoryType.Device, true).write(precision, bias.getData());
        int[] biasDim = bias.getDimensions();
        CudaDevice.CudaTensorDescriptor biasDescriptor = gpu.newTensorDescriptor(precision, 1, biasDim[2], biasDim[1], biasDim[0], biasDim[2] * biasDim[1] * biasDim[0], biasDim[1] * biasDim[0], biasDim[0], 1);
        // assert lPtr.size == rPtr.size;
        @Nonnull final CudaMemory outputPtr = gpu.allocate((long) precision.size * outputDescriptor.nStride * length, MemoryType.Managed.normalize(), true);
        CudaMemory inputMemory = inputTensor.getMemory(gpu);
        CudaSystem.handle(gpu.cudnnOpTensor(opDescriptor.getPtr(), precision.getPointer(1.0), inputTensor.descriptor.getPtr(), inputMemory.getPtr(), precision.getPointer(1.0), biasDescriptor.getPtr(), biasMem.getPtr(), precision.getPointer(0.0), outputDescriptor.getPtr(), outputPtr.getPtr()));
        assert CudaDevice.isThreadDeviceId(gpu.getDeviceId());
        inputMemory.dirty();
        biasMem.dirty();
        outputPtr.dirty();
        inputMemory.freeRef();
        biasMem.freeRef();
        biasDescriptor.freeRef();
        inputTensor.freeRef();
        opDescriptor.freeRef();
        CudaTensor cudaTensor = CudaTensor.wrap(outputPtr, outputDescriptor, precision);
        return CudaTensorList.wrap(cudaTensor, length, inputDimensions, precision);
    }, leftData), (@Nonnull final DeltaSet<Layer> buffer, @Nonnull final TensorList delta) -> {
        if (!isFrozen()) {
            @Nonnull double[] biasDelta = CudaSystem.run(gpu -> {
                @Nullable final CudaTensor deltaTensor = gpu.getTensor(delta, precision, MemoryType.Device, false);
                CudaMemory biasMem = gpu.allocate(bias.length() * precision.size, MemoryType.Device, true).write(precision, bias.getData());
                int[] biasDim = bias.getDimensions();
                CudaDevice.CudaTensorDescriptor biasDescriptor = gpu.newTensorDescriptor(precision, 1, biasDim[2], biasDim[1], biasDim[0], biasDim[2] * biasDim[1] * biasDim[0], biasDim[1] * biasDim[0], biasDim[0], 1);
                CudaMemory deltaTensorMemory = deltaTensor.getMemory(gpu);
                gpu.cudnnConvolutionBackwardBias(precision.getPointer(1.0), deltaTensor.descriptor.getPtr(), deltaTensorMemory.getPtr(), precision.getPointer(0.0), biasDescriptor.getPtr(), biasMem.getPtr());
                assert CudaDevice.isThreadDeviceId(gpu.getDeviceId());
                biasMem.dirty();
                double[] biasV = new double[bias.length()];
                biasMem.read(precision, biasV);
                Stream.<ReferenceCounting>of(biasMem, deltaTensorMemory, deltaTensor, biasDescriptor).forEach(ReferenceCounting::freeRef);
                return biasV;
            }, delta);
            buffer.get(ImgBandBiasLayer.this, bias).addInPlace(biasDelta).freeRef();
        }
        if (input.isAlive()) {
            input.accumulate(buffer, delta);
        } else {
            delta.freeRef();
        }
    }) {

        @Override
        public final void accumulate(DeltaSet<Layer> buffer, TensorList delta) {
            getAccumulator().accept(buffer, delta);
        }

        @Override
        protected void _free() {
            leftData.freeRef();
            input.freeRef();
        }

        @Override
        public boolean isAlive() {
            for (@Nonnull final Result element : inObj) if (element.isAlive()) {
                return true;
            }
            return false;
        }
    };
}
Also used : JsonObject(com.google.gson.JsonObject) Arrays(java.util.Arrays) CudaMemory(com.simiacryptus.mindseye.lang.cudnn.CudaMemory) Tensor(com.simiacryptus.mindseye.lang.Tensor) Result(com.simiacryptus.mindseye.lang.Result) DataSerializer(com.simiacryptus.mindseye.lang.DataSerializer) Precision(com.simiacryptus.mindseye.lang.cudnn.Precision) Map(java.util.Map) Layer(com.simiacryptus.mindseye.lang.Layer) ReferenceCounting(com.simiacryptus.mindseye.lang.ReferenceCounting) Nonnull(javax.annotation.Nonnull) Nullable(javax.annotation.Nullable) CudaResource(com.simiacryptus.mindseye.lang.cudnn.CudaResource) Util(com.simiacryptus.util.Util) IntToDoubleFunction(java.util.function.IntToDoubleFunction) CudaDevice(com.simiacryptus.mindseye.lang.cudnn.CudaDevice) CudaTensor(com.simiacryptus.mindseye.lang.cudnn.CudaTensor) jcuda.jcudnn.cudnnOpTensorOp(jcuda.jcudnn.cudnnOpTensorOp) CudaTensorList(com.simiacryptus.mindseye.lang.cudnn.CudaTensorList) FastRandom(com.simiacryptus.util.FastRandom) List(java.util.List) LayerBase(com.simiacryptus.mindseye.lang.LayerBase) Stream(java.util.stream.Stream) CudaSystem(com.simiacryptus.mindseye.lang.cudnn.CudaSystem) TensorList(com.simiacryptus.mindseye.lang.TensorList) DoubleSupplier(java.util.function.DoubleSupplier) MemoryType(com.simiacryptus.mindseye.lang.cudnn.MemoryType) ProductInputsLayer(com.simiacryptus.mindseye.layers.java.ProductInputsLayer) DeltaSet(com.simiacryptus.mindseye.lang.DeltaSet) jcuda.jcudnn.cudnnOpTensorDescriptor(jcuda.jcudnn.cudnnOpTensorDescriptor) CudaTensor(com.simiacryptus.mindseye.lang.cudnn.CudaTensor) CudaDevice(com.simiacryptus.mindseye.lang.cudnn.CudaDevice) Nonnull(javax.annotation.Nonnull) CudaMemory(com.simiacryptus.mindseye.lang.cudnn.CudaMemory) DeltaSet(com.simiacryptus.mindseye.lang.DeltaSet) CudaTensorList(com.simiacryptus.mindseye.lang.cudnn.CudaTensorList) TensorList(com.simiacryptus.mindseye.lang.TensorList) Result(com.simiacryptus.mindseye.lang.Result) ReferenceCounting(com.simiacryptus.mindseye.lang.ReferenceCounting) CudaResource(com.simiacryptus.mindseye.lang.cudnn.CudaResource) Nullable(javax.annotation.Nullable) Nullable(javax.annotation.Nullable)

Example 3 with Layer

use of com.simiacryptus.mindseye.lang.Layer in project MindsEye by SimiaCryptus.

the class ImgLinearSubnetLayer method evalAndFree.

@Nullable
@Override
public Result evalAndFree(@Nonnull final Result... inObj) {
    assert 1 == inObj.length;
    Result input = inObj[0];
    TensorList inputData = input.getData();
    @Nonnull final int[] inputDims = inputData.getDimensions();
    assert 3 == inputDims.length;
    int length = inputData.length();
    int maxBand = legs.stream().mapToInt(x -> x.toBand).max().getAsInt();
    assert maxBand == inputDims[2] : maxBand + " != " + inputDims[2];
    assert IntStream.range(0, maxBand).allMatch(i -> 1 == legs.stream().filter(x -> x.fromBand <= i && x.toBand > i).count());
    CudaTensor passback = CudaSystem.run(gpu -> {
        return CudaTensor.wrap(gpu.allocate(inputData.getElements() * precision.size, MemoryType.Device, true), gpu.newTensorDescriptor(precision, length, inputDims[2], inputDims[1], inputDims[0]), precision);
    });
    try {
        AtomicInteger counter = new AtomicInteger(0);
        SumInputsLayer sumInputsLayer = new SumInputsLayer();
        try {
            Result[] legResults = legs.stream().map(leg -> {
                passback.addRef();
                ImgBandSelectLayer imgBandSelectLayer = new ImgBandSelectLayer(leg.fromBand, leg.toBand);
                input.addRef();
                TensorList legData = imgBandSelectLayer.eval(input).getDataAndFree();
                imgBandSelectLayer.freeRef();
                return leg.inner.evalAndFree(new Result(legData, (DeltaSet<Layer> ctx, TensorList delta) -> {
                    int[] outputDimensions = delta.getDimensions();
                    int[] inputDimensions = inputDims;
                    synchronized (passback) {
                        CudaSystem.run(gpu -> {
                            @Nonnull final CudaDevice.CudaTensorDescriptor viewDescriptor = gpu.newTensorDescriptor(// 
                            precision, // 
                            length, // 
                            outputDimensions[2], // 
                            outputDimensions[1], // 
                            outputDimensions[0], // 
                            inputDimensions[2] * inputDimensions[1] * inputDimensions[0], // 
                            inputDimensions[1] * inputDimensions[0], // 
                            inputDimensions[0], 1);
                            final int byteOffset = viewDescriptor.cStride * leg.fromBand * precision.size;
                            assert delta.length() == inputData.length();
                            assert passback.getDeviceId() == gpu.getDeviceId();
                            // assert error.stream().flatMapToDouble(x-> Arrays.stream(x.getData())).allMatch(Double::isFinite);
                            @Nullable final CudaTensor deltaTensor = gpu.getTensor(delta, precision, MemoryType.Device, true);
                            @Nonnull final CudaMemory passbackBuffer = passback.getMemory(gpu);
                            CudaMemory errorPtrMemory = deltaTensor.getMemory(gpu);
                            passbackBuffer.synchronize();
                            gpu.cudnnTransformTensor(precision.getPointer(1.0), deltaTensor.descriptor.getPtr(), errorPtrMemory.getPtr(), precision.getPointer(0.0), viewDescriptor.getPtr(), passbackBuffer.getPtr().withByteOffset(byteOffset));
                            errorPtrMemory.dirty();
                            passbackBuffer.dirty();
                            Stream.<ReferenceCounting>of(deltaTensor, viewDescriptor, passbackBuffer, errorPtrMemory).forEach(ReferenceCounting::freeRef);
                        }, passback);
                    }
                    if (counter.incrementAndGet() >= legs.size()) {
                        counter.set(0);
                        input.accumulate(ctx, CudaTensorList.create(passback, length, inputDims, precision));
                    }
                }) {

                    @Override
                    protected void _free() {
                        super._free();
                        input.freeRef();
                        passback.freeRef();
                    }
                });
            }).toArray(i -> new Result[i]);
            return sumInputsLayer.setParallel(parallel).setPrecision(precision).evalAndFree(legResults);
        } finally {
            sumInputsLayer.freeRef();
            input.freeRef();
        }
    } finally {
        passback.freeRef();
    }
}
Also used : IntStream(java.util.stream.IntStream) JsonObject(com.google.gson.JsonObject) CudaMemory(com.simiacryptus.mindseye.lang.cudnn.CudaMemory) LoggerFactory(org.slf4j.LoggerFactory) ReferenceCountingBase(com.simiacryptus.mindseye.lang.ReferenceCountingBase) Result(com.simiacryptus.mindseye.lang.Result) DataSerializer(com.simiacryptus.mindseye.lang.DataSerializer) ArrayList(java.util.ArrayList) Precision(com.simiacryptus.mindseye.lang.cudnn.Precision) AtomicInteger(java.util.concurrent.atomic.AtomicInteger) Map(java.util.Map) Layer(com.simiacryptus.mindseye.lang.Layer) ReferenceCounting(com.simiacryptus.mindseye.lang.ReferenceCounting) Nonnull(javax.annotation.Nonnull) Nullable(javax.annotation.Nullable) Logger(org.slf4j.Logger) CudaDevice(com.simiacryptus.mindseye.lang.cudnn.CudaDevice) CudaTensor(com.simiacryptus.mindseye.lang.cudnn.CudaTensor) CudaTensorList(com.simiacryptus.mindseye.lang.cudnn.CudaTensorList) JsonArray(com.google.gson.JsonArray) List(java.util.List) LayerBase(com.simiacryptus.mindseye.lang.LayerBase) Stream(java.util.stream.Stream) CudaSystem(com.simiacryptus.mindseye.lang.cudnn.CudaSystem) TensorList(com.simiacryptus.mindseye.lang.TensorList) MemoryType(com.simiacryptus.mindseye.lang.cudnn.MemoryType) DeltaSet(com.simiacryptus.mindseye.lang.DeltaSet) CudaTensor(com.simiacryptus.mindseye.lang.cudnn.CudaTensor) CudaDevice(com.simiacryptus.mindseye.lang.cudnn.CudaDevice) Nonnull(javax.annotation.Nonnull) CudaMemory(com.simiacryptus.mindseye.lang.cudnn.CudaMemory) DeltaSet(com.simiacryptus.mindseye.lang.DeltaSet) CudaTensorList(com.simiacryptus.mindseye.lang.cudnn.CudaTensorList) TensorList(com.simiacryptus.mindseye.lang.TensorList) Result(com.simiacryptus.mindseye.lang.Result) ReferenceCounting(com.simiacryptus.mindseye.lang.ReferenceCounting) AtomicInteger(java.util.concurrent.atomic.AtomicInteger) Nullable(javax.annotation.Nullable) Nullable(javax.annotation.Nullable)

Example 4 with Layer

use of com.simiacryptus.mindseye.lang.Layer in project MindsEye by SimiaCryptus.

the class ConvolutionLayer method eval.

@Nonnull
@Override
public Result eval(@Nonnull final Result... inObj) {
    Arrays.stream(inObj).forEach(nnResult -> nnResult.addRef());
    final Result input = inObj[0];
    final TensorList batch = input.getData();
    batch.addRef();
    @Nonnull final int[] inputDims = batch.get(0).getDimensions();
    @Nonnull final int[] kernelDims = kernel.getDimensions();
    @Nullable final double[] kernelData = ConvolutionLayer.this.kernel.getData();
    @Nonnull final ConvolutionController convolutionController = new ConvolutionController(inputDims, kernelDims, paddingX, paddingY);
    final Tensor[] output = IntStream.range(0, batch.length()).mapToObj(dataIndex -> new Tensor(convolutionController.getOutputDims())).toArray(i -> new Tensor[i]);
    try {
        final double[][] inputBuffers = batch.stream().map(x -> {
            @Nullable double[] data = x.getData();
            x.detach();
            return data;
        }).toArray(i -> new double[i][]);
        final double[][] outputBuffers = Arrays.stream(output).map(x -> x.getData()).toArray(i -> new double[i][]);
        convolutionController.convolve(inputBuffers, kernelData, outputBuffers);
    } catch (@Nonnull final Throwable e) {
        throw new RuntimeException("Error mapCoords image res " + Arrays.toString(inputDims), e);
    }
    int outputLength = output.length;
    return new Result(TensorArray.wrap(output), (@Nonnull final DeltaSet<Layer> buffer, @Nonnull final TensorList error) -> {
        if (!isFrozen()) {
            final double[][] inputBuffers = batch.stream().map(x -> {
                @Nullable double[] data = x.getData();
                x.freeRef();
                return data;
            }).toArray(i -> new double[i][]);
            final double[][] outputBuffers = error.stream().map(x -> {
                @Nullable double[] data = x.getData();
                x.freeRef();
                return data;
            }).toArray(i -> new double[i][]);
            @Nonnull final Tensor weightGradient = new Tensor(kernelDims);
            convolutionController.gradient(inputBuffers, weightGradient.getData(), outputBuffers);
            buffer.get(ConvolutionLayer.this, kernelData).addInPlace(weightGradient.getData()).freeRef();
            weightGradient.freeRef();
        }
        if (input.isAlive()) {
            final Tensor[] inputBufferTensors = IntStream.range(0, outputLength).mapToObj(dataIndex -> new Tensor(inputDims)).toArray(i -> new Tensor[i]);
            final double[][] inputBuffers = Arrays.stream(inputBufferTensors).map(x -> {
                @Nullable double[] data = x.getData();
                return data;
            }).toArray(i -> new double[i][]);
            final double[][] outputBuffers = error.stream().map(x -> {
                @Nullable double[] data = x.getData();
                x.freeRef();
                return data;
            }).toArray(i -> new double[i][]);
            convolutionController.backprop(inputBuffers, kernelData, outputBuffers);
            @Nonnull TensorArray tensorArray = TensorArray.wrap(inputBufferTensors);
            input.accumulate(buffer, tensorArray);
        }
    }) {

        @Override
        protected void _free() {
            Arrays.stream(inObj).forEach(nnResult -> nnResult.freeRef());
            batch.freeRef();
        }

        @Override
        public boolean isAlive() {
            return input.isAlive() || !isFrozen();
        }
    };
}
Also used : IntStream(java.util.stream.IntStream) JsonObject(com.google.gson.JsonObject) Util(com.simiacryptus.util.Util) Coordinate(com.simiacryptus.mindseye.lang.Coordinate) Arrays(java.util.Arrays) Tensor(com.simiacryptus.mindseye.lang.Tensor) Result(com.simiacryptus.mindseye.lang.Result) DataSerializer(com.simiacryptus.mindseye.lang.DataSerializer) JsonElement(com.google.gson.JsonElement) List(java.util.List) LayerBase(com.simiacryptus.mindseye.lang.LayerBase) ToDoubleFunction(java.util.function.ToDoubleFunction) TensorList(com.simiacryptus.mindseye.lang.TensorList) Map(java.util.Map) DoubleSupplier(java.util.function.DoubleSupplier) Layer(com.simiacryptus.mindseye.lang.Layer) TensorArray(com.simiacryptus.mindseye.lang.TensorArray) DeltaSet(com.simiacryptus.mindseye.lang.DeltaSet) Nonnull(javax.annotation.Nonnull) Nullable(javax.annotation.Nullable) Tensor(com.simiacryptus.mindseye.lang.Tensor) Nonnull(javax.annotation.Nonnull) DeltaSet(com.simiacryptus.mindseye.lang.DeltaSet) TensorList(com.simiacryptus.mindseye.lang.TensorList) Result(com.simiacryptus.mindseye.lang.Result) TensorArray(com.simiacryptus.mindseye.lang.TensorArray) Nullable(javax.annotation.Nullable) Nonnull(javax.annotation.Nonnull)

Example 5 with Layer

use of com.simiacryptus.mindseye.lang.Layer in project MindsEye by SimiaCryptus.

the class BandAvgReducerLayer method evalAndFree.

@Nullable
@Override
public Result evalAndFree(final Result... inObj) {
    if (!CudaSystem.isEnabled())
        return getCompatibilityLayer().evalAndFree(inObj);
    final Result input = inObj[0];
    TensorList inputData = input.getData();
    @Nonnull final int[] inputSize = inputData.getDimensions();
    int length = inputData.length();
    final int bands = inputSize[2];
    CudaTensorList result = CudaSystem.run(gpu -> {
        CudaTensor inputTensor = gpu.getTensor(inputData, precision, MemoryType.Device, false);
        @Nonnull final CudaDevice.CudaTensorDescriptor outputDescriptor = gpu.newTensorDescriptor(precision, length, bands, 1, 1);
        long size = (long) precision.size * outputDescriptor.nStride * length;
        @Nonnull final CudaMemory outputPtr = gpu.allocate(size, MemoryType.Managed, true);
        CudaResource<cudnnReduceTensorDescriptor> reduceTensorDescriptor = gpu.cudnnCreateReduceTensorDescriptor(cudnnReduceTensorOp.CUDNN_REDUCE_TENSOR_AVG, precision.code, cudnnNanPropagation.CUDNN_NOT_PROPAGATE_NAN, cudnnReduceTensorIndices.CUDNN_REDUCE_TENSOR_NO_INDICES, cudnnIndicesType.CUDNN_32BIT_INDICES);
        CudaMemory inputMemory = inputTensor.getMemory(gpu);
        @Nonnull final CudaMemory workspacePtr = gpu.allocate(inputMemory.size, MemoryType.Device, true);
        @Nonnull final CudaMemory indexPtr = gpu.allocate(12 * length, MemoryType.Device, false);
        gpu.cudnnReduceTensor(reduceTensorDescriptor.getPtr(), indexPtr.getPtr(), indexPtr.size, workspacePtr.getPtr(), workspacePtr.size, precision.getPointer(alpha), inputTensor.descriptor.getPtr(), inputMemory.getPtr(), precision.getPointer(0.0), outputDescriptor.getPtr(), outputPtr.getPtr());
        outputPtr.dirty();
        inputMemory.dirty();
        Stream.of(inputMemory, inputTensor, reduceTensorDescriptor, workspacePtr, indexPtr, inputData).forEach(ReferenceCounting::freeRef);
        return CudaTensorList.wrap(CudaTensor.wrap(outputPtr, outputDescriptor, precision), length, new int[] { 1, 1, bands }, precision);
    });
    int pixels = inputSize[0] * inputSize[1];
    return new Result(result, (DeltaSet<Layer> ctx, TensorList delta) -> {
        TensorList passback;
        passback = TensorArray.wrap(delta.stream().map(x -> {
            Tensor tensor = new Tensor(inputSize[0], inputSize[1], inputSize[2]).setByCoord(c -> x.get(c.getCoords()[2]) * alpha / pixels);
            x.freeRef();
            return tensor;
        }).toArray(i -> new Tensor[i]));
        // passback = CudaSystem.run(gpu -> {
        // CudaTensor deltaTensor = gpu.getTensor(delta, precision, MemoryType.Device, true);
        // @Nonnull final CudaDevice.CudaTensorDescriptor outputDescriptor = gpu.newTensorDescriptor(precision,
        // length, inputSize[2], inputSize[1], inputSize[0]);
        // @Nonnull final CudaMemory outputPtr = gpu.allocate((long) precision.size * outputDescriptor.nStride * length, MemoryType.Device, true);
        // CudaMemory deltaMemory = deltaTensor.getMemory(gpu);
        // @Nonnull final CudaDevice.CudaTensorDescriptor inputDescriptor = gpu.newTensorDescriptor(precision,
        // 1, 1, inputSize[1], inputSize[0]);
        // for(int batch=0;batch<length;batch++){
        // Tensor tensor = delta.get(batch);
        // for(int band=0;band<bands;band++){
        // int i = batch * bands + band;
        // CudaMemory img = outputPtr.withByteOffset(precision.size * i * outputDescriptor.cStride);
        // CudaMemory val = deltaMemory.withByteOffset(precision.size * i);
        // gpu.cudnnSetTensor(inputDescriptor.getPtr(), img.getPtr(), precision.getPointer(tensor.get(band) / outputDescriptor.cStride));
        // img.freeRef();
        // val.freeRef();
        // outputPtr.dirty().synchronize();
        // }
        // }
        // Stream.of(deltaMemory, deltaTensor, inputDescriptor).forEach(ReferenceCounting::freeRef);
        // return CudaTensorList.wrap(CudaTensor.wrap(outputPtr, outputDescriptor, precision), length, inputSize, precision);
        // });
        input.accumulate(ctx, passback);
    }) {

        @Override
        protected void _free() {
            super._free();
            input.freeRef();
        }
    };
}
Also used : JsonObject(com.google.gson.JsonObject) Arrays(java.util.Arrays) CudaMemory(com.simiacryptus.mindseye.lang.cudnn.CudaMemory) jcuda.jcudnn.cudnnReduceTensorDescriptor(jcuda.jcudnn.cudnnReduceTensorDescriptor) Tensor(com.simiacryptus.mindseye.lang.Tensor) jcuda.jcudnn.cudnnReduceTensorOp(jcuda.jcudnn.cudnnReduceTensorOp) Result(com.simiacryptus.mindseye.lang.Result) DataSerializer(com.simiacryptus.mindseye.lang.DataSerializer) Precision(com.simiacryptus.mindseye.lang.cudnn.Precision) Map(java.util.Map) Layer(com.simiacryptus.mindseye.lang.Layer) ReferenceCounting(com.simiacryptus.mindseye.lang.ReferenceCounting) Nonnull(javax.annotation.Nonnull) Nullable(javax.annotation.Nullable) CudaResource(com.simiacryptus.mindseye.lang.cudnn.CudaResource) CudaDevice(com.simiacryptus.mindseye.lang.cudnn.CudaDevice) CudaTensor(com.simiacryptus.mindseye.lang.cudnn.CudaTensor) CudaTensorList(com.simiacryptus.mindseye.lang.cudnn.CudaTensorList) jcuda.jcudnn.cudnnIndicesType(jcuda.jcudnn.cudnnIndicesType) jcuda.jcudnn.cudnnNanPropagation(jcuda.jcudnn.cudnnNanPropagation) jcuda.jcudnn.cudnnReduceTensorIndices(jcuda.jcudnn.cudnnReduceTensorIndices) List(java.util.List) LayerBase(com.simiacryptus.mindseye.lang.LayerBase) Stream(java.util.stream.Stream) CudaSystem(com.simiacryptus.mindseye.lang.cudnn.CudaSystem) TensorList(com.simiacryptus.mindseye.lang.TensorList) MemoryType(com.simiacryptus.mindseye.lang.cudnn.MemoryType) TensorArray(com.simiacryptus.mindseye.lang.TensorArray) DeltaSet(com.simiacryptus.mindseye.lang.DeltaSet) CudaTensor(com.simiacryptus.mindseye.lang.cudnn.CudaTensor) Tensor(com.simiacryptus.mindseye.lang.Tensor) CudaTensor(com.simiacryptus.mindseye.lang.cudnn.CudaTensor) CudaDevice(com.simiacryptus.mindseye.lang.cudnn.CudaDevice) Nonnull(javax.annotation.Nonnull) jcuda.jcudnn.cudnnReduceTensorDescriptor(jcuda.jcudnn.cudnnReduceTensorDescriptor) CudaMemory(com.simiacryptus.mindseye.lang.cudnn.CudaMemory) DeltaSet(com.simiacryptus.mindseye.lang.DeltaSet) CudaTensorList(com.simiacryptus.mindseye.lang.cudnn.CudaTensorList) TensorList(com.simiacryptus.mindseye.lang.TensorList) Result(com.simiacryptus.mindseye.lang.Result) CudaTensorList(com.simiacryptus.mindseye.lang.cudnn.CudaTensorList) ReferenceCounting(com.simiacryptus.mindseye.lang.ReferenceCounting) Nullable(javax.annotation.Nullable)

Aggregations

Layer (com.simiacryptus.mindseye.lang.Layer)167 Nonnull (javax.annotation.Nonnull)159 Nullable (javax.annotation.Nullable)128 Arrays (java.util.Arrays)117 Tensor (com.simiacryptus.mindseye.lang.Tensor)116 List (java.util.List)108 Result (com.simiacryptus.mindseye.lang.Result)103 IntStream (java.util.stream.IntStream)98 DeltaSet (com.simiacryptus.mindseye.lang.DeltaSet)95 TensorList (com.simiacryptus.mindseye.lang.TensorList)93 Map (java.util.Map)83 TensorArray (com.simiacryptus.mindseye.lang.TensorArray)76 Logger (org.slf4j.Logger)76 LoggerFactory (org.slf4j.LoggerFactory)76 JsonObject (com.google.gson.JsonObject)70 DataSerializer (com.simiacryptus.mindseye.lang.DataSerializer)66 LayerBase (com.simiacryptus.mindseye.lang.LayerBase)64 NotebookOutput (com.simiacryptus.util.io.NotebookOutput)51 Collectors (java.util.stream.Collectors)42 Stream (java.util.stream.Stream)37