Search in sources :

Example 6 with TensorList

Use of com.simiacryptus.mindseye.lang.TensorList in project MindsEye by SimiaCryptus.

From the class ActivationLayer, method evalAndFree.

/**
 * Runs the cuDNN activation selected by {@code mode} over the first input and returns a
 * {@link Result} whose accumulator back-propagates deltas through the same activation.
 *
 * <p>When CUDA is not enabled the call is delegated to {@code getCompatibilityLayer()}.
 * Only {@code inObj[0]} is read. The input dimensions are indexed at positions 0, 1 and 2,
 * so the input is expected to have at least three dimensions.
 *
 * @param inObj the inputs; only the first element is used
 * @return a result wrapping the activated data; freeing it releases the input data, the
 *     output tensor and the input result
 * @throws ComponentException wrapping any {@link Throwable} raised during evaluation
 */
@Nullable
@Override
public Result evalAndFree(@Nonnull final Result... inObj) {
    // Without CUDA, hand the whole evaluation to the compatibility layer.
    if (!CudaSystem.isEnabled())
        return getCompatibilityLayer().evalAndFree(inObj);
    // assert Arrays.stream(inObj).flatMapToDouble(input->input.data.stream().flatMapToDouble(x-> Arrays.stream(x.getData()))).allMatch(v->Double.isFinite(v));
    final Result inputResult = inObj[0];
    final TensorList inputData = inputResult.getData();
    @Nonnull final int[] inputSize = inputData.getDimensions();
    // The output has the same shape as the input (same array instance).
    @Nonnull final int[] outputSize = inputSize;
    final int length = inputData.length();
    final int inputDims = Tensor.length(inputSize);
    try {
        final CudaTensor outPtr = CudaSystem.run(gpu -> {
            @Nullable final CudaTensor inputTensor = gpu.getTensor(inputData, precision, MemoryType.Device, false);
            final CudaTensor outputTensor;
            // In-place path: reuse the input tensor as the output when both the input list and
            // its device tensor report exactly one reference, and either the input result is not
            // alive or the mode is RELU. The extra addRef balances the freeRef in the finally
            // block below so the returned tensor stays referenced.
            // NOTE(review): presumably RELU's backward pass tolerates the overwritten input — confirm.
            if (1 == inputData.currentRefCount() && 1 == inputTensor.currentRefCount() && (!inputResult.isAlive() || mode == Mode.RELU.id)) {
                inputTensor.addRef();
                outputTensor = inputTensor;
            } else {
                // Otherwise allocate a fresh output buffer. The descriptor takes the batch count,
                // then inputSize[2], inputSize[1], inputSize[0], followed by the matching strides
                // (innermost stride 1).
                @Nonnull final CudaDevice.CudaTensorDescriptor outputDescriptor = gpu.newTensorDescriptor(precision, length, inputSize[2], inputSize[1], inputSize[0], inputSize[2] * inputSize[1] * inputSize[0], inputSize[1] * inputSize[0], inputSize[0], 1);
                // Byte size is computed in long arithmetic (note the cast) to avoid int overflow.
                @Nonnull final CudaMemory outputData = gpu.allocate((long) precision.size * inputDims * length, MemoryType.Managed.normalize(), true);
                outputTensor = CudaTensor.wrap(outputData, outputDescriptor, precision);
            }
            @Nonnull final CudaResource<cudnnActivationDescriptor> activationDesc = gpu.newActivationDescriptor(mode, cudnnNanPropagation.CUDNN_NOT_PROPAGATE_NAN, 0);
            try {
                CudaMemory memory = inputTensor.getMemory(gpu);
                CudaMemory tensorMemory = outputTensor.getMemory(gpu);
                // 1.0 and 0.0 are passed as the two scaling pointers — presumably cuDNN's
                // alpha/beta, i.e. the output is overwritten rather than blended; confirm
                // against the cudnnActivationForward documentation.
                CudaSystem.handle(gpu.cudnnActivationForward(activationDesc.getPtr(), precision.getPointer(1.0), inputTensor.descriptor.getPtr(), memory.getPtr(), precision.getPointer(0.0), outputTensor.descriptor.getPtr(), tensorMemory.getPtr()));
                assert CudaDevice.isThreadDeviceId(gpu.getDeviceId());
                memory.dirty();
                tensorMemory.dirty();
                // The memory handles are released only on this success path.
                tensorMemory.freeRef();
                memory.freeRef();
                return outputTensor;
            } catch (@Nonnull final Throwable e) {
                throw new ComponentException("Error apply " + Arrays.toString(inputSize), e);
            } finally {
                // Always release the activation descriptor and this lambda's hold on the input tensor.
                activationDesc.freeRef();
                inputTensor.freeRef();
            }
        }, inputData);
        // The second constructor argument is the backward accumulator: it receives the delta for
        // this layer's output and forwards the corresponding delta for the input.
        return new Result(CudaTensorList.create(outPtr, length, outputSize, precision), (@Nonnull final DeltaSet<Layer> buffer, @Nonnull final TensorList delta) -> {
            if (inputResult.isAlive()) {
                final TensorList data = CudaSystem.run(gpu -> {
                    // NOTE(review): the final flag is true here but false in the forward pass;
                    // its meaning is defined by gpu.getTensor — TODO confirm.
                    @Nullable CudaTensor inputTensor = gpu.getTensor(inputData, precision, MemoryType.Device, true);
                    @Nullable CudaTensor deltaTensor = gpu.getTensor(delta, precision, MemoryType.Device, true);
                    assert length == delta.length();
                    CudaTensor localOut = outPtr.getDense(gpu);
                    // delta has been copied into deltaTensor above, so the list itself is released here.
                    delta.freeRef();
                    CudaTensor passbackTensor;
                    // if (sameStrides(deltaTensor.descriptor, inputTensor.descriptor)) {
                    // passbackTensor = deltaTensor;
                    // passbackTensor.addRef();
                    // }
                    // else {
                    // passbackTensor = deltaTensor.getDense(gpu);
                    // inputTensor = inputTensor.getDenseAndFree(gpu);
                    // }
                    // Fresh passback buffer with the same descriptor layout as the forward output.
                    passbackTensor = CudaTensor.wrap(gpu.allocate((long) Tensor.length(inputSize) * length * precision.size, MemoryType.Managed.normalize(), false), gpu.newTensorDescriptor(precision, length, inputSize[2], inputSize[1], inputSize[0], inputSize[2] * inputSize[1] * inputSize[0], inputSize[1] * inputSize[0], inputSize[0], 1), precision);
                    @Nonnull final CudaResource<cudnnActivationDescriptor> activationDesc = gpu.newActivationDescriptor(mode, cudnnNanPropagation.CUDNN_NOT_PROPAGATE_NAN, 0);
                    try {
                        CudaMemory localOutMemory = localOut.getMemory(gpu);
                        CudaMemory deltaTensorMemory = deltaTensor.getMemory(gpu);
                        CudaMemory inputTensorMemory = inputTensor.getMemory(gpu);
                        CudaMemory passbackTensorMemory = passbackTensor.getMemory(gpu);
                        // Arguments after the first scaling pointer: forward output, incoming delta,
                        // forward input; the result is written to the passback buffer.
                        CudaSystem.handle(gpu.cudnnActivationBackward(activationDesc.getPtr(), precision.getPointer(1.0), localOut.descriptor.getPtr(), localOutMemory.getPtr(), deltaTensor.descriptor.getPtr(), deltaTensorMemory.getPtr(), inputTensor.descriptor.getPtr(), inputTensorMemory.getPtr(), precision.getPointer(0.0), passbackTensor.descriptor.getPtr(), passbackTensorMemory.getPtr()));
                        assert CudaDevice.isThreadDeviceId(gpu.getDeviceId());
                        localOutMemory.dirty();
                        deltaTensorMemory.dirty();
                        inputTensorMemory.dirty();
                        passbackTensorMemory.dirty();
                        // As in the forward pass, memory handles are released only on success.
                        localOutMemory.freeRef();
                        deltaTensorMemory.freeRef();
                        inputTensorMemory.freeRef();
                        passbackTensorMemory.freeRef();
                    } catch (@Nonnull final Throwable e) {
                        throw new ComponentException("Error apply " + Arrays.toString(inputSize), e);
                    } finally {
                        localOut.freeRef();
                        inputTensor.freeRef();
                        deltaTensor.freeRef();
                        activationDesc.freeRef();
                    }
                    return CudaTensorList.wrap(passbackTensor, length, inputSize, precision);
                }, delta);
                inputResult.accumulate(buffer, data);
            } else {
                // Nothing to propagate; the delta is still released.
                delta.freeRef();
            }
        }) {

            /** Passes the delta straight to the accumulator supplied to the constructor. */
            @Override
            public final void accumulate(DeltaSet<Layer> buffer, TensorList delta) {
                getAccumulator().accept(buffer, delta);
            }

            /** Releases the input data, the output tensor and the input result. */
            @Override
            protected void _free() {
                inputData.freeRef();
                outPtr.freeRef();
                inputResult.freeRef();
            }

            /** Alive when the input is alive or this layer is not frozen. */
            @Override
            public boolean isAlive() {
                return inputResult.isAlive() || !isFrozen();
            }
        };
    } catch (@Nonnull final Throwable e) {
        // Catches everything, including a ComponentException thrown inside the forward lambda
        // (if CudaSystem.run propagates it), which is then wrapped a second time.
        throw new ComponentException("Error apply image res " + Arrays.toString(inputSize), e);
    }
}
Also used : CudaTensor(com.simiacryptus.mindseye.lang.cudnn.CudaTensor) jcuda.jcudnn.cudnnActivationDescriptor(jcuda.jcudnn.cudnnActivationDescriptor) CudaDevice(com.simiacryptus.mindseye.lang.cudnn.CudaDevice) Nonnull(javax.annotation.Nonnull) CudaMemory(com.simiacryptus.mindseye.lang.cudnn.CudaMemory) DeltaSet(com.simiacryptus.mindseye.lang.DeltaSet) CudaTensorList(com.simiacryptus.mindseye.lang.cudnn.CudaTensorList) TensorList(com.simiacryptus.mindseye.lang.TensorList) Result(com.simiacryptus.mindseye.lang.Result) ComponentException(com.simiacryptus.mindseye.lang.ComponentException) Nullable(javax.annotation.Nullable) Nullable(javax.annotation.Nullable)

Example 7 with TensorList

Use of com.simiacryptus.mindseye.lang.TensorList in project MindsEye by SimiaCryptus.

From the class BandAvgReducerLayer, method evalAndFree.

/**
 * Averages each band of the first input with a cuDNN tensor reduction
 * ({@code CUDNN_REDUCE_TENSOR_AVG}), scaled by {@code alpha}, and returns one
 * {@code 1 x 1 x bands} tensor per batch item.
 *
 * <p>When CUDA is not enabled the call is delegated to {@code getCompatibilityLayer()}.
 * The backward pass runs on the CPU: every element of the passback tensor receives the
 * delta of its band multiplied by {@code alpha} and divided by the number of pixels
 * ({@code inputSize[0] * inputSize[1]}).
 *
 * @param inObj the inputs; only the first element is used, and its dimensions are indexed
 *     at positions 0, 1 and 2
 * @return a result holding the per-band averages; freeing it also releases the input result
 */
@Nullable
@Override
public Result evalAndFree(final Result... inObj) {
    if (!CudaSystem.isEnabled()) {
        return getCompatibilityLayer().evalAndFree(inObj);
    }
    final Result input = inObj[0];
    final TensorList inputData = input.getData();
    @Nonnull final int[] inputSize = inputData.getDimensions();
    final int length = inputData.length();
    final int bands = inputSize[2];
    final CudaTensorList reduced = CudaSystem.run(gpu -> {
        final CudaTensor source = gpu.getTensor(inputData, precision, MemoryType.Device, false);
        // Output descriptor: one value per band per batch item.
        @Nonnull final CudaDevice.CudaTensorDescriptor reducedDescriptor = gpu.newTensorDescriptor(precision, length, bands, 1, 1);
        final long reducedBytes = (long) precision.size * reducedDescriptor.nStride * length;
        @Nonnull final CudaMemory reducedMemory = gpu.allocate(reducedBytes, MemoryType.Managed, true);
        final CudaResource<cudnnReduceTensorDescriptor> reduceDescriptor = gpu.cudnnCreateReduceTensorDescriptor(cudnnReduceTensorOp.CUDNN_REDUCE_TENSOR_AVG, precision.code, cudnnNanPropagation.CUDNN_NOT_PROPAGATE_NAN, cudnnReduceTensorIndices.CUDNN_REDUCE_TENSOR_NO_INDICES, cudnnIndicesType.CUDNN_32BIT_INDICES);
        final CudaMemory sourceMemory = source.getMemory(gpu);
        // Scratch buffers handed to the reduction: a workspace as large as the input memory
        // and an index buffer of 12 * length bytes.
        @Nonnull final CudaMemory workspace = gpu.allocate(sourceMemory.size, MemoryType.Device, true);
        @Nonnull final CudaMemory indices = gpu.allocate(12 * length, MemoryType.Device, false);
        gpu.cudnnReduceTensor(reduceDescriptor.getPtr(), indices.getPtr(), indices.size, workspace.getPtr(), workspace.size, precision.getPointer(alpha), source.descriptor.getPtr(), sourceMemory.getPtr(), precision.getPointer(0.0), reducedDescriptor.getPtr(), reducedMemory.getPtr());
        reducedMemory.dirty();
        sourceMemory.dirty();
        // Release everything acquired for the reduction, plus the input data itself.
        sourceMemory.freeRef();
        source.freeRef();
        reduceDescriptor.freeRef();
        workspace.freeRef();
        indices.freeRef();
        inputData.freeRef();
        final CudaTensor reducedTensor = CudaTensor.wrap(reducedMemory, reducedDescriptor, precision);
        return CudaTensorList.wrap(reducedTensor, length, new int[] { 1, 1, bands }, precision);
    });
    final int pixels = inputSize[0] * inputSize[1];
    return new Result(reduced, (DeltaSet<Layer> ctx, TensorList delta) -> {
        // Spread each band's delta uniformly across all pixels of that band.
        final Tensor[] expanded = delta.stream().map(bandDelta -> {
            final Tensor spread = new Tensor(inputSize[0], inputSize[1], inputSize[2]).setByCoord(coord -> bandDelta.get(coord.getCoords()[2]) * alpha / pixels);
            bandDelta.freeRef();
            return spread;
        }).toArray(Tensor[]::new);
        final TensorList passback = TensorArray.wrap(expanded);
        input.accumulate(ctx, passback);
    }) {

        /** Runs the base cleanup and then releases the input result. */
        @Override
        protected void _free() {
            super._free();
            input.freeRef();
        }
    };
}
Also used : JsonObject(com.google.gson.JsonObject) Arrays(java.util.Arrays) CudaMemory(com.simiacryptus.mindseye.lang.cudnn.CudaMemory) jcuda.jcudnn.cudnnReduceTensorDescriptor(jcuda.jcudnn.cudnnReduceTensorDescriptor) Tensor(com.simiacryptus.mindseye.lang.Tensor) jcuda.jcudnn.cudnnReduceTensorOp(jcuda.jcudnn.cudnnReduceTensorOp) Result(com.simiacryptus.mindseye.lang.Result) DataSerializer(com.simiacryptus.mindseye.lang.DataSerializer) Precision(com.simiacryptus.mindseye.lang.cudnn.Precision) Map(java.util.Map) Layer(com.simiacryptus.mindseye.lang.Layer) ReferenceCounting(com.simiacryptus.mindseye.lang.ReferenceCounting) Nonnull(javax.annotation.Nonnull) Nullable(javax.annotation.Nullable) CudaResource(com.simiacryptus.mindseye.lang.cudnn.CudaResource) CudaDevice(com.simiacryptus.mindseye.lang.cudnn.CudaDevice) CudaTensor(com.simiacryptus.mindseye.lang.cudnn.CudaTensor) CudaTensorList(com.simiacryptus.mindseye.lang.cudnn.CudaTensorList) jcuda.jcudnn.cudnnIndicesType(jcuda.jcudnn.cudnnIndicesType) jcuda.jcudnn.cudnnNanPropagation(jcuda.jcudnn.cudnnNanPropagation) jcuda.jcudnn.cudnnReduceTensorIndices(jcuda.jcudnn.cudnnReduceTensorIndices) List(java.util.List) LayerBase(com.simiacryptus.mindseye.lang.LayerBase) Stream(java.util.stream.Stream) CudaSystem(com.simiacryptus.mindseye.lang.cudnn.CudaSystem) TensorList(com.simiacryptus.mindseye.lang.TensorList) MemoryType(com.simiacryptus.mindseye.lang.cudnn.MemoryType) TensorArray(com.simiacryptus.mindseye.lang.TensorArray) DeltaSet(com.simiacryptus.mindseye.lang.DeltaSet) CudaTensor(com.simiacryptus.mindseye.lang.cudnn.CudaTensor) Tensor(com.simiacryptus.mindseye.lang.Tensor) CudaTensor(com.simiacryptus.mindseye.lang.cudnn.CudaTensor) CudaDevice(com.simiacryptus.mindseye.lang.cudnn.CudaDevice) Nonnull(javax.annotation.Nonnull) jcuda.jcudnn.cudnnReduceTensorDescriptor(jcuda.jcudnn.cudnnReduceTensorDescriptor) CudaMemory(com.simiacryptus.mindseye.lang.cudnn.CudaMemory) DeltaSet(com.simiacryptus.mindseye.lang.DeltaSet) 
CudaTensorList(com.simiacryptus.mindseye.lang.cudnn.CudaTensorList) TensorList(com.simiacryptus.mindseye.lang.TensorList) Result(com.simiacryptus.mindseye.lang.Result) CudaTensorList(com.simiacryptus.mindseye.lang.cudnn.CudaTensorList) ReferenceCounting(com.simiacryptus.mindseye.lang.ReferenceCounting) Nullable(javax.annotation.Nullable)

Example 8 with TensorList

Use of com.simiacryptus.mindseye.lang.TensorList in project MindsEye by SimiaCryptus.

From the class BinarySumLayer, method evalAndFree.

/**
 * Computes {@code leftFactor * inObj[0] + rightFactor * inObj[1]} element-wise with cuDNN
 * ({@code CUDNN_OP_TENSOR_ADD}).
 *
 * <p>A single input is returned unchanged, and more than two inputs are combined pairwise
 * by reducing over this method; both shortcuts require {@code leftFactor} and
 * {@code rightFactor} to equal 1. For exactly two inputs the shapes are validated first,
 * and when CUDA is not enabled the call is then delegated to
 * {@code getCompatibilityLayer()}.
 *
 * <p>The backward pass sends the incoming delta, scaled by {@code leftFactor} and
 * {@code rightFactor} respectively, to each input that is alive.
 *
 * @param inObj the inputs to add
 * @return the weighted sum; freeing it releases every input result and both data lists
 * @throws IllegalStateException if one or more than two inputs are given while either
 *     factor differs from 1
 * @throws IllegalArgumentException if the left input has more than three dimensions, or the
 *     two inputs differ in batch length or element count
 */
@Nullable
@Override
public Result evalAndFree(@Nonnull final Result... inObj) {
    if (inObj.length == 1) {
        if (rightFactor != 1)
            throw new IllegalStateException("rightFactor must be 1 for a single input, but was " + rightFactor);
        if (leftFactor != 1)
            throw new IllegalStateException("leftFactor must be 1 for a single input, but was " + leftFactor);
        return inObj[0];
    }
    if (inObj.length > 2) {
        if (rightFactor != 1)
            throw new IllegalStateException("rightFactor must be 1 for more than two inputs, but was " + rightFactor);
        if (leftFactor != 1)
            throw new IllegalStateException("leftFactor must be 1 for more than two inputs, but was " + leftFactor);
        return Arrays.stream(inObj).reduce((a, b) -> evalAndFree(a, b)).get();
    }
    assert (inObj.length == 2);
    final TensorList leftData = inObj[0].getData();
    final TensorList rightData = inObj[1].getData();
    int[] leftDimensions = leftData.getDimensions();
    if (3 < leftDimensions.length) {
        throw new IllegalArgumentException("dimensions=" + Arrays.toString(leftDimensions));
    }
    // Pad the left dimensions out to exactly three entries (missing trailing dimensions become 1).
    @Nonnull final int[] dimensions = { leftDimensions.length < 1 ? 0 : leftDimensions[0], leftDimensions.length < 2 ? 1 : leftDimensions[1], leftDimensions.length < 3 ? 1 : leftDimensions[2] };
    final int length = leftData.length();
    if (length != rightData.length())
        throw new IllegalArgumentException("Batch length mismatch: " + length + " != " + rightData.length());
    // Every remaining input must hold the same number of elements per item as the left input.
    for (int i = 1; i < inObj.length; i++) {
        if (Tensor.length(dimensions) != Tensor.length(inObj[i].getData().getDimensions())) {
            throw new IllegalArgumentException(Arrays.toString(dimensions) + " != " + Arrays.toString(inObj[i].getData().getDimensions()));
        }
    }
    if (!CudaSystem.isEnabled())
        return getCompatibilityLayer().evalAndFree(inObj);
    return new Result(CudaSystem.run(gpu -> {
        @Nonnull final CudaResource<cudnnOpTensorDescriptor> opDescriptor = gpu.newOpDescriptor(cudnnOpTensorOp.CUDNN_OP_TENSOR_ADD, precision);
        @Nonnull final CudaDevice.CudaTensorDescriptor outputDescriptor = gpu.newTensorDescriptor(precision, length, dimensions[2], dimensions[1], dimensions[0], dimensions[2] * dimensions[1] * dimensions[0], dimensions[1] * dimensions[0], dimensions[0], 1);
        // .getDenseAndFree(gpu);//.moveTo(gpu.getDeviceNumber());
        @Nullable final CudaTensor lPtr = gpu.getTensor(leftData, precision, MemoryType.Device, false);
        // .getDenseAndFree(gpu);//.moveTo(gpu.getDeviceNumber());
        @Nullable final CudaTensor rPtr = gpu.getTensor(rightData, precision, MemoryType.Device, false);
        // Widen to long before multiplying so large batches cannot overflow int.
        @Nonnull final CudaMemory outputPtr = gpu.allocate((long) precision.size * Tensor.length(dimensions) * length, MemoryType.Managed, true);
        CudaMemory lPtrMemory = lPtr.getMemory(gpu);
        CudaMemory rPtrMemory = rPtr.getMemory(gpu);
        // output = leftFactor * left + rightFactor * right; the 0.0 scaling pointer discards
        // any prior output contents.
        gpu.cudnnOpTensor(opDescriptor.getPtr(), precision.getPointer(leftFactor), lPtr.descriptor.getPtr(), lPtrMemory.getPtr(), precision.getPointer(rightFactor), rPtr.descriptor.getPtr(), rPtrMemory.getPtr(), precision.getPointer(0.0), outputDescriptor.getPtr(), outputPtr.getPtr());
        assert CudaDevice.isThreadDeviceId(gpu.getDeviceId());
        lPtrMemory.dirty();
        rPtrMemory.dirty();
        outputPtr.dirty();
        rPtrMemory.freeRef();
        lPtrMemory.freeRef();
        CudaTensor cudaTensor = CudaTensor.wrap(outputPtr, outputDescriptor, precision);
        Stream.<ReferenceCounting>of(opDescriptor, lPtr, rPtr).forEach(ReferenceCounting::freeRef);
        return CudaTensorList.wrap(cudaTensor, length, dimensions, precision);
    }, leftData), (@Nonnull final DeltaSet<Layer> buffer, @Nonnull final TensorList delta) -> {
        // Backward pass for the left input: passback = leftFactor * delta.
        Runnable a = () -> {
            if (inObj[0].isAlive()) {
                CudaTensorList tensorList = CudaSystem.run(gpu -> {
                    @Nullable final CudaTensor lPtr = gpu.getTensor(delta, precision, MemoryType.Device, false);
                    @Nonnull final CudaMemory passbackPtr = gpu.allocate((long) precision.size * Tensor.length(dimensions) * length, MemoryType.Managed.normalize(), true);
                    @Nonnull final CudaDevice.CudaTensorDescriptor passbackDescriptor = gpu.newTensorDescriptor(precision, length, dimensions[2], dimensions[1], dimensions[0], dimensions[2] * dimensions[1] * dimensions[0], dimensions[1] * dimensions[0], dimensions[0], 1);
                    CudaMemory lPtrMemory = lPtr.getMemory(gpu);
                    gpu.cudnnTransformTensor(precision.getPointer(leftFactor), lPtr.descriptor.getPtr(), lPtrMemory.getPtr(), precision.getPointer(0.0), passbackDescriptor.getPtr(), passbackPtr.getPtr());
                    assert CudaDevice.isThreadDeviceId(gpu.getDeviceId());
                    passbackPtr.dirty();
                    lPtrMemory.dirty();
                    lPtrMemory.freeRef();
                    CudaTensor cudaTensor = CudaTensor.wrap(passbackPtr, passbackDescriptor, precision);
                    lPtr.freeRef();
                    return CudaTensorList.wrap(cudaTensor, length, dimensions, precision);
                }, delta);
                inObj[0].accumulate(buffer, tensorList);
            }
        };
        // Backward pass for the right input: passback = rightFactor * delta.
        Runnable b = () -> {
            if (inObj[1].isAlive()) {
                CudaTensorList tensorList = CudaSystem.run(gpu -> {
                    @Nullable final CudaTensor lPtr = gpu.getTensor(delta, precision, MemoryType.Device, false);
                    @Nonnull final CudaMemory outputPtr = gpu.allocate((long) precision.size * Tensor.length(dimensions) * length, MemoryType.Managed.normalize(), true);
                    @Nonnull final CudaDevice.CudaTensorDescriptor passbackDescriptor = gpu.newTensorDescriptor(precision, length, dimensions[2], dimensions[1], dimensions[0], dimensions[2] * dimensions[1] * dimensions[0], dimensions[1] * dimensions[0], dimensions[0], 1);
                    CudaMemory lPtrMemory = lPtr.getMemory(gpu);
                    gpu.cudnnTransformTensor(precision.getPointer(rightFactor), lPtr.descriptor.getPtr(), lPtrMemory.getPtr(), precision.getPointer(0.0), passbackDescriptor.getPtr(), outputPtr.getPtr());
                    outputPtr.dirty();
                    lPtrMemory.dirty();
                    lPtrMemory.freeRef();
                    CudaTensor cudaTensor = CudaTensor.wrap(outputPtr, passbackDescriptor, precision);
                    lPtr.freeRef();
                    return CudaTensorList.wrap(cudaTensor, length, dimensions, precision);
                }, delta);
                inObj[1].accumulate(buffer, tensorList);
            }
        };
        if (CoreSettings.INSTANCE.isSingleThreaded())
            TestUtil.runAllSerial(a, b);
        else
            TestUtil.runAllParallel(a, b);
    }) {

        /** Releases every input result and both input data lists. */
        @Override
        protected void _free() {
            Arrays.stream(inObj).forEach(x -> x.freeRef());
            leftData.freeRef();
            rightData.freeRef();
        }

        /** Alive when at least one input is alive. */
        @Override
        public boolean isAlive() {
            for (@Nonnull final Result element : inObj) {
                if (element.isAlive()) {
                    return true;
                }
            }
            return false;
        }
    };
}
Also used : PipelineNetwork(com.simiacryptus.mindseye.network.PipelineNetwork) JsonObject(com.google.gson.JsonObject) Arrays(java.util.Arrays) CudaMemory(com.simiacryptus.mindseye.lang.cudnn.CudaMemory) Tensor(com.simiacryptus.mindseye.lang.Tensor) SumInputsLayer(com.simiacryptus.mindseye.layers.java.SumInputsLayer) Result(com.simiacryptus.mindseye.lang.Result) DataSerializer(com.simiacryptus.mindseye.lang.DataSerializer) Precision(com.simiacryptus.mindseye.lang.cudnn.Precision) Map(java.util.Map) Layer(com.simiacryptus.mindseye.lang.Layer) ReferenceCounting(com.simiacryptus.mindseye.lang.ReferenceCounting) Nonnull(javax.annotation.Nonnull) Nullable(javax.annotation.Nullable) CudaResource(com.simiacryptus.mindseye.lang.cudnn.CudaResource) CudaDevice(com.simiacryptus.mindseye.lang.cudnn.CudaDevice) CudaTensor(com.simiacryptus.mindseye.lang.cudnn.CudaTensor) jcuda.jcudnn.cudnnOpTensorOp(jcuda.jcudnn.cudnnOpTensorOp) TestUtil(com.simiacryptus.mindseye.test.TestUtil) CoreSettings(com.simiacryptus.mindseye.lang.CoreSettings) CudaTensorList(com.simiacryptus.mindseye.lang.cudnn.CudaTensorList) List(java.util.List) LayerBase(com.simiacryptus.mindseye.lang.LayerBase) Stream(java.util.stream.Stream) CudaSystem(com.simiacryptus.mindseye.lang.cudnn.CudaSystem) TensorList(com.simiacryptus.mindseye.lang.TensorList) LinearActivationLayer(com.simiacryptus.mindseye.layers.java.LinearActivationLayer) MemoryType(com.simiacryptus.mindseye.lang.cudnn.MemoryType) DeltaSet(com.simiacryptus.mindseye.lang.DeltaSet) jcuda.jcudnn.cudnnOpTensorDescriptor(jcuda.jcudnn.cudnnOpTensorDescriptor) CudaTensor(com.simiacryptus.mindseye.lang.cudnn.CudaTensor) Nonnull(javax.annotation.Nonnull) CudaMemory(com.simiacryptus.mindseye.lang.cudnn.CudaMemory) DeltaSet(com.simiacryptus.mindseye.lang.DeltaSet) CudaTensorList(com.simiacryptus.mindseye.lang.cudnn.CudaTensorList) TensorList(com.simiacryptus.mindseye.lang.TensorList) Result(com.simiacryptus.mindseye.lang.Result) 
CudaTensorList(com.simiacryptus.mindseye.lang.cudnn.CudaTensorList) ReferenceCounting(com.simiacryptus.mindseye.lang.ReferenceCounting) CudaResource(com.simiacryptus.mindseye.lang.cudnn.CudaResource) Nullable(javax.annotation.Nullable)

Example 9 with TensorList

Use of com.simiacryptus.mindseye.lang.TensorList in project MindsEye by SimiaCryptus.

From the class MaxImageBandLayer, method eval.

/**
 * For every batch item and every band, finds the coordinate holding the largest value and
 * emits a {@code 1 x 1 x bands} tensor of those maxima.
 *
 * <p>The winning coordinates are remembered so that the backward pass can route each
 * band's delta to the position that produced the maximum; all other positions of the
 * passback tensor keep their initial value.
 *
 * @param inObj the inputs; exactly one is expected, with three-dimensional data
 * @return a result holding the per-band maxima; freeing it releases the references taken
 *     here on the inputs and on the input data
 */
@Nonnull
@Override
public Result eval(@Nonnull final Result... inObj) {
    assert 1 == inObj.length;
    final TensorList inputData = inObj[0].getData();
    inputData.addRef();
    // Return value unused; call retained as in the original implementation.
    inputData.length();
    @Nonnull final int[] inputDims = inputData.getDimensions();
    assert 3 == inputDims.length;
    for (final Result source : inObj) {
        source.addRef();
    }
    // maxCoords[item][band] is the coordinate of the largest value of that band.
    final Coordinate[][] maxCoords = inputData.stream().map(item -> {
        final Coordinate[] perBand = IntStream.range(0, inputDims[2])
            .mapToObj(band -> item.coordStream(true)
                .filter(coord -> coord.getCoords()[2] == band)
                .max(Comparator.comparing(coord -> item.get(coord)))
                .get())
            .toArray(Coordinate[]::new);
        item.freeRef();
        return perBand;
    }).toArray(Coordinate[][]::new);
    final Tensor[] outputs = IntStream.range(0, inputData.length()).mapToObj(dataIndex -> {
        final Tensor item = inputData.get(dataIndex);
        final DoubleStream maxima = IntStream.range(0, inputDims[2]).mapToDouble(band -> {
            final int[] at = maxCoords[dataIndex][band].getCoords();
            return item.get(at[0], at[1], band);
        });
        final Tensor reduced = new Tensor(1, 1, inputDims[2]).set(Tensor.getDoubles(maxima, inputDims[2]));
        item.freeRef();
        return reduced;
    }).toArray(Tensor[]::new);
    return new Result(TensorArray.wrap(outputs), (@Nonnull final DeltaSet<Layer> buffer, @Nonnull final TensorList delta) -> {
        if (!inObj[0].isAlive()) {
            return;
        }
        @Nonnull final TensorArray passback = TensorArray.wrap(IntStream.range(0, delta.length()).parallel().mapToObj(dataIndex -> {
            final Tensor itemDelta = delta.get(dataIndex);
            @Nonnull final Tensor routed = new Tensor(inputData.getDimensions());
            // Only the recorded maximum position of each band receives that band's delta.
            for (int band = 0; band < inputDims[2]; band++) {
                final int[] at = maxCoords[dataIndex][band].getCoords();
                routed.set(new int[] { at[0], at[1], band }, itemDelta.get(0, 0, band));
            }
            itemDelta.freeRef();
            return routed;
        }).toArray(Tensor[]::new));
        inObj[0].accumulate(buffer, passback);
    }) {

        /** Releases the references taken on the inputs and on the input data. */
        @Override
        protected void _free() {
            for (final Result source : inObj) {
                source.freeRef();
            }
            inputData.freeRef();
        }

        /** Alive exactly when the single input is alive. */
        @Override
        public boolean isAlive() {
            return inObj[0].isAlive();
        }
    };
}
Also used : IntStream(java.util.stream.IntStream) JsonObject(com.google.gson.JsonObject) Coordinate(com.simiacryptus.mindseye.lang.Coordinate) Arrays(java.util.Arrays) Logger(org.slf4j.Logger) LoggerFactory(org.slf4j.LoggerFactory) Tensor(com.simiacryptus.mindseye.lang.Tensor) Result(com.simiacryptus.mindseye.lang.Result) DataSerializer(com.simiacryptus.mindseye.lang.DataSerializer) DoubleStream(java.util.stream.DoubleStream) JsonUtil(com.simiacryptus.util.io.JsonUtil) List(java.util.List) LayerBase(com.simiacryptus.mindseye.lang.LayerBase) TensorList(com.simiacryptus.mindseye.lang.TensorList) Map(java.util.Map) Layer(com.simiacryptus.mindseye.lang.Layer) TensorArray(com.simiacryptus.mindseye.lang.TensorArray) DeltaSet(com.simiacryptus.mindseye.lang.DeltaSet) Comparator(java.util.Comparator) Nonnull(javax.annotation.Nonnull) Nullable(javax.annotation.Nullable) Tensor(com.simiacryptus.mindseye.lang.Tensor) Nonnull(javax.annotation.Nonnull) Coordinate(com.simiacryptus.mindseye.lang.Coordinate) TensorArray(com.simiacryptus.mindseye.lang.TensorArray) DoubleStream(java.util.stream.DoubleStream) DeltaSet(com.simiacryptus.mindseye.lang.DeltaSet) TensorList(com.simiacryptus.mindseye.lang.TensorList) Result(com.simiacryptus.mindseye.lang.Result) Nonnull(javax.annotation.Nonnull)

Example 10 with TensorList

Use of com.simiacryptus.mindseye.lang.TensorList in project MindsEye by SimiaCryptus.

From the class MaxPoolingLayer, method eval.

/**
 * Max-pools the first input: each output element takes the largest value among the input
 * elements of its region, as supplied by {@code MaxPoolingLayer.calcRegionsCache} for the
 * input dimensions and {@code kernelDims}.
 *
 * <p>Each output dimension is {@code ceil(inputDim / kernelDim)}. For every batch item the
 * index of the winning input element is recorded per output element; the backward pass
 * adds each output delta to that recorded input index.
 *
 * @param inObj the inputs; a reference is taken on each, but only the first is pooled
 * @return a result holding the pooled tensors; freeing it releases the references taken here
 */
@Nonnull
@Override
public Result eval(@Nonnull final Result... inObj) {
    Arrays.stream(inObj).forEach(nnResult -> nnResult.addRef());
    final Result in = inObj[0];
    final TensorList inputData = in.getData();
    final int itemCount = inputData.length();
    @Nonnull final int[] inputDims = inputData.getDimensions();
    final List<Tuple2<Integer, int[]>> regions = MaxPoolingLayer.calcRegionsCache.apply(new MaxPoolingLayer.CalcRegionsParameter(inputDims, kernelDims));
    // The output shape is identical for every batch item, so it is computed once.
    final int[] outputDims = IntStream.range(0, inputDims.length).map(i -> {
        return (int) Math.ceil(inputDims[i] * 1.0 / kernelDims[i]);
    }).toArray();
    final Tensor[] outputA = IntStream.range(0, itemCount).mapToObj(dataIndex -> new Tensor(outputDims)).toArray(i -> new Tensor[i]);
    // gradientMapA[item][outputIndex] is the input index that supplied the maximum.
    @Nonnull final int[][] gradientMapA = new int[itemCount][];
    IntStream.range(0, itemCount).forEach(dataIndex -> {
        @Nullable final Tensor input = inputData.get(dataIndex);
        final Tensor output = outputA[dataIndex];
        @Nonnull final int[] gradientMap = new int[input.length()];
        // Each region writes to its own output index, so the regions are processed in parallel.
        regions.parallelStream().forEach(tuple -> {
            final int from = tuple.getFirst();
            final int[] toList = tuple.getSecond();
            int toMax = -1;
            double bestValue = Double.NEGATIVE_INFINITY;
            for (final int c : toList) {
                final double value = input.get(c);
                // The first candidate is always accepted, even when it is not greater than
                // the initial bestValue.
                if (-1 == toMax || bestValue < value) {
                    bestValue = value;
                    toMax = c;
                }
            }
            gradientMap[from] = toMax;
            output.set(from, input.get(toMax));
        });
        input.freeRef();
        gradientMapA[dataIndex] = gradientMap;
    });
    return new Result(TensorArray.wrap(outputA), (@Nonnull final DeltaSet<Layer> buffer, @Nonnull final TensorList data) -> {
        if (in.isAlive()) {
            @Nonnull TensorArray tensorArray = TensorArray.wrap(IntStream.range(0, itemCount).parallel().mapToObj(dataIndex -> {
                @Nonnull final Tensor backSignal = new Tensor(inputDims);
                final int[] ints = gradientMapA[dataIndex];
                @Nullable final Tensor datum = data.get(dataIndex);
                // Send each output delta back to the input element that won its region.
                for (int i = 0; i < datum.length(); i++) {
                    backSignal.add(ints[i], datum.get(i));
                }
                datum.freeRef();
                return backSignal;
            }).toArray(i -> new Tensor[i]));
            in.accumulate(buffer, tensorArray);
        }
    }) {

        /** Releases the references taken on the inputs at the start of eval. */
        @Override
        protected void _free() {
            Arrays.stream(inObj).forEach(nnResult -> nnResult.freeRef());
        }

        /** Alive exactly when the pooled input is alive. */
        @Override
        public boolean isAlive() {
            return in.isAlive();
        }
    };
}
Also used : IntStream(java.util.stream.IntStream) JsonObject(com.google.gson.JsonObject) Util(com.simiacryptus.util.Util) Arrays(java.util.Arrays) Logger(org.slf4j.Logger) IntToDoubleFunction(java.util.function.IntToDoubleFunction) LoggerFactory(org.slf4j.LoggerFactory) Tensor(com.simiacryptus.mindseye.lang.Tensor) Result(com.simiacryptus.mindseye.lang.Result) Function(java.util.function.Function) Collectors(java.util.stream.Collectors) DataSerializer(com.simiacryptus.mindseye.lang.DataSerializer) JsonUtil(com.simiacryptus.util.io.JsonUtil) Tuple2(com.simiacryptus.util.lang.Tuple2) List(java.util.List) LayerBase(com.simiacryptus.mindseye.lang.LayerBase) TensorList(com.simiacryptus.mindseye.lang.TensorList) Map(java.util.Map) Layer(com.simiacryptus.mindseye.lang.Layer) TensorArray(com.simiacryptus.mindseye.lang.TensorArray) DeltaSet(com.simiacryptus.mindseye.lang.DeltaSet) Nonnull(javax.annotation.Nonnull) Nullable(javax.annotation.Nullable) Tensor(com.simiacryptus.mindseye.lang.Tensor) Nonnull(javax.annotation.Nonnull) IntToDoubleFunction(java.util.function.IntToDoubleFunction) DeltaSet(com.simiacryptus.mindseye.lang.DeltaSet) TensorList(com.simiacryptus.mindseye.lang.TensorList) Result(com.simiacryptus.mindseye.lang.Result) Tuple2(com.simiacryptus.util.lang.Tuple2) TensorArray(com.simiacryptus.mindseye.lang.TensorArray) Nullable(javax.annotation.Nullable) Nonnull(javax.annotation.Nonnull)

Aggregations

TensorList (com.simiacryptus.mindseye.lang.TensorList)110 Nonnull (javax.annotation.Nonnull)109 Nullable (javax.annotation.Nullable)103 Result (com.simiacryptus.mindseye.lang.Result)95 Arrays (java.util.Arrays)93 Layer (com.simiacryptus.mindseye.lang.Layer)91 Tensor (com.simiacryptus.mindseye.lang.Tensor)88 DeltaSet (com.simiacryptus.mindseye.lang.DeltaSet)87 IntStream (java.util.stream.IntStream)82 List (java.util.List)80 TensorArray (com.simiacryptus.mindseye.lang.TensorArray)76 Map (java.util.Map)68 JsonObject (com.google.gson.JsonObject)64 DataSerializer (com.simiacryptus.mindseye.lang.DataSerializer)63 LayerBase (com.simiacryptus.mindseye.lang.LayerBase)61 Logger (org.slf4j.Logger)57 LoggerFactory (org.slf4j.LoggerFactory)57 ReferenceCounting (com.simiacryptus.mindseye.lang.ReferenceCounting)33 CudaTensor (com.simiacryptus.mindseye.lang.cudnn.CudaTensor)30 CudaTensorList (com.simiacryptus.mindseye.lang.cudnn.CudaTensorList)30