Examples with DeltaSet - com.simiacryptus.mindseye.lang.DeltaSet

Example 46 with DeltaSet

use of com.simiacryptus.mindseye.lang.DeltaSet in project MindsEye by SimiaCryptus.

From the class SimpleActivationLayer, method eval.

/**
 * Evaluates the element-wise function {@code eval(double, double[])} on every value of the first
 * input. For each element, slot 0 of the scratch array becomes the output value and slot 1 is
 * stored as the gradient used later by the accumulator.
 *
 * @param inObj the input results; only the data of {@code inObj[0]} is read
 * @return a result holding one output tensor per input tensor; its accumulator multiplies the
 *         incoming delta element-wise by the stored gradient (skipping non-finite gradients) and
 *         forwards the product to {@code inObj[0]}
 */
@Nonnull
@Override
public Result eval(@Nonnull final Result... inObj) {
    final TensorList inputList = inObj[0].getData();
    final int batchSize = inputList.length();
    assert 0 < batchSize;
    // Retain every input, then every input's data; both are released again in _free().
    for (final Result in : inObj) {
        in.addRef();
    }
    for (final Result in : inObj) {
        in.getData().addRef();
    }
    // One gradient tensor per batch item, filled during the forward pass.
    @Nonnull final Tensor[] gradients = new Tensor[batchSize];
    final Tensor[] outputs = IntStream.range(0, batchSize).parallel().mapToObj(index -> {
        @Nullable final Tensor source = inputList.get(index);
        @Nonnull final Tensor activated = new Tensor(inputList.getDimensions());
        @Nonnull final Tensor gradient = new Tensor(source.length());
        gradients[index] = gradient;
        // scratch[0] receives the function value, scratch[1] the gradient.
        @Nonnull final double[] scratch = new double[2];
        for (int k = 0; k < source.length(); k++) {
            eval(source.getData()[k], scratch);
            gradient.set(k, scratch[1]);
            activated.set(k, scratch[0]);
        }
        source.freeRef();
        return activated;
    }).toArray(Tensor[]::new);
    final TensorArray outputData = TensorArray.wrap(outputs);
    return new Result(outputData, (@Nonnull final DeltaSet<Layer> buffer, @Nonnull final TensorList data) -> {
        if (!inObj[0].isAlive()) {
            return;
        }
        final Tensor[] feedback = IntStream.range(0, batchSize).parallel().mapToObj(index -> {
            @Nonnull final Tensor passback = new Tensor(data.getDimensions());
            @Nullable final double[] slopes = gradients[index].getData();
            @Nullable final Tensor incoming = data.get(index);
            final int count = passback.length();
            for (int k = 0; k < count; k++) {
                final double slope = slopes[k];
                // Elements with a non-finite gradient keep the passback tensor's initial value.
                if (Double.isFinite(slope)) {
                    passback.set(k, incoming.get(k) * slope);
                }
            }
            incoming.freeRef();
            return passback;
        }).toArray(Tensor[]::new);
        @Nonnull final TensorArray feedbackData = TensorArray.wrap(feedback);
        inObj[0].accumulate(buffer, feedbackData);
    }) {

        @Override
        protected void _free() {
            for (final Result in : inObj) {
                in.freeRef();
            }
            for (final Result in : inObj) {
                in.getData().freeRef();
            }
            for (@Nonnull final Tensor gradient : gradients) {
                gradient.freeRef();
            }
        }

        @Override
        public boolean isAlive() {
            return inObj[0].isAlive();
        }
    };
}

Also used : IntStream(java.util.stream.IntStream) JsonObject(com.google.gson.JsonObject) Arrays(java.util.Arrays) Logger(org.slf4j.Logger) LoggerFactory(org.slf4j.LoggerFactory) Tensor(com.simiacryptus.mindseye.lang.Tensor) Result(com.simiacryptus.mindseye.lang.Result) List(java.util.List) LayerBase(com.simiacryptus.mindseye.lang.LayerBase) TensorList(com.simiacryptus.mindseye.lang.TensorList) Layer(com.simiacryptus.mindseye.lang.Layer) TensorArray(com.simiacryptus.mindseye.lang.TensorArray) DeltaSet(com.simiacryptus.mindseye.lang.DeltaSet) Nonnull(javax.annotation.Nonnull) Nullable(javax.annotation.Nullable) Tensor(com.simiacryptus.mindseye.lang.Tensor) Nonnull(javax.annotation.Nonnull) TensorArray(com.simiacryptus.mindseye.lang.TensorArray) DeltaSet(com.simiacryptus.mindseye.lang.DeltaSet) TensorList(com.simiacryptus.mindseye.lang.TensorList) Nullable(javax.annotation.Nullable) Result(com.simiacryptus.mindseye.lang.Result) Nonnull(javax.annotation.Nonnull)

Example 47 with DeltaSet

use of com.simiacryptus.mindseye.lang.DeltaSet in project MindsEye by SimiaCryptus.

From the class SumInputsLayer, method eval.

/**
 * Adds all inputs together element-wise.
 *
 * <p>A tensor list containing a single item is broadcast across the items of the other operand,
 * and a right-hand tensor with a single element is added to every element of the left-hand tensor.
 *
 * @param inObj the inputs to sum; each pair of lists must have equal length, or one of them must
 *              have length 1
 * @return a result holding the summed data; its accumulator hands the delta to every live input,
 *         summing it over the batch and/or over the tensor elements first when that input was
 *         broadcast
 */
@Nonnull
@Override
public Result eval(@Nonnull final Result... inObj) {
    // Retain inputs and their data for the lifetime of the returned result (released in _free()).
    Arrays.stream(inObj).forEach(nnResult -> nnResult.addRef());
    Arrays.stream(inObj).forEach(x -> x.getData().addRef());
    return new Result(Arrays.stream(inObj).parallel().map(x -> {
        // Extra reference per operand: the reduction below frees both operands of every step.
        TensorList data = x.getData();
        data.addRef();
        return data;
    }).reduce((l, r) -> {
        assert l.length() == r.length() || 1 == l.length() || 1 == r.length();
        // Either operand may be a single-item list that is broadcast, so the sum needs as many
        // items as the longer operand. Iterating over l.length() alone truncated the batch to one
        // item whenever the left operand was the broadcast one.
        final int itemCnt = Math.max(l.length(), r.length());
        @Nonnull TensorArray sum = TensorArray.wrap(IntStream.range(0, itemCnt).parallel().mapToObj(i -> {
            @Nullable final Tensor left = l.get(1 == l.length() ? 0 : i);
            @Nullable final Tensor right = r.get(1 == r.length() ? 0 : i);
            @Nullable Tensor tensor;
            if (right.length() == 1) {
                // Single-element right operand: add it to every element of the left tensor.
                tensor = left.mapParallel(v -> v + right.get(0));
            } else {
                tensor = left.reduceParallel(right, (v1, v2) -> v1 + v2);
            }
            left.freeRef();
            right.freeRef();
            return tensor;
        }).toArray(i -> new Tensor[i]));
        l.freeRef();
        r.freeRef();
        return sum;
    }).get(), (@Nonnull final DeltaSet<Layer> buffer, @Nonnull final TensorList delta) -> {
        for (@Nonnull final Result input : inObj) {
            if (input.isAlive()) {
                @Nonnull TensorList projectedDelta = delta;
                if (1 < projectedDelta.length() && input.getData().length() == 1) {
                    // The input was broadcast over the batch: collapse the delta to a single item.
                    projectedDelta = TensorArray.wrap(projectedDelta.stream().parallel().reduce((a, b) -> {
                        @Nullable Tensor c = a.addAndFree(b);
                        b.freeRef();
                        return c;
                    }).get());
                } else {
                    projectedDelta.addRef();
                }
                if (1 < Tensor.length(projectedDelta.getDimensions()) && Tensor.length(input.getData().getDimensions()) == 1) {
                    // The input was a single-element tensor: collapse each delta tensor to its sum.
                    Tensor[] data = projectedDelta.stream().map(t -> {
                        @Nonnull final Tensor scalar = new Tensor(new double[] { t.sum() });
                        // Streamed tensors are released by the consumer (as in the reduction
                        // above); without this each one leaked a reference.
                        t.freeRef();
                        return scalar;
                    }).toArray(i -> new Tensor[i]);
                    @Nonnull TensorArray data2 = TensorArray.wrap(data);
                    projectedDelta.freeRef();
                    projectedDelta = data2;
                }
                input.accumulate(buffer, projectedDelta);
            }
        }
    }) {

        @Override
        protected void _free() {
            Arrays.stream(inObj).forEach(nnResult -> nnResult.freeRef());
            Arrays.stream(inObj).forEach(x -> x.getData().freeRef());
        }

        @Override
        public boolean isAlive() {
            // Alive as soon as any input is alive.
            for (@Nonnull final Result element : inObj) {
                if (element.isAlive()) {
                    return true;
                }
            }
            return false;
        }
    };
}

Also used : IntStream(java.util.stream.IntStream) JsonObject(com.google.gson.JsonObject) Arrays(java.util.Arrays) Tensor(com.simiacryptus.mindseye.lang.Tensor) Result(com.simiacryptus.mindseye.lang.Result) DataSerializer(com.simiacryptus.mindseye.lang.DataSerializer) List(java.util.List) LayerBase(com.simiacryptus.mindseye.lang.LayerBase) TensorList(com.simiacryptus.mindseye.lang.TensorList) Map(java.util.Map) Layer(com.simiacryptus.mindseye.lang.Layer) TensorArray(com.simiacryptus.mindseye.lang.TensorArray) DeltaSet(com.simiacryptus.mindseye.lang.DeltaSet) Nonnull(javax.annotation.Nonnull) Nullable(javax.annotation.Nullable) Tensor(com.simiacryptus.mindseye.lang.Tensor) Nonnull(javax.annotation.Nonnull) TensorArray(com.simiacryptus.mindseye.lang.TensorArray) DeltaSet(com.simiacryptus.mindseye.lang.DeltaSet) TensorList(com.simiacryptus.mindseye.lang.TensorList) Nullable(javax.annotation.Nullable) Result(com.simiacryptus.mindseye.lang.Result) Nonnull(javax.annotation.Nonnull)

Example 48 with DeltaSet

use of com.simiacryptus.mindseye.lang.DeltaSet in project MindsEye by SimiaCryptus.

From the class GateBiasLayer, method evalAndFree.

/**
 * Adds the two inputs on the GPU using {@code cudnnOpTensor} with {@code CUDNN_OP_TENSOR_ADD}.
 * When CUDA is not enabled the call is delegated to {@code getCompatibilityLayer()}.
 *
 * <p>The returned result's accumulator passes the delta unchanged to the left input. For the
 * right input it passes the delta unchanged when both inputs have the same dimensions and length,
 * and otherwise reduces it with {@code cudnnReduceTensor} ({@code CUDNN_REDUCE_TENSOR_ADD}) into a
 * tensor described by the right input's dimensions and length.
 *
 * @param inObj exactly two inputs; the left input's data must have 3 dimensions
 * @return the result wrapping the GPU output, with the left input's dimensions and length
 * @throws IllegalArgumentException if {@code inObj} does not hold exactly two entries, or the left
 *                                  input's data is not 3-dimensional
 */
@Nullable
@Override
public Result evalAndFree(@Nonnull final Result... inObj) {
    // Without CUDA, hand the whole evaluation to the compatibility layer.
    if (!CudaSystem.isEnabled())
        return getCompatibilityLayer().evalAndFree(inObj);
    if (inObj.length != 2) {
        throw new IllegalArgumentException("inObj.length=" + inObj.length);
    }
    Result left = inObj[0];
    Result right = inObj[1];
    final TensorList leftData = left.getData();
    final TensorList rightData = right.getData();
    @Nonnull final int[] leftDimensions = leftData.getDimensions();
    @Nonnull final int[] rightDimensions = rightData.getDimensions();
    final int length = leftData.length();
    if (3 != leftDimensions.length) {
        throw new IllegalArgumentException("dimensions=" + Arrays.toString(leftDimensions));
    }
    return new Result(CudaSystem.run(gpu -> {
        @Nonnull final CudaResource<cudnnOpTensorDescriptor> opDescriptor = gpu.newOpDescriptor(cudnnOpTensorOp.CUDNN_OP_TENSOR_ADD, precision);
        // Output descriptor: shape and strides are derived from the left input's dimensions.
        @Nonnull final CudaDevice.CudaTensorDescriptor outputDescriptor = gpu.newTensorDescriptor(precision, length, leftDimensions[2], leftDimensions[1], leftDimensions[0], leftDimensions[2] * leftDimensions[1] * leftDimensions[0], leftDimensions[1] * leftDimensions[0], leftDimensions[0], 1);
        @Nullable final CudaTensor lPtr = gpu.getTensor(leftData, precision, MemoryType.Device, false);
        @Nullable final CudaTensor rPtr = gpu.getTensor(rightData, precision, MemoryType.Device, false);
        // assert lPtr.size == rPtr.size;
        @Nonnull final CudaMemory outputPtr = gpu.allocate((long) precision.size * outputDescriptor.nStride * length, MemoryType.Device, true);
        CudaMemory lPtrMemory = lPtr.getMemory(gpu);
        CudaMemory rPtrMemory = rPtr.getMemory(gpu);
        // output = 1.0 * left (op) 1.0 * right + 0.0 * output, with op = CUDNN_OP_TENSOR_ADD.
        CudaSystem.handle(gpu.cudnnOpTensor(opDescriptor.getPtr(), precision.getPointer(1.0), lPtr.descriptor.getPtr(), lPtrMemory.getPtr(), precision.getPointer(1.0), rPtr.descriptor.getPtr(), rPtrMemory.getPtr(), precision.getPointer(0.0), outputDescriptor.getPtr(), outputPtr.getPtr()));
        assert CudaDevice.isThreadDeviceId(gpu.getDeviceId());
        lPtrMemory.dirty();
        rPtrMemory.dirty();
        outputPtr.dirty();
        // Release the temporary handles; outputPtr and outputDescriptor are wrapped into the result below.
        lPtrMemory.freeRef();
        rPtrMemory.freeRef();
        rPtr.freeRef();
        lPtr.freeRef();
        opDescriptor.freeRef();
        CudaTensor cudaTensor = CudaTensor.wrap(outputPtr, outputDescriptor, precision);
        return CudaTensorList.wrap(cudaTensor, length, leftDimensions, precision);
    }, leftData), (@Nonnull final DeltaSet<Layer> buffer, @Nonnull final TensorList delta) -> {
        if (left.isAlive()) {
            // The left input receives the delta as-is; add a reference because delta is used again below.
            delta.addRef();
            left.accumulate(buffer, delta);
        }
        if (right.isAlive()) {
            @Nonnull TensorList data = CudaSystem.run(gpu -> {
                // assert deltaTensor.size == rightTensor.size;
                if (Arrays.equals(rightDimensions, leftDimensions) && length == rightData.length()) {
                    // Same shape and batch length: the delta can be passed back without reduction.
                    assert CudaDevice.isThreadDeviceId(gpu.getDeviceId());
                    // NOTE(review): a reference is added here, but unlike the else-branch
                    // (delta.freeRef()) the incoming reference is not released on this path —
                    // confirm whether this leaks one reference to delta.
                    delta.addRef();
                    return delta;
                } else {
                    // Shapes differ: reduce the delta into a tensor described by the right input's dimensions.
                    @Nonnull final CudaDevice.CudaTensorDescriptor reducedOutputDescriptor = gpu.newTensorDescriptor(precision, rightData.length(), rightDimensions[2], rightDimensions[1], rightDimensions[0], rightDimensions[2] * rightDimensions[1] * rightDimensions[0], rightDimensions[1] * rightDimensions[0], rightDimensions[0], 1);
                    long size = (long) precision.size * reducedOutputDescriptor.nStride * rightData.length();
                    @Nonnull final CudaMemory reducedOutputPtr = gpu.allocate(size, MemoryType.Managed, true);
                    CudaResource<cudnnReduceTensorDescriptor> reduceTensorDescriptor = gpu.cudnnCreateReduceTensorDescriptor(cudnnReduceTensorOp.CUDNN_REDUCE_TENSOR_ADD, precision.code, cudnnNanPropagation.CUDNN_NOT_PROPAGATE_NAN, cudnnReduceTensorIndices.CUDNN_REDUCE_TENSOR_NO_INDICES, cudnnIndicesType.CUDNN_32BIT_INDICES);
                    @Nullable final CudaTensor deltaTensor = gpu.getTensor(delta, precision, MemoryType.Device, false);
                    CudaMemory deltaTensorMemory = deltaTensor.getMemory(gpu);
                    // Workspace is allocated with the same size as the delta's memory.
                    @Nonnull final CudaMemory workspacePtr = gpu.allocate(deltaTensorMemory.size, MemoryType.Device, true);
                    // NOTE(review): the factor 12 used to size the index buffer is not explained in this method — confirm.
                    @Nonnull final CudaMemory indexPtr = gpu.allocate(12 * delta.length(), MemoryType.Device, false);
                    delta.freeRef();
                    // outputPtr.synchronize();
                    gpu.cudnnReduceTensor(reduceTensorDescriptor.getPtr(), indexPtr.getPtr(), indexPtr.size, workspacePtr.getPtr(), workspacePtr.size, precision.getPointer(1.0), deltaTensor.descriptor.getPtr(), deltaTensorMemory.getPtr(), precision.getPointer(0.0), reducedOutputDescriptor.getPtr(), reducedOutputPtr.getPtr());
                    reducedOutputPtr.dirty();
                    deltaTensorMemory.dirty();
                    // Release everything that is not wrapped into the returned tensor list.
                    Stream.of(deltaTensorMemory, deltaTensor, reduceTensorDescriptor, workspacePtr, indexPtr).forEach(ReferenceCounting::freeRef);
                    return CudaTensorList.wrap(CudaTensor.wrap(reducedOutputPtr, reducedOutputDescriptor, precision), rightData.length(), rightDimensions, precision);
                }
            }, delta);
            right.accumulate(buffer, data);
        } else {
            // The right input does not take the delta, so release it here.
            delta.freeRef();
        }
    }) {

        // Forwards straight to the accumulator lambda above, which releases delta itself.
        @Override
        public final void accumulate(DeltaSet<Layer> buffer, TensorList delta) {
            getAccumulator().accept(buffer, delta);
        }

        // Releases both inputs and their data when this result is freed.
        @Override
        protected void _free() {
            leftData.freeRef();
            rightData.freeRef();
            left.freeRef();
            right.freeRef();
        }

        // Alive as soon as any input is alive.
        @Override
        public boolean isAlive() {
            for (@Nonnull final Result element : inObj) if (element.isAlive()) {
                return true;
            }
            return false;
        }
    };
}

Also used : JsonObject(com.google.gson.JsonObject) Arrays(java.util.Arrays) CudaMemory(com.simiacryptus.mindseye.lang.cudnn.CudaMemory) jcuda.jcudnn.cudnnReduceTensorDescriptor(jcuda.jcudnn.cudnnReduceTensorDescriptor) jcuda.jcudnn.cudnnReduceTensorOp(jcuda.jcudnn.cudnnReduceTensorOp) Result(com.simiacryptus.mindseye.lang.Result) DataSerializer(com.simiacryptus.mindseye.lang.DataSerializer) Precision(com.simiacryptus.mindseye.lang.cudnn.Precision) Map(java.util.Map) Layer(com.simiacryptus.mindseye.lang.Layer) ReferenceCounting(com.simiacryptus.mindseye.lang.ReferenceCounting) Nonnull(javax.annotation.Nonnull) Nullable(javax.annotation.Nullable) CudaResource(com.simiacryptus.mindseye.lang.cudnn.CudaResource) CudaDevice(com.simiacryptus.mindseye.lang.cudnn.CudaDevice) CudaTensor(com.simiacryptus.mindseye.lang.cudnn.CudaTensor) jcuda.jcudnn.cudnnOpTensorOp(jcuda.jcudnn.cudnnOpTensorOp) CudaTensorList(com.simiacryptus.mindseye.lang.cudnn.CudaTensorList) jcuda.jcudnn.cudnnIndicesType(jcuda.jcudnn.cudnnIndicesType) jcuda.jcudnn.cudnnNanPropagation(jcuda.jcudnn.cudnnNanPropagation) jcuda.jcudnn.cudnnReduceTensorIndices(jcuda.jcudnn.cudnnReduceTensorIndices) List(java.util.List) LayerBase(com.simiacryptus.mindseye.lang.LayerBase) Stream(java.util.stream.Stream) CudaSystem(com.simiacryptus.mindseye.lang.cudnn.CudaSystem) TensorList(com.simiacryptus.mindseye.lang.TensorList) MemoryType(com.simiacryptus.mindseye.lang.cudnn.MemoryType) ProductInputsLayer(com.simiacryptus.mindseye.layers.java.ProductInputsLayer) DeltaSet(com.simiacryptus.mindseye.lang.DeltaSet) jcuda.jcudnn.cudnnOpTensorDescriptor(jcuda.jcudnn.cudnnOpTensorDescriptor) CudaTensor(com.simiacryptus.mindseye.lang.cudnn.CudaTensor) CudaDevice(com.simiacryptus.mindseye.lang.cudnn.CudaDevice) Nonnull(javax.annotation.Nonnull) jcuda.jcudnn.cudnnReduceTensorDescriptor(jcuda.jcudnn.cudnnReduceTensorDescriptor) CudaMemory(com.simiacryptus.mindseye.lang.cudnn.CudaMemory) DeltaSet(com.simiacryptus.mindseye.lang.DeltaSet) 
CudaTensorList(com.simiacryptus.mindseye.lang.cudnn.CudaTensorList) TensorList(com.simiacryptus.mindseye.lang.TensorList) Result(com.simiacryptus.mindseye.lang.Result) ReferenceCounting(com.simiacryptus.mindseye.lang.ReferenceCounting) CudaResource(com.simiacryptus.mindseye.lang.cudnn.CudaResource) Nullable(javax.annotation.Nullable) Nullable(javax.annotation.Nullable)

Example 49 with DeltaSet

use of com.simiacryptus.mindseye.lang.DeltaSet in project MindsEye by SimiaCryptus.

From the class ImgBandSelectLayer, method evalAndFree.

/**
 * Selects the bands from {@code getFrom()} up to {@code getTo()} of a single 3-dimensional input.
 *
 * <p>The forward pass does not copy: the output wraps the input's GPU memory at a byte offset of
 * {@code cStride * getFrom() * precision.size}, described with the output dimensions and the
 * input's strides. The accumulator allocates a buffer with the full input dimensions and copies
 * the delta into the selected band region with {@code cudnnTransformTensor}.
 *
 * <p>When CUDA is not enabled the call is delegated to {@code getCompatibilityLayer()}.
 *
 * @param inObj exactly one input whose data has 3 dimensions (checked by assertions only)
 * @return the result exposing the selected bands
 */
@Nullable
@Override
public Result evalAndFree(@Nonnull final Result... inObj) {
    // assert Arrays.stream(inObj).flatMapToDouble(input->input.data.stream().flatMapToDouble(x-> Arrays.stream(x.getData()))).allMatch(v->Double.isFinite(v));
    assert getFrom() < getTo();
    assert getFrom() >= 0;
    assert getTo() > 0;
    assert 1 == inObj.length;
    assert 3 == inObj[0].getData().getDimensions().length;
    if (!CudaSystem.isEnabled()) {
        // Delegate with evalAndFree (as the sibling GPU layers do) so the fallback also takes over
        // releasing the inputs; plain eval() left them unreleased.
        return getCompatibilityLayer().evalAndFree(inObj);
    }
    final TensorList inputData = inObj[0].getData();
    @Nonnull final int[] inputDimensions = inputData.getDimensions();
    final int length = inputData.length();
    @Nonnull final int[] outputDimensions = Arrays.copyOf(inputDimensions, 3);
    outputDimensions[2] = getTo() - getFrom();
    return new Result(CudaSystem.run(gpu -> {
        @Nullable final CudaTensor cudaInput = gpu.getTensor(inputData, precision, MemoryType.Device, false);
        inputData.freeRef();
        // Offset of the first selected band inside the input memory.
        final int byteOffset = cudaInput.descriptor.cStride * getFrom() * precision.size;
        // Output-shaped view that keeps the input's strides.
        @Nonnull final CudaDevice.CudaTensorDescriptor inputDescriptor = gpu.newTensorDescriptor(
            precision,
            length,
            outputDimensions[2],
            outputDimensions[1],
            outputDimensions[0],
            cudaInput.descriptor.nStride,
            cudaInput.descriptor.cStride,
            cudaInput.descriptor.hStride,
            cudaInput.descriptor.wStride);
        CudaMemory cudaInputMemory = cudaInput.getMemory(gpu);
        assert CudaDevice.isThreadDeviceId(gpu.getDeviceId());
        CudaTensor cudaTensor = CudaTensor.wrap(cudaInputMemory.withByteOffset(byteOffset), inputDescriptor, precision);
        Stream.<ReferenceCounting>of(cudaInput, cudaInputMemory).forEach(ReferenceCounting::freeRef);
        return CudaTensorList.wrap(cudaTensor, length, outputDimensions, precision);
    }, inputData), (@Nonnull final DeltaSet<Layer> buffer, @Nonnull final TensorList delta) -> {
        if (!Arrays.equals(delta.getDimensions(), outputDimensions)) {
            throw new AssertionError(Arrays.toString(delta.getDimensions()) + " != " + Arrays.toString(outputDimensions));
        }
        if (inObj[0].isAlive()) {
            final TensorList passbackTensorList = CudaSystem.run(gpu -> {
                // Band-sized view addressed with the strides of the full input layout.
                @Nonnull final CudaDevice.CudaTensorDescriptor viewDescriptor = gpu.newTensorDescriptor(
                    precision,
                    length,
                    outputDimensions[2],
                    outputDimensions[1],
                    outputDimensions[0],
                    inputDimensions[2] * inputDimensions[1] * inputDimensions[0],
                    inputDimensions[1] * inputDimensions[0],
                    inputDimensions[0],
                    1);
                // Descriptor of the complete passback tensor (full input dimensions).
                @Nonnull final CudaDevice.CudaTensorDescriptor inputDescriptor = gpu.newTensorDescriptor(
                    precision,
                    length,
                    inputDimensions[2],
                    inputDimensions[1],
                    inputDimensions[0],
                    inputDimensions[2] * inputDimensions[1] * inputDimensions[0],
                    inputDimensions[1] * inputDimensions[0],
                    inputDimensions[0],
                    1);
                final int byteOffset = viewDescriptor.cStride * getFrom() * precision.size;
                assert delta.length() == length;
                // assert error.stream().flatMapToDouble(x-> Arrays.stream(x.getData())).allMatch(Double::isFinite);
                @Nullable final CudaTensor errorPtr = gpu.getTensor(delta, precision, MemoryType.Device, false);
                delta.freeRef();
                // Widen to long before multiplying: the int product overflows for large batches.
                final long passbackSize = (long) length * inputDimensions[2] * inputDimensions[1] * inputDimensions[0] * precision.size;
                @Nonnull final CudaMemory passbackBuffer = gpu.allocate(passbackSize, MemoryType.Managed.normalize(), false);
                CudaMemory errorPtrMemory = errorPtr.getMemory(gpu);
                // Copy the delta into the selected band region of the passback buffer.
                gpu.cudnnTransformTensor(precision.getPointer(1.0), errorPtr.descriptor.getPtr(), errorPtrMemory.getPtr(), precision.getPointer(0.0), viewDescriptor.getPtr(), passbackBuffer.getPtr().withByteOffset(byteOffset));
                errorPtrMemory.dirty();
                passbackBuffer.dirty();
                errorPtrMemory.freeRef();
                CudaTensor cudaTensor = CudaTensor.wrap(passbackBuffer, inputDescriptor, precision);
                Stream.<ReferenceCounting>of(errorPtr, viewDescriptor).forEach(ReferenceCounting::freeRef);
                return CudaTensorList.wrap(cudaTensor, length, inputDimensions, precision);
            // assert passbackTensorList.stream().flatMapToDouble(x-> Arrays.stream(x.getData())).allMatch(v->Double.isFinite(v));
            }, delta);
            inObj[0].accumulate(buffer, passbackTensorList);
        } else {
            // Nobody consumes the delta, so release it here.
            delta.freeRef();
        }
    }) {

        // Forwards straight to the accumulator lambda above, which releases delta itself.
        @Override
        public void accumulate(final DeltaSet<Layer> buffer, final TensorList delta) {
            getAccumulator().accept(buffer, delta);
        }

        @Override
        protected void _free() {
            Arrays.stream(inObj).forEach(nnResult -> nnResult.freeRef());
        }

        @Override
        public boolean isAlive() {
            return Arrays.stream(inObj).anyMatch(x -> x.isAlive());
        }
    };
}

Also used : IntStream(java.util.stream.IntStream) JsonObject(com.google.gson.JsonObject) Arrays(java.util.Arrays) CudaDevice(com.simiacryptus.mindseye.lang.cudnn.CudaDevice) CudaTensor(com.simiacryptus.mindseye.lang.cudnn.CudaTensor) CudaMemory(com.simiacryptus.mindseye.lang.cudnn.CudaMemory) CudaTensorList(com.simiacryptus.mindseye.lang.cudnn.CudaTensorList) Result(com.simiacryptus.mindseye.lang.Result) DataSerializer(com.simiacryptus.mindseye.lang.DataSerializer) Precision(com.simiacryptus.mindseye.lang.cudnn.Precision) List(java.util.List) LayerBase(com.simiacryptus.mindseye.lang.LayerBase) Stream(java.util.stream.Stream) CudaSystem(com.simiacryptus.mindseye.lang.cudnn.CudaSystem) TensorList(com.simiacryptus.mindseye.lang.TensorList) Map(java.util.Map) MemoryType(com.simiacryptus.mindseye.lang.cudnn.MemoryType) Layer(com.simiacryptus.mindseye.lang.Layer) DeltaSet(com.simiacryptus.mindseye.lang.DeltaSet) ReferenceCounting(com.simiacryptus.mindseye.lang.ReferenceCounting) Nonnull(javax.annotation.Nonnull) Nullable(javax.annotation.Nullable) CudaTensor(com.simiacryptus.mindseye.lang.cudnn.CudaTensor) CudaDevice(com.simiacryptus.mindseye.lang.cudnn.CudaDevice) Nonnull(javax.annotation.Nonnull) CudaMemory(com.simiacryptus.mindseye.lang.cudnn.CudaMemory) DeltaSet(com.simiacryptus.mindseye.lang.DeltaSet) CudaTensorList(com.simiacryptus.mindseye.lang.cudnn.CudaTensorList) TensorList(com.simiacryptus.mindseye.lang.TensorList) Result(com.simiacryptus.mindseye.lang.Result) ReferenceCounting(com.simiacryptus.mindseye.lang.ReferenceCounting) Nullable(javax.annotation.Nullable) Nullable(javax.annotation.Nullable)

Example 50 with DeltaSet

use of com.simiacryptus.mindseye.lang.DeltaSet in project MindsEye by SimiaCryptus.

From the class ImgConcatLayer, method evalAndFree.

@Nullable
@Override
public Result evalAndFree(@Nonnull final Result... inObj) {
    if (!CudaSystem.isEnabled())
        return getCompatibilityLayer().evalAndFree(inObj);
    // assert Arrays.stream(this.bias).allMatch(Double::isFinite);
    // assert Arrays.stream(inObj).flatMapToDouble(input->input.data.stream().flatMapToDouble(x-> Arrays.stream(x.getData()))).allMatch(v->Double.isFinite(v));
    int[] dimensions = inObj[0].getData().getDimensions();
    assert 3 == dimensions.length;
    @Nonnull final int[] outputDimensions = Arrays.copyOf(dimensions, dimensions.length);
    final int length = inObj[0].getData().length();
    assert Arrays.stream(inObj).allMatch(x -> {
        @Nonnull int[] d = x.getData().getDimensions();
        return 3 == d.length && d[0] == outputDimensions[0] && d[1] == outputDimensions[1] && x.getData().length() == length;
    });
    outputDimensions[2] = Arrays.stream(inObj).mapToInt(x -> x.getData().getDimensions()[2]).sum();
    if (0 < maxBands && outputDimensions[2] > maxBands) {
        outputDimensions[2] = maxBands;
    }
    return new Result(CudaSystem.run(gpu -> {
        final long outputSize = ((long) length * outputDimensions[2] * outputDimensions[1] * outputDimensions[0] * precision.size);
        @Nonnull final CudaMemory cudaOutput = gpu.allocate(outputSize, MemoryType.Managed.normalize(), true);
        IntStream stream = IntStream.range(0, inObj.length);
        // if (!CoreSettings.INSTANCE.isConservative() && parallel) stream = stream.parallel();
        stream.forEach(i -> {
            assert CudaDevice.isThreadDeviceId(gpu.getDeviceId());
            final TensorList input = inObj[i].getData();
            @Nonnull final int[] inputDimensions = input.getDimensions();
            assert inputDimensions[0] == outputDimensions[0];
            assert inputDimensions[1] == outputDimensions[1];
            int bandOffset = IntStream.range(0, i).map(j -> inObj[j].getData().getDimensions()[2]).sum();
            if (maxBands > 0)
                bandOffset = Math.min(bandOffset, maxBands);
            int inputBands = inputDimensions[2];
            if (maxBands > 0)
                inputBands = Math.min(inputBands, maxBands - bandOffset);
            if (inputBands > 0) {
                @Nullable final CudaTensor cudaInput = gpu.getTensor(input, precision, MemoryType.Device, false);
                assert inputBands > 0;
                assert maxBands <= 0 || inputBands <= maxBands;
                assert inputBands <= inputDimensions[2];
                @Nonnull final CudaDevice.CudaTensorDescriptor outputDescriptor = gpu.newTensorDescriptor(// 
                precision, // 
                length, // 
                inputBands, // 
                outputDimensions[1], // 
                outputDimensions[0], // 
                outputDimensions[2] * outputDimensions[1] * outputDimensions[0], // 
                outputDimensions[1] * outputDimensions[0], // 
                outputDimensions[0], 1);
                @Nonnull final CudaDevice.CudaTensorDescriptor inputDescriptor = gpu.newTensorDescriptor(// 
                precision, // 
                length, // 
                inputBands, // 
                inputDimensions[1], // 
                inputDimensions[0], // 
                cudaInput.descriptor.nStride, // 
                cudaInput.descriptor.cStride, // 
                cudaInput.descriptor.hStride, cudaInput.descriptor.wStride);
                int byteOffset = outputDescriptor.cStride * bandOffset * precision.size;
                CudaMemory cudaInputMemory = cudaInput.getMemory(gpu);
                gpu.cudnnTransformTensor(precision.getPointer(1.0), inputDescriptor.getPtr(), cudaInputMemory.getPtr(), precision.getPointer(0.0), outputDescriptor.getPtr(), cudaOutput.getPtr().withByteOffset(byteOffset));
                assert CudaDevice.isThreadDeviceId(gpu.getDeviceId());
                cudaInputMemory.dirty();
                cudaOutput.dirty();
                cudaInputMemory.freeRef();
                Stream.<ReferenceCounting>of(cudaInput, outputDescriptor, inputDescriptor).forEach(ReferenceCounting::freeRef);
            }
        });
        CudaDevice.CudaTensorDescriptor outDesc = gpu.newTensorDescriptor(precision, length, outputDimensions[2], outputDimensions[1], outputDimensions[0]);
        return CudaTensorList.wrap(CudaTensor.wrap(cudaOutput, outDesc, precision), length, outputDimensions, precision);
    }, Arrays.stream(inObj).map(Result::getData).toArray()), (@Nonnull final DeltaSet<Layer> buffer, @Nonnull final TensorList delta) -> {
        assert delta.getDimensions()[0] == outputDimensions[0];
        assert delta.getDimensions()[1] == outputDimensions[1];
        assert delta.getDimensions()[2] == outputDimensions[2];
        if (!Arrays.equals(delta.getDimensions(), outputDimensions)) {
            throw new AssertionError(Arrays.toString(delta.getDimensions()) + " != " + Arrays.toString(outputDimensions));
        }
        // outputBuffer.freeRef();
        // assert error.stream().flatMapToDouble(x-> Arrays.stream(x.getData())).allMatch(Double::isFinite);
        @Nonnull IntStream stream = IntStream.range(0, inObj.length);
        if (!CoreSettings.INSTANCE.isSingleThreaded() && parallel)
            stream = stream.parallel();
        stream.forEach(i -> {
            final Result input = inObj[i];
            int[] inputDimentions = input.getData().getDimensions();
            assert 3 == inputDimentions.length;
            assert delta.length() == input.getData().length();
            assert inputDimentions[0] == outputDimensions[0];
            assert inputDimentions[1] == outputDimensions[1];
            int bandOffset = IntStream.range(0, i).map(j -> inObj[j].getData().getDimensions()[2]).sum();
            int inputBands = maxBands <= 0 ? inputDimentions[2] : Math.min(inputDimentions[2], maxBands - bandOffset);
            if (inputBands > 0 && input.isAlive()) {
                assert inputBands <= inputDimentions[2];
                assert inputBands <= outputDimensions[2];
                final TensorList passbackTensorList = CudaSystem.run(gpu -> {
                    final CudaTensor result;
                    synchronized (gpu) {
                        result = gpu.getTensor(delta, precision, MemoryType.Device, true);
                    }
                    @Nullable final CudaTensor cudaDelta = result;
                    CudaMemory cudaDeltaMemory = cudaDelta.getMemory(gpu);
                    try {
                        if (inputDimentions[2] == inputBands) {
                            @Nonnull final CudaDevice.CudaTensorDescriptor viewDescriptor = gpu.newTensorDescriptor(// 
                            precision, // 
                            length, // 
                            inputDimentions[2], // 
                            inputDimentions[1], // 
                            inputDimentions[0], // 
                            cudaDelta.descriptor.nStride, // 
                            cudaDelta.descriptor.cStride, // 
                            cudaDelta.descriptor.hStride, cudaDelta.descriptor.wStride);
                            int byteOffset = cudaDelta.descriptor.cStride * bandOffset * precision.size;
                            CudaMemory ptr = cudaDeltaMemory.withByteOffset(byteOffset);
                            CudaTensor cudaTensor = CudaTensor.wrap(ptr, viewDescriptor, precision);
                            Stream.<ReferenceCounting>of(cudaDelta).forEach(ReferenceCounting::freeRef);
                            return CudaTensorList.wrap(cudaTensor, length, inputDimentions, precision);
                        } else {
                            @Nonnull final CudaDevice.CudaTensorDescriptor passbackTransferDescriptor = gpu.newTensorDescriptor(// 
                            precision, // 
                            length, // 
                            inputBands, // 
                            inputDimentions[1], // 
                            inputDimentions[0], // 
                            inputDimentions[2] * inputDimentions[1] * inputDimentions[0], // 
                            inputDimentions[1] * inputDimentions[0], // 
                            inputDimentions[0], 1);
                            @Nonnull final CudaDevice.CudaTensorDescriptor passbackDescriptor = gpu.newTensorDescriptor(// 
                            precision, // 
                            length, // 
                            inputDimentions[2], // 
                            inputDimentions[1], // 
                            inputDimentions[0], // 
                            inputDimentions[2] * inputDimentions[1] * inputDimentions[0], // 
                            inputDimentions[1] * inputDimentions[0], // 
                            inputDimentions[0], 1);
                            @Nonnull final CudaDevice.CudaTensorDescriptor deltaViewDescriptor = gpu.newTensorDescriptor(// 
                            precision, // 
                            length, // 
                            inputBands, // 
                            inputDimentions[1], // 
                            inputDimentions[0], // 
                            cudaDelta.descriptor.nStride, // 
                            cudaDelta.descriptor.cStride, // 
                            cudaDelta.descriptor.hStride, cudaDelta.descriptor.wStride);
                            @Nonnull final CudaMemory cudaBackprop = gpu.allocate((long) passbackDescriptor.nStride * length * precision.size, MemoryType.Managed.normalize(), inputBands == inputDimentions[2]);
                            int byteOffset = cudaDelta.descriptor.cStride * bandOffset * precision.size;
                            gpu.cudnnTransformTensor(precision.getPointer(1.0), deltaViewDescriptor.getPtr(), cudaDeltaMemory.getPtr().withByteOffset(byteOffset), precision.getPointer(0.0), passbackTransferDescriptor.getPtr(), cudaBackprop.getPtr());
                            cudaBackprop.dirty();
                            cudaDeltaMemory.dirty();
                            Stream.<ReferenceCounting>of(cudaDelta, deltaViewDescriptor, passbackTransferDescriptor).forEach(ReferenceCounting::freeRef);
                            return CudaTensorList.wrap(CudaTensor.wrap(cudaBackprop, passbackDescriptor, precision), length, inputDimentions, precision);
                        }
                    } finally {
                        cudaDeltaMemory.freeRef();
                    }
                });
                input.accumulate(buffer, passbackTensorList);
            }
        // assert passbackTensorList.stream().flatMapToDouble(x-> Arrays.stream(x.getData())).allMatch(v->Double.isFinite(v));
        });
    }) {

        /**
         * Calls {@code freeRef()} on every input result in {@code inObj} and on each input's data list.
         * <p>
         * NOTE(review): presumably this balances {@code addRef()} calls made when this result was
         * constructed; that code is outside this section, so confirm against the start of the method.
         */
        @Override
        protected void _free() {
            for (@Nonnull Result result : inObj) {
                // Release the data list while we still hold our reference on its owner;
                // calling getData() after result.freeRef() would dereference a result we no longer hold.
                result.getData().freeRef();
                result.freeRef();
            }
        }

        /**
         * Reports whether at least one input result is still alive.
         *
         * @return {@code true} as soon as any element of {@code inObj} returns {@code true} from
         *         {@code isAlive()}; {@code false} if none does (including when {@code inObj} is empty)
         */
        @Override
        public boolean isAlive() {
            // Scan the inputs in order and stop at the first live one.
            for (final Result upstream : inObj) {
                if (upstream.isAlive()) {
                    return true;
                }
            }
            return false;
        }
    };
}

Also used : IntStream(java.util.stream.IntStream) JsonObject(com.google.gson.JsonObject) Arrays(java.util.Arrays) CudaDevice(com.simiacryptus.mindseye.lang.cudnn.CudaDevice) CudaTensor(com.simiacryptus.mindseye.lang.cudnn.CudaTensor) CudaMemory(com.simiacryptus.mindseye.lang.cudnn.CudaMemory) CoreSettings(com.simiacryptus.mindseye.lang.CoreSettings) CudaTensorList(com.simiacryptus.mindseye.lang.cudnn.CudaTensorList) Result(com.simiacryptus.mindseye.lang.Result) DataSerializer(com.simiacryptus.mindseye.lang.DataSerializer) Precision(com.simiacryptus.mindseye.lang.cudnn.Precision) List(java.util.List) LayerBase(com.simiacryptus.mindseye.lang.LayerBase) Stream(java.util.stream.Stream) CudaSystem(com.simiacryptus.mindseye.lang.cudnn.CudaSystem) TensorList(com.simiacryptus.mindseye.lang.TensorList) Map(java.util.Map) MemoryType(com.simiacryptus.mindseye.lang.cudnn.MemoryType) Layer(com.simiacryptus.mindseye.lang.Layer) DeltaSet(com.simiacryptus.mindseye.lang.DeltaSet) ReferenceCounting(com.simiacryptus.mindseye.lang.ReferenceCounting) Nonnull(javax.annotation.Nonnull) Nullable(javax.annotation.Nullable) CudaTensor(com.simiacryptus.mindseye.lang.cudnn.CudaTensor) CudaDevice(com.simiacryptus.mindseye.lang.cudnn.CudaDevice) Nonnull(javax.annotation.Nonnull) CudaMemory(com.simiacryptus.mindseye.lang.cudnn.CudaMemory) DeltaSet(com.simiacryptus.mindseye.lang.DeltaSet) CudaTensorList(com.simiacryptus.mindseye.lang.cudnn.CudaTensorList) TensorList(com.simiacryptus.mindseye.lang.TensorList) Result(com.simiacryptus.mindseye.lang.Result) ReferenceCounting(com.simiacryptus.mindseye.lang.ReferenceCounting) IntStream(java.util.stream.IntStream) Nullable(javax.annotation.Nullable) Nullable(javax.annotation.Nullable)

Aggregations

DeltaSet (com.simiacryptus.mindseye.lang.DeltaSet)98 Nonnull (javax.annotation.Nonnull)98 Result (com.simiacryptus.mindseye.lang.Result)90 Nullable (javax.annotation.Nullable)88 Layer (com.simiacryptus.mindseye.lang.Layer)86 TensorList (com.simiacryptus.mindseye.lang.TensorList)86 Arrays (java.util.Arrays)77 List (java.util.List)75 Tensor (com.simiacryptus.mindseye.lang.Tensor)73 Map (java.util.Map)66 IntStream (java.util.stream.IntStream)65 TensorArray (com.simiacryptus.mindseye.lang.TensorArray)64 JsonObject (com.google.gson.JsonObject)62 DataSerializer (com.simiacryptus.mindseye.lang.DataSerializer)61 LayerBase (com.simiacryptus.mindseye.lang.LayerBase)60 Logger (org.slf4j.Logger)47 LoggerFactory (org.slf4j.LoggerFactory)47 ReferenceCounting (com.simiacryptus.mindseye.lang.ReferenceCounting)23 CudaTensor (com.simiacryptus.mindseye.lang.cudnn.CudaTensor)22 CudaTensorList (com.simiacryptus.mindseye.lang.cudnn.CudaTensorList)22