Search in sources:

Example 61 with TensorList

use of com.simiacryptus.mindseye.lang.TensorList in project MindsEye by SimiaCryptus.

The following example shows the evalAndFree method of the ImgConcatLayer class.

@Nullable
@Override
public Result evalAndFree(@Nonnull final Result... inObj) {
    // Concatenates all inputs along the band (channel) axis on the GPU,
    // optionally truncating the result to maxBands channels.
    // Falls back to the CPU-compatible layer when CUDA is disabled.
    if (!CudaSystem.isEnabled())
        return getCompatibilityLayer().evalAndFree(inObj);
    // assert Arrays.stream(this.bias).allMatch(Double::isFinite);
    // assert Arrays.stream(inObj).flatMapToDouble(input->input.data.stream().flatMapToDouble(x-> Arrays.stream(x.getData()))).allMatch(v->Double.isFinite(v));
    int[] dimensions = inObj[0].getData().getDimensions();
    assert 3 == dimensions.length;
    @Nonnull final int[] outputDimensions = Arrays.copyOf(dimensions, dimensions.length);
    final int length = inObj[0].getData().length();
    // All inputs must share width, height and batch length; only band counts may differ.
    assert Arrays.stream(inObj).allMatch(x -> {
        @Nonnull int[] d = x.getData().getDimensions();
        return 3 == d.length && d[0] == outputDimensions[0] && d[1] == outputDimensions[1] && x.getData().length() == length;
    });
    // Output band count is the sum of input band counts, capped at maxBands when positive.
    outputDimensions[2] = Arrays.stream(inObj).mapToInt(x -> x.getData().getDimensions()[2]).sum();
    if (0 < maxBands && outputDimensions[2] > maxBands) {
        outputDimensions[2] = maxBands;
    }
    return new Result(CudaSystem.run(gpu -> {
        // Forward pass: copy each input into its band slice of one contiguous output buffer.
        final long outputSize = ((long) length * outputDimensions[2] * outputDimensions[1] * outputDimensions[0] * precision.size);
        @Nonnull final CudaMemory cudaOutput = gpu.allocate(outputSize, MemoryType.Managed.normalize(), true);
        IntStream stream = IntStream.range(0, inObj.length);
        // if (!CoreSettings.INSTANCE.isConservative() && parallel) stream = stream.parallel();
        stream.forEach(i -> {
            assert CudaDevice.isThreadDeviceId(gpu.getDeviceId());
            final TensorList input = inObj[i].getData();
            @Nonnull final int[] inputDimensions = input.getDimensions();
            assert inputDimensions[0] == outputDimensions[0];
            assert inputDimensions[1] == outputDimensions[1];
            // bandOffset = number of bands contributed by all earlier inputs, clamped to maxBands.
            int bandOffset = IntStream.range(0, i).map(j -> inObj[j].getData().getDimensions()[2]).sum();
            if (maxBands > 0)
                bandOffset = Math.min(bandOffset, maxBands);
            int inputBands = inputDimensions[2];
            if (maxBands > 0)
                inputBands = Math.min(inputBands, maxBands - bandOffset);
            // Inputs lying entirely beyond the maxBands cutoff contribute nothing and are skipped.
            if (inputBands > 0) {
                @Nullable final CudaTensor cudaInput = gpu.getTensor(input, precision, MemoryType.Device, false);
                assert inputBands > 0;
                assert maxBands <= 0 || inputBands <= maxBands;
                assert inputBands <= inputDimensions[2];
                // Destination descriptor: a band-slice view into the packed output buffer
                // (strides are those of the full output tensor, not of the slice).
                @Nonnull final CudaDevice.CudaTensorDescriptor outputDescriptor = gpu.newTensorDescriptor(// 
                precision, // 
                length, // 
                inputBands, // 
                outputDimensions[1], // 
                outputDimensions[0], // 
                outputDimensions[2] * outputDimensions[1] * outputDimensions[0], // 
                outputDimensions[1] * outputDimensions[0], // 
                outputDimensions[0], 1);
                // Source descriptor reuses the input tensor's own strides.
                @Nonnull final CudaDevice.CudaTensorDescriptor inputDescriptor = gpu.newTensorDescriptor(// 
                precision, // 
                length, // 
                inputBands, // 
                inputDimensions[1], // 
                inputDimensions[0], // 
                cudaInput.descriptor.nStride, // 
                cudaInput.descriptor.cStride, // 
                cudaInput.descriptor.hStride, cudaInput.descriptor.wStride);
                // Byte offset of this input's first band within the output buffer.
                int byteOffset = outputDescriptor.cStride * bandOffset * precision.size;
                CudaMemory cudaInputMemory = cudaInput.getMemory(gpu);
                gpu.cudnnTransformTensor(precision.getPointer(1.0), inputDescriptor.getPtr(), cudaInputMemory.getPtr(), precision.getPointer(0.0), outputDescriptor.getPtr(), cudaOutput.getPtr().withByteOffset(byteOffset));
                assert CudaDevice.isThreadDeviceId(gpu.getDeviceId());
                cudaInputMemory.dirty();
                cudaOutput.dirty();
                cudaInputMemory.freeRef();
                Stream.<ReferenceCounting>of(cudaInput, outputDescriptor, inputDescriptor).forEach(ReferenceCounting::freeRef);
            }
        });
        CudaDevice.CudaTensorDescriptor outDesc = gpu.newTensorDescriptor(precision, length, outputDimensions[2], outputDimensions[1], outputDimensions[0]);
        return CudaTensorList.wrap(CudaTensor.wrap(cudaOutput, outDesc, precision), length, outputDimensions, precision);
    }, Arrays.stream(inObj).map(Result::getData).toArray()), (@Nonnull final DeltaSet<Layer> buffer, @Nonnull final TensorList delta) -> {
        // Backward pass: slice the incoming delta back into one gradient per input.
        assert delta.getDimensions()[0] == outputDimensions[0];
        assert delta.getDimensions()[1] == outputDimensions[1];
        assert delta.getDimensions()[2] == outputDimensions[2];
        if (!Arrays.equals(delta.getDimensions(), outputDimensions)) {
            throw new AssertionError(Arrays.toString(delta.getDimensions()) + " != " + Arrays.toString(outputDimensions));
        }
        // outputBuffer.freeRef();
        // assert error.stream().flatMapToDouble(x-> Arrays.stream(x.getData())).allMatch(Double::isFinite);
        @Nonnull IntStream stream = IntStream.range(0, inObj.length);
        if (!CoreSettings.INSTANCE.isSingleThreaded() && parallel)
            stream = stream.parallel();
        stream.forEach(i -> {
            final Result input = inObj[i];
            int[] inputDimentions = input.getData().getDimensions();
            assert 3 == inputDimentions.length;
            assert delta.length() == input.getData().length();
            assert inputDimentions[0] == outputDimensions[0];
            assert inputDimentions[1] == outputDimensions[1];
            // NOTE(review): unlike the forward pass, bandOffset is NOT clamped to maxBands
            // here — confirm that inputs past the cutoff always produce inputBands <= 0
            // so the unclamped offset is never used.
            int bandOffset = IntStream.range(0, i).map(j -> inObj[j].getData().getDimensions()[2]).sum();
            int inputBands = maxBands <= 0 ? inputDimentions[2] : Math.min(inputDimentions[2], maxBands - bandOffset);
            if (inputBands > 0 && input.isAlive()) {
                assert inputBands <= inputDimentions[2];
                assert inputBands <= outputDimensions[2];
                final TensorList passbackTensorList = CudaSystem.run(gpu -> {
                    final CudaTensor result;
                    // Synchronized fetch of the delta tensor; presumably guards against
                    // concurrent materialization under the parallel stream — TODO confirm.
                    synchronized (gpu) {
                        result = gpu.getTensor(delta, precision, MemoryType.Device, true);
                    }
                    @Nullable final CudaTensor cudaDelta = result;
                    CudaMemory cudaDeltaMemory = cudaDelta.getMemory(gpu);
                    try {
                        if (inputDimentions[2] == inputBands) {
                            // Untruncated input: pass back a zero-copy strided view into the
                            // delta buffer, offset to this input's band range.
                            @Nonnull final CudaDevice.CudaTensorDescriptor viewDescriptor = gpu.newTensorDescriptor(// 
                            precision, // 
                            length, // 
                            inputDimentions[2], // 
                            inputDimentions[1], // 
                            inputDimentions[0], // 
                            cudaDelta.descriptor.nStride, // 
                            cudaDelta.descriptor.cStride, // 
                            cudaDelta.descriptor.hStride, cudaDelta.descriptor.wStride);
                            int byteOffset = cudaDelta.descriptor.cStride * bandOffset * precision.size;
                            CudaMemory ptr = cudaDeltaMemory.withByteOffset(byteOffset);
                            CudaTensor cudaTensor = CudaTensor.wrap(ptr, viewDescriptor, precision);
                            Stream.<ReferenceCounting>of(cudaDelta).forEach(ReferenceCounting::freeRef);
                            return CudaTensorList.wrap(cudaTensor, length, inputDimentions, precision);
                        } else {
                            // Truncated input: materialize a copy of the surviving bands into a
                            // freshly allocated buffer; bands beyond the cutoff get no gradient.
                            @Nonnull final CudaDevice.CudaTensorDescriptor passbackTransferDescriptor = gpu.newTensorDescriptor(// 
                            precision, // 
                            length, // 
                            inputBands, // 
                            inputDimentions[1], // 
                            inputDimentions[0], // 
                            inputDimentions[2] * inputDimentions[1] * inputDimentions[0], // 
                            inputDimentions[1] * inputDimentions[0], // 
                            inputDimentions[0], 1);
                            @Nonnull final CudaDevice.CudaTensorDescriptor passbackDescriptor = gpu.newTensorDescriptor(// 
                            precision, // 
                            length, // 
                            inputDimentions[2], // 
                            inputDimentions[1], // 
                            inputDimentions[0], // 
                            inputDimentions[2] * inputDimentions[1] * inputDimentions[0], // 
                            inputDimentions[1] * inputDimentions[0], // 
                            inputDimentions[0], 1);
                            @Nonnull final CudaDevice.CudaTensorDescriptor deltaViewDescriptor = gpu.newTensorDescriptor(// 
                            precision, // 
                            length, // 
                            inputBands, // 
                            inputDimentions[1], // 
                            inputDimentions[0], // 
                            cudaDelta.descriptor.nStride, // 
                            cudaDelta.descriptor.cStride, // 
                            cudaDelta.descriptor.hStride, cudaDelta.descriptor.wStride);
                            @Nonnull final CudaMemory cudaBackprop = gpu.allocate((long) passbackDescriptor.nStride * length * precision.size, MemoryType.Managed.normalize(), inputBands == inputDimentions[2]);
                            int byteOffset = cudaDelta.descriptor.cStride * bandOffset * precision.size;
                            gpu.cudnnTransformTensor(precision.getPointer(1.0), deltaViewDescriptor.getPtr(), cudaDeltaMemory.getPtr().withByteOffset(byteOffset), precision.getPointer(0.0), passbackTransferDescriptor.getPtr(), cudaBackprop.getPtr());
                            cudaBackprop.dirty();
                            cudaDeltaMemory.dirty();
                            Stream.<ReferenceCounting>of(cudaDelta, deltaViewDescriptor, passbackTransferDescriptor).forEach(ReferenceCounting::freeRef);
                            return CudaTensorList.wrap(CudaTensor.wrap(cudaBackprop, passbackDescriptor, precision), length, inputDimentions, precision);
                        }
                    } finally {
                        cudaDeltaMemory.freeRef();
                    }
                });
                input.accumulate(buffer, passbackTensorList);
            }
        // assert passbackTensorList.stream().flatMapToDouble(x-> Arrays.stream(x.getData())).allMatch(v->Double.isFinite(v));
        });
    }) {

        @Override
        protected void _free() {
            // Release the held input results and their data when this result is freed.
            for (@Nonnull Result result : inObj) {
                result.freeRef();
                result.getData().freeRef();
            }
        }

        @Override
        public boolean isAlive() {
            // Gradients are needed as long as any input still wants them.
            return Arrays.stream(inObj).anyMatch(x -> x.isAlive());
        }
    };
}
Also used : IntStream(java.util.stream.IntStream) JsonObject(com.google.gson.JsonObject) Arrays(java.util.Arrays) CudaDevice(com.simiacryptus.mindseye.lang.cudnn.CudaDevice) CudaTensor(com.simiacryptus.mindseye.lang.cudnn.CudaTensor) CudaMemory(com.simiacryptus.mindseye.lang.cudnn.CudaMemory) CoreSettings(com.simiacryptus.mindseye.lang.CoreSettings) CudaTensorList(com.simiacryptus.mindseye.lang.cudnn.CudaTensorList) Result(com.simiacryptus.mindseye.lang.Result) DataSerializer(com.simiacryptus.mindseye.lang.DataSerializer) Precision(com.simiacryptus.mindseye.lang.cudnn.Precision) List(java.util.List) LayerBase(com.simiacryptus.mindseye.lang.LayerBase) Stream(java.util.stream.Stream) CudaSystem(com.simiacryptus.mindseye.lang.cudnn.CudaSystem) TensorList(com.simiacryptus.mindseye.lang.TensorList) Map(java.util.Map) MemoryType(com.simiacryptus.mindseye.lang.cudnn.MemoryType) Layer(com.simiacryptus.mindseye.lang.Layer) DeltaSet(com.simiacryptus.mindseye.lang.DeltaSet) ReferenceCounting(com.simiacryptus.mindseye.lang.ReferenceCounting) Nonnull(javax.annotation.Nonnull) Nullable(javax.annotation.Nullable) CudaTensor(com.simiacryptus.mindseye.lang.cudnn.CudaTensor) CudaDevice(com.simiacryptus.mindseye.lang.cudnn.CudaDevice) Nonnull(javax.annotation.Nonnull) CudaMemory(com.simiacryptus.mindseye.lang.cudnn.CudaMemory) DeltaSet(com.simiacryptus.mindseye.lang.DeltaSet) CudaTensorList(com.simiacryptus.mindseye.lang.cudnn.CudaTensorList) TensorList(com.simiacryptus.mindseye.lang.TensorList) Result(com.simiacryptus.mindseye.lang.Result) ReferenceCounting(com.simiacryptus.mindseye.lang.ReferenceCounting) IntStream(java.util.stream.IntStream) Nullable(javax.annotation.Nullable) Nullable(javax.annotation.Nullable)

Example 62 with TensorList

use of com.simiacryptus.mindseye.lang.TensorList in project MindsEye by SimiaCryptus.

The following example shows the evalAndFree method of the ImgCropLayer class.

@Nullable
@Override
public Result evalAndFree(@Nonnull final Result... inObj) {
    // Crops (or pads) the single input image to sizeX x sizeY on the GPU.
    // Falls back to the CPU-compatible layer when CUDA is disabled.
    if (!CudaSystem.isEnabled())
        return getCompatibilityLayer().evalAndFree(inObj);
    assert 1 == inObj.length;
    final Result input = inObj[0];
    final TensorList inputData = input.getData();
    assert 3 == inputData.getDimensions().length;
    final int length = inputData.length();
    @Nonnull int[] dimIn = inputData.getDimensions();
    // Already the requested size: pass the input through untouched.
    // NOTE(review): this compares dimIn[0]==sizeX / dimIn[1]==sizeY while
    // ImgTileSelectLayer uses the opposite pairing — confirm the axis convention.
    if (dimIn[0] == sizeX && dimIn[1] == sizeY) {
        return input;
    }
    // Output keeps the input's band count but takes the configured width/height.
    @Nonnull final int[] dimOut = Arrays.copyOf(dimIn, 3);
    dimOut[0] = sizeX;
    dimOut[1] = sizeY;
    final TensorList outputData = CudaSystem.run(gpu -> {
        @Nullable final CudaTensor inputTensor = gpu.getTensor(inputData, precision, MemoryType.Device, false);
        inputData.freeRef();
        // dirty==true when the output is fully covered by the input (pure crop),
        // presumably so copy() can skip zero-initialization — TODO confirm.
        boolean dirty = dimOut[0] <= dimIn[0] && dimOut[1] <= dimIn[1];
        assert dimOut[0] > 0;
        assert dimOut[1] > 0;
        assert dimOut[2] > 0;
        CudaTensor cudaTensor = copy(gpu, inputTensor, length, dimIn, dimOut, dirty, precision);
        Stream.<ReferenceCounting>of(inputTensor).forEach(ReferenceCounting::freeRef);
        return CudaTensorList.wrap(cudaTensor, length, dimOut, precision);
    }, inputData);
    return new Result(outputData, (@Nonnull final DeltaSet<Layer> buffer, @Nonnull final TensorList delta) -> {
        // Backward pass: un-crop the delta by copying it back into input-sized space.
        if (!Arrays.equals(delta.getDimensions(), outputData.getDimensions())) {
            throw new AssertionError(Arrays.toString(delta.getDimensions()) + " != " + Arrays.toString(outputData.getDimensions()));
        }
        if (delta.length() != outputData.length()) {
            throw new AssertionError(delta.length() + " != " + outputData.length());
        }
        assert delta.length() == length;
        if (input.isAlive()) {
            final TensorList passbackTensorList = CudaSystem.run(gpu -> {
                @Nullable final CudaTensor errorPtr = gpu.getTensor(delta, precision, MemoryType.Device, false);
                delta.freeRef();
                // Inverse of the forward condition: the copy covers the whole passback
                // only when the output was at least as large as the input.
                boolean dirty = dimOut[0] >= dimIn[0] && dimOut[1] >= dimIn[1];
                CudaTensor cudaTensor = copy(gpu, errorPtr, length, dimOut, dimIn, dirty, precision);
                Stream.<ReferenceCounting>of(errorPtr).forEach(ReferenceCounting::freeRef);
                return CudaTensorList.wrap(cudaTensor, length, dimIn, precision);
            }, delta);
            input.accumulate(buffer, passbackTensorList);
        } else {
            // Input does not need gradients; just release the delta.
            delta.freeRef();
        }
    }) {

        @Override
        public void accumulate(final DeltaSet<Layer> buffer, final TensorList delta) {
            getAccumulator().accept(buffer, delta);
        }

        @Override
        protected void _free() {
            Arrays.stream(inObj).forEach(nnResult -> nnResult.freeRef());
        }

        @Override
        public boolean isAlive() {
            return Arrays.stream(inObj).anyMatch(x -> x.isAlive());
        }
    };
}
Also used : CudaTensor(com.simiacryptus.mindseye.lang.cudnn.CudaTensor) ReferenceCounting(com.simiacryptus.mindseye.lang.ReferenceCounting) Nonnull(javax.annotation.Nonnull) DeltaSet(com.simiacryptus.mindseye.lang.DeltaSet) CudaTensorList(com.simiacryptus.mindseye.lang.cudnn.CudaTensorList) TensorList(com.simiacryptus.mindseye.lang.TensorList) Nullable(javax.annotation.Nullable) Result(com.simiacryptus.mindseye.lang.Result) Nullable(javax.annotation.Nullable)

Example 63 with TensorList

use of com.simiacryptus.mindseye.lang.TensorList in project MindsEye by SimiaCryptus.

The following example shows the evalAndFree method of the ImgTileAssemblyLayer class.

@Nullable
@Override
public Result evalAndFree(@Nonnull final Result... inObj) {
    // Assembles a rows x columns grid of input tiles into one large image on the GPU.
    // Falls back to the CPU-compatible layer when CUDA is disabled.
    if (!CudaSystem.isEnabled())
        return getCompatibilityLayer().evalAndFree(inObj);
    // A single tile needs no assembly.
    if (1 == inObj.length) {
        return inObj[0];
    }
    int[] inputDimensions = inObj[0].getData().getDimensions();
    assert 3 == inputDimensions.length;
    final int length = inObj[0].getData().length();
    int[] outputDims = getOutputDims(inObj);
    final TensorList outputData = CudaSystem.run(gpu -> {
        assert CudaDevice.isThreadDeviceId(gpu.getDeviceId());
        assert outputDims[0] > 0;
        assert outputDims[1] > 0;
        assert outputDims[2] > 0;
        @Nonnull final CudaMemory outputBuffer = gpu.allocate((long) length * outputDims[2] * outputDims[1] * outputDims[0] * precision.size, MemoryType.Managed.normalize(), false);
        int totalWidth = 0;
        int totalHeight = 0;
        int inputIndex = 0;
        // Walk the grid row-major, recording where each tile lands in the output.
        List<CopyParams> copies = new ArrayList<>();
        for (int row = 0; row < rows; row++) {
            int positionX = 0;
            int rowHeight = 0;
            for (int col = 0; col < columns; col++) {
                int[] tileDimensions = inObj[inputIndex].getData().getDimensions();
                // A row is as tall as its tallest tile.
                rowHeight = Math.max(rowHeight, tileDimensions[1]);
                copies.add(new CopyParams(gpu, inObj, outputBuffer, length, outputDims, tileDimensions, inputIndex, positionX, totalHeight));
                positionX += tileDimensions[0];
                inputIndex += 1;
                assert CudaDevice.isThreadDeviceId(gpu.getDeviceId());
            }
            totalHeight += rowHeight;
            totalWidth = Math.max(totalWidth, positionX);
        }
        assert CudaDevice.isThreadDeviceId(gpu.getDeviceId());
        // Execute the planned copies, optionally in parallel.
        Stream<CopyParams> stream = copies.stream();
        if (!CoreSettings.INSTANCE.isSingleThreaded() && parallel)
            stream = stream.parallel();
        stream.forEach(this::copy);
        Arrays.stream(inObj).forEach(r -> r.getData().freeRef());
        CudaDevice.CudaTensorDescriptor descriptor = gpu.newTensorDescriptor(precision, length, outputDims[2], outputDims[1], outputDims[0]);
        CudaTensor ptr = CudaTensor.wrap(outputBuffer, descriptor, precision);
        return CudaTensorList.wrap(ptr, length, outputDims, precision);
    }, Arrays.stream(inObj).map(Result::getData).toArray());
    return new Result(outputData, (@Nonnull final DeltaSet<Layer> buffer, @Nonnull final TensorList error) -> {
        // Backward pass: cut the assembled error tensor back into per-tile gradients.
        if (!Arrays.equals(error.getDimensions(), outputData.getDimensions())) {
            throw new AssertionError(Arrays.toString(error.getDimensions()) + " != " + Arrays.toString(outputData.getDimensions()));
        }
        if (error.length() != outputData.length()) {
            throw new AssertionError(error.length() + " != " + outputData.length());
        }
        assert error.length() == length;
        int totalHeight = 0;
        int inputIndex = 0;
        // Re-derive each tile's position with the same row-major walk as the forward pass.
        List<BackpropParams> tasks = new ArrayList<>();
        for (int row = 0; row < rows; row++) {
            int positionX = 0;
            int rowHeight = 0;
            for (int col = 0; col < columns; col++) {
                Result in = inObj[inputIndex];
                int[] tileDimensions = in.getData().getDimensions();
                rowHeight = Math.max(rowHeight, tileDimensions[1]);
                // Only inputs that still want gradients get a backprop task.
                if (inObj[inputIndex].isAlive()) {
                    tasks.add(new BackpropParams(inObj, buffer, error, outputDims, tileDimensions, length, positionX, totalHeight, inputIndex));
                }
                positionX += tileDimensions[0];
                inputIndex += 1;
            }
            totalHeight += rowHeight;
        }
        Stream<BackpropParams> stream = tasks.stream();
        if (!CoreSettings.INSTANCE.isSingleThreaded() && parallel)
            stream = stream.parallel();
        stream.forEach(this::backprop);
    }) {

        @Override
        protected void _free() {
            Arrays.stream(inObj).forEach(nnResult -> nnResult.freeRef());
        }

        @Override
        public boolean isAlive() {
            return Arrays.stream(inObj).anyMatch(x -> x.isAlive());
        }
    };
}
Also used : CudaTensor(com.simiacryptus.mindseye.lang.cudnn.CudaTensor) CudaDevice(com.simiacryptus.mindseye.lang.cudnn.CudaDevice) Nonnull(javax.annotation.Nonnull) CudaMemory(com.simiacryptus.mindseye.lang.cudnn.CudaMemory) ArrayList(java.util.ArrayList) DeltaSet(com.simiacryptus.mindseye.lang.DeltaSet) CudaTensorList(com.simiacryptus.mindseye.lang.cudnn.CudaTensorList) TensorList(com.simiacryptus.mindseye.lang.TensorList) Result(com.simiacryptus.mindseye.lang.Result) Nullable(javax.annotation.Nullable)

Example 64 with TensorList

use of com.simiacryptus.mindseye.lang.TensorList in project MindsEye by SimiaCryptus.

The following example shows the evalAndFree method of the ImgTileSelectLayer class.

@Nullable
@Override
public Result evalAndFree(@Nonnull final Result... inObj) {
    // Selects a sizeX x sizeY sub-tile of the single input at (positionX, positionY)
    // on the GPU. Falls back to the CPU-compatible layer when CUDA is disabled.
    if (!CudaSystem.isEnabled())
        return getCompatibilityLayer().evalAndFree(inObj);
    assert 1 == inObj.length;
    final Result input = inObj[0];
    final TensorList inputData = input.getData();
    assert 3 == inputData.getDimensions().length;
    final int length = inputData.length();
    @Nonnull int[] dimIn = inputData.getDimensions();
    // Tile covers the whole input: pass it through untouched.
    // NOTE(review): dimIn[0] is paired with sizeY and dimIn[1] with sizeX here,
    // the opposite of ImgCropLayer — confirm the intended axis convention.
    if (dimIn[0] == sizeY && dimIn[1] == sizeX) {
        return input;
    }
    // Requested view clipped against the input bounds; band count is preserved.
    @Nonnull final int[] dimOut = getViewDimensions(dimIn, new int[] { sizeY, sizeX, dimIn[2] }, new int[] { positionX, positionY, 0 });
    final TensorList outputData = CudaSystem.run(gpu -> {
        assert dimOut[0] > 0;
        assert dimOut[1] > 0;
        assert dimOut[2] > 0;
        // dirty flag presumably tells copy() whether the destination is fully
        // overwritten (skip zero-fill) — TODO confirm against copy()'s contract.
        boolean dirty = dimOut[0] == dimIn[0] && dimOut[1] == dimIn[1];
        CudaTensor cudaTensor = copy(gpu, inputData, dimIn, dimOut, precision, this.positionX, this.positionY, dirty);
        return CudaTensorList.wrap(cudaTensor, length, dimOut, precision);
    }, inputData);
    int[] outputDimensions = outputData.getDimensions();
    assert length == outputData.length();
    return new Result(outputData, (@Nonnull final DeltaSet<Layer> buffer, @Nonnull final TensorList error) -> {
        if (!Arrays.equals(error.getDimensions(), outputDimensions)) {
            throw new AssertionError(Arrays.toString(error.getDimensions()) + " != " + Arrays.toString(outputDimensions));
        }
        if (error.length() != length) {
            throw new AssertionError(error.length() + " != " + length);
        }
        assert error.length() == inputData.length();
        if (input.isAlive()) {
            final TensorList passbackTensorList = CudaSystem.run(gpu -> {
                boolean dirty = dimOut[0] >= dimIn[0] && dimOut[1] >= dimIn[1];
                // Backward pass: copy the error back with the offsets negated, placing
                // the gradient at the tile's original location within the input.
                CudaTensor cudaTensor = copy(gpu, error, dimOut, dimIn, precision, -this.positionX, -this.positionY, dirty);
                return CudaTensorList.wrap(cudaTensor, length, dimIn, precision);
            }, error);
            input.accumulate(buffer, passbackTensorList);
        }
    }) {

        @Override
        protected void _free() {
            Arrays.stream(inObj).forEach(nnResult -> nnResult.freeRef());
        }

        @Override
        public boolean isAlive() {
            return Arrays.stream(inObj).anyMatch(x -> x.isAlive());
        }
    };
}
Also used : CudaTensor(com.simiacryptus.mindseye.lang.cudnn.CudaTensor) Nonnull(javax.annotation.Nonnull) DeltaSet(com.simiacryptus.mindseye.lang.DeltaSet) CudaTensorList(com.simiacryptus.mindseye.lang.cudnn.CudaTensorList) TensorList(com.simiacryptus.mindseye.lang.TensorList) Result(com.simiacryptus.mindseye.lang.Result) Nullable(javax.annotation.Nullable)

Example 65 with TensorList

use of com.simiacryptus.mindseye.lang.TensorList in project MindsEye by SimiaCryptus.

The following example shows the evalAndFree method of the ImgTileSubnetLayer class.

@Nullable
@Override
public Result evalAndFree(@Nonnull final Result... inObj) {
    // Splits the input image into overlapping width x height tiles (strideX/strideY
    // apart), runs the inner layer on each tile, and reassembles the results.
    assert 1 == inObj.length;
    Result input = inObj[0];
    TensorList inputData = input.getData();
    @Nonnull final int[] inputDims = inputData.getDimensions();
    assert 3 == inputDims.length;
    int bands = inputDims[2];
    int length = inputData.length();
    // Shared input-sized buffer into which every tile's gradient is scattered
    // during backprop; each tile Result holds its own reference to it.
    CudaTensor passback = CudaSystem.run(gpu -> {
        return CudaTensor.wrap(gpu.allocate(inputData.getElements() * precision.size, MemoryType.Managed, true), gpu.newTensorDescriptor(precision, length, inputDims[2], inputDims[1], inputDims[0]), precision);
    });
    try {
        // Counts completed tile backprops so accumulation fires once per pass.
        AtomicInteger counter = new AtomicInteger(0);
        // Number of tile positions needed to cover the input in each axis.
        int cols = (int) (Math.ceil((inputDims[0] - width) * 1.0 / strideX) + 1);
        int rows = (int) (Math.ceil((inputDims[1] - height) * 1.0 / strideY) + 1);
        // One tile covers everything: delegate straight to the inner layer.
        if (cols == 1 && rows == 1)
            return getInner().evalAndFree(inObj);
        ArrayList<CudaTensor> tiles = new ArrayList<>();
        int[] tileDimensions = { width, height, bands };
        Result[][] tileResults = new Result[rows][];
        for (int row = 0; row < rows; row++) {
            tileResults[row] = new Result[cols];
            for (int col = 0; col < cols; col++) {
                int positionX = col * strideX;
                int positionY = row * strideY;
                assert positionX >= 0;
                assert positionY >= 0;
                assert positionX < inputDims[0];
                assert positionY < inputDims[1];
                // Extract this tile from the input.
                CudaTensor tile = CudaSystem.run(gpu -> {
                    return ImgTileSelectLayer.copy(gpu, inputData, inputData.getDimensions(), tileDimensions, precision, positionX, positionY, true);
                });
                // Extra reference released by the tile Result's _free below.
                passback.addRef();
                tileResults[row][col] = getInner().evalAndFree(new Result(CudaTensorList.wrap(tile, length, tileDimensions, precision), (DeltaSet<Layer> ctx, TensorList delta) -> {
                    // Scatter this tile's delta back into the shared passback buffer.
                    CudaSystem.run(gpu -> {
                        ImgTileSelectLayer.copy(gpu, delta, tileDimensions, -positionX, -positionY, precision, passback).freeRef();
                    });
                    // Once every tile has reported, hand the accumulated gradient to the input.
                    // NOTE(review): the copy into passback and the counter check are separate
                    // steps — confirm ordering is safe if tile backprops run concurrently.
                    if (counter.incrementAndGet() >= rows * cols) {
                        counter.set(0);
                        input.accumulate(ctx, CudaTensorList.create(passback, length, inputDims, precision));
                    }
                }) {

                    @Override
                    protected void _free() {
                        super._free();
                        passback.freeRef();
                    }
                });
            }
        }
        inputData.freeRef();
        logger.debug(String.format("Broke input %s into %s rows, %s cols", Arrays.toString(inputDims), rows, cols));
        // Reassemble the per-tile outputs into one image (note cols/rows order).
        Result result = new ImgTileAssemblyLayer(cols, rows).setParallel(parallel).setPrecision(precision).evalAndFree(Arrays.stream(tileResults).flatMap(Arrays::stream).toArray(i -> new Result[i]));
        return new Result(result.getData(), (ctx, delta) -> {
            result.accumulate(ctx, delta);
        }) {

            @Override
            public void accumulate(final DeltaSet<Layer> buffer, final TensorList delta) {
                getAccumulator().accept(buffer, delta);
            }

            @Override
            protected void _free() {
                super._free();
                result.freeRef();
                input.freeRef();
            }
        };
    } finally {
        // Drop this method's own reference; tile Results keep theirs until freed.
        passback.freeRef();
    }
}
Also used : JsonObject(com.google.gson.JsonObject) Arrays(java.util.Arrays) Logger(org.slf4j.Logger) CudaTensor(com.simiacryptus.mindseye.lang.cudnn.CudaTensor) LoggerFactory(org.slf4j.LoggerFactory) CudaTensorList(com.simiacryptus.mindseye.lang.cudnn.CudaTensorList) WrapperLayer(com.simiacryptus.mindseye.layers.java.WrapperLayer) Result(com.simiacryptus.mindseye.lang.Result) DataSerializer(com.simiacryptus.mindseye.lang.DataSerializer) ArrayList(java.util.ArrayList) Precision(com.simiacryptus.mindseye.lang.cudnn.Precision) List(java.util.List) CudaSystem(com.simiacryptus.mindseye.lang.cudnn.CudaSystem) AtomicInteger(java.util.concurrent.atomic.AtomicInteger) TensorList(com.simiacryptus.mindseye.lang.TensorList) Map(java.util.Map) MemoryType(com.simiacryptus.mindseye.lang.cudnn.MemoryType) Layer(com.simiacryptus.mindseye.lang.Layer) DeltaSet(com.simiacryptus.mindseye.lang.DeltaSet) Nonnull(javax.annotation.Nonnull) Nullable(javax.annotation.Nullable) CudaTensor(com.simiacryptus.mindseye.lang.cudnn.CudaTensor) Nonnull(javax.annotation.Nonnull) ArrayList(java.util.ArrayList) DeltaSet(com.simiacryptus.mindseye.lang.DeltaSet) CudaTensorList(com.simiacryptus.mindseye.lang.cudnn.CudaTensorList) TensorList(com.simiacryptus.mindseye.lang.TensorList) Result(com.simiacryptus.mindseye.lang.Result) AtomicInteger(java.util.concurrent.atomic.AtomicInteger) Arrays(java.util.Arrays) Nullable(javax.annotation.Nullable)

Aggregations

TensorList (com.simiacryptus.mindseye.lang.TensorList)110 Nonnull (javax.annotation.Nonnull)109 Nullable (javax.annotation.Nullable)103 Result (com.simiacryptus.mindseye.lang.Result)95 Arrays (java.util.Arrays)93 Layer (com.simiacryptus.mindseye.lang.Layer)91 Tensor (com.simiacryptus.mindseye.lang.Tensor)88 DeltaSet (com.simiacryptus.mindseye.lang.DeltaSet)87 IntStream (java.util.stream.IntStream)82 List (java.util.List)80 TensorArray (com.simiacryptus.mindseye.lang.TensorArray)76 Map (java.util.Map)68 JsonObject (com.google.gson.JsonObject)64 DataSerializer (com.simiacryptus.mindseye.lang.DataSerializer)63 LayerBase (com.simiacryptus.mindseye.lang.LayerBase)61 Logger (org.slf4j.Logger)57 LoggerFactory (org.slf4j.LoggerFactory)57 ReferenceCounting (com.simiacryptus.mindseye.lang.ReferenceCounting)33 CudaTensor (com.simiacryptus.mindseye.lang.cudnn.CudaTensor)30 CudaTensorList (com.simiacryptus.mindseye.lang.cudnn.CudaTensorList)30