
Example 1 with IsMax

Use of org.nd4j.linalg.api.ops.impl.transforms.IsMax in project deeplearning4j by deeplearning4j.

The class MaskedReductionUtil, method maskedPoolingEpsilonCnn.

public static INDArray maskedPoolingEpsilonCnn(PoolingType poolingType, INDArray input, INDArray mask, INDArray epsilon2d, boolean alongHeight, int pnorm) {
    // [minibatch, depth, h=1, w=X] or [minibatch, depth, h=X, w=1] data
    // with a mask array of shape [minibatch, X]
    //If masking along height: broadcast dimensions are [0,2]
    //If masking along width: broadcast dimensions are [0,3]
    int[] dimensions = (alongHeight ? CNN_DIM_MASK_H : CNN_DIM_MASK_W);
    switch(poolingType) {
        case MAX:
            //TODO This is ugly - replace it with something better... Need something like a Broadcast CAS op
            INDArray negInfMask = Transforms.not(mask);
            BooleanIndexing.replaceWhere(negInfMask, Double.NEGATIVE_INFINITY, Conditions.equals(1.0));
            INDArray withInf = Nd4j.createUninitialized(input.shape());
            Nd4j.getExecutioner().exec(new BroadcastAddOp(input, negInfMask, withInf, dimensions));
            //At this point: all the masked out steps have value -inf, hence can't be the output of the MAX op
            INDArray isMax = Nd4j.getExecutioner().execAndReturn(new IsMax(withInf, 2, 3));
            return Nd4j.getExecutioner().execAndReturn(new BroadcastMulOp(isMax, epsilon2d, isMax, 0, 1));
        case AVG:
        case SUM:
            //if out = sum(in,dims) then dL/dIn = dL/dOut -> duplicate to each step and mask
            //if out = avg(in,dims) then dL/dIn = 1/N * dL/dOut
            //With masking: N differs for different time series
            INDArray out = Nd4j.createUninitialized(input.shape(), 'f');
            //Broadcast copy op, then divide and mask to 0 as appropriate
            Nd4j.getExecutioner().exec(new BroadcastCopyOp(out, epsilon2d, out, 0, 1));
            Nd4j.getExecutioner().exec(new BroadcastMulOp(out, mask, out, dimensions));
            if (poolingType == PoolingType.SUM) {
                return out;
            }
            //Note that with CNNs, current design is restricted to [minibatch, depth, 1, W] or [minibatch, depth, H, 1]
            //[minibatchSize,tsLength] -> [minibatchSize,1]
            INDArray nEachTimeSeries = mask.sum(1);
            Nd4j.getExecutioner().exec(new BroadcastDivOp(out, nEachTimeSeries, out, 0));
            return out;
        case PNORM:
            //Similar to average and sum pooling: there's no N term here, so we can just set the masked values to 0
            INDArray masked2 = Nd4j.createUninitialized(input.shape());
            Nd4j.getExecutioner().exec(new BroadcastMulOp(input, mask, masked2, dimensions));
            INDArray abs = Transforms.abs(masked2, true);
            Transforms.pow(abs, pnorm, false);
            INDArray pNorm = Transforms.pow(abs.sum(2, 3), 1.0 / pnorm);
            INDArray numerator;
            if (pnorm == 2) {
                numerator = input.dup();
            } else {
                INDArray absp2 = Transforms.pow(Transforms.abs(input, true), pnorm - 2, false);
                numerator = input.mul(absp2);
            }
            INDArray denom = Transforms.pow(pNorm, pnorm - 1, false);
            denom.rdivi(epsilon2d);
            Nd4j.getExecutioner().execAndReturn(new BroadcastMulOp(numerator, denom, numerator, 0, 1));
            //Apply mask
            Nd4j.getExecutioner().exec(new BroadcastMulOp(numerator, mask, numerator, dimensions));
            return numerator;
        case NONE:
            throw new UnsupportedOperationException("NONE pooling type not supported");
        default:
            throw new UnsupportedOperationException("Unknown or not supported pooling type: " + poolingType);
    }
}
Also used : IsMax(org.nd4j.linalg.api.ops.impl.transforms.IsMax), BroadcastAddOp(org.nd4j.linalg.api.ops.impl.broadcast.BroadcastAddOp), INDArray(org.nd4j.linalg.api.ndarray.INDArray), BroadcastMulOp(org.nd4j.linalg.api.ops.impl.broadcast.BroadcastMulOp), BroadcastCopyOp(org.nd4j.linalg.api.ops.impl.broadcast.BroadcastCopyOp), BroadcastDivOp(org.nd4j.linalg.api.ops.impl.broadcast.BroadcastDivOp)
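
A minimal usage sketch for the method above, assuming MaskedReductionUtil is on the classpath; all shapes, the padding pattern, and the choice of MAX pooling are illustrative assumptions, not taken from the project.

// Hypothetical shapes: [minibatch, depth, 1, w] activations, masked along the width dimension
int miniBatch = 4, depth = 3, w = 10;
INDArray input = Nd4j.rand(new int[] { miniBatch, depth, 1, w });
INDArray mask = Nd4j.ones(miniBatch, w);
for (int t = 7; t < w; t++) {
    // mark the last steps of every example as padding
    mask.getColumn(t).assign(0);
}
// dL/dOut of the pooled output: one value per (example, channel)
INDArray epsilon2d = Nd4j.rand(miniBatch, depth);
// alongHeight=false -> mask broadcast over dimensions [0,3]; the pnorm argument is ignored for MAX
INDArray epsilonNd = MaskedReductionUtil.maskedPoolingEpsilonCnn(PoolingType.MAX, input, mask, epsilon2d, false, 2);
// epsilonNd has input's shape; masked positions receive zero gradient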

Example 2 with IsMax

Use of org.nd4j.linalg.api.ops.impl.transforms.IsMax in project deeplearning4j by deeplearning4j.

The class GlobalPoolingLayer, method epsilonHelperFullArray.

private INDArray epsilonHelperFullArray(INDArray inputArray, INDArray epsilon, int[] poolDim) {
    //Broadcast: occurs on the remaining dimensions, after the pool dimensions have been removed.
    //TODO find a more efficient way to do this
    int[] broadcastDims = new int[inputArray.rank() - poolDim.length];
    int count = 0;
    for (int i = 0; i < inputArray.rank(); i++) {
        if (ArrayUtils.contains(poolDim, i))
            continue;
        broadcastDims[count++] = i;
    }
    switch(poolingType) {
        case MAX:
            INDArray isMax = Nd4j.getExecutioner().execAndReturn(new IsMax(inputArray.dup(), poolDim));
            return Nd4j.getExecutioner().execAndReturn(new BroadcastMulOp(isMax, epsilon, isMax, broadcastDims));
        case AVG:
            //if out = avg(in,dims) then dL/dIn = 1/N * dL/dOut
            int n = 1;
            for (int d : poolDim) {
                n *= inputArray.size(d);
            }
            INDArray ret = Nd4j.create(inputArray.shape());
            Nd4j.getExecutioner().exec(new BroadcastCopyOp(ret, epsilon, ret, broadcastDims));
            ret.divi(n);
            return ret;
        case SUM:
            INDArray retSum = Nd4j.create(inputArray.shape());
            Nd4j.getExecutioner().exec(new BroadcastCopyOp(retSum, epsilon, retSum, broadcastDims));
            return retSum;
        case PNORM:
            int pnorm = layerConf().getPnorm();
            //First: do forward pass to get pNorm array
            INDArray abs = Transforms.abs(inputArray, true);
            Transforms.pow(abs, pnorm, false);
            INDArray pNorm = Transforms.pow(abs.sum(poolDim), 1.0 / pnorm);
            //dL/dIn = dL/dOut * dOut/dIn
            //dOut/dIn = in .* |in|^(p-2) /  ||in||_p^(p-1), where ||in||_p is the output p-norm
            INDArray numerator;
            if (pnorm == 2) {
                numerator = inputArray.dup();
            } else {
                INDArray absp2 = Transforms.pow(Transforms.abs(inputArray, true), pnorm - 2, false);
                numerator = inputArray.mul(absp2);
            }
            INDArray denom = Transforms.pow(pNorm, pnorm - 1, false);
            denom.rdivi(epsilon);
            Nd4j.getExecutioner().execAndReturn(new BroadcastMulOp(numerator, denom, numerator, broadcastDims));
            return numerator;
        default:
            throw new RuntimeException("Unknown or not supported pooling type: " + poolingType);
    }
}
Also used : IsMax(org.nd4j.linalg.api.ops.impl.transforms.IsMax), INDArray(org.nd4j.linalg.api.ndarray.INDArray), BroadcastMulOp(org.nd4j.linalg.api.ops.impl.broadcast.BroadcastMulOp), BroadcastCopyOp(org.nd4j.linalg.api.ops.impl.broadcast.BroadcastCopyOp)
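
Since epsilonHelperFullArray is private, a direct call is not a realistic usage example; instead, here is a standalone sketch of the IsMax gradient-routing pattern from the MAX branch above, with illustrative shapes (pooling over dimension 2 of a rank-3 array, so the broadcast dimensions are [0, 1]).

// Illustrative shapes: [minibatch=2, channels=3, time=5] activations, pooled over dimension 2
INDArray activations = Nd4j.rand(new int[] { 2, 3, 5 });
// dL/dOut from the layer above: one value per (example, channel)
INDArray epsilon = Nd4j.rand(2, 3);
// 1.0 at the per-(example, channel) maximum, 0.0 elsewhere; dup() so the activations are not overwritten
INDArray isMax = Nd4j.getExecutioner().execAndReturn(new IsMax(activations.dup(), 2));
// Route the incoming gradient only to the max positions, broadcasting epsilon over dims [0, 1]
INDArray dLdIn = Nd4j.getExecutioner().execAndReturn(new BroadcastMulOp(isMax, epsilon, isMax, 0, 1));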

Example 3 with IsMax

Use of org.nd4j.linalg.api.ops.impl.transforms.IsMax in project deeplearning4j by deeplearning4j.

The class SubsamplingLayer, method backpropGradient.

@Override
public Pair<Gradient, INDArray> backpropGradient(INDArray epsilon) {
    int miniBatch = input.size(0);
    int inDepth = input.size(1);
    int inH = input.size(2);
    int inW = input.size(3);
    int[] kernel = layerConf().getKernelSize();
    int[] strides = layerConf().getStride();
    int[] pad;
    int[] outSize;
    if (convolutionMode == ConvolutionMode.Same) {
        //Also performs validation
        outSize = ConvolutionUtils.getOutputSize(input, kernel, strides, null, convolutionMode);
        pad = ConvolutionUtils.getSameModeTopLeftPadding(outSize, new int[] { inH, inW }, kernel, strides);
    } else {
        pad = layerConf().getPadding();
        //Also performs validation
        outSize = ConvolutionUtils.getOutputSize(input, kernel, strides, pad, convolutionMode);
    }
    int outH = outSize[0];
    int outW = outSize[1];
    if (helper != null && Nd4j.dataType() != DataBuffer.Type.HALF) {
        Pair<Gradient, INDArray> ret = helper.backpropGradient(input, epsilon, kernel, strides, pad, layerConf().getPoolingType(), convolutionMode);
        if (ret != null) {
            return ret;
        }
    }
    //subsampling doesn't have weights and thus gradients are not calculated for this layer
    //only scale and reshape epsilon
    int inputHeight = input().size(-2);
    int inputWidth = input().size(-1);
    Gradient retGradient = new DefaultGradient();
    //Epsilons in shape: [miniBatch, depth, outH, outW]
    //Epsilons out shape: [miniBatch, depth, inH, inW]
    //Two possibilities here for the epsilons:
    //(a) Epsilons come from a dense/output layer above, with c order and strides [depth*H*W, H*W, W, 1]
    //(b) Epsilons come from CNN layer above, with c order and strides [H*W, depth*H*W, W, 1] (i.e., due to permute)
    //We want to reshape epsilons to 1d here, but to do this without a copy: we end up with different orders of
    // elements in the buffer for the "dense above" and "cnn above" cases.
    //Fortunately, we can just permute things when we do the im2col reshaping; then, the order of the rows in
    // col2d will match the order of the 1d epsilons...
    //With the 1d epsilons order matching the rows order for the 2d im2col: we can just do a muliColumnVector op,
    // instead of a slower broadcast muli op
    boolean cOrderStrides = false;
    if (epsilon.ordering() != 'c') {
        epsilon = epsilon.dup('c');
        cOrderStrides = true;
    }
    if (!cOrderStrides && Shape.strideDescendingCAscendingF(epsilon)) {
        cOrderStrides = true;
    } else if (!Arrays.equals(new int[] { outH * outW, inDepth * outH * outW, outW, 1 }, epsilon.stride())) {
        //Unexpected/unusual strides, not either (a) or (b) cases above
        epsilon = epsilon.dup('c');
        cOrderStrides = true;
    }
    INDArray col6d;
    INDArray col6dPermuted;
    INDArray epsilon1d;
    if (cOrderStrides) {
        //"Dense/Output layer above strides... i.e., standard c-order strides
        col6d = Nd4j.create(new int[] { miniBatch, inDepth, outH, outW, kernel[0], kernel[1] }, 'c');
        col6dPermuted = col6d.permute(0, 1, 4, 5, 2, 3);
        //zero copy reshape
        epsilon1d = epsilon.reshape('c', ArrayUtil.prod(epsilon.length()), 1);
    } else {
        //"CNN layer above" strides...
        col6d = Nd4j.create(new int[] { inDepth, miniBatch, outH, outW, kernel[0], kernel[1] }, 'c');
        col6dPermuted = col6d.permute(1, 0, 4, 5, 2, 3);
        INDArray epsilonTemp = epsilon.permute(1, 0, 2, 3);
        //Should be a zero-copy reshape always
        epsilon1d = epsilonTemp.reshape('c', new int[] { ArrayUtil.prod(epsilon.length()), 1 });
    }
    INDArray col2d = col6d.reshape('c', miniBatch * inDepth * outH * outW, kernel[0] * kernel[1]);
    switch(layerConf().getPoolingType()) {
        case MAX:
            //Execute im2col, then reshape to 2d. Note rows are in a different order for cOrderStrides true vs false cases
            Convolution.im2col(input, kernel[0], kernel[1], strides[0], strides[1], pad[0], pad[1], convolutionMode == ConvolutionMode.Same, col6dPermuted);
            INDArray isMax = Nd4j.getExecutioner().execAndReturn(new IsMax(col2d, 1));
            isMax.muliColumnVector(epsilon1d);
            break;
        case AVG:
            //TODO: We could further optimize this by creating an uninitialized array, and doing a 'putiColumnVector' operation
            // instead of a zero initialization + an addiColumnVector op
            col2d.addiColumnVector(epsilon1d);
            break;
        case PNORM:
            int pnorm = layerConf().getPnorm();
            //First: do forward pass to get pNorm array
            Convolution.im2col(input, kernel[0], kernel[1], strides[0], strides[1], pad[0], pad[1], convolutionMode == ConvolutionMode.Same, col6dPermuted);
            //dup as we need col2d again later
            INDArray pNorm = Transforms.abs(col2d, true);
            Transforms.pow(pNorm, pnorm, false);
            pNorm = pNorm.sum(1);
            Transforms.pow(pNorm, (1.0 / pnorm), false);
            //dL/dIn = dL/dOut * dOut/dIn
            //dOut/dIn = in .* |in|^(p-2) /  ||in||_p^(p-1), where ||in||_p is the output p-norm
            INDArray numerator;
            if (pnorm == 2) {
                numerator = col2d;
            } else {
                INDArray absp2 = Transforms.pow(Transforms.abs(col2d, true), pnorm - 2, false);
                numerator = col2d.muli(absp2);
            }
            INDArray denom = Transforms.pow(pNorm, pnorm - 1, false);
            double eps = layerConf().getEps();
            // guard against division by zero when the p-norm is 0
            Transforms.max(denom, eps, false);
            numerator.muliColumnVector(denom.rdivi(epsilon1d));
            break;
        case NONE:
            return new Pair<>(retGradient, epsilon);
        default:
            throw new IllegalStateException("Unknown or unsupported pooling type: " + layerConf().getPoolingType());
    }
    //Finally: we want the output strides for the epsilons to match the strides in the activations from the layer below
    //Assuming the layer below is a CNN layer (very likely) we want [H*W, depth*H*W, W, 1] instead of the standard
    // c-order [depth*H*W, H*W, W, 1] strides
    //To achieve this: [depth, miniBatch, H, W] in c order, then permute to [miniBatch, depth, H, W]
    //This gives us proper strides of 1 on the muli...
    INDArray tempEpsilon = Nd4j.create(new int[] { inDepth, miniBatch, inH, inW }, 'c');
    INDArray outEpsilon = tempEpsilon.permute(1, 0, 2, 3);
    Convolution.col2im(col6dPermuted, outEpsilon, strides[0], strides[1], pad[0], pad[1], inputHeight, inputWidth);
    if (layerConf().getPoolingType() == PoolingType.AVG)
        outEpsilon.divi(ArrayUtil.prod(layerConf().getKernelSize()));
    return new Pair<>(retGradient, outEpsilon);
}
Also used : IsMax(org.nd4j.linalg.api.ops.impl.transforms.IsMax), Gradient(org.deeplearning4j.nn.gradient.Gradient), DefaultGradient(org.deeplearning4j.nn.gradient.DefaultGradient), INDArray(org.nd4j.linalg.api.ndarray.INDArray), Pair(org.deeplearning4j.berkeley.Pair)
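
A hedged sketch of how the override above might be driven from outside, assuming subsamplingLayer is an already-configured max-pooling SubsamplingLayer and that the shapes below match its kernel, stride, and padding configuration; the variable names and sizes are illustrative only.

// Assumed sizes matching the layer's configuration
int miniBatch = 8, depth = 16, inH = 28, inW = 28, outH = 14, outW = 14;
INDArray input = Nd4j.rand(new int[] { miniBatch, depth, inH, inW });
// dL/dOut from the layer above, shaped [miniBatch, depth, outH, outW]
INDArray epsilon = Nd4j.rand(new int[] { miniBatch, depth, outH, outW });
subsamplingLayer.setInput(input);
// forward pass first, so the layer holds the input it needs during backprop
subsamplingLayer.activate(true);
Pair<Gradient, INDArray> result = subsamplingLayer.backpropGradient(epsilon);
// the gradient map is empty (no weights); the returned epsilon has input's shape [miniBatch, depth, inH, inW]
INDArray epsilonOut = result.getSecond();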

Example 4 with IsMax

Use of org.nd4j.linalg.api.ops.impl.transforms.IsMax in project deeplearning4j by deeplearning4j.

The class MaskedReductionUtil, method maskedPoolingEpsilonTimeSeries.

public static INDArray maskedPoolingEpsilonTimeSeries(PoolingType poolingType, INDArray input, INDArray mask, INDArray epsilon2d, int pnorm) {
    if (input.rank() != 3) {
        throw new IllegalArgumentException("Expect rank 3 input activation array: got " + input.rank());
    }
    if (mask.rank() != 2) {
        throw new IllegalArgumentException("Expect rank 2 array for mask: got " + mask.rank());
    }
    if (epsilon2d.rank() != 2) {
        throw new IllegalArgumentException("Expected rank 2 array for errors: got " + epsilon2d.rank());
    }
    switch(poolingType) {
        case MAX:
            //TODO This is ugly - replace it with something better... Need something like a Broadcast CAS op
            INDArray negInfMask = Transforms.not(mask);
            BooleanIndexing.replaceWhere(negInfMask, Double.NEGATIVE_INFINITY, Conditions.equals(1.0));
            INDArray withInf = Nd4j.createUninitialized(input.shape());
            Nd4j.getExecutioner().exec(new BroadcastAddOp(input, negInfMask, withInf, 0, 2));
            //At this point: all the masked out steps have value -inf, hence can't be the output of the MAX op
            INDArray isMax = Nd4j.getExecutioner().execAndReturn(new IsMax(withInf, 2));
            return Nd4j.getExecutioner().execAndReturn(new BroadcastMulOp(isMax, epsilon2d, isMax, 0, 1));
        case AVG:
        case SUM:
            //if out = sum(in,dims) then dL/dIn = dL/dOut -> duplicate to each step and mask
            //if out = avg(in,dims) then dL/dIn = 1/N * dL/dOut
            //With masking: N differs for different time series
            INDArray out = Nd4j.createUninitialized(input.shape(), 'f');
            //Broadcast copy op, then divide and mask to 0 as appropriate
            Nd4j.getExecutioner().exec(new BroadcastCopyOp(out, epsilon2d, out, 0, 1));
            Nd4j.getExecutioner().exec(new BroadcastMulOp(out, mask, out, 0, 2));
            if (poolingType == PoolingType.SUM) {
                return out;
            }
            //[minibatchSize,tsLength] -> [minibatchSize,1]
            INDArray nEachTimeSeries = mask.sum(1);
            Nd4j.getExecutioner().exec(new BroadcastDivOp(out, nEachTimeSeries, out, 0));
            return out;
        case PNORM:
            //Similar to average and sum pooling: there's no N term here, so we can just set the masked values to 0
            INDArray masked2 = Nd4j.createUninitialized(input.shape());
            Nd4j.getExecutioner().exec(new BroadcastMulOp(input, mask, masked2, 0, 2));
            INDArray abs = Transforms.abs(masked2, true);
            Transforms.pow(abs, pnorm, false);
            INDArray pNorm = Transforms.pow(abs.sum(2), 1.0 / pnorm);
            INDArray numerator;
            if (pnorm == 2) {
                numerator = input.dup();
            } else {
                INDArray absp2 = Transforms.pow(Transforms.abs(input, true), pnorm - 2, false);
                numerator = input.mul(absp2);
            }
            INDArray denom = Transforms.pow(pNorm, pnorm - 1, false);
            denom.rdivi(epsilon2d);
            Nd4j.getExecutioner().execAndReturn(new BroadcastMulOp(numerator, denom, numerator, 0, 1));
            //Apply mask
            Nd4j.getExecutioner().exec(new BroadcastMulOp(numerator, mask, numerator, 0, 2));
            return numerator;
        case NONE:
            throw new UnsupportedOperationException("NONE pooling type not supported");
        default:
            throw new UnsupportedOperationException("Unknown or not supported pooling type: " + poolingType);
    }
}
Also used : IsMax(org.nd4j.linalg.api.ops.impl.transforms.IsMax), BroadcastAddOp(org.nd4j.linalg.api.ops.impl.broadcast.BroadcastAddOp), INDArray(org.nd4j.linalg.api.ndarray.INDArray), BroadcastMulOp(org.nd4j.linalg.api.ops.impl.broadcast.BroadcastMulOp), BroadcastCopyOp(org.nd4j.linalg.api.ops.impl.broadcast.BroadcastCopyOp), BroadcastDivOp(org.nd4j.linalg.api.ops.impl.broadcast.BroadcastDivOp)
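
A minimal sketch of calling the method above for a masked time series, with illustrative shapes that satisfy its rank checks (rank-3 input, rank-2 mask and epsilon); the sizes and the padded final step are assumptions.

// Illustrative shapes: [minibatch, channels, tsLength] activations with a [minibatch, tsLength] mask
int miniBatch = 4, channels = 3, tsLength = 8;
INDArray input = Nd4j.rand(new int[] { miniBatch, channels, tsLength });
INDArray mask = Nd4j.ones(miniBatch, tsLength);
// the last time step is padding for every example
mask.getColumn(tsLength - 1).assign(0);
// dL/dOut of the pooled output: one value per (example, channel)
INDArray epsilon2d = Nd4j.rand(miniBatch, channels);
// the pnorm argument is ignored for AVG pooling
INDArray dLdIn = MaskedReductionUtil.maskedPoolingEpsilonTimeSeries(PoolingType.AVG, input, mask, epsilon2d, 2);
// dLdIn has input's shape; each unmasked step gets dL/dOut divided by that example's unmasked length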

Aggregations

INDArray (org.nd4j.linalg.api.ndarray.INDArray): 4 usages
IsMax (org.nd4j.linalg.api.ops.impl.transforms.IsMax): 4 usages
BroadcastCopyOp (org.nd4j.linalg.api.ops.impl.broadcast.BroadcastCopyOp): 3 usages
BroadcastMulOp (org.nd4j.linalg.api.ops.impl.broadcast.BroadcastMulOp): 3 usages
BroadcastAddOp (org.nd4j.linalg.api.ops.impl.broadcast.BroadcastAddOp): 2 usages
BroadcastDivOp (org.nd4j.linalg.api.ops.impl.broadcast.BroadcastDivOp): 2 usages
Pair (org.deeplearning4j.berkeley.Pair): 1 usage
DefaultGradient (org.deeplearning4j.nn.gradient.DefaultGradient): 1 usage
Gradient (org.deeplearning4j.nn.gradient.Gradient): 1 usage