Example 1 with BroadcastAddOp

Use of org.nd4j.linalg.api.ops.impl.broadcast.BroadcastAddOp in the project deeplearning4j by deeplearning4j.

From the class MaskedReductionUtil, method maskedPoolingEpsilonCnn:

public static INDArray maskedPoolingEpsilonCnn(PoolingType poolingType, INDArray input, INDArray mask, INDArray epsilon2d, boolean alongHeight, int pnorm) {
    // [minibatch, depth, h=1, w=X] or [minibatch, depth, h=X, w=1] data
    // with a mask array of shape [minibatch, X]
    //If masking along height: broadcast dimensions are [0,2]
    //If masking along width: broadcast dimensions are [0,3]
    int[] dimensions = (alongHeight ? CNN_DIM_MASK_H : CNN_DIM_MASK_W);
    switch(poolingType) {
        case MAX:
            //TODO This is ugly - replace it with something better... Need something like a Broadcast CAS op
            INDArray negInfMask = Transforms.not(mask);
            BooleanIndexing.replaceWhere(negInfMask, Double.NEGATIVE_INFINITY, Conditions.equals(1.0));
            INDArray withInf = Nd4j.createUninitialized(input.shape());
            Nd4j.getExecutioner().exec(new BroadcastAddOp(input, negInfMask, withInf, dimensions));
            //At this point: all the masked out steps have value -inf, hence can't be the output of the MAX op
            INDArray isMax = Nd4j.getExecutioner().execAndReturn(new IsMax(withInf, 2, 3));
            return Nd4j.getExecutioner().execAndReturn(new BroadcastMulOp(isMax, epsilon2d, isMax, 0, 1));
        case AVG:
        case SUM:
            //if out = sum(in,dims) then dL/dIn = dL/dOut -> duplicate to each step and mask
            //if out = avg(in,dims) then dL/dIn = 1/N * dL/dOut
            //With masking: N differs for different time series
            INDArray out = Nd4j.createUninitialized(input.shape(), 'f');
            //Broadcast copy op, then divide and mask to 0 as appropriate
            Nd4j.getExecutioner().exec(new BroadcastCopyOp(out, epsilon2d, out, 0, 1));
            Nd4j.getExecutioner().exec(new BroadcastMulOp(out, mask, out, dimensions));
            if (poolingType == PoolingType.SUM) {
                return out;
            }
            //Note that with CNNs, current design is restricted to [minibatch, depth, 1, W] or [minibatch, depth, H, 1]
            //[minibatchSize,tsLength] -> [minibatchSize,1]
            INDArray nEachTimeSeries = mask.sum(1);
            Nd4j.getExecutioner().exec(new BroadcastDivOp(out, nEachTimeSeries, out, 0));
            return out;
        case PNORM:
            //Similar to average and sum pooling: there's no N term here, so we can just set the masked values to 0
            INDArray masked2 = Nd4j.createUninitialized(input.shape());
            Nd4j.getExecutioner().exec(new BroadcastMulOp(input, mask, masked2, dimensions));
            INDArray abs = Transforms.abs(masked2, true);
            Transforms.pow(abs, pnorm, false);
            INDArray pNorm = Transforms.pow(abs.sum(2, 3), 1.0 / pnorm);
            INDArray numerator;
            if (pnorm == 2) {
                numerator = input.dup();
            } else {
                INDArray absp2 = Transforms.pow(Transforms.abs(input, true), pnorm - 2, false);
                numerator = input.mul(absp2);
            }
            INDArray denom = Transforms.pow(pNorm, pnorm - 1, false);
            denom.rdivi(epsilon2d);
            Nd4j.getExecutioner().execAndReturn(new BroadcastMulOp(numerator, denom, numerator, 0, 1));
            //Apply mask
            Nd4j.getExecutioner().exec(new BroadcastMulOp(numerator, mask, numerator, dimensions));
            return numerator;
        case NONE:
            throw new UnsupportedOperationException("NONE pooling type not supported");
        default:
            throw new UnsupportedOperationException("Unknown or not supported pooling type: " + poolingType);
    }
}
Also used: IsMax (org.nd4j.linalg.api.ops.impl.transforms.IsMax), BroadcastAddOp (org.nd4j.linalg.api.ops.impl.broadcast.BroadcastAddOp), INDArray (org.nd4j.linalg.api.ndarray.INDArray), BroadcastMulOp (org.nd4j.linalg.api.ops.impl.broadcast.BroadcastMulOp), BroadcastCopyOp (org.nd4j.linalg.api.ops.impl.broadcast.BroadcastCopyOp), BroadcastDivOp (org.nd4j.linalg.api.ops.impl.broadcast.BroadcastDivOp)
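To make the MAX branch easier to follow in isolation, here is a minimal, hypothetical ND4J sketch (not part of the deeplearning4j sources; toy shapes and a made-up class name) of the trick used above: BroadcastAddOp adds negative infinity at the masked positions, so those positions can never be selected by the subsequent max or IsMax.

import org.nd4j.linalg.api.ndarray.INDArray;
import org.nd4j.linalg.api.ops.impl.broadcast.BroadcastAddOp;
import org.nd4j.linalg.api.ops.impl.transforms.IsMax;
import org.nd4j.linalg.factory.Nd4j;
import org.nd4j.linalg.indexing.BooleanIndexing;
import org.nd4j.linalg.indexing.conditions.Conditions;
import org.nd4j.linalg.ops.transforms.Transforms;

public class MaskedMaxSketch {

    public static void main(String[] args) {
        //Toy activations of shape [minibatch=2, depth=1, h=1, w=3], values 1..6
        INDArray in = Nd4j.linspace(1, 6, 6).reshape(2, 1, 1, 3);
        //Mask of shape [minibatch, w]: 1.0 = keep, 0.0 = masked (padded) position
        INDArray mask = Nd4j.create(new double[][] { { 1, 1, 0 }, { 1, 0, 0 } });
        //Convert the mask to 0.0 (keep) / -infinity (masked)
        INDArray negInfMask = Transforms.not(mask);
        BooleanIndexing.replaceWhere(negInfMask, Double.NEGATIVE_INFINITY, Conditions.equals(1.0));
        //Broadcast-add along dimensions [0, 3], i.e. masking along the width dimension
        INDArray withInf = Nd4j.createUninitialized(in.shape());
        Nd4j.getExecutioner().exec(new BroadcastAddOp(in, negInfMask, withInf, 0, 3));
        //Masked positions are now -infinity and cannot win the max: expect 2.0 and 4.0, shape [2, 1]
        System.out.println(withInf.max(2, 3));
        //IsMax marks the winning (unmasked) location per [example, depth], as used for the MAX epsilon above
        INDArray isMax = Nd4j.getExecutioner().execAndReturn(new IsMax(withInf.dup(), 2, 3));
        System.out.println(isMax);
    }
}

The same pattern appears in Examples 2 and 3; only the broadcast dimensions change ([0, 2] for the rank-3 time-series case).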

Example 2 with BroadcastAddOp

Use of org.nd4j.linalg.api.ops.impl.broadcast.BroadcastAddOp in the project deeplearning4j by deeplearning4j.

From the class MaskedReductionUtil, method maskedPoolingConvolution:

public static INDArray maskedPoolingConvolution(PoolingType poolingType, INDArray toReduce, INDArray mask, boolean alongHeight, int pnorm) {
    // [minibatch, depth, h=1, w=X] or [minibatch, depth, h=X, w=1] data
    // with a mask array of shape [minibatch, X]
    //If masking along height: broadcast dimensions are [0,2]
    //If masking along width: broadcast dimensions are [0,3]
    int[] dimensions = (alongHeight ? CNN_DIM_MASK_H : CNN_DIM_MASK_W);
    switch(poolingType) {
        case MAX:
            //TODO This is ugly - replace it with something better... Need something like a Broadcast CAS op
            INDArray negInfMask = Transforms.not(mask);
            BooleanIndexing.replaceWhere(negInfMask, Double.NEGATIVE_INFINITY, Conditions.equals(1.0));
            INDArray withInf = Nd4j.createUninitialized(toReduce.shape());
            Nd4j.getExecutioner().exec(new BroadcastAddOp(toReduce, negInfMask, withInf, dimensions));
            return withInf.max(2, 3);
        case AVG:
        case SUM:
            INDArray masked = Nd4j.createUninitialized(toReduce.shape());
            Nd4j.getExecutioner().exec(new BroadcastMulOp(toReduce, mask, masked, dimensions));
            INDArray summed = masked.sum(2, 3);
            if (poolingType == PoolingType.SUM) {
                return summed;
            }
            INDArray maskCounts = mask.sum(1);
            summed.diviColumnVector(maskCounts);
            return summed;
        case PNORM:
            //Similar to average and sum pooling: there's no N term here, so we can just set the masked values to 0
            INDArray masked2 = Nd4j.createUninitialized(toReduce.shape());
            Nd4j.getExecutioner().exec(new BroadcastMulOp(toReduce, mask, masked2, dimensions));
            INDArray abs = Transforms.abs(masked2, true);
            Transforms.pow(abs, pnorm, false);
            INDArray pNorm = abs.sum(2, 3);
            return Transforms.pow(pNorm, 1.0 / pnorm);
        case NONE:
            throw new UnsupportedOperationException("NONE pooling type not supported");
        default:
            throw new UnsupportedOperationException("Unknown or not supported pooling type: " + poolingType);
    }
}
Also used: BroadcastAddOp (org.nd4j.linalg.api.ops.impl.broadcast.BroadcastAddOp), INDArray (org.nd4j.linalg.api.ndarray.INDArray), BroadcastMulOp (org.nd4j.linalg.api.ops.impl.broadcast.BroadcastMulOp)
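A hedged usage sketch of the method above, exercising the AVG branch. It assumes MaskedReductionUtil is in org.deeplearning4j.util and PoolingType is in org.deeplearning4j.nn.conf.layers (as in the deeplearning4j-nn module); the class name and values are invented for illustration.

import org.deeplearning4j.nn.conf.layers.PoolingType;
import org.deeplearning4j.util.MaskedReductionUtil;
import org.nd4j.linalg.api.ndarray.INDArray;
import org.nd4j.linalg.factory.Nd4j;

public class MaskedPoolingConvolutionSketch {

    public static void main(String[] args) {
        //Activations of shape [minibatch=2, depth=1, h=1, w=4]
        INDArray act = Nd4j.linspace(1, 8, 8).reshape(2, 1, 1, 4);
        //Per-example mask over the width dimension, shape [minibatch, w]
        INDArray mask = Nd4j.create(new double[][] { { 1, 1, 1, 0 }, { 1, 1, 0, 0 } });
        //alongHeight = false, so the mask is broadcast along dimensions [0, 3]; pnorm is unused for AVG
        INDArray avg = MaskedReductionUtil.maskedPoolingConvolution(PoolingType.AVG, act, mask, false, 2);
        //Shape [2, 1]: averages over the unmasked positions only, i.e. (1+2+3)/3 = 2.0 and (5+6)/2 = 5.5
        System.out.println(avg);
    }
}

With PoolingType.SUM the same call returns the masked sums without the division by the per-example counts; with MAX it uses the -infinity trick sketched after Example 1.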

Example 3 with BroadcastAddOp

Use of org.nd4j.linalg.api.ops.impl.broadcast.BroadcastAddOp in the project deeplearning4j by deeplearning4j.

From the class MaskedReductionUtil, method maskedPoolingTimeSeries:

public static INDArray maskedPoolingTimeSeries(PoolingType poolingType, INDArray toReduce, INDArray mask, int pnorm) {
    if (toReduce.rank() != 3) {
        throw new IllegalArgumentException("Expect rank 3 array: got " + toReduce.rank());
    }
    if (mask.rank() != 2) {
        throw new IllegalArgumentException("Expect rank 2 array for mask: got " + mask.rank());
    }
    switch(poolingType) {
        case MAX:
            //TODO This is ugly - replace it with something better... Need something like a Broadcast CAS op
            INDArray negInfMask = Transforms.not(mask);
            BooleanIndexing.replaceWhere(negInfMask, Double.NEGATIVE_INFINITY, Conditions.equals(1.0));
            INDArray withInf = Nd4j.createUninitialized(toReduce.shape());
            Nd4j.getExecutioner().exec(new BroadcastAddOp(toReduce, negInfMask, withInf, 0, 2));
            return withInf.max(2);
        case AVG:
        case SUM:
            INDArray masked = Nd4j.createUninitialized(toReduce.shape());
            Nd4j.getExecutioner().exec(new BroadcastMulOp(toReduce, mask, masked, 0, 2));
            INDArray summed = masked.sum(2);
            if (poolingType == PoolingType.SUM) {
                return summed;
            }
            INDArray maskCounts = mask.sum(1);
            summed.diviColumnVector(maskCounts);
            return summed;
        case PNORM:
            //Similar to average and sum pooling: there's no N term here, so we can just set the masked values to 0
            INDArray masked2 = Nd4j.createUninitialized(toReduce.shape());
            Nd4j.getExecutioner().exec(new BroadcastMulOp(toReduce, mask, masked2, 0, 2));
            INDArray abs = Transforms.abs(masked2, true);
            Transforms.pow(abs, pnorm, false);
            INDArray pNorm = abs.sum(2);
            return Transforms.pow(pNorm, 1.0 / pnorm);
        case NONE:
            throw new UnsupportedOperationException("NONE pooling type not supported");
        default:
            throw new UnsupportedOperationException("Unknown or not supported pooling type: " + poolingType);
    }
}
Also used: BroadcastAddOp (org.nd4j.linalg.api.ops.impl.broadcast.BroadcastAddOp), INDArray (org.nd4j.linalg.api.ndarray.INDArray), BroadcastMulOp (org.nd4j.linalg.api.ops.impl.broadcast.BroadcastMulOp)
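A similar hedged sketch for the rank-3 time-series variant, this time exercising the PNORM branch (again assuming the org.deeplearning4j.util package location; class name and values are invented).

import org.deeplearning4j.nn.conf.layers.PoolingType;
import org.deeplearning4j.util.MaskedReductionUtil;
import org.nd4j.linalg.api.ndarray.INDArray;
import org.nd4j.linalg.factory.Nd4j;

public class MaskedPoolingTimeSeriesSketch {

    public static void main(String[] args) {
        //Time series activations of shape [minibatch=1, size=1, tsLength=3]
        INDArray act = Nd4j.create(new double[] { 3, 4, 12 }).reshape(1, 1, 3);
        //Mask of shape [minibatch, tsLength]: the final time step is padding
        INDArray mask = Nd4j.create(new double[][] { { 1, 1, 0 } });
        //Rank-3 case: the mask is broadcast along dimensions [0, 2]
        INDArray p2 = MaskedReductionUtil.maskedPoolingTimeSeries(PoolingType.PNORM, act, mask, 2);
        //L2 norm over the unmasked steps only: sqrt(3^2 + 4^2) = 5.0, shape [1, 1]
        System.out.println(p2);
    }
}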

Example 4 with BroadcastAddOp

Use of org.nd4j.linalg.api.ops.impl.broadcast.BroadcastAddOp in the project deeplearning4j by deeplearning4j.

From the class BatchNormalization, method preOutput:

public INDArray preOutput(INDArray x, TrainingMode training) {
    INDArray activations;
    // TODO add this directly in layer or get the layer prior...
    // batchnorm true but need to clarify if activation before or after
    org.deeplearning4j.nn.conf.layers.BatchNormalization layerConf = layerConf();
    int[] shape = getShape(x);
    // xHat = (x-xmean) / sqrt(var + epsilon)
    //Note that for CNNs, mean and variance are calculated per feature map (i.e., per channel, jointly over examples and spatial locations) rather than per individual activation
    //Pg5 of http://arxiv.org/pdf/1502.03167v3.pdf
    // "For convolutional layers, we additionally want the normalization to obey the convolutional property – so that
    //  different elements of the same feature map, at different locations, are normalized in the same way. To achieve
    //  this, we jointly normalize all the activations in a minibatch, over all locations."
    INDArray mean, var;
    if (training == TrainingMode.TRAIN) {
        switch(x.rank()) {
            case 2:
                // mean and variance over samples in batch
                mean = x.mean(0);
                var = x.var(false, 0);
                break;
            case 4:
                // mean and variance over samples AND locations
                mean = x.mean(0, 2, 3);
                var = x.var(false, 0, 2, 3);
                break;
            default:
                throw new IllegalStateException("Batch normalization on activations of rank " + x.rank() + " not supported");
        }
        var.addi(layerConf.getEps());
    } else {
        // Global mean and variance estimate - used after training
        mean = getParam(BatchNormalizationParamInitializer.GLOBAL_MEAN);
        var = getParam(BatchNormalizationParamInitializer.GLOBAL_VAR);
    }
    std = Transforms.sqrt(var, true);
    INDArray gamma = null;
    INDArray beta = null;
    INDArray globalMeanView = getParam(BatchNormalizationParamInitializer.GLOBAL_MEAN);
    INDArray globalVarView = getParam(BatchNormalizationParamInitializer.GLOBAL_VAR);
    if (layerConf.isLockGammaBeta()) {
        if (helper != null && input.rank() == 4) {
            //TODO: don't create these each iteration, when using cudnn
            int[] gammaBetaShape = new int[] { 1, layerConf().getNOut() };
            gamma = Nd4j.valueArrayOf(gammaBetaShape, layerConf().getGamma());
            beta = Nd4j.valueArrayOf(gammaBetaShape, layerConf().getBeta());
        }
    } else {
        gamma = getParam(BatchNormalizationParamInitializer.GAMMA);
        beta = getParam(BatchNormalizationParamInitializer.BETA);
    }
    if (helper != null && input.rank() == 4) {
        //Note that cudnn does not support dense (2d) batch norm case as of v5.1
        double decay = layerConf.getDecay();
        INDArray ret = helper.preOutput(x, training == TrainingMode.TRAIN, shape, gamma, beta, globalMeanView, globalVarView, decay, layerConf.getEps());
        if (ret != null) {
            return ret;
        }
    }
    // BN(x_k) = gamma * xHat_k + beta (applying gamma and beta for each activation)
    if (x.rank() == 2) {
        xMu = x.subRowVector(mean);
        xHat = xMu.divRowVector(std);
        if (layerConf.isLockGammaBeta()) {
            //Special case: gamma/beta have fixed values for all outputs
            //Use mul/addi(Number) here to avoid allocating temp arrays of all same value
            double g = layerConf.getGamma();
            double b = layerConf.getBeta();
            if (g != 1.0 && b != 0.0) {
                activations = xHat.mul(g).addi(b);
            } else {
                //Default and most common case: gamma == 1.0 and beta == 0.0 - no point executing a 1 * x + 0 op
                activations = xHat;
            }
        } else {
            //Standard case: gamma and beta are learned per parameter
            activations = xHat.mulRowVector(gamma).addiRowVector(beta);
        }
    } else if (x.rank() == 4) {
        if (!Shape.strideDescendingCAscendingF(x))
            //TODO: temp Workaround for broadcast bug. To be removed when fixed
            x = x.dup();
        xMu = Nd4j.getExecutioner().execAndReturn(new BroadcastSubOp(x, mean, Nd4j.createUninitialized(x.shape(), x.ordering()), 1));
        xHat = Nd4j.getExecutioner().execAndReturn(new BroadcastDivOp(xMu, std, Nd4j.createUninitialized(x.shape(), x.ordering()), 1));
        if (layerConf.isLockGammaBeta()) {
            //Special case: gamma/beta have fixed values for all outputs
            //Use mul/addi(Number) here to avoid allocating temp arrays of all same value
            double g = layerConf.getGamma();
            double b = layerConf.getBeta();
            if (g != 1.0 && b != 0.0) {
                activations = xHat.mul(g).addi(b);
            } else {
                //Default and most common case: gamma == 1.0 and beta == 0.0 - no point executing a 1 * x + 0 op
                activations = xHat;
            }
        } else {
            //Standard case: gamma and beta are learned per parameter
            activations = Nd4j.getExecutioner().execAndReturn(new BroadcastMulOp(xHat, gamma, Nd4j.createUninitialized(x.shape(), x.ordering()), 1));
            activations = Nd4j.getExecutioner().execAndReturn(new BroadcastAddOp(activations, beta, activations, 1));
        }
    } else {
        // TODO setup BatchNorm for RNN http://arxiv.org/pdf/1510.01378v1.pdf
        throw new IllegalStateException("The layer prior to BatchNorm in the configuration is not currently supported.");
    }
    // store mean and var if using batch mean while training
    double decay;
    if (training == TrainingMode.TRAIN) {
        if (layerConf.isMinibatch()) {
            //Standard case: Estimate global mean and variance stats by moving average
            //globalMean = decay * globalMean + (1-decay) * minibatchMean
            //globalVar  = decay * globalVar  + (1-decay) * minibatchVar
            //Note that it's safe to do a muli on 'mean' and 'var' variables: they can't be the global arrays when training == TrainingMode.TRAIN
            decay = layerConf.getDecay();
            globalMeanView.muli(decay).addi(mean.muli(1 - decay));
            globalVarView.muli(decay).addi(var.muli(1 - decay));
        } else {
            //Special case: doing full-batch (entire data set) training (uncommon; only tiny data sets)
            //In this case, minibatch and global stats are identical. Don't want to use a moving average estimate.
            globalMeanView.assign(mean);
            globalVarView.assign(var);
        }
    }
    return activations;
}
Also used: BroadcastAddOp (org.nd4j.linalg.api.ops.impl.broadcast.BroadcastAddOp), INDArray (org.nd4j.linalg.api.ndarray.INDArray), BroadcastSubOp (org.nd4j.linalg.api.ops.impl.broadcast.BroadcastSubOp), BroadcastMulOp (org.nd4j.linalg.api.ops.impl.broadcast.BroadcastMulOp), BroadcastDivOp (org.nd4j.linalg.api.ops.impl.broadcast.BroadcastDivOp)
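Written out, the forward transform implemented above is the standard batch normalization from Ioffe and Szegedy (the paper linked in the comments). With \epsilon the stabilizing constant from layerConf.getEps(), and the statistics taken over dimension 0 for rank-2 input or jointly over dimensions (0, 2, 3) per channel for rank-4 input:

\[
\mu = \frac{1}{m}\sum_{i=1}^{m} x_i, \qquad
\sigma^2 = \frac{1}{m}\sum_{i=1}^{m} (x_i - \mu)^2, \qquad
\hat{x}_i = \frac{x_i - \mu}{\sqrt{\sigma^2 + \epsilon}}, \qquad
y_i = \gamma\,\hat{x}_i + \beta
\]

Here m is the number of values each statistic is computed over (the minibatch size for rank-2 input; minibatch times height times width per channel for rank-4 input), and gamma and beta are either learned per output/channel or locked to the fixed values from the layer configuration.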

Example 5 with BroadcastAddOp

Use of org.nd4j.linalg.api.ops.impl.broadcast.BroadcastAddOp in the project deeplearning4j by deeplearning4j.

From the class BatchNormalization, method backpropGradient:

@Override
public Pair<Gradient, INDArray> backpropGradient(INDArray epsilon) {
    INDArray nextEpsilon;
    int[] shape = getShape(epsilon);
    // number of examples in the batch
    int batchSize = epsilon.size(0);
    org.deeplearning4j.nn.conf.layers.BatchNormalization layerConf = layerConf();
    INDArray gamma = null;
    INDArray dGammaView;
    INDArray dBetaView;
    INDArray dGlobalMeanView = gradientViews.get(BatchNormalizationParamInitializer.GLOBAL_MEAN);
    INDArray dGlobalVarView = gradientViews.get(BatchNormalizationParamInitializer.GLOBAL_VAR);
    if (layerConf.isLockGammaBeta()) {
        int[] tempShape = new int[] { 1, shape[1] };
        dGammaView = Nd4j.createUninitialized(tempShape, 'c');
        dBetaView = Nd4j.createUninitialized(tempShape, 'c');
    } else {
        gamma = getParam(BatchNormalizationParamInitializer.GAMMA);
        dGammaView = gradientViews.get(BatchNormalizationParamInitializer.GAMMA);
        dBetaView = gradientViews.get(BatchNormalizationParamInitializer.BETA);
    }
    Gradient retGradient = new DefaultGradient();
    if (helper != null && epsilon.rank() == 4) {
        //Note that cudnn does not support dense (2d) batch norm case as of v5.1
        if (layerConf.isLockGammaBeta()) {
            gamma = Nd4j.valueArrayOf(new int[] { 1, shape[1] }, layerConf.getGamma());
        }
        Pair<Gradient, INDArray> ret = helper.backpropGradient(input, epsilon, shape, gamma, dGammaView, dBetaView, layerConf.getEps());
        if (ret != null) {
            return ret;
        }
    }
    if (epsilon.rank() == 2) {
        //TODO: handle fixed beta/gamma case...
        //dL/dGamma = sum_examples dL/dOut .* xHat
        INDArray dGamma = epsilon.mul(xHat).sum(0);
        //dL/dBeta = sum_examples dL/dOut
        INDArray dBeta = epsilon.sum(0);
        INDArray dxhat;
        if (layerConf.isLockGammaBeta()) {
            dxhat = epsilon.mul(layerConf.getGamma());
        } else {
            //Standard case
            //dL/dxHat = dL/dOut . gamma        Shape: [minibatchSize, nOut]
            dxhat = epsilon.mulRowVector(gamma);
        }
        //dL/dVariance
        //Shape: [1, nOut]
        INDArray dLdVar = dxhat.mul(xMu).sum(0).muli(-0.5).muli(Transforms.pow(std, -3.0, true));
        //dL/dmu
        INDArray dxmu1 = dxhat.sum(0).divi(std).negi();
        INDArray dxmu2 = xMu.sum(0).muli(-2.0 / batchSize).muli(dLdVar);
        //Shape: [1, nOut]
        INDArray dLdmu = dxmu1.addi(dxmu2);
        //Note the array reuse here: dxhat, xMu, dLdVar, dLdmu - all are invalid after this line (but aren't used later anyway)
        INDArray dLdx = dxhat.diviRowVector(std).addi(xMu.muliRowVector(dLdVar.muli(2.0 / batchSize))).addiRowVector(dLdmu.muli(1.0 / batchSize));
        //TODO rework this to avoid the assign here
        dGammaView.assign(dGamma);
        dBetaView.assign(dBeta);
        retGradient.setGradientFor(BatchNormalizationParamInitializer.GAMMA, dGammaView);
        retGradient.setGradientFor(BatchNormalizationParamInitializer.BETA, dBetaView);
        //TODO: do this properly
        dGlobalMeanView.assign(0);
        dGlobalVarView.assign(0);
        retGradient.setGradientFor(BatchNormalizationParamInitializer.GLOBAL_MEAN, dGlobalMeanView);
        retGradient.setGradientFor(BatchNormalizationParamInitializer.GLOBAL_VAR, dGlobalVarView);
        nextEpsilon = dLdx;
    } else if (epsilon.rank() == 4) {
        INDArray dGamma = epsilon.mul(xHat).sum(0, 2, 3);
        INDArray dBeta = epsilon.sum(0, 2, 3);
        INDArray dxhat;
        if (layerConf.isLockGammaBeta()) {
            dxhat = epsilon.mul(layerConf.getGamma());
        } else {
            //Standard case
            dxhat = Nd4j.getExecutioner().execAndReturn(new BroadcastMulOp(epsilon, gamma, Nd4j.createUninitialized(epsilon.shape(), epsilon.ordering()), 1));
        }
        //dL/dVariance
        INDArray dLdVar = dxhat.mul(xMu).sum(0, 2, 3).muli(-0.5).muli(Transforms.pow(std, -3.0, true));
        //dL/dmu
        int effectiveBatchSize = input.size(0) * input.size(2) * input.size(3);
        INDArray dxmu1 = dxhat.sum(0, 2, 3).divi(std).negi();
        INDArray dxmu2 = xMu.sum(0, 2, 3).muli(-2.0 / effectiveBatchSize).muli(dLdVar);
        INDArray dLdmu = dxmu1.addi(dxmu2);
        INDArray dLdx = Nd4j.getExecutioner().execAndReturn(new BroadcastDivOp(dxhat, std, dxhat, 1)).addi(Nd4j.getExecutioner().execAndReturn(new BroadcastMulOp(xMu, dLdVar.muli(2.0 / effectiveBatchSize), xMu, 1)));
        Nd4j.getExecutioner().execAndReturn(new BroadcastAddOp(dLdx, dLdmu.muli(1.0 / effectiveBatchSize), dLdx, 1));
        //TODO rework this to avoid the assign here
        dGammaView.assign(dGamma);
        dBetaView.assign(dBeta);
        retGradient.setGradientFor(BatchNormalizationParamInitializer.GAMMA, dGammaView);
        retGradient.setGradientFor(BatchNormalizationParamInitializer.BETA, dBetaView);
        //TODO: do this properly
        dGlobalMeanView.assign(0);
        dGlobalVarView.assign(0);
        retGradient.setGradientFor(BatchNormalizationParamInitializer.GLOBAL_MEAN, dGlobalMeanView);
        retGradient.setGradientFor(BatchNormalizationParamInitializer.GLOBAL_VAR, dGlobalVarView);
        nextEpsilon = dLdx;
    } else {
        // TODO setup BatchNorm for RNN http://arxiv.org/pdf/1510.01378v1.pdf
        throw new IllegalStateException("The layer prior to BatchNorm in the configuration is not currently supported.");
    }
    return new Pair<>(retGradient, nextEpsilon);
}
Also used: Gradient (org.deeplearning4j.nn.gradient.Gradient), DefaultGradient (org.deeplearning4j.nn.gradient.DefaultGradient), BroadcastMulOp (org.nd4j.linalg.api.ops.impl.broadcast.BroadcastMulOp), BroadcastDivOp (org.nd4j.linalg.api.ops.impl.broadcast.BroadcastDivOp), BroadcastAddOp (org.nd4j.linalg.api.ops.impl.broadcast.BroadcastAddOp), INDArray (org.nd4j.linalg.api.ndarray.INDArray), Pair (org.deeplearning4j.berkeley.Pair)
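For reference, the quantities computed in the rank-2 branch above (dGamma, dBeta, dxhat, dLdVar, dLdmu, dLdx) correspond to the standard batch-normalization backward pass, in the notation of the forward equations after Example 4, with m the (effective) batch size, \delta_i = \partial L / \partial y_i the incoming epsilon argument, and \epsilon still the small stabilizing constant:

\[
\frac{\partial L}{\partial \gamma} = \sum_{i} \delta_i\,\hat{x}_i, \qquad
\frac{\partial L}{\partial \beta} = \sum_{i} \delta_i, \qquad
\frac{\partial L}{\partial \hat{x}_i} = \delta_i\,\gamma
\]
\[
\frac{\partial L}{\partial \sigma^2} = -\frac{1}{2}\,(\sigma^2 + \epsilon)^{-3/2} \sum_{i} \frac{\partial L}{\partial \hat{x}_i}\,(x_i - \mu)
\]
\[
\frac{\partial L}{\partial \mu} = -\frac{1}{\sqrt{\sigma^2 + \epsilon}} \sum_{i} \frac{\partial L}{\partial \hat{x}_i}
\;+\; \frac{\partial L}{\partial \sigma^2}\cdot\frac{-2}{m} \sum_{i} (x_i - \mu)
\]
\[
\frac{\partial L}{\partial x_i} = \frac{1}{\sqrt{\sigma^2 + \epsilon}}\,\frac{\partial L}{\partial \hat{x}_i}
\;+\; \frac{2\,(x_i - \mu)}{m}\,\frac{\partial L}{\partial \sigma^2}
\;+\; \frac{1}{m}\,\frac{\partial L}{\partial \mu}
\]

In the code, std corresponds to sqrt(sigma^2 + eps) and xMu to (x - mu); the rank-4 branch is identical except that the sums also run over the spatial dimensions and m becomes minibatch * height * width per channel.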

Aggregations

BroadcastAddOp (org.nd4j.linalg.api.ops.impl.broadcast.BroadcastAddOp): 9 usages
BroadcastMulOp (org.nd4j.linalg.api.ops.impl.broadcast.BroadcastMulOp): 9 usages
INDArray (org.nd4j.linalg.api.ndarray.INDArray): 7 usages
BroadcastDivOp (org.nd4j.linalg.api.ops.impl.broadcast.BroadcastDivOp): 5 usages
Gradient (org.deeplearning4j.nn.gradient.Gradient): 2 usages
BroadcastCopyOp (org.nd4j.linalg.api.ops.impl.broadcast.BroadcastCopyOp): 2 usages
BroadcastSubOp (org.nd4j.linalg.api.ops.impl.broadcast.BroadcastSubOp): 2 usages
IsMax (org.nd4j.linalg.api.ops.impl.transforms.IsMax): 2 usages
Pair (org.deeplearning4j.berkeley.Pair): 1 usage
Layer (org.deeplearning4j.nn.api.Layer): 1 usage
DefaultGradient (org.deeplearning4j.nn.gradient.DefaultGradient): 1 usage
Test (org.junit.Test): 1 usage