Use of org.nd4j.linalg.api.ops.impl.broadcast.BroadcastMulOp in project deeplearning4j by deeplearning4j.
From the class BatchNormalizationTest, method testCnnForwardBackward.
@Test
public void testCnnForwardBackward() {
    double eps = 1e-5;
    int nIn = 4;
    int hw = 3;
    int minibatch = 2;
    Nd4j.getRandom().setSeed(12345);
    INDArray input = Nd4j.rand('c', new int[] { minibatch, nIn, hw, hw });

    //TODO: other values for gamma/beta
    INDArray gamma = Nd4j.ones(1, nIn);
    INDArray beta = Nd4j.zeros(1, nIn);

    Layer l = getLayer(nIn, eps, false, -1, -1);

    INDArray mean = input.mean(0, 2, 3);
    INDArray var = input.var(false, 0, 2, 3);
    INDArray xHat = Nd4j.getExecutioner().execAndReturn(new BroadcastSubOp(input, mean, input.dup(), 1));
    Nd4j.getExecutioner().execAndReturn(new BroadcastDivOp(xHat, Transforms.sqrt(var.add(eps), true), xHat, 1));

    INDArray outExpected = Nd4j.getExecutioner().execAndReturn(new BroadcastMulOp(xHat, gamma, xHat.dup(), 1));
    Nd4j.getExecutioner().execAndReturn(new BroadcastAddOp(outExpected, beta, outExpected, 1));

    INDArray out = l.activate(input, true);

    System.out.println(Arrays.toString(outExpected.data().asDouble()));
    System.out.println(Arrays.toString(out.data().asDouble()));

    assertEquals(outExpected, out);

    //-------------------------------------------------------------
    //Check backprop
    //dL/dy
    INDArray epsilon = Nd4j.rand('c', new int[] { minibatch, nIn, hw, hw });
    int effectiveMinibatch = minibatch * hw * hw;

    INDArray dldgammaExp = epsilon.mul(xHat).sum(0, 2, 3);
    INDArray dldbetaExp = epsilon.sum(0, 2, 3);

    //epsilon.mulRowVector(gamma);
    INDArray dldxhat = Nd4j.getExecutioner().execAndReturn(new BroadcastMulOp(epsilon, gamma, epsilon.dup(), 1));
    INDArray inputSubMean = Nd4j.getExecutioner().execAndReturn(new BroadcastSubOp(input, mean, input.dup(), 1));

    INDArray dldvar = dldxhat.mul(inputSubMean).mul(-0.5);
    dldvar = Nd4j.getExecutioner().execAndReturn(
                    new BroadcastMulOp(dldvar, Transforms.pow(var.add(eps), -3.0 / 2.0, true), dldvar.dup(), 1));
    dldvar = dldvar.sum(0, 2, 3);

    INDArray dldmu = Nd4j.getExecutioner()
                    .execAndReturn(new BroadcastMulOp(dldxhat, Transforms.pow(var.add(eps), -1.0 / 2.0, true), dldxhat.dup(), 1))
                    .neg().sum(0, 2, 3);
    dldmu = dldmu.add(dldvar.mul(inputSubMean.mul(-2.0).sum(0, 2, 3).div(effectiveMinibatch)));

    INDArray dldinExp = Nd4j.getExecutioner().execAndReturn(
                    new BroadcastMulOp(dldxhat, Transforms.pow(var.add(eps), -1.0 / 2.0, true), dldxhat.dup(), 1));
    dldinExp = dldinExp.add(Nd4j.getExecutioner().execAndReturn(
                    new BroadcastMulOp(inputSubMean.mul(2.0 / effectiveMinibatch), dldvar, inputSubMean.dup(), 1)));
    dldinExp = Nd4j.getExecutioner().execAndReturn(
                    new BroadcastAddOp(dldinExp, dldmu.mul(1.0 / effectiveMinibatch), dldinExp.dup(), 1));

    Pair<Gradient, INDArray> p = l.backpropGradient(epsilon);
    INDArray dldgamma = p.getFirst().getGradientFor("gamma");
    INDArray dldbeta = p.getFirst().getGradientFor("beta");

    assertEquals(dldgammaExp, dldgamma);
    assertEquals(dldbetaExp, dldbeta);

    // System.out.println("EPSILONS");
    // System.out.println(Arrays.toString(dldinExp.data().asDouble()));
    // System.out.println(Arrays.toString(p.getSecond().dup().data().asDouble()));

    assertEquals(dldinExp, p.getSecond());
}
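All of the BroadcastMulOp calls in this test follow the same pattern: the op is constructed with the full-rank array, the lower-rank operand, the array to write the result into, and the dimension(s) of the full array that the operand is broadcast along. A minimal sketch of that pattern, with illustrative shapes that are not part of the test above:

// Illustrative only: scale each channel of an NCHW activation array by a per-channel factor.
INDArray acts = Nd4j.rand('c', new int[] { 2, 4, 3, 3 });          // [minibatch, channels, height, width]
INDArray perChannelScale = Nd4j.ones(1, 4).muli(2.0);              // one factor per channel
INDArray scaled = Nd4j.getExecutioner().execAndReturn(
                new BroadcastMulOp(acts, perChannelScale, acts.dup(), 1));   // broadcast along dimension 1 (channels)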
Use of org.nd4j.linalg.api.ops.impl.broadcast.BroadcastMulOp in project deeplearning4j by deeplearning4j.
From the class GlobalPoolingMaskingTests, method testMaskingCnnDim3.
@Test
public void testMaskingCnnDim3() {
    //Test masking, where mask is along dimension 3
    int minibatch = 3;
    int depthIn = 3;
    int depthOut = 4;
    int nOut = 5;
    int height = 3;
    int width = 6;

    PoolingType[] poolingTypes =
                    new PoolingType[] { PoolingType.SUM, PoolingType.AVG, PoolingType.MAX, PoolingType.PNORM };

    for (PoolingType pt : poolingTypes) {
        MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder().weightInit(WeightInit.XAVIER)
                        .convolutionMode(ConvolutionMode.Same).seed(12345L).list()
                        .layer(0, new ConvolutionLayer.Builder().nIn(depthIn).nOut(depthOut).kernelSize(height, 2)
                                        .stride(height, 1).activation(Activation.TANH).build())
                        .layer(1, new org.deeplearning4j.nn.conf.layers.GlobalPoolingLayer.Builder().poolingType(pt).build())
                        .layer(2, new OutputLayer.Builder(LossFunctions.LossFunction.MCXENT).activation(Activation.SOFTMAX)
                                        .nIn(depthOut).nOut(nOut).build())
                        .pretrain(false).backprop(true).build();

        MultiLayerNetwork net = new MultiLayerNetwork(conf);
        net.init();

        INDArray inToBeMasked = Nd4j.rand(new int[] { minibatch, depthIn, height, width });

        //Shape for mask: [minibatch, width]
        INDArray maskArray = Nd4j.create(new double[][] { { 1, 1, 1, 1, 1, 1 }, { 1, 1, 1, 1, 1, 0 }, { 1, 1, 1, 1, 0, 0 } });

        //Multiply the input by the mask array, to ensure the 0s in the mask correspond to 0s in the input vector
        // as would be the case in practice...
        Nd4j.getExecutioner().exec(new BroadcastMulOp(inToBeMasked, maskArray, inToBeMasked, 0, 3));

        net.setLayerMaskArrays(maskArray, null);
        INDArray outMasked = net.output(inToBeMasked);
        net.clearLayerMaskArrays();

        for (int i = 0; i < minibatch; i++) {
            System.out.println(i);
            int numSteps = width - i;
            INDArray subset = inToBeMasked.get(NDArrayIndex.interval(i, i, true), NDArrayIndex.all(),
                            NDArrayIndex.all(), NDArrayIndex.interval(0, numSteps));
            assertArrayEquals(new int[] { 1, depthIn, height, width - i }, subset.shape());

            INDArray outSubset = net.output(subset);
            INDArray outMaskedSubset = outMasked.getRow(i);

            assertEquals(outSubset, outMaskedSubset);
        }
    }
}
Use of org.nd4j.linalg.api.ops.impl.broadcast.BroadcastMulOp in project deeplearning4j by deeplearning4j.
From the class GlobalPoolingMaskingTests, method testMaskingCnnDim2.
@Test
public void testMaskingCnnDim2() {
    //Test masking, where mask is along dimension 2
    int minibatch = 3;
    int depthIn = 3;
    int depthOut = 4;
    int nOut = 5;
    int height = 5;
    int width = 4;

    PoolingType[] poolingTypes =
                    new PoolingType[] { PoolingType.SUM, PoolingType.AVG, PoolingType.MAX, PoolingType.PNORM };

    for (PoolingType pt : poolingTypes) {
        MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder().weightInit(WeightInit.XAVIER)
                        .convolutionMode(ConvolutionMode.Same).seed(12345L).list()
                        .layer(0, new ConvolutionLayer.Builder().nIn(depthIn).nOut(depthOut).kernelSize(2, width)
                                        .stride(1, width).activation(Activation.TANH).build())
                        .layer(1, new org.deeplearning4j.nn.conf.layers.GlobalPoolingLayer.Builder().poolingType(pt).build())
                        .layer(2, new OutputLayer.Builder(LossFunctions.LossFunction.MCXENT).activation(Activation.SOFTMAX)
                                        .nIn(depthOut).nOut(nOut).build())
                        .pretrain(false).backprop(true).build();

        MultiLayerNetwork net = new MultiLayerNetwork(conf);
        net.init();

        INDArray inToBeMasked = Nd4j.rand(new int[] { minibatch, depthIn, height, width });

        //Shape for mask: [minibatch, height]
        INDArray maskArray = Nd4j.create(new double[][] { { 1, 1, 1, 1, 1 }, { 1, 1, 1, 1, 0 }, { 1, 1, 1, 0, 0 } });

        //Multiply the input by the mask array, to ensure the 0s in the mask correspond to 0s in the input vector
        // as would be the case in practice...
        Nd4j.getExecutioner().exec(new BroadcastMulOp(inToBeMasked, maskArray, inToBeMasked, 0, 2));

        net.setLayerMaskArrays(maskArray, null);
        INDArray outMasked = net.output(inToBeMasked);
        net.clearLayerMaskArrays();

        for (int i = 0; i < minibatch; i++) {
            System.out.println(i);
            int numSteps = height - i;
            INDArray subset = inToBeMasked.get(NDArrayIndex.interval(i, i, true), NDArrayIndex.all(),
                            NDArrayIndex.interval(0, numSteps), NDArrayIndex.all());
            assertArrayEquals(new int[] { 1, depthIn, height - i, width }, subset.shape());

            INDArray outSubset = net.output(subset);
            INDArray outMaskedSubset = outMasked.getRow(i);

            assertEquals(outSubset, outMaskedSubset);
        }
    }
}
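Both masking tests rely on the same idea: the rank-2 mask of shape [minibatch, maskedDim] is broadcast over the rank-4 input by listing the two input dimensions it maps onto (0 and 3 for width masking, 0 and 2 for height masking). A small sketch, with contents chosen only to show which entries get zeroed (not part of the tests above):

// Illustrative only: minibatch of 2, mask out the last two width steps of the second example.
INDArray in = Nd4j.ones(2, 1, 2, 4);                                             // [minibatch, depth, height, width]
INDArray mask = Nd4j.create(new double[][] { { 1, 1, 1, 1 }, { 1, 1, 0, 0 } });  // [minibatch, width]
Nd4j.getExecutioner().exec(new BroadcastMulOp(in, mask, in, 0, 3));              // in-place; mask maps onto dims 0 and 3
// For the second example in the minibatch, every entry with width index 2 or 3 is now zero.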
Use of org.nd4j.linalg.api.ops.impl.broadcast.BroadcastMulOp in project deeplearning4j by deeplearning4j.
From the class L2NormalizeVertex, method doBackward.
@Override
public Pair<Gradient, INDArray[]> doBackward(boolean tbptt) {
    if (!canDoBackward())
        throw new IllegalStateException("Cannot do backward pass: errors not set (L2NormalizeVertex " + vertexName
                        + " idx " + vertexIndex + ")");

    INDArray x = inputs[0];
    int[] dimensions = getDimensions(x);

    INDArray norm = x.norm2(dimensions);
    INDArray norm3 = Transforms.pow(norm, 3.0, true);

    // in case of div/0
    Transforms.max(norm, eps, false);
    Transforms.max(norm3, eps, false);

    INDArray dLdx;
    if (x.rank() == 2) {
        // 2D case
        dLdx = epsilon.divColumnVector(norm);
        INDArray xDivNorm3 = x.divColumnVector(norm3);
        dLdx.subi(xDivNorm3.muliColumnVector(epsilon.mul(x).sum(1)));
    } else {
        //RNN and CNN case - Broadcast along dimension 0
        INDArray dx = epsilon.mul(x).sum(dimensions);

        //x / |x|_2^3 * sum_k (dLda*x)
        INDArray xDivNorm3 = Nd4j.createUninitialized(x.shape(), x.ordering());
        Nd4j.getExecutioner().exec(new BroadcastDivOp(x, norm3, xDivNorm3, 0));
        Nd4j.getExecutioner().exec(new BroadcastMulOp(xDivNorm3, dx, xDivNorm3, 0));

        //1/|x|_2 * dLda - above
        dLdx = Nd4j.createUninitialized(epsilon.shape(), epsilon.ordering());
        Nd4j.getExecutioner().exec(new BroadcastDivOp(epsilon, norm, dLdx, 0));
        dLdx.subi(xDivNorm3);
    }

    return new Pair<>(null, new INDArray[] { dLdx });
}
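Written out, the gradient both branches compute for y = x / |x|_2 (with epsilon = dL/dy, in the same notation as the comments above) is dL/dx = epsilon / |x|_2 - x / |x|_2^3 * sum_k(epsilon_k * x_k). The rank-2 branch applies it with column-vector operations, while the broadcast branch applies the same formula along dimension 0 for RNN- and CNN-shaped activations.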
Use of org.nd4j.linalg.api.ops.impl.broadcast.BroadcastMulOp in project deeplearning4j by deeplearning4j.
From the class MaskedReductionUtil, method maskedPoolingEpsilonTimeSeries.
public static INDArray maskedPoolingEpsilonTimeSeries(PoolingType poolingType, INDArray input, INDArray mask,
                INDArray epsilon2d, int pnorm) {

    if (input.rank() != 3) {
        throw new IllegalArgumentException("Expect rank 3 input activation array: got " + input.rank());
    }
    if (mask.rank() != 2) {
        throw new IllegalArgumentException("Expect rank 2 array for mask: got " + mask.rank());
    }
    if (epsilon2d.rank() != 2) {
        throw new IllegalArgumentException("Expected rank 2 array for errors: got " + epsilon2d.rank());
    }

    switch (poolingType) {
        case MAX:
            //TODO This is ugly - replace it with something better... Need something like a Broadcast CAS op
            INDArray negInfMask = Transforms.not(mask);
            BooleanIndexing.replaceWhere(negInfMask, Double.NEGATIVE_INFINITY, Conditions.equals(1.0));

            INDArray withInf = Nd4j.createUninitialized(input.shape());
            Nd4j.getExecutioner().exec(new BroadcastAddOp(input, negInfMask, withInf, 0, 2));
            //At this point: all the masked out steps have value -inf, hence can't be the output of the MAX op

            INDArray isMax = Nd4j.getExecutioner().execAndReturn(new IsMax(withInf, 2));

            return Nd4j.getExecutioner().execAndReturn(new BroadcastMulOp(isMax, epsilon2d, isMax, 0, 1));
        case AVG:
        case SUM:
            //if out = sum(in,dims) then dL/dIn = dL/dOut -> duplicate to each step and mask
            //if out = avg(in,dims) then dL/dIn = 1/N * dL/dOut
            //With masking: N differs for different time series
            INDArray out = Nd4j.createUninitialized(input.shape(), 'f');

            //Broadcast copy op, then divide and mask to 0 as appropriate
            Nd4j.getExecutioner().exec(new BroadcastCopyOp(out, epsilon2d, out, 0, 1));
            Nd4j.getExecutioner().exec(new BroadcastMulOp(out, mask, out, 0, 2));

            if (poolingType == PoolingType.SUM) {
                return out;
            }

            //[minibatchSize,tsLength] -> [minibatchSize,1]
            INDArray nEachTimeSeries = mask.sum(1);
            Nd4j.getExecutioner().exec(new BroadcastDivOp(out, nEachTimeSeries, out, 0));

            return out;
        case PNORM:
            //Similar to average and sum pooling: there's no N term here, so we can just set the masked values to 0
            INDArray masked2 = Nd4j.createUninitialized(input.shape());
            Nd4j.getExecutioner().exec(new BroadcastMulOp(input, mask, masked2, 0, 2));

            INDArray abs = Transforms.abs(masked2, true);
            Transforms.pow(abs, pnorm, false);
            INDArray pNorm = Transforms.pow(abs.sum(2), 1.0 / pnorm);

            INDArray numerator;
            if (pnorm == 2) {
                numerator = input.dup();
            } else {
                INDArray absp2 = Transforms.pow(Transforms.abs(input, true), pnorm - 2, false);
                numerator = input.mul(absp2);
            }

            INDArray denom = Transforms.pow(pNorm, pnorm - 1, false);
            denom.rdivi(epsilon2d);
            Nd4j.getExecutioner().execAndReturn(new BroadcastMulOp(numerator, denom, numerator, 0, 1));
            //Apply mask
            Nd4j.getExecutioner().exec(new BroadcastMulOp(numerator, mask, numerator, 0, 2));

            return numerator;
        case NONE:
            throw new UnsupportedOperationException("NONE pooling type not supported");
        default:
            throw new UnsupportedOperationException("Unknown or not supported pooling type: " + poolingType);
    }
}
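A minimal sketch of how this helper might be invoked (the shapes are illustrative only; epsilon2d is the gradient arriving from the layer above the pooling layer, one row per example and one column per channel):

// Illustrative only: rank-3 time-series activations with a per-time-step mask.
INDArray activations = Nd4j.rand(new int[] { 3, 4, 6 });                        // [minibatch, channels, timeSteps]
INDArray tsMask = Nd4j.create(new double[][] {
                { 1, 1, 1, 1, 1, 1 }, { 1, 1, 1, 1, 1, 0 }, { 1, 1, 1, 1, 0, 0 } });  // [minibatch, timeSteps]
INDArray epsilon2d = Nd4j.rand(3, 4);                                            // [minibatch, channels]
INDArray dLdIn = MaskedReductionUtil.maskedPoolingEpsilonTimeSeries(
                PoolingType.AVG, activations, tsMask, epsilon2d, 2);             // pnorm argument only matters for PNORM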