Use of org.deeplearning4j.nn.gradient.Gradient in project deeplearning4j by deeplearning4j: class BackPropMLPTest, method testIrisMiniBatchGradients.
private static void testIrisMiniBatchGradients(int miniBatchSize, int[] hiddenLayerSizes, Activation activationFunction) {
    int totalExamples = 10 * miniBatchSize;
    if (totalExamples > 150) {
        totalExamples = miniBatchSize * (150 / miniBatchSize);
    }
    if (miniBatchSize > 150) {
        fail();
    }
    DataSetIterator iris = new IrisDataSetIterator(miniBatchSize, totalExamples);
    MultiLayerNetwork network = new MultiLayerNetwork(getIrisMLPSimpleConfig(hiddenLayerSizes, Activation.SIGMOID));
    network.init();
    Layer[] layers = network.getLayers();
    int nLayers = layers.length;
    while (iris.hasNext()) {
        DataSet data = iris.next();
        INDArray x = data.getFeatureMatrix();
        INDArray y = data.getLabels();
        //Do forward pass:
        INDArray[] layerWeights = new INDArray[nLayers];
        INDArray[] layerBiases = new INDArray[nLayers];
        for (int i = 0; i < nLayers; i++) {
            layerWeights[i] = layers[i].getParam(DefaultParamInitializer.WEIGHT_KEY).dup();
            layerBiases[i] = layers[i].getParam(DefaultParamInitializer.BIAS_KEY).dup();
        }
        INDArray[] layerZs = new INDArray[nLayers];
        INDArray[] layerActivations = new INDArray[nLayers];
        for (int i = 0; i < nLayers; i++) {
            INDArray layerInput = (i == 0 ? x : layerActivations[i - 1]);
            layerZs[i] = layerInput.mmul(layerWeights[i]).addiRowVector(layerBiases[i]);
            layerActivations[i] = (i == nLayers - 1 ? doSoftmax(layerZs[i].dup()) : doSigmoid(layerZs[i].dup()));
        }
        //Do backward pass:
        INDArray[] deltas = new INDArray[nLayers];
        //Out - labels; shape=[miniBatchSize,nOut];
        deltas[nLayers - 1] = layerActivations[nLayers - 1].sub(y);
        assertArrayEquals(deltas[nLayers - 1].shape(), new int[] { miniBatchSize, 3 });
        for (int i = nLayers - 2; i >= 0; i--) {
            INDArray sigmaPrimeOfZ = doSigmoidDerivative(layerZs[i]);
            INDArray epsilon = layerWeights[i + 1].mmul(deltas[i + 1].transpose()).transpose();
            deltas[i] = epsilon.mul(sigmaPrimeOfZ);
            assertArrayEquals(deltas[i].shape(), new int[] { miniBatchSize, hiddenLayerSizes[i] });
        }
        INDArray[] dLdw = new INDArray[nLayers];
        INDArray[] dLdb = new INDArray[nLayers];
        for (int i = 0; i < nLayers; i++) {
            INDArray prevActivations = (i == 0 ? x : layerActivations[i - 1]);
            //Raw gradients, so not yet divided by mini-batch size (division is done in BaseUpdater)
            //Shape: [nIn, nOut]
            dLdw[i] = deltas[i].transpose().mmul(prevActivations).transpose();
            //Shape: [1,nOut]
            dLdb[i] = deltas[i].sum(0);
            int nIn = (i == 0 ? 4 : hiddenLayerSizes[i - 1]);
            int nOut = (i < nLayers - 1 ? hiddenLayerSizes[i] : 3);
            assertArrayEquals(dLdw[i].shape(), new int[] { nIn, nOut });
            assertArrayEquals(dLdb[i].shape(), new int[] { 1, nOut });
        }
        //Calculate and get gradient, compare to expected
        network.setInput(x);
        network.setLabels(y);
        network.computeGradientAndScore();
        Gradient gradient = network.gradientAndScore().getFirst();
        float eps = 1e-4f;
        for (int i = 0; i < hiddenLayerSizes.length; i++) {
            String wKey = i + "_" + DefaultParamInitializer.WEIGHT_KEY;
            String bKey = i + "_" + DefaultParamInitializer.BIAS_KEY;
            INDArray wGrad = gradient.getGradientFor(wKey);
            INDArray bGrad = gradient.getGradientFor(bKey);
            float[] wGradf = asFloat(wGrad);
            float[] bGradf = asFloat(bGrad);
            float[] expWGradf = asFloat(dLdw[i]);
            float[] expBGradf = asFloat(dLdb[i]);
            assertArrayEquals(wGradf, expWGradf, eps);
            assertArrayEquals(bGradf, expBGradf, eps);
        }
    }
}
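For reference, the manual backward pass above implements the standard MLP backpropagation equations for sigmoid hidden layers; the output-layer delta of "activations minus labels" is what one gets for a softmax output trained with cross-entropy (negative log-likelihood) loss, which is what this test assumes about the configuration built by getIrisMLPSimpleConfig. Using the code's layer indexing, with rows of each array indexing examples in the mini-batch and a^{(-1)} = x:

\delta^{(L)} = a^{(L)} - y, \qquad
\delta^{(l)} = \bigl(\delta^{(l+1)} W^{(l+1)\top}\bigr) \odot \sigma'\!\bigl(z^{(l)}\bigr), \qquad
\frac{\partial L}{\partial W^{(l)}} = a^{(l-1)\top}\,\delta^{(l)}, \qquad
\frac{\partial L}{\partial b^{(l)}} = \mathbf{1}^{\top} \delta^{(l)}

The expression layerWeights[i + 1].mmul(deltas[i + 1].transpose()).transpose() is simply \delta^{(l+1)} W^{(l+1)\top} written with an extra transpose, and deltas[i].sum(0) is the column sum \mathbf{1}^{\top} \delta^{(l)}. As the comment in the code notes, these are raw (unaveraged) gradients; division by the mini-batch size happens later in the updater.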
Use of org.deeplearning4j.nn.gradient.Gradient in project deeplearning4j by deeplearning4j: class MultiLayerTest, method testGradientUpdate.
@Test
public void testGradientUpdate() throws Exception {
    DataSetIterator iter = new IrisDataSetIterator(1, 1);
    Gradient expectedGradient = new DefaultGradient();
    expectedGradient.setGradientFor("0_W", Nd4j.ones(4, 5));
    expectedGradient.setGradientFor("0_b", Nd4j.ones(1, 5));
    expectedGradient.setGradientFor("1_W", Nd4j.ones(5, 3));
    expectedGradient.setGradientFor("1_b", Nd4j.ones(1, 3));
    MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder()
                    .updater(org.deeplearning4j.nn.conf.Updater.SGD).learningRate(1)
                    .activation(Activation.RELU).weightInit(WeightInit.XAVIER).list()
                    .layer(0, new DenseLayer.Builder().name("dnn1").nIn(4).nOut(5).build())
                    .layer(1, new OutputLayer.Builder().name("output").nIn(5).nOut(3)
                                    .activation(Activation.SOFTMAX).weightInit(WeightInit.XAVIER).build())
                    .backprop(true).pretrain(false).build();
    MultiLayerNetwork net = new MultiLayerNetwork(conf);
    net.init();
    net.fit(iter.next());
    // TODO validate actual layer gradientView - issue getting var out of BaseLayer w/o adding MLN getter that gets confused with local gradient vars
    Gradient actualGradient = net.gradient;
    assertNotEquals(expectedGradient.getGradientFor("0_W"), actualGradient.getGradientFor("0_W"));
    net.update(expectedGradient);
    actualGradient = net.gradient;
    assertEquals(expectedGradient.getGradientFor("0_W"), actualGradient.getGradientFor("0_W"));
    // Update params with set
    net.setParam("0_W", Nd4j.ones(4, 5));
    net.setParam("0_b", Nd4j.ones(1, 5));
    net.setParam("1_W", Nd4j.ones(5, 3));
    net.setParam("1_b", Nd4j.ones(1, 3));
    INDArray actualParams = net.params();
    // Confirm params
    assertEquals(expectedGradient.gradient(), actualParams);
    net.update(expectedGradient);
    actualParams = net.params();
    assertEquals(Nd4j.ones(1, 43).addi(1), actualParams);
}
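As a quick usage sketch (not part of the original test): the same layerIndex_paramName keys used above ("0_W", "0_b", "1_W", "1_b") can be used to walk the gradients DL4J itself computes, via Gradient.gradientForVariable(). The snippet below assumes the net and iter variables from the test, resets the single-example iterator before reusing it, and relies on java.util.Map and java.util.Arrays being imported.

// Sketch: recompute the gradient for one example and list each parameter's gradient shape
iter.reset();
DataSet ds = iter.next();
net.setInput(ds.getFeatureMatrix());
net.setLabels(ds.getLabels());
net.computeGradientAndScore();
Gradient computed = net.gradientAndScore().getFirst();
for (Map.Entry<String, INDArray> e : computed.gradientForVariable().entrySet()) {
    // Expected keys for this two-layer net: 0_W [4,5], 0_b [1,5], 1_W [5,3], 1_b [1,3]
    System.out.println(e.getKey() + " -> " + Arrays.toString(e.getValue().shape()));
}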
Use of org.deeplearning4j.nn.gradient.Gradient in project deeplearning4j by deeplearning4j: class TestVAE, method testParamGradientOrderAndViews.
@Test
public void testParamGradientOrderAndViews() {
    Nd4j.getRandom().setSeed(12345);
    MultiLayerConfiguration mlc = new NeuralNetConfiguration.Builder().list()
                    .layer(0, new org.deeplearning4j.nn.conf.layers.variational.VariationalAutoencoder.Builder()
                                    .nIn(10).nOut(5).encoderLayerSizes(12, 13).decoderLayerSizes(14, 15).build())
                    .build();
    NeuralNetConfiguration c = mlc.getConf(0);
    org.deeplearning4j.nn.conf.layers.variational.VariationalAutoencoder vae = (org.deeplearning4j.nn.conf.layers.variational.VariationalAutoencoder) c.getLayer();
    MultiLayerNetwork net = new MultiLayerNetwork(mlc);
    net.init();
    net.initGradientsView();
    org.deeplearning4j.nn.layers.variational.VariationalAutoencoder layer = (org.deeplearning4j.nn.layers.variational.VariationalAutoencoder) net.getLayer(0);
    Map<String, INDArray> layerParams = layer.paramTable();
    Map<String, INDArray> layerGradViews = layer.getGradientViews();
    layer.setInput(Nd4j.rand(3, 10));
    layer.computeGradientAndScore();
    Gradient g = layer.gradient();
    Map<String, INDArray> grads = g.gradientForVariable();
    assertEquals(layerParams.size(), layerGradViews.size());
    assertEquals(layerParams.size(), grads.size());
    //Iteration order should be consistent due to linked hashmaps
    Iterator<String> pIter = layerParams.keySet().iterator();
    Iterator<String> gvIter = layerGradViews.keySet().iterator();
    Iterator<String> gIter = grads.keySet().iterator();
    while (pIter.hasNext()) {
        String p = pIter.next();
        String gv = gvIter.next();
        String gr = gIter.next();
        // System.out.println(p + "\t" + gv + "\t" + gr);
        assertEquals(p, gv);
        assertEquals(p, gr);
        INDArray pArr = layerParams.get(p);
        INDArray gvArr = layerGradViews.get(p);
        INDArray gArr = grads.get(p);
        assertArrayEquals(pArr.shape(), gvArr.shape());
        //Should be the exact same object due to view mechanics
        assertTrue(gvArr == gArr);
    }
}
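A small follow-on sketch (again reusing the variables from the test above, so not part of the original): since each entry of layerGradViews is a view into the flattened gradient array set up by initGradientsView(), and the test has just asserted that grads and layerGradViews hold the very same INDArray objects, an in-place change to a view is immediately visible through the Gradient returned by layer.gradient().

// Sketch: in-place changes to a gradient view are visible through the layer's Gradient object
String firstKey = layerParams.keySet().iterator().next();
layerGradViews.get(firstKey).assign(0.0);
// The corresponding entry in grads is the same array, so it is now all zeros
assertEquals(0.0, grads.get(firstKey).sumNumber().doubleValue(), 0.0);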
Use of org.deeplearning4j.nn.gradient.Gradient in project deeplearning4j by deeplearning4j: class GravesLSTMTest, method testGravesBackwardBasicHelper.
private static void testGravesBackwardBasicHelper(int nIn, int nOut, int lstmNHiddenUnits, int miniBatchSize, int timeSeriesLength) {
    INDArray inputData = Nd4j.ones(miniBatchSize, nIn, timeSeriesLength);
    NeuralNetConfiguration conf = new NeuralNetConfiguration.Builder()
                    .layer(new org.deeplearning4j.nn.conf.layers.GravesLSTM.Builder().nIn(nIn).nOut(lstmNHiddenUnits)
                                    .weightInit(WeightInit.DISTRIBUTION).dist(new UniformDistribution(0, 1))
                                    .activation(Activation.TANH).build())
                    .build();
    int numParams = conf.getLayer().initializer().numParams(conf);
    INDArray params = Nd4j.create(1, numParams);
    GravesLSTM lstm = (GravesLSTM) conf.getLayer().instantiate(conf, null, 0, params, true);
    lstm.setBackpropGradientsViewArray(Nd4j.create(1, conf.getLayer().initializer().numParams(conf)));
    //Set input, do a forward pass:
    lstm.activate(inputData);
    assertNotNull(lstm.input());
    INDArray epsilon = Nd4j.ones(miniBatchSize, lstmNHiddenUnits, timeSeriesLength);
    Pair<Gradient, INDArray> out = lstm.backpropGradient(epsilon);
    Gradient outGradient = out.getFirst();
    INDArray nextEpsilon = out.getSecond();
    INDArray biasGradient = outGradient.getGradientFor(GravesLSTMParamInitializer.BIAS_KEY);
    INDArray inWeightGradient = outGradient.getGradientFor(GravesLSTMParamInitializer.INPUT_WEIGHT_KEY);
    INDArray recurrentWeightGradient = outGradient.getGradientFor(GravesLSTMParamInitializer.RECURRENT_WEIGHT_KEY);
    assertNotNull(biasGradient);
    assertNotNull(inWeightGradient);
    assertNotNull(recurrentWeightGradient);
    assertArrayEquals(biasGradient.shape(), new int[] { 1, 4 * lstmNHiddenUnits });
    assertArrayEquals(inWeightGradient.shape(), new int[] { nIn, 4 * lstmNHiddenUnits });
    assertArrayEquals(recurrentWeightGradient.shape(), new int[] { lstmNHiddenUnits, 4 * lstmNHiddenUnits + 3 });
    assertNotNull(nextEpsilon);
    assertArrayEquals(nextEpsilon.shape(), new int[] { miniBatchSize, nIn, timeSeriesLength });
    //Check update:
    for (String s : outGradient.gradientForVariable().keySet()) {
        lstm.update(outGradient.getGradientFor(s), s);
    }
}
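The gradient shapes asserted above follow from how GravesLSTM packs its parameters: the four gates share a single input-weight matrix and a single recurrent-weight matrix, and the recurrent matrix carries three extra columns for the peephole connections of Graves' LSTM formulation. Writing n for lstmNHiddenUnits, the numParams value used to size both the parameter array and the backprop gradient view therefore works out to:

\text{numParams} = \underbrace{n_{\text{in}} \cdot 4n}_{\text{input weights}} \;+\; \underbrace{n\,(4n + 3)}_{\text{recurrent weights + peepholes}} \;+\; \underbrace{4n}_{\text{biases}}

For illustration (values chosen here, not taken from the test), nIn = 3 and n = 4 hidden units give 3·16 + 4·19 + 16 = 140 parameters.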
Use of org.deeplearning4j.nn.gradient.Gradient in project deeplearning4j by deeplearning4j: class RBMTests, method testComputeGradientAndScore.
@Test
public void testComputeGradientAndScore() {
    INDArray input = Nd4j.linspace(1, 10, 10);
    INDArray params = getStandardParams(10, 5);
    RBM rbm = getRBMLayer(10, 5, HiddenUnit.BINARY, VisibleUnit.BINARY, params, true, false, 1, LossFunctions.LossFunction.MSE);
    rbm.setInput(input);
    rbm.computeGradientAndScore();
    Pair<Gradient, Double> pair = rbm.gradientAndScore();
    INDArray hprob = sigmoid(input.mmul(rbm.getParam(PretrainParamInitializer.WEIGHT_KEY)).addiRowVector(rbm.getParam(PretrainParamInitializer.BIAS_KEY)));
    INDArray vprob = sigmoid(hprob.mmul(rbm.getParam(PretrainParamInitializer.WEIGHT_KEY).transpose()).addiRowVector(rbm.getParam(PretrainParamInitializer.VISIBLE_BIAS_KEY)));
    Distribution dist = Nd4j.getDistributions().createBinomial(1, vprob);
    dist.reseedRandomGenerator(42);
    INDArray vSample = dist.sample(vprob.shape());
    //double expectedScore = LossFunctions.LossFunction.MSE.getILossFunction().computeScore(input, vSample, "sigmoid", null, false);
    double expectedScore = LossFunctions.LossFunction.MSE.getILossFunction().computeScore(input, vSample, new ActivationSigmoid(), null, false);
    assertEquals(expectedScore, pair.getSecond(), 1e-8);
}
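The hand computation in this test reproduces one reconstruction step of the binary-binary RBM before scoring it (a sketch of the math; W is the weight matrix, b_h the hidden bias, b_v the visible bias):

h = \sigma\!\left(x W + b_h\right), \qquad v' = \sigma\!\left(h W^{\top} + b_v\right), \qquad \tilde{v} \sim \mathrm{Binomial}(1, v')

The expected score is then obtained from the MSE loss function applied to the sampled reconstruction \tilde{v} (with the sigmoid activation passed to computeScore), and compared against the score the layer itself reports from computeGradientAndScore().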