Use of org.deeplearning4j.nn.updater.LayerUpdater in project deeplearning4j by deeplearning4j.
From the class TestUpdaters, method testUpdaters.
/**
 * Builds a small convolutional {@code ComputationGraph} that uses the NESTEROVS updater and checks:
 * <ol>
 *   <li>that {@code nIn} is filled in for the convolution, dense and output layers once the
 *       input type is set to convolutional 40x40x1;</li>
 *   <li>that, for every layer, each parameter has a {@code Nesterovs} state array ({@code getV()})
 *       of the same length as the parameter itself, and that layers without parameters have
 *       no per-variable updaters;</li>
 *   <li>that a single {@code fit} call on a 2-example data set completes.</li>
 * </ol>
 * The updater internals are read reflectively from the fields {@code layerUpdaters},
 * {@code layerUpdatersMap} and {@code updaterForVariable}.
 *
 * @throws Exception if a reflective field lookup/read fails or the network throws
 */
@Test
public void testUpdaters() throws Exception {
    ComputationGraphConfiguration conf = new NeuralNetConfiguration.Builder()
            .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT)
            .updater(Updater.NESTEROVS)
            .momentum(0.9)
            .graphBuilder()
            // 40x40x1
            .addInputs("input")
            // out: 40x40x100
            .addLayer("l0_cnn",
                    new ConvolutionLayer.Builder(new int[] { 3, 3 }, new int[] { 1, 1 }, new int[] { 1, 1 })
                            .nOut(100).build(),
                    "input")
            // 21x21x100
            .addLayer("l1_max",
                    new SubsamplingLayer.Builder(SubsamplingLayer.PoolingType.MAX, new int[] { 2, 2 },
                            new int[] { 2, 2 }, new int[] { 1, 1 }).build(),
                    "l0_cnn")
            // 11x11x200
            .addLayer("l2_cnn",
                    new ConvolutionLayer.Builder(new int[] { 3, 3 }, new int[] { 2, 2 }, new int[] { 1, 1 })
                            .nOut(200).build(),
                    "l1_max")
            // 6x6x200
            .addLayer("l3_max",
                    new SubsamplingLayer.Builder(SubsamplingLayer.PoolingType.MAX, new int[] { 3, 3 },
                            new int[] { 2, 2 }, new int[] { 1, 1 }).build(),
                    "l2_cnn")
            // output: 1x1x1024
            .addLayer("l4_fc", new DenseLayer.Builder().nOut(1024).build(), "l3_max")
            .addLayer("l5_out",
                    new OutputLayer.Builder(LossFunctions.LossFunction.NEGATIVELOGLIKELIHOOD)
                            .nOut(10).activation(Activation.SOFTMAX).build(),
                    "l4_fc")
            .setOutputs("l5_out")
            .backprop(true)
            .pretrain(false)
            .setInputTypes(InputType.convolutional(40, 40, 1))
            .build();

    //First: check that the nIns are set properly...
    Map<String, GraphVertex> map = conf.getVertices();
    LayerVertex l0_cnn = (LayerVertex) map.get("l0_cnn");
    LayerVertex l2_cnn = (LayerVertex) map.get("l2_cnn");
    LayerVertex l4_fc = (LayerVertex) map.get("l4_fc");
    LayerVertex l5_out = (LayerVertex) map.get("l5_out");
    assertEquals(1, ((FeedForwardLayer) l0_cnn.getLayerConf().getLayer()).getNIn());
    assertEquals(100, ((FeedForwardLayer) l2_cnn.getLayerConf().getLayer()).getNIn());
    assertEquals(6 * 6 * 200, ((FeedForwardLayer) l4_fc.getLayerConf().getLayer()).getNIn());
    assertEquals(1024, ((FeedForwardLayer) l5_out.getLayerConf().getLayer()).getNIn());

    //Check updaters state:
    ComputationGraph g = new ComputationGraph(conf);
    g.init();
    g.initGradientsView();
    ComputationGraphUpdater updater = g.getUpdater();

    //First: get the updaters array
    Field layerUpdatersField = updater.getClass().getDeclaredField("layerUpdaters");
    layerUpdatersField.setAccessible(true);
    org.deeplearning4j.nn.api.Updater[] layerUpdaters =
            (org.deeplearning4j.nn.api.Updater[]) layerUpdatersField.get(updater);

    //And get the map between names and updater indexes
    Field layerUpdatersMapField = updater.getClass().getDeclaredField("layerUpdatersMap");
    layerUpdatersMapField.setAccessible(true);
    // Reflective read returns Object; the generic type cannot be checked at runtime.
    @SuppressWarnings("unchecked")
    Map<String, Integer> layerUpdatersMap = (Map<String, Integer>) layerUpdatersMapField.get(updater);

    // The per-variable updater field is the same for every LayerUpdater instance, so the
    // reflective lookup is done once here rather than once per layer.
    Field updaterForVariableField = LayerUpdater.class.getDeclaredField("updaterForVariable");
    updaterForVariableField.setAccessible(true);

    //Go through each layer; check that the updater state size matches the parameters size
    for (org.deeplearning4j.nn.api.Layer layer : g.getLayers()) {
        String layerName = layer.conf().getLayer().getLayerName();
        int nParams = layer.numParams();

        // Parameter name -> number of elements in that parameter array (insertion-ordered for printing)
        Map<String, Integer> parameterSizeCounts = new LinkedHashMap<>();
        for (Map.Entry<String, INDArray> e : layer.paramTable().entrySet()) {
            parameterSizeCounts.put(e.getKey(), e.getValue().length());
        }

        int updaterIdx = layerUpdatersMap.get(layerName);
        LayerUpdater lu = (LayerUpdater) layerUpdaters[updaterIdx];
        // Reflective read returns Object; the generic type cannot be checked at runtime.
        @SuppressWarnings("unchecked")
        Map<String, GradientUpdater> updaterForVariable =
                (Map<String, GradientUpdater>) updaterForVariableField.get(lu);

        // Parameter name -> length of the Nesterovs state array, or -1 if the state is null
        Map<String, Integer> updaterStateSizeCounts = new HashMap<>();
        for (Map.Entry<String, GradientUpdater> entry : updaterForVariable.entrySet()) {
            Nesterovs nesterovs = (Nesterovs) entry.getValue();
            INDArray v = nesterovs.getV();
            int length = (v == null ? -1 : v.length());
            updaterStateSizeCounts.put(entry.getKey(), length);
        }

        //Check subsampling layers:
        if (nParams == 0) {
            assertEquals(0, updaterForVariable.size());
        }

        System.out.println(layerName + "\t" + nParams + "\t" + parameterSizeCounts + "\t Updater size: " + updaterStateSizeCounts);

        //Now, with nesterov updater: 1 history value per parameter
        for (Map.Entry<String, Integer> param : parameterSizeCounts.entrySet()) {
            String paramName = param.getKey();
            // Compare as boxed Integers: a parameter with no updater state then fails the
            // assertion with the layer/parameter name instead of throwing a bare
            // NullPointerException on auto-unboxing.
            Integer updaterSize = updaterStateSizeCounts.get(paramName);
            assertEquals(layerName + "/" + paramName, param.getValue(), updaterSize);
        }
    }

    //minibatch, depth, height, width
    INDArray features = Nd4j.create(2, 1, 40, 40);
    INDArray labels = Nd4j.create(2, 10);
    DataSet ds = new DataSet(features, labels);
    // No assertion follows; presumably a smoke test that fitting does not throw.
    g.fit(ds);
}
Use of org.deeplearning4j.nn.updater.LayerUpdater in project deeplearning4j by deeplearning4j.
From the class BatchNormalizationTest, method testGradientAndUpdaters.
/**
 * Builds a six-layer network (convolution, batch norm, activation, dense, batch norm, output)
 * configured with the RMSPROP updater, computes the gradient on one 16-example MNIST batch and
 * checks two things:
 * <ul>
 *   <li>every gradient entry whose name ends with the batch-norm global mean / global variance
 *       key is all zeros;</li>
 *   <li>in the per-layer updaters, the batch-norm layers (indexes 1 and 4) use a
 *       {@code NoOpUpdater} for the global mean / variance parameters, and every other
 *       parameter in every layer uses {@code RmsProp}.</li>
 * </ul>
 *
 * @throws Exception if the reflective read of {@code layerUpdaters} fails or the network throws
 */
@Test
public void testGradientAndUpdaters() throws Exception {
    //Global mean/variance are part of the parameter vector. Expect 0 gradient, and no-op updater for these
    MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder()
            .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT)
            .iterations(1)
            .updater(Updater.RMSPROP)
            .seed(12345)
            .list()
            .layer(0, new ConvolutionLayer.Builder().nIn(1).nOut(6).weightInit(WeightInit.XAVIER)
                    .activation(Activation.IDENTITY).build())
            .layer(1, new BatchNormalization.Builder().build())
            .layer(2, new ActivationLayer.Builder().activation(Activation.LEAKYRELU).build())
            .layer(3, new DenseLayer.Builder().nOut(10).activation(Activation.LEAKYRELU).build())
            .layer(4, new BatchNormalization.Builder().build())
            .layer(5, new OutputLayer.Builder(LossFunctions.LossFunction.MCXENT).weightInit(WeightInit.XAVIER)
                    .activation(Activation.SOFTMAX).nOut(10).build())
            .backprop(true)
            .pretrain(false)
            .setInputType(InputType.convolutionalFlat(28, 28, 1))
            .build();

    MultiLayerNetwork network = new MultiLayerNetwork(conf);
    network.init();

    // One minibatch of 16 examples is enough to compute a gradient.
    DataSetIterator mnist = new MnistDataSetIterator(16, true, 12345);
    DataSet batch = mnist.next();
    network.setInput(batch.getFeatures());
    network.setLabels(batch.getLabels());
    network.computeGradientAndScore();

    // Part 1: gradients for the batch-norm global statistics must be exactly zero.
    Gradient gradient = network.gradient();
    Map<String, INDArray> gradientsByParam = gradient.gradientForVariable();
    for (Map.Entry<String, INDArray> gradEntry : gradientsByParam.entrySet()) {
        String name = gradEntry.getKey();
        INDArray grad = gradEntry.getValue();
        boolean isGlobalStat = name.endsWith(BatchNormalizationParamInitializer.GLOBAL_MEAN)
                || name.endsWith(BatchNormalizationParamInitializer.GLOBAL_VAR);
        if (isGlobalStat) {
            assertEquals(Nd4j.zeros(grad.shape()), grad);
        }
    }

    // Part 2: read the per-layer updaters reflectively and check which updater each parameter got.
    org.deeplearning4j.nn.api.Updater networkUpdater = network.getUpdater();
    Field layerUpdatersField = MultiLayerUpdater.class.getDeclaredField("layerUpdaters");
    layerUpdatersField.setAccessible(true);
    org.deeplearning4j.nn.api.Updater[] perLayerUpdaters =
            (org.deeplearning4j.nn.api.Updater[]) layerUpdatersField.get(networkUpdater);
    assertNotNull(perLayerUpdaters);
    assertEquals(6, perLayerUpdaters.length);

    for (int layerIdx = 0; layerIdx < 6; layerIdx++) {
        // Layers 1 and 4 are the two BatchNormalization layers in the configuration above.
        boolean isBatchNormLayer = (layerIdx == 1 || layerIdx == 4);
        LayerUpdater layerUpdater = (LayerUpdater) perLayerUpdaters[layerIdx];
        Map<String, GradientUpdater> updatersByParam = layerUpdater.getUpdaterForVariable();

        for (Map.Entry<String, GradientUpdater> updaterEntry : updatersByParam.entrySet()) {
            String paramName = updaterEntry.getKey();
            GradientUpdater paramUpdater = updaterEntry.getValue();

            boolean expectNoOp = isBatchNormLayer
                    && (BatchNormalizationParamInitializer.GLOBAL_MEAN.equals(paramName)
                            || BatchNormalizationParamInitializer.GLOBAL_VAR.equals(paramName));
            if (expectNoOp) {
                assertTrue(paramUpdater instanceof NoOpUpdater);
            } else {
                assertTrue(paramUpdater instanceof RmsProp);
            }
        }
    }
}
Aggregations