Search in sources :

Example 1 with Updater

use of org.deeplearning4j.nn.conf.Updater in project deeplearning4j by deeplearning4j.

The following example is taken from the class TestUpdaters, method testUpdaters.

@Test
public void testUpdaters() throws Exception {
    // Build a small CNN as a ComputationGraph with the NESTEROVS updater.
    // setInputTypes(...) should propagate the nIn values through the graph;
    // this is verified explicitly below before checking updater state sizes.
    ComputationGraphConfiguration conf = new NeuralNetConfiguration.Builder()
                    .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT)
                    .updater(Updater.NESTEROVS).momentum(0.9).graphBuilder()
                    .addInputs("input") // 40x40x1
                    .addLayer("l0_cnn", new ConvolutionLayer.Builder(new int[] { 3, 3 }, new int[] { 1, 1 }, new int[] { 1, 1 }).nOut(100).build(), "input") // out: 40x40x100
                    .addLayer("l1_max", new SubsamplingLayer.Builder(SubsamplingLayer.PoolingType.MAX, new int[] { 2, 2 }, new int[] { 2, 2 }, new int[] { 1, 1 }).build(), "l0_cnn") // 21x21x100
                    .addLayer("l2_cnn", new ConvolutionLayer.Builder(new int[] { 3, 3 }, new int[] { 2, 2 }, new int[] { 1, 1 }).nOut(200).build(), "l1_max") // 11x11x200
                    .addLayer("l3_max", new SubsamplingLayer.Builder(SubsamplingLayer.PoolingType.MAX, new int[] { 3, 3 }, new int[] { 2, 2 }, new int[] { 1, 1 }).build(), "l2_cnn") // 6x6x200
                    .addLayer("l4_fc", new DenseLayer.Builder().nOut(1024).build(), "l3_max") // output: 1x1x1024
                    .addLayer("l5_out", new OutputLayer.Builder(LossFunctions.LossFunction.NEGATIVELOGLIKELIHOOD).nOut(10).activation(Activation.SOFTMAX).build(), "l4_fc")
                    .setOutputs("l5_out").backprop(true).pretrain(false)
                    .setInputTypes(InputType.convolutional(40, 40, 1)).build();
    //First: check that the nIns are set properly by the input-type inference
    Map<String, GraphVertex> map = conf.getVertices();
    LayerVertex l0_cnn = (LayerVertex) map.get("l0_cnn");
    LayerVertex l2_cnn = (LayerVertex) map.get("l2_cnn");
    LayerVertex l4_fc = (LayerVertex) map.get("l4_fc");
    LayerVertex l5_out = (LayerVertex) map.get("l5_out");
    assertEquals(1, ((FeedForwardLayer) l0_cnn.getLayerConf().getLayer()).getNIn());
    assertEquals(100, ((FeedForwardLayer) l2_cnn.getLayerConf().getLayer()).getNIn());
    assertEquals(6 * 6 * 200, ((FeedForwardLayer) l4_fc.getLayerConf().getLayer()).getNIn());
    assertEquals(1024, ((FeedForwardLayer) l5_out.getLayerConf().getLayer()).getNIn());
    //Check updater state; the per-layer updaters are private, so use reflection
    ComputationGraph g = new ComputationGraph(conf);
    g.init();
    g.initGradientsView();
    ComputationGraphUpdater updater = g.getUpdater();
    //First: get the updaters array
    Field layerUpdatersField = updater.getClass().getDeclaredField("layerUpdaters");
    layerUpdatersField.setAccessible(true);
    org.deeplearning4j.nn.api.Updater[] layerUpdaters = (org.deeplearning4j.nn.api.Updater[]) layerUpdatersField.get(updater);
    //And get the map between layer names and updater indexes
    Field layerUpdatersMapField = updater.getClass().getDeclaredField("layerUpdatersMap");
    layerUpdatersMapField.setAccessible(true);
    @SuppressWarnings("unchecked")
    Map<String, Integer> layerUpdatersMap = (Map<String, Integer>) layerUpdatersMapField.get(updater);
    //Go through each layer; check that the updater state size matches the parameters size
    for (org.deeplearning4j.nn.api.Layer l : g.getLayers()) {
        String layerName = l.conf().getLayer().getLayerName();
        int nParams = l.numParams();
        Map<String, INDArray> paramTable = l.paramTable();
        Map<String, Integer> parameterSizeCounts = new LinkedHashMap<>();
        for (Map.Entry<String, INDArray> e : paramTable.entrySet()) {
            parameterSizeCounts.put(e.getKey(), e.getValue().length());
        }
        int updaterIdx = layerUpdatersMap.get(layerName);
        // Was: an intermediate (otherwise unused) local of the interface type; cast directly
        LayerUpdater lu = (LayerUpdater) layerUpdaters[updaterIdx];
        Field updaterForVariableField = LayerUpdater.class.getDeclaredField("updaterForVariable");
        updaterForVariableField.setAccessible(true);
        @SuppressWarnings("unchecked")
        Map<String, GradientUpdater> updaterForVariable = (Map<String, GradientUpdater>) updaterForVariableField.get(lu);
        //Nesterovs keeps a single velocity array "v" per parameter array; record its length
        Map<String, Integer> updaterStateSizeCounts = new HashMap<>();
        for (Map.Entry<String, GradientUpdater> entry : updaterForVariable.entrySet()) {
            Nesterovs nesterovs = (Nesterovs) entry.getValue();
            INDArray v = nesterovs.getV();
            updaterStateSizeCounts.put(entry.getKey(), v == null ? -1 : v.length());
        }
        //Check subsampling layers: no parameters means no updater state at all
        if (nParams == 0) { // reuse cached nParams instead of recomputing l.numParams()
            assertEquals(0, updaterForVariable.size());
        }
        System.out.println(layerName + "\t" + nParams + "\t" + parameterSizeCounts + "\t Updater size: " + updaterStateSizeCounts);
        //Now, with nesterov updater: 1 history value per parameter
        for (Map.Entry<String, Integer> e : parameterSizeCounts.entrySet()) {
            int paramSize = e.getValue();
            int updaterSize = updaterStateSizeCounts.get(e.getKey());
            assertEquals(layerName + "/" + e.getKey(), paramSize, updaterSize);
        }
    }
    //Finally, check that fitting runs: minibatch=2, depth, height, width
    INDArray in = Nd4j.create(2, 1, 40, 40);
    INDArray labels = Nd4j.create(2, 10);
    g.fit(new DataSet(in, labels));
}
Also used : LayerVertex(org.deeplearning4j.nn.conf.graph.LayerVertex) HashMap(java.util.HashMap) LinkedHashMap(java.util.LinkedHashMap) DataSet(org.nd4j.linalg.dataset.DataSet) ComputationGraphUpdater(org.deeplearning4j.nn.updater.graph.ComputationGraphUpdater) GradientUpdater(org.nd4j.linalg.learning.GradientUpdater) LinkedHashMap(java.util.LinkedHashMap) Field(java.lang.reflect.Field) GraphVertex(org.deeplearning4j.nn.conf.graph.GraphVertex) LayerUpdater(org.deeplearning4j.nn.updater.LayerUpdater) Updater(org.deeplearning4j.nn.conf.Updater) ComputationGraphUpdater(org.deeplearning4j.nn.updater.graph.ComputationGraphUpdater) GradientUpdater(org.nd4j.linalg.learning.GradientUpdater) Nesterovs(org.nd4j.linalg.learning.Nesterovs) INDArray(org.nd4j.linalg.api.ndarray.INDArray) ComputationGraphConfiguration(org.deeplearning4j.nn.conf.ComputationGraphConfiguration) LayerUpdater(org.deeplearning4j.nn.updater.LayerUpdater) HashMap(java.util.HashMap) LinkedHashMap(java.util.LinkedHashMap) Map(java.util.Map) Test(org.junit.Test)

Example 2 with Updater

use of org.deeplearning4j.nn.conf.Updater in project deeplearning4j by deeplearning4j.

The following example is taken from the class TrainModule, method getLayerInfoTable.

/**
 * Builds the rows of the layer-information table for a single vertex of the model,
 * for display in the training UI.
 * <p>
 * Rows are (label, value) string pairs. Row 0 is always the layer name and row 1 is a
 * layer-type placeholder that is filled in at the end of the method, once the type has
 * been resolved from the stored model configuration.
 *
 * @param layerIdx index of the vertex in the GraphInfo vertex list
 * @param gi       graph structure info (vertex names/types) for the current model
 * @param i18N     used to look up localized row labels
 * @param noData   if true, no stats storage data is available; only name/type rows are emitted
 * @param ss       stats storage to read the static model info from
 * @param wid      worker ID used to locate the static info in stats storage
 * @return table rows as a String[rows][2] array (label, value)
 */
private String[][] getLayerInfoTable(int layerIdx, TrainModuleUtils.GraphInfo gi, I18N i18N, boolean noData, StatsStorage ss, String wid) {
    List<String[]> layerInfoRows = new ArrayList<>();
    layerInfoRows.add(new String[] { i18N.getMessage("train.model.layerinfotable.layerName"), gi.getVertexNames().get(layerIdx) });
    // Placeholder for the layer type; overwritten below via layerInfoRows.get(1)[1]
    layerInfoRows.add(new String[] { i18N.getMessage("train.model.layerinfotable.layerType"), "" });
    if (!noData) {
        Persistable p = ss.getStaticInfo(currentSessionID, StatsListener.TYPE_ID, wid);
        if (p != null) {
            StatsInitializationReport initReport = (StatsInitializationReport) p;
            String configJson = initReport.getModelConfigJson();
            String modelClass = initReport.getModelClassName();
            //TODO error handling...
            String layerType = "";
            Layer layer = null;
            NeuralNetConfiguration nnc = null;
            // Resolve the layer + its NeuralNetConfiguration from the stored JSON config.
            // The lookup differs per model class (MultiLayerNetwork vs ComputationGraph vs VAE).
            if (modelClass.endsWith("MultiLayerNetwork")) {
                MultiLayerConfiguration conf = MultiLayerConfiguration.fromJson(configJson);
                //-1 because of input
                int confIdx = layerIdx - 1;
                if (confIdx >= 0) {
                    nnc = conf.getConf(confIdx);
                    layer = nnc.getLayer();
                } else {
                    //Input layer
                    layerType = "Input";
                }
            } else if (modelClass.endsWith("ComputationGraph")) {
                ComputationGraphConfiguration conf = ComputationGraphConfiguration.fromJson(configJson);
                // Graph vertices are looked up by name rather than index
                String vertexName = gi.getVertexNames().get(layerIdx);
                Map<String, GraphVertex> vertices = conf.getVertices();
                if (vertices.containsKey(vertexName) && vertices.get(vertexName) instanceof LayerVertex) {
                    LayerVertex lv = (LayerVertex) vertices.get(vertexName);
                    nnc = lv.getLayerConf();
                    layer = nnc.getLayer();
                } else if (conf.getNetworkInputs().contains(vertexName)) {
                    layerType = "Input";
                } else {
                    // Non-layer vertex (e.g. a merge/preprocessor vertex): report its class name
                    GraphVertex gv = conf.getVertices().get(vertexName);
                    if (gv != null) {
                        layerType = gv.getClass().getSimpleName();
                    }
                }
            } else if (modelClass.endsWith("VariationalAutoencoder")) {
                // VAE: type and info rows come pre-extracted from the GraphInfo, not the config JSON
                layerType = gi.getVertexTypes().get(layerIdx);
                Map<String, String> map = gi.getVertexInfo().get(layerIdx);
                for (Map.Entry<String, String> entry : map.entrySet()) {
                    layerInfoRows.add(new String[] { entry.getKey(), entry.getValue() });
                }
            }
            if (layer != null) {
                layerType = getLayerType(layer);
            }
            if (layer != null) {
                String activationFn = null;
                if (layer instanceof FeedForwardLayer) {
                    FeedForwardLayer ffl = (FeedForwardLayer) layer;
                    layerInfoRows.add(new String[] { i18N.getMessage("train.model.layerinfotable.layerNIn"), String.valueOf(ffl.getNIn()) });
                    layerInfoRows.add(new String[] { i18N.getMessage("train.model.layerinfotable.layerSize"), String.valueOf(ffl.getNOut()) });
                    activationFn = layer.getActivationFn().toString();
                }
                int nParams = layer.initializer().numParams(nnc);
                layerInfoRows.add(new String[] { i18N.getMessage("train.model.layerinfotable.layerNParams"), String.valueOf(nParams) });
                // Weight init / updater rows only make sense for layers that actually have parameters
                if (nParams > 0) {
                    WeightInit wi = layer.getWeightInit();
                    String str = wi.toString();
                    if (wi == WeightInit.DISTRIBUTION) {
                        str += layer.getDist();
                    }
                    layerInfoRows.add(new String[] { i18N.getMessage("train.model.layerinfotable.layerWeightInit"), str });
                    Updater u = layer.getUpdater();
                    String us = (u == null ? "" : u.toString());
                    layerInfoRows.add(new String[] { i18N.getMessage("train.model.layerinfotable.layerUpdater"), us });
                //TODO: Maybe L1/L2, dropout, updater-specific values etc
                }
                if (layer instanceof ConvolutionLayer || layer instanceof SubsamplingLayer) {
                    int[] kernel;
                    int[] stride;
                    int[] padding;
                    if (layer instanceof ConvolutionLayer) {
                        ConvolutionLayer cl = (ConvolutionLayer) layer;
                        kernel = cl.getKernelSize();
                        stride = cl.getStride();
                        padding = cl.getPadding();
                    } else {
                        SubsamplingLayer ssl = (SubsamplingLayer) layer;
                        kernel = ssl.getKernelSize();
                        stride = ssl.getStride();
                        padding = ssl.getPadding();
                        // Subsampling layers have no activation function; suppress the row
                        activationFn = null;
                        layerInfoRows.add(new String[] { i18N.getMessage("train.model.layerinfotable.layerSubsamplingPoolingType"), ssl.getPoolingType().toString() });
                    }
                    layerInfoRows.add(new String[] { i18N.getMessage("train.model.layerinfotable.layerCnnKernel"), Arrays.toString(kernel) });
                    layerInfoRows.add(new String[] { i18N.getMessage("train.model.layerinfotable.layerCnnStride"), Arrays.toString(stride) });
                    layerInfoRows.add(new String[] { i18N.getMessage("train.model.layerinfotable.layerCnnPadding"), Arrays.toString(padding) });
                }
                if (activationFn != null) {
                    layerInfoRows.add(new String[] { i18N.getMessage("train.model.layerinfotable.layerActivationFn"), activationFn });
                }
            }
            // Fill in the type placeholder row added at the top of the method
            layerInfoRows.get(1)[1] = layerType;
        }
    }
    // NOTE(review): new String[size][0] allocates 'size' empty rows that toArray
    // immediately overwrites; new String[0][] would suffice
    return layerInfoRows.toArray(new String[layerInfoRows.size()][0]);
}
Also used : StatsInitializationReport(org.deeplearning4j.ui.stats.api.StatsInitializationReport) LayerVertex(org.deeplearning4j.nn.conf.graph.LayerVertex) Persistable(org.deeplearning4j.api.storage.Persistable) SubsamplingLayer(org.deeplearning4j.nn.conf.layers.SubsamplingLayer) WeightInit(org.deeplearning4j.nn.weights.WeightInit) NeuralNetConfiguration(org.deeplearning4j.nn.conf.NeuralNetConfiguration) ConvolutionLayer(org.deeplearning4j.nn.conf.layers.ConvolutionLayer) SubsamplingLayer(org.deeplearning4j.nn.conf.layers.SubsamplingLayer) FeedForwardLayer(org.deeplearning4j.nn.conf.layers.FeedForwardLayer) Layer(org.deeplearning4j.nn.conf.layers.Layer) ConvolutionLayer(org.deeplearning4j.nn.conf.layers.ConvolutionLayer) MultiLayerConfiguration(org.deeplearning4j.nn.conf.MultiLayerConfiguration) GraphVertex(org.deeplearning4j.nn.conf.graph.GraphVertex) Updater(org.deeplearning4j.nn.conf.Updater) ComputationGraphConfiguration(org.deeplearning4j.nn.conf.ComputationGraphConfiguration) FeedForwardLayer(org.deeplearning4j.nn.conf.layers.FeedForwardLayer)

Example 3 with Updater

use of org.deeplearning4j.nn.conf.Updater in project deeplearning4j by deeplearning4j.

The following example is taken from the class TestSparkMultiLayerParameterAveraging, method testUpdaters.

@Test
public void testUpdaters() {
    SparkDl4jMultiLayer sparkNet = getBasicNetwork();

    // Train a local clone of the network; its config provides the expected values
    MultiLayerNetwork localNet = sparkNet.getNetwork().clone();
    localNet.fit(data);
    Updater expectedUpdater = localNet.conf().getLayer().getUpdater();
    double expectedLR = localNet.conf().getLayer().getLearningRate();
    double expectedMomentum = localNet.conf().getLayer().getMomentum();

    // Capture the updater type before Spark training, LR/momentum after it
    Updater actualUpdater = sparkNet.getNetwork().conf().getLayer().getUpdater();
    sparkNet.fit(sparkData);
    double actualLR = sparkNet.getNetwork().conf().getLayer().getLearningRate();
    double actualMomentum = sparkNet.getNetwork().conf().getLayer().getMomentum();

    // Spark-trained config must match the locally-trained one
    assertEquals(expectedUpdater, actualUpdater);
    assertEquals(expectedLR, actualLR, 0.01);
    assertEquals(expectedMomentum, actualMomentum, 0.01);
}
Also used : Updater(org.deeplearning4j.nn.conf.Updater) SparkDl4jMultiLayer(org.deeplearning4j.spark.impl.multilayer.SparkDl4jMultiLayer) MultiLayerNetwork(org.deeplearning4j.nn.multilayer.MultiLayerNetwork) BaseSparkTest(org.deeplearning4j.spark.BaseSparkTest) Test(org.junit.Test)

Example 4 with Updater

use of org.deeplearning4j.nn.conf.Updater in project deeplearning4j by deeplearning4j.

The following example is taken from the class BatchNormalizationTest, method testGradientAndUpdaters.

@Test
public void testGradientAndUpdaters() throws Exception {
    //Global mean/variance are part of the parameter vector. Expect 0 gradient, and no-op updater for these
    MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder()
                    .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT)
                    .iterations(1).updater(Updater.RMSPROP).seed(12345).list()
                    .layer(0, new ConvolutionLayer.Builder().nIn(1).nOut(6).weightInit(WeightInit.XAVIER).activation(Activation.IDENTITY).build())
                    .layer(1, new BatchNormalization.Builder().build())
                    .layer(2, new ActivationLayer.Builder().activation(Activation.LEAKYRELU).build())
                    .layer(3, new DenseLayer.Builder().nOut(10).activation(Activation.LEAKYRELU).build())
                    .layer(4, new BatchNormalization.Builder().build())
                    .layer(5, new OutputLayer.Builder(LossFunctions.LossFunction.MCXENT).weightInit(WeightInit.XAVIER).activation(Activation.SOFTMAX).nOut(10).build())
                    .backprop(true).pretrain(false)
                    .setInputType(InputType.convolutionalFlat(28, 28, 1)).build();
    MultiLayerNetwork net = new MultiLayerNetwork(conf);
    net.init();

    // One forward/backward pass on a single MNIST minibatch
    DataSetIterator iter = new MnistDataSetIterator(16, true, 12345);
    DataSet ds = iter.next();
    net.setInput(ds.getFeatures());
    net.setLabels(ds.getLabels());
    net.computeGradientAndScore();

    // Gradients for the global mean/variance parameters must be all zeros
    Gradient g = net.gradient();
    Map<String, INDArray> gradsByVar = g.gradientForVariable();
    for (Map.Entry<String, INDArray> e : gradsByVar.entrySet()) {
        String varName = e.getKey();
        boolean globalStat = varName.endsWith(BatchNormalizationParamInitializer.GLOBAL_MEAN)
                        || varName.endsWith(BatchNormalizationParamInitializer.GLOBAL_VAR);
        if (globalStat) {
            assertEquals(Nd4j.zeros(e.getValue().shape()), e.getValue());
        }
    }

    // Reflectively pull the per-layer updaters out of the MultiLayerUpdater
    org.deeplearning4j.nn.api.Updater netUpdater = net.getUpdater();
    Field f = MultiLayerUpdater.class.getDeclaredField("layerUpdaters");
    f.setAccessible(true);
    org.deeplearning4j.nn.api.Updater[] updaters = (org.deeplearning4j.nn.api.Updater[]) f.get(netUpdater);
    assertNotNull(updaters);
    assertEquals(6, updaters.length);

    // BatchNorm layers are at indices 1 and 4: their global mean/var parameters must use a
    // NoOpUpdater; every other parameter (in every layer) must use RmsProp
    for (int layerIdx = 0; layerIdx < 6; layerIdx++) {
        boolean isBatchNorm = (layerIdx == 1 || layerIdx == 4);
        LayerUpdater lu = (LayerUpdater) updaters[layerIdx];
        for (Map.Entry<String, GradientUpdater> entry : lu.getUpdaterForVariable().entrySet()) {
            String param = entry.getKey();
            boolean globalStat = BatchNormalizationParamInitializer.GLOBAL_MEAN.equals(param)
                            || BatchNormalizationParamInitializer.GLOBAL_VAR.equals(param);
            if (isBatchNorm && globalStat) {
                assertTrue(entry.getValue() instanceof NoOpUpdater);
            } else {
                assertTrue(entry.getValue() instanceof RmsProp);
            }
        }
    }
}
Also used : DataSet(org.nd4j.linalg.dataset.DataSet) BatchNormalization(org.deeplearning4j.nn.conf.layers.BatchNormalization) Field(java.lang.reflect.Field) MultiLayerConfiguration(org.deeplearning4j.nn.conf.MultiLayerConfiguration) LayerUpdater(org.deeplearning4j.nn.updater.LayerUpdater) Updater(org.deeplearning4j.nn.conf.Updater) MultiLayerUpdater(org.deeplearning4j.nn.updater.MultiLayerUpdater) MultiLayerNetwork(org.deeplearning4j.nn.multilayer.MultiLayerNetwork) Gradient(org.deeplearning4j.nn.gradient.Gradient) MnistDataSetIterator(org.deeplearning4j.datasets.iterator.impl.MnistDataSetIterator) NeuralNetConfiguration(org.deeplearning4j.nn.conf.NeuralNetConfiguration) INDArray(org.nd4j.linalg.api.ndarray.INDArray) LayerUpdater(org.deeplearning4j.nn.updater.LayerUpdater) MultiLayerUpdater(org.deeplearning4j.nn.updater.MultiLayerUpdater) DataSetIterator(org.nd4j.linalg.dataset.api.iterator.DataSetIterator) MnistDataSetIterator(org.deeplearning4j.datasets.iterator.impl.MnistDataSetIterator) ListDataSetIterator(org.deeplearning4j.datasets.iterator.impl.ListDataSetIterator) Test(org.junit.Test)

Aggregations

Updater (org.deeplearning4j.nn.conf.Updater)4 Test (org.junit.Test)3 Field (java.lang.reflect.Field)2 ComputationGraphConfiguration (org.deeplearning4j.nn.conf.ComputationGraphConfiguration)2 MultiLayerConfiguration (org.deeplearning4j.nn.conf.MultiLayerConfiguration)2 NeuralNetConfiguration (org.deeplearning4j.nn.conf.NeuralNetConfiguration)2 GraphVertex (org.deeplearning4j.nn.conf.graph.GraphVertex)2 LayerVertex (org.deeplearning4j.nn.conf.graph.LayerVertex)2 MultiLayerNetwork (org.deeplearning4j.nn.multilayer.MultiLayerNetwork)2 LayerUpdater (org.deeplearning4j.nn.updater.LayerUpdater)2 INDArray (org.nd4j.linalg.api.ndarray.INDArray)2 DataSet (org.nd4j.linalg.dataset.DataSet)2 HashMap (java.util.HashMap)1 LinkedHashMap (java.util.LinkedHashMap)1 Map (java.util.Map)1 Persistable (org.deeplearning4j.api.storage.Persistable)1 ListDataSetIterator (org.deeplearning4j.datasets.iterator.impl.ListDataSetIterator)1 MnistDataSetIterator (org.deeplearning4j.datasets.iterator.impl.MnistDataSetIterator)1 BatchNormalization (org.deeplearning4j.nn.conf.layers.BatchNormalization)1 ConvolutionLayer (org.deeplearning4j.nn.conf.layers.ConvolutionLayer)1