
Example 1 with ComputationGraphUpdater

Use of org.deeplearning4j.nn.updater.graph.ComputationGraphUpdater in project deeplearning4j by deeplearning4j.

The class TestUpdaters, method testUpdaters.

@Test
public void testUpdaters() throws Exception {
    ComputationGraphConfiguration conf = new NeuralNetConfiguration.Builder()
            .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT)
            .updater(Updater.NESTEROVS).momentum(0.9).graphBuilder()
            .addInputs("input") // input: 40x40x1
            .addLayer("l0_cnn", new ConvolutionLayer.Builder(new int[] { 3, 3 }, new int[] { 1, 1 }, new int[] { 1, 1 }).nOut(100).build(), "input") // out: 40x40x100
            .addLayer("l1_max", new SubsamplingLayer.Builder(SubsamplingLayer.PoolingType.MAX, new int[] { 2, 2 }, new int[] { 2, 2 }, new int[] { 1, 1 }).build(), "l0_cnn") // out: 21x21x100
            .addLayer("l2_cnn", new ConvolutionLayer.Builder(new int[] { 3, 3 }, new int[] { 2, 2 }, new int[] { 1, 1 }).nOut(200).build(), "l1_max") // out: 11x11x200
            .addLayer("l3_max", new SubsamplingLayer.Builder(SubsamplingLayer.PoolingType.MAX, new int[] { 3, 3 }, new int[] { 2, 2 }, new int[] { 1, 1 }).build(), "l2_cnn") // out: 6x6x200
            .addLayer("l4_fc", new DenseLayer.Builder().nOut(1024).build(), "l3_max") // out: 1x1x1024
            .addLayer("l5_out", new OutputLayer.Builder(LossFunctions.LossFunction.NEGATIVELOGLIKELIHOOD).nOut(10).activation(Activation.SOFTMAX).build(), "l4_fc")
            .setOutputs("l5_out").backprop(true).pretrain(false)
            .setInputTypes(InputType.convolutional(40, 40, 1)).build();
    //First: check that the nIns are set properly...
    Map<String, GraphVertex> map = conf.getVertices();
    LayerVertex l0_cnn = (LayerVertex) map.get("l0_cnn");
    LayerVertex l2_cnn = (LayerVertex) map.get("l2_cnn");
    LayerVertex l4_fc = (LayerVertex) map.get("l4_fc");
    LayerVertex l5_out = (LayerVertex) map.get("l5_out");
    assertEquals(1, ((FeedForwardLayer) l0_cnn.getLayerConf().getLayer()).getNIn());
    assertEquals(100, ((FeedForwardLayer) l2_cnn.getLayerConf().getLayer()).getNIn());
    assertEquals(6 * 6 * 200, ((FeedForwardLayer) l4_fc.getLayerConf().getLayer()).getNIn());
    assertEquals(1024, ((FeedForwardLayer) l5_out.getLayerConf().getLayer()).getNIn());
    //Check updaters state:
    ComputationGraph g = new ComputationGraph(conf);
    g.init();
    g.initGradientsView();
    ComputationGraphUpdater updater = g.getUpdater();
    //First: get the updaters array
    Field layerUpdatersField = updater.getClass().getDeclaredField("layerUpdaters");
    layerUpdatersField.setAccessible(true);
    org.deeplearning4j.nn.api.Updater[] layerUpdaters = (org.deeplearning4j.nn.api.Updater[]) layerUpdatersField.get(updater);
    //And get the map between names and updater indexes
    Field layerUpdatersMapField = updater.getClass().getDeclaredField("layerUpdatersMap");
    layerUpdatersMapField.setAccessible(true);
    Map<String, Integer> layerUpdatersMap = (Map<String, Integer>) layerUpdatersMapField.get(updater);
    //Go through each layer; check that the updater state size matches the parameters size
    org.deeplearning4j.nn.api.Layer[] layers = g.getLayers();
    for (org.deeplearning4j.nn.api.Layer l : layers) {
        String layerName = l.conf().getLayer().getLayerName();
        int nParams = l.numParams();
        Map<String, INDArray> paramTable = l.paramTable();
        Map<String, Integer> parameterSizeCounts = new LinkedHashMap<>();
        for (Map.Entry<String, INDArray> e : paramTable.entrySet()) {
            parameterSizeCounts.put(e.getKey(), e.getValue().length());
        }
        int updaterIdx = layerUpdatersMap.get(layerName);
        org.deeplearning4j.nn.api.Updater u = layerUpdaters[updaterIdx];
        LayerUpdater lu = (LayerUpdater) u;
        Field updaterForVariableField = LayerUpdater.class.getDeclaredField("updaterForVariable");
        updaterForVariableField.setAccessible(true);
        Map<String, GradientUpdater> updaterForVariable = (Map<String, GradientUpdater>) updaterForVariableField.get(lu);
        Map<String, Integer> updaterStateSizeCounts = new HashMap<>();
        for (Map.Entry<String, GradientUpdater> entry : updaterForVariable.entrySet()) {
            GradientUpdater gu = entry.getValue();
            Nesterovs nesterovs = (Nesterovs) gu;
            INDArray v = nesterovs.getV();
            int length = (v == null ? -1 : v.length());
            updaterStateSizeCounts.put(entry.getKey(), length);
        }
        //Check subsampling layers:
        if (l.numParams() == 0) {
            assertEquals(0, updaterForVariable.size());
        }
        System.out.println(layerName + "\t" + nParams + "\t" + parameterSizeCounts + "\t Updater size: " + updaterStateSizeCounts);
        //Now, with nesterov updater: 1 history value per parameter
        for (String s : parameterSizeCounts.keySet()) {
            int paramSize = parameterSizeCounts.get(s);
            int updaterSize = updaterStateSizeCounts.get(s);
            assertEquals(layerName + "/" + s, paramSize, updaterSize);
        }
    }
    //minibatch, depth, height, width
    INDArray in = Nd4j.create(2, 1, 40, 40);
    INDArray l = Nd4j.create(2, 10);
    DataSet ds = new DataSet(in, l);
    g.fit(ds);
}
Also used : LayerVertex(org.deeplearning4j.nn.conf.graph.LayerVertex) HashMap(java.util.HashMap) LinkedHashMap(java.util.LinkedHashMap) Map(java.util.Map) DataSet(org.nd4j.linalg.dataset.DataSet) ComputationGraphUpdater(org.deeplearning4j.nn.updater.graph.ComputationGraphUpdater) GradientUpdater(org.nd4j.linalg.learning.GradientUpdater) Field(java.lang.reflect.Field) GraphVertex(org.deeplearning4j.nn.conf.graph.GraphVertex) LayerUpdater(org.deeplearning4j.nn.updater.LayerUpdater) Updater(org.deeplearning4j.nn.conf.Updater) Nesterovs(org.nd4j.linalg.learning.Nesterovs) INDArray(org.nd4j.linalg.api.ndarray.INDArray) ComputationGraphConfiguration(org.deeplearning4j.nn.conf.ComputationGraphConfiguration) Test(org.junit.Test)
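
The per-variable loop above verifies that the Nesterovs updater keeps exactly one history (velocity) value per parameter. A minimal sketch of the same invariant checked once for the whole graph, assuming the conf built in the test above and the same DL4J 0.x API (getUpdater(), getStateViewArray()); this is an illustrative addition, not code from the project:

// Sketch: with the Nesterovs updater there is one velocity value per parameter, so the
// flattened updater state view should be exactly as long as the parameter vector.
ComputationGraph g = new ComputationGraph(conf);
g.init();
g.initGradientsView();
ComputationGraphUpdater updater = g.getUpdater();
INDArray stateView = updater.getStateViewArray();
// stateView can be null for stateless updaters (e.g. plain SGD); Nesterovs keeps state
assertNotNull(stateView);
assertEquals(g.numParams(), stateView.length());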

Example 2 with ComputationGraphUpdater

Use of org.deeplearning4j.nn.updater.graph.ComputationGraphUpdater in project deeplearning4j by deeplearning4j.

The class ComputationGraph, method clone.

@Override
public ComputationGraph clone() {
    ComputationGraph cg = new ComputationGraph(configuration.clone());
    cg.init(params().dup(), false);
    if (solver != null) {
        //If solver is null, the updater hasn't been initialized yet and there is no state to copy; calling getUpdater() here would only force an unnecessary initialization
        ComputationGraphUpdater u = this.getUpdater();
        INDArray updaterState = u.getStateViewArray();
        if (updaterState != null) {
            cg.getUpdater().setStateViewArray(updaterState.dup());
        }
    }
    cg.listeners = this.listeners;
    for (int i = 0; i < topologicalOrder.length; i++) {
        if (!vertices[topologicalOrder[i]].hasLayer())
            continue;
        String layerName = vertices[topologicalOrder[i]].getVertexName();
        if (getLayer(layerName) instanceof FrozenLayer) {
            cg.getVertex(layerName).setLayerAsFrozen();
        }
    }
    return cg;
}
Also used : FrozenLayer(org.deeplearning4j.nn.layers.FrozenLayer) INDArray(org.nd4j.linalg.api.ndarray.INDArray) ComputationGraphUpdater(org.deeplearning4j.nn.updater.graph.ComputationGraphUpdater)
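
A hedged usage sketch of the contract implied by clone() above: the copy carries its own duplicated updater state rather than a reference to the original view. It assumes 'net' is an initialized ComputationGraph whose updater already holds state (e.g. after at least one fit() call with a stateful updater such as Nesterovs):

// Sketch only: 'net' is an assumed, already-initialized ComputationGraph with updater state.
ComputationGraph copy = net.clone();
INDArray originalState = net.getUpdater().getStateViewArray();
INDArray copiedState = copy.getUpdater().getStateViewArray();
if (originalState != null) {
    // Same values, but a different (dup'd) array: mutating one must not touch the other
    assertEquals(originalState, copiedState);
    assertFalse(originalState == copiedState);
}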

Example 3 with ComputationGraphUpdater

Use of org.deeplearning4j.nn.updater.graph.ComputationGraphUpdater in project deeplearning4j by deeplearning4j.

The class ComputationGraph, method getUpdater.

/**
     * Get the ComputationGraphUpdater for the network
     */
public ComputationGraphUpdater getUpdater() {
    if (solver == null) {
        solver = new Solver.Builder().configure(conf()).listeners(getListeners()).model(this).build();
        solver.getOptimizer().setUpdaterComputationGraph(new ComputationGraphUpdater(this));
    }
    return solver.getOptimizer().getComputationGraphUpdater();
}
Also used : Solver(org.deeplearning4j.optimize.Solver) ComputationGraphUpdater(org.deeplearning4j.nn.updater.graph.ComputationGraphUpdater)
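
getUpdater() builds the Solver, and with it the ComputationGraphUpdater, only on the first call; subsequent calls return the cached instance. A short sketch of that behaviour, assuming an initialized graph 'net':

// Sketch: the ComputationGraphUpdater is created lazily on the first getUpdater() call
// and reused afterwards (assumes 'net' has already been init()'d).
ComputationGraphUpdater first = net.getUpdater();
ComputationGraphUpdater second = net.getUpdater();
assertSame(first, second);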

Example 4 with ComputationGraphUpdater

Use of org.deeplearning4j.nn.updater.graph.ComputationGraphUpdater in project deeplearning4j by deeplearning4j.

The class GradientCheckUtil, method checkGradients.

/**Check backprop gradients for a ComputationGraph
     * @param graph ComputationGraph to test. This must be initialized.
     * @param epsilon Usually on the order of 1e-4 or so.
     * @param maxRelError Maximum relative error. Usually < 0.01, though maybe more for deep networks
     * @param minAbsoluteError Minimum absolute error to cause a failure. Numerical gradients can be non-zero due to precision issues.
     *                         For example, 0.0 vs. 1e-18: relative error is 1.0, but not really a failure
     * @param print Whether to print full pass/failure details for each parameter gradient
     * @param exitOnFirstError If true: return upon first failure. If false: continue checking even if
     *  one parameter gradient has failed. Typically use false for debugging, true for unit tests.
     * @param inputs Input arrays to use for forward pass. May be mini-batch data.
     * @param labels Labels/targets (output) arrays to use to calculate backprop gradient. May be mini-batch data.
     * @return true if all parameter gradients pass the check, false otherwise.
     */
public static boolean checkGradients(ComputationGraph graph, double epsilon, double maxRelError, double minAbsoluteError, boolean print, boolean exitOnFirstError, INDArray[] inputs, INDArray[] labels) {
    //Basic sanity checks on input:
    if (epsilon <= 0.0 || epsilon > 0.1)
        throw new IllegalArgumentException("Invalid epsilon: expect epsilon in range (0,0.1], usually 1e-4 or so");
    if (maxRelError <= 0.0 || maxRelError > 0.25)
        throw new IllegalArgumentException("Invalid maxRelativeError: " + maxRelError);
    if (graph.getNumInputArrays() != inputs.length)
        throw new IllegalArgumentException("Invalid input arrays: expect " + graph.getNumInputArrays() + " inputs");
    if (graph.getNumOutputArrays() != labels.length)
        throw new IllegalArgumentException("Invalid labels arrays: expect " + graph.getNumOutputArrays() + " outputs");
    //Check configuration
    int layerCount = 0;
    for (String vertexName : graph.getConfiguration().getVertices().keySet()) {
        GraphVertex gv = graph.getConfiguration().getVertices().get(vertexName);
        if (!(gv instanceof LayerVertex))
            continue;
        LayerVertex lv = (LayerVertex) gv;
        org.deeplearning4j.nn.conf.Updater u = lv.getLayerConf().getLayer().getUpdater();
        if (u == org.deeplearning4j.nn.conf.Updater.SGD) {
            //Must have LR of 1.0
            double lr = lv.getLayerConf().getLayer().getLearningRate();
            if (lr != 1.0) {
                throw new IllegalStateException("When using SGD updater, must also use lr=1.0 for layer \"" + vertexName + "\"; got " + u);
            }
        } else if (u != org.deeplearning4j.nn.conf.Updater.NONE) {
            throw new IllegalStateException("Must have Updater.NONE (or SGD + lr=1.0) for layer \"" + vertexName + "\"; got " + u);
        }
        double dropout = lv.getLayerConf().getLayer().getDropOut();
        if (lv.getLayerConf().isUseRegularization() && dropout != 0.0) {
            throw new IllegalStateException("Must have dropout == 0.0 for gradient checks - got dropout = " + dropout + " for layer " + layerCount);
        }
        IActivation activation = lv.getLayerConf().getLayer().getActivationFn();
        if (activation != null) {
            if (!VALID_ACTIVATION_FUNCTIONS.contains(activation.getClass())) {
                log.warn("Layer \"" + vertexName + "\" is possibly using an unsuitable activation function: " + activation.getClass() + ". Activation functions for gradient checks must be smooth (like sigmoid, tanh, softmax) and not " + "contain discontinuities like ReLU or LeakyReLU (these may cause spurious failures)");
            }
        }
    }
    for (int i = 0; i < inputs.length; i++) graph.setInput(i, inputs[i]);
    for (int i = 0; i < labels.length; i++) graph.setLabel(i, labels[i]);
    graph.computeGradientAndScore();
    Pair<Gradient, Double> gradAndScore = graph.gradientAndScore();
    ComputationGraphUpdater updater = new ComputationGraphUpdater(graph);
    updater.update(graph, gradAndScore.getFirst(), 0, graph.batchSize());
    //need dup: gradients are a *view* of the full gradient array (which will change every time backprop is done)
    INDArray gradientToCheck = gradAndScore.getFirst().gradient().dup();
    //need dup: params are a *view* of full parameters
    INDArray originalParams = graph.params().dup();
    int nParams = originalParams.length();
    Map<String, INDArray> paramTable = graph.paramTable();
    List<String> paramNames = new ArrayList<>(paramTable.keySet());
    int[] paramEnds = new int[paramNames.size()];
    paramEnds[0] = paramTable.get(paramNames.get(0)).length();
    for (int i = 1; i < paramEnds.length; i++) {
        paramEnds[i] = paramEnds[i - 1] + paramTable.get(paramNames.get(i)).length();
    }
    int currParamNameIdx = 0;
    int totalNFailures = 0;
    double maxError = 0.0;
    MultiDataSet mds = new MultiDataSet(inputs, labels);
    //Assumption here: params is a view that we can modify in-place
    INDArray params = graph.params();
    for (int i = 0; i < nParams; i++) {
        //Get param name
        if (i >= paramEnds[currParamNameIdx]) {
            currParamNameIdx++;
        }
        String paramName = paramNames.get(currParamNameIdx);
        //(w+epsilon): Do forward pass and score
        double origValue = params.getDouble(i);
        params.putScalar(i, origValue + epsilon);
        //training == true for batch norm, etc (scores and gradients need to be calculated on same thing)
        double scorePlus = graph.score(mds, true);
        //(w-epsilon): Do forward pass and score
        params.putScalar(i, origValue - epsilon);
        double scoreMinus = graph.score(mds, true);
        //Reset original param value
        params.putScalar(i, origValue);
        //Calculate numerical parameter gradient:
        double scoreDelta = scorePlus - scoreMinus;
        double numericalGradient = scoreDelta / (2 * epsilon);
        if (Double.isNaN(numericalGradient))
            throw new IllegalStateException("Numerical gradient was NaN for parameter " + i + " of " + nParams);
        double backpropGradient = gradientToCheck.getDouble(i);
        //http://cs231n.github.io/neural-networks-3/#gradcheck
        //use mean centered
        double relError = Math.abs(backpropGradient - numericalGradient) / (Math.abs(numericalGradient) + Math.abs(backpropGradient));
        if (backpropGradient == 0.0 && numericalGradient == 0.0)
            //Edge case: e.g., RNNs with a time series length of 1
            relError = 0.0;
        if (relError > maxError)
            maxError = relError;
        if (relError > maxRelError || Double.isNaN(relError)) {
            double absError = Math.abs(backpropGradient - numericalGradient);
            if (absError < minAbsoluteError) {
                log.info("Param " + i + " (" + paramName + ") passed: grad= " + backpropGradient + ", numericalGrad= " + numericalGradient + ", relError= " + relError + "; absolute error = " + absError + " < minAbsoluteError = " + minAbsoluteError);
            } else {
                if (print)
                    log.info("Param " + i + " (" + paramName + ") FAILED: grad= " + backpropGradient + ", numericalGrad= " + numericalGradient + ", relError= " + relError + ", scorePlus=" + scorePlus + ", scoreMinus= " + scoreMinus);
                if (exitOnFirstError)
                    return false;
                totalNFailures++;
            }
        } else if (print) {
            log.info("Param " + i + " (" + paramName + ") passed: grad= " + backpropGradient + ", numericalGrad= " + numericalGradient + ", relError= " + relError);
        }
    }
    if (print) {
        int nPass = nParams - totalNFailures;
        log.info("GradientCheckUtil.checkGradients(): " + nParams + " params checked, " + nPass + " passed, " + totalNFailures + " failed. Largest relative error = " + maxError);
    }
    return totalNFailures == 0;
}
Also used : LayerVertex(org.deeplearning4j.nn.conf.graph.LayerVertex) Gradient(org.deeplearning4j.nn.gradient.Gradient) ComputationGraphUpdater(org.deeplearning4j.nn.updater.graph.ComputationGraphUpdater) ArrayList(java.util.ArrayList) IActivation(org.nd4j.linalg.activations.IActivation) GraphVertex(org.deeplearning4j.nn.conf.graph.GraphVertex) INDArray(org.nd4j.linalg.api.ndarray.INDArray) MultiDataSet(org.nd4j.linalg.dataset.MultiDataSet)
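
checkGradients constructs its own ComputationGraphUpdater internally, so callers only supply an initialized graph plus input and label arrays; per the configuration checks above, every layer must use Updater.NONE (or SGD with lr = 1.0), no dropout, and a smooth activation function. A minimal, hedged sketch of a call; layer names, sizes, and tolerance values here are illustrative, and gradient checks are normally run with double-precision data:

// Illustrative sketch of invoking GradientCheckUtil.checkGradients on a tiny graph.
// The config must satisfy the checks above: Updater.NONE (or SGD + lr=1.0), no dropout,
// and a smooth activation such as tanh or softmax.
ComputationGraphConfiguration conf = new NeuralNetConfiguration.Builder()
        .updater(Updater.NONE)
        .graphBuilder()
        .addInputs("in")
        .addLayer("out", new OutputLayer.Builder(LossFunctions.LossFunction.MSE)
                .nIn(4).nOut(3).activation(Activation.TANH).build(), "in")
        .setOutputs("out")
        .build();
ComputationGraph net = new ComputationGraph(conf);
net.init();

INDArray[] inputs = new INDArray[] {Nd4j.rand(5, 4)};
INDArray[] labels = new INDArray[] {Nd4j.rand(5, 3)};

// epsilon in (0, 0.1], maxRelError <= 0.25, per the sanity checks in the method above
boolean ok = GradientCheckUtil.checkGradients(net, 1e-6, 1e-3, 1e-8, true, false, inputs, labels);
assertTrue(ok);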

Example 5 with ComputationGraphUpdater

Use of org.deeplearning4j.nn.updater.graph.ComputationGraphUpdater in project deeplearning4j by deeplearning4j.

The class ParallelWrapper, method averageUpdatersState.

private void averageUpdatersState(AtomicInteger locker, double score) {
    if (averageUpdaters) {
        ComputationGraphUpdater updater = ((ComputationGraph) model).getUpdater();
        int batchSize = 0;
        if (updater != null && updater.getStateViewArray() != null) {
            if (!legacyAveraging || Nd4j.getAffinityManager().getNumberOfDevices() == 1) {
                List<INDArray> updaters = new ArrayList<>();
                for (int cnt = 0; cnt < workers && cnt < locker.get(); cnt++) {
                    ComputationGraph workerModel = (ComputationGraph) zoo[cnt].getModel();
                    updaters.add(workerModel.getUpdater().getStateViewArray());
                    batchSize += workerModel.batchSize();
                }
                Nd4j.averageAndPropagate(updater.getStateViewArray(), updaters);
            } else {
                INDArray state = Nd4j.zeros(updater.getStateViewArray().shape());
                int cnt = 0;
                for (; cnt < workers && cnt < locker.get(); cnt++) {
                    ComputationGraph workerModel = (ComputationGraph) zoo[cnt].getModel();
                    state.addi(workerModel.getUpdater().getStateViewArray());
                    batchSize += workerModel.batchSize();
                }
                state.divi(cnt);
                updater.setStateViewArray(state);
            }
        }
    }
    ((ComputationGraph) model).setScore(score);
}
Also used : INDArray(org.nd4j.linalg.api.ndarray.INDArray) ComputationGraphUpdater(org.deeplearning4j.nn.updater.graph.ComputationGraphUpdater) ComputationGraph(org.deeplearning4j.nn.graph.ComputationGraph)
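
The legacy branch above averages updater state by summing the workers' state views and dividing by the number of workers consumed. A self-contained sketch of just that arithmetic with plain Nd4j operations; array shapes and names are illustrative:

// Sketch of the "legacy averaging" arithmetic used above: an element-wise mean over
// the workers' updater state arrays (shapes here are illustrative).
INDArray[] workerStates = new INDArray[] {Nd4j.rand(1, 10), Nd4j.rand(1, 10), Nd4j.rand(1, 10)};
INDArray averaged = Nd4j.zeros(workerStates[0].shape());
for (INDArray s : workerStates) {
    averaged.addi(s);                // accumulate worker state
}
averaged.divi(workerStates.length);  // divide by the number of contributing workers
// The result would then be written back via updater.setStateViewArray(averaged)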

Aggregations

ComputationGraphUpdater (org.deeplearning4j.nn.updater.graph.ComputationGraphUpdater): 9
INDArray (org.nd4j.linalg.api.ndarray.INDArray): 6
ComputationGraph (org.deeplearning4j.nn.graph.ComputationGraph): 4
ComputationGraphConfiguration (org.deeplearning4j.nn.conf.ComputationGraphConfiguration): 2
GraphVertex (org.deeplearning4j.nn.conf.graph.GraphVertex): 2
LayerVertex (org.deeplearning4j.nn.conf.graph.LayerVertex): 2
GridExecutioner (org.nd4j.linalg.api.ops.executioner.GridExecutioner): 2
Field (java.lang.reflect.Field): 1
ArrayList (java.util.ArrayList): 1
HashMap (java.util.HashMap): 1
LinkedHashMap (java.util.LinkedHashMap): 1
Map (java.util.Map): 1
ZipEntry (java.util.zip.ZipEntry): 1
ZipFile (java.util.zip.ZipFile): 1
Persistable (org.deeplearning4j.api.storage.Persistable): 1
StatsStorageRouter (org.deeplearning4j.api.storage.StatsStorageRouter): 1
StorageMetaData (org.deeplearning4j.api.storage.StorageMetaData): 1
Layer (org.deeplearning4j.nn.api.Layer): 1
Updater (org.deeplearning4j.nn.conf.Updater): 1
Gradient (org.deeplearning4j.nn.gradient.Gradient): 1