
Example 1 with IOutputLayer

Use of org.deeplearning4j.nn.api.layers.IOutputLayer in project deeplearning4j by deeplearning4j.

From class GradientCheckUtil, method checkGradients:

/**
     * Check backprop gradients for a MultiLayerNetwork.
     * @param mln MultiLayerNetwork to test. This must be initialized.
     * @param epsilon Usually on the order of 1e-4 or so.
     * @param maxRelError Maximum relative error. Usually < 1e-5 or so, though this may need to be larger for deep networks or those with highly nonlinear activations
     * @param minAbsoluteError Minimum absolute error to cause a failure. Numerical gradients can be non-zero due to precision issues.
     *                         For example, 0.0 vs. 1e-18: relative error is 1.0, but not really a failure
     * @param print Whether to print full pass/failure details for each parameter gradient
     * @param exitOnFirstError If true: return upon first failure. If false: continue checking even if
     *  one parameter gradient has failed. Typically use false for debugging, true for unit tests.
     * @param input Input array to use for forward pass. May be mini-batch data.
     * @param labels Labels/targets to use to calculate backprop gradient. May be mini-batch data.
     * @return true if all gradient checks pass, false otherwise.
     */
public static boolean checkGradients(MultiLayerNetwork mln, double epsilon, double maxRelError, double minAbsoluteError, boolean print, boolean exitOnFirstError, INDArray input, INDArray labels) {
    //Basic sanity checks on input:
    if (epsilon <= 0.0 || epsilon > 0.1)
        throw new IllegalArgumentException("Invalid epsilon: expect epsilon in range (0,0.1], usually 1e-4 or so");
    if (maxRelError <= 0.0 || maxRelError > 0.25)
        throw new IllegalArgumentException("Invalid maxRelativeError: " + maxRelError);
    if (!(mln.getOutputLayer() instanceof IOutputLayer))
        throw new IllegalArgumentException("Cannot check backprop gradients without OutputLayer");
    //Check network configuration:
    int layerCount = 0;
    for (NeuralNetConfiguration n : mln.getLayerWiseConfigurations().getConfs()) {
        org.deeplearning4j.nn.conf.Updater u = n.getLayer().getUpdater();
        if (u == org.deeplearning4j.nn.conf.Updater.SGD) {
            //Must have LR of 1.0
            double lr = n.getLayer().getLearningRate();
            if (lr != 1.0) {
                throw new IllegalStateException("When using SGD updater, must also use lr=1.0 for layer " + layerCount + "; got " + u + " with lr=" + lr + " for layer \"" + n.getLayer().getLayerName() + "\"");
            }
        } else if (u != org.deeplearning4j.nn.conf.Updater.NONE) {
            throw new IllegalStateException("Must have Updater.NONE (or SGD + lr=1.0) for layer " + layerCount + "; got " + u);
        }
        double dropout = n.getLayer().getDropOut();
        if (n.isUseRegularization() && dropout != 0.0) {
            throw new IllegalStateException("Must have dropout == 0.0 for gradient checks - got dropout = " + dropout + " for layer " + layerCount);
        }
        IActivation activation = n.getLayer().getActivationFn();
        if (activation != null) {
            if (!VALID_ACTIVATION_FUNCTIONS.contains(activation.getClass())) {
                log.warn("Layer " + layerCount + " is possibly using an unsuitable activation function: " + activation.getClass() + ". Activation functions for gradient checks must be smooth (like sigmoid, tanh, softmax) and not " + "contain discontinuities like ReLU or LeakyReLU (these may cause spurious failures)");
            }
        }
        layerCount++; //Advance the layer index so error messages report the correct layer
    }
    mln.setInput(input);
    mln.setLabels(labels);
    mln.computeGradientAndScore();
    Pair<Gradient, Double> gradAndScore = mln.gradientAndScore();
    Updater updater = UpdaterCreator.getUpdater(mln);
    updater.update(mln, gradAndScore.getFirst(), 0, mln.batchSize());
    //need dup: gradients are a *view* of the full gradient array (which will change every time backprop is done)
    INDArray gradientToCheck = gradAndScore.getFirst().gradient().dup();
    //need dup: params are a *view* of full parameters
    INDArray originalParams = mln.params().dup();
    int nParams = originalParams.length();
    Map<String, INDArray> paramTable = mln.paramTable();
    List<String> paramNames = new ArrayList<>(paramTable.keySet());
    int[] paramEnds = new int[paramNames.size()];
    paramEnds[0] = paramTable.get(paramNames.get(0)).length();
    for (int i = 1; i < paramEnds.length; i++) {
        paramEnds[i] = paramEnds[i - 1] + paramTable.get(paramNames.get(i)).length();
    }
    int totalNFailures = 0;
    double maxError = 0.0;
    DataSet ds = new DataSet(input, labels);
    int currParamNameIdx = 0;
    //Assumption here: params is a view that we can modify in-place
    INDArray params = mln.params();
    for (int i = 0; i < nParams; i++) {
        //Get param name
        if (i >= paramEnds[currParamNameIdx]) {
            currParamNameIdx++;
        }
        String paramName = paramNames.get(currParamNameIdx);
        //(w+epsilon): Do forward pass and score
        double origValue = params.getDouble(i);
        params.putScalar(i, origValue + epsilon);
        double scorePlus = mln.score(ds, true);
        //(w-epsilon): Do forward pass and score
        params.putScalar(i, origValue - epsilon);
        double scoreMinus = mln.score(ds, true);
        //Reset original param value
        params.putScalar(i, origValue);
        //Calculate numerical parameter gradient:
        double scoreDelta = scorePlus - scoreMinus;
        double numericalGradient = scoreDelta / (2 * epsilon);
        if (Double.isNaN(numericalGradient))
            throw new IllegalStateException("Numerical gradient was NaN for parameter " + i + " of " + nParams);
        double backpropGradient = gradientToCheck.getDouble(i);
        //Relative error, following http://cs231n.github.io/neural-networks-3/#gradcheck
        //(here normalized by the sum of the two gradient magnitudes)
        double relError = Math.abs(backpropGradient - numericalGradient) / (Math.abs(numericalGradient) + Math.abs(backpropGradient));
        if (backpropGradient == 0.0 && numericalGradient == 0.0)
            //Edge case: e.g., RNNs with a time series length of 1
            relError = 0.0;
        if (relError > maxError)
            maxError = relError;
        if (relError > maxRelError || Double.isNaN(relError)) {
            double absError = Math.abs(backpropGradient - numericalGradient);
            if (absError < minAbsoluteError) {
                log.info("Param " + i + " (" + paramName + ") passed: grad= " + backpropGradient + ", numericalGrad= " + numericalGradient + ", relError= " + relError + "; absolute error = " + absError + " < minAbsoluteError = " + minAbsoluteError);
            } else {
                if (print)
                    log.info("Param " + i + " (" + paramName + ") FAILED: grad= " + backpropGradient + ", numericalGrad= " + numericalGradient + ", relError= " + relError + ", scorePlus=" + scorePlus + ", scoreMinus= " + scoreMinus);
                if (exitOnFirstError)
                    return false;
                totalNFailures++;
            }
        } else if (print) {
            log.info("Param " + i + " (" + paramName + ") passed: grad= " + backpropGradient + ", numericalGrad= " + numericalGradient + ", relError= " + relError);
        }
    }
    if (print) {
        int nPass = nParams - totalNFailures;
        log.info("GradientCheckUtil.checkGradients(): " + nParams + " params checked, " + nPass + " passed, " + totalNFailures + " failed. Largest relative error = " + maxError);
    }
    return totalNFailures == 0;
}
Also used: Gradient (org.deeplearning4j.nn.gradient.Gradient), DataSet (org.nd4j.linalg.dataset.DataSet), MultiDataSet (org.nd4j.linalg.dataset.MultiDataSet), ArrayList (java.util.ArrayList), NeuralNetConfiguration (org.deeplearning4j.nn.conf.NeuralNetConfiguration), IActivation (org.nd4j.linalg.activations.IActivation), INDArray (org.nd4j.linalg.api.ndarray.INDArray), ComputationGraphUpdater (org.deeplearning4j.nn.updater.graph.ComputationGraphUpdater), Updater (org.deeplearning4j.nn.api.Updater), IOutputLayer (org.deeplearning4j.nn.api.layers.IOutputLayer)
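
A minimal usage sketch (not part of the deeplearning4j sources) showing how checkGradients is typically invoked from a unit test. The configuration is hypothetical, but it satisfies the constraints the method enforces above: Updater.NONE, no dropout, a smooth activation function, and double precision for the numerical gradients. The builder calls assume the DL4J 0.x-era API used throughout this page.

//Hypothetical test sketch; assumes org.junit.Assert.assertTrue and the nd4j/dl4j imports listed above
DataTypeUtil.setDTypeForContext(DataBuffer.Type.DOUBLE); //numerical gradients need double precision
MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder()
        .updater(org.deeplearning4j.nn.conf.Updater.NONE) //required by the configuration checks above
        .list()
        .layer(0, new DenseLayer.Builder().nIn(4).nOut(3)
                .activation(Activation.TANH).build()) //smooth activation, per the warning above
        .layer(1, new OutputLayer.Builder(LossFunctions.LossFunction.MCXENT)
                .nIn(3).nOut(2).activation(Activation.SOFTMAX).build())
        .build();
MultiLayerNetwork net = new MultiLayerNetwork(conf);
net.init();
INDArray input = Nd4j.rand(5, 4);
INDArray labels = Nd4j.zeros(5, 2);
for (int i = 0; i < 5; i++) labels.putScalar(new int[] { i, i % 2 }, 1.0); //one-hot labels
boolean gradOK = GradientCheckUtil.checkGradients(net, 1e-6, 1e-3, 1e-8, true, false, input, labels);
assertTrue(gradOK);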

Example 2 with IOutputLayer

Use of org.deeplearning4j.nn.api.layers.IOutputLayer in project deeplearning4j by deeplearning4j.

From class ComputationGraph, method score:

/**
     * Sets the input and labels and returns a score for the prediction with respect to the true labels<br>
     *
     * @param dataSet  the data to score
     * @param training whether score is being calculated at training time (true) or test time (false)
     * @return the score for the given input,label pairs
     */
public double score(MultiDataSet dataSet, boolean training) {
    boolean hasMaskArrays = dataSet.hasMaskArrays();
    if (hasMaskArrays) {
        setLayerMaskArrays(dataSet.getFeaturesMaskArrays(), dataSet.getLabelsMaskArrays());
    }
    feedForward(dataSet.getFeatures(), training);
    INDArray[] labels = dataSet.getLabels();
    setLabels(labels);
    //Score: sum of the scores for the various output layers...
    double l1 = calcL1();
    double l2 = calcL2();
    double score = 0.0;
    int i = 0;
    for (String s : configuration.getNetworkOutputs()) {
        Layer outLayer = verticesMap.get(s).getLayer();
        if (outLayer == null || !(outLayer instanceof IOutputLayer)) {
            log.warn("Cannot calculate score: vertex \"" + s + "\" is not an output layer");
            return 0.0;
        }
        IOutputLayer ol = (IOutputLayer) outLayer;
        ol.setLabels(labels[i++]);
        score += ol.computeScore(l1, l2, training);
        //Only want to add l1/l2 once...
        l1 = 0.0;
        l2 = 0.0;
    }
    if (hasMaskArrays)
        clearLayerMaskArrays();
    return score;
}
Also used: INDArray (org.nd4j.linalg.api.ndarray.INDArray), Layer (org.deeplearning4j.nn.api.Layer), FrozenLayer (org.deeplearning4j.nn.layers.FrozenLayer), RecurrentLayer (org.deeplearning4j.nn.api.layers.RecurrentLayer), FeedForwardLayer (org.deeplearning4j.nn.conf.layers.FeedForwardLayer), IOutputLayer (org.deeplearning4j.nn.api.layers.IOutputLayer)
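
A hedged usage sketch for the method above: scoring a graph with two output layers against a MultiDataSet. The identifiers graph, features, labels1 and labels2 are placeholders, not names from the source.

//Placeholder data; a real call would use arrays matching the graph's inputs and outputs
org.nd4j.linalg.dataset.MultiDataSet mds = new org.nd4j.linalg.dataset.MultiDataSet(
        new INDArray[] { features }, new INDArray[] { labels1, labels2 });
double testScore = graph.score(mds, false); //test time: training-only behavior such as dropout is disabled
double trainScore = graph.score(mds, true); //training time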

Example 3 with IOutputLayer

Use of org.deeplearning4j.nn.api.layers.IOutputLayer in project deeplearning4j by deeplearning4j.

From class ComputationGraph, method feedForward:

private Map<String, INDArray> feedForward(boolean train, boolean excludeOutputLayers) {
    Map<String, INDArray> layerActivations = new HashMap<>();
    //Do forward pass according to the topological ordering of the network
    for (int i = 0; i < topologicalOrder.length; i++) {
        GraphVertex current = vertices[topologicalOrder[i]];
        if (current.isInputVertex()) {
            VertexIndices[] inputsTo = current.getOutputVertices();
            INDArray input = inputs[current.getVertexIndex()];
            layerActivations.put(current.getVertexName(), input);
            for (VertexIndices v : inputsTo) {
                int vIdx = v.getVertexIndex();
                int vIdxInputNum = v.getVertexEdgeNumber();
                //This input: the 'vIdxInputNum'th input to vertex 'vIdx'
                vertices[vIdx].setInput(vIdxInputNum, input.dup());
            }
        } else {
            //Do forward pass:
            if (excludeOutputLayers && current.isOutputVertex() && current.hasLayer() && current.getLayer() instanceof IOutputLayer) {
                // we only need to ensure the input to the output layers is set properly
                continue;
            }
            INDArray out = current.doForward(train);
            if (current.hasLayer()) {
                layerActivations.put(current.getVertexName(), out);
            }
            //Now, set the inputs for the next vertices:
            VertexIndices[] outputsTo = current.getOutputVertices();
            if (outputsTo != null) {
                for (VertexIndices v : outputsTo) {
                    int vIdx = v.getVertexIndex();
                    int inputNum = v.getVertexEdgeNumber();
                    //This (jth) connection from the output: is the 'inputNum'th input to vertex 'vIdx'
                    vertices[vIdx].setInput(inputNum, out);
                }
            }
        }
    }
    return layerActivations;
}
Also used: GraphVertex (org.deeplearning4j.nn.graph.vertex.GraphVertex), INDArray (org.nd4j.linalg.api.ndarray.INDArray), VertexIndices (org.deeplearning4j.nn.graph.vertex.VertexIndices), IOutputLayer (org.deeplearning4j.nn.api.layers.IOutputLayer)
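
This private overload backs the public feedForward variants. A typical call through the public API might look like the following sketch; the vertex name "dense1" is hypothetical.

Map<String, INDArray> activations = graph.feedForward(inputArray, false); //test-time forward pass
INDArray hidden = activations.get("dense1"); //activations are keyed by vertex name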

Example 4 with IOutputLayer

Use of org.deeplearning4j.nn.api.layers.IOutputLayer in project deeplearning4j by deeplearning4j.

From class ComputationGraph, method calcBackpropGradients:

/**
     * Do backprop (gradient calculation)
     *
     * @param truncatedBPTT    false: normal backprop. true: calculate gradients using truncated BPTT for RNN layers
     * @param externalEpsilons Usually null (for typical supervised learning). If non-null (and non-empty), the
     *                         errors are assumed to have been provided externally by the user, as in
     *                         reinforcement learning situations, for example.
     */
protected void calcBackpropGradients(boolean truncatedBPTT, INDArray... externalEpsilons) {
    if (flattenedGradients == null)
        initGradientsView();
    LinkedList<Triple<String, INDArray, Character>> gradients = new LinkedList<>();
    //Do backprop according to the reverse of the topological ordering of the network
    //If true: already set epsilon for this vertex; later epsilons should be *added* to the existing one, not set
    boolean[] setVertexEpsilon = new boolean[topologicalOrder.length];
    for (int i = topologicalOrder.length - 1; i >= 0; i--) {
        GraphVertex current = vertices[topologicalOrder[i]];
        if (current.isInputVertex())
            //No op
            continue;
        //FIXME: make the frozen vertex feature extraction more flexible
        if (current.hasLayer() && current.getLayer() instanceof FrozenLayer)
            break;
        if (current.isOutputVertex()) {
            //Two reasons for a vertex to be an output vertex:
            //(a) it's an output layer (i.e., instanceof IOutputLayer), or
            //(b) it's a normal layer, but it has been marked as an output layer for use in external errors - for reinforcement learning, for example
            int thisOutputNumber = configuration.getNetworkOutputs().indexOf(current.getVertexName());
            if (current.getLayer() instanceof IOutputLayer) {
                IOutputLayer outputLayer = (IOutputLayer) current.getLayer();
                INDArray currLabels = labels[thisOutputNumber];
                outputLayer.setLabels(currLabels);
            } else {
                current.setEpsilon(externalEpsilons[thisOutputNumber]);
                setVertexEpsilon[topologicalOrder[i]] = true;
            }
        }
        Pair<Gradient, INDArray[]> pair = current.doBackward(truncatedBPTT);
        INDArray[] epsilons = pair.getSecond();
        //Inputs to the current GraphVertex:
        VertexIndices[] inputVertices = current.getInputVertices();
        //Set epsilons for the vertices that provide inputs to this vertex:
        if (inputVertices != null) {
            int j = 0;
            for (VertexIndices v : inputVertices) {
                GraphVertex gv = vertices[v.getVertexIndex()];
                if (setVertexEpsilon[gv.getVertexIndex()]) {
                    //This vertex: must output to multiple vertices... we want to add the epsilons here
                    INDArray currentEps = gv.getEpsilon();
                    //TODO: in some circumstances, it may be safe to do an in-place add (but not always)
                    gv.setEpsilon(currentEps.add(epsilons[j++]));
                } else {
                    gv.setEpsilon(epsilons[j++]);
                }
                setVertexEpsilon[gv.getVertexIndex()] = true;
            }
        }
        if (pair.getFirst() != null) {
            Gradient g = pair.getFirst();
            Map<String, INDArray> map = g.gradientForVariable();
            LinkedList<Triple<String, INDArray, Character>> tempList = new LinkedList<>();
            for (Map.Entry<String, INDArray> entry : map.entrySet()) {
                String origName = entry.getKey();
                String newName = current.getVertexName() + "_" + origName;
                tempList.addFirst(new Triple<>(newName, entry.getValue(), g.flatteningOrderForVariable(origName)));
            }
            for (Triple<String, INDArray, Character> t : tempList) gradients.addFirst(t);
        }
    }
    //Now, add the gradients in the order we need them in for flattening (same as params order)
    Gradient gradient = new DefaultGradient(flattenedGradients);
    for (Triple<String, INDArray, Character> t : gradients) {
        gradient.setGradientFor(t.getFirst(), t.getSecond(), t.getThird());
    }
    this.gradient = gradient;
}
Also used: DefaultGradient (org.deeplearning4j.nn.gradient.DefaultGradient), Gradient (org.deeplearning4j.nn.gradient.Gradient), Triple (org.deeplearning4j.berkeley.Triple), FrozenLayer (org.deeplearning4j.nn.layers.FrozenLayer), GraphVertex (org.deeplearning4j.nn.graph.vertex.GraphVertex), INDArray (org.nd4j.linalg.api.ndarray.INDArray), VertexIndices (org.deeplearning4j.nn.graph.vertex.VertexIndices), IOutputLayer (org.deeplearning4j.nn.api.layers.IOutputLayer)
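
A sketch of the external-errors path (case (b) above), reached through the public backpropGradient method rather than by calling this protected method directly. The identifiers graph, features and externalError are placeholders; one epsilon array is expected per network output.

graph.setInputs(features); //set the graph inputs
graph.feedForward(true); //forward pass at training time, populating the vertex activations
Gradient g = graph.backpropGradient(externalError); //external epsilon(s) in place of labels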

Example 5 with IOutputLayer

Use of org.deeplearning4j.nn.api.layers.IOutputLayer in project deeplearning4j by deeplearning4j.

From class ComputationGraph, method evaluate:

/**
     * Evaluate the network (for classification) on the provided data set, with top N accuracy in addition to standard accuracy.
     * For 'standard' accuracy evaluation only, use topN = 1.
     *
     * @param iterator   Iterator (data) to evaluate on
     * @param labelsList List of labels. May be null.
     * @param topN       N value for top N accuracy evaluation
     * @return Evaluation object, summarizing the results of the evaluation on the provided DataSetIterator
     */
public Evaluation evaluate(DataSetIterator iterator, List<String> labelsList, int topN) {
    if (layers == null || !(getOutputLayer(0) instanceof IOutputLayer)) {
        throw new IllegalStateException("Cannot evaluate network with no output layer");
    }
    if (labelsList == null)
        labelsList = iterator.getLabels();
    Evaluation e = new Evaluation(labelsList, topN);
    while (iterator.hasNext()) {
        org.nd4j.linalg.dataset.DataSet next = iterator.next();
        if (next.getFeatureMatrix() == null || next.getLabels() == null)
            break;
        INDArray features = next.getFeatures();
        INDArray labels = next.getLabels();
        INDArray[] out = output(false, features);
        if (labels.rank() == 3)
            e.evalTimeSeries(labels, out[0]);
        else
            e.eval(labels, out[0]);
    }
    return e;
}
Also used: Evaluation (org.deeplearning4j.eval.Evaluation), INDArray (org.nd4j.linalg.api.ndarray.INDArray), IOutputLayer (org.deeplearning4j.nn.api.layers.IOutputLayer)
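
A usage sketch for the method above (testIterator is a placeholder): a top-5 accuracy evaluation, letting the labels list default to the iterator's labels as the javadoc permits.

Evaluation eval = graph.evaluate(testIterator, null, 5); //labelsList may be null; topN = 5
System.out.println(eval.stats());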

Aggregations

IOutputLayer (org.deeplearning4j.nn.api.layers.IOutputLayer): 17 usages
INDArray (org.nd4j.linalg.api.ndarray.INDArray): 13 usages
FrozenLayer (org.deeplearning4j.nn.layers.FrozenLayer): 6 usages
RecurrentLayer (org.deeplearning4j.nn.api.layers.RecurrentLayer): 5 usages
FeedForwardLayer (org.deeplearning4j.nn.conf.layers.FeedForwardLayer): 5 usages
Gradient (org.deeplearning4j.nn.gradient.Gradient): 4 usages
NeuralNetConfiguration (org.deeplearning4j.nn.conf.NeuralNetConfiguration): 3 usages
DefaultGradient (org.deeplearning4j.nn.gradient.DefaultGradient): 3 usages
Pair (org.deeplearning4j.berkeley.Pair): 2 usages
Triple (org.deeplearning4j.berkeley.Triple): 2 usages
Layer (org.deeplearning4j.nn.api.Layer): 2 usages
InputPreProcessor (org.deeplearning4j.nn.conf.InputPreProcessor): 2 usages
InputType (org.deeplearning4j.nn.conf.inputs.InputType): 2 usages
GraphVertex (org.deeplearning4j.nn.graph.vertex.GraphVertex): 2 usages
VertexIndices (org.deeplearning4j.nn.graph.vertex.VertexIndices): 2 usages
ArrayList (java.util.ArrayList): 1 usage
Evaluation (org.deeplearning4j.eval.Evaluation): 1 usage
Updater (org.deeplearning4j.nn.api.Updater): 1 usage
ComputationGraphConfiguration (org.deeplearning4j.nn.conf.ComputationGraphConfiguration): 1 usage
PreprocessorVertex (org.deeplearning4j.nn.conf.graph.PreprocessorVertex): 1 usage