Search in sources :

Example 16 with IOutputLayer

use of org.deeplearning4j.nn.api.layers.IOutputLayer in project deeplearning4j by deeplearning4j.

the class MultiLayerNetwork method calcBackpropGradients.

/** Calculate gradients and errors. Used in two places:
     * (a) backprop (for standard multi layer network learning)
     * (b) backpropGradient (layer method, for when MultiLayerNetwork is used as a layer)
     * @param epsilon Errors (technically errors .* activations). Not used if withOutputLayer = true
     * @param withOutputLayer if true: assume last layer is output layer, and calculate errors based on labels. In this
     *                        case, the epsilon input is not used (may/should be null).
     *                        If false: calculate backprop gradients
     * @return Gradients and the error (epsilon) at the input
     */
protected Pair<Gradient, INDArray> calcBackpropGradients(INDArray epsilon, boolean withOutputLayer) {
    if (flattenedGradients == null)
        initGradientsView();
    String multiGradientKey;
    Gradient gradient = new DefaultGradient(flattenedGradients);
    Layer currLayer;
    //calculate and apply the backward gradient for every layer
    /**
         * Skip the output layer for the indexing and just loop backwards updating the coefficients for each layer.
         * (when withOutputLayer == true)
         *
         * Activate applies the activation function for each layer and sets that as the input for the following layer.
         *
         * Typical literature contains most trivial case for the error calculation: wT * weights
         * This interpretation transpose a few things to get mini batch because ND4J is rows vs columns organization for params
         */
    int numLayers = getnLayers();
    //Store gradients is a list; used to ensure iteration order in DefaultGradient linked hash map. i.e., layer 0 first instead of output layer
    LinkedList<Triple<String, INDArray, Character>> gradientList = new LinkedList<>();
    int layerFrom;
    Pair<Gradient, INDArray> currPair;
    if (withOutputLayer) {
        if (!(getOutputLayer() instanceof IOutputLayer)) {
            log.warn("Warning: final layer isn't output layer. You cannot use backprop without an output layer.");
            return null;
        }
        IOutputLayer outputLayer = (IOutputLayer) getOutputLayer();
        if (labels == null)
            throw new IllegalStateException("No labels found");
        outputLayer.setLabels(labels);
        currPair = outputLayer.backpropGradient(null);
        for (Map.Entry<String, INDArray> entry : currPair.getFirst().gradientForVariable().entrySet()) {
            String origName = entry.getKey();
            multiGradientKey = String.valueOf(numLayers - 1) + "_" + origName;
            gradientList.addLast(new Triple<>(multiGradientKey, entry.getValue(), currPair.getFirst().flatteningOrderForVariable(origName)));
        }
        if (getLayerWiseConfigurations().getInputPreProcess(numLayers - 1) != null)
            currPair = new Pair<>(currPair.getFirst(), this.layerWiseConfigurations.getInputPreProcess(numLayers - 1).backprop(currPair.getSecond(), getInputMiniBatchSize()));
        layerFrom = numLayers - 2;
    } else {
        currPair = new Pair<>(null, epsilon);
        layerFrom = numLayers - 1;
    }
    // Calculate gradients for previous layers & drops output layer in count
    for (int j = layerFrom; j >= 0; j--) {
        currLayer = getLayer(j);
        if (currLayer instanceof FrozenLayer)
            break;
        currPair = currLayer.backpropGradient(currPair.getSecond());
        LinkedList<Triple<String, INDArray, Character>> tempList = new LinkedList<>();
        for (Map.Entry<String, INDArray> entry : currPair.getFirst().gradientForVariable().entrySet()) {
            String origName = entry.getKey();
            multiGradientKey = String.valueOf(j) + "_" + origName;
            tempList.addFirst(new Triple<>(multiGradientKey, entry.getValue(), currPair.getFirst().flatteningOrderForVariable(origName)));
        }
        for (Triple<String, INDArray, Character> triple : tempList) gradientList.addFirst(triple);
        //Pass epsilon through input processor before passing to next layer (if applicable)
        if (getLayerWiseConfigurations().getInputPreProcess(j) != null)
            currPair = new Pair<>(currPair.getFirst(), getLayerWiseConfigurations().getInputPreProcess(j).backprop(currPair.getSecond(), getInputMiniBatchSize()));
    }
    //Add gradients to Gradients (map), in correct order
    for (Triple<String, INDArray, Character> triple : gradientList) {
        gradient.setGradientFor(triple.getFirst(), triple.getSecond(), triple.getThird());
    }
    return new Pair<>(gradient, currPair.getSecond());
}
Also used : Gradient(org.deeplearning4j.nn.gradient.Gradient) DefaultGradient(org.deeplearning4j.nn.gradient.DefaultGradient) FeedForwardLayer(org.deeplearning4j.nn.conf.layers.FeedForwardLayer) FrozenLayer(org.deeplearning4j.nn.layers.FrozenLayer) IOutputLayer(org.deeplearning4j.nn.api.layers.IOutputLayer) RecurrentLayer(org.deeplearning4j.nn.api.layers.RecurrentLayer) Triple(org.deeplearning4j.berkeley.Triple) FrozenLayer(org.deeplearning4j.nn.layers.FrozenLayer) DefaultGradient(org.deeplearning4j.nn.gradient.DefaultGradient) INDArray(org.nd4j.linalg.api.ndarray.INDArray) IOutputLayer(org.deeplearning4j.nn.api.layers.IOutputLayer) Pair(org.deeplearning4j.berkeley.Pair)

Example 17 with IOutputLayer

use of org.deeplearning4j.nn.api.layers.IOutputLayer in project deeplearning4j by deeplearning4j.

the class MultiLayerNetwork method scoreExamples.

/**Calculate the score for each example in a DataSet individually. Unlike {@link #score(DataSet)} and {@link #score(DataSet, boolean)}
     * this method does not average/sum over examples. This method allows for examples to be scored individually (at test time only), which
     * may be useful for example for autoencoder architectures and the like.<br>
     * Each row of the output (assuming addRegularizationTerms == true) is equivalent to calling score(DataSet) with a single example.
     * @param data The data to score
     * @param addRegularizationTerms If true: add l1/l2 regularization terms (if any) to the score. If false: don't add regularization terms
     * @return An INDArray (column vector) of size input.numRows(); the ith entry is the score (loss value) of the ith example
     */
public INDArray scoreExamples(DataSet data, boolean addRegularizationTerms) {
    boolean hasMaskArray = data.hasMaskArrays();
    if (hasMaskArray)
        setLayerMaskArrays(data.getFeaturesMaskArray(), data.getLabelsMaskArray());
    feedForward(data.getFeatureMatrix(), false);
    setLabels(data.getLabels());
    INDArray out;
    if (getOutputLayer() instanceof IOutputLayer) {
        IOutputLayer ol = (IOutputLayer) getOutputLayer();
        ol.setLabels(data.getLabels());
        double l1 = (addRegularizationTerms ? calcL1(true) : 0.0);
        double l2 = (addRegularizationTerms ? calcL2(true) : 0.0);
        out = ol.computeScoreForExamples(l1, l2);
    } else {
        throw new UnsupportedOperationException("Cannot calculate score with respect to labels without an OutputLayer");
    }
    if (hasMaskArray)
        clearLayerMaskArrays();
    return out;
}
Also used : INDArray(org.nd4j.linalg.api.ndarray.INDArray) IOutputLayer(org.deeplearning4j.nn.api.layers.IOutputLayer)

Aggregations

IOutputLayer (org.deeplearning4j.nn.api.layers.IOutputLayer)17 INDArray (org.nd4j.linalg.api.ndarray.INDArray)13 FrozenLayer (org.deeplearning4j.nn.layers.FrozenLayer)6 RecurrentLayer (org.deeplearning4j.nn.api.layers.RecurrentLayer)5 FeedForwardLayer (org.deeplearning4j.nn.conf.layers.FeedForwardLayer)5 Gradient (org.deeplearning4j.nn.gradient.Gradient)4 NeuralNetConfiguration (org.deeplearning4j.nn.conf.NeuralNetConfiguration)3 DefaultGradient (org.deeplearning4j.nn.gradient.DefaultGradient)3 Pair (org.deeplearning4j.berkeley.Pair)2 Triple (org.deeplearning4j.berkeley.Triple)2 Layer (org.deeplearning4j.nn.api.Layer)2 InputPreProcessor (org.deeplearning4j.nn.conf.InputPreProcessor)2 InputType (org.deeplearning4j.nn.conf.inputs.InputType)2 GraphVertex (org.deeplearning4j.nn.graph.vertex.GraphVertex)2 VertexIndices (org.deeplearning4j.nn.graph.vertex.VertexIndices)2 ArrayList (java.util.ArrayList)1 Evaluation (org.deeplearning4j.eval.Evaluation)1 Updater (org.deeplearning4j.nn.api.Updater)1 ComputationGraphConfiguration (org.deeplearning4j.nn.conf.ComputationGraphConfiguration)1 PreprocessorVertex (org.deeplearning4j.nn.conf.graph.PreprocessorVertex)1