Example 11 with IOutputLayer

use of org.deeplearning4j.nn.api.layers.IOutputLayer in project deeplearning4j by deeplearning4j.

the class KerasSequentialModel method getMultiLayerConfiguration.

/**
 * Configure a MultiLayerConfiguration from this Keras Sequential model configuration.
 *
 * @return MultiLayerConfiguration
 */
public MultiLayerConfiguration getMultiLayerConfiguration() throws InvalidKerasConfigurationException, UnsupportedKerasConfigurationException {
    if (!this.className.equals(MODEL_CLASS_NAME_SEQUENTIAL))
        throw new InvalidKerasConfigurationException("Keras model class name " + this.className + " incompatible with MultiLayerNetwork");
    if (this.inputLayerNames.size() != 1)
        throw new InvalidKerasConfigurationException("MultiLayeNetwork expects only 1 input (found " + this.inputLayerNames.size() + ")");
    if (this.outputLayerNames.size() != 1)
        throw new InvalidKerasConfigurationException("MultiLayeNetwork expects only 1 output (found " + this.outputLayerNames.size() + ")");
    NeuralNetConfiguration.Builder modelBuilder = new NeuralNetConfiguration.Builder();
    NeuralNetConfiguration.ListBuilder listBuilder = modelBuilder.list();
    /* Add layers one at a time. */
    KerasLayer prevLayer = null;
    int layerIndex = 0;
    for (KerasLayer layer : this.layersOrdered) {
        if (layer.usesRegularization())
            modelBuilder.setUseRegularization(true);
        if (layer.isLayer()) {
            int nbInbound = layer.getInboundLayerNames().size();
            if (nbInbound != 1)
                throw new InvalidKerasConfigurationException("Layers in MultiLayerConfiguration must have exactly one inbound layer (found " + nbInbound + " for layer " + layer.getLayerName() + ")");
            if (prevLayer != null) {
                InputType[] inputTypes = new InputType[1];
                InputPreProcessor preprocessor = null;
                if (prevLayer.isInputPreProcessor()) {
                    inputTypes[0] = this.outputTypes.get(prevLayer.getInboundLayerNames().get(0));
                    preprocessor = prevLayer.getInputPreprocessor(inputTypes);
                } else {
                    inputTypes[0] = this.outputTypes.get(prevLayer.getLayerName());
                    preprocessor = layer.getInputPreprocessor(inputTypes);
                }
                if (preprocessor != null)
                    listBuilder.inputPreProcessor(layerIndex, preprocessor);
            }
            listBuilder.layer(layerIndex++, layer.getLayer());
            if (this.outputLayerNames.contains(layer.getLayerName()) && !(layer.getLayer() instanceof IOutputLayer))
                log.warn("Model cannot be trained: output layer " + layer.getLayerName() + " is not an IOutputLayer (no loss function specified)");
        } else if (layer.getVertex() != null)
            throw new InvalidKerasConfigurationException("Cannot add vertex to MultiLayerConfiguration (class name " + layer.getClassName() + ", layer name " + layer.getLayerName() + ")");
        prevLayer = layer;
    }
    InputType inputType = this.layersOrdered.get(0).getOutputType();
    if (inputType != null)
        listBuilder.setInputType(inputType);
    /* Use truncated BPTT if configured; otherwise fall back to standard backprop. */
    if (this.useTruncatedBPTT && this.truncatedBPTT > 0)
        listBuilder.backpropType(BackpropType.TruncatedBPTT).tBPTTForwardLength(truncatedBPTT).tBPTTBackwardLength(truncatedBPTT);
    else
        listBuilder.backpropType(BackpropType.Standard);
    return listBuilder.build();
}
Also used : InputType(org.deeplearning4j.nn.conf.inputs.InputType) InputPreProcessor(org.deeplearning4j.nn.conf.InputPreProcessor) NeuralNetConfiguration(org.deeplearning4j.nn.conf.NeuralNetConfiguration) IOutputLayer(org.deeplearning4j.nn.api.layers.IOutputLayer)
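A minimal usage sketch (not part of the source; kerasModel is a hypothetical, already-parsed KerasSequentialModel): the returned MultiLayerConfiguration plugs straight into a MultiLayerNetwork, which must then be initialized before training or inference. Imports mirror the "Also used" list above.

// kerasModel is assumed to be a KerasSequentialModel parsed elsewhere (hypothetical variable)
MultiLayerConfiguration conf = kerasModel.getMultiLayerConfiguration();
MultiLayerNetwork network = new MultiLayerNetwork(conf);
// Allocate and initialize parameters before use
network.init();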

Example 12 with IOutputLayer

use of org.deeplearning4j.nn.api.layers.IOutputLayer in project deeplearning4j by deeplearning4j.

the class ComputationGraph method scoreExamples.

/**
 * Calculate the score for each example in a DataSet individually. Unlike {@link #score(MultiDataSet)} and
 * {@link #score(MultiDataSet, boolean)}, this method does not average/sum over examples. It allows examples
 * to be scored individually (at test time only), which may be useful, for example, for autoencoder
 * architectures.<br>
 * Each row of the output (assuming addRegularizationTerms == true) is equivalent to calling
 * score(MultiDataSet) with a single example.
 *
 * @param data                   The data to score
 * @param addRegularizationTerms If true, add l1/l2 regularization terms (if any) to the score; if false, don't
 * @return An INDArray (column vector) of size input.numRows(); the ith entry is the score (loss value) of the ith example
 */
public INDArray scoreExamples(MultiDataSet data, boolean addRegularizationTerms) {
    boolean hasMaskArray = data.hasMaskArrays();
    if (hasMaskArray)
        setLayerMaskArrays(data.getFeaturesMaskArrays(), data.getLabelsMaskArrays());
    feedForward(data.getFeatures(), false);
    setLabels(data.getLabels());
    INDArray out = null;
    double l1 = (addRegularizationTerms ? calcL1() : 0.0);
    double l2 = (addRegularizationTerms ? calcL2() : 0.0);
    int i = 0;
    for (String s : configuration.getNetworkOutputs()) {
        Layer outLayer = verticesMap.get(s).getLayer();
        if (!(outLayer instanceof IOutputLayer)) {  // instanceof is false for null, so no separate null check is needed
            throw new UnsupportedOperationException("Cannot calculate score: vertex \"" + s + "\" is not an output layer");
        }
        IOutputLayer ol = (IOutputLayer) outLayer;
        ol.setLabels(labels[i++]);
        INDArray scoreCurrLayer = ol.computeScoreForExamples(l1, l2);
        if (out == null)
            out = scoreCurrLayer;
        else
            out.addi(scoreCurrLayer);
        //Only want to add l1/l2 once...
        l1 = 0.0;
        l2 = 0.0;
    }
    if (hasMaskArray)
        clearLayerMaskArrays();
    return out;
}
Also used : INDArray(org.nd4j.linalg.api.ndarray.INDArray) Layer(org.deeplearning4j.nn.api.Layer) FrozenLayer(org.deeplearning4j.nn.layers.FrozenLayer) RecurrentLayer(org.deeplearning4j.nn.api.layers.RecurrentLayer) FeedForwardLayer(org.deeplearning4j.nn.conf.layers.FeedForwardLayer) IOutputLayer(org.deeplearning4j.nn.api.layers.IOutputLayer)
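A minimal end-to-end sketch of calling scoreExamples (assumptions: tiny layer sizes, random data, default activations; not taken from the source). It builds a single-output graph, scores three examples individually, and gets back a 3x1 column vector of per-example loss values.

import org.deeplearning4j.nn.conf.ComputationGraphConfiguration;
import org.deeplearning4j.nn.conf.NeuralNetConfiguration;
import org.deeplearning4j.nn.conf.layers.OutputLayer;
import org.deeplearning4j.nn.graph.ComputationGraph;
import org.nd4j.linalg.api.ndarray.INDArray;
import org.nd4j.linalg.dataset.MultiDataSet;
import org.nd4j.linalg.factory.Nd4j;
import org.nd4j.linalg.lossfunctions.LossFunctions;

// A trivial graph with one input and one IOutputLayer (4 features in, 2 classes out)
ComputationGraphConfiguration conf = new NeuralNetConfiguration.Builder()
        .graphBuilder()
        .addInputs("in")
        .addLayer("out", new OutputLayer.Builder(LossFunctions.LossFunction.MCXENT)
                .nIn(4).nOut(2).build(), "in")
        .setOutputs("out")
        .build();
ComputationGraph graph = new ComputationGraph(conf);
graph.init();

// Three examples with one-hot labels; true => include l1/l2 terms (added once, per the code above)
MultiDataSet data = new MultiDataSet(Nd4j.rand(3, 4),
        Nd4j.create(new double[][] {{1, 0}, {0, 1}, {1, 0}}));
INDArray perExampleScores = graph.scoreExamples(data, true);   // shape [3, 1]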

Example 13 with IOutputLayer

use of org.deeplearning4j.nn.api.layers.IOutputLayer in project deeplearning4j by deeplearning4j.

the class MultiLayerNetwork method computeGradientAndScore.

@Override
public void computeGradientAndScore() {
    //Calculate activations (which are stored in each layer, and used in backprop)
    if (layerWiseConfigurations.getBackpropType() == BackpropType.TruncatedBPTT) {
        List<INDArray> activations = rnnActivateUsingStoredState(getInput(), true, true);
        if (trainingListeners.size() > 0) {
            for (TrainingListener tl : trainingListeners) {
                tl.onForwardPass(this, activations);
            }
        }
        truncatedBPTTGradient();
    } else {
        //First: do a feed-forward through the network
        //Note that we don't actually need to do the full forward pass through the output layer right now; but we do
        // need the input to the output layer to be set (such that backprop can be done)
        List<INDArray> activations = feedForwardToLayer(layers.length - 2, true);
        if (trainingListeners.size() > 0) {
            //TODO: We possibly do want output layer activations in some cases here...
            for (TrainingListener tl : trainingListeners) {
                tl.onForwardPass(this, activations);
            }
        }
        INDArray actSecondLastLayer = activations.get(activations.size() - 1);
        if (layerWiseConfigurations.getInputPreProcess(layers.length - 1) != null)
            actSecondLastLayer = layerWiseConfigurations.getInputPreProcess(layers.length - 1).preProcess(actSecondLastLayer, getInputMiniBatchSize());
        getOutputLayer().setInput(actSecondLastLayer);
        //Then: compute gradients
        backprop();
    }
    //Calculate score
    if (!(getOutputLayer() instanceof IOutputLayer)) {
        throw new IllegalStateException("Cannot calculate gradient and score with respect to labels: final layer is not an IOutputLayer");
    }
    score = ((IOutputLayer) getOutputLayer()).computeScore(calcL1(true), calcL2(true), true);
    //Listeners
    if (trainingListeners.size() > 0) {
        for (TrainingListener tl : trainingListeners) {
            tl.onBackwardPass(this);
        }
    }
}
Also used : INDArray(org.nd4j.linalg.api.ndarray.INDArray) TrainingListener(org.deeplearning4j.optimize.api.TrainingListener) IOutputLayer(org.deeplearning4j.nn.api.layers.IOutputLayer)
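The method can also be exercised directly. A minimal sketch (assumed sizes, random data; imports follow the same pattern as the sketch under Example 12, plus DenseLayer, MultiLayerConfiguration, MultiLayerNetwork, and Gradient):

MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder().list()
        .layer(0, new DenseLayer.Builder().nIn(4).nOut(3).build())
        .layer(1, new OutputLayer.Builder(LossFunctions.LossFunction.MCXENT).nIn(3).nOut(2).build())
        .build();
MultiLayerNetwork net = new MultiLayerNetwork(conf);
net.init();
net.setInput(Nd4j.rand(5, 4));                                 // 5 examples, 4 features each
INDArray labels = Nd4j.zeros(5, 2);
for (int i = 0; i < 5; i++) labels.putScalar(i, i % 2, 1.0);   // one-hot labels
net.setLabels(labels);
net.computeGradientAndScore();       // forward pass up to the output layer, then backprop
double score = net.score();          // loss value computed by the IOutputLayer
Gradient gradient = net.gradient();  // parameter gradients populated by backprop()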

Example 14 with IOutputLayer

use of org.deeplearning4j.nn.api.layers.IOutputLayer in project deeplearning4j by deeplearning4j.

the class MultiLayerNetwork method finetune.

/**
 * Run SGD based on the given labels.
 */
public void finetune() {
    if (!layerWiseConfigurations.isBackprop()) {
        log.warn("Warning: finetune is not applied.");
        return;
    }
    if (!(getOutputLayer() instanceof IOutputLayer)) {
        log.warn("Output layer not instance of output layer returning.");
        return;
    }
    if (flattenedGradients == null)
        initGradientsView();
    if (labels == null)
        throw new IllegalStateException("No labels found");
    log.info("Finetune phase");
    IOutputLayer output = (IOutputLayer) getOutputLayer();
    if (output.conf().getOptimizationAlgo() != OptimizationAlgorithm.HESSIAN_FREE) {
        feedForward();
        output.fit(output.input(), labels);
    } else {
        throw new UnsupportedOperationException();
    }
}
Also used : IOutputLayer(org.deeplearning4j.nn.api.layers.IOutputLayer)
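A minimal sketch of a configuration on which finetune() actually runs (assumptions as in the earlier sketches): backprop must be enabled and the final layer must be an IOutputLayer, otherwise the method returns early with a warning.

MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder().list()
        .layer(0, new DenseLayer.Builder().nIn(4).nOut(3).build())
        .layer(1, new OutputLayer.Builder(LossFunctions.LossFunction.MCXENT).nIn(3).nOut(2).build())
        .backprop(true)            // finetune() is a no-op when backprop is disabled
        .build();
MultiLayerNetwork net = new MultiLayerNetwork(conf);
net.init();
net.setInput(Nd4j.rand(5, 4));
INDArray labels = Nd4j.zeros(5, 2);
labels.getColumn(0).assign(1.0);   // label every example as class 0, purely for illustration
net.setLabels(labels);
net.finetune();                    // feed-forward, then fit only the output layer on the labels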

Example 15 with IOutputLayer

use of org.deeplearning4j.nn.api.layers.IOutputLayer in project deeplearning4j by deeplearning4j.

the class MultiLayerNetwork method truncatedBPTTGradient.

/** Equivalent to backprop(), but calculates gradient for truncated BPTT instead. */
protected void truncatedBPTTGradient() {
    if (flattenedGradients == null)
        initGradientsView();
    String multiGradientKey;
    gradient = new DefaultGradient();
    Layer currLayer;
    if (!(getOutputLayer() instanceof IOutputLayer)) {
        log.warn("Warning: final layer isn't output layer. You cannot use backprop (truncated BPTT) without an output layer.");
        return;
    }
    IOutputLayer outputLayer = (IOutputLayer) getOutputLayer();
    if (labels == null)
        throw new IllegalStateException("No labels found");
    if (outputLayer.conf().getLayer().getWeightInit() == WeightInit.ZERO) {
        throw new IllegalStateException("Output layer weights cannot be initialized to zero when using backprop.");
    }
    outputLayer.setLabels(labels);
    //calculate and apply the backward gradient for every layer
    int numLayers = getnLayers();
    //Gradients are stored in a list to preserve iteration order in the DefaultGradient linked hash map, i.e., layer 0 first rather than the output layer first
    LinkedList<Pair<String, INDArray>> gradientList = new LinkedList<>();
    Pair<Gradient, INDArray> currPair = outputLayer.backpropGradient(null);
    for (Map.Entry<String, INDArray> entry : currPair.getFirst().gradientForVariable().entrySet()) {
        multiGradientKey = String.valueOf(numLayers - 1) + "_" + entry.getKey();
        gradientList.addLast(new Pair<>(multiGradientKey, entry.getValue()));
    }
    if (getLayerWiseConfigurations().getInputPreProcess(numLayers - 1) != null)
        currPair = new Pair<>(currPair.getFirst(), this.layerWiseConfigurations.getInputPreProcess(numLayers - 1).backprop(currPair.getSecond(), getInputMiniBatchSize()));
    // Calculate gradients for the remaining layers (the output layer was handled above)
    for (int j = numLayers - 2; j >= 0; j--) {
        currLayer = getLayer(j);
        if (currLayer instanceof RecurrentLayer) {
            currPair = ((RecurrentLayer) currLayer).tbpttBackpropGradient(currPair.getSecond(), layerWiseConfigurations.getTbpttBackLength());
        } else {
            currPair = currLayer.backpropGradient(currPair.getSecond());
        }
        LinkedList<Pair<String, INDArray>> tempList = new LinkedList<>();
        for (Map.Entry<String, INDArray> entry : currPair.getFirst().gradientForVariable().entrySet()) {
            multiGradientKey = String.valueOf(j) + "_" + entry.getKey();
            tempList.addFirst(new Pair<>(multiGradientKey, entry.getValue()));
        }
        for (Pair<String, INDArray> pair : tempList) gradientList.addFirst(pair);
        //Pass epsilon through input processor before passing to next layer (if applicable)
        if (getLayerWiseConfigurations().getInputPreProcess(j) != null)
            currPair = new Pair<>(currPair.getFirst(), getLayerWiseConfigurations().getInputPreProcess(j).backprop(currPair.getSecond(), getInputMiniBatchSize()));
    }
    //Add gradients to Gradients, in correct order
    for (Pair<String, INDArray> pair : gradientList) gradient.setGradientFor(pair.getFirst(), pair.getSecond());
}
Also used : Gradient(org.deeplearning4j.nn.gradient.Gradient) DefaultGradient(org.deeplearning4j.nn.gradient.DefaultGradient) FeedForwardLayer(org.deeplearning4j.nn.conf.layers.FeedForwardLayer) FrozenLayer(org.deeplearning4j.nn.layers.FrozenLayer) IOutputLayer(org.deeplearning4j.nn.api.layers.IOutputLayer) RecurrentLayer(org.deeplearning4j.nn.api.layers.RecurrentLayer) INDArray(org.nd4j.linalg.api.ndarray.INDArray) Pair(org.deeplearning4j.berkeley.Pair)
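truncatedBPTTGradient() is protected and is invoked internally by computeGradientAndScore() whenever the configuration selects truncated BPTT (see Example 13). A minimal sketch of such a configuration, assuming a small recurrent network (GravesLSTM and RnnOutputLayer are illustrative choices, not from the source; imports follow the earlier sketches):

MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder().list()
        .layer(0, new GravesLSTM.Builder().nIn(4).nOut(3).build())
        .layer(1, new RnnOutputLayer.Builder(LossFunctions.LossFunction.MCXENT).nIn(3).nOut(2).build())
        .backpropType(BackpropType.TruncatedBPTT)   // routes gradient calculation through truncatedBPTTGradient()
        .tBPTTForwardLength(10)
        .tBPTTBackwardLength(10)
        .build();
MultiLayerNetwork net = new MultiLayerNetwork(conf);
net.init();
// net.fit(timeSeriesData) or net.computeGradientAndScore() will now take the TBPTT path above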

Aggregations

IOutputLayer (org.deeplearning4j.nn.api.layers.IOutputLayer): 17 usages
INDArray (org.nd4j.linalg.api.ndarray.INDArray): 13 usages
FrozenLayer (org.deeplearning4j.nn.layers.FrozenLayer): 6 usages
RecurrentLayer (org.deeplearning4j.nn.api.layers.RecurrentLayer): 5 usages
FeedForwardLayer (org.deeplearning4j.nn.conf.layers.FeedForwardLayer): 5 usages
Gradient (org.deeplearning4j.nn.gradient.Gradient): 4 usages
NeuralNetConfiguration (org.deeplearning4j.nn.conf.NeuralNetConfiguration): 3 usages
DefaultGradient (org.deeplearning4j.nn.gradient.DefaultGradient): 3 usages
Pair (org.deeplearning4j.berkeley.Pair): 2 usages
Triple (org.deeplearning4j.berkeley.Triple): 2 usages
Layer (org.deeplearning4j.nn.api.Layer): 2 usages
InputPreProcessor (org.deeplearning4j.nn.conf.InputPreProcessor): 2 usages
InputType (org.deeplearning4j.nn.conf.inputs.InputType): 2 usages
GraphVertex (org.deeplearning4j.nn.graph.vertex.GraphVertex): 2 usages
VertexIndices (org.deeplearning4j.nn.graph.vertex.VertexIndices): 2 usages
ArrayList (java.util.ArrayList): 1 usage
Evaluation (org.deeplearning4j.eval.Evaluation): 1 usage
Updater (org.deeplearning4j.nn.api.Updater): 1 usage
ComputationGraphConfiguration (org.deeplearning4j.nn.conf.ComputationGraphConfiguration): 1 usage
PreprocessorVertex (org.deeplearning4j.nn.conf.graph.PreprocessorVertex): 1 usage