
Example 6 with TrainingListener

use of org.deeplearning4j.optimize.api.TrainingListener in project deeplearning4j by deeplearning4j.

the class MultiLayerNetwork method computeGradientAndScore.

@Override
public void computeGradientAndScore() {
    //Calculate activations (which are stored in each layer, and used in backprop)
    if (layerWiseConfigurations.getBackpropType() == BackpropType.TruncatedBPTT) {
        List<INDArray> activations = rnnActivateUsingStoredState(getInput(), true, true);
        if (trainingListeners.size() > 0) {
            for (TrainingListener tl : trainingListeners) {
                tl.onForwardPass(this, activations);
            }
        }
        truncatedBPTTGradient();
    } else {
        //First: do a feed-forward through the network
        //Note that we don't actually need to do the full forward pass through the output layer right now; but we do
        // need the input to the output layer to be set (such that backprop can be done)
        List<INDArray> activations = feedForwardToLayer(layers.length - 2, true);
        if (trainingListeners.size() > 0) {
            //TODO: We possibly do want output layer activations in some cases here...
            for (TrainingListener tl : trainingListeners) {
                tl.onForwardPass(this, activations);
            }
        }
        INDArray actSecondLastLayer = activations.get(activations.size() - 1);
        if (layerWiseConfigurations.getInputPreProcess(layers.length - 1) != null)
            actSecondLastLayer = layerWiseConfigurations.getInputPreProcess(layers.length - 1).preProcess(actSecondLastLayer, getInputMiniBatchSize());
        getOutputLayer().setInput(actSecondLastLayer);
        //Then: compute gradients
        backprop();
    }
    //Calculate score
    if (!(getOutputLayer() instanceof IOutputLayer)) {
        throw new IllegalStateException("Cannot calculate gradient and score with respect to labels: final layer is not an IOutputLayer");
    }
    score = ((IOutputLayer) getOutputLayer()).computeScore(calcL1(true), calcL2(true), true);
    //Listeners
    if (trainingListeners.size() > 0) {
        for (TrainingListener tl : trainingListeners) {
            tl.onBackwardPass(this);
        }
    }
}
Also used : INDArray(org.nd4j.linalg.api.ndarray.INDArray) TrainingListener(org.deeplearning4j.optimize.api.TrainingListener) IOutputLayer(org.deeplearning4j.nn.api.layers.IOutputLayer)
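
For context, a listener that consumes these onForwardPass/onBackwardPass callbacks can be sketched as below. This is a minimal illustration and not part of the indexed sources: the class name ActivationStatsListener is hypothetical, and it assumes the 0.x-era API in which TrainingListener extends IterationListener (so the iteration-related methods must be implemented as well).

import java.util.List;
import java.util.Map;
import org.deeplearning4j.nn.api.Model;
import org.deeplearning4j.optimize.api.TrainingListener;
import org.nd4j.linalg.api.ndarray.INDArray;

//Hypothetical listener: logs the mean activation of each layer on every forward pass
public class ActivationStatsListener implements TrainingListener {

    private boolean invoked = false;

    @Override
    public void onForwardPass(Model model, List<INDArray> activations) {
        for (int i = 0; i < activations.size(); i++) {
            System.out.println("layer " + i + " mean activation: " + activations.get(i).meanNumber());
        }
    }

    @Override
    public void onForwardPass(Model model, Map<String, INDArray> activations) {
        //Map variant is used by ComputationGraph; nothing to do for MultiLayerNetwork
    }

    @Override
    public void onGradientCalculation(Model model) {
        //no-op
    }

    @Override
    public void onBackwardPass(Model model) {
        System.out.println("backward pass complete, score = " + model.score());
    }

    @Override
    public void onEpochStart(Model model) {
        //no-op
    }

    @Override
    public void onEpochEnd(Model model) {
        //no-op
    }

    //IterationListener methods (TrainingListener extends IterationListener in this API version)
    @Override
    public boolean invoked() {
        return invoked;
    }

    @Override
    public void invoke() {
        this.invoked = true;
    }

    @Override
    public void iterationDone(Model model, int iteration) {
        //no-op
    }
}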

Example 7 with TrainingListener

use of org.deeplearning4j.optimize.api.TrainingListener in project deeplearning4j by deeplearning4j.

the class MultiLayerNetwork method setListeners.

@Override
public void setListeners(Collection<IterationListener> listeners) {
    this.listeners = listeners;
    if (layers == null) {
        init();
    }
    for (Layer layer : layers) {
        layer.setListeners(listeners);
    }
    if (solver != null) {
        solver.setListeners(listeners);
    }
    this.trainingListeners.clear();
    if (listeners != null) {
        for (IterationListener il : listeners) {
            if (il instanceof TrainingListener) {
                this.trainingListeners.add((TrainingListener) il);
            }
        }
    }
}
Also used : IterationListener(org.deeplearning4j.optimize.api.IterationListener) TrainingListener(org.deeplearning4j.optimize.api.TrainingListener) FeedForwardLayer(org.deeplearning4j.nn.conf.layers.FeedForwardLayer) FrozenLayer(org.deeplearning4j.nn.layers.FrozenLayer) IOutputLayer(org.deeplearning4j.nn.api.layers.IOutputLayer) RecurrentLayer(org.deeplearning4j.nn.api.layers.RecurrentLayer)
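
A brief usage sketch of the filtering behavior above (illustrative only: net stands for an already-initialized MultiLayerNetwork, and ActivationStatsListener is the hypothetical listener sketched under Example 6). ScoreIterationListener implements only IterationListener, so it is propagated to the layers and the solver but never lands in trainingListeners; a TrainingListener implementation is registered in both places.

import java.util.Arrays;
import org.deeplearning4j.optimize.listeners.ScoreIterationListener;

//ScoreIterationListener is a plain IterationListener (prints the score every 10 iterations);
//ActivationStatsListener (hypothetical, see Example 6) also implements TrainingListener,
//so setListeners additionally adds it to the trainingListeners list used in computeGradientAndScore
net.setListeners(Arrays.asList(new ScoreIterationListener(10), new ActivationStatsListener()));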

Example 8 with TrainingListener

use of org.deeplearning4j.optimize.api.TrainingListener in project deeplearning4j by deeplearning4j.

the class RBM method computeGradientAndScore.

@Override
public void computeGradientAndScore() {
    int k = layerConf().getK();
    //POSITIVE PHASE: hidden probabilities and sampled hidden states (hProb0, hState0) given the input
    Pair<INDArray, INDArray> probHidden = sampleHiddenGivenVisible(input());
    /*
     * Start the Gibbs sampling.
     */
    //        INDArray chainStart = probHidden.getSecond();
    INDArray chainStart = probHidden.getFirst();
    /*
     * Note that at a later date, we can explore alternative methods of
     * storing the chain transitions for different kinds of sampling
     * and exploring the search space.
     */
    Pair<Pair<INDArray, INDArray>, Pair<INDArray, INDArray>> matrices;
    //negative visible probabilities
    INDArray negVProb = null;
    //negative visible samples
    INDArray negVSamples = null;
    //negative hidden probabilities (expected values)
    INDArray negHProb = null;
    //negative hidden samples
    INDArray negHSamples = null;
    for (int i = 0; i < k; i++) {
        //NEGATIVE PHASE
        if (i == 0)
            matrices = gibbhVh(chainStart);
        else
            matrices = gibbhVh(negHSamples);
        //unpack the visible/hidden probabilities and samples from this Gibbs step; after k iterations the chain ends here
        negVProb = matrices.getFirst().getFirst();
        negVSamples = matrices.getFirst().getSecond();
        negHProb = matrices.getSecond().getFirst();
        negHSamples = matrices.getSecond().getSecond();
    }
    /*
     * Update gradient parameters - note that taking the mean over the batch size is handled in LayerUpdater
     */
    INDArray wGradient = input().transposei().mmul(probHidden.getFirst()).subi(negVProb.transpose().mmul(negHProb));
    INDArray hBiasGradient;
    if (layerConf().getSparsity() != 0)
        //sparsity target: drive the mean hidden activation toward this value
        hBiasGradient = probHidden.getFirst().rsub(layerConf().getSparsity()).sum(0);
    else
        //update rule: expected hidden activations minus the negative-phase hidden means
        hBiasGradient = probHidden.getFirst().sub(negHProb).sum(0);
    //update rule: the input minus the negative-phase visible probabilities
    INDArray delta = input.sub(negVProb);
    INDArray vBiasGradient = delta.sum(0);
    if (conf.isPretrain()) {
        wGradient.negi();
        hBiasGradient.negi();
        vBiasGradient.negi();
    }
    gradient = createGradient(wGradient, vBiasGradient, hBiasGradient);
    // score: the negative-phase visible samples are compared to the input (reconstruction)
    setScoreWithZ(negVSamples);
    if (trainingListeners != null && trainingListeners.size() > 0) {
        for (TrainingListener tl : trainingListeners) {
            tl.onBackwardPass(this);
        }
    }
}
Also used : INDArray(org.nd4j.linalg.api.ndarray.INDArray) TrainingListener(org.deeplearning4j.optimize.api.TrainingListener) Pair(org.deeplearning4j.berkeley.Pair)
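
The weight gradient assembled above is the standard contrastive-divergence (CD-k) estimator: the outer product of visible and hidden statistics from the data (positive phase) minus the same product after k Gibbs steps (negative phase). Stripped of the DL4J plumbing, it can be sketched in plain ND4J as below; this is a minimal, self-contained illustration with random stand-in arrays, where v0, h0, vk and hk correspond to input(), probHidden.getFirst(), negVProb and negHProb respectively.

import org.nd4j.linalg.api.ndarray.INDArray;
import org.nd4j.linalg.factory.Nd4j;

public class CdkGradientSketch {
    public static void main(String[] args) {
        int batch = 4, nVisible = 6, nHidden = 3;
        //stand-ins for the positive/negative phase statistics in the RBM code above
        INDArray v0 = Nd4j.rand(batch, nVisible); //input()
        INDArray h0 = Nd4j.rand(batch, nHidden);  //probHidden.getFirst(): P(h | v0)
        INDArray vk = Nd4j.rand(batch, nVisible); //negVProb: visible probabilities after k Gibbs steps
        INDArray hk = Nd4j.rand(batch, nHidden);  //negHProb: P(h | vk)
        //CD-k weight gradient: v0^T h0 - vk^T hk (averaging over the batch is left to the updater)
        INDArray wGradient = v0.transpose().mmul(h0).sub(vk.transpose().mmul(hk));
        //result has shape [nVisible, nHidden], matching the RBM weight matrix
        System.out.println(wGradient);
    }
}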

Aggregations

TrainingListener (org.deeplearning4j.optimize.api.TrainingListener) 8
INDArray (org.nd4j.linalg.api.ndarray.INDArray) 5
IOutputLayer (org.deeplearning4j.nn.api.layers.IOutputLayer) 3
AsyncDataSetIterator (org.deeplearning4j.datasets.iterator.AsyncDataSetIterator) 2
RecurrentLayer (org.deeplearning4j.nn.api.layers.RecurrentLayer) 2
FeedForwardLayer (org.deeplearning4j.nn.conf.layers.FeedForwardLayer) 2
FrozenLayer (org.deeplearning4j.nn.layers.FrozenLayer) 2
Solver (org.deeplearning4j.optimize.Solver) 2
IterationListener (org.deeplearning4j.optimize.api.IterationListener) 2
DataSetIterator (org.nd4j.linalg.dataset.api.iterator.DataSetIterator) 2
Pair (org.deeplearning4j.berkeley.Pair) 1
AsyncMultiDataSetIterator (org.deeplearning4j.datasets.iterator.AsyncMultiDataSetIterator) 1
SingletonMultiDataSetIterator (org.deeplearning4j.datasets.iterator.impl.SingletonMultiDataSetIterator) 1
Layer (org.deeplearning4j.nn.api.Layer) 1
DefaultGradient (org.deeplearning4j.nn.gradient.DefaultGradient) 1
Gradient (org.deeplearning4j.nn.gradient.Gradient) 1
GraphVertex (org.deeplearning4j.nn.graph.vertex.GraphVertex) 1
IActivation (org.nd4j.linalg.activations.IActivation) 1
Level1 (org.nd4j.linalg.api.blas.Level1) 1
DataSet (org.nd4j.linalg.dataset.DataSet) 1