Use of org.deeplearning4j.optimize.api.TrainingListener in project deeplearning4j by deeplearning4j.
The class MultiLayerNetwork, method computeGradientAndScore:
@Override
public void computeGradientAndScore() {
    //Calculate activations (which are stored in each layer, and used in backprop)
    if (layerWiseConfigurations.getBackpropType() == BackpropType.TruncatedBPTT) {
        List<INDArray> activations = rnnActivateUsingStoredState(getInput(), true, true);
        if (trainingListeners.size() > 0) {
            for (TrainingListener tl : trainingListeners) {
                tl.onForwardPass(this, activations);
            }
        }
        truncatedBPTTGradient();
    } else {
        //First: do a feed-forward through the network
        //Note that we don't actually need to do the full forward pass through the output layer right now; but we do
        // need the input to the output layer to be set (such that backprop can be done)
        List<INDArray> activations = feedForwardToLayer(layers.length - 2, true);
        if (trainingListeners.size() > 0) {
            //TODO: We possibly do want output layer activations in some cases here...
            for (TrainingListener tl : trainingListeners) {
                tl.onForwardPass(this, activations);
            }
        }
        INDArray actSecondLastLayer = activations.get(activations.size() - 1);
        if (layerWiseConfigurations.getInputPreProcess(layers.length - 1) != null)
            actSecondLastLayer = layerWiseConfigurations.getInputPreProcess(layers.length - 1)
                    .preProcess(actSecondLastLayer, getInputMiniBatchSize());
        getOutputLayer().setInput(actSecondLastLayer);
        //Then: compute gradients
        backprop();
    }
    //Calculate score
    if (!(getOutputLayer() instanceof IOutputLayer)) {
        throw new IllegalStateException(
                "Cannot calculate gradient and score with respect to labels: final layer is not an IOutputLayer");
    }
    score = ((IOutputLayer) getOutputLayer()).computeScore(calcL1(true), calcL2(true), true);
    //Listeners
    if (trainingListeners.size() > 0) {
        for (TrainingListener tl : trainingListeners) {
            tl.onBackwardPass(this);
        }
    }
}
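The onForwardPass and onBackwardPass calls above are the hook points for custom listeners. Below is a minimal sketch of such a listener, assuming the 0.x-era TrainingListener interface (which extends IterationListener with invoked/invoke/iterationDone); the class name LoggingTrainingListener and its logging behavior are illustrative, not part of the library, and the exact method set may differ across versions.

import java.util.List;
import java.util.Map;

import org.deeplearning4j.nn.api.Model;
import org.deeplearning4j.optimize.api.TrainingListener;
import org.nd4j.linalg.api.ndarray.INDArray;

//Hypothetical listener: logs per-layer activations on the forward pass and
//counts backward passes. Assumes the 0.x-era TrainingListener interface.
public class LoggingTrainingListener implements TrainingListener {
    private boolean invoked = false;
    private long backwardPasses = 0;

    @Override
    public boolean invoked() {
        return invoked;
    }

    @Override
    public void invoke() {
        this.invoked = true;
    }

    @Override
    public void iterationDone(Model model, int iteration) {
        //Called once per parameter update; score() is the most recent loss value
        System.out.println("iteration " + iteration + ", score " + model.score());
    }

    @Override
    public void onEpochStart(Model model) { }

    @Override
    public void onEpochEnd(Model model) { }

    @Override
    public void onForwardPass(Model model, List<INDArray> activations) {
        //Receives the per-layer activations collected in computeGradientAndScore()
        for (int i = 0; i < activations.size(); i++) {
            System.out.println("layer " + i + " mean activation: " + activations.get(i).meanNumber());
        }
    }

    @Override
    public void onForwardPass(Model model, Map<String, INDArray> activations) {
        //ComputationGraph variant; not invoked by MultiLayerNetwork
    }

    @Override
    public void onGradientCalculation(Model model) { }

    @Override
    public void onBackwardPass(Model model) {
        //Fired once per gradient computation, as seen at the end of the method above
        backwardPasses++;
    }
}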
Use of org.deeplearning4j.optimize.api.TrainingListener in project deeplearning4j by deeplearning4j.
The class MultiLayerNetwork, method setListeners:
@Override
public void setListeners(Collection<IterationListener> listeners) {
    this.listeners = listeners;
    if (layers == null) {
        init();
    }
    for (Layer layer : layers) {
        layer.setListeners(listeners);
    }
    if (solver != null) {
        solver.setListeners(listeners);
    }
    this.trainingListeners.clear();
    if (listeners != null) {
        for (IterationListener il : listeners) {
            if (il instanceof TrainingListener) {
                this.trainingListeners.add((TrainingListener) il);
            }
        }
    }
}
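A usage sketch for setListeners, assuming conf is an already-built MultiLayerConfiguration (construction elided here). It illustrates the instanceof filter above: a plain IterationListener such as ScoreIterationListener is forwarded to each layer and the solver but never added to trainingListeners, while a TrainingListener implementation (like the hypothetical LoggingTrainingListener sketched earlier) additionally receives the forward/backward-pass callbacks.

import java.util.Arrays;

import org.deeplearning4j.nn.multilayer.MultiLayerNetwork;
import org.deeplearning4j.optimize.api.IterationListener;
import org.deeplearning4j.optimize.listeners.ScoreIterationListener;

//'conf' is an already-built MultiLayerConfiguration (construction elided)
MultiLayerNetwork net = new MultiLayerNetwork(conf);
net.init();

//ScoreIterationListener only implements IterationListener, so it is passed to
//each layer and the solver but filtered out of trainingListeners; the
//hypothetical LoggingTrainingListener passes the instanceof TrainingListener
//check and will also get onForwardPass/onBackwardPass during training.
net.setListeners(Arrays.<IterationListener>asList(
        new ScoreIterationListener(10),   //log the score every 10 iterations
        new LoggingTrainingListener()));  //hypothetical listener from the sketch above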
Use of org.deeplearning4j.optimize.api.TrainingListener in project deeplearning4j by deeplearning4j.
The class RBM, method computeGradientAndScore:
@Override
public void computeGradientAndScore() {
    int k = layerConf().getK();

    //POSITIVE PHASE: hprob0, hstate0
    Pair<INDArray, INDArray> probHidden = sampleHiddenGivenVisible(input());

    /*
     * Start the Gibbs sampling.
     */
    // INDArray chainStart = probHidden.getSecond();
    INDArray chainStart = probHidden.getFirst();

    /*
     * Note that at a later date, we can explore alternative methods of
     * storing the chain transitions for different kinds of sampling
     * and exploring the search space.
     */
    Pair<Pair<INDArray, INDArray>, Pair<INDArray, INDArray>> matrices;
    //negative visible probabilities (means)
    INDArray negVProb = null;
    //negative visible samples
    INDArray negVSamples = null;
    //negative hidden means or expected values
    INDArray negHProb = null;
    //negative hidden samples
    INDArray negHSamples = null;

    //NEGATIVE PHASE: run the Gibbs chain for k steps
    for (int i = 0; i < k; i++) {
        if (i == 0)
            matrices = gibbhVh(chainStart);
        else
            matrices = gibbhVh(negHSamples);
        //get the cost updates for sampling in the chain after k iterations
        negVProb = matrices.getFirst().getFirst();
        negVSamples = matrices.getFirst().getSecond();
        negHProb = matrices.getSecond().getFirst();
        negHSamples = matrices.getSecond().getSecond();
    }

    /*
     * Update gradient parameters - note that taking the mean based on batch size is handled in LayerUpdater
     */
    INDArray wGradient = input().transposei().mmul(probHidden.getFirst())
            .subi(negVProb.transpose().mmul(negHProb));

    INDArray hBiasGradient;
    if (layerConf().getSparsity() != 0)
        //all hidden units must stay around this number
        hBiasGradient = probHidden.getFirst().rsub(layerConf().getSparsity()).sum(0);
    else
        //update rule: the expected values of the hidden input - the negative hidden means adjusted by the learning rate
        hBiasGradient = probHidden.getFirst().sub(negHProb).sum(0);

    //update rule: the expected values of the input - the negative samples adjusted by the learning rate
    INDArray delta = input.sub(negVProb);
    INDArray vBiasGradient = delta.sum(0);

    if (conf.isPretrain()) {
        wGradient.negi();
        hBiasGradient.negi();
        vBiasGradient.negi();
    }

    gradient = createGradient(wGradient, vBiasGradient, hBiasGradient);
    //set the score by comparing the negative visible samples against the input
    setScoreWithZ(negVSamples);

    if (trainingListeners != null && trainingListeners.size() > 0) {
        for (TrainingListener tl : trainingListeners) {
            tl.onBackwardPass(this);
        }
    }
}
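The weight gradient above is the standard contrastive-divergence update: the positive-phase statistics v0^T * h0 minus the negative-phase statistics vk^T * hk after k Gibbs steps. A standalone Nd4j sketch of the same arithmetic, with hypothetical shapes and random placeholder data standing in for real activations:

import org.nd4j.linalg.api.ndarray.INDArray;
import org.nd4j.linalg.factory.Nd4j;

//Standalone sketch of the CD-k gradients with random placeholder data.
//Shapes are hypothetical: batch of 8 examples, 4 visible units, 3 hidden units.
INDArray v0 = Nd4j.rand(8, 4);       //input (positive-phase visibles)
INDArray h0Prob = Nd4j.rand(8, 3);   //P(h|v0), positive-phase hidden probabilities
INDArray vkProb = Nd4j.rand(8, 4);   //visible probabilities after k Gibbs steps
INDArray hkProb = Nd4j.rand(8, 3);   //hidden probabilities after k Gibbs steps

//Positive phase minus negative phase: [4x8].mmul([8x3]) -> [4x3] weight gradient
INDArray wGradient = v0.transpose().mmul(h0Prob)
        .subi(vkProb.transpose().mmul(hkProb));

//Bias gradients: sum over the batch dimension, as in the method above
INDArray hBiasGradient = h0Prob.sub(hkProb).sum(0);
INDArray vBiasGradient = v0.sub(vkProb).sum(0);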