Use of org.deeplearning4j.nn.api.layers.IOutputLayer in project deeplearning4j by deeplearning4j.
The class MultiLayerNetwork, method calcBackpropGradients.
/** Calculate gradients and errors. Used in two places:
 * (a) backprop (for standard multi-layer network learning)
 * (b) backpropGradient (layer method, for when MultiLayerNetwork is used as a layer)
 * @param epsilon Errors (technically errors .* activations). Not used if withOutputLayer = true
 * @param withOutputLayer if true: assume the last layer is an output layer, and calculate errors based on the labels. In this
 *                        case, the epsilon input is not used (may/should be null).
 *                        If false: calculate backprop gradients from the provided epsilon
 * @return Gradients and the error (epsilon) at the input
 */
protected Pair<Gradient, INDArray> calcBackpropGradients(INDArray epsilon, boolean withOutputLayer) {
    if (flattenedGradients == null)
        initGradientsView();
    String multiGradientKey;
    Gradient gradient = new DefaultGradient(flattenedGradients);
    Layer currLayer;
    //Calculate and apply the backward gradient for every layer
    /*
     * When withOutputLayer == true, the output layer is handled separately below, and the backward loop
     * only iterates over the remaining layers, updating the coefficients for each one.
     *
     * Activate applies the activation function for each layer and sets that as the input for the following layer.
     *
     * Typical literature gives the most trivial case for the error calculation: propagating the error backwards as W^T * delta.
     * This implementation transposes a few things to handle mini batches, because of the rows-vs-columns
     * organization ND4J uses for the parameters.
     */
    int numLayers = getnLayers();
    //Store gradients in a list; used to ensure iteration order in the DefaultGradient linked hash map, i.e., layer 0 first instead of the output layer
    LinkedList<Triple<String, INDArray, Character>> gradientList = new LinkedList<>();
    int layerFrom;
    Pair<Gradient, INDArray> currPair;
    if (withOutputLayer) {
        if (!(getOutputLayer() instanceof IOutputLayer)) {
            log.warn("Warning: final layer isn't output layer. You cannot use backprop without an output layer.");
            return null;
        }
        IOutputLayer outputLayer = (IOutputLayer) getOutputLayer();
        if (labels == null)
            throw new IllegalStateException("No labels found");
        outputLayer.setLabels(labels);
        currPair = outputLayer.backpropGradient(null);
        for (Map.Entry<String, INDArray> entry : currPair.getFirst().gradientForVariable().entrySet()) {
            String origName = entry.getKey();
            multiGradientKey = String.valueOf(numLayers - 1) + "_" + origName;
            gradientList.addLast(new Triple<>(multiGradientKey, entry.getValue(), currPair.getFirst().flatteningOrderForVariable(origName)));
        }
        if (getLayerWiseConfigurations().getInputPreProcess(numLayers - 1) != null)
            currPair = new Pair<>(currPair.getFirst(), this.layerWiseConfigurations.getInputPreProcess(numLayers - 1).backprop(currPair.getSecond(), getInputMiniBatchSize()));
        layerFrom = numLayers - 2;
    } else {
        currPair = new Pair<>(null, epsilon);
        layerFrom = numLayers - 1;
    }
    //Calculate gradients for the remaining layers (the output layer, if present, was handled above and is excluded from the count)
    for (int j = layerFrom; j >= 0; j--) {
        currLayer = getLayer(j);
        if (currLayer instanceof FrozenLayer)
            break;
        currPair = currLayer.backpropGradient(currPair.getSecond());
        LinkedList<Triple<String, INDArray, Character>> tempList = new LinkedList<>();
        for (Map.Entry<String, INDArray> entry : currPair.getFirst().gradientForVariable().entrySet()) {
            String origName = entry.getKey();
            multiGradientKey = String.valueOf(j) + "_" + origName;
            tempList.addFirst(new Triple<>(multiGradientKey, entry.getValue(), currPair.getFirst().flatteningOrderForVariable(origName)));
        }
        for (Triple<String, INDArray, Character> triple : tempList)
            gradientList.addFirst(triple);

        //Pass epsilon through input processor before passing to next layer (if applicable)
        if (getLayerWiseConfigurations().getInputPreProcess(j) != null)
            currPair = new Pair<>(currPair.getFirst(), getLayerWiseConfigurations().getInputPreProcess(j).backprop(currPair.getSecond(), getInputMiniBatchSize()));
    }

    //Add gradients to Gradients (map), in correct order
    for (Triple<String, INDArray, Character> triple : gradientList) {
        gradient.setGradientFor(triple.getFirst(), triple.getSecond(), triple.getThird());
    }
    return new Pair<>(gradient, currPair.getSecond());
}
Use of org.deeplearning4j.nn.api.layers.IOutputLayer in project deeplearning4j by deeplearning4j.
The class MultiLayerNetwork, method scoreExamples.
/** Calculate the score for each example in a DataSet individually. Unlike {@link #score(DataSet)} and {@link #score(DataSet, boolean)},
 * this method does not average/sum over examples. This method allows examples to be scored individually (at test time only), which
 * may be useful, for example, for autoencoder architectures and the like.<br>
 * Each row of the output (assuming addRegularizationTerms == true) is equivalent to calling score(DataSet) with a single example.
 * @param data The data to score
 * @param addRegularizationTerms If true: add l1/l2 regularization terms (if any) to the score. If false: don't add regularization terms
 * @return An INDArray (column vector) of size input.numRows(); the ith entry is the score (loss value) of the ith example
 */
public INDArray scoreExamples(DataSet data, boolean addRegularizationTerms) {
    boolean hasMaskArray = data.hasMaskArrays();
    if (hasMaskArray)
        setLayerMaskArrays(data.getFeaturesMaskArray(), data.getLabelsMaskArray());
    feedForward(data.getFeatureMatrix(), false);
    setLabels(data.getLabels());
    INDArray out;
    if (getOutputLayer() instanceof IOutputLayer) {
        IOutputLayer ol = (IOutputLayer) getOutputLayer();
        ol.setLabels(data.getLabels());
        double l1 = (addRegularizationTerms ? calcL1(true) : 0.0);
        double l2 = (addRegularizationTerms ? calcL2(true) : 0.0);
        out = ol.computeScoreForExamples(l1, l2);
    } else {
        throw new UnsupportedOperationException("Cannot calculate score with respect to labels without an OutputLayer");
    }
    if (hasMaskArray)
        clearLayerMaskArrays();
    return out;
}
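A possible usage sketch, reusing the hypothetical net and illustrative random data from the sketch above (DataSet here is org.nd4j.linalg.dataset.DataSet, an assumption beyond the snippet): scoreExamples returns one loss value per row, whereas score(DataSet) returns a single value averaged over the batch.

//Hypothetical batch of 10 examples matching the assumed 4-in / 2-out network above
DataSet batch = new DataSet(Nd4j.rand(10, 4), Nd4j.rand(10, 2));

//One score (loss value) per example, including any l1/l2 terms; shape [10, 1]
INDArray perExample = net.scoreExamples(batch, true);

//For comparison: the usual score, averaged over the whole batch
double averaged = net.score(batch);

System.out.println("per-example scores:\n" + perExample);
System.out.println("averaged score: " + averaged);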