use of org.deeplearning4j.nn.api.layers.IOutputLayer in project deeplearning4j by deeplearning4j.
the class KerasSequentialModel method getMultiLayerConfiguration.
/**
* Configure a MultiLayerConfiguration from this Keras Sequential model configuration.
*
* @return MultiLayerConfiguration
*/
public MultiLayerConfiguration getMultiLayerConfiguration()
        throws InvalidKerasConfigurationException, UnsupportedKerasConfigurationException {
    if (!this.className.equals(MODEL_CLASS_NAME_SEQUENTIAL))
        throw new InvalidKerasConfigurationException("Keras model class name " + this.className
                + " incompatible with MultiLayerNetwork");
    if (this.inputLayerNames.size() != 1)
        throw new InvalidKerasConfigurationException(
                "MultiLayerNetwork expects only 1 input (found " + this.inputLayerNames.size() + ")");
    if (this.outputLayerNames.size() != 1)
        throw new InvalidKerasConfigurationException(
                "MultiLayerNetwork expects only 1 output (found " + this.outputLayerNames.size() + ")");
    NeuralNetConfiguration.Builder modelBuilder = new NeuralNetConfiguration.Builder();
    NeuralNetConfiguration.ListBuilder listBuilder = modelBuilder.list();

    /* Add layers one at a time. */
    KerasLayer prevLayer = null;
    int layerIndex = 0;
    for (KerasLayer layer : this.layersOrdered) {
        if (layer.usesRegularization())
            modelBuilder.setUseRegularization(true);
        if (layer.isLayer()) {
            int nbInbound = layer.getInboundLayerNames().size();
            if (nbInbound != 1)
                throw new InvalidKerasConfigurationException(
                        "Layers in MultiLayerConfiguration must have exactly one inbound layer (found "
                                + nbInbound + " for layer " + layer.getLayerName() + ")");
            if (prevLayer != null) {
                InputType[] inputTypes = new InputType[1];
                InputPreProcessor preprocessor;
                if (prevLayer.isInputPreProcessor()) {
                    inputTypes[0] = this.outputTypes.get(prevLayer.getInboundLayerNames().get(0));
                    preprocessor = prevLayer.getInputPreprocessor(inputTypes);
                } else {
                    inputTypes[0] = this.outputTypes.get(prevLayer.getLayerName());
                    preprocessor = layer.getInputPreprocessor(inputTypes);
                }
                if (preprocessor != null)
                    listBuilder.inputPreProcessor(layerIndex, preprocessor);
            }
            listBuilder.layer(layerIndex++, layer.getLayer());
            if (this.outputLayerNames.contains(layer.getLayerName())
                    && !(layer.getLayer() instanceof IOutputLayer))
                log.warn("Model cannot be trained: output layer " + layer.getLayerName()
                        + " is not an IOutputLayer (no loss function specified)");
        } else if (layer.getVertex() != null)
            throw new InvalidKerasConfigurationException("Cannot add vertex to MultiLayerConfiguration (class name "
                    + layer.getClassName() + ", layer name " + layer.getLayerName() + ")");
        prevLayer = layer;
    }

    InputType inputType = this.layersOrdered.get(0).getOutputType();
    if (inputType != null)
        listBuilder.setInputType(inputType);

    /* Whether to use standard backprop or truncated BPTT. */
    if (this.useTruncatedBPTT && this.truncatedBPTT > 0)
        listBuilder.backpropType(BackpropType.TruncatedBPTT).tBPTTForwardLength(truncatedBPTT)
                .tBPTTBackwardLength(truncatedBPTT);
    else
        listBuilder.backpropType(BackpropType.Standard);
    return listBuilder.build();
}
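For context, a brief usage sketch: once a KerasSequentialModel has been built (for example via the Keras model-import utilities), the configuration produced above can be used to construct and initialize a MultiLayerNetwork. The kerasModel variable is an assumption here, not part of the snippet above.

// Sketch only: 'kerasModel' is an assumed, already-constructed KerasSequentialModel.
MultiLayerConfiguration conf = kerasModel.getMultiLayerConfiguration();
MultiLayerNetwork network = new MultiLayerNetwork(conf);
network.init();   // allocates parameters; the network is now ready for training or inference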
use of org.deeplearning4j.nn.api.layers.IOutputLayer in project deeplearning4j by deeplearning4j.
the class ComputationGraph method scoreExamples.
/**
* Calculate the score for each example in a DataSet individually. Unlike {@link #score(MultiDataSet)} and {@link #score(MultiDataSet, boolean)}
* this method does not average/sum over examples. This method allows for examples to be scored individually (at test time only), which
* may be useful for example for autoencoder architectures and the like.<br>
* Each row of the output (assuming addRegularizationTerms == true) is equivalent to calling score(MultiDataSet) with a single example.
*
* @param data The data to score
* @param addRegularizationTerms If true: add l1/l2 regularization terms (if any) to the score. If false: don't add regularization terms
* @return An INDArray (column vector) of size input.numRows(); the ith entry is the score (loss value) of the ith example
*/
public INDArray scoreExamples(MultiDataSet data, boolean addRegularizationTerms) {
    boolean hasMaskArray = data.hasMaskArrays();
    if (hasMaskArray)
        setLayerMaskArrays(data.getFeaturesMaskArrays(), data.getLabelsMaskArrays());
    feedForward(data.getFeatures(), false);
    setLabels(data.getLabels());

    INDArray out = null;
    double l1 = (addRegularizationTerms ? calcL1() : 0.0);
    double l2 = (addRegularizationTerms ? calcL2() : 0.0);
    int i = 0;
    for (String s : configuration.getNetworkOutputs()) {
        Layer outLayer = verticesMap.get(s).getLayer();
        if (outLayer == null || !(outLayer instanceof IOutputLayer)) {
            throw new UnsupportedOperationException("Cannot calculate score: vertex \"" + s
                    + "\" is not an output layer");
        }
        IOutputLayer ol = (IOutputLayer) outLayer;
        ol.setLabels(labels[i++]);
        INDArray scoreCurrLayer = ol.computeScoreForExamples(l1, l2);
        if (out == null)
            out = scoreCurrLayer;
        else
            out.addi(scoreCurrLayer);
        //Only want to add l1/l2 once...
        l1 = 0.0;
        l2 = 0.0;
    }
    if (hasMaskArray)
        clearLayerMaskArrays();
    return out;
}
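A usage sketch for the method above: given an already-trained ComputationGraph and a MultiDataSet, the returned column vector holds one loss value per example, which is handy for finding anomalous inputs. The graph, testData, and threshold names are illustrative assumptions.

// Illustrative only: 'graph' (trained ComputationGraph) and 'testData' (MultiDataSet) are assumed.
INDArray perExampleScores = graph.scoreExamples(testData, true);
double threshold = 10.0;   // hypothetical cutoff for flagging high-loss examples
for (int n = 0; n < perExampleScores.rows(); n++) {
    double exampleScore = perExampleScores.getDouble(n);
    if (exampleScore > threshold)
        System.out.println("Example " + n + " has unusually high loss: " + exampleScore);
}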
use of org.deeplearning4j.nn.api.layers.IOutputLayer in project deeplearning4j by deeplearning4j.
the class MultiLayerNetwork method computeGradientAndScore.
@Override
public void computeGradientAndScore() {
    //Calculate activations (which are stored in each layer, and used in backprop)
    if (layerWiseConfigurations.getBackpropType() == BackpropType.TruncatedBPTT) {
        List<INDArray> activations = rnnActivateUsingStoredState(getInput(), true, true);
        if (trainingListeners.size() > 0) {
            for (TrainingListener tl : trainingListeners) {
                tl.onForwardPass(this, activations);
            }
        }
        truncatedBPTTGradient();
    } else {
        //First: do a feed-forward through the network
        //Note that we don't actually need to do the full forward pass through the output layer right now; but we do
        // need the input to the output layer to be set (such that backprop can be done)
        List<INDArray> activations = feedForwardToLayer(layers.length - 2, true);
        if (trainingListeners.size() > 0) {
            //TODO: We possibly do want output layer activations in some cases here...
            for (TrainingListener tl : trainingListeners) {
                tl.onForwardPass(this, activations);
            }
        }
        INDArray actSecondLastLayer = activations.get(activations.size() - 1);
        if (layerWiseConfigurations.getInputPreProcess(layers.length - 1) != null)
            actSecondLastLayer = layerWiseConfigurations.getInputPreProcess(layers.length - 1)
                    .preProcess(actSecondLastLayer, getInputMiniBatchSize());
        getOutputLayer().setInput(actSecondLastLayer);
        //Then: compute gradients
        backprop();
    }

    //Calculate score
    if (!(getOutputLayer() instanceof IOutputLayer)) {
        throw new IllegalStateException(
                "Cannot calculate gradient and score with respect to labels: final layer is not an IOutputLayer");
    }
    score = ((IOutputLayer) getOutputLayer()).computeScore(calcL1(true), calcL2(true), true);

    //Listeners
    if (trainingListeners.size() > 0) {
        for (TrainingListener tl : trainingListeners) {
            tl.onBackwardPass(this);
        }
    }
}
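computeGradientAndScore() is normally invoked by the optimizer during fit(), but it can also be called directly. A minimal sketch, assuming an initialized MultiLayerNetwork net with features and labels arrays already prepared (all three names are assumptions):

// Sketch: 'net', 'features', and 'labels' are assumed, not part of the snippet above.
net.setInput(features);
net.setLabels(labels);
net.computeGradientAndScore();
double loss = net.score();          // score set by the call above
Gradient gradient = net.gradient(); // per-parameter gradients, keyed by variable name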
use of org.deeplearning4j.nn.api.layers.IOutputLayer in project deeplearning4j by deeplearning4j.
the class MultiLayerNetwork method finetune.
/**
* Run SGD based on the given labels
*/
public void finetune() {
    if (!layerWiseConfigurations.isBackprop()) {
        log.warn("Finetune is not applied: backprop is disabled in this configuration.");
        return;
    }
    if (!(getOutputLayer() instanceof IOutputLayer)) {
        log.warn("Finetune is not applied: the final layer is not an IOutputLayer.");
        return;
    }
    if (flattenedGradients == null)
        initGradientsView();
    if (labels == null)
        throw new IllegalStateException("No labels found");
    log.info("Finetune phase");
    IOutputLayer output = (IOutputLayer) getOutputLayer();
    if (output.conf().getOptimizationAlgo() != OptimizationAlgorithm.HESSIAN_FREE) {
        feedForward();
        output.fit(output.input(), labels);
    } else {
        throw new UnsupportedOperationException();
    }
}
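A short sketch of the calling pattern, assuming a pretrained MultiLayerNetwork net (for instance, one with layerwise-pretrained weights) and hypothetical features/labels arrays:

// 'net', 'features', and 'labels' are assumptions; finetune() reads the stored input and labels.
net.setInput(features);
net.setLabels(labels);
net.finetune();   // runs SGD on the output layer against the stored labels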
use of org.deeplearning4j.nn.api.layers.IOutputLayer in project deeplearning4j by deeplearning4j.
the class MultiLayerNetwork method truncatedBPTTGradient.
/** Equivalent to backprop(), but calculates gradient for truncated BPTT instead. */
protected void truncatedBPTTGradient() {
    if (flattenedGradients == null)
        initGradientsView();
    String multiGradientKey;
    gradient = new DefaultGradient();
    Layer currLayer;

    if (!(getOutputLayer() instanceof IOutputLayer)) {
        log.warn("Final layer is not an IOutputLayer; cannot apply backprop (truncated BPTT) without an output layer.");
        return;
    }

    IOutputLayer outputLayer = (IOutputLayer) getOutputLayer();
    if (labels == null)
        throw new IllegalStateException("No labels found");
    if (outputLayer.conf().getLayer().getWeightInit() == WeightInit.ZERO) {
        throw new IllegalStateException("Output layer weights cannot be initialized to zero when using backprop.");
    }
    outputLayer.setLabels(labels);

    //Calculate and apply the backward gradient for every layer
    int numLayers = getnLayers();
    //Gradients are stored in a list here to preserve iteration order in the DefaultGradient linked hash map,
    // i.e., layer 0 first instead of the output layer
    LinkedList<Pair<String, INDArray>> gradientList = new LinkedList<>();

    Pair<Gradient, INDArray> currPair = outputLayer.backpropGradient(null);
    for (Map.Entry<String, INDArray> entry : currPair.getFirst().gradientForVariable().entrySet()) {
        multiGradientKey = String.valueOf(numLayers - 1) + "_" + entry.getKey();
        gradientList.addLast(new Pair<>(multiGradientKey, entry.getValue()));
    }
    if (getLayerWiseConfigurations().getInputPreProcess(numLayers - 1) != null)
        currPair = new Pair<>(currPair.getFirst(), this.layerWiseConfigurations.getInputPreProcess(numLayers - 1)
                .backprop(currPair.getSecond(), getInputMiniBatchSize()));

    //Calculate gradients for the remaining layers (index numLayers - 2 skips the output layer, handled above)
    for (int j = numLayers - 2; j >= 0; j--) {
        currLayer = getLayer(j);
        if (currLayer instanceof RecurrentLayer) {
            currPair = ((RecurrentLayer) currLayer).tbpttBackpropGradient(currPair.getSecond(),
                    layerWiseConfigurations.getTbpttBackLength());
        } else {
            currPair = currLayer.backpropGradient(currPair.getSecond());
        }

        LinkedList<Pair<String, INDArray>> tempList = new LinkedList<>();
        for (Map.Entry<String, INDArray> entry : currPair.getFirst().gradientForVariable().entrySet()) {
            multiGradientKey = String.valueOf(j) + "_" + entry.getKey();
            tempList.addFirst(new Pair<>(multiGradientKey, entry.getValue()));
        }
        for (Pair<String, INDArray> pair : tempList)
            gradientList.addFirst(pair);

        //Pass epsilon through the input preprocessor before passing to the next layer (if applicable)
        if (getLayerWiseConfigurations().getInputPreProcess(j) != null)
            currPair = new Pair<>(currPair.getFirst(), getLayerWiseConfigurations().getInputPreProcess(j)
                    .backprop(currPair.getSecond(), getInputMiniBatchSize()));
    }

    //Add gradients to the Gradient object, in the correct order
    for (Pair<String, INDArray> pair : gradientList)
        gradient.setGradientFor(pair.getFirst(), pair.getSecond());
}
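truncatedBPTTGradient() is reached (via computeGradientAndScore(), shown earlier) only when the configuration selects BackpropType.TruncatedBPTT. A minimal configuration sketch follows; the layer sizes, the truncation length of 50, and the string-based activation setters are assumptions that roughly match the era of DL4J these snippets come from, and API details vary across versions.

MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder()
        .list()
        .layer(0, new GravesLSTM.Builder().nIn(10).nOut(20).activation("tanh").build())
        .layer(1, new RnnOutputLayer.Builder(LossFunctions.LossFunction.MCXENT)
                .nIn(20).nOut(5).activation("softmax").build())
        .backpropType(BackpropType.TruncatedBPTT)   // selects the truncated-BPTT gradient path
        .tBPTTForwardLength(50)
        .tBPTTBackwardLength(50)
        .build();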