Use of org.deeplearning4j.nn.graph.vertex.GraphVertex in project deeplearning4j by deeplearning4j.
The class ComputationGraph, method rnnActivateUsingStoredState.
/**
* Similar to rnnTimeStep and feedForward() methods. Difference here is that this method:<br>
* (a) like rnnTimeStep does forward pass using stored state for RNN layers, and<br>
* (b) unlike rnnTimeStep does not modify the RNN layer state<br>
* Therefore multiple calls to this method with the same input should have the same output.<br>
* Typically used during training only. Use rnnTimeStep for prediction/forward pass at test time.
*
* @param inputs Input to network
* @param training Whether training or not
* @param storeLastForTBPTT set to true if used as part of truncated BPTT training
* @return Activations for each layer (including input, as per feedForward() etc)
*/
public Map<String, INDArray> rnnActivateUsingStoredState(INDArray[] inputs, boolean training, boolean storeLastForTBPTT) {
    Map<String, INDArray> layerActivations = new HashMap<>();

    //Do forward pass according to the topological ordering of the network
    for (int currVertexIdx : topologicalOrder) {
        GraphVertex current = vertices[currVertexIdx];
        if (current.isInputVertex()) {
            VertexIndices[] inputsTo = current.getOutputVertices();
            INDArray input = inputs[current.getVertexIndex()];

            layerActivations.put(current.getVertexName(), input);

            for (VertexIndices v : inputsTo) {
                int vIdx = v.getVertexIndex();
                int vIdxInputNum = v.getVertexEdgeNumber();
                //This input: the 'vIdxInputNum'th input to vertex 'vIdx'
                //TODO When to dup?
                vertices[vIdx].setInput(vIdxInputNum, input.dup());
            }
        } else {
            INDArray out;
            if (current.hasLayer()) {
                Layer l = current.getLayer();
                if (l instanceof RecurrentLayer) {
                    out = ((RecurrentLayer) l).rnnActivateUsingStoredState(current.getInputs()[0], training, storeLastForTBPTT);
                } else if (l instanceof MultiLayerNetwork) {
                    List<INDArray> temp = ((MultiLayerNetwork) l).rnnActivateUsingStoredState(current.getInputs()[0], training, storeLastForTBPTT);
                    out = temp.get(temp.size() - 1);
                } else {
                    //Non-recurrent layer
                    out = current.doForward(training);
                }
                layerActivations.put(current.getVertexName(), out);
            } else {
                out = current.doForward(training);
            }

            //Now, set the inputs for the next vertices:
            VertexIndices[] outputsTo = current.getOutputVertices();
            if (outputsTo != null) {
                for (VertexIndices v : outputsTo) {
                    int vIdx = v.getVertexIndex();
                    int inputNum = v.getVertexEdgeNumber();
                    //This (jth) connection from the output: is the 'inputNum'th input to vertex 'vIdx'
                    vertices[vIdx].setInput(inputNum, out);
                }
            }
        }
    }
    return layerActivations;
}
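Because the stored RNN state is read but never updated, repeated calls with identical input are deterministic, whereas rnnTimeStep advances the state on every call. A minimal sketch of the difference, assuming an already-initialized single-input ComputationGraph named 'graph' (the variable names and the [miniBatchSize, nIn, timeSeriesLength] input shape are illustrative assumptions, not from the DL4J source):

//Hypothetical comparison of rnnActivateUsingStoredState vs rnnTimeStep
INDArray minibatch = Nd4j.rand(new int[] {32, 100, 20}); //[miniBatchSize, nIn, timeSeriesLength] - assumed shape
Map<String, INDArray> a = graph.rnnActivateUsingStoredState(new INDArray[] {minibatch}, true, true);
Map<String, INDArray> b = graph.rnnActivateUsingStoredState(new INDArray[] {minibatch}, true, true);
//a and b contain identical activations: the stored RNN state was not modified
INDArray[] t1 = graph.rnnTimeStep(minibatch);
INDArray[] t2 = graph.rnnTimeStep(minibatch); //generally differs from t1: the stored state advanced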
Use of org.deeplearning4j.nn.graph.vertex.GraphVertex in project deeplearning4j by deeplearning4j.
The class ComputationGraph, method summary.
/**
 * String detailing the architecture of the computation graph.
 * Vertices are printed in topological sort order.
 * Columns: vertex name (with layer/vertex type), nIn, nOut, total number of parameters,
 * the shapes of those parameters, and the inputs to the vertex.
 * Will also give information about frozen layers/vertices, if any.
 * @return Summary as a string
 */
public String summary() {
    String ret = "\n";
    ret += StringUtils.repeat("=", 140);
    ret += "\n";
    ret += String.format("%-40s%-15s%-15s%-30s %s\n", "VertexName (VertexType)", "nIn,nOut", "TotalParams", "ParamsShape", "Vertex Inputs");
    ret += StringUtils.repeat("=", 140);
    ret += "\n";
    int frozenParams = 0;
    for (int currVertexIdx : topologicalOrder) {
        GraphVertex current = vertices[currVertexIdx];

        String name = current.getVertexName();
        String[] classNameArr = current.getClass().toString().split("\\.");
        String className = classNameArr[classNameArr.length - 1];

        String connections = "-";
        if (!current.isInputVertex()) {
            connections = configuration.getVertexInputs().get(name).toString();
        }

        String paramCount = "-";
        String in = "-";
        String out = "-";
        String paramShape = "-";
        if (current.hasLayer()) {
            Layer currentLayer = ((LayerVertex) current).getLayer();
            classNameArr = currentLayer.getClass().getName().split("\\.");
            className = classNameArr[classNameArr.length - 1];
            paramCount = String.valueOf(currentLayer.numParams());
            if (currentLayer.numParams() > 0) {
                paramShape = "";
                in = String.valueOf(((FeedForwardLayer) currentLayer.conf().getLayer()).getNIn());
                out = String.valueOf(((FeedForwardLayer) currentLayer.conf().getLayer()).getNOut());
                Set<String> paraNames = currentLayer.conf().getLearningRateByParam().keySet();
                for (String aP : paraNames) {
                    String paramS = ArrayUtils.toString(currentLayer.paramTable().get(aP).shape());
                    paramShape += aP + ":" + paramS + ", ";
                }
                paramShape = paramShape.subSequence(0, paramShape.lastIndexOf(",")).toString();
            }
            if (currentLayer instanceof FrozenLayer) {
                frozenParams += currentLayer.numParams();
                classNameArr = ((FrozenLayer) currentLayer).getInsideLayer().getClass().getName().split("\\.");
                className = "Frozen " + classNameArr[classNameArr.length - 1];
            }
        }

        ret += String.format("%-40s%-15s%-15s%-30s %s", name + " (" + className + ")", in + "," + out, paramCount, paramShape, connections);
        ret += "\n";
    }
    ret += StringUtils.repeat("-", 140);
    ret += String.format("\n%30s %d", "Total Parameters: ", params().length());
    ret += String.format("\n%30s %d", "Trainable Parameters: ", params().length() - frozenParams);
    ret += String.format("\n%30s %d", "Frozen Parameters: ", frozenParams);
    ret += "\n";
    ret += StringUtils.repeat("=", 140);
    ret += "\n";
    return ret;
}
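A brief usage sketch (the configuration variable 'conf' is an assumed, pre-built ComputationGraphConfiguration; summary() itself takes no arguments):

//Hypothetical usage: print the architecture table after initialization
ComputationGraph graph = new ComputationGraph(conf); //'conf' is assumed to exist
graph.init(); //parameters must be initialized before numParams()/params() are meaningful
System.out.println(graph.summary());
//Rows follow the topological order; the footer reports total, trainable and frozen parameter counts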
Use of org.deeplearning4j.nn.graph.vertex.GraphVertex in project deeplearning4j by deeplearning4j.
The class ComputationGraph, method computeGradientAndScore.
@Override
public void computeGradientAndScore() {
    //Calculate activations (which are stored in each layer, and used in backprop)
    if (configuration.getBackpropType() == BackpropType.TruncatedBPTT) {
        Map<String, INDArray> activations = rnnActivateUsingStoredState(inputs, true, true);
        if (trainingListeners.size() > 0) {
            for (TrainingListener tl : trainingListeners) {
                tl.onForwardPass(this, activations);
            }
        }
        calcBackpropGradients(true);
    } else {
        Map<String, INDArray> activations = feedForward(true, true);
        if (trainingListeners.size() > 0) {
            for (TrainingListener tl : trainingListeners) {
                tl.onForwardPass(this, activations);
            }
        }
        calcBackpropGradients(false);
    }

    //Score: sum of the scores for the various output layers...
    double l1 = calcL1();
    double l2 = calcL2();

    score = 0.0;
    for (String s : configuration.getNetworkOutputs()) {
        GraphVertex gv = verticesMap.get(s);
        score += ((IOutputLayer) gv.getLayer()).computeScore(l1, l2, true);

        //Only want to add l1/l2 once...
        l1 = 0.0;
        l2 = 0.0;
    }

    //Listeners
    if (trainingListeners.size() > 0) {
        for (TrainingListener tl : trainingListeners) {
            tl.onBackwardPass(this);
        }
    }
}
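For orientation, a hedged sketch of a manual training step built on this method ('graph', 'features' and 'labels' are assumed to exist; normal training would go through fit() instead):

//Hypothetical manual gradient computation: computeGradientAndScore() fills the
//internal gradient and score fields, which are then read back through the Model API
graph.setInputs(features);
graph.setLabels(labels);
graph.computeGradientAndScore();
Gradient g = graph.gradient(); //backed by the flattened gradient view
double score = graph.score();  //sum over output layers, with l1/l2 counted once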
Use of org.deeplearning4j.nn.graph.vertex.GraphVertex in project deeplearning4j by deeplearning4j.
The class ComputationGraph, method feedForward.
private Map<String, INDArray> feedForward(boolean train, boolean excludeOutputLayers) {
    Map<String, INDArray> layerActivations = new HashMap<>();

    //Do forward pass according to the topological ordering of the network
    for (int i = 0; i < topologicalOrder.length; i++) {
        GraphVertex current = vertices[topologicalOrder[i]];
        if (current.isInputVertex()) {
            VertexIndices[] inputsTo = current.getOutputVertices();
            INDArray input = inputs[current.getVertexIndex()];

            layerActivations.put(current.getVertexName(), input);

            for (VertexIndices v : inputsTo) {
                int vIdx = v.getVertexIndex();
                int vIdxInputNum = v.getVertexEdgeNumber();
                //This input: the 'vIdxInputNum'th input to vertex 'vIdx'
                vertices[vIdx].setInput(vIdxInputNum, input.dup());
            }
        } else {
            //Do forward pass:
            if (excludeOutputLayers && current.isOutputVertex() && current.hasLayer() && current.getLayer() instanceof IOutputLayer) {
                //We only need to ensure the input to the output layers is set properly
                continue;
            }
            INDArray out = current.doForward(train);
            if (current.hasLayer()) {
                layerActivations.put(current.getVertexName(), out);
            }

            //Now, set the inputs for the next vertices:
            VertexIndices[] outputsTo = current.getOutputVertices();
            if (outputsTo != null) {
                for (VertexIndices v : outputsTo) {
                    int vIdx = v.getVertexIndex();
                    int inputNum = v.getVertexEdgeNumber();
                    //This (jth) connection from the output: is the 'inputNum'th input to vertex 'vIdx'
                    vertices[vIdx].setInput(inputNum, out);
                }
            }
        }
    }
    return layerActivations;
}
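This overload is private; external callers reach it through the public feedForward variants. A small sketch, assuming a single-input graph 'graph' and a vertex named "dense0" (both names are illustrative assumptions):

//Hypothetical call through the public API, which delegates to internals like the overload above
Map<String, INDArray> acts = graph.feedForward(input, true); //train=true, e.g. keeps dropout active
INDArray hidden = acts.get("dense0"); //activations are keyed by vertex name, as in the map built above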
Use of org.deeplearning4j.nn.graph.vertex.GraphVertex in project deeplearning4j by deeplearning4j.
The class ComputationGraph, method calcBackpropGradients.
/**
* Do backprop (gradient calculation)
*
* @param truncatedBPTT false: normal backprop. true: calculate gradients using truncated BPTT for RNN layers
* @param externalEpsilons usually null (for typical supervised learning). If non-null (and length > 0), assume that
*                         the user has provided some errors externally, as they would do, for example, in reinforcement
*                         learning situations.
*/
protected void calcBackpropGradients(boolean truncatedBPTT, INDArray... externalEpsilons) {
    if (flattenedGradients == null)
        initGradientsView();

    LinkedList<Triple<String, INDArray, Character>> gradients = new LinkedList<>();

    //Do backprop according to the reverse of the topological ordering of the network
    //If true: already set epsilon for this vertex; later epsilons should be *added* to the existing one, not set
    boolean[] setVertexEpsilon = new boolean[topologicalOrder.length];
    for (int i = topologicalOrder.length - 1; i >= 0; i--) {
        GraphVertex current = vertices[topologicalOrder[i]];

        if (current.isInputVertex())
            continue; //No op

        //FIXME: make the frozen vertex feature extraction more flexible
        if (current.hasLayer() && current.getLayer() instanceof FrozenLayer)
            break;

        if (current.isOutputVertex()) {
            //Two reasons for a vertex to be an output vertex:
            //(a) it's an output layer (i.e., instanceof IOutputLayer), or
            //(b) it's a normal layer, but it has been marked as an output layer for use in external errors - for reinforcement learning, for example
            int thisOutputNumber = configuration.getNetworkOutputs().indexOf(current.getVertexName());
            if (current.getLayer() instanceof IOutputLayer) {
                IOutputLayer outputLayer = (IOutputLayer) current.getLayer();
                INDArray currLabels = labels[thisOutputNumber];
                outputLayer.setLabels(currLabels);
            } else {
                current.setEpsilon(externalEpsilons[thisOutputNumber]);
                setVertexEpsilon[topologicalOrder[i]] = true;
            }
        }

        Pair<Gradient, INDArray[]> pair = current.doBackward(truncatedBPTT);
        INDArray[] epsilons = pair.getSecond();

        //Inputs to the current GraphVertex:
        VertexIndices[] inputVertices = current.getInputVertices();

        //Set epsilons for the vertices that provide inputs to this vertex:
        if (inputVertices != null) {
            int j = 0;
            for (VertexIndices v : inputVertices) {
                GraphVertex gv = vertices[v.getVertexIndex()];
                if (setVertexEpsilon[gv.getVertexIndex()]) {
                    //This vertex: must output to multiple vertices... we want to add the epsilons here
                    INDArray currentEps = gv.getEpsilon();
                    //TODO: in some circumstances, it may be safe to do in-place add (but not always)
                    gv.setEpsilon(currentEps.add(epsilons[j++]));
                } else {
                    gv.setEpsilon(epsilons[j++]);
                }
                setVertexEpsilon[gv.getVertexIndex()] = true;
            }
        }

        if (pair.getFirst() != null) {
            Gradient g = pair.getFirst();
            Map<String, INDArray> map = g.gradientForVariable();
            LinkedList<Triple<String, INDArray, Character>> tempList = new LinkedList<>();
            for (Map.Entry<String, INDArray> entry : map.entrySet()) {
                String origName = entry.getKey();
                String newName = current.getVertexName() + "_" + origName;
                tempList.addFirst(new Triple<>(newName, entry.getValue(), g.flatteningOrderForVariable(origName)));
            }
            for (Triple<String, INDArray, Character> t : tempList)
                gradients.addFirst(t);
        }
    }

    //Now, add the gradients in the order we need them in for flattening (same as params order)
    Gradient gradient = new DefaultGradient(flattenedGradients);
    for (Triple<String, INDArray, Character> t : gradients) {
        gradient.setGradientFor(t.getFirst(), t.getSecond(), t.getThird());
    }

    this.gradient = gradient;
}
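The externalEpsilons path is exercised through the public backpropGradient(INDArray...) method, e.g. in reinforcement learning, where the caller supplies dL/dOutput directly instead of labels. A hedged sketch ('features', the "out" vertex name, and the computeExternalError helper are hypothetical):

//Hypothetical external-error backprop: forward pass first, so activations are stored in each vertex
Map<String, INDArray> acts = graph.feedForward(new INDArray[] {features}, true);
INDArray externalError = computeExternalError(acts.get("out")); //hypothetical helper computing dL/dOutput
Gradient g = graph.backpropGradient(externalError); //reaches calcBackpropGradients with these epsilons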