use of org.deeplearning4j.nn.graph.vertex.VertexIndices in project deeplearning4j by deeplearning4j.
the class RemoteFlowIterationListener method flattenToY.
/**
 * This method returns all layers connected to the current input.
 *
 * @param model        the model info being assembled for the flow chart
 * @param vertices     all vertices of the computation graph
 * @param currentInput names of the vertices that feed the current level
 * @param currentY     the vertical (Y) position assigned to the layers found
 * @return the layers connected to the current input
 */
protected List<LayerInfo> flattenToY(ModelInfo model, GraphVertex[] vertices, List<String> currentInput, int currentY) {
    List<LayerInfo> results = new ArrayList<>();
    int x = 0;
    for (int v = 0; v < vertices.length; v++) {
        GraphVertex vertex = vertices[v];
        VertexIndices[] indices = vertex.getInputVertices();
        if (indices != null)
            for (int i = 0; i < indices.length; i++) {
                GraphVertex cv = vertices[indices[i].getVertexIndex()];
                String inputName = cv.getVertexName();
                for (String input : currentInput) {
                    if (inputName.equals(input)) {
                        // log.info("Vertex: " + vertex.getVertexName() + " has Input: " + input);
                        try {
                            LayerInfo info = model.getLayerInfoByName(vertex.getVertexName());
                            if (info == null)
                                info = getLayerInfo(vertex.getLayer(), x, currentY, 121);
                            info.setName(vertex.getVertexName());
                            // special case here: vertex isn't a layer
                            if (vertex.getLayer() == null) {
                                info.setLayerType(vertex.getClass().getSimpleName());
                            }
                            if (info.getName().endsWith("-merge"))
                                info.setLayerType("MERGE");
                            if (model.getLayerInfoByName(vertex.getVertexName()) == null) {
                                x++;
                                model.addLayer(info);
                                results.add(info);
                            }
                            // now we should map connections
                            LayerInfo connection = model.getLayerInfoByName(input);
                            if (connection != null) {
                                connection.addConnection(info);
                                // log.info("Adding connection [" + connection.getName() + "] -> [" + info.getName() + "]");
                            } else {
                                // the only reason for null here is a direct input connection
                                //connection.addConnection(0,0);
                            }
                        } catch (Exception e) {
                            e.printStackTrace();
                        }
                    }
                }
            }
    }
    return results;
}
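For orientation, a minimal sketch (not from the project) of the VertexIndices lookup pattern used above: each element of getInputVertices() identifies a source vertex by its position in the graph's vertices array. It assumes an already-initialized ComputationGraph named graph, which is a hypothetical variable here.

// Hypothetical: 'graph' is an initialized ComputationGraph
GraphVertex[] allVertices = graph.getVertices();
for (GraphVertex vertex : allVertices) {
    VertexIndices[] inputs = vertex.getInputVertices();
    if (inputs == null)
        continue; // input vertices have no incoming edges
    for (VertexIndices in : inputs) {
        // getVertexIndex() is the source vertex's position in the vertices array
        GraphVertex source = allVertices[in.getVertexIndex()];
        System.out.println(source.getVertexName() + " -> " + vertex.getVertexName());
    }
}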
use of org.deeplearning4j.nn.graph.vertex.VertexIndices in project deeplearning4j by deeplearning4j.
the class ComputationGraph method rnnActivateUsingStoredState.
/**
 * Similar to rnnTimeStep and feedForward() methods. Difference here is that this method:<br>
 * (a) like rnnTimeStep does forward pass using stored state for RNN layers, and<br>
 * (b) unlike rnnTimeStep does not modify the RNN layer state<br>
 * Therefore multiple calls to this method with the same input should have the same output.<br>
 * Typically used during training only. Use rnnTimeStep for prediction/forward pass at test time.
 *
 * @param inputs            Input to network
 * @param training          Whether training or not
 * @param storeLastForTBPTT set to true if used as part of truncated BPTT training
 * @return Activations for each layer (including input, as per feedForward() etc)
 */
public Map<String, INDArray> rnnActivateUsingStoredState(INDArray[] inputs, boolean training, boolean storeLastForTBPTT) {
    Map<String, INDArray> layerActivations = new HashMap<>();
    //Do forward pass according to the topological ordering of the network
    for (int currVertexIdx : topologicalOrder) {
        GraphVertex current = vertices[currVertexIdx];
        if (current.isInputVertex()) {
            VertexIndices[] inputsTo = current.getOutputVertices();
            INDArray input = inputs[current.getVertexIndex()];
            layerActivations.put(current.getVertexName(), input);
            for (VertexIndices v : inputsTo) {
                int vIdx = v.getVertexIndex();
                int vIdxInputNum = v.getVertexEdgeNumber();
                //This input: the 'vIdxInputNum'th input to vertex 'vIdx'
                //TODO When to dup?
                vertices[vIdx].setInput(vIdxInputNum, input.dup());
            }
        } else {
            INDArray out;
            if (current.hasLayer()) {
                Layer l = current.getLayer();
                if (l instanceof RecurrentLayer) {
                    out = ((RecurrentLayer) l).rnnActivateUsingStoredState(current.getInputs()[0], training, storeLastForTBPTT);
                } else if (l instanceof MultiLayerNetwork) {
                    List<INDArray> temp = ((MultiLayerNetwork) l).rnnActivateUsingStoredState(current.getInputs()[0], training, storeLastForTBPTT);
                    out = temp.get(temp.size() - 1);
                } else {
                    //non-recurrent layer
                    out = current.doForward(training);
                }
                layerActivations.put(current.getVertexName(), out);
            } else {
                out = current.doForward(training);
            }
            //Now, set the inputs for the next vertices:
            VertexIndices[] outputsTo = current.getOutputVertices();
            if (outputsTo != null) {
                for (VertexIndices v : outputsTo) {
                    int vIdx = v.getVertexIndex();
                    int inputNum = v.getVertexEdgeNumber();
                    //This connection from the output is the 'inputNum'th input to vertex 'vIdx'
                    vertices[vIdx].setInput(inputNum, out);
                }
            }
        }
    }
    return layerActivations;
}
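A hedged usage sketch: because the stored RNN state is read but never modified, repeated calls with the same input return identical activations. Here net (an initialized ComputationGraph), nIn and timeSeriesLength are assumptions, not taken from the project.

// Hypothetical: 'net', 'nIn' and 'timeSeriesLength' are assumed
INDArray[] input = new INDArray[] {Nd4j.rand(new int[] {1, nIn, timeSeriesLength})};
Map<String, INDArray> first = net.rnnActivateUsingStoredState(input, false, false);
Map<String, INDArray> second = net.rnnActivateUsingStoredState(input, false, false);
// 'first' and 'second' hold the same activations, keyed by vertex name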
use of org.deeplearning4j.nn.graph.vertex.VertexIndices in project deeplearning4j by deeplearning4j.
the class ComputationGraph method feedForward.
private Map<String, INDArray> feedForward(boolean train, boolean excludeOutputLayers) {
    Map<String, INDArray> layerActivations = new HashMap<>();
    //Do forward pass according to the topological ordering of the network
    for (int i = 0; i < topologicalOrder.length; i++) {
        GraphVertex current = vertices[topologicalOrder[i]];
        if (current.isInputVertex()) {
            VertexIndices[] inputsTo = current.getOutputVertices();
            INDArray input = inputs[current.getVertexIndex()];
            layerActivations.put(current.getVertexName(), input);
            for (VertexIndices v : inputsTo) {
                int vIdx = v.getVertexIndex();
                int vIdxInputNum = v.getVertexEdgeNumber();
                //This input: the 'vIdxInputNum'th input to vertex 'vIdx'
                vertices[vIdx].setInput(vIdxInputNum, input.dup());
            }
        } else {
            //Do forward pass:
            if (excludeOutputLayers && current.isOutputVertex() && current.hasLayer() && current.getLayer() instanceof IOutputLayer) {
                // we only need to ensure the input to the output layers is set properly
                continue;
            }
            INDArray out = current.doForward(train);
            if (current.hasLayer()) {
                layerActivations.put(current.getVertexName(), out);
            }
            //Now, set the inputs for the next vertices:
            VertexIndices[] outputsTo = current.getOutputVertices();
            if (outputsTo != null) {
                for (VertexIndices v : outputsTo) {
                    int vIdx = v.getVertexIndex();
                    int inputNum = v.getVertexEdgeNumber();
                    //This connection from the output is the 'inputNum'th input to vertex 'vIdx'
                    vertices[vIdx].setInput(inputNum, out);
                }
            }
        }
    }
    return layerActivations;
}
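This overload is private; callers reach it through the public feedForward methods. A minimal sketch, assuming an initialized single-input ComputationGraph net and an input array features (both hypothetical; the vertex name "merge" is hypothetical as well):

// Hypothetical: 'net' and 'features' are assumed
Map<String, INDArray> activations = net.feedForward(features, false); // test-time forward pass
INDArray mergeOut = activations.get("merge"); // activations are keyed by vertex name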
use of org.deeplearning4j.nn.graph.vertex.VertexIndices in project deeplearning4j by deeplearning4j.
the class ComputationGraph method calcBackpropGradients.
/**
 * Do backprop (gradient calculation)
 *
 * @param truncatedBPTT    false: normal backprop. true: calculate gradients using truncated BPTT for RNN layers
 * @param externalEpsilons usually null (typical supervised learning). If non-null (and non-empty), the errors
 *                         are assumed to have been provided externally by the user, as in reinforcement
 *                         learning, for example.
 */
protected void calcBackpropGradients(boolean truncatedBPTT, INDArray... externalEpsilons) {
    if (flattenedGradients == null)
        initGradientsView();
    LinkedList<Triple<String, INDArray, Character>> gradients = new LinkedList<>();
    //Do backprop according to the reverse of the topological ordering of the network
    //If true: already set epsilon for this vertex; later epsilons should be *added* to the existing one, not set
    boolean[] setVertexEpsilon = new boolean[topologicalOrder.length];
    for (int i = topologicalOrder.length - 1; i >= 0; i--) {
        GraphVertex current = vertices[topologicalOrder[i]];
        if (current.isInputVertex())
            continue; //No op
        //FIXME: make the frozen vertex feature extraction more flexible
        if (current.hasLayer() && current.getLayer() instanceof FrozenLayer)
            break;
        if (current.isOutputVertex()) {
            //Two reasons for a vertex to be an output vertex:
            //(a) it's an output layer (i.e., instanceof IOutputLayer), or
            //(b) it's a normal layer, but it has been marked as an output layer for use in external errors - for reinforcement learning, for example
            int thisOutputNumber = configuration.getNetworkOutputs().indexOf(current.getVertexName());
            if (current.getLayer() instanceof IOutputLayer) {
                IOutputLayer outputLayer = (IOutputLayer) current.getLayer();
                INDArray currLabels = labels[thisOutputNumber];
                outputLayer.setLabels(currLabels);
            } else {
                current.setEpsilon(externalEpsilons[thisOutputNumber]);
                setVertexEpsilon[topologicalOrder[i]] = true;
            }
        }
        Pair<Gradient, INDArray[]> pair = current.doBackward(truncatedBPTT);
        INDArray[] epsilons = pair.getSecond();
        //Inputs to the current GraphVertex:
        VertexIndices[] inputVertices = current.getInputVertices();
        //Set epsilons for the vertices that provide inputs to this vertex:
        if (inputVertices != null) {
            int j = 0;
            for (VertexIndices v : inputVertices) {
                GraphVertex gv = vertices[v.getVertexIndex()];
                if (setVertexEpsilon[gv.getVertexIndex()]) {
                    //This vertex outputs to multiple vertices: add to the existing epsilon rather than overwrite it
                    INDArray currentEps = gv.getEpsilon();
                    //TODO: in some circumstances, it may be safe to do in-place add (but not always)
                    gv.setEpsilon(currentEps.add(epsilons[j++]));
                } else {
                    gv.setEpsilon(epsilons[j++]);
                }
                setVertexEpsilon[gv.getVertexIndex()] = true;
            }
        }
        if (pair.getFirst() != null) {
            Gradient g = pair.getFirst();
            Map<String, INDArray> map = g.gradientForVariable();
            LinkedList<Triple<String, INDArray, Character>> tempList = new LinkedList<>();
            for (Map.Entry<String, INDArray> entry : map.entrySet()) {
                String origName = entry.getKey();
                String newName = current.getVertexName() + "_" + origName;
                tempList.addFirst(new Triple<>(newName, entry.getValue(), g.flatteningOrderForVariable(origName)));
            }
            for (Triple<String, INDArray, Character> t : tempList)
                gradients.addFirst(t);
        }
    }
    //Now, add the gradients in the order we need them in for flattening (same as params order)
    Gradient gradient = new DefaultGradient(flattenedGradients);
    for (Triple<String, INDArray, Character> t : gradients) {
        gradient.setGradientFor(t.getFirst(), t.getSecond(), t.getThird());
    }
    this.gradient = gradient;
}
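calcBackpropGradients is protected; in typical supervised use it is reached indirectly, for example via computeGradientAndScore(). A sketch under that assumption, with net, features and labels as hypothetical variables:

// Hypothetical: 'net', 'features' and 'labels' are assumed
net.setInputs(features);
net.setLabels(labels);
net.computeGradientAndScore();
Gradient g = net.gradient(); // gradients flattened in the same order as the parameters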
use of org.deeplearning4j.nn.graph.vertex.VertexIndices in project deeplearning4j by deeplearning4j.
the class TransferLearningHelper method initHelperGraph.
/**
 * Runs through the computation graph and saves off a new model that is simply the "unfrozen" part of the
 * origModel. This "unfrozen" model is then used for training with featurized data.
 */
private void initHelperGraph() {
    int[] backPropOrder = origGraph.topologicalSortOrder().clone();
    ArrayUtils.reverse(backPropOrder);
    Set<String> allFrozen = new HashSet<>();
    if (applyFrozen) {
        Collections.addAll(allFrozen, frozenOutputAt);
    }
    for (int i = 0; i < backPropOrder.length; i++) {
        org.deeplearning4j.nn.graph.vertex.GraphVertex gv = origGraph.getVertices()[backPropOrder[i]];
        if (applyFrozen && allFrozen.contains(gv.getVertexName())) {
            if (gv.hasLayer()) {
                //Need to freeze this layer
                org.deeplearning4j.nn.api.Layer l = gv.getLayer();
                gv.setLayerAsFrozen();
                //We also need to place the layer in the CompGraph Layer[] (replacing the old one)
                //This could no doubt be done more efficiently
                org.deeplearning4j.nn.api.Layer[] layers = origGraph.getLayers();
                for (int j = 0; j < layers.length; j++) {
                    if (layers[j] == l) {
                        //Place the new frozen layer to replace the original layer
                        layers[j] = gv.getLayer();
                        break;
                    }
                }
            }
            //Also mark this vertex's inputs as frozen
            VertexIndices[] inputs = gv.getInputVertices();
            if (inputs != null && inputs.length > 0) {
                for (int j = 0; j < inputs.length; j++) {
                    int inputVertexIdx = inputs[j].getVertexIndex();
                    String alsoFreeze = origGraph.getVertices()[inputVertexIdx].getVertexName();
                    allFrozen.add(alsoFreeze);
                }
            }
        } else {
            if (gv.hasLayer()) {
                if (gv.getLayer() instanceof FrozenLayer) {
                    allFrozen.add(gv.getVertexName());
                    //also add its parents to allFrozen
                    VertexIndices[] inputs = gv.getInputVertices();
                    if (inputs != null && inputs.length > 0) {
                        for (int j = 0; j < inputs.length; j++) {
                            int inputVertexIdx = inputs[j].getVertexIndex();
                            String alsoFrozen = origGraph.getVertices()[inputVertexIdx].getVertexName();
                            allFrozen.add(alsoFrozen);
                        }
                    }
                }
            }
        }
    }
    for (int i = 0; i < backPropOrder.length; i++) {
        org.deeplearning4j.nn.graph.vertex.GraphVertex gv = origGraph.getVertices()[backPropOrder[i]];
        String gvName = gv.getVertexName();
        //is it an unfrozen vertex that has an input vertex that is frozen?
        if (!allFrozen.contains(gvName) && !gv.isInputVertex()) {
            VertexIndices[] inputs = gv.getInputVertices();
            for (int j = 0; j < inputs.length; j++) {
                int inputVertexIdx = inputs[j].getVertexIndex();
                String inputVertex = origGraph.getVertices()[inputVertexIdx].getVertexName();
                if (allFrozen.contains(inputVertex)) {
                    frozenInputVertices.add(inputVertex);
                }
            }
        }
    }
    TransferLearning.GraphBuilder builder = new TransferLearning.GraphBuilder(origGraph);
    for (String toRemove : allFrozen) {
        if (frozenInputVertices.contains(toRemove)) {
            builder.removeVertexKeepConnections(toRemove);
        } else {
            builder.removeVertexAndConnections(toRemove);
        }
    }
    Set<String> frozenInputVerticesSorted = new HashSet<>();
    frozenInputVerticesSorted.addAll(origGraph.getConfiguration().getNetworkInputs());
    frozenInputVerticesSorted.removeAll(allFrozen);
    //remove input vertices - just to add back in a predictable order
    for (String existingInput : frozenInputVerticesSorted) {
        builder.removeVertexKeepConnections(existingInput);
    }
    frozenInputVerticesSorted.addAll(frozenInputVertices);
    //Sort all inputs to the computation graph - in order to have a predictable order
    graphInputs = new ArrayList<>(frozenInputVerticesSorted);
    Collections.sort(graphInputs);
    for (String asInput : frozenInputVerticesSorted) {
        //add back in the right order
        builder.addInputs(asInput);
    }
    unFrozenSubsetGraph = builder.build();
    copyOrigParamsToSubsetGraph();
    if (frozenInputVertices.isEmpty()) {
        throw new IllegalArgumentException("No frozen layers found");
    }
}
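A sketch of how initHelperGraph is reached: it runs when a TransferLearningHelper is constructed with one or more frozen-output vertex names. Here origGraph is an initialized ComputationGraph and "fc1" is a hypothetical vertex name.

// Hypothetical: 'origGraph' and the vertex name "fc1" are assumed
TransferLearningHelper helper = new TransferLearningHelper(origGraph, "fc1"); // freezes "fc1" and everything feeding it
ComputationGraph unfrozen = helper.unfrozenGraph(); // the "unfrozen" subset graph built by initHelperGraph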