Use of org.deeplearning4j.nn.graph.vertex.VertexIndices in project deeplearning4j by deeplearning4j.
From the class ComputationGraph, method init():
/**
* Initialize the ComputationGraph, optionally with an existing parameters array.
* If an existing parameters array is specified, it will be used (and the values will not be modified) in the network;
* if no parameters array is specified, parameters will be initialized randomly according to the network configuration.
*
* @param parameters Network parameters. May be null. If null, parameters are initialized randomly.
* @param cloneParametersArray Whether the parameter array (if any) should be cloned, or used directly
*/
public void init(INDArray parameters, boolean cloneParametersArray) {
if (initCalled)
return;
//First: build topological ordering, based on configuration. Used for forward pass, backprop and order of parameters/gradients
topologicalOrder = topologicalSortOrder();
//Initialization: create the GraphVertex objects, based on configuration structure
Map<String, org.deeplearning4j.nn.conf.graph.GraphVertex> configVertexMap = configuration.getVertices();
//Names of all of the (data) inputs to the ComputationGraph
List<String> networkInputNames = configuration.getNetworkInputs();
//Inputs for each layer and GraphNode:
Map<String, List<String>> vertexInputs = configuration.getVertexInputs();
this.vertices = new GraphVertex[networkInputNames.size() + configuration.getVertices().size()];
//All names: inputs, layers and graph nodes (index to name map)
Map<String, Integer> allNamesReverse = new HashMap<>();
//Create network input vertices:
int vertexNumber = 0;
for (String name : networkInputNames) {
//Output vertices: set later
GraphVertex gv = new InputVertex(this, name, vertexNumber, null);
allNamesReverse.put(name, vertexNumber);
vertices[vertexNumber++] = gv;
}
//Go through layers, and work out total number of parameters. Then allocate full parameters array
int numParams = 0;
int[] numParamsForVertex = new int[topologicalOrder.length];
int i = 0;
for (; i < configuration.getNetworkInputs().size(); i++) {
//No parameters for input vertices
numParamsForVertex[i] = 0;
}
for (Map.Entry<String, org.deeplearning4j.nn.conf.graph.GraphVertex> nodeEntry : configVertexMap.entrySet()) {
org.deeplearning4j.nn.conf.graph.GraphVertex n = nodeEntry.getValue();
numParamsForVertex[i] = n.numParams(true);
numParams += numParamsForVertex[i];
i++;
}
boolean initializeParams;
if (parameters != null) {
if (!parameters.isRowVector())
throw new IllegalArgumentException("Invalid parameters: should be a row vector");
if (parameters.length() != numParams)
throw new IllegalArgumentException("Invalid parameters: expected length " + numParams + ", got length " + parameters.length());
if (cloneParametersArray)
flattenedParams = parameters.dup();
else
flattenedParams = parameters;
initializeParams = false;
} else {
flattenedParams = Nd4j.create(1, numParams);
initializeParams = true;
}
//Given the topological ordering: work out the subset of the parameters array used for each layer
// Then extract out for use when initializing the Layers
INDArray[] paramsViewForVertex = new INDArray[topologicalOrder.length];
int paramOffsetSoFar = 0;
i = 0;
for (int vertexIdx : topologicalOrder) {
int nParamsThisVertex = numParamsForVertex[vertexIdx];
if (nParamsThisVertex != 0) {
paramsViewForVertex[vertexIdx] = flattenedParams.get(NDArrayIndex.point(0), NDArrayIndex.interval(paramOffsetSoFar, paramOffsetSoFar + nParamsThisVertex));
}
i++;
paramOffsetSoFar += nParamsThisVertex;
}
int numLayers = 0;
List<Layer> tempLayerList = new ArrayList<>();
defaultConfiguration.clearVariables();
List<String> variables = defaultConfiguration.variables(false);
for (Map.Entry<String, org.deeplearning4j.nn.conf.graph.GraphVertex> nodeEntry : configVertexMap.entrySet()) {
org.deeplearning4j.nn.conf.graph.GraphVertex n = nodeEntry.getValue();
String name = nodeEntry.getKey();
GraphVertex gv = n.instantiate(this, name, vertexNumber, paramsViewForVertex[vertexNumber], initializeParams);
if (gv.hasLayer()) {
numLayers++;
Layer l = gv.getLayer();
tempLayerList.add(l);
List<String> layerVariables = l.conf().variables();
if (layerVariables != null) {
for (String s : layerVariables) {
variables.add(gv.getVertexName() + "_" + s);
}
}
}
allNamesReverse.put(name, vertexNumber);
vertices[vertexNumber++] = gv;
}
layers = tempLayerList.toArray(new Layer[numLayers]);
//Create the lookup table, so we can find vertices easily by name
verticesMap = new HashMap<>();
for (GraphVertex gv : vertices) {
verticesMap.put(gv.getVertexName(), gv);
}
//Now: do another pass to set the input and output indices, for each vertex
// These indices are used during forward and backward passes
//To get output indices: need to essentially build the graph in reverse...
//Key: vertex. Values: vertices that this node is an input for
Map<String, List<String>> verticesOutputTo = new HashMap<>();
for (GraphVertex gv : vertices) {
String vertexName = gv.getVertexName();
List<String> vertexInputNames;
vertexInputNames = vertexInputs.get(vertexName);
if (vertexInputNames == null)
continue;
//Build reverse network structure:
for (String s : vertexInputNames) {
List<String> list = verticesOutputTo.get(s);
if (list == null) {
list = new ArrayList<>();
verticesOutputTo.put(s, list);
}
//Edge: s -> vertexName
list.add(vertexName);
}
}
for (GraphVertex gv : vertices) {
String vertexName = gv.getVertexName();
int vertexIndex = gv.getVertexIndex();
List<String> vertexInputNames;
vertexInputNames = vertexInputs.get(vertexName);
if (vertexInputNames == null)
continue;
VertexIndices[] inputIndices = new VertexIndices[vertexInputNames.size()];
for (int j = 0; j < vertexInputNames.size(); j++) {
String inName = vertexInputNames.get(j);
int inputVertexIndex = allNamesReverse.get(inName);
//Output of vertex 'inputVertexIndex' is the jth input to the current vertex
//For input indices, we need to know which output connection of vertex 'inputVertexIndex' this represents
GraphVertex inputVertex = vertices[inputVertexIndex];
//First: get the outputs of the input vertex...
List<String> inputVertexOutputsTo = verticesOutputTo.get(inName);
int outputNumberOfInput = inputVertexOutputsTo.indexOf(vertexName);
if (outputNumberOfInput == -1)
throw new IllegalStateException("Could not find vertex " + vertexIndex + " in the list of outputs " + "for vertex " + inputVertex + "; error in graph structure?");
//Overall here: the 'outputNumberOfInput'th output of vertex 'inputVertexIndex' is the jth input to the current vertex
inputIndices[j] = new VertexIndices(inputVertexIndex, outputNumberOfInput);
}
gv.setInputVertices(inputIndices);
}
//Handle the outputs for this vertex
for (GraphVertex gv : vertices) {
String vertexName = gv.getVertexName();
List<String> thisVertexOutputsTo = verticesOutputTo.get(vertexName);
if (thisVertexOutputsTo == null || thisVertexOutputsTo.isEmpty())
//Output vertex
continue;
VertexIndices[] outputIndices = new VertexIndices[thisVertexOutputsTo.size()];
int j = 0;
for (String s : thisVertexOutputsTo) {
//First, we have gv -> s
//Which input in s does gv connect to? s may in general have multiple inputs...
List<String> nextVertexInputNames = vertexInputs.get(s);
int outputVertexInputNumber = nextVertexInputNames.indexOf(vertexName);
int outputVertexIndex = allNamesReverse.get(s);
outputIndices[j++] = new VertexIndices(outputVertexIndex, outputVertexInputNumber);
}
gv.setOutputVertices(outputIndices);
}
initCalled = true;
}
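A minimal usage sketch of init(INDArray, boolean) follows. The configuration, layer names and layer sizes are assumptions for illustration only (they are not taken from the listing above); the second graph re-uses the first graph's flattened parameter array without cloning it.

import org.deeplearning4j.nn.conf.ComputationGraphConfiguration;
import org.deeplearning4j.nn.conf.NeuralNetConfiguration;
import org.deeplearning4j.nn.conf.layers.DenseLayer;
import org.deeplearning4j.nn.conf.layers.OutputLayer;
import org.deeplearning4j.nn.graph.ComputationGraph;
import org.nd4j.linalg.api.ndarray.INDArray;

public class InitSketch {
    public static void main(String[] args) {
        // Hypothetical two-layer graph, purely for illustration
        ComputationGraphConfiguration conf = new NeuralNetConfiguration.Builder()
                .graphBuilder()
                .addInputs("in")
                .addLayer("dense", new DenseLayer.Builder().nIn(10).nOut(5).build(), "in")
                .addLayer("out", new OutputLayer.Builder().nIn(5).nOut(3).build(), "dense")
                .setOutputs("out")
                .build();

        ComputationGraph net = new ComputationGraph(conf);
        net.init();                                 // parameters == null: random initialization

        INDArray params = net.params();             // flattened row vector of all parameters
        ComputationGraph shared = new ComputationGraph(conf);
        shared.init(params, false);                 // cloneParametersArray == false: both graphs use the same backing array
    }
}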
Use of org.deeplearning4j.nn.graph.vertex.VertexIndices in project deeplearning4j by deeplearning4j.
From the class ComputationGraph, method setLayerMaskArrays():
/**
* Set the mask arrays for features and labels. Mask arrays are typically used in situations such as one-to-many
* and many-to-one learning with recurrent neural networks, as well as for supporting time series of varying lengths
* within the same minibatch.<br>
* For example, with RNN data sets with input of shape [miniBatchSize,nIn,timeSeriesLength] and outputs of shape
* [miniBatchSize,nOut,timeSeriesLength], the feature and label mask arrays will both have shape [miniBatchSize,timeSeriesLength]
* and contain values 0 or 1 at each element (to specify whether a given input/example is present - or merely padding -
* at a given time step).<br>
* <b>NOTE</b>: This method is not usually used directly. Instead, the various feedForward and fit methods handle setting
* of masking internally.
*
* @param featureMaskArrays Mask array for features (input)
* @param labelMaskArrays Mask array for labels (output)
* @see #clearLayerMaskArrays()
*/
public void setLayerMaskArrays(INDArray[] featureMaskArrays, INDArray[] labelMaskArrays) {
this.clearLayerMaskArrays();
this.inputMaskArrays = featureMaskArrays;
this.labelMaskArrays = labelMaskArrays;
if (featureMaskArrays != null) {
if (featureMaskArrays.length != numInputArrays) {
throw new IllegalArgumentException("Invalid number of feature mask arrays");
}
int minibatchSize = -1;
for (INDArray i : featureMaskArrays) {
if (i != null) {
minibatchSize = i.size(0);
}
}
//Here: need to do forward pass through the network according to the topological ordering of the network
Map<Integer, Pair<INDArray, MaskState>> map = new HashMap<>();
for (int i = 0; i < topologicalOrder.length; i++) {
GraphVertex current = vertices[topologicalOrder[i]];
if (current.isInputVertex()) {
INDArray fMask = featureMaskArrays[current.getVertexIndex()];
map.put(current.getVertexIndex(), new Pair<>(fMask, MaskState.Active));
} else {
VertexIndices[] inputVertices = current.getInputVertices();
//Now: work out the mask arrays to feed forward...
//new INDArray[inputVertices.length];
INDArray[] inputMasks = null;
MaskState maskState = null;
for (int j = 0; j < inputVertices.length; j++) {
Pair<INDArray, MaskState> p = map.get(inputVertices[j].getVertexIndex());
if (p != null) {
if (inputMasks == null) {
inputMasks = new INDArray[inputVertices.length];
}
inputMasks[j] = p.getFirst();
if (maskState == null || maskState == MaskState.Passthrough) {
maskState = p.getSecond();
}
}
}
Pair<INDArray, MaskState> outPair = current.feedForwardMaskArrays(inputMasks, maskState, minibatchSize);
map.put(topologicalOrder[i], outPair);
}
}
}
if (labelMaskArrays != null) {
if (labelMaskArrays.length != numOutputArrays) {
throw new IllegalArgumentException("Invalid number of label mask arrays");
}
for (int i = 0; i < labelMaskArrays.length; i++) {
if (labelMaskArrays[i] == null) {
// This output doesn't have a mask, we can skip it.
continue;
}
String outputName = configuration.getNetworkOutputs().get(i);
GraphVertex v = verticesMap.get(outputName);
Layer ol = v.getLayer();
ol.setMaskArray(labelMaskArrays[i]);
}
}
}
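Below is a hedged sketch of calling setLayerMaskArrays directly, assuming net is an already-initialized ComputationGraph with a single RNN input and a single output, and assuming the usual Nd4j and NDArrayIndex imports (org.nd4j.linalg.factory.Nd4j, org.nd4j.linalg.indexing.NDArrayIndex). The shapes and the padded example are illustrative; in typical code the fit and output methods set and clear these masks internally.

int miniBatchSize = 3, nIn = 4, timeSeriesLength = 10;
INDArray features = Nd4j.rand(new int[] {miniBatchSize, nIn, timeSeriesLength});

// 1 = real value at that time step, 0 = padding
INDArray featureMask = Nd4j.ones(miniBatchSize, timeSeriesLength);
featureMask.get(NDArrayIndex.point(2), NDArrayIndex.interval(6, timeSeriesLength)).assign(0);   // example 2 has only 6 real steps

net.setLayerMaskArrays(new INDArray[] {featureMask}, null);   // one entry per network input/output; null = no label masks
INDArray[] out = net.output(features);
net.clearLayerMaskArrays();                                   // remove the masks once done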
Use of org.deeplearning4j.nn.graph.vertex.VertexIndices in project deeplearning4j by deeplearning4j.
From the class ComputationGraph, method rnnTimeStep():
//------------------------------------------------------------------------------
//RNN-specific functionality
/**
* If this ComputationGraph contains one or more RNN layers: conduct forward pass (prediction)
* but using previous stored state for any RNN layers. The activations for the final step are
* also stored in the RNN layers for use next time rnnTimeStep() is called.<br>
* This method can be used to generate output one or more steps at a time instead of always having to do
* forward pass from t=0. Example uses are for streaming data, and for generating samples from network output
* one step at a time (where samples are then fed back into the network as input)<br>
* If no previous state is present in RNN layers (i.e., initially or after calling rnnClearPreviousState()),
* the default initialization (usually 0) is used.<br>
* Supports mini-batches (i.e., multiple predictions/forward passes in parallel) as well as single examples.<br>
*
* @param inputs Input to network. May be for one or multiple time steps. For single time step:
* input has shape [miniBatchSize,inputSize] or [miniBatchSize,inputSize,1]. miniBatchSize=1 for single example.<br>
* For multiple time steps: [miniBatchSize,inputSize,inputTimeSeriesLength]
* @return Output activations. If an output comes from an RNN layer (such as RnnOutputLayer): when all inputs have shape [miniBatchSize,inputSize]
* (i.e., are 2d), the corresponding outputs have shape [miniBatchSize,outputSize] (also 2d) rather than [miniBatchSize,outputSize,1].<br>
* Otherwise, RnnOutputLayer outputs are 3d with shape [miniBatchSize,outputSize,inputTimeSeriesLength] (and outputs of other layer types are unmodified).
*/
public INDArray[] rnnTimeStep(INDArray... inputs) {
this.inputs = inputs;
//Idea: if 2d in, want 2d out
boolean inputIs2d = true;
for (INDArray i : inputs) {
if (i.rank() != 2) {
inputIs2d = false;
break;
}
}
INDArray[] outputs = new INDArray[this.numOutputArrays];
//Based on: feedForward()
for (int currVertexIdx : topologicalOrder) {
GraphVertex current = vertices[currVertexIdx];
if (current.isInputVertex()) {
VertexIndices[] inputsTo = current.getOutputVertices();
INDArray input = inputs[current.getVertexIndex()];
for (VertexIndices v : inputsTo) {
int vIdx = v.getVertexIndex();
int vIdxInputNum = v.getVertexEdgeNumber();
//This input: the 'vIdxInputNum'th input to vertex 'vIdx'
//TODO When to dup?
vertices[vIdx].setInput(vIdxInputNum, input.dup());
}
} else {
INDArray out;
if (current.hasLayer()) {
//Layer
Layer l = current.getLayer();
if (l instanceof RecurrentLayer) {
out = ((RecurrentLayer) l).rnnTimeStep(current.getInputs()[0]);
} else if (l instanceof MultiLayerNetwork) {
out = ((MultiLayerNetwork) l).rnnTimeStep(current.getInputs()[0]);
} else {
//non-recurrent layer
out = current.doForward(false);
}
} else {
//GraphNode
out = current.doForward(false);
}
if (current.isOutputVertex()) {
//Get the index of this output vertex...
int idx = configuration.getNetworkOutputs().indexOf(current.getVertexName());
outputs[idx] = out;
}
//Now, set the inputs for the next vertices:
VertexIndices[] outputsTo = current.getOutputVertices();
if (outputsTo != null) {
for (VertexIndices v : outputsTo) {
int vIdx = v.getVertexIndex();
int inputNum = v.getVertexEdgeNumber();
//This (jth) connection from the output: is the 'inputNum'th input to vertex 'vIdx'
vertices[vIdx].setInput(inputNum, out);
}
}
}
}
//As per MultiLayerNetwork.rnnTimeStep(): if inputs are all 2d, then outputs are all 2d
if (inputIs2d) {
for (int i = 0; i < outputs.length; i++) {
if (outputs[i].rank() == 3 && outputs[i].size(2) == 1) {
//Return 2d output with shape [miniBatchSize,nOut]
// instead of 3d output with shape [miniBatchSize,nOut,1]
outputs[i] = outputs[i].tensorAlongDimension(0, 1, 0);
}
}
}
this.inputs = null;
return outputs;
}
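As a usage sketch (under the assumption that net has a single input, a single RnnOutputLayer output, and matching input/output sizes so that predictions can be fed back in as the next input), rnnTimeStep can generate a sequence one step at a time:

net.rnnClearPreviousState();                // start from the default (zero) RNN state
INDArray step = Nd4j.zeros(1, 4);           // [miniBatchSize=1, inputSize=4]: single example, single time step
for (int t = 0; t < 20; t++) {
    INDArray[] out = net.rnnTimeStep(step); // 2d input -> 2d output; RNN state is stored between calls
    step = out[0];                          // feed the prediction back as the next input (sampling omitted)
}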
Use of org.deeplearning4j.nn.graph.vertex.VertexIndices in project deeplearning4j by deeplearning4j.
From the class ComputationGraph, method pretrainLayer():
/**
* Pretrain a specified layer with the given MultiDataSetIterator
*
* @param layerName Layer name
* @param iter Training data
*/
public void pretrainLayer(String layerName, MultiDataSetIterator iter) {
if (!configuration.isPretrain())
return;
if (flattenedGradients == null)
initGradientsView();
if (!verticesMap.containsKey(layerName)) {
throw new IllegalStateException("Invalid vertex name: " + layerName);
}
if (!verticesMap.get(layerName).hasLayer()) {
//No op
return;
}
int layerIndex = verticesMap.get(layerName).getVertexIndex();
//Need to do a partial forward pass. Simply following the topological ordering won't be efficient, as we might
// end up doing a forward pass on layers we don't need.
//However, we can start with the topological order, and prune out any layers the forward pass doesn't require
LinkedList<Integer> partialTopoSort = new LinkedList<>();
Set<Integer> seenSoFar = new HashSet<>();
partialTopoSort.add(topologicalOrder[layerIndex]);
seenSoFar.add(topologicalOrder[layerIndex]);
for (int j = layerIndex - 1; j >= 0; j--) {
//Do we need to do forward pass on this GraphVertex?
//If it is input to any other layer we need, then yes. Otherwise: no
VertexIndices[] outputsTo = vertices[topologicalOrder[j]].getOutputVertices();
boolean needed = false;
for (VertexIndices vi : outputsTo) {
if (seenSoFar.contains(vi.getVertexIndex())) {
needed = true;
break;
}
}
if (needed) {
partialTopoSort.addFirst(topologicalOrder[j]);
seenSoFar.add(topologicalOrder[j]);
}
}
int[] fwdPassOrder = new int[partialTopoSort.size()];
int k = 0;
for (Integer g : partialTopoSort) fwdPassOrder[k++] = g;
GraphVertex gv = vertices[fwdPassOrder[fwdPassOrder.length - 1]];
Layer layer = gv.getLayer();
if (!iter.hasNext() && iter.resetSupported()) {
iter.reset();
}
while (iter.hasNext()) {
MultiDataSet multiDataSet = iter.next();
setInputs(multiDataSet.getFeatures());
for (int j = 0; j < fwdPassOrder.length - 1; j++) {
GraphVertex current = vertices[fwdPassOrder[j]];
if (current.isInputVertex()) {
VertexIndices[] inputsTo = current.getOutputVertices();
INDArray input = inputs[current.getVertexIndex()];
for (VertexIndices v : inputsTo) {
int vIdx = v.getVertexIndex();
int vIdxInputNum = v.getVertexEdgeNumber();
//This input: the 'vIdxInputNum'th input to vertex 'vIdx'
//TODO When to dup?
vertices[vIdx].setInput(vIdxInputNum, input.dup());
}
} else {
//Do forward pass:
INDArray out = current.doForward(true);
//Now, set the inputs for the next vertices:
VertexIndices[] outputsTo = current.getOutputVertices();
if (outputsTo != null) {
for (VertexIndices v : outputsTo) {
int vIdx = v.getVertexIndex();
int inputNum = v.getVertexEdgeNumber();
//This (jth) connection from the output: is the 'inputNum'th input to vertex 'vIdx'
vertices[vIdx].setInput(inputNum, out);
}
}
}
}
//At this point: have done all of the required forward pass stuff. Can now pretrain layer on current input
layer.fit(gv.getInputs()[0]);
layer.conf().setPretrain(false);
}
}
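A sketch of pretraining a single named vertex follows. It assumes the configuration was built with pretrain(true) and contains a pretrainable layer registered under the (hypothetical) name "vae"; the random data and the SingletonMultiDataSetIterator wrapper (from org.deeplearning4j.datasets.iterator.impl) are illustrative assumptions.

MultiDataSet mds = new org.nd4j.linalg.dataset.MultiDataSet(
        new INDArray[] {Nd4j.rand(32, 10)}, new INDArray[] {Nd4j.rand(32, 10)});
MultiDataSetIterator iter = new SingletonMultiDataSetIterator(mds);   // single-batch iterator over the random data

net.pretrainLayer("vae", iter);   // runs the partial forward pass above, then fits only the "vae" layer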
Use of org.deeplearning4j.nn.graph.vertex.VertexIndices in project deeplearning4j by deeplearning4j.
From the class FlowIterationListener, method flattenToY():
/**
* This method returns all layers connected to the vertices named in currentInput
*
* @param model        model info being assembled
* @param vertices     all vertices of the graph
* @param currentInput names of the vertices at the current level
* @param currentY     current Y (row) coordinate in the layout
* @return list of LayerInfo objects for the connected layers
*/
protected List<LayerInfo> flattenToY(ModelInfo model, GraphVertex[] vertices, List<String> currentInput, int currentY) {
List<LayerInfo> results = new ArrayList<>();
int x = 0;
for (int v = 0; v < vertices.length; v++) {
GraphVertex vertex = vertices[v];
VertexIndices[] indices = vertex.getInputVertices();
if (indices != null)
for (int i = 0; i < indices.length; i++) {
GraphVertex cv = vertices[indices[i].getVertexIndex()];
String inputName = cv.getVertexName();
for (String input : currentInput) {
if (inputName.equals(input)) {
// log.info("Vertex: " + vertex.getVertexName() + " has Input: " + input);
try {
LayerInfo info = model.getLayerInfoByName(vertex.getVertexName());
if (info == null)
info = getLayerInfo(vertex.getLayer(), x, currentY, 121);
info.setName(vertex.getVertexName());
// special case here: vertex isn't a layer
if (vertex.getLayer() == null) {
info.setLayerType(vertex.getClass().getSimpleName());
}
if (info.getName().endsWith("-merge"))
info.setLayerType("MERGE");
if (model.getLayerInfoByName(vertex.getVertexName()) == null) {
x++;
model.addLayer(info);
results.add(info);
}
// now we should map connections
LayerInfo connection = model.getLayerInfoByName(input);
if (connection != null) {
connection.addConnection(info);
// log.info("Adding connection ["+ connection.getName()+"] -> ["+ info.getName()+"]");
} else {
// the only reason to have null here, is direct input connection
//connection.addConnection(0,0);
}
} catch (Exception e) {
e.printStackTrace();
}
}
}
}
}
return results;
}
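The traversal that flattenToY relies on can be sketched in isolation: each VertexIndices entry returned by getInputVertices() identifies, by index, the vertex that feeds the current one, plus which of that vertex's output connections is used. Assuming net is an initialized ComputationGraph:

GraphVertex[] vertices = net.getVertices();
for (GraphVertex vertex : vertices) {
    VertexIndices[] inputs = vertex.getInputVertices();
    if (inputs == null)
        continue;                                        // network input vertices have no incoming edges
    for (VertexIndices vi : inputs) {
        GraphVertex from = vertices[vi.getVertexIndex()];
        System.out.println(from.getVertexName() + " -> " + vertex.getVertexName()
                + " (input #" + vi.getVertexEdgeNumber() + ")");
    }
}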