Use of org.deeplearning4j.nn.graph.vertex.GraphVertex in project deeplearning4j by deeplearning4j.
From the class TestGraphNodes, method testL2Node:
@Test
public void testL2Node() {
    Nd4j.getRandom().setSeed(12345);
    GraphVertex l2 = new L2Vertex(null, "", -1, 1e-8);

    INDArray in1 = Nd4j.rand(5, 2);
    INDArray in2 = Nd4j.rand(5, 2);
    l2.setInputs(in1, in2);
    INDArray out = l2.doForward(false);

    //Expected output: row-wise L2 (Euclidean) distance between in1 and in2
    INDArray expOut = Nd4j.create(5, 1);
    for (int i = 0; i < 5; i++) {
        double d2 = 0.0;
        for (int j = 0; j < in1.size(1); j++) {
            double temp = (in1.getDouble(i, j) - in2.getDouble(i, j));
            d2 += temp * temp;
        }
        d2 = Math.sqrt(d2);
        expOut.putScalar(i, 0, d2);
    }
    assertEquals(expOut, out);

    //Test backward pass: dL/da = epsilon * (a-b)/out, and dL/db = -dL/da
    INDArray epsilon = Nd4j.rand(5, 1);
    INDArray diff = in1.sub(in2);
    //Out == sqrt(s) = s^(1/2). Therefore: s^(-1/2) = 1/out
    INDArray sNegHalf = out.rdiv(1.0);
    INDArray dLda = diff.mulColumnVector(epsilon.mul(sNegHalf));
    INDArray dLdb = diff.mulColumnVector(epsilon.mul(sNegHalf)).neg();

    l2.setEpsilon(epsilon);
    Pair<Gradient, INDArray[]> p = l2.doBackward(false);
    assertEquals(dLda, p.getSecond()[0]);
    assertEquals(dLdb, p.getSecond()[1]);
}
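For context, a minimal sketch of how an L2 vertex is typically wired into a graph configuration (e.g., a siamese-style distance output). This uses the configuration-level org.deeplearning4j.nn.conf.graph.L2Vertex rather than the implementation-level vertex tested above; the layer sizes and vertex names are illustrative assumptions, not taken from the test:

//Sketch (assumed names/sizes): two inputs feeding an L2 distance vertex
ComputationGraphConfiguration conf = new NeuralNetConfiguration.Builder().graphBuilder()
        .addInputs("in1", "in2")
        //Outputs the row-wise L2 distance between its two inputs, shape [miniBatchSize, 1]
        .addVertex("dist", new org.deeplearning4j.nn.conf.graph.L2Vertex(), "in1", "in2")
        .addLayer("out", new OutputLayer.Builder().nIn(1).nOut(1).build(), "dist")
        .setOutputs("out")
        .build();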
Use of org.deeplearning4j.nn.graph.vertex.GraphVertex in project deeplearning4j by deeplearning4j.
From the class TestGraphNodes, method testLastTimeStepVertex:
@Test
public void testLastTimeStepVertex() {
    ComputationGraphConfiguration conf = new NeuralNetConfiguration.Builder().graphBuilder()
            .addInputs("in")
            .addVertex("lastTS", new LastTimeStepVertex("in"), "in")
            .addLayer("out", new OutputLayer.Builder().nIn(1).nOut(1).build(), "lastTS")
            .setOutputs("out")
            .build();
    ComputationGraph graph = new ComputationGraph(conf);
    graph.init();

    //First: test without input mask array
    Nd4j.getRandom().setSeed(12345);
    INDArray in = Nd4j.rand(new int[] {3, 5, 6});
    INDArray expOut = in.get(NDArrayIndex.all(), NDArrayIndex.all(), NDArrayIndex.point(5));
    GraphVertex gv = graph.getVertex("lastTS");
    gv.setInputs(in);
    //Forward pass:
    INDArray outFwd = gv.doForward(true);
    assertEquals(expOut, outFwd);
    //Backward pass: epsilon is zero everywhere except at the last time step
    gv.setEpsilon(expOut);
    Pair<Gradient, INDArray[]> pair = gv.doBackward(false);
    INDArray eps = pair.getSecond()[0];
    assertArrayEquals(in.shape(), eps.shape());
    assertEquals(Nd4j.zeros(3, 5, 5), eps.get(NDArrayIndex.all(), NDArrayIndex.all(), NDArrayIndex.interval(0, 4, true)));
    assertEquals(expOut, eps.get(NDArrayIndex.all(), NDArrayIndex.all(), NDArrayIndex.point(5)));

    //Second: test with input mask array; the "last" time step now differs per example
    INDArray inMask = Nd4j.zeros(3, 6);
    inMask.putRow(0, Nd4j.create(new double[] {1, 1, 1, 0, 0, 0}));
    inMask.putRow(1, Nd4j.create(new double[] {1, 1, 1, 1, 0, 0}));
    inMask.putRow(2, Nd4j.create(new double[] {1, 1, 1, 1, 1, 0}));
    graph.setLayerMaskArrays(new INDArray[] {inMask}, null);

    expOut = Nd4j.zeros(3, 5);
    expOut.putRow(0, in.get(NDArrayIndex.point(0), NDArrayIndex.all(), NDArrayIndex.point(2)));
    expOut.putRow(1, in.get(NDArrayIndex.point(1), NDArrayIndex.all(), NDArrayIndex.point(3)));
    expOut.putRow(2, in.get(NDArrayIndex.point(2), NDArrayIndex.all(), NDArrayIndex.point(4)));
    gv.setInputs(in);
    outFwd = gv.doForward(true);
    assertEquals(expOut, outFwd);

    String json = conf.toJson();
    ComputationGraphConfiguration conf2 = ComputationGraphConfiguration.fromJson(json);
    assertEquals(conf, conf2);
}
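The masked case above relies on a simple rule: the last valid step for each example is the index of the final 1 in its mask row (2, 3 and 4 for the three rows above). A minimal sketch of that rule in isolation; the helper name lastStepIndex is hypothetical, not part of the DL4J API:

//Hypothetical helper: index of the last time step whose mask value is nonzero
static int lastStepIndex(INDArray maskRow) {
    for (int t = (int) maskRow.length() - 1; t >= 0; t--) {
        if (maskRow.getDouble(t) != 0.0) {
            return t;
        }
    }
    return -1; //All steps masked out
}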
Use of org.deeplearning4j.nn.graph.vertex.GraphVertex in project deeplearning4j by deeplearning4j.
From the class ComputationGraph, method init:
/**
* Initialize the ComputationGraph, optionally with an existing parameters array.
* If an existing parameters array is specified, it will be used (and the values will not be modified) in the network;
* if no parameters array is specified, parameters will be initialized randomly according to the network configuration.
*
* @param parameters Network parameters. May be null. If null, parameters are initialized randomly.
* @param cloneParametersArray Whether the parameter array (if any) should be cloned, or used directly
*/
public void init(INDArray parameters, boolean cloneParametersArray) {
    if (initCalled)
        return;

    //First: build topological ordering, based on configuration. Used for forward pass, backprop and order of parameters/gradients
    topologicalOrder = topologicalSortOrder();

    //Initialization: create the GraphVertex objects, based on configuration structure
    Map<String, org.deeplearning4j.nn.conf.graph.GraphVertex> configVertexMap = configuration.getVertices();

    //Names of all of the (data) inputs to the ComputationGraph
    List<String> networkInputNames = configuration.getNetworkInputs();

    //Inputs for each layer and GraphNode:
    Map<String, List<String>> vertexInputs = configuration.getVertexInputs();
    this.vertices = new GraphVertex[networkInputNames.size() + configuration.getVertices().size()];

    //All names: inputs, layers and graph nodes (index to name map)
    Map<String, Integer> allNamesReverse = new HashMap<>();

    //Create network input vertices:
    int vertexNumber = 0;
    for (String name : networkInputNames) {
        GraphVertex gv = new InputVertex(this, name, vertexNumber, null); //Output vertices: set later
        allNamesReverse.put(name, vertexNumber);
        vertices[vertexNumber++] = gv;
    }

    //Go through layers, and work out total number of parameters. Then allocate full parameters array
    int numParams = 0;
    int[] numParamsForVertex = new int[topologicalOrder.length];
    int i = 0;
    for (; i < configuration.getNetworkInputs().size(); i++) {
        //No parameters for input vertices
        numParamsForVertex[i] = 0;
    }
    for (Map.Entry<String, org.deeplearning4j.nn.conf.graph.GraphVertex> nodeEntry : configVertexMap.entrySet()) {
        org.deeplearning4j.nn.conf.graph.GraphVertex n = nodeEntry.getValue();
        numParamsForVertex[i] = n.numParams(true);
        numParams += numParamsForVertex[i];
        i++;
    }

    boolean initializeParams;
    if (parameters != null) {
        if (!parameters.isRowVector())
            throw new IllegalArgumentException("Invalid parameters: should be a row vector");
        if (parameters.length() != numParams)
            throw new IllegalArgumentException("Invalid parameters: expected length " + numParams
                            + ", got length " + parameters.length());
        if (cloneParametersArray)
            flattenedParams = parameters.dup();
        else
            flattenedParams = parameters;
        initializeParams = false;
    } else {
        flattenedParams = Nd4j.create(1, numParams);
        initializeParams = true;
    }

    //Given the topological ordering: work out the subset of the parameters array used for each layer
    // Then extract out for use when initializing the Layers
    INDArray[] paramsViewForVertex = new INDArray[topologicalOrder.length];
    int paramOffsetSoFar = 0;
    i = 0;
    for (int vertexIdx : topologicalOrder) {
        int nParamsThisVertex = numParamsForVertex[vertexIdx];
        if (nParamsThisVertex != 0) {
            paramsViewForVertex[vertexIdx] = flattenedParams.get(NDArrayIndex.point(0),
                            NDArrayIndex.interval(paramOffsetSoFar, paramOffsetSoFar + nParamsThisVertex));
        }
        i++;
        paramOffsetSoFar += nParamsThisVertex;
    }

    int numLayers = 0;
    List<Layer> tempLayerList = new ArrayList<>();
    defaultConfiguration.clearVariables();
    List<String> variables = defaultConfiguration.variables(false);
    for (Map.Entry<String, org.deeplearning4j.nn.conf.graph.GraphVertex> nodeEntry : configVertexMap.entrySet()) {
        org.deeplearning4j.nn.conf.graph.GraphVertex n = nodeEntry.getValue();
        String name = nodeEntry.getKey();
        GraphVertex gv = n.instantiate(this, name, vertexNumber, paramsViewForVertex[vertexNumber], initializeParams);
        if (gv.hasLayer()) {
            numLayers++;
            Layer l = gv.getLayer();
            tempLayerList.add(l);
            List<String> layerVariables = l.conf().variables();
            if (layerVariables != null) {
                for (String s : layerVariables) {
                    variables.add(gv.getVertexName() + "_" + s);
                }
            }
        }
        allNamesReverse.put(name, vertexNumber);
        vertices[vertexNumber++] = gv;
    }
    layers = tempLayerList.toArray(new Layer[numLayers]);

    //Create the lookup table, so we can find vertices easily by name
    verticesMap = new HashMap<>();
    for (GraphVertex gv : vertices) {
        verticesMap.put(gv.getVertexName(), gv);
    }

    //Now: do another pass to set the input and output indices, for each vertex
    // These indices are used during forward and backward passes
    //To get output indices: need to essentially build the graph in reverse...
    //Key: vertex. Values: vertices that this node is an input for
    Map<String, List<String>> verticesOutputTo = new HashMap<>();
    for (GraphVertex gv : vertices) {
        String vertexName = gv.getVertexName();
        List<String> vertexInputNames = vertexInputs.get(vertexName);
        if (vertexInputNames == null)
            continue;
        //Build reverse network structure:
        for (String s : vertexInputNames) {
            List<String> list = verticesOutputTo.get(s);
            if (list == null) {
                list = new ArrayList<>();
                verticesOutputTo.put(s, list);
            }
            //Edge: s -> vertexName
            list.add(vertexName);
        }
    }

    for (GraphVertex gv : vertices) {
        String vertexName = gv.getVertexName();
        int vertexIndex = gv.getVertexIndex();
        List<String> vertexInputNames = vertexInputs.get(vertexName);
        if (vertexInputNames == null)
            continue;
        VertexIndices[] inputIndices = new VertexIndices[vertexInputNames.size()];
        for (int j = 0; j < vertexInputNames.size(); j++) {
            String inName = vertexInputNames.get(j);
            int inputVertexIndex = allNamesReverse.get(inName);
            //Output of vertex 'inputVertexIndex' is the jth input to the current vertex
            //For input indices, we need to know which output connection of vertex 'inputVertexIndex' this represents
            GraphVertex inputVertex = vertices[inputVertexIndex];
            //First: get the outputs of the input vertex...
            List<String> inputVertexOutputsTo = verticesOutputTo.get(inName);
            int outputNumberOfInput = inputVertexOutputsTo.indexOf(vertexName);
            if (outputNumberOfInput == -1)
                throw new IllegalStateException("Could not find vertex " + vertexIndex + " in the list of outputs "
                                + "for vertex " + inputVertex + "; error in graph structure?");
            //Overall here: the 'outputNumberOfInput'th output of vertex 'inputVertexIndex' is the jth input to the current vertex
            inputIndices[j] = new VertexIndices(inputVertexIndex, outputNumberOfInput);
        }
        gv.setInputVertices(inputIndices);
    }

    //Handle the outputs for each vertex
    for (GraphVertex gv : vertices) {
        String vertexName = gv.getVertexName();
        List<String> thisVertexOutputsTo = verticesOutputTo.get(vertexName);
        if (thisVertexOutputsTo == null || thisVertexOutputsTo.isEmpty())
            continue; //Output vertex
        VertexIndices[] outputIndices = new VertexIndices[thisVertexOutputsTo.size()];
        int j = 0;
        for (String s : thisVertexOutputsTo) {
            //First, we have gv -> s
            //Which input in s does gv connect to? s may in general have multiple inputs...
            List<String> nextVertexInputNames = vertexInputs.get(s);
            int outputVertexInputNumber = nextVertexInputNames.indexOf(vertexName);
            int outputVertexIndex = allNamesReverse.get(s);
            outputIndices[j++] = new VertexIndices(outputVertexIndex, outputVertexInputNumber);
        }
        gv.setOutputVertices(outputIndices);
    }

    initCalled = true;
}
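A minimal sketch of how this overload might be used to share one flattened parameter array between two networks built from the same configuration; conf stands for any valid ComputationGraphConfiguration, and the sharing pattern is an illustrative assumption rather than code from the repository:

//Sketch: two graphs sharing a single parameter array (assumed valid conf)
ComputationGraph g1 = new ComputationGraph(conf);
g1.init(); //Random initialization according to the configuration

ComputationGraph g2 = new ComputationGraph(conf);
//cloneParametersArray == false: g2 uses g1's parameter array directly,
//so parameter updates in one network are visible in the other
g2.init(g1.params(), false);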
Use of org.deeplearning4j.nn.graph.vertex.GraphVertex in project deeplearning4j by deeplearning4j.
From the class ComputationGraph, method setLayerMaskArrays:
/**
* Set the mask arrays for features and labels. Mask arrays are typically used in situations such as one-to-many
* and many-to-one learning with recurrent neural networks, as well as for supporting time series of varying lengths
* within the same minibatch.<br>
* For example, with RNN data sets with input of shape [miniBatchSize,nIn,timeSeriesLength] and outputs of shape
* [miniBatchSize,nOut,timeSeriesLength], the feature and label mask arrays will both have shape [miniBatchSize,timeSeriesLength]
* and contain values 0 or 1 at each element (to specify whether a given input/example is present - or merely padding -
* at a given time step).<br>
* <b>NOTE</b>: This method is not usually used directly. Instead, the various feedForward and fit methods handle setting
* of masking internally.
*
* @param featureMaskArrays Mask array for features (input)
* @param labelMaskArrays Mask array for labels (output)
* @see #clearLayerMaskArrays()
*/
public void setLayerMaskArrays(INDArray[] featureMaskArrays, INDArray[] labelMaskArrays) {
    this.clearLayerMaskArrays();
    this.inputMaskArrays = featureMaskArrays;
    this.labelMaskArrays = labelMaskArrays;

    if (featureMaskArrays != null) {
        if (featureMaskArrays.length != numInputArrays) {
            throw new IllegalArgumentException("Invalid number of feature mask arrays");
        }

        int minibatchSize = -1;
        for (INDArray i : featureMaskArrays) {
            if (i != null) {
                minibatchSize = i.size(0);
            }
        }

        //Here: need to do forward pass through the network according to the topological ordering of the network
        Map<Integer, Pair<INDArray, MaskState>> map = new HashMap<>();
        for (int i = 0; i < topologicalOrder.length; i++) {
            GraphVertex current = vertices[topologicalOrder[i]];
            if (current.isInputVertex()) {
                INDArray fMask = featureMaskArrays[current.getVertexIndex()];
                map.put(current.getVertexIndex(), new Pair<>(fMask, MaskState.Active));
            } else {
                VertexIndices[] inputVertices = current.getInputVertices();

                //Now: work out the mask arrays to feed forward...
                INDArray[] inputMasks = null; //Allocated lazily: only if at least one input has a mask
                MaskState maskState = null;
                for (int j = 0; j < inputVertices.length; j++) {
                    Pair<INDArray, MaskState> p = map.get(inputVertices[j].getVertexIndex());
                    if (p != null) {
                        if (inputMasks == null) {
                            inputMasks = new INDArray[inputVertices.length];
                        }
                        inputMasks[j] = p.getFirst();
                        if (maskState == null || maskState == MaskState.Passthrough) {
                            maskState = p.getSecond();
                        }
                    }
                }

                Pair<INDArray, MaskState> outPair = current.feedForwardMaskArrays(inputMasks, maskState, minibatchSize);
                map.put(topologicalOrder[i], outPair);
            }
        }
    }

    if (labelMaskArrays != null) {
        if (labelMaskArrays.length != numOutputArrays) {
            throw new IllegalArgumentException("Invalid number of label mask arrays");
        }
        for (int i = 0; i < labelMaskArrays.length; i++) {
            if (labelMaskArrays[i] == null) {
                //This output doesn't have a mask, we can skip it.
                continue;
            }
            String outputName = configuration.getNetworkOutputs().get(i);
            GraphVertex v = verticesMap.get(outputName);
            Layer ol = v.getLayer();
            ol.setMaskArray(labelMaskArrays[i]);
        }
    }
}
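A minimal usage sketch, assuming a single-input graph over variable-length sequences; the names and shapes are illustrative, and (as the javadoc notes) the feedForward and fit methods normally handle this internally:

//Sketch: three examples, max length 6; 1 = real data, 0 = padding
INDArray featureMask = Nd4j.zeros(3, 6);
featureMask.putRow(0, Nd4j.create(new double[] {1, 1, 1, 0, 0, 0}));
featureMask.putRow(1, Nd4j.create(new double[] {1, 1, 1, 1, 0, 0}));
featureMask.putRow(2, Nd4j.create(new double[] {1, 1, 1, 1, 1, 1}));

graph.setLayerMaskArrays(new INDArray[] {featureMask}, null);
INDArray[] out = graph.output(input); //Assumed 3d input of shape [3, nIn, 6]
graph.clearLayerMaskArrays(); //Clear the masks when done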
Use of org.deeplearning4j.nn.graph.vertex.GraphVertex in project deeplearning4j by deeplearning4j.
From the class ComputationGraph, method rnnTimeStep:
//------------------------------------------------------------------------------
//RNN-specific functionality
/**
* If this ComputationGraph contains one or more RNN layers: conduct forward pass (prediction)
* but using previous stored state for any RNN layers. The activations for the final step are
* also stored in the RNN layers for use next time rnnTimeStep() is called.<br>
* This method can be used to generate output one or more steps at a time instead of always having to do
* forward pass from t=0. Example uses are for streaming data, and for generating samples from network output
* one step at a time (where samples are then fed back into the network as input)<br>
* If no previous state is present in RNN layers (i.e., initially or after calling rnnClearPreviousState()),
* the default initialization (usually 0) is used.<br>
* Supports mini-batches (i.e., multiple predictions/forward passes in parallel) as well as single examples.<br>
*
* @param inputs Input to network. May be for one or multiple time steps. For single time step:
* input has shape [miniBatchSize,inputSize] or [miniBatchSize,inputSize,1]. miniBatchSize=1 for single example.<br>
* For multiple time steps: [miniBatchSize,inputSize,inputTimeSeriesLength]
* @return Output activations. If the output is an RNN layer (such as RnnOutputLayer) and all inputs are 2d with shape
* [miniBatchSize,inputSize], then the outputs are also 2d, with shape [miniBatchSize,outputSize] instead of
* [miniBatchSize,outputSize,1].<br>
* Otherwise, RnnOutputLayer outputs are 3d with shape [miniBatchSize,outputSize,inputTimeSeriesLength] (and other outputs are unmodified).
*/
public INDArray[] rnnTimeStep(INDArray... inputs) {
    this.inputs = inputs;
    //Idea: if 2d in, want 2d out
    boolean inputIs2d = true;
    for (INDArray i : inputs) {
        if (i.rank() != 2) {
            inputIs2d = false;
            break;
        }
    }

    INDArray[] outputs = new INDArray[this.numOutputArrays];

    //Based on: feedForward()
    for (int currVertexIdx : topologicalOrder) {
        GraphVertex current = vertices[currVertexIdx];
        if (current.isInputVertex()) {
            VertexIndices[] inputsTo = current.getOutputVertices();
            INDArray input = inputs[current.getVertexIndex()];
            for (VertexIndices v : inputsTo) {
                int vIdx = v.getVertexIndex();
                int vIdxInputNum = v.getVertexEdgeNumber();
                //This input: the 'vIdxInputNum'th input to vertex 'vIdx'
                //TODO When to dup?
                vertices[vIdx].setInput(vIdxInputNum, input.dup());
            }
        } else {
            INDArray out;
            if (current.hasLayer()) {
                //Layer
                Layer l = current.getLayer();
                if (l instanceof RecurrentLayer) {
                    out = ((RecurrentLayer) l).rnnTimeStep(current.getInputs()[0]);
                } else if (l instanceof MultiLayerNetwork) {
                    out = ((MultiLayerNetwork) l).rnnTimeStep(current.getInputs()[0]);
                } else {
                    //Non-recurrent layer
                    out = current.doForward(false);
                }
            } else {
                //GraphNode
                out = current.doForward(false);
            }

            if (current.isOutputVertex()) {
                //Get the index of this output vertex...
                int idx = configuration.getNetworkOutputs().indexOf(current.getVertexName());
                outputs[idx] = out;
            }

            //Now, set the inputs for the next vertices:
            VertexIndices[] outputsTo = current.getOutputVertices();
            if (outputsTo != null) {
                for (VertexIndices v : outputsTo) {
                    int vIdx = v.getVertexIndex();
                    int inputNum = v.getVertexEdgeNumber();
                    //This (jth) connection from the output: is the 'inputNum'th input to vertex 'vIdx'
                    vertices[vIdx].setInput(inputNum, out);
                }
            }
        }
    }

    //As per MultiLayerNetwork.rnnTimeStep(): if inputs are all 2d, then outputs are all 2d
    if (inputIs2d) {
        for (int i = 0; i < outputs.length; i++) {
            if (outputs[i].rank() == 3 && outputs[i].size(2) == 1) {
                //Return 2d output with shape [miniBatchSize,nOut]
                // instead of 3d output with shape [miniBatchSize,nOut,1]
                outputs[i] = outputs[i].tensorAlongDimension(0, 1, 0);
            }
        }
    }

    this.inputs = null;
    return outputs;
}
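A minimal sketch of the one-step-at-a-time generation pattern described in the javadoc; graph, nIn, and the feedback step are illustrative assumptions (feeding the output straight back in assumes nOut == nIn, e.g., character-level generation):

//Sketch: assumed single-input/single-output graph, one time step per call
graph.rnnClearPreviousState(); //Start from the default (zero) initial state
INDArray step = Nd4j.zeros(1, nIn); //Single example, single step: 2d input -> 2d output
for (int t = 0; t < 20; t++) {
    INDArray out = graph.rnnTimeStep(step)[0];
    //In a sampling application, the next input would be derived from 'out' here
    step = out;
}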