Use of org.deeplearning4j.nn.conf.inputs.InputType in project deeplearning4j by deeplearning4j.
The class ComputationGraphConfiguration, method addPreProcessors:
/**
 * Add preprocessors automatically, given the specified types of inputs for the network. Inputs are specified using the
 * {@link InputType} class, in the same order in which the inputs were defined in the original configuration.<br>
 * For example, in a network with two inputs, a convolutional input (28x28x1 images) and a feed-forward input, use
 * {@code .addPreProcessors(InputType.convolutional(28,28,1), InputType.feedForward())}.<br>
 * For the CNN->Dense and CNN->RNN transitions, the nIn values on the Dense/RNN layers will also be set automatically.
 * <b>NOTE</b>: This method is called automatically when using the
 * {@link GraphBuilder#setInputTypes(InputType...)} functionality; see that method for details.
 */
public void addPreProcessors(InputType... inputTypes) {
    if (inputTypes == null || inputTypes.length != networkInputs.size()) {
        throw new IllegalArgumentException("Invalid number of InputTypes: cannot add preprocessors if number of InputType "
                        + "objects differs from number of network inputs");
    }

    //Now: need to do essentially a forward pass through the network, to work out what type of preprocessors to add.
    //To do this, we need to know what the output types are for each GraphVertex.

    //First step: build the network in reverse order (i.e., define a map of a -> list(b) instead of list(a) -> b).
    //Key: vertex. Values: vertices that this vertex is an input for.
    Map<String, List<String>> verticesOutputTo = new HashMap<>();
    for (Map.Entry<String, GraphVertex> entry : vertices.entrySet()) {
        String vertexName = entry.getKey();
        List<String> vertexInputNames = vertexInputs.get(vertexName);
        if (vertexInputNames == null)
            continue;

        //Build reverse network structure:
        for (String s : vertexInputNames) {
            List<String> list = verticesOutputTo.get(s);
            if (list == null) {
                list = new ArrayList<>();
                verticesOutputTo.put(s, list);
            }
            //Edge: s -> vertexName
            list.add(vertexName);
        }
    }

    //Second step: topological sort (Kahn's algorithm), starting from the set of all nodes with no incoming edges
    LinkedList<String> noIncomingEdges = new LinkedList<>(networkInputs);
    List<String> topologicalOrdering = new ArrayList<>();

    Map<String, Set<String>> inputEdges = new HashMap<>();
    for (Map.Entry<String, List<String>> entry : vertexInputs.entrySet()) {
        inputEdges.put(entry.getKey(), new HashSet<>(entry.getValue()));
    }

    while (!noIncomingEdges.isEmpty()) {
        String next = noIncomingEdges.removeFirst();
        topologicalOrdering.add(next);

        //Remove edges next -> verticesOutputTo[...] from graph
        List<String> nextEdges = verticesOutputTo.get(next);
        if (nextEdges != null && !nextEdges.isEmpty()) {
            for (String s : nextEdges) {
                Set<String> set = inputEdges.get(s);
                set.remove(next);
                if (set.isEmpty()) {
                    //No remaining input edges for vertex s -> add to list for processing
                    noIncomingEdges.add(s);
                }
            }
        }
    }

    //If any edges remain in the graph: the graph has cycles
    for (Map.Entry<String, Set<String>> entry : inputEdges.entrySet()) {
        Set<String> set = entry.getValue();
        if (set == null)
            continue;
        if (!set.isEmpty())
            throw new IllegalStateException("Invalid configuration: cycle detected in graph. Cannot calculate topological ordering with graph cycle ("
                            + "cycle includes vertex \"" + entry.getKey() + "\")");
    }

    //Now, given the topological sort: do the equivalent of a forward pass
    Map<String, InputType> vertexOutputs = new HashMap<>();
    int currLayerIdx = -1;
    for (String s : topologicalOrdering) {
        int inputIdx = networkInputs.indexOf(s);
        if (inputIdx != -1) {
            //Network input: its output type is simply the specified InputType
            vertexOutputs.put(s, inputTypes[inputIdx]);
            continue;
        }

        GraphVertex gv = vertices.get(s);
        List<InputType> inputTypeList = new ArrayList<>();
        if (gv instanceof LayerVertex) {
            //Add preprocessor, if necessary:
            String in = vertexInputs.get(s).get(0);
            InputType layerInput = vertexOutputs.get(in);
            inputTypeList.add(layerInput);
            LayerVertex lv = (LayerVertex) gv;
            Layer l = lv.getLayerConf().getLayer();

            //Don't override preprocessors that are manually defined; if none has been defined,
            //add the appropriate preprocessor for this input type/layer combination
            if (lv.getPreProcessor() == null) {
                InputPreProcessor preproc = l.getPreProcessorForInputType(layerInput);
                lv.setPreProcessor(preproc);
            }

            //Set the nIn value for the layer (if not already set)
            InputType afterPreproc = layerInput;
            if (lv.getPreProcessor() != null) {
                InputPreProcessor ip = lv.getPreProcessor();
                afterPreproc = ip.getOutputType(layerInput);
            }
            l.setNIn(afterPreproc, false);
            currLayerIdx++;
        } else {
            List<String> inputs = vertexInputs.get(s);
            if (inputs != null) {
                for (String inputVertexName : inputs) {
                    inputTypeList.add(vertexOutputs.get(inputVertexName));
                }
            }
        }
        InputType outputFromVertex =
                        gv.getOutputType(currLayerIdx, inputTypeList.toArray(new InputType[inputTypeList.size()]));
        vertexOutputs.put(s, outputFromVertex);
    }
}
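To see how this is reached in practice, here is a minimal usage sketch; the layer names and sizes are illustrative, not taken from the source above. With a 28x28x1 convolutional input, setInputTypes(...) causes addPreProcessors(...) to run, so the CNN->Dense transition below should receive a CnnToFeedForwardPreProcessor and the dense layer's nIn should be inferred automatically as 24*24*3 = 1728 (a 5x5 kernel at stride 1 on a 28x28 input yields 24x24 activations):

import org.deeplearning4j.nn.conf.ComputationGraphConfiguration;
import org.deeplearning4j.nn.conf.NeuralNetConfiguration;
import org.deeplearning4j.nn.conf.inputs.InputType;
import org.deeplearning4j.nn.conf.layers.ConvolutionLayer;
import org.deeplearning4j.nn.conf.layers.DenseLayer;
import org.deeplearning4j.nn.conf.layers.OutputLayer;

//Sketch: no nIn or preprocessor is set by hand on "dense"; setInputTypes(...) fills both in
ComputationGraphConfiguration conf = new NeuralNetConfiguration.Builder()
                .graphBuilder()
                .addInputs("in")
                .addLayer("cnn", new ConvolutionLayer.Builder(5, 5).nOut(3).build(), "in")
                .addLayer("dense", new DenseLayer.Builder().nOut(10).build(), "cnn")
                .addLayer("out", new OutputLayer.Builder().nOut(5).build(), "dense")
                .setOutputs("out")
                .setInputTypes(InputType.convolutional(28, 28, 1)) //triggers addPreProcessors(...)
                .build();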
Use of org.deeplearning4j.nn.conf.inputs.InputType in project deeplearning4j by deeplearning4j.
The class ElementWiseVertex, method getOutputType:
@Override
public InputType getOutputType(int layerIndex, InputType... vertexInputs) throws InvalidInputTypeException {
    if (vertexInputs.length == 1)
        return vertexInputs[0];

    InputType first = vertexInputs[0];
    if (first.getType() != InputType.Type.CNN) {
        //FF, RNN or flat CNN data inputs: only the types need to match
        for (int i = 1; i < vertexInputs.length; i++) {
            if (vertexInputs[i].getType() != first.getType()) {
                throw new InvalidInputTypeException(
                                "Invalid input: ElementWise vertex cannot process activations of different types:"
                                                + " first type = " + first.getType() + ", input type " + (i + 1)
                                                + " = " + vertexInputs[i].getType());
            }
        }
    } else {
        //CNN inputs... also check that the depths, widths and heights match:
        InputType.InputTypeConvolutional firstConv = (InputType.InputTypeConvolutional) first;
        int fd = firstConv.getDepth();
        int fw = firstConv.getWidth();
        int fh = firstConv.getHeight();

        for (int i = 1; i < vertexInputs.length; i++) {
            if (vertexInputs[i].getType() != InputType.Type.CNN) {
                throw new InvalidInputTypeException(
                                "Invalid input: ElementWise vertex cannot process activations of different types:"
                                                + " first type = " + InputType.Type.CNN + ", input type " + (i + 1)
                                                + " = " + vertexInputs[i].getType());
            }

            InputType.InputTypeConvolutional otherConv = (InputType.InputTypeConvolutional) vertexInputs[i];
            int od = otherConv.getDepth();
            int ow = otherConv.getWidth();
            int oh = otherConv.getHeight();

            if (fd != od || fw != ow || fh != oh) {
                throw new InvalidInputTypeException(
                                "Invalid input: ElementWise vertex cannot process CNN activations of different sizes:"
                                                + " first [depth,width,height] = [" + fd + "," + fw + "," + fh
                                                + "], input type " + (i + 1) + " = [" + od + "," + ow + "," + oh + "]");
            }
        }
    }
    //Same output shape/size as the first input
    return first;
}
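A minimal sketch of the contract enforced here, with the vertex and types constructed directly, purely for illustration: matching inputs pass through unchanged, while CNN inputs of different spatial sizes are rejected.

import org.deeplearning4j.nn.conf.graph.ElementWiseVertex;
import org.deeplearning4j.nn.conf.inputs.InputType;

ElementWiseVertex add = new ElementWiseVertex(ElementWiseVertex.Op.Add);
InputType a = InputType.convolutional(10, 10, 3);
InputType b = InputType.convolutional(10, 10, 3);
InputType out = add.getOutputType(-1, a, b); //same shape as the first input: 10x10x3 (the layer index is unused here)

//Mismatched spatial size -> InvalidInputTypeException:
//add.getOutputType(-1, a, InputType.convolutional(12, 12, 3));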
Use of org.deeplearning4j.nn.conf.inputs.InputType in project deeplearning4j by deeplearning4j.
The class ActivationLayerTest, method testInputTypes:
@Test
public void testInputTypes() {
    org.deeplearning4j.nn.conf.layers.ActivationLayer l =
                    new org.deeplearning4j.nn.conf.layers.ActivationLayer.Builder().activation(Activation.RELU).build();

    InputType in1 = InputType.feedForward(20);
    InputType in2 = InputType.convolutional(28, 28, 1);

    //ActivationLayer is shape-preserving: output type equals input type, and no preprocessor is needed
    assertEquals(in1, l.getOutputType(0, in1));
    assertEquals(in2, l.getOutputType(0, in2));
    assertNull(l.getPreProcessorForInputType(in1));
    assertNull(l.getPreProcessorForInputType(in2));
}
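For contrast, a hypothetical check (not part of the test above) of a layer that is not shape-preserving: a DenseLayer fed convolutional activations does return a preprocessor, which is what addPreProcessors relies on for the CNN->Dense transition.

DenseLayer dense = new DenseLayer.Builder().nIn(28 * 28 * 1).nOut(10).build();
InputPreProcessor proc = dense.getPreProcessorForInputType(InputType.convolutional(28, 28, 1));
//proc is expected to be a CnnToFeedForwardPreProcessor, flattening [1,28,28] activations to a length-784 row vector
assertNotNull(proc);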
Use of org.deeplearning4j.nn.conf.inputs.InputType in project deeplearning4j by deeplearning4j.
The class TestConvolutionModes, method testConvolutionModeInputTypes:
@Test
public void testConvolutionModeInputTypes() {
    //Test 1: input 3x3, stride 1, kernel 2
    int inH = 3;
    int inW = 3;
    int kH = 2;
    int kW = 2;
    int sH = 1;
    int sW = 1;
    int pH = 0;
    int pW = 0;
    int minibatch = 3;
    int dIn = 5;
    int dOut = 7;
    int[] kernel = {kH, kW};
    int[] stride = {sH, sW};
    int[] padding = {pH, pW};

    INDArray inData = Nd4j.create(minibatch, dIn, inH, inW);
    InputType inputType = InputType.convolutional(inH, inW, dIn);

    //Strict mode: expect 2x2 out -> (inH - kernel + 2*padding)/stride + 1 = (3-2+0)/1+1 = 2
    InputType.InputTypeConvolutional it = (InputType.InputTypeConvolutional) InputTypeUtil.getOutputTypeCnnLayers(
                    inputType, kernel, stride, padding, ConvolutionMode.Strict, dOut, -1, "layerName",
                    ConvolutionLayer.class);
    assertEquals(2, it.getHeight());
    assertEquals(2, it.getWidth());
    assertEquals(dOut, it.getDepth());

    int[] outSize = ConvolutionUtils.getOutputSize(inData, kernel, stride, padding, ConvolutionMode.Strict);
    assertEquals(2, outSize[0]);
    assertEquals(2, outSize[1]);

    //Truncate: same as strict here
    it = (InputType.InputTypeConvolutional) InputTypeUtil.getOutputTypeCnnLayers(inputType, kernel, stride, padding,
                    ConvolutionMode.Truncate, dOut, -1, "layerName", ConvolutionLayer.class);
    assertEquals(2, it.getHeight());
    assertEquals(2, it.getWidth());
    assertEquals(dOut, it.getDepth());

    outSize = ConvolutionUtils.getOutputSize(inData, kernel, stride, padding, ConvolutionMode.Truncate);
    assertEquals(2, outSize[0]);
    assertEquals(2, outSize[1]);

    //Same mode: ceil(in / stride) = 3
    it = (InputType.InputTypeConvolutional) InputTypeUtil.getOutputTypeCnnLayers(inputType, kernel, stride, null,
                    ConvolutionMode.Same, dOut, -1, "layerName", ConvolutionLayer.class);
    assertEquals(3, it.getHeight());
    assertEquals(3, it.getWidth());
    assertEquals(dOut, it.getDepth());

    outSize = ConvolutionUtils.getOutputSize(inData, kernel, stride, null, ConvolutionMode.Same);
    assertEquals(3, outSize[0]);
    assertEquals(3, outSize[1]);

    //Test 2: input 3x4, stride 2, kernel 3
    inH = 3;
    inW = 4;
    kH = 3;
    kW = 3;
    sH = 2;
    sW = 2;
    kernel = new int[] {kH, kW};
    stride = new int[] {sH, sW};
    padding = new int[] {pH, pW};

    inData = Nd4j.create(minibatch, dIn, inH, inW);
    inputType = InputType.convolutional(inH, inW, dIn);

    //Strict mode: (4-3+0)/2+1 is not an integer -> exception
    try {
        InputTypeUtil.getOutputTypeCnnLayers(inputType, kernel, stride, padding, ConvolutionMode.Strict, dOut, -1,
                        "layerName", ConvolutionLayer.class);
        fail("Expected exception");
    } catch (DL4JException e) {
        System.out.println(e.getMessage());
    }

    try {
        outSize = ConvolutionUtils.getOutputSize(inData, kernel, stride, padding, ConvolutionMode.Strict);
        fail("Expected exception");
    } catch (DL4JException e) {
        System.out.println(e.getMessage());
    }

    //Truncate: (3-3+0)/2+1 = 1 in height dim; (4-3+0)/2+1 = 1 in width dim
    it = (InputType.InputTypeConvolutional) InputTypeUtil.getOutputTypeCnnLayers(inputType, kernel, stride, padding,
                    ConvolutionMode.Truncate, dOut, -1, "layerName", ConvolutionLayer.class);
    assertEquals(1, it.getHeight());
    assertEquals(1, it.getWidth());
    assertEquals(dOut, it.getDepth());

    outSize = ConvolutionUtils.getOutputSize(inData, kernel, stride, padding, ConvolutionMode.Truncate);
    assertEquals(1, outSize[0]);
    assertEquals(1, outSize[1]);

    //Same mode: ceil(3/2) = 2 in height dim; ceil(4/2) = 2 in width dim
    it = (InputType.InputTypeConvolutional) InputTypeUtil.getOutputTypeCnnLayers(inputType, kernel, stride, null,
                    ConvolutionMode.Same, dOut, -1, "layerName", ConvolutionLayer.class);
    assertEquals(2, it.getHeight());
    assertEquals(2, it.getWidth());
    assertEquals(dOut, it.getDepth());

    outSize = ConvolutionUtils.getOutputSize(inData, kernel, stride, null, ConvolutionMode.Same);
    assertEquals(2, outSize[0]);
    assertEquals(2, outSize[1]);
}
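The size arithmetic these assertions exercise can be summarized in two small helpers (hypothetical names, for illustration only):

//Strict and Truncate share one formula; Strict additionally requires (in - k + 2*p) to be exactly divisible by s
static int outSizeStrictOrTruncate(int in, int k, int s, int p) {
    return (in - k + 2 * p) / s + 1; //integer division truncates, matching Truncate mode
}

//Same mode chooses padding internally so that out = ceil(in / s)
static int outSizeSame(int in, int s) {
    return (int) Math.ceil(in / (double) s);
}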
Use of org.deeplearning4j.nn.conf.inputs.InputType in project deeplearning4j by deeplearning4j.
The class KerasModel, method getComputationGraphConfiguration:
/**
 * Configure a ComputationGraph from this Keras Model configuration.
 *
 * @return ComputationGraphConfiguration
 */
public ComputationGraphConfiguration getComputationGraphConfiguration()
                throws InvalidKerasConfigurationException, UnsupportedKerasConfigurationException {
    if (!this.className.equals(MODEL_CLASS_NAME_MODEL) && !this.className.equals(MODEL_CLASS_NAME_SEQUENTIAL))
        throw new InvalidKerasConfigurationException(
                        "Keras model class name " + this.className + " incompatible with ComputationGraph");

    NeuralNetConfiguration.Builder modelBuilder = new NeuralNetConfiguration.Builder();
    ComputationGraphConfiguration.GraphBuilder graphBuilder = modelBuilder.graphBuilder();

    /* Build String array of input layer names, add to ComputationGraph. */
    String[] inputLayerNameArray = new String[this.inputLayerNames.size()];
    this.inputLayerNames.toArray(inputLayerNameArray);
    graphBuilder.addInputs(inputLayerNameArray);

    /* Build InputType array of input layer types, add to ComputationGraph. */
    List<InputType> inputTypeList = new ArrayList<>();
    for (String inputLayerName : this.inputLayerNames)
        inputTypeList.add(this.layers.get(inputLayerName).getOutputType());
    InputType[] inputTypes = new InputType[inputTypeList.size()];
    inputTypeList.toArray(inputTypes);
    graphBuilder.setInputTypes(inputTypes);

    /* Build String array of output layer names, add to ComputationGraph. */
    String[] outputLayerNameArray = new String[this.outputLayerNames.size()];
    this.outputLayerNames.toArray(outputLayerNameArray);
    graphBuilder.setOutputs(outputLayerNameArray);

    Map<String, InputPreProcessor> preprocessors = new HashMap<>();

    /* Add layersOrdered one at a time. */
    for (KerasLayer layer : this.layersOrdered) {
        /* Get inbound layer names. */
        List<String> inboundLayerNames = layer.getInboundLayerNames();
        String[] inboundLayerNamesArray = new String[inboundLayerNames.size()];
        inboundLayerNames.toArray(inboundLayerNamesArray);

        /* Get inbound InputTypes and InputPreProcessor, if necessary. */
        List<InputType> inboundTypeList = new ArrayList<>();
        for (String layerName : inboundLayerNames)
            inboundTypeList.add(this.outputTypes.get(layerName));
        InputType[] inboundTypeArray = new InputType[inboundTypeList.size()];
        inboundTypeList.toArray(inboundTypeArray);
        InputPreProcessor preprocessor = layer.getInputPreprocessor(inboundTypeArray);

        if (layer.usesRegularization())
            modelBuilder.setUseRegularization(true);

        if (layer.isLayer()) {
            /* Add DL4J layer. */
            if (preprocessor != null)
                preprocessors.put(layer.getLayerName(), preprocessor);
            graphBuilder.addLayer(layer.getLayerName(), layer.getLayer(), inboundLayerNamesArray);
            if (this.outputLayerNames.contains(layer.getLayerName()) && !(layer.getLayer() instanceof IOutputLayer))
                log.warn("Model cannot be trained: output layer " + layer.getLayerName()
                                + " is not an IOutputLayer (no loss function specified)");
        } else if (layer.isVertex()) {
            /* Add DL4J vertex. */
            if (preprocessor != null)
                preprocessors.put(layer.getLayerName(), preprocessor);
            graphBuilder.addVertex(layer.getLayerName(), layer.getVertex(), inboundLayerNamesArray);
            if (this.outputLayerNames.contains(layer.getLayerName()) && !(layer.getVertex() instanceof IOutputLayer))
                log.warn("Model cannot be trained: output vertex " + layer.getLayerName()
                                + " is not an IOutputLayer (no loss function specified)");
        } else if (layer.isInputPreProcessor()) {
            /* Add InputPreProcessor as a PreprocessorVertex. */
            if (preprocessor == null)
                throw new UnsupportedKerasConfigurationException("Layer " + layer.getLayerName()
                                + " could not be mapped to Layer, Vertex, or InputPreProcessor");
            graphBuilder.addVertex(layer.getLayerName(), new PreprocessorVertex(preprocessor),
                            inboundLayerNamesArray);
            //A preprocessor vertex can never be an IOutputLayer, so warn if it is used as an output
            if (this.outputLayerNames.contains(layer.getLayerName()))
                log.warn("Model cannot be trained: output " + layer.getLayerName()
                                + " is not an IOutputLayer (no loss function specified)");
        }
    }
    graphBuilder.setInputPreProcessors(preprocessors);

    /* Whether to use standard backprop (or BPTT) or truncated BPTT. */
    if (this.useTruncatedBPTT && this.truncatedBPTT > 0)
        graphBuilder.backpropType(BackpropType.TruncatedBPTT).tBPTTForwardLength(truncatedBPTT)
                        .tBPTTBackwardLength(truncatedBPTT);
    else
        graphBuilder.backpropType(BackpropType.Standard);

    return graphBuilder.build();
}
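As a usage sketch (the file name is illustrative): this method runs as part of importing a Keras functional-API model through the standard model-import entry point.

import org.deeplearning4j.nn.conf.ComputationGraphConfiguration;
import org.deeplearning4j.nn.graph.ComputationGraph;
import org.deeplearning4j.nn.modelimport.keras.KerasModelImport;

ComputationGraph model = KerasModelImport.importKerasModelAndWeights("model.h5");
ComputationGraphConfiguration conf = model.getConfiguration();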