Use of org.deeplearning4j.nn.conf.graph.LayerVertex in project deeplearning4j by deeplearning4j.
From the class TrainModule, method getLayerInfoTable:
private String[][] getLayerInfoTable(int layerIdx, TrainModuleUtils.GraphInfo gi, I18N i18N, boolean noData, StatsStorage ss, String wid) {
    List<String[]> layerInfoRows = new ArrayList<>();
    layerInfoRows.add(new String[] { i18N.getMessage("train.model.layerinfotable.layerName"), gi.getVertexNames().get(layerIdx) });
    layerInfoRows.add(new String[] { i18N.getMessage("train.model.layerinfotable.layerType"), "" });

    if (!noData) {
        Persistable p = ss.getStaticInfo(currentSessionID, StatsListener.TYPE_ID, wid);
        if (p != null) {
            StatsInitializationReport initReport = (StatsInitializationReport) p;
            String configJson = initReport.getModelConfigJson();
            String modelClass = initReport.getModelClassName();

            //TODO error handling...
            String layerType = "";
            Layer layer = null;
            NeuralNetConfiguration nnc = null;
            if (modelClass.endsWith("MultiLayerNetwork")) {
                MultiLayerConfiguration conf = MultiLayerConfiguration.fromJson(configJson);
                //-1 because of input
                int confIdx = layerIdx - 1;
                if (confIdx >= 0) {
                    nnc = conf.getConf(confIdx);
                    layer = nnc.getLayer();
                } else {
                    //Input layer
                    layerType = "Input";
                }
            } else if (modelClass.endsWith("ComputationGraph")) {
                ComputationGraphConfiguration conf = ComputationGraphConfiguration.fromJson(configJson);
                String vertexName = gi.getVertexNames().get(layerIdx);

                Map<String, GraphVertex> vertices = conf.getVertices();
                if (vertices.containsKey(vertexName) && vertices.get(vertexName) instanceof LayerVertex) {
                    LayerVertex lv = (LayerVertex) vertices.get(vertexName);
                    nnc = lv.getLayerConf();
                    layer = nnc.getLayer();
                } else if (conf.getNetworkInputs().contains(vertexName)) {
                    layerType = "Input";
                } else {
                    GraphVertex gv = conf.getVertices().get(vertexName);
                    if (gv != null) {
                        layerType = gv.getClass().getSimpleName();
                    }
                }
            } else if (modelClass.endsWith("VariationalAutoencoder")) {
                layerType = gi.getVertexTypes().get(layerIdx);
                Map<String, String> map = gi.getVertexInfo().get(layerIdx);
                for (Map.Entry<String, String> entry : map.entrySet()) {
                    layerInfoRows.add(new String[] { entry.getKey(), entry.getValue() });
                }
            }

            if (layer != null) {
                layerType = getLayerType(layer);
            }
            if (layer != null) {
                String activationFn = null;
                if (layer instanceof FeedForwardLayer) {
                    FeedForwardLayer ffl = (FeedForwardLayer) layer;
                    layerInfoRows.add(new String[] { i18N.getMessage("train.model.layerinfotable.layerNIn"), String.valueOf(ffl.getNIn()) });
                    layerInfoRows.add(new String[] { i18N.getMessage("train.model.layerinfotable.layerSize"), String.valueOf(ffl.getNOut()) });
                    activationFn = layer.getActivationFn().toString();
                }
                int nParams = layer.initializer().numParams(nnc);
                layerInfoRows.add(new String[] { i18N.getMessage("train.model.layerinfotable.layerNParams"), String.valueOf(nParams) });
                if (nParams > 0) {
                    WeightInit wi = layer.getWeightInit();
                    String str = wi.toString();
                    if (wi == WeightInit.DISTRIBUTION) {
                        str += layer.getDist();
                    }
                    layerInfoRows.add(new String[] { i18N.getMessage("train.model.layerinfotable.layerWeightInit"), str });

                    Updater u = layer.getUpdater();
                    String us = (u == null ? "" : u.toString());
                    layerInfoRows.add(new String[] { i18N.getMessage("train.model.layerinfotable.layerUpdater"), us });

                    //TODO: Maybe L1/L2, dropout, updater-specific values etc
                }

                if (layer instanceof ConvolutionLayer || layer instanceof SubsamplingLayer) {
                    int[] kernel;
                    int[] stride;
                    int[] padding;
                    if (layer instanceof ConvolutionLayer) {
                        ConvolutionLayer cl = (ConvolutionLayer) layer;
                        kernel = cl.getKernelSize();
                        stride = cl.getStride();
                        padding = cl.getPadding();
                    } else {
                        SubsamplingLayer ssl = (SubsamplingLayer) layer;
                        kernel = ssl.getKernelSize();
                        stride = ssl.getStride();
                        padding = ssl.getPadding();
                        activationFn = null;
                        layerInfoRows.add(new String[] { i18N.getMessage("train.model.layerinfotable.layerSubsamplingPoolingType"), ssl.getPoolingType().toString() });
                    }
                    layerInfoRows.add(new String[] { i18N.getMessage("train.model.layerinfotable.layerCnnKernel"), Arrays.toString(kernel) });
                    layerInfoRows.add(new String[] { i18N.getMessage("train.model.layerinfotable.layerCnnStride"), Arrays.toString(stride) });
                    layerInfoRows.add(new String[] { i18N.getMessage("train.model.layerinfotable.layerCnnPadding"), Arrays.toString(padding) });
                }

                if (activationFn != null) {
                    layerInfoRows.add(new String[] { i18N.getMessage("train.model.layerinfotable.layerActivationFn"), activationFn });
                }
            }
            layerInfoRows.get(1)[1] = layerType;
        }
    }

    return layerInfoRows.toArray(new String[layerInfoRows.size()][0]);
}
Use of org.deeplearning4j.nn.conf.graph.LayerVertex in project deeplearning4j by deeplearning4j.
From the class TrainModuleUtils, method buildGraphInfo:
public static GraphInfo buildGraphInfo(ComputationGraphConfiguration config) {
    List<String> layerNames = new ArrayList<>();
    List<String> layerTypes = new ArrayList<>();
    List<List<Integer>> layerInputs = new ArrayList<>();
    List<Map<String, String>> layerInfo = new ArrayList<>();

    Map<String, GraphVertex> vertices = config.getVertices();
    Map<String, List<String>> vertexInputs = config.getVertexInputs();
    List<String> networkInputs = config.getNetworkInputs();

    List<String> originalVertexName = new ArrayList<>();

    Map<String, Integer> vertexToIndexMap = new HashMap<>();
    int vertexCount = 0;
    for (String s : networkInputs) {
        vertexToIndexMap.put(s, vertexCount++);
        layerNames.add(s);
        originalVertexName.add(s);
        layerTypes.add(s);
        layerInputs.add(Collections.emptyList());
        layerInfo.add(Collections.emptyMap());
    }

    for (String s : vertices.keySet()) {
        vertexToIndexMap.put(s, vertexCount++);
    }

    int layerCount = 0;
    for (Map.Entry<String, GraphVertex> entry : vertices.entrySet()) {
        GraphVertex gv = entry.getValue();
        layerNames.add(entry.getKey());

        List<String> inputsThisVertex = vertexInputs.get(entry.getKey());
        List<Integer> inputIndexes = new ArrayList<>();
        for (String s : inputsThisVertex) {
            inputIndexes.add(vertexToIndexMap.get(s));
        }
        layerInputs.add(inputIndexes);

        if (gv instanceof LayerVertex) {
            NeuralNetConfiguration c = ((LayerVertex) gv).getLayerConf();
            Layer layer = c.getLayer();
            String layerType = layer.getClass().getSimpleName().replaceAll("Layer$", "");
            layerTypes.add(layerType);

            //Extract layer info
            Map<String, String> map = getLayerInfo(c, layer);
            layerInfo.add(map);
        } else {
            String layerType = gv.getClass().getSimpleName();
            layerTypes.add(layerType);
            //TODO
            Map<String, String> thisVertexInfo = Collections.emptyMap();
            layerInfo.add(thisVertexInfo);
        }
        originalVertexName.add(entry.getKey());
    }

    return new GraphInfo(layerNames, layerTypes, layerInputs, layerInfo, originalVertexName);
}
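A rough usage sketch follows (the small two-layer configuration is invented for illustration and is not part of the project source): buildGraphInfo can be called on any ComputationGraphConfiguration, and the resulting GraphInfo exposes the vertex names and types shown in the training UI.

// Hypothetical example: a minimal graph configuration, then the GraphInfo derived from it
ComputationGraphConfiguration conf = new NeuralNetConfiguration.Builder().graphBuilder()
                .addInputs("in")
                .addLayer("dense", new DenseLayer.Builder().nIn(4).nOut(3).build(), "in")
                .addLayer("out", new OutputLayer.Builder().nIn(3).nOut(2).build(), "dense")
                .setOutputs("out").build();

TrainModuleUtils.GraphInfo gi = TrainModuleUtils.buildGraphInfo(conf);
// Network inputs come first, then the layer vertices in iteration order
System.out.println(gi.getVertexNames()); // expected along the lines of [in, dense, out]
System.out.println(gi.getVertexTypes()); // expected along the lines of [in, Dense, Output]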
Use of org.deeplearning4j.nn.conf.graph.LayerVertex in project deeplearning4j by deeplearning4j.
From the class TestComputationGraphNetwork, method testCnnFlatInputType1:
@Test
public void testCnnFlatInputType1() {

    //First: check conv input type. Expect: no preprocessor, nIn set appropriately
    ComputationGraphConfiguration conf = new NeuralNetConfiguration.Builder().graphBuilder()
                    .addInputs("in")
                    .setInputTypes(InputType.convolutional(10, 8, 3))
                    .addLayer("layer", new ConvolutionLayer.Builder().kernelSize(2, 2).padding(0, 0).stride(1, 1).build(), "in")
                    .addLayer("out", new OutputLayer.Builder().nOut(10).build(), "layer")
                    .setOutputs("out")
                    .pretrain(false).backprop(true).build();

    LayerVertex lv = (LayerVertex) conf.getVertices().get("layer");
    FeedForwardLayer l = ((FeedForwardLayer) (lv).getLayerConf().getLayer());
    assertEquals(3, l.getNIn());
    assertNull(lv.getPreProcessor());

    //Check the equivalent config, but with flat conv data input instead
    //In this case, the only difference should be the addition of a preprocessor
    conf = new NeuralNetConfiguration.Builder().graphBuilder()
                    .addInputs("in")
                    .setInputTypes(InputType.convolutionalFlat(10, 8, 3))
                    .addLayer("layer", new ConvolutionLayer.Builder().kernelSize(2, 2).padding(0, 0).stride(1, 1).build(), "in")
                    .addLayer("out", new OutputLayer.Builder().nOut(10).build(), "layer")
                    .setOutputs("out")
                    .pretrain(false).backprop(true).build();

    lv = (LayerVertex) conf.getVertices().get("layer");
    l = ((FeedForwardLayer) (lv).getLayerConf().getLayer());
    assertEquals(3, l.getNIn());
    assertNotNull(lv.getPreProcessor());
    InputPreProcessor preProcessor = lv.getPreProcessor();
    assertTrue(preProcessor instanceof FeedForwardToCnnPreProcessor);
    FeedForwardToCnnPreProcessor preproc = (FeedForwardToCnnPreProcessor) preProcessor;
    assertEquals(10, preproc.getInputHeight());
    assertEquals(8, preproc.getInputWidth());
    assertEquals(3, preproc.getNumChannels());

    //Finally, check configuration with a subsampling layer
    conf = new NeuralNetConfiguration.Builder().graphBuilder()
                    .addInputs("in")
                    .setInputTypes(InputType.convolutionalFlat(10, 8, 3))
                    .addLayer("l0", new SubsamplingLayer.Builder().kernelSize(2, 2).stride(1, 1).padding(0, 0).build(), "in")
                    .addLayer("layer", new ConvolutionLayer.Builder().kernelSize(2, 2).padding(0, 0).stride(1, 1).build(), "l0")
                    .addLayer("out", new OutputLayer.Builder().nOut(10).build(), "layer")
                    .setOutputs("out")
                    .pretrain(false).backprop(true).build();

    //Check subsampling layer:
    lv = (LayerVertex) conf.getVertices().get("l0");
    SubsamplingLayer sl = ((SubsamplingLayer) (lv).getLayerConf().getLayer());
    assertNotNull(lv.getPreProcessor());
    preProcessor = lv.getPreProcessor();
    assertTrue(preProcessor instanceof FeedForwardToCnnPreProcessor);
    preproc = (FeedForwardToCnnPreProcessor) preProcessor;
    assertEquals(10, preproc.getInputHeight());
    assertEquals(8, preproc.getInputWidth());
    assertEquals(3, preproc.getNumChannels());

    //Check the convolution layer that follows it
    lv = (LayerVertex) conf.getVertices().get("layer");
    l = ((FeedForwardLayer) (lv).getLayerConf().getLayer());
    assertEquals(3, l.getNIn());
    assertNull(lv.getPreProcessor());
}
Use of org.deeplearning4j.nn.conf.graph.LayerVertex in project deeplearning4j by deeplearning4j.
From the class GradientCheckUtil, method checkGradients:
/**Check backprop gradients for a ComputationGraph
* @param graph ComputationGraph to test. This must be initialized.
* @param epsilon Usually on the order of 1e-4 or so.
* @param maxRelError Maximum relative error. Usually < 0.01, though maybe more for deep networks
* @param minAbsoluteError Minimum absolute error to cause a failure. Numerical gradients can be non-zero due to precision issues.
* For example, 0.0 vs. 1e-18: relative error is 1.0, but not really a failure
* @param print Whether to print full pass/failure details for each parameter gradient
* @param exitOnFirstError If true: return upon first failure. If false: continue checking even if
* one parameter gradient has failed. Typically use false for debugging, true for unit tests.
* @param inputs Input arrays to use for forward pass. May be mini-batch data.
* @param labels Labels/targets (output) arrays to use to calculate backprop gradient. May be mini-batch data.
* @return true if gradients are passed, false otherwise.
*/
public static boolean checkGradients(ComputationGraph graph, double epsilon, double maxRelError,
                double minAbsoluteError, boolean print, boolean exitOnFirstError, INDArray[] inputs,
                INDArray[] labels) {
    //Basic sanity checks on input:
    if (epsilon <= 0.0 || epsilon > 0.1)
        throw new IllegalArgumentException("Invalid epsilon: expect epsilon in range (0,0.1], usually 1e-4 or so");
    if (maxRelError <= 0.0 || maxRelError > 0.25)
        throw new IllegalArgumentException("Invalid maxRelativeError: " + maxRelError);
    if (graph.getNumInputArrays() != inputs.length)
        throw new IllegalArgumentException("Invalid input arrays: expect " + graph.getNumInputArrays() + " inputs");
    if (graph.getNumOutputArrays() != labels.length)
        throw new IllegalArgumentException("Invalid labels arrays: expect " + graph.getNumOutputArrays() + " outputs");

    //Check configuration
    int layerCount = 0;
    for (String vertexName : graph.getConfiguration().getVertices().keySet()) {
        GraphVertex gv = graph.getConfiguration().getVertices().get(vertexName);
        if (!(gv instanceof LayerVertex))
            continue;
        LayerVertex lv = (LayerVertex) gv;

        org.deeplearning4j.nn.conf.Updater u = lv.getLayerConf().getLayer().getUpdater();
        if (u == org.deeplearning4j.nn.conf.Updater.SGD) {
            //Must have LR of 1.0
            double lr = lv.getLayerConf().getLayer().getLearningRate();
            if (lr != 1.0) {
throw new IllegalStateException("When using SGD updater, must also use lr=1.0 for layer \"" + vertexName + "\"; got " + u);
            }
        } else if (u != org.deeplearning4j.nn.conf.Updater.NONE) {
            throw new IllegalStateException("Must have Updater.NONE (or SGD + lr=1.0) for layer \"" + vertexName
                            + "\"; got " + u);
        }

        double dropout = lv.getLayerConf().getLayer().getDropOut();
        if (lv.getLayerConf().isUseRegularization() && dropout != 0.0) {
            throw new IllegalStateException("Must have dropout == 0.0 for gradient checks - got dropout = " + dropout
                            + " for layer " + layerCount);
        }

        IActivation activation = lv.getLayerConf().getLayer().getActivationFn();
        if (activation != null) {
            if (!VALID_ACTIVATION_FUNCTIONS.contains(activation.getClass())) {
                log.warn("Layer \"" + vertexName + "\" is possibly using an unsuitable activation function: "
                                + activation.getClass()
                                + ". Activation functions for gradient checks must be smooth (like sigmoid, tanh, softmax) and not "
                                + "contain discontinuities like ReLU or LeakyReLU (these may cause spurious failures)");
            }
        }
    }

    for (int i = 0; i < inputs.length; i++)
        graph.setInput(i, inputs[i]);
    for (int i = 0; i < labels.length; i++)
        graph.setLabel(i, labels[i]);

    graph.computeGradientAndScore();
    Pair<Gradient, Double> gradAndScore = graph.gradientAndScore();

    ComputationGraphUpdater updater = new ComputationGraphUpdater(graph);
    updater.update(graph, gradAndScore.getFirst(), 0, graph.batchSize());

    //need dup: gradients are a *view* of the full gradient array (which will change every time backprop is done)
    INDArray gradientToCheck = gradAndScore.getFirst().gradient().dup();
    //need dup: params are a *view* of full parameters
    INDArray originalParams = graph.params().dup();

    int nParams = originalParams.length();

    Map<String, INDArray> paramTable = graph.paramTable();
    List<String> paramNames = new ArrayList<>(paramTable.keySet());
    int[] paramEnds = new int[paramNames.size()];
    paramEnds[0] = paramTable.get(paramNames.get(0)).length();
    for (int i = 1; i < paramEnds.length; i++) {
        paramEnds[i] = paramEnds[i - 1] + paramTable.get(paramNames.get(i)).length();
    }

    int currParamNameIdx = 0;
    int totalNFailures = 0;
    double maxError = 0.0;
    MultiDataSet mds = new MultiDataSet(inputs, labels);
    //Assumption here: params is a view that we can modify in-place
    INDArray params = graph.params();
    for (int i = 0; i < nParams; i++) {
        //Get param name
        if (i >= paramEnds[currParamNameIdx]) {
            currParamNameIdx++;
        }
        String paramName = paramNames.get(currParamNameIdx);

        //(w+epsilon): Do forward pass and score
        double origValue = params.getDouble(i);
        params.putScalar(i, origValue + epsilon);
        //training == true for batch norm, etc (scores and gradients need to be calculated on same thing)
        double scorePlus = graph.score(mds, true);

        //(w-epsilon): Do forward pass and score
        params.putScalar(i, origValue - epsilon);
        double scoreMinus = graph.score(mds, true);

        //Reset original param value
        params.putScalar(i, origValue);

        //Calculate numerical parameter gradient:
        double scoreDelta = scorePlus - scoreMinus;
        double numericalGradient = scoreDelta / (2 * epsilon);
        if (Double.isNaN(numericalGradient))
            throw new IllegalStateException("Numerical gradient was NaN for parameter " + i + " of " + nParams);

        double backpropGradient = gradientToCheck.getDouble(i);
        //http://cs231n.github.io/neural-networks-3/#gradcheck
        //use mean centered
        double relError = Math.abs(backpropGradient - numericalGradient)
                        / (Math.abs(numericalGradient) + Math.abs(backpropGradient));
        if (backpropGradient == 0.0 && numericalGradient == 0.0)
            relError = 0.0; //Edge case: e.g., RNNs with time series length of 1
        if (relError > maxError)
            maxError = relError;
        if (relError > maxRelError || Double.isNaN(relError)) {
            double absError = Math.abs(backpropGradient - numericalGradient);
            if (absError < minAbsoluteError) {
                log.info("Param " + i + " (" + paramName + ") passed: grad= " + backpropGradient
                                + ", numericalGrad= " + numericalGradient + ", relError= " + relError
                                + "; absolute error = " + absError + " < minAbsoluteError = " + minAbsoluteError);
            } else {
                if (print)
                    log.info("Param " + i + " (" + paramName + ") FAILED: grad= " + backpropGradient
                                    + ", numericalGrad= " + numericalGradient + ", relError= " + relError
                                    + ", scorePlus=" + scorePlus + ", scoreMinus= " + scoreMinus);
                if (exitOnFirstError)
                    return false;
                totalNFailures++;
            }
        } else if (print) {
            log.info("Param " + i + " (" + paramName + ") passed: grad= " + backpropGradient + ", numericalGrad= "
                            + numericalGradient + ", relError= " + relError);
        }
    }

    if (print) {
        int nPass = nParams - totalNFailures;
        log.info("GradientCheckUtil.checkGradients(): " + nParams + " params checked, " + nPass + " passed, "
                        + totalNFailures + " failed. Largest relative error = " + maxError);
    }

    return totalNFailures == 0;
}
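A hedged sketch of how a test might call this check follows; graph, features, and labels are placeholders for an initialized ComputationGraph and its input/label INDArrays, and the layer configuration must satisfy the constraints verified above (Updater.NONE or SGD with lr = 1.0, no dropout, smooth activation functions).

// Placeholder variables: graph is an initialized ComputationGraph; features/labels are INDArrays
double epsilon = 1e-6;          // must be in (0, 0.1]
double maxRelError = 1e-3;      // must be in (0, 0.25]
double minAbsoluteError = 1e-8; // ignore tiny absolute differences near zero
boolean ok = GradientCheckUtil.checkGradients(graph, epsilon, maxRelError, minAbsoluteError,
                true,   // print per-parameter pass/fail details
                false,  // keep checking after the first failing parameter
                new INDArray[] {features}, new INDArray[] {labels});
assertTrue(ok);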
Use of org.deeplearning4j.nn.conf.graph.LayerVertex in project deeplearning4j by deeplearning4j.
From the class ComputationGraphConfiguration, method fromJson:
/**
* Create a computation graph configuration from json
*
* @param json the neural net configuration from json
* @return {@link ComputationGraphConfiguration}
*/
public static ComputationGraphConfiguration fromJson(String json) {
    //As per MultiLayerConfiguration.fromJson()
    ObjectMapper mapper = NeuralNetConfiguration.mapper();
    ComputationGraphConfiguration conf;
    try {
        conf = mapper.readValue(json, ComputationGraphConfiguration.class);
    } catch (IOException e) {
        throw new RuntimeException(e);
    }

    //To maintain backward compatibility after activation function refactoring (configs generated with v0.7.1 or earlier)
    // Previously: enumeration used for activation functions. Now: use classes
    int layerCount = 0;
    Map<String, GraphVertex> vertexMap = conf.getVertices();
    JsonNode vertices = null;
    for (Map.Entry<String, GraphVertex> entry : vertexMap.entrySet()) {
        if (!(entry.getValue() instanceof LayerVertex)) {
            continue;
        }

        LayerVertex lv = (LayerVertex) entry.getValue();
        if (lv.getLayerConf() != null && lv.getLayerConf().getLayer() != null) {
            Layer layer = lv.getLayerConf().getLayer();

            if (layer.getActivationFn() == null) {
                String layerName = layer.getLayerName();

                try {
                    if (vertices == null) {
                        JsonNode jsonNode = mapper.readTree(json);
                        vertices = jsonNode.get("vertices");
                    }

                    JsonNode vertexNode = vertices.get(layerName);
                    JsonNode layerVertexNode = vertexNode.get("LayerVertex");
                    if (layerVertexNode == null || !layerVertexNode.has("layerConf")
                                    || !layerVertexNode.get("layerConf").has("layer")) {
                        continue;
                    }

                    JsonNode layerWrapperNode = layerVertexNode.get("layerConf").get("layer");
                    if (layerWrapperNode == null || layerWrapperNode.size() != 1) {
                        continue;
                    }

                    //Should only have 1 element: "dense", "output", etc
                    JsonNode layerNode = layerWrapperNode.elements().next();
                    JsonNode activationFunction = layerNode.get("activationFunction");

                    if (activationFunction != null) {
                        IActivation ia = Activation.fromString(activationFunction.asText()).getActivationFunction();
                        layer.setActivationFn(ia);
                    }
                } catch (IOException e) {
                    log.warn("Layer with null ActivationFn field or pre-0.7.2 activation function detected: could not parse JSON", e);
                }
            }
        }
    }

    return conf;
}
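A small round-trip sketch follows (illustrative only, not part of the file above; the one-layer configuration is invented): the usual entry point is a configuration serialized with toJson() and restored with fromJson(). The legacy-activation branch above only matters for JSON written by versions before 0.7.2, where LayerVertex layers carried a string activationFunction field.

ComputationGraphConfiguration original = new NeuralNetConfiguration.Builder().graphBuilder()
                .addInputs("in")
                .addLayer("out", new OutputLayer.Builder().nIn(4).nOut(2).build(), "in")
                .setOutputs("out").build();

String json = original.toJson();
ComputationGraphConfiguration restored = ComputationGraphConfiguration.fromJson(json);
// For current-format JSON the activation functions deserialize directly, so the
// vertex-by-vertex patching above is skipped because getActivationFn() is non-null.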