Use of org.deeplearning4j.nn.conf.graph.GraphVertex in project deeplearning4j by deeplearning4j.
In the class TestUpdaters, the method testUpdaters:
@Test
public void testUpdaters() throws Exception {
ComputationGraphConfiguration conf = new NeuralNetConfiguration.Builder()
        .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT)
        .updater(Updater.NESTEROVS).momentum(0.9)
        .graphBuilder()
        .addInputs("input") //40x40x1
        .addLayer("l0_cnn", new ConvolutionLayer.Builder(new int[] { 3, 3 }, new int[] { 1, 1 }, new int[] { 1, 1 }).nOut(100).build(), "input") //out: 40x40x100
        .addLayer("l1_max", new SubsamplingLayer.Builder(SubsamplingLayer.PoolingType.MAX, new int[] { 2, 2 }, new int[] { 2, 2 }, new int[] { 1, 1 }).build(), "l0_cnn") //out: 21x21x100
        .addLayer("l2_cnn", new ConvolutionLayer.Builder(new int[] { 3, 3 }, new int[] { 2, 2 }, new int[] { 1, 1 }).nOut(200).build(), "l1_max") //out: 11x11x200
        .addLayer("l3_max", new SubsamplingLayer.Builder(SubsamplingLayer.PoolingType.MAX, new int[] { 3, 3 }, new int[] { 2, 2 }, new int[] { 1, 1 }).build(), "l2_cnn") //out: 6x6x200
        .addLayer("l4_fc", new DenseLayer.Builder().nOut(1024).build(), "l3_max") //out: 1x1x1024
        .addLayer("l5_out", new OutputLayer.Builder(LossFunctions.LossFunction.NEGATIVELOGLIKELIHOOD).nOut(10).activation(Activation.SOFTMAX).build(), "l4_fc")
        .setOutputs("l5_out")
        .backprop(true).pretrain(false)
        .setInputTypes(InputType.convolutional(40, 40, 1))
        .build();
//First: check that the nIns are set properly...
Map<String, GraphVertex> map = conf.getVertices();
LayerVertex l0_cnn = (LayerVertex) map.get("l0_cnn");
LayerVertex l2_cnn = (LayerVertex) map.get("l2_cnn");
LayerVertex l4_fc = (LayerVertex) map.get("l4_fc");
LayerVertex l5_out = (LayerVertex) map.get("l5_out");
assertEquals(1, ((FeedForwardLayer) l0_cnn.getLayerConf().getLayer()).getNIn());
assertEquals(100, ((FeedForwardLayer) l2_cnn.getLayerConf().getLayer()).getNIn());
assertEquals(6 * 6 * 200, ((FeedForwardLayer) l4_fc.getLayerConf().getLayer()).getNIn());
assertEquals(1024, ((FeedForwardLayer) l5_out.getLayerConf().getLayer()).getNIn());
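//The shapes asserted above follow from out = (in + 2*padding - kernel) / stride + 1 per spatial dimension:
//l1_max: (40 + 2 - 2)/2 + 1 = 21 -> 21x21x100;  l2_cnn: (21 + 2 - 3)/2 + 1 = 11 -> 11x11x200
//l3_max: (11 + 2 - 3)/2 + 1 = 6 -> 6x6x200, hence nIn of l4_fc is 6 * 6 * 200 = 7200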
//Check updaters state:
ComputationGraph g = new ComputationGraph(conf);
g.init();
g.initGradientsView();
ComputationGraphUpdater updater = g.getUpdater();
//First: get the updaters array
Field layerUpdatersField = updater.getClass().getDeclaredField("layerUpdaters");
layerUpdatersField.setAccessible(true);
org.deeplearning4j.nn.api.Updater[] layerUpdaters = (org.deeplearning4j.nn.api.Updater[]) layerUpdatersField.get(updater);
//And get the map between names and updater indexes
Field layerUpdatersMapField = updater.getClass().getDeclaredField("layerUpdatersMap");
layerUpdatersMapField.setAccessible(true);
Map<String, Integer> layerUpdatersMap = (Map<String, Integer>) layerUpdatersMapField.get(updater);
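//(Reflection is needed here only because layerUpdaters/layerUpdatersMap are private fields of the
//updater; this is a test-only inspection technique, not part of the public API.)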
//Go through each layer; check that the updater state size matches the parameters size
org.deeplearning4j.nn.api.Layer[] layers = g.getLayers();
for (org.deeplearning4j.nn.api.Layer l : layers) {
String layerName = l.conf().getLayer().getLayerName();
int nParams = l.numParams();
Map<String, INDArray> paramTable = l.paramTable();
Map<String, Integer> parameterSizeCounts = new LinkedHashMap<>();
for (Map.Entry<String, INDArray> e : paramTable.entrySet()) {
parameterSizeCounts.put(e.getKey(), e.getValue().length());
}
int updaterIdx = layerUpdatersMap.get(layerName);
org.deeplearning4j.nn.api.Updater u = layerUpdaters[updaterIdx];
LayerUpdater lu = (LayerUpdater) u;
Field updaterForVariableField = LayerUpdater.class.getDeclaredField("updaterForVariable");
updaterForVariableField.setAccessible(true);
Map<String, GradientUpdater> updaterForVariable = (Map<String, GradientUpdater>) updaterForVariableField.get(lu);
Map<String, Integer> updaterStateSizeCounts = new HashMap<>();
for (Map.Entry<String, GradientUpdater> entry : updaterForVariable.entrySet()) {
GradientUpdater gu = entry.getValue();
Nesterovs nesterovs = (Nesterovs) gu;
INDArray v = nesterovs.getV();
int length = (v == null ? -1 : v.length());
updaterStateSizeCounts.put(entry.getKey(), length);
}
//Layers with no parameters (e.g., subsampling layers) should have no updater state:
if (l.numParams() == 0) {
assertEquals(0, updaterForVariable.size());
}
System.out.println(layerName + "\t" + nParams + "\t" + parameterSizeCounts + "\t Updater size: " + updaterStateSizeCounts);
//With the Nesterov updater there is exactly one history (velocity) value per parameter:
for (String s : parameterSizeCounts.keySet()) {
int paramSize = parameterSizeCounts.get(s);
int updaterSize = updaterStateSizeCounts.get(s);
assertEquals(layerName + "/" + s, paramSize, updaterSize);
}
}
//minibatch, depth, height, width
INDArray in = Nd4j.create(2, 1, 40, 40);
INDArray l = Nd4j.create(2, 10);
DataSet ds = new DataSet(in, l);
g.fit(ds);
}
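The updater-state assertion in this test follows directly from the Nesterov momentum update (the "lookahead baked in" formulation, with momentum \mu and learning rate \epsilon), in which the velocity v has exactly the same shape as the parameters \theta:

v_{t+1} = \mu \, v_t - \epsilon \, \nabla L(\theta_t), \qquad \theta_{t+1} = \theta_t - \mu \, v_t + (1 + \mu) \, v_{t+1}

One stored velocity value per parameter is therefore the entire updater history, which is what the per-variable length comparison in the loop verifies.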
Use of org.deeplearning4j.nn.conf.graph.GraphVertex in project deeplearning4j by deeplearning4j.
In the class ComputationGraphConfiguration, the method addPreProcessors:
/**
* Add preprocessors automatically, given the specified types of inputs for the network. Inputs are specified using the
* {@link InputType} class, in the same order in which the inputs were defined in the original configuration.<br>
* For example, in a network with two inputs, a convolutional input (28x28x1 images) and a feed-forward input, use
* {@code .addPreProcessors(InputType.convolutional(28,28,1),InputType.feedForward())}.<br>
* For the CNN->Dense and CNN->RNN transitions, the nIn values on the Dense/RNN layers will also be set automatically.
* <b>NOTE</b>: This method will be called automatically when using the
* {@link GraphBuilder#setInputTypes(InputType...)} functionality.
* See that method for details.
*/
public void addPreProcessors(InputType... inputTypes) {
if (inputTypes == null || inputTypes.length != networkInputs.size()) {
throw new IllegalArgumentException("Invalid number of InputTypes: cannot add preprocessors if number of InputType " + "objects differs from number of network inputs");
}
//Now: need to do essentially a forward pass through the network, to work out what type of preprocessors to add
//To do this: need to know what the output types are for each GraphVertex.
//First step: build network in reverse order (i.e., define map of a -> list(b) instead of list(a) -> b)
//Key: vertex. Values: vertices that this node is an input for
Map<String, List<String>> verticesOutputTo = new HashMap<>();
for (Map.Entry<String, GraphVertex> entry : vertices.entrySet()) {
String vertexName = entry.getKey();
List<String> vertexInputNames = vertexInputs.get(vertexName);
if (vertexInputNames == null)
continue;
//Build reverse network structure:
for (String s : vertexInputNames) {
List<String> list = verticesOutputTo.get(s);
if (list == null) {
list = new ArrayList<>();
verticesOutputTo.put(s, list);
}
//Edge: s -> vertexName
list.add(vertexName);
}
}
//Now: do topological sort
//Set of all nodes with no incoming edges
LinkedList<String> noIncomingEdges = new LinkedList<>(networkInputs);
List<String> topologicalOrdering = new ArrayList<>();
Map<String, Set<String>> inputEdges = new HashMap<>();
for (Map.Entry<String, List<String>> entry : vertexInputs.entrySet()) {
inputEdges.put(entry.getKey(), new HashSet<>(entry.getValue()));
}
while (!noIncomingEdges.isEmpty()) {
String next = noIncomingEdges.removeFirst();
topologicalOrdering.add(next);
//Remove edges next -> verticesOutputTo[...] from graph:
List<String> nextEdges = verticesOutputTo.get(next);
if (nextEdges != null && !nextEdges.isEmpty()) {
for (String s : nextEdges) {
Set<String> set = inputEdges.get(s);
set.remove(next);
if (set.isEmpty()) {
//No remaining input edges for this vertex -> add to list for processing
noIncomingEdges.add(s);
}
}
}
}
//If any edges remain in the graph: graph has cycles:
for (Map.Entry<String, Set<String>> entry : inputEdges.entrySet()) {
Set<String> set = entry.getValue();
if (set == null)
continue;
if (!set.isEmpty())
throw new IllegalStateException("Invalid configuration: cycle detected in graph. Cannot calculate topological ordering with graph cycle (" + "cycle includes vertex \"" + entry.getKey() + "\")");
}
//Now, given the topological sort: do equivalent of forward pass
Map<String, InputType> vertexOutputs = new HashMap<>();
int currLayerIdx = -1;
for (String s : topologicalOrdering) {
int inputIdx = networkInputs.indexOf(s);
if (inputIdx != -1) {
vertexOutputs.put(s, inputTypes[inputIdx]);
continue;
}
GraphVertex gv = vertices.get(s);
List<InputType> inputTypeList = new ArrayList<>();
if (gv instanceof LayerVertex) {
//Add preprocessor, if necessary:
String in = vertexInputs.get(s).get(0);
InputType layerInput = vertexOutputs.get(in);
inputTypeList.add(layerInput);
LayerVertex lv = (LayerVertex) gv;
Layer l = lv.getLayerConf().getLayer();
//Preprocessors - add if necessary
if (lv.getPreProcessor() == null) {
//But don't override preprocessors that are manually defined; if none has been defined,
//add the appropriate preprocessor for this input type/layer combination
InputPreProcessor preproc = l.getPreProcessorForInputType(layerInput);
lv.setPreProcessor(preproc);
}
//Set nIn value for layer (if not already set)
InputType afterPreproc = layerInput;
if (lv.getPreProcessor() != null) {
InputPreProcessor ip = lv.getPreProcessor();
afterPreproc = ip.getOutputType(layerInput);
}
l.setNIn(afterPreproc, false);
currLayerIdx++;
} else {
List<String> inputs = vertexInputs.get(s);
if (inputs != null) {
for (String inputVertexName : inputs) {
inputTypeList.add(vertexOutputs.get(inputVertexName));
}
}
}
InputType outputFromVertex = gv.getOutputType(currLayerIdx, inputTypeList.toArray(new InputType[inputTypeList.size()]));
vertexOutputs.put(s, outputFromVertex);
}
}
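In practice this method is not usually called directly. A minimal sketch of the usual entry point follows (the layer names and sizes are illustrative, not from the source): setInputTypes(...) invokes addPreProcessors(...) internally, which inserts a CNN-to-feed-forward preprocessor between "cnn" and "dense" and sets nIn on "dense" automatically.

ComputationGraphConfiguration conf = new NeuralNetConfiguration.Builder()
        .graphBuilder()
        .addInputs("in") //28x28x1 images
        .addLayer("cnn", new ConvolutionLayer.Builder(new int[] { 3, 3 }, new int[] { 1, 1 }, new int[] { 1, 1 }).nOut(8).build(), "in")
        .addLayer("dense", new DenseLayer.Builder().nOut(16).build(), "cnn")
        .addLayer("out", new OutputLayer.Builder(LossFunctions.LossFunction.NEGATIVELOGLIKELIHOOD).nOut(10).activation(Activation.SOFTMAX).build(), "dense")
        .setOutputs("out")
        .setInputTypes(InputType.convolutional(28, 28, 1)) //triggers addPreProcessors(...)
        .build();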
Use of org.deeplearning4j.nn.conf.graph.GraphVertex in project deeplearning4j by deeplearning4j.
In the class TrainModule, the method getLayerInfoTable:
private String[][] getLayerInfoTable(int layerIdx, TrainModuleUtils.GraphInfo gi, I18N i18N, boolean noData, StatsStorage ss, String wid) {
List<String[]> layerInfoRows = new ArrayList<>();
layerInfoRows.add(new String[] { i18N.getMessage("train.model.layerinfotable.layerName"), gi.getVertexNames().get(layerIdx) });
layerInfoRows.add(new String[] { i18N.getMessage("train.model.layerinfotable.layerType"), "" });
if (!noData) {
Persistable p = ss.getStaticInfo(currentSessionID, StatsListener.TYPE_ID, wid);
if (p != null) {
StatsInitializationReport initReport = (StatsInitializationReport) p;
String configJson = initReport.getModelConfigJson();
String modelClass = initReport.getModelClassName();
//TODO error handling...
String layerType = "";
Layer layer = null;
NeuralNetConfiguration nnc = null;
if (modelClass.endsWith("MultiLayerNetwork")) {
MultiLayerConfiguration conf = MultiLayerConfiguration.fromJson(configJson);
//-1 because of input
int confIdx = layerIdx - 1;
if (confIdx >= 0) {
nnc = conf.getConf(confIdx);
layer = nnc.getLayer();
} else {
//Input layer
layerType = "Input";
}
} else if (modelClass.endsWith("ComputationGraph")) {
ComputationGraphConfiguration conf = ComputationGraphConfiguration.fromJson(configJson);
String vertexName = gi.getVertexNames().get(layerIdx);
Map<String, GraphVertex> vertices = conf.getVertices();
if (vertices.containsKey(vertexName) && vertices.get(vertexName) instanceof LayerVertex) {
LayerVertex lv = (LayerVertex) vertices.get(vertexName);
nnc = lv.getLayerConf();
layer = nnc.getLayer();
} else if (conf.getNetworkInputs().contains(vertexName)) {
layerType = "Input";
} else {
GraphVertex gv = conf.getVertices().get(vertexName);
if (gv != null) {
layerType = gv.getClass().getSimpleName();
}
}
} else if (modelClass.endsWith("VariationalAutoencoder")) {
layerType = gi.getVertexTypes().get(layerIdx);
Map<String, String> map = gi.getVertexInfo().get(layerIdx);
for (Map.Entry<String, String> entry : map.entrySet()) {
layerInfoRows.add(new String[] { entry.getKey(), entry.getValue() });
}
}
if (layer != null) {
layerType = getLayerType(layer);
String activationFn = null;
if (layer instanceof FeedForwardLayer) {
FeedForwardLayer ffl = (FeedForwardLayer) layer;
layerInfoRows.add(new String[] { i18N.getMessage("train.model.layerinfotable.layerNIn"), String.valueOf(ffl.getNIn()) });
layerInfoRows.add(new String[] { i18N.getMessage("train.model.layerinfotable.layerSize"), String.valueOf(ffl.getNOut()) });
activationFn = layer.getActivationFn().toString();
}
int nParams = layer.initializer().numParams(nnc);
layerInfoRows.add(new String[] { i18N.getMessage("train.model.layerinfotable.layerNParams"), String.valueOf(nParams) });
if (nParams > 0) {
WeightInit wi = layer.getWeightInit();
String str = wi.toString();
if (wi == WeightInit.DISTRIBUTION) {
str += layer.getDist();
}
layerInfoRows.add(new String[] { i18N.getMessage("train.model.layerinfotable.layerWeightInit"), str });
Updater u = layer.getUpdater();
String us = (u == null ? "" : u.toString());
layerInfoRows.add(new String[] { i18N.getMessage("train.model.layerinfotable.layerUpdater"), us });
//TODO: Maybe L1/L2, dropout, updater-specific values etc
}
if (layer instanceof ConvolutionLayer || layer instanceof SubsamplingLayer) {
int[] kernel;
int[] stride;
int[] padding;
if (layer instanceof ConvolutionLayer) {
ConvolutionLayer cl = (ConvolutionLayer) layer;
kernel = cl.getKernelSize();
stride = cl.getStride();
padding = cl.getPadding();
} else {
SubsamplingLayer ssl = (SubsamplingLayer) layer;
kernel = ssl.getKernelSize();
stride = ssl.getStride();
padding = ssl.getPadding();
activationFn = null;
layerInfoRows.add(new String[] { i18N.getMessage("train.model.layerinfotable.layerSubsamplingPoolingType"), ssl.getPoolingType().toString() });
}
layerInfoRows.add(new String[] { i18N.getMessage("train.model.layerinfotable.layerCnnKernel"), Arrays.toString(kernel) });
layerInfoRows.add(new String[] { i18N.getMessage("train.model.layerinfotable.layerCnnStride"), Arrays.toString(stride) });
layerInfoRows.add(new String[] { i18N.getMessage("train.model.layerinfotable.layerCnnPadding"), Arrays.toString(padding) });
}
if (activationFn != null) {
layerInfoRows.add(new String[] { i18N.getMessage("train.model.layerinfotable.layerActivationFn"), activationFn });
}
}
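//Fill in the layer-type placeholder added as the second row above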
layerInfoRows.get(1)[1] = layerType;
}
}
return layerInfoRows.toArray(new String[layerInfoRows.size()][0]);
}
Use of org.deeplearning4j.nn.conf.graph.GraphVertex in project deeplearning4j by deeplearning4j.
In the class TrainModuleUtils, the method buildGraphInfo:
public static GraphInfo buildGraphInfo(ComputationGraphConfiguration config) {
List<String> layerNames = new ArrayList<>();
List<String> layerTypes = new ArrayList<>();
List<List<Integer>> layerInputs = new ArrayList<>();
List<Map<String, String>> layerInfo = new ArrayList<>();
Map<String, GraphVertex> vertices = config.getVertices();
Map<String, List<String>> vertexInputs = config.getVertexInputs();
List<String> networkInputs = config.getNetworkInputs();
List<String> originalVertexName = new ArrayList<>();
Map<String, Integer> vertexToIndexMap = new HashMap<>();
int vertexCount = 0;
for (String s : networkInputs) {
vertexToIndexMap.put(s, vertexCount++);
layerNames.add(s);
originalVertexName.add(s);
layerTypes.add(s);
layerInputs.add(Collections.emptyList());
layerInfo.add(Collections.emptyMap());
}
for (String s : vertices.keySet()) {
vertexToIndexMap.put(s, vertexCount++);
}
int layerCount = 0;
for (Map.Entry<String, GraphVertex> entry : vertices.entrySet()) {
GraphVertex gv = entry.getValue();
layerNames.add(entry.getKey());
List<String> inputsThisVertex = vertexInputs.get(entry.getKey());
List<Integer> inputIndexes = new ArrayList<>();
for (String s : inputsThisVertex) {
inputIndexes.add(vertexToIndexMap.get(s));
}
layerInputs.add(inputIndexes);
if (gv instanceof LayerVertex) {
NeuralNetConfiguration c = ((LayerVertex) gv).getLayerConf();
Layer layer = c.getLayer();
String layerType = layer.getClass().getSimpleName().replaceAll("Layer$", "");
layerTypes.add(layerType);
//Extract layer info
Map<String, String> map = getLayerInfo(c, layer);
layerInfo.add(map);
} else {
String layerType = gv.getClass().getSimpleName();
layerTypes.add(layerType);
//TODO
Map<String, String> thisVertexInfo = Collections.emptyMap();
layerInfo.add(thisVertexInfo);
}
originalVertexName.add(entry.getKey());
}
return new GraphInfo(layerNames, layerTypes, layerInputs, layerInfo, originalVertexName);
}
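Note the index convention GraphInfo relies on: network inputs are numbered first, then the remaining vertices in map iteration order. For example, with networkInputs = ["input"] and vertices {"l0_cnn", "l1_max"}, vertexToIndexMap becomes {input=0, l0_cnn=1, l1_max=2}, and layerInputs stores these integer indices rather than vertex names.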
Use of org.deeplearning4j.nn.conf.graph.GraphVertex in project deeplearning4j by deeplearning4j.
In the class GradientCheckUtil, the method checkGradients:
/**Check backprop gradients for a ComputationGraph
* @param graph ComputationGraph to test. This must be initialized.
* @param epsilon Usually on the order of 1e-4 or so.
* @param maxRelError Maximum relative error. Usually < 0.01, though maybe more for deep networks
* @param minAbsoluteError Minimum absolute error to cause a failure. Numerical gradients can be non-zero due to precision issues.
* For example, 0.0 vs. 1e-18: relative error is 1.0, but not really a failure
* @param print Whether to print full pass/failure details for each parameter gradient
* @param exitOnFirstError If true: return upon first failure. If false: continue checking even if
* one parameter gradient has failed. Typically use false for debugging, true for unit tests.
* @param inputs Input arrays to use for forward pass. May be mini-batch data.
* @param labels Labels/targets (output) arrays to use to calculate backprop gradient. May be mini-batch data.
* @return true if all gradient checks pass, false otherwise.
*/
public static boolean checkGradients(ComputationGraph graph, double epsilon, double maxRelError, double minAbsoluteError, boolean print, boolean exitOnFirstError, INDArray[] inputs, INDArray[] labels) {
//Basic sanity checks on input:
if (epsilon <= 0.0 || epsilon > 0.1)
throw new IllegalArgumentException("Invalid epsilon: expect epsilon in range (0,0.1], usually 1e-4 or so");
if (maxRelError <= 0.0 || maxRelError > 0.25)
throw new IllegalArgumentException("Invalid maxRelativeError: " + maxRelError);
if (graph.getNumInputArrays() != inputs.length)
throw new IllegalArgumentException("Invalid input arrays: expect " + graph.getNumInputArrays() + " inputs");
if (graph.getNumOutputArrays() != labels.length)
throw new IllegalArgumentException("Invalid labels arrays: expect " + graph.getNumOutputArrays() + " outputs");
//Check configuration
for (String vertexName : graph.getConfiguration().getVertices().keySet()) {
GraphVertex gv = graph.getConfiguration().getVertices().get(vertexName);
if (!(gv instanceof LayerVertex))
continue;
LayerVertex lv = (LayerVertex) gv;
org.deeplearning4j.nn.conf.Updater u = lv.getLayerConf().getLayer().getUpdater();
if (u == org.deeplearning4j.nn.conf.Updater.SGD) {
//Must have LR of 1.0
double lr = lv.getLayerConf().getLayer().getLearningRate();
if (lr != 1.0) {
throw new IllegalStateException("When using SGD updater, must also use lr=1.0 for layer \"" + vertexName + "\"; got " + u);
}
} else if (u != org.deeplearning4j.nn.conf.Updater.NONE) {
throw new IllegalStateException("Must have Updater.NONE (or SGD + lr=1.0) for layer \"" + vertexName + "\"; got " + u);
}
double dropout = lv.getLayerConf().getLayer().getDropOut();
if (lv.getLayerConf().isUseRegularization() && dropout != 0.0) {
throw new IllegalStateException("Must have dropout == 0.0 for gradient checks - got dropout = " + dropout + " for layer " + layerCount);
}
IActivation activation = lv.getLayerConf().getLayer().getActivationFn();
if (activation != null) {
if (!VALID_ACTIVATION_FUNCTIONS.contains(activation.getClass())) {
log.warn("Layer \"" + vertexName + "\" is possibly using an unsuitable activation function: " + activation.getClass() + ". Activation functions for gradient checks must be smooth (like sigmoid, tanh, softmax) and not " + "contain discontinuities like ReLU or LeakyReLU (these may cause spurious failures)");
}
}
}
for (int i = 0; i < inputs.length; i++) graph.setInput(i, inputs[i]);
for (int i = 0; i < labels.length; i++) graph.setLabel(i, labels[i]);
graph.computeGradientAndScore();
Pair<Gradient, Double> gradAndScore = graph.gradientAndScore();
ComputationGraphUpdater updater = new ComputationGraphUpdater(graph);
updater.update(graph, gradAndScore.getFirst(), 0, graph.batchSize());
//need dup: gradients are a *view* of the full gradient array (which will change every time backprop is done)
INDArray gradientToCheck = gradAndScore.getFirst().gradient().dup();
//need dup: params are a *view* of full parameters
INDArray originalParams = graph.params().dup();
int nParams = originalParams.length();
Map<String, INDArray> paramTable = graph.paramTable();
List<String> paramNames = new ArrayList<>(paramTable.keySet());
int[] paramEnds = new int[paramNames.size()];
paramEnds[0] = paramTable.get(paramNames.get(0)).length();
for (int i = 1; i < paramEnds.length; i++) {
paramEnds[i] = paramEnds[i - 1] + paramTable.get(paramNames.get(i)).length();
}
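//e.g. parameter sizes {W: 12, b: 3} give paramEnds = {12, 15}; paramEnds[j] is the exclusive end index of parameter j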
int currParamNameIdx = 0;
int totalNFailures = 0;
double maxError = 0.0;
MultiDataSet mds = new MultiDataSet(inputs, labels);
//Assumption here: params is a view that we can modify in-place
INDArray params = graph.params();
for (int i = 0; i < nParams; i++) {
//Get param name
if (i >= paramEnds[currParamNameIdx]) {
currParamNameIdx++;
}
String paramName = paramNames.get(currParamNameIdx);
//(w+epsilon): Do forward pass and score
double origValue = params.getDouble(i);
params.putScalar(i, origValue + epsilon);
//training == true for batch norm, etc (scores and gradients need to be calculated on same thing)
double scorePlus = graph.score(mds, true);
//(w-epsilon): Do forward pass and score
params.putScalar(i, origValue - epsilon);
double scoreMinus = graph.score(mds, true);
//Reset original param value
params.putScalar(i, origValue);
//Calculate numerical parameter gradient:
double scoreDelta = scorePlus - scoreMinus;
double numericalGradient = scoreDelta / (2 * epsilon);
if (Double.isNaN(numericalGradient))
throw new IllegalStateException("Numerical gradient was NaN for parameter " + i + " of " + nParams);
double backpropGradient = gradientToCheck.getDouble(i);
//Relative error with a symmetric denominator (|numerical| + |backprop|);
//cf. http://cs231n.github.io/neural-networks-3/#gradcheck
double relError = Math.abs(backpropGradient - numericalGradient) / (Math.abs(numericalGradient) + Math.abs(backpropGradient));
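//e.g. backpropGradient = 1.0e-4, numericalGradient = 1.1e-4 -> relError = 1.0e-5 / 2.1e-4 ≈ 0.048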
//Edge case where both gradients are exactly 0.0 (e.g., RNNs with a time series length of 1)
if (backpropGradient == 0.0 && numericalGradient == 0.0)
relError = 0.0;
if (relError > maxError)
maxError = relError;
if (relError > maxRelError || Double.isNaN(relError)) {
double absError = Math.abs(backpropGradient - numericalGradient);
if (absError < minAbsoluteError) {
log.info("Param " + i + " (" + paramName + ") passed: grad= " + backpropGradient + ", numericalGrad= " + numericalGradient + ", relError= " + relError + "; absolute error = " + absError + " < minAbsoluteError = " + minAbsoluteError);
} else {
if (print)
log.info("Param " + i + " (" + paramName + ") FAILED: grad= " + backpropGradient + ", numericalGrad= " + numericalGradient + ", relError= " + relError + ", scorePlus=" + scorePlus + ", scoreMinus= " + scoreMinus);
if (exitOnFirstError)
return false;
totalNFailures++;
}
} else if (print) {
log.info("Param " + i + " (" + paramName + ") passed: grad= " + backpropGradient + ", numericalGrad= " + numericalGradient + ", relError= " + relError);
}
}
if (print) {
int nPass = nParams - totalNFailures;
log.info("GradientCheckUtil.checkGradients(): " + nParams + " params checked, " + nPass + " passed, " + totalNFailures + " failed. Largest relative error = " + maxError);
}
return totalNFailures == 0;
}
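A hedged usage sketch, not from the source (the tiny network, the data, and the tolerances are illustrative). Per the configuration checks above, the layers must use Updater.NONE (or SGD with lr = 1.0) and no dropout or regularization; double precision is also assumed for reliable numerical gradients:

ComputationGraphConfiguration conf = new NeuralNetConfiguration.Builder()
        .updater(Updater.NONE) //required (or SGD with lr = 1.0) by the checks above
        .graphBuilder()
        .addInputs("in")
        .addLayer("out", new OutputLayer.Builder(LossFunctions.LossFunction.MSE).nIn(4).nOut(3).activation(Activation.TANH).build(), "in")
        .setOutputs("out")
        .build();
ComputationGraph net = new ComputationGraph(conf);
net.init();

INDArray[] inputs = { Nd4j.rand(5, 4) }; //minibatch of 5 examples, 4 features
INDArray[] labels = { Nd4j.rand(5, 3) };
//epsilon must be in (0, 0.1]; maxRelError <= 0.25; minAbsoluteError guards near-zero gradients
boolean gradOk = GradientCheckUtil.checkGradients(net, 1e-6, 1e-3, 1e-8, true, false, inputs, labels);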