Use of org.deeplearning4j.nn.updater.graph.ComputationGraphUpdater in project deeplearning4j by deeplearning4j.
In the class TestUpdaters, method testUpdaters:
@Test
public void testUpdaters() throws Exception {
    ComputationGraphConfiguration conf = new NeuralNetConfiguration.Builder()
            .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT)
            .updater(Updater.NESTEROVS).momentum(0.9)
            .graphBuilder()
            .addInputs("input") // 40x40x1
            .addLayer("l0_cnn",
                    new ConvolutionLayer.Builder(new int[] { 3, 3 }, new int[] { 1, 1 }, new int[] { 1, 1 })
                            .nOut(100).build(),
                    "input") // out: 40x40x100
            .addLayer("l1_max",
                    new SubsamplingLayer.Builder(SubsamplingLayer.PoolingType.MAX, new int[] { 2, 2 },
                            new int[] { 2, 2 }, new int[] { 1, 1 }).build(),
                    "l0_cnn") // 21x21x100
            .addLayer("l2_cnn",
                    new ConvolutionLayer.Builder(new int[] { 3, 3 }, new int[] { 2, 2 }, new int[] { 1, 1 })
                            .nOut(200).build(),
                    "l1_max") // 11x11x200
            .addLayer("l3_max",
                    new SubsamplingLayer.Builder(SubsamplingLayer.PoolingType.MAX, new int[] { 3, 3 },
                            new int[] { 2, 2 }, new int[] { 1, 1 }).build(),
                    "l2_cnn") // 6x6x200
            .addLayer("l4_fc", new DenseLayer.Builder().nOut(1024).build(), "l3_max") // output: 1x1x1024
            .addLayer("l5_out",
                    new OutputLayer.Builder(LossFunctions.LossFunction.NEGATIVELOGLIKELIHOOD).nOut(10)
                            .activation(Activation.SOFTMAX).build(),
                    "l4_fc")
            .setOutputs("l5_out")
            .backprop(true).pretrain(false)
            .setInputTypes(InputType.convolutional(40, 40, 1))
            .build();
    //First: check that the nIns are set properly...
    Map<String, GraphVertex> map = conf.getVertices();
    LayerVertex l0_cnn = (LayerVertex) map.get("l0_cnn");
    LayerVertex l2_cnn = (LayerVertex) map.get("l2_cnn");
    LayerVertex l4_fc = (LayerVertex) map.get("l4_fc");
    LayerVertex l5_out = (LayerVertex) map.get("l5_out");
    assertEquals(1, ((FeedForwardLayer) l0_cnn.getLayerConf().getLayer()).getNIn());
    assertEquals(100, ((FeedForwardLayer) l2_cnn.getLayerConf().getLayer()).getNIn());
    assertEquals(6 * 6 * 200, ((FeedForwardLayer) l4_fc.getLayerConf().getLayer()).getNIn());
    assertEquals(1024, ((FeedForwardLayer) l5_out.getLayerConf().getLayer()).getNIn());

    //Check updaters state:
    ComputationGraph g = new ComputationGraph(conf);
    g.init();
    g.initGradientsView();
    ComputationGraphUpdater updater = g.getUpdater();

    //First: get the updaters array
    Field layerUpdatersField = updater.getClass().getDeclaredField("layerUpdaters");
    layerUpdatersField.setAccessible(true);
    org.deeplearning4j.nn.api.Updater[] layerUpdaters =
            (org.deeplearning4j.nn.api.Updater[]) layerUpdatersField.get(updater);

    //And get the map between names and updater indexes
    Field layerUpdatersMapField = updater.getClass().getDeclaredField("layerUpdatersMap");
    layerUpdatersMapField.setAccessible(true);
    Map<String, Integer> layerUpdatersMap = (Map<String, Integer>) layerUpdatersMapField.get(updater);
    //Go through each layer; check that the updater state size matches the parameters size
    org.deeplearning4j.nn.api.Layer[] layers = g.getLayers();
    for (org.deeplearning4j.nn.api.Layer l : layers) {
        String layerName = l.conf().getLayer().getLayerName();
        int nParams = l.numParams();
        Map<String, INDArray> paramTable = l.paramTable();
        Map<String, Integer> parameterSizeCounts = new LinkedHashMap<>();
        for (Map.Entry<String, INDArray> e : paramTable.entrySet()) {
            parameterSizeCounts.put(e.getKey(), e.getValue().length());
        }

        int updaterIdx = layerUpdatersMap.get(layerName);
        org.deeplearning4j.nn.api.Updater u = layerUpdaters[updaterIdx];
        LayerUpdater lu = (LayerUpdater) u;
        Field updaterForVariableField = LayerUpdater.class.getDeclaredField("updaterForVariable");
        updaterForVariableField.setAccessible(true);
        Map<String, GradientUpdater> updaterForVariable =
                (Map<String, GradientUpdater>) updaterForVariableField.get(lu);
        Map<String, Integer> updaterStateSizeCounts = new HashMap<>();
        for (Map.Entry<String, GradientUpdater> entry : updaterForVariable.entrySet()) {
            GradientUpdater gu = entry.getValue();
            Nesterovs nesterovs = (Nesterovs) gu;
            INDArray v = nesterovs.getV();
            int length = (v == null ? -1 : v.length());
            updaterStateSizeCounts.put(entry.getKey(), length);
        }

        //Check subsampling layers:
        if (l.numParams() == 0) {
            assertEquals(0, updaterForVariable.size());
        }

        System.out.println(layerName + "\t" + nParams + "\t" + parameterSizeCounts
                + "\t Updater size: " + updaterStateSizeCounts);

        //Now, with nesterov updater: 1 history value per parameter
        for (String s : parameterSizeCounts.keySet()) {
            int paramSize = parameterSizeCounts.get(s);
            int updaterSize = updaterStateSizeCounts.get(s);
            assertEquals(layerName + "/" + s, paramSize, updaterSize);
        }
    }
    //minibatch, depth, height, width
    INDArray in = Nd4j.create(2, 1, 40, 40);
    INDArray l = Nd4j.create(2, 10);
    DataSet ds = new DataSet(in, l);
    g.fit(ds);
}
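Because the Nesterovs updater keeps exactly one velocity value per parameter, the per-variable comparison in the test above can also be collapsed into a single whole-network assertion. A minimal hedged sketch, where graph is an illustrative, already-initialized and already-fit ComputationGraph configured with Updater.NESTEROVS (this exact assertion is not part of the original test):

// Hedged sketch: with Nesterovs, the flattened updater state holds one velocity value per parameter,
// so its total length should match the network's parameter count
INDArray stateView = graph.getUpdater().getStateViewArray();
assertEquals(graph.numParams(), stateView.length());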
Use of org.deeplearning4j.nn.updater.graph.ComputationGraphUpdater in project deeplearning4j by deeplearning4j.
In the class ComputationGraph, method clone:
@Override
public ComputationGraph clone() {
    ComputationGraph cg = new ComputationGraph(configuration.clone());
    cg.init(params().dup(), false);
    if (solver != null) {
        //If solver is null: updater hasn't been initialized -> getUpdater call will force initialization, however
        ComputationGraphUpdater u = this.getUpdater();
        INDArray updaterState = u.getStateViewArray();
        if (updaterState != null) {
            cg.getUpdater().setStateViewArray(updaterState.dup());
        }
    }
    cg.listeners = this.listeners;
    for (int i = 0; i < topologicalOrder.length; i++) {
        if (!vertices[topologicalOrder[i]].hasLayer())
            continue;
        String layerName = vertices[topologicalOrder[i]].getVertexName();
        if (getLayer(layerName) instanceof FrozenLayer) {
            cg.getVertex(layerName).setLayerAsFrozen();
        }
    }
    return cg;
}
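A small sketch of how this cloning contract could be exercised: both the parameters and the updater state of the clone should equal, but not alias, those of the original. Here g is an illustrative, initialized and already-fit ComputationGraph; the assertions are not taken from the repository:

// Hedged sketch: clone() copies parameters and updater state into independent (dup()'d) arrays
ComputationGraph cloned = g.clone();
assertEquals(g.params(), cloned.params());
assertEquals(g.getUpdater().getStateViewArray(), cloned.getUpdater().getStateViewArray());
// Because the copies are duplicated, modifying the clone must not affect the original
cloned.params().addi(1.0);
assertNotEquals(g.params(), cloned.params());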
Use of org.deeplearning4j.nn.updater.graph.ComputationGraphUpdater in project deeplearning4j by deeplearning4j.
In the class ComputationGraph, method getUpdater:
/**
 * Get the ComputationGraphUpdater for the network
 */
public ComputationGraphUpdater getUpdater() {
    if (solver == null) {
        solver = new Solver.Builder().configure(conf()).listeners(getListeners()).model(this).build();
        solver.getOptimizer().setUpdaterComputationGraph(new ComputationGraphUpdater(this));
    }
    return solver.getOptimizer().getComputationGraphUpdater();
}
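getUpdater() lazily builds a Solver and attaches a fresh ComputationGraphUpdater the first time it is called, so the pattern used in clone() above can also transfer updater state between two identically configured graphs. A brief sketch under that assumption (graphA and graphB are illustrative names, not from the source):

// Hedged sketch: copy momentum/history state from one identically configured graph to another
ComputationGraphUpdater source = graphA.getUpdater();
INDArray state = source.getStateViewArray();
if (state != null) {
    // dup() so the two graphs do not share the same underlying state array
    graphB.getUpdater().setStateViewArray(state.dup());
}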
Use of org.deeplearning4j.nn.updater.graph.ComputationGraphUpdater in project deeplearning4j by deeplearning4j.
In the class GradientCheckUtil, method checkGradients:
/**
 * Check backprop gradients for a ComputationGraph
 * @param graph ComputationGraph to test. This must be initialized.
 * @param epsilon Usually on the order of 1e-4 or so.
 * @param maxRelError Maximum relative error. Usually < 0.01, though maybe more for deep networks
 * @param minAbsoluteError Minimum absolute error to cause a failure. Numerical gradients can be non-zero due to precision issues.
 *                         For example, 0.0 vs. 1e-18: relative error is 1.0, but not really a failure
 * @param print Whether to print full pass/failure details for each parameter gradient
 * @param exitOnFirstError If true: return upon first failure. If false: continue checking even if
 *                         one parameter gradient has failed. Typically use false for debugging, true for unit tests.
 * @param inputs Input arrays to use for forward pass. May be mini-batch data.
 * @param labels Labels/targets (output) arrays to use to calculate backprop gradient. May be mini-batch data.
 * @return true if gradients are passed, false otherwise.
 */
public static boolean checkGradients(ComputationGraph graph, double epsilon, double maxRelError,
                double minAbsoluteError, boolean print, boolean exitOnFirstError,
                INDArray[] inputs, INDArray[] labels) {
    //Basic sanity checks on input:
    if (epsilon <= 0.0 || epsilon > 0.1)
        throw new IllegalArgumentException("Invalid epsilon: expect epsilon in range (0,0.1], usually 1e-4 or so");
    if (maxRelError <= 0.0 || maxRelError > 0.25)
        throw new IllegalArgumentException("Invalid maxRelativeError: " + maxRelError);
    if (graph.getNumInputArrays() != inputs.length)
        throw new IllegalArgumentException("Invalid input arrays: expect " + graph.getNumInputArrays() + " inputs");
    if (graph.getNumOutputArrays() != labels.length)
        throw new IllegalArgumentException("Invalid labels arrays: expect " + graph.getNumOutputArrays() + " outputs");
    //Check configuration
    int layerCount = 0;
    for (String vertexName : graph.getConfiguration().getVertices().keySet()) {
        GraphVertex gv = graph.getConfiguration().getVertices().get(vertexName);
        if (!(gv instanceof LayerVertex))
            continue;
        LayerVertex lv = (LayerVertex) gv;

        org.deeplearning4j.nn.conf.Updater u = lv.getLayerConf().getLayer().getUpdater();
        if (u == org.deeplearning4j.nn.conf.Updater.SGD) {
            //Must have LR of 1.0
            double lr = lv.getLayerConf().getLayer().getLearningRate();
            if (lr != 1.0) {
                throw new IllegalStateException("When using SGD updater, must also use lr=1.0 for layer \""
                                + vertexName + "\"; got " + u);
            }
        } else if (u != org.deeplearning4j.nn.conf.Updater.NONE) {
            throw new IllegalStateException("Must have Updater.NONE (or SGD + lr=1.0) for layer \""
                            + vertexName + "\"; got " + u);
        }

        double dropout = lv.getLayerConf().getLayer().getDropOut();
        if (lv.getLayerConf().isUseRegularization() && dropout != 0.0) {
            throw new IllegalStateException("Must have dropout == 0.0 for gradient checks - got dropout = "
                            + dropout + " for layer " + layerCount);
        }

        IActivation activation = lv.getLayerConf().getLayer().getActivationFn();
        if (activation != null) {
            if (!VALID_ACTIVATION_FUNCTIONS.contains(activation.getClass())) {
                log.warn("Layer \"" + vertexName + "\" is possibly using an unsuitable activation function: "
                                + activation.getClass()
                                + ". Activation functions for gradient checks must be smooth (like sigmoid, tanh, softmax) and not "
                                + "contain discontinuities like ReLU or LeakyReLU (these may cause spurious failures)");
            }
        }
    }
    for (int i = 0; i < inputs.length; i++)
        graph.setInput(i, inputs[i]);
    for (int i = 0; i < labels.length; i++)
        graph.setLabel(i, labels[i]);

    graph.computeGradientAndScore();
    Pair<Gradient, Double> gradAndScore = graph.gradientAndScore();

    ComputationGraphUpdater updater = new ComputationGraphUpdater(graph);
    updater.update(graph, gradAndScore.getFirst(), 0, graph.batchSize());

    //need dup: gradients are a *view* of the full gradient array (which will change every time backprop is done)
    INDArray gradientToCheck = gradAndScore.getFirst().gradient().dup();
    //need dup: params are a *view* of full parameters
    INDArray originalParams = graph.params().dup();

    int nParams = originalParams.length();

    Map<String, INDArray> paramTable = graph.paramTable();
    List<String> paramNames = new ArrayList<>(paramTable.keySet());
    int[] paramEnds = new int[paramNames.size()];
    paramEnds[0] = paramTable.get(paramNames.get(0)).length();
    for (int i = 1; i < paramEnds.length; i++) {
        paramEnds[i] = paramEnds[i - 1] + paramTable.get(paramNames.get(i)).length();
    }

    int currParamNameIdx = 0;
    int totalNFailures = 0;
    double maxError = 0.0;
    MultiDataSet mds = new MultiDataSet(inputs, labels);

    //Assumption here: params is a view that we can modify in-place
    INDArray params = graph.params();
    for (int i = 0; i < nParams; i++) {
        //Get param name
        if (i >= paramEnds[currParamNameIdx]) {
            currParamNameIdx++;
        }
        String paramName = paramNames.get(currParamNameIdx);

        //(w+epsilon): Do forward pass and score
        double origValue = params.getDouble(i);
        params.putScalar(i, origValue + epsilon);
        //training == true for batch norm, etc (scores and gradients need to be calculated on same thing)
        double scorePlus = graph.score(mds, true);

        //(w-epsilon): Do forward pass and score
        params.putScalar(i, origValue - epsilon);
        double scoreMinus = graph.score(mds, true);

        //Reset original param value
        params.putScalar(i, origValue);

        //Calculate numerical parameter gradient:
        double scoreDelta = scorePlus - scoreMinus;
        double numericalGradient = scoreDelta / (2 * epsilon);
        if (Double.isNaN(numericalGradient))
            throw new IllegalStateException("Numerical gradient was NaN for parameter " + i + " of " + nParams);

        double backpropGradient = gradientToCheck.getDouble(i);
        //http://cs231n.github.io/neural-networks-3/#gradcheck
        //use mean centered
        double relError = Math.abs(backpropGradient - numericalGradient)
                        / (Math.abs(numericalGradient) + Math.abs(backpropGradient));
        if (backpropGradient == 0.0 && numericalGradient == 0.0)
            //Edge case: i.e., RNNs with time series length of 1.0
            relError = 0.0;

        if (relError > maxError)
            maxError = relError;

        if (relError > maxRelError || Double.isNaN(relError)) {
            double absError = Math.abs(backpropGradient - numericalGradient);
            if (absError < minAbsoluteError) {
                log.info("Param " + i + " (" + paramName + ") passed: grad= " + backpropGradient
                                + ", numericalGrad= " + numericalGradient + ", relError= " + relError
                                + "; absolute error = " + absError + " < minAbsoluteError = " + minAbsoluteError);
            } else {
                if (print)
                    log.info("Param " + i + " (" + paramName + ") FAILED: grad= " + backpropGradient
                                    + ", numericalGrad= " + numericalGradient + ", relError= " + relError
                                    + ", scorePlus=" + scorePlus + ", scoreMinus= " + scoreMinus);
                if (exitOnFirstError)
                    return false;
                totalNFailures++;
            }
        } else if (print) {
            log.info("Param " + i + " (" + paramName + ") passed: grad= " + backpropGradient
                            + ", numericalGrad= " + numericalGradient + ", relError= " + relError);
        }
    }
    if (print) {
        int nPass = nParams - totalNFailures;
        log.info("GradientCheckUtil.checkGradients(): " + nParams + " params checked, " + nPass + " passed, "
                        + totalNFailures + " failed. Largest relative error = " + maxError);
    }
    return totalNFailures == 0;
}
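A typical invocation, consistent with the configuration checks above: the graph under test should use Updater.NONE (or SGD with lr=1.0), no dropout with regularization, and smooth activation functions. A hedged sketch; the specific layer sizes, epsilon, and tolerance values are illustrative, not taken from the repository:

// Hedged sketch: gradient-check a small, gradient-check-friendly graph
ComputationGraphConfiguration gcConf = new NeuralNetConfiguration.Builder()
        .updater(Updater.NONE) // required: no adaptive updater during the check
        .graphBuilder()
        .addInputs("in")
        .addLayer("out", new OutputLayer.Builder(LossFunctions.LossFunction.MSE)
                .nIn(4).nOut(3).activation(Activation.TANH).build(), "in")
        .setOutputs("out")
        .backprop(true).pretrain(false)
        .build();
ComputationGraph gcGraph = new ComputationGraph(gcConf);
gcGraph.init(); // checkGradients requires an initialized graph

boolean gradOK = GradientCheckUtil.checkGradients(gcGraph, 1e-6, 1e-3, 1e-8, true, false,
        new INDArray[] {Nd4j.rand(5, 4)}, new INDArray[] {Nd4j.rand(5, 3)});
assertTrue(gradOK);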
Use of org.deeplearning4j.nn.updater.graph.ComputationGraphUpdater in project deeplearning4j by deeplearning4j.
In the class ParallelWrapper, method averageUpdatersState:
private void averageUpdatersState(AtomicInteger locker, double score) {
    if (averageUpdaters) {
        ComputationGraphUpdater updater = ((ComputationGraph) model).getUpdater();
        int batchSize = 0;
        if (updater != null && updater.getStateViewArray() != null) {
            if (!legacyAveraging || Nd4j.getAffinityManager().getNumberOfDevices() == 1) {
                List<INDArray> updaters = new ArrayList<>();
                for (int cnt = 0; cnt < workers && cnt < locker.get(); cnt++) {
                    ComputationGraph workerModel = (ComputationGraph) zoo[cnt].getModel();
                    updaters.add(workerModel.getUpdater().getStateViewArray());
                    batchSize += workerModel.batchSize();
                }
                Nd4j.averageAndPropagate(updater.getStateViewArray(), updaters);
            } else {
                INDArray state = Nd4j.zeros(updater.getStateViewArray().shape());
                int cnt = 0;
                for (; cnt < workers && cnt < locker.get(); cnt++) {
                    ComputationGraph workerModel = (ComputationGraph) zoo[cnt].getModel();
                    state.addi(workerModel.getUpdater().getStateViewArray());
                    batchSize += workerModel.batchSize();
                }
                state.divi(cnt);
                updater.setStateViewArray(state);
            }
        }
    }
    ((ComputationGraph) model).setScore(score);
}
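The legacy branch above averages updater state manually: it accumulates each worker's state view and divides by the number of contributing workers. A stripped-down sketch of that element-wise mean, where states is an illustrative List<INDArray> of equally shaped state views:

// Hedged sketch: element-wise mean of several updater state views
INDArray mean = Nd4j.zeros(states.get(0).shape());
for (INDArray s : states) {
    mean.addi(s); // accumulate each worker's state
}
mean.divi(states.size()); // divide by the number of contributing workers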