
Example 11 with NeuralNetConfiguration

Use of org.deeplearning4j.nn.conf.NeuralNetConfiguration in project deeplearning4j by deeplearning4j.

From the class OutputLayerTest, method testSetParams.

@Test
public void testSetParams() {
    NeuralNetConfiguration conf = new NeuralNetConfiguration.Builder()
            .optimizationAlgo(OptimizationAlgorithm.LINE_GRADIENT_DESCENT)
            .iterations(100)
            .learningRate(1e-1)
            .layer(new org.deeplearning4j.nn.conf.layers.OutputLayer.Builder()
                    .nIn(4).nOut(3)
                    .weightInit(WeightInit.ZERO)
                    .activation(Activation.SOFTMAX)
                    .lossFunction(LossFunctions.LossFunction.MCXENT)
                    .build())
            .build();
    int numParams = conf.getLayer().initializer().numParams(conf);
    INDArray params = Nd4j.create(1, numParams);
    OutputLayer l = (OutputLayer) conf.getLayer().instantiate(conf, Collections.<IterationListener>singletonList(new ScoreIterationListener(1)), 0, params, true);
    params = l.params();
    l.setParams(params);
    assertEquals(params, l.params());
}
Also used : RnnOutputLayer(org.deeplearning4j.nn.layers.recurrent.RnnOutputLayer), INDArray(org.nd4j.linalg.api.ndarray.INDArray), IterationListener(org.deeplearning4j.optimize.api.IterationListener), ScoreIterationListener(org.deeplearning4j.optimize.listeners.ScoreIterationListener), NeuralNetConfiguration(org.deeplearning4j.nn.conf.NeuralNetConfiguration), Test(org.junit.Test)
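
The test above instantiates a single layer directly from the NeuralNetConfiguration, which is mostly a test-only pattern. For orientation, the sketch below shows what the same configuration looks like in the more common high-level usage, where a MultiLayerConfiguration and MultiLayerNetwork handle parameter allocation. This is a hedged sketch against the 0.x-era API used throughout this page, not code from the deeplearning4j test suite.

// Sketch only: typical high-level usage of the same builder API (DL4J 0.x era).
// MultiLayerNetwork.init() allocates and initializes the parameter array,
// replacing the manual initializer().numParams(...) / instantiate(...) calls above.
MultiLayerConfiguration mlc = new NeuralNetConfiguration.Builder()
        .optimizationAlgo(OptimizationAlgorithm.LINE_GRADIENT_DESCENT)
        .learningRate(1e-1)
        .list()
        .layer(0, new org.deeplearning4j.nn.conf.layers.OutputLayer.Builder()
                .nIn(4).nOut(3)
                .weightInit(WeightInit.ZERO)
                .activation(Activation.SOFTMAX)
                .lossFunction(LossFunctions.LossFunction.MCXENT)
                .build())
        .build();
MultiLayerNetwork net = new MultiLayerNetwork(mlc);
net.init(); // allocates the parameter view, analogous to Nd4j.create(1, numParams) above
net.setListeners(new ScoreIterationListener(1));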

Example 12 with NeuralNetConfiguration

Use of org.deeplearning4j.nn.conf.NeuralNetConfiguration in project deeplearning4j by deeplearning4j.

From the class TestRenders, method renderHistogram.

@Test
public void renderHistogram() throws Exception {
    MnistDataFetcher fetcher = new MnistDataFetcher(true);
    NeuralNetConfiguration conf = new NeuralNetConfiguration.Builder()
            .momentum(0.9f)
            .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT)
            .iterations(100)
            .learningRate(1e-1f)
            .layer(new org.deeplearning4j.nn.conf.layers.AutoEncoder.Builder()
                    .nIn(784).nOut(600)
                    .corruptionLevel(0.6)
                    .weightInit(WeightInit.XAVIER)
                    .lossFunction(LossFunctions.LossFunction.RMSE_XENT)
                    .build())
            .build();
    fetcher.fetch(100);
    DataSet d2 = fetcher.next();
    INDArray input = d2.getFeatureMatrix();
    int numParams = conf.getLayer().initializer().numParams(conf);
    INDArray params = Nd4j.create(1, numParams);
    AutoEncoder da = (AutoEncoder) conf.getLayer().instantiate(conf, null, 0, params, true);
    da.setListeners(new ScoreIterationListener(1), new HistogramIterationListener(5));
    da.setParams(da.params());
    da.fit(input);
}
Also used : MnistDataFetcher(org.deeplearning4j.datasets.fetchers.MnistDataFetcher), INDArray(org.nd4j.linalg.api.ndarray.INDArray), DataSet(org.nd4j.linalg.dataset.DataSet), NeuralNetConfiguration(org.deeplearning4j.nn.conf.NeuralNetConfiguration), HistogramIterationListener(org.deeplearning4j.ui.weights.HistogramIterationListener), AutoEncoder(org.deeplearning4j.nn.layers.feedforward.autoencoder.AutoEncoder), ScoreIterationListener(org.deeplearning4j.optimize.listeners.ScoreIterationListener), Test(org.junit.Test)

Example 13 with NeuralNetConfiguration

Use of org.deeplearning4j.nn.conf.NeuralNetConfiguration in project deeplearning4j by deeplearning4j.

From the class TestPreProcessors, method testRnnToFeedForwardPreProcessor.

@Test
public void testRnnToFeedForwardPreProcessor() {
    int[] miniBatchSizes = { 5, 1, 5, 1 };
    int[] timeSeriesLengths = { 9, 9, 1, 1 };
    for (int x = 0; x < miniBatchSizes.length; x++) {
        int miniBatchSize = miniBatchSizes[x];
        int layerSize = 7;
        int timeSeriesLength = timeSeriesLengths[x];
        RnnToFeedForwardPreProcessor proc = new RnnToFeedForwardPreProcessor();
        NeuralNetConfiguration nnc = new NeuralNetConfiguration.Builder()
                .layer(new org.deeplearning4j.nn.conf.layers.DenseLayer.Builder()
                        .nIn(layerSize).nOut(layerSize).build())
                .build();
        int numParams = nnc.getLayer().initializer().numParams(nnc);
        INDArray params = Nd4j.create(1, numParams);
        DenseLayer layer = (DenseLayer) nnc.getLayer().instantiate(nnc, null, 0, params, true);
        layer.setInputMiniBatchSize(miniBatchSize);
        INDArray activations3dc = Nd4j.create(new int[] { miniBatchSize, layerSize, timeSeriesLength }, 'c');
        INDArray activations3df = Nd4j.create(new int[] { miniBatchSize, layerSize, timeSeriesLength }, 'f');
        for (int i = 0; i < miniBatchSize; i++) {
            for (int j = 0; j < layerSize; j++) {
                for (int k = 0; k < timeSeriesLength; k++) {
                    //value abc -> example=a, neuronNumber=b, time=c
                    double value = 100 * i + 10 * j + k;
                    activations3dc.putScalar(new int[] { i, j, k }, value);
                    activations3df.putScalar(new int[] { i, j, k }, value);
                }
            }
        }
        assertEquals(activations3dc, activations3df);
        INDArray activations2dc = proc.preProcess(activations3dc, miniBatchSize);
        INDArray activations2df = proc.preProcess(activations3df, miniBatchSize);
        assertArrayEquals(activations2dc.shape(), new int[] { miniBatchSize * timeSeriesLength, layerSize });
        assertArrayEquals(activations2df.shape(), new int[] { miniBatchSize * timeSeriesLength, layerSize });
        assertEquals(activations2dc, activations2df);
        //Expect each row in activations2d to have order:
        //(example=0,t=0), (example=0,t=1), (example=0,t=2), ..., (example=1,t=0), (example=1,t=1), ...
        int nRows = activations2dc.rows();
        for (int i = 0; i < nRows; i++) {
            INDArray rowc = activations2dc.getRow(i);
            INDArray rowf = activations2df.getRow(i);
            assertArrayEquals(rowc.shape(), new int[] { 1, layerSize });
            assertEquals(rowc, rowf);
            //c order reshaping
            //                int origExampleNum = i / timeSeriesLength;
            //                int time = i % timeSeriesLength;
            //f order reshaping
            int time = i / miniBatchSize;
            int origExampleNum = i % miniBatchSize;
            INDArray expectedRow = activations3dc.tensorAlongDimension(time, 1, 0).getRow(origExampleNum);
            assertEquals(expectedRow, rowc);
            assertEquals(expectedRow, rowf);
        }
        //Given that epsilons and activations have the same shape, we can do this (even though it's not the intended use)
        //Basically backprop should be exact opposite of preProcess
        INDArray outc = proc.backprop(activations2dc, miniBatchSize);
        INDArray outf = proc.backprop(activations2df, miniBatchSize);
        assertEquals(activations3dc, outc);
        assertEquals(activations3df, outf);
        //Also check case when epsilons are different orders:
        INDArray eps2d_c = Nd4j.create(activations2dc.shape(), 'c');
        INDArray eps2d_f = Nd4j.create(activations2dc.shape(), 'f');
        eps2d_c.assign(activations2dc);
        eps2d_f.assign(activations2df);
        INDArray eps3d_c = proc.backprop(eps2d_c, miniBatchSize);
        INDArray eps3d_f = proc.backprop(eps2d_f, miniBatchSize);
        assertEquals(activations3dc, eps3d_c);
        assertEquals(activations3df, eps3d_f);
    }
}
Also used : DenseLayer(org.deeplearning4j.nn.layers.feedforward.dense.DenseLayer), INDArray(org.nd4j.linalg.api.ndarray.INDArray), NeuralNetConfiguration(org.deeplearning4j.nn.conf.NeuralNetConfiguration), Test(org.junit.Test)
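
The f-order index arithmetic in the loop above (time = i / miniBatchSize, example = i % miniBatchSize) is easy to misread. The standalone sketch below (plain Java, no DL4J dependency; the class and variable names are illustrative only) prints the (example, time) pair each reshaped 2d row is expected to correspond to, using the same value scheme (value = 100*example + 10*neuron + time) as the test.

// Minimal, DL4J-independent sketch of the f-order row mapping the test asserts:
// for 3d activations [miniBatch, layerSize, tsLength], row i of the reshaped
// 2d array [miniBatch * tsLength, layerSize] holds (example = i % miniBatch,
// time = i / miniBatch). All names here are illustrative, not DL4J API.
public class RowMappingSketch {
    public static void main(String[] args) {
        int miniBatch = 5, layerSize = 7, tsLength = 9;
        for (int i = 0; i < miniBatch * tsLength; i++) {
            int example = i % miniBatch;   // example index varies fastest ('f' order)
            int time = i / miniBatch;      // time index varies slowest
            // With the test's value scheme, row i should contain 100*example + 10*j + time
            // for neuron index j = 0 .. layerSize-1:
            double firstValueInRow = 100 * example + time;                      // j = 0
            double lastValueInRow = 100 * example + 10 * (layerSize - 1) + time; // j = layerSize-1
            System.out.printf("row %2d -> example=%d, time=%d, values %.0f .. %.0f%n",
                    i, example, time, firstValueInRow, lastValueInRow);
        }
    }
}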

Example 14 with NeuralNetConfiguration

Use of org.deeplearning4j.nn.conf.NeuralNetConfiguration in project deeplearning4j by deeplearning4j.

From the class TestPreProcessors, method testFeedForwardToRnnPreProcessor.

@Test
public void testFeedForwardToRnnPreProcessor() {
    Nd4j.getRandom().setSeed(12345L);
    int[] miniBatchSizes = { 5, 1, 5, 1 };
    int[] timeSeriesLengths = { 9, 9, 1, 1 };
    for (int x = 0; x < miniBatchSizes.length; x++) {
        int miniBatchSize = miniBatchSizes[x];
        int layerSize = 7;
        int timeSeriesLength = timeSeriesLengths[x];
        String msg = "minibatch=" + miniBatchSize;
        FeedForwardToRnnPreProcessor proc = new FeedForwardToRnnPreProcessor();
        NeuralNetConfiguration nnc = new NeuralNetConfiguration.Builder()
                .layer(new org.deeplearning4j.nn.conf.layers.DenseLayer.Builder()
                        .nIn(layerSize).nOut(layerSize).build())
                .build();
        int numParams = nnc.getLayer().initializer().numParams(nnc);
        INDArray params = Nd4j.create(1, numParams);
        DenseLayer layer = (DenseLayer) nnc.getLayer().instantiate(nnc, null, 0, params, true);
        layer.setInputMiniBatchSize(miniBatchSize);
        INDArray rand = Nd4j.rand(miniBatchSize * timeSeriesLength, layerSize);
        INDArray activations2dc = Nd4j.create(new int[] { miniBatchSize * timeSeriesLength, layerSize }, 'c');
        INDArray activations2df = Nd4j.create(new int[] { miniBatchSize * timeSeriesLength, layerSize }, 'f');
        activations2dc.assign(rand);
        activations2df.assign(rand);
        assertEquals(activations2dc, activations2df);
        INDArray activations3dc = proc.preProcess(activations2dc, miniBatchSize);
        INDArray activations3df = proc.preProcess(activations2df, miniBatchSize);
        assertArrayEquals(new int[] { miniBatchSize, layerSize, timeSeriesLength }, activations3dc.shape());
        assertArrayEquals(new int[] { miniBatchSize, layerSize, timeSeriesLength }, activations3df.shape());
        assertEquals(activations3dc, activations3df);
        int nRows2D = miniBatchSize * timeSeriesLength;
        for (int i = 0; i < nRows2D; i++) {
            //c order reshaping:
            //                int time = i % timeSeriesLength;
            //                int example = i / timeSeriesLength;
            //f order reshaping
            int time = i / miniBatchSize;
            int example = i % miniBatchSize;
            INDArray row2d = activations2dc.getRow(i);
            INDArray row3dc = activations3dc.tensorAlongDimension(time, 0, 1).getRow(example);
            INDArray row3df = activations3df.tensorAlongDimension(time, 0, 1).getRow(example);
            assertEquals(row2d, row3dc);
            assertEquals(row2d, row3df);
        }
        //Again, since epsilons and activations have the same shape, we can do this (even though it's not the intended use)
        INDArray epsilon2d1 = proc.backprop(activations3dc, miniBatchSize);
        INDArray epsilon2d2 = proc.backprop(activations3df, miniBatchSize);
        assertEquals(msg, activations2dc, epsilon2d1);
        assertEquals(msg, activations2dc, epsilon2d2);
        //Also check backprop with 3d activations in f order vs. c order:
        INDArray act3d_c = Nd4j.create(activations3dc.shape(), 'c');
        act3d_c.assign(activations3dc);
        INDArray act3d_f = Nd4j.create(activations3dc.shape(), 'f');
        act3d_f.assign(activations3dc);
        assertEquals(msg, activations2dc, proc.backprop(act3d_c, miniBatchSize));
        assertEquals(msg, activations2dc, proc.backprop(act3d_f, miniBatchSize));
    }
}
Also used : DenseLayer(org.deeplearning4j.nn.layers.feedforward.dense.DenseLayer), INDArray(org.nd4j.linalg.api.ndarray.INDArray), NeuralNetConfiguration(org.deeplearning4j.nn.conf.NeuralNetConfiguration), Test(org.junit.Test)

Example 15 with NeuralNetConfiguration

Use of org.deeplearning4j.nn.conf.NeuralNetConfiguration in project deeplearning4j by deeplearning4j.

From the class GradientCheckUtil, method checkGradients.

/**
     * Check backprop gradients for a MultiLayerNetwork.
     * @param mln MultiLayerNetwork to test. This must be initialized.
     * @param epsilon Usually on the order of 1e-4 or so.
     * @param maxRelError Maximum relative error. Usually < 1e-5 or so, though possibly larger for deep networks or those with nonlinear activation functions
     * @param minAbsoluteError Minimum absolute error to cause a failure. Numerical gradients can be non-zero due to precision issues.
     *                         For example, 0.0 vs. 1e-18: relative error is 1.0, but not really a failure
     * @param print Whether to print full pass/failure details for each parameter gradient
     * @param exitOnFirstError If true: return upon first failure. If false: continue checking even if
     *  one parameter gradient has failed. Typically use false for debugging, true for unit tests.
     * @param input Input array to use for forward pass. May be mini-batch data.
     * @param labels Labels/targets to use to calculate backprop gradient. May be mini-batch data.
     * @return true if the gradient checks pass, false otherwise.
     */
public static boolean checkGradients(MultiLayerNetwork mln, double epsilon, double maxRelError, double minAbsoluteError, boolean print, boolean exitOnFirstError, INDArray input, INDArray labels) {
    //Basic sanity checks on input:
    if (epsilon <= 0.0 || epsilon > 0.1)
        throw new IllegalArgumentException("Invalid epsilon: expect epsilon in range (0,0.1], usually 1e-4 or so");
    if (maxRelError <= 0.0 || maxRelError > 0.25)
        throw new IllegalArgumentException("Invalid maxRelativeError: " + maxRelError);
    if (!(mln.getOutputLayer() instanceof IOutputLayer))
        throw new IllegalArgumentException("Cannot check backprop gradients without OutputLayer");
    //Check network configuration:
    int layerCount = 0;
    for (NeuralNetConfiguration n : mln.getLayerWiseConfigurations().getConfs()) {
        org.deeplearning4j.nn.conf.Updater u = n.getLayer().getUpdater();
        if (u == org.deeplearning4j.nn.conf.Updater.SGD) {
            //Must have LR of 1.0
            double lr = n.getLayer().getLearningRate();
            if (lr != 1.0) {
                throw new IllegalStateException("When using SGD updater, must also use lr=1.0 for layer " + layerCount + "; got " + u + " with lr=" + lr + " for layer \"" + n.getLayer().getLayerName() + "\"");
            }
        } else if (u != org.deeplearning4j.nn.conf.Updater.NONE) {
            throw new IllegalStateException("Must have Updater.NONE (or SGD + lr=1.0) for layer " + layerCount + "; got " + u);
        }
        double dropout = n.getLayer().getDropOut();
        if (n.isUseRegularization() && dropout != 0.0) {
            throw new IllegalStateException("Must have dropout == 0.0 for gradient checks - got dropout = " + dropout + " for layer " + layerCount);
        }
        IActivation activation = n.getLayer().getActivationFn();
        if (activation != null) {
            if (!VALID_ACTIVATION_FUNCTIONS.contains(activation.getClass())) {
                log.warn("Layer " + layerCount + " is possibly using an unsuitable activation function: " + activation.getClass() + ". Activation functions for gradient checks must be smooth (like sigmoid, tanh, softmax) and not " + "contain discontinuities like ReLU or LeakyReLU (these may cause spurious failures)");
            }
        }
    }
    mln.setInput(input);
    mln.setLabels(labels);
    mln.computeGradientAndScore();
    Pair<Gradient, Double> gradAndScore = mln.gradientAndScore();
    Updater updater = UpdaterCreator.getUpdater(mln);
    updater.update(mln, gradAndScore.getFirst(), 0, mln.batchSize());
    //need dup: gradients are a *view* of the full gradient array (which will change every time backprop is done)
    INDArray gradientToCheck = gradAndScore.getFirst().gradient().dup();
    //need dup: params are a *view* of full parameters
    INDArray originalParams = mln.params().dup();
    int nParams = originalParams.length();
    Map<String, INDArray> paramTable = mln.paramTable();
    List<String> paramNames = new ArrayList<>(paramTable.keySet());
    int[] paramEnds = new int[paramNames.size()];
    paramEnds[0] = paramTable.get(paramNames.get(0)).length();
    for (int i = 1; i < paramEnds.length; i++) {
        paramEnds[i] = paramEnds[i - 1] + paramTable.get(paramNames.get(i)).length();
    }
    int totalNFailures = 0;
    double maxError = 0.0;
    DataSet ds = new DataSet(input, labels);
    int currParamNameIdx = 0;
    //Assumption here: params is a view that we can modify in-place
    INDArray params = mln.params();
    for (int i = 0; i < nParams; i++) {
        //Get param name
        if (i >= paramEnds[currParamNameIdx]) {
            currParamNameIdx++;
        }
        String paramName = paramNames.get(currParamNameIdx);
        //(w+epsilon): Do forward pass and score
        double origValue = params.getDouble(i);
        params.putScalar(i, origValue + epsilon);
        double scorePlus = mln.score(ds, true);
        //(w-epsilon): Do forward pass and score
        params.putScalar(i, origValue - epsilon);
        double scoreMinus = mln.score(ds, true);
        //Reset original param value
        params.putScalar(i, origValue);
        //Calculate numerical parameter gradient:
        double scoreDelta = scorePlus - scoreMinus;
        double numericalGradient = scoreDelta / (2 * epsilon);
        if (Double.isNaN(numericalGradient))
            throw new IllegalStateException("Numerical gradient was NaN for parameter " + i + " of " + nParams);
        double backpropGradient = gradientToCheck.getDouble(i);
        //http://cs231n.github.io/neural-networks-3/#gradcheck
        //use mean centered
        double relError = Math.abs(backpropGradient - numericalGradient) / (Math.abs(numericalGradient) + Math.abs(backpropGradient));
        if (backpropGradient == 0.0 && numericalGradient == 0.0)
            //Edge case: e.g., RNNs with a time series length of 1
            relError = 0.0;
        if (relError > maxError)
            maxError = relError;
        if (relError > maxRelError || Double.isNaN(relError)) {
            double absError = Math.abs(backpropGradient - numericalGradient);
            if (absError < minAbsoluteError) {
                log.info("Param " + i + " (" + paramName + ") passed: grad= " + backpropGradient + ", numericalGrad= " + numericalGradient + ", relError= " + relError + "; absolute error = " + absError + " < minAbsoluteError = " + minAbsoluteError);
            } else {
                if (print)
                    log.info("Param " + i + " (" + paramName + ") FAILED: grad= " + backpropGradient + ", numericalGrad= " + numericalGradient + ", relError= " + relError + ", scorePlus=" + scorePlus + ", scoreMinus= " + scoreMinus);
                if (exitOnFirstError)
                    return false;
                totalNFailures++;
            }
        } else if (print) {
            log.info("Param " + i + " (" + paramName + ") passed: grad= " + backpropGradient + ", numericalGrad= " + numericalGradient + ", relError= " + relError);
        }
    }
    if (print) {
        int nPass = nParams - totalNFailures;
        log.info("GradientCheckUtil.checkGradients(): " + nParams + " params checked, " + nPass + " passed, " + totalNFailures + " failed. Largest relative error = " + maxError);
    }
    return totalNFailures == 0;
}
Also used : Gradient(org.deeplearning4j.nn.gradient.Gradient), DataSet(org.nd4j.linalg.dataset.DataSet), MultiDataSet(org.nd4j.linalg.dataset.MultiDataSet), ArrayList(java.util.ArrayList), NeuralNetConfiguration(org.deeplearning4j.nn.conf.NeuralNetConfiguration), IActivation(org.nd4j.linalg.activations.IActivation), INDArray(org.nd4j.linalg.api.ndarray.INDArray), ComputationGraphUpdater(org.deeplearning4j.nn.updater.graph.ComputationGraphUpdater), Updater(org.deeplearning4j.nn.api.Updater), IOutputLayer(org.deeplearning4j.nn.api.layers.IOutputLayer)
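
For context, a typical call site for checkGradients builds and initializes a small network that satisfies the configuration checks above (Updater.NONE or SGD with learning rate 1.0, no dropout, smooth activation functions) and then passes it in together with a mini-batch of inputs and labels. The sketch below is illustrative only; the layer sizes, constants, and data are assumptions, not taken from the deeplearning4j tests.

// Illustrative call site for GradientCheckUtil.checkGradients (assumption: DL4J 0.x-era API;
// sizes and constants are made up for this sketch). Per the configuration checks above:
// Updater.NONE (or SGD with lr = 1.0), no dropout, smooth activations such as tanh/softmax.
double epsilon = 1e-6;
double maxRelError = 1e-3;
double minAbsoluteError = 1e-8;

MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder()
        .updater(org.deeplearning4j.nn.conf.Updater.NONE)
        .list()
        .layer(0, new org.deeplearning4j.nn.conf.layers.DenseLayer.Builder()
                .nIn(4).nOut(3).activation(Activation.TANH).build())
        .layer(1, new org.deeplearning4j.nn.conf.layers.OutputLayer.Builder()
                .nIn(3).nOut(3).activation(Activation.SOFTMAX)
                .lossFunction(LossFunctions.LossFunction.MCXENT).build())
        .pretrain(false).backprop(true)
        .build();
MultiLayerNetwork net = new MultiLayerNetwork(conf);
net.init();

INDArray input = Nd4j.rand(10, 4);                  // mini-batch of 10 examples, 4 features
INDArray labels = Nd4j.zeros(10, 3);
for (int i = 0; i < 10; i++)
    labels.putScalar(new int[] { i, i % 3 }, 1.0);  // one-hot targets

boolean gradOK = GradientCheckUtil.checkGradients(net, epsilon, maxRelError, minAbsoluteError,
        true /*print*/, false /*exitOnFirstError*/, input, labels);
// In a unit test this would typically be followed by assertTrue(gradOK);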

Aggregations

NeuralNetConfiguration (org.deeplearning4j.nn.conf.NeuralNetConfiguration): 83 usages
INDArray (org.nd4j.linalg.api.ndarray.INDArray): 65 usages
Test (org.junit.Test): 55 usages
Layer (org.deeplearning4j.nn.api.Layer): 29 usages
Gradient (org.deeplearning4j.nn.gradient.Gradient): 26 usages
DenseLayer (org.deeplearning4j.nn.conf.layers.DenseLayer): 24 usages
Updater (org.deeplearning4j.nn.api.Updater): 22 usages
OutputLayer (org.deeplearning4j.nn.conf.layers.OutputLayer): 21 usages
DefaultGradient (org.deeplearning4j.nn.gradient.DefaultGradient): 21 usages
DataSet (org.nd4j.linalg.dataset.DataSet): 14 usages
MultiLayerConfiguration (org.deeplearning4j.nn.conf.MultiLayerConfiguration): 11 usages
ScoreIterationListener (org.deeplearning4j.optimize.listeners.ScoreIterationListener): 9 usages
MultiLayerNetwork (org.deeplearning4j.nn.multilayer.MultiLayerNetwork): 8 usages
IrisDataSetIterator (org.deeplearning4j.datasets.iterator.impl.IrisDataSetIterator): 6 usages
UniformDistribution (org.deeplearning4j.nn.conf.distribution.UniformDistribution): 6 usages
RnnOutputLayer (org.deeplearning4j.nn.layers.recurrent.RnnOutputLayer): 6 usages
MnistDataFetcher (org.deeplearning4j.datasets.fetchers.MnistDataFetcher): 4 usages
Evaluation (org.deeplearning4j.eval.Evaluation): 4 usages
Model (org.deeplearning4j.nn.api.Model): 4 usages
ConvolutionLayer (org.deeplearning4j.nn.conf.layers.ConvolutionLayer): 4 usages