
Example 66 with DataSet

use of org.nd4j.linalg.dataset.DataSet in project deeplearning4j by deeplearning4j.

the class GradientCheckTestsComputationGraph method testBasicIrisWithElementWiseNode.

@Test
public void testBasicIrisWithElementWiseNode() {
    ElementWiseVertex.Op[] ops = new ElementWiseVertex.Op[] { ElementWiseVertex.Op.Add, ElementWiseVertex.Op.Subtract };
    for (ElementWiseVertex.Op op : ops) {
        Nd4j.getRandom().setSeed(12345);
        ComputationGraphConfiguration conf = new NeuralNetConfiguration.Builder()
                .seed(12345)
                .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT)
                .weightInit(WeightInit.DISTRIBUTION).dist(new NormalDistribution(0, 1))
                .updater(Updater.NONE).learningRate(1.0)
                .graphBuilder()
                .addInputs("input")
                .addLayer("l1", new DenseLayer.Builder().nIn(4).nOut(5).activation(Activation.TANH).build(), "input")
                .addLayer("l2", new DenseLayer.Builder().nIn(4).nOut(5).activation(Activation.SIGMOID).build(), "input")
                .addVertex("elementwise", new ElementWiseVertex(op), "l1", "l2")
                .addLayer("outputLayer", new OutputLayer.Builder().lossFunction(LossFunctions.LossFunction.MCXENT)
                        .activation(Activation.SOFTMAX).nIn(5).nOut(3).build(), "elementwise")
                .setOutputs("outputLayer")
                .pretrain(false).backprop(true)
                .build();
        ComputationGraph graph = new ComputationGraph(conf);
        graph.init();
        int numParams = (4 * 5 + 5) + (4 * 5 + 5) + (5 * 3 + 3);
        assertEquals(numParams, graph.numParams());
        Nd4j.getRandom().setSeed(12345);
        int nParams = graph.numParams();
        INDArray newParams = Nd4j.rand(1, nParams);
        graph.setParams(newParams);
        DataSet ds = new IrisDataSetIterator(150, 150).next();
        INDArray min = ds.getFeatureMatrix().min(0);
        INDArray max = ds.getFeatureMatrix().max(0);
        ds.getFeatureMatrix().subiRowVector(min).diviRowVector(max.sub(min));
        INDArray input = ds.getFeatureMatrix();
        INDArray labels = ds.getLabels();
        if (PRINT_RESULTS) {
            System.out.println("testBasicIrisWithElementWiseVertex(op=" + op + ")");
            for (int j = 0; j < graph.getNumLayers(); j++) System.out.println("Layer " + j + " # params: " + graph.getLayer(j).numParams());
        }
        boolean gradOK = GradientCheckUtil.checkGradients(graph, DEFAULT_EPS, DEFAULT_MAX_REL_ERROR, DEFAULT_MIN_ABS_ERROR, PRINT_RESULTS, RETURN_ON_FIRST_FAILURE, new INDArray[] { input }, new INDArray[] { labels });
        String msg = "testBasicIrisWithElementWiseVertex(op=" + op + ")";
        assertTrue(msg, gradOK);
    }
}
Also used : IrisDataSetIterator(org.deeplearning4j.datasets.iterator.impl.IrisDataSetIterator) DataSet(org.nd4j.linalg.dataset.DataSet) INDArray(org.nd4j.linalg.api.ndarray.INDArray) NormalDistribution(org.deeplearning4j.nn.conf.distribution.NormalDistribution) ComputationGraphConfiguration(org.deeplearning4j.nn.conf.ComputationGraphConfiguration) ComputationGraph(org.deeplearning4j.nn.graph.ComputationGraph) Test(org.junit.Test)
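
The test above min-max normalizes the Iris features by hand with subiRowVector/diviRowVector. As a hedged aside (not part of the test), the same scaling of a DataSet can usually be done with ND4J's NormalizerMinMaxScaler preprocessor; a minimal sketch, assuming that class is available in this version:

import org.deeplearning4j.datasets.iterator.impl.IrisDataSetIterator;
import org.nd4j.linalg.dataset.DataSet;
import org.nd4j.linalg.dataset.api.preprocessor.NormalizerMinMaxScaler;

// Load the full Iris set as one batch and scale each feature column to [0, 1]
DataSet ds = new IrisDataSetIterator(150, 150).next();
NormalizerMinMaxScaler scaler = new NormalizerMinMaxScaler();   // defaults to range [0, 1]
scaler.fit(ds);        // compute per-column min and max
scaler.transform(ds);  // apply (x - min) / (max - min) in place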

Example 67 with DataSet

use of org.nd4j.linalg.dataset.DataSet in project deeplearning4j by deeplearning4j.

the class NeuralNetConfigurationTest method createData.

public DataSet createData() {
    int numFeatures = 40;
    // need at least two examples, otherwise the output layer gradient is a scalar and causes an exception
    INDArray input = Nd4j.create(2, numFeatures);
    INDArray labels = Nd4j.create(2, 2);
    INDArray row0 = Nd4j.create(1, numFeatures);
    row0.assign(0.1);
    input.putRow(0, row0);
    // label example 0 as class 1 (second column)
    labels.put(0, 1, 1);
    INDArray row1 = Nd4j.create(1, numFeatures);
    row1.assign(0.2);
    input.putRow(1, row1);
    // label example 1 as class 0 (first column)
    labels.put(1, 0, 1);
    return new DataSet(input, labels);
}
Also used : INDArray(org.nd4j.linalg.api.ndarray.INDArray) DataSet(org.nd4j.linalg.dataset.DataSet)
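
The DataSet returned by createData() is typically passed straight to a model. A minimal, hedged sketch of how such a two-example DataSet might be used (the 'net' variable and its configuration are assumptions, not part of the test):

// Assumes 'net' is an initialized MultiLayerNetwork with nIn = 40 and nOut = 2
DataSet ds = createData();
net.fit(ds);                                                 // one fit call on the tiny two-example batch
INDArray predictions = net.output(ds.getFeatureMatrix());   // forward pass on the same features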

Example 68 with DataSet

use of org.nd4j.linalg.dataset.DataSet in project deeplearning4j by deeplearning4j.

the class GradientCheckUtil method checkGradients.

/**
     * Check backprop gradients for a MultiLayerNetwork.
     * @param mln MultiLayerNetwork to test. This must be initialized.
     * @param epsilon Step size for the numerical gradient, usually on the order of 1e-4 or so.
     * @param maxRelError Maximum relative error. Usually < 1e-5 or so, though it may need to be larger for deep networks or those with nonlinear activation functions
     * @param minAbsoluteError Minimum absolute error to cause a failure. Numerical gradients can be non-zero due to precision issues.
     *                         For example, 0.0 vs. 1e-18: relative error is 1.0, but not really a failure
     * @param print Whether to print full pass/failure details for each parameter gradient
     * @param exitOnFirstError If true: return upon first failure. If false: continue checking even if
     *  one parameter gradient has failed. Typically use false for debugging, true for unit tests.
     * @param input Input array to use for forward pass. May be mini-batch data.
     * @param labels Labels/targets to use to calculate backprop gradient. May be mini-batch data.
     * @return true if all gradient checks pass, false otherwise.
     */
public static boolean checkGradients(MultiLayerNetwork mln, double epsilon, double maxRelError, double minAbsoluteError, boolean print, boolean exitOnFirstError, INDArray input, INDArray labels) {
    //Basic sanity checks on input:
    if (epsilon <= 0.0 || epsilon > 0.1)
        throw new IllegalArgumentException("Invalid epsilon: expect epsilon in range (0,0.1], usually 1e-4 or so");
    if (maxRelError <= 0.0 || maxRelError > 0.25)
        throw new IllegalArgumentException("Invalid maxRelativeError: " + maxRelError);
    if (!(mln.getOutputLayer() instanceof IOutputLayer))
        throw new IllegalArgumentException("Cannot check backprop gradients without OutputLayer");
    //Check network configuration:
    int layerCount = 0;
    for (NeuralNetConfiguration n : mln.getLayerWiseConfigurations().getConfs()) {
        org.deeplearning4j.nn.conf.Updater u = n.getLayer().getUpdater();
        if (u == org.deeplearning4j.nn.conf.Updater.SGD) {
            //Must have LR of 1.0
            double lr = n.getLayer().getLearningRate();
            if (lr != 1.0) {
                throw new IllegalStateException("When using SGD updater, must also use lr=1.0 for layer " + layerCount + "; got " + u + " with lr=" + lr + " for layer \"" + n.getLayer().getLayerName() + "\"");
            }
        } else if (u != org.deeplearning4j.nn.conf.Updater.NONE) {
            throw new IllegalStateException("Must have Updater.NONE (or SGD + lr=1.0) for layer " + layerCount + "; got " + u);
        }
        double dropout = n.getLayer().getDropOut();
        if (n.isUseRegularization() && dropout != 0.0) {
            throw new IllegalStateException("Must have dropout == 0.0 for gradient checks - got dropout = " + dropout + " for layer " + layerCount);
        }
        IActivation activation = n.getLayer().getActivationFn();
        if (activation != null) {
            if (!VALID_ACTIVATION_FUNCTIONS.contains(activation.getClass())) {
                log.warn("Layer " + layerCount + " is possibly using an unsuitable activation function: " + activation.getClass() + ". Activation functions for gradient checks must be smooth (like sigmoid, tanh, softmax) and not " + "contain discontinuities like ReLU or LeakyReLU (these may cause spurious failures)");
            }
        }
        //Track the layer index so error messages refer to the correct layer
        layerCount++;
    }
    mln.setInput(input);
    mln.setLabels(labels);
    mln.computeGradientAndScore();
    Pair<Gradient, Double> gradAndScore = mln.gradientAndScore();
    Updater updater = UpdaterCreator.getUpdater(mln);
    updater.update(mln, gradAndScore.getFirst(), 0, mln.batchSize());
    //need dup: gradients are a *view* of the full gradient array (which will change every time backprop is done)
    INDArray gradientToCheck = gradAndScore.getFirst().gradient().dup();
    //need dup: params are a *view* of full parameters
    INDArray originalParams = mln.params().dup();
    int nParams = originalParams.length();
    Map<String, INDArray> paramTable = mln.paramTable();
    List<String> paramNames = new ArrayList<>(paramTable.keySet());
    int[] paramEnds = new int[paramNames.size()];
    paramEnds[0] = paramTable.get(paramNames.get(0)).length();
    for (int i = 1; i < paramEnds.length; i++) {
        paramEnds[i] = paramEnds[i - 1] + paramTable.get(paramNames.get(i)).length();
    }
    int totalNFailures = 0;
    double maxError = 0.0;
    DataSet ds = new DataSet(input, labels);
    int currParamNameIdx = 0;
    //Assumption here: params is a view that we can modify in-place
    INDArray params = mln.params();
    for (int i = 0; i < nParams; i++) {
        //Get param name
        if (i >= paramEnds[currParamNameIdx]) {
            currParamNameIdx++;
        }
        String paramName = paramNames.get(currParamNameIdx);
        //(w+epsilon): Do forward pass and score
        double origValue = params.getDouble(i);
        params.putScalar(i, origValue + epsilon);
        double scorePlus = mln.score(ds, true);
        //(w-epsilon): Do forward pass and score
        params.putScalar(i, origValue - epsilon);
        double scoreMinus = mln.score(ds, true);
        //Reset original param value
        params.putScalar(i, origValue);
        //Calculate numerical parameter gradient:
        double scoreDelta = scorePlus - scoreMinus;
        double numericalGradient = scoreDelta / (2 * epsilon);
        if (Double.isNaN(numericalGradient))
            throw new IllegalStateException("Numerical gradient was NaN for parameter " + i + " of " + nParams);
        double backpropGradient = gradientToCheck.getDouble(i);
        //http://cs231n.github.io/neural-networks-3/#gradcheck
        //use mean centered
        double relError = Math.abs(backpropGradient - numericalGradient) / (Math.abs(numericalGradient) + Math.abs(backpropGradient));
        if (backpropGradient == 0.0 && numericalGradient == 0.0)
            //Edge case: both gradients are exactly 0.0 (e.g., RNNs with a time series length of 1)
            relError = 0.0;
        if (relError > maxError)
            maxError = relError;
        if (relError > maxRelError || Double.isNaN(relError)) {
            double absError = Math.abs(backpropGradient - numericalGradient);
            if (absError < minAbsoluteError) {
                log.info("Param " + i + " (" + paramName + ") passed: grad= " + backpropGradient + ", numericalGrad= " + numericalGradient + ", relError= " + relError + "; absolute error = " + absError + " < minAbsoluteError = " + minAbsoluteError);
            } else {
                if (print)
                    log.info("Param " + i + " (" + paramName + ") FAILED: grad= " + backpropGradient + ", numericalGrad= " + numericalGradient + ", relError= " + relError + ", scorePlus=" + scorePlus + ", scoreMinus= " + scoreMinus);
                if (exitOnFirstError)
                    return false;
                totalNFailures++;
            }
        } else if (print) {
            log.info("Param " + i + " (" + paramName + ") passed: grad= " + backpropGradient + ", numericalGrad= " + numericalGradient + ", relError= " + relError);
        }
    }
    if (print) {
        int nPass = nParams - totalNFailures;
        log.info("GradientCheckUtil.checkGradients(): " + nParams + " params checked, " + nPass + " passed, " + totalNFailures + " failed. Largest relative error = " + maxError);
    }
    return totalNFailures == 0;
}
Also used : Gradient(org.deeplearning4j.nn.gradient.Gradient) DataSet(org.nd4j.linalg.dataset.DataSet) MultiDataSet(org.nd4j.linalg.dataset.MultiDataSet) ArrayList(java.util.ArrayList) NeuralNetConfiguration(org.deeplearning4j.nn.conf.NeuralNetConfiguration) IActivation(org.nd4j.linalg.activations.IActivation) INDArray(org.nd4j.linalg.api.ndarray.INDArray) ComputationGraphUpdater(org.deeplearning4j.nn.updater.graph.ComputationGraphUpdater) Updater(org.deeplearning4j.nn.api.Updater) IOutputLayer(org.deeplearning4j.nn.api.layers.IOutputLayer)
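
A typical call site for the utility above passes small epsilon and tolerance constants. A minimal sketch using the signature shown in the method; the concrete values below are illustrative assumptions, not the tests' actual DEFAULT_* constants:

// Illustrative constants; the gradient-check tests define their own DEFAULT_* values
double epsilon = 1e-6;          // finite-difference step
double maxRelError = 1e-3;      // maximum allowed relative error
double minAbsError = 1e-8;      // ignore failures below this absolute error
boolean print = true;
boolean exitOnFirstError = false;

// 'net', 'input' and 'labels' are assumed to be an initialized MultiLayerNetwork and its data
boolean gradOK = GradientCheckUtil.checkGradients(net, epsilon, maxRelError, minAbsError,
        print, exitOnFirstError, input, labels);
assertTrue(gradOK);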

Example 69 with DataSet

use of org.nd4j.linalg.dataset.DataSet in project deeplearning4j by deeplearning4j.

the class IteratorDataSetIterator method next.

@Override
public DataSet next(int num) {
    if (!hasNext())
        throw new NoSuchElementException();
    List<DataSet> list = new ArrayList<>();
    int countSoFar = 0;
    while ((!queued.isEmpty() || iterator.hasNext()) && countSoFar < batchSize) {
        DataSet next;
        if (!queued.isEmpty()) {
            next = queued.removeFirst();
        } else {
            next = iterator.next();
        }
        int nExamples = next.numExamples();
        if (countSoFar + nExamples <= batchSize) {
            //Add the entire DataSet as-is
            list.add(next);
        } else {
            //Otherwise, split it
            DataSet toKeep = (DataSet) next.getRange(0, batchSize - countSoFar);
            DataSet toCache = (DataSet) next.getRange(batchSize - countSoFar, nExamples);
            list.add(toKeep);
            queued.add(toCache);
        }
        countSoFar += nExamples;
    }
    if (inputColumns == -1) {
        //Set columns etc for later use
        DataSet temp = list.get(0);
        inputColumns = temp.getFeatureMatrix().size(1);
        //May be null for layerwise pretraining
        totalOutcomes = temp.getLabels() == null ? 0 : temp.getLabels().size(1);
    }
    DataSet out;
    if (list.size() == 1) {
        out = list.get(0);
    } else {
        out = DataSet.merge(list);
    }
    if (preProcessor != null) {
        if (!out.isPreProcessed()) {
            preProcessor.preProcess(out);
            out.markAsPreProcessed();
        }
    }
    cursor += out.numExamples();
    return out;
}
Also used : DataSet(org.nd4j.linalg.dataset.DataSet)
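
IteratorDataSetIterator re-batches an arbitrary Iterator<DataSet> to a fixed mini-batch size, splitting and queueing leftovers as shown above. A minimal, hedged usage sketch (the two-argument constructor is assumed to take the source iterator and the target batch size):

import java.util.Arrays;
import java.util.List;
import org.deeplearning4j.datasets.iterator.IteratorDataSetIterator;
import org.nd4j.linalg.dataset.DataSet;

// Re-batch a list of DataSets of arbitrary sizes into mini-batches of at most 32 examples
List<DataSet> source = Arrays.asList(dsA, dsB, dsC);   // assumed pre-built DataSets
IteratorDataSetIterator it = new IteratorDataSetIterator(source.iterator(), 32);
while (it.hasNext()) {
    DataSet miniBatch = it.next();   // at most 32 examples per call
    // ... train or evaluate on miniBatch
}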

Example 70 with DataSet

use of org.nd4j.linalg.dataset.DataSet in project deeplearning4j by deeplearning4j.

the class ReconstructionDataSetIterator method next.

/**
     * Returns the next element in the iteration.
     *
     * @return the next element in the iteration
     */
@Override
public DataSet next() {
    DataSet next = iter.next();
    next.setLabels(next.getFeatureMatrix());
    return next;
}
Also used : DataSet(org.nd4j.linalg.dataset.DataSet)
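
Setting the labels to the feature matrix turns the wrapped stream into reconstruction (autoencoder-style) targets. The equivalent one-off operation on a single DataSet, shown as a hedged sketch:

// Turn a supervised DataSet into a reconstruction target: labels become a copy of the features
DataSet ds = iter.next();                       // 'iter' is any underlying DataSetIterator (assumed)
ds.setLabels(ds.getFeatureMatrix().dup());      // dup() keeps labels independent of later feature edits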

Aggregations

DataSet (org.nd4j.linalg.dataset.DataSet) 334
Test (org.junit.Test) 226
INDArray (org.nd4j.linalg.api.ndarray.INDArray) 194
MultiLayerNetwork (org.deeplearning4j.nn.multilayer.MultiLayerNetwork) 93
DataSetIterator (org.nd4j.linalg.dataset.api.iterator.DataSetIterator) 82
NeuralNetConfiguration (org.deeplearning4j.nn.conf.NeuralNetConfiguration) 79
MultiLayerConfiguration (org.deeplearning4j.nn.conf.MultiLayerConfiguration) 73
IrisDataSetIterator (org.deeplearning4j.datasets.iterator.impl.IrisDataSetIterator) 62
ArrayList (java.util.ArrayList) 50
MnistDataSetIterator (org.deeplearning4j.datasets.iterator.impl.MnistDataSetIterator) 41
ScoreIterationListener (org.deeplearning4j.optimize.listeners.ScoreIterationListener) 38
BaseSparkTest (org.deeplearning4j.spark.BaseSparkTest) 34
OutputLayer (org.deeplearning4j.nn.conf.layers.OutputLayer) 32
DenseLayer (org.deeplearning4j.nn.conf.layers.DenseLayer) 31
MultiDataSet (org.nd4j.linalg.dataset.MultiDataSet) 31
ComputationGraph (org.deeplearning4j.nn.graph.ComputationGraph) 25
SequenceRecordReader (org.datavec.api.records.reader.SequenceRecordReader) 24
ComputationGraphConfiguration (org.deeplearning4j.nn.conf.ComputationGraphConfiguration) 24
CSVSequenceRecordReader (org.datavec.api.records.reader.impl.csv.CSVSequenceRecordReader) 23
ClassPathResource (org.nd4j.linalg.io.ClassPathResource) 23