
Example 6 with MultiDataSet

Use of org.nd4j.linalg.dataset.MultiDataSet in project deeplearning4j by deeplearning4j.

From the class ComputationGraphTestRNN, method testTbpttMasking.

@Test
public void testTbpttMasking() {
    //Simple "does it throw an exception" type test...
    ComputationGraphConfiguration conf = new NeuralNetConfiguration.Builder().iterations(1).seed(12345)
                    .graphBuilder()
                    .addInputs("in")
                    .addLayer("out", new RnnOutputLayer.Builder(LossFunctions.LossFunction.MSE)
                                    .activation(Activation.IDENTITY).nIn(1).nOut(1).build(), "in")
                    .setOutputs("out")
                    .backpropType(BackpropType.TruncatedBPTT)
                    .tBPTTForwardLength(8).tBPTTBackwardLength(8)
                    .build();
    ComputationGraph net = new ComputationGraph(conf);
    net.init();
    //Features and labels: shape [miniBatch=1, size=1, timeSeriesLength=10]; no features mask; labels mask of ones
    MultiDataSet data = new MultiDataSet(
                    new INDArray[] { Nd4j.linspace(1, 10, 10).reshape(1, 1, 10) },
                    new INDArray[] { Nd4j.linspace(2, 20, 10).reshape(1, 1, 10) },
                    null,
                    new INDArray[] { Nd4j.ones(10) });
    net.fit(data);
}
Also used : MultiDataSet(org.nd4j.linalg.dataset.MultiDataSet) ComputationGraphConfiguration(org.deeplearning4j.nn.conf.ComputationGraphConfiguration) Test(org.junit.Test)
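
For context, the MultiDataSet constructor used above takes four parallel arrays: features, labels, features masks, and labels masks. A minimal standalone sketch of the same pattern (shapes, values, and the class name are illustrative, not taken from the deeplearning4j tests):

import org.nd4j.linalg.api.ndarray.INDArray;
import org.nd4j.linalg.dataset.MultiDataSet;
import org.nd4j.linalg.factory.Nd4j;

public class MultiDataSetSketch {
    public static void main(String[] args) {
        //Features: one input array of shape [miniBatchSize=1, nIn=1, timeSeriesLength=10]
        INDArray features = Nd4j.linspace(1, 10, 10).reshape(1, 1, 10);
        //Labels: one output array of the same shape
        INDArray labels = Nd4j.linspace(2, 20, 10).reshape(1, 1, 10);
        //Labels mask: 1.0 = include that time step in the loss, 0.0 = mask it out
        INDArray labelsMask = Nd4j.ones(1, 10);
        //Constructor: (features[], labels[], featuresMaskArrays[], labelsMaskArrays[]); null = no masks
        MultiDataSet mds = new MultiDataSet(new INDArray[] { features }, new INDArray[] { labels },
                        null, new INDArray[] { labelsMask });
        System.out.println(mds);
    }
}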

Example 7 with MultiDataSet

Use of org.nd4j.linalg.dataset.MultiDataSet in project deeplearning4j by deeplearning4j.

From the class GradientCheckUtil, method checkGradients.

/** Check backprop gradients for a ComputationGraph.
     * @param graph ComputationGraph to test. This must be initialized.
     * @param epsilon Usually on the order of 1e-4 or so.
     * @param maxRelError Maximum relative error. Usually < 0.01, though maybe more for deep networks
     * @param minAbsoluteError Minimum absolute error to cause a failure. Numerical gradients can be non-zero due to precision issues.
     *                         For example, 0.0 vs. 1e-18: relative error is 1.0, but not really a failure
     * @param print Whether to print full pass/failure details for each parameter gradient
     * @param exitOnFirstError If true: return upon first failure. If false: continue checking even if
     *  one parameter gradient has failed. Typically use false for debugging, true for unit tests.
     * @param inputs Input arrays to use for forward pass. May be mini-batch data.
     * @param labels Labels/targets (output) arrays to use to calculate backprop gradient. May be mini-batch data.
     * @return true if all gradient checks pass, false otherwise.
     */
public static boolean checkGradients(ComputationGraph graph, double epsilon, double maxRelError, double minAbsoluteError, boolean print, boolean exitOnFirstError, INDArray[] inputs, INDArray[] labels) {
    //Basic sanity checks on input:
    if (epsilon <= 0.0 || epsilon > 0.1)
        throw new IllegalArgumentException("Invalid epsilon: expect epsilon in range (0,0.1], usually 1e-4 or so");
    if (maxRelError <= 0.0 || maxRelError > 0.25)
        throw new IllegalArgumentException("Invalid maxRelativeError: " + maxRelError);
    if (graph.getNumInputArrays() != inputs.length)
        throw new IllegalArgumentException("Invalid input arrays: expect " + graph.getNumInputArrays() + " inputs");
    if (graph.getNumOutputArrays() != labels.length)
        throw new IllegalArgumentException("Invalid labels arrays: expect " + graph.getNumOutputArrays() + " outputs");
    //Check configuration
    for (String vertexName : graph.getConfiguration().getVertices().keySet()) {
        GraphVertex gv = graph.getConfiguration().getVertices().get(vertexName);
        if (!(gv instanceof LayerVertex))
            continue;
        LayerVertex lv = (LayerVertex) gv;
        org.deeplearning4j.nn.conf.Updater u = lv.getLayerConf().getLayer().getUpdater();
        if (u == org.deeplearning4j.nn.conf.Updater.SGD) {
            //Must have LR of 1.0
            double lr = lv.getLayerConf().getLayer().getLearningRate();
            if (lr != 1.0) {
                throw new IllegalStateException("When using SGD updater, must also use lr=1.0 for layer \"" + vertexName + "\"; got " + u);
            }
        } else if (u != org.deeplearning4j.nn.conf.Updater.NONE) {
            throw new IllegalStateException("Must have Updater.NONE (or SGD + lr=1.0) for layer \"" + vertexName + "\"; got " + u);
        }
        double dropout = lv.getLayerConf().getLayer().getDropOut();
        if (lv.getLayerConf().isUseRegularization() && dropout != 0.0) {
            throw new IllegalStateException("Must have dropout == 0.0 for gradient checks - got dropout = " + dropout + " for layer " + layerCount);
        }
        IActivation activation = lv.getLayerConf().getLayer().getActivationFn();
        if (activation != null) {
            if (!VALID_ACTIVATION_FUNCTIONS.contains(activation.getClass())) {
                log.warn("Layer \"" + vertexName + "\" is possibly using an unsuitable activation function: " + activation.getClass() + ". Activation functions for gradient checks must be smooth (like sigmoid, tanh, softmax) and not " + "contain discontinuities like ReLU or LeakyReLU (these may cause spurious failures)");
            }
        }
    }
    for (int i = 0; i < inputs.length; i++) graph.setInput(i, inputs[i]);
    for (int i = 0; i < labels.length; i++) graph.setLabel(i, labels[i]);
    graph.computeGradientAndScore();
    Pair<Gradient, Double> gradAndScore = graph.gradientAndScore();
    ComputationGraphUpdater updater = new ComputationGraphUpdater(graph);
    updater.update(graph, gradAndScore.getFirst(), 0, graph.batchSize());
    //need dup: gradients are a *view* of the full gradient array (which will change every time backprop is done)
    INDArray gradientToCheck = gradAndScore.getFirst().gradient().dup();
    //need dup: params are a *view* of full parameters
    INDArray originalParams = graph.params().dup();
    int nParams = originalParams.length();
    Map<String, INDArray> paramTable = graph.paramTable();
    List<String> paramNames = new ArrayList<>(paramTable.keySet());
    int[] paramEnds = new int[paramNames.size()];
    paramEnds[0] = paramTable.get(paramNames.get(0)).length();
    for (int i = 1; i < paramEnds.length; i++) {
        paramEnds[i] = paramEnds[i - 1] + paramTable.get(paramNames.get(i)).length();
    }
    int currParamNameIdx = 0;
    int totalNFailures = 0;
    double maxError = 0.0;
    MultiDataSet mds = new MultiDataSet(inputs, labels);
    //Assumption here: params is a view that we can modify in-place
    INDArray params = graph.params();
    for (int i = 0; i < nParams; i++) {
        //Get param name
        if (i >= paramEnds[currParamNameIdx]) {
            currParamNameIdx++;
        }
        String paramName = paramNames.get(currParamNameIdx);
        //(w+epsilon): Do forward pass and score
        double origValue = params.getDouble(i);
        params.putScalar(i, origValue + epsilon);
        //training == true for batch norm, etc (scores and gradients need to be calculated on same thing)
        double scorePlus = graph.score(mds, true);
        //(w-epsilon): Do forward pass and score
        params.putScalar(i, origValue - epsilon);
        double scoreMinus = graph.score(mds, true);
        //Reset original param value
        params.putScalar(i, origValue);
        //Calculate numerical parameter gradient:
        double scoreDelta = scorePlus - scoreMinus;
        double numericalGradient = scoreDelta / (2 * epsilon);
        if (Double.isNaN(numericalGradient))
            throw new IllegalStateException("Numerical gradient was NaN for parameter " + i + " of " + nParams);
        double backpropGradient = gradientToCheck.getDouble(i);
        //Relative error: |g_backprop - g_numerical| / (|g_numerical| + |g_backprop|)
        //See http://cs231n.github.io/neural-networks-3/#gradcheck
        double relError = Math.abs(backpropGradient - numericalGradient) / (Math.abs(numericalGradient) + Math.abs(backpropGradient));
        if (backpropGradient == 0.0 && numericalGradient == 0.0)
            //Edge case: both gradients exactly zero (e.g., RNNs with time series length 1); avoid 0/0 = NaN
            relError = 0.0;
        if (relError > maxError)
            maxError = relError;
        if (relError > maxRelError || Double.isNaN(relError)) {
            double absError = Math.abs(backpropGradient - numericalGradient);
            if (absError < minAbsoluteError) {
                log.info("Param " + i + " (" + paramName + ") passed: grad= " + backpropGradient + ", numericalGrad= " + numericalGradient + ", relError= " + relError + "; absolute error = " + absError + " < minAbsoluteError = " + minAbsoluteError);
            } else {
                if (print)
                    log.info("Param " + i + " (" + paramName + ") FAILED: grad= " + backpropGradient + ", numericalGrad= " + numericalGradient + ", relError= " + relError + ", scorePlus=" + scorePlus + ", scoreMinus= " + scoreMinus);
                if (exitOnFirstError)
                    return false;
                totalNFailures++;
            }
        } else if (print) {
            log.info("Param " + i + " (" + paramName + ") passed: grad= " + backpropGradient + ", numericalGrad= " + numericalGradient + ", relError= " + relError);
        }
    }
    if (print) {
        int nPass = nParams - totalNFailures;
        log.info("GradientCheckUtil.checkGradients(): " + nParams + " params checked, " + nPass + " passed, " + totalNFailures + " failed. Largest relative error = " + maxError);
    }
    return totalNFailures == 0;
}
Also used : LayerVertex(org.deeplearning4j.nn.conf.graph.LayerVertex) Gradient(org.deeplearning4j.nn.gradient.Gradient) ComputationGraphUpdater(org.deeplearning4j.nn.updater.graph.ComputationGraphUpdater) ArrayList(java.util.ArrayList) IActivation(org.nd4j.linalg.activations.IActivation) GraphVertex(org.deeplearning4j.nn.conf.graph.GraphVertex) INDArray(org.nd4j.linalg.api.ndarray.INDArray) MultiDataSet(org.nd4j.linalg.dataset.MultiDataSet)
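
For reference, a hedged sketch of how this utility might be called from a test. Here graph stands for an initialized ComputationGraph built to satisfy the constraints checked above (Updater.NONE or SGD with lr=1.0, no dropout with regularization, smooth activations), and the input/label shapes are illustrative and must match the graph:

//Central difference (f(w+eps) - f(w-eps)) / (2*eps) is compared against the backprop gradient
INDArray[] inputs = new INDArray[] { Nd4j.rand(new int[] { 5, 4 }) };
INDArray[] labels = new INDArray[] { Nd4j.rand(new int[] { 5, 3 }) };
boolean gradOK = GradientCheckUtil.checkGradients(graph,
                1e-6,   //epsilon
                1e-3,   //maxRelError
                1e-8,   //minAbsoluteError: tiny absolute differences should not fail
                true,   //print per-parameter results
                false,  //continue past the first failure (useful when debugging)
                inputs, labels);
assertTrue(gradOK);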

Example 8 with MultiDataSet

Use of org.nd4j.linalg.dataset.MultiDataSet in project deeplearning4j by deeplearning4j.

From the class TransferLearningHelper, method featurize.

/**
     * During training, frozen vertices/layers can be treated as "featurizing" the input.
     * The forward pass through these frozen layers/vertices can be done in advance, and the dataset saved to disk,
     * allowing quick iteration on the smaller unfrozen part of the model.
     * Currently does not support datasets with feature masks.
     *
     * @param input DataSet to feed into the computation graph with frozen layer vertices
     * @return a DataSet with input features that are the outputs of the frozen layer vertices, and the original labels.
     */
public DataSet featurize(DataSet input) {
    if (isGraph) {
        //trying to featurize for a computation graph
        if (origGraph.getNumInputArrays() > 1 || origGraph.getNumOutputArrays() > 1) {
            throw new IllegalArgumentException("Input or output size to a computation graph is greater than one. Requires use of a MultiDataSet.");
        } else {
            if (input.getFeaturesMaskArray() != null) {
                throw new IllegalArgumentException("Currently cannot support featurizing datasets with feature masks");
            }
            MultiDataSet inbW = new MultiDataSet(new INDArray[] { input.getFeatures() }, new INDArray[] { input.getLabels() }, null, new INDArray[] { input.getLabelsMaskArray() });
            MultiDataSet ret = featurize(inbW);
            return new DataSet(ret.getFeatures()[0], input.getLabels(), ret.getLabelsMaskArrays()[0], input.getLabelsMaskArray());
        }
    } else {
        if (input.getFeaturesMaskArray() != null)
            throw new UnsupportedOperationException("Feature masks not supported with featurizing currently");
        return new DataSet(origMLN.feedForwardToLayer(frozenInputLayer + 1, input.getFeatures(), false).get(frozenInputLayer + 1), input.getLabels(), null, input.getLabelsMaskArray());
    }
}
Also used : MultiDataSet(org.nd4j.linalg.dataset.MultiDataSet) DataSet(org.nd4j.linalg.dataset.DataSet) MultiDataSet(org.nd4j.linalg.dataset.MultiDataSet)
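
A hedged sketch of the surrounding transfer-learning workflow. The TransferLearningHelper constructor shown here (taking the name of the vertex at which to freeze) and the fitFeaturized call are assumptions about the API that should be checked against your DL4J version; pretrainedGraph and trainingData are hypothetical:

//Hypothetical: freeze everything up to and including vertex "fc1" of a pretrained graph
TransferLearningHelper helper = new TransferLearningHelper(pretrainedGraph, "fc1");
//One-off forward pass through the frozen layers; the result can be saved and reused
DataSet featurized = helper.featurize(trainingData);
//Trains only the unfrozen portion, with the featurized activations as input
helper.fitFeaturized(featurized);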

Example 9 with MultiDataSet

Use of org.nd4j.linalg.dataset.MultiDataSet in project deeplearning4j by deeplearning4j.

From the class TestPreProcessedData, method testPreprocessedDataCompGraphMultiDataSet.

@Test
public void testPreprocessedDataCompGraphMultiDataSet() throws IOException {
    //Test _loading_ of preprocessed MultiDataSet data
    int dataSetObjSize = 5;
    int batchSizePerExecutor = 10;
    String path = FilenameUtils.concat(System.getProperty("java.io.tmpdir"), "dl4j_testpreprocdata3");
    File f = new File(path);
    if (f.exists())
        f.delete();
    f.mkdir();
    DataSetIterator iter = new IrisDataSetIterator(5, 150);
    int i = 0;
    while (iter.hasNext()) {
        File f2 = new File(FilenameUtils.concat(path, "data" + (i++) + ".bin"));
        DataSet ds = iter.next();
        MultiDataSet mds = new MultiDataSet(ds.getFeatures(), ds.getLabels());
        mds.save(f2);
    }
    ComputationGraphConfiguration conf = new NeuralNetConfiguration.Builder().updater(Updater.RMSPROP)
                    .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT).iterations(1)
                    .graphBuilder()
                    .addInputs("in")
                    .addLayer("0", new org.deeplearning4j.nn.conf.layers.DenseLayer.Builder().nIn(4).nOut(3)
                                    .activation(Activation.TANH).build(), "in")
                    .addLayer("1", new org.deeplearning4j.nn.conf.layers.OutputLayer.Builder(LossFunctions.LossFunction.MCXENT)
                                    .nIn(3).nOut(3).activation(Activation.SOFTMAX).build(), "0")
                    .setOutputs("1").pretrain(false).backprop(true).build();
    SparkComputationGraph sparkNet = new SparkComputationGraph(sc, conf,
                    new ParameterAveragingTrainingMaster.Builder(numExecutors(), dataSetObjSize)
                                    .batchSizePerWorker(batchSizePerExecutor).averagingFrequency(1)
                                    .repartionData(Repartition.Always).build());
    sparkNet.setCollectTrainingStats(true);
    sparkNet.fitMultiDataSet("file:///" + path.replaceAll("\\\\", "/"));
    SparkTrainingStats sts = sparkNet.getSparkTrainingStats();
    //4 'fits' per averaging (4 executors, averaging frequency 1), 10 examples per fit -> 40 examples per averaging round
    //150 examples / 40 = 3 averaging rounds (round down); 3 rounds * 4 fits = 12 fits expected
    int expNumFits = 12;
    //Unfortunately: perfect partitioning isn't guaranteed by SparkUtils.balancedRandomSplit (esp. if original partitions are all size 1
    // which appears to be occurring at least some of the time), but we should get close to what we expect...
    assertTrue(Math.abs(expNumFits - sts.getValue("ParameterAveragingWorkerFitTimesMs").size()) < 3);
    assertEquals(3, sts.getValue("ParameterAveragingMasterMapPartitionsTimesMs").size());
}
Also used : SparkComputationGraph(org.deeplearning4j.spark.impl.graph.SparkComputationGraph) IrisDataSetIterator(org.deeplearning4j.datasets.iterator.impl.IrisDataSetIterator) MultiDataSet(org.nd4j.linalg.dataset.MultiDataSet) DataSet(org.nd4j.linalg.dataset.DataSet) NeuralNetConfiguration(org.deeplearning4j.nn.conf.NeuralNetConfiguration) ParameterAveragingTrainingMaster(org.deeplearning4j.spark.impl.paramavg.ParameterAveragingTrainingMaster) SparkTrainingStats(org.deeplearning4j.spark.api.stats.SparkTrainingStats) MultiDataSet(org.nd4j.linalg.dataset.MultiDataSet) ComputationGraphConfiguration(org.deeplearning4j.nn.conf.ComputationGraphConfiguration) File(java.io.File) IrisDataSetIterator(org.deeplearning4j.datasets.iterator.impl.IrisDataSetIterator) DataSetIterator(org.nd4j.linalg.dataset.api.iterator.DataSetIterator) PortableDataStreamDataSetIterator(org.deeplearning4j.spark.iterator.PortableDataStreamDataSetIterator) BaseSparkTest(org.deeplearning4j.spark.BaseSparkTest) Test(org.junit.Test)
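
The mds.save(f2) call above writes each MultiDataSet to a single binary file; a matching load restores it. A minimal round-trip sketch (the file name is illustrative, and load(File) is assumed to be available on org.nd4j.linalg.dataset.MultiDataSet in this version):

//Hypothetical round trip for one of the files written in the loop above
File saved = new File(FilenameUtils.concat(path, "data0.bin"));
MultiDataSet restored = new MultiDataSet();
restored.load(saved);   //counterpart to mds.save(f2)
//restored.getFeatures(0) and restored.getLabels(0) now hold the saved arrays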

Example 10 with MultiDataSet

Use of org.nd4j.linalg.dataset.MultiDataSet in project deeplearning4j by deeplearning4j.

From the class TestSparkMultiLayerParameterAveraging, method testFitViaStringPathsCompGraph.

@Test
public void testFitViaStringPathsCompGraph() throws Exception {
    Path tempDir = Files.createTempDirectory("DL4J-testFitViaStringPathsCG");
    Path tempDir2 = Files.createTempDirectory("DL4J-testFitViaStringPathsCG-MDS");
    File tempDirF = tempDir.toFile();
    File tempDirF2 = tempDir2.toFile();
    tempDirF.deleteOnExit();
    tempDirF2.deleteOnExit();
    int dataSetObjSize = 5;
    int batchSizePerExecutor = 25;
    DataSetIterator iter = new MnistDataSetIterator(dataSetObjSize, 1000, false);
    int i = 0;
    while (iter.hasNext()) {
        File nextFile = new File(tempDirF, i + ".bin");
        File nextFile2 = new File(tempDirF2, i + ".bin");
        DataSet ds = iter.next();
        MultiDataSet mds = new MultiDataSet(ds.getFeatures(), ds.getLabels());
        ds.save(nextFile);
        mds.save(nextFile2);
        i++;
    }
    System.out.println("Saved to: " + tempDirF.getAbsolutePath());
    System.out.println("Saved to: " + tempDirF2.getAbsolutePath());
    ComputationGraphConfiguration conf = new NeuralNetConfiguration.Builder().updater(Updater.RMSPROP)
                    .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT).iterations(1)
                    .graphBuilder()
                    .addInputs("in")
                    .addLayer("0", new org.deeplearning4j.nn.conf.layers.DenseLayer.Builder().nIn(28 * 28).nOut(50)
                                    .activation(Activation.TANH).build(), "in")
                    .addLayer("1", new org.deeplearning4j.nn.conf.layers.OutputLayer.Builder(LossFunctions.LossFunction.MCXENT)
                                    .nIn(50).nOut(10).activation(Activation.SOFTMAX).build(), "0")
                    .setOutputs("1").pretrain(false).backprop(true).build();
    SparkComputationGraph sparkNet = new SparkComputationGraph(sc, conf,
                    new ParameterAveragingTrainingMaster.Builder(numExecutors(), dataSetObjSize)
                                    .workerPrefetchNumBatches(0)    //Original chained .workerPrefetchNumBatches(5).workerPrefetchNumBatches(0); the second call wins
                                    .batchSizePerWorker(batchSizePerExecutor).averagingFrequency(1)
                                    .repartionData(Repartition.Always).build());
    sparkNet.setCollectTrainingStats(true);
    //List files:
    Configuration config = new Configuration();
    FileSystem hdfs = FileSystem.get(tempDir.toUri(), config);
    RemoteIterator<LocatedFileStatus> fileIter = hdfs.listFiles(new org.apache.hadoop.fs.Path(tempDir.toString()), false);
    List<String> paths = new ArrayList<>();
    while (fileIter.hasNext()) {
        String path = fileIter.next().getPath().toString();
        paths.add(path);
    }
    INDArray paramsBefore = sparkNet.getNetwork().params().dup();
    JavaRDD<String> pathRdd = sc.parallelize(paths);
    sparkNet.fitPaths(pathRdd);
    INDArray paramsAfter = sparkNet.getNetwork().params().dup();
    assertNotEquals(paramsBefore, paramsAfter);
    SparkTrainingStats stats = sparkNet.getSparkTrainingStats();
    System.out.println(stats.statsAsString());
    //Same thing, but for MultiDataSet objects:
    config = new Configuration();
    hdfs = FileSystem.get(tempDir2.toUri(), config);
    fileIter = hdfs.listFiles(new org.apache.hadoop.fs.Path(tempDir2.toString()), false);
    paths = new ArrayList<>();
    while (fileIter.hasNext()) {
        String path = fileIter.next().getPath().toString();
        paths.add(path);
    }
    paramsBefore = sparkNet.getNetwork().params().dup();
    pathRdd = sc.parallelize(paths);
    sparkNet.fitPathsMultiDataSet(pathRdd);
    paramsAfter = sparkNet.getNetwork().params().dup();
    assertNotEquals(paramsBefore, paramsAfter);
    stats = sparkNet.getSparkTrainingStats();
    System.out.println(stats.statsAsString());
}
Also used : OutputLayer(org.deeplearning4j.nn.conf.layers.OutputLayer) SparkComputationGraph(org.deeplearning4j.spark.impl.graph.SparkComputationGraph) Configuration(org.apache.hadoop.conf.Configuration) ComputationGraphConfiguration(org.deeplearning4j.nn.conf.ComputationGraphConfiguration) NeuralNetConfiguration(org.deeplearning4j.nn.conf.NeuralNetConfiguration) MultiLayerConfiguration(org.deeplearning4j.nn.conf.MultiLayerConfiguration) MultiDataSet(org.nd4j.linalg.dataset.MultiDataSet) DataSet(org.nd4j.linalg.dataset.DataSet) SparkTrainingStats(org.deeplearning4j.spark.api.stats.SparkTrainingStats) FileSystem(org.apache.hadoop.fs.FileSystem) Path(java.nio.file.Path) MnistDataSetIterator(org.deeplearning4j.datasets.iterator.impl.MnistDataSetIterator) LocatedFileStatus(org.apache.hadoop.fs.LocatedFileStatus) NeuralNetConfiguration(org.deeplearning4j.nn.conf.NeuralNetConfiguration) LabeledPoint(org.apache.spark.mllib.regression.LabeledPoint) MultiDataSet(org.nd4j.linalg.dataset.MultiDataSet) INDArray(org.nd4j.linalg.api.ndarray.INDArray) ComputationGraphConfiguration(org.deeplearning4j.nn.conf.ComputationGraphConfiguration) File(java.io.File) IrisDataSetIterator(org.deeplearning4j.datasets.iterator.impl.IrisDataSetIterator) DataSetIterator(org.nd4j.linalg.dataset.api.iterator.DataSetIterator) MnistDataSetIterator(org.deeplearning4j.datasets.iterator.impl.MnistDataSetIterator) BaseSparkTest(org.deeplearning4j.spark.BaseSparkTest) Test(org.junit.Test)
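
Both Spark examples wrap a DataSet into a MultiDataSet via the two-argument constructor, which copies features and labels but drops any mask arrays. A small helper makes that explicit; ds.toMultiDataSet(), where your ND4J version provides it, is assumed to also carry the masks over:

//Manual conversion, as in the loops above (masks, if any, are not copied)
static MultiDataSet toMultiDataSet(DataSet ds) {
    return new MultiDataSet(ds.getFeatures(), ds.getLabels());
}
//If available in your ND4J version, this should also preserve feature/label masks:
//MultiDataSet mds = ds.toMultiDataSet();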

Aggregations

MultiDataSet (org.nd4j.linalg.dataset.MultiDataSet) 10
ComputationGraphConfiguration (org.deeplearning4j.nn.conf.ComputationGraphConfiguration) 8
Test (org.junit.Test) 8
NeuralNetConfiguration (org.deeplearning4j.nn.conf.NeuralNetConfiguration) 6
DenseLayer (org.deeplearning4j.nn.conf.layers.DenseLayer) 5
INDArray (org.nd4j.linalg.api.ndarray.INDArray) 5
MergeVertex (org.deeplearning4j.nn.conf.graph.MergeVertex) 4
ComputationGraph (org.deeplearning4j.nn.graph.ComputationGraph) 4
DataSet (org.nd4j.linalg.dataset.DataSet) 4
OutputLayer (org.deeplearning4j.nn.conf.layers.OutputLayer) 3
DataSetIterator (org.nd4j.linalg.dataset.api.iterator.DataSetIterator) 3
File (java.io.File) 2
IrisDataSetIterator (org.deeplearning4j.datasets.iterator.impl.IrisDataSetIterator) 2
BaseSparkTest (org.deeplearning4j.spark.BaseSparkTest) 2
SparkTrainingStats (org.deeplearning4j.spark.api.stats.SparkTrainingStats) 2
SparkComputationGraph (org.deeplearning4j.spark.impl.graph.SparkComputationGraph) 2
Path (java.nio.file.Path) 1
ArrayList (java.util.ArrayList) 1
Configuration (org.apache.hadoop.conf.Configuration) 1
FileSystem (org.apache.hadoop.fs.FileSystem) 1