Search in sources:

Example 11 with SparkTrainingStats

use of org.deeplearning4j.spark.api.stats.SparkTrainingStats in project deeplearning4j by deeplearning4j.

the class TestSparkMultiLayerParameterAveraging method testFitViaStringPathsSize1.

/**
 * Saves MNIST {@link DataSet} objects to a temporary directory, fits a
 * {@link SparkDl4jMultiLayer} from an RDD of the file paths via {@code fitPaths}, and then
 * checks that the parameters changed and that the collected {@link SparkTrainingStats}
 * contain the expected number of repartition and worker-fit events.
 *
 * @throws Exception if creating, writing or listing the temporary files fails, or if
 *                   training fails
 */
@Test
public void testFitViaStringPathsSize1() throws Exception {
    Path tempDir = Files.createTempDirectory("DL4J-testFitViaStringPathsSize1");
    File tempDirF = tempDir.toFile();
    // Registered first so that it is removed last: delete-on-exit entries are processed in
    // reverse registration order, and a directory can only be deleted once it is empty.
    tempDirF.deleteOnExit();

    int dataSetObjSize = 1;
    int batchSizePerExecutor = 25;
    int numSplits = 10;
    int averagingFrequency = 3;
    int totalExamples = numExecutors() * batchSizePerExecutor * numSplits * averagingFrequency;

    // Write every DataSet produced by the iterator to its own "<index>.bin" file
    DataSetIterator iter = new MnistDataSetIterator(dataSetObjSize, totalExamples, false);
    int i = 0;
    while (iter.hasNext()) {
        File nextFile = new File(tempDirF, i + ".bin");
        // Without this the directory stays non-empty at JVM exit and is never cleaned up
        nextFile.deleteOnExit();
        DataSet ds = iter.next();
        ds.save(nextFile);
        i++;
    }
    System.out.println("Saved to: " + tempDirF.getAbsolutePath());

    MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder()
                    .updater(Updater.RMSPROP)
                    .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT)
                    .iterations(1)
                    .list()
                    .layer(0, new org.deeplearning4j.nn.conf.layers.DenseLayer.Builder().nIn(28 * 28).nOut(50).activation(Activation.TANH).build())
                    .layer(1, new org.deeplearning4j.nn.conf.layers.OutputLayer.Builder(LossFunctions.LossFunction.MCXENT).nIn(50).nOut(10).activation(Activation.SOFTMAX).build())
                    .pretrain(false)
                    .backprop(true)
                    .build();

    SparkDl4jMultiLayer sparkNet = new SparkDl4jMultiLayer(sc, conf,
                    new ParameterAveragingTrainingMaster.Builder(numExecutors(), dataSetObjSize)
                                    .workerPrefetchNumBatches(5)
                                    .batchSizePerWorker(batchSizePerExecutor)
                                    .averagingFrequency(averagingFrequency)
                                    .repartionData(Repartition.Always)
                                    .build());
    sparkNet.setCollectTrainingStats(true);

    // List the saved files (non-recursively) through the Hadoop FileSystem API
    Configuration config = new Configuration();
    FileSystem hdfs = FileSystem.get(tempDir.toUri(), config);
    RemoteIterator<LocatedFileStatus> fileIter = hdfs.listFiles(new org.apache.hadoop.fs.Path(tempDir.toString()), false);
    List<String> paths = new ArrayList<>();
    while (fileIter.hasNext()) {
        String path = fileIter.next().getPath().toString();
        paths.add(path);
    }

    // Fitting must change the network parameters
    INDArray paramsBefore = sparkNet.getNetwork().params().dup();
    JavaRDD<String> pathRdd = sc.parallelize(paths);
    sparkNet.fitPaths(pathRdd);
    INDArray paramsAfter = sparkNet.getNetwork().params().dup();
    assertNotEquals(paramsBefore, paramsAfter);

    // NOTE(review): purpose of this pause is not visible here - presumably it lets
    // asynchronous work settle before the stats are read; confirm before removing.
    Thread.sleep(2000);
    SparkTrainingStats stats = sparkNet.getSparkTrainingStats();

    // Expect one repartition event per split and one fit event per
    // (split, executor, averaging step) combination
    System.out.println(stats.statsAsString());
    assertEquals(numSplits, stats.getValue("ParameterAveragingMasterRepartitionTimesMs").size());
    List<EventStats> list = stats.getValue("ParameterAveragingWorkerFitTimesMs");
    assertEquals(numSplits * numExecutors() * averagingFrequency, list.size());
    for (EventStats es : list) {
        ExampleCountEventStats e = (ExampleCountEventStats) es;
        // Upper bound on the example count recorded for a single fit event
        assertTrue(batchSizePerExecutor * averagingFrequency - 10 >= e.getTotalExampleCount());
    }

    sparkNet.getTrainingMaster().deleteTempFiles(sc);
}
Also used : ExampleCountEventStats(org.deeplearning4j.spark.stats.ExampleCountEventStats) Configuration(org.apache.hadoop.conf.Configuration) ComputationGraphConfiguration(org.deeplearning4j.nn.conf.ComputationGraphConfiguration) NeuralNetConfiguration(org.deeplearning4j.nn.conf.NeuralNetConfiguration) MultiLayerConfiguration(org.deeplearning4j.nn.conf.MultiLayerConfiguration) MultiDataSet(org.nd4j.linalg.dataset.MultiDataSet) DataSet(org.nd4j.linalg.dataset.DataSet) SparkTrainingStats(org.deeplearning4j.spark.api.stats.SparkTrainingStats) ExampleCountEventStats(org.deeplearning4j.spark.stats.ExampleCountEventStats) EventStats(org.deeplearning4j.spark.stats.EventStats) MultiLayerConfiguration(org.deeplearning4j.nn.conf.MultiLayerConfiguration) FileSystem(org.apache.hadoop.fs.FileSystem) SparkDl4jMultiLayer(org.deeplearning4j.spark.impl.multilayer.SparkDl4jMultiLayer) Path(java.nio.file.Path) MnistDataSetIterator(org.deeplearning4j.datasets.iterator.impl.MnistDataSetIterator) LocatedFileStatus(org.apache.hadoop.fs.LocatedFileStatus) NeuralNetConfiguration(org.deeplearning4j.nn.conf.NeuralNetConfiguration) LabeledPoint(org.apache.spark.mllib.regression.LabeledPoint) DenseLayer(org.deeplearning4j.nn.conf.layers.DenseLayer) INDArray(org.nd4j.linalg.api.ndarray.INDArray) File(java.io.File) IrisDataSetIterator(org.deeplearning4j.datasets.iterator.impl.IrisDataSetIterator) DataSetIterator(org.nd4j.linalg.dataset.api.iterator.DataSetIterator) MnistDataSetIterator(org.deeplearning4j.datasets.iterator.impl.MnistDataSetIterator) BaseSparkTest(org.deeplearning4j.spark.BaseSparkTest) Test(org.junit.Test)

Example 12 with SparkTrainingStats

use of org.deeplearning4j.spark.api.stats.SparkTrainingStats in project deeplearning4j by deeplearning4j.

the class TestSparkMultiLayerParameterAveraging method testParameterAveragingMultipleExamplesPerDataSet.

/**
 * Fits a {@link SparkDl4jMultiLayer} on an RDD of MNIST {@link DataSet} objects with an
 * averaging frequency of 1, then verifies the number of map-partitions events and the
 * example count reported by every worker-fit event.
 *
 * @throws Exception if loading the data or training fails
 */
@Test
public void testParameterAveragingMultipleExamplesPerDataSet() throws Exception {
    int dataSetObjSize = 5;
    int batchSizePerExecutor = 25;

    // Collect every DataSet the iterator yields into memory
    List<DataSet> dataSets = new ArrayList<>();
    for (DataSetIterator mnist = new MnistDataSetIterator(dataSetObjSize, 1000, false); mnist.hasNext(); ) {
        dataSets.add(mnist.next());
    }

    MultiLayerConfiguration netConf = new NeuralNetConfiguration.Builder()
                    .updater(Updater.RMSPROP)
                    .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT)
                    .iterations(1)
                    .list()
                    .layer(0, new org.deeplearning4j.nn.conf.layers.DenseLayer.Builder()
                                    .nIn(28 * 28).nOut(50).activation(Activation.TANH).build())
                    .layer(1, new org.deeplearning4j.nn.conf.layers.OutputLayer.Builder(LossFunctions.LossFunction.MCXENT)
                                    .nIn(50).nOut(10).activation(Activation.SOFTMAX).build())
                    .pretrain(false)
                    .backprop(true)
                    .build();

    SparkDl4jMultiLayer sparkNet = new SparkDl4jMultiLayer(sc, netConf,
                    new ParameterAveragingTrainingMaster.Builder(numExecutors(), dataSetObjSize)
                                    .batchSizePerWorker(batchSizePerExecutor)
                                    .averagingFrequency(1)
                                    .repartionData(Repartition.Always)
                                    .build());
    sparkNet.setCollectTrainingStats(true);

    JavaRDD<DataSet> trainingRdd = sc.parallelize(dataSets);
    sparkNet.fit(trainingRdd);

    SparkTrainingStats trainingStats = sparkNet.getSparkTrainingStats();
    List<EventStats> mapPartitionStats = trainingStats.getValue("ParameterAveragingMasterMapPartitionsTimesMs");

    // For an averaging frequency of 1: total examples divided by examples consumed per split
    int totalExamples = dataSets.size() * dataSetObjSize;
    int examplesPerSplit = numExecutors() * batchSizePerExecutor;
    int expectedSplits = totalExamples / examplesPerSplit;
    assertEquals(expectedSplits, mapPartitionStats.size());

    List<EventStats> workerFitStats = trainingStats.getValue("ParameterAveragingWorkerFitTimesMs");

    // First pass: print every recorded example count before any assertion can abort the test
    for (EventStats fitEvent : workerFitStats) {
        System.out.println(((ExampleCountEventStats) fitEvent).getTotalExampleCount());
    }

    // Second pass: every fit event must report exactly one worker batch
    for (EventStats fitEvent : workerFitStats) {
        assertEquals(batchSizePerExecutor, ((ExampleCountEventStats) fitEvent).getTotalExampleCount());
    }
}
Also used : ExampleCountEventStats(org.deeplearning4j.spark.stats.ExampleCountEventStats) MultiDataSet(org.nd4j.linalg.dataset.MultiDataSet) DataSet(org.nd4j.linalg.dataset.DataSet) SparkTrainingStats(org.deeplearning4j.spark.api.stats.SparkTrainingStats) ExampleCountEventStats(org.deeplearning4j.spark.stats.ExampleCountEventStats) EventStats(org.deeplearning4j.spark.stats.EventStats) MultiLayerConfiguration(org.deeplearning4j.nn.conf.MultiLayerConfiguration) SparkDl4jMultiLayer(org.deeplearning4j.spark.impl.multilayer.SparkDl4jMultiLayer) MnistDataSetIterator(org.deeplearning4j.datasets.iterator.impl.MnistDataSetIterator) NeuralNetConfiguration(org.deeplearning4j.nn.conf.NeuralNetConfiguration) LabeledPoint(org.apache.spark.mllib.regression.LabeledPoint) DenseLayer(org.deeplearning4j.nn.conf.layers.DenseLayer) IrisDataSetIterator(org.deeplearning4j.datasets.iterator.impl.IrisDataSetIterator) DataSetIterator(org.nd4j.linalg.dataset.api.iterator.DataSetIterator) MnistDataSetIterator(org.deeplearning4j.datasets.iterator.impl.MnistDataSetIterator) BaseSparkTest(org.deeplearning4j.spark.BaseSparkTest) Test(org.junit.Test)

Example 13 with SparkTrainingStats

use of org.deeplearning4j.spark.api.stats.SparkTrainingStats in project deeplearning4j by deeplearning4j.

the class TestSparkMultiLayerParameterAveraging method testFitViaStringPathsCompGraph.

/**
 * Saves the same MNIST data as both {@link DataSet} and {@link MultiDataSet} files in two
 * temporary directories, then fits a {@link SparkComputationGraph} from RDDs of the file
 * paths - first via {@code fitPaths}, then via {@code fitPathsMultiDataSet} - asserting
 * after each fit that the network parameters changed.
 *
 * @throws Exception if creating, writing or listing the temporary files fails, or if
 *                   training fails
 */
@Test
public void testFitViaStringPathsCompGraph() throws Exception {
    Path tempDir = Files.createTempDirectory("DL4J-testFitViaStringPathsCG");
    Path tempDir2 = Files.createTempDirectory("DL4J-testFitViaStringPathsCG-MDS");
    File tempDirF = tempDir.toFile();
    File tempDirF2 = tempDir2.toFile();
    // Registered first so that they are removed last: delete-on-exit entries are processed
    // in reverse registration order, and a directory can only be deleted once it is empty.
    tempDirF.deleteOnExit();
    tempDirF2.deleteOnExit();

    int dataSetObjSize = 5;
    int batchSizePerExecutor = 25;

    // Write each DataSet, plus a MultiDataSet built from its features and labels,
    // to "<index>.bin" in the respective directory
    DataSetIterator iter = new MnistDataSetIterator(dataSetObjSize, 1000, false);
    int i = 0;
    while (iter.hasNext()) {
        File nextFile = new File(tempDirF, i + ".bin");
        File nextFile2 = new File(tempDirF2, i + ".bin");
        // Without these the directories stay non-empty at JVM exit and are never cleaned up
        nextFile.deleteOnExit();
        nextFile2.deleteOnExit();
        DataSet ds = iter.next();
        MultiDataSet mds = new MultiDataSet(ds.getFeatures(), ds.getLabels());
        ds.save(nextFile);
        mds.save(nextFile2);
        i++;
    }
    System.out.println("Saved to: " + tempDirF.getAbsolutePath());
    System.out.println("Saved to: " + tempDirF2.getAbsolutePath());

    ComputationGraphConfiguration conf = new NeuralNetConfiguration.Builder()
                    .updater(Updater.RMSPROP)
                    .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT)
                    .iterations(1)
                    .graphBuilder()
                    .addInputs("in")
                    .addLayer("0", new org.deeplearning4j.nn.conf.layers.DenseLayer.Builder().nIn(28 * 28).nOut(50).activation(Activation.TANH).build(), "in")
                    .addLayer("1", new org.deeplearning4j.nn.conf.layers.OutputLayer.Builder(LossFunctions.LossFunction.MCXENT).nIn(50).nOut(10).activation(Activation.SOFTMAX).build(), "0")
                    .setOutputs("1")
                    .pretrain(false)
                    .backprop(true)
                    .build();

    // The original chain called workerPrefetchNumBatches(5) and then immediately
    // workerPrefetchNumBatches(0); only the last call is kept, preserving the effective value.
    SparkComputationGraph sparkNet = new SparkComputationGraph(sc, conf,
                    new ParameterAveragingTrainingMaster.Builder(numExecutors(), dataSetObjSize)
                                    .workerPrefetchNumBatches(0)
                                    .batchSizePerWorker(batchSizePerExecutor)
                                    .averagingFrequency(1)
                                    .repartionData(Repartition.Always)
                                    .build());
    sparkNet.setCollectTrainingStats(true);

    // List the DataSet files (non-recursively) through the Hadoop FileSystem API
    Configuration config = new Configuration();
    FileSystem hdfs = FileSystem.get(tempDir.toUri(), config);
    RemoteIterator<LocatedFileStatus> fileIter = hdfs.listFiles(new org.apache.hadoop.fs.Path(tempDir.toString()), false);
    List<String> paths = new ArrayList<>();
    while (fileIter.hasNext()) {
        String path = fileIter.next().getPath().toString();
        paths.add(path);
    }

    // Fitting from DataSet paths must change the network parameters
    INDArray paramsBefore = sparkNet.getNetwork().params().dup();
    JavaRDD<String> pathRdd = sc.parallelize(paths);
    sparkNet.fitPaths(pathRdd);
    INDArray paramsAfter = sparkNet.getNetwork().params().dup();
    assertNotEquals(paramsBefore, paramsAfter);
    SparkTrainingStats stats = sparkNet.getSparkTrainingStats();
    System.out.println(stats.statsAsString());

    // Same thing, but for MultiDataSet objects:
    config = new Configuration();
    hdfs = FileSystem.get(tempDir2.toUri(), config);
    fileIter = hdfs.listFiles(new org.apache.hadoop.fs.Path(tempDir2.toString()), false);
    paths = new ArrayList<>();
    while (fileIter.hasNext()) {
        String path = fileIter.next().getPath().toString();
        paths.add(path);
    }

    paramsBefore = sparkNet.getNetwork().params().dup();
    pathRdd = sc.parallelize(paths);
    sparkNet.fitPathsMultiDataSet(pathRdd);
    paramsAfter = sparkNet.getNetwork().params().dup();
    assertNotEquals(paramsBefore, paramsAfter);
    stats = sparkNet.getSparkTrainingStats();
    System.out.println(stats.statsAsString());
}
Also used : OutputLayer(org.deeplearning4j.nn.conf.layers.OutputLayer) SparkComputationGraph(org.deeplearning4j.spark.impl.graph.SparkComputationGraph) Configuration(org.apache.hadoop.conf.Configuration) ComputationGraphConfiguration(org.deeplearning4j.nn.conf.ComputationGraphConfiguration) NeuralNetConfiguration(org.deeplearning4j.nn.conf.NeuralNetConfiguration) MultiLayerConfiguration(org.deeplearning4j.nn.conf.MultiLayerConfiguration) MultiDataSet(org.nd4j.linalg.dataset.MultiDataSet) DataSet(org.nd4j.linalg.dataset.DataSet) SparkTrainingStats(org.deeplearning4j.spark.api.stats.SparkTrainingStats) FileSystem(org.apache.hadoop.fs.FileSystem) Path(java.nio.file.Path) MnistDataSetIterator(org.deeplearning4j.datasets.iterator.impl.MnistDataSetIterator) LocatedFileStatus(org.apache.hadoop.fs.LocatedFileStatus) NeuralNetConfiguration(org.deeplearning4j.nn.conf.NeuralNetConfiguration) LabeledPoint(org.apache.spark.mllib.regression.LabeledPoint) MultiDataSet(org.nd4j.linalg.dataset.MultiDataSet) INDArray(org.nd4j.linalg.api.ndarray.INDArray) ComputationGraphConfiguration(org.deeplearning4j.nn.conf.ComputationGraphConfiguration) File(java.io.File) IrisDataSetIterator(org.deeplearning4j.datasets.iterator.impl.IrisDataSetIterator) DataSetIterator(org.nd4j.linalg.dataset.api.iterator.DataSetIterator) MnistDataSetIterator(org.deeplearning4j.datasets.iterator.impl.MnistDataSetIterator) BaseSparkTest(org.deeplearning4j.spark.BaseSparkTest) Test(org.junit.Test)

Example 14 with SparkTrainingStats

use of org.deeplearning4j.spark.api.stats.SparkTrainingStats in project deeplearning4j by deeplearning4j.

the class TestSparkMultiLayerParameterAveraging method testFitViaStringPaths.

/**
 * Saves MNIST {@link DataSet} objects to a temporary directory, fits a
 * {@link SparkDl4jMultiLayer} from an RDD of the file paths via {@code fitPaths}, and
 * asserts that the network parameters changed.
 *
 * @throws Exception if creating, writing or listing the temporary files fails, or if
 *                   training fails
 */
@Test
public void testFitViaStringPaths() throws Exception {
    Path tempDir = Files.createTempDirectory("DL4J-testFitViaStringPaths");
    File tempDirF = tempDir.toFile();
    // Registered first so that it is removed last: delete-on-exit entries are processed in
    // reverse registration order, and a directory can only be deleted once it is empty.
    tempDirF.deleteOnExit();

    int dataSetObjSize = 5;
    int batchSizePerExecutor = 25;

    // Write every DataSet produced by the iterator to its own "<index>.bin" file
    DataSetIterator iter = new MnistDataSetIterator(dataSetObjSize, 1000, false);
    int i = 0;
    while (iter.hasNext()) {
        File nextFile = new File(tempDirF, i + ".bin");
        // Without this the directory stays non-empty at JVM exit and is never cleaned up
        nextFile.deleteOnExit();
        DataSet ds = iter.next();
        ds.save(nextFile);
        i++;
    }
    System.out.println("Saved to: " + tempDirF.getAbsolutePath());

    MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder()
                    .updater(Updater.RMSPROP)
                    .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT)
                    .iterations(1)
                    .list()
                    .layer(0, new org.deeplearning4j.nn.conf.layers.DenseLayer.Builder().nIn(28 * 28).nOut(50).activation(Activation.TANH).build())
                    .layer(1, new org.deeplearning4j.nn.conf.layers.OutputLayer.Builder(LossFunctions.LossFunction.MCXENT).nIn(50).nOut(10).activation(Activation.SOFTMAX).build())
                    .pretrain(false)
                    .backprop(true)
                    .build();

    SparkDl4jMultiLayer sparkNet = new SparkDl4jMultiLayer(sc, conf,
                    new ParameterAveragingTrainingMaster.Builder(numExecutors(), dataSetObjSize)
                                    .workerPrefetchNumBatches(5)
                                    .batchSizePerWorker(batchSizePerExecutor)
                                    .averagingFrequency(1)
                                    .repartionData(Repartition.Always)
                                    .build());
    sparkNet.setCollectTrainingStats(true);

    // List the saved files (non-recursively) through the Hadoop FileSystem API
    Configuration config = new Configuration();
    FileSystem hdfs = FileSystem.get(tempDir.toUri(), config);
    RemoteIterator<LocatedFileStatus> fileIter = hdfs.listFiles(new org.apache.hadoop.fs.Path(tempDir.toString()), false);
    List<String> paths = new ArrayList<>();
    while (fileIter.hasNext()) {
        String path = fileIter.next().getPath().toString();
        paths.add(path);
    }

    // Fitting must change the network parameters
    INDArray paramsBefore = sparkNet.getNetwork().params().dup();
    JavaRDD<String> pathRdd = sc.parallelize(paths);
    sparkNet.fitPaths(pathRdd);
    INDArray paramsAfter = sparkNet.getNetwork().params().dup();
    assertNotEquals(paramsBefore, paramsAfter);

    SparkTrainingStats stats = sparkNet.getSparkTrainingStats();
    System.out.println(stats.statsAsString());

    sparkNet.getTrainingMaster().deleteTempFiles(sc);
}
Also used : Configuration(org.apache.hadoop.conf.Configuration) ComputationGraphConfiguration(org.deeplearning4j.nn.conf.ComputationGraphConfiguration) NeuralNetConfiguration(org.deeplearning4j.nn.conf.NeuralNetConfiguration) MultiLayerConfiguration(org.deeplearning4j.nn.conf.MultiLayerConfiguration) MultiDataSet(org.nd4j.linalg.dataset.MultiDataSet) DataSet(org.nd4j.linalg.dataset.DataSet) SparkTrainingStats(org.deeplearning4j.spark.api.stats.SparkTrainingStats) MultiLayerConfiguration(org.deeplearning4j.nn.conf.MultiLayerConfiguration) FileSystem(org.apache.hadoop.fs.FileSystem) SparkDl4jMultiLayer(org.deeplearning4j.spark.impl.multilayer.SparkDl4jMultiLayer) Path(java.nio.file.Path) MnistDataSetIterator(org.deeplearning4j.datasets.iterator.impl.MnistDataSetIterator) LocatedFileStatus(org.apache.hadoop.fs.LocatedFileStatus) NeuralNetConfiguration(org.deeplearning4j.nn.conf.NeuralNetConfiguration) LabeledPoint(org.apache.spark.mllib.regression.LabeledPoint) DenseLayer(org.deeplearning4j.nn.conf.layers.DenseLayer) INDArray(org.nd4j.linalg.api.ndarray.INDArray) File(java.io.File) IrisDataSetIterator(org.deeplearning4j.datasets.iterator.impl.IrisDataSetIterator) DataSetIterator(org.nd4j.linalg.dataset.api.iterator.DataSetIterator) MnistDataSetIterator(org.deeplearning4j.datasets.iterator.impl.MnistDataSetIterator) BaseSparkTest(org.deeplearning4j.spark.BaseSparkTest) Test(org.junit.Test)

Example 15 with SparkTrainingStats

use of org.deeplearning4j.spark.api.stats.SparkTrainingStats in project deeplearning4j by deeplearning4j.

the class ParameterAveragingTrainingWorker method getFinalResultWithStats.

/**
 * Returns the final training result for the given network paired with the built training
 * statistics.
 *
 * @param network the network passed through to {@code getFinalResult}
 * @return {@code null} when {@code getFinalResult} yields {@code null}; otherwise a pair of
 *         the result and the built stats, where the stats element is {@code null} if the
 *         {@code stats} field is {@code null}
 */
@Override
public Pair<ParameterAveragingTrainingResult, SparkTrainingStats> getFinalResultWithStats(MultiLayerNetwork network) {
    final ParameterAveragingTrainingResult finalResult = getFinalResult(network);
    if (finalResult == null) {
        return null;
    }

    // Stats are only built when a stats collector is present
    SparkTrainingStats builtStats = null;
    if (stats != null) {
        builtStats = stats.build();
    }
    return new Pair<>(finalResult, builtStats);
}
Also used : SparkTrainingStats(org.deeplearning4j.spark.api.stats.SparkTrainingStats) Pair(org.deeplearning4j.berkeley.Pair)

Aggregations

SparkTrainingStats (org.deeplearning4j.spark.api.stats.SparkTrainingStats)17 Test (org.junit.Test)8 DataSetIterator (org.nd4j.linalg.dataset.api.iterator.DataSetIterator)8 IrisDataSetIterator (org.deeplearning4j.datasets.iterator.impl.IrisDataSetIterator)7 MultiLayerConfiguration (org.deeplearning4j.nn.conf.MultiLayerConfiguration)7 NeuralNetConfiguration (org.deeplearning4j.nn.conf.NeuralNetConfiguration)7 BaseSparkTest (org.deeplearning4j.spark.BaseSparkTest)7 INDArray (org.nd4j.linalg.api.ndarray.INDArray)7 DataSet (org.nd4j.linalg.dataset.DataSet)7 File (java.io.File)6 ComputationGraphConfiguration (org.deeplearning4j.nn.conf.ComputationGraphConfiguration)6 SparkDl4jMultiLayer (org.deeplearning4j.spark.impl.multilayer.SparkDl4jMultiLayer)5 GridExecutioner (org.nd4j.linalg.api.ops.executioner.GridExecutioner)5 MultiDataSet (org.nd4j.linalg.dataset.MultiDataSet)5 LabeledPoint (org.apache.spark.mllib.regression.LabeledPoint)4 Pair (org.deeplearning4j.berkeley.Pair)4 MnistDataSetIterator (org.deeplearning4j.datasets.iterator.impl.MnistDataSetIterator)4 SparkComputationGraph (org.deeplearning4j.spark.impl.graph.SparkComputationGraph)4 ParameterAveragingTrainingMaster (org.deeplearning4j.spark.impl.paramavg.ParameterAveragingTrainingMaster)4 Path (java.nio.file.Path)3