Use of org.nd4j.linalg.dataset.MultiDataSet in project deeplearning4j by deeplearning4j.
From the class ComputationGraphTestRNN, method testTbpttMasking.
@Test
public void testTbpttMasking() {
    //Simple "does it throw an exception" type test...
    ComputationGraphConfiguration conf = new NeuralNetConfiguration.Builder().iterations(1).seed(12345)
                    .graphBuilder().addInputs("in")
                    .addLayer("out", new RnnOutputLayer.Builder(LossFunctions.LossFunction.MSE)
                                    .activation(Activation.IDENTITY).nIn(1).nOut(1).build(), "in")
                    .setOutputs("out").backpropType(BackpropType.TruncatedBPTT)
                    .tBPTTForwardLength(8).tBPTTBackwardLength(8).build();

    ComputationGraph net = new ComputationGraph(conf);
    net.init();

    MultiDataSet data = new MultiDataSet(new INDArray[] {Nd4j.linspace(1, 10, 10).reshape(1, 1, 10)},
                    new INDArray[] {Nd4j.linspace(2, 20, 10).reshape(1, 1, 10)}, null,
                    new INDArray[] {Nd4j.ones(10)});

    net.fit(data);
}
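The four-argument MultiDataSet constructor used above takes features, labels, feature masks, and label masks, each as an INDArray[] with one entry per network input/output; here the feature masks are null and a label mask of all ones is supplied. A minimal sketch of the same pattern with both masks made explicit (shapes follow the toy data above; variable names are illustrative):

//Sketch only: shapes match the toy single-example time series used in the test above
INDArray features = Nd4j.linspace(1, 10, 10).reshape(1, 1, 10);  //[miniBatch, nIn, timeSeriesLength]
INDArray labels = Nd4j.linspace(2, 20, 10).reshape(1, 1, 10);    //[miniBatch, nOut, timeSeriesLength]
INDArray featuresMask = Nd4j.ones(1, 10);                        //[miniBatch, timeSeriesLength]; 1 = step present
INDArray labelsMask = Nd4j.ones(1, 10);

MultiDataSet mds = new MultiDataSet(new INDArray[] {features}, new INDArray[] {labels},
                new INDArray[] {featuresMask}, new INDArray[] {labelsMask});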
Use of org.nd4j.linalg.dataset.MultiDataSet in project deeplearning4j by deeplearning4j.
From the class GradientCheckUtil, method checkGradients.
/** Check backprop gradients for a ComputationGraph.
 * @param graph ComputationGraph to test. This must be initialized.
 * @param epsilon Step size for the central-difference numerical gradient. Usually on the order of 1e-4 or so.
 * @param maxRelError Maximum relative error. Usually < 0.01, though it may need to be larger for deep networks.
 * @param minAbsoluteError Minimum absolute error to cause a failure. Numerical gradients can be non-zero due to precision issues.
 * For example, 0.0 vs. 1e-18: the relative error is 1.0, but this is not really a failure.
 * @param print Whether to print full pass/failure details for each parameter gradient.
 * @param exitOnFirstError If true: return upon first failure. If false: continue checking even if
 * one parameter gradient has failed. Typically use false for debugging, true for unit tests.
 * @param inputs Input arrays to use for the forward pass. May be mini-batch data.
 * @param labels Labels/targets (output) arrays to use to calculate the backprop gradient. May be mini-batch data.
 * @return true if all gradient checks pass, false otherwise.
 */
public static boolean checkGradients(ComputationGraph graph, double epsilon, double maxRelError,
                double minAbsoluteError, boolean print, boolean exitOnFirstError, INDArray[] inputs,
                INDArray[] labels) {
    //Basic sanity checks on input:
    if (epsilon <= 0.0 || epsilon > 0.1)
        throw new IllegalArgumentException("Invalid epsilon: expect epsilon in range (0,0.1], usually 1e-4 or so");
    if (maxRelError <= 0.0 || maxRelError > 0.25)
        throw new IllegalArgumentException("Invalid maxRelativeError: " + maxRelError);
    if (graph.getNumInputArrays() != inputs.length)
        throw new IllegalArgumentException("Invalid input arrays: expect " + graph.getNumInputArrays() + " inputs");
    if (graph.getNumOutputArrays() != labels.length)
        throw new IllegalArgumentException("Invalid labels arrays: expect " + graph.getNumOutputArrays() + " outputs");
    //Check configuration: gradient checks require Updater.NONE (or SGD with lr=1.0), no dropout,
    //and smooth activation functions
    int layerCount = 0;
    for (String vertexName : graph.getConfiguration().getVertices().keySet()) {
        GraphVertex gv = graph.getConfiguration().getVertices().get(vertexName);
        if (!(gv instanceof LayerVertex))
            continue;
        LayerVertex lv = (LayerVertex) gv;

        org.deeplearning4j.nn.conf.Updater u = lv.getLayerConf().getLayer().getUpdater();
        if (u == org.deeplearning4j.nn.conf.Updater.SGD) {
            //Must have LR of 1.0
            double lr = lv.getLayerConf().getLayer().getLearningRate();
            if (lr != 1.0) {
                throw new IllegalStateException("When using SGD updater, must also use lr=1.0 for layer \""
                                + vertexName + "\"; got lr = " + lr);
            }
        } else if (u != org.deeplearning4j.nn.conf.Updater.NONE) {
            throw new IllegalStateException("Must have Updater.NONE (or SGD + lr=1.0) for layer \"" + vertexName
                            + "\"; got " + u);
        }

        double dropout = lv.getLayerConf().getLayer().getDropOut();
        if (lv.getLayerConf().isUseRegularization() && dropout != 0.0) {
            throw new IllegalStateException("Must have dropout == 0.0 for gradient checks - got dropout = "
                            + dropout + " for layer " + layerCount);
        }

        IActivation activation = lv.getLayerConf().getLayer().getActivationFn();
        if (activation != null) {
            if (!VALID_ACTIVATION_FUNCTIONS.contains(activation.getClass())) {
                log.warn("Layer \"" + vertexName + "\" is possibly using an unsuitable activation function: "
                                + activation.getClass()
                                + ". Activation functions for gradient checks must be smooth (like sigmoid, tanh,"
                                + " softmax) and not contain discontinuities like ReLU or LeakyReLU (these may"
                                + " cause spurious failures)");
            }
        }
    }
    for (int i = 0; i < inputs.length; i++)
        graph.setInput(i, inputs[i]);
    for (int i = 0; i < labels.length; i++)
        graph.setLabel(i, labels[i]);

    graph.computeGradientAndScore();
    Pair<Gradient, Double> gradAndScore = graph.gradientAndScore();

    ComputationGraphUpdater updater = new ComputationGraphUpdater(graph);
    updater.update(graph, gradAndScore.getFirst(), 0, graph.batchSize());

    //need dup: gradients are a *view* of the full gradient array (which will change every time backprop is done)
    INDArray gradientToCheck = gradAndScore.getFirst().gradient().dup();
    //need dup: params are a *view* of the full parameter array
    INDArray originalParams = graph.params().dup();

    int nParams = originalParams.length();

    Map<String, INDArray> paramTable = graph.paramTable();
    List<String> paramNames = new ArrayList<>(paramTable.keySet());
    int[] paramEnds = new int[paramNames.size()];
    paramEnds[0] = paramTable.get(paramNames.get(0)).length();
    for (int i = 1; i < paramEnds.length; i++) {
        paramEnds[i] = paramEnds[i - 1] + paramTable.get(paramNames.get(i)).length();
    }

    int currParamNameIdx = 0;
    int totalNFailures = 0;
    double maxError = 0.0;
    MultiDataSet mds = new MultiDataSet(inputs, labels);

    //Assumption here: params is a view that we can modify in-place
    INDArray params = graph.params();
    for (int i = 0; i < nParams; i++) {
        //Get param name
        if (i >= paramEnds[currParamNameIdx]) {
            currParamNameIdx++;
        }
        String paramName = paramNames.get(currParamNameIdx);

        //(w+epsilon): Do forward pass and score
        double origValue = params.getDouble(i);
        params.putScalar(i, origValue + epsilon);
        //training == true for batch norm, etc. (scores and gradients need to be calculated on the same thing)
        double scorePlus = graph.score(mds, true);

        //(w-epsilon): Do forward pass and score
        params.putScalar(i, origValue - epsilon);
        double scoreMinus = graph.score(mds, true);

        //Reset original param value
        params.putScalar(i, origValue);

        //Calculate numerical parameter gradient via central difference: (scorePlus - scoreMinus) / (2 * epsilon)
        double scoreDelta = scorePlus - scoreMinus;
        double numericalGradient = scoreDelta / (2 * epsilon);
        if (Double.isNaN(numericalGradient))
            throw new IllegalStateException("Numerical gradient was NaN for parameter " + i + " of " + nParams);

        double backpropGradient = gradientToCheck.getDouble(i);
        //Relative error: |g_bp - g_num| / (|g_num| + |g_bp|)
        //See http://cs231n.github.io/neural-networks-3/#gradcheck
        double relError = Math.abs(backpropGradient - numericalGradient)
                        / (Math.abs(numericalGradient) + Math.abs(backpropGradient));
        if (backpropGradient == 0.0 && numericalGradient == 0.0)
            relError = 0.0; //Edge case: i.e., RNNs with time series length of 1

        if (relError > maxError)
            maxError = relError;
        if (relError > maxRelError || Double.isNaN(relError)) {
            double absError = Math.abs(backpropGradient - numericalGradient);
            if (absError < minAbsoluteError) {
                log.info("Param " + i + " (" + paramName + ") passed: grad= " + backpropGradient
                                + ", numericalGrad= " + numericalGradient + ", relError= " + relError
                                + "; absolute error = " + absError + " < minAbsoluteError = " + minAbsoluteError);
            } else {
                if (print)
                    log.info("Param " + i + " (" + paramName + ") FAILED: grad= " + backpropGradient
                                    + ", numericalGrad= " + numericalGradient + ", relError= " + relError
                                    + ", scorePlus=" + scorePlus + ", scoreMinus= " + scoreMinus);
                if (exitOnFirstError)
                    return false;
                totalNFailures++;
            }
        } else if (print) {
            log.info("Param " + i + " (" + paramName + ") passed: grad= " + backpropGradient
                            + ", numericalGrad= " + numericalGradient + ", relError= " + relError);
        }
    }
    if (print) {
        int nPass = nParams - totalNFailures;
        log.info("GradientCheckUtil.checkGradients(): " + nParams + " params checked, " + nPass + " passed, "
                        + totalNFailures + " failed. Largest relative error = " + maxError);
    }
    return totalNFailures == 0;
}
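For reference, a minimal sketch of how this utility might be invoked from a unit test. The epsilon and error thresholds shown are common choices for gradient checks, not mandated values, and the graph/input/label variables are assumed to be set up to satisfy the configuration checks above:

//Sketch only: assumes 'graph' uses Updater.NONE (or SGD + lr=1.0), no dropout,
//and smooth activations such as tanh/sigmoid/softmax
boolean gradOK = GradientCheckUtil.checkGradients(graph,
                1e-6,   //epsilon for the central difference
                1e-3,   //maxRelError
                1e-8,   //minAbsoluteError: tiny absolute differences are not treated as failures
                true,   //print per-parameter results
                false,  //keep checking after the first failure (useful while debugging)
                new INDArray[] {input}, new INDArray[] {labels});
assertTrue(gradOK);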
Use of org.nd4j.linalg.dataset.MultiDataSet in project deeplearning4j by deeplearning4j.
From the class TransferLearningHelper, method featurize.
/**
 * During training, frozen vertices/layers can be treated as "featurizing" the input. The forward pass through
 * the frozen layers/vertices can be done in advance, and the resulting dataset saved to disk, in order to iterate
 * quickly on the smaller unfrozen part of the model.
 * Currently does not support datasets with feature masks.
 *
 * @param input DataSet to feed into the computation graph with frozen layer vertices
 * @return a DataSet whose features are the outputs of the frozen layer vertices, with the original labels.
 */
public DataSet featurize(DataSet input) {
    if (isGraph) {
        //trying to featurize for a computation graph
        if (origGraph.getNumInputArrays() > 1 || origGraph.getNumOutputArrays() > 1) {
            throw new IllegalArgumentException(
                            "Input or output size to a computation graph is greater than one. Requires use of a MultiDataSet.");
        } else {
            if (input.getFeaturesMaskArray() != null) {
                throw new IllegalArgumentException("Currently cannot support featurizing datasets with feature masks");
            }
            MultiDataSet inbW = new MultiDataSet(new INDArray[] {input.getFeatures()},
                            new INDArray[] {input.getLabels()}, null, new INDArray[] {input.getLabelsMaskArray()});
            MultiDataSet ret = featurize(inbW);
            return new DataSet(ret.getFeatures()[0], input.getLabels(), ret.getLabelsMaskArrays()[0],
                            input.getLabelsMaskArray());
        }
    } else {
        if (input.getFeaturesMaskArray() != null)
            throw new UnsupportedOperationException("Feature masks not supported with featurizing currently");
        return new DataSet(origMLN.feedForwardToLayer(frozenInputLayer + 1, input.getFeatures(), false)
                        .get(frozenInputLayer + 1), input.getLabels(), null, input.getLabelsMaskArray());
    }
}
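A minimal sketch of the surrounding transfer-learning workflow, assuming a helper constructed over a graph with a frozen portion (the variable and vertex names here are illustrative, not from the source above):

//Sketch only: freeze everything up to the named vertex; "frozenVertexName" is illustrative
TransferLearningHelper helper = new TransferLearningHelper(origGraph, "frozenVertexName");

//Featurize once, up front; the featurized data can be saved to disk and reused across epochs
DataSet featurized = helper.featurize(trainingData);

//Fit only the unfrozen part of the model on the featurized data
helper.fitFeaturized(featurized);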
Use of org.nd4j.linalg.dataset.MultiDataSet in project deeplearning4j by deeplearning4j.
From the class TestPreProcessedData, method testPreprocessedDataCompGraphMultiDataSet.
@Test
public void testPreprocessedDataCompGraphMultiDataSet() throws IOException {
    //Test _loading_ of preprocessed MultiDataSet data
    int dataSetObjSize = 5;
    int batchSizePerExecutor = 10;

    String path = FilenameUtils.concat(System.getProperty("java.io.tmpdir"), "dl4j_testpreprocdata3");
    File f = new File(path);
    if (f.exists())
        f.delete();
    f.mkdir();

    DataSetIterator iter = new IrisDataSetIterator(5, 150);
    int i = 0;
    while (iter.hasNext()) {
        File f2 = new File(FilenameUtils.concat(path, "data" + (i++) + ".bin"));
        DataSet ds = iter.next();
        MultiDataSet mds = new MultiDataSet(ds.getFeatures(), ds.getLabels());
        mds.save(f2);
    }

    ComputationGraphConfiguration conf = new NeuralNetConfiguration.Builder().updater(Updater.RMSPROP)
                    .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT).iterations(1)
                    .graphBuilder().addInputs("in")
                    .addLayer("0", new org.deeplearning4j.nn.conf.layers.DenseLayer.Builder().nIn(4).nOut(3)
                                    .activation(Activation.TANH).build(), "in")
                    .addLayer("1", new org.deeplearning4j.nn.conf.layers.OutputLayer.Builder(
                                    LossFunctions.LossFunction.MCXENT).nIn(3).nOut(3)
                                    .activation(Activation.SOFTMAX).build(), "0")
                    .setOutputs("1").pretrain(false).backprop(true).build();

    SparkComputationGraph sparkNet = new SparkComputationGraph(sc, conf,
                    new ParameterAveragingTrainingMaster.Builder(numExecutors(), dataSetObjSize)
                                    .batchSizePerWorker(batchSizePerExecutor).averagingFrequency(1)
                                    .repartionData(Repartition.Always).build());
    sparkNet.setCollectTrainingStats(true);
    sparkNet.fitMultiDataSet("file:///" + path.replaceAll("\\\\", "/"));

    SparkTrainingStats sts = sparkNet.getSparkTrainingStats();
    //4 'fits' per averaging (4 executors, averaging frequency of 1); 10 examples each -> 40 examples per fit.
    //150/40 = 3 averagings (rounding down); 3*4 = 12
    int expNumFits = 12;

    //Unfortunately, perfect partitioning isn't guaranteed by SparkUtils.balancedRandomSplit (esp. if original
    //partitions are all of size 1, which appears to be occurring at least some of the time), but we should get
    //close to what we expect...
    assertTrue(Math.abs(expNumFits - sts.getValue("ParameterAveragingWorkerFitTimesMs").size()) < 3);
    assertEquals(3, sts.getValue("ParameterAveragingMasterMapPartitionsTimesMs").size());
}
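The .bin files written above use MultiDataSet's own binary serialization (save/load on the org.nd4j.linalg.dataset.api.MultiDataSet interface). A minimal round-trip sketch, assuming it runs in a method that declares IOException; the file path and array shapes are illustrative:

//Sketch only: small random features (5x4) and labels (5x3)
MultiDataSet saved = new MultiDataSet(Nd4j.rand(5, 4), Nd4j.rand(5, 3));
File f2 = new File("data0.bin");    //illustrative path
saved.save(f2);

//load() is an instance method: create an empty MultiDataSet, then populate it from the file
MultiDataSet restored = new MultiDataSet();
restored.load(f2);
assertEquals(saved.getFeatures(0), restored.getFeatures(0));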
Use of org.nd4j.linalg.dataset.MultiDataSet in project deeplearning4j by deeplearning4j.
From the class TestSparkMultiLayerParameterAveraging, method testFitViaStringPathsCompGraph.
@Test
public void testFitViaStringPathsCompGraph() throws Exception {
    Path tempDir = Files.createTempDirectory("DL4J-testFitViaStringPathsCG");
    Path tempDir2 = Files.createTempDirectory("DL4J-testFitViaStringPathsCG-MDS");
    File tempDirF = tempDir.toFile();
    File tempDirF2 = tempDir2.toFile();
    tempDirF.deleteOnExit();
    tempDirF2.deleteOnExit();

    int dataSetObjSize = 5;
    int batchSizePerExecutor = 25;
    DataSetIterator iter = new MnistDataSetIterator(dataSetObjSize, 1000, false);
    int i = 0;
    while (iter.hasNext()) {
        File nextFile = new File(tempDirF, i + ".bin");
        File nextFile2 = new File(tempDirF2, i + ".bin");
        DataSet ds = iter.next();
        MultiDataSet mds = new MultiDataSet(ds.getFeatures(), ds.getLabels());
        ds.save(nextFile);
        mds.save(nextFile2);
        i++;
    }

    System.out.println("Saved to: " + tempDirF.getAbsolutePath());
    System.out.println("Saved to: " + tempDirF2.getAbsolutePath());

    ComputationGraphConfiguration conf = new NeuralNetConfiguration.Builder().updater(Updater.RMSPROP)
                    .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT).iterations(1)
                    .graphBuilder().addInputs("in")
                    .addLayer("0", new org.deeplearning4j.nn.conf.layers.DenseLayer.Builder().nIn(28 * 28).nOut(50)
                                    .activation(Activation.TANH).build(), "in")
                    .addLayer("1", new org.deeplearning4j.nn.conf.layers.OutputLayer.Builder(
                                    LossFunctions.LossFunction.MCXENT).nIn(50).nOut(10)
                                    .activation(Activation.SOFTMAX).build(), "0")
                    .setOutputs("1").pretrain(false).backprop(true).build();

    SparkComputationGraph sparkNet = new SparkComputationGraph(sc, conf,
                    new ParameterAveragingTrainingMaster.Builder(numExecutors(), dataSetObjSize)
                                    .workerPrefetchNumBatches(0).batchSizePerWorker(batchSizePerExecutor)
                                    .averagingFrequency(1).repartionData(Repartition.Always).build());
    sparkNet.setCollectTrainingStats(true);
    //List files:
    Configuration config = new Configuration();
    FileSystem hdfs = FileSystem.get(tempDir.toUri(), config);
    RemoteIterator<LocatedFileStatus> fileIter =
                    hdfs.listFiles(new org.apache.hadoop.fs.Path(tempDir.toString()), false);

    List<String> paths = new ArrayList<>();
    while (fileIter.hasNext()) {
        String path = fileIter.next().getPath().toString();
        paths.add(path);
    }

    INDArray paramsBefore = sparkNet.getNetwork().params().dup();
    JavaRDD<String> pathRdd = sc.parallelize(paths);
    sparkNet.fitPaths(pathRdd);
    INDArray paramsAfter = sparkNet.getNetwork().params().dup();
    assertNotEquals(paramsBefore, paramsAfter);

    SparkTrainingStats stats = sparkNet.getSparkTrainingStats();
    System.out.println(stats.statsAsString());
    //Same thing, but for MultiDataSet objects:
    config = new Configuration();
    hdfs = FileSystem.get(tempDir2.toUri(), config);
    fileIter = hdfs.listFiles(new org.apache.hadoop.fs.Path(tempDir2.toString()), false);

    paths = new ArrayList<>();
    while (fileIter.hasNext()) {
        String path = fileIter.next().getPath().toString();
        paths.add(path);
    }

    paramsBefore = sparkNet.getNetwork().params().dup();
    pathRdd = sc.parallelize(paths);
    sparkNet.fitPathsMultiDataSet(pathRdd);
    paramsAfter = sparkNet.getNetwork().params().dup();
    assertNotEquals(paramsBefore, paramsAfter);

    stats = sparkNet.getSparkTrainingStats();
    System.out.println(stats.statsAsString());
}