Example usage of org.nd4j.linalg.dataset.api.iterator.TestDataSetIterator in the nd4j project by deeplearning4j.
From the class NormalizerMinMaxScalerTest, method testBruteForce:
@Test
public void testBruteForce() {
    // Verifies NormalizerMinMaxScaler against the analytic min-max formula:
    //   X_std = (X - X.min(axis=0)) / (X.max(axis=0) - X.min(axis=0))
    //   X_scaled = X_std * (max - min) + min
    // Dataset features are scaled consecutive natural numbers, so per-column
    // min and max are known in closed form.
    int nSamples = 500;
    int x = 4, y = 2, z = 3;
    INDArray featureX = Nd4j.linspace(1, nSamples, nSamples).reshape(nSamples, 1);
    INDArray featureY = featureX.mul(y);
    INDArray featureZ = featureX.mul(z);
    // scale X in place only AFTER Y and Z have been derived from it
    featureX.muli(x);
    INDArray featureSet = Nd4j.concat(1, featureX, featureY, featureZ);
    INDArray labelSet = Nd4j.zeros(nSamples, 1);
    DataSet sampleDataSet = new DataSet(featureSet, labelSet);
    // expected per-column min and max
    INDArray theoreticalMin = Nd4j.create(new double[] { x, y, z });
    INDArray theoreticalMax = Nd4j.create(new double[] { nSamples * x, nSamples * y, nSamples * z });
    INDArray theoreticalRange = theoreticalMax.sub(theoreticalMin);
    NormalizerMinMaxScaler myNormalizer = new NormalizerMinMaxScaler();
    myNormalizer.fit(sampleDataSet);
    INDArray minDataSet = myNormalizer.getMin();
    INDArray maxDataSet = myNormalizer.getMax();
    INDArray minDiff = minDataSet.sub(theoreticalMin).max(1);
    INDArray maxDiff = maxDataSet.sub(theoreticalMax).max(1);
    // FIX: JUnit convention is assertEquals(expected, actual, delta) — the
    // original passed the actual value in the expected slot.
    assertEquals(0.0, minDiff.getDouble(0, 0), 0.000000001);
    // FIX: maxDiff is already the row max; the original reduced it a second time.
    assertEquals(0.0, maxDiff.getDouble(0, 0), 0.000000001);
    // SAME TEST WITH THE ITERATOR
    int bSize = 1;
    DataSetIterator sampleIter = new TestDataSetIterator(sampleDataSet, bSize);
    myNormalizer.fit(sampleIter);
    minDataSet = myNormalizer.getMin();
    maxDataSet = myNormalizer.getMax();
    assertEquals(0.0, minDataSet.sub(theoreticalMin).max(1).getDouble(0, 0), 0.000000001);
    assertEquals(0.0, maxDataSet.sub(theoreticalMax).max(1).getDouble(0, 0), 0.000000001);
    sampleIter.setPreProcessor(myNormalizer);
    INDArray actual, expected, delta;
    int i = 1;
    while (sampleIter.hasNext()) {
        // Row i holds theoreticalMin * i, so the min-max scaled row is
        // (theoreticalMin*i - theoreticalMin) / range = theoreticalMin*(i-1)/range.
        expected = theoreticalMin.mul(i - 1).div(theoreticalRange);
        actual = sampleIter.next().getFeatures();
        delta = Transforms.abs(actual.sub(expected));
        assertTrue(delta.max(1).getDouble(0, 0) < 0.0001);
        i++;
    }
}
Example usage of org.nd4j.linalg.dataset.api.iterator.TestDataSetIterator in the nd4j project by deeplearning4j.
From the class NormalizerStandardizeLabelsTest, method testBruteForce:
@Test
public void testBruteForce() {
    /* This test creates a dataset where feature values are multiples of consecutive natural numbers.
       The obtained values are compared to the theoretical mean and std dev.
     */
    // 0.01% of correct value
    double tolerancePerc = 0.01;
    int nSamples = 5120;
    int x = 1, y = 2, z = 3;
    INDArray featureX = Nd4j.linspace(1, nSamples, nSamples).reshape(nSamples, 1).mul(x);
    INDArray featureY = featureX.mul(y);
    INDArray featureZ = featureX.mul(z);
    INDArray featureSet = Nd4j.concat(1, featureX, featureY, featureZ);
    // labels are the first feature column, so label stats must match that column's stats
    INDArray labelSet = featureSet.dup().getColumns(new int[] { 0 });
    DataSet sampleDataSet = new DataSet(featureSet, labelSet);
    // closed-form mean and (population) std dev of 1..n, scaled per column
    double meanNaturalNums = (nSamples + 1) / 2.0;
    INDArray theoreticalMean = Nd4j.create(new double[] { meanNaturalNums * x, meanNaturalNums * y, meanNaturalNums * z });
    INDArray theoreticallabelMean = theoreticalMean.dup().getColumns(new int[] { 0 });
    double stdNaturalNums = Math.sqrt((nSamples * nSamples - 1) / 12.0);
    INDArray theoreticalStd = Nd4j.create(new double[] { stdNaturalNums * x, stdNaturalNums * y, stdNaturalNums * z });
    INDArray theoreticallabelStd = theoreticalStd.dup().getColumns(new int[] { 0 });
    NormalizerStandardize myNormalizer = new NormalizerStandardize();
    myNormalizer.fitLabel(true);
    myNormalizer.fit(sampleDataSet);
    INDArray meanDelta = Transforms.abs(theoreticalMean.sub(myNormalizer.getMean()));
    INDArray labelDelta = Transforms.abs(theoreticallabelMean.sub(myNormalizer.getLabelMean()));
    INDArray meanDeltaPerc = meanDelta.div(theoreticalMean).mul(100);
    INDArray labelDeltaPerc = labelDelta.div(theoreticallabelMean).mul(100);
    double maxMeanDeltaPerc = meanDeltaPerc.max(1).getDouble(0, 0);
    assertTrue(maxMeanDeltaPerc < tolerancePerc);
    assertTrue(labelDeltaPerc.max(1).getDouble(0, 0) < tolerancePerc);
    INDArray stdDelta = Transforms.abs(theoreticalStd.sub(myNormalizer.getStd()));
    INDArray stdDeltaPerc = stdDelta.div(theoreticalStd).mul(100);
    // FIX: express the label std error as a percentage too (original omitted .mul(100),
    // making its tolerance 100x looser than intended)
    INDArray stdlabelDeltaPerc = Transforms.abs(theoreticallabelStd.sub(myNormalizer.getLabelStd())).div(theoreticallabelStd).mul(100);
    // FIX: stdDeltaPerc is already a percentage; the original multiplied by 100 twice
    double maxStdDeltaPerc = stdDeltaPerc.max(1).getDouble(0, 0);
    double maxlabelStdDeltaPerc = stdlabelDeltaPerc.max(1).getDouble(0, 0);
    assertTrue(maxStdDeltaPerc < tolerancePerc);
    assertTrue(maxlabelStdDeltaPerc < tolerancePerc);
    // SAME TEST WITH THE ITERATOR
    int bSize = 10;
    // 0.1% of correct value
    tolerancePerc = 0.1;
    DataSetIterator sampleIter = new TestDataSetIterator(sampleDataSet, bSize);
    myNormalizer.fit(sampleIter);
    meanDelta = Transforms.abs(theoreticalMean.sub(myNormalizer.getMean()));
    meanDeltaPerc = meanDelta.div(theoreticalMean).mul(100);
    maxMeanDeltaPerc = meanDeltaPerc.max(1).getDouble(0, 0);
    assertTrue(maxMeanDeltaPerc < tolerancePerc);
    // FIX: compare the std against theoreticalStd — the original copy-pasted the
    // mean comparison (theoreticalMean.sub(getMean())) and never tested the std here
    stdDelta = Transforms.abs(theoreticalStd.sub(myNormalizer.getStd()));
    stdDeltaPerc = stdDelta.div(theoreticalStd).mul(100);
    maxStdDeltaPerc = stdDeltaPerc.max(1).getDouble(0, 0);
    assertTrue(maxStdDeltaPerc < tolerancePerc);
}
Example usage of org.nd4j.linalg.dataset.api.iterator.TestDataSetIterator in the nd4j project by deeplearning4j.
From the class NormalizerStandardizeTest, method testUnderOverflow:
@Test
public void testUnderOverflow() {
    // This dataset is basically constant (tiny spread) around a large-magnitude
    // offset. Checks that the standardizer's statistics stay numerically stable.
    // Within 1 %
    double tolerancePerc = 1;
    int nSamples = 1000;
    int bSize = 10;
    int x = -1000000, y = 1000000;
    double z = 1000000;
    INDArray featureX = Nd4j.rand(nSamples, 1).mul(1).add(x);
    INDArray featureY = Nd4j.rand(nSamples, 1).mul(2).add(y);
    INDArray featureZ = Nd4j.rand(nSamples, 1).mul(3).add(z);
    INDArray featureSet = Nd4j.concat(1, featureX, featureY, featureZ);
    INDArray labelSet = Nd4j.zeros(nSamples, 1);
    DataSet sampleDataSet = new DataSet(featureSet, labelSet);
    DataSetIterator sampleIter = new TestDataSetIterator(sampleDataSet, bSize);
    INDArray theoreticalMean = Nd4j.create(new double[] { x, y, z });
    NormalizerStandardize myNormalizer = new NormalizerStandardize();
    myNormalizer.fit(sampleIter);
    INDArray meanDelta = Transforms.abs(theoreticalMean.sub(myNormalizer.getMean()));
    // FIX: divide by |mean| — x is negative, and dividing by a negative mean made
    // that column's percentage negative, so the assertion was trivially true for it.
    INDArray meanDeltaPerc = meanDelta.mul(100).div(Transforms.abs(theoreticalMean));
    assertTrue(meanDeltaPerc.max(1).getDouble(0, 0) < tolerancePerc);
    // this just has to not barf
    myNormalizer.transform(sampleDataSet);
}
Example usage of org.nd4j.linalg.dataset.api.iterator.TestDataSetIterator in the nd4j project by deeplearning4j.
From the class PreProcessor3D4DTest, method testBruteForce3dMaskLabels:
@Test
public void testBruteForce3dMaskLabels() {
    // Fit a standardizer and a min-max scaler (labels included) on a masked 3d
    // dataset and verify the statistics match those of the equivalent unmasked data.
    NormalizerStandardize standardizer = new NormalizerStandardize();
    standardizer.fitLabel(true);
    NormalizerMinMaxScaler minMaxScaler = new NormalizerMinMaxScaler();
    minMaxScaler.fitLabel(true);

    // Two sequences of different lengths built from consecutive numbers;
    // merging them produces a dataset that carries masks.
    int samples = 100;
    INDArray featureScale = Nd4j.create(new float[] { 1, 2, 10 }).reshape(3, 1);
    int timeStepsU = 5;
    Construct3dDataSet sampleU = new Construct3dDataSet(featureScale, timeStepsU, samples, 1);
    int timeStepsV = 3;
    Construct3dDataSet sampleV = new Construct3dDataSet(featureScale, timeStepsV, samples, sampleU.newOrigin);
    List<DataSet> parts = new ArrayList<>();
    parts.add(sampleU.sampleDataSet);
    parts.add(sampleV.sampleDataSet);
    DataSet maskedDataSet = DataSet.merge(parts);
    DataSet maskedDataSetCopy = maskedDataSet.copy();

    // The same values laid out as one long sequence with no mask.
    Construct3dDataSet unmasked = new Construct3dDataSet(featureScale, timeStepsU + timeStepsV, samples, 1);

    // Fitting on the masked dataset directly: label and feature stats are identical.
    standardizer.fit(maskedDataSet);
    assertEquals(standardizer.getMean(), unmasked.expectedMean);
    assertEquals(standardizer.getStd(), unmasked.expectedStd);
    assertEquals(standardizer.getLabelMean(), unmasked.expectedMean);
    assertEquals(standardizer.getLabelStd(), unmasked.expectedStd);
    minMaxScaler.fit(maskedDataSetCopy);
    assertEquals(minMaxScaler.getMin(), unmasked.expectedMin);
    assertEquals(minMaxScaler.getMax(), unmasked.expectedMax);
    assertEquals(minMaxScaler.getLabelMin(), unmasked.expectedMin);
    assertEquals(minMaxScaler.getLabelMax(), unmasked.expectedMax);

    // Same test with an iterator: std only needs to be close, everything else exact.
    DataSetIterator standardizeIter = new TestDataSetIterator(maskedDataSet, 5);
    DataSetIterator minMaxIter = new TestDataSetIterator(maskedDataSetCopy, 5);
    standardizer.fit(standardizeIter);
    assertEquals(standardizer.getMean(), unmasked.expectedMean);
    assertEquals(standardizer.getLabelMean(), unmasked.expectedMean);
    assertTrue(Transforms.abs(standardizer.getStd().div(unmasked.expectedStd).sub(1)).maxNumber().floatValue() < 0.01);
    assertTrue(Transforms.abs(standardizer.getLabelStd().div(unmasked.expectedStd).sub(1)).maxNumber().floatValue() < 0.01);
    minMaxScaler.fit(minMaxIter);
    assertEquals(minMaxScaler.getMin(), unmasked.expectedMin);
    assertEquals(minMaxScaler.getMax(), unmasked.expectedMax);
    assertEquals(minMaxScaler.getLabelMin(), unmasked.expectedMin);
    assertEquals(minMaxScaler.getLabelMax(), unmasked.expectedMax);
}
Example usage of org.nd4j.linalg.dataset.api.iterator.TestDataSetIterator in the nd4j project by deeplearning4j.
From the class NormalizerStandardizeTest, method testBruteForce:
@Test
public void testBruteForce() {
    /* Builds a dataset whose columns are x-, y- and z-multiples of the
       consecutive natural numbers 1..nSamples, then checks the fitted
       mean and std dev against their closed-form values. */
    // 0.01% of correct value
    double tolerancePerc = 0.01;
    int nSamples = 5120;
    int x = 1, y = 2, z = 3;
    INDArray colX = Nd4j.linspace(1, nSamples, nSamples).reshape(nSamples, 1).mul(x);
    INDArray colY = colX.mul(y);
    INDArray colZ = colX.mul(z);
    INDArray features = Nd4j.concat(1, colX, colY, colZ);
    INDArray labels = Nd4j.zeros(nSamples, 1);
    DataSet dataSet = new DataSet(features, labels);

    // Closed-form mean and (population) std dev of 1..n, scaled per column.
    double meanNaturalNums = (nSamples + 1) / 2.0;
    double stdNaturalNums = Math.sqrt((nSamples * nSamples - 1) / 12.0);
    INDArray expectedMean = Nd4j.create(new double[] { meanNaturalNums * x, meanNaturalNums * y, meanNaturalNums * z });
    INDArray expectedStd = Nd4j.create(new double[] { stdNaturalNums * x, stdNaturalNums * y, stdNaturalNums * z });

    NormalizerStandardize normalizer = new NormalizerStandardize();
    normalizer.fit(dataSet);
    double meanErrPerc = Transforms.abs(expectedMean.sub(normalizer.getMean())).div(expectedMean).mul(100).max(1).getDouble(0, 0);
    assertTrue(meanErrPerc < tolerancePerc);
    double stdErrPerc = Transforms.abs(expectedStd.sub(normalizer.getStd())).div(expectedStd).mul(100).max(1).getDouble(0, 0);
    assertTrue(stdErrPerc < tolerancePerc);

    // SAME TEST WITH THE ITERATOR
    int bSize = 10;
    // 0.1% of correct value
    tolerancePerc = 0.1;
    DataSetIterator iter = new TestDataSetIterator(dataSet, bSize);
    normalizer.fit(iter);
    meanErrPerc = Transforms.abs(expectedMean.sub(normalizer.getMean())).div(expectedMean).mul(100).max(1).getDouble(0, 0);
    assertTrue(meanErrPerc < tolerancePerc);
    stdErrPerc = Transforms.abs(expectedStd.sub(normalizer.getStd())).div(expectedStd).mul(100).max(1).getDouble(0, 0);
    assertTrue(stdErrPerc < tolerancePerc);
}
Aggregations