Search in sources:

Example 1 with TestDataSetIterator

use of org.nd4j.linalg.dataset.api.iterator.TestDataSetIterator in project nd4j by deeplearning4j.

The class NormalizerMinMaxScalerTest, method testBruteForce.

@Test
public void testBruteForce() {
    // Verifies NormalizerMinMaxScaler against the analytic per-column min/max
    // of a linearly generated dataset, both when fitting a DataSet directly
    // and when fitting through a DataSetIterator (with a preprocessor check).
    //
    // X_std = (X - X.min(axis=0)) / (X.max(axis=0) - X.min(axis=0))
    // X_scaled = X_std * (max - min) + min
    // Dataset features are scaled consecutive natural numbers
    int nSamples = 500;
    int x = 4, y = 2, z = 3;
    INDArray featureX = Nd4j.linspace(1, nSamples, nSamples).reshape(nSamples, 1);
    INDArray featureY = featureX.mul(y);
    INDArray featureZ = featureX.mul(z);
    // scale X in place only AFTER Y and Z were derived from the unscaled copy
    featureX.muli(x);
    INDArray featureSet = Nd4j.concat(1, featureX, featureY, featureZ);
    INDArray labelSet = Nd4j.zeros(nSamples, 1);
    DataSet sampleDataSet = new DataSet(featureSet, labelSet);
    // expected min and max: column c spans [c, nSamples * c]
    INDArray theoreticalMin = Nd4j.create(new double[] { x, y, z });
    INDArray theoreticalMax = Nd4j.create(new double[] { nSamples * x, nSamples * y, nSamples * z });
    INDArray theoreticalRange = theoreticalMax.sub(theoreticalMin);
    NormalizerMinMaxScaler myNormalizer = new NormalizerMinMaxScaler();
    myNormalizer.fit(sampleDataSet);
    INDArray minDataSet = myNormalizer.getMin();
    INDArray maxDataSet = myNormalizer.getMax();
    // use absolute deviations so a negative deviation cannot mask an error
    INDArray minDiff = Transforms.abs(minDataSet.sub(theoreticalMin)).max(1);
    INDArray maxDiff = Transforms.abs(maxDataSet.sub(theoreticalMax)).max(1);
    // JUnit convention: expected value first; maxDiff is already reduced
    // with max(1), so no second reduction is applied
    assertEquals(0.0, minDiff.getDouble(0, 0), 0.000000001);
    assertEquals(0.0, maxDiff.getDouble(0, 0), 0.000000001);
    // SAME TEST WITH THE ITERATOR
    int bSize = 1;
    DataSetIterator sampleIter = new TestDataSetIterator(sampleDataSet, bSize);
    myNormalizer.fit(sampleIter);
    minDataSet = myNormalizer.getMin();
    maxDataSet = myNormalizer.getMax();
    assertEquals(0.0, Transforms.abs(minDataSet.sub(theoreticalMin)).max(1).getDouble(0, 0), 0.000000001);
    assertEquals(0.0, Transforms.abs(maxDataSet.sub(theoreticalMax)).max(1).getDouble(0, 0), 0.000000001);
    sampleIter.setPreProcessor(myNormalizer);
    INDArray actual, expected, delta;
    int i = 1;
    while (sampleIter.hasNext()) {
        // row i holds i * {x, y, z}, so the min-max scaled value is
        // (i * min - min) / range = min * (i - 1) / range
        expected = theoreticalMin.mul(i - 1).div(theoreticalRange);
        actual = sampleIter.next().getFeatures();
        delta = Transforms.abs(actual.sub(expected));
        assertTrue(delta.max(1).getDouble(0, 0) < 0.0001);
        i++;
    }
}
Also used : NormalizerMinMaxScaler(org.nd4j.linalg.dataset.api.preprocessor.NormalizerMinMaxScaler) TestDataSetIterator(org.nd4j.linalg.dataset.api.iterator.TestDataSetIterator) INDArray(org.nd4j.linalg.api.ndarray.INDArray) TestDataSetIterator(org.nd4j.linalg.dataset.api.iterator.TestDataSetIterator) DataSetIterator(org.nd4j.linalg.dataset.api.iterator.DataSetIterator) Test(org.junit.Test) BaseNd4jTest(org.nd4j.linalg.BaseNd4jTest)

Example 2 with TestDataSetIterator

use of org.nd4j.linalg.dataset.api.iterator.TestDataSetIterator in project nd4j by deeplearning4j.

The class NormalizerStandardizeLabelsTest, method testBruteForce.

@Test
public void testBruteForce() {
    /* This test creates a dataset where feature values are multiples of consecutive natural numbers,
       with the labels equal to the first feature column. The fitted feature and label statistics
       are compared to the theoretical mean and std dev.
     */
    // 0.01% of correct value
    double tolerancePerc = 0.01;
    int nSamples = 5120;
    int x = 1, y = 2, z = 3;
    INDArray featureX = Nd4j.linspace(1, nSamples, nSamples).reshape(nSamples, 1).mul(x);
    INDArray featureY = featureX.mul(y);
    INDArray featureZ = featureX.mul(z);
    INDArray featureSet = Nd4j.concat(1, featureX, featureY, featureZ);
    // labels mirror feature column 0, so label stats must match that column
    INDArray labelSet = featureSet.dup().getColumns(new int[] { 0 });
    DataSet sampleDataSet = new DataSet(featureSet, labelSet);
    // mean of 1..n is (n + 1) / 2
    double meanNaturalNums = (nSamples + 1) / 2.0;
    INDArray theoreticalMean = Nd4j.create(new double[] { meanNaturalNums * x, meanNaturalNums * y, meanNaturalNums * z });
    INDArray theoreticallabelMean = theoreticalMean.dup().getColumns(new int[] { 0 });
    // population std dev of 1..n is sqrt((n^2 - 1) / 12)
    double stdNaturalNums = Math.sqrt((nSamples * nSamples - 1) / 12.0);
    INDArray theoreticalStd = Nd4j.create(new double[] { stdNaturalNums * x, stdNaturalNums * y, stdNaturalNums * z });
    INDArray theoreticallabelStd = theoreticalStd.dup().getColumns(new int[] { 0 });
    NormalizerStandardize myNormalizer = new NormalizerStandardize();
    myNormalizer.fitLabel(true);
    myNormalizer.fit(sampleDataSet);
    INDArray meanDelta = Transforms.abs(theoreticalMean.sub(myNormalizer.getMean()));
    INDArray labelDelta = Transforms.abs(theoreticallabelMean.sub(myNormalizer.getLabelMean()));
    INDArray meanDeltaPerc = meanDelta.div(theoreticalMean).mul(100);
    INDArray labelDeltaPerc = labelDelta.div(theoreticallabelMean).mul(100);
    double maxMeanDeltaPerc = meanDeltaPerc.max(1).getDouble(0, 0);
    assertTrue(maxMeanDeltaPerc < tolerancePerc);
    assertTrue(labelDeltaPerc.max(1).getDouble(0, 0) < tolerancePerc);
    INDArray stdDelta = Transforms.abs(theoreticalStd.sub(myNormalizer.getStd()));
    INDArray stdDeltaPerc = stdDelta.div(theoreticalStd).mul(100);
    // express the label std deviation in percent too, consistent with the feature check
    INDArray stdlabelDeltaPerc = Transforms.abs(theoreticallabelStd.sub(myNormalizer.getLabelStd())).div(theoreticallabelStd).mul(100);
    // stdDeltaPerc is already a percentage; do not scale by 100 a second time
    double maxStdDeltaPerc = stdDeltaPerc.max(1).getDouble(0, 0);
    double maxlabelStdDeltaPerc = stdlabelDeltaPerc.max(1).getDouble(0, 0);
    assertTrue(maxStdDeltaPerc < tolerancePerc);
    assertTrue(maxlabelStdDeltaPerc < tolerancePerc);
    // SAME TEST WITH THE ITERATOR
    int bSize = 10;
    // 0.1% of correct value
    tolerancePerc = 0.1;
    DataSetIterator sampleIter = new TestDataSetIterator(sampleDataSet, bSize);
    myNormalizer.fit(sampleIter);
    meanDelta = Transforms.abs(theoreticalMean.sub(myNormalizer.getMean()));
    meanDeltaPerc = meanDelta.div(theoreticalMean).mul(100);
    maxMeanDeltaPerc = meanDeltaPerc.max(1).getDouble(0, 0);
    assertTrue(maxMeanDeltaPerc < tolerancePerc);
    // compare std to std (the original compared mean to mean here by copy-paste)
    stdDelta = Transforms.abs(theoreticalStd.sub(myNormalizer.getStd()));
    stdDeltaPerc = stdDelta.div(theoreticalStd).mul(100);
    maxStdDeltaPerc = stdDeltaPerc.max(1).getDouble(0, 0);
    assertTrue(maxStdDeltaPerc < tolerancePerc);
}
Also used : TestDataSetIterator(org.nd4j.linalg.dataset.api.iterator.TestDataSetIterator) INDArray(org.nd4j.linalg.api.ndarray.INDArray) NormalizerStandardize(org.nd4j.linalg.dataset.api.preprocessor.NormalizerStandardize) TestDataSetIterator(org.nd4j.linalg.dataset.api.iterator.TestDataSetIterator) DataSetIterator(org.nd4j.linalg.dataset.api.iterator.DataSetIterator) Test(org.junit.Test) BaseNd4jTest(org.nd4j.linalg.BaseNd4jTest)

Example 3 with TestDataSetIterator

use of org.nd4j.linalg.dataset.api.iterator.TestDataSetIterator in project nd4j by deeplearning4j.

The class NormalizerStandardizeTest, method testUnderOverflow.

@Test
public void testUnderOverflow() {
    // This dataset is essentially constant per column with a small spread,
    // and the constants are large in magnitude. Checks that the standardizer
    // handles such data without numerical under/overflow.
    // Within 1 %
    double tolerancePerc = 1;
    int nSamples = 1000;
    int bSize = 10;
    int x = -1000000, y = 1000000;
    double z = 1000000;
    INDArray featureX = Nd4j.rand(nSamples, 1).mul(1).add(x);
    INDArray featureY = Nd4j.rand(nSamples, 1).mul(2).add(y);
    INDArray featureZ = Nd4j.rand(nSamples, 1).mul(3).add(z);
    INDArray featureSet = Nd4j.concat(1, featureX, featureY, featureZ);
    INDArray labelSet = Nd4j.zeros(nSamples, 1);
    DataSet sampleDataSet = new DataSet(featureSet, labelSet);
    DataSetIterator sampleIter = new TestDataSetIterator(sampleDataSet, bSize);
    INDArray theoreticalMean = Nd4j.create(new double[] { x, y, z });
    NormalizerStandardize myNormalizer = new NormalizerStandardize();
    myNormalizer.fit(sampleIter);
    INDArray meanDelta = Transforms.abs(theoreticalMean.sub(myNormalizer.getMean()));
    // divide by |mean|: the first column's mean is negative, and dividing by it
    // directly would flip the sign of the percentage and make the assertion
    // below trivially true for that column
    INDArray meanDeltaPerc = meanDelta.mul(100).div(Transforms.abs(theoreticalMean));
    assertTrue(meanDeltaPerc.max(1).getDouble(0, 0) < tolerancePerc);
    // transform just has to run without throwing
    myNormalizer.transform(sampleDataSet);
}
Also used : TestDataSetIterator(org.nd4j.linalg.dataset.api.iterator.TestDataSetIterator) INDArray(org.nd4j.linalg.api.ndarray.INDArray) NormalizerStandardize(org.nd4j.linalg.dataset.api.preprocessor.NormalizerStandardize) TestDataSetIterator(org.nd4j.linalg.dataset.api.iterator.TestDataSetIterator) DataSetIterator(org.nd4j.linalg.dataset.api.iterator.DataSetIterator) Test(org.junit.Test) BaseNd4jTest(org.nd4j.linalg.BaseNd4jTest)

Example 4 with TestDataSetIterator

use of org.nd4j.linalg.dataset.api.iterator.TestDataSetIterator in project nd4j by deeplearning4j.

The class PreProcessor3D4DTest, method testBruteForce3dMaskLabels.

@Test
public void testBruteForce3dMaskLabels() {
    // Fits both normalizers (labels included) on masked 3d data and checks
    // that the collected statistics equal those of the same data generated
    // without a mask — exactly for min/max/mean, approximately for std when
    // fitting through an iterator.
    NormalizerStandardize myNormalizer = new NormalizerStandardize();
    myNormalizer.fitLabel(true);
    NormalizerMinMaxScaler myMinMaxScaler = new NormalizerMinMaxScaler();
    myMinMaxScaler.fitLabel(true);
    // dataset with consecutive numbers as feature values; it also carries masks
    int samples = 100;
    INDArray featureScale = Nd4j.create(new float[] { 1, 2, 10 }).reshape(3, 1);
    int timeStepsU = 5;
    int timeStepsV = 3;
    Construct3dDataSet sampleU = new Construct3dDataSet(featureScale, timeStepsU, samples, 1);
    Construct3dDataSet sampleV = new Construct3dDataSet(featureScale, timeStepsV, samples, sampleU.newOrigin);
    List<DataSet> parts = new ArrayList<>();
    parts.add(sampleU.sampleDataSet);
    parts.add(sampleV.sampleDataSet);
    DataSet fullDataSetA = DataSet.merge(parts);
    DataSet fullDataSetAA = fullDataSetA.copy();
    // the same data, generated as one unmasked sequence
    Construct3dDataSet fullDataSetNoMask = new Construct3dDataSet(featureScale, timeStepsU + timeStepsV, samples, 1);
    // direct fit: label and feature statistics coincide with the unmasked dataset
    myNormalizer.fit(fullDataSetA);
    assertEquals(myNormalizer.getMean(), fullDataSetNoMask.expectedMean);
    assertEquals(myNormalizer.getStd(), fullDataSetNoMask.expectedStd);
    assertEquals(myNormalizer.getLabelMean(), fullDataSetNoMask.expectedMean);
    assertEquals(myNormalizer.getLabelStd(), fullDataSetNoMask.expectedStd);
    myMinMaxScaler.fit(fullDataSetAA);
    assertEquals(myMinMaxScaler.getMin(), fullDataSetNoMask.expectedMin);
    assertEquals(myMinMaxScaler.getMax(), fullDataSetNoMask.expectedMax);
    assertEquals(myMinMaxScaler.getLabelMin(), fullDataSetNoMask.expectedMin);
    assertEquals(myMinMaxScaler.getLabelMax(), fullDataSetNoMask.expectedMax);
    // same test through iterators: std is close (within 1%), everything else exact
    DataSetIterator sampleIterA = new TestDataSetIterator(fullDataSetA, 5);
    DataSetIterator sampleIterB = new TestDataSetIterator(fullDataSetAA, 5);
    myNormalizer.fit(sampleIterA);
    assertEquals(myNormalizer.getMean(), fullDataSetNoMask.expectedMean);
    assertEquals(myNormalizer.getLabelMean(), fullDataSetNoMask.expectedMean);
    float stdRelErr = Transforms.abs(myNormalizer.getStd().div(fullDataSetNoMask.expectedStd).sub(1)).maxNumber().floatValue();
    float labelStdRelErr = Transforms.abs(myNormalizer.getLabelStd().div(fullDataSetNoMask.expectedStd).sub(1)).maxNumber().floatValue();
    assertTrue(stdRelErr < 0.01);
    assertTrue(labelStdRelErr < 0.01);
    myMinMaxScaler.fit(sampleIterB);
    assertEquals(myMinMaxScaler.getMin(), fullDataSetNoMask.expectedMin);
    assertEquals(myMinMaxScaler.getMax(), fullDataSetNoMask.expectedMax);
    assertEquals(myMinMaxScaler.getLabelMin(), fullDataSetNoMask.expectedMin);
    assertEquals(myMinMaxScaler.getLabelMax(), fullDataSetNoMask.expectedMax);
}
Also used : NormalizerMinMaxScaler(org.nd4j.linalg.dataset.api.preprocessor.NormalizerMinMaxScaler) TestDataSetIterator(org.nd4j.linalg.dataset.api.iterator.TestDataSetIterator) INDArray(org.nd4j.linalg.api.ndarray.INDArray) ArrayList(java.util.ArrayList) NormalizerStandardize(org.nd4j.linalg.dataset.api.preprocessor.NormalizerStandardize) TestDataSetIterator(org.nd4j.linalg.dataset.api.iterator.TestDataSetIterator) DataSetIterator(org.nd4j.linalg.dataset.api.iterator.DataSetIterator) Test(org.junit.Test) BaseNd4jTest(org.nd4j.linalg.BaseNd4jTest)

Example 5 with TestDataSetIterator

use of org.nd4j.linalg.dataset.api.iterator.TestDataSetIterator in project nd4j by deeplearning4j.

The class NormalizerStandardizeTest, method testBruteForce.

@Test
public void testBruteForce() {
    /* Builds a dataset whose feature columns are multiples of consecutive
       natural numbers and compares the fitted statistics against the
       closed-form mean and standard deviation, first fitting the DataSet
       directly and then fitting through a DataSetIterator. */
    // 0.01% of correct value
    double tolerancePerc = 0.01;
    int nSamples = 5120;
    int x = 1, y = 2, z = 3;
    INDArray baseCol = Nd4j.linspace(1, nSamples, nSamples).reshape(nSamples, 1).mul(x);
    INDArray features = Nd4j.concat(1, baseCol, baseCol.mul(y), baseCol.mul(z));
    INDArray labels = Nd4j.zeros(nSamples, 1);
    DataSet data = new DataSet(features, labels);
    // mean of 1..n is (n + 1) / 2; population std dev is sqrt((n^2 - 1) / 12)
    double meanOfNaturals = (nSamples + 1) / 2.0;
    INDArray expectedMean = Nd4j.create(new double[] { meanOfNaturals * x, meanOfNaturals * y, meanOfNaturals * z });
    double stdOfNaturals = Math.sqrt((nSamples * nSamples - 1) / 12.0);
    INDArray expectedStd = Nd4j.create(new double[] { stdOfNaturals * x, stdOfNaturals * y, stdOfNaturals * z });
    NormalizerStandardize normalizer = new NormalizerStandardize();
    normalizer.fit(data);
    INDArray meanErrPct = Transforms.abs(expectedMean.sub(normalizer.getMean())).div(expectedMean).mul(100);
    assertTrue(meanErrPct.max(1).getDouble(0, 0) < tolerancePerc);
    INDArray stdErrPct = Transforms.abs(expectedStd.sub(normalizer.getStd())).div(expectedStd).mul(100);
    assertTrue(stdErrPct.max(1).getDouble(0, 0) < tolerancePerc);
    // SAME TEST WITH THE ITERATOR
    int batchSize = 10;
    // 0.1% of correct value
    tolerancePerc = 0.1;
    DataSetIterator iter = new TestDataSetIterator(data, batchSize);
    normalizer.fit(iter);
    meanErrPct = Transforms.abs(expectedMean.sub(normalizer.getMean())).div(expectedMean).mul(100);
    assertTrue(meanErrPct.max(1).getDouble(0, 0) < tolerancePerc);
    stdErrPct = Transforms.abs(expectedStd.sub(normalizer.getStd())).div(expectedStd).mul(100);
    assertTrue(stdErrPct.max(1).getDouble(0, 0) < tolerancePerc);
}
Also used : TestDataSetIterator(org.nd4j.linalg.dataset.api.iterator.TestDataSetIterator) INDArray(org.nd4j.linalg.api.ndarray.INDArray) NormalizerStandardize(org.nd4j.linalg.dataset.api.preprocessor.NormalizerStandardize) TestDataSetIterator(org.nd4j.linalg.dataset.api.iterator.TestDataSetIterator) DataSetIterator(org.nd4j.linalg.dataset.api.iterator.DataSetIterator) Test(org.junit.Test) BaseNd4jTest(org.nd4j.linalg.BaseNd4jTest)

Aggregations

INDArray (org.nd4j.linalg.api.ndarray.INDArray)10 TestDataSetIterator (org.nd4j.linalg.dataset.api.iterator.TestDataSetIterator)10 Test (org.junit.Test)9 BaseNd4jTest (org.nd4j.linalg.BaseNd4jTest)9 DataSetIterator (org.nd4j.linalg.dataset.api.iterator.DataSetIterator)9 NormalizerStandardize (org.nd4j.linalg.dataset.api.preprocessor.NormalizerStandardize)8 NormalizerMinMaxScaler (org.nd4j.linalg.dataset.api.preprocessor.NormalizerMinMaxScaler)4 ArrayList (java.util.ArrayList)2 DataNormalization (org.nd4j.linalg.dataset.api.preprocessor.DataNormalization)1