Search in sources :

Example 6 with NormalizerStandardize

use of org.nd4j.linalg.dataset.api.preprocessor.NormalizerStandardize in project nd4j by deeplearning4j.

The class NormalizerStandardizeLabelsTest, method testTransform.

@Test
public void testTransform() {
    /* The random dataset is generated as AX + B where X ~ N(0, 1), so the
       theoretical mean is B and the theoretical std is A. After fitting and
       normalizing, the transformed values should match an X generated from
       the same seed. */
    long randSeed = 2227724;
    int nFeatures = 2;
    int nSamples = 6400;
    int bsize = 8;
    int a = 5;
    int b = 100;
    genRandomDataSet normData = new genRandomDataSet(nSamples, nFeatures, a, b, randSeed);
    genRandomDataSet expectedData = new genRandomDataSet(nSamples, nFeatures, 1, 0, randSeed);
    genRandomDataSet beforeTransformData = new genRandomDataSet(nSamples, nFeatures, a, b, randSeed);
    NormalizerStandardize myNormalizer = new NormalizerStandardize();
    myNormalizer.fitLabel(true);
    DataSetIterator normIterator = normData.getIter(bsize);
    DataSetIterator expectedIterator = expectedData.getIter(bsize);
    DataSetIterator beforeTransformIterator = beforeTransformData.getIter(bsize);
    myNormalizer.fit(normIterator);

    // Fitted mean must be within 0.5% of the theoretical mean.
    double tolerancePerc = 0.5;
    INDArray sampleMean = myNormalizer.getMean();
    INDArray sampleMeanDelta = Transforms.abs(sampleMean.sub(normData.theoreticalMean));
    assertTrue(sampleMeanDelta.mul(100).div(normData.theoreticalMean).max(1).getDouble(0, 0) < tolerancePerc);
    // Sanity check: the deviation should lie within ~2.6 standard errors of
    // the mean, which covers 99% of samples.
    double sampleMeanSEM = sampleMeanDelta.div(normData.theoreticalSEM).max(1).getDouble(0, 0);
    assertTrue(sampleMeanSEM < 2.6);

    // Fitted std must be within 5% of the theoretical std.
    tolerancePerc = 5;
    INDArray sampleStd = myNormalizer.getStd();
    INDArray sampleStdDelta = Transforms.abs(sampleStd.sub(normData.theoreticalStd));
    assertTrue(sampleStdDelta.div(normData.theoreticalStd).max(1).mul(100).getDouble(0, 0) < tolerancePerc);

    // Transformed values must match the expected X values within 1%.
    tolerancePerc = 1;
    normIterator.setPreProcessor(myNormalizer);
    while (normIterator.hasNext()) {
        INDArray before = beforeTransformIterator.next().getFeatures();
        DataSet here = normIterator.next();
        // Labels were fit too, so features and labels normalize identically;
        // this bootstraps the existing feature test onto the labels.
        assertEquals(here.getFeatures(), here.getLabels());
        INDArray after = here.getFeatures();
        INDArray expected = expectedIterator.next().getFeatures();
        INDArray absDelta = Transforms.abs(after.sub(expected));
        INDArray relDeltaPerc = absDelta.div(before.sub(expected)).mul(100);
        double worstDeltaPerc = relDeltaPerc.max(0, 1).getDouble(0, 0);
        assertTrue(worstDeltaPerc < tolerancePerc);
    }
}
Also used : INDArray(org.nd4j.linalg.api.ndarray.INDArray) NormalizerStandardize(org.nd4j.linalg.dataset.api.preprocessor.NormalizerStandardize) TestDataSetIterator(org.nd4j.linalg.dataset.api.iterator.TestDataSetIterator) DataSetIterator(org.nd4j.linalg.dataset.api.iterator.DataSetIterator) Test(org.junit.Test) BaseNd4jTest(org.nd4j.linalg.BaseNd4jTest)

Example 7 with NormalizerStandardize

use of org.nd4j.linalg.dataset.api.preprocessor.NormalizerStandardize in project nd4j by deeplearning4j.

The class NormalizerStandardizeLabelsTest, method testBruteForce.

@Test
public void testBruteForce() {
    /* Features are multiples (x, y, z) of the consecutive natural numbers
       1..nSamples, so the theoretical mean is m*(n+1)/2 and the theoretical
       std is m*sqrt((n^2-1)/12) for multiplier m. The fitted statistics (for
       both features and labels) are compared against these closed forms,
       first fitting a DataSet directly and then fitting via an iterator. */
    double tolerancePerc = 0.01;
    int nSamples = 5120;
    int x = 1, y = 2, z = 3;
    INDArray featureX = Nd4j.linspace(1, nSamples, nSamples).reshape(nSamples, 1).mul(x);
    INDArray featureY = featureX.mul(y);
    INDArray featureZ = featureX.mul(z);
    INDArray featureSet = Nd4j.concat(1, featureX, featureY, featureZ);
    INDArray labelSet = featureSet.dup().getColumns(new int[] { 0 });
    DataSet sampleDataSet = new DataSet(featureSet, labelSet);
    double meanNaturalNums = (nSamples + 1) / 2.0;
    INDArray theoreticalMean = Nd4j.create(new double[] { meanNaturalNums * x, meanNaturalNums * y, meanNaturalNums * z });
    INDArray theoreticallabelMean = theoreticalMean.dup().getColumns(new int[] { 0 });
    double stdNaturalNums = Math.sqrt((nSamples * nSamples - 1) / 12.0);
    INDArray theoreticalStd = Nd4j.create(new double[] { stdNaturalNums * x, stdNaturalNums * y, stdNaturalNums * z });
    INDArray theoreticallabelStd = theoreticalStd.dup().getColumns(new int[] { 0 });
    NormalizerStandardize myNormalizer = new NormalizerStandardize();
    myNormalizer.fitLabel(true);
    myNormalizer.fit(sampleDataSet);
    INDArray meanDelta = Transforms.abs(theoreticalMean.sub(myNormalizer.getMean()));
    INDArray labelDelta = Transforms.abs(theoreticallabelMean.sub(myNormalizer.getLabelMean()));
    INDArray meanDeltaPerc = meanDelta.div(theoreticalMean).mul(100);
    INDArray labelDeltaPerc = labelDelta.div(theoreticallabelMean).mul(100);
    double maxMeanDeltaPerc = meanDeltaPerc.max(1).getDouble(0, 0);
    assertTrue(maxMeanDeltaPerc < tolerancePerc);
    assertTrue(labelDeltaPerc.max(1).getDouble(0, 0) < tolerancePerc);
    INDArray stdDelta = Transforms.abs(theoreticalStd.sub(myNormalizer.getStd()));
    INDArray stdDeltaPerc = stdDelta.div(theoreticalStd).mul(100);
    // Fix: convert the label std deviation to a percentage too, so it is on the
    // same scale as tolerancePerc (the original omitted the mul(100), making the
    // label-std assertion ~100x too lenient).
    INDArray stdlabelDeltaPerc = Transforms.abs(theoreticallabelStd.sub(myNormalizer.getLabelStd())).div(theoreticallabelStd).mul(100);
    // Fix: stdDeltaPerc is already a percentage; the original multiplied by 100 a
    // second time here, making this assertion 100x stricter than intended.
    double maxStdDeltaPerc = stdDeltaPerc.max(1).getDouble(0, 0);
    double maxlabelStdDeltaPerc = stdlabelDeltaPerc.max(1).getDouble(0, 0);
    assertTrue(maxStdDeltaPerc < tolerancePerc);
    assertTrue(maxlabelStdDeltaPerc < tolerancePerc);
    // SAME TEST WITH THE ITERATOR
    int bSize = 10;
    // within 0.1% of the correct value (the original comment said 1%)
    tolerancePerc = 0.1;
    DataSetIterator sampleIter = new TestDataSetIterator(sampleDataSet, bSize);
    myNormalizer.fit(sampleIter);
    meanDelta = Transforms.abs(theoreticalMean.sub(myNormalizer.getMean()));
    meanDeltaPerc = meanDelta.div(theoreticalMean).mul(100);
    maxMeanDeltaPerc = meanDeltaPerc.max(1).getDouble(0, 0);
    assertTrue(maxMeanDeltaPerc < tolerancePerc);
    // Fix: the original recomputed the MEAN delta here (copy-paste error), so the
    // std assertion below never actually tested the fitted std.
    stdDelta = Transforms.abs(theoreticalStd.sub(myNormalizer.getStd()));
    stdDeltaPerc = stdDelta.div(theoreticalStd).mul(100);
    maxStdDeltaPerc = stdDeltaPerc.max(1).getDouble(0, 0);
    assertTrue(maxStdDeltaPerc < tolerancePerc);
}
Also used : TestDataSetIterator(org.nd4j.linalg.dataset.api.iterator.TestDataSetIterator) INDArray(org.nd4j.linalg.api.ndarray.INDArray) NormalizerStandardize(org.nd4j.linalg.dataset.api.preprocessor.NormalizerStandardize) TestDataSetIterator(org.nd4j.linalg.dataset.api.iterator.TestDataSetIterator) DataSetIterator(org.nd4j.linalg.dataset.api.iterator.DataSetIterator) Test(org.junit.Test) BaseNd4jTest(org.nd4j.linalg.BaseNd4jTest)

Example 8 with NormalizerStandardize

use of org.nd4j.linalg.dataset.api.preprocessor.NormalizerStandardize in project nd4j by deeplearning4j.

The class NormalizerStandardizeTest, method testRevert.

@Test
public void testRevert() {
    // Reverting a transformed copy must reproduce the original features to
    // within 0.01%.
    double tolerancePerc = 0.01;
    int nSamples = 500;
    int nFeatures = 3;
    INDArray featureSet = Nd4j.randn(nSamples, nFeatures);
    INDArray labelSet = Nd4j.zeros(nSamples, 1);
    DataSet sampleDataSet = new DataSet(featureSet, labelSet);
    NormalizerStandardize myNormalizer = new NormalizerStandardize();
    myNormalizer.fit(sampleDataSet);
    DataSet transformed = sampleDataSet.copy();
    myNormalizer.transform(transformed);
    myNormalizer.revert(transformed);
    // NOTE(review): the relative error divides by the raw (possibly negative or
    // near-zero) feature values, so negative entries yield negative percentages
    // that the max-based assertion cannot catch — consider an absolute-error
    // check instead; kept as-is to preserve behavior.
    INDArray relativeError = Transforms.abs(transformed.getFeatures().sub(sampleDataSet.getFeatures())).div(sampleDataSet.getFeatures());
    double worstErrorPerc = relativeError.max(0, 1).mul(100).getDouble(0, 0);
    assertTrue(worstErrorPerc < tolerancePerc);
}
Also used : INDArray(org.nd4j.linalg.api.ndarray.INDArray) NormalizerStandardize(org.nd4j.linalg.dataset.api.preprocessor.NormalizerStandardize) Test(org.junit.Test) BaseNd4jTest(org.nd4j.linalg.BaseNd4jTest)

Example 9 with NormalizerStandardize

use of org.nd4j.linalg.dataset.api.preprocessor.NormalizerStandardize in project nd4j by deeplearning4j.

The class NormalizerStandardizeTest, method testUnderOverflow.

@Test
public void testUnderOverflow() {
    // The dataset is essentially constant at a very large magnitude with a tiny
    // spread; verifies the fitting algorithm handles this without numerical
    // under/overflow. Mean must be recovered to within 1%.
    double tolerancePerc = 1;
    int nSamples = 1000;
    int bSize = 10;
    int x = -1000000, y = 1000000;
    double z = 1000000;
    INDArray featureX = Nd4j.rand(nSamples, 1).mul(1).add(x);
    INDArray featureY = Nd4j.rand(nSamples, 1).mul(2).add(y);
    INDArray featureZ = Nd4j.rand(nSamples, 1).mul(3).add(z);
    INDArray featureSet = Nd4j.concat(1, featureX, featureY, featureZ);
    INDArray labelSet = Nd4j.zeros(nSamples, 1);
    DataSet sampleDataSet = new DataSet(featureSet, labelSet);
    DataSetIterator sampleIter = new TestDataSetIterator(sampleDataSet, bSize);
    INDArray theoreticalMean = Nd4j.create(new double[] { x, y, z });
    NormalizerStandardize myNormalizer = new NormalizerStandardize();
    myNormalizer.fit(sampleIter);
    INDArray meanDelta = Transforms.abs(theoreticalMean.sub(myNormalizer.getMean()));
    // Fix: divide by the ABSOLUTE theoretical mean. x is negative, so the original
    // produced a negative percentage for that column, which the max-based
    // assertion below could never catch.
    INDArray meanDeltaPerc = meanDelta.mul(100).div(Transforms.abs(theoreticalMean));
    assertTrue(meanDeltaPerc.max(1).getDouble(0, 0) < tolerancePerc);
    // This just has to complete without throwing or producing inf/nan.
    myNormalizer.transform(sampleDataSet);
}
Also used : TestDataSetIterator(org.nd4j.linalg.dataset.api.iterator.TestDataSetIterator) INDArray(org.nd4j.linalg.api.ndarray.INDArray) NormalizerStandardize(org.nd4j.linalg.dataset.api.preprocessor.NormalizerStandardize) TestDataSetIterator(org.nd4j.linalg.dataset.api.iterator.TestDataSetIterator) DataSetIterator(org.nd4j.linalg.dataset.api.iterator.DataSetIterator) Test(org.junit.Test) BaseNd4jTest(org.nd4j.linalg.BaseNd4jTest)

Example 10 with NormalizerStandardize

use of org.nd4j.linalg.dataset.api.preprocessor.NormalizerStandardize in project nd4j by deeplearning4j.

The class NormalizerTests, method randomData.

@Before
public void randomData() {
    // Deterministic fixture: 20 full batches of 13 examples plus a final partial
    // batch, with 10 features and 3 labels per example. The RNG call order is
    // fixed (features first, then labels) so the data is reproducible.
    Nd4j.getRandom().setSeed(12345);
    batchSize = 13;
    batchCount = 20;
    lastBatch = batchSize / 2;
    int totalExamples = batchCount * batchSize + lastBatch;
    INDArray features = Nd4j.rand(totalExamples, 10);
    INDArray labels = Nd4j.rand(totalExamples, 3);
    data = new DataSet(features, labels);
    stdScaler = new NormalizerStandardize();
    minMaxScaler = new NormalizerMinMaxScaler();
}
Also used : NormalizerMinMaxScaler(org.nd4j.linalg.dataset.api.preprocessor.NormalizerMinMaxScaler) INDArray(org.nd4j.linalg.api.ndarray.INDArray) NormalizerStandardize(org.nd4j.linalg.dataset.api.preprocessor.NormalizerStandardize) Before(org.junit.Before)

Aggregations

NormalizerStandardize (org.nd4j.linalg.dataset.api.preprocessor.NormalizerStandardize)23 INDArray (org.nd4j.linalg.api.ndarray.INDArray)20 Test (org.junit.Test)19 DataSetIterator (org.nd4j.linalg.dataset.api.iterator.DataSetIterator)13 BaseNd4jTest (org.nd4j.linalg.BaseNd4jTest)12 TestDataSetIterator (org.nd4j.linalg.dataset.api.iterator.TestDataSetIterator)9 MultiLayerConfiguration (org.deeplearning4j.nn.conf.MultiLayerConfiguration)6 MultiLayerNetwork (org.deeplearning4j.nn.multilayer.MultiLayerNetwork)6 NormalizerMinMaxScaler (org.nd4j.linalg.dataset.api.preprocessor.NormalizerMinMaxScaler)6 NeuralNetConfiguration (org.deeplearning4j.nn.conf.NeuralNetConfiguration)5 DataSet (org.nd4j.linalg.dataset.DataSet)5 DataNormalization (org.nd4j.linalg.dataset.api.preprocessor.DataNormalization)5 IrisDataSetIterator (org.deeplearning4j.datasets.iterator.impl.IrisDataSetIterator)4 DenseLayer (org.deeplearning4j.nn.conf.layers.DenseLayer)4 OutputLayer (org.deeplearning4j.nn.conf.layers.OutputLayer)3 DataSet (org.nd4j.linalg.dataset.api.DataSet)3 ArrayList (java.util.ArrayList)2 RecordReader (org.datavec.api.records.reader.RecordReader)2 CSVRecordReader (org.datavec.api.records.reader.impl.csv.CSVRecordReader)2 FileSplit (org.datavec.api.split.FileSplit)2