Search in sources :

Example 6 with TestDataSetIterator

use of org.nd4j.linalg.dataset.api.iterator.TestDataSetIterator in project nd4j by deeplearning4j.

the class NormalizerStandardizeTest method testTransform.

/**
 * Fits a {@link NormalizerStandardize} on synthetic data of the form {@code a * X + b} and checks
 * both the fitted statistics and the transformed values.
 *
 * <p>Per the generator's contract, X is drawn from a normal distribution with mean 0 and standard
 * deviation 1, so the data has theoretical mean {@code b} and standard deviation {@code a}.
 * Three data sets are built from the same seed: the one the normalizer is fitted on, an
 * identical copy that supplies the untransformed values, and a {@code 1 * X + 0} set that
 * supplies the values the normalized features are expected to match.
 */
@Test
public void testTransform() {
    long seed = 41732786;
    int featureCount = 2;
    int sampleCount = 6400;
    int batchSize = 8;
    // Scale (a) and shift (b) applied to the standard-normal samples
    int scale = 5;
    int shift = 100;

    genRandomDataSet fitData = new genRandomDataSet(sampleCount, featureCount, scale, shift, seed);
    DataSet theoreticalTransform = fitData.theoreticalTransform;
    genRandomDataSet standardNormalData = new genRandomDataSet(sampleCount, featureCount, 1, 0, seed);
    genRandomDataSet untransformedData = new genRandomDataSet(sampleCount, featureCount, scale, shift, seed);

    NormalizerStandardize normalizer = new NormalizerStandardize();
    DataSetIterator fitIter = fitData.getIter(batchSize);
    DataSetIterator theoreticalIter = new TestDataSetIterator(theoreticalTransform, batchSize);
    DataSetIterator expectedIter = standardNormalData.getIter(batchSize);
    DataSetIterator untransformedIter = untransformedData.getIter(batchSize);
    normalizer.fit(fitIter);

    // Fitted mean: relative error must stay within 0.1% of the theoretical mean
    double meanTolerancePerc = 0.10;
    INDArray fittedMean = normalizer.getMean();
    INDArray meanError = Transforms.abs(fittedMean.sub(fitData.theoreticalMean));
    double worstMeanErrorPerc = meanError.mul(100).div(fitData.theoreticalMean).max(1).getDouble(0, 0);
    assertTrue(worstMeanErrorPerc < meanTolerancePerc);

    // Sanity check against the theoretical standard error of the mean (SEM):
    // 99% of the time the error should be within this many SEMs
    double worstMeanErrorInSems = meanError.div(fitData.theoreticalSEM).max(1).getDouble(0, 0);
    assertTrue(worstMeanErrorInSems < 2.6);

    // Fitted standard deviation: relative error must stay within 1%
    double stdTolerancePerc = 1;
    INDArray fittedStd = normalizer.getStd();
    INDArray stdError = Transforms.abs(fittedStd.sub(fitData.theoreticalStd));
    double worstStdErrorPerc = stdError.div(fitData.theoreticalStd).max(1).mul(100).getDouble(0, 0);
    assertTrue(worstStdErrorPerc < stdTolerancePerc);

    // Transformed values: the error against the expected values, taken as a percentage of the
    // distance between the untransformed and expected values, must stay within 1%
    double transformTolerancePerc = 1;
    fitIter.setPreProcessor(normalizer);
    while (fitIter.hasNext()) {
        INDArray rawFeatures = untransformedIter.next().getFeatures();
        // Not asserted on; advanced in lock-step so it can be printed next to the other
        // arrays when debugging a failure
        INDArray theoreticalFeatures = theoreticalIter.next().getFeatures();
        INDArray normalizedFeatures = fitIter.next().getFeatures();
        INDArray expectedFeatures = expectedIter.next().getFeatures();

        INDArray absError = Transforms.abs(normalizedFeatures.sub(expectedFeatures));
        INDArray errorPerc = absError.div(Transforms.abs(rawFeatures.sub(expectedFeatures)));
        errorPerc.muli(100);
        double worstErrorPerc = errorPerc.max(0, 1).getDouble(0, 0);
        assertTrue(worstErrorPerc < transformTolerancePerc);
    }
}
Also used : TestDataSetIterator(org.nd4j.linalg.dataset.api.iterator.TestDataSetIterator) INDArray(org.nd4j.linalg.api.ndarray.INDArray) NormalizerStandardize(org.nd4j.linalg.dataset.api.preprocessor.NormalizerStandardize) TestDataSetIterator(org.nd4j.linalg.dataset.api.iterator.TestDataSetIterator) DataSetIterator(org.nd4j.linalg.dataset.api.iterator.DataSetIterator) Test(org.junit.Test) BaseNd4jTest(org.nd4j.linalg.BaseNd4jTest)

Example 7 with TestDataSetIterator

use of org.nd4j.linalg.dataset.api.iterator.TestDataSetIterator in project nd4j by deeplearning4j.

the class NormalizerStandardizeTest method testDifferentBatchSizes.

/**
 * Checks that {@link NormalizerStandardize} arrives at the same mean and standard deviation for
 * the numbers 1 through 6 whether it is fitted on the whole {@link DataSet} or on iterators
 * that hand the same data over in batches of different sizes.
 */
@Test
public void testDifferentBatchSizes() {
    // Create 6x1 matrix of the numbers 1 through 6
    INDArray oneToSix = Nd4j.linspace(1, 6, 6).transpose();
    DataSet wholeSet = new DataSet(oneToSix, oneToSix);

    // Test fitting a DataSet
    NormalizerStandardize fittedOnDataSet = new NormalizerStandardize();
    fittedOnDataSet.fit(wholeSet);
    assertMeanAndStdOfOneToSix(fittedOnDataSet);

    // Test fitting iterators:
    //   3 -> equal batch sizes, 2 batches of 3 rows
    //   4 -> varying batch sizes, a batch of 4 rows and a batch of 2 rows
    //   1 -> single rows, 6 batches of 1 row
    int[] batchSizes = { 3, 4, 1 };
    for (int batchSize : batchSizes) {
        DataSetIterator batches = new TestDataSetIterator(wholeSet, batchSize);
        NormalizerStandardize fittedOnIterator = new NormalizerStandardize();
        fittedOnIterator.fit(batches);
        assertMeanAndStdOfOneToSix(fittedOnIterator);
    }
}

/**
 * Asserts that the first entries of the normalizer's fitted mean and standard deviation match
 * the values expected for the numbers 1 through 6 (3.5 and roughly 1.70783).
 */
private void assertMeanAndStdOfOneToSix(NormalizerStandardize normalizer) {
    assertEquals(3.5f, normalizer.getMean().getFloat(0), 1e-6);
    assertEquals(1.70783f, normalizer.getStd().getFloat(0), 1e-4);
}
Also used : TestDataSetIterator(org.nd4j.linalg.dataset.api.iterator.TestDataSetIterator) INDArray(org.nd4j.linalg.api.ndarray.INDArray) NormalizerStandardize(org.nd4j.linalg.dataset.api.preprocessor.NormalizerStandardize) TestDataSetIterator(org.nd4j.linalg.dataset.api.iterator.TestDataSetIterator) DataSetIterator(org.nd4j.linalg.dataset.api.iterator.DataSetIterator) Test(org.junit.Test) BaseNd4jTest(org.nd4j.linalg.BaseNd4jTest)

Example 8 with TestDataSetIterator

use of org.nd4j.linalg.dataset.api.iterator.TestDataSetIterator in project nd4j by deeplearning4j.

the class NormalizerTests method testItervsDataset.

/**
 * Compares normalizing a data set in one call with normalizing it batch by batch through a
 * {@link TestDataSetIterator}.
 *
 * <p>The pre-processor is fitted twice: first on a copy of {@code data}, then on an iterator
 * created over that same copy with batches of {@code batchSize}.
 *
 * @param preProcessor the normalizer to fit and apply
 * @return the largest absolute deviation from 1 of the element-wise ratio between the stacked
 *         per-batch features and the features transformed in one call
 */
public float testItervsDataset(DataNormalization preProcessor) {
    DataSet workingCopy = data.copy();
    DataSetIterator batches = new TestDataSetIterator(workingCopy, batchSize);

    // Path A: fit on the whole copy and transform it in a single call
    preProcessor.fit(workingCopy);
    preProcessor.transform(workingCopy);
    INDArray wholeSetFeatures = workingCopy.getFeatures();

    // Path B: refit on the iterator and attach the pre-processor to it
    // (presumably each batch is then normalized as it is returned - confirm against the iterator)
    preProcessor.fit(batches);
    batches.setPreProcessor(preProcessor);

    // Stack the features of every batch vertically, starting from the first batch
    INDArray stackedBatchFeatures = batches.next().getFeatures();
    while (batches.hasNext()) {
        INDArray batchFeatures = batches.next().getFeatures();
        stackedBatchFeatures = Nd4j.vstack(stackedBatchFeatures, batchFeatures);
    }

    INDArray ratio = stackedBatchFeatures.div(wholeSetFeatures);
    INDArray deviationFromOne = Transforms.abs(ratio.rsub(1));
    return deviationFromOne.maxNumber().floatValue();
}
Also used : TestDataSetIterator(org.nd4j.linalg.dataset.api.iterator.TestDataSetIterator) INDArray(org.nd4j.linalg.api.ndarray.INDArray) TestDataSetIterator(org.nd4j.linalg.dataset.api.iterator.TestDataSetIterator) DataSetIterator(org.nd4j.linalg.dataset.api.iterator.DataSetIterator)

Example 9 with TestDataSetIterator

use of org.nd4j.linalg.dataset.api.iterator.TestDataSetIterator in project nd4j by deeplearning4j.

the class NormalizerTests method testMasking.

/**
 * Checks, for both {@link NormalizerMinMaxScaler} and {@link NormalizerStandardize}, that
 * normalizing data with a features mask gives the same result as normalizing after fitting
 * only on the unmasked values, and that the zeroed-out region of the features is still all
 * zeros after {@code transform} and again after {@code revert}.
 *
 * <p>Each normalizer type is fitted three ways: on the full array together with its mask, on
 * two data sets that contain only the unmasked parts of each example, and on the individual
 * unmasked slices along the last dimension supplied as single-row data sets.
 */
@Test
public void testMasking() {
    // Fixed seed so the random features are reproducible
    Nd4j.getRandom().setSeed(235);
    // Three independent instances per normalizer type, one for each way of fitting
    DataNormalization[] normalizers = new DataNormalization[] { new NormalizerMinMaxScaler(), new NormalizerStandardize() };
    DataNormalization[] normalizersNoMask = new DataNormalization[] { new NormalizerMinMaxScaler(), new NormalizerStandardize() };
    DataNormalization[] normalizersByRow = new DataNormalization[] { new NormalizerMinMaxScaler(), new NormalizerStandardize() };
    for (int i = 0; i < normalizers.length; i++) {
        // First: check that normalization is the same with/without masking arrays
        DataNormalization norm = normalizers[i];
        DataNormalization normFitSubset = normalizersNoMask[i];
        DataNormalization normByRow = normalizersByRow[i];
        System.out.println(norm.getClass());
        // Random array of shape [2, 3, 5], scaled by 100 and shifted by 100 in place
        INDArray arr = Nd4j.rand('c', new int[] { 2, 3, 5 }).muli(100).addi(100);
        // Zero the tail of the last dimension for the second example; this is the same region
        // the mask below marks with 0 (assumes interval(3, 5) covers indices 3 and 4 - confirm)
        arr.get(NDArrayIndex.point(1), NDArrayIndex.all(), NDArrayIndex.interval(3, 5)).assign(0);
        // Snapshot used to verify that fit() leaves the features untouched
        INDArray arrCopy = arr.dup();
        // Copies of the unmasked data: all of the first example, and the first three positions
        // of the last dimension for the second example
        INDArray arrPt1 = arr.get(NDArrayIndex.interval(0, 0, true), NDArrayIndex.all(), NDArrayIndex.all()).dup();
        INDArray arrPt2 = arr.get(NDArrayIndex.interval(1, 1, true), NDArrayIndex.all(), NDArrayIndex.interval(0, 3)).dup();
        // One mask row per example: first example fully present, second has its last two
        // positions masked out
        INDArray mask = Nd4j.create(new double[][] { { 1, 1, 1, 1, 1 }, { 1, 1, 1, 0, 0 } });
        // Presumably (features, labels, featuresMask, labelsMask) - confirm against DataSet
        DataSet ds = new DataSet(arr, null, mask, null);
        DataSet dsCopy1 = new DataSet(arr.dup(), null, mask, null);
        DataSet dsCopy2 = new DataSet(arr.dup(), null, mask, null);
        norm.fit(ds);
        // Check that values aren't modified by fit op
        assertEquals(arrCopy, arr);
        // Fit the second normalizer on the unmasked parts only, one example per batch
        List<DataSet> toFitTimeSeries1Ex = new ArrayList<>();
        toFitTimeSeries1Ex.add(new DataSet(arrPt1, arrPt1));
        toFitTimeSeries1Ex.add(new DataSet(arrPt2, arrPt2));
        normFitSubset.fit(new TestDataSetIterator(toFitTimeSeries1Ex, 1));
        // Fit the third normalizer on each unmasked slice as its own single-row data set:
        // five slices from the first example, then three from the second
        List<DataSet> toFitRows = new ArrayList<>();
        for (int j = 0; j < 5; j++) {
            INDArray row = arr.get(NDArrayIndex.point(0), NDArrayIndex.all(), NDArrayIndex.interval(j, j, true)).transpose();
            assertTrue(row.isRowVector());
            toFitRows.add(new DataSet(row, row));
        }
        for (int j = 0; j < 3; j++) {
            INDArray row = arr.get(NDArrayIndex.point(1), NDArrayIndex.all(), NDArrayIndex.interval(j, j, true)).transpose();
            assertTrue(row.isRowVector());
            toFitRows.add(new DataSet(row, row));
        }
        normByRow.fit(new TestDataSetIterator(toFitRows, 1));
        // Apply each fitted normalizer to its own masked data set
        norm.transform(ds);
        normFitSubset.transform(dsCopy1);
        normByRow.transform(dsCopy2);
        // All three ways of fitting must produce the same transformed data set
        assertEquals(ds.getFeatures(), dsCopy1.getFeatures());
        assertEquals(ds.getLabels(), dsCopy1.getLabels());
        assertEquals(ds.getFeaturesMaskArray(), dsCopy1.getFeaturesMaskArray());
        assertEquals(ds.getLabelsMaskArray(), dsCopy1.getLabelsMaskArray());
        assertEquals(ds, dsCopy1);
        assertEquals(ds, dsCopy2);
        // Second: ensure time steps post normalization (and post revert) are 0.0
        INDArray shouldBe0_1 = ds.getFeatureMatrix().get(NDArrayIndex.point(1), NDArrayIndex.all(), NDArrayIndex.interval(3, 5));
        INDArray shouldBe0_2 = dsCopy1.getFeatureMatrix().get(NDArrayIndex.point(1), NDArrayIndex.all(), NDArrayIndex.interval(3, 5));
        INDArray shouldBe0_3 = dsCopy2.getFeatureMatrix().get(NDArrayIndex.point(1), NDArrayIndex.all(), NDArrayIndex.interval(3, 5));
        INDArray zeros = Nd4j.zeros(shouldBe0_1.shape());
        // Print the transformed features of both examples for manual inspection
        for (int j = 0; j < 2; j++) {
            System.out.println(ds.getFeatureMatrix().get(NDArrayIndex.point(j), NDArrayIndex.all(), NDArrayIndex.all()));
            System.out.println();
        }
        assertEquals(zeros, shouldBe0_1);
        assertEquals(zeros, shouldBe0_2);
        assertEquals(zeros, shouldBe0_3);
        // Check same thing after reverting:
        norm.revert(ds);
        normFitSubset.revert(dsCopy1);
        normByRow.revert(dsCopy2);
        shouldBe0_1 = ds.getFeatureMatrix().get(NDArrayIndex.point(1), NDArrayIndex.all(), NDArrayIndex.interval(3, 5));
        shouldBe0_2 = dsCopy1.getFeatureMatrix().get(NDArrayIndex.point(1), NDArrayIndex.all(), NDArrayIndex.interval(3, 5));
        shouldBe0_3 = dsCopy2.getFeatureMatrix().get(NDArrayIndex.point(1), NDArrayIndex.all(), NDArrayIndex.interval(3, 5));
        assertEquals(zeros, shouldBe0_1);
        assertEquals(zeros, shouldBe0_2);
        assertEquals(zeros, shouldBe0_3);
    }
}
Also used : NormalizerMinMaxScaler(org.nd4j.linalg.dataset.api.preprocessor.NormalizerMinMaxScaler) TestDataSetIterator(org.nd4j.linalg.dataset.api.iterator.TestDataSetIterator) DataNormalization(org.nd4j.linalg.dataset.api.preprocessor.DataNormalization) INDArray(org.nd4j.linalg.api.ndarray.INDArray) ArrayList(java.util.ArrayList) NormalizerStandardize(org.nd4j.linalg.dataset.api.preprocessor.NormalizerStandardize) Test(org.junit.Test) BaseNd4jTest(org.nd4j.linalg.BaseNd4jTest)

Example 10 with TestDataSetIterator

use of org.nd4j.linalg.dataset.api.iterator.TestDataSetIterator in project nd4j by deeplearning4j.

the class PreProcessor3D4DTest method testBruteForce3d.

/**
 * Fits {@link NormalizerStandardize} and {@link NormalizerMinMaxScaler} on constructed 3d data
 * sets and compares the fitted statistics with the expected values carried by the fixtures.
 *
 * <p>The same checks run twice: once fitting directly on each fixture's data set, and once
 * fitting the same normalizer instances again on a {@link TestDataSetIterator} over that data
 * set with a batch size of 5. The mean, min and max must match exactly; the standard deviation
 * only has to be within 1% of the expected value.
 */
@Test
public void testBruteForce3d() {
    NormalizerStandardize myNormalizer = new NormalizerStandardize();
    NormalizerMinMaxScaler myMinMaxScaler = new NormalizerMinMaxScaler();
    int timeSteps = 15;
    int samples = 100;
    // multiplier for the features
    INDArray featureScaleA = Nd4j.create(new double[] { 1, -2, 3 }).reshape(3, 1);
    INDArray featureScaleB = Nd4j.create(new double[] { 2, 2, 3 }).reshape(3, 1);
    // NOTE(review): the meaning of the trailing constructor argument (1) is not visible here
    Construct3dDataSet caseA = new Construct3dDataSet(featureScaleA, timeSteps, samples, 1);
    Construct3dDataSet caseB = new Construct3dDataSet(featureScaleB, timeSteps, samples, 1);
    myNormalizer.fit(caseA.sampleDataSet);
    assertEquals(caseA.expectedMean, myNormalizer.getMean());
    // Largest relative deviation of the fitted std from the expected std must be below 1%
    assertTrue(Transforms.abs(myNormalizer.getStd().div(caseA.expectedStd).sub(1)).maxNumber().floatValue() < 0.01);
    myMinMaxScaler.fit(caseB.sampleDataSet);
    assertEquals(caseB.expectedMin, myMinMaxScaler.getMin());
    assertEquals(caseB.expectedMax, myMinMaxScaler.getMax());
    // Same Test with an Iterator, values should be close for std, exact for everything else
    DataSetIterator sampleIterA = new TestDataSetIterator(caseA.sampleDataSet, 5);
    DataSetIterator sampleIterB = new TestDataSetIterator(caseB.sampleDataSet, 5);
    myNormalizer.fit(sampleIterA);
    // Expected value goes first so a failure message labels the two values correctly
    assertEquals(caseA.expectedMean, myNormalizer.getMean());
    assertTrue(Transforms.abs(myNormalizer.getStd().div(caseA.expectedStd).sub(1)).maxNumber().floatValue() < 0.01);
    myMinMaxScaler.fit(sampleIterB);
    assertEquals(caseB.expectedMin, myMinMaxScaler.getMin());
    assertEquals(caseB.expectedMax, myMinMaxScaler.getMax());
}
Also used : NormalizerMinMaxScaler(org.nd4j.linalg.dataset.api.preprocessor.NormalizerMinMaxScaler) TestDataSetIterator(org.nd4j.linalg.dataset.api.iterator.TestDataSetIterator) INDArray(org.nd4j.linalg.api.ndarray.INDArray) NormalizerStandardize(org.nd4j.linalg.dataset.api.preprocessor.NormalizerStandardize) TestDataSetIterator(org.nd4j.linalg.dataset.api.iterator.TestDataSetIterator) DataSetIterator(org.nd4j.linalg.dataset.api.iterator.DataSetIterator) Test(org.junit.Test) BaseNd4jTest(org.nd4j.linalg.BaseNd4jTest)

Aggregations

INDArray (org.nd4j.linalg.api.ndarray.INDArray)10 TestDataSetIterator (org.nd4j.linalg.dataset.api.iterator.TestDataSetIterator)10 Test (org.junit.Test)9 BaseNd4jTest (org.nd4j.linalg.BaseNd4jTest)9 DataSetIterator (org.nd4j.linalg.dataset.api.iterator.DataSetIterator)9 NormalizerStandardize (org.nd4j.linalg.dataset.api.preprocessor.NormalizerStandardize)8 NormalizerMinMaxScaler (org.nd4j.linalg.dataset.api.preprocessor.NormalizerMinMaxScaler)4 ArrayList (java.util.ArrayList)2 DataNormalization (org.nd4j.linalg.dataset.api.preprocessor.DataNormalization)1