Usage of org.nd4j.linalg.dataset.api.preprocessor.NormalizerStandardize in project nd4j (by deeplearning4j):
class NormalizerStandardizeLabelsTest, method testTransform.
@Test
public void testTransform() {
    /* A random dataset is generated as A*X + B, where X is drawn from a normal
       distribution with mean 0 and std 1, so the dataset has mean B and std A.
       The fitted mean/std are compared against these theoretical values, and the
       transformed values should recover X (all generators share the same seed). */
    long randSeed = 2227724;
    int nFeatures = 2;
    int nSamples = 6400;
    int bsize = 8;
    int a = 5;
    int b = 100;

    genRandomDataSet normData = new genRandomDataSet(nSamples, nFeatures, a, b, randSeed);
    genRandomDataSet expectedData = new genRandomDataSet(nSamples, nFeatures, 1, 0, randSeed);
    genRandomDataSet beforeTransformData = new genRandomDataSet(nSamples, nFeatures, a, b, randSeed);

    NormalizerStandardize myNormalizer = new NormalizerStandardize();
    myNormalizer.fitLabel(true);

    DataSetIterator normIterator = normData.getIter(bsize);
    DataSetIterator expectedIterator = expectedData.getIter(bsize);
    DataSetIterator beforeTransformIterator = beforeTransformData.getIter(bsize);

    myNormalizer.fit(normIterator);

    // Fitted mean must lie within 0.5% of the theoretical mean.
    double tolerancePerc = 0.5;
    INDArray sampleMean = myNormalizer.getMean();
    INDArray sampleMeanDelta = Transforms.abs(sampleMean.sub(normData.theoreticalMean));
    assertTrue(sampleMeanDelta.mul(100).div(normData.theoreticalMean).max(1).getDouble(0, 0) < tolerancePerc);

    // Sanity check: deviation should also lie within ~2.6 standard errors of the
    // mean, which holds about 99% of the time.
    double sampleMeanSEM = sampleMeanDelta.div(normData.theoreticalSEM).max(1).getDouble(0, 0);
    assertTrue(sampleMeanSEM < 2.6);

    // Fitted std must lie within 5% of the theoretical std.
    tolerancePerc = 5;
    INDArray sampleStd = myNormalizer.getStd();
    INDArray sampleStdDelta = Transforms.abs(sampleStd.sub(normData.theoreticalStd));
    assertTrue(sampleStdDelta.div(normData.theoreticalStd).max(1).mul(100).getDouble(0, 0) < tolerancePerc);

    // Transformed values must match the expected (unit-normal) data within 1%.
    tolerancePerc = 1;
    normIterator.setPreProcessor(myNormalizer);
    while (normIterator.hasNext()) {
        INDArray before = beforeTransformIterator.next().getFeatures();
        DataSet current = normIterator.next();
        // Labels were generated identical to features, so the feature checks
        // below effectively cover label normalization as well.
        assertEquals(current.getFeatures(), current.getLabels());
        INDArray after = current.getFeatures();
        INDArray expected = expectedIterator.next().getFeatures();
        INDArray delta = Transforms.abs(after.sub(expected));
        INDArray deltaPerc = delta.div(before.sub(expected)).mul(100);
        double maxDeltaPerc = deltaPerc.max(0, 1).getDouble(0, 0);
        assertTrue(maxDeltaPerc < tolerancePerc);
    }
}
Usage of org.nd4j.linalg.dataset.api.preprocessor.NormalizerStandardize in project nd4j (by deeplearning4j):
class NormalizerStandardizeLabelsTest, method testBruteForce.
@Test
public void testBruteForce() {
    /* Creates a dataset whose feature columns are multiples (x, y, z) of the
       consecutive natural numbers 1..nSamples, so mean and std dev have exact
       closed-form values that the fitted statistics are checked against. */
    double tolerancePerc = 0.01;
    int nSamples = 5120;
    int x = 1, y = 2, z = 3;

    INDArray featureX = Nd4j.linspace(1, nSamples, nSamples).reshape(nSamples, 1).mul(x);
    INDArray featureY = featureX.mul(y);
    INDArray featureZ = featureX.mul(z);
    INDArray featureSet = Nd4j.concat(1, featureX, featureY, featureZ);
    // Labels are the first feature column only.
    INDArray labelSet = featureSet.dup().getColumns(new int[] { 0 });
    DataSet sampleDataSet = new DataSet(featureSet, labelSet);

    // Mean of 1..n is (n+1)/2; std dev is sqrt((n^2 - 1) / 12).
    double meanNaturalNums = (nSamples + 1) / 2.0;
    INDArray theoreticalMean = Nd4j.create(new double[] { meanNaturalNums * x, meanNaturalNums * y, meanNaturalNums * z });
    INDArray theoreticallabelMean = theoreticalMean.dup().getColumns(new int[] { 0 });
    double stdNaturalNums = Math.sqrt((nSamples * nSamples - 1) / 12.0);
    INDArray theoreticalStd = Nd4j.create(new double[] { stdNaturalNums * x, stdNaturalNums * y, stdNaturalNums * z });
    INDArray theoreticallabelStd = theoreticalStd.dup().getColumns(new int[] { 0 });

    NormalizerStandardize myNormalizer = new NormalizerStandardize();
    myNormalizer.fitLabel(true);
    myNormalizer.fit(sampleDataSet);

    INDArray meanDelta = Transforms.abs(theoreticalMean.sub(myNormalizer.getMean()));
    INDArray labelDelta = Transforms.abs(theoreticallabelMean.sub(myNormalizer.getLabelMean()));
    INDArray meanDeltaPerc = meanDelta.div(theoreticalMean).mul(100);
    INDArray labelDeltaPerc = labelDelta.div(theoreticallabelMean).mul(100);
    double maxMeanDeltaPerc = meanDeltaPerc.max(1).getDouble(0, 0);
    assertTrue(maxMeanDeltaPerc < tolerancePerc);
    assertTrue(labelDeltaPerc.max(1).getDouble(0, 0) < tolerancePerc);

    INDArray stdDelta = Transforms.abs(theoreticalStd.sub(myNormalizer.getStd()));
    INDArray stdDeltaPerc = stdDelta.div(theoreticalStd).mul(100);
    // Fix: convert the label-std delta to a percentage here. The original
    // omitted the factor of 100 on the label delta and instead multiplied the
    // (already-percentage) feature-std delta by 100 a second time below.
    INDArray stdlabelDeltaPerc =
            Transforms.abs(theoreticallabelStd.sub(myNormalizer.getLabelStd())).div(theoreticallabelStd).mul(100);
    double maxStdDeltaPerc = stdDeltaPerc.max(1).getDouble(0, 0);
    double maxlabelStdDeltaPerc = stdlabelDeltaPerc.max(1).getDouble(0, 0);
    assertTrue(maxStdDeltaPerc < tolerancePerc);
    assertTrue(maxlabelStdDeltaPerc < tolerancePerc);

    // SAME TEST WITH THE ITERATOR
    int bSize = 10;
    // within 0.1% of the correct value
    tolerancePerc = 0.1;
    DataSetIterator sampleIter = new TestDataSetIterator(sampleDataSet, bSize);
    myNormalizer.fit(sampleIter);
    meanDelta = Transforms.abs(theoreticalMean.sub(myNormalizer.getMean()));
    meanDeltaPerc = meanDelta.div(theoreticalMean).mul(100);
    maxMeanDeltaPerc = meanDeltaPerc.max(1).getDouble(0, 0);
    assertTrue(maxMeanDeltaPerc < tolerancePerc);
    // Fix: compare the fitted std against the theoretical std. The original
    // recomputed the MEAN delta here (copy-paste error) and divided it by the
    // theoretical std, so the std of the iterator fit was never checked.
    stdDelta = Transforms.abs(theoreticalStd.sub(myNormalizer.getStd()));
    stdDeltaPerc = stdDelta.div(theoreticalStd).mul(100);
    maxStdDeltaPerc = stdDeltaPerc.max(1).getDouble(0, 0);
    assertTrue(maxStdDeltaPerc < tolerancePerc);
}
Usage of org.nd4j.linalg.dataset.api.preprocessor.NormalizerStandardize in project nd4j (by deeplearning4j):
class NormalizerStandardizeTest, method testRevert.
@Test
public void testRevert() {
    // Round-trip tolerance: 0.01% of the original value.
    double tolerancePerc = 0.01;
    int nSamples = 500;
    int nFeatures = 3;

    INDArray features = Nd4j.randn(nSamples, nFeatures);
    INDArray labels = Nd4j.zeros(nSamples, 1);
    DataSet sampleDataSet = new DataSet(features, labels);

    NormalizerStandardize myNormalizer = new NormalizerStandardize();
    myNormalizer.fit(sampleDataSet);

    // transform followed by revert should reproduce the original features.
    DataSet roundTripped = sampleDataSet.copy();
    myNormalizer.transform(roundTripped);
    myNormalizer.revert(roundTripped);

    // NOTE(review): the relative error divides by the raw features, which are
    // unseeded standard-normal draws — a value very close to 0 could inflate
    // this ratio and make the test flaky. Confirm whether seeding is needed.
    INDArray relativeDelta =
            Transforms.abs(roundTripped.getFeatures().sub(sampleDataSet.getFeatures())).div(sampleDataSet.getFeatures());
    double maxDeltaPerc = relativeDelta.max(0, 1).mul(100).getDouble(0, 0);
    assertTrue(maxDeltaPerc < tolerancePerc);
}
Usage of org.nd4j.linalg.dataset.api.preprocessor.NormalizerStandardize in project nd4j (by deeplearning4j):
class NormalizerStandardizeTest, method testUnderOverflow.
@Test
public void testUnderOverflow() {
    // The dataset is essentially constant per column: a tiny uniform spread
    // around a constant of large magnitude. Checks that fitting does not lose
    // precision. Tolerance: within 1% of the expected mean.
    double tolerancePerc = 1;
    int nSamples = 1000;
    int bSize = 10;
    int x = -1000000, y = 1000000;
    double z = 1000000;

    INDArray featureX = Nd4j.rand(nSamples, 1).mul(1).add(x);
    INDArray featureY = Nd4j.rand(nSamples, 1).mul(2).add(y);
    INDArray featureZ = Nd4j.rand(nSamples, 1).mul(3).add(z);
    INDArray featureSet = Nd4j.concat(1, featureX, featureY, featureZ);
    INDArray labelSet = Nd4j.zeros(nSamples, 1);
    DataSet sampleDataSet = new DataSet(featureSet, labelSet);
    DataSetIterator sampleIter = new TestDataSetIterator(sampleDataSet, bSize);

    INDArray theoreticalMean = Nd4j.create(new double[] { x, y, z });
    NormalizerStandardize myNormalizer = new NormalizerStandardize();
    myNormalizer.fit(sampleIter);

    INDArray meanDelta = Transforms.abs(theoreticalMean.sub(myNormalizer.getMean()));
    // Fix: divide by |theoreticalMean|. The x column's mean is negative, and
    // dividing the (non-negative) delta by it produced a negative "percentage",
    // so the assertion below could never fail for that column.
    INDArray meanDeltaPerc = meanDelta.mul(100).div(Transforms.abs(theoreticalMean));
    assertTrue(meanDeltaPerc.max(1).getDouble(0, 0) < tolerancePerc);

    // The transform just has to complete without numerical failure.
    myNormalizer.transform(sampleDataSet);
}
Usage of org.nd4j.linalg.dataset.api.preprocessor.NormalizerStandardize in project nd4j (by deeplearning4j):
class NormalizerTests, method randomData.
@Before
public void randomData() {
    // Seed the RNG so every test sees the same deterministic fixtures.
    Nd4j.getRandom().setSeed(12345);
    batchSize = 13;
    batchCount = 20;
    lastBatch = batchSize / 2;

    // Total rows: batchCount full batches plus one trailing partial batch.
    int totalRows = batchCount * batchSize + lastBatch;
    data = new DataSet(Nd4j.rand(totalRows, 10), Nd4j.rand(totalRows, 3));
    stdScaler = new NormalizerStandardize();
    minMaxScaler = new NormalizerMinMaxScaler();
}
Aggregations