Use of org.nd4j.linalg.dataset.api.preprocessor.NormalizerStandardize in project nd4j by deeplearning4j.
Class NormalizerStandardizeTest, method testDifferentBatchSizes.
/**
 * Verifies that {@link NormalizerStandardize} computes the same mean and standard deviation
 * whether it is fitted on a complete DataSet in one go or on iterators that serve the same
 * data in batches of different sizes.
 */
@Test
public void testDifferentBatchSizes() {
    final float expectedMean = 3.5f;
    final float expectedStd = 1.70783f;

    // 6x1 column holding the numbers 1 through 6, used as both features and labels
    INDArray column = Nd4j.linspace(1, 6, 6).transpose();
    DataSet data = new DataSet(column, column);

    // Baseline: fit directly on the complete DataSet
    NormalizerStandardize fittedOnWhole = new NormalizerStandardize();
    fittedOnWhole.fit(data);
    assertEquals(expectedMean, fittedOnWhole.getMean().getFloat(0), 1e-6);
    assertEquals(expectedStd, fittedOnWhole.getStd().getFloat(0), 1e-4);

    // Fit through an iterator; the statistics must not depend on how the rows are batched
    int[] batchSizes = {
        3, // equal batch sizes: 2 batches of 3 rows
        4, // varying batch sizes: a batch of 4 rows, then a batch of 2 rows
        1  // single-row batches: 6 batches of 1 row
    };
    for (int batchSize : batchSizes) {
        DataSetIterator batches = new TestDataSetIterator(data, batchSize);
        NormalizerStandardize fittedOnBatches = new NormalizerStandardize();
        fittedOnBatches.fit(batches);
        assertEquals(expectedMean, fittedOnBatches.getMean().getFloat(0), 1e-6);
        assertEquals(expectedStd, fittedOnBatches.getStd().getFloat(0), 1e-4);
    }
}
Use of org.nd4j.linalg.dataset.api.preprocessor.NormalizerStandardize in project nd4j by deeplearning4j.
Class NormalizerTests, method testMasking.
/**
 * Checks how normalizers treat a feature mask. For each normalizer type, three instances are
 * fitted on the same values presented in different ways:
 * <ol>
 *   <li>the full array together with its mask,</li>
 *   <li>one DataSet per example, with the masked steps of the second example cut off,</li>
 *   <li>one DataSet per unmasked step.</li>
 * </ol>
 * All three must transform the masked DataSet identically, and the masked steps must be 0
 * both after transform and after revert.
 */
@Test
public void testMasking() {
    Nd4j.getRandom().setSeed(235);

    // Same normalizer types in each array; one independent instance per fitting strategy
    DataNormalization[] fittedWithMask = { new NormalizerMinMaxScaler(), new NormalizerStandardize() };
    DataNormalization[] fittedOnExamples = { new NormalizerMinMaxScaler(), new NormalizerStandardize() };
    DataNormalization[] fittedOnSteps = { new NormalizerMinMaxScaler(), new NormalizerStandardize() };

    for (int kind = 0; kind < fittedWithMask.length; kind++) {
        // First: check that normalization is the same with/without masking arrays
        DataNormalization maskedNorm = fittedWithMask[kind];
        DataNormalization exampleNorm = fittedOnExamples[kind];
        DataNormalization stepNorm = fittedOnSteps[kind];
        System.out.println(maskedNorm.getClass());

        // Shape [2, 3, 5] (presumably [examples, features, steps]); values lie in [100, 200)
        INDArray features = Nd4j.rand('c', new int[] { 2, 3, 5 }).muli(100).addi(100);
        // Steps 3 and 4 of the second example are the masked ones: zero them out
        features.get(NDArrayIndex.point(1), NDArrayIndex.all(), NDArrayIndex.interval(3, 5)).assign(0);
        INDArray featuresBeforeFit = features.dup();

        // Copies of the unmasked part of each example
        INDArray firstExample =
                features.get(NDArrayIndex.interval(0, 0, true), NDArrayIndex.all(), NDArrayIndex.all()).dup();
        INDArray secondExampleUnmasked =
                features.get(NDArrayIndex.interval(1, 1, true), NDArrayIndex.all(), NDArrayIndex.interval(0, 3)).dup();

        INDArray featureMask = Nd4j.create(new double[][] { { 1, 1, 1, 1, 1 }, { 1, 1, 1, 0, 0 } });
        DataSet masked = new DataSet(features, null, featureMask, null);
        DataSet maskedForExampleNorm = new DataSet(features.dup(), null, featureMask, null);
        DataSet maskedForStepNorm = new DataSet(features.dup(), null, featureMask, null);

        maskedNorm.fit(masked);
        // Check that values aren't modified by fit op
        assertEquals(featuresBeforeFit, features);

        // Strategy 2: one DataSet per example, without the masked steps
        List<DataSet> perExample = new ArrayList<>();
        perExample.add(new DataSet(firstExample, firstExample));
        perExample.add(new DataSet(secondExampleUnmasked, secondExampleUnmasked));
        exampleNorm.fit(new TestDataSetIterator(perExample, 1));

        // Strategy 3: one row-vector DataSet per unmasked step (5 for example 0, 3 for example 1)
        int[] unmaskedSteps = { 5, 3 };
        List<DataSet> perStep = new ArrayList<>();
        for (int example = 0; example < unmaskedSteps.length; example++) {
            for (int step = 0; step < unmaskedSteps[example]; step++) {
                INDArray row = features
                        .get(NDArrayIndex.point(example), NDArrayIndex.all(), NDArrayIndex.interval(step, step, true))
                        .transpose();
                assertTrue(row.isRowVector());
                perStep.add(new DataSet(row, row));
            }
        }
        stepNorm.fit(new TestDataSetIterator(perStep, 1));

        maskedNorm.transform(masked);
        exampleNorm.transform(maskedForExampleNorm);
        stepNorm.transform(maskedForStepNorm);

        assertEquals(masked.getFeatures(), maskedForExampleNorm.getFeatures());
        assertEquals(masked.getLabels(), maskedForExampleNorm.getLabels());
        assertEquals(masked.getFeaturesMaskArray(), maskedForExampleNorm.getFeaturesMaskArray());
        assertEquals(masked.getLabelsMaskArray(), maskedForExampleNorm.getLabelsMaskArray());
        assertEquals(masked, maskedForExampleNorm);
        assertEquals(masked, maskedForStepNorm);

        // Second: ensure time steps post normalization (and post revert) are 0.0
        DataSet[] results = { masked, maskedForExampleNorm, maskedForStepNorm };
        INDArray[] maskedRegions = new INDArray[results.length];
        for (int r = 0; r < results.length; r++) {
            maskedRegions[r] = results[r].getFeatureMatrix()
                    .get(NDArrayIndex.point(1), NDArrayIndex.all(), NDArrayIndex.interval(3, 5));
        }
        INDArray zeros = Nd4j.zeros(maskedRegions[0].shape());

        for (int example = 0; example < 2; example++) {
            System.out.println(
                    masked.getFeatureMatrix().get(NDArrayIndex.point(example), NDArrayIndex.all(), NDArrayIndex.all()));
            System.out.println();
        }

        for (INDArray region : maskedRegions) {
            assertEquals(zeros, region);
        }

        // Check same thing after reverting:
        maskedNorm.revert(masked);
        exampleNorm.revert(maskedForExampleNorm);
        stepNorm.revert(maskedForStepNorm);
        for (DataSet reverted : results) {
            INDArray region = reverted.getFeatureMatrix()
                    .get(NDArrayIndex.point(1), NDArrayIndex.all(), NDArrayIndex.interval(3, 5));
            assertEquals(zeros, region);
        }
    }
}
Use of org.nd4j.linalg.dataset.api.preprocessor.NormalizerStandardize in project nd4j by deeplearning4j.
Class PreProcessor3D4DTest, method testBruteForce3d.
/**
 * Fits a {@link NormalizerStandardize} and a {@link NormalizerMinMaxScaler} on data sets
 * built by {@code Construct3dDataSet} (declared outside this snippet) and compares the fitted
 * statistics with the expected values carried by those fixtures.
 *
 * <p>The check is performed twice: once fitting on the complete DataSet, and once fitting on
 * an iterator that serves the same DataSet in batches of 5. Mean, min and max must match
 * exactly; the standard deviation only needs to be within 1% (relative) of the expected value.
 */
@Test
public void testBruteForce3d() {
    // Maximum accepted relative deviation of the fitted std from the expected std
    final double maxRelativeStdError = 0.01;

    NormalizerStandardize myNormalizer = new NormalizerStandardize();
    NormalizerMinMaxScaler myMinMaxScaler = new NormalizerMinMaxScaler();
    int timeSteps = 15;
    int samples = 100;
    // multiplier for the features
    INDArray featureScaleA = Nd4j.create(new double[] { 1, -2, 3 }).reshape(3, 1);
    INDArray featureScaleB = Nd4j.create(new double[] { 2, 2, 3 }).reshape(3, 1);
    // NOTE(review): meaning of the trailing constructor argument (1) is not visible here
    Construct3dDataSet caseA = new Construct3dDataSet(featureScaleA, timeSteps, samples, 1);
    Construct3dDataSet caseB = new Construct3dDataSet(featureScaleB, timeSteps, samples, 1);

    // Fit on the complete DataSet
    myNormalizer.fit(caseA.sampleDataSet);
    assertEquals(caseA.expectedMean, myNormalizer.getMean());
    assertTrue(Transforms.abs(myNormalizer.getStd().div(caseA.expectedStd).sub(1)).maxNumber().floatValue()
            < maxRelativeStdError);
    myMinMaxScaler.fit(caseB.sampleDataSet);
    assertEquals(caseB.expectedMin, myMinMaxScaler.getMin());
    assertEquals(caseB.expectedMax, myMinMaxScaler.getMax());

    // Same Test with an Iterator, values should be close for std, exact for everything else
    DataSetIterator sampleIterA = new TestDataSetIterator(caseA.sampleDataSet, 5);
    DataSetIterator sampleIterB = new TestDataSetIterator(caseB.sampleDataSet, 5);
    myNormalizer.fit(sampleIterA);
    // assertEquals takes (expected, actual); keep that order so failure messages are labelled correctly
    assertEquals(caseA.expectedMean, myNormalizer.getMean());
    assertTrue(Transforms.abs(myNormalizer.getStd().div(caseA.expectedStd).sub(1)).maxNumber().floatValue()
            < maxRelativeStdError);
    myMinMaxScaler.fit(sampleIterB);
    assertEquals(caseB.expectedMin, myMinMaxScaler.getMin());
    assertEquals(caseB.expectedMax, myMinMaxScaler.getMax());
}
Aggregations