Use of org.nd4j.linalg.dataset.api.preprocessor.NormalizerStandardize in project nd4j by deeplearning4j.
The class PreProcessor3D4DTest, method testBruteForce3dMaskLabels.
@Test
public void testBruteForce3dMaskLabels() {
    NormalizerStandardize myNormalizer = new NormalizerStandardize();
    myNormalizer.fitLabel(true);
    NormalizerMinMaxScaler myMinMaxScaler = new NormalizerMinMaxScaler();
    myMinMaxScaler.fitLabel(true);

    // Generate a dataset with consecutive numbers as feature values; the dataset also has masks
    int samples = 100;
    INDArray featureScale = Nd4j.create(new float[] {1, 2, 10}).reshape(3, 1);
    int timeStepsU = 5;
    Construct3dDataSet sampleU = new Construct3dDataSet(featureScale, timeStepsU, samples, 1);
    int timeStepsV = 3;
    Construct3dDataSet sampleV = new Construct3dDataSet(featureScale, timeStepsV, samples, sampleU.newOrigin);
    List<DataSet> dataSetList = new ArrayList<DataSet>();
    dataSetList.add(sampleU.sampleDataSet);
    dataSetList.add(sampleV.sampleDataSet);
    DataSet fullDataSetA = DataSet.merge(dataSetList);
    DataSet fullDataSetAA = fullDataSetA.copy();

    // This should be the same dataset as above, just without a mask
    Construct3dDataSet fullDataSetNoMask = new Construct3dDataSet(featureScale, timeStepsU + timeStepsV, samples, 1);

    // Fit the preprocessors - label and feature statistics should be identical
    myNormalizer.fit(fullDataSetA);
    assertEquals(myNormalizer.getMean(), fullDataSetNoMask.expectedMean);
    assertEquals(myNormalizer.getStd(), fullDataSetNoMask.expectedStd);
    assertEquals(myNormalizer.getLabelMean(), fullDataSetNoMask.expectedMean);
    assertEquals(myNormalizer.getLabelStd(), fullDataSetNoMask.expectedStd);
    myMinMaxScaler.fit(fullDataSetAA);
    assertEquals(myMinMaxScaler.getMin(), fullDataSetNoMask.expectedMin);
    assertEquals(myMinMaxScaler.getMax(), fullDataSetNoMask.expectedMax);
    assertEquals(myMinMaxScaler.getLabelMin(), fullDataSetNoMask.expectedMin);
    assertEquals(myMinMaxScaler.getLabelMax(), fullDataSetNoMask.expectedMax);

    // Same test with an iterator: values should be close for std, exact for everything else
    DataSetIterator sampleIterA = new TestDataSetIterator(fullDataSetA, 5);
    DataSetIterator sampleIterB = new TestDataSetIterator(fullDataSetAA, 5);
    myNormalizer.fit(sampleIterA);
    assertEquals(myNormalizer.getMean(), fullDataSetNoMask.expectedMean);
    assertEquals(myNormalizer.getLabelMean(), fullDataSetNoMask.expectedMean);
    assertTrue(Transforms.abs(myNormalizer.getStd().div(fullDataSetNoMask.expectedStd).sub(1)).maxNumber().floatValue() < 0.01);
    assertTrue(Transforms.abs(myNormalizer.getLabelStd().div(fullDataSetNoMask.expectedStd).sub(1)).maxNumber().floatValue() < 0.01);
    myMinMaxScaler.fit(sampleIterB);
    assertEquals(myMinMaxScaler.getMin(), fullDataSetNoMask.expectedMin);
    assertEquals(myMinMaxScaler.getMax(), fullDataSetNoMask.expectedMax);
    assertEquals(myMinMaxScaler.getLabelMin(), fullDataSetNoMask.expectedMin);
    assertEquals(myMinMaxScaler.getLabelMax(), fullDataSetNoMask.expectedMax);
}
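For orientation, a minimal sketch of the same masked time-series workflow without the project's Construct3dDataSet test helper; the shapes, values, and variable names below are illustrative assumptions, not code from the project.

// Hedged sketch (assumes the usual org.nd4j imports): fit a NormalizerStandardize
// on a masked 3D time-series DataSet of shape [miniBatch, features, timeSteps],
// then standardize and revert in place. As the test above verifies, masked
// (padded) time steps are excluded from the fitted statistics.
INDArray features = Nd4j.rand(new int[] {10, 3, 5});
INDArray labels = Nd4j.rand(new int[] {10, 3, 5});
INDArray mask = Nd4j.ones(10, 5); // 1 = real time step, 0 = padding
DataSet timeSeries = new DataSet(features, labels, mask, mask);
NormalizerStandardize normalizer = new NormalizerStandardize();
normalizer.fitLabel(true);        // also collect label statistics
normalizer.fit(timeSeries);
normalizer.transform(timeSeries); // standardize features and labels in place
normalizer.revert(timeSeries);    // undo the standardization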
Use of org.nd4j.linalg.dataset.api.preprocessor.NormalizerStandardize in project nd4j by deeplearning4j.
The class PreProcessor3D4DTest, method testBruteForce4d.
@Test
public void testBruteForce4d() {
    Construct4dDataSet imageDataSet = new Construct4dDataSet(10, 5, 10, 15);
    NormalizerStandardize myNormalizer = new NormalizerStandardize();
    myNormalizer.fit(imageDataSet.sampleDataSet);
    assertEquals(imageDataSet.expectedMean, myNormalizer.getMean());

    // aat: maximum relative error of the fitted std vs. the expected std;
    // abt/act: the largest fitted and expected std values, printed for debugging
    float aat = Transforms.abs(myNormalizer.getStd().div(imageDataSet.expectedStd).sub(1)).maxNumber().floatValue();
    float abt = myNormalizer.getStd().maxNumber().floatValue();
    float act = imageDataSet.expectedStd.maxNumber().floatValue();
    System.out.println("ValA: " + aat);
    System.out.println("ValB: " + abt);
    System.out.println("ValC: " + act);
    assertTrue(aat < 0.05);

    NormalizerMinMaxScaler myMinMaxScaler = new NormalizerMinMaxScaler();
    myMinMaxScaler.fit(imageDataSet.sampleDataSet);
    assertEquals(imageDataSet.expectedMin, myMinMaxScaler.getMin());
    assertEquals(imageDataSet.expectedMax, myMinMaxScaler.getMax());
    DataSet copyDataSet = imageDataSet.sampleDataSet.copy();
    myNormalizer.transform(copyDataSet);
}
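NormalizerMinMaxScaler also accepts a custom target range via its constructor. A hedged sketch on 4D image-shaped data; the shapes, range, and variable names are illustrative assumptions, not taken from the test above.

// Hedged sketch: scale 4D [miniBatch, channels, height, width] features to
// [-1, 1] instead of the default [0, 1], then map back.
DataSet imageLikeDataSet = new DataSet(Nd4j.rand(new int[] {8, 3, 16, 16}), Nd4j.rand(8, 2));
NormalizerMinMaxScaler scaler = new NormalizerMinMaxScaler(-1, 1); // target range [-1, 1]
scaler.fit(imageLikeDataSet);       // collect per-feature min/max statistics
scaler.transform(imageLikeDataSet); // scale in place
scaler.revert(imageLikeDataSet);    // restore the original value range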
Use of org.nd4j.linalg.dataset.api.preprocessor.NormalizerStandardize in project deeplearning4j by deeplearning4j.
The class EvalTest, method testEvaluationWithMetaData.
@Test
public void testEvaluationWithMetaData() throws Exception {
    RecordReader csv = new CSVRecordReader();
    csv.initialize(new FileSplit(new ClassPathResource("iris.txt").getTempFileFromArchive()));
    int batchSize = 10;
    int labelIdx = 4;
    int numClasses = 3;
    RecordReaderDataSetIterator rrdsi = new RecordReaderDataSetIterator(csv, batchSize, labelIdx, numClasses);
    NormalizerStandardize ns = new NormalizerStandardize();
    ns.fit(rrdsi);
    rrdsi.setPreProcessor(ns);
    rrdsi.reset();
    Nd4j.getRandom().setSeed(12345);
    MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder().seed(12345).iterations(1)
                    .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT).updater(Updater.SGD)
                    .learningRate(0.1).list()
                    .layer(0, new OutputLayer.Builder(LossFunctions.LossFunction.MCXENT)
                                    .activation(Activation.SOFTMAX).nIn(4).nOut(3).build())
                    .pretrain(false).backprop(true).build();
    MultiLayerNetwork net = new MultiLayerNetwork(conf);
    net.init();
    for (int i = 0; i < 4; i++) {
        net.fit(rrdsi);
        rrdsi.reset();
    }
    Evaluation e = new Evaluation();
    //*** New: Enable collection of metadata (stored in the DataSets) ***
    rrdsi.setCollectMetaData(true);
    while (rrdsi.hasNext()) {
        DataSet ds = rrdsi.next();
        //*** New - cross dependencies here make types difficult; Object is used internally in DataSet for this ***
        List<RecordMetaData> meta = ds.getExampleMetaData(RecordMetaData.class);
        INDArray out = net.output(ds.getFeatures());
        //*** New - evaluate and also store metadata ***
        e.eval(ds.getLabels(), out, meta);
    }
    System.out.println(e.stats());
    System.out.println("\n\n*** Prediction Errors: ***");
    //*** New - get list of prediction errors from evaluation ***
    List<Prediction> errors = e.getPredictionErrors();
    List<RecordMetaData> metaForErrors = new ArrayList<>();
    for (Prediction p : errors) {
        metaForErrors.add((RecordMetaData) p.getRecordMetaData());
    }
    //*** New - dynamically load a subset of the data, just for the prediction errors ***
    DataSet ds = rrdsi.loadFromMetaData(metaForErrors);
    INDArray output = net.output(ds.getFeatures());
    int count = 0;
    for (Prediction t : errors) {
        //*** New - load a subset of the data from the MetaData object (usually batched for efficiency) ***
        System.out.println(t + "\t\tRaw Data: "
                        + csv.loadFromMetaData((RecordMetaData) t.getRecordMetaData()).getRecord()
                        + "\tNormalized: " + ds.getFeatureMatrix().getRow(count)
                        + "\tLabels: " + ds.getLabels().getRow(count)
                        + "\tNetwork predictions: " + output.getRow(count));
        count++;
    }
    int errorCount = errors.size();
    double expAcc = 1.0 - errorCount / 150.0;
    assertEquals(expAcc, e.accuracy(), 1e-5);
    ConfusionMatrix<Integer> confusion = e.getConfusionMatrix();
    int[] actualCounts = new int[3];
    int[] predictedCounts = new int[3];
    for (int i = 0; i < 3; i++) {
        for (int j = 0; j < 3; j++) {
            // (actual, predicted)
            int entry = confusion.getCount(i, j);
            List<Prediction> list = e.getPredictions(i, j);
            assertEquals(entry, list.size());
            actualCounts[i] += entry;
            predictedCounts[j] += entry;
        }
    }
    for (int i = 0; i < 3; i++) {
        List<Prediction> actualClassI = e.getPredictionsByActualClass(i);
        List<Prediction> predictedClassI = e.getPredictionByPredictedClass(i);
        assertEquals(actualCounts[i], actualClassI.size());
        assertEquals(predictedCounts[i], predictedClassI.size());
    }
}
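In a workflow like the one above, the normalizer fitted on the training data is usually persisted so that the same statistics can be applied at inference time. A hedged sketch using NormalizerSerializer from org.nd4j.linalg.dataset.api.preprocessor.serializer; the file path and variable names are illustrative assumptions.

// Hedged sketch: save and restore a fitted NormalizerStandardize.
// write(...) and restore(...) throw checked exceptions, so call them from a
// method that declares throws Exception (as the test above does).
NormalizerStandardize fitted = new NormalizerStandardize();
fitted.fit(new IrisDataSetIterator(150, 150));
File normalizerFile = new File("normalizer.bin"); // illustrative path
NormalizerSerializer.getDefault().write(fitted, normalizerFile);
NormalizerStandardize restored = NormalizerSerializer.getDefault().restore(normalizerFile);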
Use of org.nd4j.linalg.dataset.api.preprocessor.NormalizerStandardize in project deeplearning4j by deeplearning4j.
The class EvaluationToolsTests, method testRocMultiToHtml.
@Test
public void testRocMultiToHtml() throws Exception {
    DataSetIterator iter = new IrisDataSetIterator(150, 150);
    MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder().weightInit(WeightInit.XAVIER).list()
                    .layer(0, new DenseLayer.Builder().nIn(4).nOut(4).activation(Activation.TANH).build())
                    .layer(1, new OutputLayer.Builder().nIn(4).nOut(3).activation(Activation.SOFTMAX)
                                    .lossFunction(LossFunctions.LossFunction.MCXENT).build())
                    .build();
    MultiLayerNetwork net = new MultiLayerNetwork(conf);
    net.init();
    NormalizerStandardize ns = new NormalizerStandardize();
    DataSet ds = iter.next();
    ns.fit(ds);
    ns.transform(ds);
    for (int i = 0; i < 30; i++) {
        net.fit(ds);
    }
    ROCMultiClass roc = new ROCMultiClass(20);
    iter.reset();
    INDArray f = ds.getFeatures();
    INDArray l = ds.getLabels();
    INDArray out = net.output(f);
    roc.eval(l, out);
    String str = EvaluationTools.rocChartToHtml(roc, Arrays.asList("setosa", "versicolor", "virginica"));
    // System.out.println(str);
}
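The test only builds the HTML string; to inspect the charts, the string can be written to disk with plain java.io. A hedged sketch, assuming a populated ROCMultiClass named roc as in the test above; the output file name is an illustrative assumption.

// Hedged sketch: write the generated ROC chart HTML to a file for viewing in a browser.
String html = EvaluationTools.rocChartToHtml(roc, Arrays.asList("setosa", "versicolor", "virginica"));
try (PrintWriter pw = new PrintWriter("roc.html", "UTF-8")) {
    pw.write(html);
}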
Use of org.nd4j.linalg.dataset.api.preprocessor.NormalizerStandardize in project deeplearning4j by deeplearning4j.
The class GradientCheckTests, method testAutoEncoder.
@Test
public void testAutoEncoder() {
    //As above (testGradientMLP2LayerIrisSimple()), but with L2, L1, and both L2/L1 applied
    //Need to run gradients through the updater, so that L2 can be applied
    String[] activFns = {"sigmoid", "tanh"};
    //If true: run some backprop steps first
    boolean[] characteristic = {false, true};
    LossFunction[] lossFunctions = {LossFunction.MCXENT, LossFunction.MSE};
    //i.e., lossFunctions[i] used with outputActivations[i] here
    String[] outputActivations = {"softmax", "tanh"};

    DataNormalization scaler = new NormalizerMinMaxScaler();
    DataSetIterator iter = new IrisDataSetIterator(150, 150);
    scaler.fit(iter);
    iter.setPreProcessor(scaler);
    DataSet ds = iter.next();
    INDArray input = ds.getFeatureMatrix();
    INDArray labels = ds.getLabels();
    NormalizerStandardize norm = new NormalizerStandardize();
    norm.fit(ds);
    norm.transform(ds);

    double[] l2vals = {0.2, 0.0, 0.2};
    //i.e., use l2vals[i] with l1vals[i]
    double[] l1vals = {0.0, 0.3, 0.3};

    for (String afn : activFns) {
        for (boolean doLearningFirst : characteristic) {
            for (int i = 0; i < lossFunctions.length; i++) {
                for (int k = 0; k < l2vals.length; k++) {
                    LossFunction lf = lossFunctions[i];
                    String outputActivation = outputActivations[i];
                    double l2 = l2vals[k];
                    double l1 = l1vals[k];
                    Nd4j.getRandom().setSeed(12345);
                    MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder().regularization(true)
                                    .learningRate(1.0).l2(l2).l1(l1)
                                    .optimizationAlgo(OptimizationAlgorithm.CONJUGATE_GRADIENT).seed(12345L)
                                    .weightInit(WeightInit.DISTRIBUTION).dist(new NormalDistribution(0, 1))
                                    .updater(Updater.SGD).list()
                                    .layer(0, new AutoEncoder.Builder().nIn(4).nOut(3).activation(afn).build())
                                    .layer(1, new OutputLayer.Builder(lf).nIn(3).nOut(3)
                                                    .activation(outputActivation).build())
                                    .pretrain(true).backprop(true).build();
                    MultiLayerNetwork mln = new MultiLayerNetwork(conf);
                    mln.init();
                    if (doLearningFirst) {
                        //Run a number of iterations of learning
                        mln.setInput(ds.getFeatures());
                        mln.setLabels(ds.getLabels());
                        mln.computeGradientAndScore();
                        double scoreBefore = mln.score();
                        for (int j = 0; j < 10; j++) mln.fit(ds);
                        mln.computeGradientAndScore();
                        double scoreAfter = mln.score();
                        //Can't test in 'characteristic mode of operation' if not learning
                        String msg = "testGradMLP2LayerIrisSimple() - score did not (sufficiently) decrease during learning - activationFn=" + afn + ", lossFn=" + lf + ", outputActivation=" + outputActivation + ", doLearningFirst=" + doLearningFirst + ", l2=" + l2 + ", l1=" + l1 + " (before=" + scoreBefore + ", scoreAfter=" + scoreAfter + ")";
                        assertTrue(msg, scoreAfter < scoreBefore);
                    }
                    if (PRINT_RESULTS) {
                        System.out.println("testGradientMLP2LayerIrisSimpleRandom() - activationFn=" + afn + ", lossFn=" + lf + ", outputActivation=" + outputActivation + ", doLearningFirst=" + doLearningFirst + ", l2=" + l2 + ", l1=" + l1);
                        for (int j = 0; j < mln.getnLayers(); j++)
                            System.out.println("Layer " + j + " # params: " + mln.getLayer(j).numParams());
                    }
                    boolean gradOK = GradientCheckUtil.checkGradients(mln, DEFAULT_EPS, DEFAULT_MAX_REL_ERROR,
                                    DEFAULT_MIN_ABS_ERROR, PRINT_RESULTS, RETURN_ON_FIRST_FAILURE, input, labels);
                    String msg = "testGradMLP2LayerIrisSimple() - activationFn=" + afn + ", lossFn=" + lf + ", outputActivation=" + outputActivation + ", doLearningFirst=" + doLearningFirst + ", l2=" + l2 + ", l1=" + l1;
                    assertTrue(msg, gradOK);
                }
            }
        }
    }
}
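As a sanity check on what fit computes for plain 2D data like the Iris features above, a hedged sketch comparing the fitted statistics against a direct column-wise computation; the variable names are illustrative, and exact agreement of the std values depends on the bias-correction convention, so compare approximately rather than with assertEquals.

// Hedged sketch: for 2D data, NormalizerStandardize statistics correspond to
// column-wise mean and standard deviation of the features.
DataSet iris = new IrisDataSetIterator(150, 150).next();
NormalizerStandardize check = new NormalizerStandardize();
check.fit(iris);
INDArray directMean = iris.getFeatures().mean(0); // column-wise mean
INDArray directStd = iris.getFeatures().std(0);   // column-wise std
System.out.println(check.getMean() + " vs " + directMean);
System.out.println(check.getStd() + " vs " + directStd);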