Search in sources :

Example 6 with DataSet

use of org.nd4j.linalg.dataset.DataSet in project deeplearning4j by deeplearning4j.

In class RecordReaderDataSetiteratorTest, method testRecordReaderMultiRegression.

/**
 * Pulls the first mini-batch (3 rows) out of iris.txt through a
 * RecordReaderDataSetIterator built with label indices 3..4 and the trailing
 * flag set to true, then verifies both the shapes and the values of the
 * feature and label arrays.
 */
@Test
public void testRecordReaderMultiRegression() throws Exception {
    final int batchSize = 3;
    final int labelIdxFrom = 3;
    final int labelIdxTo = 4;

    RecordReader reader = new CSVRecordReader();
    reader.initialize(new FileSplit(new ClassPathResource("iris.txt").getTempFileFromArchive()));
    DataSetIterator iterator =
            new RecordReaderDataSetIterator(reader, batchSize, labelIdxFrom, labelIdxTo, true);

    DataSet firstBatch = iterator.next();
    INDArray actualFeatures = firstBatch.getFeatureMatrix();
    INDArray actualLabels = firstBatch.getLabels();

    // Shapes: 3 examples with 3 feature columns and 2 label columns.
    assertArrayEquals(new int[] { 3, 3 }, actualFeatures.shape());
    assertArrayEquals(new int[] { 3, 2 }, actualLabels.shape());

    // Values: compare against the hand-written expectation for the first three rows.
    INDArray expectedFeatures = Nd4j.create(new double[][] {
        { 5.1, 3.5, 1.4 },
        { 4.9, 3.0, 1.4 },
        { 4.7, 3.2, 1.3 }
    });
    INDArray expectedLabels = Nd4j.create(new double[][] {
        { 0.2, 0 },
        { 0.2, 0 },
        { 0.2, 0 }
    });
    assertEquals(expectedFeatures, actualFeatures);
    assertEquals(expectedLabels, actualLabels);
}
Also used : INDArray(org.nd4j.linalg.api.ndarray.INDArray) DataSet(org.nd4j.linalg.dataset.DataSet) RecordReader(org.datavec.api.records.reader.RecordReader) CollectionRecordReader(org.datavec.api.records.reader.impl.collection.CollectionRecordReader) CSVSequenceRecordReader(org.datavec.api.records.reader.impl.csv.CSVSequenceRecordReader) CSVRecordReader(org.datavec.api.records.reader.impl.csv.CSVRecordReader) SequenceRecordReader(org.datavec.api.records.reader.SequenceRecordReader) CollectionSequenceRecordReader(org.datavec.api.records.reader.impl.collection.CollectionSequenceRecordReader) CSVRecordReader(org.datavec.api.records.reader.impl.csv.CSVRecordReader) FileSplit(org.datavec.api.split.FileSplit) ClassPathResource(org.nd4j.linalg.io.ClassPathResource) DataSetIterator(org.nd4j.linalg.dataset.api.iterator.DataSetIterator) Test(org.junit.Test)

Example 7 with DataSet

use of org.nd4j.linalg.dataset.DataSet in project deeplearning4j by deeplearning4j.

In class RecordReaderDataSetiteratorTest, method testVariableLengthSequence.

/**
 * Checks SequenceRecordReaderDataSetIterator on feature/label sequences of
 * different lengths, once with AlignmentMode.ALIGN_START and once with
 * AlignmentMode.ALIGN_END.
 *
 * Features come from csvsequence_{0,1,2}.txt and labels from
 * csvsequencelabelsShort_{0,1,2}.txt. For each of the three resulting DataSets
 * the test verifies array sizes, feature values, feature masks (all ones),
 * label values and label masks (padded at the end for ALIGN_START and at the
 * start for ALIGN_END).
 */
@Test
public void testVariableLengthSequence() throws Exception {
    //need to manually extract
    for (int i = 0; i < 3; i++) {
        new ClassPathResource(String.format("csvsequence_%d.txt", i)).getTempFileFromArchive();
        new ClassPathResource(String.format("csvsequencelabelsShort_%d.txt", i)).getTempFileFromArchive();
    }
    // Build the "%d" patterns by substituting in the file NAME only. Rewriting the
    // whole absolute path would also replace any '0' that occurs in a directory
    // component, producing a pattern that no longer points at the extracted files.
    ClassPathResource resource = new ClassPathResource("csvsequence_0.txt");
    java.io.File featuresFile = resource.getTempFileFromArchive().getAbsoluteFile();
    String featuresPath = new java.io.File(featuresFile.getParentFile(), featuresFile.getName().replace("0", "%d")).getAbsolutePath();
    resource = new ClassPathResource("csvsequencelabelsShort_0.txt");
    java.io.File labelsFile = resource.getTempFileFromArchive().getAbsoluteFile();
    String labelsPath = new java.io.File(labelsFile.getParentFile(), labelsFile.getName().replace("0", "%d")).getAbsolutePath();
    // Two independent reader pairs over the same files: one pair per alignment mode.
    SequenceRecordReader featureReader = new CSVSequenceRecordReader(1, ",");
    SequenceRecordReader labelReader = new CSVSequenceRecordReader(1, ",");
    featureReader.initialize(new NumberedFileInputSplit(featuresPath, 0, 2));
    labelReader.initialize(new NumberedFileInputSplit(labelsPath, 0, 2));
    SequenceRecordReader featureReader2 = new CSVSequenceRecordReader(1, ",");
    SequenceRecordReader labelReader2 = new CSVSequenceRecordReader(1, ",");
    featureReader2.initialize(new NumberedFileInputSplit(featuresPath, 0, 2));
    labelReader2.initialize(new NumberedFileInputSplit(labelsPath, 0, 2));
    SequenceRecordReaderDataSetIterator iterAlignStart = new SequenceRecordReaderDataSetIterator(featureReader, labelReader, 1, 4, false, SequenceRecordReaderDataSetIterator.AlignmentMode.ALIGN_START);
    SequenceRecordReaderDataSetIterator iterAlignEnd = new SequenceRecordReaderDataSetIterator(featureReader2, labelReader2, 1, 4, false, SequenceRecordReaderDataSetIterator.AlignmentMode.ALIGN_END);
    assertEquals(3, iterAlignStart.inputColumns());
    assertEquals(4, iterAlignStart.totalOutcomes());
    assertEquals(3, iterAlignEnd.inputColumns());
    assertEquals(4, iterAlignEnd.totalOutcomes());
    // Drain both iterators so the individual DataSets can be inspected by index.
    List<DataSet> dsListAlignStart = new ArrayList<>();
    while (iterAlignStart.hasNext()) {
        dsListAlignStart.add(iterAlignStart.next());
    }
    List<DataSet> dsListAlignEnd = new ArrayList<>();
    while (iterAlignEnd.hasNext()) {
        dsListAlignEnd.add(iterAlignEnd.next());
    }
    //3 files
    assertEquals(3, dsListAlignStart.size());
    //3 files
    assertEquals(3, dsListAlignEnd.size());
    for (int i = 0; i < 3; i++) {
        DataSet ds = dsListAlignStart.get(i);
        INDArray features = ds.getFeatureMatrix();
        INDArray labels = ds.getLabels();
        //1 example in mini-batch
        assertEquals(1, features.size(0));
        assertEquals(1, labels.size(0));
        //3 values per line/time step
        assertEquals(3, features.size(1));
        //1 value per line, but 4 possible values -> one-hot vector
        assertEquals(4, labels.size(1));
        //sequence length = 4
        assertEquals(4, features.size(2));
        assertEquals(4, labels.size(2));
        DataSet ds2 = dsListAlignEnd.get(i);
        features = ds2.getFeatureMatrix();
        labels = ds2.getLabels();
        //1 example in mini-batch
        assertEquals(1, features.size(0));
        assertEquals(1, labels.size(0));
        //3 values per line/time step
        assertEquals(3, features.size(1));
        //1 value per line, but 4 possible values -> one-hot vector
        assertEquals(4, labels.size(1));
        //sequence length = 4
        assertEquals(4, features.size(2));
        assertEquals(4, labels.size(2));
    }
    //Check features vs. expected:
    //Here: features always at least as long as labels -> same features for align start and align end
    INDArray expF0 = Nd4j.create(1, 3, 4);
    expF0.tensorAlongDimension(0, 1).assign(Nd4j.create(new double[] { 0, 1, 2 }));
    expF0.tensorAlongDimension(1, 1).assign(Nd4j.create(new double[] { 10, 11, 12 }));
    expF0.tensorAlongDimension(2, 1).assign(Nd4j.create(new double[] { 20, 21, 22 }));
    expF0.tensorAlongDimension(3, 1).assign(Nd4j.create(new double[] { 30, 31, 32 }));
    assertEquals(expF0, dsListAlignStart.get(0).getFeatureMatrix());
    assertEquals(expF0, dsListAlignEnd.get(0).getFeatureMatrix());
    INDArray expF1 = Nd4j.create(1, 3, 4);
    expF1.tensorAlongDimension(0, 1).assign(Nd4j.create(new double[] { 100, 101, 102 }));
    expF1.tensorAlongDimension(1, 1).assign(Nd4j.create(new double[] { 110, 111, 112 }));
    expF1.tensorAlongDimension(2, 1).assign(Nd4j.create(new double[] { 120, 121, 122 }));
    expF1.tensorAlongDimension(3, 1).assign(Nd4j.create(new double[] { 130, 131, 132 }));
    assertEquals(expF1, dsListAlignStart.get(1).getFeatureMatrix());
    assertEquals(expF1, dsListAlignEnd.get(1).getFeatureMatrix());
    INDArray expF2 = Nd4j.create(1, 3, 4);
    expF2.tensorAlongDimension(0, 1).assign(Nd4j.create(new double[] { 200, 201, 202 }));
    expF2.tensorAlongDimension(1, 1).assign(Nd4j.create(new double[] { 210, 211, 212 }));
    expF2.tensorAlongDimension(2, 1).assign(Nd4j.create(new double[] { 220, 221, 222 }));
    expF2.tensorAlongDimension(3, 1).assign(Nd4j.create(new double[] { 230, 231, 232 }));
    assertEquals(expF2, dsListAlignStart.get(2).getFeatureMatrix());
    assertEquals(expF2, dsListAlignEnd.get(2).getFeatureMatrix());
    //Check features mask array:
    //1 example, 4 values: same for both start/end align here
    INDArray featuresMaskExpected = Nd4j.ones(1, 4);
    for (int i = 0; i < 3; i++) {
        INDArray featuresMaskStart = dsListAlignStart.get(i).getFeaturesMaskArray();
        INDArray featuresMaskEnd = dsListAlignEnd.get(i).getFeaturesMaskArray();
        assertEquals(featuresMaskExpected, featuresMaskStart);
        assertEquals(featuresMaskExpected, featuresMaskEnd);
    }
    //Check labels vs. expected:
    //First: aligning start (one-hot rows fill the leading time steps, the rest stay zero)
    INDArray expL0 = Nd4j.create(1, 4, 4);
    expL0.tensorAlongDimension(0, 1).assign(Nd4j.create(new double[] { 1, 0, 0, 0 }));
    expL0.tensorAlongDimension(1, 1).assign(Nd4j.create(new double[] { 0, 1, 0, 0 }));
    assertEquals(expL0, dsListAlignStart.get(0).getLabels());
    INDArray expL1 = Nd4j.create(1, 4, 4);
    expL1.tensorAlongDimension(0, 1).assign(Nd4j.create(new double[] { 0, 1, 0, 0 }));
    assertEquals(expL1, dsListAlignStart.get(1).getLabels());
    INDArray expL2 = Nd4j.create(1, 4, 4);
    expL2.tensorAlongDimension(0, 1).assign(Nd4j.create(new double[] { 0, 0, 0, 1 }));
    expL2.tensorAlongDimension(1, 1).assign(Nd4j.create(new double[] { 0, 0, 1, 0 }));
    expL2.tensorAlongDimension(2, 1).assign(Nd4j.create(new double[] { 0, 1, 0, 0 }));
    assertEquals(expL2, dsListAlignStart.get(2).getLabels());
    //Second: align end (same one-hot rows, shifted to the trailing time steps)
    INDArray expL0end = Nd4j.create(1, 4, 4);
    expL0end.tensorAlongDimension(2, 1).assign(Nd4j.create(new double[] { 1, 0, 0, 0 }));
    expL0end.tensorAlongDimension(3, 1).assign(Nd4j.create(new double[] { 0, 1, 0, 0 }));
    assertEquals(expL0end, dsListAlignEnd.get(0).getLabels());
    INDArray expL1end = Nd4j.create(1, 4, 4);
    expL1end.tensorAlongDimension(3, 1).assign(Nd4j.create(new double[] { 0, 1, 0, 0 }));
    assertEquals(expL1end, dsListAlignEnd.get(1).getLabels());
    INDArray expL2end = Nd4j.create(1, 4, 4);
    expL2end.tensorAlongDimension(1, 1).assign(Nd4j.create(new double[] { 0, 0, 0, 1 }));
    expL2end.tensorAlongDimension(2, 1).assign(Nd4j.create(new double[] { 0, 0, 1, 0 }));
    expL2end.tensorAlongDimension(3, 1).assign(Nd4j.create(new double[] { 0, 1, 0, 0 }));
    assertEquals(expL2end, dsListAlignEnd.get(2).getLabels());
    //Check labels mask array
    INDArray[] labelsMaskExpectedStart = new INDArray[] { Nd4j.create(new float[] { 1, 1, 0, 0 }, new int[] { 1, 4 }), Nd4j.create(new float[] { 1, 0, 0, 0 }, new int[] { 1, 4 }), Nd4j.create(new float[] { 1, 1, 1, 0 }, new int[] { 1, 4 }) };
    INDArray[] labelsMaskExpectedEnd = new INDArray[] { Nd4j.create(new float[] { 0, 0, 1, 1 }, new int[] { 1, 4 }), Nd4j.create(new float[] { 0, 0, 0, 1 }, new int[] { 1, 4 }), Nd4j.create(new float[] { 0, 1, 1, 1 }, new int[] { 1, 4 }) };
    for (int i = 0; i < 3; i++) {
        INDArray labelsMaskStart = dsListAlignStart.get(i).getLabelsMaskArray();
        INDArray labelsMaskEnd = dsListAlignEnd.get(i).getLabelsMaskArray();
        assertEquals(labelsMaskExpectedStart[i], labelsMaskStart);
        assertEquals(labelsMaskExpectedEnd[i], labelsMaskEnd);
    }
}
Also used : CSVSequenceRecordReader(org.datavec.api.records.reader.impl.csv.CSVSequenceRecordReader) SequenceRecordReader(org.datavec.api.records.reader.SequenceRecordReader) CollectionSequenceRecordReader(org.datavec.api.records.reader.impl.collection.CollectionSequenceRecordReader) CSVSequenceRecordReader(org.datavec.api.records.reader.impl.csv.CSVSequenceRecordReader) INDArray(org.nd4j.linalg.api.ndarray.INDArray) DataSet(org.nd4j.linalg.dataset.DataSet) ClassPathResource(org.nd4j.linalg.io.ClassPathResource) NumberedFileInputSplit(org.datavec.api.split.NumberedFileInputSplit) Test(org.junit.Test)

Example 8 with DataSet

use of org.nd4j.linalg.dataset.DataSet in project deeplearning4j by deeplearning4j.

In class RecordReaderDataSetiteratorTest, method testRecordReader.

/**
 * Reads csv-example.csv through a RecordReaderDataSetIterator constructed with
 * the value 34 and verifies that the first DataSet holds exactly 34 examples.
 */
@Test
public void testRecordReader() throws Exception {
    final int batchSize = 34;

    RecordReader reader = new CSVRecordReader();
    reader.initialize(new FileSplit(new ClassPathResource("csv-example.csv").getTempFileFromArchive()));

    DataSetIterator iterator = new RecordReaderDataSetIterator(reader, batchSize);
    DataSet firstBatch = iterator.next();

    assertEquals(batchSize, firstBatch.numExamples());
}
Also used : DataSet(org.nd4j.linalg.dataset.DataSet) RecordReader(org.datavec.api.records.reader.RecordReader) CollectionRecordReader(org.datavec.api.records.reader.impl.collection.CollectionRecordReader) CSVSequenceRecordReader(org.datavec.api.records.reader.impl.csv.CSVSequenceRecordReader) CSVRecordReader(org.datavec.api.records.reader.impl.csv.CSVRecordReader) SequenceRecordReader(org.datavec.api.records.reader.SequenceRecordReader) CollectionSequenceRecordReader(org.datavec.api.records.reader.impl.collection.CollectionSequenceRecordReader) CSVRecordReader(org.datavec.api.records.reader.impl.csv.CSVRecordReader) FileSplit(org.datavec.api.split.FileSplit) ClassPathResource(org.nd4j.linalg.io.ClassPathResource) DataSetIterator(org.nd4j.linalg.dataset.api.iterator.DataSetIterator) Test(org.junit.Test)

Example 9 with DataSet

use of org.nd4j.linalg.dataset.DataSet in project deeplearning4j by deeplearning4j.

In class RecordReaderDataSetiteratorTest, method testSequenceRecordReaderRegression.

/**
 * Exercises SequenceRecordReaderDataSetIterator in two configurations.
 *
 * First, separate feature and label readers are both pointed at
 * csvsequence_{0,1,2}.txt, so every returned DataSet must have identical
 * features and labels of shape [1, 3, 4]. Second, a single reader is reused
 * (after reset()) with the arguments (1, 0, 2, true); the test checks the
 * resulting feature/label widths (2 and 1) and that the iterator yields three
 * DataSets both before and after iter.reset().
 */
@Test
public void testSequenceRecordReaderRegression() throws Exception {
    //need to manually extract
    for (int i = 0; i < 3; i++) {
        new ClassPathResource(String.format("csvsequence_%d.txt", i)).getTempFileFromArchive();
    }
    // Build the "%d" patterns by substituting in the file NAME only. Rewriting the
    // whole absolute path would also replace any '0' that occurs in a directory
    // component, producing a pattern that no longer points at the extracted files.
    ClassPathResource resource = new ClassPathResource("csvsequence_0.txt");
    java.io.File featuresFile = resource.getTempFileFromArchive().getAbsoluteFile();
    String featuresPath = new java.io.File(featuresFile.getParentFile(), featuresFile.getName().replace("0", "%d")).getAbsolutePath();
    // Labels deliberately come from the same files as the features.
    resource = new ClassPathResource("csvsequence_0.txt");
    java.io.File labelsFile = resource.getTempFileFromArchive().getAbsoluteFile();
    String labelsPath = new java.io.File(labelsFile.getParentFile(), labelsFile.getName().replace("0", "%d")).getAbsolutePath();
    SequenceRecordReader featureReader = new CSVSequenceRecordReader(1, ",");
    SequenceRecordReader labelReader = new CSVSequenceRecordReader(1, ",");
    featureReader.initialize(new NumberedFileInputSplit(featuresPath, 0, 2));
    labelReader.initialize(new NumberedFileInputSplit(labelsPath, 0, 2));
    SequenceRecordReaderDataSetIterator iter = new SequenceRecordReaderDataSetIterator(featureReader, labelReader, 1, 0, true);
    assertEquals(3, iter.inputColumns());
    assertEquals(3, iter.totalOutcomes());
    List<DataSet> dsList = new ArrayList<>();
    while (iter.hasNext()) {
        dsList.add(iter.next());
    }
    //3 files
    assertEquals(3, dsList.size());
    for (int i = 0; i < 3; i++) {
        DataSet ds = dsList.get(i);
        INDArray features = ds.getFeatureMatrix();
        INDArray labels = ds.getLabels();
        //1 examples, 3 values, 4 time steps
        assertArrayEquals(new int[] { 1, 3, 4 }, features.shape());
        assertArrayEquals(new int[] { 1, 3, 4 }, labels.shape());
        // Same source files for both readers, so the arrays must match exactly.
        assertEquals(features, labels);
    }
    //Also test regression + reset from a single reader:
    featureReader.reset();
    iter = new SequenceRecordReaderDataSetIterator(featureReader, 1, 0, 2, true);
    int count = 0;
    while (iter.hasNext()) {
        DataSet ds = iter.next();
        // One of the three columns is split off as the label, leaving two features.
        assertEquals(2, ds.getFeatureMatrix().size(1));
        assertEquals(1, ds.getLabels().size(1));
        count++;
    }
    assertEquals(3, count);
    // After reset() the iterator must yield the same number of DataSets again.
    iter.reset();
    count = 0;
    while (iter.hasNext()) {
        iter.next();
        count++;
    }
    assertEquals(3, count);
}
Also used : CSVSequenceRecordReader(org.datavec.api.records.reader.impl.csv.CSVSequenceRecordReader) SequenceRecordReader(org.datavec.api.records.reader.SequenceRecordReader) CollectionSequenceRecordReader(org.datavec.api.records.reader.impl.collection.CollectionSequenceRecordReader) CSVSequenceRecordReader(org.datavec.api.records.reader.impl.csv.CSVSequenceRecordReader) INDArray(org.nd4j.linalg.api.ndarray.INDArray) DataSet(org.nd4j.linalg.dataset.DataSet) ClassPathResource(org.nd4j.linalg.io.ClassPathResource) NumberedFileInputSplit(org.datavec.api.split.NumberedFileInputSplit) Test(org.junit.Test)

Example 10 with DataSet

use of org.nd4j.linalg.dataset.DataSet in project deeplearning4j by deeplearning4j.

In class BNGradientCheckTest, method testGradient2dSimple.

/**
 * Gradient check for a small dense network containing a BatchNormalization
 * layer (Dense -> BatchNormalization -> Activation(TANH) -> Output), run on one
 * min/max-normalized batch taken from an IrisDataSetIterator(150, 150).
 */
@Test
public void testGradient2dSimple() {
    // Fit the scaler on the iterator, then attach it so next() returns normalized data.
    DataNormalization normalizer = new NormalizerMinMaxScaler();
    DataSetIterator irisIterator = new IrisDataSetIterator(150, 150);
    normalizer.fit(irisIterator);
    irisIterator.setPreProcessor(normalizer);

    DataSet batch = irisIterator.next();
    INDArray input = batch.getFeatureMatrix();
    INDArray labels = batch.getLabels();

    MultiLayerConfiguration.Builder confBuilder = new NeuralNetConfiguration.Builder()
            .learningRate(1.0)
            .regularization(false)
            .updater(Updater.NONE)
            .seed(12345L)
            .weightInit(WeightInit.DISTRIBUTION)
            .dist(new NormalDistribution(0, 1))
            .list()
            .layer(0, new DenseLayer.Builder().nIn(4).nOut(3).activation(Activation.IDENTITY).build())
            .layer(1, new BatchNormalization.Builder().nOut(3).build())
            .layer(2, new ActivationLayer.Builder().activation(Activation.TANH).build())
            .layer(3, new OutputLayer.Builder(LossFunctions.LossFunction.MCXENT).activation(Activation.SOFTMAX).nIn(3).nOut(3).build())
            .pretrain(false)
            .backprop(true);

    MultiLayerConfiguration conf = confBuilder.build();
    MultiLayerNetwork network = new MultiLayerNetwork(conf);
    network.init();

    if (PRINT_RESULTS) {
        for (int layerIdx = 0; layerIdx < network.getnLayers(); layerIdx++) {
            System.out.println("Layer " + layerIdx + " # params: " + network.getLayer(layerIdx).numParams());
        }
    }

    assertTrue(GradientCheckUtil.checkGradients(network, DEFAULT_EPS, DEFAULT_MAX_REL_ERROR, DEFAULT_MIN_ABS_ERROR, PRINT_RESULTS, RETURN_ON_FIRST_FAILURE, input, labels));
}
Also used : NormalizerMinMaxScaler(org.nd4j.linalg.dataset.api.preprocessor.NormalizerMinMaxScaler) IrisDataSetIterator(org.deeplearning4j.datasets.iterator.impl.IrisDataSetIterator) DataSet(org.nd4j.linalg.dataset.DataSet) NeuralNetConfiguration(org.deeplearning4j.nn.conf.NeuralNetConfiguration) DataNormalization(org.nd4j.linalg.dataset.api.preprocessor.DataNormalization) MultiLayerConfiguration(org.deeplearning4j.nn.conf.MultiLayerConfiguration) INDArray(org.nd4j.linalg.api.ndarray.INDArray) NormalDistribution(org.deeplearning4j.nn.conf.distribution.NormalDistribution) MultiLayerNetwork(org.deeplearning4j.nn.multilayer.MultiLayerNetwork) IrisDataSetIterator(org.deeplearning4j.datasets.iterator.impl.IrisDataSetIterator) DataSetIterator(org.nd4j.linalg.dataset.api.iterator.DataSetIterator) Test(org.junit.Test)

Aggregations

DataSet (org.nd4j.linalg.dataset.DataSet): 334; Test (org.junit.Test): 226; INDArray (org.nd4j.linalg.api.ndarray.INDArray): 194; MultiLayerNetwork (org.deeplearning4j.nn.multilayer.MultiLayerNetwork): 93; DataSetIterator (org.nd4j.linalg.dataset.api.iterator.DataSetIterator): 82; NeuralNetConfiguration (org.deeplearning4j.nn.conf.NeuralNetConfiguration): 79; MultiLayerConfiguration (org.deeplearning4j.nn.conf.MultiLayerConfiguration): 73; IrisDataSetIterator (org.deeplearning4j.datasets.iterator.impl.IrisDataSetIterator): 62; ArrayList (java.util.ArrayList): 50; MnistDataSetIterator (org.deeplearning4j.datasets.iterator.impl.MnistDataSetIterator): 41; ScoreIterationListener (org.deeplearning4j.optimize.listeners.ScoreIterationListener): 38; BaseSparkTest (org.deeplearning4j.spark.BaseSparkTest): 34; OutputLayer (org.deeplearning4j.nn.conf.layers.OutputLayer): 32; DenseLayer (org.deeplearning4j.nn.conf.layers.DenseLayer): 31; MultiDataSet (org.nd4j.linalg.dataset.MultiDataSet): 31; ComputationGraph (org.deeplearning4j.nn.graph.ComputationGraph): 25; SequenceRecordReader (org.datavec.api.records.reader.SequenceRecordReader): 24; ComputationGraphConfiguration (org.deeplearning4j.nn.conf.ComputationGraphConfiguration): 24; CSVSequenceRecordReader (org.datavec.api.records.reader.impl.csv.CSVSequenceRecordReader): 23; ClassPathResource (org.nd4j.linalg.io.ClassPathResource): 23