Search in sources :

Example 11 with NumberedFileInputSplit

Use of org.datavec.api.split.NumberedFileInputSplit in project deeplearning4j by deeplearning4j.

From the class RecordReaderDataSetiteratorTest, method testSequenceRecordReader.

/**
 * Reads three feature/label CSV sequence file pairs (indices 0..2) with two separate readers and
 * checks every {@link DataSet} produced by a {@link SequenceRecordReaderDataSetIterator}
 * configured with mini-batch size 1 and 4 label classes (classification, not regression).
 *
 * Verifies the column counts reported by the iterator, the shape of each features/labels array
 * ([1, 3, 4] and [1, 4, 4]) and the exact expected contents, with labels one-hot encoded.
 *
 * @throws Exception if a resource cannot be extracted or a reader fails to initialize
 */
@Test
public void testSequenceRecordReader() throws Exception {
    //need to manually extract
    for (int i = 0; i < 3; i++) {
        new ClassPathResource(String.format("csvsequence_%d.txt", i)).getTempFileFromArchive();
        new ClassPathResource(String.format("csvsequencelabels_%d.txt", i)).getTempFileFromArchive();
    }
    // Build the "%d" path templates from the path of file 0. Only the resource file name is
    // rewritten: a blanket replaceAll("0", "%d") would also rewrite any other '0' that happens
    // to occur in the absolute path (e.g. in a temp directory name) and break the template.
    ClassPathResource resource = new ClassPathResource("csvsequence_0.txt");
    String featuresPath = resource.getTempFileFromArchive().getAbsolutePath()
                    .replace("csvsequence_0.txt", "csvsequence_%d.txt");
    resource = new ClassPathResource("csvsequencelabels_0.txt");
    String labelsPath = resource.getTempFileFromArchive().getAbsolutePath()
                    .replace("csvsequencelabels_0.txt", "csvsequencelabels_%d.txt");
    // NOTE(review): (1, ",") presumably means "skip 1 header line, comma delimiter" - confirm
    SequenceRecordReader featureReader = new CSVSequenceRecordReader(1, ",");
    SequenceRecordReader labelReader = new CSVSequenceRecordReader(1, ",");
    featureReader.initialize(new NumberedFileInputSplit(featuresPath, 0, 2));
    labelReader.initialize(new NumberedFileInputSplit(labelsPath, 0, 2));
    SequenceRecordReaderDataSetIterator iter = new SequenceRecordReaderDataSetIterator(featureReader, labelReader, 1, 4, false);
    assertEquals(3, iter.inputColumns());
    assertEquals(4, iter.totalOutcomes());
    List<DataSet> dsList = new ArrayList<>();
    while (iter.hasNext()) {
        dsList.add(iter.next());
    }
    //3 files
    assertEquals(3, dsList.size());
    for (int i = 0; i < 3; i++) {
        DataSet ds = dsList.get(i);
        INDArray features = ds.getFeatures();
        INDArray labels = ds.getLabels();
        //1 example in mini-batch
        assertEquals(1, features.size(0));
        assertEquals(1, labels.size(0));
        //3 values per line/time step
        assertEquals(3, features.size(1));
        //1 value per line, but 4 possible values -> one-hot vector
        assertEquals(4, labels.size(1));
        //sequence length = 4
        assertEquals(4, features.size(2));
        assertEquals(4, labels.size(2));
    }
    //Expected values, indexed as [file][time step][column]
    double[][][] expectedFeatures = {
        { { 0, 1, 2 }, { 10, 11, 12 }, { 20, 21, 22 }, { 30, 31, 32 } },
        { { 100, 101, 102 }, { 110, 111, 112 }, { 120, 121, 122 }, { 130, 131, 132 } },
        { { 200, 201, 202 }, { 210, 211, 212 }, { 220, 221, 222 }, { 230, 231, 232 } } };
    //One-hot labels, indexed as [file][time step][class]
    double[][][] expectedLabels = {
        { { 1, 0, 0, 0 }, { 0, 1, 0, 0 }, { 0, 0, 1, 0 }, { 0, 0, 0, 1 } },
        { { 0, 0, 0, 1 }, { 0, 0, 1, 0 }, { 0, 1, 0, 0 }, { 1, 0, 0, 0 } },
        { { 0, 1, 0, 0 }, { 1, 0, 0, 0 }, { 0, 0, 0, 1 }, { 0, 0, 1, 0 } } };
    //Check features and labels vs. expected:
    for (int i = 0; i < 3; i++) {
        INDArray expF = Nd4j.create(1, 3, 4);
        INDArray expL = Nd4j.create(1, 4, 4);
        for (int t = 0; t < 4; t++) {
            expF.tensorAlongDimension(t, 1).assign(Nd4j.create(expectedFeatures[i][t]));
            expL.tensorAlongDimension(t, 1).assign(Nd4j.create(expectedLabels[i][t]));
        }
        // JUnit order is (expected, actual) so that failure messages read correctly
        assertEquals(expF, dsList.get(i).getFeatures());
        assertEquals(expL, dsList.get(i).getLabels());
    }
}
Also used: CSVSequenceRecordReader (org.datavec.api.records.reader.impl.csv.CSVSequenceRecordReader), SequenceRecordReader (org.datavec.api.records.reader.SequenceRecordReader), CollectionSequenceRecordReader (org.datavec.api.records.reader.impl.collection.CollectionSequenceRecordReader), INDArray (org.nd4j.linalg.api.ndarray.INDArray), DataSet (org.nd4j.linalg.dataset.DataSet), ClassPathResource (org.nd4j.linalg.io.ClassPathResource), NumberedFileInputSplit (org.datavec.api.split.NumberedFileInputSplit), Test (org.junit.Test)

Example 12 with NumberedFileInputSplit

Use of org.datavec.api.split.NumberedFileInputSplit in project deeplearning4j by deeplearning4j.

From the class RecordReaderDataSetiteratorTest, method testSequenceRecordReaderSingleReader.

/**
 * Reads three CSV sequence files (indices 0..2) in which features and label share one file, using
 * a single reader per iterator, and checks the output of two
 * {@link SequenceRecordReaderDataSetIterator}s built with (reader, 1, 3, 0, regression):
 * one for classification (labels expected one-hot, shape [1, 3, 4]) and one for regression
 * (labels expected as raw values, shape [1, 1, 4]).
 *
 * For both iterators the features are expected to have shape [1, 2, 4], no mask arrays are
 * expected, and exactly three DataSets must be produced.
 *
 * @throws Exception if a resource cannot be extracted or a reader fails to initialize
 */
@Test
public void testSequenceRecordReaderSingleReader() throws Exception {
    //need to manually extract
    for (int i = 0; i < 3; i++) {
        new ClassPathResource(String.format("csvsequenceSingle_%d.txt", i)).getTempFileFromArchive();
    }
    // Build the "%d" path template from the path of file 0. Only the resource file name is
    // rewritten: a blanket replaceAll("0", "%d") would also rewrite any other '0' that happens
    // to occur in the absolute path (e.g. in a temp directory name) and break the template.
    ClassPathResource resource = new ClassPathResource("csvsequenceSingle_0.txt");
    String path = resource.getTempFileFromArchive().getAbsolutePath()
                    .replace("csvsequenceSingle_0.txt", "csvsequenceSingle_%d.txt");
    // NOTE(review): arguments (1, 3, 0, flag) presumably are mini-batch size, number of classes,
    // label column index and the regression flag - confirm against the iterator's constructor
    SequenceRecordReader reader = new CSVSequenceRecordReader(1, ",");
    reader.initialize(new NumberedFileInputSplit(path, 0, 2));
    SequenceRecordReaderDataSetIterator iteratorClassification = new SequenceRecordReaderDataSetIterator(reader, 1, 3, 0, false);
    SequenceRecordReader reader2 = new CSVSequenceRecordReader(1, ",");
    reader2.initialize(new NumberedFileInputSplit(path, 0, 2));
    SequenceRecordReaderDataSetIterator iteratorRegression = new SequenceRecordReaderDataSetIterator(reader2, 1, 3, 0, true);
    //Expected features, indexed as [file][time step][column]
    double[][][] featureValues = {
        { { 1, 2 }, { 11, 12 }, { 21, 22 }, { 31, 32 } },
        { { 101, 102 }, { 111, 112 }, { 121, 122 }, { 131, 132 } },
        { { 201, 202 }, { 211, 212 }, { 221, 222 }, { 231, 232 } } };
    //Expected out for classification (one-hot), indexed as [file][time step][class]
    double[][][] classificationValues = {
        { { 1, 0, 0 }, { 0, 1, 0 }, { 0, 0, 1 }, { 1, 0, 0 } },
        { { 0, 1, 0 }, { 0, 0, 1 }, { 1, 0, 0 }, { 0, 0, 1 } },
        { { 0, 1, 0 }, { 1, 0, 0 }, { 0, 1, 0 }, { 0, 0, 1 } } };
    //Expected out for regression (single raw value), indexed as [file][time step][0]
    double[][][] regressionValues = {
        { { 0 }, { 1 }, { 2 }, { 0 } },
        { { 1 }, { 2 }, { 0 }, { 2 } },
        { { 1 }, { 0 }, { 1 }, { 2 } } };
    INDArray[] expF = new INDArray[3];
    INDArray[] expOutClassification = new INDArray[3];
    INDArray[] expOutRegression = new INDArray[3];
    for (int i = 0; i < 3; i++) {
        expF[i] = Nd4j.create(1, 2, 4);
        expOutClassification[i] = Nd4j.create(1, 3, 4);
        expOutRegression[i] = Nd4j.create(1, 1, 4);
        for (int t = 0; t < 4; t++) {
            expF[i].tensorAlongDimension(t, 1).assign(Nd4j.create(featureValues[i][t]));
            expOutClassification[i].tensorAlongDimension(t, 1).assign(Nd4j.create(classificationValues[i][t]));
            expOutRegression[i].tensorAlongDimension(t, 1).assign(Nd4j.create(regressionValues[i][t]));
        }
    }
    int countC = 0;
    while (iteratorClassification.hasNext()) {
        DataSet ds = iteratorClassification.next();
        INDArray f = ds.getFeatures();
        INDArray l = ds.getLabels();
        assertNull(ds.getFeaturesMaskArray());
        assertNull(ds.getLabelsMaskArray());
        assertArrayEquals(new int[] { 1, 2, 4 }, f.shape());
        //One-hot representation
        assertArrayEquals(new int[] { 1, 3, 4 }, l.shape());
        assertEquals(expF[countC], f);
        assertEquals(expOutClassification[countC++], l);
    }
    assertEquals(3, countC);
    assertEquals(3, iteratorClassification.totalOutcomes());
    int countF = 0;
    while (iteratorRegression.hasNext()) {
        DataSet ds = iteratorRegression.next();
        INDArray f = ds.getFeatures();
        INDArray l = ds.getLabels();
        assertNull(ds.getFeaturesMaskArray());
        assertNull(ds.getLabelsMaskArray());
        assertArrayEquals(new int[] { 1, 2, 4 }, f.shape());
        //Regression (single output)
        assertArrayEquals(new int[] { 1, 1, 4 }, l.shape());
        assertEquals(expF[countF], f);
        assertEquals(expOutRegression[countF++], l);
    }
    assertEquals(3, countF);
    assertEquals(1, iteratorRegression.totalOutcomes());
}
Also used: CSVSequenceRecordReader (org.datavec.api.records.reader.impl.csv.CSVSequenceRecordReader), SequenceRecordReader (org.datavec.api.records.reader.SequenceRecordReader), CollectionSequenceRecordReader (org.datavec.api.records.reader.impl.collection.CollectionSequenceRecordReader), INDArray (org.nd4j.linalg.api.ndarray.INDArray), DataSet (org.nd4j.linalg.dataset.DataSet), ClassPathResource (org.nd4j.linalg.io.ClassPathResource), NumberedFileInputSplit (org.datavec.api.split.NumberedFileInputSplit), Test (org.junit.Test)

Example 13 with NumberedFileInputSplit

Use of org.datavec.api.split.NumberedFileInputSplit in project deeplearning4j by deeplearning4j.

From the class RecordReaderDataSetiteratorTest, method testSequenceRecordReaderSingleReaderMetaData.

/**
 * Checks the metadata round trip for single-reader sequence iterators: with metadata collection
 * enabled, every {@link DataSet} returned by the classification and the regression iterator must
 * equal the DataSet obtained by passing its {@link RecordMetaData} to {@code loadFromMetaData}.
 *
 * Each iterator must yield exactly three DataSets (three files, mini-batch size 1), so the
 * comparison loops cannot pass without having compared anything.
 *
 * @throws Exception if a resource cannot be extracted or a reader fails to initialize
 */
@Test
public void testSequenceRecordReaderSingleReaderMetaData() throws Exception {
    //need to manually extract
    for (int i = 0; i < 3; i++) {
        new ClassPathResource(String.format("csvsequenceSingle_%d.txt", i)).getTempFileFromArchive();
    }
    // Build the "%d" path template from the path of file 0. Only the resource file name is
    // rewritten: a blanket replaceAll("0", "%d") would also rewrite any other '0' that happens
    // to occur in the absolute path (e.g. in a temp directory name) and break the template.
    ClassPathResource resource = new ClassPathResource("csvsequenceSingle_0.txt");
    String path = resource.getTempFileFromArchive().getAbsolutePath()
                    .replace("csvsequenceSingle_0.txt", "csvsequenceSingle_%d.txt");
    SequenceRecordReader reader = new CSVSequenceRecordReader(1, ",");
    reader.initialize(new NumberedFileInputSplit(path, 0, 2));
    SequenceRecordReaderDataSetIterator iteratorClassification = new SequenceRecordReaderDataSetIterator(reader, 1, 3, 0, false);
    SequenceRecordReader reader2 = new CSVSequenceRecordReader(1, ",");
    reader2.initialize(new NumberedFileInputSplit(path, 0, 2));
    SequenceRecordReaderDataSetIterator iteratorRegression = new SequenceRecordReaderDataSetIterator(reader2, 1, 3, 0, true);
    iteratorClassification.setCollectMetaData(true);
    iteratorRegression.setCollectMetaData(true);
    int countC = 0;
    while (iteratorClassification.hasNext()) {
        DataSet ds = iteratorClassification.next();
        DataSet fromMeta = iteratorClassification.loadFromMetaData(ds.getExampleMetaData(RecordMetaData.class));
        assertEquals(ds, fromMeta);
        countC++;
    }
    //3 files, 1 example per mini-batch
    assertEquals(3, countC);
    int countR = 0;
    while (iteratorRegression.hasNext()) {
        DataSet ds = iteratorRegression.next();
        DataSet fromMeta = iteratorRegression.loadFromMetaData(ds.getExampleMetaData(RecordMetaData.class));
        assertEquals(ds, fromMeta);
        countR++;
    }
    assertEquals(3, countR);
}
Also used: RecordMetaData (org.datavec.api.records.metadata.RecordMetaData), CSVSequenceRecordReader (org.datavec.api.records.reader.impl.csv.CSVSequenceRecordReader), SequenceRecordReader (org.datavec.api.records.reader.SequenceRecordReader), CollectionSequenceRecordReader (org.datavec.api.records.reader.impl.collection.CollectionSequenceRecordReader), DataSet (org.nd4j.linalg.dataset.DataSet), ClassPathResource (org.nd4j.linalg.io.ClassPathResource), NumberedFileInputSplit (org.datavec.api.split.NumberedFileInputSplit), Test (org.junit.Test)

Example 14 with NumberedFileInputSplit

Use of org.datavec.api.split.NumberedFileInputSplit in project deeplearning4j by deeplearning4j.

From the class RecordReaderDataSetiteratorTest, method testSequenceRecordReaderMeta.

/**
 * Checks the metadata round trip for an iterator built from separate feature and label readers:
 * with metadata collection enabled, every {@link DataSet} returned by the iterator must equal the
 * DataSet obtained by passing its {@link RecordMetaData} list to {@code loadFromMetaData}.
 *
 * The iterator must yield exactly three DataSets (three files, mini-batch size 1), so the
 * comparison loop cannot pass without having compared anything.
 *
 * @throws Exception if a resource cannot be extracted or a reader fails to initialize
 */
@Test
public void testSequenceRecordReaderMeta() throws Exception {
    //need to manually extract
    for (int i = 0; i < 3; i++) {
        new ClassPathResource(String.format("csvsequence_%d.txt", i)).getTempFileFromArchive();
        new ClassPathResource(String.format("csvsequencelabels_%d.txt", i)).getTempFileFromArchive();
    }
    // Build the "%d" path templates from the path of file 0. Only the resource file name is
    // rewritten: a blanket replaceAll("0", "%d") would also rewrite any other '0' that happens
    // to occur in the absolute path (e.g. in a temp directory name) and break the template.
    ClassPathResource resource = new ClassPathResource("csvsequence_0.txt");
    String featuresPath = resource.getTempFileFromArchive().getAbsolutePath()
                    .replace("csvsequence_0.txt", "csvsequence_%d.txt");
    resource = new ClassPathResource("csvsequencelabels_0.txt");
    String labelsPath = resource.getTempFileFromArchive().getAbsolutePath()
                    .replace("csvsequencelabels_0.txt", "csvsequencelabels_%d.txt");
    SequenceRecordReader featureReader = new CSVSequenceRecordReader(1, ",");
    SequenceRecordReader labelReader = new CSVSequenceRecordReader(1, ",");
    featureReader.initialize(new NumberedFileInputSplit(featuresPath, 0, 2));
    labelReader.initialize(new NumberedFileInputSplit(labelsPath, 0, 2));
    SequenceRecordReaderDataSetIterator iter = new SequenceRecordReaderDataSetIterator(featureReader, labelReader, 1, 4, false);
    iter.setCollectMetaData(true);
    assertEquals(3, iter.inputColumns());
    assertEquals(4, iter.totalOutcomes());
    int count = 0;
    while (iter.hasNext()) {
        DataSet ds = iter.next();
        List<RecordMetaData> meta = ds.getExampleMetaData(RecordMetaData.class);
        DataSet fromMeta = iter.loadFromMetaData(meta);
        assertEquals(ds, fromMeta);
        count++;
    }
    //3 files, 1 example per mini-batch
    assertEquals(3, count);
}
Also used: RecordMetaData (org.datavec.api.records.metadata.RecordMetaData), CSVSequenceRecordReader (org.datavec.api.records.reader.impl.csv.CSVSequenceRecordReader), SequenceRecordReader (org.datavec.api.records.reader.SequenceRecordReader), CollectionSequenceRecordReader (org.datavec.api.records.reader.impl.collection.CollectionSequenceRecordReader), DataSet (org.nd4j.linalg.dataset.DataSet), ClassPathResource (org.nd4j.linalg.io.ClassPathResource), NumberedFileInputSplit (org.datavec.api.split.NumberedFileInputSplit), Test (org.junit.Test)

Aggregations

SequenceRecordReader (org.datavec.api.records.reader.SequenceRecordReader): 14, CSVSequenceRecordReader (org.datavec.api.records.reader.impl.csv.CSVSequenceRecordReader): 14, NumberedFileInputSplit (org.datavec.api.split.NumberedFileInputSplit): 14, Test (org.junit.Test): 14, DataSet (org.nd4j.linalg.dataset.DataSet): 12, ClassPathResource (org.nd4j.linalg.io.ClassPathResource): 12, INDArray (org.nd4j.linalg.api.ndarray.INDArray): 9, CollectionSequenceRecordReader (org.datavec.api.records.reader.impl.collection.CollectionSequenceRecordReader): 7, MultiDataSet (org.nd4j.linalg.dataset.api.MultiDataSet): 5, RecordMetaData (org.datavec.api.records.metadata.RecordMetaData): 4, File (java.io.File): 2, Path (java.nio.file.Path): 2, ArrayList (java.util.ArrayList): 2, List (java.util.List): 2, Text (org.apache.hadoop.io.Text): 2, SequenceRecordReaderDataSetIterator (org.deeplearning4j.datasets.datavec.SequenceRecordReaderDataSetIterator): 2, BaseSparkTest (org.deeplearning4j.spark.BaseSparkTest): 2, MultiDataSetIterator (org.nd4j.linalg.dataset.api.iterator.MultiDataSetIterator): 2, Tuple2 (scala.Tuple2): 2, JavaSparkContext (org.apache.spark.api.java.JavaSparkContext): 1