Search in sources :

Example 21 with SequenceRecordReader

use of org.datavec.api.records.reader.SequenceRecordReader in project deeplearning4j by deeplearning4j.

the class RecordReaderDataSetiteratorTest method testSeqRRDSIArrayWritableTwoReaders.

@Test
public void testSeqRRDSIArrayWritableTwoReaders() {
    List<List<Writable>> sequence1 = new ArrayList<>();
    sequence1.add(Arrays.asList((Writable) new NDArrayWritable(Nd4j.create(new double[] { 1, 2, 3 })), new IntWritable(100)));
    sequence1.add(Arrays.asList((Writable) new NDArrayWritable(Nd4j.create(new double[] { 4, 5, 6 })), new IntWritable(200)));
    List<List<Writable>> sequence2 = new ArrayList<>();
    sequence2.add(Arrays.asList((Writable) new NDArrayWritable(Nd4j.create(new double[] { 7, 8, 9 })), new IntWritable(300)));
    sequence2.add(Arrays.asList((Writable) new NDArrayWritable(Nd4j.create(new double[] { 10, 11, 12 })), new IntWritable(400)));
    SequenceRecordReader rrFeatures = new CollectionSequenceRecordReader(Arrays.asList(sequence1, sequence2));
    List<List<Writable>> sequence1L = new ArrayList<>();
    sequence1L.add(Arrays.asList((Writable) new NDArrayWritable(Nd4j.create(new double[] { 100, 200, 300 })), new IntWritable(101)));
    sequence1L.add(Arrays.asList((Writable) new NDArrayWritable(Nd4j.create(new double[] { 400, 500, 600 })), new IntWritable(201)));
    List<List<Writable>> sequence2L = new ArrayList<>();
    sequence2L.add(Arrays.asList((Writable) new NDArrayWritable(Nd4j.create(new double[] { 700, 800, 900 })), new IntWritable(301)));
    sequence2L.add(Arrays.asList((Writable) new NDArrayWritable(Nd4j.create(new double[] { 1000, 1100, 1200 })), new IntWritable(401)));
    SequenceRecordReader rrLabels = new CollectionSequenceRecordReader(Arrays.asList(sequence1L, sequence2L));
    SequenceRecordReaderDataSetIterator iter = new SequenceRecordReaderDataSetIterator(rrFeatures, rrLabels, 2, -1, true);
    //2 examples, 4 values per time step, 2 time steps
    INDArray expFeatures = Nd4j.create(2, 4, 2);
    expFeatures.tensorAlongDimension(0, 1, 2).assign(Nd4j.create(new double[][] { { 1, 4 }, { 2, 5 }, { 3, 6 }, { 100, 200 } }));
    expFeatures.tensorAlongDimension(1, 1, 2).assign(Nd4j.create(new double[][] { { 7, 10 }, { 8, 11 }, { 9, 12 }, { 300, 400 } }));
    INDArray expLabels = Nd4j.create(2, 4, 2);
    expLabels.tensorAlongDimension(0, 1, 2).assign(Nd4j.create(new double[][] { { 100, 400 }, { 200, 500 }, { 300, 600 }, { 101, 201 } }));
    expLabels.tensorAlongDimension(1, 1, 2).assign(Nd4j.create(new double[][] { { 700, 1000 }, { 800, 1100 }, { 900, 1200 }, { 301, 401 } }));
    DataSet ds = iter.next();
    assertEquals(expFeatures, ds.getFeatureMatrix());
    assertEquals(expLabels, ds.getLabels());
}
Also used : NDArrayWritable(org.datavec.common.data.NDArrayWritable) CSVSequenceRecordReader(org.datavec.api.records.reader.impl.csv.CSVSequenceRecordReader) SequenceRecordReader(org.datavec.api.records.reader.SequenceRecordReader) CollectionSequenceRecordReader(org.datavec.api.records.reader.impl.collection.CollectionSequenceRecordReader) INDArray(org.nd4j.linalg.api.ndarray.INDArray) DataSet(org.nd4j.linalg.dataset.DataSet) CollectionSequenceRecordReader(org.datavec.api.records.reader.impl.collection.CollectionSequenceRecordReader) IntWritable(org.datavec.api.writable.IntWritable) NDArrayWritable(org.datavec.common.data.NDArrayWritable) DoubleWritable(org.datavec.api.writable.DoubleWritable) Writable(org.datavec.api.writable.Writable) IntWritable(org.datavec.api.writable.IntWritable) Test(org.junit.Test)

Example 22 with SequenceRecordReader

use of org.datavec.api.records.reader.SequenceRecordReader in project deeplearning4j by deeplearning4j.

the class RecordReaderDataSetiteratorTest method testSequenceRecordReaderSingleReaderMetaData.

@Test
public void testSequenceRecordReaderSingleReaderMetaData() throws Exception {
    //need to manually extract
    for (int i = 0; i < 3; i++) {
        new ClassPathResource(String.format("csvsequenceSingle_%d.txt", i)).getTempFileFromArchive();
    }
    ClassPathResource resource = new ClassPathResource("csvsequenceSingle_0.txt");
    String path = resource.getTempFileFromArchive().getAbsolutePath().replaceAll("0", "%d");
    SequenceRecordReader reader = new CSVSequenceRecordReader(1, ",");
    reader.initialize(new NumberedFileInputSplit(path, 0, 2));
    SequenceRecordReaderDataSetIterator iteratorClassification = new SequenceRecordReaderDataSetIterator(reader, 1, 3, 0, false);
    SequenceRecordReader reader2 = new CSVSequenceRecordReader(1, ",");
    reader2.initialize(new NumberedFileInputSplit(path, 0, 2));
    SequenceRecordReaderDataSetIterator iteratorRegression = new SequenceRecordReaderDataSetIterator(reader2, 1, 3, 0, true);
    iteratorClassification.setCollectMetaData(true);
    iteratorRegression.setCollectMetaData(true);
    while (iteratorClassification.hasNext()) {
        DataSet ds = iteratorClassification.next();
        DataSet fromMeta = iteratorClassification.loadFromMetaData(ds.getExampleMetaData(RecordMetaData.class));
        assertEquals(ds, fromMeta);
    }
    while (iteratorRegression.hasNext()) {
        DataSet ds = iteratorRegression.next();
        DataSet fromMeta = iteratorRegression.loadFromMetaData(ds.getExampleMetaData(RecordMetaData.class));
        assertEquals(ds, fromMeta);
    }
}
Also used : RecordMetaData(org.datavec.api.records.metadata.RecordMetaData) CSVSequenceRecordReader(org.datavec.api.records.reader.impl.csv.CSVSequenceRecordReader) SequenceRecordReader(org.datavec.api.records.reader.SequenceRecordReader) CollectionSequenceRecordReader(org.datavec.api.records.reader.impl.collection.CollectionSequenceRecordReader) CSVSequenceRecordReader(org.datavec.api.records.reader.impl.csv.CSVSequenceRecordReader) DataSet(org.nd4j.linalg.dataset.DataSet) ClassPathResource(org.nd4j.linalg.io.ClassPathResource) NumberedFileInputSplit(org.datavec.api.split.NumberedFileInputSplit) Test(org.junit.Test)

Example 23 with SequenceRecordReader

use of org.datavec.api.records.reader.SequenceRecordReader in project deeplearning4j by deeplearning4j.

the class RecordReaderDataSetiteratorTest method testSequenceRecordReaderMeta.

@Test
public void testSequenceRecordReaderMeta() throws Exception {
    //need to manually extract
    for (int i = 0; i < 3; i++) {
        new ClassPathResource(String.format("csvsequence_%d.txt", i)).getTempFileFromArchive();
        new ClassPathResource(String.format("csvsequencelabels_%d.txt", i)).getTempFileFromArchive();
    }
    ClassPathResource resource = new ClassPathResource("csvsequence_0.txt");
    String featuresPath = resource.getTempFileFromArchive().getAbsolutePath().replaceAll("0", "%d");
    resource = new ClassPathResource("csvsequencelabels_0.txt");
    String labelsPath = resource.getTempFileFromArchive().getAbsolutePath().replaceAll("0", "%d");
    SequenceRecordReader featureReader = new CSVSequenceRecordReader(1, ",");
    SequenceRecordReader labelReader = new CSVSequenceRecordReader(1, ",");
    featureReader.initialize(new NumberedFileInputSplit(featuresPath, 0, 2));
    labelReader.initialize(new NumberedFileInputSplit(labelsPath, 0, 2));
    SequenceRecordReaderDataSetIterator iter = new SequenceRecordReaderDataSetIterator(featureReader, labelReader, 1, 4, false);
    iter.setCollectMetaData(true);
    assertEquals(3, iter.inputColumns());
    assertEquals(4, iter.totalOutcomes());
    while (iter.hasNext()) {
        DataSet ds = iter.next();
        List<RecordMetaData> meta = ds.getExampleMetaData(RecordMetaData.class);
        DataSet fromMeta = iter.loadFromMetaData(meta);
        assertEquals(ds, fromMeta);
    }
}
Also used : RecordMetaData(org.datavec.api.records.metadata.RecordMetaData) CSVSequenceRecordReader(org.datavec.api.records.reader.impl.csv.CSVSequenceRecordReader) SequenceRecordReader(org.datavec.api.records.reader.SequenceRecordReader) CollectionSequenceRecordReader(org.datavec.api.records.reader.impl.collection.CollectionSequenceRecordReader) CSVSequenceRecordReader(org.datavec.api.records.reader.impl.csv.CSVSequenceRecordReader) DataSet(org.nd4j.linalg.dataset.DataSet) ClassPathResource(org.nd4j.linalg.io.ClassPathResource) NumberedFileInputSplit(org.datavec.api.split.NumberedFileInputSplit) Test(org.junit.Test)

Example 24 with SequenceRecordReader

use of org.datavec.api.records.reader.SequenceRecordReader in project deeplearning4j by deeplearning4j.

the class RecordReaderDataSetiteratorTest method testSeqRRDSIArrayWritableOneReader.

@Test
public void testSeqRRDSIArrayWritableOneReader() {
    List<List<Writable>> sequence1 = new ArrayList<>();
    sequence1.add(Arrays.asList((Writable) new NDArrayWritable(Nd4j.create(new double[] { 1, 2, 3 })), new IntWritable(0)));
    sequence1.add(Arrays.asList((Writable) new NDArrayWritable(Nd4j.create(new double[] { 4, 5, 6 })), new IntWritable(1)));
    List<List<Writable>> sequence2 = new ArrayList<>();
    sequence2.add(Arrays.asList((Writable) new NDArrayWritable(Nd4j.create(new double[] { 7, 8, 9 })), new IntWritable(2)));
    sequence2.add(Arrays.asList((Writable) new NDArrayWritable(Nd4j.create(new double[] { 10, 11, 12 })), new IntWritable(3)));
    SequenceRecordReader rr = new CollectionSequenceRecordReader(Arrays.asList(sequence1, sequence2));
    SequenceRecordReaderDataSetIterator iter = new SequenceRecordReaderDataSetIterator(rr, 2, 4, 1, false);
    DataSet ds = iter.next();
    //2 examples, 3 values per time step, 2 time steps
    INDArray expFeatures = Nd4j.create(2, 3, 2);
    expFeatures.tensorAlongDimension(0, 1, 2).assign(Nd4j.create(new double[][] { { 1, 4 }, { 2, 5 }, { 3, 6 } }));
    expFeatures.tensorAlongDimension(1, 1, 2).assign(Nd4j.create(new double[][] { { 7, 10 }, { 8, 11 }, { 9, 12 } }));
    INDArray expLabels = Nd4j.create(2, 4, 2);
    expLabels.tensorAlongDimension(0, 1, 2).assign(Nd4j.create(new double[][] { { 1, 0 }, { 0, 1 }, { 0, 0 }, { 0, 0 } }));
    expLabels.tensorAlongDimension(1, 1, 2).assign(Nd4j.create(new double[][] { { 0, 0 }, { 0, 0 }, { 1, 0 }, { 0, 1 } }));
    assertEquals(expFeatures, ds.getFeatureMatrix());
    assertEquals(expLabels, ds.getLabels());
}
Also used : NDArrayWritable(org.datavec.common.data.NDArrayWritable) CSVSequenceRecordReader(org.datavec.api.records.reader.impl.csv.CSVSequenceRecordReader) SequenceRecordReader(org.datavec.api.records.reader.SequenceRecordReader) CollectionSequenceRecordReader(org.datavec.api.records.reader.impl.collection.CollectionSequenceRecordReader) INDArray(org.nd4j.linalg.api.ndarray.INDArray) DataSet(org.nd4j.linalg.dataset.DataSet) CollectionSequenceRecordReader(org.datavec.api.records.reader.impl.collection.CollectionSequenceRecordReader) IntWritable(org.datavec.api.writable.IntWritable) NDArrayWritable(org.datavec.common.data.NDArrayWritable) DoubleWritable(org.datavec.api.writable.DoubleWritable) Writable(org.datavec.api.writable.Writable) IntWritable(org.datavec.api.writable.IntWritable) Test(org.junit.Test)

Example 25 with SequenceRecordReader

use of org.datavec.api.records.reader.SequenceRecordReader in project deeplearning4j by deeplearning4j.

the class TestDataVecDataSetFunctions method testDataVecSequenceDataSetFunction.

@Test
public void testDataVecSequenceDataSetFunction() throws Exception {
    JavaSparkContext sc = getContext();
    //Test Spark record reader functionality vs. local
    File f = new File("src/test/resources/csvsequence/csvsequence_0.txt");
    String path = f.getPath();
    String folder = path.substring(0, path.length() - 17);
    path = folder + "*";
    JavaPairRDD<String, PortableDataStream> origData = sc.binaryFiles(path);
    //3 CSV sequences
    assertEquals(3, origData.count());
    SequenceRecordReader seqRR = new CSVSequenceRecordReader(1, ",");
    SequenceRecordReaderFunction rrf = new SequenceRecordReaderFunction(seqRR);
    JavaRDD<List<List<Writable>>> rdd = origData.map(rrf);
    JavaRDD<DataSet> data = rdd.map(new DataVecSequenceDataSetFunction(2, -1, true, null, null));
    List<DataSet> collected = data.collect();
    //Load normally (i.e., not via Spark), and check that we get the same results (order not withstanding)
    InputSplit is = new FileSplit(new File(folder), new String[] { "txt" }, true);
    SequenceRecordReader seqRR2 = new CSVSequenceRecordReader(1, ",");
    seqRR2.initialize(is);
    SequenceRecordReaderDataSetIterator iter = new SequenceRecordReaderDataSetIterator(seqRR2, 1, -1, 2, true);
    List<DataSet> listLocal = new ArrayList<>(3);
    while (iter.hasNext()) {
        listLocal.add(iter.next());
    }
    //Compare:
    assertEquals(3, collected.size());
    assertEquals(3, listLocal.size());
    //Check that results are the same (order not withstanding)
    boolean[] found = new boolean[3];
    for (int i = 0; i < 3; i++) {
        int foundIndex = -1;
        DataSet ds = collected.get(i);
        for (int j = 0; j < 3; j++) {
            if (ds.equals(listLocal.get(j))) {
                if (foundIndex != -1)
                    //Already found this value -> suggests this spark value equals two or more of local version? (Shouldn't happen)
                    fail();
                foundIndex = j;
                if (found[foundIndex])
                    //One of the other spark values was equal to this one -> suggests duplicates in Spark list
                    fail();
                //mark this one as seen before
                found[foundIndex] = true;
            }
        }
    }
    int count = 0;
    for (boolean b : found) if (b)
        count++;
    //Expect all 3 and exactly 3 pairwise matches between spark and local versions
    assertEquals(3, count);
}
Also used : CSVSequenceRecordReader(org.datavec.api.records.reader.impl.csv.CSVSequenceRecordReader) SequenceRecordReader(org.datavec.api.records.reader.SequenceRecordReader) SequenceRecordReaderFunction(org.datavec.spark.functions.SequenceRecordReaderFunction) DataSet(org.nd4j.linalg.dataset.DataSet) SequenceRecordReaderDataSetIterator(org.deeplearning4j.datasets.datavec.SequenceRecordReaderDataSetIterator) ArrayList(java.util.ArrayList) PortableDataStream(org.apache.spark.input.PortableDataStream) Writable(org.datavec.api.writable.Writable) FileSplit(org.datavec.api.split.FileSplit) CSVSequenceRecordReader(org.datavec.api.records.reader.impl.csv.CSVSequenceRecordReader) ArrayList(java.util.ArrayList) List(java.util.List) JavaSparkContext(org.apache.spark.api.java.JavaSparkContext) File(java.io.File) NumberedFileInputSplit(org.datavec.api.split.NumberedFileInputSplit) InputSplit(org.datavec.api.split.InputSplit) BaseSparkTest(org.deeplearning4j.spark.BaseSparkTest) Test(org.junit.Test)

Aggregations

SequenceRecordReader (org.datavec.api.records.reader.SequenceRecordReader)25 CSVSequenceRecordReader (org.datavec.api.records.reader.impl.csv.CSVSequenceRecordReader)22 Test (org.junit.Test)22 DataSet (org.nd4j.linalg.dataset.DataSet)18 NumberedFileInputSplit (org.datavec.api.split.NumberedFileInputSplit)15 ClassPathResource (org.nd4j.linalg.io.ClassPathResource)15 CollectionSequenceRecordReader (org.datavec.api.records.reader.impl.collection.CollectionSequenceRecordReader)14 INDArray (org.nd4j.linalg.api.ndarray.INDArray)13 Writable (org.datavec.api.writable.Writable)8 NDArrayWritable (org.datavec.common.data.NDArrayWritable)7 RecordMetaData (org.datavec.api.records.metadata.RecordMetaData)6 FileSplit (org.datavec.api.split.FileSplit)5 MultiDataSet (org.nd4j.linalg.dataset.api.MultiDataSet)5 ArrayList (java.util.ArrayList)4 List (java.util.List)4 DoubleWritable (org.datavec.api.writable.DoubleWritable)4 IntWritable (org.datavec.api.writable.IntWritable)4 File (java.io.File)3 Record (org.datavec.api.records.Record)3 RecordReader (org.datavec.api.records.reader.RecordReader)3