Use of org.datavec.api.records.reader.impl.csv.CSVSequenceRecordReader in project deeplearning4j by deeplearning4j.
From the class RecordReaderDataSetiteratorTest, method testSequenceRecordReaderSingleReaderMetaData.
/**
 * Round-trips every DataSet produced by a single-reader sequence iterator through its
 * collected example metadata and asserts that the reloaded DataSet equals the original.
 * The same check is applied first to the classification iterator and then to the
 * regression iterator.
 *
 * @throws Exception if a resource cannot be extracted or a reader cannot be initialized
 */
@Test
public void testSequenceRecordReaderSingleReaderMetaData() throws Exception {
    // The numbered resources have to be extracted manually before they can be read
    for (int fileIdx = 0; fileIdx < 3; fileIdx++) {
        String resourceName = String.format("csvsequenceSingle_%d.txt", fileIdx);
        new ClassPathResource(resourceName).getTempFileFromArchive();
    }

    // Turn the path of file 0 into a "%d" pattern for NumberedFileInputSplit.
    // NOTE(review): replaceAll substitutes every "0" in the absolute path, not only the
    // file index - presumably the extracted path contains no other zeros; confirm.
    ClassPathResource firstFile = new ClassPathResource("csvsequenceSingle_0.txt");
    String firstFilePath = firstFile.getTempFileFromArchive().getAbsolutePath();
    String path = firstFilePath.replaceAll("0", "%d");

    SequenceRecordReader classificationReader = new CSVSequenceRecordReader(1, ",");
    classificationReader.initialize(new NumberedFileInputSplit(path, 0, 2));
    SequenceRecordReaderDataSetIterator iteratorClassification =
            new SequenceRecordReaderDataSetIterator(classificationReader, 1, 3, 0, false);

    SequenceRecordReader regressionReader = new CSVSequenceRecordReader(1, ",");
    regressionReader.initialize(new NumberedFileInputSplit(path, 0, 2));
    SequenceRecordReaderDataSetIterator iteratorRegression =
            new SequenceRecordReaderDataSetIterator(regressionReader, 1, 3, 0, true);

    iteratorClassification.setCollectMetaData(true);
    iteratorRegression.setCollectMetaData(true);

    // Classification is fully consumed before regression, exactly one DataSet at a time
    SequenceRecordReaderDataSetIterator[] iterators = {iteratorClassification, iteratorRegression};
    for (SequenceRecordReaderDataSetIterator iterator : iterators) {
        while (iterator.hasNext()) {
            DataSet expected = iterator.next();
            DataSet reloaded = iterator.loadFromMetaData(expected.getExampleMetaData(RecordMetaData.class));
            assertEquals(expected, reloaded);
        }
    }
}
Use of org.datavec.api.records.reader.impl.csv.CSVSequenceRecordReader in project deeplearning4j by deeplearning4j.
From the class RecordReaderDataSetiteratorTest, method testSequenceRecordReaderMeta.
/**
 * Builds a sequence iterator from separate feature and label readers, checks its
 * reported input column and outcome counts, and asserts that every DataSet reloaded
 * from its collected example metadata equals the DataSet originally returned.
 *
 * @throws Exception if a resource cannot be extracted or a reader cannot be initialized
 */
@Test
public void testSequenceRecordReaderMeta() throws Exception {
    // The numbered feature and label resources have to be extracted manually first
    for (int fileIdx = 0; fileIdx < 3; fileIdx++) {
        String featureResource = String.format("csvsequence_%d.txt", fileIdx);
        new ClassPathResource(featureResource).getTempFileFromArchive();
        String labelResource = String.format("csvsequencelabels_%d.txt", fileIdx);
        new ClassPathResource(labelResource).getTempFileFromArchive();
    }

    // Turn the paths of the index-0 files into "%d" patterns for NumberedFileInputSplit.
    // NOTE(review): replaceAll substitutes every "0" in the absolute path, not only the
    // file index - presumably the extracted paths contain no other zeros; confirm.
    ClassPathResource firstFeatures = new ClassPathResource("csvsequence_0.txt");
    String featuresPath = firstFeatures.getTempFileFromArchive().getAbsolutePath().replaceAll("0", "%d");
    ClassPathResource firstLabels = new ClassPathResource("csvsequencelabels_0.txt");
    String labelsPath = firstLabels.getTempFileFromArchive().getAbsolutePath().replaceAll("0", "%d");

    SequenceRecordReader featureReader = new CSVSequenceRecordReader(1, ",");
    SequenceRecordReader labelReader = new CSVSequenceRecordReader(1, ",");
    featureReader.initialize(new NumberedFileInputSplit(featuresPath, 0, 2));
    labelReader.initialize(new NumberedFileInputSplit(labelsPath, 0, 2));

    SequenceRecordReaderDataSetIterator iter =
            new SequenceRecordReaderDataSetIterator(featureReader, labelReader, 1, 4, false);
    iter.setCollectMetaData(true);

    assertEquals(3, iter.inputColumns());
    assertEquals(4, iter.totalOutcomes());

    for (; iter.hasNext(); ) {
        DataSet expected = iter.next();
        List<RecordMetaData> exampleMeta = expected.getExampleMetaData(RecordMetaData.class);
        DataSet reloaded = iter.loadFromMetaData(exampleMeta);
        assertEquals(expected, reloaded);
    }
}
Use of org.datavec.api.records.reader.impl.csv.CSVSequenceRecordReader in project deeplearning4j by deeplearning4j.
From the class TestDataVecDataSetFunctions, method testDataVecSequenceDataSetFunction.
/**
 * Reads the CSV sequence files once through Spark and once through a local
 * SequenceRecordReaderDataSetIterator, then asserts that the two sets of three DataSets
 * match one-to-one, regardless of order.
 *
 * @throws Exception if the local record reader cannot be initialized
 */
@Test
public void testDataVecSequenceDataSetFunction() throws Exception {
    JavaSparkContext sc = getContext();

    // Test Spark record reader functionality vs. local
    File firstSequenceFile = new File("src/test/resources/csvsequence/csvsequence_0.txt");
    String firstSequencePath = firstSequenceFile.getPath();
    // Cut the file name off the end; what remains is the directory with its trailing separator
    int folderLength = firstSequencePath.length() - firstSequenceFile.getName().length();
    String folder = firstSequencePath.substring(0, folderLength);
    String wildcardPath = folder + "*";

    JavaPairRDD<String, PortableDataStream> origData = sc.binaryFiles(wildcardPath);
    // 3 CSV sequences
    assertEquals(3, origData.count());

    SequenceRecordReader sparkReader = new CSVSequenceRecordReader(1, ",");
    SequenceRecordReaderFunction readerFunction = new SequenceRecordReaderFunction(sparkReader);
    JavaRDD<List<List<Writable>>> sequences = origData.map(readerFunction);
    JavaRDD<DataSet> sparkDataSets = sequences.map(new DataVecSequenceDataSetFunction(2, -1, true, null, null));
    List<DataSet> collected = sparkDataSets.collect();

    // Load normally (i.e., not via Spark), and check that we get the same results (order notwithstanding)
    InputSplit localSplit = new FileSplit(new File(folder), new String[] { "txt" }, true);
    SequenceRecordReader localReader = new CSVSequenceRecordReader(1, ",");
    localReader.initialize(localSplit);
    SequenceRecordReaderDataSetIterator iter = new SequenceRecordReaderDataSetIterator(localReader, 1, -1, 2, true);
    List<DataSet> listLocal = new ArrayList<>(3);
    while (iter.hasNext()) {
        DataSet next = iter.next();
        listLocal.add(next);
    }

    // Compare:
    assertEquals(3, collected.size());
    assertEquals(3, listLocal.size());

    // Check that results are the same (order notwithstanding)
    boolean[] localMatched = new boolean[3];
    for (int sparkIdx = 0; sparkIdx < 3; sparkIdx++) {
        DataSet sparkDataSet = collected.get(sparkIdx);
        boolean alreadyMatched = false;
        for (int localIdx = 0; localIdx < 3; localIdx++) {
            if (!sparkDataSet.equals(listLocal.get(localIdx))) {
                continue;
            }
            if (alreadyMatched) {
                // This Spark value equals two or more of the local values (shouldn't happen)
                fail();
            }
            alreadyMatched = true;
            if (localMatched[localIdx]) {
                // An earlier Spark value already matched this local one -> suggests duplicates in Spark list
                fail();
            }
            // Mark this local value as seen
            localMatched[localIdx] = true;
        }
    }

    int count = 0;
    for (int idx = 0; idx < localMatched.length; idx++) {
        if (localMatched[idx]) {
            count++;
        }
    }
    // Expect all 3 and exactly 3 pairwise matches between Spark and local versions
    assertEquals(3, count);
}
Aggregations