use of org.datavec.api.split.FileSplit in project deeplearning4j by deeplearning4j.
the class RecordReaderMultiDataSetIteratorTest method testsBasicMeta.
@Test
public void testsBasicMeta() throws Exception {
//As per testBasic - but also loading metadata
RecordReader rr2 = new CSVRecordReader(0, ",");
rr2.initialize(new FileSplit(new ClassPathResource("iris.txt").getTempFileFromArchive()));
RecordReaderMultiDataSetIterator rrmdsi = new RecordReaderMultiDataSetIterator.Builder(10).addReader("reader", rr2).addInput("reader", 0, 3).addOutputOneHot("reader", 4, 3).build();
rrmdsi.setCollectMetaData(true);
int count = 0;
while (rrmdsi.hasNext()) {
MultiDataSet mds = rrmdsi.next();
MultiDataSet fromMeta = rrmdsi.loadFromMetaData(mds.getExampleMetaData(RecordMetaData.class));
assertEquals(mds, fromMeta);
count++;
}
assertEquals(150 / 10, count);
}
use of org.datavec.api.split.FileSplit in project deeplearning4j by deeplearning4j.
the class RecordReaderMultiDataSetIteratorTest method testsBasic.
@Test
public void testsBasic() throws Exception {
//Load details from CSV files; single input/output -> compare to RecordReaderDataSetIterator
RecordReader rr = new CSVRecordReader(0, ",");
rr.initialize(new FileSplit(new ClassPathResource("iris.txt").getTempFileFromArchive()));
RecordReaderDataSetIterator rrdsi = new RecordReaderDataSetIterator(rr, 10, 4, 3);
RecordReader rr2 = new CSVRecordReader(0, ",");
rr2.initialize(new FileSplit(new ClassPathResource("iris.txt").getTempFileFromArchive()));
MultiDataSetIterator rrmdsi = new RecordReaderMultiDataSetIterator.Builder(10).addReader("reader", rr2).addInput("reader", 0, 3).addOutputOneHot("reader", 4, 3).build();
while (rrdsi.hasNext()) {
DataSet ds = rrdsi.next();
INDArray fds = ds.getFeatureMatrix();
INDArray lds = ds.getLabels();
MultiDataSet mds = rrmdsi.next();
assertEquals(1, mds.getFeatures().length);
assertEquals(1, mds.getLabels().length);
assertNull(mds.getFeaturesMaskArrays());
assertNull(mds.getLabelsMaskArrays());
INDArray fmds = mds.getFeatures(0);
INDArray lmds = mds.getLabels(0);
assertNotNull(fmds);
assertNotNull(lmds);
assertEquals(fds, fmds);
assertEquals(lds, lmds);
}
assertFalse(rrmdsi.hasNext());
//need to manually extract
for (int i = 0; i < 3; i++) {
new ClassPathResource(String.format("csvsequence_%d.txt", i)).getTempFileFromArchive();
new ClassPathResource(String.format("csvsequencelabels_%d.txt", i)).getTempFileFromArchive();
new ClassPathResource(String.format("csvsequencelabelsShort_%d.txt", i)).getTempFileFromArchive();
}
//Load time series from CSV sequence files; compare to SequenceRecordReaderDataSetIterator
ClassPathResource resource = new ClassPathResource("csvsequence_0.txt");
String featuresPath = resource.getTempFileFromArchive().getAbsolutePath().replaceAll("0", "%d");
resource = new ClassPathResource("csvsequencelabels_0.txt");
String labelsPath = resource.getTempFileFromArchive().getAbsolutePath().replaceAll("0", "%d");
SequenceRecordReader featureReader = new CSVSequenceRecordReader(1, ",");
SequenceRecordReader labelReader = new CSVSequenceRecordReader(1, ",");
featureReader.initialize(new NumberedFileInputSplit(featuresPath, 0, 2));
labelReader.initialize(new NumberedFileInputSplit(labelsPath, 0, 2));
SequenceRecordReaderDataSetIterator iter = new SequenceRecordReaderDataSetIterator(featureReader, labelReader, 1, 4, false);
SequenceRecordReader featureReader2 = new CSVSequenceRecordReader(1, ",");
SequenceRecordReader labelReader2 = new CSVSequenceRecordReader(1, ",");
featureReader2.initialize(new NumberedFileInputSplit(featuresPath, 0, 2));
labelReader2.initialize(new NumberedFileInputSplit(labelsPath, 0, 2));
MultiDataSetIterator srrmdsi = new RecordReaderMultiDataSetIterator.Builder(1).addSequenceReader("in", featureReader2).addSequenceReader("out", labelReader2).addInput("in").addOutputOneHot("out", 0, 4).build();
while (iter.hasNext()) {
DataSet ds = iter.next();
INDArray fds = ds.getFeatureMatrix();
INDArray lds = ds.getLabels();
MultiDataSet mds = srrmdsi.next();
assertEquals(1, mds.getFeatures().length);
assertEquals(1, mds.getLabels().length);
assertNull(mds.getFeaturesMaskArrays());
assertNull(mds.getLabelsMaskArrays());
INDArray fmds = mds.getFeatures(0);
INDArray lmds = mds.getLabels(0);
assertNotNull(fmds);
assertNotNull(lmds);
assertEquals(fds, fmds);
assertEquals(lds, lmds);
}
assertFalse(srrmdsi.hasNext());
}
use of org.datavec.api.split.FileSplit in project deeplearning4j by deeplearning4j.
the class EvalTest method testEvaluationWithMetaData.
@Test
public void testEvaluationWithMetaData() throws Exception {
RecordReader csv = new CSVRecordReader();
csv.initialize(new FileSplit(new ClassPathResource("iris.txt").getTempFileFromArchive()));
int batchSize = 10;
int labelIdx = 4;
int numClasses = 3;
RecordReaderDataSetIterator rrdsi = new RecordReaderDataSetIterator(csv, batchSize, labelIdx, numClasses);
NormalizerStandardize ns = new NormalizerStandardize();
ns.fit(rrdsi);
rrdsi.setPreProcessor(ns);
rrdsi.reset();
Nd4j.getRandom().setSeed(12345);
MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder().seed(12345).iterations(1).optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT).updater(Updater.SGD).learningRate(0.1).list().layer(0, new OutputLayer.Builder(LossFunctions.LossFunction.MCXENT).activation(Activation.SOFTMAX).nIn(4).nOut(3).build()).pretrain(false).backprop(true).build();
MultiLayerNetwork net = new MultiLayerNetwork(conf);
net.init();
for (int i = 0; i < 4; i++) {
net.fit(rrdsi);
rrdsi.reset();
}
Evaluation e = new Evaluation();
//*** New: Enable collection of metadata (stored in the DataSets) ***
rrdsi.setCollectMetaData(true);
while (rrdsi.hasNext()) {
DataSet ds = rrdsi.next();
//*** New - cross dependencies here make types difficult, usid Object internally in DataSet for this***
List<RecordMetaData> meta = ds.getExampleMetaData(RecordMetaData.class);
INDArray out = net.output(ds.getFeatures());
//*** New - evaluate and also store metadata ***
e.eval(ds.getLabels(), out, meta);
}
System.out.println(e.stats());
System.out.println("\n\n*** Prediction Errors: ***");
//*** New - get list of prediction errors from evaluation ***
List<Prediction> errors = e.getPredictionErrors();
List<RecordMetaData> metaForErrors = new ArrayList<>();
for (Prediction p : errors) {
metaForErrors.add((RecordMetaData) p.getRecordMetaData());
}
//*** New - dynamically load a subset of the data, just for prediction errors ***
DataSet ds = rrdsi.loadFromMetaData(metaForErrors);
INDArray output = net.output(ds.getFeatures());
int count = 0;
for (Prediction t : errors) {
System.out.println(t + "\t\tRaw Data: " + //*** New - load subset of data from MetaData object (usually batched for efficiency) ***
csv.loadFromMetaData((RecordMetaData) t.getRecordMetaData()).getRecord() + "\tNormalized: " + ds.getFeatureMatrix().getRow(count) + "\tLabels: " + ds.getLabels().getRow(count) + "\tNetwork predictions: " + output.getRow(count));
count++;
}
int errorCount = errors.size();
double expAcc = 1.0 - errorCount / 150.0;
assertEquals(expAcc, e.accuracy(), 1e-5);
ConfusionMatrix<Integer> confusion = e.getConfusionMatrix();
int[] actualCounts = new int[3];
int[] predictedCounts = new int[3];
for (int i = 0; i < 3; i++) {
for (int j = 0; j < 3; j++) {
//(actual,predicted)
int entry = confusion.getCount(i, j);
List<Prediction> list = e.getPredictions(i, j);
assertEquals(entry, list.size());
actualCounts[i] += entry;
predictedCounts[j] += entry;
}
}
for (int i = 0; i < 3; i++) {
List<Prediction> actualClassI = e.getPredictionsByActualClass(i);
List<Prediction> predictedClassI = e.getPredictionByPredictedClass(i);
assertEquals(actualCounts[i], actualClassI.size());
assertEquals(predictedCounts[i], predictedClassI.size());
}
}
use of org.datavec.api.split.FileSplit in project deeplearning4j by deeplearning4j.
the class RecordReaderDataSetiteratorTest method testSequenceRecordReaderSingleReaderWithEmptySequenceThrows.
@Test(expected = ZeroLengthSequenceException.class)
public void testSequenceRecordReaderSingleReaderWithEmptySequenceThrows() throws Exception {
SequenceRecordReader reader = new CSVSequenceRecordReader(1, ",");
reader.initialize(new FileSplit(new ClassPathResource("empty.txt").getTempFileFromArchive()));
new SequenceRecordReaderDataSetIterator(reader, 1, -1, 1, true).next();
}
use of org.datavec.api.split.FileSplit in project deeplearning4j by deeplearning4j.
the class RecordReaderDataSetiteratorTest method testRecordReaderMetaData.
@Test
public void testRecordReaderMetaData() throws Exception {
RecordReader csv = new CSVRecordReader();
csv.initialize(new FileSplit(new ClassPathResource("iris.txt").getTempFileFromArchive()));
int batchSize = 10;
int labelIdx = 4;
int numClasses = 3;
RecordReaderDataSetIterator rrdsi = new RecordReaderDataSetIterator(csv, batchSize, labelIdx, numClasses);
rrdsi.setCollectMetaData(true);
while (rrdsi.hasNext()) {
DataSet ds = rrdsi.next();
List<RecordMetaData> meta = ds.getExampleMetaData(RecordMetaData.class);
int i = 0;
for (RecordMetaData m : meta) {
Record r = csv.loadFromMetaData(m);
INDArray row = ds.getFeatureMatrix().getRow(i);
System.out.println(m.getLocation() + "\t" + r.getRecord() + "\t" + row);
for (int j = 0; j < 4; j++) {
double exp = r.getRecord().get(j).toDouble();
double act = row.getDouble(j);
assertEquals(exp, act, 1e-6);
}
i++;
}
System.out.println();
DataSet fromMeta = rrdsi.loadFromMetaData(meta);
assertEquals(ds, fromMeta);
}
}
Aggregations