Search in sources :

Example 11 with RecordReader

Use of org.datavec.api.records.reader.RecordReader in project deeplearning4j by deeplearning4j.

The class RecordReaderMultiDataSetIterator, method next.

/**
 * Returns the next MultiDataSet containing up to {@code num} examples.
 * <p>
 * For each registered RecordReader and SequenceRecordReader this reads up to
 * {@code num} records (stopping early if a reader is exhausted), then delegates
 * to {@code nextMultiDataSet(...)} to convert the raw Writables into the final
 * MultiDataSet. When {@code collectMetaData} is enabled, per-example metadata
 * from every reader is merged into a shared RecordMetaDataComposableMap, keyed
 * by reader name.
 *
 * @param num maximum number of examples to load in this step
 * @return the assembled MultiDataSet
 * @throws NoSuchElementException if the iterator is exhausted
 */
@Override
public MultiDataSet next(int num) {
    if (!hasNext())
        throw new NoSuchElementException("No next elements");
    //First: load the next values from the RR / SeqRRs
    // Raw values per (non-sequence) reader name: one List<Writable> per example
    Map<String, List<List<Writable>>> nextRRVals = new HashMap<>();
    // Raw values per sequence reader name: one List<List<Writable>> per sequence
    Map<String, List<List<List<Writable>>>> nextSeqRRVals = new HashMap<>();
    // One composable metadata map per example index; null unless metadata collection is enabled
    List<RecordMetaDataComposableMap> nextMetas = (collectMetaData ? new ArrayList<RecordMetaDataComposableMap>() : null);
    for (Map.Entry<String, RecordReader> entry : recordReaders.entrySet()) {
        RecordReader rr = entry.getValue();
        List<List<Writable>> writables = new ArrayList<>(num);
        for (int i = 0; i < num && rr.hasNext(); i++) {
            List<Writable> record;
            if (collectMetaData) {
                // nextRecord() returns both the values and the metadata for this example
                Record r = rr.nextRecord();
                record = r.getRecord();
                // Lazily grow the metadata list so example i has a map shared by all readers
                if (nextMetas.size() <= i) {
                    nextMetas.add(new RecordMetaDataComposableMap(new HashMap<String, RecordMetaData>()));
                }
                RecordMetaDataComposableMap map = nextMetas.get(i);
                map.getMeta().put(entry.getKey(), r.getMetaData());
            } else {
                record = rr.next();
            }
            writables.add(record);
        }
        nextRRVals.put(entry.getKey(), writables);
    }
    for (Map.Entry<String, SequenceRecordReader> entry : sequenceRecordReaders.entrySet()) {
        SequenceRecordReader rr = entry.getValue();
        List<List<List<Writable>>> writables = new ArrayList<>(num);
        for (int i = 0; i < num && rr.hasNext(); i++) {
            List<List<Writable>> sequence;
            if (collectMetaData) {
                SequenceRecord r = rr.nextSequence();
                sequence = r.getSequenceRecord();
                // Reuses (or extends) the per-example metadata maps created in the loop above,
                // so sequence metadata is merged with non-sequence metadata at the same index
                if (nextMetas.size() <= i) {
                    nextMetas.add(new RecordMetaDataComposableMap(new HashMap<String, RecordMetaData>()));
                }
                RecordMetaDataComposableMap map = nextMetas.get(i);
                map.getMeta().put(entry.getKey(), r.getMetaData());
            } else {
                sequence = rr.sequenceRecord();
            }
            writables.add(sequence);
        }
        nextSeqRRVals.put(entry.getKey(), writables);
    }
    return nextMultiDataSet(nextRRVals, nextSeqRRVals, nextMetas);
}
Also used : SequenceRecordReader(org.datavec.api.records.reader.SequenceRecordReader) RecordReader(org.datavec.api.records.reader.RecordReader) SequenceRecordReader(org.datavec.api.records.reader.SequenceRecordReader) NDArrayWritable(org.datavec.common.data.NDArrayWritable) Writable(org.datavec.api.writable.Writable) SequenceRecord(org.datavec.api.records.SequenceRecord) SequenceRecord(org.datavec.api.records.SequenceRecord) Record(org.datavec.api.records.Record) RecordMetaDataComposableMap(org.datavec.api.records.metadata.RecordMetaDataComposableMap) RecordMetaDataComposableMap(org.datavec.api.records.metadata.RecordMetaDataComposableMap)

Example 12 with RecordReader

Use of org.datavec.api.records.reader.RecordReader in project deeplearning4j by deeplearning4j.

The class RecordReaderMultiDataSetIteratorTest, method testInputValidation.

/**
 * Verifies that the iterator builder rejects invalid configurations:
 * no readers registered, a reference to an unknown reader name, and a
 * reader with no declared inputs or outputs.
 */
@Test
public void testInputValidation() {
    // Builder with inputs/outputs but no reader registered must fail
    try {
        MultiDataSetIterator iter = new RecordReaderMultiDataSetIterator.Builder(1)
                        .addInput("something")
                        .addOutput("something")
                        .build();
        fail("Should have thrown exception");
    } catch (Exception ignored) {
        // expected: invalid configuration
    }
    // Input column referencing a reader name that was never added must fail
    try {
        RecordReader reader = new CSVRecordReader(0, ",");
        reader.initialize(new FileSplit(new ClassPathResource("iris.txt").getTempFileFromArchive()));
        MultiDataSetIterator iter = new RecordReaderMultiDataSetIterator.Builder(1)
                        .addReader("iris", reader)
                        .addInput("thisDoesntExist", 0, 3)
                        .addOutputOneHot("iris", 4, 3)
                        .build();
        fail("Should have thrown exception");
    } catch (Exception ignored) {
        // expected: unknown reader name
    }
    // Reader registered but neither inputs nor outputs declared must fail
    try {
        RecordReader reader = new CSVRecordReader(0, ",");
        reader.initialize(new FileSplit(new ClassPathResource("iris.txt").getTempFileFromArchive()));
        MultiDataSetIterator iter = new RecordReaderMultiDataSetIterator.Builder(1)
                        .addReader("iris", reader)
                        .build();
        fail("Should have thrown exception");
    } catch (Exception ignored) {
        // expected: nothing to iterate over
    }
}
Also used : MultiDataSetIterator(org.nd4j.linalg.dataset.api.iterator.MultiDataSetIterator) RecordReader(org.datavec.api.records.reader.RecordReader) ImageRecordReader(org.datavec.image.recordreader.ImageRecordReader) CSVSequenceRecordReader(org.datavec.api.records.reader.impl.csv.CSVSequenceRecordReader) CSVRecordReader(org.datavec.api.records.reader.impl.csv.CSVRecordReader) SequenceRecordReader(org.datavec.api.records.reader.SequenceRecordReader) CSVRecordReader(org.datavec.api.records.reader.impl.csv.CSVRecordReader) FileSplit(org.datavec.api.split.FileSplit) ClassPathResource(org.nd4j.linalg.io.ClassPathResource) Test(org.junit.Test)

Example 13 with RecordReader

Use of org.datavec.api.records.reader.RecordReader in project deeplearning4j by deeplearning4j.

The class RecordReaderMultiDataSetIteratorTest, method testsBasicMeta.

/**
 * As per testsBasic, but with metadata collection enabled: every MultiDataSet
 * reloaded from its own RecordMetaData must equal the originally iterated one.
 */
@Test
public void testsBasicMeta() throws Exception {
    //As per testBasic - but also loading metadata
    RecordReader csv = new CSVRecordReader(0, ",");
    csv.initialize(new FileSplit(new ClassPathResource("iris.txt").getTempFileFromArchive()));
    RecordReaderMultiDataSetIterator iterator = new RecordReaderMultiDataSetIterator.Builder(10)
                    .addReader("reader", csv)
                    .addInput("reader", 0, 3)
                    .addOutputOneHot("reader", 4, 3)
                    .build();
    iterator.setCollectMetaData(true);
    int batches = 0;
    while (iterator.hasNext()) {
        MultiDataSet mds = iterator.next();
        // Round-trip: reload exactly this batch via its collected metadata
        MultiDataSet reloaded = iterator.loadFromMetaData(mds.getExampleMetaData(RecordMetaData.class));
        assertEquals(mds, reloaded);
        batches++;
    }
    // iris.txt has 150 rows; batch size 10 -> 15 batches
    assertEquals(150 / 10, batches);
}
Also used : RecordMetaData(org.datavec.api.records.metadata.RecordMetaData) MultiDataSet(org.nd4j.linalg.dataset.api.MultiDataSet) RecordReader(org.datavec.api.records.reader.RecordReader) ImageRecordReader(org.datavec.image.recordreader.ImageRecordReader) CSVSequenceRecordReader(org.datavec.api.records.reader.impl.csv.CSVSequenceRecordReader) CSVRecordReader(org.datavec.api.records.reader.impl.csv.CSVRecordReader) SequenceRecordReader(org.datavec.api.records.reader.SequenceRecordReader) CSVRecordReader(org.datavec.api.records.reader.impl.csv.CSVRecordReader) FileSplit(org.datavec.api.split.FileSplit) ClassPathResource(org.nd4j.linalg.io.ClassPathResource) Test(org.junit.Test)

Example 14 with RecordReader

Use of org.datavec.api.records.reader.RecordReader in project deeplearning4j by deeplearning4j.

The class RecordReaderMultiDataSetIteratorTest, method testsBasic.

/**
 * Cross-checks RecordReaderMultiDataSetIterator against the single-dataset
 * iterators it should agree with, in two phases:
 * (1) CSV records vs RecordReaderDataSetIterator, and
 * (2) CSV sequence files vs SequenceRecordReaderDataSetIterator.
 * In both phases the multi iterator must yield exactly one feature array and
 * one label array per step, with no mask arrays, matching the reference
 * iterator's features/labels batch for batch.
 */
@Test
public void testsBasic() throws Exception {
    //Load details from CSV files; single input/output -> compare to RecordReaderDataSetIterator
    RecordReader rr = new CSVRecordReader(0, ",");
    rr.initialize(new FileSplit(new ClassPathResource("iris.txt").getTempFileFromArchive()));
    // Reference iterator: label at column 4, 3 classes, batch size 10
    RecordReaderDataSetIterator rrdsi = new RecordReaderDataSetIterator(rr, 10, 4, 3);
    // Second, independent reader over the same file for the multi iterator
    RecordReader rr2 = new CSVRecordReader(0, ",");
    rr2.initialize(new FileSplit(new ClassPathResource("iris.txt").getTempFileFromArchive()));
    MultiDataSetIterator rrmdsi = new RecordReaderMultiDataSetIterator.Builder(10).addReader("reader", rr2).addInput("reader", 0, 3).addOutputOneHot("reader", 4, 3).build();
    while (rrdsi.hasNext()) {
        DataSet ds = rrdsi.next();
        INDArray fds = ds.getFeatureMatrix();
        INDArray lds = ds.getLabels();
        MultiDataSet mds = rrmdsi.next();
        // Single input and single output were configured, so exactly one array each
        assertEquals(1, mds.getFeatures().length);
        assertEquals(1, mds.getLabels().length);
        // Non-sequence data: no masking expected
        assertNull(mds.getFeaturesMaskArrays());
        assertNull(mds.getLabelsMaskArrays());
        INDArray fmds = mds.getFeatures(0);
        INDArray lmds = mds.getLabels(0);
        assertNotNull(fmds);
        assertNotNull(lmds);
        // Batches must match the reference iterator exactly
        assertEquals(fds, fmds);
        assertEquals(lds, lmds);
    }
    // Both iterators must be exhausted at the same point
    assertFalse(rrmdsi.hasNext());
    //need to manually extract
    for (int i = 0; i < 3; i++) {
        new ClassPathResource(String.format("csvsequence_%d.txt", i)).getTempFileFromArchive();
        new ClassPathResource(String.format("csvsequencelabels_%d.txt", i)).getTempFileFromArchive();
        new ClassPathResource(String.format("csvsequencelabelsShort_%d.txt", i)).getTempFileFromArchive();
    }
    //Load time series from CSV sequence files; compare to SequenceRecordReaderDataSetIterator
    ClassPathResource resource = new ClassPathResource("csvsequence_0.txt");
    // Turn the concrete "..._0.txt" path into a "%d" template for NumberedFileInputSplit
    String featuresPath = resource.getTempFileFromArchive().getAbsolutePath().replaceAll("0", "%d");
    resource = new ClassPathResource("csvsequencelabels_0.txt");
    String labelsPath = resource.getTempFileFromArchive().getAbsolutePath().replaceAll("0", "%d");
    SequenceRecordReader featureReader = new CSVSequenceRecordReader(1, ",");
    SequenceRecordReader labelReader = new CSVSequenceRecordReader(1, ",");
    featureReader.initialize(new NumberedFileInputSplit(featuresPath, 0, 2));
    labelReader.initialize(new NumberedFileInputSplit(labelsPath, 0, 2));
    // Reference sequence iterator: 4 label classes, not regression
    SequenceRecordReaderDataSetIterator iter = new SequenceRecordReaderDataSetIterator(featureReader, labelReader, 1, 4, false);
    // Independent pair of readers over the same files for the multi iterator
    SequenceRecordReader featureReader2 = new CSVSequenceRecordReader(1, ",");
    SequenceRecordReader labelReader2 = new CSVSequenceRecordReader(1, ",");
    featureReader2.initialize(new NumberedFileInputSplit(featuresPath, 0, 2));
    labelReader2.initialize(new NumberedFileInputSplit(labelsPath, 0, 2));
    MultiDataSetIterator srrmdsi = new RecordReaderMultiDataSetIterator.Builder(1).addSequenceReader("in", featureReader2).addSequenceReader("out", labelReader2).addInput("in").addOutputOneHot("out", 0, 4).build();
    while (iter.hasNext()) {
        DataSet ds = iter.next();
        INDArray fds = ds.getFeatureMatrix();
        INDArray lds = ds.getLabels();
        MultiDataSet mds = srrmdsi.next();
        assertEquals(1, mds.getFeatures().length);
        assertEquals(1, mds.getLabels().length);
        // Sequences here are equal-length, so no masks are produced
        assertNull(mds.getFeaturesMaskArrays());
        assertNull(mds.getLabelsMaskArrays());
        INDArray fmds = mds.getFeatures(0);
        INDArray lmds = mds.getLabels(0);
        assertNotNull(fmds);
        assertNotNull(lmds);
        assertEquals(fds, fmds);
        assertEquals(lds, lmds);
    }
    assertFalse(srrmdsi.hasNext());
}
Also used : CSVSequenceRecordReader(org.datavec.api.records.reader.impl.csv.CSVSequenceRecordReader) SequenceRecordReader(org.datavec.api.records.reader.SequenceRecordReader) DataSet(org.nd4j.linalg.dataset.DataSet) MultiDataSet(org.nd4j.linalg.dataset.api.MultiDataSet) RecordReader(org.datavec.api.records.reader.RecordReader) ImageRecordReader(org.datavec.image.recordreader.ImageRecordReader) CSVSequenceRecordReader(org.datavec.api.records.reader.impl.csv.CSVSequenceRecordReader) CSVRecordReader(org.datavec.api.records.reader.impl.csv.CSVRecordReader) SequenceRecordReader(org.datavec.api.records.reader.SequenceRecordReader) FileSplit(org.datavec.api.split.FileSplit) ClassPathResource(org.nd4j.linalg.io.ClassPathResource) NumberedFileInputSplit(org.datavec.api.split.NumberedFileInputSplit) MultiDataSetIterator(org.nd4j.linalg.dataset.api.iterator.MultiDataSetIterator) INDArray(org.nd4j.linalg.api.ndarray.INDArray) MultiDataSet(org.nd4j.linalg.dataset.api.MultiDataSet) CSVSequenceRecordReader(org.datavec.api.records.reader.impl.csv.CSVSequenceRecordReader) CSVRecordReader(org.datavec.api.records.reader.impl.csv.CSVRecordReader) Test(org.junit.Test)

Example 15 with RecordReader

Use of org.datavec.api.records.reader.RecordReader in project deeplearning4j by deeplearning4j.

The class EvalTest, method testEvaluationWithMetaData.

/**
 * End-to-end test of evaluation with metadata: trains a small softmax model on
 * iris, evaluates with per-example RecordMetaData attached, then uses that
 * metadata to (a) reload only the mis-predicted examples and (b) fetch their
 * raw CSV records directly from the reader. Finally cross-checks the confusion
 * matrix counts against the per-cell prediction lists.
 */
@Test
public void testEvaluationWithMetaData() throws Exception {
    RecordReader csv = new CSVRecordReader();
    csv.initialize(new FileSplit(new ClassPathResource("iris.txt").getTempFileFromArchive()));
    int batchSize = 10;
    int labelIdx = 4;
    int numClasses = 3;
    RecordReaderDataSetIterator rrdsi = new RecordReaderDataSetIterator(csv, batchSize, labelIdx, numClasses);
    // Fit the normalizer on a full pass, then reset so training starts from the beginning
    NormalizerStandardize ns = new NormalizerStandardize();
    ns.fit(rrdsi);
    rrdsi.setPreProcessor(ns);
    rrdsi.reset();
    Nd4j.getRandom().setSeed(12345);
    // Minimal single-layer softmax classifier: 4 features in, 3 classes out
    MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder().seed(12345).iterations(1).optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT).updater(Updater.SGD).learningRate(0.1).list().layer(0, new OutputLayer.Builder(LossFunctions.LossFunction.MCXENT).activation(Activation.SOFTMAX).nIn(4).nOut(3).build()).pretrain(false).backprop(true).build();
    MultiLayerNetwork net = new MultiLayerNetwork(conf);
    net.init();
    // 4 epochs of training, resetting the iterator between passes
    for (int i = 0; i < 4; i++) {
        net.fit(rrdsi);
        rrdsi.reset();
    }
    Evaluation e = new Evaluation();
    //*** New: Enable collection of metadata (stored in the DataSets) ***
    rrdsi.setCollectMetaData(true);
    while (rrdsi.hasNext()) {
        DataSet ds = rrdsi.next();
        //*** New - cross dependencies here make types difficult, usid Object internally in DataSet for this***
        List<RecordMetaData> meta = ds.getExampleMetaData(RecordMetaData.class);
        INDArray out = net.output(ds.getFeatures());
        //*** New - evaluate and also store metadata ***
        e.eval(ds.getLabels(), out, meta);
    }
    System.out.println(e.stats());
    System.out.println("\n\n*** Prediction Errors: ***");
    //*** New - get list of prediction errors from evaluation ***
    List<Prediction> errors = e.getPredictionErrors();
    List<RecordMetaData> metaForErrors = new ArrayList<>();
    for (Prediction p : errors) {
        metaForErrors.add((RecordMetaData) p.getRecordMetaData());
    }
    //*** New - dynamically load a subset of the data, just for prediction errors ***
    DataSet ds = rrdsi.loadFromMetaData(metaForErrors);
    INDArray output = net.output(ds.getFeatures());
    int count = 0;
    for (Prediction t : errors) {
        System.out.println(t + "\t\tRaw Data: " + //*** New - load subset of data from MetaData object (usually batched for efficiency) ***
        csv.loadFromMetaData((RecordMetaData) t.getRecordMetaData()).getRecord() + "\tNormalized: " + ds.getFeatureMatrix().getRow(count) + "\tLabels: " + ds.getLabels().getRow(count) + "\tNetwork predictions: " + output.getRow(count));
        count++;
    }
    // Accuracy reported by the Evaluation must agree with the error count (150 iris examples)
    int errorCount = errors.size();
    double expAcc = 1.0 - errorCount / 150.0;
    assertEquals(expAcc, e.accuracy(), 1e-5);
    ConfusionMatrix<Integer> confusion = e.getConfusionMatrix();
    int[] actualCounts = new int[3];
    int[] predictedCounts = new int[3];
    for (int i = 0; i < 3; i++) {
        for (int j = 0; j < 3; j++) {
            //(actual,predicted)
            // Each confusion-matrix cell count must match the number of stored predictions for that cell
            int entry = confusion.getCount(i, j);
            List<Prediction> list = e.getPredictions(i, j);
            assertEquals(entry, list.size());
            actualCounts[i] += entry;
            predictedCounts[j] += entry;
        }
    }
    // Row/column sums of the confusion matrix must match the by-class prediction lists
    for (int i = 0; i < 3; i++) {
        List<Prediction> actualClassI = e.getPredictionsByActualClass(i);
        List<Prediction> predictedClassI = e.getPredictionByPredictedClass(i);
        assertEquals(actualCounts[i], actualClassI.size());
        assertEquals(predictedCounts[i], predictedClassI.size());
    }
}
Also used : RecordMetaData(org.datavec.api.records.metadata.RecordMetaData) DataSet(org.nd4j.linalg.dataset.DataSet) RecordReader(org.datavec.api.records.reader.RecordReader) CSVRecordReader(org.datavec.api.records.reader.impl.csv.CSVRecordReader) Prediction(org.deeplearning4j.eval.meta.Prediction) RecordReaderDataSetIterator(org.deeplearning4j.datasets.datavec.RecordReaderDataSetIterator) FileSplit(org.datavec.api.split.FileSplit) ClassPathResource(org.nd4j.linalg.io.ClassPathResource) MultiLayerConfiguration(org.deeplearning4j.nn.conf.MultiLayerConfiguration) INDArray(org.nd4j.linalg.api.ndarray.INDArray) CSVRecordReader(org.datavec.api.records.reader.impl.csv.CSVRecordReader) NormalizerStandardize(org.nd4j.linalg.dataset.api.preprocessor.NormalizerStandardize) MultiLayerNetwork(org.deeplearning4j.nn.multilayer.MultiLayerNetwork) Test(org.junit.Test)

Aggregations

RecordReader (org.datavec.api.records.reader.RecordReader)21 Test (org.junit.Test)18 CSVRecordReader (org.datavec.api.records.reader.impl.csv.CSVRecordReader)17 FileSplit (org.datavec.api.split.FileSplit)17 SequenceRecordReader (org.datavec.api.records.reader.SequenceRecordReader)13 DataSet (org.nd4j.linalg.dataset.DataSet)13 ClassPathResource (org.nd4j.linalg.io.ClassPathResource)12 CSVSequenceRecordReader (org.datavec.api.records.reader.impl.csv.CSVSequenceRecordReader)11 DataSetIterator (org.nd4j.linalg.dataset.api.iterator.DataSetIterator)10 CollectionRecordReader (org.datavec.api.records.reader.impl.collection.CollectionRecordReader)7 INDArray (org.nd4j.linalg.api.ndarray.INDArray)7 CollectionSequenceRecordReader (org.datavec.api.records.reader.impl.collection.CollectionSequenceRecordReader)6 ImageRecordReader (org.datavec.image.recordreader.ImageRecordReader)6 RecordReaderDataSetIterator (org.deeplearning4j.datasets.datavec.RecordReaderDataSetIterator)6 RecordMetaData (org.datavec.api.records.metadata.RecordMetaData)5 MultiDataSet (org.nd4j.linalg.dataset.api.MultiDataSet)5 MultiDataSetIterator (org.nd4j.linalg.dataset.api.iterator.MultiDataSetIterator)5 ClassPathResource (org.datavec.api.util.ClassPathResource)4 Record (org.datavec.api.records.Record)3 NDArrayWritable (org.datavec.common.data.NDArrayWritable)3