
Example 6 with RecordMetaData

Use of org.datavec.api.records.metadata.RecordMetaData in project deeplearning4j by deeplearning4j.

The class SequenceRecordReaderDataSetIterator, method loadFromMetaData.

/**
     * Load multiple sequence examples into a DataSet, using the provided RecordMetaData instances.
     *
     * @param list List of RecordMetaData instances to load from. Should have been produced by the record reader provided
     *             to the SequenceRecordReaderDataSetIterator constructor
     * @return DataSet with the specified examples
     * @throws IOException If an error occurs during loading of the data
     */
public DataSet loadFromMetaData(List<RecordMetaData> list) throws IOException {
    //Two cases: single vs. multiple reader...
    if (singleSequenceReaderMode) {
        List<SequenceRecord> records = recordReader.loadSequenceFromMetaData(list);
        List<INDArray> listFeatures = new ArrayList<>(list.size());
        List<INDArray> listLabels = new ArrayList<>(list.size());
        int minLength = Integer.MAX_VALUE;
        int maxLength = Integer.MIN_VALUE;
        for (SequenceRecord sr : records) {
            INDArray[] fl = getFeaturesLabelsSingleReader(sr.getSequenceRecord());
            listFeatures.add(fl[0]);
            listLabels.add(fl[1]);
            minLength = Math.min(minLength, fl[0].size(0));
            maxLength = Math.max(maxLength, fl[1].size(0));
        }
        return getSingleSequenceReader(listFeatures, listLabels, minLength, maxLength, list);
    } else {
        //Expect to get a RecordMetaDataComposable here
        List<RecordMetaData> fMeta = new ArrayList<>();
        List<RecordMetaData> lMeta = new ArrayList<>();
        for (RecordMetaData m : list) {
            RecordMetaDataComposable m2 = (RecordMetaDataComposable) m;
            fMeta.add(m2.getMeta()[0]);
            lMeta.add(m2.getMeta()[1]);
        }
        List<SequenceRecord> f = recordReader.loadSequenceFromMetaData(fMeta);
        List<SequenceRecord> l = labelsReader.loadSequenceFromMetaData(lMeta);
        List<INDArray> featureList = new ArrayList<>(fMeta.size());
        List<INDArray> labelList = new ArrayList<>(fMeta.size());
        for (int i = 0; i < fMeta.size(); i++) {
            featureList.add(getFeatures(f.get(i).getSequenceRecord()));
            labelList.add(getLabels(l.get(i).getSequenceRecord()));
        }
        return nextMultipleSequenceReaders(featureList, labelList, list);
    }
}
Also used : RecordMetaData(org.datavec.api.records.metadata.RecordMetaData) RecordMetaDataComposable(org.datavec.api.records.metadata.RecordMetaDataComposable) SequenceRecord(org.datavec.api.records.SequenceRecord) INDArray(org.nd4j.linalg.api.ndarray.INDArray)
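In practice, loadFromMetaData is driven by the iterator itself: enable metadata collection, take a DataSet from next(), and hand its RecordMetaData list back to the iterator to rebuild the same examples (Example 9 below does exactly this in a test). A minimal sketch of that round trip, using the same classes listed above; the file paths and label count are placeholders, not taken from the source:

SequenceRecordReader featureReader = new CSVSequenceRecordReader(1, ",");
SequenceRecordReader labelReader = new CSVSequenceRecordReader(1, ",");
// Hypothetical numbered files: features_0.txt .. features_2.txt and labels_0.txt .. labels_2.txt
featureReader.initialize(new NumberedFileInputSplit("/data/features_%d.txt", 0, 2));
labelReader.initialize(new NumberedFileInputSplit("/data/labels_%d.txt", 0, 2));
SequenceRecordReaderDataSetIterator iter =
        new SequenceRecordReaderDataSetIterator(featureReader, labelReader, 1, 4, false);
// Store RecordMetaData in each DataSet produced by next()
iter.setCollectMetaData(true);
DataSet ds = iter.next();
List<RecordMetaData> meta = ds.getExampleMetaData(RecordMetaData.class);
// Rebuilds the same examples from their metadata; should equal ds
DataSet reloaded = iter.loadFromMetaData(meta);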

Example 7 with RecordMetaData

Use of org.datavec.api.records.metadata.RecordMetaData in project deeplearning4j by deeplearning4j.

The class EvalTest, method testEvaluationWithMetaData.

@Test
public void testEvaluationWithMetaData() throws Exception {
    RecordReader csv = new CSVRecordReader();
    csv.initialize(new FileSplit(new ClassPathResource("iris.txt").getTempFileFromArchive()));
    int batchSize = 10;
    int labelIdx = 4;
    int numClasses = 3;
    RecordReaderDataSetIterator rrdsi = new RecordReaderDataSetIterator(csv, batchSize, labelIdx, numClasses);
    NormalizerStandardize ns = new NormalizerStandardize();
    ns.fit(rrdsi);
    rrdsi.setPreProcessor(ns);
    rrdsi.reset();
    Nd4j.getRandom().setSeed(12345);
    MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder()
            .seed(12345)
            .iterations(1)
            .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT)
            .updater(Updater.SGD)
            .learningRate(0.1)
            .list()
            .layer(0, new OutputLayer.Builder(LossFunctions.LossFunction.MCXENT)
                    .activation(Activation.SOFTMAX).nIn(4).nOut(3).build())
            .pretrain(false).backprop(true)
            .build();
    MultiLayerNetwork net = new MultiLayerNetwork(conf);
    net.init();
    for (int i = 0; i < 4; i++) {
        net.fit(rrdsi);
        rrdsi.reset();
    }
    Evaluation e = new Evaluation();
    //*** New: Enable collection of metadata (stored in the DataSets) ***
    rrdsi.setCollectMetaData(true);
    while (rrdsi.hasNext()) {
        DataSet ds = rrdsi.next();
        //*** New - cross dependencies here make types difficult, so Object is used internally in DataSet for this ***
        List<RecordMetaData> meta = ds.getExampleMetaData(RecordMetaData.class);
        INDArray out = net.output(ds.getFeatures());
        //*** New - evaluate and also store metadata ***
        e.eval(ds.getLabels(), out, meta);
    }
    System.out.println(e.stats());
    System.out.println("\n\n*** Prediction Errors: ***");
    //*** New - get list of prediction errors from evaluation ***
    List<Prediction> errors = e.getPredictionErrors();
    List<RecordMetaData> metaForErrors = new ArrayList<>();
    for (Prediction p : errors) {
        metaForErrors.add((RecordMetaData) p.getRecordMetaData());
    }
    //*** New - dynamically load a subset of the data, just for prediction errors ***
    DataSet ds = rrdsi.loadFromMetaData(metaForErrors);
    INDArray output = net.output(ds.getFeatures());
    int count = 0;
    for (Prediction t : errors) {
        System.out.println(t + "\t\tRaw Data: " + //*** New - load subset of data from MetaData object (usually batched for efficiency) ***
        csv.loadFromMetaData((RecordMetaData) t.getRecordMetaData()).getRecord() + "\tNormalized: " + ds.getFeatureMatrix().getRow(count) + "\tLabels: " + ds.getLabels().getRow(count) + "\tNetwork predictions: " + output.getRow(count));
        count++;
    }
    int errorCount = errors.size();
    double expAcc = 1.0 - errorCount / 150.0;
    assertEquals(expAcc, e.accuracy(), 1e-5);
    ConfusionMatrix<Integer> confusion = e.getConfusionMatrix();
    int[] actualCounts = new int[3];
    int[] predictedCounts = new int[3];
    for (int i = 0; i < 3; i++) {
        for (int j = 0; j < 3; j++) {
            //(actual,predicted)
            int entry = confusion.getCount(i, j);
            List<Prediction> list = e.getPredictions(i, j);
            assertEquals(entry, list.size());
            actualCounts[i] += entry;
            predictedCounts[j] += entry;
        }
    }
    for (int i = 0; i < 3; i++) {
        List<Prediction> actualClassI = e.getPredictionsByActualClass(i);
        List<Prediction> predictedClassI = e.getPredictionByPredictedClass(i);
        assertEquals(actualCounts[i], actualClassI.size());
        assertEquals(predictedCounts[i], predictedClassI.size());
    }
}
Also used : RecordMetaData(org.datavec.api.records.metadata.RecordMetaData) DataSet(org.nd4j.linalg.dataset.DataSet) RecordReader(org.datavec.api.records.reader.RecordReader) CSVRecordReader(org.datavec.api.records.reader.impl.csv.CSVRecordReader) Prediction(org.deeplearning4j.eval.meta.Prediction) RecordReaderDataSetIterator(org.deeplearning4j.datasets.datavec.RecordReaderDataSetIterator) FileSplit(org.datavec.api.split.FileSplit) ClassPathResource(org.nd4j.linalg.io.ClassPathResource) MultiLayerConfiguration(org.deeplearning4j.nn.conf.MultiLayerConfiguration) INDArray(org.nd4j.linalg.api.ndarray.INDArray) CSVRecordReader(org.datavec.api.records.reader.impl.csv.CSVRecordReader) NormalizerStandardize(org.nd4j.linalg.dataset.api.preprocessor.NormalizerStandardize) MultiLayerNetwork(org.deeplearning4j.nn.multilayer.MultiLayerNetwork) Test(org.junit.Test)
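Stripped of the network setup, the metadata-driven part of the test above reduces to a short pattern: collect metadata while evaluating, pull the misclassified examples out of the Evaluation, and reload only those records. A condensed sketch, assuming net and rrdsi are the already-trained network and iterator from the test:

rrdsi.setCollectMetaData(true);
Evaluation eval = new Evaluation();
while (rrdsi.hasNext()) {
    DataSet ds = rrdsi.next();
    List<RecordMetaData> meta = ds.getExampleMetaData(RecordMetaData.class);
    // Evaluate and keep a RecordMetaData reference per example
    eval.eval(ds.getLabels(), net.output(ds.getFeatures()), meta);
}
List<RecordMetaData> errorMeta = new ArrayList<>();
for (Prediction p : eval.getPredictionErrors()) {
    errorMeta.add((RecordMetaData) p.getRecordMetaData());
}
// Load a DataSet containing only the misclassified examples
DataSet errorExamples = rrdsi.loadFromMetaData(errorMeta);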

Example 8 with RecordMetaData

Use of org.datavec.api.records.metadata.RecordMetaData in project deeplearning4j by deeplearning4j.

The class RecordReaderDataSetiteratorTest, method testRecordReaderMetaData.

@Test
public void testRecordReaderMetaData() throws Exception {
    RecordReader csv = new CSVRecordReader();
    csv.initialize(new FileSplit(new ClassPathResource("iris.txt").getTempFileFromArchive()));
    int batchSize = 10;
    int labelIdx = 4;
    int numClasses = 3;
    RecordReaderDataSetIterator rrdsi = new RecordReaderDataSetIterator(csv, batchSize, labelIdx, numClasses);
    rrdsi.setCollectMetaData(true);
    while (rrdsi.hasNext()) {
        DataSet ds = rrdsi.next();
        List<RecordMetaData> meta = ds.getExampleMetaData(RecordMetaData.class);
        int i = 0;
        for (RecordMetaData m : meta) {
            Record r = csv.loadFromMetaData(m);
            INDArray row = ds.getFeatureMatrix().getRow(i);
            System.out.println(m.getLocation() + "\t" + r.getRecord() + "\t" + row);
            for (int j = 0; j < 4; j++) {
                double exp = r.getRecord().get(j).toDouble();
                double act = row.getDouble(j);
                assertEquals(exp, act, 1e-6);
            }
            i++;
        }
        System.out.println();
        DataSet fromMeta = rrdsi.loadFromMetaData(meta);
        assertEquals(ds, fromMeta);
    }
}
Also used : RecordMetaData(org.datavec.api.records.metadata.RecordMetaData) INDArray(org.nd4j.linalg.api.ndarray.INDArray) DataSet(org.nd4j.linalg.dataset.DataSet) RecordReader(org.datavec.api.records.reader.RecordReader) CollectionRecordReader(org.datavec.api.records.reader.impl.collection.CollectionRecordReader) CSVSequenceRecordReader(org.datavec.api.records.reader.impl.csv.CSVSequenceRecordReader) CSVRecordReader(org.datavec.api.records.reader.impl.csv.CSVRecordReader) SequenceRecordReader(org.datavec.api.records.reader.SequenceRecordReader) CollectionSequenceRecordReader(org.datavec.api.records.reader.impl.collection.CollectionSequenceRecordReader) CSVRecordReader(org.datavec.api.records.reader.impl.csv.CSVRecordReader) Record(org.datavec.api.records.Record) FileSplit(org.datavec.api.split.FileSplit) ClassPathResource(org.nd4j.linalg.io.ClassPathResource) Test(org.junit.Test)
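The inner loop above reloads records one at a time, which is convenient for printing each row next to its normalized features. The reader also accepts the whole metadata list in a single call, the batched form the comments in Example 7 allude to. A brief sketch, reusing the csv reader and meta list from the test:

// Batched variant: one call returns the records in the same order as the metadata list
List<Record> records = csv.loadFromMetaData(meta);
for (int i = 0; i < records.size(); i++) {
    System.out.println(meta.get(i).getLocation() + "\t" + records.get(i).getRecord());
}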

Example 9 with RecordMetaData

Use of org.datavec.api.records.metadata.RecordMetaData in project deeplearning4j by deeplearning4j.

The class RecordReaderDataSetiteratorTest, method testSequenceRecordReaderMeta.

@Test
public void testSequenceRecordReaderMeta() throws Exception {
    //need to manually extract
    for (int i = 0; i < 3; i++) {
        new ClassPathResource(String.format("csvsequence_%d.txt", i)).getTempFileFromArchive();
        new ClassPathResource(String.format("csvsequencelabels_%d.txt", i)).getTempFileFromArchive();
    }
    ClassPathResource resource = new ClassPathResource("csvsequence_0.txt");
    String featuresPath = resource.getTempFileFromArchive().getAbsolutePath().replaceAll("0", "%d");
    resource = new ClassPathResource("csvsequencelabels_0.txt");
    String labelsPath = resource.getTempFileFromArchive().getAbsolutePath().replaceAll("0", "%d");
    SequenceRecordReader featureReader = new CSVSequenceRecordReader(1, ",");
    SequenceRecordReader labelReader = new CSVSequenceRecordReader(1, ",");
    featureReader.initialize(new NumberedFileInputSplit(featuresPath, 0, 2));
    labelReader.initialize(new NumberedFileInputSplit(labelsPath, 0, 2));
    SequenceRecordReaderDataSetIterator iter = new SequenceRecordReaderDataSetIterator(featureReader, labelReader, 1, 4, false);
    iter.setCollectMetaData(true);
    assertEquals(3, iter.inputColumns());
    assertEquals(4, iter.totalOutcomes());
    while (iter.hasNext()) {
        DataSet ds = iter.next();
        List<RecordMetaData> meta = ds.getExampleMetaData(RecordMetaData.class);
        DataSet fromMeta = iter.loadFromMetaData(meta);
        assertEquals(ds, fromMeta);
    }
}
Also used : RecordMetaData(org.datavec.api.records.metadata.RecordMetaData) CSVSequenceRecordReader(org.datavec.api.records.reader.impl.csv.CSVSequenceRecordReader) SequenceRecordReader(org.datavec.api.records.reader.SequenceRecordReader) CollectionSequenceRecordReader(org.datavec.api.records.reader.impl.collection.CollectionSequenceRecordReader) CSVSequenceRecordReader(org.datavec.api.records.reader.impl.csv.CSVSequenceRecordReader) DataSet(org.nd4j.linalg.dataset.DataSet) ClassPathResource(org.nd4j.linalg.io.ClassPathResource) NumberedFileInputSplit(org.datavec.api.split.NumberedFileInputSplit) Test(org.junit.Test)
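This test exercises the two-reader branch of loadFromMetaData shown in Example 6. The singleSequenceReaderMode branch is reached when the iterator is built from one reader that carries both features and labels; a brief sketch of that variant, assuming the single-reader constructor takes (reader, miniBatchSize, numPossibleLabels, labelIndex, regression) and using a placeholder path and label column:

SequenceRecordReader reader = new CSVSequenceRecordReader(1, ",");
// Hypothetical numbered files holding features plus a trailing label column
reader.initialize(new NumberedFileInputSplit("/data/sequence_%d.txt", 0, 2));
SequenceRecordReaderDataSetIterator iter =
        new SequenceRecordReaderDataSetIterator(reader, 1, 4, 3, false); // label at column index 3 (illustrative)
iter.setCollectMetaData(true);
DataSet ds = iter.next();
DataSet fromMeta = iter.loadFromMetaData(ds.getExampleMetaData(RecordMetaData.class));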

Aggregations

RecordMetaData (org.datavec.api.records.metadata.RecordMetaData) 9
INDArray (org.nd4j.linalg.api.ndarray.INDArray) 5
DataSet (org.nd4j.linalg.dataset.DataSet) 5
Record (org.datavec.api.records.Record) 4
SequenceRecord (org.datavec.api.records.SequenceRecord) 4
SequenceRecordReader (org.datavec.api.records.reader.SequenceRecordReader) 4
RecordReader (org.datavec.api.records.reader.RecordReader) 3
Test (org.junit.Test) 3
ClassPathResource (org.nd4j.linalg.io.ClassPathResource) 3
ArrayList (java.util.ArrayList) 2
RecordMetaDataComposable (org.datavec.api.records.metadata.RecordMetaDataComposable) 2
CollectionSequenceRecordReader (org.datavec.api.records.reader.impl.collection.CollectionSequenceRecordReader) 2
CSVRecordReader (org.datavec.api.records.reader.impl.csv.CSVRecordReader) 2
CSVSequenceRecordReader (org.datavec.api.records.reader.impl.csv.CSVSequenceRecordReader) 2
FileSplit (org.datavec.api.split.FileSplit) 2
Writable (org.datavec.api.writable.Writable) 2
NDArrayWritable (org.datavec.common.data.NDArrayWritable) 2
List (java.util.List) 1
RecordMetaDataComposableMap (org.datavec.api.records.metadata.RecordMetaDataComposableMap) 1
CollectionRecordReader (org.datavec.api.records.reader.impl.collection.CollectionRecordReader) 1