Search in sources :

Example 1 with NDArrayWritable

use of org.datavec.common.data.NDArrayWritable in project deeplearning4j by deeplearning4j.

the class RecordReaderDataSetiteratorTest method testSeqRRDSIMultipleArrayWritablesOneReader.

/**
 * Sequence iterator test: every time step holds two 3-element array writables followed by an
 * integer class index. The expected feature array has 6 values per time step (both arrays of
 * the step, in order) and the expected label array is one-hot over 4 classes.
 */
@Test
public void testSeqRRDSIMultipleArrayWritablesOneReader() {
    // First sequence: two time steps, class indices 0 and 1
    List<Writable> seqOneStepZero = Arrays.<Writable>asList(
                    new NDArrayWritable(Nd4j.create(new double[] { 1, 2, 3 })),
                    new NDArrayWritable(Nd4j.create(new double[] { 100, 200, 300 })),
                    new IntWritable(0));
    List<Writable> seqOneStepOne = Arrays.<Writable>asList(
                    new NDArrayWritable(Nd4j.create(new double[] { 4, 5, 6 })),
                    new NDArrayWritable(Nd4j.create(new double[] { 400, 500, 600 })),
                    new IntWritable(1));
    List<List<Writable>> firstSequence = new ArrayList<>();
    firstSequence.add(seqOneStepZero);
    firstSequence.add(seqOneStepOne);

    // Second sequence: two time steps, class indices 2 and 3
    List<Writable> seqTwoStepZero = Arrays.<Writable>asList(
                    new NDArrayWritable(Nd4j.create(new double[] { 7, 8, 9 })),
                    new NDArrayWritable(Nd4j.create(new double[] { 700, 800, 900 })),
                    new IntWritable(2));
    List<Writable> seqTwoStepOne = Arrays.<Writable>asList(
                    new NDArrayWritable(Nd4j.create(new double[] { 10, 11, 12 })),
                    new NDArrayWritable(Nd4j.create(new double[] { 1000, 1100, 1200 })),
                    new IntWritable(3));
    List<List<Writable>> secondSequence = new ArrayList<>();
    secondSequence.add(seqTwoStepZero);
    secondSequence.add(seqTwoStepOne);

    SequenceRecordReader reader =
                    new CollectionSequenceRecordReader(Arrays.asList(firstSequence, secondSequence));

    int miniBatchSize = 2;
    int numClasses = 4;
    int labelColumn = 2;
    boolean isRegression = false;
    SequenceRecordReaderDataSetIterator iterator = new SequenceRecordReaderDataSetIterator(reader,
                    miniBatchSize, numClasses, labelColumn, isRegression);
    DataSet batch = iterator.next();

    // Features: 2 examples, 6 values per time step, 2 time steps
    double[][] featuresExampleZero = { { 1, 4 }, { 2, 5 }, { 3, 6 }, { 100, 400 }, { 200, 500 }, { 300, 600 } };
    double[][] featuresExampleOne = { { 7, 10 }, { 8, 11 }, { 9, 12 }, { 700, 1000 }, { 800, 1100 }, { 900, 1200 } };
    INDArray expFeatures = Nd4j.create(2, 6, 2);
    expFeatures.tensorAlongDimension(0, 1, 2).assign(Nd4j.create(featuresExampleZero));
    expFeatures.tensorAlongDimension(1, 1, 2).assign(Nd4j.create(featuresExampleOne));

    // Labels: 2 examples, one-hot over 4 classes, 2 time steps
    double[][] labelsExampleZero = { { 1, 0 }, { 0, 1 }, { 0, 0 }, { 0, 0 } };
    double[][] labelsExampleOne = { { 0, 0 }, { 0, 0 }, { 1, 0 }, { 0, 1 } };
    INDArray expLabels = Nd4j.create(2, 4, 2);
    expLabels.tensorAlongDimension(0, 1, 2).assign(Nd4j.create(labelsExampleZero));
    expLabels.tensorAlongDimension(1, 1, 2).assign(Nd4j.create(labelsExampleOne));

    assertEquals(expFeatures, batch.getFeatureMatrix());
    assertEquals(expLabels, batch.getLabels());
}
Also used : NDArrayWritable(org.datavec.common.data.NDArrayWritable) CSVSequenceRecordReader(org.datavec.api.records.reader.impl.csv.CSVSequenceRecordReader) SequenceRecordReader(org.datavec.api.records.reader.SequenceRecordReader) CollectionSequenceRecordReader(org.datavec.api.records.reader.impl.collection.CollectionSequenceRecordReader) INDArray(org.nd4j.linalg.api.ndarray.INDArray) DataSet(org.nd4j.linalg.dataset.DataSet) CollectionSequenceRecordReader(org.datavec.api.records.reader.impl.collection.CollectionSequenceRecordReader) IntWritable(org.datavec.api.writable.IntWritable) NDArrayWritable(org.datavec.common.data.NDArrayWritable) DoubleWritable(org.datavec.api.writable.DoubleWritable) Writable(org.datavec.api.writable.Writable) IntWritable(org.datavec.api.writable.IntWritable) Test(org.junit.Test)

Example 2 with NDArrayWritable

use of org.datavec.common.data.NDArrayWritable in project deeplearning4j by deeplearning4j.

the class RecordReaderDataSetiteratorTest method testRecordReaderDataSetIteratorNDArrayWritableLabels.

/**
 * Regression iterator test with an array writable as the label column. Runs the same
 * expectations against two record layouts: scalar feature columns plus an array label, and
 * an array feature plus an array label.
 */
@Test
public void testRecordReaderDataSetIteratorNDArrayWritableLabels() {
    final double[][] featureRows = { { 0, 1 }, { 2, 3 }, { 4, 5 } };
    final double[][] labelRows = { { 1.1, 2.1, 3.1 }, { 4.1, 5.1, 6.1 }, { 7.1, 8.1, 9.1 } };

    final int batchSize = 3;
    final int labelIndexFrom = 2;
    final int labelIndexTo = 2;
    final boolean regression = true;

    // Layout 1: two scalar feature columns followed by one array-valued label column
    Collection<Collection<Writable>> data = new ArrayList<>();
    for (int row = 0; row < featureRows.length; row++) {
        // clone() gives every writable its own backing array, as separate literals would
        data.add(Arrays.<Writable>asList(
                        new DoubleWritable(featureRows[row][0]),
                        new DoubleWritable(featureRows[row][1]),
                        new NDArrayWritable(Nd4j.create(labelRows[row].clone()))));
    }
    RecordReader rr = new CollectionRecordReader(data);
    DataSetIterator rrdsi =
                    new RecordReaderDataSetIterator(rr, batchSize, labelIndexFrom, labelIndexTo, regression);
    DataSet ds = rrdsi.next();

    INDArray expFeatures = Nd4j.create(featureRows);
    INDArray expLabels = Nd4j.create(labelRows);
    assertEquals(expFeatures, ds.getFeatures());
    assertEquals(expLabels, ds.getLabels());

    // Layout 2: array writables for BOTH the features and the labels
    data = new ArrayList<>();
    for (int row = 0; row < featureRows.length; row++) {
        data.add(Arrays.<Writable>asList(
                        new NDArrayWritable(Nd4j.create(featureRows[row].clone())),
                        new NDArrayWritable(Nd4j.create(labelRows[row].clone()))));
    }
    rr = new CollectionRecordReader(data);
    rrdsi = new RecordReaderDataSetIterator(rr, batchSize, labelIndexFrom, labelIndexTo, regression);
    ds = rrdsi.next();

    assertEquals(expFeatures, ds.getFeatures());
    assertEquals(expLabels, ds.getLabels());
}
Also used : DataSet(org.nd4j.linalg.dataset.DataSet) RecordReader(org.datavec.api.records.reader.RecordReader) CollectionRecordReader(org.datavec.api.records.reader.impl.collection.CollectionRecordReader) CSVSequenceRecordReader(org.datavec.api.records.reader.impl.csv.CSVSequenceRecordReader) CSVRecordReader(org.datavec.api.records.reader.impl.csv.CSVRecordReader) SequenceRecordReader(org.datavec.api.records.reader.SequenceRecordReader) CollectionSequenceRecordReader(org.datavec.api.records.reader.impl.collection.CollectionSequenceRecordReader) CollectionRecordReader(org.datavec.api.records.reader.impl.collection.CollectionRecordReader) DoubleWritable(org.datavec.api.writable.DoubleWritable) NDArrayWritable(org.datavec.common.data.NDArrayWritable) INDArray(org.nd4j.linalg.api.ndarray.INDArray) DataSetIterator(org.nd4j.linalg.dataset.api.iterator.DataSetIterator) Test(org.junit.Test)

Example 3 with NDArrayWritable

use of org.datavec.common.data.NDArrayWritable in project deeplearning4j by deeplearning4j.

the class DataVecSequenceDataSetFunction method call.

/**
 * Converts one sequence (a list of time steps, each a list of writables) into a
 * single-example DataSet. Features and labels are allocated with a leading dimension of 1
 * and a trailing dimension equal to the number of time steps.
 *
 * @param input the sequence; each inner list is one time step
 * @return the DataSet built from the sequence, after the optional pre-processor has run
 * @throws Exception if the writable converter fails, or if a non-array feature value cannot
 *                   be read as a double
 */
@Override
public DataSet call(List<List<Writable>> input) throws Exception {
    final int timeSeriesLength = input.size();
    final int labelWidth = regression ? 1 : numPossibleLabels;

    INDArray labels = Nd4j.zeros(1, labelWidth, timeSeriesLength);
    INDArray features = null;

    // Reusable [example, column, time] index buffers; the example index stays 0
    int[] featurePos = new int[3];
    int[] labelPos = new int[3];

    int t = 0;
    for (List<Writable> step : input) {
        if (t == 0) {
            // Feature width comes from the first step: every column except the label one
            features = Nd4j.zeros(1, step.size() - 1, timeSeriesLength);
        }

        int column = 0;
        int featureColumn = 0;
        for (Writable raw : step) {
            Writable current = raw;
            if (converter != null) {
                current = converter.convert(current);
            }

            boolean isLabelColumn = (column == labelIndex);
            column++;

            if (!isLabelColumn) {
                //feature
                featurePos[1] = featureColumn++;
                featurePos[2] = t;
                try {
                    features.putScalar(featurePos, current.toDouble());
                } catch (UnsupportedOperationException e) {
                    // Not a scalar: only an array writable is acceptable here
                    if (!(current instanceof NDArrayWritable)) {
                        throw e;
                    }
                    INDArray array = ((NDArrayWritable) current).get();
                    features.get(NDArrayIndex.point(featurePos[0]), NDArrayIndex.all(),
                                    NDArrayIndex.point(featurePos[2])).putRow(0, array);
                }
                continue;
            }

            //label
            if (regression) {
                labelPos[2] = t;
                labels.putScalar(labelPos, current.toDouble());
            } else {
                INDArray oneHot = FeatureUtil.toOutcomeVector(current.toInt(), numPossibleLabels);
                //1d from [1,nOut,timeSeriesLength] -> tensor t along dimension 1 is at time t
                labels.tensorAlongDimension(t, 1).assign(oneHot);
            }
        }
        t++;
    }

    DataSet result = new DataSet(features, labels);
    if (preProcessor != null) {
        preProcessor.preProcess(result);
    }
    return result;
}
Also used : NDArrayWritable(org.datavec.common.data.NDArrayWritable) INDArray(org.nd4j.linalg.api.ndarray.INDArray) DataSet(org.nd4j.linalg.dataset.DataSet) NDArrayWritable(org.datavec.common.data.NDArrayWritable) Writable(org.datavec.api.writable.Writable) List(java.util.List)

Example 4 with NDArrayWritable

use of org.datavec.common.data.NDArrayWritable in project deeplearning4j by deeplearning4j.

the class RecordReaderDataSetIterator method getDataSet.

/**
 * Builds a single-example DataSet from one record.
 *
 * <p>Handled layouts, checked in this order:
 * <ol>
 *   <li>Two entries that are the very same array writable instance: that array is used as
 *       both features and labels.</li>
 *   <li>Two entries whose first is an array writable: the first is the feature array and the
 *       second is the label (one-hot for classification; array or scalar for regression).</li>
 *   <li>Anything else: the columns are walked one by one and routed to the label or to the
 *       feature vector according to {@code labelIndex} / {@code labelIndexTo}.</li>
 * </ol>
 *
 * <p>Side effect: when {@code numPossibleLabels >= 1} and {@code labelIndex < 0}, the
 * {@code labelIndex} field is set to the last column of this record.
 *
 * @param record the writables of one record
 * @return the DataSet for the record; when {@code labelIndex} is still negative the feature
 *         vector is also used as the label
 * @throws DL4JInvalidInputException if a classification label is outside
 *         {@code [0, numPossibleLabels)}
 * @throws IllegalStateException if the single-label column is reached while
 *         {@code numPossibleLabels < 1}
 */
private DataSet getDataSet(List<Writable> record) {
    // The parameter is already a List, so it is used directly; the former
    // "instanceof List ? record : copy" check could never take the copy branch.
    List<Writable> currList = record;
    //allow people to specify label index as -1 and infer the last possible label
    if (numPossibleLabels >= 1 && labelIndex < 0) {
        labelIndex = record.size() - 1;
    }
    INDArray label = null;
    INDArray featureVector = null;
    int featureCount = 0;
    int labelCount = 0;
    //no labels: both entries are the same array writable instance
    if (currList.size() == 2 && currList.get(1) instanceof NDArrayWritable && currList.get(0) instanceof NDArrayWritable && currList.get(0) == currList.get(1)) {
        NDArrayWritable writable = (NDArrayWritable) currList.get(0);
        return new DataSet(writable.get(), writable.get());
    }
    //array features + one label entry
    if (currList.size() == 2 && currList.get(0) instanceof NDArrayWritable) {
        if (!regression) {
            label = FeatureUtil.toOutcomeVector((int) Double.parseDouble(currList.get(1).toString()), numPossibleLabels);
        } else {
            if (currList.get(1) instanceof NDArrayWritable) {
                label = ((NDArrayWritable) currList.get(1)).get();
            } else {
                label = Nd4j.scalar(currList.get(1).toDouble());
            }
        }
        NDArrayWritable ndArrayWritable = (NDArrayWritable) currList.get(0);
        featureVector = ndArrayWritable.get();
        return new DataSet(featureVector, label);
    }
    for (int j = 0; j < currList.size(); j++) {
        Writable current = currList.get(j);
        //calling toString() on an ndarray writable is an insane slow down here, so skip it for those
        if (!(current instanceof NDArrayWritable) && current.toString().isEmpty())
            continue;
        if (regression && j == labelIndex && j == labelIndexTo && current instanceof NDArrayWritable) {
            //Case: NDArrayWritable for the labels
            label = ((NDArrayWritable) current).get();
        } else if (regression && j >= labelIndex && j <= labelIndexTo) {
            //This is the multi-label regression case
            if (label == null)
                label = Nd4j.create(1, (labelIndexTo - labelIndex + 1));
            label.putScalar(labelCount++, current.toDouble());
        } else if (labelIndex >= 0 && j == labelIndex) {
            //single label case (classification, etc)
            if (converter != null) {
                try {
                    current = converter.convert(current);
                } catch (WritableConverterException e) {
                    // NOTE(review): the conversion failure is only printed and the unconverted
                    // value is used below - confirm this best-effort behaviour is intended
                    e.printStackTrace();
                }
            }
            if (numPossibleLabels < 1)
                throw new IllegalStateException("Number of possible labels invalid, must be >= 1");
            if (regression) {
                label = Nd4j.scalar(current.toDouble());
            } else {
                int curr = current.toInt();
                if (curr < 0 || curr >= numPossibleLabels) {
                    throw new DL4JInvalidInputException("Invalid classification data: expect label value (at label index column = " + labelIndex + ") to be in range 0 to " + (numPossibleLabels - 1) + " inclusive (0 to numClasses-1, with numClasses=" + numPossibleLabels + "); got label value of " + current);
                }
                label = FeatureUtil.toOutcomeVector(curr, numPossibleLabels);
            }
        } else {
            //feature column
            try {
                double value = current.toDouble();
                if (featureVector == null) {
                    // Lazily allocate the feature vector on the first scalar feature
                    if (regression && labelIndex >= 0) {
                        //Handle the possibly multi-label regression case here:
                        int nLabels = labelIndexTo - labelIndex + 1;
                        featureVector = Nd4j.create(1, currList.size() - nLabels);
                    } else {
                        //Classification case, and also no-labels case
                        featureVector = Nd4j.create(labelIndex >= 0 ? currList.size() - 1 : currList.size());
                    }
                }
                featureVector.putScalar(featureCount++, value);
            } catch (UnsupportedOperationException e) {
                // This isn't a scalar, so check if we got an array already
                if (current instanceof NDArrayWritable) {
                    assert featureVector == null;
                    featureVector = ((NDArrayWritable) current).get();
                } else {
                    throw e;
                }
            }
        }
    }
    return new DataSet(featureVector, labelIndex >= 0 ? label : featureVector);
}
Also used : DataSet(org.nd4j.linalg.dataset.DataSet) ArrayList(java.util.ArrayList) NDArrayWritable(org.datavec.common.data.NDArrayWritable) Writable(org.datavec.api.writable.Writable) WritableConverterException(org.datavec.api.io.converters.WritableConverterException) NDArrayWritable(org.datavec.common.data.NDArrayWritable) INDArray(org.nd4j.linalg.api.ndarray.INDArray) ArrayList(java.util.ArrayList) List(java.util.List) DL4JInvalidInputException(org.deeplearning4j.exception.DL4JInvalidInputException)

Example 5 with NDArrayWritable

use of org.datavec.common.data.NDArrayWritable in project deeplearning4j by deeplearning4j.

the class SequenceRecordReaderDataSetIterator method getLabels.

/**
 * Converts a label sequence into a 2d array of shape [timeSeriesLength, vectorSize].
 *
 * <p>For regression, every writable of a time step is copied into the row for that step
 * (array writables contribute all their elements, others a single value); the row width is
 * determined from the first time step. For classification, the first writable of each step
 * is read as a class index and written as a one-hot row of width {@code numPossibleLabels}.
 *
 * @param labels the label sequence; each inner list is one time step
 * @return the label array, or {@code null} when {@code labels} has no time steps
 * @throws DL4JInvalidInputException if a class index is outside {@code [0, numPossibleLabels)}
 */
private INDArray getLabels(List<List<Writable>> labels) {
    //time series/sequence length
    final int timeSeriesLength = labels.size();

    INDArray out = null;
    int row = 0;
    for (List<Writable> step : labels) {
        if (row == 0) {
            // Row width is fixed by the first time step
            int vectorSize = 0;
            if (!regression) {
                vectorSize = numPossibleLabels;
            } else {
                for (Writable w : step) {
                    if (w instanceof NDArrayWritable) {
                        vectorSize += ((NDArrayWritable) w).get().length();
                    } else {
                        vectorSize++;
                    }
                }
            }
            //[timeSeriesLength,vectorSize]
            out = Nd4j.create(new int[] { timeSeriesLength, vectorSize }, 'f');
        }

        if (regression) {
            //Load all values, left to right
            int col = 0;
            for (Writable current : step) {
                if (current instanceof NDArrayWritable) {
                    INDArray values = ((NDArrayWritable) current).get();
                    INDArrayIndex[] target = new INDArrayIndex[] { NDArrayIndex.point(row),
                                    NDArrayIndex.interval(col, col + values.length()) };
                    out.put(target, values);
                    col += values.length();
                } else {
                    out.put(row, col++, current.toDouble());
                }
            }
        } else {
            //Expect a single value (index) -> convert to one-hot vector
            int idx = step.iterator().next().toInt();
            if (idx < 0 || idx >= numPossibleLabels) {
                throw new DL4JInvalidInputException("Invalid classification data: expect label value to be in range 0 to " + (numPossibleLabels - 1) + " inclusive (0 to numClasses-1, with numClasses=" + numPossibleLabels + "); got label value of " + idx);
            }
            out.getRow(row).assign(FeatureUtil.toOutcomeVector(idx, numPossibleLabels));
        }
        row++;
    }
    return out;
}
Also used : NDArrayWritable(org.datavec.common.data.NDArrayWritable) INDArray(org.nd4j.linalg.api.ndarray.INDArray) NDArrayWritable(org.datavec.common.data.NDArrayWritable) Writable(org.datavec.api.writable.Writable) DL4JInvalidInputException(org.deeplearning4j.exception.DL4JInvalidInputException)

Aggregations

NDArrayWritable (org.datavec.common.data.NDArrayWritable): 15; INDArray (org.nd4j.linalg.api.ndarray.INDArray): 15; Writable (org.datavec.api.writable.Writable): 14; DataSet (org.nd4j.linalg.dataset.DataSet): 9; SequenceRecordReader (org.datavec.api.records.reader.SequenceRecordReader): 5; CollectionSequenceRecordReader (org.datavec.api.records.reader.impl.collection.CollectionSequenceRecordReader): 5; CSVSequenceRecordReader (org.datavec.api.records.reader.impl.csv.CSVSequenceRecordReader): 5; DoubleWritable (org.datavec.api.writable.DoubleWritable): 5; Test (org.junit.Test): 5; IntWritable (org.datavec.api.writable.IntWritable): 4; List (java.util.List): 3; DL4JInvalidInputException (org.deeplearning4j.exception.DL4JInvalidInputException): 3; WritableConverterException (org.datavec.api.io.converters.WritableConverterException): 2; MultiDataSetIterator (org.nd4j.linalg.dataset.api.iterator.MultiDataSetIterator): 2; ArrayList (java.util.ArrayList): 1; RecordReader (org.datavec.api.records.reader.RecordReader): 1; CollectionRecordReader (org.datavec.api.records.reader.impl.collection.CollectionRecordReader): 1; CSVRecordReader (org.datavec.api.records.reader.impl.csv.CSVRecordReader): 1; Pair (org.deeplearning4j.berkeley.Pair): 1; DataSetIterator (org.nd4j.linalg.dataset.api.iterator.DataSetIterator): 1