Search in sources :

Example 6 with NDArrayWritable

use of org.datavec.common.data.NDArrayWritable in project deeplearning4j by deeplearning4j.

the class SequenceRecordReaderDataSetIterator method getFeaturesLabelsSingleReader.

/**
 * Converts the time steps of one sequence into a features array and a labels array, each holding
 * one row per time step.
 *
 * The writable at position labelIndex of every time step is treated as the label; every other
 * writable is a feature. An NDArrayWritable contributes get().length() values, any other writable
 * contributes a single value. Array sizes are derived from the first time step only.
 *
 * @param input the time steps of the sequence; each time step is a list of writables
 * @return a two element array { features, labels }; both entries are null when input is empty
 * @throws DL4JInvalidInputException for classification, when a label is outside 0..numPossibleLabels-1
 */
private INDArray[] getFeaturesLabelsSingleReader(List<List<Writable>> input) {
    INDArray featureMatrix = null;
    INDArray labelMatrix = null;
    int row = 0;
    for (List<Writable> timeStep : input) {
        if (row == 0) {
            // Feature width: one per writable, except NDArrayWritable which counts its full length.
            int featureWidth = 0;
            int position = 0;
            for (Writable value : timeStep) {
                boolean isLabelColumn = (position++ == labelIndex);
                if (isLabelColumn) {
                    continue;
                }
                if (value instanceof NDArrayWritable) {
                    featureWidth += ((NDArrayWritable) value).get().length();
                } else {
                    featureWidth += 1;
                }
            }
            featureMatrix = Nd4j.zeros(input.size(), featureWidth);

            // Label width: number of classes (one-hot) for classification, otherwise the size of the label value.
            int labelWidth;
            if (!regression) {
                labelWidth = numPossibleLabels;
            } else if (timeStep.get(labelIndex) instanceof NDArrayWritable) {
                labelWidth = ((NDArrayWritable) timeStep.get(labelIndex)).get().length();
            } else {
                labelWidth = 1;
            }
            labelMatrix = Nd4j.zeros(input.size(), labelWidth);
        }

        int position = 0;
        int featureColumn = 0;
        for (Writable value : timeStep) {
            boolean isLabelColumn = (position++ == labelIndex);
            if (!isLabelColumn) {
                if (value instanceof NDArrayWritable) {
                    // Array feature: copy all of its values into the next block of columns
                    INDArray block = ((NDArrayWritable) value).get();
                    int blockLength = block.length();
                    INDArrayIndex[] target = new INDArrayIndex[] { NDArrayIndex.point(row), NDArrayIndex.interval(featureColumn, featureColumn + blockLength) };
                    featureMatrix.put(target, block);
                    featureColumn += blockLength;
                } else {
                    // Scalar feature: occupies exactly one column
                    featureMatrix.put(row, featureColumn++, value.toDouble());
                }
                continue;
            }

            if (regression) {
                if (value instanceof NDArrayWritable) {
                    labelMatrix.putRow(row, ((NDArrayWritable) value).get());
                } else {
                    labelMatrix.put(row, 0, value.toDouble());
                }
                continue;
            }

            // Classification: the label is a class index, written as a one-hot row
            int classIndex = value.toInt();
            if (classIndex < 0 || classIndex >= numPossibleLabels) {
                throw new DL4JInvalidInputException("Invalid classification data: expect label value (at label index column = " + labelIndex + ") to be in range 0 to " + (numPossibleLabels - 1) + " inclusive (0 to numClasses-1, with numClasses=" + numPossibleLabels + "); got label value of " + value);
            }
            // The labels array starts as all zeros, so only the active class needs to be set
            labelMatrix.putScalar(row, value.toInt(), 1.0);
        }
        row++;
    }
    return new INDArray[] { featureMatrix, labelMatrix };
}
Also used : NDArrayWritable(org.datavec.common.data.NDArrayWritable) INDArray(org.nd4j.linalg.api.ndarray.INDArray) NDArrayWritable(org.datavec.common.data.NDArrayWritable) Writable(org.datavec.api.writable.Writable) DL4JInvalidInputException(org.deeplearning4j.exception.DL4JInvalidInputException)

Example 7 with NDArrayWritable

use of org.datavec.common.data.NDArrayWritable in project deeplearning4j by deeplearning4j.

the class RecordReaderMultiDataSetIterator method convertWritablesSequence.

/**
 * Convert the writables to a sequence (3d) data set, and also return the mask array (if necessary).
 *
 * <p>The data array is created in 'f' order with shape [minValues, size, maxTSLength], where size is
 * the number of writables in the first time step of the first sequence (entire reader),
 * details.oneHotNumClasses (one-hot), or details.subsetEndInclusive - details.subsetStart + 1
 * (column subset).
 *
 * @param list            sequences to convert: one entry per example, each a list of time steps,
 *                        each time step a list of writables
 * @param minValues       number of examples written to the output (examples 0 .. minValues - 1 of list)
 * @param maxTSLength     size of the time dimension; -1 means "use the length of the first sequence"
 * @param details         selects what is converted: the entire reader, a one-hot column, or a column subset
 * @param longestSequence read only when the alignment mode is neither ALIGN_START nor EQUAL_LENGTH;
 *                        longestSequence[i] - sequence.size() is then the time offset of example i
 * @return pair of (data array, mask array); the mask array is null when no sequence in list is
 *         shorter than maxTSLength
 */
private Pair<INDArray, INDArray> convertWritablesSequence(List<List<List<Writable>>> list, int minValues, int maxTSLength, SubsetDetails details, int[] longestSequence) {
    if (maxTSLength == -1) {
        maxTSLength = list.get(0).size();
    }
    INDArray arr;
    if (details.entireReader) {
        int size = list.get(0).iterator().next().size();
        arr = Nd4j.create(new int[] { minValues, size, maxTSLength }, 'f');
    } else if (details.oneHot) {
        arr = Nd4j.create(new int[] { minValues, details.oneHotNumClasses, maxTSLength }, 'f');
    } else {
        arr = Nd4j.create(new int[] { minValues, details.subsetEndInclusive - details.subsetStart + 1, maxTSLength }, 'f');
    }
    //A mask is needed as soon as any sequence is shorter than the time dimension
    boolean needMaskArray = false;
    for (List<List<Writable>> c : list) {
        if (c.size() < maxTSLength) {
            needMaskArray = true;
            //One short sequence is enough: no need to scan the rest
            break;
        }
    }
    INDArray maskArray;
    if (needMaskArray) {
        //Start with "all present"; padded steps are zeroed per example below
        maskArray = Nd4j.ones(minValues, maxTSLength);
    } else {
        maskArray = null;
    }
    for (int i = 0; i < minValues; i++) {
        List<List<Writable>> sequence = list.get(i);
        //Offset for alignment:
        int startOffset;
        if (alignmentMode == AlignmentMode.ALIGN_START || alignmentMode == AlignmentMode.EQUAL_LENGTH) {
            startOffset = 0;
        } else {
            //Align end
            //Only practical differences here are: (a) offset, and (b) masking
            startOffset = longestSequence[i] - sequence.size();
        }
        int t = 0;
        int k;
        for (List<Writable> timeStep : sequence) {
            //k: position of this time step along the time dimension of the output
            k = startOffset + t++;
            if (details.entireReader) {
                //Convert entire reader contents, without modification
                Iterator<Writable> iter = timeStep.iterator();
                int j = 0;
                while (iter.hasNext()) {
                    Writable w = iter.next();
                    try {
                        arr.putScalar(i, j, k, w.toDouble());
                    } catch (UnsupportedOperationException e) {
                        // This isn't a scalar, so check if we got an array already
                        if (w instanceof NDArrayWritable) {
                            arr.get(NDArrayIndex.point(i), NDArrayIndex.all(), NDArrayIndex.point(k)).putRow(0, ((NDArrayWritable) w).get());
                        } else {
                            throw e;
                        }
                    }
                    j++;
                }
            } else if (details.oneHot) {
                //Convert a single column to a one-hot representation.
                //timeStep is statically a List, so the column can be read by index directly.
                Writable w = timeStep.get(details.subsetStart);
                int classIdx = w.toInt();
                arr.putScalar(i, classIdx, k, 1.0);
            } else {
                //Convert a subset of the columns...
                Iterator<Writable> iter = timeStep.iterator();
                //Skip the columns before the subset
                for (int j = 0; j < details.subsetStart; j++) {
                    iter.next();
                }
                int l = 0;
                for (int j = details.subsetStart; j <= details.subsetEndInclusive; j++) {
                    Writable w = iter.next();
                    try {
                        arr.putScalar(i, l++, k, w.toDouble());
                    } catch (UnsupportedOperationException e) {
                        // This isn't a scalar, so check if we got an array already
                        if (w instanceof NDArrayWritable) {
                            arr.get(NDArrayIndex.point(i), NDArrayIndex.all(), NDArrayIndex.point(k)).putRow(0, ((NDArrayWritable) w).get().get(NDArrayIndex.all(), NDArrayIndex.interval(details.subsetStart, details.subsetEndInclusive + 1)));
                        } else {
                            throw e;
                        }
                    }
                }
            }
        }
        //For any remaining time steps: set mask array to 0 (just padding)
        if (needMaskArray) {
            //Masking array entries at start (for align end)
            if (alignmentMode == AlignmentMode.ALIGN_END) {
                for (int t2 = 0; t2 < startOffset; t2++) {
                    maskArray.putScalar(i, t2, 0.0);
                }
            }
            //Masking array entries at end (for align start)
            if (alignmentMode == AlignmentMode.ALIGN_START) {
                for (int t2 = t; t2 < maxTSLength; t2++) {
                    maskArray.putScalar(i, t2, 0.0);
                }
            }
        }
    }
    return new Pair<>(arr, maskArray);
}
Also used : NDArrayWritable(org.datavec.common.data.NDArrayWritable) INDArray(org.nd4j.linalg.api.ndarray.INDArray) MultiDataSetIterator(org.nd4j.linalg.dataset.api.iterator.MultiDataSetIterator) NDArrayWritable(org.datavec.common.data.NDArrayWritable) Writable(org.datavec.api.writable.Writable) Pair(org.deeplearning4j.berkeley.Pair)

Example 8 with NDArrayWritable

use of org.datavec.common.data.NDArrayWritable in project deeplearning4j by deeplearning4j.

the class RecordReaderMultiDataSetIterator method convertWritables.

/**
 * Builds a single array from the first minValues records of list, laid out as requested by details.
 *
 * Three layouts are handled: the entire record, a one-hot encoding of the column at
 * details.subsetStart, or the column range subsetStart..subsetEndInclusive. When the selection is
 * one single NDArrayWritable, the output takes that array's shape with dimension 0 replaced by
 * minValues, and each record is stored through putExample.
 *
 * @param list      records to convert; each record is a list of writables
 * @param minValues number of records to convert, and size of dimension 0 of the result
 * @param details   describes which columns to convert and how
 * @return the populated array
 */
private INDArray convertWritables(List<List<Writable>> list, int minValues, SubsetDetails details) {
    // ---- Allocate the output; the shape is derived from the first record ----
    INDArray out;
    if (details.entireReader) {
        List<Writable> firstRecord = list.get(0);
        boolean loneArray = firstRecord.size() == 1 && firstRecord.get(0) instanceof NDArrayWritable;
        if (loneArray) {
            //Special case: single NDArrayWritable...
            int[] dims = ArrayUtils.clone(((NDArrayWritable) firstRecord.get(0)).get().shape());
            dims[0] = minValues;
            out = Nd4j.create(dims);
        } else {
            out = Nd4j.create(minValues, firstRecord.size());
        }
    } else if (details.oneHot) {
        out = Nd4j.zeros(minValues, details.oneHotNumClasses);
    } else if (details.subsetStart == details.subsetEndInclusive && list.get(0).get(details.subsetStart) instanceof NDArrayWritable) {
        //Special case: single NDArrayWritable (example: ImageRecordReader)
        int[] dims = ArrayUtils.clone(((NDArrayWritable) list.get(0).get(details.subsetStart)).get().shape());
        dims[0] = minValues;
        out = Nd4j.create(dims);
    } else {
        out = Nd4j.create(minValues, details.subsetEndInclusive - details.subsetStart + 1);
    }

    // ---- Fill the output, one record per index along dimension 0 ----
    for (int row = 0; row < minValues; row++) {
        List<Writable> record = list.get(row);

        if (details.entireReader) {
            //Convert entire reader contents, without modification
            int column = 0;
            for (Writable value : record) {
                try {
                    out.putScalar(row, column, value.toDouble());
                } catch (UnsupportedOperationException e) {
                    // Not a scalar: only an NDArrayWritable is acceptable here
                    if (!(value instanceof NDArrayWritable)) {
                        throw e;
                    }
                    putExample(out, ((NDArrayWritable) value).get(), row);
                }
                column++;
            }
            continue;
        }

        if (details.oneHot) {
            //Convert a single column to a one-hot representation: the column value is the class index
            Writable label = record.get(details.subsetStart);
            out.putScalar(row, label.toInt(), 1.0);
            continue;
        }

        //Special case: subsetStart == subsetEndInclusive && NDArrayWritable. Example: ImageRecordReader
        if (details.subsetStart == details.subsetEndInclusive && (record.get(details.subsetStart) instanceof NDArrayWritable)) {
            putExample(out, ((NDArrayWritable) record.get(details.subsetStart)).get(), row);
            continue;
        }

        // General subset: skip the leading columns, then copy the selected range
        Iterator<Writable> values = record.iterator();
        for (int skipped = 0; skipped < details.subsetStart; skipped++) {
            values.next();
        }
        for (int source = details.subsetStart, column = 0; source <= details.subsetEndInclusive; source++, column++) {
            Writable value = values.next();
            try {
                out.putScalar(row, column, value.toDouble());
            } catch (UnsupportedOperationException e) {
                // Not a scalar: only an NDArrayWritable is acceptable here
                if (!(value instanceof NDArrayWritable)) {
                    throw e;
                }
                putExample(out, ((NDArrayWritable) value).get(), row);
            }
        }
    }
    return out;
}
Also used : NDArrayWritable(org.datavec.common.data.NDArrayWritable) INDArray(org.nd4j.linalg.api.ndarray.INDArray) MultiDataSetIterator(org.nd4j.linalg.dataset.api.iterator.MultiDataSetIterator) NDArrayWritable(org.datavec.common.data.NDArrayWritable) Writable(org.datavec.api.writable.Writable)

Example 9 with NDArrayWritable

use of org.datavec.common.data.NDArrayWritable in project deeplearning4j by deeplearning4j.

the class SequenceRecordReaderDataSetIterator method getFeatures.

/**
 * Converts the time steps of one sequence into a 2d array of shape [timeSeriesLength, vectorSize],
 * created in 'f' order.
 *
 * vectorSize is computed from the first time step: an NDArrayWritable counts for get().length()
 * values, any other writable for one value.
 *
 * @param features the time steps of the sequence; each time step is a list of writables
 * @return the populated array, or null when features is empty
 */
private INDArray getFeatures(List<List<Writable>> features) {
    // dims = [timeSeriesLength, vectorSize]
    int[] dims = new int[2];
    dims[0] = features.size();

    INDArray result = null;
    int row = 0;
    for (List<Writable> timeStep : features) {
        if (row == 0) {
            // The width is only known once the first time step has been inspected
            for (Writable value : timeStep) {
                dims[1] += (value instanceof NDArrayWritable) ? ((NDArrayWritable) value).get().length() : 1;
            }
            result = Nd4j.create(dims, 'f');
        }

        int column = 0;
        for (Writable value : timeStep) {
            if (!(value instanceof NDArrayWritable)) {
                // Scalar writable: exactly one column
                result.put(row, column++, value.toDouble());
                continue;
            }
            // Array writable: fills a contiguous run of columns
            INDArray block = ((NDArrayWritable) value).get();
            INDArrayIndex[] target = new INDArrayIndex[] { NDArrayIndex.point(row), NDArrayIndex.interval(column, column + block.length()) };
            result.put(target, block);
            column += block.length();
        }
        row++;
    }
    return result;
}
Also used : NDArrayWritable(org.datavec.common.data.NDArrayWritable) INDArray(org.nd4j.linalg.api.ndarray.INDArray) NDArrayWritable(org.datavec.common.data.NDArrayWritable) Writable(org.datavec.api.writable.Writable)

Example 10 with NDArrayWritable

use of org.datavec.common.data.NDArrayWritable in project deeplearning4j by deeplearning4j.

the class RecordReaderDataSetiteratorTest method testSeqRRDSIArrayWritableOneReaderRegression.

/**
 * Regression with a single sequence reader where both the features (column 0) and the labels
 * (column 1) of every time step are NDArrayWritable instances holding 3 values.
 */
@Test
public void testSeqRRDSIArrayWritableOneReaderRegression() {
    // Each time step: [features array, labels array]
    List<Writable> firstSeqStep0 = Arrays.<Writable>asList(
            new NDArrayWritable(Nd4j.create(new double[] { 1, 2, 3 })),
            new NDArrayWritable(Nd4j.create(new double[] { 100, 200, 300 })));
    List<Writable> firstSeqStep1 = Arrays.<Writable>asList(
            new NDArrayWritable(Nd4j.create(new double[] { 4, 5, 6 })),
            new NDArrayWritable(Nd4j.create(new double[] { 400, 500, 600 })));
    List<Writable> secondSeqStep0 = Arrays.<Writable>asList(
            new NDArrayWritable(Nd4j.create(new double[] { 7, 8, 9 })),
            new NDArrayWritable(Nd4j.create(new double[] { 700, 800, 900 })));
    List<Writable> secondSeqStep1 = Arrays.<Writable>asList(
            new NDArrayWritable(Nd4j.create(new double[] { 10, 11, 12 })),
            new NDArrayWritable(Nd4j.create(new double[] { 1000, 1100, 1200 })));

    List<List<Writable>> firstSequence = new ArrayList<>();
    firstSequence.add(firstSeqStep0);
    firstSequence.add(firstSeqStep1);
    List<List<Writable>> secondSequence = new ArrayList<>();
    secondSequence.add(secondSeqStep0);
    secondSequence.add(secondSeqStep1);

    SequenceRecordReader reader = new CollectionSequenceRecordReader(Arrays.asList(firstSequence, secondSequence));
    SequenceRecordReaderDataSetIterator iterator = new SequenceRecordReaderDataSetIterator(reader, 2, -1, 1, true);
    DataSet batch = iterator.next();

    // Expected shape: 2 examples, 3 values per time step, 2 time steps
    INDArray firstExampleFeatures = Nd4j.create(new double[][] { { 1, 4 }, { 2, 5 }, { 3, 6 } });
    INDArray secondExampleFeatures = Nd4j.create(new double[][] { { 7, 10 }, { 8, 11 }, { 9, 12 } });
    INDArray expectedFeatures = Nd4j.create(2, 3, 2);
    expectedFeatures.tensorAlongDimension(0, 1, 2).assign(firstExampleFeatures);
    expectedFeatures.tensorAlongDimension(1, 1, 2).assign(secondExampleFeatures);

    INDArray firstExampleLabels = Nd4j.create(new double[][] { { 100, 400 }, { 200, 500 }, { 300, 600 } });
    INDArray secondExampleLabels = Nd4j.create(new double[][] { { 700, 1000 }, { 800, 1100 }, { 900, 1200 } });
    INDArray expectedLabels = Nd4j.create(2, 3, 2);
    expectedLabels.tensorAlongDimension(0, 1, 2).assign(firstExampleLabels);
    expectedLabels.tensorAlongDimension(1, 1, 2).assign(secondExampleLabels);

    assertEquals(expectedFeatures, batch.getFeatureMatrix());
    assertEquals(expectedLabels, batch.getLabels());
}
Also used : NDArrayWritable(org.datavec.common.data.NDArrayWritable) CSVSequenceRecordReader(org.datavec.api.records.reader.impl.csv.CSVSequenceRecordReader) SequenceRecordReader(org.datavec.api.records.reader.SequenceRecordReader) CollectionSequenceRecordReader(org.datavec.api.records.reader.impl.collection.CollectionSequenceRecordReader) INDArray(org.nd4j.linalg.api.ndarray.INDArray) DataSet(org.nd4j.linalg.dataset.DataSet) CollectionSequenceRecordReader(org.datavec.api.records.reader.impl.collection.CollectionSequenceRecordReader) IntWritable(org.datavec.api.writable.IntWritable) NDArrayWritable(org.datavec.common.data.NDArrayWritable) DoubleWritable(org.datavec.api.writable.DoubleWritable) Writable(org.datavec.api.writable.Writable) Test(org.junit.Test)

Aggregations

NDArrayWritable (org.datavec.common.data.NDArrayWritable): 15, INDArray (org.nd4j.linalg.api.ndarray.INDArray): 15, Writable (org.datavec.api.writable.Writable): 14, DataSet (org.nd4j.linalg.dataset.DataSet): 9, SequenceRecordReader (org.datavec.api.records.reader.SequenceRecordReader): 5, CollectionSequenceRecordReader (org.datavec.api.records.reader.impl.collection.CollectionSequenceRecordReader): 5, CSVSequenceRecordReader (org.datavec.api.records.reader.impl.csv.CSVSequenceRecordReader): 5, DoubleWritable (org.datavec.api.writable.DoubleWritable): 5, Test (org.junit.Test): 5, IntWritable (org.datavec.api.writable.IntWritable): 4, List (java.util.List): 3, DL4JInvalidInputException (org.deeplearning4j.exception.DL4JInvalidInputException): 3, WritableConverterException (org.datavec.api.io.converters.WritableConverterException): 2, MultiDataSetIterator (org.nd4j.linalg.dataset.api.iterator.MultiDataSetIterator): 2, ArrayList (java.util.ArrayList): 1, RecordReader (org.datavec.api.records.reader.RecordReader): 1, CollectionRecordReader (org.datavec.api.records.reader.impl.collection.CollectionRecordReader): 1, CSVRecordReader (org.datavec.api.records.reader.impl.csv.CSVRecordReader): 1, Pair (org.deeplearning4j.berkeley.Pair): 1, DataSetIterator (org.nd4j.linalg.dataset.api.iterator.DataSetIterator): 1