Search in sources:

Example 41 with DataSet

use of org.nd4j.linalg.dataset.DataSet in project deeplearning4j by deeplearning4j.

the class MLLibUtil method fromLabeledPoint.

/**
 * Converts a single MLlib {@link LabeledPoint} into a {@link DataSet}.
 * <p>
 * The point's feature vector is copied into an array via {@code Nd4j.create},
 * and its label is truncated to an {@code int} and handed to
 * {@code FeatureUtil.toOutcomeVector} together with {@code numPossibleLabels}
 * to build the label array.
 *
 * @param point the labeled point to convert
 * @param numPossibleLabels the number of possible labels, passed through to
 *                          {@code FeatureUtil.toOutcomeVector}
 * @return a {@link DataSet} holding the point's features and its outcome vector
 */
private static DataSet fromLabeledPoint(LabeledPoint point, int numPossibleLabels) {
    // The label arrives as a double; the cast drops any fractional part.
    final int labelIndex = (int) point.label();
    final double[] featureValues = point.features().toArray();
    return new DataSet(Nd4j.create(featureValues), FeatureUtil.toOutcomeVector(labelIndex, numPossibleLabels));
}
Also used : DataSet(org.nd4j.linalg.dataset.DataSet) Vector(org.apache.spark.mllib.linalg.Vector)

Example 42 with DataSet

use of org.nd4j.linalg.dataset.DataSet in project deeplearning4j by deeplearning4j.

the class MLLibUtil method fromLabeledPoint.

/**
 * Convert an RDD of labeled points into an RDD of data sets.
 * <p>
 * Each labeled point is paired with its position in the RDD (via
 * {@code zipWithIndex}), converted to a {@link DataSet}, and shuffled with
 * {@code reduceByKey} into {@code count / batchSize} partitions (at least one).
 * Entries that share a key are merged by vertically stacking their features and
 * labels; the resulting data sets are then flattened through the
 * {@code Iterable<DataSet>} view of each {@link DataSet}.
 * <p>
 * Note that {@code batchSize} only influences the number of partitions
 * requested from Spark. Computing that number calls {@code count()}, which
 * runs a Spark job over the input.
 * <p>
 * NOTE(review): the key is the per-element index, so keys look unique and the
 * merge function would rarely, if ever, be invoked - confirm whether grouping
 * by {@code index / batchSize} was intended.
 *
 * @param data the data to convert
 * @param numPossibleLabels the number of possible labels
 * @param batchSize the batch size; must be positive
 * @return the new rdd
 * @throws IllegalArgumentException if {@code batchSize} is not positive
 */
public static JavaRDD<DataSet> fromLabeledPoint(JavaRDD<LabeledPoint> data, final int numPossibleLabels, int batchSize) {
    if (batchSize <= 0) {
        throw new IllegalArgumentException("batchSize must be positive, got " + batchSize);
    }
    //map by index
    JavaPairRDD<Long, LabeledPoint> dataWithIndex = data.zipWithIndex().mapToPair(new PairFunction<Tuple2<LabeledPoint, Long>, Long, LabeledPoint>() {

        @Override
        public Tuple2<Long, LabeledPoint> call(Tuple2<LabeledPoint, Long> labeledPointLongTuple2) throws Exception {
            return new Tuple2<>(labeledPointLongTuple2._2(), labeledPointLongTuple2._1());
        }
    });
    JavaPairRDD<Long, DataSet> mappedData = dataWithIndex.mapToPair(new PairFunction<Tuple2<Long, LabeledPoint>, Long, DataSet>() {

        @Override
        public Tuple2<Long, DataSet> call(Tuple2<Long, LabeledPoint> longLabeledPointTuple2) throws Exception {
            return new Tuple2<>(longLabeledPointTuple2._1(), MLLibUtil.fromLabeledPoint(longLabeledPointTuple2._2(), numPossibleLabels));
        }
    });
    // count / batchSize truncates to 0 when there are fewer points than one batch;
    // never ask Spark for zero partitions.
    final int numPartitions = (int) Math.max(1L, mappedData.count() / batchSize);
    JavaPairRDD<Long, DataSet> aggregated = mappedData.reduceByKey(new Function2<DataSet, DataSet, DataSet>() {

        @Override
        public DataSet call(DataSet v1, DataSet v2) throws Exception {
            return new DataSet(Nd4j.vstack(v1.getFeatureMatrix(), v2.getFeatureMatrix()), Nd4j.vstack(v1.getLabels(), v2.getLabels()));
        }
    }, numPartitions);
    // DataSet is used here as an Iterable<DataSet>; flatMap emits whatever it iterates over.
    return aggregated.flatMap(new BaseFlatMapFunctionAdaptee<Tuple2<Long, DataSet>, DataSet>(new FlatMapFunctionAdapter<Tuple2<Long, DataSet>, DataSet>() {

        @Override
        public Iterable<DataSet> call(Tuple2<Long, DataSet> longDataSetTuple2) throws Exception {
            return longDataSetTuple2._2();
        }
    }));
}
Also used : DataSet(org.nd4j.linalg.dataset.DataSet) FlatMapFunctionAdapter(org.datavec.spark.functions.FlatMapFunctionAdapter) LabeledPoint(org.apache.spark.mllib.regression.LabeledPoint) Tuple2(scala.Tuple2)

Example 43 with DataSet

use of org.nd4j.linalg.dataset.DataSet in project deeplearning4j by deeplearning4j.

the class DataSetExportFunction method call.

/**
 * Writes every {@link DataSet} from the iterator to its own file under
 * {@code outputDir}.
 * <p>
 * File names have the form {@code dataset_<uid>_<n>.bin}, where {@code uid} is
 * the current thread id followed by up to the first 8 characters of the JVM UID,
 * and {@code n} is the running {@code outputCount} of this function instance.
 *
 * @param iter the data sets to export
 * @throws Exception if the target URI cannot be built or the file cannot be written
 */
@Override
public void call(Iterator<DataSet> iter) throws Exception {
    final String jvmUid = UIDProvider.getJVMUID();
    final int prefixLength = Math.min(8, jvmUid.length());
    uid = Thread.currentThread().getId() + jvmUid.substring(0, prefixLength);

    while (iter.hasNext()) {
        final DataSet dataSet = iter.next();
        final String filename = "dataset_" + uid + "_" + (outputCount++) + ".bin";

        // Join directory and file name with exactly one separator.
        final String dir = outputDir.getPath();
        final boolean endsWithSeparator = dir.endsWith("/") || dir.endsWith("\\");
        final String separator = endsWithSeparator ? "" : "/";
        final URI target = new URI(dir + separator + filename);

        final FileSystem fs = FileSystem.get(target, conf);
        try (FSDataOutputStream stream = fs.create(new Path(target))) {
            dataSet.save(stream);
        }
    }
}
Also used : Path(org.apache.hadoop.fs.Path) DataSet(org.nd4j.linalg.dataset.DataSet) FileSystem(org.apache.hadoop.fs.FileSystem) FSDataOutputStream(org.apache.hadoop.fs.FSDataOutputStream) URI(java.net.URI)

Example 44 with DataSet

use of org.nd4j.linalg.dataset.DataSet in project deeplearning4j by deeplearning4j.

the class SplitDataSetExamplesPairFlatMapFunctionAdapter method call.

/**
 * Breaks a {@link DataSet} into the elements of {@code dataSet.asList()} and
 * tags each one with a random integer key drawn from
 * {@code [0, maxKeyIndex)}.
 * <p>
 * The random number generator is created lazily on first use.
 *
 * @param dataSet the data set to split
 * @return one {@code (key, dataSet)} pair per element of {@code dataSet.asList()}
 */
@Override
public Iterable<Tuple2<Integer, DataSet>> call(DataSet dataSet) throws Exception {
    if (r == null) {
        r = new Random();
    }

    final List<DataSet> examples = dataSet.asList();
    final List<Tuple2<Integer, DataSet>> keyed = new ArrayList<>(examples.size());
    for (DataSet example : examples) {
        final int key = r.nextInt(maxKeyIndex);
        keyed.add(new Tuple2<>(key, example));
    }
    return keyed;
}
Also used : Random(java.util.Random) DataSet(org.nd4j.linalg.dataset.DataSet) Tuple2(scala.Tuple2) ArrayList(java.util.ArrayList)

Example 45 with DataSet

use of org.nd4j.linalg.dataset.DataSet in project deeplearning4j by deeplearning4j.

the class DataVecSequenceDataSetFunction method call.

/**
 * Builds a single-example sequence {@link DataSet} from a list of time steps.
 * <p>
 * Labels are allocated as {@code [1, nOut, input.size()]}, where {@code nOut} is
 * 1 for regression and {@code numPossibleLabels} otherwise. Features are
 * allocated on the first time step as
 * {@code [1, step.size() - 1, input.size()]}. Within each step, the value at
 * position {@code labelIndex} is the label; every other value is written as a
 * feature, in order. If a {@code converter} is set it is applied to each value
 * first, and if a {@code preProcessor} is set it is applied to the finished
 * data set.
 *
 * @param input the sequence; one inner list of writables per time step
 * @return the assembled data set
 * @throws UnsupportedOperationException if a feature value is neither
 *         convertible to a double nor an {@link NDArrayWritable}
 */
@Override
public DataSet call(List<List<Writable>> input) throws Exception {
    final int sequenceLength = input.size();
    final int labelWidth = regression ? 1 : numPossibleLabels;

    INDArray features = null;
    INDArray labels = Nd4j.zeros(1, labelWidth, sequenceLength);

    // Reusable [example, column, time] index buffers; the example index stays 0.
    final int[] featurePos = new int[3];
    final int[] labelPos = new int[3];

    int t = 0;
    for (List<Writable> timeStep : input) {
        if (t == 0) {
            // Feature width is taken from the first step: all values minus the label.
            features = Nd4j.zeros(1, timeStep.size() - 1, sequenceLength);
        }

        int column = 0;
        int featureColumn = 0;
        for (Writable raw : timeStep) {
            final Writable value = (converter == null) ? raw : converter.convert(raw);
            final boolean isLabel = (column == labelIndex);
            column++;

            if (isLabel) {
                if (regression) {
                    labelPos[2] = t;
                    labels.putScalar(labelPos, value.toDouble());
                } else {
                    INDArray outcome = FeatureUtil.toOutcomeVector(value.toInt(), numPossibleLabels);
                    //1d from [1,nOut,timeSeriesLength] -> tensor i along dimension 1 is at time i
                    labels.tensorAlongDimension(t, 1).assign(outcome);
                }
                continue;
            }

            featurePos[1] = featureColumn++;
            featurePos[2] = t;
            try {
                features.putScalar(featurePos, value.toDouble());
            } catch (UnsupportedOperationException e) {
                // Not a scalar: only an array-valued writable can be handled here.
                if (!(value instanceof NDArrayWritable)) {
                    throw e;
                }
                features.get(NDArrayIndex.point(featurePos[0]), NDArrayIndex.all(), NDArrayIndex.point(featurePos[2])).putRow(0, ((NDArrayWritable) value).get());
            }
        }
        t++;
    }

    final DataSet result = new DataSet(features, labels);
    if (preProcessor != null) {
        preProcessor.preProcess(result);
    }
    return result;
}
Also used : NDArrayWritable(org.datavec.common.data.NDArrayWritable) INDArray(org.nd4j.linalg.api.ndarray.INDArray) DataSet(org.nd4j.linalg.dataset.DataSet) NDArrayWritable(org.datavec.common.data.NDArrayWritable) Writable(org.datavec.api.writable.Writable) List(java.util.List)

Aggregations

DataSet (org.nd4j.linalg.dataset.DataSet)334 Test (org.junit.Test)226 INDArray (org.nd4j.linalg.api.ndarray.INDArray)194 MultiLayerNetwork (org.deeplearning4j.nn.multilayer.MultiLayerNetwork)93 DataSetIterator (org.nd4j.linalg.dataset.api.iterator.DataSetIterator)82 NeuralNetConfiguration (org.deeplearning4j.nn.conf.NeuralNetConfiguration)79 MultiLayerConfiguration (org.deeplearning4j.nn.conf.MultiLayerConfiguration)73 IrisDataSetIterator (org.deeplearning4j.datasets.iterator.impl.IrisDataSetIterator)62 ArrayList (java.util.ArrayList)50 MnistDataSetIterator (org.deeplearning4j.datasets.iterator.impl.MnistDataSetIterator)41 ScoreIterationListener (org.deeplearning4j.optimize.listeners.ScoreIterationListener)38 BaseSparkTest (org.deeplearning4j.spark.BaseSparkTest)34 OutputLayer (org.deeplearning4j.nn.conf.layers.OutputLayer)32 DenseLayer (org.deeplearning4j.nn.conf.layers.DenseLayer)31 MultiDataSet (org.nd4j.linalg.dataset.MultiDataSet)31 ComputationGraph (org.deeplearning4j.nn.graph.ComputationGraph)25 SequenceRecordReader (org.datavec.api.records.reader.SequenceRecordReader)24 ComputationGraphConfiguration (org.deeplearning4j.nn.conf.ComputationGraphConfiguration)24 CSVSequenceRecordReader (org.datavec.api.records.reader.impl.csv.CSVSequenceRecordReader)23 ClassPathResource (org.nd4j.linalg.io.ClassPathResource)23