Use of org.nd4j.linalg.dataset.DataSet in the deeplearning4j project (by deeplearning4j).
Class MLLibUtil, method fromLabeledPoint.
/**
 * Builds a single {@link DataSet} from one labeled point.
 * The point's features are copied out with {@code toArray()} and handed to
 * {@code Nd4j.create}; the point's label is truncated to an {@code int} and
 * passed, together with {@code numPossibleLabels}, to
 * {@code FeatureUtil.toOutcomeVector} to produce the labels array.
 *
 * @param point the labeled point to convert
 * @param numPossibleLabels the number of possible labels, forwarded to {@code FeatureUtil.toOutcomeVector}
 * @return {@link DataSet} holding the converted features and labels
 */
private static DataSet fromLabeledPoint(LabeledPoint point, int numPossibleLabels) {
    final double[] featureValues = point.features().toArray();
    // The label arrives as a double; the cast drops any fractional part.
    final int outcomeIndex = (int) point.label();
    return new DataSet(
            Nd4j.create(featureValues),
            FeatureUtil.toOutcomeVector(outcomeIndex, numPossibleLabels));
}
Use of org.nd4j.linalg.dataset.DataSet in the deeplearning4j project (by deeplearning4j).
Class MLLibUtil, method fromLabeledPoint.
/**
 * Convert an RDD of labeled points into an RDD of data sets, using the
 * specified batch size to choose the number of partitions for the
 * intermediate reduce step.
 * <p>
 * Each labeled point is keyed by its {@code zipWithIndex} position, converted
 * with {@link #fromLabeledPoint(LabeledPoint, int)}, reduced by key (stacking
 * features and labels of values that share a key), and finally flattened by
 * iterating each resulting {@link DataSet}.
 * <p>
 * NOTE(review): the keys come straight from {@code zipWithIndex}, so they
 * appear to be unique per element; if so the reduce function never merges
 * anything and {@code batchSize} only influences the partition count.
 * Confirm whether real batching (e.g. keying by {@code index / batchSize})
 * was intended.
 *
 * @param data the data to convert
 * @param numPossibleLabels the number of possible labels
 * @param batchSize the batch size; must be positive
 * @return the new rdd
 * @throws IllegalArgumentException if {@code batchSize} is not positive
 */
public static JavaRDD<DataSet> fromLabeledPoint(JavaRDD<LabeledPoint> data, final int numPossibleLabels, int batchSize) {
    if (batchSize <= 0) {
        // Previously a zero batch size surfaced as an opaque division-by-zero below.
        throw new IllegalArgumentException("batchSize must be positive, got " + batchSize);
    }
    //map by index
    JavaPairRDD<Long, LabeledPoint> dataWithIndex = data.zipWithIndex().mapToPair(new PairFunction<Tuple2<LabeledPoint, Long>, Long, LabeledPoint>() {
        @Override
        public Tuple2<Long, LabeledPoint> call(Tuple2<LabeledPoint, Long> labeledPointLongTuple2) throws Exception {
            // zipWithIndex yields (point, index); swap so the index becomes the key
            return new Tuple2<>(labeledPointLongTuple2._2(), labeledPointLongTuple2._1());
        }
    });
    JavaPairRDD<Long, DataSet> mappedData = dataWithIndex.mapToPair(new PairFunction<Tuple2<Long, LabeledPoint>, Long, DataSet>() {
        @Override
        public Tuple2<Long, DataSet> call(Tuple2<Long, LabeledPoint> longLabeledPointTuple2) throws Exception {
            return new Tuple2<>(longLabeledPointTuple2._1(), MLLibUtil.fromLabeledPoint(longLabeledPointTuple2._2(), numPossibleLabels));
        }
    });
    // count / batchSize truncates to 0 when there are fewer examples than
    // batchSize; a shuffle needs at least one partition, so clamp to 1.
    int numPartitions = (int) Math.max(1L, mappedData.count() / batchSize);
    JavaPairRDD<Long, DataSet> aggregated = mappedData.reduceByKey(new Function2<DataSet, DataSet, DataSet>() {
        @Override
        public DataSet call(DataSet v1, DataSet v2) throws Exception {
            // Stack the two data sets' features and labels row-wise
            return new DataSet(Nd4j.vstack(v1.getFeatureMatrix(), v2.getFeatureMatrix()), Nd4j.vstack(v1.getLabels(), v2.getLabels()));
        }
    }, numPartitions);
    JavaRDD<DataSet> data2 = aggregated.flatMap(new BaseFlatMapFunctionAdaptee<Tuple2<Long, DataSet>, DataSet>(new FlatMapFunctionAdapter<Tuple2<Long, DataSet>, DataSet>() {
        @Override
        public Iterable<DataSet> call(Tuple2<Long, DataSet> longDataSetTuple2) throws Exception {
            // The DataSet itself is returned as the Iterable<DataSet> to flatten
            return longDataSetTuple2._2();
        }
    }));
    return data2;
}
Use of org.nd4j.linalg.dataset.DataSet in the deeplearning4j project (by deeplearning4j).
Class DataSetExportFunction, method call.
/**
 * Writes every {@link DataSet} supplied by the iterator to its own file under
 * {@code outputDir}. File names have the form
 * {@code dataset_<uid>_<counter>.bin}, where {@code uid} is rebuilt on each
 * invocation from the current thread id and up to the first eight characters
 * of the JVM UID, and the counter is the {@code outputCount} field, which is
 * incremented once per file.
 *
 * @param iter the data sets to export
 * @throws Exception if the target URI cannot be built or the data set cannot be written
 */
@Override
public void call(Iterator<DataSet> iter) throws Exception {
    final String jvmId = UIDProvider.getJVMUID();
    final String jvmIdPrefix = jvmId.substring(0, Math.min(8, jvmId.length()));
    uid = Thread.currentThread().getId() + jvmIdPrefix;

    while (iter.hasNext()) {
        final DataSet dataSet = iter.next();
        final String fileName = "dataset_" + uid + "_" + (outputCount++) + ".bin";

        // Append a separator only when the directory path does not already end in one.
        final String directory = outputDir.getPath();
        final boolean endsWithSeparator = directory.endsWith("/") || directory.endsWith("\\");
        final URI target = new URI(directory + (endsWithSeparator ? "" : "/") + fileName);

        final FileSystem fileSystem = FileSystem.get(target, conf);
        try (FSDataOutputStream stream = fileSystem.create(new Path(target))) {
            dataSet.save(stream);
        }
    }
}
Use of org.nd4j.linalg.dataset.DataSet in the deeplearning4j project (by deeplearning4j).
Class SplitDataSetExamplesPairFlatMapFunctionAdapter, method call.
/**
 * Breaks a {@link DataSet} into the elements returned by its {@code asList()}
 * and pairs each one with a random integer key drawn from
 * {@code [0, maxKeyIndex)}. The random generator field {@code r} is created
 * on first use.
 *
 * @param dataSet the data set to split
 * @return one (key, data set) tuple per element of {@code dataSet.asList()}
 */
@Override
public Iterable<Tuple2<Integer, DataSet>> call(DataSet dataSet) throws Exception {
    if (r == null) {
        r = new Random();
    }

    final List<DataSet> examples = dataSet.asList();
    final int exampleCount = examples.size();
    final List<Tuple2<Integer, DataSet>> keyed = new ArrayList<>(exampleCount);
    for (int idx = 0; idx < exampleCount; idx++) {
        final int key = r.nextInt(maxKeyIndex);
        keyed.add(new Tuple2<>(key, examples.get(idx)));
    }
    return keyed;
}
Use of org.nd4j.linalg.dataset.DataSet in the deeplearning4j project (by deeplearning4j).
Class DataVecSequenceDataSetFunction, method call.
/**
 * Converts one sequence (a list of time steps, each a list of writables) into
 * a {@link DataSet} with 3d features and labels.
 * <p>
 * Labels are allocated as {@code [1, regression ? 1 : numPossibleLabels, input.size()]}.
 * Features are allocated on the first time step as
 * {@code [1, step.size() - 1, input.size()]}. Within each step the writable at
 * position {@code labelIndex} is treated as the label and every other
 * writable as a feature. If a {@code converter} is set, each writable is
 * converted before use; if a {@code preProcessor} is set, it is applied to
 * the resulting data set.
 *
 * @param input the sequence to convert; one inner list per time step
 * @return the data set built from the sequence
 * @throws Exception if conversion or writing of a value fails
 */
@Override
public DataSet call(List<List<Writable>> input) throws Exception {
    INDArray labels = Nd4j.zeros(1, (regression ? 1 : numPossibleLabels), input.size());
    INDArray features = null;
    // Reusable index buffers: [example, column, time]; the example index stays 0.
    int[] featurePos = new int[3];
    int[] labelPos = new int[3];

    int time = 0;
    for (List<Writable> timeStep : input) {
        if (time == 0) {
            // Size the feature array from the first step: every column except the label.
            features = Nd4j.zeros(1, timeStep.size() - 1, input.size());
        }

        int column = 0;
        int featureColumn = 0;
        for (Writable raw : timeStep) {
            Writable value = (converter != null) ? converter.convert(raw) : raw;

            boolean isLabel = (column == labelIndex);
            column++;

            if (!isLabel) {
                // Feature column
                featurePos[1] = featureColumn++;
                featurePos[2] = time;
                try {
                    features.putScalar(featurePos, value.toDouble());
                } catch (UnsupportedOperationException e) {
                    // Not a scalar: only an array-backed writable can be handled here.
                    if (!(value instanceof NDArrayWritable)) {
                        throw e;
                    }
                    features.get(NDArrayIndex.point(featurePos[0]), NDArrayIndex.all(), NDArrayIndex.point(featurePos[2]))
                            .putRow(0, ((NDArrayWritable) value).get());
                }
            } else if (regression) {
                // Regression label: a single scalar at this time step.
                labelPos[2] = time;
                labels.putScalar(labelPos, value.toDouble());
            } else {
                // Classification label: write the outcome vector for this time step.
                INDArray outcome = FeatureUtil.toOutcomeVector(value.toInt(), numPossibleLabels);
                // With labels shaped [1, nOut, timeSeriesLength], the tensor at
                // index `time` along dimension 1 is the label vector at that time.
                labels.tensorAlongDimension(time, 1).assign(outcome);
            }
        }
        time++;
    }

    DataSet result = new DataSet(features, labels);
    if (preProcessor != null) {
        preProcessor.preProcess(result);
    }
    return result;
}
Aggregations