Use of org.deeplearning4j.datasets.datavec.RecordReaderDataSetIterator in project tutorials by eugenp.
The class IrisClassifier, method main.
public static void main(String[] args) throws IOException, InterruptedException {
    DataSet allData;
    try (RecordReader recordReader = new CSVRecordReader(0, ',')) {
        recordReader.initialize(new FileSplit(new ClassPathResource("iris.txt").getFile()));
        DataSetIterator iterator = new RecordReaderDataSetIterator(recordReader, 150, FEATURES_COUNT, CLASSES_COUNT);
        allData = iterator.next();
    }
    allData.shuffle(42);
    DataNormalization normalizer = new NormalizerStandardize();
    normalizer.fit(allData);
    normalizer.transform(allData);
    SplitTestAndTrain testAndTrain = allData.splitTestAndTrain(0.65);
    DataSet trainingData = testAndTrain.getTrain();
    DataSet testData = testAndTrain.getTest();
    MultiLayerConfiguration configuration = new NeuralNetConfiguration.Builder()
        .iterations(1000)
        .activation(Activation.TANH)
        .weightInit(WeightInit.XAVIER)
        .learningRate(0.1)
        .regularization(true).l2(0.0001)
        .list()
        .layer(0, new DenseLayer.Builder().nIn(FEATURES_COUNT).nOut(3).build())
        .layer(1, new DenseLayer.Builder().nIn(3).nOut(3).build())
        .layer(2, new OutputLayer.Builder(LossFunctions.LossFunction.NEGATIVELOGLIKELIHOOD)
            .activation(Activation.SOFTMAX).nIn(3).nOut(CLASSES_COUNT).build())
        .backprop(true).pretrain(false)
        .build();
    MultiLayerNetwork model = new MultiLayerNetwork(configuration);
    model.init();
    model.fit(trainingData);
    INDArray output = model.output(testData.getFeatureMatrix());
    Evaluation eval = new Evaluation(CLASSES_COUNT);
    eval.eval(testData.getLabels(), output);
    System.out.println(eval.stats());
}
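FEATURES_COUNT and CLASSES_COUNT are class constants declared outside this snippet. A minimal sketch of those declarations, assuming the standard Iris layout of four numeric features and three classes:

// Assumed constants (not shown in the snippet above):
// the Iris dataset has 4 features per row and 3 target classes.
private static final int FEATURES_COUNT = 4;
private static final int CLASSES_COUNT = 3;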
Use of org.deeplearning4j.datasets.datavec.RecordReaderDataSetIterator in project deeplearning4j by deeplearning4j.
The class EvalTest, method testEvaluationWithMetaData.
@Test
public void testEvaluationWithMetaData() throws Exception {
    RecordReader csv = new CSVRecordReader();
    csv.initialize(new FileSplit(new ClassPathResource("iris.txt").getTempFileFromArchive()));
    int batchSize = 10;
    int labelIdx = 4;
    int numClasses = 3;
    RecordReaderDataSetIterator rrdsi = new RecordReaderDataSetIterator(csv, batchSize, labelIdx, numClasses);
    NormalizerStandardize ns = new NormalizerStandardize();
    ns.fit(rrdsi);
    rrdsi.setPreProcessor(ns);
    rrdsi.reset();
    Nd4j.getRandom().setSeed(12345);
    MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder()
        .seed(12345)
        .iterations(1)
        .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT)
        .updater(Updater.SGD)
        .learningRate(0.1)
        .list()
        .layer(0, new OutputLayer.Builder(LossFunctions.LossFunction.MCXENT)
            .activation(Activation.SOFTMAX).nIn(4).nOut(3).build())
        .pretrain(false).backprop(true)
        .build();
    MultiLayerNetwork net = new MultiLayerNetwork(conf);
    net.init();
    for (int i = 0; i < 4; i++) {
        net.fit(rrdsi);
        rrdsi.reset();
    }
    Evaluation e = new Evaluation();
    //*** New: Enable collection of metadata (stored in the DataSets) ***
    rrdsi.setCollectMetaData(true);
    while (rrdsi.hasNext()) {
        DataSet ds = rrdsi.next();
        //*** New - cross dependencies here make types difficult; using Object internally in DataSet for this ***
        List<RecordMetaData> meta = ds.getExampleMetaData(RecordMetaData.class);
        INDArray out = net.output(ds.getFeatures());
        //*** New - evaluate and also store metadata ***
        e.eval(ds.getLabels(), out, meta);
    }
    System.out.println(e.stats());
    System.out.println("\n\n*** Prediction Errors: ***");
    //*** New - get list of prediction errors from evaluation ***
    List<Prediction> errors = e.getPredictionErrors();
    List<RecordMetaData> metaForErrors = new ArrayList<>();
    for (Prediction p : errors) {
        metaForErrors.add((RecordMetaData) p.getRecordMetaData());
    }
    //*** New - dynamically load a subset of the data, just for prediction errors ***
    DataSet ds = rrdsi.loadFromMetaData(metaForErrors);
    INDArray output = net.output(ds.getFeatures());
    int count = 0;
    for (Prediction t : errors) {
        //*** New - load subset of data from MetaData object (usually batched for efficiency) ***
        System.out.println(t + "\t\tRaw Data: "
            + csv.loadFromMetaData((RecordMetaData) t.getRecordMetaData()).getRecord()
            + "\tNormalized: " + ds.getFeatureMatrix().getRow(count)
            + "\tLabels: " + ds.getLabels().getRow(count)
            + "\tNetwork predictions: " + output.getRow(count));
        count++;
    }
    int errorCount = errors.size();
    double expAcc = 1.0 - errorCount / 150.0;
    assertEquals(expAcc, e.accuracy(), 1e-5);
    ConfusionMatrix<Integer> confusion = e.getConfusionMatrix();
    int[] actualCounts = new int[3];
    int[] predictedCounts = new int[3];
    for (int i = 0; i < 3; i++) {
        for (int j = 0; j < 3; j++) {
            // (actual, predicted)
            int entry = confusion.getCount(i, j);
            List<Prediction> list = e.getPredictions(i, j);
            assertEquals(entry, list.size());
            actualCounts[i] += entry;
            predictedCounts[j] += entry;
        }
    }
    for (int i = 0; i < 3; i++) {
        List<Prediction> actualClassI = e.getPredictionsByActualClass(i);
        List<Prediction> predictedClassI = e.getPredictionByPredictedClass(i);
        assertEquals(actualCounts[i], actualClassI.size());
        assertEquals(predictedCounts[i], predictedClassI.size());
    }
}
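Stripped of the assertions, the metadata workflow above reduces to a handful of calls. A condensed sketch, assuming a trained net and the same iterator rrdsi:

// Condensed sketch of the metadata-evaluation pattern from the test above.
rrdsi.setCollectMetaData(true); // each emitted DataSet now carries RecordMetaData
Evaluation eval = new Evaluation();
while (rrdsi.hasNext()) {
    DataSet batch = rrdsi.next();
    List<RecordMetaData> meta = batch.getExampleMetaData(RecordMetaData.class);
    eval.eval(batch.getLabels(), net.output(batch.getFeatures()), meta);
}
// Each Prediction in the error list still references its source record.
List<Prediction> wrong = eval.getPredictionErrors();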
Use of org.deeplearning4j.datasets.datavec.RecordReaderDataSetIterator in project deeplearning4j by deeplearning4j.
The class MultipleEpochsIteratorTest, method testLoadBatchDataSet.
@Test
public void testLoadBatchDataSet() throws Exception {
    int epochs = 2;
    RecordReader rr = new CSVRecordReader();
    rr.initialize(new FileSplit(new ClassPathResource("iris.txt").getFile()));
    DataSetIterator iter = new RecordReaderDataSetIterator(rr, 150);
    DataSet ds = iter.next(20);
    MultipleEpochsIterator multiIter = new MultipleEpochsIterator(epochs, ds);
    while (multiIter.hasNext()) {
        DataSet path = multiIter.next(10);
        assertEquals(path.numExamples(), 10, 0.0);
        assertFalse(path == null);
    }
    assertEquals(epochs, multiIter.epochs);
}
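Here the iterator replays a single preloaded DataSet. MultipleEpochsIterator can also wrap a live DataSetIterator so that each epoch re-reads the underlying source; a minimal sketch, assuming the same RecordReader setup as the test above:

// Sketch: wrap a DataSetIterator instead of a preloaded DataSet.
// Assumes `rr` is an initialized RecordReader as in the test above.
DataSetIterator base = new RecordReaderDataSetIterator(rr, 10);
MultipleEpochsIterator multi = new MultipleEpochsIterator(2, base);
while (multi.hasNext()) {
    DataSet batch = multi.next();
    // consume `batch`, e.g. net.fit(batch)
}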
Use of org.deeplearning4j.datasets.datavec.RecordReaderDataSetIterator in project deeplearning4j by deeplearning4j.
The class ConvolutionLayerSetupTest, method testLRN.
@Test
public void testLRN() throws Exception {
    List<String> labels = new ArrayList<>(Arrays.asList("Zico", "Ziwang_Xu"));
    String rootDir = new ClassPathResource("lfwtest").getFile().getAbsolutePath();
    RecordReader reader = new ImageRecordReader(28, 28, 3);
    reader.initialize(new FileSplit(new File(rootDir)));
    DataSetIterator recordReader = new RecordReaderDataSetIterator(reader, 10, 1, labels.size());
    labels.remove("lfwtest");
    NeuralNetConfiguration.ListBuilder builder = (NeuralNetConfiguration.ListBuilder) incompleteLRN();
    builder.setInputType(InputType.convolutional(28, 28, 3));
    MultiLayerConfiguration conf = builder.build();
    ConvolutionLayer layer2 = (ConvolutionLayer) conf.getConf(3).getLayer();
    assertEquals(6, layer2.getNIn());
}
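The helper incompleteLRN() (defined elsewhere in ConvolutionLayerSetupTest) builds a layer list without explicit nIn values; the assertion checks that setInputType fills them in. A minimal sketch of that mechanism with hypothetical layer sizes, independent of incompleteLRN():

// Sketch: setInputType(InputType.convolutional(h, w, channels)) lets DL4J
// infer each layer's nIn, so only nOut is specified. Sizes are hypothetical.
MultiLayerConfiguration inferred = new NeuralNetConfiguration.Builder()
    .list()
    .layer(0, new ConvolutionLayer.Builder(5, 5).nOut(6).build())
    .layer(1, new OutputLayer.Builder(LossFunctions.LossFunction.MCXENT)
        .activation(Activation.SOFTMAX).nOut(2).build())
    .setInputType(InputType.convolutional(28, 28, 3))
    .build();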
Use of org.deeplearning4j.datasets.datavec.RecordReaderDataSetIterator in project deeplearning4j by deeplearning4j.
The class StringToDataSetExportFunction, method processBatchIfRequired.
private void processBatchIfRequired(List<List<Writable>> list, boolean finalRecord) throws Exception {
    if (list.isEmpty())
        return;
    if (list.size() < batchSize && !finalRecord)
        return;
    RecordReader rr = new CollectionRecordReader(list);
    RecordReaderDataSetIterator iter =
        new RecordReaderDataSetIterator(rr, new SelfWritableConverter(), batchSize, labelIndex, numPossibleLabels, regression);
    DataSet ds = iter.next();
    String filename = "dataset_" + uid + "_" + (outputCount++) + ".bin";
    URI uri = new URI(outputDir.getPath() + "/" + filename);
    FileSystem file = FileSystem.get(uri, conf);
    try (FSDataOutputStream out = file.create(new Path(uri))) {
        ds.save(out);
    }
    list.clear();
}
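An exported file can later be read back with DataSet.load. A minimal sketch of the reverse path, assuming the same Hadoop uri and conf as in the method above:

// Sketch: restore an exported DataSet from the filesystem. `uri` and `conf`
// mirror the export method above; the concrete path is an assumption.
FileSystem fs = FileSystem.get(uri, conf);
DataSet restored = new DataSet();
try (FSDataInputStream in = fs.open(new Path(uri))) {
    restored.load(in);
}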