use of com.simiacryptus.mindseye.layers.java.EntropyLossLayer in project MindsEye by SimiaCryptus.
the class SimpleStochasticGradientDescentTest method train.
@Override
public void train(@Nonnull final NotebookOutput log, @Nonnull final Layer network, @Nonnull final Tensor[][] trainingData, final TrainingMonitor monitor) {
log.p("Training a model involves a few different components. First, our model is combined mapCoords a loss function. " + "Then we take that model and combine it mapCoords our training data to define a trainable object. " + "Finally, we use a simple iterative scheme to refine the weights of our model. " + "The final output is the last output value of the loss function when evaluating the last batch.");
log.code(() -> {
@Nonnull final SimpleLossNetwork supervisedNetwork = new SimpleLossNetwork(network, new EntropyLossLayer());
@Nonnull final Trainable trainable = new SampledArrayTrainable(trainingData, supervisedNetwork, 10000);
return new IterativeTrainer(trainable).setMonitor(monitor).setOrientation(new GradientDescent()).setTimeout(5, TimeUnit.MINUTES).setMaxIterations(500).runAndFree();
});
}
use of com.simiacryptus.mindseye.layers.java.EntropyLossLayer in project MindsEye by SimiaCryptus.
the class ClassifyProblem method run.
@Nonnull
@Override
public ClassifyProblem run(@Nonnull final NotebookOutput log) {
@Nonnull final TrainingMonitor monitor = TestUtil.getMonitor(history);
final Tensor[][] trainingData = getTrainingData(log);
@Nonnull final DAGNetwork network = fwdFactory.imageToVector(log, categories);
log.h3("Network Diagram");
log.code(() -> {
return Graphviz.fromGraph(TestUtil.toGraph(network)).height(400).width(600).render(Format.PNG).toImage();
});
log.h3("Training");
@Nonnull final SimpleLossNetwork supervisedNetwork = new SimpleLossNetwork(network, new EntropyLossLayer());
TestUtil.instrumentPerformance(supervisedNetwork);
int initialSampleSize = Math.max(trainingData.length / 5, Math.min(10, trainingData.length / 2));
@Nonnull final ValidatingTrainer trainer = optimizer.train(log, new SampledArrayTrainable(trainingData, supervisedNetwork, initialSampleSize, getBatchSize()), new ArrayTrainable(trainingData, supervisedNetwork, getBatchSize()), monitor);
log.code(() -> {
trainer.setTimeout(timeoutMinutes, TimeUnit.MINUTES).setMaxIterations(10000).run();
});
if (!history.isEmpty()) {
log.code(() -> {
return TestUtil.plot(history);
});
log.code(() -> {
return TestUtil.plotTime(history);
});
}
try {
@Nonnull String filename = log.getName() + "_" + ClassifyProblem.modelNo++ + "_plot.png";
ImageIO.write(Util.toImage(TestUtil.plot(history)), "png", log.file(filename));
@Nonnull File file = new File(log.getResourceDir(), filename);
log.appendFrontMatterProperty("result_plot", file.toString(), ";");
} catch (IOException e) {
throw new RuntimeException(e);
}
TestUtil.extractPerformance(log, supervisedNetwork);
@Nonnull final String modelName = "classification_model_" + ClassifyProblem.modelNo++ + ".json";
log.appendFrontMatterProperty("result_model", modelName, ";");
log.p("Saved model as " + log.file(network.getJson().toString(), modelName, modelName));
log.h3("Validation");
log.p("If we apply our model against the entire validation dataset, we get this accuracy:");
log.code(() -> {
return data.validationData().mapToDouble(labeledObject -> predict(network, labeledObject)[0] == parse(labeledObject.label) ? 1 : 0).average().getAsDouble() * 100;
});
log.p("Let's examine some incorrectly predicted results in more detail:");
log.code(() -> {
try {
@Nonnull final TableOutput table = new TableOutput();
Lists.partition(data.validationData().collect(Collectors.toList()), 100).stream().flatMap(batch -> {
@Nonnull TensorList batchIn = TensorArray.create(batch.stream().map(x -> x.data).toArray(i -> new Tensor[i]));
TensorList batchOut = network.eval(new ConstantResult(batchIn)).getData();
return IntStream.range(0, batchOut.length()).mapToObj(i -> toRow(log, batch.get(i), batchOut.get(i).getData()));
}).filter(x -> null != x).limit(10).forEach(table::putRow);
return table;
} catch (@Nonnull final IOException e) {
throw new RuntimeException(e);
}
});
return this;
}
use of com.simiacryptus.mindseye.layers.java.EntropyLossLayer in project MindsEye by SimiaCryptus.
the class BisectionLineSearchTest method train.
@Override
public void train(@Nonnull final NotebookOutput log, @Nonnull final Layer network, @Nonnull final Tensor[][] trainingData, final TrainingMonitor monitor) {
log.code(() -> {
@Nonnull final SimpleLossNetwork supervisedNetwork = new SimpleLossNetwork(network, new EntropyLossLayer());
@Nonnull final Trainable trainable = new SampledArrayTrainable(trainingData, supervisedNetwork, 1000);
return new IterativeTrainer(trainable).setMonitor(monitor).setOrientation(new GradientDescent()).setLineSearchFactory((@Nonnull final CharSequence name) -> new BisectionSearch()).setTimeout(3, TimeUnit.MINUTES).setMaxIterations(500).runAndFree();
});
}
use of com.simiacryptus.mindseye.layers.java.EntropyLossLayer in project MindsEye by SimiaCryptus.
the class LBFGSTest method train.
@Override
public void train(@Nonnull final NotebookOutput log, @Nonnull final Layer network, @Nonnull final Tensor[][] trainingData, final TrainingMonitor monitor) {
log.code(() -> {
@Nonnull final SimpleLossNetwork supervisedNetwork = new SimpleLossNetwork(network, new EntropyLossLayer());
@Nonnull ValidatingTrainer trainer = new ValidatingTrainer(new SampledArrayTrainable(trainingData, supervisedNetwork, 1000, 10000), new ArrayTrainable(trainingData, supervisedNetwork).cached()).setMonitor(monitor);
trainer.getRegimen().get(0).setOrientation(new LBFGS()).setLineSearchFactory(name -> name.toString().contains("LBFGS") ? new QuadraticSearch().setCurrentRate(1.0) : new QuadraticSearch());
return trainer.setTimeout(5, TimeUnit.MINUTES).setMaxIterations(500).run();
});
}
use of com.simiacryptus.mindseye.layers.java.EntropyLossLayer in project MindsEye by SimiaCryptus.
the class OWLQNTest method train.
@Override
public void train(@Nonnull final NotebookOutput log, @Nonnull final Layer network, @Nonnull final Tensor[][] trainingData, final TrainingMonitor monitor) {
log.code(() -> {
@Nonnull final SimpleLossNetwork supervisedNetwork = new SimpleLossNetwork(network, new EntropyLossLayer());
@Nonnull final Trainable trainable = new SampledArrayTrainable(trainingData, supervisedNetwork, 10000);
return new IterativeTrainer(trainable).setIterationsPerSample(100).setMonitor(monitor).setOrientation(new ValidatingOrientationWrapper(new OwlQn())).setTimeout(5, TimeUnit.MINUTES).setMaxIterations(500).runAndFree();
});
}
Aggregations