Use of com.simiacryptus.mindseye.layers.java.SoftmaxActivationLayer in project MindsEye by SimiaCryptus.
From the class MnistTestBase, method buildModel:
/**
 * Builds the model as a DAG network.
 *
 * @param log the notebook log
 * @return the DAG network
 */
public DAGNetwork buildModel(@Nonnull final NotebookOutput log) {
  log.h1("Model");
  log.p("This is a very simple model that performs basic logistic regression. " + "It is expected to be trainable to about 91% accuracy on MNIST.");
  return log.code(() -> {
    @Nonnull final PipelineNetwork network = new PipelineNetwork();
    network.add(new BiasLayer(28, 28, 1));
    network.add(new FullyConnectedLayer(new int[] { 28, 28, 1 }, new int[] { 10 }).set(() -> 0.001 * (Math.random() - 0.45)));
    network.add(new SoftmaxActivationLayer());
    return network;
  });
}
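A minimal usage sketch (not from the original source): evaluate the returned network on one random MNIST-shaped tensor. The eval/getData chain mirrors EncodingProblem.run further down this page; the Tensor constructor and set(DoubleSupplier) initializer are assumptions based on that same code.

// Hypothetical sanity check of the logistic-regression model.
DAGNetwork network = buildModel(log); // 'log' is an existing NotebookOutput
Tensor input = new Tensor(28, 28, 1).set(() -> Math.random());
Tensor output = network.eval(input).getData().get(0);
// The SoftmaxActivationLayer output holds one probability per digit class.
assert output.getData().length == 10;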
Use of com.simiacryptus.mindseye.layers.java.SoftmaxActivationLayer in project MindsEye by SimiaCryptus.
From the class RecursiveSubspaceTest, method buildModel:
@Override
public DAGNetwork buildModel(@Nonnull NotebookOutput log) {
  log.h3("Model");
  log.p("We use a multi-level convolution network");
  return log.code(() -> {
    @Nonnull final PipelineNetwork network = new PipelineNetwork();
    double weight = 1e-3;
    @Nonnull DoubleSupplier init = () -> weight * (Math.random() - 0.5);
    network.add(new ConvolutionLayer(3, 3, 1, 5).set(init));
    network.add(new ImgBandBiasLayer(5));
    network.add(new PoolingLayer().setMode(PoolingLayer.PoolingMode.Max));
    network.add(new ActivationLayer(ActivationLayer.Mode.RELU));
    network.add(newNormalizationLayer());
    network.add(new ConvolutionLayer(3, 3, 5, 5).set(init));
    network.add(new ImgBandBiasLayer(5));
    network.add(new PoolingLayer().setMode(PoolingLayer.PoolingMode.Max));
    network.add(new ActivationLayer(ActivationLayer.Mode.RELU));
    network.add(newNormalizationLayer());
    network.add(new BiasLayer(7, 7, 5));
    network.add(new FullyConnectedLayer(new int[] { 7, 7, 5 }, new int[] { 10 }).set(init));
    network.add(new SoftmaxActivationLayer());
    return network;
  });
}
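The {7, 7, 5} shape in the final layers follows from the pooling arithmetic: two max-pool stages with (assumed) default 2x2 windows halve the 28x28 MNIST input twice, 28 -> 14 -> 7, over 5 bands. A minimal sanity-check sketch, reusing the eval/getData idiom from the other examples on this page (the Tensor.set(DoubleSupplier) initializer is again an assumption):

// Hypothetical shape check: 28x28 -> 14x14 -> 7x7 across two pooling stages.
DAGNetwork network = buildModel(log); // 'log' is an existing NotebookOutput
Tensor output = network.eval(new Tensor(28, 28, 1).set(() -> Math.random())).getData().get(0);
assert output.getData().length == 10; // 10-way softmax over the digit classes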
Use of com.simiacryptus.mindseye.layers.java.SoftmaxActivationLayer in project MindsEye by SimiaCryptus.
From the class ConvPipelineTest, method buildList_1:
/**
 * Builds network variant 1 as an array of layers.
 *
 * @return the layer array
 */
public static Layer[] buildList_1() {
  @Nonnull final ArrayList<Layer> network = new ArrayList<Layer>();
  network.add(new ConvolutionLayer(3, 3, 3, 10).set(i -> 1e-8 * (Math.random() - 0.5)));
  network.add(new PoolingLayer().setMode(PoolingLayer.PoolingMode.Max));
  network.add(new ReLuActivationLayer());
  network.add(new ImgCropLayer(126, 126));
  network.add(new ConvolutionLayer(3, 3, 10, 20).set(i -> 1e-8 * (Math.random() - 0.5)));
  network.add(new PoolingLayer().setMode(PoolingLayer.PoolingMode.Max));
  network.add(new ReLuActivationLayer());
  network.add(new ImgCropLayer(62, 62));
  network.add(new ConvolutionLayer(5, 5, 20, 30).set(i -> 1e-8 * (Math.random() - 0.5)));
  network.add(new PoolingLayer().setMode(PoolingLayer.PoolingMode.Max));
  network.add(new ReLuActivationLayer());
  network.add(new ImgCropLayer(18, 18));
  network.add(new ConvolutionLayer(3, 3, 30, 40).set(i -> 1e-8 * (Math.random() - 0.5)));
  network.add(new PoolingLayer().setWindowX(4).setWindowY(4).setMode(PoolingLayer.PoolingMode.Avg));
  network.add(new ReLuActivationLayer());
  network.add(new ImgCropLayer(4, 4));
  network.add(new ImgBandBiasLayer(40));
  network.add(new FullyConnectedLayer(new int[] { 4, 4, 40 }, new int[] { 100 }).set(() -> 0.001 * (Math.random() - 0.45)));
  network.add(new SoftmaxActivationLayer());
  return network.toArray(new Layer[] {});
}
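buildList_1() returns bare layers rather than an assembled network, so a caller presumably chains them in order. A minimal sketch using the PipelineNetwork.add pattern from the other examples here:

// Hypothetical assembly of the layer list into one sequential network.
@Nonnull final PipelineNetwork pipeline = new PipelineNetwork();
for (final Layer layer : ConvPipelineTest.buildList_1()) {
  pipeline.add(layer);
}
// Judging by the 126x126 crop after the first 2x2 pool, the expected input is
// roughly a 256x256x3 image, reduced stage by stage to a 100-way softmax.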
Use of com.simiacryptus.mindseye.layers.java.SoftmaxActivationLayer in project MindsEye by SimiaCryptus.
From the class EncodingProblem, method run:
@Nonnull
@Override
public EncodingProblem run(@Nonnull final NotebookOutput log) {
  @Nonnull final TrainingMonitor monitor = TestUtil.getMonitor(history);
  Tensor[][] trainingData;
  try {
    trainingData = data.trainingData().map(labeledObject -> {
      return new Tensor[] { new Tensor(features).set(this::random), labeledObject.data };
    }).toArray(i -> new Tensor[i][]);
  } catch (@Nonnull final IOException e) {
    throw new RuntimeException(e);
  }
  @Nonnull final DAGNetwork imageNetwork = revFactory.vectorToImage(log, features);
  log.h3("Network Diagram");
  log.code(() -> {
    return Graphviz.fromGraph(TestUtil.toGraph(imageNetwork)).height(400).width(600).render(Format.PNG).toImage();
  });
  @Nonnull final PipelineNetwork trainingNetwork = new PipelineNetwork(2);
  @Nullable final DAGNode image = trainingNetwork.add(imageNetwork, trainingNetwork.getInput(0));
  @Nullable final DAGNode softmax = trainingNetwork.add(new SoftmaxActivationLayer(), trainingNetwork.getInput(0));
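  // The combined objective sums two terms: the entropy of the softmaxed latent
  // code (EntropyLossLayer applied to the softmax against itself, i.e. its own
  // entropy, acting as a confidence/sparsity regularizer) and the square root
  // of the mean squared reconstruction error (an RMS image loss).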
  trainingNetwork.add(new SumInputsLayer(),
      trainingNetwork.add(new EntropyLossLayer(), softmax, softmax),
      trainingNetwork.add(new NthPowerActivationLayer().setPower(1.0 / 2.0),
          trainingNetwork.add(new MeanSqLossLayer(), image, trainingNetwork.getInput(1))));
log.h3("Training");
log.p("We start by training apply a very small population to improve initial convergence performance:");
TestUtil.instrumentPerformance(trainingNetwork);
@Nonnull final Tensor[][] primingData = Arrays.copyOfRange(trainingData, 0, 1000);
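  // setMask(true, false) presumably flags the first input (the random latent
  // vectors) as trainable data, so each image's code is optimized jointly with
  // the network weights while the target images stay fixed.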
  @Nonnull final ValidatingTrainer preTrainer = optimizer.train(log,
      (SampledTrainable) new SampledArrayTrainable(primingData, trainingNetwork, trainingSize, batchSize).setMinSamples(trainingSize).setMask(true, false),
      new ArrayTrainable(primingData, trainingNetwork, batchSize), monitor);
  log.code(() -> {
    preTrainer.setTimeout(timeoutMinutes / 2, TimeUnit.MINUTES).setMaxIterations(batchSize).run();
  });
  TestUtil.extractPerformance(log, trainingNetwork);
  log.p("Then our main training phase:");
  TestUtil.instrumentPerformance(trainingNetwork);
  @Nonnull final ValidatingTrainer mainTrainer = optimizer.train(log,
      (SampledTrainable) new SampledArrayTrainable(trainingData, trainingNetwork, trainingSize, batchSize).setMinSamples(trainingSize).setMask(true, false),
      new ArrayTrainable(trainingData, trainingNetwork, batchSize), monitor);
  log.code(() -> {
    mainTrainer.setTimeout(timeoutMinutes, TimeUnit.MINUTES).setMaxIterations(batchSize).run();
  });
  TestUtil.extractPerformance(log, trainingNetwork);
  if (!history.isEmpty()) {
    log.code(() -> {
      return TestUtil.plot(history);
    });
    log.code(() -> {
      return TestUtil.plotTime(history);
    });
  }
  try {
    @Nonnull String filename = log.getName().toString() + EncodingProblem.modelNo++ + "_plot.png";
    ImageIO.write(Util.toImage(TestUtil.plot(history)), "png", log.file(filename));
    log.appendFrontMatterProperty("result_plot", filename, ";");
  } catch (IOException e) {
    throw new RuntimeException(e);
  }
  @Nonnull final String modelName = "encoding_model_" + EncodingProblem.modelNo++ + ".json";
  log.appendFrontMatterProperty("result_model", modelName, ";");
  log.p("Saved model as " + log.file(trainingNetwork.getJson().toString(), modelName, modelName));
  log.h3("Results");
  @Nonnull final PipelineNetwork testNetwork = new PipelineNetwork(2);
  testNetwork.add(imageNetwork, testNetwork.getInput(0));
  log.code(() -> {
    @Nonnull final TableOutput table = new TableOutput();
    Arrays.stream(trainingData).map(tensorArray -> {
      @Nullable final Tensor predictionSignal = testNetwork.eval(tensorArray).getData().get(0);
      @Nonnull final LinkedHashMap<CharSequence, Object> row = new LinkedHashMap<>();
      row.put("Source", log.image(tensorArray[1].toImage(), ""));
      row.put("Echo", log.image(predictionSignal.toImage(), ""));
      return row;
    }).filter(x -> null != x).limit(10).forEach(table::putRow);
    return table;
  });
  log.p("Learned Model Statistics:");
  log.code(() -> {
    @Nonnull final ScalarStatistics scalarStatistics = new ScalarStatistics();
    trainingNetwork.state().stream().flatMapToDouble(x -> Arrays.stream(x)).forEach(v -> scalarStatistics.add(v));
    return scalarStatistics.getMetrics();
  });
  log.p("Learned Representation Statistics:");
  log.code(() -> {
    @Nonnull final ScalarStatistics scalarStatistics = new ScalarStatistics();
    Arrays.stream(trainingData).flatMapToDouble(row -> Arrays.stream(row[0].getData())).forEach(v -> scalarStatistics.add(v));
    return scalarStatistics.getMetrics();
  });
  log.p("Some rendered unit vectors:");
  for (int featureNumber = 0; featureNumber < features; featureNumber++) {
    @Nonnull final Tensor input = new Tensor(features).set(featureNumber, 1);
    @Nullable final Tensor tensor = imageNetwork.eval(input).getData().get(0);
    TestUtil.renderToImages(tensor, true).forEach(img -> {
      log.out(log.image(img, ""));
    });
  }
  return this;
}
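The run() method serializes the trained network via trainingNetwork.getJson(). A minimal reload sketch, assuming MindsEye exposes a Layer.fromJson(JsonObject) factory (named from memory; verify against the actual API) and that Gson is on the classpath:

// Hypothetical round-trip: parse the saved JSON file and rebuild the layers.
import com.google.gson.GsonBuilder;
import com.google.gson.JsonObject;
import com.simiacryptus.mindseye.lang.Layer;
import java.io.FileReader;

public class LoadEncodingModel {
  public static Layer load(final String path) throws Exception {
    try (final FileReader reader = new FileReader(path)) {
      final JsonObject json = new GsonBuilder().create().fromJson(reader, JsonObject.class);
      return Layer.fromJson(json); // assumed deserialization entry point
    }
  }
}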