Example usage of com.simiacryptus.mindseye.eval.Trainable in the MindsEye project by SimiaCryptus: class TrainingTester, method trainLBFGS.
/**
 * Trains the given model using the L-BFGS orientation strategy (with an
 * Armijo-Wolfe line search) and records each optimization step.
 *
 * @param log       the notebook output used to render commentary and code cells
 * @param trainable the model/data pairing to optimize
 * @return the recorded step history (possibly partial if training aborted and
 *         exceptions are suppressed)
 */
@Nonnull
public List<StepRecord> trainLBFGS(@Nonnull final NotebookOutput log, final Trainable trainable) {
  log.p("Next, we apply the same optimization using L-BFGS, which is nearly ideal for purely second-order or quadratic functions.");
  @Nonnull final List<StepRecord> history = new ArrayList<>();
  @Nonnull final TrainingMonitor monitor = TrainingTester.getMonitor(history);
  try {
    log.code(() -> {
      return new IterativeTrainer(trainable)
          .setLineSearchFactory(label -> new ArmijoWolfeSearch())
          .setOrientation(new LBFGS())
          .setMonitor(monitor)
          .setTimeout(30, TimeUnit.SECONDS)
          .setIterationsPerSample(100)
          .setMaxIterations(250)
          .setTerminateThreshold(0)
          .runAndFree();
    });
  } catch (Throwable e) {
    if (isThrowExceptions()) {
      // Rethrow unchecked throwables as-is rather than double-wrapping them in
      // RuntimeException, so callers still see the original exception type.
      if (e instanceof RuntimeException) {
        throw (RuntimeException) e;
      }
      if (e instanceof Error) {
        throw (Error) e;
      }
      throw new RuntimeException(e);
    }
    // NOTE(review): when isThrowExceptions() is false the failure is silently
    // dropped and the (possibly empty) history is returned to the caller.
  }
  return history;
}
Example usage of com.simiacryptus.mindseye.eval.Trainable in the MindsEye project by SimiaCryptus: class TrainingTester, method train.
/**
 * Builds a supervised regression problem around {@code layer}, runs the
 * supplied optimizer, and logs diagnostics when training does not converge.
 * <p>
 * The constructed network feeds data columns {@code 0..n-2} into
 * {@code layer} and compares the result against column {@code n-1} using
 * mean-squared error.
 *
 * @param log   the notebook output used to render commentary and code cells
 * @param opt   the optimization routine: (log, trainable) -> step history
 * @param layer the layer under test; its reference is released before returning
 * @param data  training rows; every tensor reference is released before returning
 * @param mask  optional per-input trainability mask (empty array = no mask)
 * @return the recorded optimization step history
 */
private List<StepRecord> train(@Nonnull NotebookOutput log, @Nonnull BiFunction<NotebookOutput, Trainable, List<StepRecord>> opt, @Nonnull Layer layer, @Nonnull Tensor[][] data, @Nonnull boolean... mask) {
try {
int inputs = data[0].length;
// Wire the layer's output (fed from all but the last column) into a
// mean-squared-error loss against the last column.
@Nonnull final PipelineNetwork network = new PipelineNetwork(inputs);
network.wrap(new MeanSqLossLayer(), network.add(layer, IntStream.range(0, inputs - 1).mapToObj(i -> network.getInput(i)).toArray(i -> new DAGNode[i])), network.getInput(inputs - 1));
@Nonnull ArrayTrainable trainable = new ArrayTrainable(data, network);
if (0 < mask.length)
trainable.setMask(mask);
List<StepRecord> history;
try {
history = opt.apply(log, trainable);
// Convergence test: best (minimum) fitness seen across all steps must be
// at or below 1e-5; otherwise dump the trained state for inspection.
if (history.stream().mapToDouble(x -> x.fitness).min().orElse(1) > 1e-5) {
if (!network.isFrozen()) {
log.p("This training apply resulted in the following configuration:");
log.code(() -> {
return network.state().stream().map(Arrays::toString).reduce((a, b) -> a + "\n" + b).orElse("");
});
}
// A non-empty mask means inputs were trainable too; show the first one.
if (0 < mask.length) {
log.p("And regressed input:");
log.code(() -> {
return Arrays.stream(data).flatMap(x -> Arrays.stream(x)).limit(1).map(x -> x.prettyPrint()).reduce((a, b) -> a + "\n" + b).orElse("");
});
}
log.p("To produce the following output:");
log.code(() -> {
Result[] array = ConstantResult.batchResultArray(pop(data));
// NOTE(review): eval is @Nullable but dereferenced below — this will NPE
// if layer.eval returns null; confirm that cannot happen here.
@Nullable Result eval = layer.eval(array);
// Release the input results before reading the evaluation output.
for (@Nonnull Result result : array) {
result.freeRef();
result.getData().freeRef();
}
TensorList tensorList = eval.getData();
eval.freeRef();
// Render only the first output tensor, freeing each tensor as it is read.
String str = tensorList.stream().limit(1).map(x -> {
String s = x.prettyPrint();
x.freeRef();
return s;
}).reduce((a, b) -> a + "\n" + b).orElse("");
tensorList.freeRef();
return str;
});
} else {
log.p("Training Converged");
}
} finally {
// Release the trainable and network regardless of optimizer outcome.
trainable.freeRef();
network.freeRef();
}
return history;
} finally {
// This method takes ownership of layer and data: release them even if
// network construction or training threw.
layer.freeRef();
for (@Nonnull Tensor[] tensors : data) {
for (@Nonnull Tensor tensor : tensors) {
tensor.freeRef();
}
}
}
}
Example usage of com.simiacryptus.mindseye.eval.Trainable in the MindsEye project by SimiaCryptus: class TrustSphereTest, method train.
@Override
public void train(@Nonnull final NotebookOutput log, @Nonnull final Layer network, @Nonnull final Tensor[][] trainingData, final TrainingMonitor monitor) {
  log.code(() -> {
    // Attach an entropy loss head so the network can be trained supervised.
    @Nonnull final SimpleLossNetwork lossNetwork = new SimpleLossNetwork(network, new EntropyLossLayer());
    // Sample 10000 rows from the training data per evaluation.
    @Nonnull final Trainable sampledTrainable = new SampledArrayTrainable(trainingData, lossNetwork, 10000);
    // Constrain every layer's update with an adaptive trust sphere region.
    @Nonnull final TrustRegionStrategy sphereStrategy = new TrustRegionStrategy() {
      @Override
      public TrustRegion getRegionPolicy(final Layer layer) {
        return new AdaptiveTrustSphere();
      }
    };
    return new IterativeTrainer(sampledTrainable)
        .setIterationsPerSample(100)
        .setMonitor(monitor)
        .setOrientation(sphereStrategy)
        .setTimeout(3, TimeUnit.MINUTES)
        .setMaxIterations(500)
        .runAndFree();
  });
}
Example usage of com.simiacryptus.mindseye.eval.Trainable in the MindsEye project by SimiaCryptus: class QuadraticLineSearchTest, method train.
@Override
public void train(@Nonnull final NotebookOutput log, @Nonnull final Layer network, @Nonnull final Tensor[][] trainingData, final TrainingMonitor monitor) {
  log.code(() -> {
    // Attach an entropy loss head so the network can be trained supervised.
    @Nonnull final SimpleLossNetwork lossNetwork = new SimpleLossNetwork(network, new EntropyLossLayer());
    // Sample 1000 rows from the training data per evaluation.
    @Nonnull final Trainable sampledTrainable = new SampledArrayTrainable(trainingData, lossNetwork, 1000);
    // Plain gradient descent, but with a quadratic-interpolation line search.
    return new IterativeTrainer(sampledTrainable)
        .setMonitor(monitor)
        .setOrientation(new GradientDescent())
        .setLineSearchFactory((@Nonnull final CharSequence name) -> new QuadraticSearch())
        .setTimeout(3, TimeUnit.MINUTES)
        .setMaxIterations(500)
        .runAndFree();
  });
}
Example usage of com.simiacryptus.mindseye.eval.Trainable in the MindsEye project by SimiaCryptus: class OwlQn, method orient.
/**
 * Produces a line-search cursor whose direction and step behavior appear to
 * implement the OWL-QN orthant-wise L1 projection on top of the inner
 * orientation's (L-BFGS-style) search direction — TODO confirm against
 * Andrew &amp; Gao's OWL-QN description.
 *
 * @param subject     the trainable being optimized
 * @param measurement the current point sample (weights + gradient)
 * @param monitor     the training monitor for logging
 * @return a cursor that steps along the L1-adjusted direction, zeroing any
 *         weight whose sign would flip during the step
 */
@Nonnull
@Override
public LineSearchCursor orient(final Trainable subject, @Nonnull final PointSample measurement, final TrainingMonitor monitor) {
// Delegate to the inner strategy for the base (quasi-Newton) direction.
@Nonnull final SimpleLineSearchCursor gradient = (SimpleLineSearchCursor) inner.orient(subject, measurement, monitor);
@Nonnull final DeltaSet<Layer> searchDirection = gradient.direction.copy();
@Nonnull final DeltaSet<Layer> orthant = new DeltaSet<Layer>();
for (@Nonnull final Layer layer : getLayers(gradient.direction.getMap().keySet())) {
final double[] weights = gradient.direction.getMap().get(layer).target;
@Nullable final double[] delta = gradient.direction.getMap().get(layer).getDelta();
@Nullable final double[] searchDir = searchDirection.get(layer, weights).getDelta();
@Nullable final double[] suborthant = orthant.get(layer, weights).getDelta();
for (int i = 0; i < searchDir.length; i++) {
final int positionSign = sign(weights[i]);
final int directionSign = sign(delta[i]);
// Record the chosen orthant: the weight's sign, or the direction's sign
// for weights at zero. NOTE(review): suborthant is written but never read
// again in this method — confirm whether it is consumed elsewhere.
suborthant[i] = 0 == positionSign ? directionSign : positionSign;
// Add the L1 subgradient term (factor_L1 * sign of the weight).
searchDir[i] += factor_L1 * (weights[i] < 0 ? -1.0 : 1.0);
// If the adjusted direction disagrees in sign with the raw direction,
// fall back to the raw delta for this coordinate.
if (sign(searchDir[i]) != directionSign) {
searchDir[i] = delta[i];
}
}
// NOTE(review): this assert runs after searchDir has already been
// dereferenced in the loop above, so it can never catch a null.
assert null != searchDir;
}
return new SimpleLineSearchCursor(subject, measurement, searchDirection) {
@Nonnull
@Override
public LineSearchPoint step(final double alpha, final TrainingMonitor monitor) {
// Reset weights to the origin before applying the scaled step.
origin.weights.stream().forEach(d -> d.restore());
@Nonnull final DeltaSet<Layer> currentDirection = direction.copy();
direction.getMap().forEach((layer, buffer) -> {
if (null == buffer.getDelta())
return;
@Nullable final double[] currentDelta = currentDirection.get(layer, buffer.target).getDelta();
for (int i = 0; i < buffer.getDelta().length; i++) {
final double prevValue = buffer.target[i];
final double newValue = prevValue + buffer.getDelta()[i] * alpha;
// Orthant projection: a nonzero weight that would cross zero is
// clamped to zero and its direction component is cancelled.
if (sign(prevValue) != 0 && sign(prevValue) != sign(newValue)) {
currentDelta[i] = 0;
buffer.target[i] = 0;
} else {
buffer.target[i] = newValue;
}
}
});
// Re-measure at the new point and report the directional derivative.
@Nonnull final PointSample measure = subject.measure(monitor).setRate(alpha);
return new LineSearchPoint(measure, currentDirection.dot(measure.delta));
}
}.setDirectionType("OWL/QN");
}
End of aggregated usage examples.