Usage of com.simiacryptus.mindseye.network.DAGNetwork in the MindsEye project by SimiaCryptus: the run() method of the ValidatingTrainer class.
/**
 * Runs the full training regimen. Each epoch executes every configured training
 * phase, re-measures the validation subject, and adapts the per-epoch iteration
 * budget and training-sample count toward the configured targets. Stochastic
 * noise is cleared before every validation measurement so that measurements are
 * repeatable. Terminates on timeout, phase halt, or (randomized) convergence.
 *
 * @return the mean of the final validation measurement
 * @throws RuntimeException wrapping any failure raised during training
 */
public double run() {
  try {
    final long timeoutAt = System.currentTimeMillis() + timeout.toMillis();
    clearStochasticNoise();
    @Nonnull final EpochParams epochParams = new EpochParams(timeoutAt, epochIterations, getTrainingSize(), validationSubject.measure(monitor));
    int epochNumber = 0;
    int iterationNumber = 0;
    int lastImprovement = 0;
    double lowestValidation = Double.POSITIVE_INFINITY;
    while (true) {
      if (shouldHalt(monitor, timeoutAt)) {
        monitor.log("Training halted");
        break;
      }
      monitor.log(String.format("Epoch parameters: %s, %s", epochParams.trainingSize, epochParams.iterations));
      @Nonnull final List<TrainingPhase> regimen = getRegimen();
      final long seed = System.nanoTime();
      // Run each phase of the regimen with a shared seed; the first phase is primary.
      final List<EpochResult> epochResults = IntStream.range(0, regimen.size()).mapToObj(i -> {
        final TrainingPhase phase = getRegimen().get(i);
        return runPhase(epochParams, phase, i, seed);
      }).collect(Collectors.toList());
      final EpochResult primaryPhase = epochResults.get(0);
      iterationNumber += primaryPhase.iterations;
      final double trainingDelta = primaryPhase.currentPoint.getMean() / primaryPhase.priorMean;
      // Clear noise again so the validation measurement is deterministic.
      clearStochasticNoise();
      final PointSample currentValidation = validationSubject.measure(monitor);
      // Ratio of training improvement to validation improvement, in log space.
      final double overtraining = Math.log(trainingDelta) / Math.log(currentValidation.getMean() / epochParams.validation.getMean());
      final double validationDelta = currentValidation.getMean() / epochParams.validation.getMean();
      final double adj1 = Math.pow(Math.log(getTrainingTarget()) / Math.log(validationDelta), adjustmentFactor);
      final double adj2 = Math.pow(overtraining / getOvertrainingTarget(), adjustmentFactor);
      final double validationMean = currentValidation.getMean();
      if (validationMean < lowestValidation) {
        lowestValidation = validationMean;
        lastImprovement = iterationNumber;
      }
      monitor.log(String.format(
          "Epoch %d result apply %s iterations, %s/%s samples: {validation *= 2^%.5f; training *= 2^%.3f; Overtraining = %.2f}, {itr*=%.2f, len*=%.2f} %s since improvement; %.4f validation time",
          ++epochNumber, primaryPhase.iterations, epochParams.trainingSize, getMaxTrainingSize(),
          Math.log(validationDelta) / Math.log(2), Math.log(trainingDelta) / Math.log(2), overtraining,
          adj1, adj2, iterationNumber - lastImprovement, validatingMeasurementTime.getAndSet(0) / 1e9));
      if (!primaryPhase.continueTraining) {
        monitor.log(String.format("Training %d runPhase halted", epochNumber));
        break;
      }
      if (epochParams.trainingSize >= getMaxTrainingSize()) {
        // At maximum training size: probabilistically declare convergence, with a
        // staleness-based disappointment counter as a secondary stop condition.
        final double roll = FastRandom.INSTANCE.random();
        if (roll > Math.pow(2 - validationDelta, pessimism)) {
          monitor.log(String.format("Training randomly converged: %3f", roll));
          break;
        } else {
          if (iterationNumber - lastImprovement > improvmentStaleThreshold) {
            if (disappointments.incrementAndGet() > getDisappointmentThreshold()) {
              monitor.log(String.format("Training converged after %s iterations", iterationNumber - lastImprovement));
              break;
            } else {
              monitor.log(String.format("Training failed to converged on %s attempt after %s iterations", disappointments.get(), iterationNumber - lastImprovement));
            }
          } else {
            disappointments.set(0);
          }
        }
      }
      if (validationDelta < 1.0 && trainingDelta < 1.0) {
        // Both metrics improved: tune the iteration budget and sample count when the
        // adjustment factors fall outside the tolerance band around 1.0.
        if (adj1 < 1 - adjustmentTolerance || adj1 > 1 + adjustmentTolerance) {
          epochParams.iterations = Math.max(getMinEpochIterations(), Math.min(getMaxEpochIterations(), (int) (primaryPhase.iterations * adj1)));
        }
        // BUGFIX: the band test was inverted (adj2 < 1 + tol || adj2 > 1 - tol), which is
        // true for every adj2 when tol > 0; use the same outside-the-band test as adj1.
        if (adj2 < 1 - adjustmentTolerance || adj2 > 1 + adjustmentTolerance) {
          epochParams.trainingSize = Math.max(0, Math.min(Math.max(getMinTrainingSize(), Math.min(getMaxTrainingSize(), (int) (epochParams.trainingSize * adj2))), epochParams.trainingSize));
        }
      } else {
        // A metric regressed: reset to a single iteration and recompute the sample count.
        // NOTE(review): the outer min with the current trainingSize prevents the 5x factor
        // from ever growing the set unless getMinTrainingSize() exceeds it — confirm intent.
        epochParams.trainingSize = Math.max(0, Math.min(Math.max(getMinTrainingSize(), Math.min(getMaxTrainingSize(), epochParams.trainingSize * 5)), epochParams.trainingSize));
        epochParams.iterations = 1;
      }
      epochParams.validation = currentValidation;
    }
    clearStochasticNoise();
    return epochParams.validation.getMean();
  } catch (@Nonnull final Throwable e) {
    throw new RuntimeException(e);
  }
}

/**
 * Clears accumulated noise on every StochasticComponent inside the validation
 * subject when it is a DAGNetwork; otherwise a no-op.
 */
private void clearStochasticNoise() {
  if (validationSubject.getLayer() instanceof DAGNetwork) {
    ((DAGNetwork) validationSubject.getLayer()).visitLayers(layer -> {
      if (layer instanceof StochasticComponent)
        ((StochasticComponent) layer).clearNoise();
    });
  }
}
Usage of com.simiacryptus.mindseye.network.DAGNetwork in the MindsEye project by SimiaCryptus: the eval() method of the StochasticSamplingSubnetLayer class.
/**
 * Evaluates the inner layer once per configured random seed, feeding the same
 * counted inputs each time, and returns the averaged result. Each pass shuffles
 * every StochasticComponent (including the inner layer itself, if applicable)
 * with that pass's seed and propagates this layer's precision setting.
 *
 * @param inObj the input results
 * @return the average of the per-seed evaluations
 */
@Nullable
@Override
public Result eval(@Nonnull final Result... inObj) {
  // Wrap each input so reference counting covers all sampled evaluations.
  final Result[] countedInputs = new Result[inObj.length];
  for (int idx = 0; idx < inObj.length; idx++) {
    countedInputs[idx] = new CountingResult(inObj[idx], samples);
  }
  final Result[] perSeedResults = Arrays.stream(getSeeds()).mapToObj(seed -> {
    final Layer inner = getInner();
    if (inner instanceof DAGNetwork) {
      ((DAGNetwork) inner).visitNodes(node -> {
        final Layer nodeLayer = node.getLayer();
        if (nodeLayer instanceof StochasticComponent) {
          ((StochasticComponent) nodeLayer).shuffle(seed);
        }
        if (nodeLayer instanceof MultiPrecision<?>) {
          ((MultiPrecision) nodeLayer).setPrecision(precision);
        }
      });
    }
    if (inner instanceof MultiPrecision<?>) {
      ((MultiPrecision) inner).setPrecision(precision);
    }
    if (inner instanceof StochasticComponent) {
      ((StochasticComponent) inner).shuffle(seed);
    }
    inner.setFrozen(isFrozen());
    return inner.eval(countedInputs);
  }).toArray(Result[]::new);
  return average(perSeedResults, precision);
}
Usage of com.simiacryptus.mindseye.network.DAGNetwork in the MindsEye project by SimiaCryptus: the add() method of the ExplodedConvolutionLeg class.
/**
 * Adds this leg's sub-layers to the network of the given node.
 * When the input band count equals the output band count there is exactly one
 * sub-layer and it is attached directly; otherwise every sub-layer is attached
 * to the input and their outputs are concatenated band-wise.
 *
 * @param input the upstream node to attach to
 * @return the head node of the added subgraph
 */
public DAGNode add(@Nonnull final DAGNode input) {
  assertAlive();
  final DAGNetwork network = input.getNetwork();
  // (removed unused local: filterDimensions was read but never used)
  if (getInputBands() == this.convolutionParams.outputBands) {
    // Single sub-layer covers all bands; attach it directly.
    assert 1 == subLayers.size();
    return network.add(subLayers.get(0), input);
  }
  // Attach every sub-layer to the input and concatenate their outputs.
  return network.wrap(new ImgConcatLayer().setMaxBands(this.convolutionParams.outputBands).setPrecision(this.convolutionParams.precision).setParallel(CudaSettings.INSTANCE.isConv_para_2()), subLayers.stream().map(l -> network.add(l, input)).toArray(i -> new DAGNode[i])).setParallel(CudaSettings.INSTANCE.isConv_para_2());
}
Usage of com.simiacryptus.mindseye.network.DAGNetwork in the MindsEye project by SimiaCryptus: the add() method of the ExplodedConvolutionGrid class.
/**
 * Adds this exploded convolution grid to the network of the given node,
 * optionally wrapping the input and output in padding layers when a custom
 * (non-zero, non-null) padding is configured.
 *
 * @param input the upstream node to attach to
 * @return the head node of the added subgraph
 */
public DAGNode add(@Nonnull DAGNode input) {
  assertAlive();
  DAGNetwork network = input.getNetwork();
  // Baseline padding is zero; padding is "custom" only when explicitly set to a
  // non-null value different from this default.
  int defaultPaddingX = 0;
  int defaultPaddingY = 0;
  boolean customPaddingX = this.convolutionParams.paddingX != null && convolutionParams.paddingX != defaultPaddingX;
  boolean customPaddingY = this.convolutionParams.paddingY != null && convolutionParams.paddingY != defaultPaddingY;
  final DAGNode paddedInput;
  if (customPaddingX || customPaddingY) {
    // Compute the pre-padding delta per axis: the amount by which the configured
    // padding exceeds the default in either direction, zero when within range.
    int x;
    if (this.convolutionParams.paddingX < -defaultPaddingX) {
      x = this.convolutionParams.paddingX + defaultPaddingX;
    } else if (this.convolutionParams.paddingX > defaultPaddingX) {
      x = this.convolutionParams.paddingX - defaultPaddingX;
    } else {
      x = 0;
    }
    int y;
    if (this.convolutionParams.paddingY < -defaultPaddingY) {
      y = this.convolutionParams.paddingY + defaultPaddingY;
    } else if (this.convolutionParams.paddingY > defaultPaddingY) {
      y = this.convolutionParams.paddingY - defaultPaddingY;
    } else {
      y = 0;
    }
    if (x != 0 || y != 0) {
      // Apply the padding adjustment before the convolution sub-layers.
      paddedInput = network.wrap(new ImgZeroPaddingLayer(x, y).setPrecision(convolutionParams.precision), input);
    } else {
      paddedInput = input;
    }
  } else {
    paddedInput = input;
  }
  InnerNode output;
  if (subLayers.size() == 1) {
    // Single leg: attach it directly to the (possibly padded) input.
    output = (InnerNode) subLayers.get(0).add(paddedInput);
  } else {
    // Multiple legs: each leg becomes a pipeline subnet bound to its band range
    // inside a linear subnet layer.
    ImgLinearSubnetLayer linearSubnetLayer = new ImgLinearSubnetLayer();
    subLayers.forEach(leg -> {
      PipelineNetwork subnet = new PipelineNetwork();
      leg.add(subnet.getHead());
      linearSubnetLayer.add(leg.fromBand, leg.toBand, subnet);
    });
    boolean isParallel = CudaSettings.INSTANCE.isConv_para_1();
    linearSubnetLayer.setPrecision(convolutionParams.precision).setParallel(isParallel);
    output = network.wrap(linearSubnetLayer, paddedInput).setParallel(isParallel);
  }
  if (customPaddingX || customPaddingY) {
    // Post-adjustment per axis, clamped to be non-positive.
    // NOTE(review): presumably ImgZeroPaddingLayer with negative arguments crops
    // the image back to the configured size — confirm against that layer's impl.
    int x = !customPaddingX ? 0 : (this.convolutionParams.paddingX - defaultPaddingX);
    int y = !customPaddingY ? 0 : (this.convolutionParams.paddingY - defaultPaddingY);
    if (x > 0)
      x = 0;
    if (y > 0)
      y = 0;
    if (x != 0 || y != 0) {
      return network.wrap(new ImgZeroPaddingLayer(x, y).setPrecision(convolutionParams.precision), output);
    }
  }
  return output;
}
Usage of com.simiacryptus.mindseye.network.DAGNetwork in the MindsEye project by SimiaCryptus: the add() method of the ImageClassifier class.
/**
 * Adds a layer to the pipeline model. Explodable layers are first exploded and
 * the method recurses on the result (naming each node of an exploded
 * DAGNetwork); the original layer's reference is released after explosion.
 * Non-explodable layers are wrapped into the model directly.
 *
 * @param layer the layer to add
 * @param model the pipeline network to add it to
 * @return the layer (or exploded layer) that was added
 */
@Nonnull
protected static Layer add(@Nonnull Layer layer, @Nonnull PipelineNetwork model) {
  name(layer);
  if (!(layer instanceof Explodable)) {
    // Plain layer: wrap it into the model as-is.
    model.wrap(layer);
    return layer;
  }
  final Layer exploded = ((Explodable) layer).explode();
  try {
    if (exploded instanceof DAGNetwork) {
      ((DAGNetwork) exploded).visitNodes(node -> name(node.getLayer()));
      log.info(String.format("Exploded %s to %s (%s nodes)", layer.getName(), exploded.getClass().getSimpleName(), ((DAGNetwork) exploded).getNodes().size()));
    } else {
      log.info(String.format("Exploded %s to %s (%s nodes)", layer.getName(), exploded.getClass().getSimpleName(), exploded.getName()));
    }
    // Recurse in case the exploded form is itself explodable.
    return add(exploded, model);
  } finally {
    // Release the original layer's reference regardless of logging/recursion outcome.
    layer.freeRef();
  }
}
Aggregations — end of DAGNetwork usage examples.