Use of org.nd4j.linalg.api.ops.impl.transforms.comparison.Eps in project deeplearning4j by deeplearning4j.
The class BackTrackLineSearch, method optimize:
// Returns the fraction of the step size if a good step was found;
// returns 0.0 if it could not step in the given direction.
// step == alam and score == f in the book (Numerical Recipes' lnsrch).
/**
 * @param parameters the parameters to optimize
 * @param gradients the gradient at the current parameters
 * @param searchDirection the direction along which the line search should move
 * @return the next step size
 * @throws InvalidStepException if a valid step cannot be taken
 */
@Override
public double optimize(INDArray parameters, INDArray gradients, INDArray searchDirection) throws InvalidStepException {
    double test, stepMin, step, step2, oldStep, tmpStep;
    double rhs1, rhs2, a, b, disc, score, scoreAtStart, score2;
    minObjectiveFunction = (stepFunction instanceof NegativeDefaultStepFunction || stepFunction instanceof NegativeGradientStepFunction);
    Level1 l1Blas = Nd4j.getBlasWrapper().level1();
    // 2-norm of the search direction (used below to rescale overly large steps)
    double sum = l1Blas.nrm2(searchDirection);
    // directional derivative of the objective along the search direction, g'(0)
    double slope = -1.0 * Nd4j.getBlasWrapper().dot(searchDirection, gradients);
    log.debug("slope = {}", slope);
    // test = largest gradient component relative to the corresponding parameter (|parameters| floored at 1)
    INDArray maxOldParams = abs(parameters);
    Nd4j.getExecutioner().exec(new ScalarSetValue(maxOldParams, 1));
    INDArray testMatrix = abs(gradients).divi(maxOldParams);
    test = testMatrix.max(Integer.MAX_VALUE).getDouble(0);
    // initially, step = 1.0, i.e. take the full Newton step
    step = 1.0;
    // smallest step size that can still pass the relative-convergence test below
    stepMin = relTolx / test;
    oldStep = 0.0;
    step2 = 0.0;
    score = score2 = scoreAtStart = layer.score();
    double bestScore = score;
    double bestStepSize = 1.0;
    if (log.isTraceEnabled()) {
        double norm1 = l1Blas.asum(searchDirection);
        int infNormIdx = l1Blas.iamax(searchDirection);
        double infNorm = FastMath.max(Float.NEGATIVE_INFINITY, searchDirection.getDouble(infNormIdx));
        log.trace("ENTERING BACKTRACK\n");
        log.trace("Entering BackTrackLineSearch, value = " + scoreAtStart + ",\ndirection.oneNorm:" + norm1
                        + " direction.infNorm:" + infNorm);
    }
    if (sum > stepMax) {
        log.warn("Attempted step too big. scaling: sum= {}, stepMax= {}", sum, stepMax);
        searchDirection.muli(stepMax / sum);
    }
    //        if (slope >= 0.0) {
    //            throw new InvalidStepException("Slope " + slope + " is >= 0.0. Expect slope < 0.0 when minimizing objective function");
    //        }
    // find maximum lambda:
    // converge when (delta x) / x < REL_TOLX for all coordinates;
    // the largest step size that triggers this threshold is precomputed and saved in stepMin.
    // look for a step size along the direction given by searchDirection
    INDArray candidateParameters = null;
    for (int iteration = 0; iteration < maxIterations; iteration++) {
        if (log.isTraceEnabled()) {
            log.trace("BackTrack loop iteration {} : step={}, oldStep={}", iteration, step, oldStep);
            log.trace("before step, x.1norm: {} \nstep: {} \noldStep: {}", parameters.norm1(Integer.MAX_VALUE), step, oldStep);
        }
        if (step == oldStep)
            throw new IllegalArgumentException("Current step == oldStep");
        // take the step: candidateParameters = parameters + step * searchDirection
        // (the sign and scaling are applied by stepFunction)
        candidateParameters = parameters.dup('f');
        stepFunction.step(candidateParameters, searchDirection, step);
        oldStep = step;
        if (log.isTraceEnabled()) {
            double norm1 = l1Blas.asum(candidateParameters);
            log.trace("after step, x.1norm: " + norm1);
        }
        // check for convergence on delta x
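        // Eps is an elementwise comparison op: it writes 1.0 where the corresponding entries of the
        // two arrays are equal to within a small epsilon tolerance, and 0.0 otherwise. If the sum of
        // the result equals length(), no parameter moved measurably, so the search has converged.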
        if ((step < stepMin) || Nd4j.getExecutioner()
                        .execAndReturn(new Eps(parameters, candidateParameters,
                                        Shape.toOffsetZeroCopy(candidateParameters, 'f'), candidateParameters.length()))
                        .sum(Integer.MAX_VALUE).getDouble(0) == candidateParameters.length()) {
            score = setScoreFor(parameters);
            log.debug("EXITING BACKTRACK: Jump too small (stepMin = {}). Exiting and using original params. Score = {}", stepMin, score);
            return 0.0;
        }
        score = setScoreFor(candidateParameters);
        log.debug("Model score after step = {}", score);
        // track the best step size seen so far, for use if we terminate on maxIterations
        if ((minObjectiveFunction && score < bestScore) || (!minObjectiveFunction && score > bestScore)) {
            bestScore = score;
            bestStepSize = step;
        }
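        // Armijo sufficient-decrease test (the first Wolfe condition): accept the step when
        //     f(x + step * p) <= f(x) + ALF * step * slope
        // where slope is the directional derivative g'(0) computed above and ALF is a small positive constant.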
        // sufficient decrease in the cost/loss function (Armijo / Wolfe condition)
        if (minObjectiveFunction && score <= scoreAtStart + ALF * step * slope) {
            log.debug("Sufficient decrease (Wolfe cond.), exiting backtrack on iter {}: score={}, scoreAtStart={}", iteration, score, scoreAtStart);
            if (score > scoreAtStart)
                throw new IllegalStateException("Function did not decrease: score = " + score + " > " + scoreAtStart + " = scoreAtStart");
            return step;
        }
        // sufficient increase in the cost/loss function (Armijo / Wolfe condition, when maximizing)
        if (!minObjectiveFunction && score >= scoreAtStart + ALF * step * slope) {
            log.debug("Sufficient increase (Wolfe cond.), exiting backtrack on iter {}: score={}, scoreAtStart={}", iteration, score, scoreAtStart);
            if (score < scoreAtStart)
                throw new IllegalStateException("Function did not increase: score = " + score + " < " + scoreAtStart + " = scoreAtStart");
            return step;
        }
        // if the value is infinite or NaN, i.e. we have jumped into unstable territory, scale down the jump
        else if (Double.isInfinite(score) || Double.isInfinite(score2) || Double.isNaN(score) || Double.isNaN(score2)) {
            log.warn("Value is infinite after jump. oldStep={}. score={}, score2={}. Scaling back step size...", oldStep, score, score2);
            tmpStep = .2 * step;
            if (step < stepMin) {
                // convergence on delta x
                score = setScoreFor(parameters);
                log.warn("EXITING BACKTRACK: Jump too small (step={} < stepMin={}). Exiting and using previous parameters. Value={}", step, stepMin, score);
                return 0.0;
            }
        }
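        // Otherwise backtrack: choose a trial step by polynomial interpolation of the objective along
        // the search direction, then clamp it (above to 0.5 * step here, below to 0.1 * step further down).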
        else if (minObjectiveFunction) {
            if (step == 1.0) // first time through: minimize a quadratic model of the objective
                tmpStep = -slope / (2.0 * (score - scoreAtStart - slope));
            else {
                // later iterations: minimize a cubic model fitted to the two most recent scores
                rhs1 = score - scoreAtStart - step * slope;
                rhs2 = score2 - scoreAtStart - step2 * slope;
                if (step == step2)
                    throw new IllegalStateException("FAILURE: dividing by step-step2 which equals 0. step=" + step);
                double stepSquared = step * step;
                double step2Squared = step2 * step2;
                a = (rhs1 / stepSquared - rhs2 / step2Squared) / (step - step2);
                b = (-step2 * rhs1 / stepSquared + step * rhs2 / step2Squared) / (step - step2);
                if (a == 0.0)
                    tmpStep = -slope / (2.0 * b);
                else {
                    disc = b * b - 3.0 * a * slope;
                    if (disc < 0.0) {
                        tmpStep = 0.5 * step;
                    } else if (b <= 0.0)
                        tmpStep = (-b + FastMath.sqrt(disc)) / (3.0 * a);
                    else
                        tmpStep = -slope / (b + FastMath.sqrt(disc));
                }
                if (tmpStep > 0.5 * step)
                    tmpStep = 0.5 * step; // enforce lambda <= 0.5 * lambda_1
            }
        }
        else {
            // maximizing the objective: same interpolation with the sign of the score differences flipped
            if (step == 1.0) // first time through: minimize a quadratic model of the objective
                tmpStep = -slope / (2.0 * (scoreAtStart - score - slope));
            else {
                rhs1 = scoreAtStart - score - step * slope;
                rhs2 = scoreAtStart - score2 - step2 * slope;
                if (step == step2)
                    throw new IllegalStateException("FAILURE: dividing by step-step2 which equals 0. step=" + step);
                double stepSquared = step * step;
                double step2Squared = step2 * step2;
                a = (rhs1 / stepSquared - rhs2 / step2Squared) / (step - step2);
                b = (-step2 * rhs1 / stepSquared + step * rhs2 / step2Squared) / (step - step2);
                if (a == 0.0)
                    tmpStep = -slope / (2.0 * b);
                else {
                    disc = b * b - 3.0 * a * slope;
                    if (disc < 0.0) {
                        tmpStep = 0.5 * step;
                    } else if (b <= 0.0)
                        tmpStep = (-b + FastMath.sqrt(disc)) / (3.0 * a);
                    else
                        tmpStep = -slope / (b + FastMath.sqrt(disc));
                }
                if (tmpStep > 0.5 * step)
                    tmpStep = 0.5 * step; // enforce lambda <= 0.5 * lambda_1
            }
        }
        step2 = step;
        score2 = score;
        log.debug("tmpStep: {}", tmpStep);
        // enforce lambda >= 0.1 * lambda_1
        step = Math.max(tmpStep, .1f * step);
    }
    if (minObjectiveFunction && bestScore < scoreAtStart) {
        // return the best step size found
        log.debug("Exited line search after maxIterations termination condition; bestStepSize={}, bestScore={}, scoreAtStart={}", bestStepSize, bestScore, scoreAtStart);
        return bestStepSize;
    } else if (!minObjectiveFunction && bestScore > scoreAtStart) {
        // return the best step size found
        log.debug("Exited line search after maxIterations termination condition; bestStepSize={}, bestScore={}, scoreAtStart={}", bestStepSize, bestScore, scoreAtStart);
        return bestStepSize;
    } else {
        log.debug("Exited line search after maxIterations termination condition; score did not improve (bestScore={}, scoreAtStart={}). Resetting parameters", bestScore, scoreAtStart);
        setScoreFor(parameters);
        return 0.0;
    }
}
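For reference, a sketch of the interpolation scheme the loop above implements (it matches the lnsrch routine from Numerical Recipes, which the "in the book" comment at the top evidently refers to; here $\lambda$ is the variable named step, $g(\lambda) = f(x + \lambda\,p)$, and $g'(0)$ is the slope computed at the start of the method). On the first backtrack ($\lambda_1 = 1$), the minimizer of a quadratic model through $g(0)$, $g'(0)$ and $g(1)$ is

$$\lambda_{\text{new}} = \frac{-g'(0)}{2\,[\,g(1) - g(0) - g'(0)\,]}.$$

On later backtracks, a cubic model $g(\lambda) \approx a\lambda^3 + b\lambda^2 + g'(0)\,\lambda + g(0)$ is fitted through the two most recent evaluations $(\lambda_1, g(\lambda_1))$ and $(\lambda_2, g(\lambda_2))$; in the code, rhs1 $= g(\lambda_1) - g(0) - g'(0)\,\lambda_1$ and rhs2 $= g(\lambda_2) - g(0) - g'(0)\,\lambda_2$, and the model's local minimizer is

$$\lambda_{\text{new}} = \frac{-b + \sqrt{b^2 - 3a\,g'(0)}}{3a},$$

evaluated in the equivalent, numerically stabler form $-g'(0)/\bigl(b + \sqrt{b^2 - 3a\,g'(0)}\bigr)$ when $b > 0$. The result is clamped to $[\,0.1\,\lambda_1,\; 0.5\,\lambda_1\,]$, so each backtrack shrinks the step but never collapses it to zero.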
Use of org.nd4j.linalg.api.ops.impl.transforms.comparison.Eps in project nd4j by deeplearning4j.
The class Nd4jTestsC, method testEps3:
@Test
public void testEps3() {
    INDArray first = Nd4j.linspace(1, 10, 10);
    INDArray second = Nd4j.linspace(20, 30, 10);
    INDArray expAllZeros = Nd4j.getExecutioner().execAndReturn(new Eps(first, second, Nd4j.create(10), 10));
    INDArray expAllOnes = Nd4j.getExecutioner().execAndReturn(new Eps(first, first, Nd4j.create(10), 10));
    System.out.println(expAllZeros);
    System.out.println(expAllOnes);
    assertEquals(0, expAllZeros.sumNumber().doubleValue(), 0.0);
    assertEquals(10, expAllOnes.sumNumber().doubleValue(), 0.0);
}
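Taken together, the two assertions document the op's contract: Eps compares its first two arguments elementwise, writing 1.0 into the result buffer where a pair of values is equal to within a small epsilon tolerance and 0.0 otherwise. Comparing an array with itself therefore sums to its length, while comparing two ranges that never overlap sums to zero.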
Use of org.nd4j.linalg.api.ops.impl.transforms.comparison.Eps in project nd4j by deeplearning4j.
The class Nd4jTestsC, method testEps2:
@Test
public void testEps2() {
    // ten elements, each 0.01
    INDArray first = Nd4j.valueArrayOf(10, 1e-2);
    // ten elements, each 0.0
    INDArray second = Nd4j.zeros(10);
    INDArray expAllZeros1 = Nd4j.getExecutioner().execAndReturn(new Eps(first, second, Nd4j.create(new int[] {1, 10}, 'f'), 10));
    INDArray expAllZeros2 = Nd4j.getExecutioner().execAndReturn(new Eps(second, first, Nd4j.create(new int[] {1, 10}, 'f'), 10));
    System.out.println(expAllZeros1);
    System.out.println(expAllZeros2);
    assertEquals(0, expAllZeros1.sumNumber().doubleValue(), 0.0);
    assertEquals(0, expAllZeros2.sumNumber().doubleValue(), 0.0);
}
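Here 0.01 and 0.0 differ by far more than the epsilon tolerance, so both comparisons yield all zeros; running the op with the arguments in both orders additionally checks that the comparison is symmetric.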
Use of org.nd4j.linalg.api.ops.impl.transforms.comparison.Eps in project nd4j by deeplearning4j.
The class Nd4jTestsC, method testEps:
@Test
public void testEps() {
    INDArray ones = Nd4j.ones(5);
    double sum = Nd4j.getExecutioner().exec(new Eps(ones, ones, ones, ones.length())).z().sumNumber().doubleValue();
    assertEquals(5, sum, 1e-1);
}
Use of org.nd4j.linalg.api.ops.impl.transforms.comparison.Eps in project nd4j by deeplearning4j.
The class NDArrayTestsFortran, method testEps:
@Test
public void testEps() {
    INDArray ones = Nd4j.ones(5);
    double sum = Nd4j.getExecutioner().exec(new Eps(ones, ones, ones, ones.length())).z().sumNumber().doubleValue();
    assertEquals(5, sum, 1e-1);
}