Use of org.nd4j.linalg.api.ops.impl.transforms.comparison.OldMax in project nd4j by deeplearning4j.
Class AdaMaxUpdater, method applyUpdater.
/**
 * Applies the AdaMax update rule, overwriting {@code gradient} in place with the update.
 * <p>
 * The updater state is advanced first ({@code m = B_1 * m + (1 - B_1) * grad} and
 * {@code u = max(B_2 * u, |grad|)}), after which {@code gradient} is replaced by
 * {@code alphat * m / u}, where {@code alphat = learningRate / (1 - B_1^(iteration + 1))}.
 *
 * @param gradient  the gradient to transform; modified in place (its original values are lost)
 * @param iteration the current iteration, used for the {@code B_1^(iteration + 1)} term and
 *                  for looking up the learning rate
 * @param epoch     the current epoch, passed to the learning rate lookup
 * @throws IllegalStateException if the state arrays {@code m} or {@code u} have not been set
 */
@Override
public void applyUpdater(INDArray gradient, int iteration, int epoch) {
    if (m == null || u == null) {
        throw new IllegalStateException("Updater has not been initialized with view state");
    }

    // First state array: m = B_1 * m + (1 - B_1) * grad.
    // gradient.mul(...) returns a copy, so the gradient itself is still untouched here.
    m.muli(config.getBeta1());
    m.addi(gradient.mul(1 - config.getBeta1()));

    // Second state array: u = max(B_2 * u, |grad|).
    u.muli(config.getBeta2());
    // Taking the absolute value in place is fine: the signed gradient values are not needed
    // past this point, and the array is fully overwritten at the end of the method.
    Transforms.abs(gradient, false);
    Nd4j.getExecutioner().exec(new OldMax(u, gradient, u, u.length()));

    // Step size: learningRate / (1 - B_1^(iteration + 1)).
    double beta1Power = FastMath.pow(config.getBeta1(), iteration + 1);
    double currentLearningRate = config.getLearningRate(iteration, epoch);
    double stepSize = currentLearningRate / (1.0 - beta1Power);
    boolean unusableStep = Double.isNaN(stepSize) || Double.isInfinite(stepSize) || stepSize == 0.0;
    if (unusableStep) {
        // Fall back to the configured epsilon when the computed step is NaN, infinite or zero.
        stepSize = config.getEpsilon();
    }

    // Tiny offset so the division below cannot produce NaNs in the parameters.
    u.addi(1e-32);

    // gradient = stepSize * m / u
    gradient.assign(m);
    gradient.muli(stepSize);
    gradient.divi(u);
}
Use of org.nd4j.linalg.api.ops.impl.transforms.comparison.OldMax in project nd4j by deeplearning4j.
Class GradCheckTransforms, method testPairwiseTransforms.
/**
 * Exercises the pairwise (two-input) SameDiff transforms.
 * <p>
 * For each test index a fresh SameDiff graph is built with two input variables, the
 * forward output is compared against a reference result computed directly on the input
 * arrays, and (unless the case opts out via {@code skipBackward}) the gradients are
 * validated with {@code GradCheckUtil.checkGradients}. Gradient-check failures are
 * collected and reported together at the end so that one failing op does not hide others.
 */
@Test
public void testPairwiseTransforms() {
    /*
    add, sub, mul, div, rsub, rdiv
    eq, neq, gt, lt, gte, lte, or, and, xor
    min, max
    mmul
    tensormmul
     */
    // Test transforms (pairwise)
    Nd4j.getRandom().setSeed(12345);
    List<String> allSkipped = new ArrayList<>();
    List<String> allFailed = new ArrayList<>();

    // Number of switch cases below (indices 0..22)
    final int numTests = 23;
    final int nOut = 4;
    final int minibatch = 5;

    for (int i = 0; i < numTests; i++) {
        boolean skipBackward = false;
        SameDiff sd = SameDiff.create();
        SDVariable in1 = sd.var("in1", new int[] { -1, nOut });
        SDVariable in2 = sd.var("in2", new int[] { -1, nOut });
        INDArray ia = Nd4j.randn(minibatch, nOut);
        INDArray ib = Nd4j.randn(minibatch, nOut);
        SDVariable t;
        INDArray expOut;
        switch (i) {
            case 0:
                t = in1.add(in2);
                expOut = ia.add(ib);
                break;
            case 1:
                t = in1.sub(in2);
                expOut = ia.sub(ib);
                break;
            case 2:
                t = in1.mul(in2);
                expOut = ia.mul(ib);
                break;
            case 3:
                // Disabled case (presumably div, going by the op list above - TODO confirm):
                // nothing is built or checked for this index, the iteration is skipped entirely.
                continue;
            case 4:
                t = in1.rsub(in2);
                expOut = ia.rsub(ib);
                break;
            case 5:
                t = in1.rdiv(in2);
                expOut = ia.rdiv(ib);
                break;
            case 6:
                t = sd.eq(in1, in2);
                expOut = ia.eq(ib);
                break;
            case 7:
                t = sd.neq(in1, in2);
                expOut = ia.neq(ib);
                break;
            case 8:
                t = sd.gt(in1, in2);
                expOut = ia.gt(ib);
                break;
            case 9:
                t = sd.lt(in1, in2);
                expOut = ia.lt(ib);
                break;
            case 10:
                t = sd.gte(in1, in2);
                // dup() only provides an output buffer of the right shape for the op
                expOut = ia.dup();
                Nd4j.getExecutioner().exec(new GreaterThanOrEqual(new INDArray[] { ia, ib }, new INDArray[] { expOut }));
                break;
            case 11:
                t = sd.lte(in1, in2);
                // dup() only provides an output buffer of the right shape for the op
                expOut = ia.dup();
                Nd4j.getExecutioner().exec(new LessThanOrEqual(new INDArray[] { ia, ib }, new INDArray[] { expOut }));
                break;
            case 12:
                // Logical ops: replace the random inputs with Bernoulli(0.5) samples
                ia = Nd4j.getExecutioner().exec(new BernoulliDistribution(ia, 0.5));
                ib = Nd4j.getExecutioner().exec(new BernoulliDistribution(ib, 0.5));
                t = sd.or(in1, in2);
                expOut = Transforms.or(ia, ib);
                break;
            case 13:
                // mmul needs a [nOut, nOut] right-hand side
                ib = Nd4j.randn(nOut, nOut);
                t = sd.mmul(in1, in2);
                expOut = ia.mmul(ib);
                break;
            case 14:
                t = sd.max(in1, in2);
                expOut = Nd4j.getExecutioner().execAndReturn(new OldMax(ia, ib, ia.dup(), ia.length()));
                break;
            case 15:
                t = sd.min(in1, in2);
                expOut = Nd4j.getExecutioner().execAndReturn(new OldMin(ia, ib, ia.dup(), ia.length()));
                break;
            case 16:
                ia = Nd4j.getExecutioner().exec(new BernoulliDistribution(ia, 0.5));
                ib = Nd4j.getExecutioner().exec(new BernoulliDistribution(ib, 0.5));
                t = sd.and(in1, in2);
                expOut = Transforms.and(ia, ib);
                break;
            case 17:
                ia = Nd4j.getExecutioner().exec(new BernoulliDistribution(ia, 0.5));
                ib = Nd4j.getExecutioner().exec(new BernoulliDistribution(ib, 0.5));
                t = sd.xor(in1, in2);
                expOut = Transforms.xor(ia, ib);
                break;
            case 18:
                t = sd.assign(in1, in2);
                expOut = ib;
                break;
            case 19:
                t = sd.atan2(in1, in2);
                // Note: y,x order for samediff; x,y order for transforms
                expOut = Transforms.atan2(ib, ia);
                skipBackward = true;
                break;
            case 20:
                t = sd.mergeAdd(in1, in2, in2);
                expOut = ia.add(ib).add(ib);
                break;
            case 21:
                // Fixed inputs for truncated division; the variables are re-declared with shape [1, 2]
                ia = Nd4j.create(new float[] { 2, 4 });
                ib = Nd4j.create(new float[] { 42, 2 });
                in1 = sd.var("in1", new int[] { 1, 2 });
                in2 = sd.var("in2", new int[] { 1, 2 });
                t = in1.truncatedDiv(in2);
                expOut = Nd4j.create(ia.shape(), ia.ordering());
                Nd4j.getExecutioner().exec(new TruncateDivOp(ia, ib, expOut));
                skipBackward = true;
                break;
            case 22:
                t = in1.squaredDifference(in2);
                expOut = Nd4j.create(ia.shape(), ia.ordering());
                DynamicCustomOp squareDiff = DynamicCustomOp.builder("squaredsubtract").addInputs(ia, ib).addOutputs(expOut).build();
                Nd4j.getExecutioner().exec(squareDiff);
                skipBackward = true;
                break;
            default:
                throw new RuntimeException("Unexpected test index: " + i);
        }

        DifferentialFunction[] funcs = sd.functions();
        String name = funcs[0].opName();
        String msg = "test: " + i + " - " + name;
        log.info("*** Starting test: " + msg);

        // Adds a "loss" variable (mean of the op output) to the graph; the returned
        // variable itself is not needed here.
        sd.mean("loss", t);
        sd.associateArrayWithVariable(ia, in1);
        sd.associateArrayWithVariable(ib, in2);
        sd.exec();

        // Forward pass: compare against the reference result computed above
        INDArray out = t.getArr();
        assertEquals(msg, expOut, out);

        boolean ok;
        if (skipBackward) {
            ok = true;
            msg += " - SKIPPED";
            allSkipped.add(msg);
        } else {
            try {
                ok = GradCheckUtil.checkGradients(sd);
            } catch (Exception e) {
                // Record the failure and keep going so the remaining ops are still checked;
                // the stack trace goes through the logger together with the test label.
                msg += " - EXCEPTION";
                log.error(msg, e);
                ok = false;
            }
        }
        if (!ok) {
            allFailed.add(msg);
        }
    }

    if (!allSkipped.isEmpty()) {
        log.info("All backward skipped transforms: " + allSkipped);
        log.info(allSkipped.size() + " backward passes were skipped.");
    }
    if (!allFailed.isEmpty()) {
        log.error("All failed transforms: " + allFailed);
        fail(allFailed.size() + " transforms failed");
    }
}
Aggregations