use of org.deeplearning4j.nn.conf.NeuralNetConfiguration in project deeplearning4j by deeplearning4j.
the class TransferLearningHelper method initHelperMLN.
private void initHelperMLN() {
    if (applyFrozen) {
        org.deeplearning4j.nn.api.Layer[] layers = origMLN.getLayers();
        for (int i = frozenTill; i >= 0; i--) {
            //unchecked?
            layers[i] = new FrozenLayer(layers[i]);
        }
        origMLN.setLayers(layers);
    }
    //find the index of the last frozen layer
    for (int i = 0; i < origMLN.getnLayers(); i++) {
        if (origMLN.getLayer(i) instanceof FrozenLayer) {
            frozenInputLayer = i;
        }
    }
    //collect the configurations of the unfrozen layers
    List<NeuralNetConfiguration> allConfs = new ArrayList<>();
    for (int i = frozenInputLayer + 1; i < origMLN.getnLayers(); i++) {
        allConfs.add(origMLN.getLayer(i).conf());
    }
    MultiLayerConfiguration c = origMLN.getLayerWiseConfigurations();
    unFrozenSubsetMLN = new MultiLayerNetwork(new MultiLayerConfiguration.Builder()
                    .backprop(c.isBackprop())
                    .inputPreProcessors(c.getInputPreProcessors())
                    .pretrain(c.isPretrain())
                    .backpropType(c.getBackpropType())
                    .tBPTTForwardLength(c.getTbpttFwdLength())
                    .tBPTTBackwardLength(c.getTbpttBackLength())
                    .confs(allConfs)
                    .build());
    unFrozenSubsetMLN.init();
    //copy over params
    for (int i = frozenInputLayer + 1; i < origMLN.getnLayers(); i++) {
        unFrozenSubsetMLN.getLayer(i - frozenInputLayer - 1).setParams(origMLN.getLayer(i).params());
    }
    //unFrozenSubsetMLN.setListeners(origMLN.getListeners());
}
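The method above rebuilds a MultiLayerNetwork containing only the unfrozen layers of the original model. A minimal sketch of driving TransferLearningHelper from user code is shown below; the stand-in network, layer sizes, frozen-layer index and random data are illustrative assumptions, not part of the source above.

// Illustrative sketch: a small stand-in network, then featurize/fitFeaturized through the helper.
import org.deeplearning4j.nn.conf.MultiLayerConfiguration;
import org.deeplearning4j.nn.conf.NeuralNetConfiguration;
import org.deeplearning4j.nn.conf.layers.DenseLayer;
import org.deeplearning4j.nn.conf.layers.OutputLayer;
import org.deeplearning4j.nn.multilayer.MultiLayerNetwork;
import org.deeplearning4j.nn.transferlearning.TransferLearningHelper;
import org.nd4j.linalg.activations.Activation;
import org.nd4j.linalg.dataset.DataSet;
import org.nd4j.linalg.factory.Nd4j;
import org.nd4j.linalg.lossfunctions.LossFunctions;

public class TransferLearningHelperSketch {
    public static void main(String[] args) {
        // Small network standing in for a pretrained model (layer sizes are arbitrary).
        MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder()
                        .list()
                        .layer(0, new DenseLayer.Builder().nIn(4).nOut(3).build())
                        .layer(1, new DenseLayer.Builder().nIn(3).nOut(3).build())
                        .layer(2, new OutputLayer.Builder(LossFunctions.LossFunction.MCXENT)
                                        .activation(Activation.SOFTMAX).nIn(3).nOut(2).build())
                        .build();
        MultiLayerNetwork net = new MultiLayerNetwork(conf);
        net.init();

        // Freeze layers 0..1; initHelperMLN() then builds the unfrozen subset (layer 2) internally.
        TransferLearningHelper helper = new TransferLearningHelper(net, 1);

        // featurize() runs data through the frozen layers once; fitFeaturized() trains
        // only the unfrozen subset network on the resulting activations.
        DataSet data = new DataSet(Nd4j.rand(5, 4), Nd4j.rand(5, 2));
        DataSet featurized = helper.featurize(data);
        helper.fitFeaturized(featurized);
    }
}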
use of org.deeplearning4j.nn.conf.NeuralNetConfiguration in project deeplearning4j by deeplearning4j.
the class LayerUpdater method postApply.
/**
 * Apply the regularization (L1/L2) and mini-batch scaling to the gradient
 *
 * @param layer         the layer whose configuration defines the regularization coefficients
 * @param gradient      the gradient for the given parameter, modified in place
 * @param param         the parameter key (e.g., weight or bias key)
 * @param miniBatchSize the current mini-batch size, used to average the gradient
 */
public void postApply(Layer layer, INDArray gradient, String param, int miniBatchSize) {
    NeuralNetConfiguration conf = layer.conf();
    INDArray params = layer.getParam(param);
    if (conf.isUseRegularization() && conf.getL2ByParam(param) > 0)
        //dC/dw = dC0/dw + lambda/n * w where C0 is pre-l2 cost function
        gradient.addi(params.mul(conf.getL2ByParam(param)));
    if (conf.isUseRegularization() && conf.getL1ByParam(param) > 0)
        gradient.addi(Transforms.sign(params).muli(conf.getL1ByParam(param)));
    if (conf.isMiniBatch())
        gradient.divi(miniBatchSize);
}
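For a single weight, postApply above amounts to the scalar arithmetic below. This is an illustration only; the class name and all values are examples, not taken from the source.

// Scalar illustration of what postApply does to one gradient entry.
public class PostApplySketch {
    public static void main(String[] args) {
        double w = 0.5;           // current parameter value
        double g = 0.1;           // raw gradient dC0/dw
        double l2 = 1e-4;         // L2 coefficient for this parameter
        double l1 = 1e-5;         // L1 coefficient for this parameter
        int miniBatchSize = 32;

        g += l2 * w;              // dC/dw = dC0/dw + l2 * w
        g += l1 * Math.signum(w); // L1 adds l1 * sign(w)
        g /= miniBatchSize;       // average over the mini-batch when isMiniBatch() is true

        System.out.println("regularized, averaged gradient = " + g);
    }
}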
use of org.deeplearning4j.nn.conf.NeuralNetConfiguration in project deeplearning4j by deeplearning4j.
the class LayerUpdater method applyLrDecayPolicy.
/**
 * Update learning rate based on policy
 */
public void applyLrDecayPolicy(LearningRatePolicy decay, Layer layer, int iteration, String variable) {
    NeuralNetConfiguration conf = layer.conf();
    double decayRate = layer.conf().getLrPolicyDecayRate();
    double lr = conf.getLearningRateByParam(variable);
    switch (decay) {
        case Exponential:
            conf.setLearningRateByParam(variable, lr * Math.pow(decayRate, iteration));
            break;
        case Inverse:
            conf.setLearningRateByParam(variable, lr / Math.pow((1 + decayRate * iteration), conf.getLrPolicyPower()));
            break;
        case Step:
            conf.setLearningRateByParam(variable, lr * Math.pow(decayRate, Math.floor(iteration / conf.getLrPolicySteps())));
            break;
        case TorchStep:
            if (iteration > 1 && conf.getLrPolicySteps() % iteration == 0)
                conf.setLearningRateByParam(variable, lr * decayRate);
            break;
        case Poly:
            conf.setLearningRateByParam(variable, lr * Math.pow((1 - ((double) iteration) / conf.getNumIterations()), conf.getLrPolicyPower()));
            break;
        case Sigmoid:
            conf.setLearningRateByParam(variable, lr / (1 + Math.exp(-decayRate * (iteration - conf.getLrPolicySteps()))));
            break;
        case Schedule:
            if (conf.getLayer().getLearningRateSchedule().containsKey(iteration))
                conf.setLearningRateByParam(variable, conf.getLayer().getLearningRateSchedule().get(iteration));
            break;
    }
    if (layer.conf().getLayer().getUpdater() == org.deeplearning4j.nn.conf.Updater.NESTEROVS) {
        applyMomentumDecayPolicy(layer, iteration, variable);
    } else if (updaterForVariable.get(variable) != null) {
        updaterForVariable.get(variable).update(conf.getLearningRateByParam(variable));
    }
}
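As a quick illustration of two of the policies above, the standalone snippet below evaluates the Exponential and Step formulas directly for a few iterations; the base rate, decay rate and step count are arbitrary example values.

// Evaluate the Exponential and Step decay formulas from applyLrDecayPolicy directly.
public class LrDecaySketch {
    public static void main(String[] args) {
        double lr = 1e-2;
        double decayRate = 0.5;
        double steps = 3;
        for (int iteration = 0; iteration < 6; iteration++) {
            double exponential = lr * Math.pow(decayRate, iteration);
            double step = lr * Math.pow(decayRate, Math.floor(iteration / steps));
            System.out.printf("iter %d: exponential=%.6f step=%.6f%n", iteration, exponential, step);
        }
    }
}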
use of org.deeplearning4j.nn.conf.NeuralNetConfiguration in project deeplearning4j by deeplearning4j.
the class TestDecayPolicies method testLearningRateSigmoidDecaySingleLayer.
@Test
public void testLearningRateSigmoidDecaySingleLayer() {
    int iterations = 2;
    double lr = 1e-2;
    double decayRate = 2;
    double steps = 3;
    NeuralNetConfiguration conf = new NeuralNetConfiguration.Builder()
                    .learningRate(lr)
                    .learningRateDecayPolicy(LearningRatePolicy.Sigmoid)
                    .lrPolicyDecayRate(decayRate)
                    .lrPolicySteps(steps)
                    .iterations(iterations)
                    .layer(new DenseLayer.Builder().nIn(nIn).nOut(nOut)
                                    .updater(org.deeplearning4j.nn.conf.Updater.SGD).build())
                    .build();
    int numParams = conf.getLayer().initializer().numParams(conf);
    INDArray params = Nd4j.create(1, numParams);
    Layer layer = conf.getLayer().instantiate(conf, null, 0, params, true);
    Updater updater = UpdaterCreator.getUpdater(layer);
    Gradient gradientActual = new DefaultGradient();
    gradientActual.setGradientFor(DefaultParamInitializer.WEIGHT_KEY, weightGradient);
    gradientActual.setGradientFor(DefaultParamInitializer.BIAS_KEY, biasGradient);
    for (int i = 0; i < iterations; i++) {
        updater.update(layer, gradientActual, i, 1);
        double expectedLr = calcSigmoidDecay(layer.conf().getLearningRateByParam("W"), decayRate, i, steps);
        assertEquals(expectedLr, layer.conf().getLearningRateByParam("W"), 1e-4);
        assertEquals(expectedLr, layer.conf().getLearningRateByParam("b"), 1e-4);
    }
}
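The test relies on a calcSigmoidDecay helper defined elsewhere in TestDecayPolicies. A plausible shape for it, mirroring the Sigmoid branch of applyLrDecayPolicy above, is sketched here; the actual helper may differ.

// Assumed shape of the test helper, mirroring lr / (1 + exp(-decayRate * (iteration - steps))).
private static double calcSigmoidDecay(double lr, double decayRate, int iteration, double steps) {
    return lr / (1 + Math.exp(-decayRate * (iteration - steps)));
}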
use of org.deeplearning4j.nn.conf.NeuralNetConfiguration in project deeplearning4j by deeplearning4j.
the class TestDecayPolicies method testLearningRateExponentialDecaySingleLayer.
@Test
public void testLearningRateExponentialDecaySingleLayer() {
    int iterations = 2;
    double lr = 1e-2;
    double decayRate = 2;
    NeuralNetConfiguration conf = new NeuralNetConfiguration.Builder()
                    .learningRate(lr)
                    .learningRateDecayPolicy(LearningRatePolicy.Exponential)
                    .lrPolicyDecayRate(decayRate)
                    .iterations(iterations)
                    .layer(new DenseLayer.Builder().nIn(nIn).nOut(nOut)
                                    .updater(org.deeplearning4j.nn.conf.Updater.SGD).build())
                    .build();
    int numParams = conf.getLayer().initializer().numParams(conf);
    INDArray params = Nd4j.create(1, numParams);
    Layer layer = conf.getLayer().instantiate(conf, null, 0, params, true);
    Updater updater = UpdaterCreator.getUpdater(layer);
    Gradient gradientActual = new DefaultGradient();
    gradientActual.setGradientFor(DefaultParamInitializer.WEIGHT_KEY, weightGradient);
    gradientActual.setGradientFor(DefaultParamInitializer.BIAS_KEY, biasGradient);
    for (int i = 0; i < iterations; i++) {
        updater.update(layer, gradientActual, i, 1);
        double expectedLr = calcExponentialDecay(lr, decayRate, i);
        assertEquals(expectedLr, layer.conf().getLearningRateByParam("W"), 1e-4);
        assertEquals(expectedLr, layer.conf().getLearningRateByParam("b"), 1e-4);
    }
}
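Likewise, this test uses a calcExponentialDecay helper defined elsewhere in the test class. A plausible implementation, mirroring the Exponential branch of applyLrDecayPolicy above, is sketched here; the actual helper may differ.

// Assumed shape of the test helper, mirroring lr * decayRate^iteration.
private static double calcExponentialDecay(double lr, double decayRate, int iteration) {
    return lr * Math.pow(decayRate, iteration);
}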