Use of org.deeplearning4j.nn.conf.NeuralNetConfiguration in project deeplearning4j (by deeplearning4j).
From the class TestUpdaters, the method testPretrain:
@Test
public void testPretrain() {
    double lr = 0.05;
    gradient.setGradientFor(PretrainParamInitializer.VISIBLE_BIAS_KEY, vbiasGradient.dup());

    // Copy of the gradient: baseline for the expected values in the pretrain pass
    Gradient gradientDup = new DefaultGradient();
    gradientDup.setGradientFor(DefaultParamInitializer.WEIGHT_KEY, weightGradient.dup());
    gradientDup.setGradientFor(DefaultParamInitializer.BIAS_KEY, biasGradient.dup());
    gradientDup.setGradientFor(PretrainParamInitializer.VISIBLE_BIAS_KEY, vbiasGradient.dup());

    // Second copy: baseline for the non-pretrain pass
    Gradient gradientDup2 = new DefaultGradient();
    gradientDup2.setGradientFor(DefaultParamInitializer.WEIGHT_KEY, weightGradient.dup());
    gradientDup2.setGradientFor(DefaultParamInitializer.BIAS_KEY, biasGradient.dup());
    gradientDup2.setGradientFor(PretrainParamInitializer.VISIBLE_BIAS_KEY, vbiasGradient.dup());
    NeuralNetConfiguration conf = new NeuralNetConfiguration.Builder()
                    .learningRate(lr).seed(42)
                    .layer(new org.deeplearning4j.nn.conf.layers.RBM.Builder()
                                    .lossFunction(LossFunctions.LossFunction.COSINE_PROXIMITY)
                                    .activation(Activation.IDENTITY)
                                    .updater(org.deeplearning4j.nn.conf.Updater.SGD)
                                    .nIn(nIn).nOut(nOut).build())
                    .build();
    int numParams = conf.getLayer().initializer().numParams(conf);

    boolean preTrain = true;
    conf.setPretrain(preTrain);
    INDArray params = Nd4j.create(1, numParams);
    Layer layer = conf.getLayer().instantiate(conf, null, 0, params, true);
    Updater updater = UpdaterCreator.getUpdater(layer);
    updater.update(layer, gradient, -1, 1);

    // With pretrain == true, every gradient (including the visible bias) is scaled by the learning rate
    for (Map.Entry<String, INDArray> entry : gradientDup.gradientForVariable().entrySet()) {
        val = entry.getValue();
        gradExpected = val.mul(lr);
        assertEquals(gradExpected, gradient.getGradientFor(entry.getKey()));
    }
    assertEquals(lr, layer.conf().getLayer().getLearningRate(), 1e-4);

    preTrain = false;
    conf.setPretrain(preTrain);
    gradient = gradientDup;
    params = Nd4j.create(1, numParams);
    layer = conf.getLayer().instantiate(conf, null, 0, params, true);
    updater.update(layer, gradient, -1, 1);

    for (Map.Entry<String, INDArray> entry : gradientDup2.gradientForVariable().entrySet()) {
        val = entry.getValue();
        // With pretrain == false, the visible bias ("vb") gradient is not scaled by the learning rate
        if (!entry.getKey().equals("vb"))
            gradExpected = val.mul(lr);
        else
            gradExpected = val;
        assertEquals(gradExpected, gradient.getGradientFor(entry.getKey()));
    }
    assertEquals(lr, layer.conf().getLayer().getLearningRate(), 1e-4);
}
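The test above relies on fields defined elsewhere in TestUpdaters (nIn, nOut, gradient, weightGradient, biasGradient, vbiasGradient, val, gradExpected). A minimal sketch of how such fixtures could be set up is shown below; the sizes and values are assumptions for illustration, not the exact fixture from the test class.

// Hypothetical fixture setup (sizes and values are assumptions)
int nIn = 3, nOut = 2;
INDArray weightGradient = Nd4j.ones(nIn, nOut).muli(0.1);  // dummy weight gradient
INDArray biasGradient = Nd4j.ones(1, nOut).muli(0.1);      // dummy hidden-bias gradient
INDArray vbiasGradient = Nd4j.ones(1, nIn).muli(0.1);      // dummy visible-bias gradient (pretrain only)
Gradient gradient = new DefaultGradient();
gradient.setGradientFor(DefaultParamInitializer.WEIGHT_KEY, weightGradient.dup());
gradient.setGradientFor(DefaultParamInitializer.BIAS_KEY, biasGradient.dup());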
Use of org.deeplearning4j.nn.conf.NeuralNetConfiguration in project deeplearning4j (by deeplearning4j).
From the class TestUpdaters, the method testAdaDeltaUpdate:
@Test
public void testAdaDeltaUpdate() {
    INDArray dxSquared;
    Map<String, INDArray> msg = new HashMap<>();
    Map<String, INDArray> msdx = new HashMap<>();
    double rho = 0.85;

    NeuralNetConfiguration conf = new NeuralNetConfiguration.Builder()
                    .rho(rho)
                    .layer(new DenseLayer.Builder().nIn(nIn).nOut(nOut)
                                    .updater(org.deeplearning4j.nn.conf.Updater.ADADELTA)
                                    .epsilon(Nd4j.EPS_THRESHOLD).build())
                    .build();
    int numParams = conf.getLayer().initializer().numParams(conf);
    INDArray params = Nd4j.create(1, numParams);
    Layer layer = conf.getLayer().instantiate(conf, null, 0, params, true);
    Updater updater = UpdaterCreator.getUpdater(layer);
    int updaterStateSize = updater.stateSizeForLayer(layer);
    INDArray updaterState = Nd4j.create(1, updaterStateSize);
    updater.setStateViewArray(layer, updaterState, true);

    Gradient gradientDup = new DefaultGradient();
    gradientDup.setGradientFor(DefaultParamInitializer.WEIGHT_KEY, weightGradient.dup());
    gradientDup.setGradientFor(DefaultParamInitializer.BIAS_KEY, biasGradient.dup());
    for (int i = 0; i < 2; i++) {
        updater.update(layer, gradient, i, 1);

        for (Map.Entry<String, INDArray> entry : gradientDup.gradientForVariable().entrySet()) {
            key = entry.getKey();
            val = entry.getValue();
            INDArray msgTmp = msg.get(key);
            INDArray msdxTmp = msdx.get(key);

            // Initialise the accumulators on the first iteration
            if (msgTmp == null) {
                msgTmp = Nd4j.zeros(val.shape());
                msdxTmp = Nd4j.zeros(val.shape());
            }

            // Decaying average of squared gradients
            msgTmp.muli(rho);
            msgTmp.addi(val.mul(val).muli(1 - rho));

            // Expected update: RMS of previous updates / RMS of gradients, times the gradient
            gradExpected = Transforms.sqrt(msdxTmp.add(Nd4j.EPS_THRESHOLD))
                            .divi(Transforms.sqrt(msgTmp.add(Nd4j.EPS_THRESHOLD))).muli(val);
            gradientDup.setGradientFor(key, gradExpected);
            assertEquals(gradExpected, gradient.getGradientFor(entry.getKey()));

            // Decaying average of squared updates
            msdxTmp.muli(rho);
            dxSquared = gradExpected.mul(gradExpected);
            msdxTmp.addi(dxSquared.muli(1 - rho));

            msg.put(key, msgTmp);
            msdx.put(key, msdxTmp);
        }
        assertEquals(rho, layer.conf().getLayer().getRho(), 1e-4);
    }
}
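For reference, the loop above reproduces the standard AdaDelta accumulators and update, where msg tracks the decaying average of squared gradients, msdx the decaying average of squared updates, and Nd4j.EPS_THRESHOLD plays the role of the stabilizing constant eps:

    E[g^2]_t   = rho * E[g^2]_{t-1}  + (1 - rho) * g_t^2
    dx_t       = sqrt(E[dx^2]_{t-1} + eps) / sqrt(E[g^2]_t + eps) * g_t
    E[dx^2]_t  = rho * E[dx^2]_{t-1} + (1 - rho) * dx_t^2

gradExpected in the test corresponds to dx_t, i.e. the magnitude of the parameter update before it is applied to the weights.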
Use of org.deeplearning4j.nn.conf.NeuralNetConfiguration in project deeplearning4j (by deeplearning4j).
From the class MultiLayerNetwork, the method init:
/**
* Initialize the MultiLayerNetwork, optionally with an existing parameters array.
* If an existing parameters array is specified, it will be used (and the values will not be modified) in the network;
* if no parameters array is specified, parameters will be initialized randomly according to the network configuration.
*
 * @param parameters Network parameters. May be null. If null, parameters are initialized randomly.
* @param cloneParametersArray Whether the parameter array (if any) should be cloned, or used directly
*/
public void init(INDArray parameters, boolean cloneParametersArray) {
    if (layerWiseConfigurations == null || layers == null)
        intializeConfigurations();
    if (initCalled)
        return;
    int nLayers = getnLayers();
    if (nLayers < 1)
        throw new IllegalStateException("Unable to create network: number of layers is less than 1");
    if (this.layers == null || this.layers[0] == null) {
        if (this.layers == null)
            this.layers = new Layer[nLayers];

        //First: Work out total length of (backprop) params
        int paramLength = 0;
        int[] nParamsPerLayer = new int[nLayers];
        for (int i = 0; i < nLayers; i++) {
            NeuralNetConfiguration conf = layerWiseConfigurations.getConf(i);
            nParamsPerLayer[i] = conf.getLayer().initializer().numParams(conf);
            paramLength += nParamsPerLayer[i];
        }

        //Create parameters array, if required
        boolean initializeParams;
        if (parameters != null) {
            if (!parameters.isRowVector())
                throw new IllegalArgumentException("Invalid parameters: should be a row vector");
            if (parameters.length() != paramLength)
                throw new IllegalArgumentException("Invalid parameters: expected length " + paramLength
                                + ", got length " + parameters.length());
            if (cloneParametersArray)
                flattenedParams = parameters.dup();
            else
                flattenedParams = parameters;
            initializeParams = false;
        } else {
            flattenedParams = Nd4j.create(1, paramLength);
            initializeParams = true;
        }

        // construct multi-layer
        int paramCountSoFar = 0;
        for (int i = 0; i < nLayers; i++) {
            INDArray paramsView;
            if (nParamsPerLayer[i] > 0) {
                paramsView = flattenedParams.get(NDArrayIndex.point(0),
                                NDArrayIndex.interval(paramCountSoFar, paramCountSoFar + nParamsPerLayer[i]));
            } else {
                paramsView = null;
            }
            paramCountSoFar += nParamsPerLayer[i];

            NeuralNetConfiguration conf = layerWiseConfigurations.getConf(i);
            layers[i] = conf.getLayer().instantiate(conf, listeners, i, paramsView, initializeParams);
            layerMap.put(conf.getLayer().getLayerName(), layers[i]);
        }
        initCalled = true;
    }

    //Set parameters in MultiLayerNetwork.defaultConfiguration for later use in BaseOptimizer.setupSearchState() etc
    //Keyed as per backprop()
    defaultConfiguration.clearVariables();
    List<String> variables = defaultConfiguration.variables(false);
    for (int i = 0; i < layers.length; i++) {
        for (String s : layers[i].conf().variables()) {
            variables.add(i + "_" + s);
        }
    }
}
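As a usage illustration, init(parameters, cloneParametersArray) is what allows a network to be rebuilt around an existing flattened parameter vector. A minimal sketch follows, assuming a simple two-layer configuration; the configuration itself is an assumption for illustration, not part of the code above.

// Hypothetical configuration; only the init(...) calls are the subject of the sketch
MultiLayerConfiguration mlc = new NeuralNetConfiguration.Builder()
                .seed(12345)
                .list()
                .layer(0, new DenseLayer.Builder().nIn(4).nOut(3).build())
                .layer(1, new OutputLayer.Builder().nIn(3).nOut(2).build())
                .build();

MultiLayerNetwork net = new MultiLayerNetwork(mlc);
net.init();                               // parameters == null: random initialization
INDArray saved = net.params().dup();      // flattened parameter row vector

MultiLayerNetwork restored = new MultiLayerNetwork(mlc);
restored.init(saved, true);               // reuse existing parameters, cloning the array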
Use of org.deeplearning4j.nn.conf.NeuralNetConfiguration in project deeplearning4j (by deeplearning4j).
From the class MultiLayerNetwork, the method initGradientsView:
/**
 * This method initializes the flattened gradients array (used in backprop) and sets the appropriate subset of it in all layers.
* As a general rule, this shouldn't ever need to be called manually when doing training via fit(DataSet) or fit(DataSetIterator)
*/
public void initGradientsView() {
    if (layers == null)
        init();
    int nLayers = layers.length;

    //First: Work out total length of (backprop) params
    int backpropParamLength = 0;
    int[] nParamsPerLayer = new int[nLayers];
    for (int i = 0; i < nLayers; i++) {
        NeuralNetConfiguration conf = layerWiseConfigurations.getConf(i);
        nParamsPerLayer[i] = layers[i].conf().getLayer().initializer().numParams(conf);
        backpropParamLength += nParamsPerLayer[i];
    }

    //No need to initialize, as each layer will do it each iteration anyway
    flattenedGradients = Nd4j.zeros(new int[] {1, backpropParamLength}, 'f');

    int backpropParamsSoFar = 0;
    for (int i = 0; i < layers.length; i++) {
        if (nParamsPerLayer[i] == 0)
            continue; //This layer doesn't have any parameters...
        INDArray thisLayerGradView = flattenedGradients.get(NDArrayIndex.point(0),
                        NDArrayIndex.interval(backpropParamsSoFar, backpropParamsSoFar + nParamsPerLayer[i]));
        layers[i].setBackpropGradientsViewArray(thisLayerGradView);
        backpropParamsSoFar += nParamsPerLayer[i];
    }
}
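The per-layer gradient views only work because ND4J's get(...) with point/interval indices returns a view backed by the same buffer as flattenedGradients. A short standalone ND4J sketch of that behaviour (not code from MultiLayerNetwork; it uses the same Nd4j and NDArrayIndex classes as the method above):

INDArray flat = Nd4j.zeros(new int[] {1, 6}, 'f');
INDArray view = flat.get(NDArrayIndex.point(0), NDArrayIndex.interval(2, 5));
view.assign(7);            // writing through the view...
System.out.println(flat);  // ...is visible in the flattened array: [0.0, 0.0, 7.0, 7.0, 7.0, 0.0]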
Use of org.deeplearning4j.nn.conf.NeuralNetConfiguration in project deeplearning4j (by deeplearning4j).
From the class IterativeReduceFlatMapAdapter, the method call:
@Override
public Iterable<INDArray> call(Iterator<DataSet> dataSetIterator) throws Exception {
    if (!dataSetIterator.hasNext()) {
        return Collections.singletonList(Nd4j.zeros(params.value().shape()));
    }

    // Merge all DataSets in this partition into a single DataSet
    List<DataSet> collect = new ArrayList<>();
    while (dataSetIterator.hasNext()) {
        collect.add(dataSetIterator.next());
    }
    DataSet data = DataSet.merge(collect, false);
    log.debug("Training on " + data.labelCounts());

    // Rebuild the layer from the broadcast JSON configuration
    NeuralNetConfiguration conf = NeuralNetConfiguration.fromJson(json);
    int numParams = conf.getLayer().initializer().numParams(conf);
    INDArray thisParams = Nd4j.create(1, numParams);
    Layer network = conf.getLayer().instantiate(conf, null, 0, thisParams, true);
    network.setBackpropGradientsViewArray(Nd4j.create(1, numParams));

    // Set the broadcast parameters on the local copy of the network
    INDArray val = params.value().unsafeDuplication();
    if (val.length() != network.numParams())
        throw new IllegalStateException("Network did not have same number of parameters as the broadcast set parameters");
    network.setParams(val);

    if (network instanceof OutputLayer) {
        OutputLayer o = (OutputLayer) network;
        o.fit(data);
    } else {
        network.fit(data.getFeatureMatrix());
    }
    return Collections.singletonList(network.params());
}
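Each partition returns its locally trained parameter vector; on the driver these results are then typically combined, for example by parameter averaging. A minimal sketch of such a combining step, where collected is a hypothetical List<INDArray> holding the per-partition results and network is the model being updated (both names are assumptions for illustration):

INDArray sum = Nd4j.zeros(collected.get(0).shape());
for (INDArray partitionParams : collected)
    sum.addi(partitionParams);               // element-wise sum of all per-partition parameter vectors
INDArray averaged = sum.divi(collected.size());
network.setParams(averaged);                  // install the averaged parameters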