Use of org.deeplearning4j.datasets.iterator.impl.IrisDataSetIterator in project deeplearning4j by deeplearning4j.
From the class TestComputationGraphNetwork, method testOptimizationAlgorithmsSearchBasic:
@Test
public void testOptimizationAlgorithmsSearchBasic() {
    DataSetIterator iter = new IrisDataSetIterator(1, 1);
    OptimizationAlgorithm[] oas = new OptimizationAlgorithm[] {
            OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT,
            OptimizationAlgorithm.LINE_GRADIENT_DESCENT,
            OptimizationAlgorithm.CONJUGATE_GRADIENT,
            OptimizationAlgorithm.LBFGS };
    for (OptimizationAlgorithm oa : oas) {
        System.out.println(oa);
        ComputationGraphConfiguration conf = new NeuralNetConfiguration.Builder()
                .optimizationAlgo(oa).iterations(1)
                .graphBuilder()
                .addInputs("input")
                .addLayer("first", new DenseLayer.Builder().nIn(4).nOut(5).build(), "input")
                .addLayer("output", new OutputLayer.Builder().nIn(5).nOut(3).build(), "first")
                .setOutputs("output")
                .pretrain(false).backprop(true)
                .build();
        ComputationGraph net = new ComputationGraph(conf);
        net.init();
        net.fit(iter.next());
    }
}
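The test above constructs IrisDataSetIterator(1, 1), a single Iris example in a single mini-batch, purely to exercise each OptimizationAlgorithm once. For reference, the sketch below (a hypothetical standalone class, not part of the test suite) shows the iterator's two-argument constructor, mini-batch size followed by total example count, iterating the full 150-example dataset; the shape comments follow from Iris having 4 features and 3 classes.

import java.util.Arrays;

import org.deeplearning4j.datasets.iterator.impl.IrisDataSetIterator;
import org.nd4j.linalg.dataset.DataSet;
import org.nd4j.linalg.dataset.api.iterator.DataSetIterator;

public class IrisIteratorSketch {
    public static void main(String[] args) {
        // First constructor argument: mini-batch size; second: total number of examples to iterate.
        DataSetIterator iter = new IrisDataSetIterator(10, 150);
        while (iter.hasNext()) {
            DataSet batch = iter.next();
            // Each mini-batch holds 10 Iris examples: features [10, 4], one-hot labels [10, 3].
            System.out.println(Arrays.toString(batch.getFeatureMatrix().shape())
                    + " -> " + Arrays.toString(batch.getLabels().shape()));
        }
    }
}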
Use of org.deeplearning4j.datasets.iterator.impl.IrisDataSetIterator in project deeplearning4j by deeplearning4j.
From the class BackPropMLPTest, method testMLP:
@Test
public void testMLP() {
    //Simple mini-batch test with multiple hidden layers
    MultiLayerConfiguration conf = getIrisMLPSimpleConfig(new int[] { 5, 4, 3 }, Activation.SIGMOID);
    System.out.println(conf);
    MultiLayerNetwork network = new MultiLayerNetwork(conf);
    network.init();
    DataSetIterator iter = new IrisDataSetIterator(10, 100);
    while (iter.hasNext()) {
        network.fit(iter.next());
    }
}
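getIrisMLPSimpleConfig(int[] hiddenLayerSizes, Activation activation) is defined elsewhere in BackPropMLPTest and is not included in this excerpt. The sketch below is an assumed reconstruction, added only so the tests read in isolation: a plain-SGD MLP on the 4-feature, 3-class Iris data with the given hidden activation and a softmax output. The learning rate of 0.1 and the MCXENT loss are assumptions, chosen to be consistent with the 0.1f step size and the (output minus labels) delta used in the manual calculations of testSingleExampleWeightUpdates further down; the seed is arbitrary.

// Assumed reconstruction (not from the excerpt) of the configuration helper used by these tests.
private static MultiLayerConfiguration getIrisMLPSimpleConfig(int[] hiddenLayerSizes, Activation activationFunction) {
    NeuralNetConfiguration.ListBuilder builder = new NeuralNetConfiguration.Builder()
            .updater(org.deeplearning4j.nn.conf.Updater.SGD)
            .learningRate(0.1)      // assumed; matches the 0.1f factor in the manual weight update below
            .seed(12345)            // arbitrary
            .list();
    for (int i = 0; i < hiddenLayerSizes.length; i++) {
        int nIn = (i == 0 ? 4 : hiddenLayerSizes[i - 1]);   // 4 Iris input features into the first layer
        builder.layer(i, new DenseLayer.Builder().nIn(nIn).nOut(hiddenLayerSizes[i])
                .activation(activationFunction).build());
    }
    builder.layer(hiddenLayerSizes.length, new OutputLayer.Builder(LossFunctions.LossFunction.MCXENT)
            .nIn(hiddenLayerSizes[hiddenLayerSizes.length - 1]).nOut(3)     // 3 Iris classes
            .activation(Activation.SOFTMAX).build());
    return builder.pretrain(false).backprop(true).build();
}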
Use of org.deeplearning4j.datasets.iterator.impl.IrisDataSetIterator in project deeplearning4j by deeplearning4j.
From the class BackPropMLPTest, method testSingleExampleWeightUpdates:
@Test
public void testSingleExampleWeightUpdates() {
    //Simplest possible case: 1 hidden layer, 1 hidden neuron, batch size of 1.
    //Manually calculate weight updates (entirely outside of DL4J and ND4J)
    // and compare expected and actual weights after backprop
    DataSetIterator iris = new IrisDataSetIterator(1, 10);
    MultiLayerNetwork network = new MultiLayerNetwork(getIrisMLPSimpleConfig(new int[] { 1 }, Activation.SIGMOID));
    network.init();
    Layer[] layers = network.getLayers();
    final boolean printCalculations = true;
    while (iris.hasNext()) {
        DataSet data = iris.next();
        INDArray x = data.getFeatureMatrix();
        INDArray y = data.getLabels();
        float[] xFloat = asFloat(x);
        float[] yFloat = asFloat(y);

        //Do forward pass:
        //Hidden layer
        INDArray l1Weights = layers[0].getParam(DefaultParamInitializer.WEIGHT_KEY).dup();
        //Output layer
        INDArray l2Weights = layers[1].getParam(DefaultParamInitializer.WEIGHT_KEY).dup();
        INDArray l1Bias = layers[0].getParam(DefaultParamInitializer.BIAS_KEY).dup();
        INDArray l2Bias = layers[1].getParam(DefaultParamInitializer.BIAS_KEY).dup();
        float[] l1WeightsFloat = asFloat(l1Weights);
        float[] l2WeightsFloat = asFloat(l2Weights);
        float l1BiasFloat = l1Bias.getFloat(0);
        float[] l2BiasFloatArray = asFloat(l2Bias);

        //z = w*x + b
        float hiddenUnitPreSigmoid = dotProduct(l1WeightsFloat, xFloat) + l1BiasFloat;
        //a = sigma(z)
        float hiddenUnitPostSigmoid = sigmoid(hiddenUnitPreSigmoid);
        float[] outputPreSoftmax = new float[3];
        //Normally a matrix multiplication here, but only one hidden unit in this trivial example
        for (int i = 0; i < 3; i++) {
            outputPreSoftmax[i] = hiddenUnitPostSigmoid * l2WeightsFloat[i] + l2BiasFloatArray[i];
        }
        float[] outputPostSoftmax = softmax(outputPreSoftmax);

        //Do backward pass:
        //out - labels
        float[] deltaOut = vectorDifference(outputPostSoftmax, yFloat);
        //deltaHidden = sigmaPrime(hiddenUnitZ) * sum_k (w_jk * \delta_k); here, only one j
        float deltaHidden = 0.0f;
        for (int i = 0; i < 3; i++) deltaHidden += l2WeightsFloat[i] * deltaOut[i];
        deltaHidden *= derivOfSigmoid(hiddenUnitPreSigmoid);

        //Calculate weight/bias updates:
        //dL/dw = delta * (activation of prev. layer)
        //dL/db = delta
        float[] dLdwOut = new float[3];
        for (int i = 0; i < dLdwOut.length; i++) dLdwOut[i] = deltaOut[i] * hiddenUnitPostSigmoid;
        float[] dLdwHidden = new float[4];
        for (int i = 0; i < dLdwHidden.length; i++) dLdwHidden[i] = deltaHidden * xFloat[i];
        float[] dLdbOut = deltaOut;
        float dLdbHidden = deltaHidden;
        if (printCalculations) {
            System.out.println("deltaOut = " + Arrays.toString(deltaOut));
            System.out.println("deltaHidden = " + deltaHidden);
            System.out.println("dLdwOut = " + Arrays.toString(dLdwOut));
            System.out.println("dLdbOut = " + Arrays.toString(dLdbOut));
            System.out.println("dLdwHidden = " + Arrays.toString(dLdwHidden));
            System.out.println("dLdbHidden = " + dLdbHidden);
        }

        //Calculate new parameters:
        //w_i = w_i - (learningRate)/(batchSize) * sum_j (dL_j/dw_i)
        //b_i = b_i - (learningRate)/(batchSize) * sum_j (dL_j/db_i)
        //Which for batch size of one (here) is simply:
        //w_i = w_i - learningRate * dL/dw
        //b_i = b_i - learningRate * dL/db
        float[] expectedL1WeightsAfter = new float[4];
        float[] expectedL2WeightsAfter = new float[3];
        float expectedL1BiasAfter = l1BiasFloat - 0.1f * dLdbHidden;
        float[] expectedL2BiasAfter = new float[3];
        for (int i = 0; i < 4; i++) expectedL1WeightsAfter[i] = l1WeightsFloat[i] - 0.1f * dLdwHidden[i];
        for (int i = 0; i < 3; i++) expectedL2WeightsAfter[i] = l2WeightsFloat[i] - 0.1f * dLdwOut[i];
        for (int i = 0; i < 3; i++) expectedL2BiasAfter[i] = l2BiasFloatArray[i] - 0.1f * dLdbOut[i];

        //Finally, do back-prop on network, and compare parameters vs. expected parameters
        network.fit(data);

        /*
        INDArray l1WeightsAfter = layers[0].getParam(DefaultParamInitializer.WEIGHT_KEY).dup(); //Hidden layer
        INDArray l2WeightsAfter = layers[1].getParam(DefaultParamInitializer.WEIGHT_KEY).dup(); //Output layer
        INDArray l1BiasAfter = layers[0].getParam(DefaultParamInitializer.BIAS_KEY).dup();
        INDArray l2BiasAfter = layers[1].getParam(DefaultParamInitializer.BIAS_KEY).dup();
        float[] l1WeightsFloatAfter = asFloat(l1WeightsAfter);
        float[] l2WeightsFloatAfter = asFloat(l2WeightsAfter);
        float l1BiasFloatAfter = l1BiasAfter.getFloat(0);
        float[] l2BiasFloatAfter = asFloat(l2BiasAfter);
        if (printCalculations) {
            System.out.println("Expected L1 weights = " + Arrays.toString(expectedL1WeightsAfter));
            System.out.println("Actual L1 weights = " + Arrays.toString(asFloat(l1WeightsAfter)));
            System.out.println("Expected L2 weights = " + Arrays.toString(expectedL2WeightsAfter));
            System.out.println("Actual L2 weights = " + Arrays.toString(asFloat(l2WeightsAfter)));
            System.out.println("Expected L1 bias = " + expectedL1BiasAfter);
            System.out.println("Actual L1 bias = " + Arrays.toString(asFloat(l1BiasAfter)));
            System.out.println("Expected L2 bias = " + Arrays.toString(expectedL2BiasAfter));
            System.out.println("Actual L2 bias = " + Arrays.toString(asFloat(l2BiasAfter)));
        }
        float eps = 1e-4f;
        assertArrayEquals(l1WeightsFloatAfter, expectedL1WeightsAfter, eps);
        assertArrayEquals(l2WeightsFloatAfter, expectedL2WeightsAfter, eps);
        assertEquals(l1BiasFloatAfter, expectedL1BiasAfter, eps);
        assertArrayEquals(l2BiasFloatAfter, expectedL2BiasAfter, eps);
        */
        System.out.println("\n\n--------------");
    }
}
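The scalar helpers used above (asFloat, dotProduct, sigmoid, derivOfSigmoid, softmax, vectorDifference) are defined elsewhere in BackPropMLPTest and are not part of this excerpt. The sketch below is an assumed reconstruction that matches only how the test calls them; the originals may differ in detail. It assumes the usual INDArray import (org.nd4j.linalg.api.ndarray.INDArray).

// Assumed reconstructions of the helper methods referenced by testSingleExampleWeightUpdates.
public static float[] asFloat(INDArray arr) {
    int len = (int) arr.length();
    float[] out = new float[len];
    for (int i = 0; i < len; i++) out[i] = arr.getFloat(i);
    return out;
}

public static float dotProduct(float[] x, float[] y) {
    float sum = 0.0f;
    for (int i = 0; i < x.length; i++) sum += x[i] * y[i];
    return sum;
}

public static float sigmoid(float in) {
    return (float) (1.0 / (1.0 + Math.exp(-in)));
}

public static float derivOfSigmoid(float in) {
    float s = sigmoid(in);
    return s * (1.0f - s);      // sigma'(z) = sigma(z) * (1 - sigma(z))
}

public static float[] softmax(float[] in) {
    float[] out = new float[in.length];
    float sum = 0.0f;
    for (int i = 0; i < in.length; i++) {
        out[i] = (float) Math.exp(in[i]);
        sum += out[i];
    }
    for (int i = 0; i < in.length; i++) out[i] /= sum;
    return out;
}

public static float[] vectorDifference(float[] x, float[] y) {
    float[] out = new float[x.length];
    for (int i = 0; i < x.length; i++) out[i] = x[i] - y[i];
    return out;
}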
Use of org.deeplearning4j.datasets.iterator.impl.IrisDataSetIterator in project deeplearning4j by deeplearning4j.
From the class BackPropMLPTest, method testIrisMiniBatchGradients:
private static void testIrisMiniBatchGradients(int miniBatchSize, int[] hiddenLayerSizes, Activation activationFunction) {
    int totalExamples = 10 * miniBatchSize;
    if (totalExamples > 150) {
        totalExamples = miniBatchSize * (150 / miniBatchSize);
    }
    if (miniBatchSize > 150) {
        fail();
    }
    DataSetIterator iris = new IrisDataSetIterator(miniBatchSize, totalExamples);
    MultiLayerNetwork network = new MultiLayerNetwork(getIrisMLPSimpleConfig(hiddenLayerSizes, Activation.SIGMOID));
    network.init();
    Layer[] layers = network.getLayers();
    int nLayers = layers.length;
    while (iris.hasNext()) {
        DataSet data = iris.next();
        INDArray x = data.getFeatureMatrix();
        INDArray y = data.getLabels();

        //Do forward pass:
        INDArray[] layerWeights = new INDArray[nLayers];
        INDArray[] layerBiases = new INDArray[nLayers];
        for (int i = 0; i < nLayers; i++) {
            layerWeights[i] = layers[i].getParam(DefaultParamInitializer.WEIGHT_KEY).dup();
            layerBiases[i] = layers[i].getParam(DefaultParamInitializer.BIAS_KEY).dup();
        }
        INDArray[] layerZs = new INDArray[nLayers];
        INDArray[] layerActivations = new INDArray[nLayers];
        for (int i = 0; i < nLayers; i++) {
            INDArray layerInput = (i == 0 ? x : layerActivations[i - 1]);
            layerZs[i] = layerInput.mmul(layerWeights[i]).addiRowVector(layerBiases[i]);
            layerActivations[i] = (i == nLayers - 1 ? doSoftmax(layerZs[i].dup()) : doSigmoid(layerZs[i].dup()));
        }

        //Do backward pass:
        INDArray[] deltas = new INDArray[nLayers];
        //Out - labels; shape=[miniBatchSize,nOut];
        deltas[nLayers - 1] = layerActivations[nLayers - 1].sub(y);
        assertArrayEquals(deltas[nLayers - 1].shape(), new int[] { miniBatchSize, 3 });
        for (int i = nLayers - 2; i >= 0; i--) {
            INDArray sigmaPrimeOfZ;
            sigmaPrimeOfZ = doSigmoidDerivative(layerZs[i]);
            INDArray epsilon = layerWeights[i + 1].mmul(deltas[i + 1].transpose()).transpose();
            deltas[i] = epsilon.mul(sigmaPrimeOfZ);
            assertArrayEquals(deltas[i].shape(), new int[] { miniBatchSize, hiddenLayerSizes[i] });
        }
        INDArray[] dLdw = new INDArray[nLayers];
        INDArray[] dLdb = new INDArray[nLayers];
        for (int i = 0; i < nLayers; i++) {
            INDArray prevActivations = (i == 0 ? x : layerActivations[i - 1]);
            //Raw gradients, so not yet divided by mini-batch size (division is done in BaseUpdater)
            //Shape: [nIn, nOut]
            dLdw[i] = deltas[i].transpose().mmul(prevActivations).transpose();
            //Shape: [1,nOut]
            dLdb[i] = deltas[i].sum(0);
            int nIn = (i == 0 ? 4 : hiddenLayerSizes[i - 1]);
            int nOut = (i < nLayers - 1 ? hiddenLayerSizes[i] : 3);
            assertArrayEquals(dLdw[i].shape(), new int[] { nIn, nOut });
            assertArrayEquals(dLdb[i].shape(), new int[] { 1, nOut });
        }

        //Calculate and get gradient, compare to expected
        network.setInput(x);
        network.setLabels(y);
        network.computeGradientAndScore();
        Gradient gradient = network.gradientAndScore().getFirst();
        float eps = 1e-4f;
        for (int i = 0; i < hiddenLayerSizes.length; i++) {
            String wKey = i + "_" + DefaultParamInitializer.WEIGHT_KEY;
            String bKey = i + "_" + DefaultParamInitializer.BIAS_KEY;
            INDArray wGrad = gradient.getGradientFor(wKey);
            INDArray bGrad = gradient.getGradientFor(bKey);
            float[] wGradf = asFloat(wGrad);
            float[] bGradf = asFloat(bGrad);
            float[] expWGradf = asFloat(dLdw[i]);
            float[] expBGradf = asFloat(dLdb[i]);
            assertArrayEquals(wGradf, expWGradf, eps);
            assertArrayEquals(bGradf, expBGradf, eps);
        }
    }
}
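Similarly, the INDArray helpers doSigmoid, doSigmoidDerivative, and doSoftmax used in the forward pass above are not part of this excerpt. The reconstruction below is an assumption about their behaviour, built only from standard ND4J operations (org.nd4j.linalg.ops.transforms.Transforms plus element-wise INDArray arithmetic).

// Assumed reconstructions of the INDArray helpers referenced by testIrisMiniBatchGradients.
public static INDArray doSigmoid(INDArray input) {
    return Transforms.sigmoid(input.dup());
}

public static INDArray doSigmoidDerivative(INDArray input) {
    INDArray s = Transforms.sigmoid(input.dup());
    return s.mul(s.rsub(1.0));      // sigma(z) * (1 - sigma(z)), element-wise
}

public static INDArray doSoftmax(INDArray input) {
    // Row-wise softmax: exponentiate, then divide each row by its sum.
    INDArray exp = Transforms.exp(input.dup());
    return exp.diviColumnVector(exp.sum(1));
}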
Use of org.deeplearning4j.datasets.iterator.impl.IrisDataSetIterator in project deeplearning4j by deeplearning4j.
From the class MultiLayerTest, method testGradientUpdate:
@Test
public void testGradientUpdate() throws Exception {
    DataSetIterator iter = new IrisDataSetIterator(1, 1);
    Gradient expectedGradient = new DefaultGradient();
    expectedGradient.setGradientFor("0_W", Nd4j.ones(4, 5));
    expectedGradient.setGradientFor("0_b", Nd4j.ones(1, 5));
    expectedGradient.setGradientFor("1_W", Nd4j.ones(5, 3));
    expectedGradient.setGradientFor("1_b", Nd4j.ones(1, 3));
    MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder()
            .updater(org.deeplearning4j.nn.conf.Updater.SGD).learningRate(1)
            .activation(Activation.RELU).weightInit(WeightInit.XAVIER)
            .list()
            .layer(0, new DenseLayer.Builder().name("dnn1").nIn(4).nOut(5).build())
            .layer(1, new OutputLayer.Builder().name("output").nIn(5).nOut(3)
                    .activation(Activation.SOFTMAX).weightInit(WeightInit.XAVIER).build())
            .backprop(true).pretrain(false)
            .build();
    MultiLayerNetwork net = new MultiLayerNetwork(conf);
    net.init();
    net.fit(iter.next());
    // TODO validate actual layer gradientView - issue getting var out of BaseLayer w/o adding MLN getter that gets confused with local gradient vars
    Gradient actualGradient = net.gradient;
    assertNotEquals(expectedGradient.getGradientFor("0_W"), actualGradient.getGradientFor("0_W"));
    net.update(expectedGradient);
    actualGradient = net.gradient;
    assertEquals(expectedGradient.getGradientFor("0_W"), actualGradient.getGradientFor("0_W"));
    // Update params with set
    net.setParam("0_W", Nd4j.ones(4, 5));
    net.setParam("0_b", Nd4j.ones(1, 5));
    net.setParam("1_W", Nd4j.ones(5, 3));
    net.setParam("1_b", Nd4j.ones(1, 3));
    INDArray actualParams = net.params();
    // Confirm params
    assertEquals(expectedGradient.gradient(), actualParams);
    net.update(expectedGradient);
    actualParams = net.params();
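    // The flattened parameter vector of this 4 -> 5 -> 3 network has 4*5 + 5 + 5*3 + 3 = 43 entries,
    // hence the [1, 43] row vector of twos expected below (all-ones params plus an all-ones gradient update).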
    assertEquals(Nd4j.ones(1, 43).addi(1), actualParams);
}