use of org.deeplearning4j.nn.conf.preprocessor.FeedForwardToRnnPreProcessor in project deeplearning4j by deeplearning4j.
the class EmbeddingLayerTest method testEmbeddingLayerWithMasking.
@Test
public void testEmbeddingLayerWithMasking() {
//Idea: have masking on the input with an embedding and dense layers on input
//Ensure that the parameter gradients for the inputs don't depend on the inputs when inputs are masked
int[] miniBatchSizes = { 1, 2, 5 };
int nIn = 2;
Random r = new Random(12345);
int numInputClasses = 10;
int timeSeriesLength = 5;
for (int nExamples : miniBatchSizes) {
Nd4j.getRandom().setSeed(12345);
MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder().optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT).iterations(1).updater(Updater.SGD).learningRate(0.1).seed(12345).list().layer(0, new EmbeddingLayer.Builder().activation(Activation.TANH).nIn(numInputClasses).nOut(5).build()).layer(1, new DenseLayer.Builder().activation(Activation.TANH).nIn(5).nOut(4).build()).layer(2, new GravesLSTM.Builder().activation(Activation.TANH).nIn(4).nOut(3).build()).layer(3, new RnnOutputLayer.Builder().lossFunction(LossFunctions.LossFunction.MSE).nIn(3).nOut(4).build()).inputPreProcessor(0, new RnnToFeedForwardPreProcessor()).inputPreProcessor(2, new FeedForwardToRnnPreProcessor()).build();
MultiLayerNetwork net = new MultiLayerNetwork(conf);
net.init();
MultiLayerConfiguration conf2 = new NeuralNetConfiguration.Builder().optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT).iterations(1).updater(Updater.SGD).learningRate(0.1).seed(12345).list().layer(0, new DenseLayer.Builder().activation(Activation.TANH).nIn(numInputClasses).nOut(5).build()).layer(1, new DenseLayer.Builder().activation(Activation.TANH).nIn(5).nOut(4).build()).layer(2, new GravesLSTM.Builder().activation(Activation.TANH).nIn(4).nOut(3).build()).layer(3, new RnnOutputLayer.Builder().lossFunction(LossFunctions.LossFunction.MSE).nIn(3).nOut(4).build()).inputPreProcessor(0, new RnnToFeedForwardPreProcessor()).inputPreProcessor(2, new FeedForwardToRnnPreProcessor()).build();
MultiLayerNetwork net2 = new MultiLayerNetwork(conf2);
net2.init();
net2.setParams(net.params().dup());
INDArray inEmbedding = Nd4j.zeros(nExamples, 1, timeSeriesLength);
INDArray inDense = Nd4j.zeros(nExamples, numInputClasses, timeSeriesLength);
INDArray labels = Nd4j.zeros(nExamples, 4, timeSeriesLength);
for (int i = 0; i < nExamples; i++) {
for (int j = 0; j < timeSeriesLength; j++) {
int inIdx = r.nextInt(numInputClasses);
inEmbedding.putScalar(new int[] { i, 0, j }, inIdx);
inDense.putScalar(new int[] { i, inIdx, j }, 1.0);
int outIdx = r.nextInt(4);
labels.putScalar(new int[] { i, outIdx, j }, 1.0);
}
}
INDArray inputMask = Nd4j.zeros(nExamples, timeSeriesLength);
for (int i = 0; i < nExamples; i++) {
for (int j = 0; j < timeSeriesLength; j++) {
inputMask.putScalar(new int[] { i, j }, (r.nextBoolean() ? 1.0 : 0.0));
}
}
net.setLayerMaskArrays(inputMask, null);
net2.setLayerMaskArrays(inputMask, null);
List<INDArray> actEmbedding = net.feedForward(inEmbedding, false);
List<INDArray> actDense = net2.feedForward(inDense, false);
for (int i = 1; i < actEmbedding.size(); i++) {
assertEquals(actDense.get(i), actEmbedding.get(i));
}
net.setLabels(labels);
net2.setLabels(labels);
net.computeGradientAndScore();
net2.computeGradientAndScore();
System.out.println(net.score() + "\t" + net2.score());
assertEquals(net2.score(), net.score(), 1e-5);
Map<String, INDArray> gradients = net.gradient().gradientForVariable();
Map<String, INDArray> gradients2 = net2.gradient().gradientForVariable();
assertEquals(gradients.keySet(), gradients2.keySet());
for (String s : gradients.keySet()) {
assertEquals(gradients2.get(s), gradients.get(s));
}
}
}
use of org.deeplearning4j.nn.conf.preprocessor.FeedForwardToRnnPreProcessor in project deeplearning4j by deeplearning4j.
the class TestVariableLengthTSCG method testInputMasking.
@Test
public void testInputMasking() {
//Idea: have masking on the input with 2 dense layers on input
//Ensure that the parameter gradients for the inputs don't depend on the inputs when inputs are masked
int[] miniBatchSizes = { 1, 2, 5 };
int nIn = 2;
Random r = new Random(12345);
for (int nExamples : miniBatchSizes) {
Nd4j.getRandom().setSeed(12345);
ComputationGraphConfiguration conf = new NeuralNetConfiguration.Builder().optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT).iterations(1).updater(Updater.SGD).learningRate(0.1).seed(12345).graphBuilder().addInputs("in").addLayer("0", new DenseLayer.Builder().activation(Activation.TANH).nIn(2).nOut(2).build(), "in").addLayer("1", new DenseLayer.Builder().activation(Activation.TANH).nIn(2).nOut(2).build(), "0").addLayer("2", new GravesLSTM.Builder().activation(Activation.TANH).nIn(2).nOut(2).build(), "1").addLayer("3", new RnnOutputLayer.Builder().lossFunction(LossFunctions.LossFunction.MSE).nIn(2).nOut(1).build(), "2").setOutputs("3").inputPreProcessor("0", new RnnToFeedForwardPreProcessor()).inputPreProcessor("2", new FeedForwardToRnnPreProcessor()).build();
ComputationGraph net = new ComputationGraph(conf);
net.init();
INDArray in1 = Nd4j.rand(new int[] { nExamples, 2, 4 });
INDArray in2 = Nd4j.rand(new int[] { nExamples, 2, 5 });
in2.put(new INDArrayIndex[] { NDArrayIndex.all(), NDArrayIndex.all(), NDArrayIndex.interval(0, 3, true) }, in1);
assertEquals(in1, in2.get(NDArrayIndex.all(), NDArrayIndex.all(), NDArrayIndex.interval(0, 4)));
INDArray labels1 = Nd4j.rand(new int[] { nExamples, 1, 4 });
INDArray labels2 = Nd4j.create(nExamples, 1, 5);
labels2.put(new INDArrayIndex[] { NDArrayIndex.all(), NDArrayIndex.all(), NDArrayIndex.interval(0, 3, true) }, labels1);
assertEquals(labels1, labels2.get(NDArrayIndex.all(), NDArrayIndex.all(), NDArrayIndex.interval(0, 4)));
INDArray inputMask = Nd4j.ones(nExamples, 5);
for (int j = 0; j < nExamples; j++) {
inputMask.putScalar(new int[] { j, 4 }, 0);
}
net.setInput(0, in1);
net.setLabel(0, labels1);
net.computeGradientAndScore();
double score1 = net.score();
Gradient g1 = net.gradient();
Map<String, INDArray> map = g1.gradientForVariable();
for (String s : map.keySet()) {
//Gradients are views; need to dup otherwise they will be modified by next computeGradientAndScore
map.put(s, map.get(s).dup());
}
net.setInput(0, in2);
net.setLabel(0, labels2);
net.setLayerMaskArrays(new INDArray[] { inputMask }, null);
net.computeGradientAndScore();
double score2 = net.score();
Gradient g2 = net.gradient();
Map<String, INDArray> activations2 = net.feedForward();
//Scores should differ here: masking the input, not the output. Therefore 4 vs. 5 time step outputs
assertNotEquals(score1, score2, 0.01);
Map<String, INDArray> g1map = g1.gradientForVariable();
Map<String, INDArray> g2map = g2.gradientForVariable();
for (String s : g1map.keySet()) {
INDArray g1s = g1map.get(s);
INDArray g2s = g2map.get(s);
assertNotEquals(s, g1s, g2s);
}
//Modify the values at the masked time step, and check that neither the gradients, score or activations change
for (int j = 0; j < nExamples; j++) {
for (int k = 0; k < nIn; k++) {
in2.putScalar(new int[] { j, k, 4 }, r.nextDouble());
}
net.setInput(0, in2);
net.computeGradientAndScore();
double score2a = net.score();
Gradient g2a = net.gradient();
assertEquals(score2, score2a, 1e-12);
for (String s : g2.gradientForVariable().keySet()) {
assertEquals(g2.getGradientFor(s), g2a.getGradientFor(s));
}
Map<String, INDArray> activations2a = net.feedForward();
for (String s : activations2.keySet()) {
assertEquals(activations2.get(s), activations2a.get(s));
}
}
//Finally: check that the activations for the first two (dense) layers are zero at the appropriate time step
FeedForwardToRnnPreProcessor temp = new FeedForwardToRnnPreProcessor();
INDArray l0Before = activations2.get("0");
INDArray l1Before = activations2.get("1");
INDArray l0After = temp.preProcess(l0Before, nExamples);
INDArray l1After = temp.preProcess(l1Before, nExamples);
for (int j = 0; j < nExamples; j++) {
for (int k = 0; k < nIn; k++) {
assertEquals(0.0, l0After.getDouble(j, k, 4), 0.0);
assertEquals(0.0, l1After.getDouble(j, k, 4), 0.0);
}
}
}
}
use of org.deeplearning4j.nn.conf.preprocessor.FeedForwardToRnnPreProcessor in project deeplearning4j by deeplearning4j.
the class MultiLayerTestRNN method testRnnTimeStepGravesLSTM.
@Test
public void testRnnTimeStepGravesLSTM() {
Nd4j.getRandom().setSeed(12345);
int timeSeriesLength = 12;
//4 layer network: 2 GravesLSTM + DenseLayer + RnnOutputLayer. Hence also tests preprocessors.
MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder().seed(12345).list().layer(0, new org.deeplearning4j.nn.conf.layers.GravesLSTM.Builder().nIn(5).nOut(7).activation(Activation.TANH).weightInit(WeightInit.DISTRIBUTION).dist(new NormalDistribution(0, 0.5)).build()).layer(1, new org.deeplearning4j.nn.conf.layers.GravesLSTM.Builder().nIn(7).nOut(8).activation(Activation.TANH).weightInit(WeightInit.DISTRIBUTION).dist(new NormalDistribution(0, 0.5)).build()).layer(2, new DenseLayer.Builder().nIn(8).nOut(9).activation(Activation.TANH).weightInit(WeightInit.DISTRIBUTION).dist(new NormalDistribution(0, 0.5)).build()).layer(3, new RnnOutputLayer.Builder(LossFunction.MCXENT).weightInit(WeightInit.DISTRIBUTION).nIn(9).nOut(4).activation(Activation.SOFTMAX).weightInit(WeightInit.DISTRIBUTION).dist(new NormalDistribution(0, 0.5)).build()).inputPreProcessor(2, new RnnToFeedForwardPreProcessor()).inputPreProcessor(3, new FeedForwardToRnnPreProcessor()).build();
MultiLayerNetwork mln = new MultiLayerNetwork(conf);
INDArray input = Nd4j.rand(new int[] { 3, 5, timeSeriesLength });
List<INDArray> allOutputActivations = mln.feedForward(input, true);
INDArray fullOutL0 = allOutputActivations.get(1);
INDArray fullOutL1 = allOutputActivations.get(2);
INDArray fullOutL3 = allOutputActivations.get(4);
int[] inputLengths = { 1, 2, 3, 4, 6, 12 };
//Should get the same result regardless of step size; should be identical to standard forward pass
for (int i = 0; i < inputLengths.length; i++) {
int inLength = inputLengths[i];
//each of length inLength
int nSteps = timeSeriesLength / inLength;
mln.rnnClearPreviousState();
//Reset; should be set by rnnTimeStep method
mln.setInputMiniBatchSize(1);
for (int j = 0; j < nSteps; j++) {
int startTimeRange = j * inLength;
int endTimeRange = startTimeRange + inLength;
INDArray inputSubset;
if (inLength == 1) {
//Workaround to nd4j bug
int[] sizes = new int[] { input.size(0), input.size(1), 1 };
inputSubset = Nd4j.create(sizes);
inputSubset.tensorAlongDimension(0, 1, 0).assign(input.get(NDArrayIndex.all(), NDArrayIndex.all(), NDArrayIndex.point(startTimeRange)));
} else {
inputSubset = input.get(NDArrayIndex.all(), NDArrayIndex.all(), NDArrayIndex.interval(startTimeRange, endTimeRange));
}
if (inLength > 1)
assertTrue(inputSubset.size(2) == inLength);
INDArray out = mln.rnnTimeStep(inputSubset);
INDArray expOutSubset;
if (inLength == 1) {
int[] sizes = new int[] { fullOutL3.size(0), fullOutL3.size(1), 1 };
expOutSubset = Nd4j.create(sizes);
expOutSubset.tensorAlongDimension(0, 1, 0).assign(fullOutL3.get(NDArrayIndex.all(), NDArrayIndex.all(), NDArrayIndex.point(startTimeRange)));
} else {
expOutSubset = fullOutL3.get(NDArrayIndex.all(), NDArrayIndex.all(), NDArrayIndex.interval(startTimeRange, endTimeRange));
}
assertEquals(expOutSubset, out);
Map<String, INDArray> currL0State = mln.rnnGetPreviousState(0);
Map<String, INDArray> currL1State = mln.rnnGetPreviousState(1);
INDArray lastActL0 = currL0State.get(GravesLSTM.STATE_KEY_PREV_ACTIVATION);
INDArray lastActL1 = currL1State.get(GravesLSTM.STATE_KEY_PREV_ACTIVATION);
INDArray expLastActL0 = fullOutL0.tensorAlongDimension(endTimeRange - 1, 1, 0);
INDArray expLastActL1 = fullOutL1.tensorAlongDimension(endTimeRange - 1, 1, 0);
assertEquals(expLastActL0, lastActL0);
assertEquals(expLastActL1, lastActL1);
}
}
}
use of org.deeplearning4j.nn.conf.preprocessor.FeedForwardToRnnPreProcessor in project deeplearning4j by deeplearning4j.
the class MultiLayerTestRNN method testRnnTimeStepWithPreprocessorGraph.
@Test
public void testRnnTimeStepWithPreprocessorGraph() {
ComputationGraphConfiguration conf = new NeuralNetConfiguration.Builder().optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT).iterations(1).graphBuilder().addInputs("in").addLayer("0", new org.deeplearning4j.nn.conf.layers.GravesLSTM.Builder().nIn(10).nOut(10).activation(Activation.TANH).build(), "in").addLayer("1", new org.deeplearning4j.nn.conf.layers.GravesLSTM.Builder().nIn(10).nOut(10).activation(Activation.TANH).build(), "0").addLayer("2", new RnnOutputLayer.Builder(LossFunctions.LossFunction.MCXENT).activation(Activation.SOFTMAX).nIn(10).nOut(10).build(), "1").setOutputs("2").inputPreProcessor("0", new FeedForwardToRnnPreProcessor()).pretrain(false).backprop(true).build();
ComputationGraph net = new ComputationGraph(conf);
net.init();
INDArray in = Nd4j.rand(1, 10);
net.rnnTimeStep(in);
}
use of org.deeplearning4j.nn.conf.preprocessor.FeedForwardToRnnPreProcessor in project deeplearning4j by deeplearning4j.
the class TransferLearningMLNTest method testRemoveAndProcessing.
@Test
public void testRemoveAndProcessing() {
int V_WIDTH = 130;
int V_HEIGHT = 130;
int V_NFRAMES = 150;
MultiLayerConfiguration confForArchitecture = //l2 regularization on all layers
new NeuralNetConfiguration.Builder().seed(12345).regularization(true).l2(0.001).optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT).iterations(1).learningRate(0.4).list().layer(0, //3 channels: RGB
new ConvolutionLayer.Builder(10, 10).nIn(3).nOut(30).stride(4, 4).activation(Activation.RELU).weightInit(WeightInit.RELU).updater(Updater.ADAGRAD).build()).layer(1, new SubsamplingLayer.Builder(SubsamplingLayer.PoolingType.MAX).kernelSize(3, 3).stride(2, 2).build()).layer(2, new ConvolutionLayer.Builder(3, 3).nIn(30).nOut(10).stride(2, 2).activation(Activation.RELU).weightInit(WeightInit.RELU).updater(Updater.ADAGRAD).build()).layer(3, new DenseLayer.Builder().activation(Activation.RELU).nIn(490).nOut(50).weightInit(WeightInit.RELU).updater(Updater.ADAGRAD).gradientNormalization(GradientNormalization.ClipElementWiseAbsoluteValue).gradientNormalizationThreshold(10).learningRate(0.5).build()).layer(4, new GravesLSTM.Builder().activation(Activation.SOFTSIGN).nIn(50).nOut(50).weightInit(WeightInit.XAVIER).updater(Updater.ADAGRAD).gradientNormalization(GradientNormalization.ClipElementWiseAbsoluteValue).gradientNormalizationThreshold(10).learningRate(0.6).build()).layer(5, new RnnOutputLayer.Builder(LossFunctions.LossFunction.MCXENT).activation(Activation.SOFTMAX).nIn(50).nOut(//4 possible shapes: circle, square, arc, line
4).updater(Updater.ADAGRAD).weightInit(WeightInit.XAVIER).gradientNormalization(GradientNormalization.ClipElementWiseAbsoluteValue).gradientNormalizationThreshold(10).build()).inputPreProcessor(0, new RnnToCnnPreProcessor(V_HEIGHT, V_WIDTH, 3)).inputPreProcessor(3, new CnnToFeedForwardPreProcessor(7, 7, 10)).inputPreProcessor(4, new FeedForwardToRnnPreProcessor()).pretrain(false).backprop(true).backpropType(BackpropType.TruncatedBPTT).tBPTTForwardLength(V_NFRAMES / 5).tBPTTBackwardLength(V_NFRAMES / 5).build();
MultiLayerNetwork modelExpectedArch = new MultiLayerNetwork(confForArchitecture);
modelExpectedArch.init();
MultiLayerNetwork modelToTweak = new MultiLayerNetwork(new NeuralNetConfiguration.Builder().seed(12345).optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT).iterations(1).learningRate(//change learning rate
0.1).updater(// change updater
Updater.RMSPROP).list().layer(0, //Only keep the first layer the same
new ConvolutionLayer.Builder(10, 10).nIn(//3 channels: RGB
3).nOut(30).stride(4, 4).activation(Activation.RELU).weightInit(WeightInit.RELU).updater(Updater.ADAGRAD).build()).layer(1, new SubsamplingLayer.Builder(//change kernel size
SubsamplingLayer.PoolingType.MAX).kernelSize(5, 5).stride(2, 2).build()).layer(2, //change here
new ConvolutionLayer.Builder(6, 6).nIn(30).nOut(10).stride(2, 2).activation(Activation.RELU).weightInit(WeightInit.RELU).build()).layer(3, //change here
new DenseLayer.Builder().activation(Activation.RELU).nIn(250).nOut(50).weightInit(WeightInit.RELU).gradientNormalization(GradientNormalization.ClipElementWiseAbsoluteValue).gradientNormalizationThreshold(10).learningRate(0.01).build()).layer(4, //change here
new GravesLSTM.Builder().activation(Activation.SOFTSIGN).nIn(50).nOut(25).weightInit(WeightInit.XAVIER).build()).layer(5, new RnnOutputLayer.Builder(LossFunctions.LossFunction.MCXENT).activation(Activation.SOFTMAX).nIn(25).nOut(4).weightInit(WeightInit.XAVIER).gradientNormalization(GradientNormalization.ClipElementWiseAbsoluteValue).gradientNormalizationThreshold(10).build()).inputPreProcessor(0, new RnnToCnnPreProcessor(V_HEIGHT, V_WIDTH, 3)).inputPreProcessor(3, new CnnToFeedForwardPreProcessor(5, 5, 10)).inputPreProcessor(4, new FeedForwardToRnnPreProcessor()).pretrain(false).backprop(true).backpropType(BackpropType.TruncatedBPTT).tBPTTForwardLength(V_NFRAMES / 5).tBPTTBackwardLength(V_NFRAMES / 5).build());
modelToTweak.init();
MultiLayerNetwork modelNow = new TransferLearning.Builder(modelToTweak).fineTuneConfiguration(//l2 regularization on all layers
new FineTuneConfiguration.Builder().seed(12345).regularization(true).l2(0.001).optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT).updater(Updater.ADAGRAD).weightInit(WeightInit.RELU).iterations(1).learningRate(0.4).build()).removeLayersFromOutput(5).addLayer(new SubsamplingLayer.Builder(SubsamplingLayer.PoolingType.MAX).kernelSize(3, 3).stride(2, 2).build()).addLayer(new ConvolutionLayer.Builder(3, 3).nIn(30).nOut(10).stride(2, 2).activation(Activation.RELU).weightInit(WeightInit.RELU).updater(Updater.ADAGRAD).build()).addLayer(new DenseLayer.Builder().activation(Activation.RELU).nIn(490).nOut(50).weightInit(WeightInit.RELU).updater(Updater.ADAGRAD).gradientNormalization(GradientNormalization.ClipElementWiseAbsoluteValue).gradientNormalizationThreshold(10).learningRate(0.5).build()).addLayer(new GravesLSTM.Builder().activation(Activation.SOFTSIGN).nIn(50).nOut(50).weightInit(WeightInit.XAVIER).updater(Updater.ADAGRAD).gradientNormalization(GradientNormalization.ClipElementWiseAbsoluteValue).gradientNormalizationThreshold(10).learningRate(0.6).build()).addLayer(new RnnOutputLayer.Builder(LossFunctions.LossFunction.MCXENT).activation(Activation.SOFTMAX).nIn(50).nOut(//4 possible shapes: circle, square, arc, line
4).updater(Updater.ADAGRAD).weightInit(WeightInit.XAVIER).gradientNormalization(GradientNormalization.ClipElementWiseAbsoluteValue).gradientNormalizationThreshold(10).build()).setInputPreProcessor(3, new CnnToFeedForwardPreProcessor(7, 7, 10)).setInputPreProcessor(4, new FeedForwardToRnnPreProcessor()).build();
//modelNow should have the same architecture as modelExpectedArch
assertEquals(modelExpectedArch.getLayerWiseConfigurations().getConf(0).toJson(), modelNow.getLayerWiseConfigurations().getConf(0).toJson());
//some learning related info the subsampling layer will not be overwritten
//assertTrue(modelExpectedArch.getLayerWiseConfigurations().getConf(1).toJson().equals(modelNow.getLayerWiseConfigurations().getConf(1).toJson()));
assertEquals(modelExpectedArch.getLayerWiseConfigurations().getConf(2).toJson(), modelNow.getLayerWiseConfigurations().getConf(2).toJson());
assertEquals(modelExpectedArch.getLayerWiseConfigurations().getConf(3).toJson(), modelNow.getLayerWiseConfigurations().getConf(3).toJson());
assertEquals(modelExpectedArch.getLayerWiseConfigurations().getConf(4).toJson(), modelNow.getLayerWiseConfigurations().getConf(4).toJson());
assertEquals(modelExpectedArch.getLayerWiseConfigurations().getConf(5).toJson(), modelNow.getLayerWiseConfigurations().getConf(5).toJson());
assertArrayEquals(modelExpectedArch.params().shape(), modelNow.params().shape());
assertArrayEquals(modelExpectedArch.getLayer(0).params().shape(), modelNow.getLayer(0).params().shape());
//subsampling has no params
//assertArrayEquals(modelExpectedArch.getLayer(1).params().shape(), modelNow.getLayer(1).params().shape());
assertArrayEquals(modelExpectedArch.getLayer(2).params().shape(), modelNow.getLayer(2).params().shape());
assertArrayEquals(modelExpectedArch.getLayer(3).params().shape(), modelNow.getLayer(3).params().shape());
assertArrayEquals(modelExpectedArch.getLayer(4).params().shape(), modelNow.getLayer(4).params().shape());
assertArrayEquals(modelExpectedArch.getLayer(5).params().shape(), modelNow.getLayer(5).params().shape());
}
Aggregations