Use of org.deeplearning4j.nn.multilayer.MultiLayerNetwork in project deeplearning4j by deeplearning4j.
From the class ParameterServerParallelWrapperTest, method testWrapper().
@Test
public void testWrapper() throws Exception {
int nChannels = 1;
int outputNum = 10;
// for GPU you usually want to have higher batchSize
int batchSize = 128;
int nEpochs = 10;
int iterations = 1;
int seed = 123;
log.info("Load data....");
DataSetIterator mnistTrain = new MnistDataSetIterator(batchSize, 1000);
DataSetIterator mnistTest = new MnistDataSetIterator(batchSize, false, 12345);
log.info("Build model....");
MultiLayerConfiguration.Builder builder = new NeuralNetConfiguration.Builder()
    .seed(seed)
    .iterations(iterations)
    .regularization(true).l2(0.0005)
    .learningRate(0.01) //.biasLearningRate(0.02)
    .weightInit(WeightInit.XAVIER)
    .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT)
    .updater(Updater.NESTEROVS).momentum(0.9)
    .list()
    .layer(0, new ConvolutionLayer.Builder(5, 5).nIn(nChannels).stride(1, 1).nOut(20)
        .activation(Activation.IDENTITY).build())
    .layer(1, new SubsamplingLayer.Builder(SubsamplingLayer.PoolingType.MAX)
        .kernelSize(2, 2).stride(2, 2).build())
    .layer(2, new ConvolutionLayer.Builder(5, 5).stride(1, 1).nOut(50)
        .activation(Activation.IDENTITY).build())
    .layer(3, new SubsamplingLayer.Builder(SubsamplingLayer.PoolingType.MAX)
        .kernelSize(2, 2).stride(2, 2).build())
    .layer(4, new DenseLayer.Builder().activation(Activation.RELU).nOut(500).build())
    .layer(5, new OutputLayer.Builder(LossFunctions.LossFunction.NEGATIVELOGLIKELIHOOD)
        .nOut(outputNum).activation(Activation.SOFTMAX).build())
    .setInputType(InputType.convolutionalFlat(28, 28, 1))
    .backprop(true).pretrain(false);
MultiLayerConfiguration conf = builder.build();
MultiLayerNetwork model = new MultiLayerNetwork(conf);
model.init();
ParameterServerParallelWrapper parameterServerParallelWrapper = ParameterServerParallelWrapper.builder()
    .model(model)
    .multiLayerNetwork(model)
    .numEpochs(10)
    .numWorkers(Runtime.getRuntime().availableProcessors())
    .statusServerPort(33000)
    .preFetchSize(3)
    .build();
parameterServerParallelWrapper.fit(mnistTrain);
parameterServerParallelWrapper.close();
Thread.sleep(30000);
}
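The test above creates mnistTest but never evaluates against it. As a follow-up sketch (an assumption, not part of the original test), the trained model could be scored on that iterator; the variable names below reuse those from testWrapper().
// Illustrative follow-up: evaluate the wrapper-trained model on the held-out MNIST iterator.
// Assumes org.deeplearning4j.eval.Evaluation is on the classpath.
Evaluation eval = model.evaluate(mnistTest); // accuracy, precision, recall, F1 per class
log.info(eval.stats());
mnistTest.reset(); // allow the iterator to be reused afterwards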
Use of org.deeplearning4j.nn.multilayer.MultiLayerNetwork in project deeplearning4j by deeplearning4j.
From the class ModelSerializer, method taskByModel().
/**
 * Builds a {@link Task} describing the given model: the network type
 * (MultilayerNetwork or ComputationalGraph) and the detected architecture type
 * (CONVOLUTION, RECURRENT, RBM, or UNKNOWN).
 *
 * @param model the model to inspect ({@link MultiLayerNetwork} or {@link ComputationGraph})
 * @return a {@link Task} populated with the network type and architecture type
 */
public static Task taskByModel(Model model) {
Task task = new Task();
try {
task.setArchitectureType(Task.ArchitectureType.RECURRENT);
if (model instanceof ComputationGraph) {
task.setNetworkType(Task.NetworkType.ComputationalGraph);
ComputationGraph network = (ComputationGraph) model;
try {
if (network.getLayers() != null && network.getLayers().length > 0) {
for (Layer layer : network.getLayers()) {
if (layer instanceof RBM || layer instanceof org.deeplearning4j.nn.layers.feedforward.rbm.RBM) {
task.setArchitectureType(Task.ArchitectureType.RBM);
break;
}
if (layer.type().equals(Layer.Type.CONVOLUTIONAL)) {
task.setArchitectureType(Task.ArchitectureType.CONVOLUTION);
break;
} else if (layer.type().equals(Layer.Type.RECURRENT) || layer.type().equals(Layer.Type.RECURSIVE)) {
task.setArchitectureType(Task.ArchitectureType.RECURRENT);
break;
}
}
} else
task.setArchitectureType(Task.ArchitectureType.UNKNOWN);
} catch (Exception e) {
// do nothing here
}
} else if (model instanceof MultiLayerNetwork) {
task.setNetworkType(Task.NetworkType.MultilayerNetwork);
MultiLayerNetwork network = (MultiLayerNetwork) model;
try {
if (network.getLayers() != null && network.getLayers().length > 0) {
for (Layer layer : network.getLayers()) {
if (layer instanceof RBM || layer instanceof org.deeplearning4j.nn.layers.feedforward.rbm.RBM) {
task.setArchitectureType(Task.ArchitectureType.RBM);
break;
}
if (layer.type().equals(Layer.Type.CONVOLUTIONAL)) {
task.setArchitectureType(Task.ArchitectureType.CONVOLUTION);
break;
} else if (layer.type().equals(Layer.Type.RECURRENT) || layer.type().equals(Layer.Type.RECURSIVE)) {
task.setArchitectureType(Task.ArchitectureType.RECURRENT);
break;
}
}
} else
task.setArchitectureType(Task.ArchitectureType.UNKNOWN);
} catch (Exception e) {
// do nothing here
}
}
return task;
} catch (Exception e) {
task.setArchitectureType(Task.ArchitectureType.UNKNOWN);
task.setNetworkType(Task.NetworkType.DenseNetwork);
return task;
}
}
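A minimal usage sketch for taskByModel (an assumption, not shown in the source); the file name and variable names below are illustrative only.
// Illustrative only: restore a previously saved network and let taskByModel classify it.
// The file name "myNetwork.zip" is a placeholder; restoreMultiLayerNetwork throws IOException,
// so the calling method must handle or declare it.
File savedModel = new File("myNetwork.zip");
MultiLayerNetwork restored = ModelSerializer.restoreMultiLayerNetwork(savedModel);
Task task = ModelSerializer.taskByModel(restored);
// task now carries NetworkType.MultilayerNetwork plus the detected architecture type
// (CONVOLUTION, RECURRENT, RBM, or UNKNOWN).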
Use of org.deeplearning4j.nn.multilayer.MultiLayerNetwork in project deeplearning4j by deeplearning4j.
From the class GradientCheckTestsMasking, method testBidirectionalLSTMMasking().
@Test
public void testBidirectionalLSTMMasking() {
//Basic masking test for the GravesBidirectionalLSTM layer
Nd4j.getRandom().setSeed(12345L);
int timeSeriesLength = 5;
int nIn = 5;
int layerSize = 4;
int nOut = 3;
int miniBatchSize = 3;
INDArray[] masks = new INDArray[] {
    null,
    Nd4j.create(new double[][] { { 1, 1, 1, 1, 1 }, { 1, 1, 1, 1, 1 }, { 1, 1, 1, 1, 1 } }),
    Nd4j.create(new double[][] { { 1, 1, 1, 1, 1 }, { 1, 1, 1, 1, 0 }, { 1, 1, 1, 0, 0 } }),
    Nd4j.create(new double[][] { { 1, 1, 1, 1, 1 }, { 0, 1, 1, 1, 1 }, { 0, 0, 1, 1, 1 } }) };
int testNum = 0;
for (INDArray mask : masks) {
MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder()
    .regularization(false)
    .updater(Updater.NONE)
    .weightInit(WeightInit.DISTRIBUTION).dist(new NormalDistribution(0, 1.0))
    .seed(12345L)
    .list()
    .layer(0, new GravesBidirectionalLSTM.Builder().nIn(nIn).nOut(layerSize)
        .activation(Activation.TANH).build())
    .layer(1, new GravesBidirectionalLSTM.Builder().nIn(layerSize).nOut(layerSize)
        .activation(Activation.TANH).build())
    .layer(2, new RnnOutputLayer.Builder(LossFunctions.LossFunction.MCXENT)
        .activation(Activation.SOFTMAX).nIn(layerSize).nOut(nOut).build())
    .pretrain(false).backprop(true).build();
MultiLayerNetwork mln = new MultiLayerNetwork(conf);
mln.init();
Random r = new Random(12345L);
INDArray input = Nd4j.zeros(miniBatchSize, nIn, timeSeriesLength);
for (int i = 0; i < miniBatchSize; i++) {
for (int j = 0; j < nIn; j++) {
for (int k = 0; k < timeSeriesLength; k++) {
input.putScalar(new int[] { i, j, k }, r.nextDouble() - 0.5);
}
}
}
INDArray labels = Nd4j.zeros(miniBatchSize, nOut, timeSeriesLength);
for (int i = 0; i < miniBatchSize; i++) {
for (int j = 0; j < timeSeriesLength; j++) {
//Random one-hot label at each time step
labels.putScalar(i, r.nextInt(nOut), j, 1.0);
}
}
mln.setLayerMaskArrays(mask, mask);
if (PRINT_RESULTS) {
System.out.println("testBidirectionalLSTMMasking() - testNum = " + testNum++);
for (int j = 0; j < mln.getnLayers(); j++) System.out.println("Layer " + j + " # params: " + mln.getLayer(j).numParams());
}
boolean gradOK = GradientCheckUtil.checkGradients(mln, DEFAULT_EPS, DEFAULT_MAX_REL_ERROR, DEFAULT_MIN_ABS_ERROR, PRINT_RESULTS, RETURN_ON_FIRST_FAILURE, input, labels);
assertTrue(gradOK);
}
}
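The per-example masks above are written out literally. The sketch below (an assumption, not part of the test) builds the same kind of [miniBatchSize, timeSeriesLength] mask from per-example sequence lengths, which is how such masks are typically produced for variable-length data; timeSeriesLength mirrors the test variable.
// Illustrative helper: 1.0 marks a real time step, 0.0 marks padding.
// The lengths {5, 4, 3} reproduce the third mask in the masks[] array above.
int[] sequenceLengths = { 5, 4, 3 };
INDArray lengthMask = Nd4j.zeros(sequenceLengths.length, timeSeriesLength);
for (int example = 0; example < sequenceLengths.length; example++) {
    for (int step = 0; step < sequenceLengths[example]; step++) {
        lengthMask.putScalar(new int[] { example, step }, 1.0);
    }
}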
Use of org.deeplearning4j.nn.multilayer.MultiLayerNetwork in project deeplearning4j by deeplearning4j.
From the class GradientCheckTestsMasking, method testPerOutputMaskingRnn().
@Test
public void testPerOutputMaskingRnn() {
//For RNNs: per-output masking uses 3d masks (same shape as output/labels), as compared to the standard
// 2d masks (used for per *example* masking)
int nIn = 4;
int layerSize = 4;
int nOut = 4;
//1 example, TS length 3
INDArray mask1 = Nd4j.create(new double[] { 1, 0, 0, 1, 0, 1, 0, 1, 1, 1, 1, 0 }, new int[] { 1, nOut, 3 }, 'f');
//1 example, TS length 1
INDArray mask2 = Nd4j.create(new double[] { 1, 1, 0, 1 }, new int[] { 1, nOut, 1 }, 'f');
//3 examples, TS length 3
//Values are given in 'f' (column-major) order: example index varies most frequently, then output index,
// followed by time index (least frequently)
INDArray mask3 = Nd4j.create(new double[] { 1, 0, 1, 0, 1, 1, 1, 1, 1, 0, 1, 0, 0, 1, 1, 1, 1, 0, 1, 0, 1, 0, 1, 1, 1, 1, 1, 0, 0, 1, 0, 1, 0, 1, 0, 0 }, new int[] { 3, nOut, 3 }, 'f');
INDArray[] labelMasks = new INDArray[] { mask1, mask2, mask3 };
ILossFunction[] lossFunctions = new ILossFunction[] {
    new LossBinaryXENT(),
    // new LossCosineProximity(), //Doesn't support per-output masking, as it doesn't make sense for cosine proximity
    new LossHinge(), new LossKLD(), new LossKLD(), new LossL1(), new LossL2(),
    new LossMAE(), new LossMAE(), new LossMAPE(), new LossMAPE(),
    // new LossMCXENT(), //Per output masking on MCXENT+Softmax: not yet supported
    new LossMCXENT(), new LossMSE(), new LossMSE(), new LossMSLE(), new LossMSLE(),
    new LossNegativeLogLikelihood(), new LossPoisson(), new LossSquaredHinge() };
Activation[] act = new Activation[] {
    Activation.SIGMOID, //XENT
    Activation.TANH, //Hinge
    Activation.SIGMOID, //KLD
    Activation.SOFTMAX, //KLD + softmax
    Activation.TANH, //L1
    Activation.TANH, //L2
    Activation.TANH, //MAE
    Activation.SOFTMAX, //MAE + softmax
    Activation.TANH, //MAPE
    Activation.SOFTMAX, //MAPE + softmax
    Activation.SIGMOID, //MCXENT + sigmoid
    Activation.TANH, //MSE
    Activation.SOFTMAX, //MSE + softmax
    Activation.SIGMOID, //MSLE - needs positive labels/activations (due to log)
    Activation.SOFTMAX, //MSLE + softmax
    Activation.SIGMOID, //NLL
    Activation.SIGMOID, //Poisson
    Activation.TANH }; //Squared hinge
for (INDArray labelMask : labelMasks) {
int minibatch = labelMask.size(0);
int tsLength = labelMask.size(2);
for (int i = 0; i < lossFunctions.length; i++) {
ILossFunction lf = lossFunctions[i];
Activation a = act[i];
Nd4j.getRandom().setSeed(12345);
MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder()
    .updater(Updater.NONE)
    .weightInit(WeightInit.DISTRIBUTION).dist(new NormalDistribution(0, 1))
    .seed(12345)
    .list()
    .layer(0, new GravesLSTM.Builder().nIn(nIn).nOut(layerSize).activation(Activation.TANH).build())
    .layer(1, new RnnOutputLayer.Builder().nIn(layerSize).nOut(nOut)
        .lossFunction(lf).activation(a).build())
    .build();
MultiLayerNetwork net = new MultiLayerNetwork(conf);
net.init();
net.setLayerMaskArrays(null, labelMask);
INDArray[] fl = LossFunctionGradientCheck.getFeaturesAndLabels(lf, new int[] { minibatch, nIn, tsLength }, new int[] { minibatch, nOut, tsLength }, 12345);
INDArray features = fl[0];
INDArray labels = fl[1];
String msg = "testPerOutputMaskingRnn(): maskShape = " + Arrays.toString(labelMask.shape()) + ", loss function = " + lf + ", activation = " + a;
System.out.println(msg);
boolean gradOK = GradientCheckUtil.checkGradients(net, DEFAULT_EPS, DEFAULT_MAX_REL_ERROR, DEFAULT_MIN_ABS_ERROR, PRINT_RESULTS, RETURN_ON_FIRST_FAILURE, features, labels);
assertTrue(msg, gradOK);
//Check the equivalent compgraph:
Nd4j.getRandom().setSeed(12345);
ComputationGraphConfiguration cg = new NeuralNetConfiguration.Builder()
    .updater(Updater.NONE)
    .weightInit(WeightInit.DISTRIBUTION).dist(new NormalDistribution(0, 2))
    .seed(12345)
    .graphBuilder()
    .addInputs("in")
    .addLayer("0", new GravesLSTM.Builder().nIn(nIn).nOut(layerSize).activation(Activation.TANH).build(), "in")
    .addLayer("1", new RnnOutputLayer.Builder().nIn(layerSize).nOut(nOut)
        .lossFunction(lf).activation(a).build(), "0")
    .setOutputs("1")
    .build();
ComputationGraph graph = new ComputationGraph(cg);
graph.init();
//Apply the same per-output label mask to the graph before checking its gradients
graph.setLayerMaskArrays(null, new INDArray[] { labelMask });
gradOK = GradientCheckUtil.checkGradients(graph, DEFAULT_EPS, DEFAULT_MAX_REL_ERROR, DEFAULT_MIN_ABS_ERROR, PRINT_RESULTS, RETURN_ON_FIRST_FAILURE, new INDArray[] { features }, new INDArray[] { labels });
assertTrue(msg + " (compgraph)", gradOK);
}
}
}
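For reference, the sketch below (an assumption, not in the test) rebuilds mask2 element by element, which makes the mapping of the flat 'f'-ordered double arrays onto the [miniBatch, nOut, timeSeriesLength] mask shape explicit; nOut mirrors the test variable.
// Same content as mask2 above ({1, 1, 0, 1} with shape [1, nOut, 1]), built index by index:
INDArray mask2Explicit = Nd4j.zeros(new int[] { 1, nOut, 1 });
mask2Explicit.putScalar(new int[] { 0, 0, 0 }, 1.0); // output 0 contributes to the loss
mask2Explicit.putScalar(new int[] { 0, 1, 0 }, 1.0); // output 1 contributes to the loss
mask2Explicit.putScalar(new int[] { 0, 3, 0 }, 1.0); // output 3 contributes; output 2 stays masked out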
Use of org.deeplearning4j.nn.multilayer.MultiLayerNetwork in project deeplearning4j by deeplearning4j.
From the class LossFunctionGradientCheck, method lossFunctionGradientCheckLossLayer().
@Test
public void lossFunctionGradientCheckLossLayer() {
ILossFunction[] lossFunctions = new ILossFunction[] { new LossBinaryXENT(), new LossBinaryXENT(), new LossCosineProximity(), new LossHinge(), new LossKLD(), new LossKLD(), new LossL1(), new LossL1(), new LossL2(), new LossL2(), new LossMAE(), new LossMAE(), new LossMAPE(), new LossMAPE(), new LossMCXENT(), new LossMSE(), new LossMSE(), new LossMSLE(), new LossMSLE(), new LossNegativeLogLikelihood(), new LossNegativeLogLikelihood(), new LossPoisson(), new LossSquaredHinge() };
String[] outputActivationFn = new String[] {
    "sigmoid", //xent
    "sigmoid", //xent
    "tanh", //cosine
    "tanh", //hinge -> trying to predict 1 or -1
    "sigmoid", //kld -> probab so should be between 0 and 1
    "softmax", //kld + softmax
    "tanh", //l1
    "softmax", //l1 + softmax
    "tanh", //l2
    "softmax", //l2 + softmax
    "identity", //mae
    "softmax", //mae + softmax
    "identity", //mape
    "softmax", //mape + softmax
    "softmax", //mcxent
    "identity", //mse
    "softmax", //mse + softmax
    "sigmoid", //msle - requires positive labels/activations due to log
    "softmax", //msle + softmax
    "sigmoid", //nll
    "softmax", //nll + softmax
    "sigmoid", //poisson - requires positive predictions due to log... not sure if this is the best option
    "tanh" }; //squared hinge
int[] nOut = new int[] {
    1, //xent
    3, //xent
    5, //cosine
    3, //hinge
    3, //kld
    3, //kld + softmax
    3, //l1
    3, //l1 + softmax
    3, //l2
    3, //l2 + softmax
    3, //mae
    3, //mae + softmax
    3, //mape
    3, //mape + softmax
    3, //mcxent
    3, //mse
    3, //mse + softmax
    3, //msle
    3, //msle + softmax
    3, //nll
    3, //nll + softmax
    3, //poisson
    3 }; //squared hinge
int[] minibatchSizes = new int[] { 1, 3 };
// int[] minibatchSizes = new int[]{3};
List<String> passed = new ArrayList<>();
List<String> failed = new ArrayList<>();
for (int i = 0; i < lossFunctions.length; i++) {
for (int j = 0; j < minibatchSizes.length; j++) {
String testName = lossFunctions[i] + " - " + outputActivationFn[i] + " - minibatchSize = " + minibatchSizes[j];
Nd4j.getRandom().setSeed(12345);
MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder()
    .iterations(1)
    .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT)
    .seed(12345)
    .updater(Updater.NONE)
    .regularization(false)
    .weightInit(WeightInit.DISTRIBUTION).dist(new UniformDistribution(-2, 2))
    .list()
    .layer(0, new DenseLayer.Builder().nIn(4).nOut(nOut[i]).activation(Activation.TANH).build())
    .layer(1, new LossLayer.Builder().lossFunction(lossFunctions[i])
        .activation(outputActivationFn[i]).build())
    .pretrain(false).backprop(true).build();
MultiLayerNetwork net = new MultiLayerNetwork(conf);
net.init();
assertTrue(((LossLayer) net.getLayer(1).conf().getLayer()).getLossFn().getClass() == lossFunctions[i].getClass());
INDArray[] inOut = getFeaturesAndLabels(lossFunctions[i], minibatchSizes[j], 4, nOut[i], 12345);
INDArray input = inOut[0];
INDArray labels = inOut[1];
log.info(" ***** Starting test: {} *****", testName);
// System.out.println(Arrays.toString(labels.data().asDouble()));
// System.out.println(Arrays.toString(net.output(input,false).data().asDouble()));
// System.out.println(net.score(new DataSet(input,labels)));
boolean gradOK;
try {
gradOK = GradientCheckUtil.checkGradients(net, DEFAULT_EPS, DEFAULT_MAX_REL_ERROR, DEFAULT_MIN_ABS_ERROR, PRINT_RESULTS, RETURN_ON_FIRST_FAILURE, input, labels);
} catch (Exception e) {
e.printStackTrace();
failed.add(testName + "\t" + "EXCEPTION");
continue;
}
if (gradOK) {
passed.add(testName);
} else {
failed.add(testName);
}
System.out.println("\n\n");
}
}
System.out.println("---- Passed ----");
for (String s : passed) {
System.out.println(s);
}
System.out.println("---- Failed ----");
for (String s : failed) {
System.out.println(s);
}
assertEquals("Tests failed", 0, failed.size());
}
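Once a loss-function/activation pairing from the arrays above passes the gradient check, the same generated data can be scored directly. The sketch below is an assumption (not part of the test); net, input, labels and testName refer to the variables inside the inner loop above.
// Illustrative scoring of a checked configuration on the generated features/labels.
// Assumes org.nd4j.linalg.dataset.DataSet is imported.
DataSet ds = new DataSet(input, labels);
double score = net.score(ds); // average loss under lossFunctions[i] with outputActivationFn[i]
log.info("Score for {}: {}", testName, score);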