Use of org.deeplearning4j.nn.conf.MultiLayerConfiguration in project deeplearning4j by deeplearning4j.
The class TestParallelEarlyStopping, method testEarlyStoppingEveryNEpoch.
// Parallel training results vary wildly from the expected result.
// Need to determine if this test is feasible, and how it should
// be properly designed.
// @Test
// public void testEarlyStoppingIris(){
// MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder()
// .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT).iterations(1)
// .updater(Updater.SGD)
// .weightInit(WeightInit.XAVIER)
// .list()
// .layer(0,new OutputLayer.Builder().nIn(4).nOut(3).lossFunction(LossFunctions.LossFunction.MCXENT).build())
// .pretrain(false).backprop(true)
// .build();
// MultiLayerNetwork net = new MultiLayerNetwork(conf);
// net.setListeners(new ScoreIterationListener(1));
//
// DataSetIterator irisIter = new IrisDataSetIterator(50,600);
// EarlyStoppingModelSaver<MultiLayerNetwork> saver = new InMemoryModelSaver<>();
// EarlyStoppingConfiguration<MultiLayerNetwork> esConf = new EarlyStoppingConfiguration.Builder<MultiLayerNetwork>()
// .epochTerminationConditions(new MaxEpochsTerminationCondition(5))
// .evaluateEveryNEpochs(1)
// .iterationTerminationConditions(new MaxTimeIterationTerminationCondition(1, TimeUnit.MINUTES))
// .scoreCalculator(new DataSetLossCalculator(irisIter,true))
// .modelSaver(saver)
// .build();
//
// IEarlyStoppingTrainer<MultiLayerNetwork> trainer = new EarlyStoppingParallelTrainer<>(esConf,net,irisIter,null,2,2,1);
//
// EarlyStoppingResult<MultiLayerNetwork> result = trainer.fit();
// System.out.println(result);
//
// assertEquals(5, result.getTotalEpochs());
// assertEquals(EarlyStoppingResult.TerminationReason.EpochTerminationCondition,result.getTerminationReason());
// Map<Integer,Double> scoreVsIter = result.getScoreVsEpoch();
// assertEquals(5,scoreVsIter.size());
// String expDetails = esConf.getEpochTerminationConditions().get(0).toString();
// assertEquals(expDetails, result.getTerminationDetails());
//
// MultiLayerNetwork out = result.getBestModel();
// assertNotNull(out);
//
// //Check that best score actually matches (returned model vs. manually calculated score)
// MultiLayerNetwork bestNetwork = result.getBestModel();
// irisIter.reset();
// double score = bestNetwork.score(irisIter.next());
// assertEquals(result.getBestModelScore(), score, 1e-4);
// }
@Test
public void testEarlyStoppingEveryNEpoch() {
MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder()
        .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT).iterations(1)
        .updater(Updater.SGD).weightInit(WeightInit.XAVIER)
        .list()
        .layer(0, new OutputLayer.Builder().nIn(4).nOut(3)
                .lossFunction(LossFunctions.LossFunction.MCXENT).build())
        .pretrain(false).backprop(true).build();
MultiLayerNetwork net = new MultiLayerNetwork(conf);
net.setListeners(new ScoreIterationListener(1));
DataSetIterator irisIter = new IrisDataSetIterator(50, 600);
EarlyStoppingModelSaver<MultiLayerNetwork> saver = new InMemoryModelSaver<>();
EarlyStoppingConfiguration<MultiLayerNetwork> esConf = new EarlyStoppingConfiguration.Builder<MultiLayerNetwork>()
        .epochTerminationConditions(new MaxEpochsTerminationCondition(5))
        .scoreCalculator(new DataSetLossCalculator(irisIter, true))
        .evaluateEveryNEpochs(2)
        .modelSaver(saver)
        .build();
IEarlyStoppingTrainer<MultiLayerNetwork> trainer = new EarlyStoppingParallelTrainer<>(esConf, net, irisIter, null, 2, 6, 1);
EarlyStoppingResult<MultiLayerNetwork> result = trainer.fit();
System.out.println(result);
assertEquals(5, result.getTotalEpochs());
assertEquals(EarlyStoppingResult.TerminationReason.EpochTerminationCondition, result.getTerminationReason());
}
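Note on the constructor arguments: in new EarlyStoppingParallelTrainer<>(esConf, net, irisIter, null, 2, 6, 1), the null slot takes a MultiDataSetIterator and the three integers configure the parallelism (a reasonable reading, treated here as an assumption rather than something the snippet confirms, is worker count, prefetch buffer size and averaging frequency). A minimal single-threaded sketch with the same early-stopping configuration, assuming the standard EarlyStoppingTrainer from deeplearning4j-core, would be:

// Hedged sketch: same esConf and data, no parallel workers (useful as a baseline when parallel scores look off).
IEarlyStoppingTrainer<MultiLayerNetwork> serialTrainer = new EarlyStoppingTrainer(esConf, net, irisIter);
EarlyStoppingResult<MultiLayerNetwork> serialResult = serialTrainer.fit();
System.out.println(serialResult.getTerminationReason());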
Use of org.deeplearning4j.nn.conf.MultiLayerConfiguration in project deeplearning4j by deeplearning4j.
The class ParallelWrapperMainTest, method runParallelWrapperMain.
@Test
public void runParallelWrapperMain() throws Exception {
int nChannels = 1;
int outputNum = 10;
// for GPU you usually want to have higher batchSize
int batchSize = 128;
int nEpochs = 10;
int iterations = 1;
int seed = 123;
int uiPort = new Random().nextInt(1000) + 9000;
System.setProperty("org.deeplearning4j.ui.port", String.valueOf(uiPort));
log.info("Load data....");
DataSetIterator mnistTrain = new MnistDataSetIterator(batchSize, true, 12345);
DataSetIterator mnistTest = new MnistDataSetIterator(batchSize, false, 12345);
log.info("Build model....");
MultiLayerConfiguration.Builder builder = new NeuralNetConfiguration.Builder()
        .seed(seed).iterations(iterations)
        .regularization(true).l2(0.0005)
        .learningRate(0.01) //.biasLearningRate(0.02)
        .weightInit(WeightInit.XAVIER)
        .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT)
        .updater(Updater.NESTEROVS).momentum(0.9)
        .list()
        .layer(0, new ConvolutionLayer.Builder(5, 5).nIn(nChannels).stride(1, 1).nOut(20)
                .activation(Activation.IDENTITY).build())
        .layer(1, new SubsamplingLayer.Builder(SubsamplingLayer.PoolingType.MAX).kernelSize(2, 2)
                .stride(2, 2).build())
        .layer(2, new ConvolutionLayer.Builder(5, 5).stride(1, 1).nOut(50)
                .activation(Activation.IDENTITY).build())
        .layer(3, new SubsamplingLayer.Builder(SubsamplingLayer.PoolingType.MAX).kernelSize(2, 2)
                .stride(2, 2).build())
        .layer(4, new DenseLayer.Builder().activation(Activation.RELU).nOut(500).build())
        .layer(5, new OutputLayer.Builder(LossFunctions.LossFunction.NEGATIVELOGLIKELIHOOD).nOut(outputNum)
                .activation(Activation.SOFTMAX).build())
        .backprop(true).pretrain(false);
// The builder needs the dimensions of the image along with the number of channels. These are 28x28 images in one channel.
new ConvolutionLayerSetup(builder, 28, 28, 1);
MultiLayerConfiguration conf = builder.build();
MultiLayerNetwork model = new MultiLayerNetwork(conf);
model.init();
File tempModel = new File("tmpmodel.zip");
tempModel.deleteOnExit();
ModelSerializer.writeModel(model, tempModel, false);
File tmp = new File("tmpmodel.bin");
tmp.deleteOnExit();
ParallelWrapperMain parallelWrapperMain = new ParallelWrapperMain();
parallelWrapperMain.runMain(new String[] { "--modelPath", tempModel.getAbsolutePath(),
        "--dataSetIteratorFactoryClazz", MnistDataSetIteratorProviderFactory.class.getName(),
        "--modelOutputPath", tmp.getAbsolutePath(),
        "--uiUrl", "localhost:" + uiPort });
Thread.sleep(30000);
}
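The test only sleeps after launching training; to inspect the result, the model written to --modelOutputPath can be reloaded with ModelSerializer. A sketch, assuming runMain has finished writing the file before the test exits:

// Hedged sketch: restore the network that ParallelWrapperMain wrote to tmp.
MultiLayerNetwork trained = ModelSerializer.restoreMultiLayerNetwork(tmp);
log.info("Restored model with {} parameters", trained.numParams());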
Use of org.deeplearning4j.nn.conf.MultiLayerConfiguration in project deeplearning4j by deeplearning4j.
The class TransferLearningHelper, method initHelperMLN.
private void initHelperMLN() {
if (applyFrozen) {
org.deeplearning4j.nn.api.Layer[] layers = origMLN.getLayers();
for (int i = frozenTill; i >= 0; i--) {
//unchecked?
layers[i] = new FrozenLayer(layers[i]);
}
origMLN.setLayers(layers);
}
for (int i = 0; i < origMLN.getnLayers(); i++) {
if (origMLN.getLayer(i) instanceof FrozenLayer) {
frozenInputLayer = i;
}
}
List<NeuralNetConfiguration> allConfs = new ArrayList<>();
for (int i = frozenInputLayer + 1; i < origMLN.getnLayers(); i++) {
allConfs.add(origMLN.getLayer(i).conf());
}
MultiLayerConfiguration c = origMLN.getLayerWiseConfigurations();
unFrozenSubsetMLN = new MultiLayerNetwork(new MultiLayerConfiguration.Builder()
        .backprop(c.isBackprop())
        .inputPreProcessors(c.getInputPreProcessors())
        .pretrain(c.isPretrain())
        .backpropType(c.getBackpropType())
        .tBPTTForwardLength(c.getTbpttFwdLength())
        .tBPTTBackwardLength(c.getTbpttBackLength())
        .confs(allConfs)
        .build());
unFrozenSubsetMLN.init();
//copy over params
for (int i = frozenInputLayer + 1; i < origMLN.getnLayers(); i++) {
unFrozenSubsetMLN.getLayer(i - frozenInputLayer - 1).setParams(origMLN.getLayer(i).params());
}
//unFrozenSubsetMLN.setListeners(origMLN.getListeners());
}
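initHelperMLN is a private helper; the usual way to exercise it is through the public TransferLearningHelper API. A sketch, assuming the MultiLayerNetwork constructor overload and the featurize/fitFeaturized/unfrozenMLN methods:

// Hedged sketch: pretrainedNet and trainingData are hypothetical variables.
TransferLearningHelper helper = new TransferLearningHelper(pretrainedNet, 1); // freezes layers 0..1, as the loop above does
DataSet featurized = helper.featurize(trainingData);   // run the frozen layers once and cache their output
helper.fitFeaturized(featurized);                      // train only the unfrozen subset built by initHelperMLN
MultiLayerNetwork unfrozenPart = helper.unfrozenMLN(); // the helper network assembled above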
Use of org.deeplearning4j.nn.conf.MultiLayerConfiguration in project deeplearning4j by deeplearning4j.
The class ParameterServerParallelWrapperTest, method testWrapper.
@Test
public void testWrapper() throws Exception {
int nChannels = 1;
int outputNum = 10;
// for GPU you usually want to have higher batchSize
int batchSize = 128;
int nEpochs = 10;
int iterations = 1;
int seed = 123;
log.info("Load data....");
DataSetIterator mnistTrain = new MnistDataSetIterator(batchSize, 1000);
DataSetIterator mnistTest = new MnistDataSetIterator(batchSize, false, 12345);
log.info("Build model....");
MultiLayerConfiguration.Builder builder = new NeuralNetConfiguration.Builder()
        .seed(seed).iterations(iterations)
        .regularization(true).l2(0.0005)
        .learningRate(0.01) //.biasLearningRate(0.02)
        .weightInit(WeightInit.XAVIER)
        .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT)
        .updater(Updater.NESTEROVS).momentum(0.9)
        .list()
        .layer(0, new ConvolutionLayer.Builder(5, 5).nIn(nChannels).stride(1, 1).nOut(20)
                .activation(Activation.IDENTITY).build())
        .layer(1, new SubsamplingLayer.Builder(SubsamplingLayer.PoolingType.MAX).kernelSize(2, 2)
                .stride(2, 2).build())
        .layer(2, new ConvolutionLayer.Builder(5, 5).stride(1, 1).nOut(50)
                .activation(Activation.IDENTITY).build())
        .layer(3, new SubsamplingLayer.Builder(SubsamplingLayer.PoolingType.MAX).kernelSize(2, 2)
                .stride(2, 2).build())
        .layer(4, new DenseLayer.Builder().activation(Activation.RELU).nOut(500).build())
        .layer(5, new OutputLayer.Builder(LossFunctions.LossFunction.NEGATIVELOGLIKELIHOOD).nOut(outputNum)
                .activation(Activation.SOFTMAX).build())
        .setInputType(InputType.convolutionalFlat(28, 28, 1))
        .backprop(true).pretrain(false);
MultiLayerConfiguration conf = builder.build();
MultiLayerNetwork model = new MultiLayerNetwork(conf);
model.init();
ParameterServerParallelWrapper parameterServerParallelWrapper = ParameterServerParallelWrapper.builder()
        .model(model).multiLayerNetwork(model)
        .numEpochs(10)
        .numWorkers(Runtime.getRuntime().availableProcessors())
        .statusServerPort(33000)
        .preFetchSize(3)
        .build();
parameterServerParallelWrapper.fit(mnistTrain);
parameterServerParallelWrapper.close();
Thread.sleep(30000);
}
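mnistTest is built above but never consumed; a short hedged follow-up, assuming fit has completed before close() returns and using org.deeplearning4j.eval.Evaluation:

// Hedged sketch: score the locally held model on the held-out MNIST split.
Evaluation eval = model.evaluate(mnistTest);
log.info(eval.stats());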
Use of org.deeplearning4j.nn.conf.MultiLayerConfiguration in project deeplearning4j by deeplearning4j.
The class GradientCheckTestsMasking, method testBidirectionalLSTMMasking.
@Test
public void testBidirectionalLSTMMasking() {
//Basic test of GravesLSTM layer
Nd4j.getRandom().setSeed(12345L);
int timeSeriesLength = 5;
int nIn = 5;
int layerSize = 4;
int nOut = 3;
int miniBatchSize = 3;
INDArray[] masks = new INDArray[] { null,
        Nd4j.create(new double[][] { { 1, 1, 1, 1, 1 }, { 1, 1, 1, 1, 1 }, { 1, 1, 1, 1, 1 } }),
        Nd4j.create(new double[][] { { 1, 1, 1, 1, 1 }, { 1, 1, 1, 1, 0 }, { 1, 1, 1, 0, 0 } }),
        Nd4j.create(new double[][] { { 1, 1, 1, 1, 1 }, { 0, 1, 1, 1, 1 }, { 0, 0, 1, 1, 1 } }) };
int testNum = 0;
for (INDArray mask : masks) {
MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder()
        .regularization(false).updater(Updater.NONE)
        .weightInit(WeightInit.DISTRIBUTION).dist(new NormalDistribution(0, 1.0))
        .seed(12345L)
        .list()
        .layer(0, new GravesBidirectionalLSTM.Builder().nIn(nIn).nOut(layerSize)
                .activation(Activation.TANH).build())
        .layer(1, new GravesBidirectionalLSTM.Builder().nIn(layerSize).nOut(layerSize)
                .activation(Activation.TANH).build())
        .layer(2, new RnnOutputLayer.Builder(LossFunctions.LossFunction.MCXENT).activation(Activation.SOFTMAX)
                .nIn(layerSize).nOut(nOut).build())
        .pretrain(false).backprop(true).build();
MultiLayerNetwork mln = new MultiLayerNetwork(conf);
mln.init();
Random r = new Random(12345L);
INDArray input = Nd4j.zeros(miniBatchSize, nIn, timeSeriesLength);
for (int i = 0; i < miniBatchSize; i++) {
for (int j = 0; j < nIn; j++) {
for (int k = 0; k < timeSeriesLength; k++) {
input.putScalar(new int[] { i, j, k }, r.nextDouble() - 0.5);
}
}
}
INDArray labels = Nd4j.zeros(miniBatchSize, nOut, timeSeriesLength);
for (int i = 0; i < miniBatchSize; i++) {
for (int j = 0; j < nIn; j++) {
labels.putScalar(i, r.nextInt(nOut), j, 1.0);
}
}
mln.setLayerMaskArrays(mask, mask);
if (PRINT_RESULTS) {
System.out.println("testBidirectionalLSTMMasking() - testNum = " + testNum++);
for (int j = 0; j < mln.getnLayers(); j++) System.out.println("Layer " + j + " # params: " + mln.getLayer(j).numParams());
}
boolean gradOK = GradientCheckUtil.checkGradients(mln, DEFAULT_EPS, DEFAULT_MAX_REL_ERROR, DEFAULT_MIN_ABS_ERROR, PRINT_RESULTS, RETURN_ON_FIRST_FAILURE, input, labels);
assertTrue(gradOK);
}
}
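Each mask row corresponds to one example in the minibatch and each column to a time step (shape [miniBatchSize, timeSeriesLength]); a 0 marks a padded step that should not contribute to the loss. A sketch of building the third mask above programmatically, assuming Nd4j.ones and putScalar as used elsewhere in these tests:

// Hedged sketch: start fully unmasked, then zero out the padded trailing steps.
INDArray mask = Nd4j.ones(miniBatchSize, timeSeriesLength);
mask.putScalar(new int[] { 1, 4 }, 0.0); // example 1: last step padded
mask.putScalar(new int[] { 2, 3 }, 0.0); // example 2: last two steps padded
mask.putScalar(new int[] { 2, 4 }, 0.0);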