Use of org.nd4j.linalg.api.rng.distribution.Distribution in project deeplearning4j by deeplearning4j.
The class GravesLSTMParamInitializer, method init.
@Override
public Map<String, INDArray> init(NeuralNetConfiguration conf, INDArray paramsView, boolean initializeParams) {
    Map<String, INDArray> params = Collections.synchronizedMap(new LinkedHashMap<String, INDArray>());
    org.deeplearning4j.nn.conf.layers.GravesLSTM layerConf = (org.deeplearning4j.nn.conf.layers.GravesLSTM) conf.getLayer();
    double forgetGateInit = layerConf.getForgetGateBiasInit();
    Distribution dist = Distributions.createDistribution(layerConf.getDist());
    //i.e., n neurons in this layer
    int nL = layerConf.getNOut();
    //i.e., n neurons in previous layer
    int nLast = layerConf.getNIn();
    conf.addVariable(INPUT_WEIGHT_KEY);
    conf.addVariable(RECURRENT_WEIGHT_KEY);
    conf.addVariable(BIAS_KEY);
    int length = numParams(conf);
    if (paramsView.length() != length)
        throw new IllegalStateException("Expected params view of length " + length + ", got length " + paramsView.length());
    int nParamsIn = nLast * (4 * nL);
    int nParamsRecurrent = nL * (4 * nL + 3);
    int nBias = 4 * nL;
    INDArray inputWeightView = paramsView.get(NDArrayIndex.point(0), NDArrayIndex.interval(0, nParamsIn));
    INDArray recurrentWeightView = paramsView.get(NDArrayIndex.point(0), NDArrayIndex.interval(nParamsIn, nParamsIn + nParamsRecurrent));
    INDArray biasView = paramsView.get(NDArrayIndex.point(0), NDArrayIndex.interval(nParamsIn + nParamsRecurrent, nParamsIn + nParamsRecurrent + nBias));
    if (initializeParams) {
        int fanIn = nL;
        int fanOut = nLast + nL;
        int[] inputWShape = new int[] { nLast, 4 * nL };
        int[] recurrentWShape = new int[] { nL, 4 * nL + 3 };
        params.put(INPUT_WEIGHT_KEY, WeightInitUtil.initWeights(fanIn, fanOut, inputWShape, layerConf.getWeightInit(), dist, inputWeightView));
        params.put(RECURRENT_WEIGHT_KEY, WeightInitUtil.initWeights(fanIn, fanOut, recurrentWShape, layerConf.getWeightInit(), dist, recurrentWeightView));
        //Order: input, forget, output, input modulation, i.e., IFOG
        biasView.put(new INDArrayIndex[] { NDArrayIndex.point(0), NDArrayIndex.interval(nL, 2 * nL) },
                Nd4j.valueArrayOf(1, nL, forgetGateInit));
        /* The line above initializes the forget gate biases to the specified value.
         * See Sutskever PhD thesis, pg. 19:
         * "it is important for [the forget gate activations] to be approximately 1 at the early stages of learning,
         * which is accomplished by initializing [the forget gate biases] to a large value (such as 5). If it is
         * not done, it will be harder to learn long range dependencies because the smaller values of the forget
         * gates will create a vanishing gradients problem."
         * http://www.cs.utoronto.ca/~ilya/pubs/ilya_sutskever_phd_thesis.pdf
         */
        params.put(BIAS_KEY, biasView);
    } else {
        params.put(INPUT_WEIGHT_KEY, WeightInitUtil.reshapeWeights(new int[] { nLast, 4 * nL }, inputWeightView));
        params.put(RECURRENT_WEIGHT_KEY, WeightInitUtil.reshapeWeights(new int[] { nL, 4 * nL + 3 }, recurrentWeightView));
        params.put(BIAS_KEY, biasView);
    }
    return params;
}
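The view offsets above follow directly from the layer's parameter counts: nLast * 4*nL input weights, nL * (4*nL + 3) recurrent weights (the extra three columns per row hold the peephole weights of the Graves LSTM variant), and 4*nL biases, one per neuron for each of the four IFOG gates. A minimal sketch of that arithmetic, with hypothetical layer sizes chosen only for illustration:

    // Hypothetical sizes: 10 inputs (nLast), 20 LSTM units (nL).
    int nLast = 10;
    int nL = 20;
    int nParamsIn = nLast * (4 * nL);          // input weights: 10 * 80 = 800
    int nParamsRecurrent = nL * (4 * nL + 3);  // recurrent weights + peepholes: 20 * 83 = 1660
    int nBias = 4 * nL;                        // one bias per gate per neuron: 80
    // This total must equal numParams(conf) for the flat params view to line up.
    System.out.println("Total: " + (nParamsIn + nParamsRecurrent + nBias)); // 2540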
Use of org.nd4j.linalg.api.rng.distribution.Distribution in project deeplearning4j by deeplearning4j.
The class RBM, method propUp.
/**
 * Calculates the activations of the hidden layer given the visible input:
 * sigmoid(v * W + hbias)
 * @param v the visible layer values
 * @return the approximated activations of the hidden layer
 */
public INDArray propUp(INDArray v, boolean training) {
    INDArray preSig = preOutput(v, training);
    switch (layerConf().getHiddenUnit()) {
        case IDENTITY:
            return preSig;
        case BINARY:
            return sigmoid(preSig);
        case GAUSSIAN:
            Distribution dist = Nd4j.getDistributions().createNormal(preSig, 1);
            preSig = dist.sample(preSig.shape());
            return preSig;
        case RECTIFIED:
            preSig = max(preSig, 0.0);
            return preSig;
        case SOFTMAX:
            return Nd4j.getExecutioner().execAndReturn(Nd4j.getOpFactory().createTransform("softmax", preSig));
        default:
            throw new IllegalStateException("Hidden unit type must be one of: identity, binary, gaussian, rectified, or softmax");
    }
}
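In the GAUSSIAN branch above, the pre-activation itself serves as the per-element mean of a normal distribution with unit standard deviation, so the "activation" is a noisy sample rather than a deterministic function of the input. A standalone sketch of that sampling step, assuming only the distribution factory already used in the snippet:

    // Per-element means; in propUp this is the pre-activation preSig.
    INDArray means = Nd4j.create(new double[] { 0.0, 1.0, -2.0 });
    // Normal distribution with those means and unit standard deviation.
    Distribution dist = Nd4j.getDistributions().createNormal(means, 1);
    // One draw per element; the sample has the same shape as 'means'.
    INDArray noisy = dist.sample(means.shape());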
Use of org.nd4j.linalg.api.rng.distribution.Distribution in project deeplearning4j by deeplearning4j.
The class RBM, method propDown.
/**
 * Calculates the activations of the visible layer given the hidden:
 * activation(h * W^T + vbias)
 * @param h the hidden layer values
 * @return the approximated output of the visible layer
 */
public INDArray propDown(INDArray h) {
    INDArray W = getParam(PretrainParamInitializer.WEIGHT_KEY).transpose();
    INDArray vBias = getParam(PretrainParamInitializer.VISIBLE_BIAS_KEY);
    INDArray vMean = h.mmul(W).addiRowVector(vBias);
    switch (layerConf().getVisibleUnit()) {
        case IDENTITY:
            return vMean;
        case BINARY:
            return sigmoid(vMean);
        case GAUSSIAN:
            Distribution dist = Nd4j.getDistributions().createNormal(vMean, 1);
            vMean = dist.sample(vMean.shape());
            return vMean;
        case LINEAR:
            return vMean;
        case SOFTMAX:
            return Nd4j.getExecutioner().execAndReturn(Nd4j.getOpFactory().createTransform("softmax", vMean));
        default:
            throw new IllegalStateException("Visible unit type must be one of: identity, binary, gaussian, linear, or softmax");
    }
}
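Note that propDown reuses the same weight matrix as propUp, transposed: with W of shape [nVisible, nHidden], h.mmul(W.transpose()) maps hidden activations of shape [batch, nHidden] back to [batch, nVisible], and addiRowVector adds the visible bias to every row in place. A shape-level sketch with hypothetical sizes:

    // Hypothetical shapes: batch of 5, 4 visible units, 3 hidden units.
    INDArray W = Nd4j.rand(4, 3);       // [nVisible, nHidden], shared with propUp
    INDArray vBias = Nd4j.zeros(1, 4);  // visible bias as a row vector
    INDArray h = Nd4j.rand(5, 3);       // hidden activations, [batch, nHidden]
    // h * W^T + vBias broadcast over rows: result shape is [5, 4].
    INDArray vMean = h.mmul(W.transpose()).addiRowVector(vBias);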
Use of org.nd4j.linalg.api.rng.distribution.Distribution in project deeplearning4j by deeplearning4j.
The class RBM, method sampleVisibleGivenHidden.
/**
* Guess the visible values given the hidden
*
* @param h the hidden units
* @return a visible mean and sample relative to the hidden states
* passed in
*/
@Override
public Pair<INDArray, INDArray> sampleVisibleGivenHidden(INDArray h) {
    INDArray vProb = propDown(h);
    INDArray vSample;
    switch (layerConf().getVisibleUnit()) {
        case IDENTITY: {
            vSample = vProb;
            break;
        }
        case BINARY: {
            Distribution dist = Nd4j.getDistributions().createBinomial(1, vProb);
            vSample = dist.sample(vProb.shape());
            break;
        }
        case GAUSSIAN:
        case LINEAR: {
            Distribution dist = Nd4j.getDistributions().createNormal(vProb, 1);
            vSample = dist.sample(vProb.shape());
            break;
        }
        case SOFTMAX: {
            vSample = Nd4j.getExecutioner().execAndReturn(Nd4j.getOpFactory().createTransform("softmax", vProb));
            break;
        }
        default: {
            throw new IllegalStateException("Visible unit type must be one of: Identity, Binary, Gaussian, Linear, or SoftMax");
        }
    }
    return new Pair<>(vProb, vSample);
}
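In the BINARY branch, each visible unit is an independent Bernoulli draw whose success probability is the matching entry of vProb; createBinomial with a single trial per element gives exactly that. A small sketch using the same factory call as the method above, with hypothetical probabilities:

    // Hypothetical per-unit activation probabilities (vProb in the method above).
    INDArray vProb = Nd4j.create(new double[] { 0.1, 0.5, 0.9 });
    // A binomial with n = 1 trial per element is an independent Bernoulli draw.
    Distribution dist = Nd4j.getDistributions().createBinomial(1, vProb);
    // Each entry of vSample is 0 or 1, drawn with the matching probability.
    INDArray vSample = dist.sample(vProb.shape());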
Use of org.nd4j.linalg.api.rng.distribution.Distribution in project nd4j by deeplearning4j.
The class RandomTests, method testAndersonDarling.
/**
 * Applies the Anderson-Darling test of normality to values drawn from GaussianDistribution.
 * See https://en.wikipedia.org/wiki/Anderson%E2%80%93Darling_test
 *
 * @throws Exception
 */
@Test
public void testAndersonDarling() throws Exception {
    Random random1 = Nd4j.getRandomFactory().getNewRandomInstance(119);
    INDArray z1 = Nd4j.create(1000);
    GaussianDistribution op1 = new GaussianDistribution(z1, 0.0, 1.0);
    Nd4j.getExecutioner().exec(op1, random1);
    int n = z1.length();
    // using this just for the cdf
    Distribution nd = new NormalDistribution(random1, 0.0, 1.0);
    Nd4j.sort(z1, true);
    System.out.println("Data for Anderson-Darling: " + z1);
    for (int i = 0; i < n; i++) {
        Double res = nd.cumulativeProbability(z1.getDouble(i));
        assertTrue(res >= 0.0);
        assertTrue(res <= 1.0);
        // clamp away from 0 and 1 to avoid log(0) = -Infinity later
        if (res == 0)
            res = 0.0000001;
        if (res == 1)
            res = 0.9999999;
        z1.putScalar(i, res);
    }
    double A = 0.0;
    for (int i = 0; i < n; i++) {
        A -= (2 * i + 1) * (Math.log(z1.getDouble(i)) + Math.log(1 - z1.getDouble(n - i - 1)));
    }
    A = A / n - n;
    A *= (1 + 4.0 / n - 25.0 / (n * n));
    assertTrue("Critical (max) value for 1000 points at significance level α = 0.0001 is 1.8692, received: " + A, A < 1.8692);
}
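The two loops above implement the standard A² statistic: with u_i the sorted samples mapped through the reference CDF, A² = -n - (1/n) * Σ_{i=0}^{n-1} (2i+1) * (ln u_i + ln(1 - u_{n-1-i})), followed by the small-sample correction factor. The same computation on a plain double[], as a minimal sketch independent of Nd4j:

    // u: sorted samples already mapped through the reference CDF,
    // clamped away from 0 and 1 exactly as in the test above.
    static double andersonDarling(double[] u) {
        int n = u.length;
        double a = 0.0;
        for (int i = 0; i < n; i++) {
            a -= (2 * i + 1) * (Math.log(u[i]) + Math.log(1 - u[n - i - 1]));
        }
        a = a / n - n;
        // Small-sample correction, matching the test above.
        return a * (1 + 4.0 / n - 25.0 / ((double) n * n));
    }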