
Example 1 with Distribution

use of org.nd4j.linalg.api.rng.distribution.Distribution in project deeplearning4j by deeplearning4j.

the class GravesLSTMParamInitializer method init.

@Override
public Map<String, INDArray> init(NeuralNetConfiguration conf, INDArray paramsView, boolean initializeParams) {
    Map<String, INDArray> params = Collections.synchronizedMap(new LinkedHashMap<String, INDArray>());
    org.deeplearning4j.nn.conf.layers.GravesLSTM layerConf = (org.deeplearning4j.nn.conf.layers.GravesLSTM) conf.getLayer();
    double forgetGateInit = layerConf.getForgetGateBiasInit();
    Distribution dist = Distributions.createDistribution(layerConf.getDist());
    //i.e., n neurons in this layer
    int nL = layerConf.getNOut();
    //i.e., n neurons in previous layer
    int nLast = layerConf.getNIn();
    conf.addVariable(INPUT_WEIGHT_KEY);
    conf.addVariable(RECURRENT_WEIGHT_KEY);
    conf.addVariable(BIAS_KEY);
    int length = numParams(conf);
    if (paramsView.length() != length)
        throw new IllegalStateException("Expected params view of length " + length + ", got length " + paramsView.length());
    int nParamsIn = nLast * (4 * nL);
    int nParamsRecurrent = nL * (4 * nL + 3);
    int nBias = 4 * nL;
    INDArray inputWeightView = paramsView.get(NDArrayIndex.point(0), NDArrayIndex.interval(0, nParamsIn));
    INDArray recurrentWeightView = paramsView.get(NDArrayIndex.point(0), NDArrayIndex.interval(nParamsIn, nParamsIn + nParamsRecurrent));
    INDArray biasView = paramsView.get(NDArrayIndex.point(0), NDArrayIndex.interval(nParamsIn + nParamsRecurrent, nParamsIn + nParamsRecurrent + nBias));
    if (initializeParams) {
        int fanIn = nL;
        int fanOut = nLast + nL;
        int[] inputWShape = new int[] { nLast, 4 * nL };
        int[] recurrentWShape = new int[] { nL, 4 * nL + 3 };
        params.put(INPUT_WEIGHT_KEY, WeightInitUtil.initWeights(fanIn, fanOut, inputWShape, layerConf.getWeightInit(), dist, inputWeightView));
        params.put(RECURRENT_WEIGHT_KEY, WeightInitUtil.initWeights(fanIn, fanOut, recurrentWShape, layerConf.getWeightInit(), dist, recurrentWeightView));
        //Order: input, forget, output, input modulation, i.e., IFOG
        biasView.put(new INDArrayIndex[] { NDArrayIndex.point(0), NDArrayIndex.interval(nL, 2 * nL) },
                Nd4j.valueArrayOf(1, nL, forgetGateInit));
        /* The above line initializes the forget gate biases to the specified value.
         * See Sutskever PhD thesis, pg 19:
         * "it is important for [the forget gate activations] to be approximately 1 at the early stages of learning,
         *  which is accomplished by initializing [the forget gate biases] to a large value (such as 5). If it is
         *  not done, it will be harder to learn long range dependencies because the smaller values of the forget
         *  gates will create a vanishing gradients problem."
         *  http://www.cs.utoronto.ca/~ilya/pubs/ilya_sutskever_phd_thesis.pdf
         */
        params.put(BIAS_KEY, biasView);
    } else {
        params.put(INPUT_WEIGHT_KEY, WeightInitUtil.reshapeWeights(new int[] { nLast, 4 * nL }, inputWeightView));
        params.put(RECURRENT_WEIGHT_KEY, WeightInitUtil.reshapeWeights(new int[] { nL, 4 * nL + 3 }, recurrentWeightView));
        params.put(BIAS_KEY, biasView);
    }
    return params;
}
Also used : INDArray(org.nd4j.linalg.api.ndarray.INDArray) Distribution(org.nd4j.linalg.api.rng.distribution.Distribution)
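To make the parameter layout above concrete, here is a small standalone sketch. It is not part of the DL4J sources; the class name LstmLayoutSketch and the sizes nIn = 3, nOut = 5 are invented for illustration, but the arithmetic mirrors the fields in init above.

public class LstmLayoutSketch {
    public static void main(String[] args) {
        int nLast = 3;                            // nIn: neurons in the previous layer
        int nL = 5;                               // nOut: LSTM units in this layer
        int nParamsIn = nLast * (4 * nL);         // 3 * 20 = 60 input weights (4 gates: IFOG)
        int nParamsRecurrent = nL * (4 * nL + 3); // 5 * 23 = 115 recurrent weights + peepholes
        int nBias = 4 * nL;                       // 20 biases, one block of nL per gate
        // The flat paramsView is carved into [input | recurrent | bias]:
        System.out.println("input weights:     [0, " + nParamsIn + ")");   // [0, 60)
        System.out.println("recurrent weights: [" + nParamsIn + ", "
                + (nParamsIn + nParamsRecurrent) + ")");                   // [60, 175)
        System.out.println("biases:            [" + (nParamsIn + nParamsRecurrent) + ", "
                + (nParamsIn + nParamsRecurrent + nBias) + ")");           // [175, 195)
        // Forget-gate biases are the second nL-wide block of the bias view, in IFOG order,
        // i.e. indices [nL, 2*nL) = [5, 10), which is exactly what the put(...) call targets.
    }
}

For this configuration, numParams(conf) would be 60 + 115 + 20 = 195.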

Example 2 with Distribution

use of org.nd4j.linalg.api.rng.distribution.Distribution in project deeplearning4j by deeplearning4j.

the class RBM method propUp.

/**
     * Calculates the activation of the hidden layer given the visible input:
     * sigmoid(v * W + hbias)
     * @param v the visible layer
     * @return the approximated activations of the hidden layer
     */
public INDArray propUp(INDArray v, boolean training) {
    INDArray preSig = preOutput(v, training);
    switch(layerConf().getHiddenUnit()) {
        case IDENTITY:
            return preSig;
        case BINARY:
            return sigmoid(preSig);
        case GAUSSIAN:
            Distribution dist = Nd4j.getDistributions().createNormal(preSig, 1);
            preSig = dist.sample(preSig.shape());
            return preSig;
        case RECTIFIED:
            preSig = max(preSig, 0.0);
            return preSig;
        case SOFTMAX:
            return Nd4j.getExecutioner().execAndReturn(Nd4j.getOpFactory().createTransform("softmax", preSig));
        default:
            throw new IllegalStateException("Hidden unit type should either be binary, gaussian, or rectified linear");
    }
}
Also used : INDArray(org.nd4j.linalg.api.ndarray.INDArray) Distribution(org.nd4j.linalg.api.rng.distribution.Distribution)
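The GAUSSIAN branch above replaces each pre-activation with a draw from a unit-variance normal centered at that value. Here is a minimal sketch of just that sampling step; it uses only the factory calls already present in the method, while the class name and the toy 2x3 array are invented for illustration.

import org.nd4j.linalg.api.ndarray.INDArray;
import org.nd4j.linalg.api.rng.distribution.Distribution;
import org.nd4j.linalg.factory.Nd4j;

public class GaussianUnitSketch {
    public static void main(String[] args) {
        // Stand-in for the pre-activations of a 2x3 hidden layer
        INDArray preSig = Nd4j.create(new double[][] { { 0.1, -0.5, 2.0 }, { 1.5, 0.0, -1.0 } });
        // Element-wise normal: each mean is taken from preSig, shared stddev 1
        Distribution dist = Nd4j.getDistributions().createNormal(preSig, 1);
        INDArray sampled = dist.sample(preSig.shape());
        System.out.println("means:   " + preSig);
        System.out.println("samples: " + sampled);
    }
}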

Example 3 with Distribution

use of org.nd4j.linalg.api.rng.distribution.Distribution in project deeplearning4j by deeplearning4j.

the class RBM method propDown.

/**
     * Calculates the activation of the visible layer given the hidden:
     * activation(h * W^T + vbias)
     * @param h the hidden layer
     * @return the approximated activations of the visible layer
     */
public INDArray propDown(INDArray h) {
    INDArray W = getParam(PretrainParamInitializer.WEIGHT_KEY).transpose();
    INDArray vBias = getParam(PretrainParamInitializer.VISIBLE_BIAS_KEY);
    INDArray vMean = h.mmul(W).addiRowVector(vBias);
    switch(layerConf().getVisibleUnit()) {
        case IDENTITY:
            return vMean;
        case BINARY:
            return sigmoid(vMean);
        case GAUSSIAN:
            Distribution dist = Nd4j.getDistributions().createNormal(vMean, 1);
            vMean = dist.sample(vMean.shape());
            return vMean;
        case LINEAR:
            return vMean;
        case SOFTMAX:
            return Nd4j.getExecutioner().execAndReturn(Nd4j.getOpFactory().createTransform("softmax", vMean));
        default:
            throw new IllegalStateException("Visible unit type should either be binary or gaussian");
    }
}
Also used : INDArray(org.nd4j.linalg.api.ndarray.INDArray) Distribution(org.nd4j.linalg.api.rng.distribution.Distribution)
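Before the switch, propDown reduces to one matrix product plus a broadcast row vector: vMean = h * W^T + vbias. A minimal sketch of that linear step with hypothetical shapes (batch of 2, 4 hidden units, 3 visible units; the class name is invented):

import java.util.Arrays;

import org.nd4j.linalg.api.ndarray.INDArray;
import org.nd4j.linalg.factory.Nd4j;

public class PropDownSketch {
    public static void main(String[] args) {
        INDArray h = Nd4j.rand(2, 4);      // hidden states: [batch x nHidden]
        INDArray W = Nd4j.rand(3, 4);      // weights as stored: [nVisible x nHidden]
        INDArray vBias = Nd4j.rand(1, 3);  // visible bias row vector
        // Same linear step as propDown: h * W^T + vbias, broadcast over the batch
        INDArray vMean = h.mmul(W.transpose()).addiRowVector(vBias);
        System.out.println(Arrays.toString(vMean.shape()));  // [2, 3]
    }
}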

Example 4 with Distribution

use of org.nd4j.linalg.api.rng.distribution.Distribution in project deeplearning4j by deeplearning4j.

the class RBM method sampleVisibleGivenHidden.

/**
     * Guess the visible values given the hidden
     *
     * @param h the hidden units
     * @return a visible mean and sample relative to the hidden states
     * passed in
     */
@Override
public Pair<INDArray, INDArray> sampleVisibleGivenHidden(INDArray h) {
    INDArray vProb = propDown(h);
    INDArray vSample;
    switch(layerConf().getVisibleUnit()) {
        case IDENTITY:
            {
                vSample = vProb;
                break;
            }
        case BINARY:
            {
                Distribution dist = Nd4j.getDistributions().createBinomial(1, vProb);
                vSample = dist.sample(vProb.shape());
                break;
            }
        case GAUSSIAN:
        case LINEAR:
            {
                Distribution dist = Nd4j.getDistributions().createNormal(vProb, 1);
                vSample = dist.sample(vProb.shape());
                break;
            }
        case SOFTMAX:
            {
                vSample = Nd4j.getExecutioner().execAndReturn(Nd4j.getOpFactory().createTransform("softmax", vProb));
                break;
            }
        default:
            {
                throw new IllegalStateException("Visible type must be one of Binary, Gaussian, SoftMax or Linear");
            }
    }
    return new Pair<>(vProb, vSample);
}
Also used : INDArray(org.nd4j.linalg.api.ndarray.INDArray) Distribution(org.nd4j.linalg.api.rng.distribution.Distribution) Pair(org.deeplearning4j.berkeley.Pair)
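In the BINARY case above, each visible probability becomes the success parameter of a single Bernoulli trial (a binomial with n = 1), so the sample is a vector of zeros and ones. A standalone sketch of that draw, reusing the same createBinomial factory call; the class name and toy probabilities are invented.

import org.nd4j.linalg.api.ndarray.INDArray;
import org.nd4j.linalg.api.rng.distribution.Distribution;
import org.nd4j.linalg.factory.Nd4j;

public class BinarySampleSketch {
    public static void main(String[] args) {
        // Stand-in for the output of propDown(h): per-unit activation probabilities
        INDArray vProb = Nd4j.create(new double[] { 0.9, 0.1, 0.5, 0.99 });
        // Binomial with n = 1 trial is an element-wise Bernoulli draw
        Distribution dist = Nd4j.getDistributions().createBinomial(1, vProb);
        INDArray vSample = dist.sample(vProb.shape()); // each entry is 0.0 or 1.0
        System.out.println("probabilities: " + vProb);
        System.out.println("sample:        " + vSample);
    }
}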

Example 5 with Distribution

use of org.nd4j.linalg.api.rng.distribution.Distribution in project nd4j by deeplearning4j.

the class RandomTests method testAndersonDarling.

/**
 * Applies the Anderson-Darling test of Gaussianity to samples drawn from GaussianDistribution.
 * See https://en.wikipedia.org/wiki/Anderson%E2%80%93Darling_test
 *
 * @throws Exception
 */
@Test
public void testAndersonDarling() throws Exception {
    Random random1 = Nd4j.getRandomFactory().getNewRandomInstance(119);
    INDArray z1 = Nd4j.create(1000);
    GaussianDistribution op1 = new GaussianDistribution(z1, 0.0, 1.0);
    Nd4j.getExecutioner().exec(op1, random1);
    int n = z1.length();
    // using this just for the cdf
    Distribution nd = new NormalDistribution(random1, 0.0, 1.0);
    Nd4j.sort(z1, true);
    System.out.println("Data for Anderson-Darling: " + z1);
    for (int i = 0; i < n; i++) {
        double res = nd.cumulativeProbability(z1.getDouble(i));
        assertTrue(res >= 0.0);
        assertTrue(res <= 1.0);
        // clamp to the open interval (0, 1) so Math.log below stays finite
        if (res == 0)
            res = 0.0000001;
        if (res == 1)
            res = 0.9999999;
        z1.putScalar(i, res);
    }
    double A = 0.0;
    for (int i = 0; i < n; i++) {
        A -= (2 * i + 1) * (Math.log(z1.getDouble(i)) + Math.log(1 - z1.getDouble(n - i - 1)));
    }
    A = A / n - n;
    A *= (1 + 4.0 / n - 25.0 / (n * n));
    assertTrue("Critical (max) value for 1000 points and confidence α = 0.0001 is 1.8692, received: " + A, A < 1.8692);
}
Also used : Random(org.nd4j.linalg.api.rng.Random) DefaultRandom(org.nd4j.linalg.api.rng.DefaultRandom) NativeRandom(org.nd4j.rng.NativeRandom) INDArray(org.nd4j.linalg.api.ndarray.INDArray) NormalDistribution(org.nd4j.linalg.api.rng.distribution.impl.NormalDistribution) Distribution(org.nd4j.linalg.api.rng.distribution.Distribution) OrthogonalDistribution(org.nd4j.linalg.api.rng.distribution.impl.OrthogonalDistribution) BaseNd4jTest(org.nd4j.linalg.BaseNd4jTest) Test(org.junit.Test)
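For reference, after the first loop z1 holds the sorted CDF values F(z_i), and the second loop computes the Anderson-Darling statistic A² = −n − (1/n) · Σ_{i=0}^{n−1} (2i+1) · [ln F(z_i) + ln(1 − F(z_{n−1−i}))], followed by the small-sample correction A*² = A² · (1 + 4/n − 25/n²). A plain-Java sketch of just the statistic on a toy array of five presorted CDF values (class name and data are invented; the real test above uses n = 1000):

public class AndersonDarlingSketch {
    public static void main(String[] args) {
        // Toy CDF values F(z_i) for an already-sorted sample
        double[] f = { 0.1, 0.3, 0.5, 0.7, 0.9 };
        int n = f.length;
        double A = 0.0;
        for (int i = 0; i < n; i++) {
            // the (2i + 1) weight emphasizes the tails over the center
            A -= (2 * i + 1) * (Math.log(f[i]) + Math.log(1 - f[n - i - 1]));
        }
        A = A / n - n;                       // A^2 statistic, about 0.13 here
        A *= (1 + 4.0 / n - 25.0 / (n * n)); // small-sample adjustment, about 0.104 here
        System.out.println("adjusted A^2 = " + A);
        // Evenly spread CDF values are consistent with the null hypothesis, so A^2
        // stays far below the critical value used in the assertion above.
    }
}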

Aggregations

Distribution (org.nd4j.linalg.api.rng.distribution.Distribution) 17
INDArray (org.nd4j.linalg.api.ndarray.INDArray) 16
Test (org.junit.Test) 9
BaseNd4jTest (org.nd4j.linalg.BaseNd4jTest) 7
Pair (org.deeplearning4j.berkeley.Pair) 2
LinkedHashMap (java.util.LinkedHashMap) 1
NormalDistribution (org.deeplearning4j.nn.conf.distribution.NormalDistribution) 1
VariationalAutoencoder (org.deeplearning4j.nn.conf.layers.variational.VariationalAutoencoder) 1
Gradient (org.deeplearning4j.nn.gradient.Gradient) 1
WeightInit (org.deeplearning4j.nn.weights.WeightInit) 1
ActivationSigmoid (org.nd4j.linalg.activations.impl.ActivationSigmoid) 1
MatchCondition (org.nd4j.linalg.api.ops.impl.accum.MatchCondition) 1
DefaultRandom (org.nd4j.linalg.api.rng.DefaultRandom) 1
Random (org.nd4j.linalg.api.rng.Random) 1
NormalDistribution (org.nd4j.linalg.api.rng.distribution.impl.NormalDistribution) 1
OrthogonalDistribution (org.nd4j.linalg.api.rng.distribution.impl.OrthogonalDistribution) 1
AdaGrad (org.nd4j.linalg.learning.config.AdaGrad) 1
NativeRandom (org.nd4j.rng.NativeRandom) 1