
Example 6 with Distribution

Use of org.nd4j.linalg.api.rng.distribution.Distribution in project deeplearning4j by deeplearning4j.

From the class RBM, method sampleHiddenGivenVisible:

/**
 * Samples the hidden values given the visible values, using the sampling
 * scheme for the configured hidden unit type (binomial for binary units).
 *
 * @param v the visible values
 * @return a pair of the expected hidden activations and the sampled hidden values
 */
@Override
public Pair<INDArray, INDArray> sampleHiddenGivenVisible(INDArray v) {
    INDArray hProb = propUp(v);
    INDArray hSample;
    Distribution dist;
    switch(layerConf().getHiddenUnit()) {
        case IDENTITY:
            {
                hSample = hProb;
                break;
            }
        case BINARY:
            {
                dist = Nd4j.getDistributions().createBinomial(1, hProb);
                hSample = dist.sample(hProb.shape());
                break;
            }
        case GAUSSIAN:
            {
                dist = Nd4j.getDistributions().createNormal(hProb, 1);
                hSample = dist.sample(hProb.shape());
                break;
            }
        case RECTIFIED:
            {
                // Noisy rectified linear sampling: draw from N(hProb, 1), scale the
                // draw by sqrt(sigmoid(hProb)), add it to the mean, then rectify at 0
                INDArray sigH1Mean = sigmoid(hProb);
                INDArray sqrtSigH1Mean = sqrt(sigH1Mean);
                INDArray sample = Nd4j.getDistributions().createNormal(hProb, 1).sample(hProb.shape());
                sample.muli(sqrtSigH1Mean);
                hSample = hProb.add(sample);
                // Rectified linear part
                hSample = max(hSample, 0.0);
                break;
            }
        case SOFTMAX:
            {
                hSample = Nd4j.getExecutioner().execAndReturn(Nd4j.getOpFactory().createTransform("softmax", hProb));
                break;
            }
        default:
            throw new IllegalStateException("Hidden unit type must be Identity, Binary, Gaussian, SoftMax or Rectified");
    }
    return new Pair<>(hProb, hSample);
}
Also used: INDArray (org.nd4j.linalg.api.ndarray.INDArray), Distribution (org.nd4j.linalg.api.rng.distribution.Distribution), Pair (org.deeplearning4j.berkeley.Pair)
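
For reference, the BINARY branch can be exercised in isolation. The following is a minimal sketch (the class name and input values are illustrative; it assumes the same nd4j API used above):

import org.nd4j.linalg.api.ndarray.INDArray;
import org.nd4j.linalg.api.rng.distribution.Distribution;
import org.nd4j.linalg.factory.Nd4j;

public class BinaryHiddenSamplingSketch {
    public static void main(String[] args) {
        // Activation probabilities for a batch of 2 examples with 3 hidden units
        INDArray hProb = Nd4j.create(new double[][] { { 0.9, 0.2, 0.5 }, { 0.1, 0.8, 0.5 } });
        // One Bernoulli trial per entry, with per-entry success probability taken from hProb
        Distribution dist = Nd4j.getDistributions().createBinomial(1, hProb);
        INDArray hSample = dist.sample(hProb.shape());
        // Each entry of hSample is 0.0 or 1.0
        System.out.println(hSample);
    }
}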

Example 7 with Distribution

Use of org.nd4j.linalg.api.rng.distribution.Distribution in project deeplearning4j by deeplearning4j.

From the class ConvolutionParamInitializer, method createWeightMatrix:

protected INDArray createWeightMatrix(NeuralNetConfiguration conf, INDArray weightView, boolean initializeParams) {
    /*
     * Create a 4d weight matrix of shape:
     *   (number of kernels, num input channels, kernel height, kernel width)
     * Note: c order is used specifically for the CNN weights, as opposed to f order elsewhere.
     * Inputs to the convolution layer have shape:
     *   (batch size, num input feature maps, image height, image width)
     */
    org.deeplearning4j.nn.conf.layers.ConvolutionLayer layerConf = (org.deeplearning4j.nn.conf.layers.ConvolutionLayer) conf.getLayer();
    if (initializeParams) {
        Distribution dist = Distributions.createDistribution(conf.getLayer().getDist());
        int[] kernel = layerConf.getKernelSize();
        int[] stride = layerConf.getStride();
        int inputDepth = layerConf.getNIn();
        int outputDepth = layerConf.getNOut();
        double fanIn = inputDepth * kernel[0] * kernel[1];
        double fanOut = outputDepth * kernel[0] * kernel[1] / ((double) stride[0] * stride[1]);
        int[] weightsShape = new int[] { outputDepth, inputDepth, kernel[0], kernel[1] };
        return WeightInitUtil.initWeights(fanIn, fanOut, weightsShape, layerConf.getWeightInit(), dist, 'c', weightView);
    } else {
        int[] kernel = layerConf.getKernelSize();
        return WeightInitUtil.reshapeWeights(new int[] { layerConf.getNOut(), layerConf.getNIn(), kernel[0], kernel[1] }, weightView, 'c');
    }
}
Also used: Distribution (org.nd4j.linalg.api.rng.distribution.Distribution)
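
The fan-in/fan-out arithmetic is easier to see with concrete numbers. A small self-contained sketch (the layer dimensions are made up for illustration):

public class ConvFanSketch {
    public static void main(String[] args) {
        // Hypothetical layer: 3 input channels, 16 kernels of size 5x5, stride 2x2
        int inputDepth = 3, outputDepth = 16;
        int[] kernel = { 5, 5 };
        int[] stride = { 2, 2 };

        // Same arithmetic as createWeightMatrix above
        double fanIn = inputDepth * kernel[0] * kernel[1]; // 3 * 5 * 5 = 75
        double fanOut = outputDepth * kernel[0] * kernel[1] / ((double) stride[0] * stride[1]); // 400 / 4 = 100
        int[] weightsShape = new int[] { outputDepth, inputDepth, kernel[0], kernel[1] }; // [16, 3, 5, 5]

        System.out.println("fanIn=" + fanIn + " fanOut=" + fanOut
                + " weights=" + java.util.Arrays.toString(weightsShape));
    }
}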

Example 8 with Distribution

Use of org.nd4j.linalg.api.rng.distribution.Distribution in project deeplearning4j by deeplearning4j.

From the class GravesBidirectionalLSTMParamInitializer, method init:

@Override
public Map<String, INDArray> init(NeuralNetConfiguration conf, INDArray paramsView, boolean initializeParams) {
    Map<String, INDArray> params = Collections.synchronizedMap(new LinkedHashMap<String, INDArray>());
    org.deeplearning4j.nn.conf.layers.GravesBidirectionalLSTM layerConf = (org.deeplearning4j.nn.conf.layers.GravesBidirectionalLSTM) conf.getLayer();
    double forgetGateInit = layerConf.getForgetGateBiasInit();
    Distribution dist = Distributions.createDistribution(layerConf.getDist());
    //i.e., n neurons in this layer
    int nL = layerConf.getNOut();
    //i.e., n neurons in previous layer
    int nLast = layerConf.getNIn();
    conf.addVariable(INPUT_WEIGHT_KEY_FORWARDS);
    conf.addVariable(RECURRENT_WEIGHT_KEY_FORWARDS);
    conf.addVariable(BIAS_KEY_FORWARDS);
    conf.addVariable(INPUT_WEIGHT_KEY_BACKWARDS);
    conf.addVariable(RECURRENT_WEIGHT_KEY_BACKWARDS);
    conf.addVariable(BIAS_KEY_BACKWARDS);
    int nParamsInput = nLast * (4 * nL);
    // 4 * nL + 3: four gate weight columns per unit, plus 3 peephole connections
    int nParamsRecurrent = nL * (4 * nL + 3);
    int nBias = 4 * nL;
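    // Flattened layout of paramsView: [iwF | rwF | bF | iwR | rwR | bR];
    // the offsets below mark where each forward/backward segment begins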
    int rwFOffset = nParamsInput;
    int bFOffset = rwFOffset + nParamsRecurrent;
    int iwROffset = bFOffset + nBias;
    int rwROffset = iwROffset + nParamsInput;
    int bROffset = rwROffset + nParamsRecurrent;
    INDArray iwF = paramsView.get(NDArrayIndex.point(0), NDArrayIndex.interval(0, rwFOffset));
    INDArray rwF = paramsView.get(NDArrayIndex.point(0), NDArrayIndex.interval(rwFOffset, bFOffset));
    INDArray bF = paramsView.get(NDArrayIndex.point(0), NDArrayIndex.interval(bFOffset, iwROffset));
    INDArray iwR = paramsView.get(NDArrayIndex.point(0), NDArrayIndex.interval(iwROffset, rwROffset));
    INDArray rwR = paramsView.get(NDArrayIndex.point(0), NDArrayIndex.interval(rwROffset, bROffset));
    INDArray bR = paramsView.get(NDArrayIndex.point(0), NDArrayIndex.interval(bROffset, bROffset + nBias));
    if (initializeParams) {
        // Gate order within each bias block: input, forget, output, input modulation (IFOG),
        // so the forget-gate biases occupy the [nL, 2*nL) slice
        bF.put(new INDArrayIndex[] { NDArrayIndex.point(0), NDArrayIndex.interval(nL, 2 * nL) },
                Nd4j.ones(1, nL).muli(forgetGateInit));
        bR.put(new INDArrayIndex[] { NDArrayIndex.point(0), NDArrayIndex.interval(nL, 2 * nL) },
                Nd4j.ones(1, nL).muli(forgetGateInit));
    }
    if (initializeParams) {
        //As per standard LSTM
        int fanIn = nL;
        int fanOut = nLast + nL;
        int[] inputWShape = new int[] { nLast, 4 * nL };
        int[] recurrentWShape = new int[] { nL, 4 * nL + 3 };
        params.put(INPUT_WEIGHT_KEY_FORWARDS, WeightInitUtil.initWeights(fanIn, fanOut, inputWShape, layerConf.getWeightInit(), dist, iwF));
        params.put(RECURRENT_WEIGHT_KEY_FORWARDS, WeightInitUtil.initWeights(fanIn, fanOut, recurrentWShape, layerConf.getWeightInit(), dist, rwF));
        params.put(BIAS_KEY_FORWARDS, bF);
        params.put(INPUT_WEIGHT_KEY_BACKWARDS, WeightInitUtil.initWeights(fanIn, fanOut, inputWShape, layerConf.getWeightInit(), dist, iwR));
        params.put(RECURRENT_WEIGHT_KEY_BACKWARDS, WeightInitUtil.initWeights(fanIn, fanOut, recurrentWShape, layerConf.getWeightInit(), dist, rwR));
        params.put(BIAS_KEY_BACKWARDS, bR);
    } else {
        params.put(INPUT_WEIGHT_KEY_FORWARDS, WeightInitUtil.reshapeWeights(new int[] { nLast, 4 * nL }, iwF));
        params.put(RECURRENT_WEIGHT_KEY_FORWARDS, WeightInitUtil.reshapeWeights(new int[] { nL, 4 * nL + 3 }, rwF));
        params.put(BIAS_KEY_FORWARDS, bF);
        params.put(INPUT_WEIGHT_KEY_BACKWARDS, WeightInitUtil.reshapeWeights(new int[] { nLast, 4 * nL }, iwR));
        params.put(RECURRENT_WEIGHT_KEY_BACKWARDS, WeightInitUtil.reshapeWeights(new int[] { nL, 4 * nL + 3 }, rwR));
        params.put(BIAS_KEY_BACKWARDS, bR);
    }
    return params;
}
Also used: INDArray (org.nd4j.linalg.api.ndarray.INDArray), Distribution (org.nd4j.linalg.api.rng.distribution.Distribution)
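
The offset bookkeeping above is easier to follow with concrete numbers. A minimal sketch of the same arithmetic (the layer sizes are illustrative):

public class BidirectionalLstmParamLayoutSketch {
    public static void main(String[] args) {
        // Hypothetical layer: 10 inputs, 20 LSTM units per direction
        int nLast = 10, nL = 20;

        // Same counts as init() above: 4 gate weight blocks per input connection,
        // and 4 gate blocks plus 3 peephole weights per unit for the recurrent block
        int nParamsInput = nLast * (4 * nL);      // 800
        int nParamsRecurrent = nL * (4 * nL + 3); // 1660
        int nBias = 4 * nL;                       // 80

        int rwFOffset = nParamsInput;                // 800
        int bFOffset = rwFOffset + nParamsRecurrent; // 2460
        int iwROffset = bFOffset + nBias;            // 2540: backward half mirrors the forward half
        int rwROffset = iwROffset + nParamsInput;    // 3340
        int bROffset = rwROffset + nParamsRecurrent; // 5000

        System.out.println("total params = " + (bROffset + nBias)); // 5080
    }
}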

Example 9 with Distribution

Use of org.nd4j.linalg.api.rng.distribution.Distribution in project deeplearning4j by deeplearning4j.

From the class VariationalAutoencoderParamInitializer, method init:

@Override
public Map<String, INDArray> init(NeuralNetConfiguration conf, INDArray paramsView, boolean initializeParams) {
    if (paramsView.length() != numParams(conf)) {
        throw new IllegalArgumentException("Incorrect paramsView length: Expected length " + numParams(conf) + ", got length " + paramsView.length());
    }
    Map<String, INDArray> ret = new LinkedHashMap<>();
    VariationalAutoencoder layer = (VariationalAutoencoder) conf.getLayer();
    int nIn = layer.getNIn();
    int nOut = layer.getNOut();
    int[] encoderLayerSizes = layer.getEncoderLayerSizes();
    int[] decoderLayerSizes = layer.getDecoderLayerSizes();
    WeightInit weightInit = layer.getWeightInit();
    Distribution dist = Distributions.createDistribution(layer.getDist());
    int soFar = 0;
    for (int i = 0; i < encoderLayerSizes.length; i++) {
        int encoderLayerNIn;
        if (i == 0) {
            encoderLayerNIn = nIn;
        } else {
            encoderLayerNIn = encoderLayerSizes[i - 1];
        }
        int weightParamCount = encoderLayerNIn * encoderLayerSizes[i];
        INDArray weightView = paramsView.get(NDArrayIndex.point(0), NDArrayIndex.interval(soFar, soFar + weightParamCount));
        soFar += weightParamCount;
        INDArray biasView = paramsView.get(NDArrayIndex.point(0), NDArrayIndex.interval(soFar, soFar + encoderLayerSizes[i]));
        soFar += encoderLayerSizes[i];
        INDArray layerWeights = createWeightMatrix(encoderLayerNIn, encoderLayerSizes[i], weightInit, dist, weightView, initializeParams);
        //TODO don't hardcode 0
        INDArray layerBiases = createBias(encoderLayerSizes[i], 0.0, biasView, initializeParams);
        String sW = "e" + i + WEIGHT_KEY_SUFFIX;
        String sB = "e" + i + BIAS_KEY_SUFFIX;
        ret.put(sW, layerWeights);
        ret.put(sB, layerBiases);
        conf.addVariable(sW);
        conf.addVariable(sB);
    }
    //Last encoder layer -> p(z|x)
    int nWeightsPzx = encoderLayerSizes[encoderLayerSizes.length - 1] * nOut;
    INDArray pzxWeightsMean = paramsView.get(NDArrayIndex.point(0), NDArrayIndex.interval(soFar, soFar + nWeightsPzx));
    soFar += nWeightsPzx;
    INDArray pzxBiasMean = paramsView.get(NDArrayIndex.point(0), NDArrayIndex.interval(soFar, soFar + nOut));
    soFar += nOut;
    INDArray pzxWeightsMeanReshaped = createWeightMatrix(encoderLayerSizes[encoderLayerSizes.length - 1], nOut, weightInit, dist, pzxWeightsMean, initializeParams);
    //TODO don't hardcode 0
    INDArray pzxBiasMeanReshaped = createBias(nOut, 0.0, pzxBiasMean, initializeParams);
    ret.put(PZX_MEAN_W, pzxWeightsMeanReshaped);
    ret.put(PZX_MEAN_B, pzxBiasMeanReshaped);
    conf.addVariable(PZX_MEAN_W);
    conf.addVariable(PZX_MEAN_B);
    //Pretrain params
    INDArray pzxWeightsLogStdev2 = paramsView.get(NDArrayIndex.point(0), NDArrayIndex.interval(soFar, soFar + nWeightsPzx));
    soFar += nWeightsPzx;
    INDArray pzxBiasLogStdev2 = paramsView.get(NDArrayIndex.point(0), NDArrayIndex.interval(soFar, soFar + nOut));
    soFar += nOut;
    INDArray pzxWeightsLogStdev2Reshaped = createWeightMatrix(encoderLayerSizes[encoderLayerSizes.length - 1], nOut, weightInit, dist, pzxWeightsLogStdev2, initializeParams);
    //TODO don't hardcode 0
    INDArray pzxBiasLogStdev2Reshaped = createBias(nOut, 0.0, pzxBiasLogStdev2, initializeParams);
    ret.put(PZX_LOGSTD2_W, pzxWeightsLogStdev2Reshaped);
    ret.put(PZX_LOGSTD2_B, pzxBiasLogStdev2Reshaped);
    conf.addVariable(PZX_LOGSTD2_W);
    conf.addVariable(PZX_LOGSTD2_B);
    for (int i = 0; i < decoderLayerSizes.length; i++) {
        int decoderLayerNIn;
        if (i == 0) {
            decoderLayerNIn = nOut;
        } else {
            decoderLayerNIn = decoderLayerSizes[i - 1];
        }
        int weightParamCount = decoderLayerNIn * decoderLayerSizes[i];
        INDArray weightView = paramsView.get(NDArrayIndex.point(0), NDArrayIndex.interval(soFar, soFar + weightParamCount));
        soFar += weightParamCount;
        INDArray biasView = paramsView.get(NDArrayIndex.point(0), NDArrayIndex.interval(soFar, soFar + decoderLayerSizes[i]));
        soFar += decoderLayerSizes[i];
        INDArray layerWeights = createWeightMatrix(decoderLayerNIn, decoderLayerSizes[i], weightInit, dist, weightView, initializeParams);
        //TODO don't hardcode 0
        INDArray layerBiases = createBias(decoderLayerSizes[i], 0.0, biasView, initializeParams);
        String sW = "d" + i + WEIGHT_KEY_SUFFIX;
        String sB = "d" + i + BIAS_KEY_SUFFIX;
        ret.put(sW, layerWeights);
        ret.put(sB, layerBiases);
        conf.addVariable(sW);
        conf.addVariable(sB);
    }
    //Finally, p(x|z):
    int nDistributionParams = layer.getOutputDistribution().distributionInputSize(nIn);
    int pxzWeightCount = decoderLayerSizes[decoderLayerSizes.length - 1] * nDistributionParams;
    INDArray pxzWeightView = paramsView.get(NDArrayIndex.point(0), NDArrayIndex.interval(soFar, soFar + pxzWeightCount));
    soFar += pxzWeightCount;
    INDArray pxzBiasView = paramsView.get(NDArrayIndex.point(0), NDArrayIndex.interval(soFar, soFar + nDistributionParams));
    INDArray pxzWeightsReshaped = createWeightMatrix(decoderLayerSizes[decoderLayerSizes.length - 1], nDistributionParams, weightInit, dist, pxzWeightView, initializeParams);
    //TODO don't hardcode 0
    INDArray pxzBiasReshaped = createBias(nDistributionParams, 0.0, pxzBiasView, initializeParams);
    ret.put(PXZ_W, pxzWeightsReshaped);
    ret.put(PXZ_B, pxzBiasReshaped);
    conf.addVariable(PXZ_W);
    conf.addVariable(PXZ_B);
    return ret;
}
Also used: INDArray (org.nd4j.linalg.api.ndarray.INDArray), WeightInit (org.deeplearning4j.nn.weights.WeightInit), Distribution (org.nd4j.linalg.api.rng.distribution.Distribution), VariationalAutoencoder (org.deeplearning4j.nn.conf.layers.variational.VariationalAutoencoder), LinkedHashMap (java.util.LinkedHashMap)
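
A rough parameter-count sketch that mirrors the order in which init() walks the flat parameter view. All sizes here are made up, and a hypothetical output distribution needing 2 * nIn parameters (e.g. a Gaussian with a mean and log-variance per input) is assumed for illustration:

public class VaeParamCountSketch {
    public static void main(String[] args) {
        int nIn = 784, nOut = 32;
        int[] encoderLayerSizes = { 256, 256 };
        int[] decoderLayerSizes = { 256, 256 };
        int nDistributionParams = 2 * nIn; // assumption: Gaussian reconstruction distribution

        int soFar = 0;
        // Encoder layers: weights + biases, in the same order as init() above
        int prev = nIn;
        for (int size : encoderLayerSizes) {
            soFar += prev * size + size;
            prev = size;
        }
        // p(z|x): mean head and log(sigma^2) head, each with weights + biases
        soFar += 2 * (prev * nOut + nOut);
        // Decoder layers, starting from the latent size
        prev = nOut;
        for (int size : decoderLayerSizes) {
            soFar += prev * size + size;
            prev = size;
        }
        // p(x|z): final weight matrix + bias sized by the output distribution
        soFar += prev * nDistributionParams + nDistributionParams;

        System.out.println("total params = " + soFar);
    }
}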

Aggregations

Distribution (org.nd4j.linalg.api.rng.distribution.Distribution): 9
INDArray (org.nd4j.linalg.api.ndarray.INDArray): 8
Pair (org.deeplearning4j.berkeley.Pair): 2
LinkedHashMap (java.util.LinkedHashMap): 1
NormalDistribution (org.deeplearning4j.nn.conf.distribution.NormalDistribution): 1
VariationalAutoencoder (org.deeplearning4j.nn.conf.layers.variational.VariationalAutoencoder): 1
Gradient (org.deeplearning4j.nn.gradient.Gradient): 1
WeightInit (org.deeplearning4j.nn.weights.WeightInit): 1
Test (org.junit.Test): 1
ActivationSigmoid (org.nd4j.linalg.activations.impl.ActivationSigmoid): 1