Use of org.nd4j.linalg.api.rng.distribution.Distribution in project deeplearning4j by deeplearning4j.
In the class RBM, the method sampleHiddenGivenVisible:
/**
 * Binomial sampling of the hidden values given the visible values
 *
 * @param v the visible values
 * @return a pair of (expected hidden values, sampled hidden values)
 */
@Override
public Pair<INDArray, INDArray> sampleHiddenGivenVisible(INDArray v) {
    INDArray hProb = propUp(v);
    INDArray hSample;
    Distribution dist;
    switch (layerConf().getHiddenUnit()) {
        case IDENTITY: {
            hSample = hProb;
            break;
        }
        case BINARY: {
            dist = Nd4j.getDistributions().createBinomial(1, hProb);
            hSample = dist.sample(hProb.shape());
            break;
        }
        case GAUSSIAN: {
            dist = Nd4j.getDistributions().createNormal(hProb, 1);
            hSample = dist.sample(hProb.shape());
            break;
        }
        case RECTIFIED: {
            INDArray sigH1Mean = sigmoid(hProb);
            /* Rectified linear part */
            INDArray sqrtSigH1Mean = sqrt(sigH1Mean);
            INDArray sample = Nd4j.getDistributions().createNormal(hProb, 1).sample(hProb.shape());
            sample.muli(sqrtSigH1Mean);
            hSample = hProb.add(sample);
            hSample = max(hSample, 0.0);
            break;
        }
        case SOFTMAX: {
            hSample = Nd4j.getExecutioner().execAndReturn(Nd4j.getOpFactory().createTransform("softmax", hProb));
            break;
        }
        default:
            throw new IllegalStateException("Hidden unit type must be one of Identity, Binary, Gaussian, SoftMax or Rectified");
    }
    return new Pair<>(hProb, hSample);
}
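The BINARY and GAUSSIAN branches above hand the actual sampling off to the Distribution factory. The following is a minimal, self-contained sketch of that pattern in isolation; the probability values are made up purely for illustration and are not taken from the project.

import org.nd4j.linalg.api.ndarray.INDArray;
import org.nd4j.linalg.api.rng.distribution.Distribution;
import org.nd4j.linalg.factory.Nd4j;

public class BinomialSamplingSketch {
    public static void main(String[] args) {
        // Hypothetical hidden-unit activation probabilities, one row per example
        INDArray hProb = Nd4j.create(new double[][] {{0.1, 0.9, 0.5}, {0.8, 0.2, 0.6}});
        // One Bernoulli trial per element, with per-element success probability taken from hProb
        Distribution binomial = Nd4j.getDistributions().createBinomial(1, hProb);
        INDArray hSample = binomial.sample(hProb.shape());
        System.out.println(hSample); // each entry is 0.0 or 1.0
    }
}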
Use of org.nd4j.linalg.api.rng.distribution.Distribution in project deeplearning4j by deeplearning4j.
In the class ConvolutionParamInitializer, the method createWeightMatrix:
protected INDArray createWeightMatrix(NeuralNetConfiguration conf, INDArray weightView, boolean initializeParams) {
    /*
        Create a 4d weight matrix of:
            (number of kernels, num input channels, kernel height, kernel width)
        Note c order is used specifically for the CNN weights, as opposed to f order elsewhere
        Inputs to the convolution layer are:
            (batch size, num input feature maps, image height, image width)
    */
    org.deeplearning4j.nn.conf.layers.ConvolutionLayer layerConf =
                    (org.deeplearning4j.nn.conf.layers.ConvolutionLayer) conf.getLayer();
    if (initializeParams) {
        Distribution dist = Distributions.createDistribution(conf.getLayer().getDist());
        int[] kernel = layerConf.getKernelSize();
        int[] stride = layerConf.getStride();
        int inputDepth = layerConf.getNIn();
        int outputDepth = layerConf.getNOut();
        double fanIn = inputDepth * kernel[0] * kernel[1];
        double fanOut = outputDepth * kernel[0] * kernel[1] / ((double) stride[0] * stride[1]);
        int[] weightsShape = new int[] {outputDepth, inputDepth, kernel[0], kernel[1]};
        return WeightInitUtil.initWeights(fanIn, fanOut, weightsShape, layerConf.getWeightInit(), dist, 'c', weightView);
    } else {
        int[] kernel = layerConf.getKernelSize();
        return WeightInitUtil.reshapeWeights(new int[] {layerConf.getNOut(), layerConf.getNIn(), kernel[0], kernel[1]}, weightView, 'c');
    }
}
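As a worked example of the fan-in/fan-out computation above, with arbitrarily chosen sizes (16 input channels, 32 kernels of size 3x3, stride 1x1): fanIn = 16 * 3 * 3 = 144 and fanOut = 32 * 3 * 3 / (1 * 1) = 288. A sketch of just that arithmetic, with illustrative values only:

public class ConvFanInFanOutSketch {
    public static void main(String[] args) {
        int inputDepth = 16;      // number of input channels (illustrative)
        int outputDepth = 32;     // number of kernels (illustrative)
        int[] kernel = {3, 3};    // kernel height, kernel width
        int[] stride = {1, 1};
        double fanIn = inputDepth * kernel[0] * kernel[1];                                      // 144.0
        double fanOut = outputDepth * kernel[0] * kernel[1] / ((double) stride[0] * stride[1]); // 288.0
        int[] weightsShape = {outputDepth, inputDepth, kernel[0], kernel[1]};                   // {32, 16, 3, 3}
        System.out.println(fanIn + " " + fanOut + " " + java.util.Arrays.toString(weightsShape));
    }
}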
Use of org.nd4j.linalg.api.rng.distribution.Distribution in project deeplearning4j by deeplearning4j.
In the class GravesBidirectionalLSTMParamInitializer, the method init:
@Override
public Map<String, INDArray> init(NeuralNetConfiguration conf, INDArray paramsView, boolean initializeParams) {
    Map<String, INDArray> params = Collections.synchronizedMap(new LinkedHashMap<String, INDArray>());
    org.deeplearning4j.nn.conf.layers.GravesBidirectionalLSTM layerConf =
                    (org.deeplearning4j.nn.conf.layers.GravesBidirectionalLSTM) conf.getLayer();
    double forgetGateInit = layerConf.getForgetGateBiasInit();
    Distribution dist = Distributions.createDistribution(layerConf.getDist());
    int nL = layerConf.getNOut(); //i.e., n neurons in this layer
    int nLast = layerConf.getNIn(); //i.e., n neurons in previous layer
    conf.addVariable(INPUT_WEIGHT_KEY_FORWARDS);
    conf.addVariable(RECURRENT_WEIGHT_KEY_FORWARDS);
    conf.addVariable(BIAS_KEY_FORWARDS);
    conf.addVariable(INPUT_WEIGHT_KEY_BACKWARDS);
    conf.addVariable(RECURRENT_WEIGHT_KEY_BACKWARDS);
    conf.addVariable(BIAS_KEY_BACKWARDS);
    int nParamsInput = nLast * (4 * nL);
    int nParamsRecurrent = nL * (4 * nL + 3);
    int nBias = 4 * nL;
    int rwFOffset = nParamsInput;
    int bFOffset = rwFOffset + nParamsRecurrent;
    int iwROffset = bFOffset + nBias;
    int rwROffset = iwROffset + nParamsInput;
    int bROffset = rwROffset + nParamsRecurrent;
    INDArray iwF = paramsView.get(NDArrayIndex.point(0), NDArrayIndex.interval(0, rwFOffset));
    INDArray rwF = paramsView.get(NDArrayIndex.point(0), NDArrayIndex.interval(rwFOffset, bFOffset));
    INDArray bF = paramsView.get(NDArrayIndex.point(0), NDArrayIndex.interval(bFOffset, iwROffset));
    INDArray iwR = paramsView.get(NDArrayIndex.point(0), NDArrayIndex.interval(iwROffset, rwROffset));
    INDArray rwR = paramsView.get(NDArrayIndex.point(0), NDArrayIndex.interval(rwROffset, bROffset));
    INDArray bR = paramsView.get(NDArrayIndex.point(0), NDArrayIndex.interval(bROffset, bROffset + nBias));
    if (initializeParams) {
        //Order: input, forget, output, input modulation, i.e., IFOG
        bF.put(new INDArrayIndex[] {NDArrayIndex.point(0), NDArrayIndex.interval(nL, 2 * nL)},
                        Nd4j.ones(1, nL).muli(forgetGateInit));
        bR.put(new INDArrayIndex[] {NDArrayIndex.point(0), NDArrayIndex.interval(nL, 2 * nL)},
                        Nd4j.ones(1, nL).muli(forgetGateInit));
    }
    if (initializeParams) {
        //As per standard LSTM
        int fanIn = nL;
        int fanOut = nLast + nL;
        int[] inputWShape = new int[] {nLast, 4 * nL};
        int[] recurrentWShape = new int[] {nL, 4 * nL + 3};
        params.put(INPUT_WEIGHT_KEY_FORWARDS,
                        WeightInitUtil.initWeights(fanIn, fanOut, inputWShape, layerConf.getWeightInit(), dist, iwF));
        params.put(RECURRENT_WEIGHT_KEY_FORWARDS,
                        WeightInitUtil.initWeights(fanIn, fanOut, recurrentWShape, layerConf.getWeightInit(), dist, rwF));
        params.put(BIAS_KEY_FORWARDS, bF);
        params.put(INPUT_WEIGHT_KEY_BACKWARDS,
                        WeightInitUtil.initWeights(fanIn, fanOut, inputWShape, layerConf.getWeightInit(), dist, iwR));
        params.put(RECURRENT_WEIGHT_KEY_BACKWARDS,
                        WeightInitUtil.initWeights(fanIn, fanOut, recurrentWShape, layerConf.getWeightInit(), dist, rwR));
        params.put(BIAS_KEY_BACKWARDS, bR);
    } else {
        params.put(INPUT_WEIGHT_KEY_FORWARDS, WeightInitUtil.reshapeWeights(new int[] {nLast, 4 * nL}, iwF));
        params.put(RECURRENT_WEIGHT_KEY_FORWARDS, WeightInitUtil.reshapeWeights(new int[] {nL, 4 * nL + 3}, rwF));
        params.put(BIAS_KEY_FORWARDS, bF);
        params.put(INPUT_WEIGHT_KEY_BACKWARDS, WeightInitUtil.reshapeWeights(new int[] {nLast, 4 * nL}, iwR));
        params.put(RECURRENT_WEIGHT_KEY_BACKWARDS, WeightInitUtil.reshapeWeights(new int[] {nL, 4 * nL + 3}, rwR));
        params.put(BIAS_KEY_BACKWARDS, bR);
    }
    return params;
}
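The flat parameter view is carved into six contiguous blocks in a fixed order: forward input weights, forward recurrent weights, forward biases, then the same three for the backwards direction. A sketch of how the offsets work out for hypothetical sizes nIn = 10 and nOut = 20 (illustrative numbers only, not taken from the project):

public class BidirectionalLstmOffsetSketch {
    public static void main(String[] args) {
        int nLast = 10; // nIn: neurons in the previous layer (illustrative)
        int nL = 20;    // nOut: neurons in this layer (illustrative)
        int nParamsInput = nLast * (4 * nL);      // 800  input weights per direction
        int nParamsRecurrent = nL * (4 * nL + 3); // 1660 recurrent weights (the extra 3 columns hold peephole connections) per direction
        int nBias = 4 * nL;                       // 80   biases per direction
        int rwFOffset = nParamsInput;                 // 800
        int bFOffset = rwFOffset + nParamsRecurrent;  // 2460
        int iwROffset = bFOffset + nBias;             // 2540
        int rwROffset = iwROffset + nParamsInput;     // 3340
        int bROffset = rwROffset + nParamsRecurrent;  // 5000
        int total = bROffset + nBias;                 // 5080 parameters in the flattened view
        System.out.println(total);
    }
}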
Use of org.nd4j.linalg.api.rng.distribution.Distribution in project deeplearning4j by deeplearning4j.
In the class VariationalAutoencoderParamInitializer, the method init:
@Override
public Map<String, INDArray> init(NeuralNetConfiguration conf, INDArray paramsView, boolean initializeParams) {
    if (paramsView.length() != numParams(conf)) {
        throw new IllegalArgumentException("Incorrect paramsView length: Expected length " + numParams(conf)
                        + ", got length " + paramsView.length());
    }
    Map<String, INDArray> ret = new LinkedHashMap<>();
    VariationalAutoencoder layer = (VariationalAutoencoder) conf.getLayer();
    int nIn = layer.getNIn();
    int nOut = layer.getNOut();
    int[] encoderLayerSizes = layer.getEncoderLayerSizes();
    int[] decoderLayerSizes = layer.getDecoderLayerSizes();
    WeightInit weightInit = layer.getWeightInit();
    Distribution dist = Distributions.createDistribution(layer.getDist());
    int soFar = 0;
    for (int i = 0; i < encoderLayerSizes.length; i++) {
        int encoderLayerNIn;
        if (i == 0) {
            encoderLayerNIn = nIn;
        } else {
            encoderLayerNIn = encoderLayerSizes[i - 1];
        }
        int weightParamCount = encoderLayerNIn * encoderLayerSizes[i];
        INDArray weightView = paramsView.get(NDArrayIndex.point(0), NDArrayIndex.interval(soFar, soFar + weightParamCount));
        soFar += weightParamCount;
        INDArray biasView = paramsView.get(NDArrayIndex.point(0), NDArrayIndex.interval(soFar, soFar + encoderLayerSizes[i]));
        soFar += encoderLayerSizes[i];
        INDArray layerWeights = createWeightMatrix(encoderLayerNIn, encoderLayerSizes[i], weightInit, dist, weightView, initializeParams);
        INDArray layerBiases = createBias(encoderLayerSizes[i], 0.0, biasView, initializeParams); //TODO don't hardcode 0
        String sW = "e" + i + WEIGHT_KEY_SUFFIX;
        String sB = "e" + i + BIAS_KEY_SUFFIX;
        ret.put(sW, layerWeights);
        ret.put(sB, layerBiases);
        conf.addVariable(sW);
        conf.addVariable(sB);
    }
    //Last encoder layer -> p(z|x)
    int nWeightsPzx = encoderLayerSizes[encoderLayerSizes.length - 1] * nOut;
    INDArray pzxWeightsMean = paramsView.get(NDArrayIndex.point(0), NDArrayIndex.interval(soFar, soFar + nWeightsPzx));
    soFar += nWeightsPzx;
    INDArray pzxBiasMean = paramsView.get(NDArrayIndex.point(0), NDArrayIndex.interval(soFar, soFar + nOut));
    soFar += nOut;
    INDArray pzxWeightsMeanReshaped = createWeightMatrix(encoderLayerSizes[encoderLayerSizes.length - 1], nOut,
                    weightInit, dist, pzxWeightsMean, initializeParams);
    INDArray pzxBiasMeanReshaped = createBias(nOut, 0.0, pzxBiasMean, initializeParams); //TODO don't hardcode 0
    ret.put(PZX_MEAN_W, pzxWeightsMeanReshaped);
    ret.put(PZX_MEAN_B, pzxBiasMeanReshaped);
    conf.addVariable(PZX_MEAN_W);
    conf.addVariable(PZX_MEAN_B);
    //Pretrain params
    INDArray pzxWeightsLogStdev2 = paramsView.get(NDArrayIndex.point(0), NDArrayIndex.interval(soFar, soFar + nWeightsPzx));
    soFar += nWeightsPzx;
    INDArray pzxBiasLogStdev2 = paramsView.get(NDArrayIndex.point(0), NDArrayIndex.interval(soFar, soFar + nOut));
    soFar += nOut;
    INDArray pzxWeightsLogStdev2Reshaped = createWeightMatrix(encoderLayerSizes[encoderLayerSizes.length - 1], nOut,
                    weightInit, dist, pzxWeightsLogStdev2, initializeParams);
    INDArray pzxBiasLogStdev2Reshaped = createBias(nOut, 0.0, pzxBiasLogStdev2, initializeParams); //TODO don't hardcode 0
    ret.put(PZX_LOGSTD2_W, pzxWeightsLogStdev2Reshaped);
    ret.put(PZX_LOGSTD2_B, pzxBiasLogStdev2Reshaped);
    conf.addVariable(PZX_LOGSTD2_W);
    conf.addVariable(PZX_LOGSTD2_B);
    for (int i = 0; i < decoderLayerSizes.length; i++) {
        int decoderLayerNIn;
        if (i == 0) {
            decoderLayerNIn = nOut;
        } else {
            decoderLayerNIn = decoderLayerSizes[i - 1];
        }
        int weightParamCount = decoderLayerNIn * decoderLayerSizes[i];
        INDArray weightView = paramsView.get(NDArrayIndex.point(0), NDArrayIndex.interval(soFar, soFar + weightParamCount));
        soFar += weightParamCount;
        INDArray biasView = paramsView.get(NDArrayIndex.point(0), NDArrayIndex.interval(soFar, soFar + decoderLayerSizes[i]));
        soFar += decoderLayerSizes[i];
        INDArray layerWeights = createWeightMatrix(decoderLayerNIn, decoderLayerSizes[i], weightInit, dist, weightView, initializeParams);
        INDArray layerBiases = createBias(decoderLayerSizes[i], 0.0, biasView, initializeParams); //TODO don't hardcode 0
        String sW = "d" + i + WEIGHT_KEY_SUFFIX;
        String sB = "d" + i + BIAS_KEY_SUFFIX;
        ret.put(sW, layerWeights);
        ret.put(sB, layerBiases);
        conf.addVariable(sW);
        conf.addVariable(sB);
    }
    //Finally, p(x|z):
    int nDistributionParams = layer.getOutputDistribution().distributionInputSize(nIn);
    int pxzWeightCount = decoderLayerSizes[decoderLayerSizes.length - 1] * nDistributionParams;
    INDArray pxzWeightView = paramsView.get(NDArrayIndex.point(0), NDArrayIndex.interval(soFar, soFar + pxzWeightCount));
    soFar += pxzWeightCount;
    INDArray pxzBiasView = paramsView.get(NDArrayIndex.point(0), NDArrayIndex.interval(soFar, soFar + nDistributionParams));
    INDArray pxzWeightsReshaped = createWeightMatrix(decoderLayerSizes[decoderLayerSizes.length - 1], nDistributionParams,
                    weightInit, dist, pxzWeightView, initializeParams);
    INDArray pxzBiasReshaped = createBias(nDistributionParams, 0.0, pxzBiasView, initializeParams); //TODO don't hardcode 0
    ret.put(PXZ_W, pxzWeightsReshaped);
    ret.put(PXZ_B, pxzBiasReshaped);
    conf.addVariable(PXZ_W);
    conf.addVariable(PXZ_B);
    return ret;
}
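Because the method consumes paramsView strictly left to right, the total length it expects is simply the sum of the slices taken above. A sketch of that count for hypothetical sizes (nIn = 784, nOut = 32, one encoder and one decoder layer of 256 units, and an output distribution whose distributionInputSize(nIn) is assumed to equal nIn); all numbers are illustrative, not taken from the project:

public class VaeParamCountSketch {
    public static void main(String[] args) {
        int nIn = 784;                        // input size (illustrative)
        int nOut = 32;                        // latent size (illustrative)
        int[] encoderLayerSizes = {256};
        int[] decoderLayerSizes = {256};
        int nDistributionParams = 784;        // assumed value of distributionInputSize(nIn)

        int count = 0;
        int encIn = nIn;
        for (int size : encoderLayerSizes) {  // encoder layers: weights + biases
            count += encIn * size + size;
            encIn = size;
        }
        count += 2 * (encIn * nOut + nOut);   // p(z|x): mean and log(sigma^2) heads, weights + biases each
        int decIn = nOut;
        for (int size : decoderLayerSizes) {  // decoder layers: weights + biases
            count += decIn * size + size;
            decIn = size;
        }
        count += decIn * nDistributionParams + nDistributionParams; // p(x|z): weights + biases
        System.out.println(count); // 427344 for these made-up sizes
    }
}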