Use of org.deeplearning4j.exception.DL4JInvalidInputException in project deeplearning4j by deeplearning4j.
Class BaseLayer, method preOutput.
/**
 * Computes this layer's pre-activation output: {@code input.mmul(W)} plus the bias row vector.
 *
 * <p>Dropout is applied first (if necessary), the input is validated against the weight
 * matrix, drop connect is optionally applied to the weights, and the mask array (when set)
 * is applied to the result.
 *
 * @param training whether the layer is being used for training; gates dropout/drop connect
 * @return the pre-activation output
 * @throws DL4JInvalidInputException if the input is not rank 2, or its column count does not
 *         match the number of rows of the weight matrix
 */
public INDArray preOutput(boolean training) {
    applyDropOutIfNecessary(training);
    final INDArray bias = getParam(DefaultParamInitializer.BIAS_KEY);
    INDArray weights = getParam(DefaultParamInitializer.WEIGHT_KEY);

    // Input validation: rank is checked first so columns() is never queried on a non-matrix
    if (input.rank() != 2) {
        throw new DL4JInvalidInputException("Input that is not a matrix; expected matrix (rank 2), got rank " + input.rank() + " array with shape " + Arrays.toString(input.shape()));
    }
    if (input.columns() != weights.rows()) {
        throw new DL4JInvalidInputException("Input size (" + input.columns() + " columns; shape = " + Arrays.toString(input.shape()) + ") is invalid: does not match layer input size (layer # inputs = " + weights.size(0) + ")");
    }

    // Drop connect replaces the weights only during training and only with a positive dropout value
    final boolean useDropConnect = conf.isUseDropConnect() && training && conf.getLayer().getDropOut() > 0;
    if (useDropConnect) {
        weights = Dropout.applyDropConnect(this, DefaultParamInitializer.WEIGHT_KEY);
    }

    final INDArray preOut = input.mmul(weights).addiRowVector(bias);
    if (maskArray == null) {
        return preOut;
    }
    applyMask(preOut);
    return preOut;
}
Use of org.deeplearning4j.exception.DL4JInvalidInputException in project deeplearning4j by deeplearning4j.
Class LSTMHelpers, method activateHelper.
/**
* Returns FwdPassReturn object with activations/INDArrays. Allows activateHelper to be used for forward pass, backward pass
* and rnnTimeStep whilst being reasonably efficient for all.
*
* <p>For each time step (iterated in reverse when {@code forwards} is false) the method computes the
* input, forget, output and input-modulation gate values from the current input slice, the previous
* output activations and the previous memory cell state, then derives the new memory cell state and
* the hidden unit activations. When {@code forBackprop} is true the per-time-step intermediates are
* stored in the returned object; otherwise only a single [miniBatch, hiddenLayerSize, T] output array
* is filled in.
*
* @param layer layer passed to {@code Dropout.applyDropConnect} when drop connect is active
* @param conf configuration; supplies the drop connect/dropout settings and the layer activation function
* @param gateActivationFn activation function applied to the forget, input-modulation and output gates
* @param input input data, shape [m,nIn,T]; a rank-2 array [m,nIn] is treated as T=1
* @param recurrentWeights recurrent weights; columns [0, 4*hiddenLayerSize) are used with the previous
*        output activations, and the following three columns (wFF, wOO, wGG) are multiplied into the
*        memory cell state and added to the forget, output and input-modulation gate pre-activations
* @param originalInputWeights input weights; replaced by the drop-connected weights when drop connect is active
* @param biases bias values, added as a row vector to the combined gate pre-activations
* @param training passed to the activation functions; also required for drop connect to be applied
* @param originalPrevOutputActivations previous output activations (stored state), or null to start from zeros
* @param originalPrevMemCellState previous memory cell state, or null to start from a newly created array
* @param forBackprop if true, per-time-step intermediate arrays are stored for the backward pass
* @param forwards if false, time steps are processed from last to first
* @param inputWeightKey parameter key passed to {@code Dropout.applyDropConnect} for the input weights
* @param maskArray optional mask (may be null); column {@code time} is multiplied into the activations and cell state
* @return object holding the forward pass results, including the last activations and last memory cell state
* @throws IllegalArgumentException if input is null or has length 0
* @throws DL4JInvalidInputException if input.size(1) does not match the input weights' size(0), or the stored
*         previous activations have a different number of examples than the input
*/
public static FwdPassReturn activateHelper(final Layer layer, final NeuralNetConfiguration conf, //gateActivationFn: activation function for the gates - sigmoid or hard sigmoid (must be found in range 0 to 1)
final IActivation gateActivationFn, //input: data with shape [m,nIn,T] (or [m,nIn] for T=1)
final INDArray input, //recurrentWeights: shape [hiddenLayerSize,4*hiddenLayerSize+3]; order: [wI,wF,wO,wG,wFF,wOO,wGG]
final INDArray recurrentWeights, //originalInputWeights: shape [n^(L-1),4*hiddenLayerSize]; order: [wi,wf,wo,wg]
final INDArray originalInputWeights, //biases: shape [4,hiddenLayerSize]; order: [bi,bf,bo,bg]^T
final INDArray biases, final boolean training, final INDArray originalPrevOutputActivations, final INDArray originalPrevMemCellState, boolean forBackprop, boolean forwards, //inputWeightKey: parameter key of the input weights, used for drop connect
final String inputWeightKey, //maskArray: input mask, should only be used with bidirectional RNNs + variable length
INDArray maskArray) {
//Data has shape [m,nIn,T]. Layer activations/output has shape [m,nHiddenUnits,T]
if (input == null || input.length() == 0)
throw new IllegalArgumentException("Invalid input: not set or 0 length");
INDArray inputWeights = originalInputWeights;
INDArray prevOutputActivations = originalPrevOutputActivations;
//Edge case of T=1, may have shape [m,nIn], equiv. to [m,nIn,1]
boolean is2dInput = input.rank() < 3;
int timeSeriesLength = (is2dInput ? 1 : input.size(2));
int hiddenLayerSize = recurrentWeights.size(0);
int miniBatchSize = input.size(0);
//Work on a private 'f' order copy of the memory cell state so the caller's array is not modified
INDArray prevMemCellState;
if (originalPrevMemCellState == null) {
prevMemCellState = Nd4j.create(new int[] { miniBatchSize, hiddenLayerSize }, 'f');
} else {
prevMemCellState = originalPrevMemCellState.dup('f');
}
//First 4*hiddenLayerSize columns of the recurrent weights, copied into 'f' order
INDArray recurrentWeightsIFOG = recurrentWeights.get(NDArrayIndex.all(), NDArrayIndex.interval(0, 4 * hiddenLayerSize)).dup('f');
//Apply dropconnect to input (not recurrent) weights only:
if (conf.isUseDropConnect() && training && conf.getLayer().getDropOut() > 0) {
inputWeights = Dropout.applyDropConnect(layer, inputWeightKey);
}
//Last three columns of the recurrent weights (wFF, wOO, wGG), transposed so they can be used as row vectors
INDArray wFFTranspose = recurrentWeights.get(NDArrayIndex.all(), interval(4 * hiddenLayerSize, 4 * hiddenLayerSize + 1)).transpose();
INDArray wOOTranspose = recurrentWeights.get(NDArrayIndex.all(), interval(4 * hiddenLayerSize + 1, 4 * hiddenLayerSize + 2)).transpose();
INDArray wGGTranspose = recurrentWeights.get(NDArrayIndex.all(), interval(4 * hiddenLayerSize + 2, 4 * hiddenLayerSize + 3)).transpose();
if (timeSeriesLength > 1 || forBackprop) {
wFFTranspose = Shape.toMmulCompatible(wFFTranspose);
wOOTranspose = Shape.toMmulCompatible(wOOTranspose);
wGGTranspose = Shape.toMmulCompatible(wGGTranspose);
}
//Allocate arrays for activations:
boolean sigmoidGates = gateActivationFn instanceof ActivationSigmoid;
IActivation afn = conf.getLayer().getActivationFn();
INDArray outputActivations = null;
FwdPassReturn toReturn = new FwdPassReturn();
if (forBackprop) {
toReturn.fwdPassOutputAsArrays = new INDArray[timeSeriesLength];
toReturn.memCellState = new INDArray[timeSeriesLength];
toReturn.memCellActivations = new INDArray[timeSeriesLength];
toReturn.iz = new INDArray[timeSeriesLength];
toReturn.ia = new INDArray[timeSeriesLength];
toReturn.fa = new INDArray[timeSeriesLength];
toReturn.oa = new INDArray[timeSeriesLength];
toReturn.ga = new INDArray[timeSeriesLength];
//Gate pre-activations (z) are only stored when the gate activation is not ActivationSigmoid
// (presumably the sigmoid derivative can be recovered from the activations alone - TODO confirm against backprop code)
if (!sigmoidGates) {
toReturn.fz = new INDArray[timeSeriesLength];
toReturn.oz = new INDArray[timeSeriesLength];
toReturn.gz = new INDArray[timeSeriesLength];
}
} else {
//F order to keep time steps together
outputActivations = Nd4j.create(new int[] { miniBatchSize, hiddenLayerSize, timeSeriesLength }, 'f');
toReturn.fwdPassOutput = outputActivations;
}
Level1 l1BLAS = Nd4j.getBlasWrapper().level1();
//Input validation: check input data matches nIn
if (input.size(1) != inputWeights.size(0)) {
throw new DL4JInvalidInputException("Received input with size(1) = " + input.size(1) + " (input array shape = " + Arrays.toString(input.shape()) + "); input.size(1) must match layer nIn size (nIn = " + inputWeights.size(0) + ")");
}
//These can be different if user forgets to call rnnClearPreviousState() between calls of rnnTimeStep
if (prevOutputActivations != null && prevOutputActivations.size(0) != input.size(0)) {
throw new DL4JInvalidInputException("Previous activations (stored state) number of examples = " + prevOutputActivations.size(0) + " but input array number of examples = " + input.size(0) + ". Possible cause: using rnnTimeStep() without calling" + " rnnClearPreviousState() between different sequences?");
}
//initialize prevOutputActivations to zeroes
if (prevOutputActivations == null) {
prevOutputActivations = Nd4j.zeros(new int[] { miniBatchSize, hiddenLayerSize });
}
for (int iTimeIndex = 0; iTimeIndex < timeSeriesLength; iTimeIndex++) {
//'time' is the actual index into the time dimension; it runs backwards when forwards == false
int time = iTimeIndex;
if (!forwards) {
time = timeSeriesLength - iTimeIndex - 1;
}
//[Expected shape: [m,nIn]. Also deals with edge case of T=1, with 'time series' data of shape [m,nIn], equiv. to [m,nIn,1]
INDArray miniBatchData = (is2dInput ? input : input.tensorAlongDimension(time, 1, 0));
miniBatchData = Shape.toMmulCompatible(miniBatchData);
//Calculate activations for: network input + forget, output, input modulation gates. Next 3 lines are first part of those
//Shape: [miniBatch,4*layerSize]
INDArray ifogActivations = miniBatchData.mmul(inputWeights);
//Accumulates prevOutputActivations * recurrentWeightsIFOG into ifogActivations (alpha = beta = 1.0)
Nd4j.gemm(prevOutputActivations, recurrentWeightsIFOG, ifogActivations, false, false, 1.0, 1.0);
ifogActivations.addiRowVector(biases);
//Column block [0, hiddenLayerSize): network input
INDArray inputActivations = ifogActivations.get(NDArrayIndex.all(), NDArrayIndex.interval(0, hiddenLayerSize));
if (forBackprop)
toReturn.iz[time] = inputActivations.dup('f');
//Return value is ignored here: this relies on getActivation modifying inputActivations in place (NOTE(review): confirm)
conf.getLayer().getActivationFn().getActivation(inputActivations, training);
if (forBackprop)
toReturn.ia[time] = inputActivations;
//Column block [hiddenLayerSize, 2*hiddenLayerSize): forget gate
INDArray forgetGateActivations = ifogActivations.get(NDArrayIndex.all(), NDArrayIndex.interval(hiddenLayerSize, 2 * hiddenLayerSize));
INDArray pmcellWFF = prevMemCellState.dup('f').muliRowVector(wFFTranspose);
//y = a*x + y i.e., forgetGateActivations.addi(pmcellWFF)
l1BLAS.axpy(pmcellWFF.length(), 1.0, pmcellWFF, forgetGateActivations);
//Above line: treats matrix as a vector. Can only do this because we're sure both pmcellWFF and forgetGateActivations are f order, offset 0 and have same strides
if (forBackprop && !sigmoidGates) {
//Forget gate pre-out (z)
toReturn.fz[time] = forgetGateActivations.dup('f');
}
gateActivationFn.getActivation(forgetGateActivations, training);
if (forBackprop)
toReturn.fa[time] = forgetGateActivations;
//Column block [3*hiddenLayerSize, 4*hiddenLayerSize): input modulation gate
INDArray inputModGateActivations = ifogActivations.get(NDArrayIndex.all(), NDArrayIndex.interval(3 * hiddenLayerSize, 4 * hiddenLayerSize));
INDArray pmcellWGG = prevMemCellState.dup('f').muliRowVector(wGGTranspose);
//inputModGateActivations.addi(pmcellWGG)
l1BLAS.axpy(pmcellWGG.length(), 1.0, pmcellWGG, inputModGateActivations);
if (forBackprop && !sigmoidGates) {
//Input modulation gate pre-out (z)
toReturn.gz[time] = inputModGateActivations.dup('f');
}
gateActivationFn.getActivation(inputModGateActivations, training);
if (forBackprop)
toReturn.ga[time] = inputModGateActivations;
//Memory cell state
INDArray currentMemoryCellState;
INDArray inputModMulInput;
if (forBackprop) {
//Backprop: work on copies, since the gate activation arrays were stored in toReturn above and must stay intact
currentMemoryCellState = prevMemCellState.dup('f').muli(forgetGateActivations);
inputModMulInput = inputModGateActivations.dup('f').muli(inputActivations);
} else {
//No backprop: the gate arrays are not stored, so they are overwritten in place instead of copied
currentMemoryCellState = forgetGateActivations.muli(prevMemCellState);
inputModMulInput = inputModGateActivations.muli(inputActivations);
}
//currentMemoryCellState.addi(inputModMulInput)
l1BLAS.axpy(currentMemoryCellState.length(), 1.0, inputModMulInput, currentMemoryCellState);
//Column block [2*hiddenLayerSize, 3*hiddenLayerSize): output gate; note it uses the *current* memory cell state
INDArray outputGateActivations = ifogActivations.get(NDArrayIndex.all(), NDArrayIndex.interval(2 * hiddenLayerSize, 3 * hiddenLayerSize));
INDArray pmcellWOO = currentMemoryCellState.dup('f').muliRowVector(wOOTranspose);
//outputGateActivations.addi(pmcellWOO)
l1BLAS.axpy(pmcellWOO.length(), 1.0, pmcellWOO, outputGateActivations);
if (forBackprop && !sigmoidGates) {
//Output gate pre-out (z)
toReturn.oz[time] = outputGateActivations.dup('f');
}
gateActivationFn.getActivation(outputGateActivations, training);
if (forBackprop)
toReturn.oa[time] = outputGateActivations;
//LSTM unit outputs:
INDArray currMemoryCellActivation = afn.getActivation(currentMemoryCellState.dup('f'), training);
INDArray currHiddenUnitActivations;
if (forBackprop) {
//Expected shape: [m,hiddenLayerSize]
//Copy first: currMemoryCellActivation itself is stored in toReturn.memCellActivations below
currHiddenUnitActivations = currMemoryCellActivation.dup('f').muli(outputGateActivations);
} else {
//Expected shape: [m,hiddenLayerSize]
currHiddenUnitActivations = currMemoryCellActivation.muli(outputGateActivations);
}
if (maskArray != null) {
//Mask array is present: bidirectional RNN -> need to zero out these activations to avoid
// incorrectly using activations from masked time steps (i.e., want 0 initialization in both directions)
//We *also* need to apply this to the memory cells, as they are carried forward
//Mask array has shape [minibatch, timeSeriesLength] -> get column
INDArray timeStepMaskColumn = maskArray.getColumn(time);
currHiddenUnitActivations.muliColumnVector(timeStepMaskColumn);
currentMemoryCellState.muliColumnVector(timeStepMaskColumn);
}
if (forBackprop) {
toReturn.fwdPassOutputAsArrays[time] = currHiddenUnitActivations;
toReturn.memCellState[time] = currentMemoryCellState;
toReturn.memCellActivations[time] = currMemoryCellActivation;
} else {
outputActivations.tensorAlongDimension(time, 1, 0).assign(currHiddenUnitActivations);
}
//Carry state to the next iteration; lastAct/lastMemCell end up holding the final processed step
prevOutputActivations = currHiddenUnitActivations;
prevMemCellState = currentMemoryCellState;
toReturn.lastAct = currHiddenUnitActivations;
toReturn.lastMemCell = currentMemoryCellState;
}
return toReturn;
}
Use of org.deeplearning4j.exception.DL4JInvalidInputException in project deeplearning4j by deeplearning4j.
Class CenterLossOutputLayer, method getGradientsAndDelta.
/**
 * Computes the gradients for the weight, bias and center parameters together with the loss delta.
 *
 * <p>The results are written into the existing gradient views and registered on a new
 * {@link DefaultGradient}.
 *
 * @param preOut pre-activation output of this layer
 * @return pair of {gradient, delta}
 * @throws DL4JInvalidInputException if the labels' size(1) does not match preOut's size(1)
 */
private Pair<Gradient, INDArray> getGradientsAndDelta(INDArray preOut) {
    final ILossFunction lossFn = layerConf().getLossFn();
    final INDArray labels2d = getLabels2d();
    if (labels2d.size(1) != preOut.size(1)) {
        throw new DL4JInvalidInputException("Labels array numColumns (size(1) = " + labels2d.size(1) + ") does not match output layer" + " number of outputs (nOut = " + preOut.size(1) + ")");
    }

    final INDArray delta = lossFn.computeGradient(labels2d, preOut, layerConf().getActivationFn(), maskArray);

    final Gradient result = new DefaultGradient();
    final INDArray wGradView = gradientViews.get(CenterLossParamInitializer.WEIGHT_KEY);
    final INDArray bGradView = gradientViews.get(CenterLossParamInitializer.BIAS_KEY);
    final INDArray cGradView = gradientViews.get(CenterLossParamInitializer.CENTER_KEY);

    // Centers delta: alpha * (center of each example's class - input), summed per class via labels^T
    final double alpha = layerConf().getAlpha();
    final INDArray centers = params.get(CenterLossParamInitializer.CENTER_KEY);
    final INDArray perExampleCenters = labels.mmul(centers);
    final INDArray scaledDiff = perExampleCenters.sub(input).muli(alpha);
    final INDArray numerator = labels.transpose().mmul(scaledDiff);
    final INDArray denominator = labels.sum(0).addi(1.0).transpose();

    // For gradient checks: need to multiply dLc/dcj by lambda to get dL/dcj;
    // otherwise the numerator is divided (in place) by the per-class denominator
    final INDArray centersDelta = layerConf().getGradientCheck()
            ? numerator.muli(layerConf().getLambda())
            : numerator.diviColumnVector(denominator);
    cGradView.assign(centersDelta);

    // Standard weight/bias gradients.
    // The gemm call is equivalent to: wGradView.assign(input.transpose().mmul(delta));
    Nd4j.gemm(input, delta, wGradView, true, false, 1.0, 0.0);
    bGradView.assign(delta.sum(0));

    result.gradientForVariable().put(CenterLossParamInitializer.WEIGHT_KEY, wGradView);
    result.gradientForVariable().put(CenterLossParamInitializer.BIAS_KEY, bGradView);
    result.gradientForVariable().put(CenterLossParamInitializer.CENTER_KEY, cGradView);
    return new Pair<>(result, delta);
}
Use of org.deeplearning4j.exception.DL4JInvalidInputException in project deeplearning4j by deeplearning4j.
Class Convolution1DLayer, method backpropGradient.
/**
 * Backpropagates through the 1D convolution by delegating to the 2D implementation in the
 * superclass: a singleton fourth dimension is appended to both the epsilon and the layer
 * input, and removed again from the epsilon that is returned.
 *
 * <p>The {@code input} field is only temporarily replaced by its 4d view; it is restored in a
 * {@code finally} block so the layer is not left holding a reshaped input if the superclass
 * call throws.
 *
 * @param epsilon rank 3 epsilon with shape [minibatchSize, features, length]
 * @return pair of {gradient, epsilon for the layer below (rank 3)}
 * @throws DL4JInvalidInputException if epsilon is not rank 3
 */
@Override
public Pair<Gradient, INDArray> backpropGradient(INDArray epsilon) {
    if (epsilon.rank() != 3) {
        throw new DL4JInvalidInputException("Got rank " + epsilon.rank() + " array as epsilon for Convolution1DLayer backprop with shape " + Arrays.toString(epsilon.shape()) + ". Expected rank 3 array with shape [minibatchSize, features, length].");
    }

    // add singleton fourth dimension to input and next layer's epsilon
    epsilon = epsilon.reshape(epsilon.size(0), epsilon.size(1), epsilon.size(2), 1);
    INDArray origInput = input;
    input = input.reshape(input.size(0), input.size(1), input.size(2), 1);

    Pair<Gradient, INDArray> gradientEpsNext;
    try {
        // call 2D ConvolutionLayer's backpropGradient method
        gradientEpsNext = super.backpropGradient(epsilon);
    } finally {
        // always restore the original (rank 3) input, even if the superclass call fails
        input = origInput;
    }

    // remove singleton fourth dimension from current epsilon
    INDArray epsNext = gradientEpsNext.getSecond();
    epsNext = epsNext.reshape(epsNext.size(0), epsNext.size(1), epsNext.size(2));
    return new Pair<>(gradientEpsNext.getFirst(), epsNext);
}
Use of org.deeplearning4j.exception.DL4JInvalidInputException in project deeplearning4j by deeplearning4j.
Class Subsampling1DLayer, method activate.
/**
 * Computes the 1D subsampling activations by delegating to the 2D implementation in the
 * superclass: a singleton fourth dimension is appended to the layer input and removed again
 * from the resulting activations.
 *
 * <p>The {@code input} field is only temporarily replaced by its 4d view; it is restored in a
 * {@code finally} block so the layer is not left holding a reshaped input if the superclass
 * call throws.
 *
 * @param training passed through to the superclass {@code activate} call
 * @return rank 3 activations
 * @throws DL4JInvalidInputException if the layer input is not rank 3
 */
@Override
public INDArray activate(boolean training) {
    if (input.rank() != 3) {
        throw new DL4JInvalidInputException("Got rank " + input.rank() + " array as input to Subsampling1DLayer with shape " + Arrays.toString(input.shape()) + ". Expected rank 3 array with shape [minibatchSize, features, length].");
    }

    // add singleton fourth dimension to input
    INDArray origInput = input;
    input = input.reshape(input.size(0), input.size(1), input.size(2), 1);

    INDArray acts;
    try {
        // call 2D SubsamplingLayer's activate method
        acts = super.activate(training);
    } finally {
        // always restore the original (rank 3) input, even if the superclass call fails
        input = origInput;
    }

    // remove singleton fourth dimension from output activations
    acts = acts.reshape(acts.size(0), acts.size(1), acts.size(2));
    return acts;
}
Aggregations