Example 11 with DL4JInvalidInputException

Use of org.deeplearning4j.exception.DL4JInvalidInputException in project deeplearning4j by deeplearning4j.

From class BaseLayer, method preOutput().

public INDArray preOutput(boolean training) {
    applyDropOutIfNecessary(training);
    INDArray b = getParam(DefaultParamInitializer.BIAS_KEY);
    INDArray W = getParam(DefaultParamInitializer.WEIGHT_KEY);
    //Input validation:
    if (input.rank() != 2 || input.columns() != W.rows()) {
        if (input.rank() != 2) {
            throw new DL4JInvalidInputException("Input that is not a matrix; expected matrix (rank 2), got rank " + input.rank() + " array with shape " + Arrays.toString(input.shape()));
        }
        throw new DL4JInvalidInputException("Input size (" + input.columns() + " columns; shape = " + Arrays.toString(input.shape()) + ") is invalid: does not match layer input size (layer # inputs = " + W.size(0) + ")");
    }
    if (conf.isUseDropConnect() && training && conf.getLayer().getDropOut() > 0) {
        W = Dropout.applyDropConnect(this, DefaultParamInitializer.WEIGHT_KEY);
    }
    INDArray ret = input.mmul(W).addiRowVector(b);
    if (maskArray != null) {
        applyMask(ret);
    }
    return ret;
}
Also used : INDArray(org.nd4j.linalg.api.ndarray.INDArray) DL4JInvalidInputException(org.deeplearning4j.exception.DL4JInvalidInputException)
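
As a rough usage sketch (not taken from the indexed sources), the validation above surfaces to callers when the feature matrix passed into the network has a column count that differs from the first layer's nIn; the rank check fires first if the input is not a matrix at all. The layer sizes and the 4-vs-5 mismatch below are illustrative, assuming a DL4J 0.x style MultiLayerNetwork API.

import org.deeplearning4j.exception.DL4JInvalidInputException;
import org.deeplearning4j.nn.conf.MultiLayerConfiguration;
import org.deeplearning4j.nn.conf.NeuralNetConfiguration;
import org.deeplearning4j.nn.conf.layers.DenseLayer;
import org.deeplearning4j.nn.conf.layers.OutputLayer;
import org.deeplearning4j.nn.multilayer.MultiLayerNetwork;
import org.nd4j.linalg.api.ndarray.INDArray;
import org.nd4j.linalg.factory.Nd4j;
import org.nd4j.linalg.lossfunctions.LossFunctions;

public class PreOutputValidationSketch {
    public static void main(String[] args) {
        // Hypothetical network: the dense layer expects nIn = 4 input columns
        MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder()
                .list()
                .layer(0, new DenseLayer.Builder().nIn(4).nOut(3).build())
                .layer(1, new OutputLayer.Builder(LossFunctions.LossFunction.MSE).nIn(3).nOut(2).build())
                .build();
        MultiLayerNetwork net = new MultiLayerNetwork(conf);
        net.init();

        INDArray badInput = Nd4j.rand(1, 5); // 5 columns, but the layer's nIn is 4
        try {
            net.output(badInput);
        } catch (DL4JInvalidInputException e) {
            // Expected: input size does not match layer input size
            System.out.println("Validation failed as expected: " + e.getMessage());
        }
    }
}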

Example 12 with DL4JInvalidInputException

Use of org.deeplearning4j.exception.DL4JInvalidInputException in project deeplearning4j by deeplearning4j.

From class LSTMHelpers, method activateHelper().

/**
     * Returns FwdPassReturn object with activations/INDArrays. Allows activateHelper to be used for forward pass, backward pass
     * and rnnTimeStep whilst being reasonably efficient for all
     */
public static FwdPassReturn activateHelper(final Layer layer, final NeuralNetConfiguration conf,
        final IActivation gateActivationFn, //Activation function for the gates - sigmoid or hard sigmoid (must be found in range 0 to 1)
        final INDArray input,
        final INDArray recurrentWeights, //Shape: [hiddenLayerSize,4*hiddenLayerSize+3]; order: [wI,wF,wO,wG,wFF,wOO,wGG]
        final INDArray originalInputWeights, //Shape: [n^(L-1),4*hiddenLayerSize]; order: [wi,wf,wo,wg]
        final INDArray biases, //Shape: [4,hiddenLayerSize]; order: [bi,bf,bo,bg]^T
        final boolean training, final INDArray originalPrevOutputActivations, final INDArray originalPrevMemCellState,
        boolean forBackprop, boolean forwards, final String inputWeightKey,
        INDArray maskArray) { //Input mask: should only be used with bidirectional RNNs + variable length
    //Data has shape [m,nIn,T]. Layer activations/output has shape [m,nHiddenUnits,T]
    if (input == null || input.length() == 0)
        throw new IllegalArgumentException("Invalid input: not set or 0 length");
    INDArray inputWeights = originalInputWeights;
    INDArray prevOutputActivations = originalPrevOutputActivations;
    //Edge case of T=1, may have shape [m,nIn], equiv. to [m,nIn,1]
    boolean is2dInput = input.rank() < 3;
    int timeSeriesLength = (is2dInput ? 1 : input.size(2));
    int hiddenLayerSize = recurrentWeights.size(0);
    int miniBatchSize = input.size(0);
    INDArray prevMemCellState;
    if (originalPrevMemCellState == null) {
        prevMemCellState = Nd4j.create(new int[] { miniBatchSize, hiddenLayerSize }, 'f');
    } else {
        prevMemCellState = originalPrevMemCellState.dup('f');
    }
    INDArray recurrentWeightsIFOG = recurrentWeights.get(NDArrayIndex.all(), NDArrayIndex.interval(0, 4 * hiddenLayerSize)).dup('f');
    //Apply dropconnect to input (not recurrent) weights only:
    if (conf.isUseDropConnect() && training && conf.getLayer().getDropOut() > 0) {
        inputWeights = Dropout.applyDropConnect(layer, inputWeightKey);
    }
    INDArray wFFTranspose = recurrentWeights.get(NDArrayIndex.all(), interval(4 * hiddenLayerSize, 4 * hiddenLayerSize + 1)).transpose();
    INDArray wOOTranspose = recurrentWeights.get(NDArrayIndex.all(), interval(4 * hiddenLayerSize + 1, 4 * hiddenLayerSize + 2)).transpose();
    INDArray wGGTranspose = recurrentWeights.get(NDArrayIndex.all(), interval(4 * hiddenLayerSize + 2, 4 * hiddenLayerSize + 3)).transpose();
    if (timeSeriesLength > 1 || forBackprop) {
        wFFTranspose = Shape.toMmulCompatible(wFFTranspose);
        wOOTranspose = Shape.toMmulCompatible(wOOTranspose);
        wGGTranspose = Shape.toMmulCompatible(wGGTranspose);
    }
    //Allocate arrays for activations:
    boolean sigmoidGates = gateActivationFn instanceof ActivationSigmoid;
    IActivation afn = conf.getLayer().getActivationFn();
    INDArray outputActivations = null;
    FwdPassReturn toReturn = new FwdPassReturn();
    if (forBackprop) {
        toReturn.fwdPassOutputAsArrays = new INDArray[timeSeriesLength];
        toReturn.memCellState = new INDArray[timeSeriesLength];
        toReturn.memCellActivations = new INDArray[timeSeriesLength];
        toReturn.iz = new INDArray[timeSeriesLength];
        toReturn.ia = new INDArray[timeSeriesLength];
        toReturn.fa = new INDArray[timeSeriesLength];
        toReturn.oa = new INDArray[timeSeriesLength];
        toReturn.ga = new INDArray[timeSeriesLength];
        if (!sigmoidGates) {
            toReturn.fz = new INDArray[timeSeriesLength];
            toReturn.oz = new INDArray[timeSeriesLength];
            toReturn.gz = new INDArray[timeSeriesLength];
        }
    } else {
        //F order to keep time steps together
        outputActivations = Nd4j.create(new int[] { miniBatchSize, hiddenLayerSize, timeSeriesLength }, 'f');
        toReturn.fwdPassOutput = outputActivations;
    }
    Level1 l1BLAS = Nd4j.getBlasWrapper().level1();
    //Input validation: check input data matches nIn
    if (input.size(1) != inputWeights.size(0)) {
        throw new DL4JInvalidInputException("Received input with size(1) = " + input.size(1) + " (input array shape = " + Arrays.toString(input.shape()) + "); input.size(1) must match layer nIn size (nIn = " + inputWeights.size(0) + ")");
    }
    //These can be different if user forgets to call rnnClearPreviousState() between calls of rnnTimeStep
    if (prevOutputActivations != null && prevOutputActivations.size(0) != input.size(0)) {
        throw new DL4JInvalidInputException("Previous activations (stored state) number of examples = " + prevOutputActivations.size(0) + " but input array number of examples = " + input.size(0) + ". Possible cause: using rnnTimeStep() without calling" + " rnnClearPreviousState() between different sequences?");
    }
    //initialize prevOutputActivations to zeroes
    if (prevOutputActivations == null) {
        prevOutputActivations = Nd4j.zeros(new int[] { miniBatchSize, hiddenLayerSize });
    }
    for (int iTimeIndex = 0; iTimeIndex < timeSeriesLength; iTimeIndex++) {
        int time = iTimeIndex;
        if (!forwards) {
            time = timeSeriesLength - iTimeIndex - 1;
        }
        //[Expected shape: [m,nIn]. Also deals with edge case of T=1, with 'time series' data of shape [m,nIn], equiv. to [m,nIn,1]
        INDArray miniBatchData = (is2dInput ? input : input.tensorAlongDimension(time, 1, 0));
        miniBatchData = Shape.toMmulCompatible(miniBatchData);
        //Calculate activations for: network input + forget, output, input modulation gates. Next 3 lines are first part of those
        //Shape: [miniBatch,4*layerSize]
        INDArray ifogActivations = miniBatchData.mmul(inputWeights);
        Nd4j.gemm(prevOutputActivations, recurrentWeightsIFOG, ifogActivations, false, false, 1.0, 1.0);
        ifogActivations.addiRowVector(biases);
        INDArray inputActivations = ifogActivations.get(NDArrayIndex.all(), NDArrayIndex.interval(0, hiddenLayerSize));
        if (forBackprop)
            toReturn.iz[time] = inputActivations.dup('f');
        conf.getLayer().getActivationFn().getActivation(inputActivations, training);
        if (forBackprop)
            toReturn.ia[time] = inputActivations;
        INDArray forgetGateActivations = ifogActivations.get(NDArrayIndex.all(), NDArrayIndex.interval(hiddenLayerSize, 2 * hiddenLayerSize));
        INDArray pmcellWFF = prevMemCellState.dup('f').muliRowVector(wFFTranspose);
        //y = a*x + y i.e., forgetGateActivations.addi(pmcellWFF)
        l1BLAS.axpy(pmcellWFF.length(), 1.0, pmcellWFF, forgetGateActivations);
        //Above line: treats matrix as a vector. Can only do this because we're sure both pmcellWFF and forgetGateActivations are f order, offset 0 and have same strides
        if (forBackprop && !sigmoidGates) {
            //Forget gate pre-out (z)
            toReturn.fz[time] = forgetGateActivations.dup('f');
        }
        gateActivationFn.getActivation(forgetGateActivations, training);
        if (forBackprop)
            toReturn.fa[time] = forgetGateActivations;
        INDArray inputModGateActivations = ifogActivations.get(NDArrayIndex.all(), NDArrayIndex.interval(3 * hiddenLayerSize, 4 * hiddenLayerSize));
        INDArray pmcellWGG = prevMemCellState.dup('f').muliRowVector(wGGTranspose);
        //inputModGateActivations.addi(pmcellWGG)
        l1BLAS.axpy(pmcellWGG.length(), 1.0, pmcellWGG, inputModGateActivations);
        if (forBackprop && !sigmoidGates) {
            //Input modulation gate pre-out (z)
            toReturn.gz[time] = inputModGateActivations.dup('f');
        }
        gateActivationFn.getActivation(inputModGateActivations, training);
        if (forBackprop)
            toReturn.ga[time] = inputModGateActivations;
        //Memory cell state
        INDArray currentMemoryCellState;
        INDArray inputModMulInput;
        if (forBackprop) {
            currentMemoryCellState = prevMemCellState.dup('f').muli(forgetGateActivations);
            inputModMulInput = inputModGateActivations.dup('f').muli(inputActivations);
        } else {
            currentMemoryCellState = forgetGateActivations.muli(prevMemCellState);
            inputModMulInput = inputModGateActivations.muli(inputActivations);
        }
        //currentMemoryCellState.addi(inputModMulInput)
        l1BLAS.axpy(currentMemoryCellState.length(), 1.0, inputModMulInput, currentMemoryCellState);
        INDArray outputGateActivations = ifogActivations.get(NDArrayIndex.all(), NDArrayIndex.interval(2 * hiddenLayerSize, 3 * hiddenLayerSize));
        INDArray pmcellWOO = currentMemoryCellState.dup('f').muliRowVector(wOOTranspose);
        //outputGateActivations.addi(pmcellWOO)
        l1BLAS.axpy(pmcellWOO.length(), 1.0, pmcellWOO, outputGateActivations);
        if (forBackprop && !sigmoidGates) {
            //Output gate pre-out (z)
            toReturn.oz[time] = outputGateActivations.dup('f');
        }
        gateActivationFn.getActivation(outputGateActivations, training);
        if (forBackprop)
            toReturn.oa[time] = outputGateActivations;
        //LSTM unit outputs:
        INDArray currMemoryCellActivation = afn.getActivation(currentMemoryCellState.dup('f'), training);
        INDArray currHiddenUnitActivations;
        if (forBackprop) {
            //Expected shape: [m,hiddenLayerSize]
            currHiddenUnitActivations = currMemoryCellActivation.dup('f').muli(outputGateActivations);
        } else {
            //Expected shape: [m,hiddenLayerSize]
            currHiddenUnitActivations = currMemoryCellActivation.muli(outputGateActivations);
        }
        if (maskArray != null) {
            //Mask array is present: bidirectional RNN -> need to zero out these activations to avoid
            // incorrectly using activations from masked time steps (i.e., want 0 initialization in both directions)
            //We *also* need to apply this to the memory cells, as they are carried forward
            //Mask array has shape [minibatch, timeSeriesLength] -> get column
            INDArray timeStepMaskColumn = maskArray.getColumn(time);
            currHiddenUnitActivations.muliColumnVector(timeStepMaskColumn);
            currentMemoryCellState.muliColumnVector(timeStepMaskColumn);
        }
        if (forBackprop) {
            toReturn.fwdPassOutputAsArrays[time] = currHiddenUnitActivations;
            toReturn.memCellState[time] = currentMemoryCellState;
            toReturn.memCellActivations[time] = currMemoryCellActivation;
        } else {
            outputActivations.tensorAlongDimension(time, 1, 0).assign(currHiddenUnitActivations);
        }
        prevOutputActivations = currHiddenUnitActivations;
        prevMemCellState = currentMemoryCellState;
        toReturn.lastAct = currHiddenUnitActivations;
        toReturn.lastMemCell = currentMemoryCellState;
    }
    return toReturn;
}
Also used : INDArray(org.nd4j.linalg.api.ndarray.INDArray) ActivationSigmoid(org.nd4j.linalg.activations.impl.ActivationSigmoid) Level1(org.nd4j.linalg.api.blas.Level1) IActivation(org.nd4j.linalg.activations.IActivation) DL4JInvalidInputException(org.deeplearning4j.exception.DL4JInvalidInputException) NDArrayIndex.point(org.nd4j.linalg.indexing.NDArrayIndex.point)
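
For the LSTM path, a minimal caller-side sketch (assumed code, not from the indexed sources) of how the input.size(1) check is usually hit: recurrent input is expected with shape [miniBatchSize, nIn, timeSeriesLength], so a feature count other than the configured nIn throws DL4JInvalidInputException. Layer sizes below are illustrative. The second check (stored state vs. input minibatch size) is the one typically triggered by calling rnnTimeStep() on a new sequence without first calling rnnClearPreviousState().

import org.deeplearning4j.exception.DL4JInvalidInputException;
import org.deeplearning4j.nn.conf.MultiLayerConfiguration;
import org.deeplearning4j.nn.conf.NeuralNetConfiguration;
import org.deeplearning4j.nn.conf.layers.GravesLSTM;
import org.deeplearning4j.nn.conf.layers.RnnOutputLayer;
import org.deeplearning4j.nn.multilayer.MultiLayerNetwork;
import org.nd4j.linalg.api.ndarray.INDArray;
import org.nd4j.linalg.factory.Nd4j;
import org.nd4j.linalg.lossfunctions.LossFunctions;

public class LstmInputSizeSketch {
    public static void main(String[] args) {
        // Hypothetical recurrent net: GravesLSTM configured with nIn = 3 features per time step
        MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder()
                .list()
                .layer(0, new GravesLSTM.Builder().nIn(3).nOut(4).build())
                .layer(1, new RnnOutputLayer.Builder(LossFunctions.LossFunction.MSE).nIn(4).nOut(2).build())
                .build();
        MultiLayerNetwork net = new MultiLayerNetwork(conf);
        net.init();

        // Time series input is expected as [miniBatchSize, nIn, timeSeriesLength]; here nIn = 5 instead of 3
        INDArray badSeries = Nd4j.rand(new int[] {2, 5, 10});
        try {
            net.output(badSeries);
        } catch (DL4JInvalidInputException e) {
            System.out.println("size(1) mismatch: " + e.getMessage());
        }
    }
}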

Example 13 with DL4JInvalidInputException

Use of org.deeplearning4j.exception.DL4JInvalidInputException in project deeplearning4j by deeplearning4j.

From class CenterLossOutputLayer, method getGradientsAndDelta().

/** Returns tuple: {Gradient,Delta,Output} given preOut */
private Pair<Gradient, INDArray> getGradientsAndDelta(INDArray preOut) {
    ILossFunction lossFunction = layerConf().getLossFn();
    INDArray labels2d = getLabels2d();
    if (labels2d.size(1) != preOut.size(1)) {
        throw new DL4JInvalidInputException("Labels array numColumns (size(1) = " + labels2d.size(1) + ") does not match output layer" + " number of outputs (nOut = " + preOut.size(1) + ")");
    }
    INDArray delta = lossFunction.computeGradient(labels2d, preOut, layerConf().getActivationFn(), maskArray);
    Gradient gradient = new DefaultGradient();
    INDArray weightGradView = gradientViews.get(CenterLossParamInitializer.WEIGHT_KEY);
    INDArray biasGradView = gradientViews.get(CenterLossParamInitializer.BIAS_KEY);
    INDArray centersGradView = gradientViews.get(CenterLossParamInitializer.CENTER_KEY);
    // centers delta
    double alpha = layerConf().getAlpha();
    INDArray centers = params.get(CenterLossParamInitializer.CENTER_KEY);
    INDArray centersForExamples = labels.mmul(centers);
    INDArray diff = centersForExamples.sub(input).muli(alpha);
    INDArray numerator = labels.transpose().mmul(diff);
    INDArray denominator = labels.sum(0).addi(1.0).transpose();
    INDArray deltaC;
    if (layerConf().getGradientCheck()) {
        double lambda = layerConf().getLambda();
        //For gradient checks: need to multiply dLc/dcj by lambda to get dL/dcj
        deltaC = numerator.muli(lambda);
    } else {
        deltaC = numerator.diviColumnVector(denominator);
    }
    centersGradView.assign(deltaC);
    // other standard calculations
    //Equivalent to:  weightGradView.assign(input.transpose().mmul(delta));
    Nd4j.gemm(input, delta, weightGradView, true, false, 1.0, 0.0);
    biasGradView.assign(delta.sum(0));
    gradient.gradientForVariable().put(CenterLossParamInitializer.WEIGHT_KEY, weightGradView);
    gradient.gradientForVariable().put(CenterLossParamInitializer.BIAS_KEY, biasGradView);
    gradient.gradientForVariable().put(CenterLossParamInitializer.CENTER_KEY, centersGradView);
    return new Pair<>(gradient, delta);
}
Also used : Gradient(org.deeplearning4j.nn.gradient.Gradient) DefaultGradient(org.deeplearning4j.nn.gradient.DefaultGradient) DefaultGradient(org.deeplearning4j.nn.gradient.DefaultGradient) INDArray(org.nd4j.linalg.api.ndarray.INDArray) ILossFunction(org.nd4j.linalg.lossfunctions.ILossFunction) DL4JInvalidInputException(org.deeplearning4j.exception.DL4JInvalidInputException) Pair(org.deeplearning4j.berkeley.Pair)
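
The center-update arithmetic in getGradientsAndDelta can be reproduced with plain ND4J arrays. The sketch below uses toy shapes and an illustrative alpha (none of it is from the indexed sources) and mirrors the same computation: per-class sums of alpha * (center - input), divided by class counts + 1.

import org.nd4j.linalg.api.ndarray.INDArray;
import org.nd4j.linalg.factory.Nd4j;

public class CenterDeltaSketch {
    public static void main(String[] args) {
        double alpha = 0.1; // center update rate (illustrative value)

        // 3 examples, 2 classes (one-hot labels), 4 features per example
        INDArray labels = Nd4j.create(new double[][] {{1, 0}, {0, 1}, {1, 0}});
        INDArray input = Nd4j.rand(3, 4);   // embeddings fed into the output layer
        INDArray centers = Nd4j.rand(2, 4); // one center per class

        // Same arithmetic as getGradientsAndDelta, outside any layer
        INDArray centersForExamples = labels.mmul(centers);         // [3,4]: center of each example's class
        INDArray diff = centersForExamples.sub(input).muli(alpha);  // [3,4]
        INDArray numerator = labels.transpose().mmul(diff);         // [2,4]: summed per class
        INDArray denominator = labels.sum(0).addi(1.0).transpose(); // [2,1]: class counts + 1
        INDArray deltaC = numerator.diviColumnVector(denominator);  // averaged center update

        System.out.println(deltaC);
    }
}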

Example 14 with DL4JInvalidInputException

Use of org.deeplearning4j.exception.DL4JInvalidInputException in project deeplearning4j by deeplearning4j.

From class Convolution1DLayer, method backpropGradient().

@Override
public Pair<Gradient, INDArray> backpropGradient(INDArray epsilon) {
    if (epsilon.rank() != 3)
        throw new DL4JInvalidInputException("Got rank " + epsilon.rank() + " array as epsilon for Convolution1DLayer backprop with shape " + Arrays.toString(epsilon.shape()) + ". Expected rank 3 array with shape [minibatchSize, features, length].");
    // add singleton fourth dimension to input and next layer's epsilon
    epsilon = epsilon.reshape(epsilon.size(0), epsilon.size(1), epsilon.size(2), 1);
    INDArray origInput = input;
    input = input.reshape(input.size(0), input.size(1), input.size(2), 1);
    // call 2D ConvolutionLayer's backpropGradient method
    Pair<Gradient, INDArray> gradientEpsNext = super.backpropGradient(epsilon);
    INDArray epsNext = gradientEpsNext.getSecond();
    // remove singleton fourth dimension from input and current epsilon
    epsNext = epsNext.reshape(epsNext.size(0), epsNext.size(1), epsNext.size(2));
    input = origInput;
    return new Pair<>(gradientEpsNext.getFirst(), epsNext);
}
Also used : Gradient(org.deeplearning4j.nn.gradient.Gradient) INDArray(org.nd4j.linalg.api.ndarray.INDArray) DL4JInvalidInputException(org.deeplearning4j.exception.DL4JInvalidInputException) Pair(org.deeplearning4j.berkeley.Pair)
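
The reshape trick used here (and by Subsampling1DLayer below) can be shown in isolation: append a singleton fourth dimension so the rank 3 [minibatchSize, features, length] array matches the 4d layout the 2D layer code expects, then drop it again afterwards. A small ND4J-only sketch with illustrative shapes:

import java.util.Arrays;

import org.nd4j.linalg.api.ndarray.INDArray;
import org.nd4j.linalg.factory.Nd4j;

public class ReshapeTrickSketch {
    public static void main(String[] args) {
        // Rank 3 epsilon as the 1D layer receives it: [minibatchSize, features, length]
        INDArray eps3d = Nd4j.rand(new int[] {2, 4, 10});

        // Append a singleton fourth dimension so the 2D convolution code can process it
        INDArray eps4d = eps3d.reshape(eps3d.size(0), eps3d.size(1), eps3d.size(2), 1);
        System.out.println(Arrays.toString(eps4d.shape())); // [2, 4, 10, 1]

        // ... the 2D backprop would run here ...

        // Drop the singleton dimension again before handing epsilon to the previous layer
        INDArray back3d = eps4d.reshape(eps4d.size(0), eps4d.size(1), eps4d.size(2));
        System.out.println(Arrays.toString(back3d.shape())); // [2, 4, 10]
    }
}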

Example 15 with DL4JInvalidInputException

Use of org.deeplearning4j.exception.DL4JInvalidInputException in project deeplearning4j by deeplearning4j.

From class Subsampling1DLayer, method activate().

@Override
public INDArray activate(boolean training) {
    if (input.rank() != 3)
        throw new DL4JInvalidInputException("Got rank " + input.rank() + " array as input to Subsampling1DLayer with shape " + Arrays.toString(input.shape()) + ". Expected rank 3 array with shape [minibatchSize, features, length].");
    // add singleton fourth dimension to input
    INDArray origInput = input;
    input = input.reshape(input.size(0), input.size(1), input.size(2), 1);
    // call 2D SubsamplingLayer's activate method
    INDArray acts = super.activate(training);
    // remove singleton fourth dimension from input and output activations
    input = origInput;
    acts = acts.reshape(acts.size(0), acts.size(1), acts.size(2));
    return acts;
}
Also used : INDArray(org.nd4j.linalg.api.ndarray.INDArray) DL4JInvalidInputException(org.deeplearning4j.exception.DL4JInvalidInputException)
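
Both 1D layers share the same rank 3 precondition, so callers sometimes pre-validate before invoking activate() or backpropGradient(). The helper below is hypothetical (the class and method name are not part of deeplearning4j); it simply mirrors the check and error-message pattern from these layers.

import java.util.Arrays;

import org.deeplearning4j.exception.DL4JInvalidInputException;
import org.nd4j.linalg.api.ndarray.INDArray;

public class TimeSeriesInputChecks {

    /** Hypothetical guard mirroring the rank 3 check used by the 1D conv/subsampling layers. */
    public static void requireRank3TimeSeries(INDArray input, String layerName) {
        if (input == null || input.rank() != 3) {
            int rank = (input == null ? -1 : input.rank());
            String shape = (input == null ? "null" : Arrays.toString(input.shape()));
            throw new DL4JInvalidInputException("Got rank " + rank + " array as input to " + layerName
                    + " with shape " + shape
                    + ". Expected rank 3 array with shape [minibatchSize, features, length].");
        }
    }
}

A caller could then write TimeSeriesInputChecks.requireRank3TimeSeries(features, "Subsampling1DLayer") before feeding data through the layer, failing fast with the same exception type the layer itself would throw.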

Aggregations

DL4JInvalidInputException (org.deeplearning4j.exception.DL4JInvalidInputException) 15
INDArray (org.nd4j.linalg.api.ndarray.INDArray) 14
Pair (org.deeplearning4j.berkeley.Pair) 5
Gradient (org.deeplearning4j.nn.gradient.Gradient) 4
ArrayList (java.util.ArrayList) 3
Writable (org.datavec.api.writable.Writable) 3
NDArrayWritable (org.datavec.common.data.NDArrayWritable) 3
DefaultGradient (org.deeplearning4j.nn.gradient.DefaultGradient) 2
ILossFunction (org.nd4j.linalg.lossfunctions.ILossFunction) 2
List (java.util.List) 1
AtomicInteger (java.util.concurrent.atomic.AtomicInteger) 1
GZIPInputStream (java.util.zip.GZIPInputStream) 1
ZipEntry (java.util.zip.ZipEntry) 1
ZipFile (java.util.zip.ZipFile) 1
WritableConverterException (org.datavec.api.io.converters.WritableConverterException) 1
InMemoryLookupTable (org.deeplearning4j.models.embeddings.inmemory.InMemoryLookupTable) 1
ShallowSequenceElement (org.deeplearning4j.models.sequencevectors.sequence.ShallowSequenceElement) 1
StaticWord2Vec (org.deeplearning4j.models.word2vec.StaticWord2Vec) 1
VocabWord (org.deeplearning4j.models.word2vec.VocabWord) 1
Word2Vec (org.deeplearning4j.models.word2vec.Word2Vec) 1