Example 11 with GraphVertex

use of org.deeplearning4j.nn.graph.vertex.GraphVertex in project deeplearning4j by deeplearning4j.

the class ComputationGraph method rnnActivateUsingStoredState.

/**
     * Similar to rnnTimeStep and feedForward() methods. Difference here is that this method:<br>
     * (a) like rnnTimeStep does forward pass using stored state for RNN layers, and<br>
     * (b) unlike rnnTimeStep does not modify the RNN layer state<br>
     * Therefore multiple calls to this method with the same input should have the same output.<br>
     * Typically used during training only. Use rnnTimeStep for prediction/forward pass at test time.
     *
     * @param inputs            Input to network
     * @param training          Whether training or not
     * @param storeLastForTBPTT set to true if used as part of truncated BPTT training
     * @return Activations for each layer (including input, as per feedForward() etc)
     */
public Map<String, INDArray> rnnActivateUsingStoredState(INDArray[] inputs, boolean training, boolean storeLastForTBPTT) {
    Map<String, INDArray> layerActivations = new HashMap<>();
    //Do forward pass according to the topological ordering of the network
    for (int currVertexIdx : topologicalOrder) {
        GraphVertex current = vertices[currVertexIdx];
        if (current.isInputVertex()) {
            VertexIndices[] inputsTo = current.getOutputVertices();
            INDArray input = inputs[current.getVertexIndex()];
            layerActivations.put(current.getVertexName(), input);
            for (VertexIndices v : inputsTo) {
                int vIdx = v.getVertexIndex();
                int vIdxInputNum = v.getVertexEdgeNumber();
                //This input: the 'vIdxInputNum'th input to vertex 'vIdx'
                //TODO When to dup?
                vertices[vIdx].setInput(vIdxInputNum, input.dup());
            }
        } else {
            INDArray out;
            if (current.hasLayer()) {
                Layer l = current.getLayer();
                if (l instanceof RecurrentLayer) {
                    out = ((RecurrentLayer) l).rnnActivateUsingStoredState(current.getInputs()[0], training, storeLastForTBPTT);
                } else if (l instanceof MultiLayerNetwork) {
                    List<INDArray> temp = ((MultiLayerNetwork) l).rnnActivateUsingStoredState(current.getInputs()[0], training, storeLastForTBPTT);
                    out = temp.get(temp.size() - 1);
                } else {
                    //non-recurrent layer
                    out = current.doForward(training);
                }
                layerActivations.put(current.getVertexName(), out);
            } else {
                out = current.doForward(training);
            }
            //Now, set the inputs for the next vertices:
            VertexIndices[] outputsTo = current.getOutputVertices();
            if (outputsTo != null) {
                for (VertexIndices v : outputsTo) {
                    int vIdx = v.getVertexIndex();
                    int inputNum = v.getVertexEdgeNumber();
                    //This (jth) connection from the output is the 'inputNum'th input to vertex 'vIdx'
                    vertices[vIdx].setInput(inputNum, out);
                }
            }
        }
    }
    return layerActivations;
}
Also used: GraphVertex(org.deeplearning4j.nn.graph.vertex.GraphVertex) INDArray(org.nd4j.linalg.api.ndarray.INDArray) VertexIndices(org.deeplearning4j.nn.graph.vertex.VertexIndices) Layer(org.deeplearning4j.nn.api.Layer) FrozenLayer(org.deeplearning4j.nn.layers.FrozenLayer) RecurrentLayer(org.deeplearning4j.nn.api.layers.RecurrentLayer) FeedForwardLayer(org.deeplearning4j.nn.conf.layers.FeedForwardLayer) IOutputLayer(org.deeplearning4j.nn.api.layers.IOutputLayer) MultiLayerNetwork(org.deeplearning4j.nn.multilayer.MultiLayerNetwork)
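
A minimal usage sketch for the method above. It assumes an already initialized ComputationGraph named net that contains at least one RNN layer; the input shape is illustrative only:

import java.util.Map;
import org.nd4j.linalg.api.ndarray.INDArray;
import org.nd4j.linalg.factory.Nd4j;

// Assumption: 'net' is an initialized ComputationGraph with a single recurrent input.
// Illustrative input shape: [miniBatchSize, nIn, timeSeriesLength]
INDArray input = Nd4j.rand(new int[] { 3, 5, 10 });

// Forward pass that reads, but does not modify, the stored RNN state.
Map<String, INDArray> acts1 = net.rnnActivateUsingStoredState(new INDArray[] { input }, true, true);
Map<String, INDArray> acts2 = net.rnnActivateUsingStoredState(new INDArray[] { input }, true, true);
// Because the stored state is left untouched, acts1 and acts2 contain the same activations.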

Example 12 with GraphVertex

use of org.deeplearning4j.nn.graph.vertex.GraphVertex in project deeplearning4j by deeplearning4j.

the class ComputationGraph method summary.

/**
     * String detailing the architecture of the computation graph.
     * Vertices are printed in a topological sort order.
     * Columns are: vertex name (with layer/vertex type), nIn, nOut, total number of parameters,
     * the shapes of those parameters, and the inputs to the vertex.
     * Also gives information about frozen layers/vertices, if any.
     * @return Summary as a string
     */
public String summary() {
    String ret = "\n";
    ret += StringUtils.repeat("=", 140);
    ret += "\n";
    ret += String.format("%-40s%-15s%-15s%-30s %s\n", "VertexName (VertexType)", "nIn,nOut", "TotalParams", "ParamsShape", "Vertex Inputs");
    ret += StringUtils.repeat("=", 140);
    ret += "\n";
    int frozenParams = 0;
    for (int currVertexIdx : topologicalOrder) {
        GraphVertex current = vertices[currVertexIdx];
        String name = current.getVertexName();
        String[] classNameArr = current.getClass().toString().split("\\.");
        String className = classNameArr[classNameArr.length - 1];
        String connections = "-";
        if (!current.isInputVertex()) {
            connections = configuration.getVertexInputs().get(name).toString();
        }
        String paramCount = "-";
        String in = "-";
        String out = "-";
        String paramShape = "-";
        if (current.hasLayer()) {
            Layer currentLayer = ((LayerVertex) current).getLayer();
            classNameArr = currentLayer.getClass().getName().split("\\.");
            className = classNameArr[classNameArr.length - 1];
            paramCount = String.valueOf(currentLayer.numParams());
            if (currentLayer.numParams() > 0) {
                paramShape = "";
                in = String.valueOf(((FeedForwardLayer) currentLayer.conf().getLayer()).getNIn());
                out = String.valueOf(((FeedForwardLayer) currentLayer.conf().getLayer()).getNOut());
                Set<String> paraNames = currentLayer.conf().getLearningRateByParam().keySet();
                for (String aP : paraNames) {
                    String paramS = ArrayUtils.toString(currentLayer.paramTable().get(aP).shape());
                    paramShape += aP + ":" + paramS + ", ";
                }
                paramShape = paramShape.subSequence(0, paramShape.lastIndexOf(",")).toString();
            }
            if (currentLayer instanceof FrozenLayer) {
                frozenParams += currentLayer.numParams();
                classNameArr = ((FrozenLayer) currentLayer).getInsideLayer().getClass().getName().split("\\.");
                className = "Frozen " + classNameArr[classNameArr.length - 1];
            }
        }
        ret += String.format("%-40s%-15s%-15s%-30s %s", name + " (" + className + ")", in + "," + out, paramCount, paramShape, connections);
        ret += "\n";
    }
    ret += StringUtils.repeat("-", 140);
    ret += String.format("\n%30s %d", "Total Parameters: ", params().length());
    ret += String.format("\n%30s %d", "Trainable Parameters: ", params().length() - frozenParams);
    ret += String.format("\n%30s %d", "Frozen Parameters: ", frozenParams);
    ret += "\n";
    ret += StringUtils.repeat("=", 140);
    ret += "\n";
    return ret;
}
Also used: LayerVertex(org.deeplearning4j.nn.graph.vertex.impl.LayerVertex) FrozenLayer(org.deeplearning4j.nn.layers.FrozenLayer) GraphVertex(org.deeplearning4j.nn.graph.vertex.GraphVertex) FeedForwardLayer(org.deeplearning4j.nn.conf.layers.FeedForwardLayer) Layer(org.deeplearning4j.nn.api.Layer) RecurrentLayer(org.deeplearning4j.nn.api.layers.RecurrentLayer) IOutputLayer(org.deeplearning4j.nn.api.layers.IOutputLayer)
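
For reference, a self-contained sketch that builds a small graph and prints its summary; the layer choices and sizes are illustrative only:

import org.deeplearning4j.nn.conf.ComputationGraphConfiguration;
import org.deeplearning4j.nn.conf.NeuralNetConfiguration;
import org.deeplearning4j.nn.conf.layers.DenseLayer;
import org.deeplearning4j.nn.conf.layers.OutputLayer;
import org.deeplearning4j.nn.graph.ComputationGraph;

ComputationGraphConfiguration conf = new NeuralNetConfiguration.Builder()
        .graphBuilder()
        .addInputs("in")
        .addLayer("dense", new DenseLayer.Builder().nIn(4).nOut(8).build(), "in")
        .addLayer("out", new OutputLayer.Builder().nIn(8).nOut(3).build(), "dense")
        .setOutputs("out")
        .build();

ComputationGraph net = new ComputationGraph(conf);
net.init();

// One row per vertex, in topological order, plus parameter totals at the bottom.
System.out.println(net.summary());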

Example 13 with GraphVertex

use of org.deeplearning4j.nn.graph.vertex.GraphVertex in project deeplearning4j by deeplearning4j.

the class ComputationGraph method computeGradientAndScore.

@Override
public void computeGradientAndScore() {
    //Calculate activations (which are stored in each layer, and used in backprop)
    if (configuration.getBackpropType() == BackpropType.TruncatedBPTT) {
        Map<String, INDArray> activations = rnnActivateUsingStoredState(inputs, true, true);
        if (trainingListeners.size() > 0) {
            for (TrainingListener tl : trainingListeners) {
                tl.onForwardPass(this, activations);
            }
        }
        calcBackpropGradients(true);
    } else {
        Map<String, INDArray> activations = feedForward(true, true);
        if (trainingListeners.size() > 0) {
            for (TrainingListener tl : trainingListeners) {
                tl.onForwardPass(this, activations);
            }
        }
        calcBackpropGradients(false);
    }
    //Score: sum of the scores for the various output layers...
    double l1 = calcL1();
    double l2 = calcL2();
    score = 0.0;
    for (String s : configuration.getNetworkOutputs()) {
        GraphVertex gv = verticesMap.get(s);
        score += ((IOutputLayer) gv.getLayer()).computeScore(l1, l2, true);
        //Only want to add l1/l2 once...
        l1 = 0.0;
        l2 = 0.0;
    }
    //Listeners
    if (trainingListeners.size() > 0) {
        for (TrainingListener tl : trainingListeners) {
            tl.onBackwardPass(this);
        }
    }
}
Also used: GraphVertex(org.deeplearning4j.nn.graph.vertex.GraphVertex) INDArray(org.nd4j.linalg.api.ndarray.INDArray) TrainingListener(org.deeplearning4j.optimize.api.TrainingListener)
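
A short usage sketch, reusing the small dense/output graph from the summary() example above; the data shapes match that configuration and the labels are illustrative:

import org.deeplearning4j.nn.gradient.Gradient;
import org.nd4j.linalg.api.ndarray.INDArray;
import org.nd4j.linalg.factory.Nd4j;

// 3 examples, 4 features in, 3 classes out (matches the nIn/nOut above)
INDArray features = Nd4j.rand(3, 4);
INDArray labels = Nd4j.zeros(3, 3);
labels.putScalar(new int[] { 0, 1 }, 1.0); // illustrative one-hot label

net.setInputs(features);
net.setLabels(labels);
net.computeGradientAndScore();

Gradient g = net.gradient(); // gradient populated by computeGradientAndScore()
double loss = net.score();   // score, with the l1/l2 terms added once as noted above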

Example 14 with GraphVertex

use of org.deeplearning4j.nn.graph.vertex.GraphVertex in project deeplearning4j by deeplearning4j.

the class ComputationGraph method feedForward.

private Map<String, INDArray> feedForward(boolean train, boolean excludeOutputLayers) {
    Map<String, INDArray> layerActivations = new HashMap<>();
    //Do forward pass according to the topological ordering of the network
    for (int i = 0; i < topologicalOrder.length; i++) {
        GraphVertex current = vertices[topologicalOrder[i]];
        if (current.isInputVertex()) {
            VertexIndices[] inputsTo = current.getOutputVertices();
            INDArray input = inputs[current.getVertexIndex()];
            layerActivations.put(current.getVertexName(), input);
            for (VertexIndices v : inputsTo) {
                int vIdx = v.getVertexIndex();
                int vIdxInputNum = v.getVertexEdgeNumber();
                //This input: the 'vIdxInputNum'th input to vertex 'vIdx'
                vertices[vIdx].setInput(vIdxInputNum, input.dup());
            }
        } else {
            //Do forward pass:
            if (excludeOutputLayers && current.isOutputVertex() && current.hasLayer() && current.getLayer() instanceof IOutputLayer) {
                // we only need to ensure the input to the output layers is set properly
                continue;
            }
            INDArray out = current.doForward(train);
            if (current.hasLayer()) {
                layerActivations.put(current.getVertexName(), out);
            }
            //Now, set the inputs for the next vertices:
            VertexIndices[] outputsTo = current.getOutputVertices();
            if (outputsTo != null) {
                for (VertexIndices v : outputsTo) {
                    int vIdx = v.getVertexIndex();
                    int inputNum = v.getVertexEdgeNumber();
                    //This (jth) connection from the output is the 'inputNum'th input to vertex 'vIdx'
                    vertices[vIdx].setInput(inputNum, out);
                }
            }
        }
    }
    return layerActivations;
}
Also used: GraphVertex(org.deeplearning4j.nn.graph.vertex.GraphVertex) INDArray(org.nd4j.linalg.api.ndarray.INDArray) VertexIndices(org.deeplearning4j.nn.graph.vertex.VertexIndices) IOutputLayer(org.deeplearning4j.nn.api.layers.IOutputLayer)
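
This overload is private; callers reach it through the public feedForward variants. A brief sketch, again assuming the net built in the summary() example above:

import java.util.Map;
import org.nd4j.linalg.api.ndarray.INDArray;
import org.nd4j.linalg.factory.Nd4j;

INDArray input = Nd4j.rand(3, 4);

// Returns activations keyed by vertex name (input vertex included); train = false for inference.
Map<String, INDArray> activations = net.feedForward(input, false);
INDArray denseOut = activations.get("dense"); // vertex name from the configuration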

Example 15 with GraphVertex

use of org.deeplearning4j.nn.graph.vertex.GraphVertex in project deeplearning4j by deeplearning4j.

the class ComputationGraph method calcBackpropGradients.

/**
     * Do backprop (gradient calculation)
     *
     * @param truncatedBPTT    false: normal backprop. true: calculate gradients using truncated BPTT for RNN layers
     * @param externalEpsilons usually null (for typical supervised learning). If not null (and length > 0), it is assumed
     *                         that the user has provided errors externally, as they would, for example, in reinforcement
     *                         learning situations.
     */
protected void calcBackpropGradients(boolean truncatedBPTT, INDArray... externalEpsilons) {
    if (flattenedGradients == null)
        initGradientsView();
    LinkedList<Triple<String, INDArray, Character>> gradients = new LinkedList<>();
    //Do backprop according to the reverse of the topological ordering of the network
    //If true: already set epsilon for this vertex; later epsilons should be *added* to the existing one, not set
    boolean[] setVertexEpsilon = new boolean[topologicalOrder.length];
    for (int i = topologicalOrder.length - 1; i >= 0; i--) {
        GraphVertex current = vertices[topologicalOrder[i]];
        if (current.isInputVertex())
            //No op
            continue;
        //FIXME: make the frozen vertex feature extraction more flexible
        if (current.hasLayer() && current.getLayer() instanceof FrozenLayer)
            break;
        if (current.isOutputVertex()) {
            //Two reasons for a vertex to be an output vertex:
            //(a) it's an output layer (i.e., instanceof IOutputLayer), or
            //(b) it's a normal layer, but it has been marked as an output layer for use in external errors - for reinforcement learning, for example
            int thisOutputNumber = configuration.getNetworkOutputs().indexOf(current.getVertexName());
            if (current.getLayer() instanceof IOutputLayer) {
                IOutputLayer outputLayer = (IOutputLayer) current.getLayer();
                INDArray currLabels = labels[thisOutputNumber];
                outputLayer.setLabels(currLabels);
            } else {
                current.setEpsilon(externalEpsilons[thisOutputNumber]);
                setVertexEpsilon[topologicalOrder[i]] = true;
            }
        }
        Pair<Gradient, INDArray[]> pair = current.doBackward(truncatedBPTT);
        INDArray[] epsilons = pair.getSecond();
        //Inputs to the current GraphVertex:
        VertexIndices[] inputVertices = current.getInputVertices();
        //Set epsilons for the vertices that provide inputs to this vertex:
        if (inputVertices != null) {
            int j = 0;
            for (VertexIndices v : inputVertices) {
                GraphVertex gv = vertices[v.getVertexIndex()];
                if (setVertexEpsilon[gv.getVertexIndex()]) {
                    //This vertex: must output to multiple vertices... we want to add the epsilons here
                    INDArray currentEps = gv.getEpsilon();
                    //TODO: in some circumstances, it may be safe to do an in-place add (but not always)
                    gv.setEpsilon(currentEps.add(epsilons[j++]));
                } else {
                    gv.setEpsilon(epsilons[j++]);
                }
                setVertexEpsilon[gv.getVertexIndex()] = true;
            }
        }
        if (pair.getFirst() != null) {
            Gradient g = pair.getFirst();
            Map<String, INDArray> map = g.gradientForVariable();
            LinkedList<Triple<String, INDArray, Character>> tempList = new LinkedList<>();
            for (Map.Entry<String, INDArray> entry : map.entrySet()) {
                String origName = entry.getKey();
                String newName = current.getVertexName() + "_" + origName;
                tempList.addFirst(new Triple<>(newName, entry.getValue(), g.flatteningOrderForVariable(origName)));
            }
            for (Triple<String, INDArray, Character> t : tempList) gradients.addFirst(t);
        }
    }
    //Now, add the gradients in the order we need them in for flattening (same as params order)
    Gradient gradient = new DefaultGradient(flattenedGradients);
    for (Triple<String, INDArray, Character> t : gradients) {
        gradient.setGradientFor(t.getFirst(), t.getSecond(), t.getThird());
    }
    this.gradient = gradient;
}
Also used: DefaultGradient(org.deeplearning4j.nn.gradient.DefaultGradient) Gradient(org.deeplearning4j.nn.gradient.Gradient) Triple(org.deeplearning4j.berkeley.Triple) FrozenLayer(org.deeplearning4j.nn.layers.FrozenLayer) GraphVertex(org.deeplearning4j.nn.graph.vertex.GraphVertex) INDArray(org.nd4j.linalg.api.ndarray.INDArray) VertexIndices(org.deeplearning4j.nn.graph.vertex.VertexIndices) IOutputLayer(org.deeplearning4j.nn.api.layers.IOutputLayer)
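
calcBackpropGradients is protected, so the external errors mentioned in the javadoc (the reinforcement learning case) cannot be supplied directly from outside the class. One way to sketch that usage is a subclass; the class and method names here are hypothetical, not part of the library, and the sketch assumes the configured outputs are plain layers marked as outputs rather than IOutputLayer instances:

import org.deeplearning4j.nn.conf.ComputationGraphConfiguration;
import org.deeplearning4j.nn.gradient.Gradient;
import org.deeplearning4j.nn.graph.ComputationGraph;
import org.nd4j.linalg.api.ndarray.INDArray;

// Hypothetical subclass that backprops caller-supplied epsilons (RL-style training).
public class ExternalErrorGraph extends ComputationGraph {

    public ExternalErrorGraph(ComputationGraphConfiguration conf) {
        super(conf);
    }

    // Forward pass to populate the stored activations, then backprop the external
    // epsilons instead of output-layer label gradients.
    public Gradient gradientFromExternalErrors(INDArray[] inputs, INDArray... epsilons) {
        feedForward(inputs, true);              // public forward pass, train mode
        calcBackpropGradients(false, epsilons); // protected method shown above
        return gradient();                      // set at the end of calcBackpropGradients
    }
}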

Aggregations

GraphVertex (org.deeplearning4j.nn.graph.vertex.GraphVertex): 22 uses
INDArray (org.nd4j.linalg.api.ndarray.INDArray): 19 uses
VertexIndices (org.deeplearning4j.nn.graph.vertex.VertexIndices): 9 uses
Test (org.junit.Test): 9 uses
Layer (org.deeplearning4j.nn.api.Layer): 8 uses
IOutputLayer (org.deeplearning4j.nn.api.layers.IOutputLayer): 8 uses
FeedForwardLayer (org.deeplearning4j.nn.conf.layers.FeedForwardLayer): 8 uses
FrozenLayer (org.deeplearning4j.nn.layers.FrozenLayer): 7 uses
RecurrentLayer (org.deeplearning4j.nn.api.layers.RecurrentLayer): 6 uses
Gradient (org.deeplearning4j.nn.gradient.Gradient): 4 uses
ComputationGraph (org.deeplearning4j.nn.graph.ComputationGraph): 4 uses
MultiLayerNetwork (org.deeplearning4j.nn.multilayer.MultiLayerNetwork): 4 uses
AtomicInteger (java.util.concurrent.atomic.AtomicInteger): 2 uses
ComputationGraphConfiguration (org.deeplearning4j.nn.conf.ComputationGraphConfiguration): 2 uses
BaseOutputLayer (org.deeplearning4j.nn.conf.layers.BaseOutputLayer): 2 uses
SubsamplingLayer (org.deeplearning4j.nn.conf.layers.SubsamplingLayer): 2 uses
Pair (org.deeplearning4j.berkeley.Pair): 1 use
Triple (org.deeplearning4j.berkeley.Triple): 1 use
MaskState (org.deeplearning4j.nn.api.MaskState): 1 use
DuplicateToTimeSeriesVertex (org.deeplearning4j.nn.conf.graph.rnn.DuplicateToTimeSeriesVertex): 1 use