Search in sources :

Example 1 with Matrix

Use of Matrix in project tribuo by oracle.

the class CRFParameters method merge.

/**
 * Combines several gradient arrays into a single three-element update.
 * <p>
 * NOTE(review): this listing looks truncated; braces are unbalanced and the
 * statements that would accumulate into the bias and label-label updates are not
 * visible here. Confirm details against the full source.
 * @param gradients The gradient arrays to combine; element 1 of each is read as a {@link Matrix}.
 * @param size Used as the initial capacity of the list of sparse updates.
 * @return An array containing the bias update, the feature-label update and the label-label update, in that order.
 */
public Tensor[] merge(Tensor[][] gradients, int size) {
    // Accumulators sized from this object's biases and labelLabelWeights.
    DenseVector biasUpdate = new DenseVector(biases.size());
    List<DenseSparseMatrix> updates = new ArrayList<>(size);
    DenseMatrix denseUpdates = null;
    DenseMatrix labelLabelUpdate = new DenseMatrix(labelLabelWeights.getDimension1Size(), labelLabelWeights.getDimension2Size());
    for (int j = 0; j < gradients.length; j++) {
        // Element 1 of each gradient array is the matrix handled by this loop.
        Matrix tmpUpdate = (Matrix) gradients[j][1];
        if (tmpUpdate instanceof DenseSparseMatrix) {
            // Sparse updates are collected and merged in one pass after the loop.
            updates.add((DenseSparseMatrix) tmpUpdate);
        } else {
            // is dense
            if (denseUpdates == null) {
                // The first dense update seen becomes the dense accumulator.
                denseUpdates = (DenseMatrix) tmpUpdate;
            } else {
                // NOTE(review): the handling of second and later dense updates is missing from this excerpt.
    // Merge the combination of any dense and sparse updates
    Matrix featureLabelUpdate;
    if (updates.size() > 0) {
        featureLabelUpdate = merger.merge(updates.toArray(new DenseSparseMatrix[0]));
        if (denseUpdates != null) {
            // NOTE(review): as shown, the dense update simply replaces the merged sparse result;
            // a combining step appears to be missing from this excerpt. Confirm against the full source.
            featureLabelUpdate = denseUpdates;
    } else {
        // No sparse updates were collected, so the dense accumulator (possibly null) is used directly.
        featureLabelUpdate = denseUpdates;
    return new Tensor[] { biasUpdate, featureLabelUpdate, labelLabelUpdate };
Also used : DenseMatrix( DenseSparseMatrix( Matrix( Tensor( ArrayList(java.util.ArrayList) DenseSparseMatrix( DenseVector( DenseMatrix(

Example 2 with Matrix

Use of Matrix in project tribuo by oracle.

the class AbstractLinearSGDModel method writeONNXGraph.

/**
 * Writes this {@link org.tribuo.Model} into {@link OnnxMl.GraphProto.Builder} inside the input's
 * {@link ONNXContext}.
 * @param input The input to the model graph.
 * @return the output node of the model graph.
 */
public ONNXNode writeONNXGraph(ONNXRef<?> input) {
    // NOTE(review): this listing looks truncated; the closing braces of the loops, lambdas and method are not visible.
    ONNXContext onnx = input.onnxContext();
    // The first model parameter is used as the weight matrix.
    Matrix weightMatrix = (Matrix) modelParameters.get()[0];
    // Weights tensor, declared with shape (featureIDMap.size(), outputIDInfo.size()).
    // It is filled from every column of the weight matrix except the last, with the column
    // index j as the outer loop and the row index i as the inner loop.
    ONNXInitializer weights = onnx.floatTensor("linear_sgd_weights", Arrays.asList(featureIDMap.size(), outputIDInfo.size()), fb -> {
        for (int j = 0; j < weightMatrix.getDimension2Size() - 1; j++) {
            for (int i = 0; i < weightMatrix.getDimension1Size(); i++) {
                fb.put((float) weightMatrix.get(i, j));
    // Bias tensor: filled from the last column of the weight matrix, one entry per row.
    ONNXInitializer bias = onnx.floatTensor("linear_sgd_bias", Collections.singletonList(outputIDInfo.size()), fb -> {
        for (int i = 0; i < weightMatrix.getDimension1Size(); i++) {
            fb.put((float) weightMatrix.get(i, weightMatrix.getDimension2Size() - 1));
    // Apply a GEMM operator to the input with the weights and bias, then pass the result to onnxOutput.
    return onnxOutput(input.apply(ONNXOperators.GEMM, Arrays.asList(weights, bias)));
Also used : ONNXContext(org.tribuo.util.onnx.ONNXContext) Matrix( DenseMatrix( ONNXInitializer(org.tribuo.util.onnx.ONNXInitializer)

Example 3 with Matrix

Use of Matrix in project tribuo by oracle.

the class FMParameters method gradients.

/**
 * Generate the gradients for a particular feature vector given
 * the loss and the per output gradients.
 * <p>
 * This method returns a {@link Tensor} array with numLabels + 2 elements.
 * @param score    The Pair returned by the objective.
 * @param features The feature vector.
 * @return A {@link Tensor} array containing all the gradients.
 */
public Tensor[] gradients(Pair<Double, SGDVector> score, SGDVector features) {
    // NOTE(review): this listing looks truncated; several closing braces are not visible.
    // One gradient slot per entry in weights.
    Tensor[] gradients = new Tensor[weights.length];
    // The second element of the pair is the per output gradient.
    SGDVector outputGradient = score.getB();
    // Bias gradient
    // Slot 0 always receives a fresh vector: a sparse gradient is densified, anything else is copied.
    if (outputGradient instanceof SparseVector) {
        gradients[0] = ((SparseVector) outputGradient).densify();
    } else {
        gradients[0] = outputGradient.copy();
    // Feature gradients
    gradients[1] = outputGradient.outer(features);
    // per label
    // Slots 2 and up pair weights[i] with element i - 2 of the output gradient.
    for (int i = 2; i < weights.length; i++) {
        double curOutputGradient = outputGradient.get(i - 2);
        DenseMatrix curFactors = (DenseMatrix) weights[i];
        if (curOutputGradient != 0.0) {
            // compute /sum_j v_{j,f}x_j
            SGDVector factorSum = curFactors.leftMultiply(features);
            // grad_f: dy/d0 * (x_i * factorSum_f - v_{i,f} * x_i * x_i)
            Matrix factorGradMatrix;
            // Seed each of the numFactors rows with the feature values, keeping the
            // sparse or dense representation of the input.
            if (features instanceof SparseVector) {
                List<SparseVector> vectors = new ArrayList<>(numFactors);
                for (int j = 0; j < numFactors; j++) {
                    vectors.add(((SparseVector) features).copy());
                factorGradMatrix = new DenseSparseMatrix(vectors);
            } else {
                factorGradMatrix = new DenseMatrix(numFactors, features.size());
                for (int j = 0; j < numFactors; j++) {
                    for (int k = 0; k < features.size(); k++) {
                        factorGradMatrix.set(j, k, features.get(k));
            for (int j = 0; j < numFactors; j++) {
                // This gets a mutable view of the row
                SGDVector curFactorGrad = factorGradMatrix.getRow(j);
                double curFactorSum = factorSum.get(j);
                // Effectively final copy of j so the lambda below can capture it.
                final int jFinal = j;
                // Compute the gradient for this element of the factor vector
                curFactorGrad.foreachIndexedInPlace((Integer idx, Double a) -> a * curFactorSum - curFactors.get(jFinal, idx) * a * a);
                // Multiply by the output gradient
                // NOTE(review): the scaling statement the comment above refers to is not visible in this excerpt.
            gradients[i] = factorGradMatrix;
        } else {
            // If the output gradient is 0.0 then all the factor gradients are zero.
            // Technically with regularization we should shrink the weights for the specified features.
            gradients[i] = new DenseSparseMatrix(numFactors, features.size());
    return gradients;
Also used : Tensor( ArrayList(java.util.ArrayList) DenseSparseMatrix( SparseVector( DenseMatrix( DenseMatrix( DenseSparseMatrix( Matrix( SGDVector(

Example 4 with Matrix

Use of Matrix in project tribuo by oracle.

the class ShrinkingMatrix method intersectAndAddInPlace.

/**
 * Adds the elements of another matrix into this one in place, applying {@code f}
 * to each of the other matrix's values before it is added.
 * <p>
 * NOTE(review): this listing looks truncated; closing braces and the body of the
 * reprojection branch are not visible. Confirm details against the full source.
 * @param other The tensor to add; must be a {@link Matrix} with the same dimensions as this one.
 * @param f The function applied to each value of {@code other} before the addition.
 * @throws IllegalStateException if {@code other} is not a {@link Matrix} or its dimensions differ.
 */
public void intersectAndAddInPlace(Tensor other, DoubleUnaryOperator f) {
    if (other instanceof Matrix) {
        Matrix otherMat = (Matrix) other;
        if ((dim1 == otherMat.getDimension1Size()) && (dim2 == otherMat.getDimension2Size())) {
            // NOTE(review): shrinkage is computed here but not used in the visible lines;
            // the statement applying it is presumably among the omitted lines.
            double shrinkage = scaleShrinking ? 1.0 - (baseRate / iteration) : 1.0 - baseRate;
            // Only the elements yielded by iterating over the other matrix are touched.
            for (MatrixTuple tuple : otherMat) {
                double update = f.applyAsDouble(tuple.value);
                // The stored value times multiplier is the value the update is added to.
                double oldValue = values[tuple.i][tuple.j] * multiplier;
                double newValue = oldValue + update;
                // Keep the running squared norm in step by swapping the old contribution for the new one.
                squaredTwoNorm -= oldValue * oldValue;
                squaredTwoNorm += newValue * newValue;
                // Store the result back in the scaled representation.
                values[tuple.i][tuple.j] = newValue / multiplier;
            if (reproject) {
                double projectionNormaliser = (1.0 / lambdaSqrt) / twoNorm();
                if (projectionNormaliser < 1.0) {
                    // NOTE(review): the body of this branch is missing from the excerpt.
        } else {
            throw new IllegalStateException("Matrices are not the same size, this(" + dim1 + "," + dim2 + "), other(" + otherMat.getDimension1Size() + "," + otherMat.getDimension2Size() + ")");
    } else {
        throw new IllegalStateException("Adding a non-Matrix to a Matrix");
Also used : DenseMatrix( Matrix( MatrixTuple(

Example 5 with Matrix

Use of Matrix in project tribuo by oracle.

the class CRFParameters method valueAndGradient.

/**
 * Generates predictions based on the input features and labels, then scores those predictions to
 * produce a loss for the example and a gradient update.
 * <p>
 * Assumes all the features in this example are either SparseVector or DenseVector.
 * Mixing the two will cause undefined behaviour.
 * @param features The per token {@link SGDVector} of features.
 * @param labels The per token ground truth labels.
 * @return A {@link Pair} containing the loss for this example and the associated gradient.
 */
public Pair<Double, Tensor[]> valueAndGradient(SGDVector[] features, int[] labels) {
    // NOTE(review): this listing looks truncated; several closing braces and some statements are not visible.
    ChainHelper.ChainCliqueValues scores = getCliqueValues(features);
    // Infer the marginal distribution over labels for each token.
    ChainHelper.ChainBPResults bpResults = ChainHelper.beliefPropagation(scores);
    double logZ = bpResults.logZ;
    DenseVector[] alphas = bpResults.alphas;
    DenseVector[] betas = bpResults.betas;
    // Calculate the gradients for the parameters.
    // Slot 0 is the bias gradient, slot 1 the feature - label gradient and slot 2 the label - label gradient.
    Tensor[] gradient = new Tensor[3];
    // One sparse feature gradient per token, merged after the loop.
    DenseSparseMatrix[] featureGradients = new DenseSparseMatrix[features.length];
    DenseMatrix denseFeatureGradients = null;
    boolean sparseFeatures = false;
    gradient[0] = new DenseVector(biases.size());
    Matrix transGradient = new DenseMatrix(numLabels, numLabels);
    gradient[2] = transGradient;
    // The score starts at -logZ and the true label and transition scores are added to it below.
    double score = -logZ;
    for (int i = 0; i < features.length; i++) {
        int curLabel = labels[i];
        // Increment the loss based on the score for the true label.
        DenseVector curLocalScores = scores.localValues[i];
        score += curLocalScores.get(curLabel);
        // Generate the predicted local marginal from the BP run.
        DenseVector curAlpha = alphas[i];
        DenseVector curBeta = betas[i];
        DenseVector localMarginal = curAlpha.add(curBeta);
        // NOTE(review): any normalisation or sign change applied to localMarginal, and the
        // update of gradient[0], are not visible in this excerpt.
        // Generate the gradient for the biases based on the true label and predicted label.
        localMarginal.add(curLabel, 1.0);
        // Generate the gradient for the feature - label weights
        Matrix tmpFeatureGradient = localMarginal.outer(features[i]);
        if (tmpFeatureGradient instanceof DenseSparseMatrix) {
            featureGradients[i] = (DenseSparseMatrix) tmpFeatureGradient;
            sparseFeatures = true;
        } else {
            if (denseFeatureGradients == null) {
                // The first dense gradient seen becomes the dense accumulator.
                denseFeatureGradients = (DenseMatrix) tmpFeatureGradient;
            } else {
                // NOTE(review): the handling of later dense gradients is missing from this excerpt.
        // If the sequence has more than one token generate the gradient for the label - label transitions.
        if (i >= 1) {
            DenseVector prevAlpha = alphas[i - 1];
            for (int ii = 0; ii < numLabels; ii++) {
                double prevAlphaVal = prevAlpha.get(ii);
                for (int jj = 0; jj < numLabels; jj++) {
                    // Negated exponential of: previous alpha + transition weight + current beta
                    // + current local score - logZ.
                    double update = -Math.exp(prevAlphaVal + labelLabelWeights.get(ii, jj) + curBeta.get(jj) + curLocalScores.get(jj) - logZ);
                    transGradient.add(ii, jj, update);
            int prevLabel = labels[i - 1];
            // Increment the loss based on the transition from the previous predicted label to the true label.
            score += labelLabelWeights.get(prevLabel, curLabel);
            // Add one to the transition between the previous and current labels from the labels array.
            transGradient.add(prevLabel, curLabel, 1.0);
    if (sparseFeatures) {
        // Merge together all the sparse feature - label gradients.
        gradient[1] = merger.merge(featureGradients);
        // throw if we found any dense features as well as the sparse.
        if (denseFeatureGradients != null) {
            throw new IllegalStateException("Mixture of dense and sparse features found.");
    } else {
        // No sparse gradients were produced, so the dense accumulator (possibly null) is used directly.
        gradient[1] = denseFeatureGradients;
    return new Pair<>(score, gradient);
Also used : Tensor( DenseSparseMatrix( DenseMatrix( DenseMatrix( DenseSparseMatrix( Matrix( DenseVector( Pair(


DenseMatrix ( Matrix ( DenseSparseMatrix ( Tensor ( ArrayList (java.util.ArrayList)2 DenseVector ( Pair ( MatrixTuple ( SGDVector ( SparseVector ( ONNXContext (org.tribuo.util.onnx.ONNXContext)1 ONNXInitializer (org.tribuo.util.onnx.ONNXInitializer)1