Search in sources :

Example 1 with Matrix

use of org.tribuo.math.la.Matrix in project tribuo by oracle.

the class CRFParameters method merge.

/**
 * Sums a batch of per-example gradient arrays into a single gradient array.
 * <p>
 * Each element of {@code gradients} is read as a three element array holding, in order,
 * the bias gradient, the feature-label gradient and the label-label gradient.
 * Only the first {@code size} elements are merged, so {@code gradients} may contain
 * unused trailing slots.
 * <p>
 * Note the first dense feature-label gradient encountered is reused as the accumulator
 * and is therefore modified in place.
 *
 * @param gradients The per-example gradient arrays.
 * @param size The number of elements of {@code gradients} to merge.
 * @return A three element array containing the merged bias, feature-label and label-label
 * updates. The feature-label element is {@code null} when no gradients were merged.
 */
@Override
public Tensor[] merge(Tensor[][] gradients, int size) {
    // Honour the requested size, but never read past the end of the supplied array.
    final int numGradients = Math.min(size, gradients.length);
    DenseVector biasUpdate = new DenseVector(biases.size());
    List<DenseSparseMatrix> updates = new ArrayList<>(numGradients);
    DenseMatrix denseUpdates = null;
    DenseMatrix labelLabelUpdate = new DenseMatrix(labelLabelWeights.getDimension1Size(), labelLabelWeights.getDimension2Size());
    for (int j = 0; j < numGradients; j++) {
        biasUpdate.intersectAndAddInPlace(gradients[j][0]);
        Matrix tmpUpdate = (Matrix) gradients[j][1];
        if (tmpUpdate instanceof DenseSparseMatrix) {
            // Sparse updates are collected and merged in one pass after the loop.
            updates.add((DenseSparseMatrix) tmpUpdate);
        } else {
            // Anything else is treated as dense; the first one becomes the accumulator.
            if (denseUpdates == null) {
                denseUpdates = (DenseMatrix) tmpUpdate;
            } else {
                denseUpdates.intersectAndAddInPlace(tmpUpdate);
            }
        }
        labelLabelUpdate.intersectAndAddInPlace(gradients[j][2]);
    }
    // Merge the combination of any dense and sparse updates
    Matrix featureLabelUpdate;
    if (!updates.isEmpty()) {
        featureLabelUpdate = merger.merge(updates.toArray(new DenseSparseMatrix[0]));
        if (denseUpdates != null) {
            // Fold the merged sparse update into the dense accumulator.
            denseUpdates.intersectAndAddInPlace(featureLabelUpdate);
            featureLabelUpdate = denseUpdates;
        }
    } else {
        featureLabelUpdate = denseUpdates;
    }
    return new Tensor[] { biasUpdate, featureLabelUpdate, labelLabelUpdate };
}
Also used : DenseMatrix(org.tribuo.math.la.DenseMatrix) DenseSparseMatrix(org.tribuo.math.la.DenseSparseMatrix) Matrix(org.tribuo.math.la.Matrix) Tensor(org.tribuo.math.la.Tensor) ArrayList(java.util.ArrayList) DenseSparseMatrix(org.tribuo.math.la.DenseSparseMatrix) DenseVector(org.tribuo.math.la.DenseVector) DenseMatrix(org.tribuo.math.la.DenseMatrix)

Example 2 with Matrix

use of org.tribuo.math.la.Matrix in project tribuo by oracle.

the class AbstractLinearSGDModel method writeONNXGraph.

/**
 * Emits this model into the ONNX graph held by the input's {@link ONNXContext}.
 * <p>
 * The first parameter tensor is split into two float initializers: every column except
 * the last is written to {@code linear_sgd_weights}, and the last column is written to
 * {@code linear_sgd_bias}. Both are then passed to a GEMM operation applied to the input.
 * @param input The input to the model graph.
 * @return the output node of the model graph.
 */
public ONNXNode writeONNXGraph(ONNXRef<?> input) {
    ONNXContext context = input.onnxContext();
    Matrix parameters = (Matrix) modelParameters.get()[0];

    ONNXInitializer weightInit = context.floatTensor("linear_sgd_weights", Arrays.asList(featureIDMap.size(), outputIDInfo.size()), buffer -> {
        // The final column holds the bias, so it is excluded from the weights.
        int weightColumns = parameters.getDimension2Size() - 1;
        int rows = parameters.getDimension1Size();
        for (int col = 0; col < weightColumns; col++) {
            for (int row = 0; row < rows; row++) {
                buffer.put((float) parameters.get(row, col));
            }
        }
    });

    ONNXInitializer biasInit = context.floatTensor("linear_sgd_bias", Collections.singletonList(outputIDInfo.size()), buffer -> {
        int biasColumn = parameters.getDimension2Size() - 1;
        int rows = parameters.getDimension1Size();
        for (int row = 0; row < rows; row++) {
            buffer.put((float) parameters.get(row, biasColumn));
        }
    });

    return onnxOutput(input.apply(ONNXOperators.GEMM, Arrays.asList(weightInit, biasInit)));
}
Also used : ONNXContext(org.tribuo.util.onnx.ONNXContext) Matrix(org.tribuo.math.la.Matrix) DenseMatrix(org.tribuo.math.la.DenseMatrix) ONNXInitializer(org.tribuo.util.onnx.ONNXInitializer)

Example 3 with Matrix

use of org.tribuo.math.la.Matrix in project tribuo by oracle.

the class FMParameters method gradients.

/**
 * Computes the gradient of every parameter tensor for a single example.
 * <p>
 * The returned array has one entry per element of {@code weights}: index 0 is a dense
 * copy of the output gradient (the bias gradient), index 1 is the outer product of the
 * output gradient with the features, and each later index holds the factor gradient
 * matrix for the corresponding output dimension.
 *
 * @param score    The Pair returned by the objective.
 * @param features The feature vector.
 * @return A {@link Tensor} array containing all the gradients.
 */
@Override
public Tensor[] gradients(Pair<Double, SGDVector> score, SGDVector features) {
    final Tensor[] result = new Tensor[weights.length];
    final SGDVector lossGradient = score.getB();

    // Bias gradient, always stored densely.
    result[0] = (lossGradient instanceof SparseVector)
            ? ((SparseVector) lossGradient).densify()
            : lossGradient.copy();

    // Feature gradients
    result[1] = lossGradient.outer(features);

    // One factor matrix per output dimension, stored from index 2 onwards.
    for (int paramIdx = 2; paramIdx < weights.length; paramIdx++) {
        final double labelGradient = lossGradient.get(paramIdx - 2);
        final DenseMatrix factors = (DenseMatrix) weights[paramIdx];

        if (labelGradient == 0.0) {
            // If the output gradient is 0.0 then all the factor gradients are zero.
            // Technically with regularization we should shrink the weights for the specified features.
            result[paramIdx] = new DenseSparseMatrix(numFactors, features.size());
            continue;
        }

        // compute /sum_j v_{j,f}x_j
        final SGDVector factorSum = factors.leftMultiply(features);

        // grad_f: dy/d0 * (x_i * factorSum_f - v_{i,f} * x_i * x_i)
        // Start with every row equal to the feature vector, then transform each row in place.
        final Matrix factorGrad;
        if (features instanceof SparseVector) {
            final SparseVector sparseFeatures = (SparseVector) features;
            final List<SparseVector> rows = new ArrayList<>(numFactors);
            for (int row = 0; row < numFactors; row++) {
                rows.add(sparseFeatures.copy());
            }
            factorGrad = new DenseSparseMatrix(rows);
        } else {
            final DenseMatrix denseGrad = new DenseMatrix(numFactors, features.size());
            for (int row = 0; row < numFactors; row++) {
                for (int col = 0; col < features.size(); col++) {
                    denseGrad.set(row, col, features.get(col));
                }
            }
            factorGrad = denseGrad;
        }

        for (int row = 0; row < numFactors; row++) {
            // This gets a mutable view of the row
            final SGDVector rowGrad = factorGrad.getRow(row);
            final double rowFactorSum = factorSum.get(row);
            final int factorIdx = row;
            // Compute the gradient for this element of the factor vector
            rowGrad.foreachIndexedInPlace((Integer idx, Double a) -> a * rowFactorSum - factors.get(factorIdx, idx) * a * a);
            // Multiply by the output gradient
            rowGrad.scaleInPlace(labelGradient);
        }
        result[paramIdx] = factorGrad;
    }
    return result;
}
Also used : Tensor(org.tribuo.math.la.Tensor) ArrayList(java.util.ArrayList) DenseSparseMatrix(org.tribuo.math.la.DenseSparseMatrix) SparseVector(org.tribuo.math.la.SparseVector) DenseMatrix(org.tribuo.math.la.DenseMatrix) DenseMatrix(org.tribuo.math.la.DenseMatrix) DenseSparseMatrix(org.tribuo.math.la.DenseSparseMatrix) Matrix(org.tribuo.math.la.Matrix) SGDVector(org.tribuo.math.la.SGDVector)

Example 4 with Matrix

use of org.tribuo.math.la.Matrix in project tribuo by oracle.

the class ShrinkingMatrix method intersectAndAddInPlace.

/**
 * Shrinks this matrix and then adds {@code f} applied to each element of {@code other}.
 * <p>
 * The shrinkage factor is {@code 1 - baseRate / iteration} when {@code scaleShrinking}
 * is set and {@code 1 - baseRate} otherwise. The stored values are kept divided by
 * {@code multiplier}, and {@code squaredTwoNorm} is updated incrementally per element.
 * When {@code reproject} is set, the matrix is scaled down whenever the projection
 * normaliser is below one. The iteration counter is incremented after a successful update.
 *
 * @param other The tensor to add; must be a {@link Matrix} with the same dimensions as this one.
 * @param f The function applied to each element of {@code other} before it is added.
 * @throws IllegalStateException If {@code other} is not a {@link Matrix} or has different dimensions.
 */
@Override
public void intersectAndAddInPlace(Tensor other, DoubleUnaryOperator f) {
    if (!(other instanceof Matrix)) {
        throw new IllegalStateException("Adding a non-Matrix to a Matrix");
    }
    final Matrix that = (Matrix) other;
    final boolean sameShape = (dim1 == that.getDimension1Size()) && (dim2 == that.getDimension2Size());
    if (!sameShape) {
        throw new IllegalStateException("Matrices are not the same size, this(" + dim1 + "," + dim2 + "), other(" + that.getDimension1Size() + "," + that.getDimension2Size() + ")");
    }

    // Apply the shrinkage step before the additive update.
    final double shrinkage;
    if (scaleShrinking) {
        shrinkage = 1.0 - (baseRate / iteration);
    } else {
        shrinkage = 1.0 - baseRate;
    }
    scaleInPlace(shrinkage);

    for (MatrixTuple entry : that) {
        final double delta = f.applyAsDouble(entry.value);
        // Stored values are implicitly scaled by the multiplier.
        final double before = values[entry.i][entry.j] * multiplier;
        final double after = before + delta;
        // Swap this element's contribution to the running squared norm.
        squaredTwoNorm -= before * before;
        squaredTwoNorm += after * after;
        values[entry.i][entry.j] = after / multiplier;
    }

    if (reproject) {
        final double projectionNormaliser = (1.0 / lambdaSqrt) / twoNorm();
        if (projectionNormaliser < 1.0) {
            scaleInPlace(projectionNormaliser);
        }
    }
    iteration++;
}
Also used : DenseMatrix(org.tribuo.math.la.DenseMatrix) Matrix(org.tribuo.math.la.Matrix) MatrixTuple(org.tribuo.math.la.MatrixTuple)

Example 5 with Matrix

use of org.tribuo.math.la.Matrix in project tribuo by oracle.

the class CRFParameters method valueAndGradient.

/**
 * Generates predictions based on the input features and labels, then scores those predictions to
 * produce a loss for the example and a gradient update.
 * <p>
 * The returned gradient array has three elements: the bias gradient, the feature-label
 * gradient and the label-label transition gradient. The feature-label element is
 * {@code null} when {@code features} is empty.
 * <p>
 * Assumes all the features in this example are either SparseVector or DenseVector.
 * A mixture of sparse and dense feature gradients is rejected with an exception.
 * @param features The per token {@link SGDVector} of features.
 * @param labels The per token ground truth labels.
 * @return A {@link Pair} containing the loss for this example and the associated gradient.
 * @throws IllegalStateException If both sparse and dense feature gradients were produced.
 */
public Pair<Double, Tensor[]> valueAndGradient(SGDVector[] features, int[] labels) {
    ChainHelper.ChainCliqueValues scores = getCliqueValues(features);
    // Infer the marginal distribution over labels for each token.
    ChainHelper.ChainBPResults bpResults = ChainHelper.beliefPropagation(scores);
    double logZ = bpResults.logZ;
    DenseVector[] alphas = bpResults.alphas;
    DenseVector[] betas = bpResults.betas;
    // Calculate the gradients for the parameters.
    Tensor[] gradient = new Tensor[3];
    DenseSparseMatrix[] featureGradients = new DenseSparseMatrix[features.length];
    DenseMatrix denseFeatureGradients = null;
    boolean sparseFeatures = false;
    gradient[0] = new DenseVector(biases.size());
    Matrix transGradient = new DenseMatrix(numLabels, numLabels);
    gradient[2] = transGradient;
    double score = -logZ;
    for (int i = 0; i < features.length; i++) {
        int curLabel = labels[i];
        // Increment the loss based on the score for the true label.
        DenseVector curLocalScores = scores.localValues[i];
        score += curLocalScores.get(curLabel);
        // Generate the predicted local marginal from the BP run.
        DenseVector curAlpha = alphas[i];
        DenseVector curBeta = betas[i];
        DenseVector localMarginal = curAlpha.add(curBeta);
        // Generate the gradient for the biases based on the true label and predicted label.
        localMarginal.expNormalize(logZ);
        localMarginal.scaleInPlace(-1.0);
        localMarginal.add(curLabel, 1.0);
        gradient[0].intersectAndAddInPlace(localMarginal);
        // Generate the gradient for the feature - label weights
        Matrix tmpFeatureGradient = localMarginal.outer(features[i]);
        if (tmpFeatureGradient instanceof DenseSparseMatrix) {
            // Sparse gradients are kept per token and merged once after the loop.
            featureGradients[i] = (DenseSparseMatrix) tmpFeatureGradient;
            sparseFeatures = true;
        } else {
            // Dense gradients are accumulated into the first one seen.
            if (denseFeatureGradients == null) {
                denseFeatureGradients = (DenseMatrix) tmpFeatureGradient;
            } else {
                denseFeatureGradients.intersectAndAddInPlace(tmpFeatureGradient);
            }
        }
        // If the sequence has more than one token generate the gradient for the label - label transitions.
        if (i >= 1) {
            DenseVector prevAlpha = alphas[i - 1];
            for (int ii = 0; ii < numLabels; ii++) {
                double prevAlphaVal = prevAlpha.get(ii);
                for (int jj = 0; jj < numLabels; jj++) {
                    double update = -Math.exp(prevAlphaVal + labelLabelWeights.get(ii, jj) + curBeta.get(jj) + curLocalScores.get(jj) - logZ);
                    transGradient.add(ii, jj, update);
                }
            }
            int prevLabel = labels[i - 1];
            // Increment the loss based on the transition from the previous true label to the current true label.
            score += labelLabelWeights.get(prevLabel, curLabel);
            transGradient.add(prevLabel, curLabel, 1.0);
        }
    }
    if (sparseFeatures) {
        // Reject mixed inputs before merging: with a mixture the sparse gradient array has
        // unset (null) slots for the dense tokens, and those must not reach the merger.
        if (denseFeatureGradients != null) {
            throw new IllegalStateException("Mixture of dense and sparse features found.");
        }
        // Merge together all the sparse feature - label gradients.
        gradient[1] = merger.merge(featureGradients);
    } else {
        gradient[1] = denseFeatureGradients;
    }
    return new Pair<>(score, gradient);
}
Also used : Tensor(org.tribuo.math.la.Tensor) DenseSparseMatrix(org.tribuo.math.la.DenseSparseMatrix) DenseMatrix(org.tribuo.math.la.DenseMatrix) DenseMatrix(org.tribuo.math.la.DenseMatrix) DenseSparseMatrix(org.tribuo.math.la.DenseSparseMatrix) Matrix(org.tribuo.math.la.Matrix) DenseVector(org.tribuo.math.la.DenseVector) Pair(com.oracle.labs.mlrg.olcut.util.Pair)

Aggregations

DenseMatrix (org.tribuo.math.la.DenseMatrix)5 Matrix (org.tribuo.math.la.Matrix)5 DenseSparseMatrix (org.tribuo.math.la.DenseSparseMatrix)3 Tensor (org.tribuo.math.la.Tensor)3 ArrayList (java.util.ArrayList)2 DenseVector (org.tribuo.math.la.DenseVector)2 Pair (com.oracle.labs.mlrg.olcut.util.Pair)1 MatrixTuple (org.tribuo.math.la.MatrixTuple)1 SGDVector (org.tribuo.math.la.SGDVector)1 SparseVector (org.tribuo.math.la.SparseVector)1 ONNXContext (org.tribuo.util.onnx.ONNXContext)1 ONNXInitializer (org.tribuo.util.onnx.ONNXInitializer)1