Use of org.tribuo.math.la.Matrix in project tribuo by oracle.
The class CRFParameters, method merge:
@Override
public Tensor[] merge(Tensor[][] gradients, int size) {
    DenseVector biasUpdate = new DenseVector(biases.size());
    List<DenseSparseMatrix> updates = new ArrayList<>(size);
    DenseMatrix denseUpdates = null;
    DenseMatrix labelLabelUpdate = new DenseMatrix(labelLabelWeights.getDimension1Size(), labelLabelWeights.getDimension2Size());
    for (int j = 0; j < gradients.length; j++) {
        biasUpdate.intersectAndAddInPlace(gradients[j][0]);
        Matrix tmpUpdate = (Matrix) gradients[j][1];
        if (tmpUpdate instanceof DenseSparseMatrix) {
            updates.add((DenseSparseMatrix) tmpUpdate);
        } else {
            // is dense
            if (denseUpdates == null) {
                denseUpdates = (DenseMatrix) tmpUpdate;
            } else {
                denseUpdates.intersectAndAddInPlace(tmpUpdate);
            }
        }
        labelLabelUpdate.intersectAndAddInPlace(gradients[j][2]);
    }
    // Merge the combination of any dense and sparse updates
    Matrix featureLabelUpdate;
    if (updates.size() > 0) {
        featureLabelUpdate = merger.merge(updates.toArray(new DenseSparseMatrix[0]));
        if (denseUpdates != null) {
            denseUpdates.intersectAndAddInPlace(featureLabelUpdate);
            featureLabelUpdate = denseUpdates;
        }
    } else {
        featureLabelUpdate = denseUpdates;
    }
    return new Tensor[] { biasUpdate, featureLabelUpdate, labelLabelUpdate };
}
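For context, here is a minimal sketch of the sparse path through merge: summing two per-example DenseSparseMatrix gradients with a Merger. It assumes HeapMerger from org.tribuo.math.util as the Merger implementation (the snippet does not show which one CRFParameters holds), and the 3x5 shape and values are illustrative.

import java.util.Arrays;
import org.tribuo.math.la.DenseSparseMatrix;
import org.tribuo.math.la.SparseVector;
import org.tribuo.math.util.HeapMerger;
import org.tribuo.math.util.Merger;

public class MergeSketch {
    public static void main(String[] args) {
        Merger merger = new HeapMerger();
        // Each row is the feature - label gradient for one label; only active features are stored.
        DenseSparseMatrix gradA = new DenseSparseMatrix(Arrays.asList(
                SparseVector.createSparseVector(5, new int[]{0, 2}, new double[]{0.5, -0.25}),
                SparseVector.createSparseVector(5, new int[]{2}, new double[]{1.0}),
                SparseVector.createSparseVector(5, new int[]{4}, new double[]{-0.5})));
        DenseSparseMatrix gradB = new DenseSparseMatrix(Arrays.asList(
                SparseVector.createSparseVector(5, new int[]{0}, new double[]{0.25}),
                SparseVector.createSparseVector(5, new int[]{1, 2}, new double[]{0.75, -1.0}),
                SparseVector.createSparseVector(5, new int[]{3}, new double[]{0.125})));
        // The merger sums the matrices element-wise without densifying them.
        DenseSparseMatrix merged = merger.merge(new DenseSparseMatrix[]{gradA, gradB});
        System.out.println(merged.get(0, 0)); // 0.75
    }
}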
Use of org.tribuo.math.la.Matrix in project tribuo by oracle.
The class AbstractLinearSGDModel, method writeONNXGraph:
/**
 * Writes this {@link org.tribuo.Model} into the {@link OnnxMl.GraphProto.Builder} inside the input's
 * {@link ONNXContext}.
 * @param input The input to the model graph.
 * @return The output node of the model graph.
 */
public ONNXNode writeONNXGraph(ONNXRef<?> input) {
    ONNXContext onnx = input.onnxContext();
    // The parameter matrix is (numOutputs x numFeatures+1); the final column holds the biases.
    Matrix weightMatrix = (Matrix) modelParameters.get()[0];
    // Write the weights transposed, as the ONNX initializer is (numFeatures x numOutputs).
    ONNXInitializer weights = onnx.floatTensor("linear_sgd_weights", Arrays.asList(featureIDMap.size(), outputIDInfo.size()), fb -> {
        for (int j = 0; j < weightMatrix.getDimension2Size() - 1; j++) {
            for (int i = 0; i < weightMatrix.getDimension1Size(); i++) {
                fb.put((float) weightMatrix.get(i, j));
            }
        }
    });
    ONNXInitializer bias = onnx.floatTensor("linear_sgd_bias", Collections.singletonList(outputIDInfo.size()), fb -> {
        for (int i = 0; i < weightMatrix.getDimension1Size(); i++) {
            fb.put((float) weightMatrix.get(i, weightMatrix.getDimension2Size() - 1));
        }
    });
    return onnxOutput(input.apply(ONNXOperators.GEMM, Arrays.asList(weights, bias)));
}
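To make the layout concrete, here is a minimal sketch of the convention the method relies on: the parameter matrix is (numOutputs x numFeatures+1) with the bias in the final column, and the weights initializer is filled feature-major, i.e. transposed. The 2x3 sizes and values are illustrative, not taken from the original.

import java.util.Arrays;
import org.tribuo.math.la.DenseMatrix;

public class LayoutSketch {
    public static void main(String[] args) {
        int numOutputs = 2;
        int numFeatures = 3;
        DenseMatrix weightMatrix = new DenseMatrix(numOutputs, numFeatures + 1);
        for (int i = 0; i < numOutputs; i++) {
            for (int j = 0; j < numFeatures + 1; j++) {
                weightMatrix.set(i, j, i * 10 + j); // recognisable values
            }
        }
        // Same traversal as the floatTensor lambda above: columns (features)
        // outermost, rows (outputs) innermost, skipping the bias column.
        float[] flat = new float[numFeatures * numOutputs];
        int idx = 0;
        for (int j = 0; j < weightMatrix.getDimension2Size() - 1; j++) {
            for (int i = 0; i < weightMatrix.getDimension1Size(); i++) {
                flat[idx++] = (float) weightMatrix.get(i, j);
            }
        }
        // Prints [0.0, 10.0, 1.0, 11.0, 2.0, 12.0]; the bias column (3, 13) is emitted separately.
        System.out.println(Arrays.toString(flat));
    }
}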
Use of org.tribuo.math.la.Matrix in project tribuo by oracle.
The class FMParameters, method gradients:
/**
 * Generates the gradients for a particular feature vector given
 * the loss and the per-output gradients.
 * <p>
 * This method returns a {@link Tensor} array with numLabels + 2 elements.
 *
 * @param score The Pair returned by the objective.
 * @param features The feature vector.
 * @return A {@link Tensor} array containing all the gradients.
 */
@Override
public Tensor[] gradients(Pair<Double, SGDVector> score, SGDVector features) {
    Tensor[] gradients = new Tensor[weights.length];
    SGDVector outputGradient = score.getB();
    // Bias gradient
    if (outputGradient instanceof SparseVector) {
        gradients[0] = ((SparseVector) outputGradient).densify();
    } else {
        gradients[0] = outputGradient.copy();
    }
    // Feature gradients
    gradients[1] = outputGradient.outer(features);
    // Factor gradients, one matrix per label
    for (int i = 2; i < weights.length; i++) {
        double curOutputGradient = outputGradient.get(i - 2);
        DenseMatrix curFactors = (DenseMatrix) weights[i];
        if (curOutputGradient != 0.0) {
            // Compute factorSum_f = \sum_j v_{j,f} * x_j
            SGDVector factorSum = curFactors.leftMultiply(features);
            // grad_{i,f} = dL/dy * (x_i * factorSum_f - v_{i,f} * x_i * x_i)
            Matrix factorGradMatrix;
            if (features instanceof SparseVector) {
                List<SparseVector> vectors = new ArrayList<>(numFactors);
                for (int j = 0; j < numFactors; j++) {
                    vectors.add(((SparseVector) features).copy());
                }
                factorGradMatrix = new DenseSparseMatrix(vectors);
            } else {
                factorGradMatrix = new DenseMatrix(numFactors, features.size());
                for (int j = 0; j < numFactors; j++) {
                    for (int k = 0; k < features.size(); k++) {
                        factorGradMatrix.set(j, k, features.get(k));
                    }
                }
            }
            for (int j = 0; j < numFactors; j++) {
                // This gets a mutable view of the row
                SGDVector curFactorGrad = factorGradMatrix.getRow(j);
                double curFactorSum = factorSum.get(j);
                final int jFinal = j;
                // Compute the gradient for this element of the factor vector
                curFactorGrad.foreachIndexedInPlace((Integer idx, Double a) -> a * curFactorSum - curFactors.get(jFinal, idx) * a * a);
                // Multiply by the output gradient
                curFactorGrad.scaleInPlace(curOutputGradient);
            }
            gradients[i] = factorGradMatrix;
        } else {
            // If the output gradient is 0.0 then all the factor gradients are zero.
            // Technically with regularization we should shrink the weights for the specified features.
            gradients[i] = new DenseSparseMatrix(numFactors, features.size());
        }
    }
    return gradients;
}
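The factor gradient in the branch above follows from the standard factorization machine score, sketched below in plain Java (not Tribuo types) for a single output: differentiating with respect to v[f][i] gives x_i * factorSum_f - v[f][i] * x_i^2, which is exactly what the foreachIndexedInPlace lambda computes. Names and shapes here are illustrative.

public class FMScoreSketch {
    // factors is (numFactors x numFeatures), matching curFactors above.
    static double fmScore(double bias, double[] linear, double[][] factors, double[] x) {
        double score = bias;
        for (int i = 0; i < x.length; i++) {
            score += linear[i] * x[i];
        }
        for (int f = 0; f < factors.length; f++) {
            double factorSum = 0.0; // \sum_j v_{j,f} * x_j
            double squareSum = 0.0; // \sum_j v_{j,f}^2 * x_j^2
            for (int j = 0; j < x.length; j++) {
                double vx = factors[f][j] * x[j];
                factorSum += vx;
                squareSum += vx * vx;
            }
            // The standard FM identity computes all pairwise interactions in linear time.
            score += 0.5 * (factorSum * factorSum - squareSum);
        }
        return score;
    }
}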
Use of org.tribuo.math.la.Matrix in project tribuo by oracle.
The class ShrinkingMatrix, method intersectAndAddInPlace:
@Override
public void intersectAndAddInPlace(Tensor other, DoubleUnaryOperator f) {
    if (other instanceof Matrix) {
        Matrix otherMat = (Matrix) other;
        if ((dim1 == otherMat.getDimension1Size()) && (dim2 == otherMat.getDimension2Size())) {
            double shrinkage = scaleShrinking ? 1.0 - (baseRate / iteration) : 1.0 - baseRate;
            scaleInPlace(shrinkage);
            for (MatrixTuple tuple : otherMat) {
                double update = f.applyAsDouble(tuple.value);
                double oldValue = values[tuple.i][tuple.j] * multiplier;
                double newValue = oldValue + update;
                squaredTwoNorm -= oldValue * oldValue;
                squaredTwoNorm += newValue * newValue;
                values[tuple.i][tuple.j] = newValue / multiplier;
            }
            if (reproject) {
                double projectionNormaliser = (1.0 / lambdaSqrt) / twoNorm();
                if (projectionNormaliser < 1.0) {
                    scaleInPlace(projectionNormaliser);
                }
            }
            iteration++;
        } else {
            throw new IllegalStateException("Matrices are not the same size, this(" + dim1 + "," + dim2 + "), other(" + otherMat.getDimension1Size() + "," + otherMat.getDimension2Size() + ")");
        }
    } else {
        throw new IllegalStateException("Adding a non-Matrix to a Matrix");
    }
}
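The values[tuple.i][tuple.j] * multiplier reads and newValue / multiplier writes above are the visible half of a lazy-scaling trick: scaling the whole matrix only updates the multiplier, so the shrinkage applied on every call stays O(1) instead of touching each element. Below is a standalone one-dimensional illustration of that trick, not ShrinkingMatrix itself.

public class LazyScaleSketch {
    private final double[] values; // stored in pre-multiplier form
    private double multiplier = 1.0;

    LazyScaleSketch(int size) {
        this.values = new double[size];
    }

    void scaleInPlace(double scale) {
        multiplier *= scale; // O(1): no element is touched
    }

    double get(int i) {
        return values[i] * multiplier; // scale applied on read
    }

    void add(int i, double update) {
        // Store the updated value back in pre-multiplier form,
        // mirroring values[i][j] = newValue / multiplier above.
        values[i] = (get(i) + update) / multiplier;
    }
}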
Use of org.tribuo.math.la.Matrix in project tribuo by oracle.
The class CRFParameters, method valueAndGradient:
/**
 * Generates predictions based on the input features and labels, then scores those predictions to
 * produce a loss for the example and a gradient update.
 * <p>
 * Assumes all the features in this example are either SparseVector or DenseVector.
 * Mixing the two will cause undefined behaviour.
 * @param features The per token {@link SGDVector} of features.
 * @param labels The per token ground truth labels.
 * @return A {@link Pair} containing the loss for this example and the associated gradient.
 */
public Pair<Double, Tensor[]> valueAndGradient(SGDVector[] features, int[] labels) {
    ChainHelper.ChainCliqueValues scores = getCliqueValues(features);
    // Infer the marginal distribution over labels for each token.
    ChainHelper.ChainBPResults bpResults = ChainHelper.beliefPropagation(scores);
    double logZ = bpResults.logZ;
    DenseVector[] alphas = bpResults.alphas;
    DenseVector[] betas = bpResults.betas;
    // Calculate the gradients for the parameters.
    Tensor[] gradient = new Tensor[3];
    DenseSparseMatrix[] featureGradients = new DenseSparseMatrix[features.length];
    DenseMatrix denseFeatureGradients = null;
    boolean sparseFeatures = false;
    gradient[0] = new DenseVector(biases.size());
    Matrix transGradient = new DenseMatrix(numLabels, numLabels);
    gradient[2] = transGradient;
    double score = -logZ;
    for (int i = 0; i < features.length; i++) {
        int curLabel = labels[i];
        // Increment the loss based on the score for the true label.
        DenseVector curLocalScores = scores.localValues[i];
        score += curLocalScores.get(curLabel);
        // Generate the predicted local marginal from the BP run.
        DenseVector curAlpha = alphas[i];
        DenseVector curBeta = betas[i];
        DenseVector localMarginal = curAlpha.add(curBeta);
        // Generate the gradient for the biases based on the true label and the predicted marginal.
        localMarginal.expNormalize(logZ);
        localMarginal.scaleInPlace(-1.0);
        localMarginal.add(curLabel, 1.0);
        gradient[0].intersectAndAddInPlace(localMarginal);
        // Generate the gradient for the feature - label weights
        Matrix tmpFeatureGradient = localMarginal.outer(features[i]);
        if (tmpFeatureGradient instanceof DenseSparseMatrix) {
            featureGradients[i] = (DenseSparseMatrix) tmpFeatureGradient;
            sparseFeatures = true;
        } else {
            if (denseFeatureGradients == null) {
                denseFeatureGradients = (DenseMatrix) tmpFeatureGradient;
            } else {
                denseFeatureGradients.intersectAndAddInPlace(tmpFeatureGradient);
            }
        }
        // If the sequence has more than one token generate the gradient for the label - label transitions.
        if (i >= 1) {
            DenseVector prevAlpha = alphas[i - 1];
            for (int ii = 0; ii < numLabels; ii++) {
                double prevAlphaVal = prevAlpha.get(ii);
                for (int jj = 0; jj < numLabels; jj++) {
                    double update = -Math.exp(prevAlphaVal + labelLabelWeights.get(ii, jj) + curBeta.get(jj) + curLocalScores.get(jj) - logZ);
                    transGradient.add(ii, jj, update);
                }
            }
            int prevLabel = labels[i - 1];
            // Increment the loss based on the transition from the previous true label to the current true label.
            score += labelLabelWeights.get(prevLabel, curLabel);
            transGradient.add(prevLabel, curLabel, 1.0);
        }
    }
    if (sparseFeatures) {
        // Merge together all the sparse feature - label gradients.
        gradient[1] = merger.merge(featureGradients);
        // Throw if we found any dense features as well as the sparse ones.
        if (denseFeatureGradients != null) {
            throw new IllegalStateException("Mixture of dense and sparse features found.");
        }
    } else {
        gradient[1] = denseFeatureGradients;
    }
    return new Pair<>(score, gradient);
}
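To show how valueAndGradient and the merge method from the first snippet fit together, here is a hedged sketch of one minibatch step. The featureBatch/labelBatch arrays, the surrounding class, and the use of a StochasticGradientOptimiser this way are illustrative assumptions, not code from Tribuo.

import com.oracle.labs.mlrg.olcut.util.Pair;
import org.tribuo.classification.sgd.crf.CRFParameters;
import org.tribuo.math.StochasticGradientOptimiser;
import org.tribuo.math.la.SGDVector;
import org.tribuo.math.la.Tensor;

public class CRFStepSketch {
    // One minibatch step over pre-extracted sequences; returns the summed score.
    static double step(CRFParameters crfParameters, StochasticGradientOptimiser optimiser,
                       SGDVector[][] featureBatch, int[][] labelBatch) {
        Tensor[][] batchGradients = new Tensor[featureBatch.length][];
        double batchScore = 0.0;
        for (int i = 0; i < featureBatch.length; i++) {
            Pair<Double, Tensor[]> scoreAndGrad =
                    crfParameters.valueAndGradient(featureBatch[i], labelBatch[i]);
            batchScore += scoreAndGrad.getA();
            batchGradients[i] = scoreAndGrad.getB();
        }
        // Combine the per-sequence gradients with CRFParameters.merge (shown earlier),
        // let the optimiser shape the step, then apply it to the parameters.
        Tensor[] merged = crfParameters.merge(batchGradients, featureBatch.length);
        crfParameters.update(optimiser.step(merged, 1.0));
        return batchScore;
    }
}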