Usage of org.tribuo.regression.Regressor.DimensionTuple in the Tribuo project by Oracle:
the getDomain method of the RegressionInfo class.
/**
 * Returns a set containing a Regressor for each dimension with the minimum value observed.
 * @return A set of Regressors, each with one active dimension.
 */
@Override
public Set<Regressor> getDomain() {
    // One single-dimension tuple per observed dimension, carrying that dimension's minimum value.
    TreeSet<DimensionTuple> tuples = new TreeSet<>(Comparator.comparing(DimensionTuple::getName));
    minMap.forEach((name, min) -> tuples.add(new DimensionTuple(name, min.doubleValue())));
    // Safe cast: DimensionTuple is a subtype of Regressor, and the unmodifiable view
    // guarantees no non-DimensionTuple Regressor can ever be inserted.
    @SuppressWarnings("unchecked")
    SortedSet<Regressor> domain = (SortedSet<Regressor>) (SortedSet) Collections.unmodifiableSortedSet(tuples);
    return domain;
}
Usage of org.tribuo.regression.Regressor.DimensionTuple in the Tribuo project by Oracle:
the createLeaf method of the RegressorTrainingNode class.
/**
 * Makes a {@link LeafNode}
 * @param impurityScore the impurity score for the node.
 * @param leafIndices the indices of the examples to be placed in the node.
 * @return A {@link LeafNode}
 */
private LeafNode<Regressor> createLeaf(double impurityScore, int[] leafIndices) {
// Weighted incremental (Welford-style) pass over the leaf's targets, computing
// the running weighted mean and the M2 sum of squared deviations in one loop.
double mean = 0.0;
double leafWeightSum = 0.0;
double variance = 0.0;
for (int i = 0; i < leafIndices.length; i++) {
int idx = leafIndices[i];
float value = targets[idx];
float weight = weights[idx];
leafWeightSum += weight;
double oldMean = mean;
// Weighted mean update: shift the mean by the weight fraction of the new deviation.
mean += (weight / leafWeightSum) * (value - oldMean);
// M2 accumulation uses both the pre-update and post-update mean — order matters here.
variance += weight * (value - oldMean) * (value - mean);
}
// Normalize by total weight minus one (Bessel-style correction); a leaf with
// zero or one example has zero variance by definition.
// NOTE(review): assumes leafWeightSum > 1 whenever there are 2+ examples — confirm weights are >= 1.
variance = leafIndices.length > 1 ? variance / (leafWeightSum - 1) : 0;
// The leaf predicts a single named dimension with the computed mean and variance.
DimensionTuple leafPred = new DimensionTuple(dimName, mean, variance);
return new LeafNode<>(impurityScore, leafPred, Collections.emptyMap(), false);
}
Usage of org.tribuo.regression.Regressor.DimensionTuple in the Tribuo project by Oracle:
the train method of the ElasticNetCDTrainer class.
/**
 * Trains a sparse linear model via elastic net coordinate descent, one model per output dimension.
 * <p>
 * Rejects datasets containing unknown outputs, snapshots the RNG and provenance under the lock,
 * then trains each regression dimension independently with a split of the local RNG.
 * @param examples the training dataset.
 * @param runProvenance run-specific provenance to record in the model.
 * @param invocationCount the invocation count to set, or INCREMENT_INVOCATION_COUNT to auto-increment.
 * @return a {@link SparseModel} wrapping the per-dimension sparse weight vectors.
 * @throws IllegalArgumentException if the dataset contains unknown (unlabelled) outputs.
 */
@Override
public SparseModel<Regressor> train(Dataset<Regressor> examples, Map<String, Provenance> runProvenance, int invocationCount) {
if (examples.getOutputInfo().getUnknownCount() > 0) {
throw new IllegalArgumentException("The supplied Dataset contained unknown Outputs, and this Trainer is supervised.");
}
// Creates a new RNG, adds one to the invocation count, generates provenance.
// Done under the lock so concurrent train() calls see a consistent RNG/counter pair.
TrainerProvenance trainerProvenance;
SplittableRandom localRNG;
synchronized (this) {
if (invocationCount != INCREMENT_INVOCATION_COUNT) {
setInvocationCount(invocationCount);
}
localRNG = rng.split();
trainerProvenance = getProvenance();
trainInvocationCounter++;
}
ImmutableFeatureMap featureIDMap = examples.getFeatureIDMap();
ImmutableOutputInfo<Regressor> outputInfo = examples.getOutputIDInfo();
int numFeatures = featureIDMap.size();
int numOutputs = outputInfo.size();
int numExamples = examples.size();
// Column-major view of the dataset: one SparseVector per feature, needed for coordinate descent.
SparseVector[] columns = SparseVector.transpose(examples, featureIDMap);
String[] dimensionNames = new String[numOutputs];
DenseVector[] regressionTargets = new DenseVector[numOutputs];
for (int i = 0; i < numOutputs; i++) {
dimensionNames[i] = outputInfo.getOutput(i).getNames()[0];
regressionTargets[i] = new DenseVector(numExamples);
}
// Scatter each example's per-dimension target values into the dense target vectors.
int i = 0;
for (Example<Regressor> e : examples) {
for (DimensionTuple d : e.getOutput()) {
regressionTargets[outputInfo.getID(d)].set(i, d.getValue());
}
i++;
}
// Elastic net penalties, scaled by the number of examples: l1Ratio splits alpha between L1 and L2.
double l1Penalty = alpha * l1Ratio * numExamples;
double l2Penalty = alpha * (1.0 - l1Ratio) * numExamples;
double[] featureMeans = calculateMeans(columns);
// Feature variances are fixed at 1.0 — features are centred but not scaled here.
double[] featureVariances = new double[columns.length];
Arrays.fill(featureVariances, 1.0);
// Centering is only needed if at least one feature mean is non-negligible (beyond DELTA).
boolean center = false;
for (i = 0; i < numFeatures; i++) {
if (Math.abs(featureMeans[i]) > DELTA) {
center = true;
break;
}
}
// Per-column squared norms about the mean. The second term accounts for the
// implicit zero entries of the sparse column, which each deviate by -mean.
double[] columnNorms = new double[numFeatures];
int[] featureIndices = new int[numFeatures];
for (i = 0; i < numFeatures; i++) {
featureIndices[i] = i;
double variance = 0.0;
for (VectorTuple v : columns[i]) {
variance += (v.value - featureMeans[i]) * (v.value - featureMeans[i]);
}
columnNorms[i] = variance + (numExamples - columns[i].numActiveElements()) * featureMeans[i] * featureMeans[i];
}
// Shared read-only state for the per-dimension coordinate descent runs.
ElasticNetState elState = new ElasticNetState(columns, featureIndices, featureMeans, columnNorms, l1Penalty, l2Penalty, center);
// Train each output dimension independently, each with its own RNG split for reproducibility.
SparseVector[] outputWeights = new SparseVector[numOutputs];
double[] outputMeans = new double[numOutputs];
for (int j = 0; j < dimensionNames.length; j++) {
outputWeights[j] = trainSingleDimension(regressionTargets[j], elState, localRNG.split());
outputMeans[j] = regressionTargets[j].sum() / numExamples;
}
// Output variance scaling is disabled (fixed at 1.0); the call below is intentionally unused.
// calculateVariances(regressionTargets,outputMeans);
double[] outputVariances = new double[numOutputs];
Arrays.fill(outputVariances, 1.0);
ModelProvenance provenance = new ModelProvenance(SparseLinearModel.class.getName(), OffsetDateTime.now(), examples.getProvenance(), trainerProvenance, runProvenance);
return new SparseLinearModel("elastic-net-model", dimensionNames, provenance, featureIDMap, outputInfo, outputWeights, DenseVector.createDenseVector(featureMeans), DenseVector.createDenseVector(featureVariances), outputMeans, outputVariances, false);
}
Usage of org.tribuo.regression.Regressor.DimensionTuple in the Tribuo project by Oracle:
the combine method of the IndependentRegressionTreeModel class.
/**
 * Merges per-dimension leaf predictions into a single multi-dimensional prediction.
 * <p>
 * Every leaf prediction must carry a {@code DimensionTuple}; the number of active
 * features reported is the maximum across the merged predictions.
 * @param predictions one prediction per output dimension.
 * @return a single prediction wrapping all dimensions.
 * @throws IllegalStateException if any leaf output is not a DimensionTuple.
 */
private Prediction<Regressor> combine(List<Prediction<Regressor>> predictions) {
    DimensionTuple[] combined = new DimensionTuple[predictions.size()];
    int maxActiveFeatures = 0;
    for (int idx = 0; idx < combined.length; idx++) {
        Prediction<Regressor> current = predictions.get(idx);
        maxActiveFeatures = Math.max(maxActiveFeatures, current.getNumActiveFeatures());
        Regressor output = current.getOutput();
        if (!(output instanceof DimensionTuple)) {
            throw new IllegalStateException("All the leaves should contain DimensionTuple not Regressor");
        }
        combined[idx] = (DimensionTuple) output;
    }
    // Attach the merged output to the first prediction's example.
    return new Prediction<>(new Regressor(combined), maxActiveFeatures, predictions.get(0).getExample());
}
Aggregations