Search in sources :

Example 1 with DimensionTuple

use of org.tribuo.regression.Regressor.DimensionTuple in project tribuo by oracle.

the class RegressionInfo method getDomain.

/**
 * Builds the domain of this output info: one single-dimension {@link Regressor}
 * per observed dimension, each carrying the minimum value recorded for it.
 * <p>
 * The returned set is ordered by dimension name and cannot be modified.
 * @return An unmodifiable, name-sorted set of Regressors, each with one active dimension.
 */
@Override
public Set<Regressor> getDomain() {
    SortedSet<DimensionTuple> byName = new TreeSet<>(Comparator.comparing(DimensionTuple::getName));
    minMap.forEach((dimension, minimum) -> byName.add(new DimensionTuple(dimension, minimum.doubleValue())));
    // The cast is safe: every element is a DimensionTuple (a Regressor subtype) and the
    // unmodifiable wrapper rejects writes, so no other Regressor can be inserted through it.
    @SuppressWarnings("unchecked")
    SortedSet<Regressor> domain = (SortedSet<Regressor>) (SortedSet) Collections.unmodifiableSortedSet(byName);
    return domain;
}
Also used : TreeSet(java.util.TreeSet) MutableDouble(com.oracle.labs.mlrg.olcut.util.MutableDouble) LinkedHashMap(java.util.LinkedHashMap) TreeMap(java.util.TreeMap) Map(java.util.Map) SortedSet(java.util.SortedSet) DimensionTuple(org.tribuo.regression.Regressor.DimensionTuple)

Example 2 with DimensionTuple

use of org.tribuo.regression.Regressor.DimensionTuple in project tribuo by oracle.

the class RegressorTrainingNode method createLeaf.

/**
 * Makes a {@link LeafNode} for the supplied examples.
 * <p>
 * The leaf predicts the weighted mean of the targets at {@code leafIndices}; the
 * variance is accumulated in the same single pass using an incremental update of
 * the running mean. A leaf holding at most one example is given a variance of zero.
 * @param impurityScore the impurity score for the node.
 * @param leafIndices the indices of the examples to be placed in the node.
 * @return A {@link LeafNode} whose output is the weighted mean and variance of this dimension.
 */
private LeafNode<Regressor> createLeaf(double impurityScore, int[] leafIndices) {
    double runningMean = 0.0;
    double weightTotal = 0.0;
    double sumSquares = 0.0;
    for (int exampleIdx : leafIndices) {
        float target = targets[exampleIdx];
        float exampleWeight = weights[exampleIdx];
        weightTotal += exampleWeight;
        // Deviation from the mean *before* this example is folded in.
        double previousMean = runningMean;
        double delta = target - previousMean;
        runningMean = previousMean + (exampleWeight / weightTotal) * delta;
        // Uses both the old and the new mean, which keeps the accumulation single-pass.
        sumSquares += exampleWeight * delta * (target - runningMean);
    }
    double leafVariance;
    if (leafIndices.length > 1) {
        leafVariance = sumSquares / (weightTotal - 1);
    } else {
        leafVariance = 0;
    }
    DimensionTuple prediction = new DimensionTuple(dimName, runningMean, leafVariance);
    return new LeafNode<>(impurityScore, prediction, Collections.emptyMap(), false);
}
Also used : LeafNode(org.tribuo.common.tree.LeafNode) DimensionTuple(org.tribuo.regression.Regressor.DimensionTuple)

Example 3 with DimensionTuple

use of org.tribuo.regression.Regressor.DimensionTuple in project tribuo by oracle.

the class ElasticNetCDTrainer method train.

/**
 * Trains a {@link SparseLinearModel} on the supplied regression dataset, fitting one
 * sparse weight vector per output dimension via {@code trainSingleDimension}.
 * <p>
 * The RNG split, provenance capture and invocation counter update are performed
 * together while holding this trainer's monitor.
 * @param examples The training dataset. It must not contain unknown outputs.
 * @param runProvenance Provenance for this run, stored in the model provenance.
 * @param invocationCount The invocation count to set before training, or
 *        {@code INCREMENT_INVOCATION_COUNT} to leave the current count as it is.
 * @return A sparse linear model named "elastic-net-model".
 * @throws IllegalArgumentException if the dataset contains unknown outputs.
 */
@Override
public SparseModel<Regressor> train(Dataset<Regressor> examples, Map<String, Provenance> runProvenance, int invocationCount) {
    if (examples.getOutputInfo().getUnknownCount() > 0) {
        throw new IllegalArgumentException("The supplied Dataset contained unknown Outputs, and this Trainer is supervised.");
    }
    // Creates a new RNG, adds one to the invocation count, generates provenance.
    TrainerProvenance trainerProvenance;
    SplittableRandom localRNG;
    synchronized (this) {
        // Only override the counter when the caller supplied an explicit count.
        if (invocationCount != INCREMENT_INVOCATION_COUNT) {
            setInvocationCount(invocationCount);
        }
        localRNG = rng.split();
        // The provenance is captured before the counter is incremented.
        trainerProvenance = getProvenance();
        trainInvocationCounter++;
    }
    ImmutableFeatureMap featureIDMap = examples.getFeatureIDMap();
    ImmutableOutputInfo<Regressor> outputInfo = examples.getOutputIDInfo();
    int numFeatures = featureIDMap.size();
    int numOutputs = outputInfo.size();
    int numExamples = examples.size();
    // Transposed view of the dataset: columns[i] is indexed by feature below.
    SparseVector[] columns = SparseVector.transpose(examples, featureIDMap);
    String[] dimensionNames = new String[numOutputs];
    DenseVector[] regressionTargets = new DenseVector[numOutputs];
    // One name and one target vector (of length numExamples) per output dimension.
    for (int i = 0; i < numOutputs; i++) {
        dimensionNames[i] = outputInfo.getOutput(i).getNames()[0];
        regressionTargets[i] = new DenseVector(numExamples);
    }
    // Copy each example's per-dimension value into the target vector for that
    // dimension's id; i is the example's position in iteration order.
    int i = 0;
    for (Example<Regressor> e : examples) {
        for (DimensionTuple d : e.getOutput()) {
            regressionTargets[outputInfo.getID(d)].set(i, d.getValue());
        }
        i++;
    }
    // Both penalties are scaled by the number of examples; l1Ratio splits alpha
    // between the L1 and L2 terms.
    double l1Penalty = alpha * l1Ratio * numExamples;
    double l2Penalty = alpha * (1.0 - l1Ratio) * numExamples;
    double[] featureMeans = calculateMeans(columns);
    // Feature variances are fixed at 1.0 rather than computed from the data.
    double[] featureVariances = new double[columns.length];
    Arrays.fill(featureVariances, 1.0);
    // center becomes true as soon as any feature mean exceeds DELTA in magnitude.
    boolean center = false;
    for (i = 0; i < numFeatures; i++) {
        if (Math.abs(featureMeans[i]) > DELTA) {
            center = true;
            break;
        }
    }
    double[] columnNorms = new double[numFeatures];
    int[] featureIndices = new int[numFeatures];
    for (i = 0; i < numFeatures; i++) {
        featureIndices[i] = i;
        // Sum of squared deviations from the mean over the elements stored in the column.
        double variance = 0.0;
        for (VectorTuple v : columns[i]) {
            variance += (v.value - featureMeans[i]) * (v.value - featureMeans[i]);
        }
        // Adds mean^2 for each of the (numExamples - numActiveElements) entries that are
        // not stored in the sparse column - presumably implicit zeros; TODO confirm.
        columnNorms[i] = variance + (numExamples - columns[i].numActiveElements()) * featureMeans[i] * featureMeans[i];
    }
    ElasticNetState elState = new ElasticNetState(columns, featureIndices, featureMeans, columnNorms, l1Penalty, l2Penalty, center);
    SparseVector[] outputWeights = new SparseVector[numOutputs];
    double[] outputMeans = new double[numOutputs];
    // Each output dimension is trained on its own targets with a freshly split RNG,
    // reusing the same elState instance across dimensions.
    for (int j = 0; j < dimensionNames.length; j++) {
        outputWeights[j] = trainSingleDimension(regressionTargets[j], elState, localRNG.split());
        outputMeans[j] = regressionTargets[j].sum() / numExamples;
    }
    // Output variances are fixed at 1.0; the computed alternative below is disabled.
    // calculateVariances(regressionTargets,outputMeans);
    double[] outputVariances = new double[numOutputs];
    Arrays.fill(outputVariances, 1.0);
    ModelProvenance provenance = new ModelProvenance(SparseLinearModel.class.getName(), OffsetDateTime.now(), examples.getProvenance(), trainerProvenance, runProvenance);
    return new SparseLinearModel("elastic-net-model", dimensionNames, provenance, featureIDMap, outputInfo, outputWeights, DenseVector.createDenseVector(featureMeans), DenseVector.createDenseVector(featureVariances), outputMeans, outputVariances, false);
}
Also used : ModelProvenance(org.tribuo.provenance.ModelProvenance) SparseVector(org.tribuo.math.la.SparseVector) DimensionTuple(org.tribuo.regression.Regressor.DimensionTuple) ImmutableFeatureMap(org.tribuo.ImmutableFeatureMap) VectorTuple(org.tribuo.math.la.VectorTuple) Regressor(org.tribuo.regression.Regressor) TrainerProvenance(org.tribuo.provenance.TrainerProvenance) SplittableRandom(java.util.SplittableRandom) DenseVector(org.tribuo.math.la.DenseVector)

Example 4 with DimensionTuple

use of org.tribuo.regression.Regressor.DimensionTuple in project tribuo by oracle.

the class IndependentRegressionTreeModel method combine.

/**
 * Merges the supplied predictions into a single multidimensional prediction.
 * <p>
 * Every prediction's output must be a {@link DimensionTuple}; the tuples are gathered,
 * in list order, into one {@link Regressor}. The combined prediction reports the largest
 * active feature count seen across the inputs and carries the example from the first
 * prediction in the list.
 * @param predictions The predictions to merge.
 * @return A prediction whose output combines all the supplied dimensions.
 * @throws IllegalStateException if any prediction's output is not a DimensionTuple.
 */
private Prediction<Regressor> combine(List<Prediction<Regressor>> predictions) {
    DimensionTuple[] dimensions = new DimensionTuple[predictions.size()];
    int maxActiveFeatures = 0;
    int slot = 0;
    for (Prediction<Regressor> prediction : predictions) {
        maxActiveFeatures = Math.max(maxActiveFeatures, prediction.getNumActiveFeatures());
        Regressor leafOutput = prediction.getOutput();
        if (!(leafOutput instanceof DimensionTuple)) {
            throw new IllegalStateException("All the leaves should contain DimensionTuple not Regressor");
        }
        dimensions[slot++] = (DimensionTuple) leafOutput;
    }
    Example<Regressor> sourceExample = predictions.get(0).getExample();
    return new Prediction<>(new Regressor(dimensions), maxActiveFeatures, sourceExample);
}
Also used : Prediction(org.tribuo.Prediction) Regressor(org.tribuo.regression.Regressor) DimensionTuple(org.tribuo.regression.Regressor.DimensionTuple)

Aggregations

- DimensionTuple (org.tribuo.regression.Regressor.DimensionTuple): 4
- Regressor (org.tribuo.regression.Regressor): 2
- MutableDouble (com.oracle.labs.mlrg.olcut.util.MutableDouble): 1
- LinkedHashMap (java.util.LinkedHashMap): 1
- Map (java.util.Map): 1
- SortedSet (java.util.SortedSet): 1
- SplittableRandom (java.util.SplittableRandom): 1
- TreeMap (java.util.TreeMap): 1
- TreeSet (java.util.TreeSet): 1
- ImmutableFeatureMap (org.tribuo.ImmutableFeatureMap): 1
- Prediction (org.tribuo.Prediction): 1
- LeafNode (org.tribuo.common.tree.LeafNode): 1
- DenseVector (org.tribuo.math.la.DenseVector): 1
- SparseVector (org.tribuo.math.la.SparseVector): 1
- VectorTuple (org.tribuo.math.la.VectorTuple): 1
- ModelProvenance (org.tribuo.provenance.ModelProvenance): 1
- TrainerProvenance (org.tribuo.provenance.TrainerProvenance): 1