Search in sources:

Example 26 with Pair

use of com.oracle.labs.mlrg.olcut.util.Pair in project tribuo by oracle.

The method getTopFeatures of the class AbstractLinearSGDModel.

/**
 * Returns, for each output dimension, the features with the largest
 * absolute weight, ordered from largest to smallest magnitude. The bias
 * term competes with the named features under the name {@code BIAS_FEATURE}.
 *
 * @param n The maximum number of entries to return per dimension. A negative
 *          value returns every feature (plus the bias); zero returns an
 *          empty list for each dimension.
 * @return A map from dimension name to the ranked (feature name, weight) pairs.
 */
@Override
public Map<String, List<Pair<String, Double>>> getTopFeatures(int n) {
    DenseMatrix baseWeights = (DenseMatrix) modelParameters.get()[0];
    int numClasses = baseWeights.getDimension1Size();
    // The last column holds the bias weight, so it is not a named feature.
    int numFeatures = baseWeights.getDimension2Size() - 1;
    int requested = n < 0 ? featureIDMap.size() + 1 : n;
    // Clamp to the number of candidates (features + bias). PriorityQueue
    // rejects an initial capacity below 1 and would eagerly allocate a huge
    // array for a very large n, while the result can never hold more than
    // the candidate count anyway.
    int maxFeatures = Math.min(requested, numFeatures + 1);
    Comparator<Pair<String, Double>> comparator = Comparator.comparingDouble(p -> Math.abs(p.getB()));

    Map<String, List<Pair<String, Double>>> map = new HashMap<>();
    for (int i = 0; i < numClasses; i++) {
        List<Pair<String, Double>> b = new ArrayList<>();
        if (maxFeatures > 0) {
            // Min-heap on |weight|: the head is always the weakest entry kept so far.
            PriorityQueue<Pair<String, Double>> q = new PriorityQueue<>(maxFeatures, comparator);
            // Index numFeatures is the bias column; it is offered last, after all named features.
            for (int j = 0; j <= numFeatures; j++) {
                String name = j < numFeatures ? featureIDMap.get(j).getName() : BIAS_FEATURE;
                Pair<String, Double> curr = new Pair<>(name, baseWeights.get(i, j));
                if (q.size() < maxFeatures) {
                    q.offer(curr);
                } else if (comparator.compare(curr, q.peek()) > 0) {
                    q.poll();
                    q.offer(curr);
                }
            }
            // The heap drains smallest-first, so reverse to get descending magnitude.
            while (!q.isEmpty()) {
                b.add(q.poll());
            }
            Collections.reverse(b);
        }
        map.put(getDimensionName(i), b);
    }
    return map;
}
Also used : HashMap(java.util.HashMap) ArrayList(java.util.ArrayList) PriorityQueue(java.util.PriorityQueue) DenseMatrix(org.tribuo.math.la.DenseMatrix) ArrayList(java.util.ArrayList) List(java.util.List) Pair(com.oracle.labs.mlrg.olcut.util.Pair)

Example 27 with Pair

use of com.oracle.labs.mlrg.olcut.util.Pair in project tribuo by oracle.

The method innerPredictOne of the class KNNModel.

/**
 * Predicts a single example: converts it to a vector, asks the neighbour
 * query for its {@code k} nearest stored vectors, wraps each neighbour's
 * output in a {@link Prediction}, and lets the combiner merge them.
 *
 * @param nq The neighbour query used to look up the nearest stored vectors.
 * @param vectors The stored (vector, output) pairs, indexed by the ids the query returns.
 * @param combiner Merges the per-neighbour predictions into one prediction.
 * @param featureIDMap The feature domain used to build the query vector.
 * @param outputIDInfo The output domain handed to the combiner.
 * @param k The number of neighbours to request.
 * @param example The example to predict.
 * @param <T> The output type.
 * @return The combined prediction.
 */
private static <T extends Output<T>> Prediction<T> innerPredictOne(NeighboursQuery nq, Pair<SGDVector, T>[] vectors, EnsembleCombiner<T> combiner, ImmutableFeatureMap featureIDMap, ImmutableOutputInfo<T> outputIDInfo, int k, Example<T> example) {
    // An example that has as many features as the feature map is stored densely, otherwise sparsely.
    boolean coversAllFeatures = example.size() == featureIDMap.size();
    SGDVector query = coversAllFeatures
            ? DenseVector.createDenseVector(example, featureIDMap, false)
            : SparseVector.createSparseVector(example, featureIDMap, false);

    List<Prediction<T>> neighbourPredictions = new ArrayList<>();
    for (Pair<Integer, Double> neighbour : nq.query(query, k)) {
        // The first element of the pair is the index into the stored vectors.
        T neighbourOutput = vectors[neighbour.getA()].getB();
        neighbourPredictions.add(new Prediction<>(neighbourOutput, query.numActiveElements(), example));
    }

    return combiner.combine(outputIDInfo, neighbourPredictions);
}
Also used : Prediction(org.tribuo.Prediction) ArrayList(java.util.ArrayList) SGDVector(org.tribuo.math.la.SGDVector) Pair(com.oracle.labs.mlrg.olcut.util.Pair)

Example 28 with Pair

use of com.oracle.labs.mlrg.olcut.util.Pair in project tribuo by oracle.

The method getTopFeatures of the class XGBoostModel.

/**
 * Returns, for each booster, the features with the largest XGBoost feature
 * score, ordered from highest to lowest. Scores of XGBoost features that
 * map to the same feature name are summed before ranking.
 * <p>
 * When the model has a single booster the result is stored under
 * {@code Model.ALL_OUTPUTS}; otherwise each booster's list is stored under
 * the string form of the corresponding output.
 *
 * @param n The maximum number of features to return per booster. A negative
 *          value returns all scored features; zero returns empty lists.
 * @return A map from output name to the ranked (feature name, score) pairs,
 *         or an empty map if XGBoost raised an error.
 */
@Override
public Map<String, List<Pair<String, Double>>> getTopFeatures(int n) {
    try {
        int requested = n < 0 ? featureIDMap.size() : n;
        // The ordering does not depend on the booster, so build it once.
        Comparator<Pair<String, Double>> comparator = Comparator.comparingDouble(p -> Math.abs(p.getB()));
        Map<String, List<Pair<String, Double>>> map = new HashMap<>();
        for (int i = 0; i < models.size(); i++) {
            Booster model = models.get(i);
            Map<String, MutableDouble> outputMap = new HashMap<>();
            Map<String, Integer> xgboostMap = model.getFeatureScore("");
            for (Map.Entry<String, Integer> f : xgboostMap.entrySet()) {
                // Keys are a one-character prefix followed by the numeric feature id.
                int id = Integer.parseInt(f.getKey().substring(1));
                String name = featureIDMap.get(id).getName();
                MutableDouble curVal = outputMap.computeIfAbsent(name, (k) -> new MutableDouble());
                curVal.increment(f.getValue());
            }
            // Clamp to the number of scored features. PriorityQueue rejects an
            // initial capacity below 1 and would eagerly allocate a huge array
            // for a very large n.
            int maxFeatures = Math.min(requested, outputMap.size());
            List<Pair<String, Double>> list = new ArrayList<>();
            if (maxFeatures > 0) {
                // Min-heap on |score|: the head is always the weakest entry kept so far.
                PriorityQueue<Pair<String, Double>> q = new PriorityQueue<>(maxFeatures, comparator);
                for (Map.Entry<String, MutableDouble> e : outputMap.entrySet()) {
                    Pair<String, Double> cur = new Pair<>(e.getKey(), e.getValue().doubleValue());
                    if (q.size() < maxFeatures) {
                        q.offer(cur);
                    } else if (comparator.compare(cur, q.peek()) > 0) {
                        q.poll();
                        q.offer(cur);
                    }
                }
                // The heap drains smallest-first, so reverse to get descending order.
                while (!q.isEmpty()) {
                    list.add(q.poll());
                }
                Collections.reverse(list);
            }
            if (models.size() == 1) {
                map.put(Model.ALL_OUTPUTS, list);
            } else {
                String dimensionName = outputIDInfo.getOutput(i).toString();
                map.put(dimensionName, list);
            }
        }
        return map;
    } catch (XGBoostError e) {
        logger.log(Level.SEVERE, "XGBoost threw an error", e);
        return Collections.emptyMap();
    }
}
Also used : HashMap(java.util.HashMap) MutableDouble(com.oracle.labs.mlrg.olcut.util.MutableDouble) Booster(ml.dmlc.xgboost4j.java.Booster) ArrayList(java.util.ArrayList) XGBoostError(ml.dmlc.xgboost4j.java.XGBoostError) PriorityQueue(java.util.PriorityQueue) MutableDouble(com.oracle.labs.mlrg.olcut.util.MutableDouble) ArrayList(java.util.ArrayList) List(java.util.List) ImmutableFeatureMap(org.tribuo.ImmutableFeatureMap) HashMap(java.util.HashMap) Map(java.util.Map) Pair(com.oracle.labs.mlrg.olcut.util.Pair)

Example 29 with Pair

use of com.oracle.labs.mlrg.olcut.util.Pair in project tribuo by oracle.

The method getExcuse of the class TreeModel.

/**
 * Explains a prediction by listing the features tested along the path the
 * example takes from the root of the tree to the node where traversal stops.
 * Each feature on the path receives a score that decreases by one per step,
 * starting from the path length plus one, so earlier splits score higher.
 *
 * @param example The example to explain.
 * @return An excuse holding the prediction and the scored path features,
 *         stored under {@code Model.ALL_OUTPUTS}.
 */
@Override
public Optional<Excuse<T>> getExcuse(Example<T> example) {
    // Going through the feature map is what the original author relied on
    // to "handle collisions correctly".
    SparseVector vec = SparseVector.createSparseVector(example, featureIDMap, false);

    // Walk the tree, recording the name of every split feature encountered.
    List<String> splitNames = new ArrayList<>();
    Node<T> lastVisited = root;
    for (Node<T> cursor = root; cursor != null; cursor = cursor.getNextNode(vec)) {
        lastVisited = cursor;
        if (cursor instanceof SplitNode) {
            SplitNode<T> split = (SplitNode<T>) cursor;
            splitNames.add(featureIDMap.get(split.getFeatureID()).getName());
        }
    }

    // Traversal only ends at a leaf, so the last visited node must be a LeafNode.
    LeafNode<T> leaf = (LeafNode<T>) lastVisited;
    Prediction<T> pred = leaf.getPrediction(vec.numActiveElements(), example);

    // Scores run from (pathLength + 1) down to 2 in path order.
    int pathLength = splitNames.size();
    List<Pair<String, Double>> scored = new ArrayList<>();
    for (int depth = 0; depth < pathLength; depth++) {
        int score = pathLength + 1 - depth;
        scored.add(new Pair<>(splitNames.get(depth), score + 0.0));
    }

    Map<String, List<Pair<String, Double>>> excuseMap = new HashMap<>();
    excuseMap.put(Model.ALL_OUTPUTS, scored);
    return Optional.of(new Excuse<>(example, pred, excuseMap));
}
Also used : HashMap(java.util.HashMap) ArrayList(java.util.ArrayList) SparseVector(org.tribuo.math.la.SparseVector) ArrayList(java.util.ArrayList) List(java.util.List) LinkedList(java.util.LinkedList) Pair(com.oracle.labs.mlrg.olcut.util.Pair)

Example 30 with Pair

use of com.oracle.labs.mlrg.olcut.util.Pair in project tribuo by oracle.

The method denseTrainTest of the class AnomalyDataGenerator.

/**
 * Builds a small train/test dataset pair that is dense in the features:
 * every example carries all 4 features, {A,B,C,D}, and the examples fall
 * into 4 clusters, {0,1,2,3}. All examples are labelled with the expected
 * event.
 * @param negate Supply -1.0 to negate some feature values.
 * @return A pair of datasets, training first and testing second.
 */
public static Pair<Dataset<Event>, Dataset<Event>> denseTrainTest(double negate) {
    final String[] featureNames = { "A", "B", "C", "D" };

    // Twelve training rows: four groups of three examples each.
    final double[][] trainRows = {
        { 1.0, 0.5, 1.0, negate * 1.0 },
        { 1.5, 0.35, 1.3, negate * 1.2 },
        { 1.2, 0.45, 1.5, negate * 1.0 },
        { negate * 1.1, 0.55, negate * 1.5, 0.5 },
        { negate * 1.5, 0.25, negate * 1, 0.125 },
        { negate * 1, 0.5, negate * 1.123, 0.123 },
        { 1.5, 5.0, 0.5, 4.5 },
        { 1.234, 5.1235, 0.1235, 6.0 },
        { 1.734, 4.5, 0.5123, 5.5 },
        { negate * 1, 0.25, 5, 10.0 },
        { negate * 1.4, 0.55, 5.65, 12.0 },
        { negate * 1.9, 0.25, 5.9, 15 },
    };

    // Four test rows, one per group.
    final double[][] testRows = {
        { 2.0, 0.45, 3.5, negate * 2.0 },
        { negate * 2.0, 0.55, negate * 2.5, 2.5 },
        { 1.75, 5.0, 1.0, 6.5 },
        { negate * 1.5, 0.25, 5.0, 20.0 },
    };

    MutableDataset<Event> train = new MutableDataset<>(new SimpleDataSourceProvenance("TrainingData", OffsetDateTime.now(), anomalyFactory), anomalyFactory);
    for (double[] row : trainRows) {
        train.add(new ArrayExample<>(EXPECTED_EVENT, featureNames, row));
    }

    MutableDataset<Event> test = new MutableDataset<>(new SimpleDataSourceProvenance("TestingData", OffsetDateTime.now(), anomalyFactory), anomalyFactory);
    for (double[] row : testRows) {
        test.add(new ArrayExample<>(EXPECTED_EVENT, featureNames, row));
    }

    return new Pair<>(train, test);
}
Also used : SimpleDataSourceProvenance(org.tribuo.provenance.SimpleDataSourceProvenance) Event(org.tribuo.anomaly.Event) MutableDataset(org.tribuo.MutableDataset) Pair(com.oracle.labs.mlrg.olcut.util.Pair)

Aggregations

Pair (com.oracle.labs.mlrg.olcut.util.Pair)59 ArrayList (java.util.ArrayList)27 List (java.util.List)21 HashMap (java.util.HashMap)18 MutableDataset (org.tribuo.MutableDataset)17 SimpleDataSourceProvenance (org.tribuo.provenance.SimpleDataSourceProvenance)16 Label (org.tribuo.classification.Label)14 Feature (org.tribuo.Feature)11 Regressor (org.tribuo.regression.Regressor)11 Prediction (org.tribuo.Prediction)10 DenseVector (org.tribuo.math.la.DenseVector)10 SparseVector (org.tribuo.math.la.SparseVector)10 SGDVector (org.tribuo.math.la.SGDVector)9 Map (java.util.Map)7 Example (org.tribuo.Example)7 ImmutableFeatureMap (org.tribuo.ImmutableFeatureMap)7 PriorityQueue (java.util.PriorityQueue)6 Excuse (org.tribuo.Excuse)5 Model (org.tribuo.Model)5 LabelFactory (org.tribuo.classification.LabelFactory)5