Use of com.oracle.labs.mlrg.olcut.util.Pair in the Tribuo project by Oracle.
From the class AbstractLinearSGDModel, method getTopFeatures:
/**
 * Gets the top {@code n} features for each output dimension, ranked by the
 * absolute value of the linear model weight. The bias term is included under
 * the name {@code BIAS_FEATURE}.
 * @param n The number of features to return per dimension; a negative value returns all features (including the bias).
 * @return A map from dimension name to a list of (feature name, weight) pairs, largest |weight| first.
 */
@Override
public Map<String, List<Pair<String, Double>>> getTopFeatures(int n) {
    DenseMatrix baseWeights = (DenseMatrix) modelParameters.get()[0];
    // n < 0 means "all features"; the +1 accounts for the bias term.
    int maxFeatures = n < 0 ? featureIDMap.size() + 1 : n;
    Comparator<Pair<String, Double>> comparator = Comparator.comparingDouble(p -> Math.abs(p.getB()));
    int numClasses = baseWeights.getDimension1Size();
    // The final column of the weight matrix holds the bias feature.
    int numFeatures = baseWeights.getDimension2Size() - 1;
    Map<String, List<Pair<String, Double>>> map = new HashMap<>();
    for (int i = 0; i < numClasses; i++) {
        // Min-heap keyed on |weight|; holds the current top maxFeatures candidates.
        PriorityQueue<Pair<String, Double>> q = new PriorityQueue<>(maxFeatures, comparator);
        for (int j = 0; j < numFeatures; j++) {
            offerBounded(q, new Pair<>(featureIDMap.get(j).getName(), baseWeights.get(i, j)), maxFeatures, comparator);
        }
        // Offer the bias weight (final column) under its reserved name.
        offerBounded(q, new Pair<>(BIAS_FEATURE, baseWeights.get(i, numFeatures)), maxFeatures, comparator);
        List<Pair<String, Double>> b = new ArrayList<>();
        while (q.size() > 0) {
            b.add(q.poll());
        }
        // The heap drains smallest first; reverse so the largest |weight| leads.
        Collections.reverse(b);
        map.put(getDimensionName(i), b);
    }
    return map;
}

/**
 * Offers {@code curr} to the bounded min-heap {@code q}, evicting the smallest
 * element when the heap is full and {@code curr} outranks it.
 * @param q The bounded min-heap of candidates.
 * @param curr The candidate pair to insert.
 * @param maxSize The maximum heap size.
 * @param comparator The ranking comparator (must match the heap's comparator).
 */
private static void offerBounded(PriorityQueue<Pair<String, Double>> q, Pair<String, Double> curr, int maxSize, Comparator<Pair<String, Double>> comparator) {
    if (q.size() < maxSize) {
        q.offer(curr);
    } else if (comparator.compare(curr, q.peek()) > 0) {
        q.poll();
        q.offer(curr);
    }
}
Use of com.oracle.labs.mlrg.olcut.util.Pair in the Tribuo project by Oracle.
From the class KNNModel, method innerPredictOne:
/**
 * Produces a prediction for a single example by querying its k nearest
 * neighbours and combining their stored outputs with the ensemble combiner.
 * @param nq The nearest-neighbour query structure.
 * @param vectors The training vectors paired with their outputs.
 * @param combiner The combiner used to merge the neighbours' predictions.
 * @param featureIDMap The feature domain.
 * @param outputIDInfo The output domain.
 * @param k The number of neighbours to consult.
 * @param example The example to predict.
 * @return The combined prediction.
 */
private static <T extends Output<T>> Prediction<T> innerPredictOne(NeighboursQuery nq, Pair<SGDVector, T>[] vectors, EnsembleCombiner<T> combiner, ImmutableFeatureMap featureIDMap, ImmutableOutputInfo<T> outputIDInfo, int k, Example<T> example) {
    // Dense conversion when the example covers every feature, sparse otherwise.
    SGDVector input = (example.size() == featureIDMap.size())
            ? DenseVector.createDenseVector(example, featureIDMap, false)
            : SparseVector.createSparseVector(example, featureIDMap, false);
    List<Pair<Integer, Double>> neighbours = nq.query(input, k);
    int numUsed = input.numActiveElements();
    List<Prediction<T>> votes = new ArrayList<>(neighbours.size());
    for (Pair<Integer, Double> neighbour : neighbours) {
        // Each neighbour votes with the output stored alongside its training vector.
        votes.add(new Prediction<>(vectors[neighbour.getA()].getB(), numUsed, example));
    }
    return combiner.combine(outputIDInfo, votes);
}
Use of com.oracle.labs.mlrg.olcut.util.Pair in the Tribuo project by Oracle.
From the class XGBoostModel, method getTopFeatures:
/**
 * Gets the top {@code n} features for each output, ranked by the absolute
 * value of the XGBoost feature score aggregated per Tribuo feature name.
 * <p>
 * If XGBoost throws an error the exception is logged and an empty map is returned.
 * @param n The number of features to return per output; a negative value returns all features.
 * @return A map from output name (or {@link Model#ALL_OUTPUTS} for a single model) to a list of (feature name, score) pairs, highest score first.
 */
@Override
public Map<String, List<Pair<String, Double>>> getTopFeatures(int n) {
    try {
        // n < 0 means "all features".
        int maxFeatures = n < 0 ? featureIDMap.size() : n;
        // Hoisted out of the per-model loop - the comparator is loop-invariant.
        Comparator<Pair<String, Double>> comparator = Comparator.comparingDouble(p -> Math.abs(p.getB()));
        Map<String, List<Pair<String, Double>>> map = new HashMap<>();
        for (int i = 0; i < models.size(); i++) {
            Booster model = models.get(i);
            // Aggregate XGBoost's per-feature scores under Tribuo feature names.
            Map<String, MutableDouble> outputMap = new HashMap<>();
            Map<String, Integer> xgboostMap = model.getFeatureScore("");
            for (Map.Entry<String, Integer> f : xgboostMap.entrySet()) {
                // XGBoost names features "f0", "f1", ... - strip the prefix to recover the id.
                int id = Integer.parseInt(f.getKey().substring(1));
                String name = featureIDMap.get(id).getName();
                outputMap.computeIfAbsent(name, (k) -> new MutableDouble()).increment(f.getValue());
            }
            // Min-heap of the current top maxFeatures candidates.
            PriorityQueue<Pair<String, Double>> q = new PriorityQueue<>(maxFeatures, comparator);
            for (Map.Entry<String, MutableDouble> e : outputMap.entrySet()) {
                Pair<String, Double> cur = new Pair<>(e.getKey(), e.getValue().doubleValue());
                if (q.size() < maxFeatures) {
                    q.offer(cur);
                } else if (comparator.compare(cur, q.peek()) > 0) {
                    q.poll();
                    q.offer(cur);
                }
            }
            List<Pair<String, Double>> list = new ArrayList<>(q.size());
            while (!q.isEmpty()) {
                list.add(q.poll());
            }
            // The heap drains smallest first; reverse so the highest score leads.
            Collections.reverse(list);
            if (models.size() == 1) {
                map.put(Model.ALL_OUTPUTS, list);
            } else {
                String dimensionName = outputIDInfo.getOutput(i).toString();
                map.put(dimensionName, list);
            }
        }
        return map;
    } catch (XGBoostError e) {
        logger.log(Level.SEVERE, "XGBoost threw an error", e);
        return Collections.emptyMap();
    }
}
Use of com.oracle.labs.mlrg.olcut.util.Pair in the Tribuo project by Oracle.
From the class TreeModel, method getExcuse:
/**
 * Explains a prediction by walking the tree for this example and reporting the
 * split features encountered, weighted by depth (root split weighted highest).
 * @param example The example to explain.
 * @return An excuse containing the leaf's prediction and the depth-weighted split features.
 */
@Override
public Optional<Excuse<T>> getExcuse(Example<T> example) {
    // Sparse conversion merges duplicate feature occurrences (collision handling).
    SparseVector vec = SparseVector.createSparseVector(example, featureIDMap, false);
    List<String> visitedFeatures = new ArrayList<>();
    // Descend from the root to a leaf, recording each split feature on the way.
    Node<T> leaf = root;
    for (Node<T> node = root; node != null; node = node.getNextNode(vec)) {
        leaf = node;
        if (node instanceof SplitNode) {
            visitedFeatures.add(featureIDMap.get(((SplitNode<T>) node).getFeatureID()).getName());
        }
    }
    // The descent always terminates on a LeafNode.
    Prediction<T> pred = ((LeafNode<T>) leaf).getPrediction(vec.numActiveElements(), example);
    // Weight features by depth: the first (root) split gets the highest weight.
    List<Pair<String, Double>> weightedFeatures = new ArrayList<>(visitedFeatures.size());
    double weight = visitedFeatures.size() + 1;
    for (String featureName : visitedFeatures) {
        weightedFeatures.add(new Pair<>(featureName, weight));
        weight -= 1.0;
    }
    Map<String, List<Pair<String, Double>>> map = new HashMap<>();
    map.put(Model.ALL_OUTPUTS, weightedFeatures);
    return Optional.of(new Excuse<>(example, pred, map));
}
Use of com.oracle.labs.mlrg.olcut.util.Pair in the Tribuo project by Oracle.
From the class AnomalyDataGenerator, method denseTrainTest:
/**
 * Generates a train/test dataset pair which is dense in the features;
 * each example has 4 features, {A,B,C,D}, and there are 4 clusters,
 * {0,1,2,3}.
 * @param negate Supply -1.0 to negate some feature values.
 * @return A pair of datasets.
 */
public static Pair<Dataset<Event>, Dataset<Event>> denseTrainTest(double negate) {
    String[] names = new String[] { "A", "B", "C", "D" };
    // Training examples, one row per example.
    double[][] trainValues = new double[][] {
        { 1.0, 0.5, 1.0, negate * 1.0 },
        { 1.5, 0.35, 1.3, negate * 1.2 },
        { 1.2, 0.45, 1.5, negate * 1.0 },
        { negate * 1.1, 0.55, negate * 1.5, 0.5 },
        { negate * 1.5, 0.25, negate * 1, 0.125 },
        { negate * 1, 0.5, negate * 1.123, 0.123 },
        { 1.5, 5.0, 0.5, 4.5 },
        { 1.234, 5.1235, 0.1235, 6.0 },
        { 1.734, 4.5, 0.5123, 5.5 },
        { negate * 1, 0.25, 5, 10.0 },
        { negate * 1.4, 0.55, 5.65, 12.0 },
        { negate * 1.9, 0.25, 5.9, 15 }
    };
    MutableDataset<Event> train = new MutableDataset<>(new SimpleDataSourceProvenance("TrainingData", OffsetDateTime.now(), anomalyFactory), anomalyFactory);
    for (double[] values : trainValues) {
        train.add(new ArrayExample<>(EXPECTED_EVENT, names, values));
    }
    // Test examples, one row per example.
    double[][] testValues = new double[][] {
        { 2.0, 0.45, 3.5, negate * 2.0 },
        { negate * 2.0, 0.55, negate * 2.5, 2.5 },
        { 1.75, 5.0, 1.0, 6.5 },
        { negate * 1.5, 0.25, 5.0, 20.0 }
    };
    MutableDataset<Event> test = new MutableDataset<>(new SimpleDataSourceProvenance("TestingData", OffsetDateTime.now(), anomalyFactory), anomalyFactory);
    for (double[] values : testValues) {
        test.add(new ArrayExample<>(EXPECTED_EVENT, names, values));
    }
    return new Pair<>(train, test);
}
Aggregations