Use of de.lmu.ifi.dbs.elki.database.ids.ModifiableDoubleDBIDList in project elki by elki-project.
Class EvaluateRetrievalPerformance, method run.
/**
 * Evaluate retrieval performance on a random sample of query objects.
 * <p>
 * For every sampled object that has at least one match (as collected by
 * {@code findMatches}), the distances to the other objects are computed and
 * scored with average precision, ROC AUC and the kNN evaluator. The scores
 * are then averaged over the number of evaluated query objects and logged as
 * statistics.
 *
 * @param database Database to run on (used to obtain the distance query)
 * @param relation Relation for distance computations
 * @param lrelation Relation for class label comparison
 * @return Result holding the number of evaluated samples, the mean average
 *         precision, the mean ROC AUC, and the averaged kNN performance
 *         values for k = 1..maxk
 * @throws AbortException if no query object could be evaluated, or if the
 *         accumulated MAP / ROC sums are NaN or negative
 */
public RetrievalPerformanceResult run(Database database, Relation<O> relation, Relation<?> lrelation) {
  final DistanceQuery<O> distanceQuery = database.getDistanceQuery(relation, getDistanceFunction());
  final DBIDs queryIds = DBIDUtil.randomSample(relation.getDBIDs(), sampling, random);

  // Working storage, reused across all query objects:
  // the positive set for the current query label,
  ModifiableDBIDs positives = DBIDUtil.newHashSet();
  // the distance list of the current query object,
  ModifiableDoubleDBIDList neighbors = DBIDUtil.newDistanceDBIDList(relation.size());
  // and the label counters handed to the kNN evaluator.
  Object2IntOpenHashMap<Object> labelCounts = new Object2IntOpenHashMap<>();

  // Running sums; these are turned into means after the loop.
  double sumAveragePrecision = 0.;
  double sumRocAuc = 0.;
  double[] knnperf = new double[maxk];
  int evaluated = 0;

  FiniteProgress progress = LOG.isVerbose() ? new FiniteProgress("Processing query objects", queryIds.size(), LOG) : null;
  for (DBIDIter query = queryIds.iter(); query.valid(); query.advance()) {
    final Object queryLabel = lrelation.get(query);
    // Presumably fills 'positives' with the objects matching the query label.
    findMatches(positives, lrelation, queryLabel);
    if (positives.size() > 0) {
      computeDistances(neighbors, query, distanceQuery, relation);
      // Sanity check: one entry per object, minus the query itself unless
      // includeSelf is set.
      final int found = neighbors.size();
      final int expected = relation.size() - (includeSelf ? 0 : 1);
      if (found != expected) {
        LOG.warning("Neighbor list does not have the desired size: " + found);
      }
      sumAveragePrecision += AveragePrecisionEvaluation.STATIC.evaluate(positives, neighbors);
      sumRocAuc += ROCEvaluation.STATIC.evaluate(positives, neighbors);
      KNNEvaluator.STATIC.evaluateKNN(knnperf, neighbors, lrelation, labelCounts, queryLabel);
      evaluated++;
    }
    LOG.incrementProcessed(progress);
  }
  LOG.ensureCompleted(progress);

  if (evaluated < 1) {
    throw new AbortException("No object matched - are labels parsed correctly?");
  }
  // Written as a negated ">=" so that NaN sums are rejected as well.
  if (!(sumAveragePrecision >= 0 && sumRocAuc >= 0)) {
    throw new AbortException("NaN in MAP/ROC.");
  }

  final double meanAveragePrecision = sumAveragePrecision / evaluated;
  final double meanRocAuc = sumRocAuc / evaluated;
  LOG.statistics(new DoubleStatistic(PREFIX + ".map", meanAveragePrecision));
  LOG.statistics(new DoubleStatistic(PREFIX + ".rocauc", meanRocAuc));
  LOG.statistics(new DoubleStatistic(PREFIX + ".samples", evaluated));
  for (int idx = 0; idx < maxk; idx++) {
    knnperf[idx] /= evaluated;
    // Statistics are numbered by k, starting at 1.
    LOG.statistics(new DoubleStatistic(PREFIX + ".knn-" + (idx + 1), knnperf[idx]));
  }
  return new RetrievalPerformanceResult(evaluated, meanAveragePrecision, meanRocAuc, knnperf);
}
Use of de.lmu.ifi.dbs.elki.database.ids.ModifiableDoubleDBIDList in project elki by elki-project.
Class ROCEvaluationTest, method testROCCurve.
/**
 * Test ROC curve generation (including curve simplification) and both ways
 * of computing the area under the curve on a small hand-made ranking.
 */
@Test
public void testROCCurve() {
  // Objects 1..5 form the positive class.
  HashSetModifiableDBIDs positive = DBIDUtil.newHashSet();
  for (int id = 1; id <= 5; id++) {
    positive.add(DBIDUtil.importInteger(id));
  }

  // Ranked result, one row per entry: distance, object id, +/- for
  // positive/negative, and the annotated curve point (x,y) where one is
  // given. "redundant" marks points annotated as removable.
  //   start ........... 0.0,0.
  //   0.0  id 1  +      0.0,.2  (redundant)
  //   1.0  id 2  +      0.0,.4
  //   2.0  id 6  -      .25,.4
  //   3.0  id 7  -
  //   3.0  id 3  +      .50,.6  (redundant)
  //   4.0  id 8  -
  //   4.0  id 4  +      .75,.8
  //   5.0  id 9  -      1.0,.8
  //   6.0  id 5  +      1.0,1.
  final double[] dists = { 0.0, 1.0, 2.0, 3.0, 3.0, 4.0, 4.0, 5.0, 6.0 };
  final int[] objects = { 1, 2, 6, 7, 3, 8, 4, 9, 5 };
  final ModifiableDoubleDBIDList distances = DBIDUtil.newDistanceDBIDList();
  for (int row = 0; row < dists.length; row++) {
    distances.add(dists[row], DBIDUtil.importInteger(objects[row]));
  }

  // Materialized curve: expected to keep only the six non-redundant points.
  XYCurve roccurve = ROCEvaluation.materializeROC(new DBIDsTest(positive), new DistanceResultAdapter(distances.iter()));
  assertEquals("ROC curve too complex", 6, roccurve.size());
  final double curveAuc = XYCurve.areaUnderCurve(roccurve);
  assertEquals("ROC AUC (curve) not correct.", 0.6, curveAuc, 1e-14);

  // Direct AUC computation must agree with the area under the curve.
  final double directAuc = new ROCEvaluation().evaluate(positive, distances);
  assertEquals("ROC AUC (direct) not correct.", 0.6, directAuc, 1e-14);
}
Use of de.lmu.ifi.dbs.elki.database.ids.ModifiableDoubleDBIDList in project elki by elki-project.
Class SortingDuplicatesTest, method testDuplicateKeys.
/**
 * Sort a list of 100000 entries whose keys alternate between 0 and 1, i.e.
 * a list consisting almost entirely of duplicate keys. The JUnit timeout
 * makes the test fail if sorting takes too long.
 */
@Test(timeout = 100)
public void testDuplicateKeys() {
  final int size = 100000;
  // The list needs an id for every entry, but no real data is required.
  final DBID id = DBIDUtil.importInteger(1);
  ModifiableDoubleDBIDList list = DBIDUtil.newDistanceDBIDList(size);
  for (int pos = 0; pos < size; pos++) {
    // Even positions get key 0, odd positions get key 1.
    list.add((pos & 1) == 0 ? 0. : 1., id);
  }
  list.sort();
}
Use of de.lmu.ifi.dbs.elki.database.ids.ModifiableDoubleDBIDList in project elki by elki-project.
Class AveragePrecisionEvaluationTest, method testAveragePrecision.
/**
 * Test the Average Precision score on a small hand-made ranking.
 */
@Test
public void testAveragePrecision() {
  // Objects 1..5 form the positive class.
  HashSetModifiableDBIDs positive = DBIDUtil.newHashSet();
  for (int id = 1; id <= 5; id++) {
    positive.add(DBIDUtil.importInteger(id));
  }

  // Ranked result, one row per entry: distance, object id, and for each
  // positive object the precision at its rank.
  //   0.0  id 1  precision 1.0
  //   1.0  id 2  precision 1.0
  //   2.0  id 6
  //   3.0  id 7
  //   3.0  id 3  precision 0.6
  //   4.0  id 8
  //   4.0  id 4  precision 4/7
  //   5.0  id 9
  //   6.0  id 5  precision 5/9
  final double[] dists = { 0.0, 1.0, 2.0, 3.0, 3.0, 4.0, 4.0, 5.0, 6.0 };
  final int[] objects = { 1, 2, 6, 7, 3, 8, 4, 9, 5 };
  final ModifiableDoubleDBIDList distances = DBIDUtil.newDistanceDBIDList();
  for (int row = 0; row < dists.length; row++) {
    distances.add(dists[row], DBIDUtil.importInteger(objects[row]));
  }

  // Expected: (1 + 1 + .6 + 4/7 + 5/9) / 5 = 0.7453968253968254
  final double averagePrecision = new AveragePrecisionEvaluation().evaluate(positive, distances);
  assertEquals("Average precision not correct.", 0.7453968253968254, averagePrecision, 1e-14);
}
Use of de.lmu.ifi.dbs.elki.database.ids.ModifiableDoubleDBIDList in project elki by elki-project.
Class KMeansMinusMinus, method meansWithTreshhold.
/**
 * Returns the mean vectors of the given clusters in the given database,
 * using only those cluster members whose stored value is strictly below the
 * threshold.
 * <p>
 * If no member of a cluster is below the threshold (or the cluster is
 * empty), the previous mean of that cluster is kept.
 *
 * @param clusters the clusters to compute the means; each entry pairs an
 *        object id with a double value that is compared to the threshold
 * @param means the recent means, used as fallback for clusters without any
 *        remaining member
 * @param database the database containing the vectors
 * @param tresh the threshold; members with a value {@code >= tresh} are
 *        skipped
 * @return the mean vectors of the given clusters in the given database
 */
protected double[][] meansWithTreshhold(List<? extends ModifiableDoubleDBIDList> clusters, double[][] means, Relation<V> database, Double tresh) {
  // TODO: use Kahan summation for better numerical precision?
  double[][] newMeans = new double[k][];
  for (int i = 0; i < k; i++) {
    DoubleDBIDList list = clusters.get(i);
    double[] raw = null;
    int count = 0;
    for (DoubleDBIDListIter iter = list.iter(); iter.valid(); iter.advance()) {
      // Skip members at or above the threshold.
      if (iter.doubleValue() >= tresh) {
        continue;
      }
      NumberVector vec = database.get(iter);
      if (raw == null) {
        // Initialize the sum with the first remaining instance. It must NOT
        // be added again below, otherwise it would be counted twice.
        // (As before, toArray() is relied upon to return a modifiable copy.)
        raw = vec.toArray();
      }
      else {
        // Update with remaining instances
        for (int j = 0; j < raw.length; j++) {
          raw[j] += vec.doubleValue(j);
        }
      }
      count++;
    }
    // Divide the sum by the number of contributing members; keep the old
    // mean when there was none.
    newMeans[i] = (raw != null) ? VMath.timesEquals(raw, 1.0 / count) : means[i];
  }
  return newMeans;
}
Aggregations