Search in sources :

Example 11 with ModifiableDoubleDBIDList

use of de.lmu.ifi.dbs.elki.database.ids.ModifiableDoubleDBIDList in project elki by elki-project.

the class EvaluateRetrievalPerformance method run.

/**
 * Evaluate retrieval performance over a random sample of query objects.
 *
 * For every sampled object that has at least one matching object (as
 * collected by {@code findMatches}), the ranking produced by
 * {@code computeDistances} is scored with average precision, ROC AUC and the
 * kNN evaluator. The accumulated scores are averaged over the number of
 * evaluated queries, logged as statistics, and returned.
 *
 * @param database Database to run on (used to obtain the distance query)
 * @param relation Relation for distance computations
 * @param lrelation Relation for class label comparison
 * @return Result holding the number of evaluated samples, the mean average
 *         precision, the mean ROC AUC, and the averaged kNN performance
 *         array
 * @throws AbortException if no sampled object had any match, or if the
 *         accumulated MAP / ROC sums are NaN (or negative)
 */
public RetrievalPerformanceResult run(Database database, Relation<O> relation, Relation<?> lrelation) {
    final DistanceQuery<O> distQuery = database.getDistanceQuery(relation, getDistanceFunction());
    final DBIDs queries = DBIDUtil.randomSample(relation.getDBIDs(), sampling, random);
    // Buffers that are handed to the helpers for every query object:
    // the positive (matching) objects ...
    ModifiableDBIDs positives = DBIDUtil.newHashSet();
    // ... the distance ranking ...
    ModifiableDoubleDBIDList ranking = DBIDUtil.newDistanceDBIDList(relation.size());
    // ... and the label counters used by the kNN evaluation.
    Object2IntOpenHashMap<Object> labelCounts = new Object2IntOpenHashMap<>();
    // Running sums; divided by the sample count at the end.
    double sumAP = 0., sumAUC = 0.;
    double[] knnperf = new double[maxk];
    int samples = 0;
    // Progress logging is only set up when verbose output is enabled.
    FiniteProgress progress = null;
    if (LOG.isVerbose()) {
        progress = new FiniteProgress("Processing query objects", queries.size(), LOG);
    }
    for (DBIDIter query = queries.iter(); query.valid(); query.advance()) {
        final Object label = lrelation.get(query);
        findMatches(positives, lrelation, label);
        if (positives.size() == 0) {
            // Nothing to retrieve for this query: it does not count as a sample.
            LOG.incrementProcessed(progress);
            continue;
        }
        computeDistances(ranking, query, distQuery, relation);
        // The ranking is expected to cover the whole relation, minus the query
        // object itself unless includeSelf is set.
        final int expected = includeSelf ? relation.size() : relation.size() - 1;
        if (ranking.size() != expected) {
            LOG.warning("Neighbor list does not have the desired size: " + ranking.size());
        }
        sumAP += AveragePrecisionEvaluation.STATIC.evaluate(positives, ranking);
        sumAUC += ROCEvaluation.STATIC.evaluate(positives, ranking);
        KNNEvaluator.STATIC.evaluateKNN(knnperf, ranking, lrelation, labelCounts, label);
        ++samples;
        LOG.incrementProcessed(progress);
    }
    LOG.ensureCompleted(progress);
    if (samples < 1) {
        throw new AbortException("No object matched - are labels parsed correctly?");
    }
    // Written as a negated ">=" so that NaN (which fails every comparison) is
    // rejected as well.
    if (!(sumAP >= 0 && sumAUC >= 0)) {
        throw new AbortException("NaN in MAP/ROC.");
    }
    final double map = sumAP / samples;
    final double mroc = sumAUC / samples;
    LOG.statistics(new DoubleStatistic(PREFIX + ".map", map));
    LOG.statistics(new DoubleStatistic(PREFIX + ".rocauc", mroc));
    LOG.statistics(new DoubleStatistic(PREFIX + ".samples", samples));
    for (int i = 0; i < maxk; i++) {
        knnperf[i] /= samples;
        // Statistic keys are 1-based: knn-1 .. knn-maxk.
        LOG.statistics(new DoubleStatistic(PREFIX + ".knn-" + (i + 1), knnperf[i]));
    }
    return new RetrievalPerformanceResult(samples, map, mroc, knnperf);
}
Also used : ModifiableDoubleDBIDList(de.lmu.ifi.dbs.elki.database.ids.ModifiableDoubleDBIDList) DBIDs(de.lmu.ifi.dbs.elki.database.ids.DBIDs) ModifiableDBIDs(de.lmu.ifi.dbs.elki.database.ids.ModifiableDBIDs) FiniteProgress(de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress) DBIDIter(de.lmu.ifi.dbs.elki.database.ids.DBIDIter) DoubleStatistic(de.lmu.ifi.dbs.elki.logging.statistics.DoubleStatistic) ModifiableDBIDs(de.lmu.ifi.dbs.elki.database.ids.ModifiableDBIDs) Object2IntOpenHashMap(it.unimi.dsi.fastutil.objects.Object2IntOpenHashMap) AbortException(de.lmu.ifi.dbs.elki.utilities.exceptions.AbortException)

Example 12 with ModifiableDoubleDBIDList

use of de.lmu.ifi.dbs.elki.database.ids.ModifiableDoubleDBIDList in project elki by elki-project.

the class ROCEvaluationTest method testROCCurve.

/**
 * Test ROC curve generation, including curve simplification, and check that
 * the area under the materialized curve agrees with the directly computed
 * ROC AUC.
 */
@Test
public void testROCCurve() {
    // Objects 1..5 form the positive class.
    HashSetModifiableDBIDs positive = DBIDUtil.newHashSet();
    for (int id = 1; id <= 5; id++) {
        positive.add(DBIDUtil.importInteger(id));
    }
    // Ranking, in insertion order ("+" positive, "-" negative), with the ROC
    // curve point reached after each entry where one was noted:
    //   start ......... 0.0,0.  kept
    //   0.0  #1  +      0.0,.2  redundant
    //   1.0  #2  +      0.0,.4  kept
    //   2.0  #6  -      .25,.4  kept
    //   3.0  #7  -
    //   3.0  #3  +      .50,.6  redundant
    //   4.0  #8  -
    //   4.0  #4  +      .75,.8  kept
    //   5.0  #9  -      1.0,.8  kept
    //   6.0  #5  +      1.0,1.  kept
    final double[] dist = { 0.0, 1.0, 2.0, 3.0, 3.0, 4.0, 4.0, 5.0, 6.0 };
    final int[] obj = { 1, 2, 6, 7, 3, 8, 4, 9, 5 };
    final ModifiableDoubleDBIDList distances = DBIDUtil.newDistanceDBIDList();
    for (int i = 0; i < dist.length; i++) {
        distances.add(dist[i], DBIDUtil.importInteger(obj[i]));
    }
    final DBIDsTest predicate = new DBIDsTest(positive);
    final DistanceResultAdapter ranking = new DistanceResultAdapter(distances.iter());
    XYCurve roccurve = ROCEvaluation.materializeROC(predicate, ranking);
    // Only the six "kept" points should survive simplification.
    assertEquals("ROC curve too complex", 6, roccurve.size());
    final double curveAuc = XYCurve.areaUnderCurve(roccurve);
    assertEquals("ROC AUC (curve) not correct.", 0.6, curveAuc, 1e-14);
    final double directAuc = new ROCEvaluation().evaluate(positive, distances);
    assertEquals("ROC AUC (direct) not correct.", 0.6, directAuc, 1e-14);
}
Also used : HashSetModifiableDBIDs(de.lmu.ifi.dbs.elki.database.ids.HashSetModifiableDBIDs) XYCurve(de.lmu.ifi.dbs.elki.math.geometry.XYCurve) ModifiableDoubleDBIDList(de.lmu.ifi.dbs.elki.database.ids.ModifiableDoubleDBIDList) DBIDsTest(de.lmu.ifi.dbs.elki.evaluation.scores.adapter.DBIDsTest) DistanceResultAdapter(de.lmu.ifi.dbs.elki.evaluation.scores.adapter.DistanceResultAdapter) Test(org.junit.Test) DBIDsTest(de.lmu.ifi.dbs.elki.evaluation.scores.adapter.DBIDsTest)

Example 13 with ModifiableDoubleDBIDList

use of de.lmu.ifi.dbs.elki.database.ids.ModifiableDoubleDBIDList in project elki by elki-project.

the class SortingDuplicatesTest method testDuplicateKeys.

/**
 * Sort a large list that contains only two distinct keys (0 and 1,
 * alternating), all attached to the same id. The test passes when sorting
 * finishes within the timeout.
 */
@Test(timeout = 100)
public void testDuplicateKeys() {
    // We need an id, but no real data.
    final DBID dummy = DBIDUtil.importInteger(1);
    final int n = 100000;
    ModifiableDoubleDBIDList list = DBIDUtil.newDistanceDBIDList(n);
    int added = 0;
    while (added < n) {
        // Even positions get key 0.0, odd positions get key 1.0.
        final double key = (added % 2 == 0) ? 0. : 1.;
        list.add(key, dummy);
        added++;
    }
    list.sort();
}
Also used : ModifiableDoubleDBIDList(de.lmu.ifi.dbs.elki.database.ids.ModifiableDoubleDBIDList) DBID(de.lmu.ifi.dbs.elki.database.ids.DBID) Test(org.junit.Test)

Example 14 with ModifiableDoubleDBIDList

use of de.lmu.ifi.dbs.elki.database.ids.ModifiableDoubleDBIDList in project elki by elki-project.

the class AveragePrecisionEvaluationTest method testAveragePrecision.

/**
 * Test Average Precision score computation on a small hand-built ranking.
 */
@Test
public void testAveragePrecision() {
    // Objects 1..5 form the positive class.
    HashSetModifiableDBIDs positive = DBIDUtil.newHashSet();
    for (int id = 1; id <= 5; id++) {
        positive.add(DBIDUtil.importInteger(id));
    }
    // Ranking in insertion order; precision is noted at each positive hit:
    //   0.0  #1  precision 1.0
    //   1.0  #2  precision 1.0
    //   2.0  #6
    //   3.0  #7
    //   3.0  #3  precision 0.6
    //   4.0  #8
    //   4.0  #4  precision 4/7
    //   5.0  #9
    //   6.0  #5  precision 5/9
    final double[] dist = { 0.0, 1.0, 2.0, 3.0, 3.0, 4.0, 4.0, 5.0, 6.0 };
    final int[] obj = { 1, 2, 6, 7, 3, 8, 4, 9, 5 };
    final ModifiableDoubleDBIDList distances = DBIDUtil.newDistanceDBIDList();
    for (int i = 0; i < dist.length; i++) {
        distances.add(dist[i], DBIDUtil.importInteger(obj[i]));
    }
    // (1 + 1 + .6 + 4/7. + 5/9.) / 5 = 0.7453968253968254
    final double expected = 0.7453968253968254;
    final double ap = new AveragePrecisionEvaluation().evaluate(positive, distances);
    assertEquals("Average precision not correct.", expected, ap, 1e-14);
}
Also used : HashSetModifiableDBIDs(de.lmu.ifi.dbs.elki.database.ids.HashSetModifiableDBIDs) ModifiableDoubleDBIDList(de.lmu.ifi.dbs.elki.database.ids.ModifiableDoubleDBIDList) Test(org.junit.Test)

Example 15 with ModifiableDoubleDBIDList

use of de.lmu.ifi.dbs.elki.database.ids.ModifiableDoubleDBIDList in project elki by elki-project.

the class KMeansMinusMinus method meansWithTreshhold.

/**
 * Returns the mean vectors of the given clusters in the given database,
 * ignoring every cluster member whose stored double value is at or above
 * the given threshold.
 *
 * If a cluster has no member below the threshold, the corresponding entry
 * of {@code means} is reused unchanged.
 *
 * @param clusters the clusters to compute the means
 * @param means the recent means (used as fallback for clusters without any
 *        member below the threshold)
 * @param database the database containing the vectors
 * @param tresh threshold on the value stored with each cluster member;
 *        members with a value {@code >= tresh} are skipped
 * @return the mean vectors of the given clusters in the given database
 */
protected double[][] meansWithTreshhold(List<? extends ModifiableDoubleDBIDList> clusters, double[][] means, Relation<V> database, Double tresh) {
    // TODO: use Kahan summation for better numerical precision?
    double[][] newMeans = new double[k][];
    for (int i = 0; i < k; i++) {
        DoubleDBIDList list = clusters.get(i);
        double[] raw = null;
        int count = 0;
        // Update with remaining instances
        for (DoubleDBIDListIter iter = list.iter(); iter.valid(); iter.advance()) {
            if (iter.doubleValue() >= tresh) {
                continue;
            }
            NumberVector vec = database.get(iter);
            if (raw == null) {
                // Initialize the running sum with the first accepted vector.
                // It must NOT be added again below, otherwise it would be
                // counted twice while count only grows by one.
                raw = vec.toArray();
            } else {
                for (int j = 0; j < raw.length; j++) {
                    raw[j] += vec.doubleValue(j);
                }
            }
            count++;
        }
        // raw != null implies count >= 1, so the division is safe.
        newMeans[i] = (raw != null) ? VMath.timesEquals(raw, 1.0 / count) : means[i];
    }
    return newMeans;
}
Also used : DoubleDBIDListIter(de.lmu.ifi.dbs.elki.database.ids.DoubleDBIDListIter) NumberVector(de.lmu.ifi.dbs.elki.data.NumberVector) ModifiableDoubleDBIDList(de.lmu.ifi.dbs.elki.database.ids.ModifiableDoubleDBIDList) DoubleDBIDList(de.lmu.ifi.dbs.elki.database.ids.DoubleDBIDList)

Aggregations

ModifiableDoubleDBIDList (de.lmu.ifi.dbs.elki.database.ids.ModifiableDoubleDBIDList)53 DBIDIter (de.lmu.ifi.dbs.elki.database.ids.DBIDIter)22 DoubleDBIDListIter (de.lmu.ifi.dbs.elki.database.ids.DoubleDBIDListIter)16 DBID (de.lmu.ifi.dbs.elki.database.ids.DBID)9 KNNList (de.lmu.ifi.dbs.elki.database.ids.KNNList)8 HashSetModifiableDBIDs (de.lmu.ifi.dbs.elki.database.ids.HashSetModifiableDBIDs)6 ArrayDBIDs (de.lmu.ifi.dbs.elki.database.ids.ArrayDBIDs)5 ArrayList (java.util.ArrayList)5 Test (org.junit.Test)5 ArrayModifiableDBIDs (de.lmu.ifi.dbs.elki.database.ids.ArrayModifiableDBIDs)4 DBIDs (de.lmu.ifi.dbs.elki.database.ids.DBIDs)4 DoubleDBIDPair (de.lmu.ifi.dbs.elki.database.ids.DoubleDBIDPair)4 ModifiableDBIDs (de.lmu.ifi.dbs.elki.database.ids.ModifiableDBIDs)4 FiniteProgress (de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress)3 DoubleStatistic (de.lmu.ifi.dbs.elki.logging.statistics.DoubleStatistic)3 MeanVariance (de.lmu.ifi.dbs.elki.math.MeanVariance)3 AbortException (de.lmu.ifi.dbs.elki.utilities.exceptions.AbortException)3 NumberVector (de.lmu.ifi.dbs.elki.data.NumberVector)2 DBIDArrayIter (de.lmu.ifi.dbs.elki.database.ids.DBIDArrayIter)2 DoubleDBIDListMIter (de.lmu.ifi.dbs.elki.database.ids.DoubleDBIDListMIter)2