Search in sources :

Example 26 with ModifiableDBIDs

Use of de.lmu.ifi.dbs.elki.database.ids.ModifiableDBIDs in project elki by elki-project.

In class EvaluateRetrievalPerformance, method run.

/**
 * Evaluate retrieval performance on a random sample of query objects.
 * <p>
 * For every sampled object that has at least one matching object (as
 * determined by {@code findMatches}), the distances to the other objects are
 * computed and the average precision, ROC value and kNN performance are
 * accumulated. The accumulated values are divided by the number of evaluated
 * samples, logged as statistics, and returned.
 *
 * @param database Database to run on (used to obtain the distance query)
 * @param relation Relation for distance computations
 * @param lrelation Relation for class label comparison
 * @return Result holding the number of evaluated samples, the averaged
 *         average-precision value, the averaged ROC value and the averaged
 *         per-k kNN performance values.
 * @throws AbortException if no sampled object had a match, or if an
 *         accumulated score is NaN or negative
 */
public RetrievalPerformanceResult run(Database database, Relation<O> relation, Relation<?> lrelation) {
    final DistanceQuery<O> distanceQuery = database.getDistanceQuery(relation, getDistanceFunction());
    final DBIDs queryIds = DBIDUtil.randomSample(relation.getDBIDs(), sampling, random);

    // Reusable buffers: positive (matching) objects and the distance-annotated neighbor list.
    final ModifiableDBIDs positives = DBIDUtil.newHashSet();
    final ModifiableDoubleDBIDList neighbors = DBIDUtil.newDistanceDBIDList(relation.size());
    // Label counters handed to the kNN evaluator.
    final Object2IntOpenHashMap<Object> labelCounts = new Object2IntOpenHashMap<>();

    // Running sums; divided by the number of evaluated samples at the end.
    double mapSum = 0.;
    double rocSum = 0.;
    final double[] knnperf = new double[maxk];
    int evaluated = 0;

    FiniteProgress progress = null;
    if (LOG.isVerbose()) {
        progress = new FiniteProgress("Processing query objects", queryIds.size(), LOG);
    }

    for (DBIDIter query = queryIds.iter(); query.valid(); query.advance()) {
        final Object queryLabel = lrelation.get(query);
        findMatches(positives, lrelation, queryLabel);
        // Objects without any positive match are not evaluated, but still count as processed.
        if (positives.size() > 0) {
            computeDistances(neighbors, query, distQueryOrSelf(distanceQuery), relation);
            final int expectedSize = includeSelf ? relation.size() : relation.size() - 1;
            if (neighbors.size() != expectedSize) {
                LOG.warning("Neighbor list does not have the desired size: " + neighbors.size());
            }
            mapSum += AveragePrecisionEvaluation.STATIC.evaluate(positives, neighbors);
            rocSum += ROCEvaluation.STATIC.evaluate(positives, neighbors);
            KNNEvaluator.STATIC.evaluateKNN(knnperf, neighbors, lrelation, labelCounts, queryLabel);
            evaluated++;
        }
        LOG.incrementProcessed(progress);
    }
    LOG.ensureCompleted(progress);

    if (evaluated < 1) {
        throw new AbortException("No object matched - are labels parsed correctly?");
    }
    // Written as a negated ">=" so that NaN (for which every comparison is false) is rejected too.
    if (!(mapSum >= 0 && rocSum >= 0)) {
        throw new AbortException("NaN in MAP/ROC.");
    }

    final double map = mapSum / evaluated;
    final double mroc = rocSum / evaluated;
    LOG.statistics(new DoubleStatistic(PREFIX + ".map", map));
    LOG.statistics(new DoubleStatistic(PREFIX + ".rocauc", mroc));
    LOG.statistics(new DoubleStatistic(PREFIX + ".samples", evaluated));
    for (int i = 0; i < maxk; i++) {
        knnperf[i] /= evaluated;
        // Statistic names are 1-based: index 0 is reported as "knn-1".
        LOG.statistics(new DoubleStatistic(PREFIX + ".knn-" + (i + 1), knnperf[i]));
    }
    return new RetrievalPerformanceResult(evaluated, map, mroc, knnperf);
}

/**
 * Identity helper that passes the distance query through unchanged.
 *
 * @param query distance query
 * @return the same query instance
 */
private DistanceQuery<O> distQueryOrSelf(DistanceQuery<O> query) {
    return query;
}
Also used : ModifiableDoubleDBIDList(de.lmu.ifi.dbs.elki.database.ids.ModifiableDoubleDBIDList) DBIDs(de.lmu.ifi.dbs.elki.database.ids.DBIDs) ModifiableDBIDs(de.lmu.ifi.dbs.elki.database.ids.ModifiableDBIDs) FiniteProgress(de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress) DBIDIter(de.lmu.ifi.dbs.elki.database.ids.DBIDIter) DoubleStatistic(de.lmu.ifi.dbs.elki.logging.statistics.DoubleStatistic) ModifiableDBIDs(de.lmu.ifi.dbs.elki.database.ids.ModifiableDBIDs) Object2IntOpenHashMap(it.unimi.dsi.fastutil.objects.Object2IntOpenHashMap) AbortException(de.lmu.ifi.dbs.elki.utilities.exceptions.AbortException)

Example 27 with ModifiableDBIDs

Use of de.lmu.ifi.dbs.elki.database.ids.ModifiableDBIDs in project elki by elki-project.

In class HopkinsStatisticClusteringTendency, method computeNNForRealData.

/**
 * Aggregate nearest-neighbor distances for a random sample of <em>real</em>
 * data members.
 * <p>
 * For each sampled object, the kNN distance of a {@code k + 1} query is raised
 * to the power {@code dim} and added to the total.
 * NOTE(review): the {@code + 1} presumably compensates for the query object
 * being its own nearest neighbor - confirm against the query semantics.
 *
 * @param knnQuery KNN query
 * @param relation Data relation
 * @param dim Exponent applied to each distance (presumably the data
 *        dimensionality - confirm with caller)
 * @return Sum of the exponentiated kNN distances over the sample
 */
protected double computeNNForRealData(final KNNQuery<NumberVector> knnQuery, Relation<NumberVector> relation, final int dim) {
    final ModifiableDBIDs sample = DBIDUtil.randomSample(relation.getDBIDs(), sampleSize, random);
    double total = 0.;
    DBIDIter it = sample.iter();
    while (it.valid()) {
        total += MathUtil.powi(knnQuery.getKNNForDBID(it, k + 1).getKNNDistance(), dim);
        it.advance();
    }
    return total;
}
Also used : ModifiableDBIDs(de.lmu.ifi.dbs.elki.database.ids.ModifiableDBIDs) DBIDIter(de.lmu.ifi.dbs.elki.database.ids.DBIDIter)

Example 28 with ModifiableDBIDs

Use of de.lmu.ifi.dbs.elki.database.ids.ModifiableDBIDs in project elki by elki-project.

In class OutlierThresholdClustering, method split.

/**
 * Partition the objects of an outlier result into clusters by comparing their
 * (optionally scaled) scores against the configured thresholds.
 * <p>
 * An object is placed into the first bucket {@code i} for which
 * {@code score < threshold[i]}; objects not below any threshold go into the
 * last bucket. Bucket 0 is named "Inlier", bucket {@code i > 0} is named
 * "Outlier_" followed by {@code threshold[i - 1]}.
 *
 * @param or Outlier result providing the scores
 * @return Clustering with one top-level cluster per bucket
 */
private Clustering<Model> split(OutlierResult or) {
    final DoubleRelation scores = or.getScores();
    // Outlier scaling functions must be prepared with the result before use.
    if (scaling instanceof OutlierScalingFunction) {
        ((OutlierScalingFunction) scaling).prepare(or);
    }

    // One bucket per threshold, plus one for everything below the first threshold.
    final int buckets = threshold.length + 1;
    final ArrayList<ModifiableDBIDs> partitions = new ArrayList<>(buckets);
    for (int b = 0; b < buckets; b++) {
        partitions.add(DBIDUtil.newHashSet());
    }

    for (DBIDIter it = scores.getDBIDs().iter(); it.valid(); it.advance()) {
        final double raw = scores.doubleValue(it);
        final double score = (scaling != null) ? scaling.getScaled(raw) : raw;
        // Advance while the score is NOT below the current threshold; the
        // negated "<" keeps NaN scores moving on to the last bucket.
        int bucket = 0;
        while (bucket < threshold.length && !(score < threshold[bucket])) {
            bucket++;
        }
        partitions.get(bucket).add(it);
    }

    final Clustering<Model> clustering = new Clustering<>("Outlier threshold clustering", "threshold-clustering");
    for (int b = 0; b < buckets; b++) {
        // The boolean passed to the Cluster constructor is true for every bucket except the first.
        final boolean aboveFirst = b > 0;
        final String name = aboveFirst ? "Outlier_" + threshold[b - 1] : "Inlier";
        clustering.addToplevelCluster(new Cluster<>(name, partitions.get(b), aboveFirst));
    }
    return clustering;
}
Also used : OutlierScalingFunction(de.lmu.ifi.dbs.elki.utilities.scaling.outlier.OutlierScalingFunction) ArrayList(java.util.ArrayList) Clustering(de.lmu.ifi.dbs.elki.data.Clustering) DoubleRelation(de.lmu.ifi.dbs.elki.database.relation.DoubleRelation) DBIDIter(de.lmu.ifi.dbs.elki.database.ids.DBIDIter) Model(de.lmu.ifi.dbs.elki.data.model.Model) ModifiableDBIDs(de.lmu.ifi.dbs.elki.database.ids.ModifiableDBIDs)

Example 29 with ModifiableDBIDs

Use of de.lmu.ifi.dbs.elki.database.ids.ModifiableDBIDs in project elki by elki-project.

In class DiSHPreferenceVectorIndex, method max.

/**
 * Returns the key of the set with the maximum size contained in the specified
 * map.
 * <p>
 * If several sets share the maximum size, the key encountered first in the
 * map's iteration order is returned.
 *
 * @param candidates the map containing the sets; must not be empty
 * @return the key whose associated set has the maximum size
 * @throws IllegalArgumentException if {@code candidates} is empty
 */
private int max(Map<Integer, ModifiableDBIDs> candidates) {
    Integer maxDim = null;
    // -1 is below every possible set size, so the first entry is always accepted.
    int maxSize = -1;
    // Iterate entries directly instead of looking every key up again.
    for (Map.Entry<Integer, ModifiableDBIDs> entry : candidates.entrySet()) {
        final int size = entry.getValue().size();
        if (maxSize < size) {
            maxSize = size;
            maxDim = entry.getKey();
        }
    }
    if (maxSize < 0) {
        // Previously this surfaced as a message-less NullPointerException
        // when the null Integer was unboxed on return.
        throw new IllegalArgumentException("Cannot determine the maximum of an empty candidate map.");
    }
    return maxDim;
}
Also used : DBIDs(de.lmu.ifi.dbs.elki.database.ids.DBIDs) ModifiableDBIDs(de.lmu.ifi.dbs.elki.database.ids.ModifiableDBIDs)

Example 30 with ModifiableDBIDs

Use of de.lmu.ifi.dbs.elki.database.ids.ModifiableDBIDs in project elki by elki-project.

In class DiSHPreferenceVectorIndex, method maxIntersection.

/**
 * Returns the key of the set having the maximum intersection with the
 * specified set, and stores that intersection in {@code result}.
 * <p>
 * Only intersections strictly larger than the initial size of {@code result}
 * are considered; among those, the first one of maximum size (in the map's
 * iteration order) wins. On success, the previous contents of {@code result}
 * are replaced by the winning intersection.
 *
 * @param candidates the map containing the sets
 * @param set the set to intersect with
 * @param result the set to put the result in
 * @return the key of the candidate set with the largest intersection
 * @throws IllegalArgumentException if no candidate yields an intersection
 *         larger than the initial size of {@code result} (in particular, if
 *         {@code candidates} is empty); {@code result} is left unchanged
 */
private int maxIntersection(Map<Integer, ModifiableDBIDs> candidates, DBIDs set, ModifiableDBIDs result) {
    Integer maxDim = null;
    ModifiableDBIDs best = null;
    int bestSize = result.size();
    for (Map.Entry<Integer, ModifiableDBIDs> entry : candidates.entrySet()) {
        final ModifiableDBIDs intersection = DBIDUtil.intersection(set, entry.getValue());
        if (bestSize < intersection.size()) {
            best = intersection;
            bestSize = intersection.size();
            maxDim = entry.getKey();
        }
    }
    if (best == null) {
        // Previously this surfaced as a message-less NullPointerException
        // when the null Integer was unboxed on return.
        throw new IllegalArgumentException("No candidate set has an intersection larger than " + bestSize + " elements.");
    }
    // Assigning to the parameter would only rebind the local reference and
    // never reach the caller, so copy the winner into the caller's set.
    // This happens after the loop so that it is safe even if result aliases set.
    result.clear();
    result.addDBIDs(best);
    return maxDim;
}
Also used : DBIDs(de.lmu.ifi.dbs.elki.database.ids.DBIDs) ModifiableDBIDs(de.lmu.ifi.dbs.elki.database.ids.ModifiableDBIDs) ModifiableDBIDs(de.lmu.ifi.dbs.elki.database.ids.ModifiableDBIDs)

Aggregations

ModifiableDBIDs (de.lmu.ifi.dbs.elki.database.ids.ModifiableDBIDs)80 DBIDIter (de.lmu.ifi.dbs.elki.database.ids.DBIDIter)44 Clustering (de.lmu.ifi.dbs.elki.data.Clustering)30 ArrayList (java.util.ArrayList)30 DBIDs (de.lmu.ifi.dbs.elki.database.ids.DBIDs)28 ArrayModifiableDBIDs (de.lmu.ifi.dbs.elki.database.ids.ArrayModifiableDBIDs)18 FiniteProgress (de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress)15 WritableIntegerDataStore (de.lmu.ifi.dbs.elki.database.datastore.WritableIntegerDataStore)14 IndefiniteProgress (de.lmu.ifi.dbs.elki.logging.progress.IndefiniteProgress)14 DoubleStatistic (de.lmu.ifi.dbs.elki.logging.statistics.DoubleStatistic)12 LongStatistic (de.lmu.ifi.dbs.elki.logging.statistics.LongStatistic)12 Model (de.lmu.ifi.dbs.elki.data.model.Model)11 DBID (de.lmu.ifi.dbs.elki.database.ids.DBID)11 KMeansModel (de.lmu.ifi.dbs.elki.data.model.KMeansModel)10 StringStatistic (de.lmu.ifi.dbs.elki.logging.statistics.StringStatistic)10 Cluster (de.lmu.ifi.dbs.elki.data.Cluster)9 WritableDoubleDataStore (de.lmu.ifi.dbs.elki.database.datastore.WritableDoubleDataStore)9 HashSetModifiableDBIDs (de.lmu.ifi.dbs.elki.database.ids.HashSetModifiableDBIDs)8 KNNList (de.lmu.ifi.dbs.elki.database.ids.KNNList)8 ClusterModel (de.lmu.ifi.dbs.elki.data.model.ClusterModel)7