use of de.lmu.ifi.dbs.elki.database.ids.ModifiableDBIDs in project elki by elki-project.
the class EvaluateRetrievalPerformance method run.
/**
 * Evaluate retrieval performance on a random sample of query objects.
 * <p>
 * For each sampled object, the set of positive objects for its label is
 * obtained via {@code findMatches} and the distances from the query via
 * {@code computeDistances}. Queries with at least one positive contribute to
 * the average precision, ROC AUC and kNN performance sums, which are finally
 * divided by the number of contributing queries.
 *
 * @param database Database used to obtain the distance query
 * @param relation Relation for distance computations
 * @param lrelation Relation for class label comparison
 * @return Result holding the number of evaluated queries, the mean average
 *         precision, the mean ROC AUC and the averaged kNN performance
 * @throws AbortException if no query had any positive object, or if the
 *         accumulated MAP / ROC value fails the {@code >= 0} check (e.g. NaN)
 */
public RetrievalPerformanceResult run(Database database, Relation<O> relation, Relation<?> lrelation) {
  final DistanceQuery<O> distQuery = database.getDistanceQuery(relation, getDistanceFunction());
  final DBIDs ids = DBIDUtil.randomSample(relation.getDBIDs(), sampling, random);

  // Buffers that are handed to the helpers for every query object.
  ModifiableDBIDs positives = DBIDUtil.newHashSet();
  ModifiableDoubleDBIDList neighbors = DBIDUtil.newDistanceDBIDList(relation.size());
  Object2IntOpenHashMap<Object> labelCounts = new Object2IntOpenHashMap<>();

  // Running sums; averaged once the loop has finished.
  double sumAveragePrecision = 0.;
  double sumRocAuc = 0.;
  double[] knnperf = new double[maxk];
  int samples = 0;

  FiniteProgress progress = null;
  if (LOG.isVerbose()) {
    progress = new FiniteProgress("Processing query objects", ids.size(), LOG);
  }

  for (DBIDIter query = ids.iter(); query.valid(); query.advance()) {
    final Object label = lrelation.get(query);
    findMatches(positives, lrelation, label);
    if (positives.size() <= 0) {
      // Nothing to retrieve for this label: skip, but still count progress.
      LOG.incrementProcessed(progress);
      continue;
    }
    computeDistances(neighbors, query, distQuery, relation);
    final int expectedSize = relation.size() - (includeSelf ? 0 : 1);
    if (neighbors.size() != expectedSize) {
      LOG.warning("Neighbor list does not have the desired size: " + neighbors.size());
    }
    sumAveragePrecision += AveragePrecisionEvaluation.STATIC.evaluate(positives, neighbors);
    sumRocAuc += ROCEvaluation.STATIC.evaluate(positives, neighbors);
    KNNEvaluator.STATIC.evaluateKNN(knnperf, neighbors, lrelation, labelCounts, label);
    ++samples;
    LOG.incrementProcessed(progress);
  }
  LOG.ensureCompleted(progress);

  if (samples < 1) {
    throw new AbortException("No object matched - are labels parsed correctly?");
  }
  // Written as a negated ">=" so that NaN sums are rejected as well.
  if (!(sumAveragePrecision >= 0 && sumRocAuc >= 0)) {
    throw new AbortException("NaN in MAP/ROC.");
  }

  final double map = sumAveragePrecision / samples;
  final double mroc = sumRocAuc / samples;
  LOG.statistics(new DoubleStatistic(PREFIX + ".map", map));
  LOG.statistics(new DoubleStatistic(PREFIX + ".rocauc", mroc));
  LOG.statistics(new DoubleStatistic(PREFIX + ".samples", samples));
  for (int k = 0; k < maxk; k++) {
    knnperf[k] /= samples;
    LOG.statistics(new DoubleStatistic(PREFIX + ".knn-" + (k + 1), knnperf[k]));
  }
  return new RetrievalPerformanceResult(samples, map, mroc, knnperf);
}
use of de.lmu.ifi.dbs.elki.database.ids.ModifiableDBIDs in project elki by elki-project.
the class HopkinsStatisticClusteringTendency method computeNNForRealData.
/**
 * Aggregate nearest neighbor distances for a random sample of <em>real</em>
 * data members.
 * <p>
 * For every sampled object the distance of its {@code k + 1} nearest
 * neighbors query is raised to the power {@code dim} and summed up.
 * NOTE(review): {@code k + 1} presumably compensates for the query object
 * being its own nearest neighbor - confirm against the kNN query semantics.
 *
 * @param knnQuery KNN query
 * @param relation Data relation
 * @param dim Exponent applied to each distance (presumably the data
 *        dimensionality - verify against the caller)
 * @return Sum of the sampled kNN distances, each raised to the power
 *         {@code dim}
 */
protected double computeNNForRealData(final KNNQuery<NumberVector> knnQuery, Relation<NumberVector> relation, final int dim) {
  final ModifiableDBIDs sample = DBIDUtil.randomSample(relation.getDBIDs(), sampleSize, random);
  double sum = 0.;
  DBIDIter it = sample.iter();
  while (it.valid()) {
    sum += MathUtil.powi(knnQuery.getKNNForDBID(it, k + 1).getKNNDistance(), dim);
    it.advance();
  }
  return sum;
}
use of de.lmu.ifi.dbs.elki.database.ids.ModifiableDBIDs in project elki by elki-project.
the class OutlierThresholdClustering method split.
/**
 * Partition the scored objects into one cluster per threshold interval.
 * <p>
 * Each object is assigned to the bucket of the first threshold its
 * (optionally scaled) score is strictly below; objects that are below no
 * threshold end up in the last bucket. Bucket 0 is named "Inlier", bucket
 * {@code i > 0} is named "Outlier_" followed by {@code threshold[i - 1]}.
 * NOTE(review): this assumes the thresholds are in ascending order - confirm
 * where {@code threshold} is initialized.
 *
 * @param or Outlier result providing the scores
 * @return Clustering with {@code threshold.length + 1} top level clusters
 */
private Clustering<Model> split(OutlierResult or) {
  final DoubleRelation scores = or.getScores();
  if (scaling instanceof OutlierScalingFunction) {
    ((OutlierScalingFunction) scaling).prepare(or);
  }

  // One bucket per threshold, plus one for scores below the first threshold.
  final int numBuckets = threshold.length + 1;
  ArrayList<ModifiableDBIDs> buckets = new ArrayList<>(numBuckets);
  for (int b = 0; b < numBuckets; b++) {
    buckets.add(DBIDUtil.newHashSet());
  }

  for (DBIDIter it = scores.getDBIDs().iter(); it.valid(); it.advance()) {
    final double raw = scores.doubleValue(it);
    final double score = (scaling != null) ? scaling.getScaled(raw) : raw;
    int bucket = 0;
    // Negated "<" on purpose: a NaN score is below no threshold and therefore
    // falls through to the last bucket.
    while (bucket < threshold.length && !(score < threshold[bucket])) {
      bucket++;
    }
    buckets.get(bucket).add(it);
  }

  Clustering<Model> clustering = new Clustering<>("Outlier threshold clustering", "threshold-clustering");
  // The first bucket is the only one constructed with the flag set to false.
  clustering.addToplevelCluster(new Cluster<>("Inlier", buckets.get(0), false));
  for (int b = 1; b < numBuckets; b++) {
    clustering.addToplevelCluster(new Cluster<>("Outlier_" + threshold[b - 1], buckets.get(b), true));
  }
  return clustering;
}
use of de.lmu.ifi.dbs.elki.database.ids.ModifiableDBIDs in project elki by elki-project.
the class DiSHPreferenceVectorIndex method max.
/**
 * Returns the key (dimension) of the largest set contained in the specified
 * map.
 * <p>
 * On ties, the entry encountered first in the iteration order of the map is
 * chosen.
 *
 * @param candidates the map containing the sets, must not be empty
 * @return the key of the set with the maximum size
 * @throws IllegalArgumentException if {@code candidates} is empty
 */
private int max(Map<Integer, ModifiableDBIDs> candidates) {
  // Fail with a clear message instead of an unboxing NullPointerException.
  if (candidates.isEmpty()) {
    throw new IllegalArgumentException("Cannot determine the largest set of an empty candidate map.");
  }
  Integer maxDim = null;
  // -1 guarantees that the first entry is selected, even if its set is empty.
  int maxSize = -1;
  for (Map.Entry<Integer, ModifiableDBIDs> entry : candidates.entrySet()) {
    final int size = entry.getValue().size();
    if (maxSize < size) {
      maxSize = size;
      maxDim = entry.getKey();
    }
  }
  return maxDim;
}
use of de.lmu.ifi.dbs.elki.database.ids.ModifiableDBIDs in project elki by elki-project.
the class DiSHPreferenceVectorIndex method maxIntersection.
/**
 * Returns the key (dimension) of the candidate set having the largest
 * intersection with the specified set, and stores that intersection in
 * {@code result}.
 * <p>
 * On ties, the entry encountered first in the iteration order of the map is
 * chosen. If all intersections are empty, the first entry is chosen and
 * {@code result} is left empty.
 *
 * @param candidates the map containing the sets, must not be empty
 * @param set the set to intersect with
 * @param result output set; its previous contents are replaced by the
 *        largest intersection found
 * @return the key of the candidate set with the largest intersection
 * @throws IllegalArgumentException if {@code candidates} is empty
 */
private int maxIntersection(Map<Integer, ModifiableDBIDs> candidates, DBIDs set, ModifiableDBIDs result) {
  // Fail with a clear message instead of an unboxing NullPointerException.
  if (candidates.isEmpty()) {
    throw new IllegalArgumentException("Cannot determine the maximum intersection with an empty candidate map.");
  }
  Integer maxDim = null;
  ModifiableDBIDs best = null;
  // -1 guarantees that a candidate is selected even if every intersection is
  // empty, so the return value is always a valid key.
  int bestSize = -1;
  for (Map.Entry<Integer, ModifiableDBIDs> entry : candidates.entrySet()) {
    final ModifiableDBIDs intersection = DBIDUtil.intersection(set, entry.getValue());
    if (bestSize < intersection.size()) {
      best = intersection;
      bestSize = intersection.size();
      maxDim = entry.getKey();
    }
  }
  // Assigning to the parameter would be invisible to the caller; copy the
  // winning intersection into the caller's set instead. This is done only
  // after the loop, so 'result' may safely be the same object as 'set'.
  // NOTE(review): assumes DBIDUtil.intersection returns a set that does not
  // alias 'result' - confirm.
  result.clear();
  result.addDBIDs(best);
  return maxDim;
}
Aggregations