Use of de.lmu.ifi.dbs.elki.result.CollectionResult in project elki by elki-project.
The class AveragePrecisionAtK, method run.
/**
* Run the algorithm
*
* @param database Database to run on (for kNN queries)
* @param relation Relation for distance computations
* @param lrelation Relation for class label comparison
* @return Per-rank rows containing mean, standard deviation, minimum, maximum, and count.
*/
public CollectionResult<double[]> run(Database database, Relation<O> relation, Relation<?> lrelation) {
  final DistanceQuery<O> distQuery = database.getDistanceQuery(relation, getDistanceFunction());
  final int qk = k + (includeSelf ? 0 : 1);
  final KNNQuery<O> knnQuery = database.getKNNQuery(distQuery, qk);
  MeanVarianceMinMax[] mvs = MeanVarianceMinMax.newArray(k);
  final DBIDs ids = DBIDUtil.randomSample(relation.getDBIDs(), sampling, random);
  FiniteProgress objloop = LOG.isVerbose() ? new FiniteProgress("Computing nearest neighbors", ids.size(), LOG) : null;
  // Compute precision at each rank 1..k for every sampled query object.
  for (DBIDIter iter = ids.iter(); iter.valid(); iter.advance()) {
    KNNList knn = knnQuery.getKNNForDBID(iter, qk);
    Object label = lrelation.get(iter);
    int positive = 0, i = 0;
    for (DBIDIter ri = knn.iter(); i < k && ri.valid(); ri.advance()) {
      if (!includeSelf && DBIDUtil.equal(iter, ri)) {
        // Skip the query object itself; do not increment i.
        continue;
      }
      positive += match(label, lrelation.get(ri)) ? 1 : 0;
      final double precision = positive / (double) (i + 1);
      mvs[i].put(precision);
      i++;
    }
    LOG.incrementProcessed(objloop);
  }
  LOG.ensureCompleted(objloop);
  // Collect per-rank statistics (rank, mean, stddev, min, max, count) as result rows.
  Collection<double[]> res = new ArrayList<>(k);
  for (int i = 0; i < k; i++) {
    final MeanVarianceMinMax mv = mvs[i];
    final double std = mv.getCount() > 1. ? mv.getSampleStddev() : 0.;
    res.add(new double[] { i + 1, mv.getMean(), std, mv.getMin(), mv.getMax(), mv.getCount() });
  }
  return new CollectionResult<>("Average Precision", "average-precision", res);
}
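
The inner loop above only needs a running count of matching labels to obtain the precision at every rank. The following self-contained sketch reproduces that bookkeeping without any ELKI dependencies; the class PrecisionAtKSketch and its methods are illustrative names only, not ELKI API, and the real implementation additionally tracks variance, minimum and maximum per rank via MeanVarianceMinMax.

import java.util.Arrays;

/** Illustrative sketch: accumulate mean precision@i over many queries (not ELKI API). */
public class PrecisionAtKSketch {
  private final double[] sums;  // sum of precision@(i+1) over all queries
  private final long[] counts;  // number of queries contributing at rank i+1
  private final int k;

  public PrecisionAtKSketch(int k) {
    this.k = k;
    this.sums = new double[k];
    this.counts = new long[k];
  }

  /** relevant[i] tells whether the i-th nearest neighbor shares the query's label. */
  public void accumulate(boolean[] relevant) {
    int positive = 0;
    for (int i = 0; i < Math.min(k, relevant.length); i++) {
      if (relevant[i]) {
        positive++;
      }
      sums[i] += positive / (double) (i + 1); // precision at rank i+1
      counts[i]++;
    }
  }

  /** Mean precision at each rank 1..k, analogous to the mean column of the ELKI result. */
  public double[] means() {
    double[] means = new double[k];
    for (int i = 0; i < k; i++) {
      means[i] = counts[i] > 0 ? sums[i] / counts[i] : Double.NaN;
    }
    return means;
  }

  public static void main(String[] args) {
    PrecisionAtKSketch p = new PrecisionAtKSketch(3);
    p.accumulate(new boolean[] { true, false, true });
    p.accumulate(new boolean[] { true, true, false });
    System.out.println(Arrays.toString(p.means())); // approximately [1.0, 0.75, 0.6667]
  }
}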
Use of de.lmu.ifi.dbs.elki.result.CollectionResult in project elki by elki-project.
The class DistanceQuantileSampler, method run.
/**
* Run the distance quantile sampler.
*
* @param database Database to run on
* @param rel Relation to process (source of objects and distances)
* @return Distances sample
*/
public CollectionResult<double[]> run(Database database, Relation<O> rel) {
  DistanceQuery<O> dq = rel.getDistanceQuery(getDistanceFunction());
  int size = rel.size();
  long pairs = (size * (long) size) >> 1;
  final long ssize = sampling <= 1 ? (long) Math.ceil(sampling * pairs) : (long) sampling;
  if (ssize > Integer.MAX_VALUE) {
    throw new AbortException("Sampling size too large.");
  }
  final int qsize = quantile <= 0 ? 1 : (int) Math.ceil(quantile * ssize);
  DoubleMaxHeap heap = new DoubleMaxHeap(qsize);
  ArrayDBIDs ids = DBIDUtil.ensureArray(rel.getDBIDs());
  DBIDArrayIter i1 = ids.iter(), i2 = ids.iter();
  Random r = rand.getSingleThreadedRandom();
  FiniteProgress prog = LOG.isVerbose() ? new FiniteProgress("Sampling", (int) ssize, LOG) : null;
  for (long i = 0; i < ssize; i++) {
    // Draw a random pair with x > y, so an object is never compared to itself.
    int x = r.nextInt(size - 1) + 1, y = r.nextInt(x);
    double dist = dq.distance(i1.seek(x), i2.seek(y));
    // Skip NaN values and, if requested, (near-)zero distances.
    if (dist != dist || (nozeros && dist < Double.MIN_NORMAL)) {
      continue;
    }
    // Keep only the qsize smallest distances; the heap root is then the sample quantile.
    heap.add(dist, qsize);
    LOG.incrementProcessed(prog);
  }
  LOG.statistics(new DoubleStatistic(PREFIX + ".quantile", quantile));
  LOG.statistics(new LongStatistic(PREFIX + ".samplesize", ssize));
  LOG.statistics(new DoubleStatistic(PREFIX + ".distance", heap.peek()));
  LOG.ensureCompleted(prog);
  Collection<String> header = Arrays.asList(new String[] { "Distance" });
  Collection<double[]> data = Arrays.asList(new double[][] { new double[] { heap.peek() } });
  return new CollectionResult<double[]>("Distances sample", "distance-sample", data, header);
}
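
The call heap.add(dist, qsize) bounds the max-heap to the qsize smallest sampled distances, so heap.peek() yields the requested quantile of the sample. Below is a dependency-free sketch of that bounded-heap selection using java.util.PriorityQueue; QuantileSketch and quantileOfSample are illustrative names and not part of ELKI.

import java.util.Collections;
import java.util.PriorityQueue;
import java.util.Random;

/** Illustrative sketch of bounded max-heap quantile selection (not ELKI API). */
public class QuantileSketch {
  /**
   * Keep only the q smallest values in a max-heap of capacity q;
   * its root is then the q-th smallest value, i.e. the sample quantile.
   */
  public static double quantileOfSample(double[] sample, double quantile) {
    int q = Math.max(1, (int) Math.ceil(quantile * sample.length));
    PriorityQueue<Double> maxHeap = new PriorityQueue<>(q, Collections.reverseOrder());
    for (double v : sample) {
      if (maxHeap.size() < q) {
        maxHeap.offer(v);
      } else if (v < maxHeap.peek()) {
        maxHeap.poll(); // drop the largest of the kept values
        maxHeap.offer(v);
      }
    }
    return maxHeap.peek();
  }

  public static void main(String[] args) {
    Random r = new Random(0);
    double[] sample = new double[10000];
    for (int i = 0; i < sample.length; i++) {
      sample[i] = r.nextDouble();
    }
    // For uniform [0,1) data, the 0.1 quantile should come out close to 0.1.
    System.out.println(quantileOfSample(sample, 0.1));
  }
}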