use of de.lmu.ifi.dbs.elki.logging.statistics.DoubleStatistic in project elki by elki-project.
the class OutlierRankingEvaluation method evaluateOrderingResult.
/**
 * Evaluate a full ranking against the set of known positive objects.
 * <p>
 * Four measures (ROC AUC, average precision, R-precision, maximum F1) are
 * computed on the ordering, each on a fresh iterator over {@code order}. A
 * second group reports the same measures rescaled so that the value expected
 * by chance maps to 0. If statistics logging is enabled, all eight values are
 * additionally logged below the prefix {@code key}.
 *
 * @param size Expected number of ranked objects; must equal {@code order.size()}
 * @param positiveids Objects counted as positives
 * @param order Ranking to evaluate
 * @return Evaluation result holding both measurement groups
 * @throws IllegalStateException if the ordering does not contain exactly
 *         {@code size} objects
 */
private EvaluationResult evaluateOrderingResult(int size, SetDBIDs positiveids, DBIDs order) {
  if (size != order.size()) {
    throw new IllegalStateException("Iterable result doesn't match database size - incomplete ordering?");
  }
  final EvaluationResult result = new EvaluationResult("Evaluation of ranking", "ranking-evaluation");
  final DBIDsTest positives = new DBIDsTest(positiveids);
  // Fraction of positives; used as the reference value for the
  // precision-based measures below.
  final double expected = positiveids.size() / (double) size;

  // Raw measures, each computed on its own pass over the ranking.
  final double auc = ROCEvaluation.STATIC.evaluate(positives, new SimpleAdapter(order.iter()));
  final double avgPrec = AveragePrecisionEvaluation.STATIC.evaluate(positives, new SimpleAdapter(order.iter()));
  final double rPrec = PrecisionAtKEvaluation.RPRECISION.evaluate(positives, new SimpleAdapter(order.iter()));
  final double bestF1 = MaximumF1Evaluation.STATIC.evaluate(positives, new SimpleAdapter(order.iter()));

  // Rescaled variants: reference value becomes 0, the maximum stays 1.
  // NOTE(review): the denominator is 0 if every object is a positive.
  final double headroom = 1 - expected;
  final double adjAuc = 2 * auc - 1;
  final double adjAvgPrec = (avgPrec - expected) / headroom;
  final double adjRPrec = (rPrec - expected) / headroom;
  final double adjBestF1 = (bestF1 - expected) / headroom;

  final MeasurementGroup raw = result.newGroup("Evaluation measures:");
  raw.addMeasure("ROC AUC", auc, 0., 1., .5, false);
  raw.addMeasure("Average Precision", avgPrec, 0., 1., expected, false);
  raw.addMeasure("R-Precision", rPrec, 0., 1., expected, false);
  raw.addMeasure("Maximum F1", bestF1, 0., 1., expected, false);

  final MeasurementGroup adjusted = result.newGroup("Adjusted for chance:");
  adjusted.addMeasure("Adjusted AUC", adjAuc, 0., 1., 0., false);
  adjusted.addMeasure("Adjusted AveP", adjAvgPrec, 0., 1., 0., false);
  adjusted.addMeasure("Adjusted R-Prec", adjRPrec, 0., 1., 0., false);
  adjusted.addMeasure("Adjusted Max F1", adjBestF1, 0., 1., 0., false);

  if (!LOG.isStatistics()) {
    return result;
  }
  LOG.statistics(new DoubleStatistic(key + ".rocauc", auc));
  LOG.statistics(new DoubleStatistic(key + ".rocauc.adjusted", adjAuc));
  LOG.statistics(new DoubleStatistic(key + ".precision.average", avgPrec));
  LOG.statistics(new DoubleStatistic(key + ".precision.average.adjusted", adjAvgPrec));
  LOG.statistics(new DoubleStatistic(key + ".precision.r", rPrec));
  LOG.statistics(new DoubleStatistic(key + ".precision.r.adjusted", adjRPrec));
  LOG.statistics(new DoubleStatistic(key + ".f1.maximum", bestF1));
  LOG.statistics(new DoubleStatistic(key + ".f1.maximum.adjusted", adjBestF1));
  return result;
}
use of de.lmu.ifi.dbs.elki.logging.statistics.DoubleStatistic in project elki by elki-project.
the class IntrinsicNearestNeighborAffinityMatrixBuilder method computePij.
/**
 * Compute the sparse pij using the nearest neighbors only.
 * <p>
 * For every object, {@code numberOfNeighbours + 1} nearest neighbors are
 * queried, converted into distance and index scratch arrays, and passed to
 * {@code computeSigma}, which fills the freshly allocated row of {@code pij}.
 * Afterwards the rows are symmetrized: an entry is kept only if both objects
 * list each other as neighbors, in which case both entries are set to the
 * scaled geometric mean of the two values (bounded below by {@code MIN_PIJ});
 * all other entries are set to 0.
 *
 * @param ids ID range
 * @param knnq kNN query
 * @param square Use squared distances
 * @param numberOfNeighbours Number of neighbors to get (the query itself
 *        requests one more than this)
 * @param pij Output: one newly allocated row of affinities per object
 * @param indices Output: one row of neighbor indexes per object, parallel to
 *        the corresponding row of {@code pij}
 * @param initialScale Initial scaling factor
 */
protected void computePij(DBIDRange ids, KNNQuery<?> knnq, boolean square, int numberOfNeighbours, double[][] pij, int[][] indices, double initialScale) {
  Duration timer = LOG.isStatistics() ? LOG.newDuration(this.getClass().getName() + ".runtime.neighborspijmatrix").begin() : null;
  final double logPerp = FastMath.log(perplexity);
  // Scratch arrays, resizable
  DoubleArray dists = new DoubleArray(numberOfNeighbours + 10);
  IntegerArray inds = new IntegerArray(numberOfNeighbours + 10);
  // Compute nearest-neighbor sparse affinity matrix
  FiniteProgress prog = LOG.isVerbose() ? new FiniteProgress("Finding neighbors and optimizing perplexity", ids.size(), LOG) : null;
  // Statistics accumulators are only allocated when statistics are logged.
  MeanVariance mv = LOG.isStatistics() ? new MeanVariance() : null;
  Mean mid = LOG.isStatistics() ? new Mean() : null;
  for (DBIDArrayIter ix = ids.iter(); ix.valid(); ix.advance()) {
    dists.clear();
    inds.clear();
    KNNList neighbours = knnq.getKNNForDBID(ix, numberOfNeighbours + 1);
    convertNeighbors(ids, ix, square, neighbours, dists, inds, mid);
    // The output row is allocated here, sized to the converted neighbors.
    double beta = computeSigma(//
        ix.getOffset(), //
        dists, //
        perplexity, //
        logPerp, pij[ix.getOffset()] = new double[dists.size()]);
    if (mv != null) {
      // Sigma
      mv.put(beta > 0 ? FastMath.sqrt(.5 / beta) : 0.);
    }
    indices[ix.getOffset()] = inds.toArray();
    LOG.incrementProcessed(prog);
  }
  LOG.ensureCompleted(prog);
  if (mid != null) {
    // Use the class *name* for the key: concatenating the Class object itself
    // would go through Class.toString() and prefix the key with "class ".
    LOG.statistics(new DoubleStatistic(this.getClass().getName() + ".average-original-id", mid.getMean()));
  }
  // Sum of the sparse affinity matrix:
  double sum = 0.;
  for (int i = 0; i < pij.length; i++) {
    final double[] pij_i = pij[i];
    for (int offi = 0; offi < pij_i.length; offi++) {
      int j = indices[i][offi];
      if (j > i) {
        // Exploit symmetry.
        continue;
      }
      assert (i != j);
      // Position of i within the neighbor list of j; negative if absent.
      int offj = containsIndex(indices[j], i);
      if (offj >= 0) {
        // Found
        sum += FastMath.sqrt(pij_i[offi] * pij[j][offj]);
      }
    }
  }
  // Each mutual pair was counted once above, hence the factor 2.
  final double scale = initialScale / (2 * sum);
  for (int i = 0; i < pij.length; i++) {
    final double[] pij_i = pij[i];
    for (int offi = 0; offi < pij_i.length; offi++) {
      int j = indices[i][offi];
      assert (i != j);
      int offj = containsIndex(indices[j], i);
      if (offj >= 0) {
        // Found
        assert (indices[j][offj] == i);
        // Exploit symmetry:
        if (i < j) {
          // Symmetrize
          final double val = FastMath.sqrt(pij_i[offi] * pij[j][offj]);
          pij_i[offi] = pij[j][offj] = MathUtil.max(val * scale, MIN_PIJ);
        }
      } else {
        // Not found, so zero.
        pij_i[offi] = 0;
      }
    }
  }
  if (LOG.isStatistics()) {
    // timer != null, mv != null
    LOG.statistics(timer.end());
    LOG.statistics(new DoubleStatistic(NearestNeighborAffinityMatrixBuilder.class.getName() + ".sigma.average", mv.getMean()));
    LOG.statistics(new DoubleStatistic(NearestNeighborAffinityMatrixBuilder.class.getName() + ".sigma.stddev", mv.getSampleStddev()));
  }
}
use of de.lmu.ifi.dbs.elki.logging.statistics.DoubleStatistic in project elki by elki-project.
the class DistanceQuantileSampler method run.
/**
 * Run the distance quantile sampler.
 * <p>
 * Draws random pairs of distinct objects, computes their distance, and keeps
 * the smallest distances in a bounded max-heap, so that the heap top is the
 * requested quantile of the sampled distances. NaN distances are always
 * skipped; distances below {@code Double.MIN_NORMAL} are skipped as well when
 * {@code nozeros} is set. Skipped pairs are not redrawn.
 *
 * @param database Database (not referenced by this method)
 * @param rel Relation to draw object pairs from
 * @return Distances sample
 */
public CollectionResult<double[]> run(Database database, Relation<O> rel) {
  final DistanceQuery<O> distq = rel.getDistanceQuery(getDistanceFunction());
  final int size = rel.size();
  final long pairs = ((long) size * size) >> 1;
  // A value of at most 1 is a fraction of all pairs, otherwise a count.
  final long ssize;
  if (sampling <= 1) {
    ssize = (long) Math.ceil(sampling * pairs);
  }
  else {
    ssize = (long) sampling;
  }
  if (ssize > Integer.MAX_VALUE) {
    throw new AbortException("Sampling size too large.");
  }
  // Heap capacity: number of smallest distances to retain.
  final int qsize;
  if (quantile <= 0) {
    qsize = 1;
  }
  else {
    qsize = (int) Math.ceil(quantile * ssize);
  }
  final DoubleMaxHeap heap = new DoubleMaxHeap(qsize);

  final ArrayDBIDs ids = DBIDUtil.ensureArray(rel.getDBIDs());
  final DBIDArrayIter first = ids.iter();
  final DBIDArrayIter second = ids.iter();
  final Random rnd = rand.getSingleThreadedRandom();
  final FiniteProgress prog = LOG.isVerbose() ? new FiniteProgress("Sampling", (int) ssize, LOG) : null;
  for (long drawn = 0; drawn < ssize; drawn++) {
    // Choose x from [1, size) and y from [0, x), so the two always differ.
    final int x = rnd.nextInt(size - 1) + 1;
    final int y = rnd.nextInt(x);
    final double dist = distq.distance(first.seek(x), second.seek(y));
    // Keep everything except NaN, and (optionally) zeros.
    final boolean usable = !Double.isNaN(dist) && !(nozeros && dist < Double.MIN_NORMAL);
    if (usable) {
      heap.add(dist, qsize);
      LOG.incrementProcessed(prog);
    }
  }
  LOG.statistics(new DoubleStatistic(PREFIX + ".quantile", quantile));
  LOG.statistics(new LongStatistic(PREFIX + ".samplesize", ssize));
  final double top = heap.peek();
  LOG.statistics(new DoubleStatistic(PREFIX + ".distance", top));
  LOG.ensureCompleted(prog);

  final Collection<String> header = Arrays.asList(new String[] { "Distance" });
  final Collection<double[]> data = Arrays.asList(new double[][] { new double[] { top } });
  return new CollectionResult<double[]>("Distances sample", "distance-sample", data, header);
}
use of de.lmu.ifi.dbs.elki.logging.statistics.DoubleStatistic in project elki by elki-project.
the class EstimateIntrinsicDimensionality method run.
/**
 * Estimate the intrinsic dimensionality on a random sample and log the
 * median of the per-object estimates as a statistic.
 * <p>
 * If fewer than two estimates were produced, -1 is logged instead of a
 * median.
 *
 * @param database Database providing the distance and kNN queries
 * @param relation Relation to analyze
 * @return always {@code null}; the outcome is reported via the logger only
 */
public Result run(Database database, Relation<O> relation) {
  final DBIDs allids = relation.getDBIDs();
  final int total = allids.size();
  // Number of samples to draw: absolute if > 1, else a share of the data.
  final int ssize = (int) ((samples > 1.) ? samples : Math.ceil(samples * total));
  // Number of neighbors to fetch (+ query point), same absolute/share rule.
  final int kk = 1 + (int) ((krate > 1.) ? krate : Math.ceil(krate * total));

  final DBIDs sampleids = DBIDUtil.randomSample(allids, ssize, RandomFactory.DEFAULT);
  final DistanceQuery<O> distq = database.getDistanceQuery(relation, getDistanceFunction());
  final KNNQuery<O> knnq = database.getKNNQuery(distq, kk);

  // One estimate per sampled object; 'count' tracks how many were filled.
  final double[] idim = new double[ssize];
  int count = 0;
  DBIDIter it = sampleids.iter();
  while (it.valid()) {
    idim[count++] = estimator.estimate(knnq, it, kk);
    it.advance();
  }

  final double id;
  if (count > 1) {
    id = QuickSelect.median(idim, 0, count);
  }
  else {
    id = -1;
  }
  LOG.statistics(new DoubleStatistic(EstimateIntrinsicDimensionality.class.getName() + ".intrinsic-dimensionality", id));
  return null;
}
use of de.lmu.ifi.dbs.elki.logging.statistics.DoubleStatistic in project elki by elki-project.
the class RangeQuerySelectivity method run.
/**
 * Measure the selectivity of range queries with the configured radius.
 * <p>
 * Runs one range query per sampled object, and logs mean and sample standard
 * deviation of the result sizes, both absolute and divided by the relation
 * size, together with the number of sampled objects.
 *
 * @param database Database providing the distance and range queries
 * @param relation Relation to query
 * @return always {@code null}; the outcome is reported via the logger only
 */
public Result run(Database database, Relation<V> relation) {
  final DistanceQuery<V> dq = database.getDistanceQuery(relation, getDistanceFunction());
  final RangeQuery<V> rq = database.getRangeQuery(dq, radius);
  final DBIDs sample = DBIDUtil.randomSample(relation.getDBIDs(), sampling, random);
  // Accumulates the result set size of each query.
  final MeanVariance resultSizes = new MeanVariance();

  final FiniteProgress progress = LOG.isVerbose() ? new FiniteProgress("Performing range queries", sample.size(), LOG) : null;
  DBIDIter it = sample.iter();
  while (it.valid()) {
    resultSizes.put(rq.getRangeForDBID(it, radius).size());
    LOG.incrementProcessed(progress);
    it.advance();
  }
  LOG.ensureCompleted(progress);

  final String keyBase = this.getClass().getName();
  // Absolute result sizes.
  LOG.statistics(new DoubleStatistic(keyBase + ".mean", resultSizes.getMean()));
  LOG.statistics(new DoubleStatistic(keyBase + ".std", resultSizes.getSampleStddev()));
  // Result sizes relative to the size of the relation.
  LOG.statistics(new DoubleStatistic(keyBase + ".norm.mean", resultSizes.getMean() / relation.size()));
  LOG.statistics(new DoubleStatistic(keyBase + ".norm.std", resultSizes.getSampleStddev() / relation.size()));
  LOG.statistics(new LongStatistic(keyBase + ".samplesize", sample.size()));
  return null;
}
Aggregations