use of de.lmu.ifi.dbs.elki.logging.statistics.DoubleStatistic in project elki by elki-project.
the class CoverTree method initialize.
/**
 * Bulk-loads all DBIDs of the relation into the tree and, if verbose logging
 * is enabled, validates the tree and reports its structural statistics.
 */
@Override
public void initialize() {
  bulkLoad(relation.getDBIDs());
  if (!LOG.isVerbose()) {
    return;
  }
  // Filled by checkCoverTree; reported below as: [0] nodes, [1] / [0] average
  // depth, [2] maximum depth, [3] singletons, [4] entries.
  final int[] stats = new int[5];
  checkCoverTree(root, stats, 0);
  final String prefix = this.getClass().getName();
  final double avgDepth = stats[1] / (double) stats[0];
  LOG.statistics(new LongStatistic(prefix + ".nodes", stats[0]));
  LOG.statistics(new DoubleStatistic(prefix + ".avg-depth", avgDepth));
  LOG.statistics(new LongStatistic(prefix + ".max-depth", stats[2]));
  LOG.statistics(new LongStatistic(prefix + ".singletons", stats[3]));
  LOG.statistics(new LongStatistic(prefix + ".entries", stats[4]));
}
use of de.lmu.ifi.dbs.elki.logging.statistics.DoubleStatistic in project elki by elki-project.
the class EvaluateRetrievalPerformance method run.
/**
 * Evaluate the retrieval performance of the distance function on a random
 * sample of query objects.
 *
 * For every sampled object that has at least one matching object (as found
 * by {@code findMatches}), all distances are computed and scored with
 * average precision, ROC AUC and the kNN evaluator. The scores are averaged
 * over the evaluated samples and also written to the statistics log.
 *
 * @param database Database to run on (provides the distance query)
 * @param relation Relation for distance computations
 * @param lrelation Relation for class label comparison
 * @return Result holding the number of evaluated samples, the mean average
 *         precision, the mean ROC AUC and the averaged per-k kNN scores
 * @throws AbortException if no sampled object had a match, or if the
 *         accumulated MAP / ROC values are NaN (or negative)
 */
public RetrievalPerformanceResult run(Database database, Relation<O> relation, Relation<?> lrelation) {
  final DistanceQuery<O> distQuery = database.getDistanceQuery(relation, getDistanceFunction());
  final DBIDs queries = DBIDUtil.randomSample(relation.getDBIDs(), sampling, random);
  // Reused buffers: positive objects, ranked distances, label counters.
  ModifiableDBIDs positives = DBIDUtil.newHashSet();
  ModifiableDoubleDBIDList ranking = DBIDUtil.newDistanceDBIDList(relation.size());
  Object2IntOpenHashMap<Object> labelCounts = new Object2IntOpenHashMap<>();
  // Accumulators, normalized by the sample count at the end.
  double apSum = 0., rocSum = 0.;
  double[] knnperf = new double[maxk];
  int samples = 0;

  FiniteProgress progress = null;
  if (LOG.isVerbose()) {
    progress = new FiniteProgress("Processing query objects", queries.size(), LOG);
  }
  DBIDIter query = queries.iter();
  while (query.valid()) {
    Object label = lrelation.get(query);
    findMatches(positives, lrelation, label);
    if (positives.size() > 0) {
      computeDistances(ranking, query, distQuery, relation);
      // The query object itself is only part of the ranking if includeSelf is set.
      final int expected = includeSelf ? relation.size() : relation.size() - 1;
      if (ranking.size() != expected) {
        LOG.warning("Neighbor list does not have the desired size: " + ranking.size());
      }
      apSum += AveragePrecisionEvaluation.STATIC.evaluate(positives, ranking);
      rocSum += ROCEvaluation.STATIC.evaluate(positives, ranking);
      KNNEvaluator.STATIC.evaluateKNN(knnperf, ranking, lrelation, labelCounts, label);
      samples++;
    }
    LOG.incrementProcessed(progress);
    query.advance();
  }
  LOG.ensureCompleted(progress);

  if (samples < 1) {
    throw new AbortException("No object matched - are labels parsed correctly?");
  }
  // Written as a negated comparison so that NaN is rejected as well.
  if (!(apSum >= 0 && rocSum >= 0)) {
    throw new AbortException("NaN in MAP/ROC.");
  }
  final double meanAp = apSum / samples;
  final double meanRoc = rocSum / samples;
  LOG.statistics(new DoubleStatistic(PREFIX + ".map", meanAp));
  LOG.statistics(new DoubleStatistic(PREFIX + ".rocauc", meanRoc));
  LOG.statistics(new DoubleStatistic(PREFIX + ".samples", samples));
  for (int idx = 0; idx < maxk; idx++) {
    knnperf[idx] /= samples;
    LOG.statistics(new DoubleStatistic(PREFIX + ".knn-" + (idx + 1), knnperf[idx]));
  }
  return new RetrievalPerformanceResult(samples, meanAp, meanRoc, knnperf);
}
use of de.lmu.ifi.dbs.elki.logging.statistics.DoubleStatistic in project elki by elki-project.
the class HopkinsStatisticClusteringTendency method run.
/**
 * Computes the Hopkins statistic of the relation, repeated {@code rep} times,
 * and writes the aggregated values to the statistics log.
 *
 * @param database Database context
 * @param relation Relation to analyze
 * @return always {@code null}; the outcome is only reported via the
 *         statistics log
 */
public Result run(Database database, Relation<NumberVector> relation) {
  final int dim = RelationUtil.dimensionality(relation);
  final DistanceQuery<NumberVector> distanceQuery = database.getDistanceQuery(relation, getDistanceFunction());
  final KNNQuery<NumberVector> knnQuery = database.getKNNQuery(distanceQuery, k + 1);
  final double[] min = new double[dim];
  final double[] extend = new double[dim];
  initializeDataExtends(relation, dim, min, extend);

  // All output of this method goes through the statistics log.
  if (!LOG.isStatistics()) {
    LOG.warning("This algorithm must be used with at least logging level " + Level.STATISTICS);
  }

  final MeanVariance hStats = new MeanVariance();
  final MeanVariance uStats = new MeanVariance();
  final MeanVariance wStats = new MeanVariance();
  // Repeat the experiment to obtain a more stable result.
  int round = 0;
  while (round < this.rep) {
    // Nearest-neighbor distances for objects taken from the data set
    final double w = computeNNForRealData(knnQuery, relation, dim);
    // Nearest-neighbor distances for newly generated uniform objects
    final double u = computeNNForUniformData(knnQuery, min, extend);
    // Hopkins statistic: with a = u / w this equals a / (1 + a)
    final double h = u / (u + w);
    hStats.put(h);
    uStats.put(u);
    wStats.put(w);
    round++;
  }

  final String prefix = this.getClass().getName();
  LOG.statistics(new LongStatistic(prefix + ".samplesize", sampleSize));
  LOG.statistics(new LongStatistic(prefix + ".dim", dim));
  LOG.statistics(new LongStatistic(prefix + ".hopkins.nearest-neighbor", k));
  LOG.statistics(new DoubleStatistic(prefix + ".hopkins.h.mean", hStats.getMean()));
  LOG.statistics(new DoubleStatistic(prefix + ".hopkins.u.mean", uStats.getMean()));
  LOG.statistics(new DoubleStatistic(prefix + ".hopkins.w.mean", wStats.getMean()));
  // Standard deviations are only reported for more than one repetition.
  if (rep > 1) {
    LOG.statistics(new DoubleStatistic(prefix + ".hopkins.h.std", hStats.getSampleStddev()));
    LOG.statistics(new DoubleStatistic(prefix + ".hopkins.u.std", uStats.getSampleStddev()));
    LOG.statistics(new DoubleStatistic(prefix + ".hopkins.w.std", wStats.getSampleStddev()));
  }

  // Evaluate the mean statistic against a Beta(sampleSize, sampleSize)
  // distribution (see Hopkins for the argument that it is Beta distributed).
  final double meanH = hStats.getMean();
  final double cdf = BetaDistribution.regularizedIncBeta(meanH, sampleSize, sampleSize);
  final double p;
  if (meanH > .5) {
    p = 1. - cdf;
  }
  else {
    p = cdf;
  }
  LOG.statistics(new DoubleStatistic(prefix + ".hopkins.p", p));
  return null;
}
use of de.lmu.ifi.dbs.elki.logging.statistics.DoubleStatistic in project elki by elki-project.
the class InMemoryIDistanceIndex method logStatistics.
/**
 * Logs the parent statistics, followed by the minimum, mean and maximum
 * size over all entries of {@code index}.
 */
@Override
public void logStatistics() {
  super.logStatistics();
  final MeanVarianceMinMax sizes = new MeanVarianceMinMax();
  int pos = 0;
  while (pos < index.length) {
    sizes.put(index[pos].size());
    pos++;
  }
  final String prefix = InMemoryIDistanceIndex.class.getName();
  // Minimum and maximum are truncated to int before being logged as long statistics.
  final int smallest = (int) sizes.getMin();
  LOG.statistics(new LongStatistic(prefix + ".size.min", smallest));
  LOG.statistics(new DoubleStatistic(prefix + ".size.mean", sizes.getMean()));
  final int largest = (int) sizes.getMax();
  LOG.statistics(new LongStatistic(prefix + ".size.max", largest));
}
use of de.lmu.ifi.dbs.elki.logging.statistics.DoubleStatistic in project elki by elki-project.
the class KMeansBatchedLloyd method run.
/**
 * Runs k-means in batches: the relation is randomly split into
 * {@code blocks} parts, and in every iteration each part is assigned to its
 * nearest cluster, followed by an update of the means.
 *
 * Iterating stops once a full pass changed no assignment, or after
 * {@code maxiter} passes (a value of {@code maxiter <= 0} disables the
 * limit). Clusters that end up empty are not included in the result.
 *
 * @param database Database context, passed on to the initializer
 * @param relation Relation to cluster
 * @return Clustering with one top-level cluster per non-empty partition
 */
@Override
public Clustering<KMeansModel> run(Database database, Relation<V> relation) {
  final int dim = RelationUtil.dimensionality(relation);
  // Initial means come from the configured initializer.
  if (LOG.isStatistics()) {
    LOG.statistics(new StringStatistic(KEY + ".initializer", initializer.toString()));
  }
  double[][] means = initializer.chooseInitialMeans(database, relation, k, getDistanceFunction());

  // One member set per cluster, presized to twice the average cluster size.
  final int initialCapacity = (int) (relation.size() * 2. / k);
  List<ModifiableDBIDs> clusters = new ArrayList<>();
  for (int c = 0; c < k; c++) {
    clusters.add(DBIDUtil.newHashSet(initialCapacity));
  }
  WritableIntegerDataStore assignment = DataStoreUtil.makeIntegerStorage(relation.getDBIDs(), DataStoreFactory.HINT_TEMP | DataStoreFactory.HINT_HOT, -1);
  ArrayDBIDs[] parts = DBIDUtil.randomSplit(relation.getDBIDs(), blocks, random);

  // Scratch space, cleared before every batch.
  double[][] meanshift = new double[k][dim];
  int[] changesize = new int[k];
  double[] varsum = new double[k];

  IndefiniteProgress prog = LOG.isVerbose() ? new IndefiniteProgress("K-Means iteration", LOG) : null;
  DoubleStatistic varstat = LOG.isStatistics() ? new DoubleStatistic(this.getClass().getName() + ".variance-sum") : null;
  int iteration = 0;
  while (maxiter <= 0 || iteration < maxiter) {
    LOG.incrementProcessed(prog);
    boolean changed = false;
    FiniteProgress pprog = LOG.isVerbose() ? new FiniteProgress("Batch", parts.length, LOG) : null;
    for (ArrayDBIDs part : parts) {
      for (double[] shift : meanshift) {
        Arrays.fill(shift, 0.);
      }
      Arrays.fill(changesize, 0);
      Arrays.fill(varsum, 0.);
      // The assignment must run for every batch, even if an earlier one already changed.
      if (assignToNearestCluster(relation, part, means, meanshift, changesize, clusters, assignment, varsum)) {
        changed = true;
      }
      // Fold this batch into the means before the next batch is assigned.
      updateMeans(means, meanshift, clusters, changesize);
      LOG.incrementProcessed(pprog);
    }
    LOG.ensureCompleted(pprog);
    // NOTE(review): varsum is cleared per batch, so this presumably reflects
    // only the last batch of the pass - confirm against assignToNearestCluster.
    logVarstat(varstat, varsum);
    // Converged: no assignment changed during this pass.
    if (!changed) {
      break;
    }
    iteration++;
  }
  LOG.setCompleted(prog);
  if (LOG.isStatistics()) {
    LOG.statistics(new LongStatistic(KEY + ".iterations", iteration));
  }

  // Wrap the non-empty clusters into the result.
  Clustering<KMeansModel> result = new Clustering<>("k-Means Clustering", "kmeans-clustering");
  for (int c = 0; c < clusters.size(); c++) {
    DBIDs members = clusters.get(c);
    if (members.size() != 0) {
      KMeansModel model = new KMeansModel(means[c], varsum[c]);
      result.addToplevelCluster(new Cluster<>(members, model));
    }
  }
  return result;
}
Aggregations