Use of de.lmu.ifi.dbs.elki.logging.statistics.LongStatistic in project elki by elki-project.
The class IndexTree, method initialize.
/**
 * Initializes the index.
 *
 * @param exampleLeaf an object that will be stored in the index
 */
protected final void initialize(E exampleLeaf) {
  initializeCapacities(exampleLeaf);
  // create empty root
  createEmptyRoot(exampleLeaf);
  final Logging log = getLogger();
  if (log.isStatistics()) {
    String cls = this.getClass().getName();
    log.statistics(new LongStatistic(cls + ".directory.capacity", dirCapacity));
    log.statistics(new LongStatistic(cls + ".directory.minfill", dirMinimum));
    log.statistics(new LongStatistic(cls + ".leaf.capacity", leafCapacity));
    log.statistics(new LongStatistic(cls + ".leaf.minfill", leafMinimum));
  }
  initialized = true;
}
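The pattern above generalizes: obtain one class-level Logging instance, guard with isStatistics() so the statistic keys are only built when statistics output is enabled, and emit one LongStatistic per counter. Below is a minimal self-contained sketch of that pattern; the HypotheticalIndex class and its capacity values are made up for illustration, while the Logging and LongStatistic calls mirror the ELKI code above.

import de.lmu.ifi.dbs.elki.logging.Logging;
import de.lmu.ifi.dbs.elki.logging.statistics.LongStatistic;

public class HypotheticalIndex {
  // One logger per class, as in the ELKI index classes above.
  private static final Logging LOG = Logging.getLogger(HypotheticalIndex.class);

  private long dirCapacity = 128, leafCapacity = 256; // made-up capacities

  protected void logCapacities() {
    // Guard so the key strings are only concatenated when statistics are enabled.
    if (LOG.isStatistics()) {
      String cls = getClass().getName();
      LOG.statistics(new LongStatistic(cls + ".directory.capacity", dirCapacity));
      LOG.statistics(new LongStatistic(cls + ".leaf.capacity", leafCapacity));
    }
  }
}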
Use of de.lmu.ifi.dbs.elki.logging.statistics.LongStatistic in project elki by elki-project.
The class HopkinsStatisticClusteringTendency, method run.
/**
 * Runs the algorithm in the timed evaluation part.
 *
 * @param database Database context
 * @param relation Relation to analyze
 */
public Result run(Database database, Relation<NumberVector> relation) {
  final int dim = RelationUtil.dimensionality(relation);
  final DistanceQuery<NumberVector> distanceQuery = database.getDistanceQuery(relation, getDistanceFunction());
  final KNNQuery<NumberVector> knnQuery = database.getKNNQuery(distanceQuery, k + 1);
  final double[] min = new double[dim], extend = new double[dim];
  initializeDataExtends(relation, dim, min, extend);
  if (!LOG.isStatistics()) {
    LOG.warning("This algorithm must be used with at least logging level " + Level.STATISTICS);
  }
  MeanVariance hmean = new MeanVariance(), umean = new MeanVariance(), wmean = new MeanVariance();
  // Repeat the experiment for a more stable result.
  for (int j = 0; j < this.rep; j++) {
    // Compute NN distances for random objects from within the database.
    double w = computeNNForRealData(knnQuery, relation, dim);
    // Compute NN distances for randomly created new uniform objects.
    double u = computeNNForUniformData(knnQuery, min, extend);
    // Compute the Hopkins statistic: h = u / (u + w), i.e. a / (1 + a) for a = u / w.
    double h = u / (u + w);
    hmean.put(h);
    umean.put(u);
    wmean.put(w);
  }
  final String prefix = this.getClass().getName();
  LOG.statistics(new LongStatistic(prefix + ".samplesize", sampleSize));
  LOG.statistics(new LongStatistic(prefix + ".dim", dim));
  LOG.statistics(new LongStatistic(prefix + ".hopkins.nearest-neighbor", k));
  LOG.statistics(new DoubleStatistic(prefix + ".hopkins.h.mean", hmean.getMean()));
  LOG.statistics(new DoubleStatistic(prefix + ".hopkins.u.mean", umean.getMean()));
  LOG.statistics(new DoubleStatistic(prefix + ".hopkins.w.mean", wmean.getMean()));
  if (rep > 1) {
    LOG.statistics(new DoubleStatistic(prefix + ".hopkins.h.std", hmean.getSampleStddev()));
    LOG.statistics(new DoubleStatistic(prefix + ".hopkins.u.std", umean.getSampleStddev()));
    LOG.statistics(new DoubleStatistic(prefix + ".hopkins.w.std", wmean.getSampleStddev()));
  }
  // Evaluate: per Hopkins, x is Beta distributed under the uniform null hypothesis,
  // so derive a tail probability from the Beta CDF.
  double x = hmean.getMean();
  double ix = BetaDistribution.regularizedIncBeta(x, sampleSize, sampleSize);
  double p = (x > .5) ? (1. - ix) : ix;
  LOG.statistics(new DoubleStatistic(prefix + ".hopkins.p", p));
  return null;
}
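The final evaluation step deserves a closer look: under the null hypothesis of uniformly distributed data, the Hopkins statistic follows a Beta(m, m) distribution with m the sample size, so the tail probability comes from the regularized incomplete Beta function. A minimal sketch with made-up numbers (the 0.72 and 50 are assumptions for illustration; BetaDistribution.regularizedIncBeta is the same ELKI call used above):

import de.lmu.ifi.dbs.elki.math.statistics.distribution.BetaDistribution;

public class HopkinsPValueSketch {
  public static void main(String[] args) {
    double x = 0.72;     // example mean Hopkins statistic h
    int sampleSize = 50; // example number of sampled points per repetition
    // regularizedIncBeta(x, a, b) evaluates the CDF of a Beta(a, b) distribution at x.
    double ix = BetaDistribution.regularizedIncBeta(x, sampleSize, sampleSize);
    // Take the tail probability on whichever side of .5 the mean falls.
    double p = (x > .5) ? (1. - ix) : ix;
    System.out.println("Hopkins p-value: " + p);
  }
}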
Use of de.lmu.ifi.dbs.elki.logging.statistics.LongStatistic in project elki by elki-project.
The class InMemoryIDistanceIndex, method logStatistics.
@Override
public void logStatistics() {
  super.logStatistics();
  MeanVarianceMinMax mm = new MeanVarianceMinMax();
  for (int i = 0; i < index.length; i++) {
    mm.put(index[i].size());
  }
  LOG.statistics(new LongStatistic(InMemoryIDistanceIndex.class.getName() + ".size.min", (int) mm.getMin()));
  LOG.statistics(new DoubleStatistic(InMemoryIDistanceIndex.class.getName() + ".size.mean", mm.getMean()));
  LOG.statistics(new LongStatistic(InMemoryIDistanceIndex.class.getName() + ".size.max", (int) mm.getMax()));
}
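MeanVarianceMinMax accumulates minimum, mean, variance, and maximum in a single pass, which is why logStatistics() only needs one loop over the index partitions. A minimal sketch of that accumulator, with made-up partition sizes:

import de.lmu.ifi.dbs.elki.math.MeanVarianceMinMax;

public class PartitionSizeSketch {
  public static void main(String[] args) {
    MeanVarianceMinMax mm = new MeanVarianceMinMax();
    for (int size : new int[] { 12, 40, 7, 23 }) { // made-up partition sizes
      mm.put(size);
    }
    System.out.println("min=" + mm.getMin() + " mean=" + mm.getMean() + " max=" + mm.getMax());
  }
}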
Use of de.lmu.ifi.dbs.elki.logging.statistics.LongStatistic in project elki by elki-project.
The class KMeansBatchedLloyd, method run.
@Override
public Clustering<KMeansModel> run(Database database, Relation<V> relation) {
  final int dim = RelationUtil.dimensionality(relation);
  // Choose initial means
  if (LOG.isStatistics()) {
    LOG.statistics(new StringStatistic(KEY + ".initializer", initializer.toString()));
  }
  double[][] means = initializer.chooseInitialMeans(database, relation, k, getDistanceFunction());
  // Setup cluster assignment store
  List<ModifiableDBIDs> clusters = new ArrayList<>();
  for (int i = 0; i < k; i++) {
    clusters.add(DBIDUtil.newHashSet((int) (relation.size() * 2. / k)));
  }
  WritableIntegerDataStore assignment = DataStoreUtil.makeIntegerStorage(relation.getDBIDs(), DataStoreFactory.HINT_TEMP | DataStoreFactory.HINT_HOT, -1);
  ArrayDBIDs[] parts = DBIDUtil.randomSplit(relation.getDBIDs(), blocks, random);
  double[][] meanshift = new double[k][dim];
  int[] changesize = new int[k];
  double[] varsum = new double[k];
  IndefiniteProgress prog = LOG.isVerbose() ? new IndefiniteProgress("K-Means iteration", LOG) : null;
  DoubleStatistic varstat = LOG.isStatistics() ? new DoubleStatistic(this.getClass().getName() + ".variance-sum") : null;
  int iteration = 0;
  for (; maxiter <= 0 || iteration < maxiter; iteration++) {
    LOG.incrementProcessed(prog);
    boolean changed = false;
    FiniteProgress pprog = LOG.isVerbose() ? new FiniteProgress("Batch", parts.length, LOG) : null;
    for (int p = 0; p < parts.length; p++) {
      // Initialize new means scratch space.
      for (int i = 0; i < k; i++) {
        Arrays.fill(meanshift[i], 0.);
      }
      Arrays.fill(changesize, 0);
      Arrays.fill(varsum, 0.);
      changed |= assignToNearestCluster(relation, parts[p], means, meanshift, changesize, clusters, assignment, varsum);
      // Recompute means.
      updateMeans(means, meanshift, clusters, changesize);
      LOG.incrementProcessed(pprog);
    }
    LOG.ensureCompleted(pprog);
    logVarstat(varstat, varsum);
    // Stop if no cluster assignment changed.
    if (!changed) {
      break;
    }
  }
  LOG.setCompleted(prog);
  if (LOG.isStatistics()) {
    LOG.statistics(new LongStatistic(KEY + ".iterations", iteration));
  }
  // Wrap result
  Clustering<KMeansModel> result = new Clustering<>("k-Means Clustering", "kmeans-clustering");
  for (int i = 0; i < clusters.size(); i++) {
    DBIDs ids = clusters.get(i);
    if (ids.size() == 0) {
      continue;
    }
    KMeansModel model = new KMeansModel(means[i], varsum[i]);
    result.addToplevelCluster(new Cluster<>(ids, model));
  }
  return result;
}
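Note how run() allocates the variance-sum DoubleStatistic once, before the iteration loop, and logVarstat() re-emits the same object with an updated value each iteration, avoiding an allocation per iteration. A minimal sketch of that reuse pattern, assuming DoubleStatistic.setDouble as the value setter and using made-up per-iteration variance sums:

import de.lmu.ifi.dbs.elki.logging.Logging;
import de.lmu.ifi.dbs.elki.logging.statistics.DoubleStatistic;

public class VarianceSumSketch {
  private static final Logging LOG = Logging.getLogger(VarianceSumSketch.class);

  public static void main(String[] args) {
    // Allocate the statistic once, outside the iteration loop.
    DoubleStatistic varstat = LOG.isStatistics() ? new DoubleStatistic(VarianceSumSketch.class.getName() + ".variance-sum") : null;
    double[][] varsums = { { 3., 1. }, { 2., .5 } }; // made-up per-iteration variance sums
    for (double[] varsum : varsums) {
      double sum = 0.;
      for (double v : varsum) {
        sum += v;
      }
      if (varstat != null) {
        varstat.setDouble(sum); // update the reusable statistic ...
        LOG.statistics(varstat); // ... and emit it for this iteration
      }
    }
  }
}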
Use of de.lmu.ifi.dbs.elki.logging.statistics.LongStatistic in project elki by elki-project.
The class KMediansLloyd, method run.
@Override
public Clustering<MeanModel> run(Database database, Relation<V> relation) {
  if (relation.size() <= 0) {
    return new Clustering<>("k-Medians Clustering", "kmedians-clustering");
  }
  // Choose initial medians
  if (LOG.isStatistics()) {
    LOG.statistics(new StringStatistic(KEY + ".initialization", initializer.toString()));
  }
  double[][] medians = initializer.chooseInitialMeans(database, relation, k, getDistanceFunction());
  // Setup cluster assignment store
  List<ModifiableDBIDs> clusters = new ArrayList<>();
  for (int i = 0; i < k; i++) {
    clusters.add(DBIDUtil.newHashSet((int) (relation.size() * 2. / k)));
  }
  WritableIntegerDataStore assignment = DataStoreUtil.makeIntegerStorage(relation.getDBIDs(), DataStoreFactory.HINT_TEMP | DataStoreFactory.HINT_HOT, -1);
  double[] distsum = new double[k];
  IndefiniteProgress prog = LOG.isVerbose() ? new IndefiniteProgress("K-Medians iteration", LOG) : null;
  int iteration = 0;
  for (; maxiter <= 0 || iteration < maxiter; iteration++) {
    LOG.incrementProcessed(prog);
    boolean changed = assignToNearestCluster(relation, medians, clusters, assignment, distsum);
    // Stop if no cluster assignment changed.
    if (!changed) {
      break;
    }
    // Recompute medians.
    medians = medians(clusters, medians, relation);
  }
  LOG.setCompleted(prog);
  if (LOG.isStatistics()) {
    LOG.statistics(new LongStatistic(KEY + ".iterations", iteration));
  }
  // Wrap result
  Clustering<MeanModel> result = new Clustering<>("k-Medians Clustering", "kmedians-clustering");
  for (int i = 0; i < clusters.size(); i++) {
    MeanModel model = new MeanModel(medians[i]);
    result.addToplevelCluster(new Cluster<>(clusters.get(i), model));
  }
  return result;
}
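Both the k-means and k-medians variants close with the same bookkeeping: a StringStatistic recording the chosen initializer and a LongStatistic recording the iteration count, both keyed by the class name. A minimal standalone sketch of that closing step; the iteration count and initializer name are made-up values:

import de.lmu.ifi.dbs.elki.logging.Logging;
import de.lmu.ifi.dbs.elki.logging.statistics.LongStatistic;
import de.lmu.ifi.dbs.elki.logging.statistics.StringStatistic;

public class IterationStatsSketch {
  private static final Logging LOG = Logging.getLogger(IterationStatsSketch.class);

  private static final String KEY = IterationStatsSketch.class.getName();

  public static void main(String[] args) {
    int iteration = 7; // made-up iteration count
    if (LOG.isStatistics()) {
      LOG.statistics(new StringStatistic(KEY + ".initialization", "ExampleInitializer"));
      LOG.statistics(new LongStatistic(KEY + ".iterations", iteration));
    }
  }
}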