Use of de.lmu.ifi.dbs.elki.logging.statistics.DoubleStatistic in project elki by elki-project.
Class NearestNeighborAffinityMatrixBuilder, method computePij.
/**
 * Build the sparse affinity matrix from the nearest neighbors of each
 * object, then rescale it and symmetrize the entries that have a
 * counterpart in the other object's neighbor list.
 *
 * @param ids ID range
 * @param knnq kNN query
 * @param square Use squared distances
 * @param numberOfNeighbours Number of neighbors to get (the query is asked
 *        for one more than this)
 * @param pij Output: one row of affinities per object
 * @param indices Output: one row of neighbor indexes per object, parallel
 *        to the rows of {@code pij}
 * @param initialScale Initial scaling factor
 */
protected void computePij(DBIDRange ids, KNNQuery<?> knnq, boolean square, int numberOfNeighbours, double[][] pij, int[][] indices, double initialScale) {
  Duration timer = null;
  if (LOG.isStatistics()) {
    timer = LOG.newDuration(this.getClass().getName() + ".runtime.neighborspijmatrix").begin();
  }
  final double logPerp = FastMath.log(perplexity);
  // Reusable, growable scratch buffers for one object's neighborhood.
  DoubleArray dists = new DoubleArray(numberOfNeighbours + 10);
  IntegerArray inds = new IntegerArray(numberOfNeighbours + 10);
  FiniteProgress prog = null;
  if (LOG.isVerbose()) {
    prog = new FiniteProgress("Finding neighbors and optimizing perplexity", ids.size(), LOG);
  }
  MeanVariance mv = null;
  if (LOG.isStatistics()) {
    mv = new MeanVariance();
  }

  // Phase 1: fill one row of pij / indices per object.
  for (DBIDArrayIter it = ids.iter(); it.valid(); it.advance()) {
    dists.clear();
    inds.clear();
    KNNList knn = knnq.getKNNForDBID(it, numberOfNeighbours + 1);
    convertNeighbors(ids, it, square, knn, dists, inds);
    final int row = it.getOffset();
    // The row is stored in pij before computeSigma runs and is passed to it
    // as the last argument.
    final double[] affinities = new double[dists.size()];
    pij[row] = affinities;
    final double beta = computeSigma(row, dists, perplexity, logPerp, affinities);
    if (mv != null) {
      // Track sigma, derived from beta as sqrt(0.5 / beta).
      final double sigma = beta > 0 ? FastMath.sqrt(.5 / beta) : 0.;
      mv.put(sigma);
    }
    indices[row] = inds.toArray();
    LOG.incrementProcessed(prog);
  }
  LOG.ensureCompleted(prog);

  // Phase 2: total mass of the sparse matrix, used for normalization.
  double total = 0.;
  for (double[] row : pij) {
    for (double value : row) {
      total += value;
    }
  }
  final double scale = initialScale / (2 * total);

  // Phase 3: scale every entry, merging mutual pairs into one shared value.
  for (int i = 0; i < pij.length; i++) {
    final double[] rowI = pij[i];
    for (int posI = 0; posI < rowI.length; posI++) {
      final int j = indices[i][posI];
      assert (i != j);
      final int posJ = containsIndex(indices[j], i);
      if (posJ < 0) {
        // i is not among j's neighbors: only scale this one-sided entry.
        // TODO: the original code produces a symmetric matrix
        // And it will now not sum to EARLY_EXAGGERATION anymore.
        rowI[posI] = MathUtil.max(rowI[posI] * scale, MIN_PIJ);
        continue;
      }
      assert (indices[j][posJ] == i);
      // Mutual pair: handle it once, from the smaller index, and write the
      // same value to both sides. For i > j it was already written.
      if (i < j) {
        final double[] rowJ = pij[j];
        final double combined = rowI[posI] + rowJ[posJ];
        final double symmetric = MathUtil.max(combined * scale, MIN_PIJ);
        rowJ[posJ] = symmetric;
        rowI[posI] = symmetric;
      }
    }
  }

  if (LOG.isStatistics()) {
    // timer and mv were both created under the same condition above.
    final String prefix = NearestNeighborAffinityMatrixBuilder.class.getName();
    LOG.statistics(timer.end());
    LOG.statistics(new DoubleStatistic(prefix + ".sigma.average", mv.getMean()));
    LOG.statistics(new DoubleStatistic(prefix + ".sigma.stddev", mv.getSampleStddev()));
  }
}
Use of de.lmu.ifi.dbs.elki.logging.statistics.DoubleStatistic in project elki by elki-project.
Class UKMeans, method run.
/**
 * Run the clustering: alternate between assigning objects to their nearest
 * mean and recomputing the means, until no assignment changes or the
 * iteration limit is reached.
 *
 * @param database the Database (not used by this method)
 * @param relation the Relation of uncertain objects to cluster
 * @return Clustering result; empty clusters are omitted
 */
public Clustering<?> run(final Database database, final Relation<DiscreteUncertainObject> relation) {
  // Nothing to cluster: return an empty result.
  if (relation.size() <= 0) {
    return new Clustering<>("Uk-Means Clustering", "ukmeans-clustering");
  }

  // Initial means: centers of mass of k randomly sampled objects.
  DBIDs seeds = DBIDUtil.randomSample(relation.getDBIDs(), k, rnd);
  List<double[]> centers = new ArrayList<>(k);
  for (DBIDIter it = seeds.iter(); it.valid(); it.advance()) {
    double[] center = ArrayLikeUtil.toPrimitiveDoubleArray(relation.get(it).getCenterOfMass());
    centers.add(center);
  }

  // One member set per cluster, presized to twice the average cluster size.
  final int expectedSize = (int) (relation.size() * 2. / k);
  List<ModifiableDBIDs> clusters = new ArrayList<>();
  for (int c = 0; c < k; c++) {
    clusters.add(DBIDUtil.newHashSet(expectedSize));
  }
  // Per-object cluster index; -1 is the initial (unassigned) value.
  WritableIntegerDataStore assignment = DataStoreUtil.makeIntegerStorage(relation.getDBIDs(), DataStoreFactory.HINT_TEMP | DataStoreFactory.HINT_HOT, -1);
  double[] varsum = new double[k];

  IndefiniteProgress prog = null;
  if (LOG.isVerbose()) {
    prog = new IndefiniteProgress("UK-Means iteration", LOG);
  }
  DoubleStatistic varstat = null;
  if (LOG.isStatistics()) {
    varstat = new DoubleStatistic(this.getClass().getName() + ".variance-sum");
  }

  // A non-positive maxiter means "no iteration limit".
  int iteration = 0;
  while (maxiter <= 0 || iteration < maxiter) {
    LOG.incrementProcessed(prog);
    final boolean changed = assignToNearestCluster(relation, centers, clusters, assignment, varsum);
    logVarstat(varstat, varsum);
    if (!changed) {
      // Converged; the counter is intentionally not advanced here.
      break;
    }
    centers = means(clusters, centers, relation);
    iteration++;
  }
  LOG.setCompleted(prog);
  if (LOG.isStatistics()) {
    LOG.statistics(new LongStatistic(KEY + ".iterations", iteration));
  }

  // Package the non-empty clusters together with their mean and variance sum.
  Clustering<KMeansModel> result = new Clustering<>("Uk-Means Clustering", "ukmeans-clustering");
  for (int c = 0; c < clusters.size(); c++) {
    DBIDs members = clusters.get(c);
    if (!members.isEmpty()) {
      result.addToplevelCluster(new Cluster<>(members, new KMeansModel(centers.get(c), varsum[c])));
    }
  }
  return result;
}
Use of de.lmu.ifi.dbs.elki.logging.statistics.DoubleStatistic in project elki by elki-project.
Class DBSCAN, method run.
/**
 * Performs the DBSCAN algorithm on the given relation.
 *
 * When the relation holds fewer than {@code minpts} objects, all objects are
 * returned as one cluster created with the flag {@code true} (presumably the
 * noise flag; confirm against {@code Cluster}). Otherwise the clusters
 * collected by {@code runDBSCAN} are returned, followed by one cluster
 * holding the {@code noise} set.
 *
 * @param relation the relation to cluster
 * @return Clustering result
 */
public Clustering<Model> run(Relation<O> relation) {
  final int size = relation.size();
  if (size < minpts) {
    // Too few objects for any neighborhood to reach minpts.
    Clustering<Model> trivial = new Clustering<>("DBSCAN Clustering", "dbscan-clustering");
    trivial.addToplevelCluster(new Cluster<Model>(relation.getDBIDs(), true, ClusterModel.CLUSTER));
    return trivial;
  }

  RangeQuery<O> rangeQuery = QueryUtil.getRangeQuery(relation, getDistanceFunction());
  resultList = new ArrayList<>();
  noise = DBIDUtil.newHashSet();
  runDBSCAN(relation, rangeQuery);

  // Average neighborhood size, used as a sanity check on epsilon.
  final double averagen = ncounter / (double) relation.size();
  LOG.statistics(new DoubleStatistic(DBSCAN.class.getName() + ".average-neighbors", averagen));
  final double fewThreshold = 1 + 0.1 * (minpts - 1);
  final double manyThreshold = 100 * minpts;
  if (averagen < fewThreshold) {
    LOG.warning("There are very few neighbors found. Epsilon may be too small.");
  }
  if (averagen > manyThreshold) {
    LOG.warning("There are very many neighbors found. Epsilon may be too large.");
  }

  // Collected clusters first, the noise set last.
  Clustering<Model> result = new Clustering<>("DBSCAN Clustering", "dbscan-clustering");
  for (ModifiableDBIDs members : resultList) {
    result.addToplevelCluster(new Cluster<Model>(members, ClusterModel.CLUSTER));
  }
  result.addToplevelCluster(new Cluster<Model>(noise, true, ClusterModel.CLUSTER));
  return result;
}
Use of de.lmu.ifi.dbs.elki.logging.statistics.DoubleStatistic in project elki by elki-project.
Class SimplifiedCoverTree, method initialize.
/**
 * Bulk-load all objects of the relation into the tree and, when verbose
 * logging is enabled, walk the tree once and report structural statistics.
 */
@Override
public void initialize() {
  bulkLoad(relation.getDBIDs());
  // NOTE(review): the guard tests the verbose level although the output goes
  // through LOG.statistics; kept as-is since checkCoverTree may also validate.
  if (!LOG.isVerbose()) {
    return;
  }
  // Slots as reported below: 0 nodes, 1 depth sum, 2 max depth,
  // 3 singletons, 4 entries.
  final int[] counts = new int[5];
  checkCoverTree(root, counts, 0);
  final String prefix = this.getClass().getName();
  final double avgDepth = counts[1] / (double) counts[0];
  LOG.statistics(new LongStatistic(prefix + ".nodes", counts[0]));
  LOG.statistics(new DoubleStatistic(prefix + ".avg-depth", avgDepth));
  LOG.statistics(new LongStatistic(prefix + ".max-depth", counts[2]));
  LOG.statistics(new LongStatistic(prefix + ".singletons", counts[3]));
  LOG.statistics(new LongStatistic(prefix + ".entries", counts[4]));
}
Use of de.lmu.ifi.dbs.elki.logging.statistics.DoubleStatistic in project elki by elki-project.
Class InMemoryInvertedIndex, method logStatistics.
/**
 * Log the "sparsity" statistic of the index: the total number of entries
 * stored across all columns, divided by (number of columns times number of
 * objects in the relation).
 */
@Override
public void logStatistics() {
  // Total number of entries over all columns.
  long stored = 0L;
  for (ModifiableDoubleDBIDList col : index) {
    stored += col.size();
  }
  // Size of the full columns-by-objects grid, computed in double precision.
  final double capacity = index.size() * (double) relation.size();
  final double sparsity = stored / capacity;
  final String key = this.getClass().getName() + ".sparsity";
  LOG.statistics(new DoubleStatistic(key, sparsity));
}
Aggregations