Use of de.lmu.ifi.dbs.elki.math.MeanVariance in project elki by elki-project.
The class GaussianAffinityMatrixBuilder, method computePij.
/**
* Compute the pij from the distance matrix.
*
* @param dist Distance matrix.
* @param sigma Kernel bandwidth sigma
* @param initialScale Initial scale
* @return Affinity matrix pij
*/
protected static double[][] computePij(double[][] dist, double sigma, double initialScale) {
final int size = dist.length;
final double msigmasq = -.5 / (sigma * sigma);
double[][] pij = new double[size][size];
FiniteProgress prog = LOG.isVerbose() ? new FiniteProgress("Computing affinities", size, LOG) : null;
Duration timer = LOG.isStatistics() ? LOG.newDuration(GaussianAffinityMatrixBuilder.class.getName() + ".runtime.pijmatrix").begin() : null;
MeanVariance mv = LOG.isStatistics() ? new MeanVariance() : null;
for (int i = 0; i < size; i++) {
double logP = computeH(i, dist[i], pij[i], msigmasq);
if (mv != null) {
mv.put(FastMath.exp(logP));
}
LOG.incrementProcessed(prog);
}
LOG.ensureCompleted(prog);
if (LOG.isStatistics()) {
// timer != null, mv != null
LOG.statistics(timer.end());
LOG.statistics(new DoubleStatistic(GaussianAffinityMatrixBuilder.class.getName() + ".perplexity.average", mv.getMean()));
LOG.statistics(new DoubleStatistic(GaussianAffinityMatrixBuilder.class.getName() + ".perplexity.stddev", mv.getSampleStddev()));
}
// Scale pij to have the desired sum EARLY_EXAGGERATION
double sum = 0.;
for (int i = 1; i < size; i++) {
final double[] pij_i = pij[i];
for (int j = 0; j < i; j++) {
// Only iterate over half of the matrix!
// Establish symmetry
sum += (pij_i[j] += pij[j][i]);
}
}
// Scaling taken from original tSNE code:
final double scale = initialScale / (2. * sum);
for (int i = 1; i < size; i++) {
final double[] pij_i = pij[i];
for (int j = 0; j < i; j++) {
pij_i[j] = pij[j][i] = MathUtil.max(pij_i[j] * scale, MIN_PIJ);
}
}
return pij;
}
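For reference, a hedged sketch of the computeH helper invoked above (imports elided as in the snippets; the actual ELKI implementation may differ in detail): it fills row pij[i] with a normalized Gaussian kernel over the distances and returns the Shannon entropy H, so that exp(H), as logged via MeanVariance above, is the row perplexity.
static double computeHSketch(int i, double[] dist_i, double[] pij_i, double msigmasq) {
  double sumP = 0.;
  for (int j = 0; j < dist_i.length; j++) {
    // Gaussian kernel on the distances; the diagonal entry is forced to zero.
    sumP += pij_i[j] = (j == i) ? 0. : Math.exp(dist_i[j] * msigmasq);
  }
  double h = 0.;
  for (int j = 0; j < pij_i.length; j++) {
    pij_i[j] /= sumP; // normalize the row to sum to one
    if (pij_i[j] > 0.) {
      h -= pij_i[j] * Math.log(pij_i[j]); // Shannon entropy, in nats
    }
  }
  return h;
}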
Use of de.lmu.ifi.dbs.elki.math.MeanVariance in project elki by elki-project.
The class NearestNeighborAffinityMatrixBuilder, method computePij.
/**
* Compute the sparse pij using the nearest neighbors only.
*
* @param ids ID range
* @param knnq kNN query
* @param square Use squared distances
* @param numberOfNeighbours Number of neighbors to get
* @param pij Output matrix of affinities pij
* @param indices Output matrix of neighbor indexes
* @param initialScale Initial scaling factor
*/
protected void computePij(DBIDRange ids, KNNQuery<?> knnq, boolean square, int numberOfNeighbours, double[][] pij, int[][] indices, double initialScale) {
Duration timer = LOG.isStatistics() ? LOG.newDuration(this.getClass().getName() + ".runtime.neighborspijmatrix").begin() : null;
final double logPerp = FastMath.log(perplexity);
// Scratch arrays, resizable
DoubleArray dists = new DoubleArray(numberOfNeighbours + 10);
IntegerArray inds = new IntegerArray(numberOfNeighbours + 10);
// Compute nearest-neighbor sparse affinity matrix
FiniteProgress prog = LOG.isVerbose() ? new FiniteProgress("Finding neighbors and optimizing perplexity", ids.size(), LOG) : null;
MeanVariance mv = LOG.isStatistics() ? new MeanVariance() : null;
for (DBIDArrayIter ix = ids.iter(); ix.valid(); ix.advance()) {
dists.clear();
inds.clear();
KNNList neighbours = knnq.getKNNForDBID(ix, numberOfNeighbours + 1);
convertNeighbors(ids, ix, square, neighbours, dists, inds);
double beta = computeSigma(//
ix.getOffset(), //
dists, //
perplexity, //
logPerp, pij[ix.getOffset()] = new double[dists.size()]);
if (mv != null) {
// Sigma
mv.put(beta > 0 ? FastMath.sqrt(.5 / beta) : 0.);
}
indices[ix.getOffset()] = inds.toArray();
LOG.incrementProcessed(prog);
}
LOG.ensureCompleted(prog);
// Sum of the sparse affinity matrix:
double sum = 0.;
for (int i = 0; i < pij.length; i++) {
final double[] pij_i = pij[i];
for (int j = 0; j < pij_i.length; j++) {
sum += pij_i[j];
}
}
final double scale = initialScale / (2. * sum);
for (int i = 0; i < pij.length; i++) {
final double[] pij_i = pij[i];
for (int offi = 0; offi < pij_i.length; offi++) {
int j = indices[i][offi];
assert (i != j);
int offj = containsIndex(indices[j], i);
if (offj >= 0) {
// Found
assert (indices[j][offj] == i);
// Exploit symmetry:
if (i < j) {
// Symmetrize
final double val = pij_i[offi] + pij[j][offj];
pij_i[offi] = pij[j][offj] = MathUtil.max(val * scale, MIN_PIJ);
}
} else {
// Not found
// TODO: the original code produces a symmetric matrix
// And it will now not sum to EARLY_EXAGGERATION anymore.
pij_i[offi] = MathUtil.max(pij_i[offi] * scale, MIN_PIJ);
}
}
}
if (LOG.isStatistics()) {
// timer != null, mv != null
LOG.statistics(timer.end());
LOG.statistics(new DoubleStatistic(NearestNeighborAffinityMatrixBuilder.class.getName() + ".sigma.average", mv.getMean()));
LOG.statistics(new DoubleStatistic(NearestNeighborAffinityMatrixBuilder.class.getName() + ".sigma.stddev", mv.getSampleStddev()));
}
}
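The symmetrization step relies on a containsIndex helper to locate the reverse neighbor entry. A plausible sketch under the assumption of a plain linear scan (the actual ELKI implementation may differ, e.g. by exploiting sorted index arrays):
protected static int containsIndex(int[] is, int i) {
  for (int j = 0; j < is.length; j++) {
    if (is[j] == i) {
      return j; // i is a mutual neighbor; return its offset
    }
  }
  return -1; // no reverse entry: the neighborhood is not symmetric here
}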
Use of de.lmu.ifi.dbs.elki.math.MeanVariance in project elki by elki-project.
The class DistanceStddevOutlier, method run.
/**
* Run the outlier detection algorithm
*
* @param database Database to use
* @param relation Relation to analyze
* @return Outlier score result
*/
public OutlierResult run(Database database, Relation<O> relation) {
// Get a nearest neighbor query on the relation.
KNNQuery<O> knnq = QueryUtil.getKNNQuery(relation, getDistanceFunction(), k);
// Output data storage
WritableDoubleDataStore scores = DataStoreUtil.makeDoubleStorage(relation.getDBIDs(), DataStoreFactory.HINT_DB);
// Track minimum and maximum scores
DoubleMinMax minmax = new DoubleMinMax();
// Iterate over all objects
for (DBIDIter iter = relation.iterDBIDs(); iter.valid(); iter.advance()) {
KNNList neighbors = knnq.getKNNForDBID(iter, k);
// Aggregate distances
MeanVariance mv = new MeanVariance();
for (DoubleDBIDListIter neighbor = neighbors.iter(); neighbor.valid(); neighbor.advance()) {
// Skip the object itself. The 0 is not very informative.
if (DBIDUtil.equal(iter, neighbor)) {
continue;
}
mv.put(neighbor.doubleValue());
}
// Store score, and update the observed min/max for the score meta
final double score = mv.getSampleStddev();
scores.putDouble(iter, score);
minmax.put(score);
}
// Wrap the result in the standard containers
// Actual min-max, theoretical min-max!
OutlierScoreMeta meta = new BasicOutlierScoreMeta(minmax.getMin(), minmax.getMax(), 0, Double.POSITIVE_INFINITY);
DoubleRelation rel = new MaterializedDoubleRelation(relation.getDBIDs(), "stddev-outlier", scores);
return new OutlierResult(meta, rel);
}
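Isolated from the database machinery, the scoring idea reduces to a few lines of MeanVariance use. A minimal sketch (imports elided as in the snippets), assuming a hypothetical knnDistances array standing in for the neighbor.doubleValue() stream above:
static double stddevScore(double[] knnDistances) {
  MeanVariance mv = new MeanVariance();
  for (double d : knnDistances) {
    mv.put(d); // single-pass accumulation of mean and variance
  }
  // Outlier score: sample standard deviation of the kNN distances
  return mv.getSampleStddev();
}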
Use of de.lmu.ifi.dbs.elki.math.MeanVariance in project elki by elki-project.
The class IndexPurity, method processNewResult.
@Override
public void processNewResult(ResultHierarchy hier, Result newResult) {
Database database = ResultUtil.findDatabase(hier);
final ArrayList<SpatialIndexTree<?, ?>> indexes = ResultUtil.filterResults(hier, newResult, SpatialIndexTree.class);
if (indexes == null || indexes.isEmpty()) {
return;
}
Relation<String> lblrel = DatabaseUtil.guessLabelRepresentation(database);
for (SpatialIndexTree<?, ?> index : indexes) {
List<? extends SpatialEntry> leaves = index.getLeaves();
MeanVariance mv = new MeanVariance();
for (SpatialEntry e : leaves) {
SpatialDirectoryEntry leaf = (SpatialDirectoryEntry) e;
Node<?> n = index.getNode(leaf.getPageID());
final int total = n.getNumEntries();
HashMap<String, Integer> map = new HashMap<>(total);
for (int i = 0; i < total; i++) {
DBID id = ((SpatialPointLeafEntry) n.getEntry(i)).getDBID();
String label = lblrel.get(id);
Integer val = map.get(label);
if (val == null) {
val = 1;
} else {
val += 1;
}
map.put(label, val);
}
double gini = 0.0;
for (Entry<String, Integer> ent : map.entrySet()) {
double rel = ent.getValue() / (double) total;
gini += rel * rel;
}
mv.put(gini);
}
Collection<double[]> col = new ArrayList<>();
col.add(new double[] { mv.getMean(), mv.getSampleStddev() });
database.getHierarchy().add((Result) index, new CollectionResult<>("Gini coefficient of index", "index-gini", col));
}
}
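The per-leaf purity computed above is the sum of squared relative label frequencies, so 1.0 indicates a pure leaf. A minimal standalone sketch (imports elided as in the snippets), assuming a hypothetical labels list standing in for the entries of one leaf node:
static double leafPurity(List<String> labels) {
  HashMap<String, Integer> counts = new HashMap<>();
  for (String label : labels) {
    counts.merge(label, 1, Integer::sum); // count occurrences per label
  }
  double gini = 0.0;
  for (int c : counts.values()) {
    double rel = c / (double) labels.size();
    gini += rel * rel; // sum of squared relative frequencies
  }
  return gini;
}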
Use of de.lmu.ifi.dbs.elki.math.MeanVariance in project elki by elki-project.
The class HopkinsStatisticClusteringTendency, method run.
/**
* Runs the algorithm in the timed evaluation part.
*
* @param database Database context
* @param relation Relation to analyze
*/
public Result run(Database database, Relation<NumberVector> relation) {
final int dim = RelationUtil.dimensionality(relation);
final DistanceQuery<NumberVector> distanceQuery = database.getDistanceQuery(relation, getDistanceFunction());
final KNNQuery<NumberVector> knnQuery = database.getKNNQuery(distanceQuery, k + 1);
final double[] min = new double[dim], extend = new double[dim];
initializeDataExtends(relation, dim, min, extend);
if (!LOG.isStatistics()) {
LOG.warning("This algorithm must be used with at least logging level " + Level.STATISTICS);
}
MeanVariance hmean = new MeanVariance(), umean = new MeanVariance(), wmean = new MeanVariance();
// Compute the Hopkins statistic several times and average, for a more stable result
for (int j = 0; j < this.rep; j++) {
// Compute NN distances for random objects from within the database
double w = computeNNForRealData(knnQuery, relation, dim);
// Compute NN distances for randomly created new uniform objects
double u = computeNNForUniformData(knnQuery, min, extend);
// Compute the Hopkins statistic: h = u / (u + w)
double h = u / (u + w);
hmean.put(h);
umean.put(u);
wmean.put(w);
}
final String prefix = this.getClass().getName();
LOG.statistics(new LongStatistic(prefix + ".samplesize", sampleSize));
LOG.statistics(new LongStatistic(prefix + ".dim", dim));
LOG.statistics(new LongStatistic(prefix + ".hopkins.nearest-neighbor", k));
LOG.statistics(new DoubleStatistic(prefix + ".hopkins.h.mean", hmean.getMean()));
LOG.statistics(new DoubleStatistic(prefix + ".hopkins.u.mean", umean.getMean()));
LOG.statistics(new DoubleStatistic(prefix + ".hopkins.w.mean", wmean.getMean()));
if (rep > 1) {
LOG.statistics(new DoubleStatistic(prefix + ".hopkins.h.std", hmean.getSampleStddev()));
LOG.statistics(new DoubleStatistic(prefix + ".hopkins.u.std", umean.getSampleStddev()));
LOG.statistics(new DoubleStatistic(prefix + ".hopkins.w.std", wmean.getSampleStddev()));
}
// Evaluate:
double x = hmean.getMean();
// See Hopkins for a proof that x is supposedly Beta distributed.
double ix = BetaDistribution.regularizedIncBeta(x, sampleSize, sampleSize);
double p = (x > .5) ? (1. - ix) : ix;
LOG.statistics(new DoubleStatistic(prefix + ".hopkins.p", p));
return null;
}
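The repetition pattern reduces to accumulating one Hopkins value per repetition in a MeanVariance. A minimal runnable sketch with hypothetical {u, w} pairs:
import de.lmu.ifi.dbs.elki.math.MeanVariance;

public class HopkinsSketch {
  public static void main(String[] args) {
    // Hypothetical {u, w} sums of nearest-neighbor distances per repetition
    double[][] repetitions = { { 1.2, 0.4 }, { 1.0, 0.5 }, { 1.4, 0.3 } };
    MeanVariance hmean = new MeanVariance();
    for (double[] rep : repetitions) {
      hmean.put(rep[0] / (rep[0] + rep[1])); // h = u / (u + w)
    }
    System.out.println("H mean=" + hmean.getMean() + " stddev=" + hmean.getSampleStddev());
  }
}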