Use of de.lmu.ifi.dbs.elki.math.linearalgebra.Centroid in project elki by elki-project.
The class NaiveMeanShiftClustering, method run().
/**
 * Run the mean-shift clustering algorithm.
 *
 * @param database Database
 * @param relation Data relation
 * @return Clustering result
 */
public Clustering<MeanModel> run(Database database, Relation<V> relation) {
  final DistanceQuery<V> distq = database.getDistanceQuery(relation, getDistanceFunction());
  final RangeQuery<V> rangeq = database.getRangeQuery(distq);
  final NumberVector.Factory<V> factory = RelationUtil.getNumberVectorFactory(relation);
  final int dim = RelationUtil.dimensionality(relation);
  // Stopping threshold
  final double threshold = bandwidth * 1E-10;
  // Result store:
  ArrayList<Pair<V, ModifiableDBIDs>> clusters = new ArrayList<>();
  ModifiableDBIDs noise = DBIDUtil.newArray();
  FiniteProgress prog = LOG.isVerbose() ? new FiniteProgress("Mean-shift clustering", relation.size(), LOG) : null;
  for (DBIDIter iter = relation.iterDBIDs(); iter.valid(); iter.advance()) {
    // Initial position:
    V position = relation.get(iter);
    iterations: for (int j = 1; j <= MAXITER; j++) {
      // Compute new position:
      V newvec = null;
      {
        DoubleDBIDList neigh = rangeq.getRangeForObject(position, bandwidth);
        // In the first iteration, the query point is always its own neighbor, so we
        // require a second one; afterwards, the shifted position usually is no
        // longer a data point, and a single neighbor suffices.
        boolean okay = (neigh.size() > 1) || (neigh.size() >= 1 && j > 1);
        if (okay) {
          Centroid newpos = new Centroid(dim);
          for (DoubleDBIDListIter niter = neigh.iter(); niter.valid(); niter.advance()) {
            final double weight = kernel.density(niter.doubleValue() / bandwidth);
            newpos.put(relation.get(niter), weight);
          }
          newvec = factory.newNumberVector(newpos.getArrayRef());
          // TODO: detect 0 weight!
        }
        if (!okay) {
          noise.add(iter);
          break iterations;
        }
      }
      // Test if we are close to one of the known clusters:
      double bestd = Double.POSITIVE_INFINITY;
      Pair<V, ModifiableDBIDs> bestp = null;
      for (Pair<V, ModifiableDBIDs> pair : clusters) {
        final double merged = distq.distance(newvec, pair.first);
        if (merged < bestd) {
          bestd = merged;
          bestp = pair;
        }
      }
      // Check for convergence:
      double delta = distq.distance(position, newvec);
      if (bestd < 10 * threshold || bestd * 2 < delta) {
        // Close enough to an existing cluster: join it.
        bestp.second.add(iter);
        break iterations;
      }
      if (j == MAXITER) {
        LOG.warning("No convergence after " + MAXITER + " iterations. Distance: " + delta);
      }
      if (Double.isNaN(delta)) {
        LOG.warning("Encountered NaN distance. Invalid center vector? " + newvec.toString());
        break iterations;
      }
      if (j == MAXITER || delta < threshold) {
        // Converged (or out of iterations): start a new cluster here.
        if (LOG.isDebuggingFine()) {
          LOG.debugFine("New cluster:" + newvec + " delta: " + delta + " threshold: " + threshold + " bestd: " + bestd);
        }
        ArrayModifiableDBIDs cids = DBIDUtil.newArray();
        cids.add(iter);
        clusters.add(new Pair<V, ModifiableDBIDs>(newvec, cids));
        break iterations;
      }
      position = newvec;
    }
    LOG.incrementProcessed(prog);
  }
  LOG.ensureCompleted(prog);
  ArrayList<Cluster<MeanModel>> cs = new ArrayList<>(clusters.size());
  for (Pair<V, ModifiableDBIDs> pair : clusters) {
    cs.add(new Cluster<>(pair.second, new MeanModel(pair.first.toArray())));
  }
  if (noise.size() > 0) {
    cs.add(new Cluster<MeanModel>(noise, true));
  }
  Clustering<MeanModel> c = new Clustering<>("Mean-shift Clustering", "mean-shift-clustering", cs);
  return c;
}
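To make the update rule concrete: each pass of the iterations loop replaces the current position by the kernel-weighted centroid of its neighbors within the bandwidth. Below is a minimal, self-contained sketch of that single step on plain double[] data, assuming Euclidean distance and a Gaussian kernel; shiftOnce and the hard bandwidth cutoff (mimicking the range query above) are illustrative choices, not ELKI API.

import java.util.Arrays;

public class MeanShiftStepSketch {
  /** One mean-shift step: kernel-weighted mean of all points within the bandwidth. */
  static double[] shiftOnce(double[] pos, double[][] data, double bandwidth) {
    double[] sum = new double[pos.length];
    double total = 0;
    for (double[] x : data) {
      double d = 0;
      for (int i = 0; i < pos.length; i++) {
        double t = x[i] - pos[i];
        d += t * t;
      }
      d = Math.sqrt(d);
      if (d > bandwidth) {
        continue; // outside the range query radius: weight 0
      }
      double u = d / bandwidth;
      double w = Math.exp(-.5 * u * u); // unnormalized Gaussian kernel density
      for (int i = 0; i < pos.length; i++) {
        sum[i] += w * x[i];
      }
      total += w;
    }
    if (total <= 0) {
      return pos; // no neighbors: the algorithm above would label this point noise
    }
    for (int i = 0; i < sum.length; i++) {
      sum[i] /= total;
    }
    return sum;
  }

  public static void main(String[] args) {
    double[][] data = { { 0, 0 }, { 0.1, 0 }, { 5, 5 }, { 5.1, 5 } };
    double[] pos = data[0].clone();
    for (int j = 0; j < 100; j++) {
      pos = shiftOnce(pos, data, 1.0); // converges to the mean of the first mode
    }
    System.out.println(Arrays.toString(pos)); // ~[0.05, 0.0]
  }
}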
Use of de.lmu.ifi.dbs.elki.math.linearalgebra.Centroid in project elki by elki-project.
The class EvaluatePBMIndex, method evaluateClustering().
/**
 * Evaluate a single clustering.
 *
 * @param db Database
 * @param rel Data relation
 * @param c Clustering
 * @return PBM index
 */
public double evaluateClustering(Database db, Relation<? extends NumberVector> rel, Clustering<?> c) {
  List<? extends Cluster<?>> clusters = c.getAllClusters();
  NumberVector[] centroids = new NumberVector[clusters.size()];
  int ignorednoise = EvaluateSimplifiedSilhouette.centroids(rel, clusters, centroids, noiseHandling);
  // Build global centroid and cluster count:
  final int dim = RelationUtil.dimensionality(rel);
  Centroid overallCentroid = new Centroid(dim);
  EvaluateVarianceRatioCriteria.globalCentroid(overallCentroid, rel, clusters, centroids, noiseHandling);
  // Maximum distance between centroids:
  double max = 0;
  for (int i = 0; i < centroids.length; i++) {
    if (centroids[i] == null && noiseHandling != NoiseHandling.TREAT_NOISE_AS_SINGLETONS) {
      continue;
    }
    for (int j = i + 1; j < centroids.length; j++) {
      if (centroids[j] == null && noiseHandling != NoiseHandling.TREAT_NOISE_AS_SINGLETONS) {
        continue;
      }
      if (centroids[i] == null && centroids[j] == null) {
        // Need to compute pairwise distances of noise clusters.
        for (DBIDIter iti = clusters.get(i).getIDs().iter(); iti.valid(); iti.advance()) {
          for (DBIDIter itj = clusters.get(j).getIDs().iter(); itj.valid(); itj.advance()) {
            double dist = distanceFunction.distance(rel.get(iti), rel.get(itj));
            max = dist > max ? dist : max;
          }
        }
      } else if (centroids[i] == null) {
        for (DBIDIter iti = clusters.get(i).getIDs().iter(); iti.valid(); iti.advance()) {
          double dist = distanceFunction.distance(rel.get(iti), centroids[j]);
          max = dist > max ? dist : max;
        }
      } else if (centroids[j] == null) {
        for (DBIDIter itj = clusters.get(j).getIDs().iter(); itj.valid(); itj.advance()) {
          double dist = distanceFunction.distance(centroids[i], rel.get(itj));
          max = dist > max ? dist : max;
        }
      } else {
        double dist = distanceFunction.distance(centroids[i], centroids[j]);
        max = dist > max ? dist : max;
      }
    }
  }
  // a: Sum of distances to the own cluster centroid
  // b: Sum of distances to the overall centroid
  double a = 0, b = 0;
  Iterator<? extends Cluster<?>> ci = clusters.iterator();
  for (int i = 0; ci.hasNext(); i++) {
    Cluster<?> cluster = ci.next();
    if (cluster.size() <= 1 || cluster.isNoise()) {
      switch(noiseHandling) {
      case IGNORE_NOISE:
        // Ignored.
        continue;
      case TREAT_NOISE_AS_SINGLETONS:
        // Singletons: a = 0 by definition.
        for (DBIDIter it = cluster.getIDs().iter(); it.valid(); it.advance()) {
          b += SquaredEuclideanDistanceFunction.STATIC.distance(overallCentroid, rel.get(it));
        }
        // Continue with the NEXT cluster.
        continue;
      case MERGE_NOISE:
        // Treat like a regular cluster below.
        break;
      }
    }
    for (DBIDIter it = cluster.getIDs().iter(); it.valid(); it.advance()) {
      NumberVector obj = rel.get(it);
      a += distanceFunction.distance(centroids[i], obj);
      b += distanceFunction.distance(overallCentroid, obj);
    }
  }
  final double pbm = FastMath.pow((1. / centroids.length) * (b / a) * max, 2.);
  if (LOG.isStatistics()) {
    LOG.statistics(new StringStatistic(key + ".pbm.noise-handling", noiseHandling.toString()));
    if (ignorednoise > 0) {
      LOG.statistics(new LongStatistic(key + ".pbm.ignored", ignorednoise));
    }
    LOG.statistics(new DoubleStatistic(key + ".pbm", pbm));
  }
  EvaluationResult ev = EvaluationResult.findOrCreate(db.getHierarchy(), c, "Internal Clustering Evaluation", "internal evaluation");
  MeasurementGroup g = ev.findOrCreateGroup("Distance-based Evaluation");
  g.addMeasure("PBM-Index", pbm, 0., Double.POSITIVE_INFINITY, 0., false);
  db.getHierarchy().resultChanged(ev);
  return pbm;
}
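For reference, the quantity computed above is the PBM index of Pakhira, Bandyopadhyay, and Maulik. In the notation of the code (K = centroids.length, E_1 = b, E_K = a, D_K = max), a LaTeX sketch of the formula:

\[
\mathrm{PBM} = \left( \frac{1}{K} \cdot \frac{E_1}{E_K} \cdot D_K \right)^{2}, \qquad
E_1 = \sum_{x} d(\bar{c}, x), \quad
E_K = \sum_{k=1}^{K} \sum_{x \in C_k} d(c_k, x), \quad
D_K = \max_{k \neq l} d(c_k, c_l),
\]

where \bar{c} is the overall centroid and c_k the centroid of cluster C_k. Larger values are better: the index rewards well-separated centroids (large D_K) and compact clusters (small E_K); the noiseHandling switch only changes which points and centroids enter the three terms.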
Use of de.lmu.ifi.dbs.elki.math.linearalgebra.Centroid in project elki by elki-project.
The class WeightedCovarianceMatrixBuilder, method processIds().
/**
 * Weighted Covariance Matrix for a set of IDs. Since we are not supplied any
 * distance information, we'll need to compute it ourselves. Covariance is
 * tied to Euclidean distance, so it probably does not make much sense to add
 * support for other distance functions?
 *
 * @param ids Database ids to process
 * @param relation Relation to process
 * @return Covariance matrix
 */
@Override
public double[][] processIds(DBIDs ids, Relation<? extends NumberVector> relation) {
  final int dim = RelationUtil.dimensionality(relation);
  final CovarianceMatrix cmat = new CovarianceMatrix(dim);
  final Centroid centroid = Centroid.make(relation, ids);
  // First pass: find the maximum distance to the centroid.
  double maxdist = 0.0, stddev = 0.0;
  {
    for (DBIDIter iter = ids.iter(); iter.valid(); iter.advance()) {
      double distance = weightDistance.distance(centroid, relation.get(iter));
      stddev += distance * distance;
      if (distance > maxdist) {
        maxdist = distance;
      }
    }
    if (maxdist == 0.0) {
      maxdist = 1.0;
    }
    // Compute the standard deviation of the distances.
    stddev = FastMath.sqrt(stddev / ids.size());
  }
  // Second pass: accumulate the covariance, weighting each object by its distance.
  for (DBIDIter iter = ids.iter(); iter.valid(); iter.advance()) {
    NumberVector obj = relation.get(iter);
    double distance = weightDistance.distance(centroid, obj);
    double weight = weightfunction.getWeight(distance, maxdist, stddev);
    cmat.put(obj, weight);
  }
  return cmat.destroyToPopulationMatrix();
}
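The same two-pass scheme (first derive maxdist and stddev from the distances to the centroid, then accumulate a covariance in which each object is weighted by its distance) can be written on plain arrays as below. This is a sketch under assumptions: the Gaussian weight stands in for the pluggable weightfunction, and weightedCovariance is an illustrative name, not ELKI API.

public class WeightedCovarianceSketch {
  /** Covariance matrix in which outlying objects are down-weighted. */
  static double[][] weightedCovariance(double[][] data) {
    final int n = data.length, dim = data[0].length;
    // Unweighted centroid, analogous to Centroid.make(relation, ids).
    double[] centroid = new double[dim];
    for (double[] x : data) {
      for (int i = 0; i < dim; i++) {
        centroid[i] += x[i] / n;
      }
    }
    // Pass 1: maximum distance and standard deviation of the distances.
    // maxdist is kept for parity with the ELKI code; the Gaussian weight
    // below only uses stddev.
    double maxdist = 0, stddev = 0;
    double[] dist = new double[n];
    for (int j = 0; j < n; j++) {
      double d = 0;
      for (int i = 0; i < dim; i++) {
        double t = data[j][i] - centroid[i];
        d += t * t;
      }
      dist[j] = Math.sqrt(d);
      stddev += d;
      maxdist = Math.max(maxdist, dist[j]);
    }
    maxdist = maxdist == 0 ? 1 : maxdist;
    stddev = Math.sqrt(stddev / n);
    // Pass 2: weighted mean, then weighted population covariance.
    double[] w = new double[n], wmean = new double[dim];
    double wsum = 0;
    for (int j = 0; j < n; j++) {
      double u = dist[j] / stddev;
      w[j] = Math.exp(-.5 * u * u); // illustrative Gaussian weight function
      wsum += w[j];
      for (int i = 0; i < dim; i++) {
        wmean[i] += w[j] * data[j][i];
      }
    }
    for (int i = 0; i < dim; i++) {
      wmean[i] /= wsum;
    }
    double[][] cov = new double[dim][dim];
    for (int j = 0; j < n; j++) {
      for (int i = 0; i < dim; i++) {
        for (int k = 0; k < dim; k++) {
          cov[i][k] += w[j] * (data[j][i] - wmean[i]) * (data[j][k] - wmean[k]) / wsum;
        }
      }
    }
    return cov;
  }

  public static void main(String[] args) {
    double[][] data = { { 0, 0 }, { 1, 0 }, { 0, 1 }, { 10, 10 } };
    // The outlier (10, 10) receives a much smaller weight than the other points.
    System.out.println(java.util.Arrays.deepToString(weightedCovariance(data)));
  }
}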
Use of de.lmu.ifi.dbs.elki.math.linearalgebra.Centroid in project elki by elki-project.
The class LinearDiscriminantAnalysisFilter, method computeProjectionMatrix().
@Override
protected double[][] computeProjectionMatrix(List<V> vectorcolumn, List<? extends ClassLabel> classcolumn, int dim) {
  Map<ClassLabel, IntList> classes = partition(classcolumn);
  // Fix indexing of classes:
  List<ClassLabel> keys = new ArrayList<>(classes.keySet());
  // Compute centroids:
  List<Centroid> centroids = computeCentroids(dim, vectorcolumn, keys, classes);
  final double[][] sigmaB, sigmaI;
  // Between-class covariance:
  {
    CovarianceMatrix covmake = new CovarianceMatrix(dim);
    for (Centroid c : centroids) {
      covmake.put(c);
    }
    sigmaB = covmake.destroyToSampleMatrix();
  }
  {
    // (Average) within-class variance:
    CovarianceMatrix covmake = new CovarianceMatrix(dim);
    int numc = keys.size();
    for (int i = 0; i < numc; i++) {
      double[] c = centroids.get(i).getArrayRef();
      // TODO: different weighting strategies? Sampling?
      for (IntIterator it = classes.get(keys.get(i)).iterator(); it.hasNext();) {
        covmake.put(minusEquals(vectorcolumn.get(it.nextInt()).toArray(), c));
      }
    }
    sigmaI = covmake.destroyToSampleMatrix();
    if (new LUDecomposition(sigmaI).det() == 0) {
      // Regularize a singular within-class matrix by inflating the diagonal.
      for (int i = 0; i < dim; i++) {
        sigmaI[i][i] += 1e-10;
      }
    }
  }
  // Solve for the eigenvectors of inv(sigmaI) * sigmaB.
  double[][] sol = times(inverse(sigmaI), sigmaB);
  EigenvalueDecomposition decomp = new EigenvalueDecomposition(sol);
  SortedEigenPairs sorted = new SortedEigenPairs(decomp, false);
  return transpose(sorted.eigenVectors(tdim));
}
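The projection being computed is the classical multi-class Fisher discriminant. Writing S_B for sigmaB (between-class scatter) and S_W for sigmaI (within-class scatter), the objective is, as a LaTeX sketch:

\[
W^{*} = \arg\max_{W} \; \operatorname{tr}\!\left( \left( W^{\mathsf{T}} S_W W \right)^{-1} \left( W^{\mathsf{T}} S_B W \right) \right),
\]

whose solution is given by the tdim leading eigenvectors of S_W^{-1} S_B, which is exactly what the EigenvalueDecomposition of sol yields. The 1e-10 added to the diagonal when det(S_W) = 0 acts as a small ridge regularizer, making S_W invertible when the within-class scatter is singular (e.g., fewer points than dimensions).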