Use of de.lmu.ifi.dbs.elki.math.linearalgebra.Centroid in project elki by elki-project.
The class EvaluateVarianceRatioCriteria, method evaluateClustering.
/**
* Evaluate a single clustering.
*
* @param db Database
* @param rel Data relation
* @param c Clustering
* @return Variance Ratio Criteria
*/
public double evaluateClustering(Database db, Relation<? extends NumberVector> rel, Clustering<?> c) {
  // FIXME: allow using a precomputed distance matrix!
  final SquaredEuclideanDistanceFunction df = SquaredEuclideanDistanceFunction.STATIC;
  List<? extends Cluster<?>> clusters = c.getAllClusters();
  double vrc = 0.;
  int ignorednoise = 0;
  if (clusters.size() > 1) {
    NumberVector[] centroids = new NumberVector[clusters.size()];
    ignorednoise = EvaluateSimplifiedSilhouette.centroids(rel, clusters, centroids, noiseOption);
    // Build global centroid and cluster count:
    final int dim = RelationUtil.dimensionality(rel);
    Centroid overallCentroid = new Centroid(dim);
    int clustercount = globalCentroid(overallCentroid, rel, clusters, centroids, noiseOption);
    // a: Distance to own centroid
    // b: Distance to overall centroid
    double a = 0, b = 0;
    Iterator<? extends Cluster<?>> ci = clusters.iterator();
    for (int i = 0; ci.hasNext(); i++) {
      Cluster<?> cluster = ci.next();
      if (cluster.size() <= 1 || cluster.isNoise()) {
        switch(noiseOption) {
        case IGNORE_NOISE:
          // Ignored
          continue;
        case TREAT_NOISE_AS_SINGLETONS:
          // Singletons: a = 0 by definition.
          for (DBIDIter it = cluster.getIDs().iter(); it.valid(); it.advance()) {
            b += df.distance(overallCentroid, rel.get(it));
          }
          // with NEXT cluster.
          continue;
        case MERGE_NOISE:
          // Treat like a cluster below:
          break;
        }
      }
      for (DBIDIter it = cluster.getIDs().iter(); it.valid(); it.advance()) {
        NumberVector vec = rel.get(it);
        a += df.distance(centroids[i], vec);
        b += df.distance(overallCentroid, vec);
      }
    }
    vrc = ((b - a) / a) * ((rel.size() - clustercount) / (clustercount - 1.));
    // Only if {@link NoiseHandling#IGNORE_NOISE}:
    if (penalize && ignorednoise > 0) {
      vrc *= (rel.size() - ignorednoise) / (double) rel.size();
    }
  }
  if (LOG.isStatistics()) {
    LOG.statistics(new StringStatistic(key + ".vrc.noise-handling", noiseOption.toString()));
    if (ignorednoise > 0) {
      LOG.statistics(new LongStatistic(key + ".vrc.ignored", ignorednoise));
    }
    LOG.statistics(new DoubleStatistic(key + ".vrc", vrc));
  }
  EvaluationResult ev = EvaluationResult.findOrCreate(db.getHierarchy(), c, "Internal Clustering Evaluation", "internal evaluation");
  MeasurementGroup g = ev.findOrCreateGroup("Distance-based Evaluation");
  g.addMeasure("Variance Ratio Criteria", vrc, 0., 1., 0., false);
  return vrc;
}
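The method above fills the overall Centroid through the project's globalCentroid helper (not shown here). A minimal sketch of the same idea using only the Centroid API itself, assuming rel is a Relation<? extends NumberVector>; variable names are illustrative, not part of the original method:

// Sketch: accumulate the global mean incrementally over all relation members
Centroid overall = new Centroid(RelationUtil.dimensionality(rel));
for (DBIDIter it = rel.iterDBIDs(); it.valid(); it.advance()) {
  overall.put(rel.get(it)); // incremental mean update
}
// Equivalent shortcut via the static factory:
// Centroid overall = Centroid.make(rel, rel.getDBIDs());

Because Centroid implements NumberVector, the result can be passed directly to a distance function, as done with SquaredEuclideanDistanceFunction above.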
Use of de.lmu.ifi.dbs.elki.math.linearalgebra.Centroid in project elki by elki-project.
The class DependencyDerivator, method run.
/**
* Computes quantitatively linear dependencies among the attributes of the
* given database based on a linear correlation PCA.
*
* @param database the database to run this DependencyDerivator on
* @param relation the relation to use
* @return the CorrelationAnalysisSolution computed by this
* DependencyDerivator
*/
public CorrelationAnalysisSolution<V> run(Database database, Relation<V> relation) {
  if (LOG.isVerbose()) {
    LOG.verbose("retrieving database objects...");
  }
  Centroid centroid = Centroid.make(relation, relation.getDBIDs());
  NumberVector.Factory<V> factory = RelationUtil.getNumberVectorFactory(relation);
  V centroidDV = factory.newNumberVector(centroid.getArrayRef());
  DBIDs ids;
  if (this.sampleSize > 0) {
    if (randomsample) {
      ids = DBIDUtil.randomSample(relation.getDBIDs(), this.sampleSize, RandomFactory.DEFAULT);
    } else {
      DistanceQuery<V> distanceQuery = database.getDistanceQuery(relation, getDistanceFunction());
      KNNList queryResults = //
          database.getKNNQuery(distanceQuery, this.sampleSize).getKNNForObject(centroidDV, this.sampleSize);
      ids = DBIDUtil.newHashSet(queryResults);
    }
  } else {
    ids = relation.getDBIDs();
  }
  return generateModel(relation, ids, centroid.getArrayRef());
}
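The notable Centroid usage here is converting the mean back into the relation's own vector type via the relation's factory. A hedged sketch of that pattern on an explicit sample, with the sample size and variable names chosen purely for illustration:

// Sketch: mean of a random DBID sample, converted back into the relation's vector type V
DBIDs sample = DBIDUtil.randomSample(relation.getDBIDs(), 100, RandomFactory.DEFAULT);
Centroid mean = Centroid.make(relation, sample);
NumberVector.Factory<V> factory = RelationUtil.getNumberVectorFactory(relation);
V meanVec = factory.newNumberVector(mean.getArrayRef());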
Use of de.lmu.ifi.dbs.elki.math.linearalgebra.Centroid in project elki by elki-project.
The class AutotuningPCA, method processIds.
@Override
public PCAResult processIds(DBIDs ids, Relation<? extends NumberVector> database) {
  // Assume Euclidean distance. In the context of PCA, the neighborhood should
  // be L2-spherical to be unbiased.
  Centroid center = Centroid.make(database, ids);
  ModifiableDoubleDBIDList dres = DBIDUtil.newDistanceDBIDList(ids.size());
  for (DBIDIter iter = ids.iter(); iter.valid(); iter.advance()) {
    final double dist = EuclideanDistanceFunction.STATIC.distance(center, database.get(iter));
    dres.add(dist, iter);
  }
  dres.sort();
  return processQueryResult(dres, database);
}
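After sorting, dres pairs each DBID with its Euclidean distance to the local centroid, in ascending order. A small sketch of how such a list can be consumed downstream (the loop body is illustrative only):

// Sketch: walk the sorted (distance, DBID) pairs, e.g. to pick the points closest to the centroid
for (DoubleDBIDListIter it = dres.iter(); it.valid(); it.advance()) {
  double distToCenter = it.doubleValue(); // the distance stored via dres.add(dist, iter)
  NumberVector v = database.get(it);      // the iterator also acts as a DBIDRef
}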
Use of de.lmu.ifi.dbs.elki.math.linearalgebra.Centroid in project elki by elki-project.
The class LinearDiscriminantAnalysisFilter, method computeCentroids.
/**
* Compute the centroid for each class.
*
* @param dim Dimensionality
* @param vectorcolumn Vector column
* @param keys Key index
* @param classes Classes
* @return Centroids for each class.
*/
protected List<Centroid> computeCentroids(int dim, List<V> vectorcolumn, List<ClassLabel> keys, Map<ClassLabel, IntList> classes) {
  final int numc = keys.size();
  List<Centroid> centroids = new ArrayList<>(numc);
  for (int i = 0; i < numc; i++) {
    Centroid c = new Centroid(dim);
    for (IntIterator it = classes.get(keys.get(i)).iterator(); it.hasNext(); ) {
      c.put(vectorcolumn.get(it.nextInt()));
    }
    centroids.add(c);
  }
  return centroids;
}
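Each Centroid here is an incrementally maintained per-class mean. For downstream scatter-matrix computations it can be convenient to freeze the means into plain arrays; a minimal sketch, where the clone() guards against the fact that getArrayRef() exposes the centroid's internal buffer:

// Sketch: snapshot the per-class means as independent double[] copies
List<double[]> classMeans = new ArrayList<>(centroids.size());
for (Centroid c : centroids) {
  classMeans.add(c.getArrayRef().clone());
}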
Use of de.lmu.ifi.dbs.elki.math.linearalgebra.Centroid in project elki by elki-project.
The class DiSH, method findParent.
/**
* Returns the parent of the specified cluster
*
* @param relation the relation storing the objects
* @param child the child to search the parent for
* @param clustersMap the map containing the clusters
* @return the parent of the specified cluster
*/
private Pair<long[], ArrayModifiableDBIDs> findParent(Relation<V> relation, Pair<long[], ArrayModifiableDBIDs> child, Object2ObjectMap<long[], List<ArrayModifiableDBIDs>> clustersMap) {
  Centroid child_centroid = ProjectedCentroid.make(child.first, relation, child.second);
  Pair<long[], ArrayModifiableDBIDs> result = null;
  int resultCardinality = -1;
  long[] childPV = child.first;
  int childCardinality = BitsUtil.cardinality(childPV);
  for (long[] parentPV : clustersMap.keySet()) {
    int parentCardinality = BitsUtil.cardinality(parentPV);
    if (parentCardinality >= childCardinality) {
      continue;
    }
    if (resultCardinality != -1 && parentCardinality <= resultCardinality) {
      continue;
    }
    long[] pv = BitsUtil.andCMin(childPV, parentPV);
    if (BitsUtil.equal(pv, parentPV)) {
      List<ArrayModifiableDBIDs> parentList = clustersMap.get(parentPV);
      for (ArrayModifiableDBIDs parent : parentList) {
        NumberVector parent_centroid = ProjectedCentroid.make(parentPV, relation, parent);
        double d = weightedDistance(child_centroid, parent_centroid, parentPV);
        if (d <= 2 * epsilon) {
          result = new Pair<>(parentPV, parent);
          resultCardinality = parentCardinality;
          break;
        }
      }
    }
  }
  return result;
}
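Unlike the plain Centroid, ProjectedCentroid.make restricts the mean computation to the dimensions flagged in a preference vector (a long[] bit set). A hedged sketch of the call, assuming a relation and some DBIDs 'ids' are in scope and using an illustrative bit mask:

// Sketch: centroid over 'ids', using only the dimensions set in 'prefVector'
long[] prefVector = BitsUtil.zero(RelationUtil.dimensionality(relation));
BitsUtil.setI(prefVector, 0); // e.g. include dimension 0 only
Centroid projected = ProjectedCentroid.make(prefVector, relation, ids);

Because ProjectedCentroid extends Centroid, the result can be fed to the weighted distance used in findParent above.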