Use of de.lmu.ifi.dbs.elki.distance.distancefunction.minkowski.SquaredEuclideanDistanceFunction in project elki by elki-project.
Example from class EvaluateVarianceRatioCriteria, method evaluateClustering.
/**
 * Evaluate a single clustering with the Variance Ratio Criterion
 * (Calinski-Harabasz index): the ratio of between-cluster to within-cluster
 * variance, normalized by the degrees of freedom.
 *
 * @param db Database
 * @param rel Data relation
 * @param c Clustering
 * @return Variance Ratio Criteria
 */
public double evaluateClustering(Database db, Relation<? extends NumberVector> rel, Clustering<?> c) {
  // FIXME: allow using a precomputed distance matrix!
  final SquaredEuclideanDistanceFunction df = SquaredEuclideanDistanceFunction.STATIC;
  List<? extends Cluster<?>> clusters = c.getAllClusters();
  double vrc = 0.;
  int ignorednoise = 0;
  // The criterion is undefined for a single cluster; leave vrc = 0 then.
  if(clusters.size() > 1) {
    // Per-cluster centroids; helper also counts noise points that were skipped.
    NumberVector[] centroids = new NumberVector[clusters.size()];
    ignorednoise = EvaluateSimplifiedSilhouette.centroids(rel, clusters, centroids, noiseOption);
    // Build global centroid and cluster count:
    final int dim = RelationUtil.dimensionality(rel);
    Centroid overallCentroid = new Centroid(dim);
    int clustercount = globalCentroid(overallCentroid, rel, clusters, centroids, noiseOption);
    // a: Distance to own centroid (within-cluster sum of squares)
    // b: Distance to overall centroid (total sum of squares);
    // the between-cluster sum of squares is then b - a.
    double a = 0, b = 0;
    Iterator<? extends Cluster<?>> ci = clusters.iterator();
    // i tracks the index into centroids[] in step with the cluster iterator.
    for(int i = 0; ci.hasNext(); i++) {
      Cluster<?> cluster = ci.next();
      if(cluster.size() <= 1 || cluster.isNoise()) {
        switch(noiseOption) {
        case IGNORE_NOISE:
          // Ignored
          continue;
        case TREAT_NOISE_AS_SINGLETONS:
          // Singletons: a = 0 by definition.
          for(DBIDIter it = cluster.getIDs().iter(); it.valid(); it.advance()) {
            b += df.distance(overallCentroid, rel.get(it));
          }
          // with NEXT cluster.
          continue;
        case MERGE_NOISE:
          // Treat like a cluster below:
          break;
        }
      }
      // Regular cluster: accumulate squared distances to its own centroid
      // and to the global centroid.
      for(DBIDIter it = cluster.getIDs().iter(); it.valid(); it.advance()) {
        NumberVector vec = rel.get(it);
        a += df.distance(centroids[i], vec);
        b += df.distance(overallCentroid, vec);
      }
    }
    // Calinski-Harabasz: ((b - a) / (k - 1)) / (a / (n - k)), rearranged.
    // NOTE(review): a == 0 (e.g. every cluster collapses to one point) would
    // divide by zero here — confirm upstream guarantees this cannot occur.
    vrc = ((b - a) / a) * ((rel.size() - clustercount) / (clustercount - 1.));
    // Only if {@link NoiseHandling#IGNORE_NOISE}:
    // scale down by the fraction of points actually evaluated.
    if(penalize && ignorednoise > 0) {
      vrc *= (rel.size() - ignorednoise) / (double) rel.size();
    }
  }
  if(LOG.isStatistics()) {
    LOG.statistics(new StringStatistic(key + ".vrc.noise-handling", noiseOption.toString()));
    if(ignorednoise > 0) {
      LOG.statistics(new LongStatistic(key + ".vrc.ignored", ignorednoise));
    }
    LOG.statistics(new DoubleStatistic(key + ".vrc", vrc));
  }
  // Attach the score to the clustering result for visualization / reporting.
  EvaluationResult ev = EvaluationResult.findOrCreate(db.getHierarchy(), c, "Internal Clustering Evaluation", "internal evaluation");
  MeasurementGroup g = ev.findOrCreateGroup("Distance-based Evaluation");
  g.addMeasure("Variance Ratio Criteria", vrc, 0., 1., 0., false);
  return vrc;
}
Use of de.lmu.ifi.dbs.elki.distance.distancefunction.minkowski.SquaredEuclideanDistanceFunction in project elki by elki-project.
Example from class PassingDataToELKI, method main.
/**
 * Main method: demonstrates loading an in-memory double[][] into an ELKI
 * database, running Lloyd's k-means with squared Euclidean distance, and
 * printing the resulting clusters.
 *
 * @param args Command line parameters (not supported)
 */
public static void main(String[] args) {
  // Set the logging level to statistics:
  LoggingConfiguration.setStatistics();
  // Generate a random data set: 1000 points, 2 dimensions, uniform in [0,1).
  // Note: ELKI has a nice data generator class, use that instead.
  double[][] data = new double[1000][2];
  for(int i = 0; i < data.length; i++) {
    for(int j = 0; j < data[i].length; j++) {
      data[i][j] = Math.random();
    }
  }
  // Adapter to load data from an existing array.
  DatabaseConnection dbc = new ArrayAdapterDatabaseConnection(data);
  // Create a database (which may contain multiple relations!)
  Database db = new StaticArrayDatabase(dbc, null);
  // Load the data into the database (do NOT forget to initialize...)
  db.initialize();
  // Relation containing the number vectors:
  Relation<NumberVector> rel = db.getRelation(TypeUtil.NUMBER_VECTOR_FIELD);
  // We know that the ids must be a continuous range:
  DBIDRange ids = (DBIDRange) rel.getDBIDs();
  // K-means should be used with squared Euclidean (least squares):
  SquaredEuclideanDistanceFunction dist = SquaredEuclideanDistanceFunction.STATIC;
  // Default initialization, using global random:
  // To fix the random seed, use: new RandomFactory(seed);
  RandomlyGeneratedInitialMeans init = new RandomlyGeneratedInitialMeans(RandomFactory.DEFAULT);
  // Textbook k-means clustering:
  KMeansLloyd<NumberVector> km = new //
  KMeansLloyd<>(//
  dist, //
  3, /* k - number of partitions */
  0, /* maximum number of iterations: no limit */
  init);
  // K-means will automatically choose a numerical relation from the data set:
  // But we could make it explicit (if there were more than one numeric
  // relation!): km.run(db, rel);
  Clustering<KMeansModel> c = km.run(db);
  // Output all clusters: index, auto-generated name, size, center, members.
  int i = 0;
  for(Cluster<KMeansModel> clu : c.getAllClusters()) {
    // K-means will name all clusters "Cluster" in lack of noise support:
    System.out.println("#" + i + ": " + clu.getNameAutomatic());
    System.out.println("Size: " + clu.size());
    System.out.println("Center: " + clu.getModel().getPrototype().toString());
    // Iterate over objects:
    System.out.print("Objects: ");
    for(DBIDIter it = clu.getIDs().iter(); it.valid(); it.advance()) {
      // To get the vector use:
      // NumberVector v = rel.get(it);
      // Offset within our DBID range: "line number"
      final int offset = ids.getOffset(it);
      System.out.print(" " + offset);
      // Do NOT rely on using "internalGetIndex()" directly!
    }
    System.out.println();
    ++i;
  }
}
Use of de.lmu.ifi.dbs.elki.distance.distancefunction.minkowski.SquaredEuclideanDistanceFunction in project elki by elki-project.
Example from class UKMeans, method getExpectedRepDistance.
/**
 * Get the expected distance between a vector and an uncertain object,
 * i.e. the mean squared Euclidean distance over the object's samples.
 *
 * @param rep A vector, e.g. a cluster representative
 * @param uo A discrete uncertain object
 * @return The distance
 */
protected double getExpectedRepDistance(NumberVector rep, DiscreteUncertainObject uo) {
  final SquaredEuclideanDistanceFunction euclidean = SquaredEuclideanDistanceFunction.STATIC;
  // Hoist the loop-invariant sample count; this also makes the original
  // redundant "counter" variable (which merely re-counted the loop) unnecessary.
  final int numSamples = uo.getNumberSamples();
  double sum = 0.;
  for(int i = 0; i < numSamples; i++) {
    sum += euclidean.distance(rep, uo.getSample(i));
  }
  // NOTE(review): numSamples == 0 yields NaN (0./0), same as the original code;
  // confirm uncertain objects always carry at least one sample.
  return sum / numSamples;
}
Use of de.lmu.ifi.dbs.elki.distance.distancefunction.minkowski.SquaredEuclideanDistanceFunction in project elki by elki-project.
Example from class LinearScanEuclideanDistanceKNNQuery, method linearScan.
/**
 * Main loop of the linear scan: compare the query object to every candidate,
 * keeping the k nearest in the heap. Squared Euclidean distance is used, as
 * it is monotone with Euclidean and thus yields the same ranking.
 *
 * @param relation Data relation
 * @param iter ID iterator
 * @param obj Query object
 * @param heap Output heap
 * @return Heap
 */
private KNNHeap linearScan(Relation<? extends O> relation, DBIDIter iter, final O obj, KNNHeap heap) {
  final SquaredEuclideanDistanceFunction squared = SquaredEuclideanDistanceFunction.STATIC;
  // Current k-nearest-neighbor distance bound; shrinks as the heap fills.
  double kdist = Double.POSITIVE_INFINITY;
  for(; iter.valid(); iter.advance()) {
    final double candDist = squared.distance(obj, relation.get(iter));
    if(candDist <= kdist) {
      // insert() returns the updated kNN distance bound.
      kdist = heap.insert(candDist, iter);
    }
  }
  return heap;
}
Use of de.lmu.ifi.dbs.elki.distance.distancefunction.minkowski.SquaredEuclideanDistanceFunction in project elki by elki-project.
Example from class LinearScanEuclideanDistanceKNNQuery, method linearScanBatchKNN.
/**
 * Perform a linear scan batch kNN for primitive distance functions: a single
 * pass over the database, testing each candidate against every query heap.
 *
 * @param objs Objects list
 * @param heaps Heaps array
 */
@Override
protected void linearScanBatchKNN(List<O> objs, List<KNNHeap> heaps) {
  final SquaredEuclideanDistanceFunction squared = SquaredEuclideanDistanceFunction.STATIC;
  final Relation<? extends O> relation = getRelation();
  final int numQueries = objs.size();
  // Scan the database once; amortize the relation access across all queries.
  for(DBIDIter id = relation.getDBIDs().iter(); id.valid(); id.advance()) {
    final O candidate = relation.get(id);
    for(int q = 0; q < numQueries; q++) {
      final KNNHeap heap = heaps.get(q);
      final double d = squared.distance(objs.get(q), candidate);
      // Only insert when within the current kNN distance bound of this query.
      if(d <= heap.getKNNDistance()) {
        heap.insert(d, id);
      }
    }
  }
}
Aggregations