Use of de.lmu.ifi.dbs.elki.data.NumberVector in project elki by elki-project.
Class ORCLUS, method projectedEnergy.
/**
* Computes the projected energy of the specified clusters. The projected
* energy is given by the mean square distance of the points to the centroid
* of the union cluster c, when all points in c are projected to the subspace
* of c.
*
* @param relation the relation holding the objects
* @param c_i the first cluster
* @param c_j the second cluster
* @param i the index of cluster c_i in the cluster list
* @param j the index of cluster c_j in the cluster list
* @param dim the dimensionality of the clusters
* @return the projected energy of the union of the two specified clusters
*/
private ProjectedEnergy projectedEnergy(Relation<V> relation, ORCLUSCluster c_i, ORCLUSCluster c_j, int i, int j, int dim) {
  NumberVectorDistanceFunction<? super V> distFunc = SquaredEuclideanDistanceFunction.STATIC;
  // union of cluster c_i and c_j
  ORCLUSCluster c_ij = union(relation, c_i, c_j, dim);
  double sum = 0.;
  NumberVector c_proj = DoubleVector.wrap(project(c_ij, c_ij.centroid));
  for (DBIDIter iter = c_ij.objectIDs.iter(); iter.valid(); iter.advance()) {
    NumberVector o_proj = DoubleVector.wrap(project(c_ij, relation.get(iter).toArray()));
    sum += distFunc.distance(o_proj, c_proj);
  }
  sum /= c_ij.objectIDs.size();
  return new ProjectedEnergy(i, j, c_ij, sum);
}
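For illustration only, the same quantity can be computed on plain arrays: project every point and the centroid onto the subspace basis of the union cluster, then average the squared Euclidean distances. The sketch below is a hypothetical, ELKI-independent helper (the basis matrix and method names are assumptions, not part of the ORCLUS class above).

// Hypothetical standalone sketch; "basis" is a (dim x d) subspace basis, not an ELKI type.
static double[] project(double[][] basis, double[] point) {
  double[] proj = new double[basis.length];
  for (int i = 0; i < basis.length; i++) {
    double s = 0.;
    for (int j = 0; j < point.length; j++) {
      s += basis[i][j] * point[j];
    }
    proj[i] = s;
  }
  return proj;
}

// Mean squared Euclidean distance of the projected points to the projected centroid.
static double projectedEnergy(double[][] points, double[] centroid, double[][] basis) {
  double[] cProj = project(basis, centroid);
  double sum = 0.;
  for (double[] p : points) {
    double[] pProj = project(basis, p);
    for (int i = 0; i < cProj.length; i++) {
      double diff = pProj[i] - cProj[i];
      sum += diff * diff;
    }
  }
  return sum / points.length;
}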
Use of de.lmu.ifi.dbs.elki.data.NumberVector in project elki by elki-project.
Class AbstractKMeans, method macQueenIterate.
/**
* Perform a MacQueen style iteration.
*
* @param relation Relation
* @param means Means
* @param clusters Clusters
* @param assignment Current cluster assignment
* @param varsum Variance sum output
* @return true when the means have changed
*/
protected boolean macQueenIterate(Relation<? extends NumberVector> relation, double[][] means, List<ModifiableDBIDs> clusters, WritableIntegerDataStore assignment, double[] varsum) {
  boolean changed = false;
  Arrays.fill(varsum, 0.);
  // Raw distance function
  final NumberVectorDistanceFunction<?> df = getDistanceFunction();
  // Incremental update
  for (DBIDIter iditer = relation.iterDBIDs(); iditer.valid(); iditer.advance()) {
    double mindist = Double.POSITIVE_INFINITY;
    NumberVector fv = relation.get(iditer);
    int minIndex = 0;
    for (int i = 0; i < k; i++) {
      double dist = df.distance(fv, DoubleVector.wrap(means[i]));
      if (dist < mindist) {
        minIndex = i;
        mindist = dist;
      }
    }
    varsum[minIndex] += mindist;
    changed |= updateMeanAndAssignment(clusters, means, minIndex, fv, iditer, assignment);
  }
  return changed;
}
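The incremental work happens in updateMeanAndAssignment, which moves the object between clusters and adjusts the affected means in place. A minimal standalone sketch of such a MacQueen-style mean update (a hypothetical helper with assumed parameters, not the ELKI signature) could look like this:

// Hypothetical sketch of an incremental (MacQueen-style) mean update.
// sizes[c] is the current number of points in cluster c; "from" may be -1 for unassigned points.
static boolean incrementalUpdate(double[][] means, int[] sizes, double[] point, int from, int to) {
  if (from == to) {
    return false; // assignment unchanged, means unchanged
  }
  if (from >= 0) {
    // Remove the point from its old mean (leave the mean untouched if it was the only member).
    int n = sizes[from];
    if (n > 1) {
      for (int d = 0; d < point.length; d++) {
        means[from][d] = (means[from][d] * n - point[d]) / (n - 1);
      }
    }
    sizes[from] = n - 1;
  }
  // Add the point to its new mean.
  int m = sizes[to];
  for (int d = 0; d < point.length; d++) {
    means[to][d] = (means[to][d] * m + point[d]) / (m + 1);
  }
  sizes[to] = m + 1;
  return true;
}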
Use of de.lmu.ifi.dbs.elki.data.NumberVector in project elki by elki-project.
Class AbstractKMeans, method assignToNearestCluster.
/**
* Assigns each object to the nearest cluster mean. Afterwards, the
* k<sup>th</sup> cluster contains the ids of those FeatureVectors that are
* nearest to the k<sup>th</sup> mean.
*
* @param relation the database to cluster
* @param means a list of k means
* @param clusters cluster assignment
* @param assignment Current cluster assignment
* @param varsum Variance sum output
* @return true when at least one object was reassigned
*/
protected boolean assignToNearestCluster(Relation<? extends NumberVector> relation, double[][] means, List<? extends ModifiableDBIDs> clusters, WritableIntegerDataStore assignment, double[] varsum) {
  assert (k == means.length);
  boolean changed = false;
  // Reset all clusters
  Arrays.fill(varsum, 0.);
  for (ModifiableDBIDs cluster : clusters) {
    cluster.clear();
  }
  final NumberVectorDistanceFunction<?> df = getDistanceFunction();
  for (DBIDIter iditer = relation.iterDBIDs(); iditer.valid(); iditer.advance()) {
    double mindist = Double.POSITIVE_INFINITY;
    NumberVector fv = relation.get(iditer);
    int minIndex = 0;
    for (int i = 0; i < k; i++) {
      double dist = df.distance(fv, DoubleVector.wrap(means[i]));
      if (dist < mindist) {
        minIndex = i;
        mindist = dist;
      }
    }
    varsum[minIndex] += mindist;
    clusters.get(minIndex).add(iditer);
    changed |= assignment.putInt(iditer, minIndex) != minIndex;
  }
  return changed;
}
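Unlike the incremental MacQueen variant above, this is a full reassignment pass: every object is compared against all k means and the clusters and variance sums are rebuilt from scratch. A minimal sketch of the inner nearest-mean search on plain double[] data, assuming squared Euclidean distance (the ELKI code delegates to whatever distance function is configured), is shown here for reference:

// Minimal sketch: index of the nearest mean under squared Euclidean distance.
static int nearestMean(double[] point, double[][] means) {
  int best = 0;
  double bestDist = Double.POSITIVE_INFINITY;
  for (int i = 0; i < means.length; i++) {
    double dist = 0.;
    for (int d = 0; d < point.length; d++) {
      double diff = point[d] - means[i][d];
      dist += diff * diff;
    }
    if (dist < bestDist) {
      bestDist = dist;
      best = i;
    }
  }
  return best;
}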
Use of de.lmu.ifi.dbs.elki.data.NumberVector in project elki by elki-project.
Class EM, method assignProbabilitiesToInstances.
/**
* Assigns the current probability values to the instances in the database and
* computes the expectation value of the current mixture of distributions.
*
* The expectation value is computed as the average, over all instances, of the
* logarithm of the instance's total density under the mixture.
*
* @param relation the database used for assignment to instances
* @param models Cluster models
* @param probClusterIGivenX Output storage for cluster probabilities
* @return the expectation value of the current mixture of distributions
*/
public static double assignProbabilitiesToInstances(Relation<? extends NumberVector> relation, List<? extends EMClusterModel<?>> models, WritableDataStore<double[]> probClusterIGivenX) {
  final int k = models.size();
  double emSum = 0.;
  for (DBIDIter iditer = relation.iterDBIDs(); iditer.valid(); iditer.advance()) {
    NumberVector vec = relation.get(iditer);
    double[] probs = new double[k];
    for (int i = 0; i < k; i++) {
      double v = models.get(i).estimateLogDensity(vec);
      probs[i] = v > MIN_LOGLIKELIHOOD ? v : MIN_LOGLIKELIHOOD;
    }
    final double logP = logSumExp(probs);
    for (int i = 0; i < k; i++) {
      probs[i] = FastMath.exp(probs[i] - logP);
    }
    probClusterIGivenX.put(iditer, probs);
    emSum += logP;
  }
  return emSum / relation.size();
}
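The normalization uses the log-sum-exp trick: the per-cluster log densities are turned into posterior probabilities p(cluster | x) without numerical underflow, and the log of the instance's total density is accumulated into the expectation value. A simplified standalone sketch of that step (java.lang.Math here stands in for the logSumExp helper and FastMath used in the code above):

// Numerically stable log(sum(exp(x_i))): shift by the maximum before exponentiating.
static double logSumExp(double[] logs) {
  double max = Double.NEGATIVE_INFINITY;
  for (double v : logs) {
    max = v > max ? v : max;
  }
  double sum = 0.;
  for (double v : logs) {
    sum += Math.exp(v - max);
  }
  return max + Math.log(sum);
}

// Converts per-cluster log densities into posterior probabilities p(cluster | x) in place,
// returning the log of the instance's total density under the mixture.
static double normalizeLogDensities(double[] logDensities) {
  double logP = logSumExp(logDensities);
  for (int i = 0; i < logDensities.length; i++) {
    logDensities[i] = Math.exp(logDensities[i] - logP);
  }
  return logP;
}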
Use of de.lmu.ifi.dbs.elki.data.NumberVector in project elki by elki-project.
Class BestOfMultipleKMeans, method run.
@Override
public Clustering<M> run(Database database, Relation<V> relation) {
  if (!(innerkMeans.getDistanceFunction() instanceof PrimitiveDistanceFunction)) {
    throw new AbortException("K-Means results can only be evaluated for primitive distance functions, got: " + innerkMeans.getDistanceFunction().getClass());
  }
  @SuppressWarnings("unchecked")
  final NumberVectorDistanceFunction<? super NumberVector> df = (NumberVectorDistanceFunction<? super NumberVector>) innerkMeans.getDistanceFunction();
  Clustering<M> bestResult = null;
  double bestCost = Double.NaN;
  FiniteProgress prog = LOG.isVerbose() ? new FiniteProgress("K-means iterations", trials, LOG) : null;
  for (int i = 0; i < trials; i++) {
    Clustering<M> currentCandidate = innerkMeans.run(database, relation);
    double currentCost = qualityMeasure.quality(currentCandidate, df, relation);
    if (LOG.isVerbose()) {
      LOG.verbose("Cost of candidate " + i + ": " + currentCost);
    }
    if (qualityMeasure.isBetter(currentCost, bestCost)) {
      bestResult = currentCandidate;
      bestCost = currentCost;
    }
    LOG.incrementProcessed(prog);
  }
  LOG.ensureCompleted(prog);
  return bestResult;
}
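The surrounding logic is a generic "keep the best of N trials" pattern: bestCost starts as Double.NaN, so the quality measure's isBetter accepts the first candidate unconditionally, and later candidates replace the best result only when their cost is better. A simplified, ELKI-independent sketch of the same pattern (assuming smaller cost is better, whereas KMeansQualityMeasure.isBetter abstracts over the direction):

// Simplified "best of N trials" pattern; runTrial and cost are hypothetical callbacks.
static <R> R bestOfTrials(int trials, java.util.function.Supplier<R> runTrial, java.util.function.ToDoubleFunction<R> cost) {
  R best = null;
  double bestCost = Double.NaN; // comparisons with NaN are false, so only the null check accepts the first trial
  for (int i = 0; i < trials; i++) {
    R candidate = runTrial.get();
    double currentCost = cost.applyAsDouble(candidate);
    if (best == null || currentCost < bestCost) { // assumes smaller cost is better
      best = candidate;
      bestCost = currentCost;
    }
  }
  return best;
}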