use of de.lmu.ifi.dbs.elki.data.Cluster in project elki by elki-project.
the class EvaluateRankingQuality method run.
/**
 * Build a histogram of ranking quality over the relative position of objects
 * within their cluster.
 *
 * The data is clustered by labels, and for every cluster the mean vector and
 * covariance matrix are computed. Cluster members are then ordered by
 * {@code mahalanobisDistance} to the cluster mean; for each member the full
 * neighbor ranking (k = relation size) is scored via
 * {@code EvaluateClustering.evaluateRanking}, and the score is recorded in a
 * histogram keyed by {@code offset / clusterSize}.
 *
 * @param database Database to process
 * @return Histogram result; each row holds bin center, count, mean and sample
 *         variance of the scores in that bin
 */
@Override
public HistogramResult run(Database database) {
  final Relation<V> relation = database.getRelation(getInputTypeRestriction()[0]);
  final int total = relation.size();
  final DistanceQuery<V> distQuery = database.getDistanceQuery(relation, getDistanceFunction());
  // Request complete rankings: k equals the number of objects.
  final KNNQuery<V> knnQuery = database.getKNNQuery(distQuery, total);

  if (LOG.isVerbose()) {
    LOG.verbose("Preprocessing clusters...");
  }
  // Reference clustering derived from the labels.
  final Collection<Cluster<Model>> split = (new ByLabelOrAllInOneClustering()).run(database).getAllClusters();

  // Per-cluster mean vector and covariance matrix.
  final HashMap<Cluster<?>, double[]> averages = new HashMap<>(split.size());
  final HashMap<Cluster<?>, double[][]> covmats = new HashMap<>(split.size());
  for (Cluster<?> cluster : split) {
    final CovarianceMatrix stats = CovarianceMatrix.make(relation, cluster.getIDs());
    // The mean must be read before the matrix object is destroyed.
    averages.put(cluster, stats.getMeanVector());
    covmats.put(cluster, stats.destroyToPopulationMatrix());
  }

  final MeanVarianceStaticHistogram hist = new MeanVarianceStaticHistogram(numbins, 0.0, 1.0);
  if (LOG.isVerbose()) {
    LOG.verbose("Processing points...");
  }
  final FiniteProgress rocloop = LOG.isVerbose() ? new FiniteProgress("Computing ROC AUC values", total, LOG) : null;
  final ROCEvaluation roc = new ROCEvaluation();

  for (Cluster<?> cluster : split) {
    final double[] mean = averages.get(cluster);
    final double[][] cov = covmats.get(cluster);
    final int csize = cluster.size();

    // Order the cluster members by their distance to the cluster mean.
    final ModifiableDoubleDBIDList byDistance = DBIDUtil.newDistanceDBIDList(csize);
    for (DBIDIter member = cluster.getIDs().iter(); member.valid(); member.advance()) {
      byDistance.add(mahalanobisDistance(cov, relation.get(member).toArray(), mean), member);
    }
    byDistance.sort();

    // Score the neighbor ranking of each member; the histogram key is the
    // member's relative rank inside its cluster.
    for (DBIDArrayIter ranked = byDistance.iter(); ranked.valid(); ranked.advance()) {
      final KNNList neighbors = knnQuery.getKNNForDBID(ranked, total);
      final double score = EvaluateClustering.evaluateRanking(roc, cluster, neighbors);
      hist.put(((double) ranked.getOffset()) / csize, score);
      LOG.incrementProcessed(rocloop);
    }
  }
  LOG.ensureCompleted(rocloop);

  // Export the histogram bins as rows of doubles.
  final Collection<double[]> res = new ArrayList<>(total);
  final ObjHistogram.Iter<MeanVariance> bin = hist.iter();
  while (bin.valid()) {
    final MeanVariance mv = bin.getValue();
    res.add(new double[] { bin.getCenter(), mv.getCount(), mv.getMean(), mv.getSampleVariance() });
    bin.advance();
  }
  return new HistogramResult("Ranking Quality Histogram", "ranking-histogram", res);
}
use of de.lmu.ifi.dbs.elki.data.Cluster in project elki by elki-project.
the class NaiveAgglomerativeHierarchicalClustering1 method run.
/**
 * Run the naive agglomerative clustering algorithm.
 *
 * A full pairwise distance matrix is computed first. Then, repeatedly, the two
 * closest still-active entries are merged (the entry with the larger index is
 * folded into the one with the smaller index), and the distance matrix is
 * updated with the minimum of the two merged rows/columns (single-linkage).
 * Merging stops after {@code size - numclusters} merges.
 *
 * @param db Database
 * @param relation Relation
 * @return Flat clustering containing one top-level cluster for every entry
 *         that is still active after merging
 */
public Result run(Database db, Relation<O> relation) {
  DistanceQuery<O> dq = db.getDistanceQuery(relation, getDistanceFunction());
  ArrayDBIDs ids = DBIDUtil.ensureArray(relation.getDBIDs());
  final int size = ids.size();
  LOG.verbose("Notice: SLINK is a much faster algorithm for single-linkage clustering!");

  // Compute the initial (symmetric) distance matrix.
  double[][] matrix = new double[size][size];
  DBIDArrayIter ix = ids.iter(), iy = ids.iter();
  for (int x = 0; ix.valid(); x++, ix.advance()) {
    iy.seek(0);
    for (int y = 0; y < x; y++, iy.advance()) {
      final double dist = dq.distance(ix, iy);
      matrix[x][y] = dist;
      matrix[y][x] = dist;
    }
  }

  // Merge height per object: +infinity means "still active", a finite value
  // means the object was merged into another one at that distance.
  double[] height = new double[size];
  Arrays.fill(height, Double.POSITIVE_INFINITY);
  // Parent node, to track merges; every object points to itself initially.
  // Maintained during merging, but not read again within this method.
  ArrayModifiableDBIDs parent = DBIDUtil.newArray(ids);
  // Member sets of the active non-trivial clusters, keyed by matrix index.
  Int2ReferenceMap<ModifiableDBIDs> clusters = new Int2ReferenceOpenHashMap<>();

  // Repeat until everything merged, except the desired number of clusters:
  final int stop = size - numclusters;
  FiniteProgress prog = LOG.isVerbose() ? new FiniteProgress("Agglomerative clustering", stop, LOG) : null;
  for (int i = 0; i < stop; i++) {
    // Find the closest pair (x, y) with y < x among the active entries.
    double min = Double.POSITIVE_INFINITY;
    int minx = -1, miny = -1;
    for (int x = 0; x < size; x++) {
      if (height[x] < Double.POSITIVE_INFINITY) {
        continue; // already merged away
      }
      for (int y = 0; y < x; y++) {
        if (height[y] < Double.POSITIVE_INFINITY) {
          continue; // already merged away
        }
        if (matrix[x][y] < min) {
          min = matrix[x][y];
          minx = x;
          miny = y;
        }
      }
    }
    assert (minx >= 0 && miny >= 0);
    // Avoid allocating memory, by reusing existing iterators:
    ix.seek(minx);
    iy.seek(miny);
    // Perform merge in data structure: x -> y
    // Since y < x, prefer keeping y, dropping x.
    height[minx] = min;
    parent.set(minx, iy);
    // Merge the member sets; singletons have no entry in the map yet.
    ModifiableDBIDs cx = clusters.get(minx);
    ModifiableDBIDs cy = clusters.get(miny);
    if (cy == null) {
      cy = DBIDUtil.newHashSet();
      cy.add(iy);
    }
    if (cx == null) {
      cy.add(ix);
    } else {
      cy.addDBIDs(cx);
      clusters.remove(minx);
    }
    clusters.put(miny, cy);
    // Update distance matrix for y (single-linkage: keep the minimum):
    for (int j = 0; j < size; j++) {
      matrix[j][miny] = Math.min(matrix[j][minx], matrix[j][miny]);
      matrix[miny][j] = Math.min(matrix[minx][j], matrix[miny][j]);
    }
    LOG.incrementProcessed(prog);
  }
  LOG.ensureCompleted(prog);

  // Build the clustering result from the entries that are still active.
  final Clustering<Model> dendrogram = new Clustering<>("Hierarchical-Clustering", "hierarchical-clustering");
  for (int x = 0; x < size; x++) {
    // A finite height marks an object that was merged into another entry;
    // its members are already contained in the surviving entry's set, so it
    // must not be reported as a cluster of its own.
    if (height[x] < Double.POSITIVE_INFINITY) {
      continue;
    }
    DBIDs cids = clusters.get(x);
    if (cids == null) {
      // Never merged with anything: singleton cluster.
      ix.seek(x);
      cids = DBIDUtil.deref(ix);
    }
    Cluster<Model> cluster = new Cluster<>("Cluster", cids);
    dendrogram.addToplevelCluster(cluster);
  }
  return dendrogram;
}
use of de.lmu.ifi.dbs.elki.data.Cluster in project elki by elki-project.
the class NaiveMeanShiftClustering method run.
/**
 * Run the mean-shift clustering algorithm.
 *
 * Every object is shifted iteratively to the kernel-weighted centroid of the
 * range-query neighbors of its current position. An object is assigned to an
 * existing cluster once its position is close enough to that cluster's
 * center, starts a new cluster when the shift falls below the threshold (or
 * the iteration limit is reached), and is collected as noise when its
 * neighborhood is too small.
 *
 * @param database Database
 * @param relation Data relation
 * @return Clustering result
 */
public Clustering<MeanModel> run(Database database, Relation<V> relation) {
  final DistanceQuery<V> distq = database.getDistanceQuery(relation, getDistanceFunction());
  final RangeQuery<V> rangeq = database.getRangeQuery(distq);
  final NumberVector.Factory<V> factory = RelationUtil.getNumberVectorFactory(relation);
  final int dim = RelationUtil.dimensionality(relation);
  // Stopping threshold, relative to the bandwidth.
  final double threshold = bandwidth * 1E-10;

  // Cluster centers found so far, each with its members.
  final ArrayList<Pair<V, ModifiableDBIDs>> clusters = new ArrayList<>();
  final ModifiableDBIDs noise = DBIDUtil.newArray();

  final FiniteProgress prog = LOG.isVerbose() ? new FiniteProgress("Mean-shift clustering", relation.size(), LOG) : null;
  for (DBIDIter iter = relation.iterDBIDs(); iter.valid(); iter.advance()) {
    // Start at the object itself.
    V position = relation.get(iter);
    for (int j = 1; j <= MAXITER; j++) {
      final DoubleDBIDList neighbors = rangeq.getRangeForObject(position, bandwidth);
      final int nsize = neighbors.size();
      // Too few neighbors: more than one is required in the first round,
      // at least one in all later rounds.
      if (nsize <= 1 && (nsize < 1 || j <= 1)) {
        noise.add(iter);
        break;
      }

      // Kernel-weighted centroid of the neighborhood is the next position.
      final Centroid shifted = new Centroid(dim);
      for (DoubleDBIDListIter n = neighbors.iter(); n.valid(); n.advance()) {
        final double weight = kernel.density(n.doubleValue() / bandwidth);
        shifted.put(relation.get(n), weight);
      }
      // TODO: detect 0 weight!
      final V newvec = factory.newNumberVector(shifted.getArrayRef());

      // Locate the nearest already known cluster center.
      double bestd = Double.POSITIVE_INFINITY;
      Pair<V, ModifiableDBIDs> bestp = null;
      for (Pair<V, ModifiableDBIDs> candidate : clusters) {
        final double d = distq.distance(newvec, candidate.first);
        if (d < bestd) {
          bestd = d;
          bestp = candidate;
        }
      }

      // Length of the current shift.
      final double delta = distq.distance(position, newvec);
      final boolean nearKnown = bestd < 10 * threshold || bestd * 2 < delta;
      if (nearKnown) {
        bestp.second.add(iter);
        break;
      }

      final boolean exhausted = (j == MAXITER);
      if (exhausted) {
        LOG.warning("No convergence after " + MAXITER + " iterations. Distance: " + delta);
      }
      if (Double.isNaN(delta)) {
        LOG.warning("Encountered NaN distance. Invalid center vector? " + newvec.toString());
        break;
      }
      if (exhausted || delta < threshold) {
        if (LOG.isDebuggingFine()) {
          LOG.debugFine("New cluster:" + newvec + " delta: " + delta + " threshold: " + threshold + " bestd: " + bestd);
        }
        final ArrayModifiableDBIDs members = DBIDUtil.newArray();
        members.add(iter);
        clusters.add(new Pair<V, ModifiableDBIDs>(newvec, members));
        break;
      }
      // Not converged yet: continue from the shifted position.
      position = newvec;
    }
    LOG.incrementProcessed(prog);
  }
  LOG.ensureCompleted(prog);

  // Wrap the collected centers and members into the result.
  final ArrayList<Cluster<MeanModel>> cs = new ArrayList<>(clusters.size());
  for (Pair<V, ModifiableDBIDs> found : clusters) {
    final MeanModel model = new MeanModel(found.first.toArray());
    cs.add(new Cluster<>(found.second, model));
  }
  if (noise.size() > 0) {
    cs.add(new Cluster<MeanModel>(noise, true));
  }
  return new Clustering<>("Mean-shift Clustering", "mean-shift-clustering", cs);
}
use of de.lmu.ifi.dbs.elki.data.Cluster in project elki by elki-project.
the class COPAC method run.
/**
 * Run the COPAC algorithm.
 *
 * A generalized DBSCAN is executed with the COPAC neighbor predicate and a
 * MinPts core predicate; every non-empty cluster it reports is then wrapped
 * again with a {@code DimensionModel} carrying the dimensionality that the
 * neighbor predicate reports for the cluster's first object.
 *
 * @param database Database
 * @param relation Vector field relation
 * @return COPAC clustering
 */
public Clustering<DimensionModel> run(Database database, Relation<V> relation) {
  final COPACNeighborPredicate.Instance npred = new COPACNeighborPredicate<V>(settings).instantiate(database, relation);
  final CorePredicate.Instance<DBIDs> cpred = new MinPtsCorePredicate(settings.minpts).instantiate(database);
  final Clustering<Model> dclusters = new GeneralizedDBSCAN.Instance<>(npred, cpred, false).run();

  // Re-wrap the detected clusters for COPAC; the DBSCAN result is flat, so
  // visiting the top-level clusters covers everything.
  final Clustering<DimensionModel> result = new Clustering<>("COPAC clustering", "copac-clustering");
  for (It<Cluster<Model>> iter = dclusters.iterToplevelClusters(); iter.valid(); iter.advance()) {
    final Cluster<Model> found = iter.get();
    if (found.size() <= 0) {
      continue; // nothing to wrap
    }
    final int dim = npred.dimensionality(found.getIDs().iter());
    result.addToplevelCluster(new Cluster<>(found.getIDs(), new DimensionModel(dim)));
  }
  return result;
}
use of de.lmu.ifi.dbs.elki.data.Cluster in project elki by elki-project.
the class OPTICSXi method extractClusters.
/**
 * Extract clusters from a cluster order result.
 *
 * The cluster order is scanned once. Steep-down areas are collected while
 * scanning; whenever a steep-up area is found, it is combined with each of the
 * collected steep-down areas (newest first) to form candidate clusters, which
 * are validated and arranged into a hierarchy by index containment.
 *
 * @param clusterOrderResult cluster order result
 * @param relation Relation
 * @param ixi Parameter 1 - Xi
 * @param minpts Parameter minPts
 * @return Clustering of the extracted clusters; the cluster order result is
 *         attached to it as child result
 */
private Clustering<OPTICSModel> extractClusters(ClusterOrder clusterOrderResult, Relation<?> relation, double ixi, int minpts) {
ArrayDBIDs clusterOrder = clusterOrderResult.ids;
DoubleDataStore reach = clusterOrderResult.reachability;
// Reusable iterator and variable, to avoid allocations in the loops below.
DBIDArrayIter tmp = clusterOrder.iter();
DBIDVar tmp2 = DBIDUtil.newVar();
// Maximum reachability seen since the last steep area ("maximum in between").
double mib = 0.0;
// All steep areas are only collected when keepsteep is set.
List<SteepArea> salist = keepsteep ? new ArrayList<SteepArea>() : null;
// Steep-down areas that may still serve as the start of a cluster.
List<SteepDownArea> sdaset = new ArrayList<>();
final Clustering<OPTICSModel> clustering = new Clustering<>("OPTICS Xi-Clusters", "optics");
// Clusters found so far that have not been attached to a parent yet.
HashSet<Cluster<OPTICSModel>> curclusters = new HashSet<>();
// Objects not yet assigned to any cluster; each object is claimed at most once.
HashSetModifiableDBIDs unclaimedids = DBIDUtil.newHashSet(relation.getDBIDs());
FiniteProgress scanprog = LOG.isVerbose() ? new FiniteProgress("OPTICS Xi cluster extraction", clusterOrder.size(), LOG) : null;
// Note: the scan position is advanced inside the loop body, not in the loop header.
for (SteepScanPosition scan = new SteepScanPosition(clusterOrderResult); scan.hasNext(); ) {
if (scanprog != null) {
scanprog.setProcessed(scan.index, LOG);
}
// Update maximum-inbetween
mib = MathUtil.max(mib, scan.getReachability());
// The last point cannot be the start of a steep area.
if (!scan.next.valid()) {
break;
}
// Xi-steep down area
if (scan.steepDown(ixi)) {
// Update mib values with current mib and filter
updateFilterSDASet(mib, sdaset, ixi);
final double startval = scan.getReachability();
mib = 0.;
int startsteep = scan.index, endsteep = scan.index;
// Extend the area while the plot keeps going down.
for (scan.next(); scan.hasNext(); scan.next()) {
// still steep - continue.
if (scan.steepDown(ixi)) {
endsteep = scan.index;
continue;
}
// Always stop looking after minpts "flat" steps.
if (!scan.steepDown(1.0) || scan.index - endsteep > minpts) {
break;
}
}
final SteepDownArea sda = new SteepDownArea(startsteep, endsteep, startval, 0);
if (LOG.isDebuggingFinest()) {
LOG.debugFinest("New steep down area: " + sda.toString());
}
sdaset.add(sda);
if (salist != null) {
salist.add(sda);
}
continue;
}
// Xi-steep up area
if (scan.steepUp(ixi)) {
// Update mib values with current mib and filter
updateFilterSDASet(mib, sdaset, ixi);
final SteepUpArea sua;
// Compute steep-up area
{
int startsteep = scan.index, endsteep = scan.index;
mib = scan.getReachability();
double esuccr = scan.getNextReachability();
// Find end of steep-up-area, eventually updating mib again
while (!Double.isInfinite(esuccr) && scan.hasNext()) {
scan.next();
// still steep - continue.
if (scan.steepUp(ixi)) {
endsteep = scan.index;
mib = scan.getReachability();
esuccr = scan.getNextReachability();
continue;
}
// Stop looking after minpts non-up steps.
if (!scan.steepUp(1.0) || scan.index - endsteep > minpts) {
break;
}
}
// The loop above does not advance when the successor reachability is
// infinite, so move the scan position forward here.
if (Double.isInfinite(esuccr)) {
scan.next();
}
sua = new SteepUpArea(startsteep, endsteep, esuccr);
if (LOG.isDebuggingFinest()) {
LOG.debugFinest("New steep up area: " + sua.toString());
}
if (salist != null) {
salist.add(sua);
}
}
// Validate and compute clusters
// LOG.debug("SDA size:"+sdaset.size()+" "+sdaset);
ListIterator<SteepDownArea> sdaiter = sdaset.listIterator(sdaset.size());
// Iterate backwards for correct hierarchy generation.
while (sdaiter.hasPrevious()) {
SteepDownArea sda = sdaiter.previous();
if (LOG.isDebuggingFinest()) {
LOG.debugFinest("Comparing: eU=" + mib + " SDA: " + sda.toString());
}
// Condition 3b: end-of-steep-up > maximum-in-between lower
// NOTE(review): the debug message below prints ">=" although the guarding
// condition is "<"; only the log text is affected.
if (mib * ixi < sda.getMib()) {
if (LOG.isDebuggingFinest()) {
LOG.debugFinest("mib * ixi = " + mib * ixi + " >= sda.getMib() = " + sda.getMib());
}
continue;
}
// By default, clusters cover both the steep up and steep down area
int cstart = sda.getStartIndex(), cend = MathUtil.min(sua.getEndIndex(), clusterOrder.size() - 1);
// However, we sometimes have to adjust this (Condition 4):
{
// Case b)
// The steep-down side starts higher: move the start forward while the
// following reachability still exceeds the steep-up maximum.
if (sda.getMaximum() * ixi >= sua.getMaximum()) {
while (//
cstart < cend && reach.doubleValue(tmp.seek(cstart + 1)) > sua.getMaximum()) {
cstart++;
}
} else // Case c)
// The steep-up side ends higher: move the end backward while the
// preceding reachability still exceeds the steep-down maximum.
if (sua.getMaximum() * ixi >= sda.getMaximum()) {
while (//
cend > cstart && reach.doubleValue(tmp.seek(cend - 1)) > sda.getMaximum()) {
cend--;
}
}
// Case a) is the default
}
// removes common artifacts from the Xi method
if (!nocorrect) {
// Drop trailing objects whose predecessor is not within [cstart, cend).
simplify: while (cend > cstart) {
clusterOrderResult.predecessor.assignVar(tmp.seek(cend), tmp2);
for (int i = cstart; i < cend; i++) {
if (DBIDUtil.equal(tmp2, tmp.seek(i))) {
break simplify;
}
}
// Not found.
--cend;
}
}
// Condition 3a: obey minpts
if (cend - cstart + 1 < minpts) {
if (LOG.isDebuggingFinest()) {
LOG.debugFinest("MinPts not satisfied.");
}
continue;
}
// Build the cluster
ModifiableDBIDs dbids = DBIDUtil.newArray();
for (int idx = cstart; idx <= cend; idx++) {
tmp.seek(idx);
// Collect only unclaimed IDs.
if (unclaimedids.remove(tmp)) {
dbids.add(tmp);
}
}
if (LOG.isDebuggingFine()) {
LOG.debugFine("Found cluster with " + dbids.size() + " new objects, length " + (cend - cstart + 1));
}
OPTICSModel model = new OPTICSModel(cstart, cend);
Cluster<OPTICSModel> cluster = new Cluster<>("Cluster_" + cstart + "_" + cend, dbids, model);
// Build the hierarchy
{
// Every pending cluster whose index range lies within the new range
// becomes a child of the new cluster and is no longer pending.
Iterator<Cluster<OPTICSModel>> iter = curclusters.iterator();
while (iter.hasNext()) {
Cluster<OPTICSModel> clus = iter.next();
OPTICSModel omodel = clus.getModel();
if (model.getStartIndex() <= omodel.getStartIndex() && omodel.getEndIndex() <= model.getEndIndex()) {
clustering.addChildCluster(cluster, clus);
iter.remove();
}
}
}
curclusters.add(cluster);
}
continue;
}
// Flat - advance anyway.
scan.next();
}
if (scanprog != null) {
scanprog.setProcessed(clusterOrder.size(), LOG);
}
if (!unclaimedids.isEmpty()) {
// Leftover objects form a single top-level cluster over the full index
// range; it is flagged as noise when the last reachability value in the
// cluster order is infinite. All pending clusters become its children.
boolean noise = reach.doubleValue(tmp.seek(clusterOrder.size() - 1)) >= Double.POSITIVE_INFINITY;
Cluster<OPTICSModel> allcluster = new Cluster<>(noise ? "Noise" : "Cluster", unclaimedids, noise, new OPTICSModel(0, clusterOrder.size() - 1));
for (Cluster<OPTICSModel> cluster : curclusters) {
clustering.addChildCluster(allcluster, cluster);
}
clustering.addToplevelCluster(allcluster);
} else {
// Everything was claimed: the pending clusters are the top level.
for (Cluster<OPTICSModel> cluster : curclusters) {
clustering.addToplevelCluster(cluster);
}
}
clustering.addChildResult(clusterOrderResult);
if (salist != null) {
clusterOrderResult.addChildResult(new SteepAreaResult(salist));
}
return clustering;
}
Aggregations