use of de.lmu.ifi.dbs.elki.data.Clustering in project elki by elki-project.
the class COPAC method run.
/**
* Run the COPAC algorithm.
*
* @param database Database
* @param relation Vector field relation
* @return COPAC clustering
*/
public Clustering<DimensionModel> run(Database database, Relation<V> relation) {
COPACNeighborPredicate.Instance npred = new COPACNeighborPredicate<V>(settings).instantiate(database, relation);
CorePredicate.Instance<DBIDs> cpred = new MinPtsCorePredicate(settings.minpts).instantiate(database);
Clustering<Model> dclusters = new GeneralizedDBSCAN.Instance<>(npred, cpred, false).run();
// Re-wrap the detected clusters for COPAC:
Clustering<DimensionModel> result = new Clustering<>("COPAC clustering", "copac-clustering");
// Generalized DBSCAN clusterings will be flat.
for (It<Cluster<Model>> iter = dclusters.iterToplevelClusters(); iter.valid(); iter.advance()) {
Cluster<Model> clus = iter.get();
if (clus.size() > 0) {
int dim = npred.dimensionality(clus.getIDs().iter());
DimensionModel model = new DimensionModel(dim);
result.addToplevelCluster(new Cluster<>(clus.getIDs(), model));
}
}
return result;
}
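The dimensionality stored in each DimensionModel comes from the COPACNeighborPredicate, which runs a local PCA per point and keeps only the strong eigenvectors. A minimal standalone sketch of that idea, assuming a percentage-style eigenvalue filter with threshold alpha (the class and method names below are illustrative, not ELKI API):
// Illustrative helper, not part of ELKI: estimate a local correlation
// dimensionality from PCA eigenvalues, keeping the smallest prefix of
// (descending) eigenvalues that explains at least an alpha share of variance.
final class LocalDimensionality {
  static int estimate(double[] eigenvalues, double alpha) {
    double total = 0.;
    for (double ev : eigenvalues) {
      total += ev;
    }
    double partial = 0.;
    for (int d = 0; d < eigenvalues.length; d++) {
      partial += eigenvalues[d];
      if (partial >= alpha * total) {
        return d + 1; // the first d + 1 eigenvectors suffice
      }
    }
    return eigenvalues.length;
  }
}
Points with the same local dimensionality end up in the same Generalized DBSCAN partition, which is why the loop above can attach a single DimensionModel per detected cluster.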
use of de.lmu.ifi.dbs.elki.data.Clustering in project elki by elki-project.
the class OPTICSXi method extractClusters.
/**
* Extract clusters from a cluster order result.
*
* @param clusterOrderResult cluster order result
* @param relation Relation
* @param ixi Parameter 1 - Xi
* @param minpts Parameter minPts
* @return Extracted clusters
*/
private Clustering<OPTICSModel> extractClusters(ClusterOrder clusterOrderResult, Relation<?> relation, double ixi, int minpts) {
ArrayDBIDs clusterOrder = clusterOrderResult.ids;
DoubleDataStore reach = clusterOrderResult.reachability;
DBIDArrayIter tmp = clusterOrder.iter();
DBIDVar tmp2 = DBIDUtil.newVar();
double mib = 0.0;
List<SteepArea> salist = keepsteep ? new ArrayList<SteepArea>() : null;
List<SteepDownArea> sdaset = new ArrayList<>();
final Clustering<OPTICSModel> clustering = new Clustering<>("OPTICS Xi-Clusters", "optics");
HashSet<Cluster<OPTICSModel>> curclusters = new HashSet<>();
HashSetModifiableDBIDs unclaimedids = DBIDUtil.newHashSet(relation.getDBIDs());
FiniteProgress scanprog = LOG.isVerbose() ? new FiniteProgress("OPTICS Xi cluster extraction", clusterOrder.size(), LOG) : null;
for (SteepScanPosition scan = new SteepScanPosition(clusterOrderResult); scan.hasNext(); ) {
if (scanprog != null) {
scanprog.setProcessed(scan.index, LOG);
}
// Update the maximum in-between (mib) reachability
mib = MathUtil.max(mib, scan.getReachability());
// The last point cannot be the start of a steep area.
if (!scan.next.valid()) {
break;
}
// Xi-steep down area
if (scan.steepDown(ixi)) {
// Update mib values with current mib and filter
updateFilterSDASet(mib, sdaset, ixi);
final double startval = scan.getReachability();
mib = 0.;
int startsteep = scan.index, endsteep = scan.index;
for (scan.next(); scan.hasNext(); scan.next()) {
// still steep - continue.
if (scan.steepDown(ixi)) {
endsteep = scan.index;
continue;
}
// Always stop looking after minpts "flat" steps.
if (!scan.steepDown(1.0) || scan.index - endsteep > minpts) {
break;
}
}
final SteepDownArea sda = new SteepDownArea(startsteep, endsteep, startval, 0);
if (LOG.isDebuggingFinest()) {
LOG.debugFinest("New steep down area: " + sda.toString());
}
sdaset.add(sda);
if (salist != null) {
salist.add(sda);
}
continue;
}
// Xi-steep up area
if (scan.steepUp(ixi)) {
// Update mib values with current mib and filter
updateFilterSDASet(mib, sdaset, ixi);
final SteepUpArea sua;
// Compute steep-up area
{
int startsteep = scan.index, endsteep = scan.index;
mib = scan.getReachability();
double esuccr = scan.getNextReachability();
// Find the end of the steep-up area, possibly updating mib again
while (!Double.isInfinite(esuccr) && scan.hasNext()) {
scan.next();
// still steep - continue.
if (scan.steepUp(ixi)) {
endsteep = scan.index;
mib = scan.getReachability();
esuccr = scan.getNextReachability();
continue;
}
// Stop looking after minpts non-up steps.
if (!scan.steepUp(1.0) || scan.index - endsteep > minpts) {
break;
}
}
if (Double.isInfinite(esuccr)) {
scan.next();
}
sua = new SteepUpArea(startsteep, endsteep, esuccr);
if (LOG.isDebuggingFinest()) {
LOG.debugFinest("New steep up area: " + sua.toString());
}
if (salist != null) {
salist.add(sua);
}
}
// Validate and compute clusters
// LOG.debug("SDA size:"+sdaset.size()+" "+sdaset);
ListIterator<SteepDownArea> sdaiter = sdaset.listIterator(sdaset.size());
// Iterate backwards for correct hierarchy generation.
while (sdaiter.hasPrevious()) {
SteepDownArea sda = sdaiter.previous();
if (LOG.isDebuggingFinest()) {
LOG.debugFinest("Comparing: eU=" + mib + " SDA: " + sda.toString());
}
// Condition 3b: end-of-steep-up > maximum-in-between lower
if (mib * ixi < sda.getMib()) {
if (LOG.isDebuggingFinest()) {
LOG.debugFinest("mib * ixi = " + mib * ixi + " >= sda.getMib() = " + sda.getMib());
}
continue;
}
// By default, clusters cover both the steep up and steep down area
int cstart = sda.getStartIndex(), cend = MathUtil.min(sua.getEndIndex(), clusterOrder.size() - 1);
// However, we sometimes have to adjust this (Condition 4):
{
// Case b)
if (sda.getMaximum() * ixi >= sua.getMaximum()) {
while (cstart < cend && reach.doubleValue(tmp.seek(cstart + 1)) > sua.getMaximum()) {
cstart++;
}
} else if (sua.getMaximum() * ixi >= sda.getMaximum()) {
// Case c)
while (cend > cstart && reach.doubleValue(tmp.seek(cend - 1)) > sda.getMaximum()) {
cend--;
}
}
// Case a) is the default
}
// Remove common artifacts of the Xi method
if (!nocorrect) {
simplify: while (cend > cstart) {
clusterOrderResult.predecessor.assignVar(tmp.seek(cend), tmp2);
for (int i = cstart; i < cend; i++) {
if (DBIDUtil.equal(tmp2, tmp.seek(i))) {
break simplify;
}
}
// Not found.
--cend;
}
}
// Condition 3a: obey minpts
if (cend - cstart + 1 < minpts) {
if (LOG.isDebuggingFinest()) {
LOG.debugFinest("MinPts not satisfied.");
}
continue;
}
// Build the cluster
ModifiableDBIDs dbids = DBIDUtil.newArray();
for (int idx = cstart; idx <= cend; idx++) {
tmp.seek(idx);
// Collect only unclaimed IDs.
if (unclaimedids.remove(tmp)) {
dbids.add(tmp);
}
}
if (LOG.isDebuggingFine()) {
LOG.debugFine("Found cluster with " + dbids.size() + " new objects, length " + (cend - cstart + 1));
}
OPTICSModel model = new OPTICSModel(cstart, cend);
Cluster<OPTICSModel> cluster = new Cluster<>("Cluster_" + cstart + "_" + cend, dbids, model);
// Build the hierarchy
{
Iterator<Cluster<OPTICSModel>> iter = curclusters.iterator();
while (iter.hasNext()) {
Cluster<OPTICSModel> clus = iter.next();
OPTICSModel omodel = clus.getModel();
if (model.getStartIndex() <= omodel.getStartIndex() && omodel.getEndIndex() <= model.getEndIndex()) {
clustering.addChildCluster(cluster, clus);
iter.remove();
}
}
}
curclusters.add(cluster);
}
continue;
}
// Flat - advance anyway.
scan.next();
}
if (scanprog != null) {
scanprog.setProcessed(clusterOrder.size(), LOG);
}
if (!unclaimedids.isEmpty()) {
boolean noise = reach.doubleValue(tmp.seek(clusterOrder.size() - 1)) >= Double.POSITIVE_INFINITY;
Cluster<OPTICSModel> allcluster = new Cluster<>(noise ? "Noise" : "Cluster", unclaimedids, noise, new OPTICSModel(0, clusterOrder.size() - 1));
for (Cluster<OPTICSModel> cluster : curclusters) {
clustering.addChildCluster(allcluster, cluster);
}
clustering.addToplevelCluster(allcluster);
} else {
for (Cluster<OPTICSModel> cluster : curclusters) {
clustering.addToplevelCluster(cluster);
}
}
clustering.addChildResult(clusterOrderResult);
if (salist != null) {
clusterOrderResult.addChildResult(new SteepAreaResult(salist));
}
return clustering;
}
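The scan above is driven by the xi-steep predicates of the OPTICS Xi method: with ixi = 1 - xi, a position is steep down when the reachability drops to the next point by at least the xi fraction, and steep up when it rises by at least that fraction; calling the same predicates with 1.0, as the loops above do, merely tests for non-increasing or non-decreasing steps. A small standalone sketch over a plain reachability array (not the ELKI SteepScanPosition class):
// Illustrative xi-steep predicates on a reachability plot r[0..n-1];
// ixi = 1 - xi, matching the extractClusters parameter above.
final class XiSteep {
  static boolean steepDown(double[] r, int i, double ixi) {
    // Reachability must drop by at least the xi fraction.
    return r[i] * ixi >= r[i + 1];
  }

  static boolean steepUp(double[] r, int i, double ixi) {
    // Reachability must rise by at least the xi fraction.
    return r[i] <= r[i + 1] * ixi;
  }
}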
use of de.lmu.ifi.dbs.elki.data.Clustering in project elki by elki-project.
the class CLARANS method run.
public Clustering<MedoidModel> run(Database database, Relation<V> relation) {
if (relation.size() <= 0) {
return new Clustering<>("CLARANS Clustering", "clarans-clustering");
}
if (k * 2 >= relation.size()) {
// Random sampling of non-medoids will be slow for huge k
LOG.warning("A very large k was chosen. This implementation is not optimized for this case.");
}
DBIDs ids = relation.getDBIDs();
DistanceQuery<V> distQ = database.getDistanceQuery(relation, getDistanceFunction());
final boolean metric = getDistanceFunction().isMetric();
// Number of retries, relative rate, or absolute count:
final int retries = (int) Math.ceil(maxneighbor < 1 ? maxneighbor * ids.size() : maxneighbor);
Random rnd = random.getSingleThreadedRandom();
// Might copy!
DBIDArrayIter cand = DBIDUtil.ensureArray(ids).iter();
// Setup cluster assignment store
Assignment best = new Assignment(distQ, ids, DBIDUtil.newArray(k));
Assignment curr = new Assignment(distQ, ids, DBIDUtil.newArray(k));
Assignment scratch = new Assignment(distQ, ids, DBIDUtil.newArray(k));
// 1. initialize
double bestscore = Double.POSITIVE_INFINITY;
FiniteProgress prog = LOG.isVerbose() ? new FiniteProgress("CLARANS sampling restarts", numlocal, LOG) : null;
for (int i = 0; i < numlocal; i++) {
// 2. choose random initial medoids
// TODO: should we always use uniform sampling, to be closer to the paper?
curr.medoids.clear();
curr.medoids.addDBIDs(DBIDUtil.randomSample(ids, k, random));
// Cost of initial solution:
double total = curr.assignToNearestCluster();
// 3. Set j to 1.
int j = 1;
step: while (j < retries) {
// 4 part a. choose a random non-medoid (~ neighbor in G):
for (int r = 0; ; r++) {
// Random point
cand.seek(rnd.nextInt(ids.size()));
if (curr.nearest.doubleValue(cand) > 0) {
// Good: not a medoid.
break;
}
// We may have many duplicate points
if (metric && curr.second.doubleValue(cand) == 0) {
// Cannot yield an improvement if we are metric.
++j;
continue step;
} else if (!metric && !curr.medoids.contains(cand)) {
// Probably not a good candidate, but try nevertheless
break;
}
if (r >= 1000) {
throw new AbortException("Failed to choose a non-medoid in 1000 attempts. Choose k << N.");
}
// else: this must be the medoid.
}
// 4 part b. choose a random medoid to replace:
final int otherm = rnd.nextInt(k);
// 5. check lower cost
double cost = curr.computeCostDifferential(cand, otherm, scratch);
if (!(cost < 0)) {
// 6. try again
++j;
continue;
}
// cost is negative!
total += cost;
// Swap:
Assignment tmp = curr;
curr = scratch;
scratch = tmp;
j = 1;
}
// New best:
if (total < bestscore) {
// Swap:
Assignment tmp = curr;
curr = best;
best = tmp;
bestscore = total;
}
LOG.incrementProcessed(prog);
}
LOG.ensureCompleted(prog);
ArrayModifiableDBIDs[] clusters = ClusteringAlgorithmUtil.partitionsFromIntegerLabels(ids, best.assignment, k);
// Wrap result
Clustering<MedoidModel> result = new Clustering<>("CLARANS Clustering", "clarans-clustering");
for (DBIDArrayIter it = best.medoids.iter(); it.valid(); it.advance()) {
MedoidModel model = new MedoidModel(DBIDUtil.deref(it));
result.addToplevelCluster(new Cluster<>(clusters[it.getOffset()], model));
}
return result;
}
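computeCostDifferential only has to return the change caused by one swap, but the quantity CLARANS minimizes is the usual k-medoids objective: the sum of distances from every point to its nearest medoid. A compact standalone sketch of that objective (the Distance interface is illustrative, not the ELKI DistanceQuery used above):
// Illustrative CLARANS objective: total distance of all points to their
// nearest medoid. A swap is accepted when it lowers this total.
final class ClaransCost {
  interface Distance {
    double of(int a, int b);
  }

  static double totalCost(int n, int[] medoids, Distance dist) {
    double cost = 0.;
    for (int p = 0; p < n; p++) {
      double best = Double.POSITIVE_INFINITY;
      for (int m : medoids) {
        double d = dist.of(p, m);
        best = d < best ? d : best;
      }
      cost += best;
    }
    return cost;
  }
}
A random swap that does not lower the total increments j; after retries consecutive rejections (derived from maxneighbor above) the local search ends, and the best of the numlocal restarts is returned.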
use of de.lmu.ifi.dbs.elki.data.Clustering in project elki by elki-project.
the class KMeansCompare method run.
@Override
public Clustering<KMeansModel> run(Database database, Relation<V> relation) {
if (relation.size() <= 0) {
return new Clustering<>("k-Means Clustering", "kmeans-clustering");
}
// Choose initial means
if (LOG.isStatistics()) {
LOG.statistics(new StringStatistic(KEY + ".initialization", initializer.toString()));
}
double[][] means = initializer.chooseInitialMeans(database, relation, k, getDistanceFunction());
// Setup cluster assignment store
List<ModifiableDBIDs> clusters = new ArrayList<>();
for (int i = 0; i < k; i++) {
clusters.add(DBIDUtil.newHashSet((int) (relation.size() * 2. / k)));
}
WritableIntegerDataStore assignment = DataStoreUtil.makeIntegerStorage(relation.getDBIDs(), DataStoreFactory.HINT_TEMP | DataStoreFactory.HINT_HOT, -1);
double[] varsum = new double[k];
// Cluster distances
double[][] cdist = new double[k][k];
IndefiniteProgress prog = LOG.isVerbose() ? new IndefiniteProgress("K-Means iteration", LOG) : null;
DoubleStatistic varstat = LOG.isStatistics() ? new DoubleStatistic(this.getClass().getName() + ".variance-sum") : null;
LongStatistic diststat = LOG.isStatistics() ? new LongStatistic(KEY + ".distance-computations") : null;
int iteration = 0;
for (; maxiter <= 0 || iteration < maxiter; iteration++) {
LOG.incrementProcessed(prog);
recomputeSeperation(means, cdist, diststat);
boolean changed = assignToNearestCluster(relation, means, clusters, assignment, varsum, cdist, diststat);
logVarstat(varstat, varsum);
if (LOG.isStatistics()) {
LOG.statistics(diststat);
}
// Stop if no cluster assignment changed.
if (!changed) {
break;
}
// Recompute means.
means = means(clusters, means, relation);
}
LOG.setCompleted(prog);
if (LOG.isStatistics()) {
LOG.statistics(new LongStatistic(KEY + ".iterations", iteration));
}
// Wrap result
Clustering<KMeansModel> result = new Clustering<>("k-Means Clustering", "kmeans-clustering");
for (int i = 0; i < clusters.size(); i++) {
DBIDs ids = clusters.get(i);
if (ids.size() == 0) {
continue;
}
KMeansModel model = new KMeansModel(means[i], varsum[i]);
result.addToplevelCluster(new Cluster<>(ids, model));
}
return result;
}
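recomputeSeperation fills cdist with the pairwise distances between the current means so that assignToNearestCluster can prune candidate centers via the triangle inequality: if d(c_best, c_j) >= 2 * d(x, c_best), then c_j cannot be closer to x than c_best. A minimal standalone sketch of that compare-means pruning on plain arrays (not the ELKI data stores above):
// Illustrative compare-means assignment: skip a center whose distance to the
// current best center already rules it out by the triangle inequality.
final class CompareMeans {
  static int assign(double[] x, double[][] centers, double[][] cdist) {
    int best = 0;
    double bestDist = euclidean(x, centers[0]);
    for (int j = 1; j < centers.length; j++) {
      if (cdist[best][j] >= 2 * bestDist) {
        continue; // c_j cannot be closer than the current best center
      }
      double d = euclidean(x, centers[j]);
      if (d < bestDist) {
        bestDist = d;
        best = j;
      }
    }
    return best;
  }

  static double euclidean(double[] a, double[] b) {
    double sum = 0.;
    for (int i = 0; i < a.length; i++) {
      double diff = a[i] - b[i];
      sum += diff * diff;
    }
    return Math.sqrt(sum);
  }
}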
use of de.lmu.ifi.dbs.elki.data.Clustering in project elki by elki-project.
the class KMeansHamerly method run.
@Override
public Clustering<KMeansModel> run(Database database, Relation<V> relation) {
if (relation.size() <= 0) {
return new Clustering<>("k-Means Clustering", "kmeans-clustering");
}
// Choose initial means
if (LOG.isStatistics()) {
LOG.statistics(new StringStatistic(KEY + ".initialization", initializer.toString()));
}
double[][] means = initializer.chooseInitialMeans(database, relation, k, getDistanceFunction());
// Setup cluster assignment store
List<ModifiableDBIDs> clusters = new ArrayList<>();
for (int i = 0; i < k; i++) {
clusters.add(DBIDUtil.newHashSet((int) (relation.size() * 2. / k)));
}
WritableIntegerDataStore assignment = DataStoreUtil.makeIntegerStorage(relation.getDBIDs(), DataStoreFactory.HINT_TEMP | DataStoreFactory.HINT_HOT, -1);
// Hamerly bounds
WritableDoubleDataStore upper = DataStoreUtil.makeDoubleStorage(relation.getDBIDs(), DataStoreFactory.HINT_TEMP | DataStoreFactory.HINT_HOT, Double.POSITIVE_INFINITY);
WritableDoubleDataStore lower = DataStoreUtil.makeDoubleStorage(relation.getDBIDs(), DataStoreFactory.HINT_TEMP | DataStoreFactory.HINT_HOT, 0.);
// Storage for updated means:
final int dim = means[0].length;
double[][] sums = new double[k][dim];
// Separation of means / distance moved.
double[] sep = new double[k];
IndefiniteProgress prog = LOG.isVerbose() ? new IndefiniteProgress("K-Means iteration", LOG) : null;
LongStatistic rstat = LOG.isStatistics() ? new LongStatistic(KEY + ".reassignments") : null;
int iteration = 0;
for (; maxiter <= 0 || iteration < maxiter; iteration++) {
LOG.incrementProcessed(prog);
int changed;
if (iteration == 0) {
changed = initialAssignToNearestCluster(relation, means, sums, clusters, assignment, upper, lower);
} else {
recomputeSeperation(means, sep);
changed = assignToNearestCluster(relation, means, sums, clusters, assignment, sep, upper, lower);
}
if (rstat != null) {
rstat.setLong(changed);
LOG.statistics(rstat);
}
// Stop if no cluster assignment changed.
if (changed == 0) {
break;
}
// Recompute means.
for (int i = 0; i < k; i++) {
final int s = clusters.get(i).size();
timesEquals(sums[i], s > 0 ? 1. / s : 1.);
}
double delta = maxMoved(means, sums, sep);
updateBounds(relation, assignment, upper, lower, sep, delta);
for (int i = 0; i < k; i++) {
final int s = clusters.get(i).size();
System.arraycopy(sums[i], 0, means[i], 0, dim);
// Restore to sum for next iteration
timesEquals(sums[i], s > 0 ? s : 1.);
}
}
LOG.setCompleted(prog);
if (LOG.isStatistics()) {
LOG.statistics(new LongStatistic(KEY + ".iterations", iteration));
}
upper.destroy();
lower.destroy();
// Wrap result
double totalvariance = 0.;
Clustering<KMeansModel> result = new Clustering<>("k-Means Clustering", "kmeans-clustering");
for (int i = 0; i < clusters.size(); i++) {
DBIDs ids = clusters.get(i);
if (ids.size() == 0) {
continue;
}
double[] mean = means[i];
double varsum = 0.;
if (varstat) {
DoubleVector mvec = DoubleVector.wrap(mean);
for (DBIDIter it = ids.iter(); it.valid(); it.advance()) {
varsum += distanceFunction.distance(mvec, relation.get(it));
}
totalvariance += varsum;
}
KMeansModel model = new KMeansModel(mean, varsum);
result.addToplevelCluster(new Cluster<>(ids, model));
}
if (LOG.isStatistics() && varstat) {
LOG.statistics(new DoubleStatistic(this.getClass().getName() + ".variance-sum", totalvariance));
}
return result;
}
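KMeansHamerly avoids most distance computations by keeping, per point, an upper bound on the distance to its assigned center and a lower bound on the distance to any other center; a point is only re-examined when the upper bound exceeds both the lower bound and half the separation of its center. After the means move, updateBounds has to widen these bounds. A small standalone sketch of that update on plain arrays, assuming the overall maximum movement is used as a conservative correction for the lower bound (illustrative names, not the ELKI data stores):
// Illustrative Hamerly-style bound maintenance after the means moved.
// moved[i] = distance center i moved this iteration,
// delta    = maximum movement of any center (cf. maxMoved above).
final class HamerlyBounds {
  static void update(double[] upper, double[] lower, int[] assignment,
      double[] moved, double delta) {
    for (int p = 0; p < upper.length; p++) {
      // The assigned center may have moved away from the point.
      upper[p] += moved[assignment[p]];
      // Any other center may have moved closer; delta is a safe bound.
      lower[p] -= delta;
    }
  }
}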