Use of de.lmu.ifi.dbs.elki.logging.progress.IndefiniteProgress in project elki by elki-project.
The class ParallelLloydKMeans, method run.
@Override
public Clustering<KMeansModel> run(Database database, Relation<V> relation) {
  DBIDs ids = relation.getDBIDs();
  // Choose initial means
  double[][] means = initializer.chooseInitialMeans(database, relation, k, getDistanceFunction());
  // Store for current cluster assignment.
  WritableIntegerDataStore assignment = DataStoreUtil.makeIntegerStorage(relation.getDBIDs(), DataStoreFactory.HINT_TEMP | DataStoreFactory.HINT_HOT, -1);
  double[] varsum = new double[k];
  KMeansProcessor<V> kmm = new KMeansProcessor<>(relation, distanceFunction, assignment, varsum);
  IndefiniteProgress prog = LOG.isVerbose() ? new IndefiniteProgress("K-Means iteration", LOG) : null;
  for (int iteration = 0; maxiter <= 0 || iteration < maxiter; iteration++) {
    LOG.incrementProcessed(prog);
    kmm.nextIteration(means);
    ParallelExecutor.run(ids, kmm);
    // Stop if no cluster assignment changed.
    if (!kmm.changed()) {
      break;
    }
    means = kmm.getMeans();
  }
  LOG.setCompleted(prog);
  // Wrap result
  ArrayModifiableDBIDs[] clusters = ClusteringAlgorithmUtil.partitionsFromIntegerLabels(ids, assignment, k);
  Clustering<KMeansModel> result = new Clustering<>("k-Means Clustering", "kmeans-clustering");
  for (int i = 0; i < clusters.length; i++) {
    DBIDs cids = clusters[i];
    if (cids.size() == 0) {
      continue;
    }
    KMeansModel model = new KMeansModel(means[i], varsum[i]);
    result.addToplevelCluster(new Cluster<>(cids, model));
  }
  return result;
}
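The snippet shows the standard IndefiniteProgress idiom: the progress object is only allocated in verbose mode, and the Logging helpers accept null, so the loop body needs no extra checks. A minimal standalone sketch of just that idiom follows; the class name and the trivial loop are placeholders, not ELKI code:

import de.lmu.ifi.dbs.elki.logging.Logging;
import de.lmu.ifi.dbs.elki.logging.progress.IndefiniteProgress;

class ProgressIdiomSketch {
  private static final Logging LOG = Logging.getLogger(ProgressIdiomSketch.class);

  static void iterate() {
    // Allocate the progress object only in verbose mode; the Logging
    // helpers below are null-safe, so the loop needs no further checks.
    IndefiniteProgress prog = LOG.isVerbose() ? new IndefiniteProgress("Iteration", LOG) : null;
    boolean converged = false;
    while (!converged) {
      LOG.incrementProcessed(prog); // count one completed iteration
      converged = true; // placeholder for a real convergence test
    }
    LOG.setCompleted(prog); // close the progress counter
  }
}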
Use of de.lmu.ifi.dbs.elki.logging.progress.IndefiniteProgress in project elki by elki-project.
The class LMCLUS, method run.
/**
 * The main LMCLUS (linear manifold clustering) algorithm is processed in
 * this method.
 *
 * <PRE>
 * The algorithm samples random linear manifolds and tries to find clusters in them.
 * It calculates a distance histogram, searches for a threshold, and partitions the
 * points into two groups: the ones in the cluster and everything else.
 * Then the best-fitting linear manifold is searched and registered as a cluster.
 * The process starts over until all points are clustered.
 * The last cluster should contain all the outliers (or the whole data set, if no clusters have been found).
 * For details see {@link LMCLUS}.
 * </PRE>
 *
 * @param database The database to operate on
 * @param relation Relation
 * @return Clustering result
 */
public Clustering<Model> run(Database database, Relation<NumberVector> relation) {
  Clustering<Model> ret = new Clustering<>("LMCLUS Clustering", "lmclus-clustering");
  FiniteProgress progress = LOG.isVerbose() ? new FiniteProgress("Clustered objects", relation.size(), LOG) : null;
  IndefiniteProgress cprogress = LOG.isVerbose() ? new IndefiniteProgress("Clusters found", LOG) : null;
  ModifiableDBIDs unclustered = DBIDUtil.newHashSet(relation.getDBIDs());
  Random r = rnd.getSingleThreadedRandom();
  final int maxdim = Math.min(maxLMDim, RelationUtil.dimensionality(relation));
  int cnum = 0;
  while (unclustered.size() > minsize) {
    DBIDs current = unclustered;
    int lmDim = 1;
    for (int k = 1; k <= maxdim; k++) {
      // Note: this loop has no stop criterion other than "cluster too small"
      // or "inseparable"; it does not stop at the appropriate dimensionality either.
      while (true) {
        Separation separation = findSeparation(relation, current, k, r);
        // LOG.verbose("k: " + k + " goodness: " + separation.goodness + " threshold: " + separation.threshold);
        if (separation.goodness <= sensitivityThreshold) {
          break;
        }
        ModifiableDBIDs subset = DBIDUtil.newArray(current.size());
        for (DBIDIter iter = current.iter(); iter.valid(); iter.advance()) {
          if (deviation(minusEquals(relation.get(iter).toArray(), separation.originV), separation.basis) < separation.threshold) {
            subset.add(iter);
          }
        }
        // logger.verbose("size:" + subset.size());
        if (subset.size() < minsize) {
          break;
        }
        current = subset;
        lmDim = k;
        // System.out.println("Partition: " + subset.size());
      }
    }
    // No more clusters found
    if (current.size() < minsize || current == unclustered) {
      break;
    }
    // New cluster found
    // TODO: annotate cluster with dimensionality
    final Cluster<Model> cluster = new Cluster<>(current);
    cluster.setName("Cluster_" + lmDim + "d_" + cnum);
    cnum++;
    ret.addToplevelCluster(cluster);
    // Remove from main working set.
    unclustered.removeDBIDs(current);
    if (progress != null) {
      progress.setProcessed(relation.size() - unclustered.size(), LOG);
    }
    if (cprogress != null) {
      cprogress.setProcessed(cnum, LOG);
    }
  }
  // Remaining objects are noise
  if (unclustered.size() > 0) {
    ret.addToplevelCluster(new Cluster<>(unclustered, true));
  }
  if (progress != null) {
    progress.setProcessed(relation.size(), LOG);
    progress.ensureCompleted(LOG);
  }
  LOG.setCompleted(cprogress);
  return ret;
}
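The distance-histogram thresholding described in the Javadoc happens inside findSeparation, which is not shown here. As an illustration of the general idea only, the sketch below scans a histogram for the best cut point, using Otsu's between-class variance as a stand-in criterion; the class name and the criterion are assumptions for this sketch, not ELKI's actual goodness measure:

class ThresholdSketch {
  // Find the histogram bin that best splits distances into "in cluster"
  // vs. "everything else", scoring each cut by the (unnormalized) Otsu
  // between-class variance w1 * w2 * (mu1 - mu2)^2, maximized over cuts.
  static int bestThresholdBin(int[] histogram) {
    long total = 0, weightedTotal = 0;
    for (int i = 0; i < histogram.length; i++) {
      total += histogram[i];
      weightedTotal += (long) i * histogram[i];
    }
    long countLeft = 0, sumLeft = 0;
    double bestScore = -1;
    int bestBin = -1;
    for (int t = 0; t < histogram.length - 1; t++) {
      countLeft += histogram[t];
      sumLeft += (long) t * histogram[t];
      long countRight = total - countLeft;
      if (countLeft == 0 || countRight == 0) {
        continue; // no valid two-group split at this cut point
      }
      double meanLeft = sumLeft / (double) countLeft;
      double meanRight = (weightedTotal - sumLeft) / (double) countRight;
      double diff = meanLeft - meanRight;
      double score = (double) countLeft * countRight * diff * diff;
      if (score > bestScore) {
        bestScore = score;
        bestBin = t;
      }
    }
    return bestBin; // distances in bins <= bestBin belong to the cluster
  }
}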
Use of de.lmu.ifi.dbs.elki.logging.progress.IndefiniteProgress in project elki by elki-project.
The class KMedoidsEM, method run.
/**
 * Run k-medoids
 *
 * @param database Database
 * @param relation relation to use
 * @return result
 */
public Clustering<MedoidModel> run(Database database, Relation<V> relation) {
  if (relation.size() <= 0) {
    return new Clustering<>("k-Medoids Clustering", "kmedoids-clustering");
  }
  DistanceQuery<V> distQ = null;
  // Only enforce a distance matrix for PAM initialization, which is slow.
  if (initializer instanceof PAMInitialMeans) {
    distQ = DatabaseUtil.precomputedDistanceQuery(database, relation, getDistanceFunction(), LOG);
  } else {
    distQ = database.getDistanceQuery(relation, getDistanceFunction());
  }
  // Choose initial medoids
  if (LOG.isStatistics()) {
    LOG.statistics(new StringStatistic(KEY + ".initialization", initializer.toString()));
  }
  ArrayModifiableDBIDs medoids = DBIDUtil.newArray(initializer.chooseInitialMedoids(k, relation.getDBIDs(), distQ));
  DBIDArrayMIter miter = medoids.iter();
  double[] mdists = new double[k];
  // Setup cluster assignment store
  List<ModifiableDBIDs> clusters = new ArrayList<>();
  for (int i = 0; i < k; i++) {
    HashSetModifiableDBIDs set = DBIDUtil.newHashSet(relation.size() / k);
    // Add medoids.
    set.add(miter.seek(i));
    clusters.add(set);
  }
  // Initial assignment to nearest medoids
  // TODO: reuse this information, from the build phase, when possible?
  double tc = assignToNearestCluster(miter, mdists, clusters, distQ);
  if (LOG.isStatistics()) {
    LOG.statistics(new DoubleStatistic(KEY + ".iteration-" + 0 + ".cost", tc));
  }
  IndefiniteProgress prog = LOG.isVerbose() ? new IndefiniteProgress("K-Medoids EM iteration", LOG) : null;
  // Swap phase
  int iteration = 0;
  DBIDVar best = DBIDUtil.newVar();
  while (true) {
    boolean changed = false;
    // Try to swap the medoid with a better cluster member:
    int i = 0;
    for (miter.seek(0); miter.valid(); miter.advance(), i++) {
      best.unset();
      double bestm = mdists[i];
      for (DBIDIter iter = clusters.get(i).iter(); iter.valid(); iter.advance()) {
        if (DBIDUtil.equal(miter, iter)) {
          continue;
        }
        double sum = 0;
        for (DBIDIter iter2 = clusters.get(i).iter(); iter2.valid(); iter2.advance()) {
          sum += distQ.distance(iter, iter2);
        }
        if (sum < bestm) {
          best.set(iter);
          bestm = sum;
        }
      }
      if (best.isSet() && !DBIDUtil.equal(miter, best)) {
        changed = true;
        assert (clusters.get(i).contains(best));
        medoids.set(i, best);
        mdists[i] = bestm;
      }
    }
    if (!changed) {
      break;
    }
    // Reassign objects to their nearest medoid
    double nc = assignToNearestCluster(miter, mdists, clusters, distQ);
    ++iteration;
    if (LOG.isStatistics()) {
      LOG.statistics(new DoubleStatistic(KEY + ".iteration-" + iteration + ".cost", nc));
    }
    LOG.incrementProcessed(prog);
  }
  LOG.setCompleted(prog);
  if (LOG.isStatistics()) {
    LOG.statistics(new LongStatistic(KEY + ".iterations", iteration));
  }
  // Wrap result
  Clustering<MedoidModel> result = new Clustering<>("k-Medoids Clustering", "kmedoids-clustering");
  for (DBIDArrayIter it = medoids.iter(); it.valid(); it.advance()) {
    result.addToplevelCluster(new Cluster<>(clusters.get(it.getOffset()), new MedoidModel(DBIDUtil.deref(it))));
  }
  return result;
}
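The swap phase above tests every cluster member as a replacement medoid, which costs O(|cluster|^2) distance computations per cluster and iteration. The same logic, distilled into a hypothetical standalone helper over a plain precomputed distance matrix (names are illustrative, not ELKI API):

class MedoidSwapSketch {
  // Standalone version of the swap test above. members holds the point
  // indices of one cluster, medoid its current medoid, and medoidCost the
  // current sum of distances from the medoid to all cluster members.
  static int bestMedoid(double[][] dist, int[] members, int medoid, double medoidCost) {
    int best = medoid;
    double bestCost = medoidCost;
    for (int cand : members) {
      if (cand == medoid) {
        continue; // skip the current medoid itself
      }
      double sum = 0;
      for (int other : members) {
        sum += dist[cand][other]; // cost of cand serving the whole cluster
      }
      if (sum < bestCost) { // strictly better candidate found
        best = cand;
        bestCost = sum;
      }
    }
    return best; // unchanged if no member improves on the current medoid
  }
}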
Use of de.lmu.ifi.dbs.elki.logging.progress.IndefiniteProgress in project elki by elki-project.
The class HiCS, method calculateSubspaces.
/**
 * Identifies high contrast subspaces in a given full-dimensional database.
 *
 * @param relation the relation the HiCS should be evaluated for
 * @param subspaceIndex Subspace indexes
 * @param random Random generator
 * @return a set of high contrast subspaces
 */
private Set<HiCSSubspace> calculateSubspaces(Relation<? extends NumberVector> relation, ArrayList<ArrayDBIDs> subspaceIndex, Random random) {
  final int dbdim = RelationUtil.dimensionality(relation);
  FiniteProgress dprog = LOG.isVerbose() ? new FiniteProgress("Subspace dimensionality", dbdim, LOG) : null;
  if (dprog != null) {
    dprog.setProcessed(2, LOG);
  }
  TreeSet<HiCSSubspace> subspaceList = new TreeSet<>(HiCSSubspace.SORT_BY_SUBSPACE);
  TopBoundedHeap<HiCSSubspace> dDimensionalList = new TopBoundedHeap<>(cutoff, HiCSSubspace.SORT_BY_CONTRAST_ASC);
  FiniteProgress prog = LOG.isVerbose() ? new FiniteProgress("Generating two-element subsets", (dbdim * (dbdim - 1)) >> 1, LOG) : null;
  // Compute two-element sets of subspaces
  for (int i = 0; i < dbdim; i++) {
    for (int j = i + 1; j < dbdim; j++) {
      HiCSSubspace ts = new HiCSSubspace();
      ts.set(i);
      ts.set(j);
      calculateContrast(relation, ts, subspaceIndex, random);
      dDimensionalList.add(ts);
      LOG.incrementProcessed(prog);
    }
  }
  LOG.ensureCompleted(prog);
  IndefiniteProgress qprog = LOG.isVerbose() ? new IndefiniteProgress("Testing subspace candidates", LOG) : null;
  for (int d = 3; !dDimensionalList.isEmpty(); d++) {
    if (dprog != null) {
      dprog.setProcessed(d, LOG);
    }
    // dDimensionalList now contains the best (d-1)-dimensional subspaces
    ArrayList<HiCSSubspace> candidateList = new ArrayList<>(dDimensionalList.size());
    for (Heap<HiCSSubspace>.UnorderedIter it = dDimensionalList.unorderedIter(); it.valid(); it.advance()) {
      subspaceList.add(it.get());
      candidateList.add(it.get());
    }
    dDimensionalList.clear();
    // candidateList now contains the *m* best (d-1)-dimensional sets
    Collections.sort(candidateList, HiCSSubspace.SORT_BY_SUBSPACE);
    // TODO: optimize APRIORI style, by not even computing the bitset OR?
    for (int i = 0; i < candidateList.size() - 1; i++) {
      for (int j = i + 1; j < candidateList.size(); j++) {
        HiCSSubspace set1 = candidateList.get(i);
        HiCSSubspace set2 = candidateList.get(j);
        HiCSSubspace joinedSet = new HiCSSubspace();
        joinedSet.or(set1);
        joinedSet.or(set2);
        if (joinedSet.cardinality() != d) {
          continue;
        }
        calculateContrast(relation, joinedSet, subspaceIndex, random);
        dDimensionalList.add(joinedSet);
        LOG.incrementProcessed(qprog);
      }
    }
    // Prune candidates dominated by a higher-contrast superspace
    for (HiCSSubspace cand : candidateList) {
      for (Heap<HiCSSubspace>.UnorderedIter it = dDimensionalList.unorderedIter(); it.valid(); it.advance()) {
        if (it.get().contrast > cand.contrast) {
          subspaceList.remove(cand);
          break;
        }
      }
    }
  }
  LOG.setCompleted(qprog);
  if (dprog != null) {
    dprog.setProcessed(dbdim, LOG);
    dprog.ensureCompleted(LOG);
  }
  return subspaceList;
}
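The candidate generation above is an APRIORI-style join: two (d-1)-dimensional subspaces are OR-ed together and kept only if the result covers exactly d attributes, i.e. the pair overlaps in d-2 of them. A hypothetical standalone version using plain java.util.BitSet; HiCSSubspace adds contrast bookkeeping on top of this, and duplicate joins and scoring are omitted here:

import java.util.ArrayList;
import java.util.BitSet;
import java.util.List;

class SubspaceJoinSketch {
  // Join pairs of (d-1)-dimensional subspaces (bit = attribute index) and
  // keep only those joins that span exactly d attributes.
  static List<BitSet> joinCandidates(List<BitSet> candidates, int d) {
    List<BitSet> joined = new ArrayList<>();
    for (int i = 0; i < candidates.size() - 1; i++) {
      for (int j = i + 1; j < candidates.size(); j++) {
        BitSet join = (BitSet) candidates.get(i).clone();
        join.or(candidates.get(j));
        if (join.cardinality() == d) { // pair overlapped in d-2 attributes
          joined.add(join);
        }
      }
    }
    return joined;
  }
}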
Use of de.lmu.ifi.dbs.elki.logging.progress.IndefiniteProgress in project elki by elki-project.
The class DWOF, method run.
/**
 * Performs the Generalized DWOF_SCORE algorithm on the given database by
 * calling all the other methods in the proper order.
 *
 * @param database Database to query
 * @param relation Data to process
 * @return new OutlierResult instance
 */
public OutlierResult run(Database database, Relation<O> relation) {
  final DBIDs ids = relation.getDBIDs();
  DistanceQuery<O> distFunc = database.getDistanceQuery(relation, getDistanceFunction());
  // Get k nearest neighbor and range query on the relation.
  KNNQuery<O> knnq = database.getKNNQuery(distFunc, k, DatabaseQuery.HINT_HEAVY_USE);
  RangeQuery<O> rnnQuery = database.getRangeQuery(distFunc, DatabaseQuery.HINT_HEAVY_USE);
  StepProgress stepProg = LOG.isVerbose() ? new StepProgress("DWOF", 2) : null;
  // DWOF output score storage.
  WritableDoubleDataStore dwofs = DataStoreUtil.makeDoubleStorage(ids, DataStoreFactory.HINT_DB | DataStoreFactory.HINT_HOT, 0.);
  if (stepProg != null) {
    stepProg.beginStep(1, "Initializing objects' Radii", LOG);
  }
  WritableDoubleDataStore radii = DataStoreUtil.makeDoubleStorage(ids, DataStoreFactory.HINT_TEMP | DataStoreFactory.HINT_HOT, 0.);
  // Find an initial radius for each object:
  initializeRadii(ids, knnq, distFunc, radii);
  WritableIntegerDataStore oldSizes = DataStoreUtil.makeIntegerStorage(ids, DataStoreFactory.HINT_HOT, 1);
  WritableIntegerDataStore newSizes = DataStoreUtil.makeIntegerStorage(ids, DataStoreFactory.HINT_HOT, 1);
  int countUnmerged = relation.size();
  if (stepProg != null) {
    stepProg.beginStep(2, "Clustering-Evaluating Cycles.", LOG);
  }
  IndefiniteProgress clusEvalProgress = LOG.isVerbose() ? new IndefiniteProgress("Evaluating DWOFs", LOG) : null;
  while (countUnmerged > 0) {
    LOG.incrementProcessed(clusEvalProgress);
    // Increase radii
    for (DBIDIter iter = ids.iter(); iter.valid(); iter.advance()) {
      radii.putDouble(iter, radii.doubleValue(iter) * delta);
    }
    // Stores the clustering label for each object
    WritableDataStore<ModifiableDBIDs> labels = DataStoreUtil.makeStorage(ids, DataStoreFactory.HINT_TEMP, ModifiableDBIDs.class);
    // Cluster objects based on the current radius
    clusterData(ids, rnnQuery, radii, labels);
    // Simple reference swap
    WritableIntegerDataStore temp = newSizes;
    newSizes = oldSizes;
    oldSizes = temp;
    // Update the cluster size count for each object.
    countUnmerged = updateSizes(ids, labels, newSizes);
    labels.destroy();
    // Update DWOF scores.
    for (DBIDIter iter = ids.iter(); iter.valid(); iter.advance()) {
      double newScore = (newSizes.intValue(iter) > 0) ? ((double) (oldSizes.intValue(iter) - 1) / (double) newSizes.intValue(iter)) : 0.0;
      dwofs.putDouble(iter, dwofs.doubleValue(iter) + newScore);
    }
  }
  LOG.setCompleted(clusEvalProgress);
  LOG.setCompleted(stepProg);
  // Build result representation.
  DoubleMinMax minmax = new DoubleMinMax();
  for (DBIDIter iter = relation.iterDBIDs(); iter.valid(); iter.advance()) {
    minmax.put(dwofs.doubleValue(iter));
  }
  OutlierScoreMeta meta = new InvertedOutlierScoreMeta(minmax.getMin(), minmax.getMax(), 0.0, Double.POSITIVE_INFINITY);
  DoubleRelation rel = new MaterializedDoubleRelation("Dynamic-Window Outlier Factors", "dwof-outlier", dwofs, ids);
  return new OutlierResult(meta, rel);
}
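Each cycle adds (oldSize - 1) / newSize to an object's score: an object that stays a singleton contributes 0 (oldSize = 1), while members of stable clusters of size s contribute (s - 1) / s, close to 1. Outliers therefore accumulate the lowest totals, which is why the result is wrapped in an InvertedOutlierScoreMeta. The update, as a hypothetical standalone helper over plain arrays in place of ELKI's data stores:

class DwofUpdateSketch {
  // Standalone version of the score update loop above. Singletons add 0;
  // members of an unchanged cluster of size s add (s - 1) / s, near 1.
  // Low accumulated totals thus mark outliers.
  static void updateScores(double[] dwofs, int[] oldSizes, int[] newSizes) {
    for (int i = 0; i < dwofs.length; i++) {
      dwofs[i] += newSizes[i] > 0 ? (oldSizes[i] - 1) / (double) newSizes[i] : 0.0;
    }
  }
}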