use of de.lmu.ifi.dbs.elki.database.ids.DBIDIter in project elki by elki-project.
the class UKMeans method means.
/**
 * Recomputes the mean vector of every cluster from the centers of mass of
 * its member objects.
 * <p>
 * A cluster without members keeps the mean it had before (taken from
 * {@code means}), so the result always contains {@code k} entries.
 *
 * @param clusters the clusters to compute the means for
 * @param means the previous means, used as fallback for empty clusters
 * @param database the relation containing the uncertain objects
 * @return the updated mean vectors, one per cluster, in cluster order
 */
protected List<double[]> means(List<? extends ModifiableDBIDs> clusters, List<double[]> means, Relation<DiscreteUncertainObject> database) {
  final List<double[]> updated = new ArrayList<>(k);
  for (int c = 0; c < k; c++) {
    final ModifiableDBIDs members = clusters.get(c);
    if (members.size() <= 0) {
      // Degenerate (empty) cluster: carry the previous mean over unchanged.
      updated.add(means.get(c));
      continue;
    }
    DBIDIter it = members.iter();
    // Seed the accumulator with a copy of the first member's center of mass.
    double[] sum = ArrayLikeUtil.toPrimitiveDoubleArray(database.get(it).getCenterOfMass());
    it.advance();
    // Accumulate the centers of mass of all remaining members.
    while (it.valid()) {
      NumberVector center = database.get(it).getCenterOfMass();
      for (int d = 0; d < sum.length; d++) {
        sum[d] += center.doubleValue(d);
      }
      it.advance();
    }
    // Turn the sum into an average.
    timesEquals(sum, 1.0 / members.size());
    updated.add(sum);
  }
  return updated;
}
use of de.lmu.ifi.dbs.elki.database.ids.DBIDIter in project elki by elki-project.
the class DistanceStddevOutlier method run.
/**
 * Run the outlier detection algorithm.
 * <p>
 * For every object, the k nearest neighbors are queried and the sample
 * standard deviation of the neighbor distances (excluding the object itself)
 * is stored as its outlier score. The smallest and largest score observed are
 * tracked and reported in the result metadata.
 *
 * @param database Database to use (not read by this method)
 * @param relation Relation to analyze
 * @return Outlier score result
 */
public OutlierResult run(Database database, Relation<O> relation) {
  // Get a nearest neighbor query on the relation.
  KNNQuery<O> knnq = QueryUtil.getKNNQuery(relation, getDistanceFunction(), k);
  // Output data storage
  WritableDoubleDataStore scores = DataStoreUtil.makeDoubleStorage(relation.getDBIDs(), DataStoreFactory.HINT_DB);
  // Track minimum and maximum scores
  DoubleMinMax minmax = new DoubleMinMax();
  // Iterate over all objects
  for (DBIDIter iter = relation.iterDBIDs(); iter.valid(); iter.advance()) {
    KNNList neighbors = knnq.getKNNForDBID(iter, k);
    // Aggregate distances
    MeanVariance mv = new MeanVariance();
    for (DoubleDBIDListIter neighbor = neighbors.iter(); neighbor.valid(); neighbor.advance()) {
      // Skip the object itself. The 0 is not very informative.
      if (DBIDUtil.equal(iter, neighbor)) {
        continue;
      }
      mv.put(neighbor.doubleValue());
    }
    // Store score, and record it so the reported actual min/max are
    // meaningful (previously minmax was never updated).
    final double score = mv.getSampleStddev();
    scores.putDouble(iter, score);
    minmax.put(score);
  }
  // Wrap the result in the standard containers
  // Actual min-max, theoretical min-max!
  OutlierScoreMeta meta = new BasicOutlierScoreMeta(minmax.getMin(), minmax.getMax(), 0, Double.POSITIVE_INFINITY);
  DoubleRelation rel = new MaterializedDoubleRelation(relation.getDBIDs(), "stddev-outlier", scores);
  return new OutlierResult(meta, rel);
}
use of de.lmu.ifi.dbs.elki.database.ids.DBIDIter in project elki by elki-project.
the class RepresentativeUncertainClustering method run.
/**
 * This run method will do the wrapping.
 *
 * It is called from {@link AbstractAlgorithm#run(Database)} and performs the
 * call to the algorithm's particular run method as well as the storing and
 * comparison of the resulting Clusterings.
 *
 * Outline:
 * 1. Draw {@code numsamples} samples of the uncertain relation and cluster
 * each of them.
 * 2. Cluster the resulting clusterings with the meta algorithm, using a
 * precomputed distance matrix over the clusterings.
 * 3. For every meta cluster, choose as representative the member clustering
 * whose largest distance to the other members is smallest, and attach an
 * evaluation result to it.
 *
 * @param database Database
 * @param relation Data relation of uncertain objects
 * @return Clustering result (the result of the meta algorithm)
 */
public Clustering<?> run(Database database, Relation<? extends UncertainObject> relation) {
ResultHierarchy hierarchy = database.getHierarchy();
ArrayList<Clustering<?>> clusterings = new ArrayList<>();
final int dim = RelationUtil.dimensionality(relation);
DBIDs ids = relation.getDBIDs();
// To collect samples
Result samples = new BasicResult("Samples", "samples");
// Step 1: Cluster sampled possible worlds:
Random rand = random.getSingleThreadedRandom();
FiniteProgress sampleP = LOG.isVerbose() ? new FiniteProgress("Clustering samples", numsamples, LOG) : null;
for (int i = 0; i < numsamples; i++) {
// Fresh storage per iteration: one drawn sample vector for every object.
WritableDataStore<DoubleVector> store = DataStoreUtil.makeStorage(ids, DataStoreFactory.HINT_DB, DoubleVector.class);
for (DBIDIter iter = ids.iter(); iter.valid(); iter.advance()) {
store.put(iter, relation.get(iter).drawSample(rand));
}
// NOTE(review): runClusteringAlgorithm is defined elsewhere; presumably it
// clusters this sample and registers the result below "samples" - confirm.
clusterings.add(runClusteringAlgorithm(hierarchy, samples, ids, store, dim, "Sample " + i));
LOG.incrementProcessed(sampleP);
}
LOG.ensureCompleted(sampleP);
// Step 2: perform the meta clustering (on samples only).
// One synthetic id per sample clustering.
DBIDRange rids = DBIDFactory.FACTORY.generateStaticDBIDRange(clusterings.size());
WritableDataStore<Clustering<?>> datastore = DataStoreUtil.makeStorage(rids, DataStoreFactory.HINT_DB, Clustering.class);
{
// Assign the clusterings to the generated ids, in list order.
Iterator<Clustering<?>> it2 = clusterings.iterator();
for (DBIDIter iter = rids.iter(); iter.valid(); iter.advance()) {
datastore.put(iter, it2.next());
}
}
assert (rids.size() == clusterings.size());
// Build a relation, and a distance matrix.
Relation<Clustering<?>> crel = new MaterializedRelation<Clustering<?>>(Clustering.TYPE, rids, "Clusterings", datastore);
PrecomputedDistanceMatrix<Clustering<?>> mat = new PrecomputedDistanceMatrix<>(crel, rids, distance);
mat.initialize();
// Temporary database over the clusterings, with the matrix attached to the
// clusterings relation, to run the meta algorithm on.
ProxyDatabase d = new ProxyDatabase(rids, crel);
d.getHierarchy().add(crel, mat);
Clustering<?> c = metaAlgorithm.run(d);
// Detach from database
d.getHierarchy().remove(d, c);
// Evaluation
Result reps = new BasicResult("Representants", "representative");
hierarchy.add(relation, reps);
DistanceQuery<Clustering<?>> dq = mat.getDistanceQuery(distance);
List<? extends Cluster<?>> cl = c.getAllClusters();
List<DoubleObjPair<Clustering<?>>> evaluated = new ArrayList<>(cl.size());
for (Cluster<?> clus : cl) {
// Smallest "maximum distance to the other members" seen so far, and the
// member clustering that achieved it.
double besttau = Double.POSITIVE_INFINITY;
Clustering<?> bestc = null;
for (DBIDIter it1 = clus.getIDs().iter(); it1.valid(); it1.advance()) {
// tau: largest distance from this candidate to any other cluster member.
double tau = 0.;
Clustering<?> curc = crel.get(it1);
for (DBIDIter it2 = clus.getIDs().iter(); it2.valid(); it2.advance()) {
if (DBIDUtil.equal(it1, it2)) {
continue;
}
double di = dq.distance(curc, it2);
tau = di > tau ? di : tau;
}
// Cluster member with the least maximum distance.
if (tau < besttau) {
besttau = tau;
bestc = curc;
}
}
if (bestc == null) {
// E.g. degenerate empty clusters
continue;
}
// Global tau:
// Largest distance from the representative to any sample clustering.
double gtau = 0.;
for (DBIDIter it2 = crel.iterDBIDs(); it2.valid(); it2.advance()) {
double di = dq.distance(bestc, it2);
gtau = di > gtau ? di : gtau;
}
// NOTE(review): computeConfidence is defined elsewhere; it is given the
// meta cluster size and the total number of sample clusterings.
final double cprob = computeConfidence(clus.size(), crel.size());
// Build an evaluation result
hierarchy.add(bestc, new RepresentativenessEvaluation(gtau, besttau, cprob));
evaluated.add(new DoubleObjPair<Clustering<?>>(cprob, bestc));
}
// Sort evaluated results by confidence:
// (reverse of the natural ordering of the pairs)
Collections.sort(evaluated, Collections.reverseOrder());
for (DoubleObjPair<Clustering<?>> pair : evaluated) {
// Attach parent relation (= sample) to the representative samples.
for (It<Relation<?>> it = hierarchy.iterParents(pair.second).filter(Relation.class); it.valid(); it.advance()) {
hierarchy.add(reps, it.get());
}
}
// Keep the collected samples in the result hierarchy only when "keep" is
// set; otherwise remove the whole samples subtree.
if (keep) {
hierarchy.add(relation, samples);
} else {
hierarchy.removeSubtree(samples);
}
return c;
}
use of de.lmu.ifi.dbs.elki.database.ids.DBIDIter in project elki by elki-project.
the class Leader method run.
/**
 * Perform leader clustering on the given relation.
 * <p>
 * The objects are visited in iteration order. Each object that has not been
 * assigned yet becomes a leader (prototype), and all not yet assigned objects
 * returned by a range query with the threshold radius around it form its
 * cluster. The number of range queries performed is logged as a statistic.
 *
 * @param relation Data set
 * @return Clustering result
 */
public Clustering<PrototypeModel<O>> run(Relation<O> relation) {
  RangeQuery<O> rq = relation.getRangeQuery(getDistanceFunction(), threshold);
  // Objects that already belong to some cluster.
  ModifiableDBIDs assigned = DBIDUtil.newHashSet(relation.size());
  Clustering<PrototypeModel<O>> result = new Clustering<>("Prototype clustering", "prototype-clustering");
  int numQueries = 0;
  FiniteProgress progress = LOG.isVerbose() ? new FiniteProgress("Leader clustering", relation.size(), LOG) : null;
  DBIDIter leader = relation.iterDBIDs();
  // Stop early once every object has been assigned.
  while (leader.valid() && assigned.size() < relation.size()) {
    if (!assigned.contains(leader)) {
      DoubleDBIDList inRange = rq.getRangeForDBID(leader, threshold);
      numQueries++;
      ModifiableDBIDs members = DBIDUtil.newArray(inRange.size());
      for (DBIDIter candidate = inRange.iter(); candidate.valid(); candidate.advance()) {
        // Only objects that were not claimed by an earlier leader join.
        if (!assigned.add(candidate)) {
          continue;
        }
        LOG.incrementProcessed(progress);
        members.add(candidate);
      }
      assert (members.size() > 0 && members.contains(leader));
      PrototypeModel<O> model = new SimplePrototypeModel<>(relation.get(leader));
      result.addToplevelCluster(new Cluster<>(members, model));
    }
    leader.advance();
  }
  LOG.statistics(new LongStatistic(this.getClass().getName() + ".queries", numQueries));
  LOG.ensureCompleted(progress);
  return result;
}
use of de.lmu.ifi.dbs.elki.database.ids.DBIDIter in project elki by elki-project.
the class SNNClustering method expandCluster.
/**
 * DBSCAN-function expandCluster adapted to SNN criterion.
 * <p/>
 * Border-Objects become members of the first possible cluster.
 * <p/>
 * If the start object has fewer than {@code minpts} neighbors (as returned by
 * {@code findSNNNeighbors}), it is marked as noise and processed. Otherwise
 * its neighbors seed a new cluster, which is grown by repeatedly taking a
 * seed and, if that seed itself has at least {@code minpts} neighbors, adding
 * its unprocessed or noise neighbors. The cluster is appended to the result
 * list if it ends up with at least {@code minpts} members; otherwise its
 * members and the start object are recorded as noise.
 *
 * @param snnInstance shared nearest neighbors
 * @param startObjectID potential seed of a new potential cluster
 * @param objprog the progress object to report about the progress of
 *        clustering (number of processed objects); may be null
 * @param clusprog the progress object to report the number of clusters;
 *        may be null
 */
protected void expandCluster(SimilarityQuery<O> snnInstance, DBIDRef startObjectID, FiniteProgress objprog, IndefiniteProgress clusprog) {
  ArrayModifiableDBIDs seeds = findSNNNeighbors(snnInstance, startObjectID);
  // startObject is no core-object
  if (seeds.size() < minpts) {
    noise.add(startObjectID);
    processedIDs.add(startObjectID);
    if (objprog != null && clusprog != null) {
      objprog.setProcessed(processedIDs.size(), LOG);
      clusprog.setProcessed(resultList.size(), LOG);
    }
    return;
  }
  // try to expand the cluster
  ModifiableDBIDs currentCluster = DBIDUtil.newArray();
  for (DBIDIter seed = seeds.iter(); seed.valid(); seed.advance()) {
    if (!processedIDs.contains(seed)) {
      currentCluster.add(seed);
      processedIDs.add(seed);
    } else if (noise.contains(seed)) {
      // Former noise object becomes a member of this cluster.
      currentCluster.add(seed);
      noise.remove(seed);
    }
  }
  DBIDVar o = DBIDUtil.newVar();
  while (seeds.size() > 0) {
    seeds.pop(o);
    ArrayModifiableDBIDs neighborhood = findSNNNeighbors(snnInstance, o);
    // Only core objects propagate the cluster to their neighbors.
    if (neighborhood.size() >= minpts) {
      for (DBIDIter iter = neighborhood.iter(); iter.valid(); iter.advance()) {
        boolean inNoise = noise.contains(iter);
        boolean unclassified = !processedIDs.contains(iter);
        if (inNoise || unclassified) {
          // Only unclassified objects are expanded further; noise objects
          // just join the cluster.
          if (unclassified) {
            seeds.add(iter);
          }
          currentCluster.add(iter);
          processedIDs.add(iter);
          if (inNoise) {
            noise.remove(iter);
          }
        }
      }
    }
    if (objprog != null && clusprog != null) {
      objprog.setProcessed(processedIDs.size(), LOG);
      // Count the cluster under construction using the same criterion
      // (>= minpts) that decides below whether it is kept.
      int numClusters = currentCluster.size() >= minpts ? resultList.size() + 1 : resultList.size();
      clusprog.setProcessed(numClusters, LOG);
    }
    // Nothing left to gain: every object is processed and none is noise.
    if (processedIDs.size() == snnInstance.getRelation().size() && noise.size() == 0) {
      break;
    }
  }
  if (currentCluster.size() >= minpts) {
    resultList.add(currentCluster);
  } else {
    // Too small to be a cluster: members and start object become noise.
    noise.addDBIDs(currentCluster);
    noise.add(startObjectID);
    processedIDs.add(startObjectID);
  }
}
Aggregations