use of de.lmu.ifi.dbs.elki.logging.progress.StepProgress in project elki by elki-project.
the class LoOP method run.
/**
 * Executes LoOP on the given relation.
 * <p>
 * The method obtains two kNN queries, computes the probabilistic distances and
 * the PLOF values from them, and finally passes every PLOF value (shifted by
 * one and scaled by {@code 1 / (nplof * sqrt(2))}) through the Gaussian error
 * function. The resulting values overwrite the PLOF storage and are returned
 * as the outlier scores.
 * <p>
 * Only steps 3 to 5 of the five-step progress are started here; steps 1 and 2
 * are presumably reported by {@code getKNNQueries} (not visible here).
 *
 * @param database Database to process
 * @param relation Relation to process
 * @return Outlier result
 */
public OutlierResult run(Database database, Relation<O> relation) {
  final StepProgress steps = LOG.isVerbose() ? new StepProgress(5) : null;
  final Pair<KNNQuery<O>, KNNQuery<O>> queries = getKNNQueries(database, relation, steps);
  final KNNQuery<O> comparisonQuery = queries.getFirst();
  final KNNQuery<O> reachabilityQuery = queries.getSecond();
  // Neither query is optional: abort if the database could not provide one.
  if (comparisonQuery == null) {
    throw new AbortException("No kNN queries supported by database for comparison distance function.");
  }
  if (reachabilityQuery == null) {
    throw new AbortException("No kNN queries supported by database for density estimation distance function.");
  }

  // FIXME: tie handling!
  // Step 3: probabilistic distances.
  final WritableDoubleDataStore probDists = DataStoreUtil.makeDoubleStorage(relation.getDBIDs(), DataStoreFactory.HINT_HOT | DataStoreFactory.HINT_DB);
  LOG.beginStep(steps, 3, "Computing pdists");
  computePDists(relation, reachabilityQuery, probDists);

  // Step 4: PLOF values; this storage is later overwritten with the scores.
  final WritableDoubleDataStore scores = DataStoreUtil.makeDoubleStorage(relation.getDBIDs(), DataStoreFactory.HINT_HOT | DataStoreFactory.HINT_TEMP);
  LOG.beginStep(steps, 4, "Computing PLOF");
  final double nplof = computePLOFs(relation, comparisonQuery, probDists, scores);

  // Step 5: turn each PLOF value into its LoOP score, tracking the range.
  final DoubleMinMax range = new DoubleMinMax();
  LOG.beginStep(steps, 5, "Computing LoOP scores");
  final FiniteProgress scoreProgress = LOG.isVerbose() ? new FiniteProgress("LoOP for objects", relation.size(), LOG) : null;
  final double scale = 1. / (nplof * MathUtil.SQRT2);
  final DBIDIter cursor = relation.iterDBIDs();
  while (cursor.valid()) {
    final double probability = NormalDistribution.erf((scores.doubleValue(cursor) - 1.) * scale);
    scores.putDouble(cursor, probability);
    range.put(probability);
    LOG.incrementProcessed(scoreProgress);
    cursor.advance();
  }
  LOG.ensureCompleted(scoreProgress);
  LOG.setCompleted(steps);

  // Wrap the scores and their observed range into the result object.
  final DoubleRelation scoreRelation = new MaterializedDoubleRelation("Local Outlier Probabilities", "loop-outlier", scores, relation.getDBIDs());
  final OutlierScoreMeta meta = new ProbabilisticOutlierScore(range.getMin(), range.getMax(), 0.);
  return new OutlierResult(meta, scoreRelation);
}
use of de.lmu.ifi.dbs.elki.logging.progress.StepProgress in project elki by elki-project.
the class SimpleKernelDensityLOF method run.
/**
 * Run the naive kernel density LOF algorithm.
 * <p>
 * The method proceeds in three steps: it obtains a precomputed kNN query,
 * estimates a density for every object from its neighbors (each neighbor
 * contributes a kernel value scaled by that neighbor's own kNN distance), and
 * finally computes for each object the ratio of the mean neighbor density to
 * its own density.
 * <p>
 * A neighbor whose kNN distance is zero makes the density estimate infinite.
 * This holds regardless of the position of that neighbor in the neighbor
 * list; previously the density fell back to 0 when the very first neighbor
 * was degenerate, because no regular contribution had been counted yet.
 *
 * @param database Database to query
 * @param relation Data to process
 * @return LOF outlier result
 */
public OutlierResult run(Database database, Relation<O> relation) {
  StepProgress stepprog = LOG.isVerbose() ? new StepProgress("KernelDensityLOF", 3) : null;
  final int dim = RelationUtil.dimensionality(relation);
  DBIDs ids = relation.getDBIDs();

  LOG.beginStep(stepprog, 1, "Materializing neighborhoods w.r.t. distance function.");
  KNNQuery<O> knnq = DatabaseUtil.precomputedKNNQuery(database, relation, getDistanceFunction(), k);

  // Compute densities
  LOG.beginStep(stepprog, 2, "Computing densities.");
  WritableDoubleDataStore dens = DataStoreUtil.makeDoubleStorage(ids, DataStoreFactory.HINT_HOT | DataStoreFactory.HINT_TEMP);
  FiniteProgress densProgress = LOG.isVerbose() ? new FiniteProgress("Densities", ids.size(), LOG) : null;
  for (DBIDIter it = ids.iter(); it.valid(); it.advance()) {
    final KNNList neighbors = knnq.getKNNForDBID(it, k);
    int count = 0;
    double sum = 0.0;
    // Set when a neighbor has a zero kNN distance (zero kernel bandwidth).
    boolean infinite = false;
    for (DoubleDBIDListIter neighbor = neighbors.iter(); neighbor.valid(); neighbor.advance()) {
      // skip the point itself
      if (DBIDUtil.equal(neighbor, it)) {
        continue;
      }
      // Bandwidth: the kNN distance of the neighbor, not of the query point.
      double max = knnq.getKNNForDBID(neighbor, k).getKNNDistance();
      if (max == 0) {
        infinite = true;
        break;
      }
      final double v = neighbor.doubleValue() / max;
      sum += kernel.density(v) / MathUtil.powi(max, dim);
      count++;
    }
    final double density;
    if (infinite) {
      // Must not depend on how many regular neighbors were seen before.
      density = Double.POSITIVE_INFINITY;
    } else {
      // No neighbor other than the point itself: density is defined as 0.
      density = count > 0 ? sum / count : 0.;
    }
    dens.putDouble(it, density);
    LOG.incrementProcessed(densProgress);
  }
  LOG.ensureCompleted(densProgress);

  // compute LOF_SCORE of each db object
  LOG.beginStep(stepprog, 3, "Computing KLOFs.");
  WritableDoubleDataStore lofs = DataStoreUtil.makeDoubleStorage(ids, DataStoreFactory.HINT_STATIC);
  // track the minimum and maximum score for the result meta data.
  DoubleMinMax lofminmax = new DoubleMinMax();
  FiniteProgress progressLOFs = LOG.isVerbose() ? new FiniteProgress("KLOF_SCORE for objects", ids.size(), LOG) : null;
  for (DBIDIter it = ids.iter(); it.valid(); it.advance()) {
    final double lrdp = dens.doubleValue(it);
    final double lof;
    if (lrdp > 0) {
      final KNNList neighbors = knnq.getKNNForDBID(it, k);
      double sum = 0.0;
      int count = 0;
      for (DBIDIter neighbor = neighbors.iter(); neighbor.valid(); neighbor.advance()) {
        // skip the point itself
        if (DBIDUtil.equal(neighbor, it)) {
          continue;
        }
        sum += dens.doubleValue(neighbor);
        count++;
      }
      // Infinite own density: score is 1 if the neighbor density sum is also
      // infinite and 0 otherwise; this avoids computing infinity / infinity.
      lof = (lrdp == Double.POSITIVE_INFINITY) ? (sum == Double.POSITIVE_INFINITY ? 1 : 0.) : sum / (count * lrdp);
    } else {
      // Zero density: use the neutral score.
      lof = 1.0;
    }
    lofs.putDouble(it, lof);
    // update minimum and maximum
    lofminmax.put(lof);
    LOG.incrementProcessed(progressLOFs);
  }
  LOG.ensureCompleted(progressLOFs);
  LOG.setCompleted(stepprog);

  // Build result representation.
  DoubleRelation scoreResult = new MaterializedDoubleRelation("Kernel Density Local Outlier Factor", "kernel-density-slof-outlier", lofs, ids);
  OutlierScoreMeta scoreMeta = new QuotientOutlierScoreMeta(lofminmax.getMin(), lofminmax.getMax(), 0.0, Double.POSITIVE_INFINITY, 1.0);
  return new OutlierResult(scoreMeta, scoreResult);
}
use of de.lmu.ifi.dbs.elki.logging.progress.StepProgress in project elki by elki-project.
the class LSDBC method run.
/**
 * Executes LSDBC on the given relation.
 * <p>
 * Objects are visited in ascending order of their stored density value. Every
 * object that is still unprocessed and passes {@code isLocalMaximum} seeds a
 * new cluster, which is grown by {@code expandCluster}; every other
 * unprocessed object is labeled as noise. Afterwards the per-object labels
 * are inverted into one member list per label.
 *
 * @param database Database to process
 * @param relation Data relation
 * @return Clustering result
 */
public Clustering<Model> run(Database database, Relation<O> relation) {
  final StepProgress steps = LOG.isVerbose() ? new StepProgress("LSDBC", 3) : null;
  final int dimensionality = RelationUtil.dimensionality(relation);
  // Multiplier applied to a seed's kNN distance before cluster expansion.
  final double scale = FastMath.pow(2., alpha / dimensionality);
  final DBIDs ids = relation.getDBIDs();

  LOG.beginStep(steps, 1, "Materializing kNN neighborhoods");
  final KNNQuery<O> knn = DatabaseUtil.precomputedKNNQuery(database, relation, getDistanceFunction(), k);

  LOG.beginStep(steps, 2, "Sorting by density");
  final WritableDoubleDataStore densities = DataStoreUtil.makeDoubleStorage(ids, DataStoreFactory.HINT_HOT | DataStoreFactory.HINT_TEMP);
  fillDensities(knn, ids, densities);
  final ArrayModifiableDBIDs ordered = DBIDUtil.newArray(ids);
  ordered.sort(new DataStoreUtil.AscendingByDoubleDataStore(densities));

  LOG.beginStep(steps, 3, "Computing clusters");
  final FiniteProgress objectProgress = LOG.isVerbose() ? new FiniteProgress("LSDBC Clustering", ids.size(), LOG) : null;
  final IndefiniteProgress clusterProgress = LOG.isVerbose() ? new IndefiniteProgress("Number of clusters found", LOG) : null;
  // Temporary label per object; everything starts out as UNPROCESSED.
  final WritableIntegerDataStore labels = DataStoreUtil.makeIntegerStorage(ids, DataStoreFactory.HINT_TEMP, UNPROCESSED);
  // Size estimates per label; not exact, as objects may be stolen from noise.
  final IntArrayList sizes = new IntArrayList();
  sizes.add(0); // placeholder entry for the unprocessed label
  sizes.add(0); // noise counter
  int nextLabel = NOISE + 1;

  for (DBIDIter cur = ordered.iter(); cur.valid(); cur.advance()) {
    // Objects that already carry a label are neither re-examined nor counted.
    if (labels.intValue(cur) == UNPROCESSED) {
      final KNNList neighborhood = knn.getKNNForDBID(cur, k);
      if (!isLocalMaximum(neighborhood.getKNNDistance(), neighborhood, densities)) {
        // Not a seed: label as noise.
        labels.putInt(cur, NOISE);
        sizes.set(NOISE, sizes.getInt(NOISE) + 1);
      } else {
        // Seed of a new cluster: label it and grow the cluster from here.
        final double bound = scale * neighborhood.getKNNDistance();
        labels.putInt(cur, nextLabel);
        sizes.add(expandCluster(nextLabel, labels, knn, neighborhood, bound, objectProgress));
        // The following cluster gets the next label.
        nextLabel++;
        if (clusterProgress != null) {
          clusterProgress.setProcessed(nextLabel, LOG);
        }
      }
      LOG.incrementProcessed(objectProgress);
    }
  }
  LOG.ensureCompleted(objectProgress);
  LOG.setCompleted(clusterProgress);
  LOG.setCompleted(steps);

  // Invert the object-to-label mapping into one member list per label.
  final ArrayList<ArrayModifiableDBIDs> members = new ArrayList<>(nextLabel);
  for (int slot = 0; slot < sizes.size(); slot++) {
    members.add(DBIDUtil.newArray(sizes.getInt(slot)));
  }
  for (DBIDIter cur = ids.iter(); cur.valid(); cur.advance()) {
    // Negative labels mark non-core points; they belong to the same cluster.
    members.get(Math.abs(labels.intValue(cur))).add(cur);
  }
  labels.destroy();

  // The entry below NOISE (the unprocessed placeholder) is not emitted.
  final Clustering<Model> clustering = new Clustering<>("LSDBC", "lsdbc-clustering");
  for (int slot = NOISE; slot < members.size(); slot++) {
    clustering.addToplevelCluster(new Cluster<Model>(members.get(slot), slot == NOISE, ClusterModel.CLUSTER));
  }
  return clustering;
}
use of de.lmu.ifi.dbs.elki.logging.progress.StepProgress in project elki by elki-project.
the class SUBCLU method run.
/**
 * Performs the SUBCLU algorithm on the given database.
 * <p>
 * First, DBSCAN is run in every single dimension. Then, bottom-up,
 * (d+1)-dimensional candidate subspaces are generated from the d-dimensional
 * subspaces that contain clusters; for each candidate, DBSCAN is re-run on
 * the members of every cluster of the subspace chosen by
 * {@code bestSubspace}. The search stops as soon as a dimensionality yields
 * no subspace with clusters. Finally all clusters found are converted into
 * {@link SubspaceModel} clusters, iterating the subspaces in descending key
 * order.
 * <p>
 * NOTE(review): the clustering is stored in {@code result}, which is not
 * declared in this method - presumably a field of the class.
 *
 * @param relation Relation to process
 * @return Clustering result
 */
public Clustering<SubspaceModel> run(Relation<V> relation) {
  final int dimensionality = RelationUtil.dimensionality(relation);
  StepProgress stepprog = LOG.isVerbose() ? new StepProgress(dimensionality) : null;

  // Generate all 1-dimensional clusters
  LOG.beginStep(stepprog, 1, "Generate all 1-dimensional clusters.");
  // mapping of dimensionality to set of subspaces; key d holds the
  // (d+1)-dimensional subspaces
  HashMap<Integer, List<Subspace>> subspaceMap = new HashMap<>();
  // list of 1-dimensional subspaces containing clusters
  List<Subspace> s_1 = new ArrayList<>();
  subspaceMap.put(0, s_1);
  // mapping of subspaces to list of clusters
  TreeMap<Subspace, List<Cluster<Model>>> clusterMap = new TreeMap<>(new Subspace.DimensionComparator());
  for (int d = 0; d < dimensionality; d++) {
    Subspace currentSubspace = new Subspace(d);
    List<Cluster<Model>> clusters = runDBSCAN(relation, null, currentSubspace);
    if (LOG.isDebuggingFiner()) {
      StringBuilder msg = new StringBuilder();
      msg.append('\n').append(clusters.size()).append(" clusters in subspace ").append(currentSubspace.dimensonsToString()).append(": \n");
      for (Cluster<Model> cluster : clusters) {
        msg.append(" ").append(cluster.getIDs()).append("\n");
      }
      LOG.debugFiner(msg.toString());
    }
    if (!clusters.isEmpty()) {
      s_1.add(currentSubspace);
      clusterMap.put(currentSubspace, clusters);
    }
  }

  // Generate (d+1)-dimensional clusters from d-dimensional clusters
  for (int d = 0; d < dimensionality - 1; d++) {
    if (stepprog != null) {
      stepprog.beginStep(d + 2, "Generate " + (d + 2) + "-dimensional clusters from " + (d + 1) + "-dimensional clusters.", LOG);
    }
    List<Subspace> subspaces = subspaceMap.get(d);
    if (subspaces == null || subspaces.isEmpty()) {
      // Nothing to extend: report the remaining steps as skipped and stop.
      if (stepprog != null) {
        for (int dim = d + 1; dim < dimensionality - 1; dim++) {
          stepprog.beginStep(dim + 2, "Generation of " + (dim + 2) + "-dimensional clusters not applicable, because no more " + (d + 2) + "-dimensional subspaces found.", LOG);
        }
      }
      break;
    }
    List<Subspace> candidates = generateSubspaceCandidates(subspaces);
    List<Subspace> s_d = new ArrayList<>();
    for (Subspace candidate : candidates) {
      Subspace bestSubspace = bestSubspace(subspaces, candidate, clusterMap);
      if (LOG.isDebuggingFine()) {
        LOG.debugFine("best subspace of " + candidate.dimensonsToString() + ": " + bestSubspace.dimensonsToString());
      }
      // Only the members of clusters in the best subspace are re-clustered.
      List<Cluster<Model>> bestSubspaceClusters = clusterMap.get(bestSubspace);
      List<Cluster<Model>> clusters = new ArrayList<>();
      for (Cluster<Model> cluster : bestSubspaceClusters) {
        clusters.addAll(runDBSCAN(relation, cluster.getIDs(), candidate));
      }
      if (LOG.isDebuggingFine()) {
        StringBuilder msg = new StringBuilder();
        msg.append(clusters.size()).append(" cluster(s) in subspace ").append(candidate).append(": \n");
        for (Cluster<Model> c : clusters) {
          msg.append(" ").append(c.getIDs()).append("\n");
        }
        LOG.debugFine(msg.toString());
      }
      if (!clusters.isEmpty()) {
        s_d.add(candidate);
        clusterMap.put(candidate, clusters);
      }
    }
    // Only register non-empty levels, so the next iteration terminates the
    // search when nothing was found.
    if (!s_d.isEmpty()) {
      subspaceMap.put(d + 1, s_d);
    }
  }

  // build result
  int numClusters = 1;
  result = new Clustering<>("SUBCLU clustering", "subclu-clustering");
  for (Subspace subspace : clusterMap.descendingKeySet()) {
    List<Cluster<Model>> clusters = clusterMap.get(subspace);
    for (Cluster<Model> cluster : clusters) {
      Cluster<SubspaceModel> newCluster = new Cluster<>(cluster.getIDs());
      newCluster.setModel(new SubspaceModel(subspace, Centroid.make(relation, cluster.getIDs()).getArrayRef()));
      newCluster.setName("cluster_" + numClusters++);
      result.addToplevelCluster(newCluster);
    }
  }
  LOG.setCompleted(stepprog);
  return result;
}
use of de.lmu.ifi.dbs.elki.logging.progress.StepProgress in project elki by elki-project.
the class KNNKernelDensityMinimaClustering method run.
/**
 * Run the clustering algorithm on a data relation.
 * <p>
 * The objects are sorted by their value in dimension {@code dim}. In the
 * first step a density is estimated for every object from up to {@code k}
 * predecessors and {@code k} successors in this order. In the second step
 * the sorted sequence is cut at strict local minima of the density (with
 * respect to a sliding window of {@code 2 * minwindow + 1} values), and each
 * resulting interval becomes one cluster.
 *
 * @param relation Relation
 * @return Clustering result
 * @throws UnsupportedOperationException if {@code mode} is neither
 *         {@code BALLOON} nor {@code SAMPLE}
 */
public Clustering<ClusterModel> run(Relation<V> relation) {
  ArrayModifiableDBIDs ids = DBIDUtil.newArray(relation.getDBIDs());
  final int size = ids.size();
  // Sort by the sole dimension
  ids.sort(new VectorUtil.SortDBIDsBySingleDimension(relation, dim));
  // Density storage.
  WritableDoubleDataStore density = DataStoreUtil.makeDoubleStorage(ids, DataStoreFactory.HINT_HOT | DataStoreFactory.HINT_TEMP, 0.);
  // iter walks the current object, iter2 is repositioned freely around it.
  DBIDArrayIter iter = ids.iter(), iter2 = ids.iter();
  StepProgress sprog = LOG.isVerbose() ? new StepProgress("Clustering steps", 2) : null;
  LOG.beginStep(sprog, 1, "Kernel density estimation.");
  {
    // Distances to at most k predecessors and k successors of the current object.
    double[] scratch = new double[2 * k];
    iter.seek(0);
    for (int i = 0; i < size; i++, iter.advance()) {
      // Current value.
      final double curv = relation.get(iter).doubleValue(dim);
      // Window [pre, pos] around i, clipped to the array; prek / posk are the
      // number of predecessors / successors inside the window.
      final int pre = Math.max(i - k, 0), prek = i - pre;
      final int pos = Math.min(i + k, size - 1), posk = pos - i;
      iter2.seek(pre);
      for (int j = 0; j < prek; j++, iter2.advance()) {
        scratch[j] = curv - relation.get(iter2).doubleValue(dim);
      }
      assert (iter2.getOffset() == i);
      // Skip the current object itself.
      iter2.advance();
      for (int j = 0; j < posk; j++, iter2.advance()) {
        scratch[prek + j] = relation.get(iter2).doubleValue(dim) - curv;
      }
      assert (prek + posk >= k);
      // NOTE(review): rank k on a range of prek + posk values - confirm
      // against the QuickSelect contract that this cannot select past the
      // filled part of scratch when prek + posk == k.
      double kdist = QuickSelect.quickSelect(scratch, 0, prek + posk, k);
      switch(mode) {
        case BALLOON:
          {
            // Sum the kernel over the window, using the bandwidth of the
            // current object; a zero bandwidth yields infinite density.
            double dens = 0.;
            if (kdist > 0.) {
              for (int j = 0; j < prek + posk; j++) {
                dens += kernel.density(scratch[j] / kdist);
              }
            } else {
              dens = Double.POSITIVE_INFINITY;
            }
            assert (iter.getOffset() == i);
            density.putDouble(iter, dens);
            break;
          }
        case SAMPLE:
          {
            // The current object adds a kernel contribution to the density of
            // each object in its window.
            if (kdist > 0.) {
              iter2.seek(pre);
              for (int j = 0; j < prek; j++, iter2.advance()) {
                double delta = curv - relation.get(iter2).doubleValue(dim);
                density.putDouble(iter2, density.doubleValue(iter2) + kernel.density(delta / kdist));
              }
              assert (iter2.getOffset() == i);
              iter2.advance();
              for (int j = 0; j < posk; j++, iter2.advance()) {
                double delta = relation.get(iter2).doubleValue(dim) - curv;
                density.putDouble(iter2, density.doubleValue(iter2) + kernel.density(delta / kdist));
              }
            } else {
              // Zero bandwidth: objects in the window whose delta is not
              // positive get infinite density.
              iter2.seek(pre);
              for (int j = 0; j < prek; j++, iter2.advance()) {
                double delta = curv - relation.get(iter2).doubleValue(dim);
                if (!(delta > 0.)) {
                  density.putDouble(iter2, Double.POSITIVE_INFINITY);
                }
              }
              assert (iter2.getOffset() == i);
              iter2.advance();
              for (int j = 0; j < posk; j++, iter2.advance()) {
                double delta = relation.get(iter2).doubleValue(dim) - curv;
                if (!(delta > 0.)) {
                  density.putDouble(iter2, Double.POSITIVE_INFINITY);
                }
              }
            }
            break;
          }
        default:
          throw new UnsupportedOperationException("Unknown mode specified.");
      }
    }
  }
  LOG.beginStep(sprog, 2, "Local minima detection.");
  // Descriptive name first, short identifier second - the same argument order
  // as used by the other clustering algorithms.
  Clustering<ClusterModel> clustering = new Clustering<>("One-Dimensional clustering using kernel density estimation.", "onedimensional-kde-clustering");
  {
    // Ring buffer with the most recent 2 * minwindow + 1 density values.
    double[] scratch = new double[2 * minwindow + 1];
    // Offset (in sorted order) at which the currently open cluster starts.
    int begin = 0;
    int halfw = (minwindow + 1) >> 1;
    iter.seek(0);
    // Fill initial buffer.
    for (int i = 0; i < size; i++, iter.advance()) {
      // m: slot written now; t: slot tested for being a minimum. t may be
      // negative for small i, but is only used once i > scratch.length.
      final int m = i % scratch.length, t = (i - minwindow - 1) % scratch.length;
      scratch[m] = density.doubleValue(iter);
      if (i > scratch.length) {
        // Smallest value in the buffer, excluding the tested slot.
        double min = Double.POSITIVE_INFINITY;
        for (int j = 0; j < scratch.length; j++) {
          if (j != t && scratch[j] < min) {
            min = scratch[j];
          }
        }
        // Local minimum:
        if (scratch[t] < min) {
          int end = i - minwindow + 1;
          {
            // Test on which side the kNN is
            // NOTE(review): end + halfw is not range checked here - confirm
            // it stays below size for small minwindow values.
            iter2.seek(end);
            double curv = relation.get(iter2).doubleValue(dim);
            iter2.seek(end - halfw);
            double left = relation.get(iter2).doubleValue(dim) - curv;
            iter2.seek(end + halfw);
            double right = curv - relation.get(iter2).doubleValue(dim);
            if (left < right) {
              end++;
            }
          }
          // Emit the objects at offsets [begin, end) as one cluster.
          iter2.seek(begin);
          ArrayModifiableDBIDs cids = DBIDUtil.newArray(end - begin);
          for (int j = 0; j < end - begin; j++, iter2.advance()) {
            cids.add(iter2);
          }
          clustering.addToplevelCluster(new Cluster<>(cids, ClusterModel.CLUSTER));
          begin = end;
        }
      }
    }
    // Extract last cluster
    int end = size;
    iter2.seek(begin);
    ArrayModifiableDBIDs cids = DBIDUtil.newArray(end - begin);
    for (int j = 0; j < end - begin; j++, iter2.advance()) {
      cids.add(iter2);
    }
    clustering.addToplevelCluster(new Cluster<>(cids, ClusterModel.CLUSTER));
  }
  LOG.ensureCompleted(sprog);
  return clustering;
}
Aggregations