use of de.lmu.ifi.dbs.elki.result.outlier.OutlierResult in project elki by elki-project.
the class SilhouetteOutlierDetection method run.
/**
 * Run the configured clustering algorithm and score every object with its
 * silhouette coefficient {@code (b - a) / max(a, b)}, where {@code a} is the
 * mean distance to the other members of the object's own cluster and
 * {@code b} is the smallest mean distance to any other cluster.
 *
 * Clusters of size at most one, and noise clusters, are handled according to
 * {@code noiseOption}. Objects whose silhouette is undefined (singleton
 * cluster, no other cluster to compare against, or all involved distances
 * zero) receive the score 0 instead of NaN.
 *
 * @param database Database to process
 * @return Outlier result holding the silhouette value of each object
 */
@Override
public OutlierResult run(Database database) {
  Relation<O> relation = database.getRelation(getDistanceFunction().getInputTypeRestriction());
  DistanceQuery<O> dq = database.getDistanceQuery(relation, getDistanceFunction());
  // TODO: improve ELKI api to ensure we're using the same DBIDs!
  Clustering<?> c = clusterer.run(database);

  WritableDoubleDataStore scores = DataStoreUtil.makeDoubleStorage(relation.getDBIDs(), DataStoreFactory.HINT_DB);
  DoubleMinMax mm = new DoubleMinMax();

  List<? extends Cluster<?>> clusters = c.getAllClusters();
  for (Cluster<?> cluster : clusters) {
    if (cluster.size() <= 1 || cluster.isNoise()) {
      switch(noiseOption) {
      case IGNORE_NOISE:
      case TREAT_NOISE_AS_SINGLETONS:
        // As suggested in Rousseeuw, we use 0 for singletons.
        for (DBIDIter iter = cluster.getIDs().iter(); iter.valid(); iter.advance()) {
          scores.put(iter, 0.);
        }
        mm.put(0.);
        continue;
      case MERGE_NOISE:
        // Treat as cluster below
        break;
      }
    }
    ArrayDBIDs ids = DBIDUtil.ensureArray(cluster.getIDs());
    if (ids.size() <= 1) {
      // Reached when the switch above did not skip the cluster (e.g.
      // MERGE_NOISE). The in-cluster mean would be 0 / 0 = NaN, so use the
      // same singleton convention as above: silhouette 0.
      for (DBIDIter iter = ids.iter(); iter.valid(); iter.advance()) {
        scores.put(iter, 0.);
        mm.put(0.);
      }
      continue;
    }
    // temporary storage: as[j] collects the distances from earlier objects
    // i < j to object j, so each in-cluster pair is computed only once.
    double[] as = new double[ids.size()];
    DBIDArrayIter it1 = ids.iter(), it2 = ids.iter();
    for (it1.seek(0); it1.valid(); it1.advance()) {
      // a: In-cluster distances
      // Already computed distances
      double a = as[it1.getOffset()];
      for (it2.seek(it1.getOffset() + 1); it2.valid(); it2.advance()) {
        final double dist = dq.distance(it1, it2);
        a += dist;
        as[it2.getOffset()] += dist;
      }
      // Mean over the other members; ids.size() >= 2 is guaranteed here.
      a /= (ids.size() - 1);
      // b: other clusters:
      double min = Double.POSITIVE_INFINITY;
      for (Cluster<?> ocluster : clusters) {
        if (ocluster == /* yes, reference identity */ cluster) {
          continue;
        }
        if (ocluster.isNoise()) {
          switch(noiseOption) {
          case IGNORE_NOISE:
            continue;
          case MERGE_NOISE:
            // No special treatment
            break;
          case TREAT_NOISE_AS_SINGLETONS:
            // Treat noise cluster as singletons:
            for (DBIDIter it3 = ocluster.getIDs().iter(); it3.valid(); it3.advance()) {
              double dist = dq.distance(it1, it3);
              if (dist < min) {
                min = dist;
              }
            }
            continue;
          }
        }
        final DBIDs oids = ocluster.getIDs();
        double b = 0.;
        for (DBIDIter it3 = oids.iter(); it3.valid(); it3.advance()) {
          b += dq.distance(it1, it3);
        }
        b /= oids.size();
        if (b < min) {
          min = b;
        }
      }
      // The silhouette is undefined if there was no other cluster to compare
      // with (min still infinite -> Inf / Inf) or if both a and min are zero
      // (0 / 0). Report 0 in these cases rather than storing NaN.
      final double denom = Math.max(min, a);
      final double score = (min < Double.POSITIVE_INFINITY && denom > 0.) ? (min - a) / denom : 0.;
      scores.put(it1, score);
      mm.put(score);
    }
  }
  // Build result representation.
  DoubleRelation scoreResult = new MaterializedDoubleRelation("Silhouette Coefficients", "silhouette-outlier", scores, relation.getDBIDs());
  OutlierScoreMeta scoreMeta = new InvertedOutlierScoreMeta(mm.getMin(), mm.getMax(), -1., 1., .5);
  return new OutlierResult(scoreMeta, scoreResult);
}
use of de.lmu.ifi.dbs.elki.result.outlier.OutlierResult in project elki by elki-project.
the class OutRankS1 method run.
/**
 * Run the subspace clustering algorithm and derive OutRank-S1 scores from it.
 *
 * Every object starts with score 0. For each cluster containing the object,
 * {@code alpha * relsize + (1 - alpha) * reldim} is added, where
 * {@code relsize} and {@code reldim} are the cluster's size and subspace
 * dimensionality relative to the largest values over all clusters.
 *
 * The minimum and maximum reported in the score metadata are taken from the
 * final scores of all objects, including objects that are in no cluster.
 *
 * @param database Database to process
 * @return Outlier result, with the clustering attached as child result
 */
@Override
public OutlierResult run(Database database) {
  DBIDs ids = database.getRelation(TypeUtil.ANY).getDBIDs();
  // Run the primary algorithm
  Clustering<? extends SubspaceModel> clustering = clusteralg.run(database);

  WritableDoubleDataStore score = DataStoreUtil.makeDoubleStorage(ids, DataStoreFactory.HINT_HOT);
  for (DBIDIter iter = ids.iter(); iter.valid(); iter.advance()) {
    score.putDouble(iter, 0);
  }

  int maxdim = 0, maxsize = 0;
  // Find maximum dimensionality and cluster size
  for (Cluster<? extends SubspaceModel> cluster : clustering.getAllClusters()) {
    maxsize = Math.max(maxsize, cluster.size());
    maxdim = Math.max(maxdim, BitsUtil.cardinality(cluster.getModel().getDimensions()));
  }

  // Iterate over all clusters:
  for (Cluster<? extends SubspaceModel> cluster : clustering.getAllClusters()) {
    // Guard the normalization: a zero maximum would yield NaN (0 / 0).
    double relsize = maxsize > 0 ? cluster.size() / (double) maxsize : 0.;
    double reldim = maxdim > 0 ? BitsUtil.cardinality(cluster.getModel().getDimensions()) / (double) maxdim : 0.;
    // Process objects in the cluster
    for (DBIDIter iter = cluster.getIDs().iter(); iter.valid(); iter.advance()) {
      double newscore = score.doubleValue(iter) + alpha * relsize + (1 - alpha) * reldim;
      score.putDouble(iter, newscore);
    }
  }

  // Scores are accumulated across clusters, so the extrema must be taken
  // from the final values; tracking them inside the loop above would record
  // partial sums and miss objects that belong to no cluster (score 0).
  DoubleMinMax minmax = new DoubleMinMax();
  for (DBIDIter iter = ids.iter(); iter.valid(); iter.advance()) {
    minmax.put(score.doubleValue(iter));
  }

  DoubleRelation scoreResult = new MaterializedDoubleRelation("OutRank-S1", "OUTRANK_S1", score, ids);
  OutlierScoreMeta meta = new InvertedOutlierScoreMeta(minmax.getMin(), minmax.getMax(), 0, Double.POSITIVE_INFINITY);
  OutlierResult res = new OutlierResult(meta, scoreResult);
  res.addChildResult(clustering);
  return res;
}
use of de.lmu.ifi.dbs.elki.result.outlier.OutlierResult in project elki by elki-project.
the class KMeansOutlierDetection method run.
/**
 * Cluster the relation and use, as outlier score of each object, its distance
 * to the prototype of the cluster it was assigned to.
 *
 * @param database Database
 * @param relation Relation
 * @return Outlier detection result
 */
public OutlierResult run(Database database, Relation<O> relation) {
  // Distances are measured with the clusterer's own distance function.
  final DistanceFunction<? super O> distance = clusterer.getDistanceFunction();
  final DistanceQuery<O> query = database.getDistanceQuery(relation, distance);

  // TODO: improve ELKI api to ensure we're using the same DBIDs!
  final Clustering<?> clustering = clusterer.run(database, relation);

  final WritableDoubleDataStore outlierScores = DataStoreUtil.makeDoubleStorage(relation.getDBIDs(), DataStoreFactory.HINT_DB);
  final DoubleMinMax range = new DoubleMinMax();

  // Factory used to turn each cluster prototype into an object of type O.
  @SuppressWarnings("unchecked")
  final NumberVector.Factory<O> vectors = (NumberVector.Factory<O>) RelationUtil.assumeVectorField(relation).getFactory();

  final List<? extends Cluster<?>> allClusters = clustering.getAllClusters();
  for (Cluster<?> current : allClusters) {
    // FIXME: use a primitive distance function on number vectors instead.
    final O prototype = vectors.newNumberVector(ModelUtil.getPrototype(current.getModel(), relation));
    DBIDIter member = current.getIDs().iter();
    while (member.valid()) {
      final double toPrototype = query.distance(prototype, member);
      outlierScores.put(member, toPrototype);
      range.put(toPrototype);
      member.advance();
    }
  }

  // Wrap the scores and their observed range into the result objects.
  final DoubleRelation scoreRelation = new MaterializedDoubleRelation("KMeans outlier scores", "kmeans-outlier", outlierScores, relation.getDBIDs());
  final OutlierScoreMeta meta = new BasicOutlierScoreMeta(range.getMin(), range.getMax(), 0., Double.POSITIVE_INFINITY, 0.);
  return new OutlierResult(meta, scoreRelation);
}
use of de.lmu.ifi.dbs.elki.result.outlier.OutlierResult in project elki by elki-project.
the class SimpleKernelDensityLOFTest method testLDF.
/**
 * Runs SimpleKernelDensityLOF (k = 20, biweight kernel) on the 6d subspace
 * outlier data set and compares the result against stored reference values.
 */
@Test
public void testLDF() {
  final Database db = makeSimpleDatabase(UNITTEST + "outlier-axis-subspaces-6d.ascii", 1345);

  // Configure and instantiate the algorithm under test.
  final SimpleKernelDensityLOF<DoubleVector> algorithm = new ELKIBuilder<SimpleKernelDensityLOF<DoubleVector>>(SimpleKernelDensityLOF.class) //
      .with(LOF.Parameterizer.K_ID, 20) //
      .with(SimpleKernelDensityLOF.Parameterizer.KERNEL_ID, BiweightKernelDensityFunction.class) //
      .build();
  final OutlierResult result = algorithm.run(db);

  // Reference checks, in the same order as before.
  testAUC(db, "Noise", result, 0.87192156);
  testSingleScore(result, 1293, 12.271188);
}
use of de.lmu.ifi.dbs.elki.result.outlier.OutlierResult in project elki by elki-project.
the class VarianceOfVolumeTest method testVOV.
/**
 * Runs VarianceOfVolume (k = 10) on the 6d subspace outlier data set and
 * compares the result against stored reference values.
 */
@Test
public void testVOV() {
  final Database db = makeSimpleDatabase(UNITTEST + "outlier-axis-subspaces-6d.ascii", 1345);

  // Configure and instantiate the algorithm under test.
  final VarianceOfVolume<DoubleVector> algorithm = new ELKIBuilder<VarianceOfVolume<DoubleVector>>(VarianceOfVolume.class) //
      .with(VarianceOfVolume.Parameterizer.K_ID, 10) //
      .build();
  final OutlierResult result = algorithm.run(db);

  // Reference checks, in the same order as before.
  testSingleScore(result, 1293, 2.0733100852601836e13);
  testAUC(db, "Noise", result, 0.9306946778);
}
Aggregations