Use of de.lmu.ifi.dbs.elki.result.outlier.ProbabilisticOutlierScore in project elki by elki-project.
The class TrivialGeneratedOutlier, method run.
/**
* Run the algorithm
*
* @param models Model relation
* @param vecs Vector relation
* @param labels Label relation
* @return Outlier result
*/
public OutlierResult run(Relation<Model> models, Relation<NumberVector> vecs, Relation<?> labels) {
  WritableDoubleDataStore scores = DataStoreUtil.makeDoubleStorage(models.getDBIDs(), DataStoreFactory.HINT_HOT);
  HashSet<GeneratorSingleCluster> generators = new HashSet<>();
  for (DBIDIter iditer = models.iterDBIDs(); iditer.valid(); iditer.advance()) {
    Model model = models.get(iditer);
    if (model instanceof GeneratorSingleCluster) {
      generators.add((GeneratorSingleCluster) model);
    }
  }
  if (generators.isEmpty()) {
    LOG.warning("No generator models found for dataset - all points will be considered outliers.");
  }
  for (GeneratorSingleCluster gen : generators) {
    for (int i = 0; i < gen.getDim(); i++) {
      Distribution dist = gen.getDistribution(i);
      if (!(dist instanceof NormalDistribution)) {
        throw new AbortException("TrivialGeneratedOutlier currently only supports normal distributions, got: " + dist);
      }
    }
  }
  for (DBIDIter iditer = models.iterDBIDs(); iditer.valid(); iditer.advance()) {
    double score = 1.;
    double[] v = vecs.get(iditer).toArray();
    for (GeneratorSingleCluster gen : generators) {
      double[] tv = v;
      // Transform backwards
      if (gen.getTransformation() != null) {
        tv = gen.getTransformation().applyInverse(v);
      }
      final int dim = tv.length;
      double lensq = 0.0;
      int norm = 0;
      for (int i = 0; i < dim; i++) {
        Distribution dist = gen.getDistribution(i);
        if (dist instanceof NormalDistribution) {
          NormalDistribution d = (NormalDistribution) dist;
          double delta = (tv[i] - d.getMean()) / d.getStddev();
          lensq += delta * delta;
          norm += 1;
        } else {
          throw new AbortException("TrivialGeneratedOutlier currently only supports normal distributions, got: " + dist);
        }
      }
      if (norm > 0.) {
        // The squared distances are ChiSquared distributed
        score = Math.min(score, ChiSquaredDistribution.cdf(lensq, norm));
      } else {
        score = 0.;
      }
    }
    if (expect < 1) {
      score = expect * score / (1 - score + expect);
    }
    scores.putDouble(iditer, score);
  }
  DoubleRelation scoreres = new MaterializedDoubleRelation("Model outlier scores", "model-outlier", scores, models.getDBIDs());
  OutlierScoreMeta meta = new ProbabilisticOutlierScore(0., 1.);
  return new OutlierResult(meta, scoreres);
}
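Read as a formula, the scoring loop above computes, for each generator cluster g with per-dimension normal distributions N(mu_{g,i}, sigma_{g,i}^2) and inverse-transformed point t, the chi-squared CDF of the squared standardized distance, keeps the minimum over all clusters, and for expect < 1 rescales the resulting probability:

\mathrm{score}(x) = \min_{g} F_{\chi^2_{k_g}}\!\left( \sum_{i=1}^{k_g} \left( \frac{t_i - \mu_{g,i}}{\sigma_{g,i}} \right)^{2} \right), \qquad \mathrm{score} \leftarrow \frac{\mathit{expect} \cdot \mathrm{score}}{1 - \mathrm{score} + \mathit{expect}},

where k_g is the dimensionality of cluster g and t = T_g^{-1}(x) when the cluster carries a transformation T_g.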
Use of de.lmu.ifi.dbs.elki.result.outlier.ProbabilisticOutlierScore in project elki by elki-project.
The class SimpleCOP, method run.
/**
 * Performs the Simple COP algorithm on the given relation.
 *
 * @param database Database to process
 * @param data Relation to process
 * @return Outlier result
 */
public OutlierResult run(Database database, Relation<V> data) throws IllegalStateException {
  KNNQuery<V> knnQuery = QueryUtil.getKNNQuery(data, getDistanceFunction(), k + 1);
  DBIDs ids = data.getDBIDs();
  WritableDoubleDataStore cop_score = DataStoreUtil.makeDoubleStorage(ids, DataStoreFactory.HINT_HOT | DataStoreFactory.HINT_STATIC);
  WritableDataStore<double[]> cop_err_v = DataStoreUtil.makeStorage(ids, DataStoreFactory.HINT_HOT | DataStoreFactory.HINT_STATIC, double[].class);
  WritableDataStore<double[][]> cop_datav = DataStoreUtil.makeStorage(ids, DataStoreFactory.HINT_HOT | DataStoreFactory.HINT_STATIC, double[][].class);
  WritableIntegerDataStore cop_dim = DataStoreUtil.makeIntegerStorage(ids, DataStoreFactory.HINT_HOT | DataStoreFactory.HINT_STATIC, -1);
  WritableDataStore<CorrelationAnalysisSolution<?>> cop_sol = DataStoreUtil.makeStorage(ids, DataStoreFactory.HINT_HOT | DataStoreFactory.HINT_STATIC, CorrelationAnalysisSolution.class);
  {
    // compute neighbors of each db object
    FiniteProgress progressLocalPCA = LOG.isVerbose() ? new FiniteProgress("Correlation Outlier Probabilities", data.size(), LOG) : null;
    double sqrt2 = MathUtil.SQRT2;
    for (DBIDIter id = data.iterDBIDs(); id.valid(); id.advance()) {
      KNNList neighbors = knnQuery.getKNNForDBID(id, k + 1);
      ModifiableDBIDs nids = DBIDUtil.newArray(neighbors);
      nids.remove(id);
      // TODO: do we want to use the query point as centroid?
      CorrelationAnalysisSolution<V> depsol = dependencyDerivator.generateModel(data, nids);
      double stddev = depsol.getStandardDeviation();
      double distance = depsol.distance(data.get(id));
      double prob = NormalDistribution.erf(distance / (stddev * sqrt2));
      cop_score.putDouble(id, prob);
      cop_err_v.put(id, times(depsol.errorVector(data.get(id)), -1));
      double[][] datav = depsol.dataProjections(data.get(id));
      cop_datav.put(id, datav);
      cop_dim.putInt(id, depsol.getCorrelationDimensionality());
      cop_sol.put(id, depsol);
      LOG.incrementProcessed(progressLocalPCA);
    }
    LOG.ensureCompleted(progressLocalPCA);
  }
  // combine results.
  DoubleRelation scoreResult = new MaterializedDoubleRelation("Original Correlation Outlier Probabilities", "origcop-outlier", cop_score, ids);
  OutlierScoreMeta scoreMeta = new ProbabilisticOutlierScore();
  OutlierResult result = new OutlierResult(scoreMeta, scoreResult);
  // extra results
  result.addChildResult(new MaterializedRelation<>("Local Dimensionality", COP.COP_DIM, TypeUtil.INTEGER, cop_dim, ids));
  result.addChildResult(new MaterializedRelation<>("Error vectors", COP.COP_ERRORVEC, TypeUtil.DOUBLE_ARRAY, cop_err_v, ids));
  result.addChildResult(new MaterializedRelation<>("Data vectors", "cop-datavec", TypeUtil.MATRIX, cop_datav, ids));
  result.addChildResult(new MaterializedRelation<>("Correlation analysis", "cop-sol", new SimpleTypeInformation<CorrelationAnalysisSolution<?>>(CorrelationAnalysisSolution.class), cop_sol, ids));
  return result;
}
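The per-object probability computed above is the error-function transform of the distance to the local correlation model: for a normally distributed deviation X ~ N(0, sigma^2), it equals the probability of observing a deviation at most as large as the one seen,

P(o) = \operatorname{erf}\!\left( \frac{d(o)}{\sigma \sqrt{2}} \right) = \Pr\bigl( |X| \le d(o) \bigr),

so values close to 1 indicate points lying far outside the correlation hyperplane fitted to their neighbors.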
Use of de.lmu.ifi.dbs.elki.result.outlier.ProbabilisticOutlierScore in project elki by elki-project.
The class LoOP, method run.
/**
* Performs the LoOP algorithm on the given database.
*
* @param database Database to process
* @param relation Relation to process
* @return Outlier result
*/
public OutlierResult run(Database database, Relation<O> relation) {
  StepProgress stepprog = LOG.isVerbose() ? new StepProgress(5) : null;
  Pair<KNNQuery<O>, KNNQuery<O>> pair = getKNNQueries(database, relation, stepprog);
  KNNQuery<O> knnComp = pair.getFirst();
  KNNQuery<O> knnReach = pair.getSecond();
  // Assert we got something
  if (knnComp == null) {
    throw new AbortException("No kNN queries supported by database for comparison distance function.");
  }
  if (knnReach == null) {
    throw new AbortException("No kNN queries supported by database for density estimation distance function.");
  }
  // FIXME: tie handling!
  // Probabilistic distances
  WritableDoubleDataStore pdists = DataStoreUtil.makeDoubleStorage(relation.getDBIDs(), DataStoreFactory.HINT_HOT | DataStoreFactory.HINT_DB);
  LOG.beginStep(stepprog, 3, "Computing pdists");
  computePDists(relation, knnReach, pdists);
  // Compute PLOF values.
  WritableDoubleDataStore plofs = DataStoreUtil.makeDoubleStorage(relation.getDBIDs(), DataStoreFactory.HINT_HOT | DataStoreFactory.HINT_TEMP);
  LOG.beginStep(stepprog, 4, "Computing PLOF");
  double nplof = computePLOFs(relation, knnComp, pdists, plofs);
  // Normalize the outlier scores.
  DoubleMinMax mm = new DoubleMinMax();
  {
    // compute LOOP_SCORE of each db object
    LOG.beginStep(stepprog, 5, "Computing LoOP scores");
    FiniteProgress progressLOOPs = LOG.isVerbose() ? new FiniteProgress("LoOP for objects", relation.size(), LOG) : null;
    final double norm = 1. / (nplof * MathUtil.SQRT2);
    for (DBIDIter iditer = relation.iterDBIDs(); iditer.valid(); iditer.advance()) {
      double loop = NormalDistribution.erf((plofs.doubleValue(iditer) - 1.) * norm);
      plofs.putDouble(iditer, loop);
      mm.put(loop);
      LOG.incrementProcessed(progressLOOPs);
    }
    LOG.ensureCompleted(progressLOOPs);
  }
  LOG.setCompleted(stepprog);
  // Build result representation.
  DoubleRelation scoreResult = new MaterializedDoubleRelation("Local Outlier Probabilities", "loop-outlier", plofs, relation.getDBIDs());
  OutlierScoreMeta scoreMeta = new ProbabilisticOutlierScore(mm.getMin(), mm.getMax(), 0.);
  return new OutlierResult(scoreMeta, scoreResult);
}
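With norm = 1. / (nplof * MathUtil.SQRT2), the final loop above implements

\mathrm{LoOP}(o) = \operatorname{erf}\!\left( \frac{\mathrm{PLOF}(o) - 1}{\mathrm{nPLOF} \cdot \sqrt{2}} \right),

using the PLOF values filled in by computePLOFs. Note that erf is negative when PLOF(o) < 1, i.e. when the point is denser than its context, which is why the ProbabilisticOutlierScore meta is built from the observed minimum and maximum (mm.getMin(), mm.getMax()) together with a baseline of 0.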
Use of de.lmu.ifi.dbs.elki.result.outlier.ProbabilisticOutlierScore in project elki by elki-project.
The class SOS, method run.
/**
* Run the algorithm.
*
* @param relation data relation
* @return outlier detection result
*/
public OutlierResult run(Relation<O> relation) {
  DistanceQuery<O> dq = relation.getDistanceQuery(getDistanceFunction());
  final double logPerp = FastMath.log(perplexity);
  ModifiableDoubleDBIDList dlist = DBIDUtil.newDistanceDBIDList(relation.size() - 1);
  DoubleDBIDListMIter di = dlist.iter();
  double[] p = new double[relation.size() - 1];
  FiniteProgress prog = LOG.isVerbose() ? new FiniteProgress("SOS scores", relation.size(), LOG) : null;
  WritableDoubleDataStore scores = DataStoreUtil.makeDoubleStorage(relation.getDBIDs(), DataStoreFactory.HINT_HOT | DataStoreFactory.HINT_DB, 1.);
  for (DBIDIter it = relation.iterDBIDs(); it.valid(); it.advance()) {
    // Build sorted neighbors list.
    dlist.clear();
    for (DBIDIter i2 = relation.iterDBIDs(); i2.valid(); i2.advance()) {
      if (DBIDUtil.equal(it, i2)) {
        continue;
      }
      dlist.add(dq.distance(it, i2), i2);
    }
    dlist.sort();
    // Compute affinities
    computePi(it, di, p, perplexity, logPerp);
    // Normalization factor:
    double s = sumOfProbabilities(it, di, p);
    if (s > 0) {
      nominateNeighbors(it, di, p, 1. / s, scores);
    }
    LOG.incrementProcessed(prog);
  }
  LOG.ensureCompleted(prog);
  // Find minimum and maximum.
  DoubleMinMax minmax = new DoubleMinMax();
  for (DBIDIter it2 = relation.iterDBIDs(); it2.valid(); it2.advance()) {
    minmax.put(scores.doubleValue(it2));
  }
  DoubleRelation scoreres = new MaterializedDoubleRelation("Stochastic Outlier Selection", "sos-outlier", scores, relation.getDBIDs());
  OutlierScoreMeta meta = new ProbabilisticOutlierScore(minmax.getMin(), minmax.getMax(), 0.);
  return new OutlierResult(meta, scoreres);
}
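The snippet above uses the three-argument ProbabilisticOutlierScore constructor. For side-by-side reference, the constructor variants that appear across the snippets on this page are collected below; this is only a recap of the calls shown above, with min and max standing in for an observed score range (e.g. from a DoubleMinMax as in the SOS code):

double min = 0., max = 1.; // placeholders for an observed score range
// No-argument form (SimpleCOP, ByLabelOutlier above)
OutlierScoreMeta m1 = new ProbabilisticOutlierScore();
// Two-argument form with 0. and 1., presumably the actual score minimum and maximum (TrivialGeneratedOutlier above)
OutlierScoreMeta m2 = new ProbabilisticOutlierScore(0., 1.);
// Three-argument form with the observed minimum, observed maximum, and 0. (LoOP and SOS above)
OutlierScoreMeta m3 = new ProbabilisticOutlierScore(min, max, 0.);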
Use of de.lmu.ifi.dbs.elki.result.outlier.ProbabilisticOutlierScore in project elki by elki-project.
The class ByLabelOutlier, method run.
/**
* Run the algorithm
*
* @param relation Relation to process.
* @return Result
*/
public OutlierResult run(Relation<?> relation) {
  WritableDoubleDataStore scores = DataStoreUtil.makeDoubleStorage(relation.getDBIDs(), DataStoreFactory.HINT_HOT);
  for (DBIDIter iditer = relation.iterDBIDs(); iditer.valid(); iditer.advance()) {
    String label = relation.get(iditer).toString();
    final double score = (pattern.matcher(label).matches()) ? 1 : 0;
    scores.putDouble(iditer, score);
  }
  DoubleRelation scoreres = new MaterializedDoubleRelation("By label outlier scores", "label-outlier", scores, relation.getDBIDs());
  OutlierScoreMeta meta = new ProbabilisticOutlierScore();
  return new OutlierResult(meta, scoreres);
}
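All five run methods on this page return an OutlierResult whose meta is a ProbabilisticOutlierScore. As a closing illustration, here is a minimal consumer sketch for reading the scores back; it assumes the ELKI 0.7.x accessors OutlierResult.getScores(), OutlierResult.getOutlierMeta() and OutlierScoreMeta.normalizeScore(double) as recalled from the ELKI API (they are not shown in the snippets above), and the class name ScoreDump is purely hypothetical:

import de.lmu.ifi.dbs.elki.database.ids.DBIDIter;
import de.lmu.ifi.dbs.elki.database.relation.DoubleRelation;
import de.lmu.ifi.dbs.elki.result.outlier.OutlierResult;
import de.lmu.ifi.dbs.elki.result.outlier.OutlierScoreMeta;

public class ScoreDump {
  /**
   * Print each object's raw and normalized outlier score.
   *
   * @param result Outlier result produced by one of the run methods above
   */
  public static void printScores(OutlierResult result) {
    DoubleRelation scores = result.getScores();
    OutlierScoreMeta meta = result.getOutlierMeta();
    for (DBIDIter it = scores.iterDBIDs(); it.valid(); it.advance()) {
      double raw = scores.doubleValue(it);
      // With ProbabilisticOutlierScore the values are already probabilities,
      // so normalization is expected to be (close to) the identity.
      double normalized = meta.normalizeScore(raw);
      System.out.println(raw + " -> " + normalized);
    }
  }
}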