use of de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress in project elki by elki-project.
the class LoOP method run.
/**
* Performs the LoOP algorithm on the given database.
*
* @param database Database to process
* @param relation Relation to process
* @return Outlier result
*/
public OutlierResult run(Database database, Relation<O> relation) {
StepProgress stepprog = LOG.isVerbose() ? new StepProgress(5) : null;
Pair<KNNQuery<O>, KNNQuery<O>> pair = getKNNQueries(database, relation, stepprog);
KNNQuery<O> knnComp = pair.getFirst();
KNNQuery<O> knnReach = pair.getSecond();
// Assert we got something
if (knnComp == null) {
throw new AbortException("No kNN queries supported by database for comparison distance function.");
}
if (knnReach == null) {
throw new AbortException("No kNN queries supported by database for density estimation distance function.");
}
// FIXME: tie handling!
// Probabilistic distances
WritableDoubleDataStore pdists = DataStoreUtil.makeDoubleStorage(relation.getDBIDs(), DataStoreFactory.HINT_HOT | DataStoreFactory.HINT_DB);
LOG.beginStep(stepprog, 3, "Computing pdists");
computePDists(relation, knnReach, pdists);
// Compute PLOF values.
WritableDoubleDataStore plofs = DataStoreUtil.makeDoubleStorage(relation.getDBIDs(), DataStoreFactory.HINT_HOT | DataStoreFactory.HINT_TEMP);
LOG.beginStep(stepprog, 4, "Computing PLOF");
double nplof = computePLOFs(relation, knnComp, pdists, plofs);
// Normalize the outlier scores.
DoubleMinMax mm = new DoubleMinMax();
{
// compute LOOP_SCORE of each db object
LOG.beginStep(stepprog, 5, "Computing LoOP scores");
FiniteProgress progressLOOPs = LOG.isVerbose() ? new FiniteProgress("LoOP for objects", relation.size(), LOG) : null;
final double norm = 1. / (nplof * MathUtil.SQRT2);
for (DBIDIter iditer = relation.iterDBIDs(); iditer.valid(); iditer.advance()) {
double loop = NormalDistribution.erf((plofs.doubleValue(iditer) - 1.) * norm);
plofs.putDouble(iditer, loop);
mm.put(loop);
LOG.incrementProcessed(progressLOOPs);
}
LOG.ensureCompleted(progressLOOPs);
}
LOG.setCompleted(stepprog);
// Build result representation.
DoubleRelation scoreResult = new MaterializedDoubleRelation("Local Outlier Probabilities", "loop-outlier", plofs, relation.getDBIDs());
OutlierScoreMeta scoreMeta = new ProbabilisticOutlierScore(mm.getMin(), mm.getMax(), 0.);
return new OutlierResult(scoreMeta, scoreResult);
}
use of de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress in project elki by elki-project.
the class KNNOutlier method run.
/**
* Runs the algorithm in the timed evaluation part.
*
* @param relation Data relation
*/
public OutlierResult run(Relation<O> relation) {
final DistanceQuery<O> distanceQuery = relation.getDistanceQuery(getDistanceFunction());
final KNNQuery<O> knnQuery = relation.getKNNQuery(distanceQuery, k);
FiniteProgress prog = LOG.isVerbose() ? new FiniteProgress("kNN distance for objects", relation.size(), LOG) : null;
DoubleMinMax minmax = new DoubleMinMax();
WritableDoubleDataStore knno_score = DataStoreUtil.makeDoubleStorage(relation.getDBIDs(), DataStoreFactory.HINT_STATIC);
// compute distance to the k nearest neighbor.
for (DBIDIter it = relation.iterDBIDs(); it.valid(); it.advance()) {
// distance to the kth nearest neighbor
// (assuming the query point is always included, with distance 0)
final double dkn = knnQuery.getKNNForDBID(it, k).getKNNDistance();
knno_score.putDouble(it, dkn);
minmax.put(dkn);
LOG.incrementProcessed(prog);
}
LOG.ensureCompleted(prog);
DoubleRelation scoreres = new MaterializedDoubleRelation("kNN Outlier Score", "knn-outlier", knno_score, relation.getDBIDs());
OutlierScoreMeta meta = new BasicOutlierScoreMeta(minmax.getMin(), minmax.getMax(), 0., Double.POSITIVE_INFINITY, 0.);
return new OutlierResult(meta, scoreres);
}
use of de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress in project elki by elki-project.
the class SOS method run.
/**
* Run the algorithm.
*
* @param relation data relation
* @return outlier detection result
*/
public OutlierResult run(Relation<O> relation) {
DistanceQuery<O> dq = relation.getDistanceQuery(getDistanceFunction());
final double logPerp = FastMath.log(perplexity);
ModifiableDoubleDBIDList dlist = DBIDUtil.newDistanceDBIDList(relation.size() - 1);
DoubleDBIDListMIter di = dlist.iter();
double[] p = new double[relation.size() - 1];
FiniteProgress prog = LOG.isVerbose() ? new FiniteProgress("SOS scores", relation.size(), LOG) : null;
WritableDoubleDataStore scores = DataStoreUtil.makeDoubleStorage(relation.getDBIDs(), DataStoreFactory.HINT_HOT | DataStoreFactory.HINT_DB, 1.);
for (DBIDIter it = relation.iterDBIDs(); it.valid(); it.advance()) {
// Build sorted neighbors list.
dlist.clear();
for (DBIDIter i2 = relation.iterDBIDs(); i2.valid(); i2.advance()) {
if (DBIDUtil.equal(it, i2)) {
continue;
}
dlist.add(dq.distance(it, i2), i2);
}
dlist.sort();
// Compute affinities
computePi(it, di, p, perplexity, logPerp);
// Normalization factor:
double s = sumOfProbabilities(it, di, p);
if (s > 0) {
nominateNeighbors(it, di, p, 1. / s, scores);
}
LOG.incrementProcessed(prog);
}
LOG.ensureCompleted(prog);
// Find minimum and maximum.
DoubleMinMax minmax = new DoubleMinMax();
for (DBIDIter it2 = relation.iterDBIDs(); it2.valid(); it2.advance()) {
minmax.put(scores.doubleValue(it2));
}
DoubleRelation scoreres = new MaterializedDoubleRelation("Stoachastic Outlier Selection", "sos-outlier", scores, relation.getDBIDs());
OutlierScoreMeta meta = new ProbabilisticOutlierScore(minmax.getMin(), minmax.getMax(), 0.);
return new OutlierResult(meta, scoreres);
}
use of de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress in project elki by elki-project.
the class HiCS method run.
/**
* Perform HiCS on a given database.
*
* @param relation the database
* @return The aggregated resulting scores that were assigned by the given
* outlier detection algorithm
*/
public OutlierResult run(Relation<V> relation) {
final DBIDs ids = relation.getDBIDs();
ArrayList<ArrayDBIDs> subspaceIndex = buildOneDimIndexes(relation);
Set<HiCSSubspace> subspaces = calculateSubspaces(relation, subspaceIndex, rnd.getSingleThreadedRandom());
if (LOG.isVerbose()) {
LOG.verbose("Number of high-contrast subspaces: " + subspaces.size());
}
List<DoubleRelation> results = new ArrayList<>();
FiniteProgress prog = LOG.isVerbose() ? new FiniteProgress("Calculating Outlier scores for high Contrast subspaces", subspaces.size(), LOG) : null;
// relation instead of SubspaceEuclideanDistanceFunction?)
for (HiCSSubspace dimset : subspaces) {
if (LOG.isVerbose()) {
LOG.verbose("Performing outlier detection in subspace " + dimset);
}
ProxyDatabase pdb = new ProxyDatabase(ids);
pdb.addRelation(new ProjectedView<>(relation, new NumericalFeatureSelection<V>(dimset)));
// run LOF and collect the result
OutlierResult result = outlierAlgorithm.run(pdb);
results.add(result.getScores());
LOG.incrementProcessed(prog);
}
LOG.ensureCompleted(prog);
WritableDoubleDataStore scores = DataStoreUtil.makeDoubleStorage(relation.getDBIDs(), DataStoreFactory.HINT_STATIC);
DoubleMinMax minmax = new DoubleMinMax();
for (DBIDIter iditer = relation.iterDBIDs(); iditer.valid(); iditer.advance()) {
double sum = 0.0;
for (DoubleRelation r : results) {
final double s = r.doubleValue(iditer);
if (!Double.isNaN(s)) {
sum += s;
}
}
scores.putDouble(iditer, sum);
minmax.put(sum);
}
OutlierScoreMeta meta = new BasicOutlierScoreMeta(minmax.getMin(), minmax.getMax());
DoubleRelation scoreres = new MaterializedDoubleRelation("HiCS", "HiCS-outlier", scores, relation.getDBIDs());
return new OutlierResult(meta, scoreres);
}
use of de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress in project elki by elki-project.
the class HiCS method calculateContrast.
/**
* Calculates the actual contrast of a given subspace.
*
* @param relation Relation to process
* @param subspace Subspace
* @param subspaceIndex Subspace indexes
*/
private void calculateContrast(Relation<? extends NumberVector> relation, HiCSSubspace subspace, ArrayList<ArrayDBIDs> subspaceIndex, Random random) {
final int card = subspace.cardinality();
final double alpha1 = FastMath.pow(alpha, (1.0 / card));
final int windowsize = (int) (relation.size() * alpha1);
final FiniteProgress prog = LOG.isDebugging() ? new FiniteProgress("Monte-Carlo iterations", m, LOG) : null;
int retries = 0;
double deviationSum = 0.0;
for (int i = 0; i < m; i++) {
// Choose a random set bit.
int chosen = -1;
for (int tmp = random.nextInt(card); tmp >= 0; tmp--) {
chosen = subspace.nextSetBit(chosen + 1);
}
// initialize sample
DBIDs conditionalSample = relation.getDBIDs();
for (int j = subspace.nextSetBit(0); j >= 0; j = subspace.nextSetBit(j + 1)) {
if (j == chosen) {
continue;
}
ArrayDBIDs sortedIndices = subspaceIndex.get(j);
ArrayModifiableDBIDs indexBlock = DBIDUtil.newArray(windowsize);
// initialize index block
DBIDArrayIter iter = sortedIndices.iter();
iter.seek(random.nextInt(relation.size() - windowsize));
for (int k = 0; k < windowsize; k++, iter.advance()) {
// select index block
indexBlock.add(iter);
}
conditionalSample = DBIDUtil.intersection(conditionalSample, indexBlock);
}
if (conditionalSample.size() < 10) {
retries++;
if (LOG.isDebugging()) {
LOG.debug("Sample size very small. Retry no. " + retries);
}
if (retries >= MAX_RETRIES) {
LOG.warning("Too many retries, for small samples: " + retries);
} else {
i--;
continue;
}
}
// Project conditional set
double[] sampleValues = new double[conditionalSample.size()];
{
int l = 0;
for (DBIDIter iter = conditionalSample.iter(); iter.valid(); iter.advance()) {
sampleValues[l] = relation.get(iter).doubleValue(chosen);
l++;
}
}
// Project full set
double[] fullValues = new double[relation.size()];
{
int l = 0;
for (DBIDIter iter = subspaceIndex.get(chosen).iter(); iter.valid(); iter.advance()) {
fullValues[l] = relation.get(iter).doubleValue(chosen);
l++;
}
}
double contrast = statTest.deviation(fullValues, sampleValues);
if (Double.isNaN(contrast)) {
i--;
LOG.warning("Contrast was NaN");
continue;
}
deviationSum += contrast;
LOG.incrementProcessed(prog);
}
LOG.ensureCompleted(prog);
subspace.contrast = deviationSum / m;
}
Aggregations