Use of de.lmu.ifi.dbs.elki.distance.distancefunction.subspace.SubspaceEuclideanDistanceFunction in project elki by elki-project.
The class OUTRES, method outresScore.
/**
 * Recursive scoring step of OUTRES, evaluated for a single object.
 * <p>
 * Beginning at dimension {@code s}, each dimension that is not yet part of
 * {@code subspace} is added in turn. If the object has more than two
 * neighbors in the extended subspace and the subspace passes the relevance
 * test, the score is multiplied by {@code density / deviation} (only when the
 * deviation is at least 1) and by the result of the recursive call for the
 * following dimensions. The added dimension is cleared again before the next
 * one is tried.
 *
 * @param s start dimension
 * @param subspace Current subspace (bits are set and cleared again while
 *        this method runs)
 * @param id Current object ID
 * @param kernel Kernel
 * @return Score
 */
public double outresScore(final int s, long[] subspace, DBIDRef id, KernelDensityEstimator kernel) {
  // Neutral element of the product accumulated below.
  double result = 1.0;
  final SubspaceEuclideanDistanceFunction dist = new SubspaceEuclideanDistanceFunction(subspace);
  final MeanVariance stats = new MeanVariance();
  for (int d = s; d < kernel.dim; d++) {
    // Dimensions already contained in the subspace are skipped.
    // NOTE(review): the original comment asks whether the scan should
    // rather always start with i=0 - unresolved.
    if (!BitsUtil.get(subspace, d)) {
      // Temporarily extend the subspace by dimension d.
      BitsUtil.setI(subspace, d);
      dist.setSelectedDimensions(subspace);
      final double eps = kernel.adjustedEps(kernel.dim);
      // Query with a window twice as large, to also get neighbors of
      // neighbors (subspace Euclidean is metric!)
      final double window = eps * 2.;
      final RangeQuery<V> query = QueryUtil.getRangeQuery(kernel.relation, dist, window);
      final DoubleDBIDList candidates = query.getRangeForDBID(id, window);
      final DoubleDBIDList neighbors = refineRange(candidates, eps);
      // Requires more than two neighbors, then the relevance test.
      if (neighbors.size() > 2 && relevantSubspace(subspace, neighbors, kernel)) {
        final double ownDensity = kernel.subspaceDensity(subspace, neighbors);
        // Mean and standard deviation of the neighbors' densities.
        stats.reset();
        for (DoubleDBIDListIter it = neighbors.iter(); it.valid(); it.advance()) {
          final DoubleDBIDList local = subsetNeighborhoodQuery(candidates, it, dist, eps, kernel);
          stats.put(kernel.subspaceDensity(subspace, local));
        }
        final double dev = (stats.getMean() - ownDensity) / (2. * stats.getSampleStddev());
        // Only a high deviation contributes a factor to the score.
        if (dev >= 1) {
          result *= ownDensity / dev;
        }
        // Descend into the remaining dimensions.
        result *= outresScore(d + 1, subspace, id, kernel);
      }
      // Undo the temporary extension.
      BitsUtil.clearI(subspace, d);
    }
  }
  return result;
}
Use of de.lmu.ifi.dbs.elki.distance.distancefunction.subspace.SubspaceEuclideanDistanceFunction in project elki by elki-project.
The class SOD, method subspaceOutlierDegree.
/**
 * Compute the SOD score of an object.
 * <p>
 * The score is the subspace Euclidean distance between the query object and
 * the center, restricted to the dimensions set in {@code weightVector},
 * divided by the number of those dimensions. If no dimension is set, the
 * score is 0.
 *
 * @param queryObject Query object
 * @param center Center vector
 * @param weightVector Weight vector
 * @return sod score
 */
private double subspaceOutlierDegree(V queryObject, double[] center, long[] weightVector) {
  final int selected = BitsUtil.cardinality(weightVector);
  if (selected != 0) {
    final SubspaceEuclideanDistanceFunction subspaceDist = new SubspaceEuclideanDistanceFunction(weightVector);
    // FIXME: defined and published as card, should be sqrt(card),
    // unfortunately
    return subspaceDist.distance(queryObject, DoubleVector.wrap(center)) / selected;
  }
  // Empty subspace: nothing to measure.
  return 0;
}
Use of de.lmu.ifi.dbs.elki.distance.distancefunction.subspace.SubspaceEuclideanDistanceFunction in project elki by elki-project.
The class FeatureBagging, method run.
/**
 * Run the feature bagging ensemble on a data set.
 * <p>
 * {@code num} LOF instances are executed, each using a subspace Euclidean
 * distance on a dimension set obtained from {@code randomSubspace} (called
 * with the bounds {@code dbdim >> 1} and {@code dbdim - 1}). The individual
 * results are then combined either breadth-first (walking the score-sorted
 * results in parallel and keeping the first score encountered for each
 * object) or by summing all non-NaN scores of each object.
 *
 * @param database Database context
 * @param relation Relation to use
 * @return Outlier detection result
 */
public OutlierResult run(Database database, Relation<NumberVector> relation) {
  final int dims = RelationUtil.dimensionality(relation);
  final int lower = dims >> 1;
  final int upper = dims - 1;
  final Random random = rnd.getSingleThreadedRandom();

  // Phase 1: one LOF run per random subspace.
  final ArrayList<OutlierResult> ensemble = new ArrayList<>(num);
  final FiniteProgress lofProgress = LOG.isVerbose() ? new FiniteProgress("LOF iterations", num, LOG) : null;
  for (int round = 0; round < num; round++) {
    final long[] dimset = randomSubspace(dims, lower, upper, random);
    final LOF<NumberVector> lof = new LOF<>(k, new SubspaceEuclideanDistanceFunction(dimset));
    ensemble.add(lof.run(database, relation));
    LOG.incrementProcessed(lofProgress);
  }
  LOG.ensureCompleted(lofProgress);

  // Phase 2: combine the individual results.
  final WritableDoubleDataStore combined = DataStoreUtil.makeDoubleStorage(relation.getDBIDs(), DataStoreFactory.HINT_STATIC);
  final DoubleMinMax extent = new DoubleMinMax();
  final FiniteProgress combineProgress = LOG.isVerbose() ? new FiniteProgress("Combining results", relation.size(), LOG) : null;
  if (!breadth) {
    // Cumulative sum: add up every non-NaN score of each object.
    for (DBIDIter obj = relation.iterDBIDs(); obj.valid(); obj.advance()) {
      double total = 0.0;
      for (OutlierResult member : ensemble) {
        final double value = member.getScores().doubleValue(obj);
        if (Double.isNaN(value)) {
          continue;
        }
        total += value;
      }
      combined.putDouble(obj, total);
      extent.put(total);
      LOG.incrementProcessed(combineProgress);
    }
  } else {
    // Breadth-first: pair a score-sorted iterator of each result with its
    // score relation, so that all results can be walked "in parallel".
    final int members = ensemble.size();
    @SuppressWarnings("unchecked")
    final Pair<DBIDIter, DoubleRelation>[] cursors = (Pair<DBIDIter, DoubleRelation>[]) new Pair[members];
    for (int m = 0; m < members; m++) {
      final OutlierResult member = ensemble.get(m);
      cursors[m] = new Pair<DBIDIter, DoubleRelation>(member.getOrdering().order(relation.getDBIDs()).iter(), member.getScores());
    }
    // One pass per rank; within a pass, visit every ensemble member.
    for (int rank = 0; rank < relation.size(); rank++) {
      for (Pair<DBIDIter, DoubleRelation> cursor : cursors) {
        final DBIDIter pos = cursor.first;
        if (!pos.valid()) {
          LOG.warning("Incomplete result: Iterator does not contain |DB| DBIDs");
          continue;
        }
        final double value = cursor.second.doubleValue(pos);
        // Keep only the first score seen for an object.
        // NOTE(review): relies on unassigned entries of the store reading
        // as NaN - presumably the storage default; confirm.
        if (Double.isNaN(combined.doubleValue(pos))) {
          combined.putDouble(pos, value);
          extent.put(value);
        }
        pos.advance();
      }
      // Progress does not take the initial mapping into account.
      LOG.incrementProcessed(combineProgress);
    }
  }
  LOG.ensureCompleted(combineProgress);

  final OutlierScoreMeta meta = new BasicOutlierScoreMeta(extent.getMin(), extent.getMax());
  final DoubleRelation scoreres = new MaterializedDoubleRelation("Feature bagging", "fb-outlier", combined, relation.getDBIDs());
  return new OutlierResult(meta, scoreres);
}
Aggregations