use of de.lmu.ifi.dbs.elki.result.outlier.BasicOutlierScoreMeta in project elki by elki-project.
the class FeatureBagging method run.
/**
* Run the algorithm on a data set.
*
* @param database Database context
* @param relation Relation to use
* @return Outlier detection result
*/
public OutlierResult run(Database database, Relation<NumberVector> relation) {
final int dbdim = RelationUtil.dimensionality(relation);
final int mindim = dbdim >> 1;
final int maxdim = dbdim - 1;
final Random rand = rnd.getSingleThreadedRandom();
ArrayList<OutlierResult> results = new ArrayList<>(num);
{
FiniteProgress prog = LOG.isVerbose() ? new FiniteProgress("LOF iterations", num, LOG) : null;
for (int i = 0; i < num; i++) {
long[] dimset = randomSubspace(dbdim, mindim, maxdim, rand);
SubspaceEuclideanDistanceFunction df = new SubspaceEuclideanDistanceFunction(dimset);
LOF<NumberVector> lof = new LOF<>(k, df);
// run LOF and collect the result
OutlierResult result = lof.run(database, relation);
results.add(result);
LOG.incrementProcessed(prog);
}
LOG.ensureCompleted(prog);
}
WritableDoubleDataStore scores = DataStoreUtil.makeDoubleStorage(relation.getDBIDs(), DataStoreFactory.HINT_STATIC);
DoubleMinMax minmax = new DoubleMinMax();
if (breadth) {
FiniteProgress cprog = LOG.isVerbose() ? new FiniteProgress("Combining results", relation.size(), LOG) : null;
@SuppressWarnings("unchecked") Pair<DBIDIter, DoubleRelation>[] IDVectorOntoScoreVector = (Pair<DBIDIter, DoubleRelation>[]) new Pair[results.size()];
// Mapping score-sorted DBID-Iterators onto their corresponding scores.
// We need to initialize them now be able to iterate them "in parallel".
{
int i = 0;
for (OutlierResult r : results) {
IDVectorOntoScoreVector[i] = new Pair<DBIDIter, DoubleRelation>(r.getOrdering().order(relation.getDBIDs()).iter(), r.getScores());
i++;
}
}
// Iterating over the *lines* of the AS_t(i)-matrix.
for (int i = 0; i < relation.size(); i++) {
// Iterating over the elements of a line (breadth-first).
for (Pair<DBIDIter, DoubleRelation> pair : IDVectorOntoScoreVector) {
DBIDIter iter = pair.first;
// for every DBID).
if (iter.valid()) {
double score = pair.second.doubleValue(iter);
if (Double.isNaN(scores.doubleValue(iter))) {
scores.putDouble(iter, score);
minmax.put(score);
}
iter.advance();
} else {
LOG.warning("Incomplete result: Iterator does not contain |DB| DBIDs");
}
}
// Progress does not take the initial mapping into account.
LOG.incrementProcessed(cprog);
}
LOG.ensureCompleted(cprog);
} else {
FiniteProgress cprog = LOG.isVerbose() ? new FiniteProgress("Combining results", relation.size(), LOG) : null;
for (DBIDIter iter = relation.iterDBIDs(); iter.valid(); iter.advance()) {
double sum = 0.0;
for (OutlierResult r : results) {
final double s = r.getScores().doubleValue(iter);
if (!Double.isNaN(s)) {
sum += s;
}
}
scores.putDouble(iter, sum);
minmax.put(sum);
LOG.incrementProcessed(cprog);
}
LOG.ensureCompleted(cprog);
}
OutlierScoreMeta meta = new BasicOutlierScoreMeta(minmax.getMin(), minmax.getMax());
DoubleRelation scoreres = new MaterializedDoubleRelation("Feature bagging", "fb-outlier", scores, relation.getDBIDs());
return new OutlierResult(meta, scoreres);
}
use of de.lmu.ifi.dbs.elki.result.outlier.BasicOutlierScoreMeta in project elki by elki-project.
the class RescaleMetaOutlierAlgorithm method run.
@Override
public OutlierResult run(Database database) {
Result innerresult = algorithm.run(database);
OutlierResult or = getOutlierResult(database.getHierarchy(), innerresult);
final DoubleRelation scores = or.getScores();
if (scaling instanceof OutlierScalingFunction) {
((OutlierScalingFunction) scaling).prepare(or);
}
WritableDoubleDataStore scaledscores = DataStoreUtil.makeDoubleStorage(scores.getDBIDs(), DataStoreFactory.HINT_HOT | DataStoreFactory.HINT_STATIC);
DoubleMinMax minmax = new DoubleMinMax();
for (DBIDIter iditer = scores.iterDBIDs(); iditer.valid(); iditer.advance()) {
double val = scaling.getScaled(scores.doubleValue(iditer));
scaledscores.putDouble(iditer, val);
minmax.put(val);
}
OutlierScoreMeta meta = new BasicOutlierScoreMeta(minmax.getMin(), minmax.getMax(), scaling.getMin(), scaling.getMax());
DoubleRelation scoresult = new MaterializedDoubleRelation("Scaled Outlier", "scaled-outlier", scaledscores, scores.getDBIDs());
OutlierResult result = new OutlierResult(meta, scoresult);
result.addChildResult(innerresult);
return result;
}
use of de.lmu.ifi.dbs.elki.result.outlier.BasicOutlierScoreMeta in project elki by elki-project.
the class TrimmedMeanApproach method run.
/**
* Run the algorithm.
*
* @param database Database
* @param nrel Neighborhood relation
* @param relation Data Relation (1 dimensional!)
* @return Outlier detection result
*/
public OutlierResult run(Database database, Relation<N> nrel, Relation<? extends NumberVector> relation) {
assert (RelationUtil.dimensionality(relation) == 1) : "TrimmedMean can only process one-dimensional data sets.";
final NeighborSetPredicate npred = getNeighborSetPredicateFactory().instantiate(database, nrel);
WritableDoubleDataStore errors = DataStoreUtil.makeDoubleStorage(relation.getDBIDs(), DataStoreFactory.HINT_TEMP);
WritableDoubleDataStore scores = DataStoreUtil.makeDoubleStorage(relation.getDBIDs(), DataStoreFactory.HINT_STATIC);
FiniteProgress progress = LOG.isVerbose() ? new FiniteProgress("Computing trimmed means", relation.size(), LOG) : null;
for (DBIDIter iditer = relation.iterDBIDs(); iditer.valid(); iditer.advance()) {
DBIDs neighbors = npred.getNeighborDBIDs(iditer);
int num = 0;
double[] values = new double[neighbors.size()];
// calculate trimmedMean
for (DBIDIter iter = neighbors.iter(); iter.valid(); iter.advance()) {
values[num] = relation.get(iter).doubleValue(0);
num++;
}
// calculate local trimmed Mean and error term
final double tm;
if (num > 0) {
int left = (int) Math.floor(p * (num - 1));
int right = (int) Math.floor((1 - p) * (num - 1));
Arrays.sort(values, 0, num);
Mean mean = new Mean();
for (int i = left; i <= right; i++) {
mean.put(values[i]);
}
tm = mean.getMean();
} else {
tm = relation.get(iditer).doubleValue(0);
}
// Error: deviation from trimmed mean
errors.putDouble(iditer, relation.get(iditer).doubleValue(0) - tm);
LOG.incrementProcessed(progress);
}
LOG.ensureCompleted(progress);
if (LOG.isVerbose()) {
LOG.verbose("Computing median error.");
}
double median_dev_from_median;
{
// calculate the median error
double[] ei = new double[relation.size()];
{
int i = 0;
for (DBIDIter iditer = relation.iterDBIDs(); iditer.valid(); iditer.advance()) {
ei[i] = errors.doubleValue(iditer);
i++;
}
}
double median_i = QuickSelect.median(ei);
// Update to deviation from median
for (int i = 0; i < ei.length; i++) {
ei[i] = Math.abs(ei[i] - median_i);
}
// Again, extract median
median_dev_from_median = QuickSelect.median(ei);
}
if (LOG.isVerbose()) {
LOG.verbose("Normalizing scores.");
}
// calculate score
DoubleMinMax minmax = new DoubleMinMax();
for (DBIDIter iditer = relation.iterDBIDs(); iditer.valid(); iditer.advance()) {
double score = Math.abs(errors.doubleValue(iditer)) * 0.6745 / median_dev_from_median;
scores.putDouble(iditer, score);
minmax.put(score);
}
//
DoubleRelation scoreResult = new MaterializedDoubleRelation("TrimmedMean", "Trimmed Mean Score", scores, relation.getDBIDs());
OutlierScoreMeta scoreMeta = new BasicOutlierScoreMeta(minmax.getMin(), minmax.getMax(), 0.0, Double.POSITIVE_INFINITY, 0);
OutlierResult or = new OutlierResult(scoreMeta, scoreResult);
or.addChildResult(npred);
return or;
}
use of de.lmu.ifi.dbs.elki.result.outlier.BasicOutlierScoreMeta in project elki by elki-project.
the class SLOM method run.
/**
* @param database Database to process
* @param spatial Spatial Relation to use.
* @param relation Relation to use.
* @return Outlier detection result
*/
public OutlierResult run(Database database, Relation<N> spatial, Relation<O> relation) {
final NeighborSetPredicate npred = getNeighborSetPredicateFactory().instantiate(database, spatial);
DistanceQuery<O> distFunc = getNonSpatialDistanceFunction().instantiate(relation);
WritableDoubleDataStore modifiedDistance = DataStoreUtil.makeDoubleStorage(relation.getDBIDs(), DataStoreFactory.HINT_HOT | DataStoreFactory.HINT_TEMP);
// calculate D-Tilde
for (DBIDIter iditer = relation.iterDBIDs(); iditer.valid(); iditer.advance()) {
double sum = 0;
double maxDist = 0;
int cnt = 0;
final DBIDs neighbors = npred.getNeighborDBIDs(iditer);
for (DBIDIter iter = neighbors.iter(); iter.valid(); iter.advance()) {
if (DBIDUtil.equal(iditer, iter)) {
continue;
}
double dist = distFunc.distance(iditer, iter);
sum += dist;
cnt++;
maxDist = Math.max(maxDist, dist);
}
if (cnt > 1) {
modifiedDistance.putDouble(iditer, ((sum - maxDist) / (cnt - 1)));
} else {
// Use regular distance when the d-tilde trick is undefined.
// Note: this can be 0 when there were no neighbors.
modifiedDistance.putDouble(iditer, maxDist);
}
}
// Second step - compute actual SLOM values
DoubleMinMax slomminmax = new DoubleMinMax();
WritableDoubleDataStore sloms = DataStoreUtil.makeDoubleStorage(relation.getDBIDs(), DataStoreFactory.HINT_STATIC);
for (DBIDIter iditer = relation.iterDBIDs(); iditer.valid(); iditer.advance()) {
double sum = 0;
int cnt = 0;
final DBIDs neighbors = npred.getNeighborDBIDs(iditer);
for (DBIDIter iter = neighbors.iter(); iter.valid(); iter.advance()) {
if (DBIDUtil.equal(iditer, iter)) {
continue;
}
sum += modifiedDistance.doubleValue(iter);
cnt++;
}
double slom;
if (cnt > 0) {
// With and without the object itself:
double avgPlus = (sum + modifiedDistance.doubleValue(iditer)) / (cnt + 1);
double avg = sum / cnt;
double beta = 0;
for (DBIDIter iter = neighbors.iter(); iter.valid(); iter.advance()) {
final double dist = modifiedDistance.doubleValue(iter);
if (dist > avgPlus) {
beta += 1;
} else if (dist < avgPlus) {
beta -= 1;
}
}
// Include object itself
if (!neighbors.contains(iditer)) {
final double dist = modifiedDistance.doubleValue(iditer);
if (dist > avgPlus) {
beta += 1;
} else if (dist < avgPlus) {
beta -= 1;
}
}
beta = Math.abs(beta);
// note: cnt == size of N(x), not N+(x)
if (cnt > 1) {
beta = Math.max(beta, 1.0) / (cnt - 1);
} else {
// Workaround insufficiency in SLOM paper - div by zero
beta = 1.0;
}
beta = beta / (1 + avg);
slom = beta * modifiedDistance.doubleValue(iditer);
} else {
// No neighbors to compare to - no score.
slom = 0.0;
}
sloms.putDouble(iditer, slom);
slomminmax.put(slom);
}
DoubleRelation scoreResult = new MaterializedDoubleRelation("SLOM", "slom-outlier", sloms, relation.getDBIDs());
OutlierScoreMeta scoreMeta = new BasicOutlierScoreMeta(slomminmax.getMin(), slomminmax.getMax(), 0.0, Double.POSITIVE_INFINITY);
OutlierResult or = new OutlierResult(scoreMeta, scoreResult);
or.addChildResult(npred);
return or;
}
use of de.lmu.ifi.dbs.elki.result.outlier.BasicOutlierScoreMeta in project elki by elki-project.
the class TrivialAverageCoordinateOutlier method run.
/**
* Run the actual algorithm.
*
* @param relation Relation
* @return Result
*/
public OutlierResult run(Relation<? extends NumberVector> relation) {
WritableDoubleDataStore scores = DataStoreUtil.makeDoubleStorage(relation.getDBIDs(), DataStoreFactory.HINT_HOT);
DoubleMinMax minmax = new DoubleMinMax();
Mean m = new Mean();
for (DBIDIter iditer = relation.iterDBIDs(); iditer.valid(); iditer.advance()) {
m.reset();
NumberVector nv = relation.get(iditer);
for (int i = 0; i < nv.getDimensionality(); i++) {
m.put(nv.doubleValue(i));
}
final double score = m.getMean();
scores.putDouble(iditer, score);
minmax.put(score);
}
DoubleRelation scoreres = new MaterializedDoubleRelation("Trivial mean score", "mean-outlier", scores, relation.getDBIDs());
OutlierScoreMeta meta = new BasicOutlierScoreMeta(minmax.getMin(), minmax.getMax());
return new OutlierResult(meta, scoreres);
}
Aggregations