use of de.lmu.ifi.dbs.elki.result.outlier.QuotientOutlierScoreMeta in project elki by elki-project.
the class LOCI method run.
/**
 * Compute the LOCI outlier scores for every object of the relation.
 *
 * After the preprocessing step has stored the list of radii (with neighbor
 * counts) for each object, every radius whose neighbor count reaches
 * {@code nmin} is evaluated, and the largest normalized MDEF value is kept
 * together with the radius at which it occurred. Objects whose largest
 * neighborhood stays below {@code nmin} receive an infinite score.
 *
 * @param database Database to process
 * @param relation Relation to process
 * @return Outlier result; the radius of the maximum score is attached as a
 *         child result
 */
public OutlierResult run(Database database, Relation<O> relation) {
  DistanceQuery<O> distanceQuery = database.getDistanceQuery(relation, getDistanceFunction());
  RangeQuery<O> rangeQuery = database.getRangeQuery(distanceQuery);
  DBIDs ids = relation.getDBIDs();

  // Preprocessing: collect the radii of interest for each object.
  WritableDataStore<DoubleIntArrayList> radiiStore = DataStoreUtil.makeStorage(relation.getDBIDs(), DataStoreFactory.HINT_TEMP | DataStoreFactory.HINT_SORTED, DoubleIntArrayList.class);
  precomputeInterestingRadii(ids, rangeQuery, radiiStore);

  // Main step: score every object.
  FiniteProgress progress = null;
  if (LOG.isVerbose()) {
    progress = new FiniteProgress("LOCI scores", relation.size(), LOG);
  }
  WritableDoubleDataStore scoreStore = DataStoreUtil.makeDoubleStorage(relation.getDBIDs(), DataStoreFactory.HINT_STATIC);
  WritableDoubleDataStore radiusStore = DataStoreUtil.makeDoubleStorage(relation.getDBIDs(), DataStoreFactory.HINT_STATIC);
  DoubleMinMax scoreRange = new DoubleMinMax();
  // One accumulator reused across all objects and radii, to avoid
  // allocations.
  MeanVariance countStats = new MeanVariance();

  for (DBIDIter it = ids.iter(); it.valid(); it.advance()) {
    final DoubleIntArrayList radii = radiiStore.get(it);
    final int lastPos = radii.size() - 1;
    final double largestRadius = radii.getDouble(lastPos);
    final int largestCount = radii.getInt(lastPos);

    double bestScore;
    double bestRadius;
    if (largestCount < nmin) {
      // FIXME: when nmin was not fulfilled - what is the proper value then?
      bestScore = Double.POSITIVE_INFINITY;
      bestRadius = largestRadius;
    } else {
      bestScore = 0.0;
      bestRadius = 0;
      // Query the largest neighborhood once; smaller radii are prefixes of
      // it, because the scan below stops at the first neighbor beyond r.
      DoubleDBIDList candidates = rangeQuery.getRangeForDBID(it, largestRadius);
      final int numRadii = radii.size();
      for (int pos = 0; pos < numRadii; pos++) {
        // Evaluate only radii where the minimum neighborhood size is met.
        if (radii.getInt(pos) >= nmin) {
          final double r = radii.getDouble(pos);
          final double scaledRadius = alpha * r;
          // n(p_i, \alpha * r), looked up in the object's own list.
          final int ownCount = radii.getInt(radii.find(scaledRadius));

          // Collect n(p, \alpha * r) of all neighbors within radius r.
          countStats.reset();
          DoubleDBIDListIter nb = candidates.iter();
          // Negated ">" (rather than "<=") keeps the original comparison
          // semantics for every possible distance value.
          while (nb.valid() && !(nb.doubleValue() > r)) {
            DoubleIntArrayList otherRadii = radiiStore.get(nb);
            int otherCount = otherRadii.getInt(otherRadii.find(scaledRadius));
            countStats.put(otherCount);
            nb.advance();
          }

          // Only mean and (naive) standard deviation are needed; the
          // divisions by the mean cancel out of the normalized score.
          final double meanCount = countStats.getMean();
          final double stddevCount = countStats.getNaiveStddev();
          final double score = (meanCount - ownCount) / stddevCount;
          if (score > bestScore) {
            bestScore = score;
            bestRadius = r;
          }
        }
      }
    }

    scoreStore.putDouble(it, bestScore);
    radiusStore.putDouble(it, bestRadius);
    scoreRange.put(bestScore);
    LOG.incrementProcessed(progress);
  }
  LOG.ensureCompleted(progress);

  // Wrap the scores and attach the radii as an additional relation.
  DoubleRelation scores = new MaterializedDoubleRelation("LOCI normalized MDEF", "loci-mdef-outlier", scoreStore, relation.getDBIDs());
  OutlierScoreMeta meta = new QuotientOutlierScoreMeta(scoreRange.getMin(), scoreRange.getMax(), 0.0, Double.POSITIVE_INFINITY, 0.0);
  OutlierResult outlierResult = new OutlierResult(meta, scores);
  outlierResult.addChildResult(new MaterializedDoubleRelation("LOCI MDEF Radius", "loci-critical-radius", radiusStore, relation.getDBIDs()));
  return outlierResult;
}
use of de.lmu.ifi.dbs.elki.result.outlier.QuotientOutlierScoreMeta in project elki by elki-project.
the class ALOCI method run.
/**
 * Run the aLOCI algorithm on a relation.
 *
 * First, {@code g} quadtrees are built over a bounding box that is enlarged
 * to equal side lengths: one with a zero shift vector and the remaining ones
 * with random shift vectors. Then, for every object and every tree level, the
 * closest node C_i on that level and the closest node C_j at level
 * {@code l - alpha} are selected, and the maximum normalized MDEF over all
 * levels is stored as the score.
 *
 * @param database Database to process
 * @param relation Relation to process
 * @return Outlier result
 */
public OutlierResult run(Database database, Relation<O> relation) {
  final int dim = RelationUtil.dimensionality(relation);
  final Random random = rnd.getSingleThreadedRandom();
  FiniteProgress treeProgress = null;
  if (LOG.isVerbose()) {
    treeProgress = new FiniteProgress("Build aLOCI quadtress", g, LOG);
  }

  // Determine the extent of the data set.
  final double[][] bounds = RelationUtil.computeMinMax(relation);
  final double[] min = bounds[0];
  final double[] max = bounds[1];
  double longestSide = 0;
  for (int d = 0; d < dim; d++) {
    longestSide = MathUtil.max(longestSide, max[d] - min[d]);
  }
  // Grow every side symmetrically until all sides are equally long.
  for (int d = 0; d < dim; d++) {
    final double pad = (longestSide - (max[d] - min[d])) * .5;
    min[d] -= pad;
    max[d] += pad;
  }

  // The first tree uses no shift at all.
  List<ALOCIQuadTree> trees = new ArrayList<>(g);
  double[] zeroShift = new double[dim];
  ALOCIQuadTree tree = new ALOCIQuadTree(min, max, zeroShift, nmin, relation);
  trees.add(tree);
  LOG.incrementProcessed(treeProgress);
  /*
   * Build the remaining g-1 shifted quadtrees. The paper does not describe
   * this step clearly, so it is implemented in a way that achieves good
   * results with the test data.
   */
  for (int t = 1; t < g; t++) {
    double[] shift = new double[dim];
    for (int d = 0; d < dim; d++) {
      shift[d] = random.nextDouble() * (max[d] - min[d]);
    }
    tree = new ALOCIQuadTree(min, max, shift, nmin, relation);
    trees.add(tree);
    LOG.incrementProcessed(treeProgress);
  }
  LOG.ensureCompleted(treeProgress);

  // aLOCI main loop: evaluate every object.
  FiniteProgress scoreProgress = null;
  if (LOG.isVerbose()) {
    scoreProgress = new FiniteProgress("Compute aLOCI scores", relation.size(), LOG);
  }
  WritableDoubleDataStore scoreStore = DataStoreUtil.makeDoubleStorage(relation.getDBIDs(), DataStoreFactory.HINT_STATIC);
  DoubleMinMax scoreRange = new DoubleMinMax();
  for (DBIDIter it = relation.iterDBIDs(); it.valid(); it.advance()) {
    final O obj = relation.get(it);
    double bestScore = 0;
    // Walk down the levels until no tree has a node on the current level.
    for (int level = 0;; level++) {
      // Closest C_i: only nodes exactly on this level qualify.
      Node ci = null;
      for (int t = 0; t < g; t++) {
        Node candidate = trees.get(t).findClosestNode(obj, level);
        // TODO: always use manhattan?
        if (candidate.getLevel() == level && (ci == null || distFunc.distance(ci, obj) > distFunc.distance(candidate, obj))) {
          ci = candidate;
        }
      }
      if (ci == null) {
        // No matching tree for this level.
        break;
      }

      // Closest C_j: never replace the current choice by a node of a lower
      // level number.
      Node cj = null;
      for (int t = 0; t < g; t++) {
        Node candidate = trees.get(t).findClosestNode(ci, level - alpha);
        // TODO: allow higher levels or not?
        // TODO: always use manhattan?
        if (cj == null) {
          cj = candidate;
        } else if (candidate.getLevel() >= cj.getLevel() && distFunc.distance(cj, ci) > distFunc.distance(candidate, ci)) {
          cj = candidate;
        }
      }
      if (cj != null) {
        double levelScore = calculate_MDEF_norm(cj, ci);
        bestScore = MathUtil.max(bestScore, levelScore);
      }
      // Without a C_j there is nothing to score on this level; move on.
    }
    // Store results
    scoreStore.putDouble(it, bestScore);
    scoreRange.put(bestScore);
    LOG.incrementProcessed(scoreProgress);
  }
  LOG.ensureCompleted(scoreProgress);

  DoubleRelation scores = new MaterializedDoubleRelation("aLOCI normalized MDEF", "aloci-mdef-outlier", scoreStore, relation.getDBIDs());
  OutlierScoreMeta meta = new QuotientOutlierScoreMeta(scoreRange.getMin(), scoreRange.getMax(), 0.0, Double.POSITIVE_INFINITY);
  return new OutlierResult(meta, scores);
}
use of de.lmu.ifi.dbs.elki.result.outlier.QuotientOutlierScoreMeta in project elki by elki-project.
the class FlexibleLOF method doRunInTime.
/**
 * Runs the generalized LOF computation and returns a
 * {@link FlexibleLOF.LOFResult} that bundles the outlier result with the
 * queries and intermediate values, which may be needed by an OnlineLOF
 * algorithm.
 *
 * The local reachability densities are computed with the reachability kNN
 * query (progress step 2), and the LOF values with the reference kNN query
 * (progress step 3).
 *
 * @param ids Object ids
 * @param kNNRefer the kNN query w.r.t. reference neighborhood distance
 *        function
 * @param kNNReach the kNN query w.r.t. reachability distance function
 * @param stepprog Progress logger
 * @return LOF result
 */
protected LOFResult<O> doRunInTime(DBIDs ids, KNNQuery<O> kNNRefer, KNNQuery<O> kNNReach, StepProgress stepprog) {
  // Both queries are mandatory; abort early when the database offers none.
  if (kNNRefer == null) {
    throw new AbortException("No kNN queries supported by database for reference neighborhood distance function.");
  }
  if (kNNReach == null) {
    throw new AbortException("No kNN queries supported by database for reachability distance function.");
  }

  // Step 2: local reachability densities.
  LOG.beginStep(stepprog, 2, "Computing LRDs.");
  final WritableDoubleDataStore lrds = DataStoreUtil.makeDoubleStorage(ids, DataStoreFactory.HINT_HOT | DataStoreFactory.HINT_TEMP);
  computeLRDs(kNNReach, ids, lrds);

  // Step 3: LOF value of each object; the min/max is tracked for the score
  // metadata.
  LOG.beginStep(stepprog, 3, "Computing LOFs.");
  final WritableDoubleDataStore lofs = DataStoreUtil.makeDoubleStorage(ids, DataStoreFactory.HINT_STATIC);
  final DoubleMinMax range = new DoubleMinMax();
  computeLOFs(kNNRefer, ids, lrds, lofs, range);
  LOG.setCompleted(stepprog);

  // Assemble the result representation.
  final DoubleRelation scores = new MaterializedDoubleRelation("Local Outlier Factor", "lof-outlier", lofs, ids);
  final OutlierScoreMeta meta = new QuotientOutlierScoreMeta(range.getMin(), range.getMax(), 0.0, Double.POSITIVE_INFINITY, 1.0);
  final OutlierResult outliers = new OutlierResult(meta, scores);
  return new LOFResult<>(outliers, kNNRefer, kNNReach, lrds, lofs);
}
use of de.lmu.ifi.dbs.elki.result.outlier.QuotientOutlierScoreMeta in project elki by elki-project.
the class LDOF method run.
/**
 * Compute the LDOF outlier score of every object in the relation.
 *
 * The score of an object is the mean distance to its k nearest neighbors
 * divided by the mean pairwise distance among those neighbors; the object
 * itself is excluded from both means. A quotient that is NaN or infinite is
 * replaced by 1.0.
 *
 * @param database Database to process
 * @param relation Relation to process
 * @return Outlier result
 */
public OutlierResult run(Database database, Relation<O> relation) {
  DistanceQuery<O> distFunc = database.getDistanceQuery(relation, getDistanceFunction());
  KNNQuery<O> knnQuery = database.getKNNQuery(distFunc, k);
  // Observed score range, needed for the score metadata.
  DoubleMinMax range = new DoubleMinMax();
  // Storage for the LDOF values.
  WritableDoubleDataStore scores = DataStoreUtil.makeDoubleStorage(relation.getDBIDs(), DataStoreFactory.HINT_HOT | DataStoreFactory.HINT_TEMP);

  if (LOG.isVerbose()) {
    LOG.verbose("Computing LDOFs");
  }
  FiniteProgress progress = null;
  if (LOG.isVerbose()) {
    progress = new FiniteProgress("LDOF for objects", relation.size(), LOG);
  }

  // Accumulators, reset and reused for every object.
  Mean meanKnnDist = new Mean();
  Mean meanInnerDist = new Mean();
  for (DBIDIter it = relation.iterDBIDs(); it.valid(); it.advance()) {
    final KNNList knn = knnQuery.getKNNForDBID(it, k);
    meanKnnDist.reset();
    meanInnerDist.reset();
    final DoubleDBIDListIter outer = knn.iter();
    final DoubleDBIDListIter inner = knn.iter();
    while (outer.valid()) {
      // The query object itself does not count as a neighbor.
      if (!DBIDUtil.equal(outer, it)) {
        meanKnnDist.put(outer.doubleValue());
        // Visit each unordered pair once: start right after the outer
        // position.
        inner.seek(outer.getOffset() + 1);
        while (inner.valid()) {
          if (!DBIDUtil.equal(inner, it)) {
            meanInnerDist.put(distFunc.distance(outer, inner));
          }
          inner.advance();
        }
      }
      outer.advance();
    }
    final double quotient = meanKnnDist.getMean() / meanInnerDist.getMean();
    // Fall back to 1.0 when the quotient is undefined or unbounded.
    final double ldof = (Double.isNaN(quotient) || Double.isInfinite(quotient)) ? 1.0 : quotient;
    scores.putDouble(it, ldof);
    range.put(ldof);
    LOG.incrementProcessed(progress);
  }
  LOG.ensureCompleted(progress);

  // Assemble the result representation.
  DoubleRelation scoreRelation = new MaterializedDoubleRelation("LDOF Outlier Score", "ldof-outlier", scores, relation.getDBIDs());
  OutlierScoreMeta meta = new QuotientOutlierScoreMeta(range.getMin(), range.getMax(), 0.0, Double.POSITIVE_INFINITY, LDOF_BASELINE);
  return new OutlierResult(meta, scoreRelation);
}
use of de.lmu.ifi.dbs.elki.result.outlier.QuotientOutlierScoreMeta in project elki by elki-project.
the class SimplifiedLOF method run.
/**
 * Run the Simplified LOF algorithm.
 *
 * The computation proceeds in three logged steps: obtain a precomputed kNN
 * query for the configured distance function, compute the simplified
 * densities, and compute the simplified LOF scores from them.
 *
 * @param database Database to query
 * @param relation Data to process
 * @return LOF outlier result
 */
public OutlierResult run(Database database, Relation<O> relation) {
  final StepProgress steps = LOG.isVerbose() ? new StepProgress("Simplified LOF", 3) : null;
  final DBIDs ids = relation.getDBIDs();

  // Step 1: neighborhoods.
  LOG.beginStep(steps, 1, "Materializing neighborhoods w.r.t. distance function.");
  final KNNQuery<O> neighbors = DatabaseUtil.precomputedKNNQuery(database, relation, getDistanceFunction(), k);

  // Step 2: densities.
  LOG.beginStep(steps, 2, "Computing densities.");
  final WritableDoubleDataStore densities = DataStoreUtil.makeDoubleStorage(ids, DataStoreFactory.HINT_HOT | DataStoreFactory.HINT_TEMP);
  computeSimplifiedLRDs(ids, neighbors, densities);

  // Step 3: scores, tracking the observed range along the way.
  LOG.beginStep(steps, 3, "Computing SLOFs.");
  final WritableDoubleDataStore slofs = DataStoreUtil.makeDoubleStorage(ids, DataStoreFactory.HINT_STATIC);
  final DoubleMinMax range = new DoubleMinMax();
  computeSimplifiedLOFs(ids, neighbors, densities, slofs, range);
  LOG.setCompleted(steps);

  // Assemble the result representation.
  final DoubleRelation scores = new MaterializedDoubleRelation("Simplified Local Outlier Factor", "simplified-lof-outlier", slofs, ids);
  final OutlierScoreMeta meta = new QuotientOutlierScoreMeta(range.getMin(), range.getMax(), 0., Double.POSITIVE_INFINITY, 1.);
  return new OutlierResult(meta, scores);
}
Aggregations