use of de.lmu.ifi.dbs.elki.database.ids.DoubleDBIDList in project elki by elki-project.
the class LOCI method run.
/**
 * Compute LOCI outlier scores for all objects of a relation.
 * <p>
 * After a preprocessing pass that collects the critical distances of every
 * object, each object is assigned the largest normalized MDEF value observed
 * over its critical distances, together with the radius where it occurred.
 *
 * @param database Database to process
 * @param relation Relation to process
 * @return Outlier result holding the normalized MDEF scores, with the
 *         corresponding radii attached as a child result
 */
public OutlierResult run(Database database, Relation<O> relation) {
  DistanceQuery<O> distFunc = database.getDistanceQuery(relation, getDistanceFunction());
  RangeQuery<O> rangeQuery = database.getRangeQuery(distFunc);
  DBIDs ids = relation.getDBIDs();

  // Preprocessing: per-object list of critical distances with their counts.
  WritableDataStore<DoubleIntArrayList> criticalDists = DataStoreUtil.makeStorage(relation.getDBIDs(), DataStoreFactory.HINT_TEMP | DataStoreFactory.HINT_SORTED, DoubleIntArrayList.class);
  precomputeInterestingRadii(ids, rangeQuery, criticalDists);

  // Main step: one score and one radius per object.
  FiniteProgress progressLOCI = LOG.isVerbose() ? new FiniteProgress("LOCI scores", relation.size(), LOG) : null;
  WritableDoubleDataStore scoreStore = DataStoreUtil.makeDoubleStorage(relation.getDBIDs(), DataStoreFactory.HINT_STATIC);
  WritableDoubleDataStore radiusStore = DataStoreUtil.makeDoubleStorage(relation.getDBIDs(), DataStoreFactory.HINT_STATIC);
  DoubleMinMax scoreRange = new DoubleMinMax();
  // One accumulator, reset for every radius, to avoid repeated allocation.
  MeanVariance stats = new MeanVariance();

  for (DBIDIter it = ids.iter(); it.valid(); it.advance()) {
    final DoubleIntArrayList own = criticalDists.get(it);
    final int last = own.size() - 1;
    final double largestRadius = own.getDouble(last);
    final int largestCount = own.getInt(last);

    double bestScore;
    double bestRadius;
    if (largestCount < nmin) {
      // FIXME: when nmin was not fulfilled - what is the proper value then?
      bestScore = Double.POSITIVE_INFINITY;
      bestRadius = largestRadius;
    } else {
      bestScore = 0.0;
      bestRadius = 0;
      // The largest neighborhood that any critical distance can require.
      final DoubleDBIDList candidates = rangeQuery.getRangeForDBID(it, largestRadius);
      final int numDists = own.size();
      for (int pos = 0; pos < numDists; pos++) {
        // Skip radii at which the minimum neighborhood size is not reached.
        if (own.getInt(pos) < nmin) {
          continue;
        }
        final double radius = own.getDouble(pos);
        final double scaledRadius = alpha * radius;
        // n(p_i, \alpha * r), looked up in the object's own list.
        final int ownCount = own.getInt(own.find(scaledRadius));

        // Collect n(q, \alpha * r) over all neighbors q within the radius.
        stats.reset();
        DoubleDBIDListIter nb = candidates.iter();
        // Negated "greater than", so the stop condition matches a break on
        // "distance > radius" exactly.
        while (nb.valid() && !(nb.doubleValue() > radius)) {
          final DoubleIntArrayList other = criticalDists.get(nb);
          stats.put(other.getInt(other.find(scaledRadius)));
          nb.advance();
        }

        // Normalized MDEF; the common division by the mean is omitted in
        // both numerator and denominator.
        final double score = (stats.getMean() - ownCount) / stats.getNaiveStddev();
        if (score > bestScore) {
          bestScore = score;
          bestRadius = radius;
        }
      }
    }

    scoreStore.putDouble(it, bestScore);
    radiusStore.putDouble(it, bestRadius);
    scoreRange.put(bestScore);
    LOG.incrementProcessed(progressLOCI);
  }
  LOG.ensureCompleted(progressLOCI);

  DoubleRelation scoreResult = new MaterializedDoubleRelation("LOCI normalized MDEF", "loci-mdef-outlier", scoreStore, relation.getDBIDs());
  OutlierScoreMeta scoreMeta = new QuotientOutlierScoreMeta(scoreRange.getMin(), scoreRange.getMax(), 0.0, Double.POSITIVE_INFINITY, 0.0);
  OutlierResult result = new OutlierResult(scoreMeta, scoreResult);
  result.addChildResult(new MaterializedDoubleRelation("LOCI MDEF Radius", "loci-critical-radius", radiusStore, relation.getDBIDs()));
  return result;
}
use of de.lmu.ifi.dbs.elki.database.ids.DoubleDBIDList in project elki by elki-project.
the class DBOutlierDetection method computeOutlierScores.
/**
 * Assign a binary outlier score to every object of the relation.
 * <p>
 * Uses an optimized kNN query when the database offers one, otherwise an
 * optimized range query, and falls back to a linear scan with early
 * termination when neither is available.
 *
 * @param database Database to query
 * @param relation Relation to process
 * @param d Distance threshold of the neighborhood
 * @return Data store containing 1 for objects flagged as outliers and 0 for
 *         all other objects
 */
@Override
protected DoubleDataStore computeOutlierScores(Database database, Relation<O> relation, double d) {
  DistanceQuery<O> distFunc = database.getDistanceQuery(relation, getDistanceFunction());
  // A kNN query is preferred, because it can usually terminate earlier; the
  // range query is only requested when no optimized kNN query exists.
  KNNQuery<O> knnQuery = database.getKNNQuery(distFunc, DatabaseQuery.HINT_OPTIMIZED_ONLY);
  RangeQuery<O> rangeQuery = null;
  if (knnQuery == null) {
    rangeQuery = database.getRangeQuery(distFunc, DatabaseQuery.HINT_OPTIMIZED_ONLY, d);
  }

  // Maximum number of objects in the D-neighborhood of an outlier.
  int m = (int) Math.floor((distFunc.getRelation().size()) * (1 - p));

  WritableDoubleDataStore scores = DataStoreUtil.makeDoubleStorage(distFunc.getRelation().getDBIDs(), DataStoreFactory.HINT_STATIC);
  FiniteProgress prog = LOG.isVerbose() ? new FiniteProgress("DBOutlier detection", distFunc.getRelation().size(), LOG) : null;

  if (knnQuery != null) {
    // kNN strategy: if the kNN distance for k = m is more than d, the object
    // is an outlier.
    for (DBIDIter it = relation.iterDBIDs(); it.valid(); it.advance()) {
      final KNNList nearest = knnQuery.getKNNForDBID(it, m);
      final boolean outlier = nearest.getKNNDistance() > d;
      scores.putDouble(it, outlier ? 1. : 0.);
      LOG.incrementProcessed(prog);
    }
  } else if (rangeQuery != null) {
    // Range strategy: fewer than m objects within distance d.
    for (DBIDIter it = relation.iterDBIDs(); it.valid(); it.advance()) {
      final DoubleDBIDList within = rangeQuery.getRangeForDBID(it, d);
      final boolean outlier = within.size() < m;
      scores.putDouble(it, outlier ? 1. : 0.);
      LOG.incrementProcessed(prog);
    }
  } else {
    // Linear scan over all objects, stopping as soon as m neighbors within
    // distance d have been counted.
    for (DBIDIter it = relation.iterDBIDs(); it.valid(); it.advance()) {
      int found = 0;
      for (DBIDIter other = relation.iterDBIDs(); other.valid(); other.advance()) {
        // Short-circuit: the counter only advances for objects within d.
        if (distFunc.distance(it, other) <= d && ++found >= m) {
          break;
        }
      }
      scores.putDouble(it, (found < m) ? 1.0 : 0);
      LOG.incrementProcessed(prog);
    }
  }
  LOG.ensureCompleted(prog);
  return scores;
}
use of de.lmu.ifi.dbs.elki.database.ids.DoubleDBIDList in project elki by elki-project.
the class ReferenceBasedOutlierDetection method run.
/**
 * Compute reference-based outlier scores for the given relation.
 * <p>
 * A density value is accumulated per object over all reference points, then
 * scaled by the maximum density and inverted, so that the stored score is
 * {@code 1 - density * scale}.
 *
 * @param database Database
 * @param relation Relation to process
 * @return Outlier result, with the reference points attached as child result
 * @throws AbortException if there are no reference points, or if k is not
 *         smaller than the number of objects
 */
public OutlierResult run(Database database, Relation<? extends NumberVector> relation) {
  @SuppressWarnings("unchecked")
  PrimitiveDistanceQuery<? super NumberVector> distanceQuery = (PrimitiveDistanceQuery<? super NumberVector>) database.getDistanceQuery(relation, distanceFunction);
  Collection<? extends NumberVector> references = refp.getReferencePoints(relation);
  if (references.isEmpty()) {
    throw new AbortException("Cannot compute ROS without reference points!");
  }
  DBIDs ids = relation.getDBIDs();
  if (k >= ids.size()) {
    throw new AbortException("k must not be chosen larger than the database size!");
  }

  // Holds the density values first, and the final scores afterwards.
  WritableDoubleDataStore values = DataStoreUtil.makeDoubleStorage(ids, DataStoreFactory.HINT_STATIC | DataStoreFactory.HINT_HOT, Double.NaN);

  // Density estimation, one pass per reference point.
  for (NumberVector reference : references) {
    DoubleDBIDList distances = computeDistanceVector(reference, relation, distanceQuery);
    updateDensities(values, distances);
  }

  // Find the maximum density.
  DoubleMinMax range = new DoubleMinMax();
  for (DBIDIter it = relation.iterDBIDs(); it.valid(); it.advance()) {
    range.put(values.doubleValue(it));
  }

  // Scale factor; left at 1 when the maximum density is not positive.
  double scale = 1.;
  if (range.getMax() > 0.) {
    scale = 1. / range.getMax();
  }

  // The same tracker is reused for the extrema of the final scores.
  range.reset();
  for (DBIDIter it = relation.iterDBIDs(); it.valid(); it.advance()) {
    final double ros = 1 - (values.doubleValue(it) * scale);
    range.put(ros);
    values.putDouble(it, ros);
  }

  DoubleRelation scoreResult = new MaterializedDoubleRelation("Reference-points Outlier Scores", "reference-outlier", values, relation.getDBIDs());
  OutlierScoreMeta scoreMeta = new BasicOutlierScoreMeta(range.getMin(), range.getMax(), 0., 1., 0.);
  OutlierResult result = new OutlierResult(scoreMeta, scoreResult);
  // Attach the reference points, so that a visualizer can find them in the
  // result.
  result.addChildResult(new ReferencePointsResult<>("Reference points", "reference-points", references));
  return result;
}
use of de.lmu.ifi.dbs.elki.database.ids.DoubleDBIDList in project elki by elki-project.
the class MaterializedKNNAndRKNNPreprocessorTest method testRKNNQueries.
/**
 * Verify that a linear-scan rkNN query and a preprocessor-backed rkNN query
 * return the same result for every object of the relation.
 * <p>
 * For each object, the two result lists are first walked in parallel with a
 * tolerant check (same id or same distance), then their sizes are compared,
 * and finally every position must agree in both id and distance.
 *
 * @param rep Relation whose objects are used as query objects
 * @param lin_rknn_query Linear scan rkNN query, serving as reference
 * @param preproc_rknn_query Preprocessor based rkNN query under test
 * @param k Number of neighbors
 */
private void testRKNNQueries(Relation<DoubleVector> rep, RKNNQuery<DoubleVector> lin_rknn_query, RKNNQuery<DoubleVector> preproc_rknn_query, int k) {
  ArrayDBIDs sample = DBIDUtil.ensureArray(rep.getDBIDs());
  List<? extends DoubleDBIDList> lin_rknn_ids = lin_rknn_query.getRKNNForBulkDBIDs(sample, k);
  List<? extends DoubleDBIDList> preproc_rknn_ids = preproc_rknn_query.getRKNNForBulkDBIDs(sample, k);
  for (int i = 0; i < rep.size(); i++) {
    DoubleDBIDList lin_rknn = lin_rknn_ids.get(i);
    DoubleDBIDList pre_rknn = preproc_rknn_ids.get(i);
    DoubleDBIDListIter lin = lin_rknn.iter(), pre = pre_rknn.iter();
    // Only the two iterators advance here. The sample index i must not be
    // touched by this loop, otherwise the outer loop skips query objects and
    // leaves their results unverified.
    for (; lin.valid() && pre.valid(); lin.advance(), pre.advance()) {
      assertTrue(DBIDUtil.equal(lin, pre) || lin.doubleValue() == pre.doubleValue());
    }
    assertEquals("rkNN sizes do not agree for k=" + k, lin_rknn.size(), pre_rknn.size());
    // Strict positional comparison of ids and distances.
    for (int j = 0; j < lin_rknn.size(); j++) {
      assertTrue("rkNNs of linear scan and preprocessor do not match!", DBIDUtil.equal(lin_rknn.get(j), pre_rknn.get(j)));
      assertEquals("rkNNs of linear scan and preprocessor do not match!", lin_rknn.get(j).doubleValue(), pre_rknn.get(j).doubleValue(), 0.);
    }
  }
}
use of de.lmu.ifi.dbs.elki.database.ids.DoubleDBIDList in project elki by elki-project.
the class AbstractIndexStructureTest method testExactCosine.
/**
 * Actual test routine, for cosine distance.
 * <p>
 * Builds the test database from the given parameters and, for each query
 * type that is expected, checks the class of the query returned by the
 * database and compares the query result against the expected distances and
 * vectors.
 *
 * @param inputparams Parameters for building the database; a fixed DBID
 *        filter is added to them
 * @param expectKNNQuery Expected class of the kNN query, or {@code null} to
 *        skip the kNN part
 * @param expectRangeQuery Expected class of the range query, or
 *        {@code null} to skip the range part
 */
protected void testExactCosine(ListParameterization inputparams, Class<?> expectKNNQuery, Class<?> expectRangeQuery) {
  // Use a fixed DBID - historically, we used 1 indexed - to reduce random
  // variation in results due to different hash codes everywhere.
  inputparams.addParameter(AbstractDatabaseConnection.Parameterizer.FILTERS_ID, new FixedDBIDsFilter(1));
  Database db = AbstractSimpleAlgorithmTest.makeSimpleDatabase(dataset, shoulds, inputparams);
  Relation<DoubleVector> rep = db.getRelation(TypeUtil.DOUBLE_VECTOR_FIELD);
  DistanceQuery<DoubleVector> dist = db.getDistanceQuery(rep, CosineDistanceFunction.STATIC);

  if (expectKNNQuery != null) {
    // Query the k nearest neighbors of the query point.
    DoubleVector query = DoubleVector.wrap(querypoint);
    KNNQuery<DoubleVector> knnq = db.getKNNQuery(dist, k);
    assertTrue("Returned knn query is not of expected class: expected " + expectKNNQuery + " got " + knnq.getClass(), expectKNNQuery.isAssignableFrom(knnq.getClass()));
    KNNList found = knnq.getKNNForObject(query, k);
    assertEquals("Result size does not match expectation!", cosshouldd.length, found.size());
    // Compare each neighbor against the expected distance and vector.
    DoubleDBIDListIter hit = found.iter();
    for (int pos = 0; hit.valid(); pos++, hit.advance()) {
      assertEquals("Expected distance doesn't match.", cosshouldd[pos], hit.doubleValue(), 1e-15);
      DoubleVector actual = rep.get(hit);
      DoubleVector expected = DoubleVector.wrap(cosshouldc[pos]);
      assertEquals("Expected vector doesn't match: " + actual.toString(), 0.0, dist.distance(actual, expected), 1e-15);
    }
  }

  if (expectRangeQuery != null) {
    // Range query around the query point.
    DoubleVector query = DoubleVector.wrap(querypoint);
    RangeQuery<DoubleVector> rangeq = db.getRangeQuery(dist, coseps);
    assertTrue("Returned range query is not of expected class: expected " + expectRangeQuery + " got " + rangeq.getClass(), expectRangeQuery.isAssignableFrom(rangeq.getClass()));
    DoubleDBIDList found = rangeq.getRangeForObject(query, coseps);
    assertEquals("Result size does not match expectation!", cosshouldd.length, found.size());
    // Compare each neighbor against the expected distance and vector.
    DoubleDBIDListIter hit = found.iter();
    for (int pos = 0; hit.valid(); pos++, hit.advance()) {
      assertEquals("Expected distance doesn't match.", cosshouldd[pos], hit.doubleValue(), 1e-15);
      DoubleVector actual = rep.get(hit);
      DoubleVector expected = DoubleVector.wrap(cosshouldc[pos]);
      assertEquals("Expected vector doesn't match: " + actual.toString(), 0.0, dist.distance(actual, expected), 1e-15);
    }
  }
}
Aggregations