use of de.lmu.ifi.dbs.elki.database.ids.DoubleDBIDListIter in project elki by elki-project.
the class OPTICSOF method run.
/**
 * Perform OPTICS-based outlier detection.
 *
 * The computation makes three passes over the relation:
 * <ol>
 * <li>For every object, store its {@code minpts} nearest neighbors, the kNN
 * distance of that list (used as core distance), and the number of objects
 * returned by a range query with that radius.</li>
 * <li>For every object, sum the reachability distances
 * {@code max(coreDistance(neighbor), distance(object, neighbor))} over its
 * stored neighbors, and store the neighborhood size divided by this sum as
 * local reachability density (lrd).</li>
 * <li>For every object, sum the ratios {@code lrd(neighbor) / lrd(object)}
 * over its stored neighbors, divide by the neighborhood size, and store this
 * as the outlier factor.</li>
 * </ol>
 *
 * @param database Database
 * @param relation Relation
 * @return Outlier detection result
 */
public OutlierResult run(Database database, Relation<O> relation) {
  DistanceQuery<O> distQuery = database.getDistanceQuery(relation, getDistanceFunction());
  KNNQuery<O> knnQuery = database.getKNNQuery(distQuery, minpts);
  RangeQuery<O> rangeQuery = database.getRangeQuery(distQuery);
  DBIDs ids = relation.getDBIDs();

  // Pass 1: neighborhoods, core distances and neighborhood sizes.
  // FIXME: implicit preprocessor.
  WritableDataStore<KNNList> nMinPts = DataStoreUtil.makeStorage(ids, DataStoreFactory.HINT_HOT | DataStoreFactory.HINT_TEMP, KNNList.class);
  WritableDoubleDataStore coreDistance = DataStoreUtil.makeDoubleStorage(ids, DataStoreFactory.HINT_HOT | DataStoreFactory.HINT_TEMP);
  WritableIntegerDataStore minPtsNeighborhoodSize = DataStoreUtil.makeIntegerStorage(ids, DataStoreFactory.HINT_HOT | DataStoreFactory.HINT_TEMP, -1);
  for (DBIDIter iditer = relation.iterDBIDs(); iditer.valid(); iditer.advance()) {
    KNNList minptsNeighbours = knnQuery.getKNNForDBID(iditer, minpts);
    double d = minptsNeighbours.getKNNDistance();
    nMinPts.put(iditer, minptsNeighbours);
    coreDistance.putDouble(iditer, d);
    // The range query may return more than minpts objects when there are ties
    // at distance d, hence the size is stored separately.
    minPtsNeighborhoodSize.put(iditer, rangeQuery.getRangeForDBID(iditer, d).size());
  }

  // Pass 2: local reachability densities.
  // The individual reachability distances are only needed for their sum, so
  // they are accumulated directly instead of being kept per object.
  WritableDoubleDataStore lrds = DataStoreUtil.makeDoubleStorage(ids, DataStoreFactory.HINT_HOT | DataStoreFactory.HINT_TEMP);
  for (DBIDIter iditer = relation.iterDBIDs(); iditer.valid(); iditer.advance()) {
    double rdsum = 0;
    // TODO: optimize for double distances
    for (DoubleDBIDListIter neighbor = nMinPts.get(iditer).iter(); neighbor.valid(); neighbor.advance()) {
      double coreDist = coreDistance.doubleValue(neighbor);
      double dist = distQuery.distance(iditer, neighbor);
      rdsum += MathUtil.max(coreDist, dist);
    }
    // NOTE(review): if rdsum is 0 (e.g. all neighbors at distance 0) this
    // yields Infinity, and pass 3 may then produce NaN ratios - confirm
    // whether duplicate points need special handling here.
    lrds.putDouble(iditer, minPtsNeighborhoodSize.intValue(iditer) / rdsum);
  }

  // Pass 3: outlier factors.
  DoubleMinMax ofminmax = new DoubleMinMax();
  WritableDoubleDataStore ofs = DataStoreUtil.makeDoubleStorage(ids, DataStoreFactory.HINT_STATIC);
  for (DBIDIter iditer = relation.iterDBIDs(); iditer.valid(); iditer.advance()) {
    // Density of the current object; invariant for the neighbor loop.
    final double lrd = lrds.doubleValue(iditer);
    double of = 0;
    for (DBIDIter neighbor = nMinPts.get(iditer).iter(); neighbor.valid(); neighbor.advance()) {
      double lrdN = lrds.doubleValue(neighbor);
      of = of + lrdN / lrd;
    }
    of = of / minPtsNeighborhoodSize.intValue(iditer);
    ofs.putDouble(iditer, of);
    // update minimum and maximum
    ofminmax.put(of);
  }

  // Build result representation.
  DoubleRelation scoreResult = new MaterializedDoubleRelation("OPTICS Outlier Scores", "optics-outlier", ofs, relation.getDBIDs());
  OutlierScoreMeta scoreMeta = new QuotientOutlierScoreMeta(ofminmax.getMin(), ofminmax.getMax(), 0.0, Double.POSITIVE_INFINITY, 1.0);
  return new OutlierResult(scoreMeta, scoreResult);
}
use of de.lmu.ifi.dbs.elki.database.ids.DoubleDBIDListIter in project elki by elki-project.
the class LinearScanRKNNQuery method getRKNNForDBID.
/**
 * Reverse kNN by linear scan: computes the kNN list of every object in the
 * relation and collects those objects whose list contains the query object.
 *
 * @param id Query object
 * @param k Number of neighbors
 * @return Objects that have {@code id} in their kNN list, each paired with the
 *         distance recorded in that list, after calling {@code sort()} on the
 *         result list
 */
@Override
public DoubleDBIDList getRKNNForDBID(DBIDRef id, int k) {
  final ArrayDBIDs candidates = DBIDUtil.ensureArray(relation.getDBIDs());
  final List<? extends KNNList> neighborhoods = knnQuery.getKNNForBulkDBIDs(candidates, k);
  final ModifiableDoubleDBIDList result = DBIDUtil.newDistanceDBIDList();
  // pos tracks the index of the candidate within the bulk query result.
  int pos = 0;
  for (DBIDIter candidate = candidates.iter(); candidate.valid(); candidate.advance(), pos++) {
    DoubleDBIDListIter nn = neighborhoods.get(pos).iter();
    while (nn.valid()) {
      if (DBIDUtil.equal(nn, id)) {
        // The candidate has the query object among its neighbors.
        result.add(nn.doubleValue(), candidate);
      }
      nn.advance();
    }
  }
  result.sort();
  return result;
}
use of de.lmu.ifi.dbs.elki.database.ids.DoubleDBIDListIter in project elki by elki-project.
the class KDEOS method estimateDensities.
/**
 * Perform the kernel density estimation step.
 *
 * Every object in {@code ids} gets one density slot per value of k in
 * {@code kmin..kmax}. For each object and each such k, a bandwidth is derived
 * from the sum of the first k neighbor distances, and kernel contributions
 * are added to the density slots of the object's neighbors.
 *
 * @param rel Relation to query
 * @param knnq kNN query
 * @param ids IDs to process
 * @param densities Density storage
 */
protected void estimateDensities(Relation<O> rel, KNNQuery<O> knnq, final DBIDs ids, WritableDataStore<double[]> densities) {
  final int dim = dimensionality(rel);
  final int knum = kmax + 1 - kmin;
  // Allocate one density vector (one entry per k) for each object.
  for (DBIDIter init = ids.iter(); init.valid(); init.advance()) {
    densities.put(init, new double[knum]);
  }
  // Distribute densities:
  FiniteProgress prog = LOG.isVerbose() ? new FiniteProgress("Computing densities", ids.size(), LOG) : null;
  // Upper bound for the inverse bandwidth; unbounded without a minimum bandwidth.
  final double iminbw;
  if (minBandwidth > 0.) {
    iminbw = 1. / (minBandwidth * scale);
  } else {
    iminbw = Double.POSITIVE_INFINITY;
  }
  for (DBIDIter iter = ids.iter(); iter.valid(); iter.advance()) {
    final KNNList neighbors = knnq.getKNNForDBID(iter, kmax + 1);
    double distsum = 0.;
    int slot = 0;
    int k = 1;
    DoubleDBIDListIter kth = neighbors.iter();
    while (k <= kmax && kth.valid()) {
      distsum += kth.doubleValue();
      // Distances are accumulated for every k, but densities only for k >= kmin.
      if (k >= kmin) {
        final double ibw = Math.min(k / (distsum * scale), iminbw);
        final double sca = MathUtil.powi(ibw, dim);
        for (DoubleDBIDListIter nb = neighbors.iter(); nb.valid(); nb.advance()) {
          final double dist = nb.doubleValue();
          // NaNs with duplicate points! Fall back to an indicator on distance 0
          // when the scaling factor is not a finite value.
          final double dens = (sca < Double.POSITIVE_INFINITY) //
              ? sca * kernel.density(dist * ibw) //
              : (dist == 0. ? 1. : 0.);
          densities.get(nb)[slot] += dens;
          if (dens < CUTOFF) {
            // Stop at the first contribution below the cutoff.
            break;
          }
        }
        ++slot;
      }
      kth.advance();
      k++;
    }
    LOG.incrementProcessed(prog);
  }
  LOG.ensureCompleted(prog);
}
use of de.lmu.ifi.dbs.elki.database.ids.DoubleDBIDListIter in project elki by elki-project.
the class KDEOS method computeOutlierScores.
/**
 * Compute the final KDEOS scores.
 *
 * For each object and each k, the object's density is compared to the mean
 * and sample standard deviation of the densities of its neighbors. The
 * resulting standardized differences are averaged over all k and mapped
 * through the standard normal CDF.
 *
 * @param knnq kNN query
 * @param ids IDs to process
 * @param densities Density estimates
 * @param kdeos Score outputs
 * @param minmax Minimum and maximum scores
 */
protected void computeOutlierScores(KNNQuery<O> knnq, final DBIDs ids, WritableDataStore<double[]> densities, WritableDoubleDataStore kdeos, DoubleMinMax minmax) {
  final int knum = kmax + 1 - kmin;
  FiniteProgress prog = LOG.isVerbose() ? new FiniteProgress("Computing KDEOS scores", ids.size(), LOG) : null;
  // Reusable buffer: one row per k, one column per neighbor.
  double[][] scratch = new double[knum][kmax + 5];
  MeanVariance mv = new MeanVariance();
  for (DBIDIter iter = ids.iter(); iter.valid(); iter.advance()) {
    final double[] own = densities.get(iter);
    final KNNList neighbors = knnq.getKNNForDBID(iter, kmax + 1);
    final int nsize = neighbors.size();
    if (nsize > scratch[0].length) {
      // Grow the buffer, again with some extra margin.
      scratch = new double[knum][nsize + 5];
    }
    // Copy the density vectors of all neighbors into the buffer columns.
    int col = 0;
    for (DoubleDBIDListIter nb = neighbors.iter(); nb.valid(); nb.advance()) {
      final double[] nbdens = densities.get(nb);
      for (int row = 0; row < knum; row++) {
        scratch[row][col] = nbdens[row];
      }
      ++col;
    }
    assert (col == nsize);
    // Accumulate the standardized density differences over all k.
    double zsum = 0.;
    for (int row = 0; row < knum; row++) {
      final double[] rowdata = scratch[row];
      mv.reset();
      for (int j = 0; j < nsize; j++) {
        mv.put(rowdata[j]);
      }
      final double mean = mv.getMean();
      final double stddev = mv.getSampleStddev();
      // Rows without variation contribute nothing to the sum.
      if (stddev > 0.) {
        zsum += (mean - own[row]) / stddev;
      }
    }
    // Average over k, then map to (0, 1) via the normal CDF.
    final double score = NormalDistribution.standardNormalCDF(zsum / knum);
    minmax.put(score);
    kdeos.put(iter, score);
    LOG.incrementProcessed(prog);
  }
  LOG.ensureCompleted(prog);
}
use of de.lmu.ifi.dbs.elki.database.ids.DoubleDBIDListIter in project elki by elki-project.
the class COF method computeAverageChainingDistances.
/**
* Computes the average chaining distance, the average length of a path
* through the given set of points to each target. The authors of COF decided
* to approximate this value using a weighted mean that assumes every object
* is reached from the previous point (but actually every point could be best
* reachable from the first, in which case this does not make much sense.)
*
* Implementation outline, per object: the kNN distances initialize an array
* of "current lowest reachability" values, with NaN marking entries that are
* already processed (initially the query object itself, if it is in its own
* neighbor list). Then, repeatedly, the unprocessed entry with the smallest
* value is selected, its value is added to the sum with a decreasing integer
* weight, it is marked as processed, and the remaining entries are lowered to
* their distance from the selected object where that is smaller. The weighted
* sum is finally divided by r * (r - 1) / 2, with r the neighbor list size.
*
* NOTE(review): k is not declared in this method; presumably it is a field of
* the enclosing class - confirm.
*
* TODO: can we accelerate this by using the kNN of the neighbors?
*
* @param knnq KNN query
* @param dq Distance query
* @param ids IDs to process
* @param acds Storage for average chaining distances
*/
protected void computeAverageChainingDistances(KNNQuery<O> knnq, DistanceQuery<O> dq, DBIDs ids, WritableDoubleDataStore acds) {
FiniteProgress lrdsProgress = LOG.isVerbose() ? new FiniteProgress("Computing average chaining distances", ids.size(), LOG) : null;
// We do <i>not</i> bother to materialize the chaining order.
for (DBIDIter iter = ids.iter(); iter.valid(); iter.advance()) {
final KNNList neighbors = knnq.getKNNForDBID(iter, k);
final int r = neighbors.size();
// Two independent cursors over the same neighbor list: it1 is later moved
// to the selected neighbor, it2 scans all neighbors during the update.
DoubleDBIDListIter it1 = neighbors.iter(), it2 = neighbors.iter();
// Store the current lowest reachability.
final double[] mindists = new double[r];
for (int i = 0; it1.valid(); it1.advance(), ++i) {
// The query object itself is marked as processed (NaN) from the start.
mindists[i] = DBIDUtil.equal(it1, iter) ? Double.NaN : it1.doubleValue();
}
double acsum = 0.;
// min(r, k) - 1 selection rounds; j doubles as the (decreasing) weight.
for (int j = ((r < k) ? r : k) - 1; j > 0; --j) {
// Find the minimum:
int minpos = -1;
double mindist = Double.NaN;
for (int i = 0; i < mindists.length; ++i) {
double curdist = mindists[i];
// Both values could be NaN, deliberately.
// "curdist == curdist" is false only for NaN (processed entries);
// "!(curdist > mindist)" is true while mindist is still NaN, and
// otherwise when curdist <= mindist, so on ties the later index wins.
if (curdist == curdist && !(curdist > mindist)) {
minpos = i;
mindist = curdist;
}
}
// Weighted sum, decreasing weights
acsum += mindist * j;
// Mark the selected entry as processed.
mindists[minpos] = Double.NaN;
// Position it1 on the selected neighbor, to use it as distance reference.
it1.seek(minpos);
// Update distances
it2.seek(0);
for (int i = 0; it2.valid(); it2.advance(), ++i) {
final double curdist = mindists[i];
if (curdist != curdist) {
// NaN = processed!
continue;
}
// Lower the stored value if the selected neighbor is closer.
double newdist = dq.distance(it1, it2);
if (newdist < curdist) {
mindists[i] = newdist;
}
}
}
// Normalize by r * (r - 1) / 2.
// NOTE(review): for r == 1 the denominator is 0 and, with acsum still 0.,
// the stored value would be NaN - confirm whether this case can occur.
acds.putDouble(iter, acsum / (r * 0.5 * (r - 1.)));
LOG.incrementProcessed(lrdsProgress);
}
LOG.ensureCompleted(lrdsProgress);
}
Aggregations