Use of de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress in project elki by elki-project.
Class LOF, method computeLOFScores.
/**
 * Computes one LOF score per object, stores it, and feeds it into the
 * minimum/maximum tracker. Progress is reported only when verbose logging is
 * enabled.
 *
 * @param knnq KNN query (handed on to {@code computeLOFScore})
 * @param ids IDs to process
 * @param lrds Local reachability values (handed on to {@code computeLOFScore})
 * @param lofs Local outlier factor storage, written once per processed ID
 * @param lofminmax Score minimum/maximum tracker, updated with every score
 */
private void computeLOFScores(KNNQuery<O> knnq, DBIDs ids, DoubleDataStore lrds, WritableDoubleDataStore lofs, DoubleMinMax lofminmax) {
  final FiniteProgress prog;
  if (LOG.isVerbose()) {
    prog = new FiniteProgress("Local Outlier Factor (LOF) scores", ids.size(), LOG);
  } else {
    // No progress object; the logging calls below are still invoked with null.
    prog = null;
  }
  DBIDIter cursor = ids.iter();
  while (cursor.valid()) {
    final double score = computeLOFScore(knnq, cursor, lrds);
    lofs.putDouble(cursor, score);
    // Keep the observed score range up to date.
    lofminmax.put(score);
    LOG.incrementProcessed(prog);
    cursor.advance();
  }
  LOG.ensureCompleted(prog);
}
Use of de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress in project elki by elki-project.
Class COF, method computeAverageChainingDistances.
/**
* Computes the average chaining distance, the average length of a path
* through the given set of points to each target. The authors of COF decided
* to approximate this value using a weighted mean that assumes every object
* is reached from the previous point (but actually every point could be best
* reachable from the first, in which case this does not make much sense.)
*
* For each object, the neighbor list is fetched and a chain is grown greedily:
* in every round the unprocessed neighbor with the smallest currently known
* distance is selected, its distance is added to a weighted sum (weight
* decreasing by one per round), and the known distances of the remaining
* neighbors are lowered to their distance from the selected neighbor where
* that is smaller. The weighted sum is finally divided by r * (r - 1) / 2,
* with r the size of the neighbor list, and stored in {@code acds}.
*
* TODO: can we accelerate this by using the kNN of the neighbors?
*
* @param knnq KNN query
* @param dq Distance query
* @param ids IDs to process
* @param acds Storage for average chaining distances
*/
protected void computeAverageChainingDistances(KNNQuery<O> knnq, DistanceQuery<O> dq, DBIDs ids, WritableDoubleDataStore acds) {
// Progress is only tracked when verbose logging is enabled; otherwise null.
FiniteProgress lrdsProgress = LOG.isVerbose() ? new FiniteProgress("Computing average chaining distances", ids.size(), LOG) : null;
// We do <i>not</i> bother to materialize the chaining order.
for (DBIDIter iter = ids.iter(); iter.valid(); iter.advance()) {
// k is declared outside this block.
final KNNList neighbors = knnq.getKNNForDBID(iter, k);
final int r = neighbors.size();
// it1 first fills mindists, later points at the neighbor selected last;
// it2 scans all neighbors when the distances are updated.
DoubleDBIDListIter it1 = neighbors.iter(), it2 = neighbors.iter();
// Store the current lowest reachability.
// NaN serves as the "already processed" marker; the query object itself
// starts out marked, every other entry starts with the value stored in the
// neighbor list (presumably its distance to the query object).
final double[] mindists = new double[r];
for (int i = 0; it1.valid(); it1.advance(), ++i) {
mindists[i] = DBIDUtil.equal(it1, iter) ? Double.NaN : it1.doubleValue();
}
double acsum = 0.;
// min(r, k) - 1 rounds; j doubles as the (decreasing) weight of the round.
for (int j = ((r < k) ? r : k) - 1; j > 0; --j) {
// Find the minimum:
int minpos = -1;
double mindist = Double.NaN;
for (int i = 0; i < mindists.length; ++i) {
double curdist = mindists[i];
// Both values could be NaN, deliberately.
// curdist == curdist is false only for NaN (skips processed entries);
// !(curdist > mindist) also holds while mindist is still NaN, so the
// first unprocessed entry is always accepted. Equal distances replace
// the current choice, i.e. the last of several ties is selected.
if (curdist == curdist && !(curdist > mindist)) {
minpos = i;
mindist = curdist;
}
}
// Weighted sum, decreasing weights
acsum += mindist * j;
// NOTE(review): if no unprocessed entry were left, minpos would still be
// -1 here and the array access would fail; the round count above appears
// to prevent this as long as the neighbor list holds no NaN distances.
mindists[minpos] = Double.NaN;
it1.seek(minpos);
// Update distances
it2.seek(0);
for (int i = 0; it2.valid(); it2.advance(), ++i) {
final double curdist = mindists[i];
if (curdist != curdist) {
// NaN = processed!
continue;
}
// Distance from the neighbor selected in this round to neighbor i.
double newdist = dq.distance(it1, it2);
if (newdist < curdist) {
mindists[i] = newdist;
}
}
}
// Normalize by r * (r - 1) / 2.
// NOTE(review): for r <= 1 the sum is 0 and the divisor is zero, so NaN is
// stored; the divisor uses r although only min(r, k) - 1 rounds are run.
acds.putDouble(iter, acsum / (r * 0.5 * (r - 1.)));
LOG.incrementProcessed(lrdsProgress);
}
LOG.ensureCompleted(lrdsProgress);
}
Use of de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress in project elki by elki-project.
Class INFLO, method computeINFLO.
/**
 * Compute the final INFLO scores.
 *
 * Pruned objects and objects whose own kNN distance is zero receive the score
 * 1. For every other object, the reciprocal kNN distances of all members of
 * its kNN list united with its stored reverse neighbors (the object itself
 * excluded) are summed, multiplied by the object's own kNN distance, and
 * averaged. A member with a non-positive kNN distance makes the sum infinite.
 *
 * @param relation Data relation
 * @param pruned Pruned objects
 * @param knnq kNN query
 * @param rNNminuskNNs reverse kNN storage
 * @param inflos INFLO score storage
 * @param inflominmax Output of minimum and maximum
 */
protected void computeINFLO(Relation<O> relation, ModifiableDBIDs pruned, KNNQuery<O> knnq, WritableDataStore<ModifiableDBIDs> rNNminuskNNs, WritableDoubleDataStore inflos, DoubleMinMax inflominmax) {
  FiniteProgress prog = LOG.isVerbose() ? new FiniteProgress("Computing INFLOs", relation.size(), LOG) : null;
  // Scratch set, reused for every object.
  HashSetModifiableDBIDs influence = DBIDUtil.newHashSet();
  for (DBIDIter iter = relation.iterDBIDs(); iter.valid(); iter.advance()) {
    // Default score, kept for pruned objects and for a zero kNN distance.
    double inflo = 1.;
    if (!pruned.contains(iter)) {
      final KNNList knn = knnq.getKNNForDBID(iter, kplus1);
      if (knn.getKNNDistance() != 0.) {
        influence.clear();
        influence.addDBIDs(knn);
        influence.addDBIDs(rNNminuskNNs.get(iter));
        // Compute mean density of NN \cup RNN
        double densitySum = 0.;
        int members = 0;
        for (DBIDIter niter = influence.iter(); niter.valid(); niter.advance()) {
          if (!DBIDUtil.equal(iter, niter)) {
            final double kdist = knnq.getKNNForDBID(niter, kplus1).getKNNDistance();
            // The member is counted whether or not it ends the scan.
            members++;
            if (kdist <= 0) {
              densitySum = Double.POSITIVE_INFINITY;
              break;
            }
            densitySum += 1. / kdist;
          }
        }
        densitySum *= knn.getKNNDistance();
        if (densitySum == 0) {
          inflo = 1.;
        } else {
          inflo = densitySum / members;
        }
      }
    }
    inflos.putDouble(iter, inflo);
    inflominmax.put(inflo);
    LOG.incrementProcessed(prog);
  }
  LOG.ensureCompleted(prog);
}
Use of de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress in project elki by elki-project.
Class LOCI, method precomputeInterestingRadii.
/**
 * Preprocessing step: determine the radii of interest for each point.
 *
 * For every object, a range query with radius {@code rmax} is run. Each
 * distinct neighbor distance is recorded together with the number of
 * neighbors seen up to and including that distance. Unless {@code alpha} is
 * 1, the distance divided by {@code alpha} is recorded as well (if it does
 * not exceed {@code rmax}), initially with a placeholder count. After
 * sorting, each placeholder is replaced by the most recent real count
 * preceding it in the list (0 if there is none).
 *
 * @param ids IDs to process
 * @param rangeQuery Range query
 * @param interestingDistances Distances of interest (one list stored per ID)
 */
protected void precomputeInterestingRadii(DBIDs ids, RangeQuery<O> rangeQuery, WritableDataStore<DoubleIntArrayList> interestingDistances) {
  FiniteProgress prog = LOG.isVerbose() ? new FiniteProgress("LOCI preprocessing", ids.size(), LOG) : null;
  for (DBIDIter iditer = ids.iter(); iditer.valid(); iditer.advance()) {
    final DoubleDBIDList neighbors = rangeQuery.getRangeForDBID(iditer, rmax);
    // Collect the critical distances.
    final DoubleIntArrayList radii = new DoubleIntArrayList(neighbors.size() << 1);
    int seen = 0;
    // The iterator is advanced inside the body, so that the following
    // neighbor can be inspected for ties.
    for (DoubleDBIDListIter ni = neighbors.iter(); ni.valid();) {
      final double dist = ni.doubleValue();
      ++seen;
      ni.advance();
      // Only emit an entry when the next neighbor is not at the same distance.
      if (!ni.valid() || dist != ni.doubleValue()) {
        radii.append(dist, seen);
        // Scale radius, and reinsert
        if (alpha != 1.) {
          final double scaled = dist / alpha;
          if (scaled <= rmax) {
            radii.append(scaled, Integer.MIN_VALUE);
          }
        }
      }
    }
    radii.sort();
    // Replace the placeholder counts, so that the number of neighbors at a
    // given distance can be looked up quickly.
    int known = 0;
    for (int pos = 0, len = radii.size(); pos < len; pos++) {
      final int count = radii.getInt(pos);
      if (count != Integer.MIN_VALUE) {
        known = count;
      } else {
        radii.setValue(pos, known);
      }
    }
    // TODO: shrink the list, removing duplicate radii?
    interestingDistances.put(iditer, radii);
    LOG.incrementProcessed(prog);
  }
  LOG.ensureCompleted(prog);
}
Use of de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress in project elki by elki-project.
Class LoOP, method computePLOFs.
/**
 * Compute the PLOF values, using the pdist distances.
 *
 * For each object, its pdist is divided by the mean pdist of up to
 * {@code kcomp} neighbors (the object itself is skipped); the result is
 * bounded below by 1, and NaN or infinite results are replaced by 1. The
 * returned factor is {@code lambda} times the square root of the mean squared
 * excess of the PLOFs over 1, or 1 if that value is not positive.
 *
 * @param relation Data relation
 * @param knn kNN query
 * @param pdists Precomputed distances
 * @param plofs Storage for PLOFs.
 * @return Normalization factor.
 */
protected double computePLOFs(Relation<O> relation, KNNQuery<O> knn, WritableDoubleDataStore pdists, WritableDoubleDataStore plofs) {
  FiniteProgress prog = LOG.isVerbose() ? new FiniteProgress("PLOFs for objects", relation.size(), LOG) : null;
  double squaredExcess = 0.;
  for (DBIDIter iditer = relation.iterDBIDs(); iditer.valid(); iditer.advance()) {
    // One more than kcomp is requested, as the query point is skipped below.
    final KNNList neighbors = knn.getKNNForDBID(iditer, kcomp + 1);
    // Use the first kcomp neighbors (other than the object) as comparison set.
    int used = 0;
    double pdistSum = 0.;
    for (DBIDIter neighbor = neighbors.iter(); neighbor.valid() && used < kcomp; neighbor.advance()) {
      if (!DBIDUtil.equal(neighbor, iditer)) {
        pdistSum += pdists.doubleValue(neighbor);
        used++;
      }
    }
    double plof = MathUtil.max(pdists.doubleValue(iditer) * used / pdistSum, 1.0);
    // Degenerate ratios (e.g. a zero sum) fall back to the neutral value.
    if (Double.isNaN(plof) || Double.isInfinite(plof)) {
      plof = 1.0;
    }
    plofs.putDouble(iditer, plof);
    final double excess = plof - 1.0;
    squaredExcess += excess * excess;
    LOG.incrementProcessed(prog);
  }
  LOG.ensureCompleted(prog);
  final double nplof = lambda * FastMath.sqrt(squaredExcess / relation.size());
  if (LOG.isDebuggingFine()) {
    LOG.debugFine("nplof normalization factor is " + nplof);
  }
  // Any value that is not strictly positive (including NaN) yields 1.
  if (nplof > 0.) {
    return nplof;
  }
  return 1.;
}
Aggregations