use of de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress in project elki by elki-project.
the class PrecomputedDistanceMatrix method initialize.
/**
 * Precompute the distances of all object pairs in {@code ids} and store them
 * in the flat {@code matrix} array.
 *
 * Only pairs (x, y) with y &lt; x are computed; they are written
 * consecutively, row by row.
 *
 * @throws AbortException if there are more than 65536 objects
 */
@Override
public void initialize() {
  size = ids.size();
  if (size > 65536) {
    throw new AbortException("Distance matrixes currently have a limit of 65536 objects (~16 GB). After this, the array size exceeds the Java integer range, and a different data structure needs to be used.");
  }
  distanceQuery = distanceFunction.instantiate(relation);
  final int cells = triangleSize(size);
  matrix = new double[cells];
  DBIDArrayIter row = ids.iter();
  DBIDArrayIter col = ids.iter();
  FiniteProgress prog = LOG.isVerbose() ? new FiniteProgress("Precomputing distance matrix", cells, LOG) : null;
  // Next free slot in the flat matrix array.
  int next = 0;
  row.seek(0);
  while (row.valid()) {
    // Only y < x is stored -- this layout must match {@link #getOffset}!
    col.seek(0);
    while (col.getOffset() < row.getOffset()) {
      matrix[next++] = distanceQuery.distance(row, col);
      col.advance();
    }
    if (prog != null) {
      // The row just finished contributed row.getOffset() entries.
      prog.setProcessed(prog.getProcessed() + row.getOffset(), LOG);
    }
    row.advance();
  }
  LOG.ensureCompleted(prog);
}
use of de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress in project elki by elki-project.
the class COF method computeCOFScores.
/**
 * Compute the connectivity outlier factor (COF) of every object.
 *
 * For each object, the average chaining distances of its nearest neighbors
 * (leaving out the object itself) are summed up. The score is the object's
 * own average chaining distance times k, divided by this sum. When the sum is
 * not positive, the score is infinite if the object's own value is positive,
 * and 1 otherwise.
 *
 * @param knnq KNN query
 * @param ids IDs to process
 * @param acds Average chaining distances
 * @param cofs Connectivity outlier factor storage (written)
 * @param cofminmax Score minimum/maximum tracker (updated)
 */
private void computeCOFScores(KNNQuery<O> knnq, DBIDs ids, DoubleDataStore acds, WritableDoubleDataStore cofs, DoubleMinMax cofminmax) {
  FiniteProgress prog = LOG.isVerbose() ? new FiniteProgress("COF for objects", ids.size(), LOG) : null;
  for (DBIDIter obj = ids.iter(); obj.valid(); obj.advance()) {
    // Sum of the neighbors' average chaining distances, without the object
    // itself.
    double neighborAcd = 0.;
    for (DBIDIter nb = knnq.getKNNForDBID(obj, k).iter(); nb.valid(); nb.advance()) {
      if (!DBIDUtil.equal(nb, obj)) {
        neighborAcd += acds.doubleValue(nb);
      }
    }
    final double ownAcd = acds.doubleValue(obj);
    final double cof;
    if (neighborAcd > 0.) {
      cof = ownAcd * k / neighborAcd;
    } else if (ownAcd > 0.) {
      // Neighbors have no chaining distance at all, but the object does.
      cof = Double.POSITIVE_INFINITY;
    } else {
      cof = 1.;
    }
    cofs.putDouble(obj, cof);
    // Keep the score range up to date.
    cofminmax.put(cof);
    LOG.incrementProcessed(prog);
  }
  LOG.ensureCompleted(prog);
}
use of de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress in project elki by elki-project.
the class INFLO method computeNeighborhoods.
/**
 * Compute the reverse kNN without the kNN, and collect the prunable objects.
 *
 * This follows the idea of algorithm 2 (two-way search) from the INFLO paper
 * rather than its literal pseudocode: the pseudocode does not compute the
 * RkNN correctly, but rather \( RkNN \cap kNN \), which is of little use
 * since the result is later combined (by union) with the kNN anyway.
 *
 * @param relation Data relation
 * @param knnQuery kNN query function
 * @param pruned Pruned objects: with too many neighbors
 * @param rNNminuskNNs reverse kNN storage
 */
private void computeNeighborhoods(Relation<O> relation, KNNQuery<O> knnQuery, ModifiableDBIDs pruned, WritableDataStore<ModifiableDBIDs> rNNminuskNNs) {
  FiniteProgress prog = LOG.isVerbose() ? new FiniteProgress("Finding RkNN", relation.size(), LOG) : null;
  for (DBIDIter query = relation.iterDBIDs(); query.valid(); query.advance()) {
    DBIDs knn = knnQuery.getKNNForDBID(query, kplus1);
    // Starts at one, to account for the query point itself.
    int mutual = 1;
    for (DBIDIter nb = knn.iter(); nb.valid(); nb.advance()) {
      // The query point itself is not processed as a neighbor.
      if (!DBIDUtil.equal(query, nb)) {
        // Query the neighbor's own kNN to see whether it contains the query
        // point in return.
        final boolean reciprocal = knnQuery.getKNNForDBID(nb, kplus1).contains(query);
        if (!reciprocal) {
          // In contrast to the INFLO pseudocode, only the non-mutual case is
          // recorded, i.e., members of RkNN \setminus kNN, to save memory.
          rNNminuskNNs.get(nb).add(query);
        } else {
          mutual++;
        }
      }
    }
    // INFLO pruning rule
    if (mutual >= knn.size() * m) {
      pruned.add(query);
    }
    LOG.incrementProcessed(prog);
  }
  LOG.ensureCompleted(prog);
}
use of de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress in project elki by elki-project.
the class LDF method run.
/**
 * Run the LDF algorithm: compute a local density estimate (LDE) for every
 * object from its k nearest neighbors, then derive the local density factor
 * (LDF) score from the object's LDE and the average LDE of its neighbors.
 *
 * @param database Database to query
 * @param relation Data to process
 * @return Outlier result with the local density factor scores
 */
public OutlierResult run(Database database, Relation<O> relation) {
  StepProgress stepprog = LOG.isVerbose() ? new StepProgress("LDF", 3) : null;
  final int dim = RelationUtil.dimensionality(relation);
  DBIDs ids = relation.getDBIDs();

  // Step 1: obtain a precomputed kNN query.
  LOG.beginStep(stepprog, 1, "Materializing neighborhoods w.r.t. distance function.");
  KNNQuery<O> knnq = DatabaseUtil.precomputedKNNQuery(database, relation, getDistanceFunction(), k);

  // Step 2: local density estimates.
  LOG.beginStep(stepprog, 2, "Computing LDEs.");
  WritableDoubleDataStore ldes = DataStoreUtil.makeDoubleStorage(ids, DataStoreFactory.HINT_HOT | DataStoreFactory.HINT_TEMP);
  FiniteProgress densProgress = LOG.isVerbose() ? new FiniteProgress("Densities", ids.size(), LOG) : null;
  for (DBIDIter obj = ids.iter(); obj.valid(); obj.advance()) {
    double total = 0.0;
    int used = 0;
    for (DoubleDBIDListIter nb = knnq.getKNNForDBID(obj, k).iter(); nb.valid(); nb.advance()) {
      // The object itself does not contribute.
      if (DBIDUtil.equal(nb, obj)) {
        continue;
      }
      // Every other neighbor visited is counted, including one that aborts
      // the loop below.
      used++;
      final double nkdist = knnq.getKNNForDBID(nb, k).getKNNDistance();
      if (nkdist > 0. && nkdist != Double.POSITIVE_INFINITY) {
        final double bandwidth = h * nkdist;
        final double v = MathUtil.max(nkdist, nb.doubleValue()) / bandwidth;
        total += kernel.density(v) / MathUtil.powi(bandwidth, dim);
      } else {
        // Non-positive, NaN or infinite k-distance: the estimate becomes
        // infinite and the remaining neighbors are skipped.
        total = Double.POSITIVE_INFINITY;
        break;
      }
    }
    ldes.putDouble(obj, total / used);
    LOG.incrementProcessed(densProgress);
  }
  LOG.ensureCompleted(densProgress);

  // Step 3: local density factors.
  LOG.beginStep(stepprog, 3, "Computing LDFs.");
  WritableDoubleDataStore ldfs = DataStoreUtil.makeDoubleStorage(ids, DataStoreFactory.HINT_STATIC);
  // Tracks the observed score range for the result meta data.
  DoubleMinMax lofminmax = new DoubleMinMax();
  FiniteProgress progressLOFs = LOG.isVerbose() ? new FiniteProgress("Local Density Factors", ids.size(), LOG) : null;
  for (DBIDIter obj = ids.iter(); obj.valid(); obj.advance()) {
    final double ownLde = ldes.doubleValue(obj);
    double total = 0.0;
    int used = 0;
    for (DBIDIter nb = knnq.getKNNForDBID(obj, k).iter(); nb.valid(); nb.advance()) {
      // Again, leave out the object itself.
      if (!DBIDUtil.equal(nb, obj)) {
        total += ldes.doubleValue(nb);
        used++;
      }
    }
    // Average LDE of the neighbors.
    final double avg = total / used;
    final double div = ownLde + c * avg;
    final double ldf;
    if (div == Double.POSITIVE_INFINITY) {
      // Infinite denominator: 0 if only the object's own part can be
      // responsible (neighbor average below infinity), 1 otherwise.
      ldf = (avg < Double.POSITIVE_INFINITY) ? 0. : 1;
    } else if (div > 0) {
      ldf = avg / div;
    } else {
      ldf = 0;
    }
    ldfs.putDouble(obj, ldf);
    lofminmax.put(ldf);
    LOG.incrementProcessed(progressLOFs);
  }
  LOG.ensureCompleted(progressLOFs);
  LOG.setCompleted(stepprog);

  // Wrap the scores into the result objects.
  DoubleRelation scoreResult = new MaterializedDoubleRelation("Local Density Factor", "ldf-outlier", ldfs, ids);
  OutlierScoreMeta scoreMeta = new BasicOutlierScoreMeta(lofminmax.getMin(), lofminmax.getMax(), 0.0, 1. / c, 1 / (1 + c));
  return new OutlierResult(scoreMeta, scoreResult);
}
use of de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress in project elki by elki-project.
the class LOCI method run.
/**
 * Run the LOCI algorithm: for every object, find the largest normalized MDEF
 * score over its precomputed radii, together with the radius where it occurs.
 *
 * @param database Database to process
 * @param relation Relation to process
 * @return Outlier result (normalized MDEF scores, with the corresponding
 *         radii attached as child result)
 */
public OutlierResult run(Database database, Relation<O> relation) {
  DistanceQuery<O> distFunc = database.getDistanceQuery(relation, getDistanceFunction());
  RangeQuery<O> rangeQuery = database.getRangeQuery(distFunc);
  DBIDs ids = relation.getDBIDs();

  // LOCI preprocessing step: per-object list of (radius, count) pairs.
  WritableDataStore<DoubleIntArrayList> interestingDistances = DataStoreUtil.makeStorage(relation.getDBIDs(), DataStoreFactory.HINT_TEMP | DataStoreFactory.HINT_SORTED, DoubleIntArrayList.class);
  precomputeInterestingRadii(ids, rangeQuery, interestingDistances);

  // LOCI main step
  FiniteProgress progressLOCI = LOG.isVerbose() ? new FiniteProgress("LOCI scores", relation.size(), LOG) : null;
  WritableDoubleDataStore mdef_norm = DataStoreUtil.makeDoubleStorage(relation.getDBIDs(), DataStoreFactory.HINT_STATIC);
  WritableDoubleDataStore mdef_radius = DataStoreUtil.makeDoubleStorage(relation.getDBIDs(), DataStoreFactory.HINT_STATIC);
  DoubleMinMax minmax = new DoubleMinMax();
  // One statistics object, reset and reused for every radius, to save
  // allocations.
  MeanVariance stats = new MeanVariance();
  for (DBIDIter obj = ids.iter(); obj.valid(); obj.advance()) {
    final DoubleIntArrayList radii = interestingDistances.get(obj);
    final int last = radii.size() - 1;
    final double maxdist = radii.getDouble(last);
    final int maxneig = radii.getInt(last);
    double bestScore;
    double bestRadius;
    if (maxneig < nmin) {
      // FIXME: when nmin was not fulfilled - what is the proper value then?
      bestScore = Double.POSITIVE_INFINITY;
      bestRadius = maxdist;
    } else {
      bestScore = 0.0;
      bestRadius = 0;
      // The largest neighborhood that will be needed for this object.
      DoubleDBIDList maxneighbors = rangeQuery.getRangeForDBID(obj, maxdist);
      // Evaluate the normalized MDEF score at every stored radius.
      final int numRadii = radii.size();
      for (int i = 0; i < numRadii; i++) {
        // Radii where the minimum size is not yet fulfilled are skipped.
        if (radii.getInt(i) < nmin) {
          continue;
        }
        final double r = radii.getDouble(i);
        final double alpha_r = alpha * r;
        // n(p_i, \alpha * r): alpha_r is not itself taken from the list, so
        // it is looked up with find.
        final int n_alphar = radii.getInt(radii.find(alpha_r));
        // \hat{n}(p_i, r, \alpha) and the corresponding \sigma_{MDEF}, from
        // the neighbors within radius r.
        stats.reset();
        for (DoubleDBIDListIter nb = maxneighbors.iter(); nb.valid() && !(nb.doubleValue() > r); nb.advance()) {
          DoubleIntArrayList nbRadii = interestingDistances.get(nb);
          stats.put(nbRadii.getInt(nbRadii.find(alpha_r)));
        }
        // Only the average and the standard deviation are used.
        final double nhat_r_alpha = stats.getMean();
        final double sigma_nhat_r_alpha = stats.getNaiveStddev();
        // Redundant divisions by nhat_r_alpha removed.
        final double mdefnorm = (nhat_r_alpha - n_alphar) / sigma_nhat_r_alpha;
        if (mdefnorm > bestScore) {
          bestScore = mdefnorm;
          bestRadius = r;
        }
      }
    }
    mdef_norm.putDouble(obj, bestScore);
    mdef_radius.putDouble(obj, bestRadius);
    minmax.put(bestScore);
    LOG.incrementProcessed(progressLOCI);
  }
  LOG.ensureCompleted(progressLOCI);

  // Build the result, with the radii attached as a child result.
  DoubleRelation scoreResult = new MaterializedDoubleRelation("LOCI normalized MDEF", "loci-mdef-outlier", mdef_norm, relation.getDBIDs());
  OutlierScoreMeta scoreMeta = new QuotientOutlierScoreMeta(minmax.getMin(), minmax.getMax(), 0.0, Double.POSITIVE_INFINITY, 0.0);
  OutlierResult result = new OutlierResult(scoreMeta, scoreResult);
  result.addChildResult(new MaterializedDoubleRelation("LOCI MDEF Radius", "loci-critical-radius", mdef_radius, relation.getDBIDs()));
  return result;
}
Aggregations