use of de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress in project elki by elki-project.
the class SimpleOutlierEnsemble method run.
@Override
public OutlierResult run(Database database) throws IllegalStateException {
int num = algorithms.size();
// Run inner outlier algorithms
ModifiableDBIDs ids = DBIDUtil.newHashSet();
ArrayList<OutlierResult> results = new ArrayList<>(num);
{
FiniteProgress prog = LOG.isVerbose() ? new FiniteProgress("Inner outlier algorithms", num, LOG) : null;
for (Algorithm alg : algorithms) {
Result res = alg.run(database);
List<OutlierResult> ors = OutlierResult.getOutlierResults(res);
for (OutlierResult or : ors) {
results.add(or);
ids.addDBIDs(or.getScores().getDBIDs());
}
LOG.incrementProcessed(prog);
}
LOG.ensureCompleted(prog);
}
// Combine
WritableDoubleDataStore sumscore = DataStoreUtil.makeDoubleStorage(ids, DataStoreFactory.HINT_STATIC);
DoubleMinMax minmax = new DoubleMinMax();
{
FiniteProgress cprog = LOG.isVerbose() ? new FiniteProgress("Combining results", ids.size(), LOG) : null;
for (DBIDIter id = ids.iter(); id.valid(); id.advance()) {
double[] scores = new double[num];
int i = 0;
for (OutlierResult r : results) {
double score = r.getScores().doubleValue(id);
if (!Double.isNaN(score)) {
scores[i] = score;
i++;
} else {
LOG.warning("DBID " + id + " was not given a score by result " + r);
}
}
if (i > 0) {
// Shrink array if necessary.
if (i < scores.length) {
scores = Arrays.copyOf(scores, i);
}
double combined = voting.combine(scores);
sumscore.putDouble(id, combined);
minmax.put(combined);
} else {
LOG.warning("DBID " + id + " was not given any score at all.");
}
LOG.incrementProcessed(cprog);
}
LOG.ensureCompleted(cprog);
}
OutlierScoreMeta meta = new BasicOutlierScoreMeta(minmax.getMin(), minmax.getMax());
DoubleRelation scores = new MaterializedDoubleRelation("Simple Outlier Ensemble", "ensemble-outlier", sumscore, ids);
return new OutlierResult(meta, scores);
}
use of de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress in project elki by elki-project.
the class SimplifiedLOF method computeSimplifiedLOFs.
/**
* Compute the simplified LOF factors.
*
* @param ids IDs to compute for
* @param knnq kNN query class
* @param slrds Object densities
* @param lofs SLOF output storage
* @param lofminmax Minimum and maximum scores
*/
private void computeSimplifiedLOFs(DBIDs ids, KNNQuery<O> knnq, WritableDoubleDataStore slrds, WritableDoubleDataStore lofs, DoubleMinMax lofminmax) {
FiniteProgress progressLOFs = LOG.isVerbose() ? new FiniteProgress("Simplified LOF scores", ids.size(), LOG) : null;
for (DBIDIter iter = ids.iter(); iter.valid(); iter.advance()) {
final double lof;
final double lrdp = slrds.doubleValue(iter);
final KNNList neighbors = knnq.getKNNForDBID(iter, k);
if (!Double.isInfinite(lrdp)) {
double sum = 0.;
int count = 0;
for (DBIDIter neighbor = neighbors.iter(); neighbor.valid(); neighbor.advance()) {
// skip the point itself
if (DBIDUtil.equal(neighbor, iter)) {
continue;
}
final double val = slrds.doubleValue(neighbor);
sum += val;
count++;
if (Double.isInfinite(val)) {
break;
}
}
lof = sum / (lrdp * count);
} else {
lof = 1.0;
}
lofs.putDouble(iter, lof);
// update minimum and maximum
lofminmax.put(lof);
LOG.incrementProcessed(progressLOFs);
}
LOG.ensureCompleted(progressLOFs);
}
use of de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress in project elki by elki-project.
the class FeatureBagging method run.
/**
* Run the algorithm on a data set.
*
* @param database Database context
* @param relation Relation to use
* @return Outlier detection result
*/
public OutlierResult run(Database database, Relation<NumberVector> relation) {
final int dbdim = RelationUtil.dimensionality(relation);
final int mindim = dbdim >> 1;
final int maxdim = dbdim - 1;
final Random rand = rnd.getSingleThreadedRandom();
ArrayList<OutlierResult> results = new ArrayList<>(num);
{
FiniteProgress prog = LOG.isVerbose() ? new FiniteProgress("LOF iterations", num, LOG) : null;
for (int i = 0; i < num; i++) {
long[] dimset = randomSubspace(dbdim, mindim, maxdim, rand);
SubspaceEuclideanDistanceFunction df = new SubspaceEuclideanDistanceFunction(dimset);
LOF<NumberVector> lof = new LOF<>(k, df);
// run LOF and collect the result
OutlierResult result = lof.run(database, relation);
results.add(result);
LOG.incrementProcessed(prog);
}
LOG.ensureCompleted(prog);
}
WritableDoubleDataStore scores = DataStoreUtil.makeDoubleStorage(relation.getDBIDs(), DataStoreFactory.HINT_STATIC);
DoubleMinMax minmax = new DoubleMinMax();
if (breadth) {
FiniteProgress cprog = LOG.isVerbose() ? new FiniteProgress("Combining results", relation.size(), LOG) : null;
@SuppressWarnings("unchecked") Pair<DBIDIter, DoubleRelation>[] IDVectorOntoScoreVector = (Pair<DBIDIter, DoubleRelation>[]) new Pair[results.size()];
// Mapping score-sorted DBID-Iterators onto their corresponding scores.
// We need to initialize them now be able to iterate them "in parallel".
{
int i = 0;
for (OutlierResult r : results) {
IDVectorOntoScoreVector[i] = new Pair<DBIDIter, DoubleRelation>(r.getOrdering().order(relation.getDBIDs()).iter(), r.getScores());
i++;
}
}
// Iterating over the *lines* of the AS_t(i)-matrix.
for (int i = 0; i < relation.size(); i++) {
// Iterating over the elements of a line (breadth-first).
for (Pair<DBIDIter, DoubleRelation> pair : IDVectorOntoScoreVector) {
DBIDIter iter = pair.first;
// for every DBID).
if (iter.valid()) {
double score = pair.second.doubleValue(iter);
if (Double.isNaN(scores.doubleValue(iter))) {
scores.putDouble(iter, score);
minmax.put(score);
}
iter.advance();
} else {
LOG.warning("Incomplete result: Iterator does not contain |DB| DBIDs");
}
}
// Progress does not take the initial mapping into account.
LOG.incrementProcessed(cprog);
}
LOG.ensureCompleted(cprog);
} else {
FiniteProgress cprog = LOG.isVerbose() ? new FiniteProgress("Combining results", relation.size(), LOG) : null;
for (DBIDIter iter = relation.iterDBIDs(); iter.valid(); iter.advance()) {
double sum = 0.0;
for (OutlierResult r : results) {
final double s = r.getScores().doubleValue(iter);
if (!Double.isNaN(s)) {
sum += s;
}
}
scores.putDouble(iter, sum);
minmax.put(sum);
LOG.incrementProcessed(cprog);
}
LOG.ensureCompleted(cprog);
}
OutlierScoreMeta meta = new BasicOutlierScoreMeta(minmax.getMin(), minmax.getMax());
DoubleRelation scoreres = new MaterializedDoubleRelation("Feature bagging", "fb-outlier", scores, relation.getDBIDs());
return new OutlierResult(meta, scoreres);
}
use of de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress in project elki by elki-project.
the class SimpleKernelDensityLOF method run.
/**
* Run the naive kernel density LOF algorithm.
*
* @param database Database to query
* @param relation Data to process
* @return LOF outlier result
*/
public OutlierResult run(Database database, Relation<O> relation) {
StepProgress stepprog = LOG.isVerbose() ? new StepProgress("KernelDensityLOF", 3) : null;
final int dim = RelationUtil.dimensionality(relation);
DBIDs ids = relation.getDBIDs();
LOG.beginStep(stepprog, 1, "Materializing neighborhoods w.r.t. distance function.");
KNNQuery<O> knnq = DatabaseUtil.precomputedKNNQuery(database, relation, getDistanceFunction(), k);
// Compute LRDs
LOG.beginStep(stepprog, 2, "Computing densities.");
WritableDoubleDataStore dens = DataStoreUtil.makeDoubleStorage(ids, DataStoreFactory.HINT_HOT | DataStoreFactory.HINT_TEMP);
FiniteProgress densProgress = LOG.isVerbose() ? new FiniteProgress("Densities", ids.size(), LOG) : null;
for (DBIDIter it = ids.iter(); it.valid(); it.advance()) {
final KNNList neighbors = knnq.getKNNForDBID(it, k);
int count = 0;
double sum = 0.0;
// Fast version for double distances
for (DoubleDBIDListIter neighbor = neighbors.iter(); neighbor.valid(); neighbor.advance()) {
if (DBIDUtil.equal(neighbor, it)) {
continue;
}
double max = knnq.getKNNForDBID(neighbor, k).getKNNDistance();
if (max == 0) {
sum = Double.POSITIVE_INFINITY;
break;
}
final double v = neighbor.doubleValue() / max;
sum += kernel.density(v) / MathUtil.powi(max, dim);
count++;
}
final double density = count > 0 ? sum / count : 0.;
dens.putDouble(it, density);
LOG.incrementProcessed(densProgress);
}
LOG.ensureCompleted(densProgress);
// compute LOF_SCORE of each db object
LOG.beginStep(stepprog, 3, "Computing KLOFs.");
WritableDoubleDataStore lofs = DataStoreUtil.makeDoubleStorage(ids, DataStoreFactory.HINT_STATIC);
// track the maximum value for normalization.
DoubleMinMax lofminmax = new DoubleMinMax();
FiniteProgress progressLOFs = LOG.isVerbose() ? new FiniteProgress("KLOF_SCORE for objects", ids.size(), LOG) : null;
for (DBIDIter it = ids.iter(); it.valid(); it.advance()) {
final double lrdp = dens.doubleValue(it);
final double lof;
if (lrdp > 0) {
final KNNList neighbors = knnq.getKNNForDBID(it, k);
double sum = 0.0;
int count = 0;
for (DBIDIter neighbor = neighbors.iter(); neighbor.valid(); neighbor.advance()) {
// skip the point itself
if (DBIDUtil.equal(neighbor, it)) {
continue;
}
sum += dens.doubleValue(neighbor);
count++;
}
lof = (lrdp == Double.POSITIVE_INFINITY) ? (sum == Double.POSITIVE_INFINITY ? 1 : 0.) : sum / (count * lrdp);
} else {
lof = 1.0;
}
lofs.putDouble(it, lof);
// update minimum and maximum
lofminmax.put(lof);
LOG.incrementProcessed(progressLOFs);
}
LOG.ensureCompleted(progressLOFs);
LOG.setCompleted(stepprog);
// Build result representation.
DoubleRelation scoreResult = new MaterializedDoubleRelation("Kernel Density Local Outlier Factor", "kernel-density-slof-outlier", lofs, ids);
OutlierScoreMeta scoreMeta = new QuotientOutlierScoreMeta(lofminmax.getMin(), lofminmax.getMax(), 0.0, Double.POSITIVE_INFINITY, 1.0);
OutlierResult result = new OutlierResult(scoreMeta, scoreResult);
return result;
}
use of de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress in project elki by elki-project.
the class VarianceOfVolume method computeVolumes.
/**
* Compute volumes
*
* @param knnq KNN query
* @param dim Data dimensionality
* @param ids IDs to process
* @param vols Volume storage
*/
private void computeVolumes(KNNQuery<O> knnq, int dim, DBIDs ids, WritableDoubleDataStore vols) {
FiniteProgress prog = LOG.isVerbose() ? new FiniteProgress("Volume", ids.size(), LOG) : null;
double scaleconst = MathUtil.SQRTPI * FastMath.pow(GammaDistribution.gamma(1 + dim * .5), -1. / dim);
boolean warned = false;
for (DBIDIter iter = ids.iter(); iter.valid(); iter.advance()) {
double dk = knnq.getKNNForDBID(iter, k).getKNNDistance();
double vol = dk > 0 ? MathUtil.powi(dk * scaleconst, dim) : 0.;
if (vol == Double.POSITIVE_INFINITY && !warned) {
LOG.warning("Variance of Volumes has hit double precision limits, results are not reliable.");
warned = true;
}
vols.putDouble(iter, vol);
LOG.incrementProcessed(prog);
}
LOG.ensureCompleted(prog);
}
Aggregations