Use of de.lmu.ifi.dbs.elki.logging.progress.StepProgress in project elki by elki-project.
The class OnlineLOF, method run:
/**
 * Performs the Generalized LOF_SCORE algorithm on the given database by
 * calling {@code #doRunInTime(Database)}, and adds a {@link LOFKNNListener} to
 * the preprocessors.
 */
@Override
public OutlierResult run(Database database, Relation<O> relation) {
  StepProgress stepprog = LOG.isVerbose() ? new StepProgress("OnlineLOF", 3) : null;
  Pair<Pair<KNNQuery<O>, KNNQuery<O>>, Pair<RKNNQuery<O>, RKNNQuery<O>>> queries = getKNNAndRkNNQueries(database, relation, stepprog);
  KNNQuery<O> kNNRefer = queries.getFirst().getFirst();
  KNNQuery<O> kNNReach = queries.getFirst().getSecond();
  RKNNQuery<O> rkNNRefer = queries.getSecond().getFirst();
  RKNNQuery<O> rkNNReach = queries.getSecond().getSecond();
  LOFResult<O> lofResult = super.doRunInTime(relation.getDBIDs(), kNNRefer, kNNReach, stepprog);
  lofResult.setRkNNRefer(rkNNRefer);
  lofResult.setRkNNReach(rkNNReach);
  // Add a listener so the result is kept up to date when the kNN preprocessors change.
  KNNListener l = new LOFKNNListener(lofResult);
  ((MaterializeKNNPreprocessor<O>) ((PreprocessorKNNQuery<O>) lofResult.getKNNRefer()).getPreprocessor()).addKNNListener(l);
  ((MaterializeKNNPreprocessor<O>) ((PreprocessorKNNQuery<O>) lofResult.getKNNReach()).getPreprocessor()).addKNNListener(l);
  return lofResult.getResult();
}
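The null-guarded construction of stepprog in the first line is the idiom this page collects: the StepProgress is only allocated when verbose logging is enabled, and every later call must therefore tolerate a null reference. A minimal self-contained sketch of that pattern (the class name and step titles are illustrative, not from ELKI):

import de.lmu.ifi.dbs.elki.logging.Logging;
import de.lmu.ifi.dbs.elki.logging.progress.StepProgress;

public class StepProgressSketch {
  private static final Logging LOG = Logging.getLogger(StepProgressSketch.class);

  public void run() {
    // Only allocate the progress object when verbose logging is enabled.
    StepProgress stepprog = LOG.isVerbose() ? new StepProgress("Sketch", 2) : null;
    // The Logging helpers accept a null progress, so no explicit guard is needed.
    LOG.beginStep(stepprog, 1, "First phase.");
    // ... step 1 work ...
    LOG.beginStep(stepprog, 2, "Second phase.");
    // ... step 2 work ...
    LOG.setCompleted(stepprog);
  }
}

The LOG.beginStep and LOG.setCompleted helpers used here are the same null-safe calls that appear in the SimplifiedLOF and VarianceOfVolume snippets below.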
Use of de.lmu.ifi.dbs.elki.logging.progress.StepProgress in project elki by elki-project.
The class DWOF, method run:
/**
 * Performs the Generalized DWOF_SCORE algorithm on the given database by
 * calling all the other methods in the proper order.
 *
 * @param database Database to query
 * @param relation Data to process
 * @return new OutlierResult instance
 */
public OutlierResult run(Database database, Relation<O> relation) {
  final DBIDs ids = relation.getDBIDs();
  DistanceQuery<O> distFunc = database.getDistanceQuery(relation, getDistanceFunction());
  // Get k nearest neighbor and range query on the relation.
  KNNQuery<O> knnq = database.getKNNQuery(distFunc, k, DatabaseQuery.HINT_HEAVY_USE);
  RangeQuery<O> rnnQuery = database.getRangeQuery(distFunc, DatabaseQuery.HINT_HEAVY_USE);
  StepProgress stepProg = LOG.isVerbose() ? new StepProgress("DWOF", 2) : null;
  // DWOF output score storage.
  WritableDoubleDataStore dwofs = DataStoreUtil.makeDoubleStorage(ids, DataStoreFactory.HINT_DB | DataStoreFactory.HINT_HOT, 0.);
  if (stepProg != null) {
    stepProg.beginStep(1, "Initializing objects' Radii", LOG);
  }
  WritableDoubleDataStore radii = DataStoreUtil.makeDoubleStorage(ids, DataStoreFactory.HINT_TEMP | DataStoreFactory.HINT_HOT, 0.);
  // Find an initial radius for each object:
  initializeRadii(ids, knnq, distFunc, radii);
  WritableIntegerDataStore oldSizes = DataStoreUtil.makeIntegerStorage(ids, DataStoreFactory.HINT_HOT, 1);
  WritableIntegerDataStore newSizes = DataStoreUtil.makeIntegerStorage(ids, DataStoreFactory.HINT_HOT, 1);
  int countUnmerged = relation.size();
  if (stepProg != null) {
    stepProg.beginStep(2, "Clustering-Evaluating Cycles.", LOG);
  }
  IndefiniteProgress clusEvalProgress = LOG.isVerbose() ? new IndefiniteProgress("Evaluating DWOFs", LOG) : null;
  while (countUnmerged > 0) {
    LOG.incrementProcessed(clusEvalProgress);
    // Increase radii
    for (DBIDIter iter = ids.iter(); iter.valid(); iter.advance()) {
      radii.putDouble(iter, radii.doubleValue(iter) * delta);
    }
    // stores the clustering label for each object
    WritableDataStore<ModifiableDBIDs> labels = DataStoreUtil.makeStorage(ids, DataStoreFactory.HINT_TEMP, ModifiableDBIDs.class);
    // Cluster objects based on the current radius
    clusterData(ids, rnnQuery, radii, labels);
    // simple reference swap
    WritableIntegerDataStore temp = newSizes;
    newSizes = oldSizes;
    oldSizes = temp;
    // Update the cluster size count for each object.
    countUnmerged = updateSizes(ids, labels, newSizes);
    labels.destroy();
    // Update DWOF scores.
    for (DBIDIter iter = ids.iter(); iter.valid(); iter.advance()) {
      double newScore = (newSizes.intValue(iter) > 0) ? ((double) (oldSizes.intValue(iter) - 1) / (double) newSizes.intValue(iter)) : 0.0;
      dwofs.putDouble(iter, dwofs.doubleValue(iter) + newScore);
    }
  }
  LOG.setCompleted(clusEvalProgress);
  LOG.setCompleted(stepProg);
  // Build result representation.
  DoubleMinMax minmax = new DoubleMinMax();
  for (DBIDIter iter = relation.iterDBIDs(); iter.valid(); iter.advance()) {
    minmax.put(dwofs.doubleValue(iter));
  }
  OutlierScoreMeta meta = new InvertedOutlierScoreMeta(minmax.getMin(), minmax.getMax(), 0.0, Double.POSITIVE_INFINITY);
  DoubleRelation rel = new MaterializedDoubleRelation("Dynamic-Window Outlier Factors", "dwof-outlier", dwofs, ids);
  return new OutlierResult(meta, rel);
}
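Because the number of clustering-evaluation cycles is not known in advance, DWOF pairs the two-step StepProgress with an IndefiniteProgress that is simply incremented once per cycle. A minimal sketch of that pairing, with an illustrative class name and a stand-in for DWOF's countUnmerged termination test:

import de.lmu.ifi.dbs.elki.logging.Logging;
import de.lmu.ifi.dbs.elki.logging.progress.IndefiniteProgress;

public class IndefiniteProgressSketch {
  private static final Logging LOG = Logging.getLogger(IndefiniteProgressSketch.class);

  public void run() {
    // Unknown iteration count: use an IndefiniteProgress instead of a FiniteProgress.
    IndefiniteProgress cycles = LOG.isVerbose() ? new IndefiniteProgress("Cycles", LOG) : null;
    int remaining = 3; // stand-in for a real convergence condition
    while (remaining > 0) {
      LOG.incrementProcessed(cycles); // null-safe, logs the running count
      remaining--; // ... one clustering-evaluation cycle ...
    }
    LOG.setCompleted(cycles);
  }
}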
Use of de.lmu.ifi.dbs.elki.logging.progress.StepProgress in project elki by elki-project.
The class SimplifiedLOF, method run:
/**
 * Run the Simple LOF algorithm.
 *
 * @param database Database to query
 * @param relation Data to process
 * @return LOF outlier result
 */
public OutlierResult run(Database database, Relation<O> relation) {
  StepProgress stepprog = LOG.isVerbose() ? new StepProgress("Simplified LOF", 3) : null;
  DBIDs ids = relation.getDBIDs();
  LOG.beginStep(stepprog, 1, "Materializing neighborhoods w.r.t. distance function.");
  KNNQuery<O> knnq = DatabaseUtil.precomputedKNNQuery(database, relation, getDistanceFunction(), k);
  // Compute LRDs
  LOG.beginStep(stepprog, 2, "Computing densities.");
  WritableDoubleDataStore dens = DataStoreUtil.makeDoubleStorage(ids, DataStoreFactory.HINT_HOT | DataStoreFactory.HINT_TEMP);
  computeSimplifiedLRDs(ids, knnq, dens);
  // compute LOF_SCORE of each db object
  LOG.beginStep(stepprog, 3, "Computing SLOFs.");
  WritableDoubleDataStore lofs = DataStoreUtil.makeDoubleStorage(ids, DataStoreFactory.HINT_STATIC);
  DoubleMinMax lofminmax = new DoubleMinMax();
  computeSimplifiedLOFs(ids, knnq, dens, lofs, lofminmax);
  LOG.setCompleted(stepprog);
  // Build result representation.
  DoubleRelation scoreResult = new MaterializedDoubleRelation("Simplified Local Outlier Factor", "simplified-lof-outlier", lofs, ids);
  OutlierScoreMeta scoreMeta = new QuotientOutlierScoreMeta(lofminmax.getMin(), lofminmax.getMax(), 0., Double.POSITIVE_INFINITY, 1.);
  OutlierResult result = new OutlierResult(scoreMeta, scoreResult);
  return result;
}
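The density step in the middle is what distinguishes Simplified LOF from classic LOF: instead of reachability distances, an object's density is taken as the inverse of its average k-nearest-neighbor distance. A hedged sketch of what such a density computation could look like (an illustration only, not ELKI's computeSimplifiedLRDs; it assumes the field k and a kNN list that may contain the query point itself, as in the run method above):

// Illustrative only: density as the inverse of the average kNN distance,
// dens(o) = count / (sum of distances from o to its k nearest neighbors).
private void computeDensitiesSketch(DBIDs ids, KNNQuery<O> knnq, WritableDoubleDataStore dens) {
  for (DBIDIter iter = ids.iter(); iter.valid(); iter.advance()) {
    KNNList neighbors = knnq.getKNNForDBID(iter, k);
    double sum = 0.;
    int count = 0;
    for (DoubleDBIDListIter neighbor = neighbors.iter(); neighbor.valid(); neighbor.advance()) {
      if (DBIDUtil.equal(neighbor, iter)) {
        continue; // the kNN list may contain the query point; skip it
      }
      sum += neighbor.doubleValue();
      count++;
    }
    dens.putDouble(iter, sum > 0 ? count / sum : 0.); // guard against division by zero
  }
}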
Use of de.lmu.ifi.dbs.elki.logging.progress.StepProgress in project elki by elki-project.
The class VarianceOfVolume, method run:
/**
 * Runs the VOV algorithm on the given database.
 *
 * @param database Database to query
 * @param relation Data to process
 * @return VOV outlier result
 */
public OutlierResult run(Database database, Relation<O> relation) {
  StepProgress stepprog = LOG.isVerbose() ? new StepProgress("VOV", 3) : null;
  DBIDs ids = relation.getDBIDs();
  int dim = RelationUtil.dimensionality(relation);
  LOG.beginStep(stepprog, 1, "Materializing nearest-neighbor sets.");
  KNNQuery<O> knnq = DatabaseUtil.precomputedKNNQuery(database, relation, getDistanceFunction(), k);
  // Compute Volumes
  LOG.beginStep(stepprog, 2, "Computing Volumes.");
  WritableDoubleDataStore vols = DataStoreUtil.makeDoubleStorage(ids, DataStoreFactory.HINT_HOT | DataStoreFactory.HINT_TEMP);
  computeVolumes(knnq, dim, ids, vols);
  // compute VOV of each object
  LOG.beginStep(stepprog, 3, "Computing Variance of Volumes (VOV).");
  WritableDoubleDataStore vovs = DataStoreUtil.makeDoubleStorage(ids, DataStoreFactory.HINT_HOT | DataStoreFactory.HINT_DB);
  // track the maximum value for normalization.
  DoubleMinMax vovminmax = new DoubleMinMax();
  computeVOVs(knnq, ids, vols, vovs, vovminmax);
  LOG.setCompleted(stepprog);
  // Build result representation.
  DoubleRelation scoreResult = new MaterializedDoubleRelation("Variance of Volume", "vov-outlier", vovs, ids);
  OutlierScoreMeta scoreMeta = new BasicOutlierScoreMeta(vovminmax.getMin(), vovminmax.getMax(), 0.0, Double.POSITIVE_INFINITY, 0.0);
  return new OutlierResult(scoreMeta, scoreResult);
}
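The volume step maps each object's k-nearest-neighbor distance to the volume of the d-dimensional ball with that radius, V_d(r) = pi^(d/2) / Gamma(d/2 + 1) * r^d; the VOV score is then the variance of these volumes over the neighborhood. A self-contained sketch of the ball-volume formula (an illustration only, not ELKI's computeVolumes; naive powers like these can over- or underflow for large d):

// Volume of a d-dimensional ball of radius r: V_d(r) = pi^(d/2) / Gamma(d/2 + 1) * r^d.
// Gamma(d/2 + 1) is evaluated via the recurrence Gamma(x + 1) = x * Gamma(x),
// starting from Gamma(1) = 1 (even d) or Gamma(1/2) = sqrt(pi) (odd d).
static double ballVolume(int d, double r) {
  double gamma = (d % 2 == 0) ? 1.0 : Math.sqrt(Math.PI);
  for (double x = (d % 2 == 0) ? 1.0 : 0.5; x < 0.5 * d + 1; x += 1.0) {
    gamma *= x;
  }
  // For large d these powers can over- or underflow; a robust implementation
  // would work with logarithms instead.
  return Math.pow(Math.PI, 0.5 * d) / gamma * Math.pow(r, d);
}

As a sanity check, ballVolume(2, r) yields pi * r^2 and ballVolume(3, r) yields (4/3) * pi * r^3, the familiar low-dimensional cases.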
Use of de.lmu.ifi.dbs.elki.logging.progress.StepProgress in project elki by elki-project.
The class DistanceStatisticsWithClasses, method run:
@Override
public HistogramResult run(Database database) {
  final Relation<O> relation = database.getRelation(getInputTypeRestriction()[0]);
  final DistanceQuery<O> distFunc = database.getDistanceQuery(relation, getDistanceFunction());
  final StepProgress stepprog = LOG.isVerbose() ? new StepProgress("Distance statistics", 2) : null;
  // determine binning ranges.
  DoubleMinMax gminmax = new DoubleMinMax();
  // Cluster by labels
  Collection<Cluster<Model>> split = (new ByLabelOrAllInOneClustering()).run(database).getAllClusters();
  // global in-cluster min/max
  DoubleMinMax giminmax = new DoubleMinMax();
  // global other-cluster min/max
  DoubleMinMax gominmax = new DoubleMinMax();
  // in-cluster distances
  MeanVariance mimin = new MeanVariance();
  MeanVariance mimax = new MeanVariance();
  MeanVariance midif = new MeanVariance();
  // other-cluster distances
  MeanVariance momin = new MeanVariance();
  MeanVariance momax = new MeanVariance();
  MeanVariance modif = new MeanVariance();
  // Histogram
  final ObjHistogram<long[]> histogram;
  LOG.beginStep(stepprog, 1, "Prepare histogram.");
  if (exact) {
    gminmax = exactMinMax(relation, distFunc);
    histogram = new LongArrayStaticHistogram(numbin, gminmax.getMin(), gminmax.getMax(), 2);
  } else if (sampling) {
    gminmax = sampleMinMax(relation, distFunc);
    histogram = new LongArrayStaticHistogram(numbin, gminmax.getMin(), gminmax.getMax(), 2);
  } else {
    histogram = new AbstractObjDynamicHistogram<long[]>(numbin) {
      @Override
      protected long[] downsample(Object[] data, int start, int end, int size) {
        long[] ret = new long[2];
        for (int i = start; i < end; i++) {
          long[] existing = (long[]) data[i];
          if (existing != null) {
            for (int c = 0; c < 2; c++) {
              ret[c] += existing[c];
            }
          }
        }
        return ret;
      }

      @Override
      protected long[] aggregate(long[] first, long[] second) {
        for (int c = 0; c < 2; c++) {
          first[c] += second[c];
        }
        return first;
      }

      @Override
      protected long[] cloneForCache(long[] data) {
        return data.clone();
      }

      @Override
      protected long[] makeObject() {
        return new long[2];
      }
    };
  }
  LOG.beginStep(stepprog, 2, "Build histogram.");
  final FiniteProgress progress = LOG.isVerbose() ? new FiniteProgress("Distance computations", relation.size(), LOG) : null;
  // iterate per cluster
  final long[] incFirst = new long[] { 1L, 0L };
  final long[] incSecond = new long[] { 0L, 1L };
  for (Cluster<?> c1 : split) {
    for (DBIDIter id1 = c1.getIDs().iter(); id1.valid(); id1.advance()) {
      // in-cluster distances
      DoubleMinMax iminmax = new DoubleMinMax();
      for (DBIDIter iter2 = c1.getIDs().iter(); iter2.valid(); iter2.advance()) {
        // skip the point itself.
        if (DBIDUtil.equal(id1, iter2)) {
          continue;
        }
        double d = distFunc.distance(id1, iter2);
        histogram.putData(d, incFirst);
        iminmax.put(d);
      }
      // aggregate
      mimin.put(iminmax.getMin());
      mimax.put(iminmax.getMax());
      midif.put(iminmax.getDiff());
      // min/max
      giminmax.put(iminmax.getMin());
      giminmax.put(iminmax.getMax());
      // other-cluster distances
      DoubleMinMax ominmax = new DoubleMinMax();
      for (Cluster<?> c2 : split) {
        if (c2 == c1) {
          continue;
        }
        for (DBIDIter iter2 = c2.getIDs().iter(); iter2.valid(); iter2.advance()) {
          // skip the point itself (shouldn't happen though)
          if (DBIDUtil.equal(id1, iter2)) {
            continue;
          }
          double d = distFunc.distance(id1, iter2);
          histogram.putData(d, incSecond);
          ominmax.put(d);
        }
      }
      // aggregate
      momin.put(ominmax.getMin());
      momax.put(ominmax.getMax());
      modif.put(ominmax.getDiff());
      // min/max
      gominmax.put(ominmax.getMin());
      gominmax.put(ominmax.getMax());
      LOG.incrementProcessed(progress);
    }
  }
  LOG.ensureCompleted(progress);
  // Update values (only needed for sampling case).
  gminmax.put(gominmax);
  LOG.setCompleted(stepprog);
  // count the number of samples we have in the data
  long inum = 0;
  long onum = 0;
  for (ObjHistogram.Iter<long[]> iter = histogram.iter(); iter.valid(); iter.advance()) {
    inum += iter.getValue()[0];
    onum += iter.getValue()[1];
  }
  long bnum = inum + onum;
  Collection<double[]> binstat = new ArrayList<>(numbin);
  for (ObjHistogram.Iter<long[]> iter = histogram.iter(); iter.valid(); iter.advance()) {
    final long[] value = iter.getValue();
    final double icof = (inum == 0) ? 0 : ((double) value[0]) / inum / histogram.getBinsize();
    final double icaf = ((double) value[0]) / bnum / histogram.getBinsize();
    final double ocof = (onum == 0) ? 0 : ((double) value[1]) / onum / histogram.getBinsize();
    final double ocaf = ((double) value[1]) / bnum / histogram.getBinsize();
    binstat.add(new double[] { iter.getCenter(), icof, icaf, ocof, ocaf });
  }
  HistogramResult result = new HistogramResult("Distance Histogram", "distance-histogram", binstat);
  result.addHeader("Absolute minimum distance (abs): " + gminmax.getMin());
  result.addHeader("Absolute maximum distance (abs): " + gminmax.getMax());
  result.addHeader("In-Cluster minimum distance (abs, avg, stddev): " + giminmax.getMin() + " " + mimin.getMean() + " " + mimin.getSampleStddev());
  result.addHeader("In-Cluster maximum distance (abs, avg, stddev): " + giminmax.getMax() + " " + mimax.getMean() + " " + mimax.getSampleStddev());
  result.addHeader("Other-Cluster minimum distance (abs, avg, stddev): " + gominmax.getMin() + " " + momin.getMean() + " " + momin.getSampleStddev());
  result.addHeader("Other-Cluster maximum distance (abs, avg, stddev): " + gominmax.getMax() + " " + momax.getMean() + " " + momax.getSampleStddev());
  result.addHeader("Column description: bin center, in-cluster only frequency, in-cluster all frequency, other-cluster only frequency, other cluster all frequency");
  result.addHeader("In-cluster value count: " + inum + " other cluster value count: " + onum);
  return result;
}
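Besides the two-step StepProgress, this method uses the third progress type from the same package: a FiniteProgress with a known total, incremented once per object and closed with ensureCompleted. A minimal sketch of that pattern, with an illustrative class name and item count:

import de.lmu.ifi.dbs.elki.logging.Logging;
import de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress;

public class FiniteProgressSketch {
  private static final Logging LOG = Logging.getLogger(FiniteProgressSketch.class);

  public void run(int totalItems) {
    // Known total, so progress can be reported relative to it.
    FiniteProgress prog = LOG.isVerbose() ? new FiniteProgress("Items", totalItems, LOG) : null;
    for (int i = 0; i < totalItems; i++) {
      // ... per-item work ...
      LOG.incrementProcessed(prog); // null-safe
    }
    LOG.ensureCompleted(prog); // make sure the progress is reported as finished
  }
}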