Use of de.lmu.ifi.dbs.elki.result.outlier.OutlierResult in project elki by elki-project.
The class ComputeKNNOutlierScores, method runForEachK:
/**
 * Iterate over the k range.
 *
 * @param prefix Prefix string
 * @param startk Start k
 * @param stepk Step k
 * @param maxk Max k
 * @param runner Runner to run
 * @param out Output function
 */
private void runForEachK(String prefix, int startk, int stepk, int maxk, IntFunction<OutlierResult> runner, BiConsumer<String, OutlierResult> out) {
  if (isDisabled(prefix)) {
    LOG.verbose("Skipping (disabled): " + prefix);
    // Disabled
    return;
  }
  LOG.verbose("Running " + prefix);
  final int digits = (int) FastMath.ceil(FastMath.log10(maxk + 1));
  final String format = "%s-%0" + digits + "d";
  for (int k = startk; k <= maxk; k += stepk) {
    Duration time = LOG.newDuration(this.getClass().getCanonicalName() + "." + prefix + ".k" + k + ".runtime").begin();
    OutlierResult result = runner.apply(k);
    LOG.statistics(time.end());
    if (result != null) {
      out.accept(String.format(Locale.ROOT, format, prefix, k), result);
      result.getHierarchy().removeSubtree(result);
    }
  }
}
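To illustrate how runForEachK names its per-k results, here is a small standalone sketch (plain Java, not part of ELKI) that reproduces the zero-padded label construction for a hypothetical prefix and k range:

// Illustrative sketch (not ELKI code): the zero-padded per-k labels
// produced the same way as in runForEachK above.
import java.util.Locale;
import java.util.function.IntFunction;

public class KRangeLabelDemo {
  public static void main(String[] args) {
    String prefix = "KNN"; // hypothetical algorithm prefix
    int startk = 5, stepk = 5, maxk = 100;
    // Same label construction as runForEachK: enough digits for the largest k
    final int digits = (int) Math.ceil(Math.log10(maxk + 1));
    final String format = "%s-%0" + digits + "d";
    IntFunction<String> label = k -> String.format(Locale.ROOT, format, prefix, k);
    for (int k = startk; k <= maxk; k += stepk) {
      System.out.println(label.apply(k)); // e.g. "KNN-005", "KNN-010", ..., "KNN-100"
    }
  }
}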
Use of de.lmu.ifi.dbs.elki.result.outlier.OutlierResult in project elki by elki-project.
The class AutomaticEvaluation, method autoEvaluateOutliers:
protected void autoEvaluateOutliers(ResultHierarchy hier, Result newResult) {
  Collection<OutlierResult> outliers = ResultUtil.filterResults(hier, newResult, OutlierResult.class);
  if (LOG.isDebugging()) {
    LOG.debug("Number of new outlier results: " + outliers.size());
  }
  if (!outliers.isEmpty()) {
    Database db = ResultUtil.findDatabase(hier);
    ensureClusteringResult(db, db);
    Collection<Clustering<?>> clusterings = ResultUtil.filterResults(hier, db, Clustering.class);
    if (clusterings.isEmpty()) {
      LOG.warning("Could not find a clustering result, even after running 'ensureClusteringResult'?!?");
      return;
    }
    Clustering<?> basec = clusterings.iterator().next();
    // Find minority class label
    int min = Integer.MAX_VALUE;
    int total = 0;
    String label = null;
    if (basec.getAllClusters().size() > 1) {
      for (Cluster<?> c : basec.getAllClusters()) {
        final int csize = c.getIDs().size();
        total += csize;
        if (csize < min) {
          min = csize;
          label = c.getName();
        }
      }
    }
    if (label == null) {
      LOG.warning("Could not evaluate outlier results, as I could not find a minority label.");
      return;
    }
    if (min == 1) {
      LOG.warning("The minority class label had a single object. Try using 'ClassLabelFilter' to identify the class label column.");
    }
    if (min > 0.05 * total) {
      LOG.warning("The minority class I discovered (labeled '" + label + "') has " + (min * 100. / total) + "% of objects. Outlier classes should be more rare!");
    }
    LOG.verbose("Evaluating using minority class: " + label);
    Pattern pat = Pattern.compile("^" + Pattern.quote(label) + "$");
    // Evaluate rankings.
    new OutlierRankingEvaluation(pat).processNewResult(hier, newResult);
    // Compute ROC curve
    new OutlierROCCurve(pat).processNewResult(hier, newResult);
    // Compute Precision at k
    new OutlierPrecisionAtKCurve(pat, min << 1).processNewResult(hier, newResult);
    // Compute precision-recall curve
    new OutlierPrecisionRecallCurve(pat).processNewResult(hier, newResult);
    // Compute outlier histogram
    new ComputeOutlierHistogram(pat, 50, new LinearScaling(), false).processNewResult(hier, newResult);
  }
}
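The minority-class heuristic above can be sketched in isolation. The following standalone example (plain Java with hypothetical class sizes, not ELKI API) picks the smallest class as the presumed outlier label and warns when it covers more than 5% of the data:

// Minimal sketch of the minority-class heuristic used in autoEvaluateOutliers.
import java.util.Map;

public class MinorityClassDemo {
  public static void main(String[] args) {
    Map<String, Integer> classSizes = Map.of("normal", 950, "anomaly", 50); // hypothetical counts
    int min = Integer.MAX_VALUE, total = 0;
    String label = null;
    if (classSizes.size() > 1) { // need at least two classes to pick a minority
      for (Map.Entry<String, Integer> e : classSizes.entrySet()) {
        total += e.getValue();
        if (e.getValue() < min) {
          min = e.getValue();
          label = e.getKey();
        }
      }
    }
    if (label == null) {
      System.out.println("No minority label found; cannot evaluate.");
      return;
    }
    if (min > 0.05 * total) {
      System.out.println("Warning: minority class '" + label + "' covers "
          + (min * 100. / total) + "% of objects; outlier classes should be rarer.");
    }
    System.out.println("Evaluating using minority class: " + label + " (" + min + " of " + total + ")");
  }
}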
Use of de.lmu.ifi.dbs.elki.result.outlier.OutlierResult in project elki by elki-project.
The class OPTICSOF, method run:
/**
 * Perform OPTICS-based outlier detection.
 *
 * @param database Database
 * @param relation Relation
 * @return Outlier detection result
 */
public OutlierResult run(Database database, Relation<O> relation) {
  DistanceQuery<O> distQuery = database.getDistanceQuery(relation, getDistanceFunction());
  KNNQuery<O> knnQuery = database.getKNNQuery(distQuery, minpts);
  RangeQuery<O> rangeQuery = database.getRangeQuery(distQuery);
  DBIDs ids = relation.getDBIDs();
  // FIXME: implicit preprocessor.
  WritableDataStore<KNNList> nMinPts = DataStoreUtil.makeStorage(ids, DataStoreFactory.HINT_HOT | DataStoreFactory.HINT_TEMP, KNNList.class);
  WritableDoubleDataStore coreDistance = DataStoreUtil.makeDoubleStorage(ids, DataStoreFactory.HINT_HOT | DataStoreFactory.HINT_TEMP);
  WritableIntegerDataStore minPtsNeighborhoodSize = DataStoreUtil.makeIntegerStorage(ids, DataStoreFactory.HINT_HOT | DataStoreFactory.HINT_TEMP, -1);
  for (DBIDIter iditer = relation.iterDBIDs(); iditer.valid(); iditer.advance()) {
    KNNList minptsNeighbours = knnQuery.getKNNForDBID(iditer, minpts);
    double d = minptsNeighbours.getKNNDistance();
    nMinPts.put(iditer, minptsNeighbours);
    coreDistance.putDouble(iditer, d);
    minPtsNeighborhoodSize.put(iditer, rangeQuery.getRangeForDBID(iditer, d).size());
  }
  // Pass 2
  WritableDataStore<List<Double>> reachDistance = DataStoreUtil.makeStorage(ids, DataStoreFactory.HINT_HOT | DataStoreFactory.HINT_TEMP, List.class);
  WritableDoubleDataStore lrds = DataStoreUtil.makeDoubleStorage(ids, DataStoreFactory.HINT_HOT | DataStoreFactory.HINT_TEMP);
  for (DBIDIter iditer = relation.iterDBIDs(); iditer.valid(); iditer.advance()) {
    List<Double> core = new ArrayList<>();
    double lrd = 0;
    // TODO: optimize for double distances
    for (DoubleDBIDListIter neighbor = nMinPts.get(iditer).iter(); neighbor.valid(); neighbor.advance()) {
      double coreDist = coreDistance.doubleValue(neighbor);
      double dist = distQuery.distance(iditer, neighbor);
      double rd = MathUtil.max(coreDist, dist);
      lrd = rd + lrd;
      core.add(rd);
    }
    lrd = minPtsNeighborhoodSize.intValue(iditer) / lrd;
    reachDistance.put(iditer, core);
    lrds.putDouble(iditer, lrd);
  }
  // Pass 3
  DoubleMinMax ofminmax = new DoubleMinMax();
  WritableDoubleDataStore ofs = DataStoreUtil.makeDoubleStorage(ids, DataStoreFactory.HINT_STATIC);
  for (DBIDIter iditer = relation.iterDBIDs(); iditer.valid(); iditer.advance()) {
    double of = 0;
    for (DBIDIter neighbor = nMinPts.get(iditer).iter(); neighbor.valid(); neighbor.advance()) {
      double lrd = lrds.doubleValue(iditer);
      double lrdN = lrds.doubleValue(neighbor);
      of = of + lrdN / lrd;
    }
    of = of / minPtsNeighborhoodSize.intValue(iditer);
    ofs.putDouble(iditer, of);
    // update minimum and maximum
    ofminmax.put(of);
  }
  // Build result representation.
  DoubleRelation scoreResult = new MaterializedDoubleRelation("OPTICS Outlier Scores", "optics-outlier", ofs, relation.getDBIDs());
  OutlierScoreMeta scoreMeta = new QuotientOutlierScoreMeta(ofminmax.getMin(), ofminmax.getMax(), 0.0, Double.POSITIVE_INFINITY, 1.0);
  return new OutlierResult(scoreMeta, scoreResult);
}
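Conceptually, the OPTICS-OF score computed in pass 3 is the average ratio of the neighbors' local reachability densities to the point's own density. A minimal standalone sketch of that final step (plain Java with hypothetical lrd values, not ELKI code):

// Illustrative sketch: the OPTICS-OF score of a point is the average ratio of
// its neighbors' local reachability densities to its own.
public class OpticsOfScoreDemo {
  /** lrd of the query point and lrds of its MinPts-neighbors (hypothetical values). */
  static double outlierFactor(double lrd, double[] neighborLrds) {
    double of = 0;
    for (double lrdN : neighborLrds) {
      of += lrdN / lrd; // neighbors denser than the point push the score above 1
    }
    return of / neighborLrds.length; // scores near 1 = inlier, well above 1 = outlier
  }

  public static void main(String[] args) {
    System.out.println(outlierFactor(0.5, new double[] { 1.0, 1.2, 0.9 })); // ~2.07
  }
}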
Use of de.lmu.ifi.dbs.elki.result.outlier.OutlierResult in project elki by elki-project.
The class CBLOF, method run:
/**
 * Runs the CBLOF algorithm on the given database.
 *
 * @param database Database to query
 * @param relation Data to process
 * @return CBLOF outlier result
 */
public OutlierResult run(Database database, Relation<O> relation) {
  StepProgress stepprog = LOG.isVerbose() ? new StepProgress("CBLOF", 3) : null;
  DBIDs ids = relation.getDBIDs();
  LOG.beginStep(stepprog, 1, "Computing clustering.");
  Clustering<MeanModel> clustering = clusteringAlgorithm.run(database);
  LOG.beginStep(stepprog, 2, "Computing boundary between large and small clusters.");
  List<? extends Cluster<MeanModel>> clusters = clustering.getAllClusters();
  Collections.sort(clusters, new Comparator<Cluster<MeanModel>>() {
    @Override
    public int compare(Cluster<MeanModel> o1, Cluster<MeanModel> o2) {
      // Sort in descending order by size
      return Integer.compare(o2.size(), o1.size());
    }
  });
  int clusterBoundary = getClusterBoundary(relation, clusters);
  List<? extends Cluster<MeanModel>> largeClusters = clusters.subList(0, clusterBoundary + 1);
  List<? extends Cluster<MeanModel>> smallClusters = clusters.subList(clusterBoundary + 1, clusters.size());
  LOG.beginStep(stepprog, 3, "Computing Cluster-Based Local Outlier Factors (CBLOF).");
  WritableDoubleDataStore cblofs = DataStoreUtil.makeDoubleStorage(ids, DataStoreFactory.HINT_HOT | DataStoreFactory.HINT_DB);
  DoubleMinMax cblofMinMax = new DoubleMinMax();
  computeCBLOFs(relation, distance, cblofs, cblofMinMax, largeClusters, smallClusters);
  LOG.setCompleted(stepprog);
  DoubleRelation scoreResult = new MaterializedDoubleRelation("Cluster-Based Local Outlier Factor", "cblof-outlier", cblofs, ids);
  OutlierScoreMeta scoreMeta = new QuotientOutlierScoreMeta(cblofMinMax.getMin(), cblofMinMax.getMax(), 0.0, Double.POSITIVE_INFINITY, 1.0);
  return new OutlierResult(scoreMeta, scoreResult);
}
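The large/small split above sorts the clusters by size in descending order and cuts the list at a boundary index. A minimal sketch with hypothetical cluster sizes and a made-up boundary value (the real value comes from getClusterBoundary, which is not shown here):

// Minimal sketch of the descending-size sort and large/small cluster split.
import java.util.Arrays;
import java.util.Comparator;
import java.util.List;

public class ClusterSplitDemo {
  public static void main(String[] args) {
    List<Integer> clusterSizes = Arrays.asList(120, 15, 300, 8, 60); // hypothetical sizes
    clusterSizes.sort(Comparator.reverseOrder()); // descending by size: 300, 120, 60, 15, 8
    int clusterBoundary = 2; // hypothetical: index of the last "large" cluster
    List<Integer> large = clusterSizes.subList(0, clusterBoundary + 1); // [300, 120, 60]
    List<Integer> small = clusterSizes.subList(clusterBoundary + 1, clusterSizes.size()); // [15, 8]
    System.out.println("Large clusters: " + large + ", small clusters: " + small);
  }
}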
Use of de.lmu.ifi.dbs.elki.result.outlier.OutlierResult in project elki by elki-project.
The class ParallelKNNWeightOutlier, method run:
/**
 * Run the parallel kNN weight outlier detector.
 *
 * @param database Database to process
 * @param relation Relation to analyze
 * @return Outlier detection result
 */
public OutlierResult run(Database database, Relation<O> relation) {
  DBIDs ids = relation.getDBIDs();
  WritableDoubleDataStore store = DataStoreUtil.makeDoubleStorage(ids, DataStoreFactory.HINT_DB);
  DistanceQuery<O> distq = database.getDistanceQuery(relation, getDistanceFunction());
  KNNQuery<O> knnq = database.getKNNQuery(distq, k + 1);
  // Find kNN
  KNNProcessor<O> knnm = new KNNProcessor<>(k + 1, knnq);
  SharedObject<KNNList> knnv = new SharedObject<>();
  knnm.connectKNNOutput(knnv);
  // Extract outlier score
  KNNWeightProcessor kdistm = new KNNWeightProcessor(k + 1);
  SharedDouble kdistv = new SharedDouble();
  kdistm.connectKNNInput(knnv);
  kdistm.connectOutput(kdistv);
  // Store in output result
  WriteDoubleDataStoreProcessor storem = new WriteDoubleDataStoreProcessor(store);
  storem.connectInput(kdistv);
  // And gather statistics for metadata
  DoubleMinMaxProcessor mmm = new DoubleMinMaxProcessor();
  mmm.connectInput(kdistv);
  ParallelExecutor.run(ids, knnm, kdistm, storem, mmm);
  DoubleMinMax minmax = mmm.getMinMax();
  DoubleRelation scoreres = new MaterializedDoubleRelation("kNN weight Outlier Score", "knnw-outlier", store, ids);
  OutlierScoreMeta meta = new BasicOutlierScoreMeta(minmax.getMin(), minmax.getMax(), 0., Double.POSITIVE_INFINITY, 0.);
  return new OutlierResult(meta, scoreres);
}
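The kNN-weight score aggregated by KNNWeightProcessor is, conceptually, the sum of distances to the k nearest neighbors; k + 1 neighbors are queried above because each point is returned as its own nearest neighbor at distance 0. A standalone sketch of that score (plain Java with hypothetical distances, not ELKI code):

// Illustrative sketch: kNN-weight outlier score as the sum of the k smallest
// non-self distances; larger sums indicate sparser, more outlying points.
import java.util.Arrays;

public class KnnWeightDemo {
  /** Sum the k smallest distances (hypothetical distances to all other points). */
  static double knnWeight(double[] distancesToOthers, int k) {
    double[] d = distancesToOthers.clone();
    Arrays.sort(d);
    double sum = 0;
    for (int i = 0; i < k && i < d.length; i++) {
      sum += d[i];
    }
    return sum;
  }

  public static void main(String[] args) {
    System.out.println(knnWeight(new double[] { 0.4, 1.2, 0.9, 3.5 }, 2)); // 0.4 + 0.9 = 1.3
  }
}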