Use of de.lmu.ifi.dbs.elki.math.DoubleMinMax in project elki by elki-project.
The class ALOCI, method run().
public OutlierResult run(Database database, Relation<O> relation) {
  final int dim = RelationUtil.dimensionality(relation);
  final Random random = rnd.getSingleThreadedRandom();
  FiniteProgress progressPreproc = LOG.isVerbose() ? new FiniteProgress("Build aLOCI quadtrees", g, LOG) : null;
  // Compute the extent of the dataset.
  double[] min, max;
  {
    double[][] hbbs = RelationUtil.computeMinMax(relation);
    min = hbbs[0];
    max = hbbs[1];
    double maxd = 0;
    for (int i = 0; i < dim; i++) {
      maxd = MathUtil.max(maxd, max[i] - min[i]);
    }
    // Enlarge the bounding box to have equal side lengths.
    for (int i = 0; i < dim; i++) {
      double diff = (maxd - (max[i] - min[i])) * .5;
      min[i] -= diff;
      max[i] += diff;
    }
  }
  List<ALOCIQuadTree> qts = new ArrayList<>(g);
  double[] nshift = new double[dim];
  ALOCIQuadTree qt = new ALOCIQuadTree(min, max, nshift, nmin, relation);
  qts.add(qt);
  LOG.incrementProcessed(progressPreproc);
  /*
   * Create the remaining g-1 shifted quadtrees. This is not clearly described
   * in the paper and is therefore implemented in a way that achieves good
   * results on the test data.
   */
  for (int shift = 1; shift < g; shift++) {
    double[] svec = new double[dim];
    for (int i = 0; i < dim; i++) {
      svec[i] = random.nextDouble() * (max[i] - min[i]);
    }
    qt = new ALOCIQuadTree(min, max, svec, nmin, relation);
    qts.add(qt);
    LOG.incrementProcessed(progressPreproc);
  }
  LOG.ensureCompleted(progressPreproc);
  // aLOCI main loop: evaluate each object.
  FiniteProgress progressLOCI = LOG.isVerbose() ? new FiniteProgress("Compute aLOCI scores", relation.size(), LOG) : null;
  WritableDoubleDataStore mdef_norm = DataStoreUtil.makeDoubleStorage(relation.getDBIDs(), DataStoreFactory.HINT_STATIC);
  DoubleMinMax minmax = new DoubleMinMax();
  for (DBIDIter iditer = relation.iterDBIDs(); iditer.valid(); iditer.advance()) {
    final O obj = relation.get(iditer);
    double maxmdefnorm = 0;
    // For each level
    for (int l = 0;; l++) {
      // Find the closest C_i
      Node ci = null;
      for (int i = 0; i < g; i++) {
        Node ci2 = qts.get(i).findClosestNode(obj, l);
        if (ci2.getLevel() != l) {
          continue;
        }
        // TODO: always use Manhattan?
        if (ci == null || distFunc.distance(ci, obj) > distFunc.distance(ci2, obj)) {
          ci = ci2;
        }
      }
      // LOG.debug("level:" + (ci != null ? ci.getLevel() : -1) + " l:" + l);
      if (ci == null) {
        // No matching tree for this level.
        break;
      }
      // Find the closest C_j
      Node cj = null;
      for (int i = 0; i < g; i++) {
        Node cj2 = qts.get(i).findClosestNode(ci, l - alpha);
        // TODO: allow higher levels or not?
        if (cj != null && cj2.getLevel() < cj.getLevel()) {
          continue;
        }
        // TODO: always use Manhattan?
        if (cj == null || distFunc.distance(cj, ci) > distFunc.distance(cj2, ci)) {
          cj = cj2;
        }
      }
      // LOG.debug("level:" + (cj != null ? cj.getLevel() : -1) + " l:" + l);
      if (cj == null) {
        // No matching tree for this level.
        continue;
      }
      double mdefnorm = calculate_MDEF_norm(cj, ci);
      // LOG.warning("level:" + ci.getLevel() + "/" + cj.getLevel() +
      // " mdef: " + mdefnorm);
      maxmdefnorm = MathUtil.max(maxmdefnorm, mdefnorm);
    }
    // Store results
    mdef_norm.putDouble(iditer, maxmdefnorm);
    minmax.put(maxmdefnorm);
    LOG.incrementProcessed(progressLOCI);
  }
  LOG.ensureCompleted(progressLOCI);
  DoubleRelation scoreResult = new MaterializedDoubleRelation("aLOCI normalized MDEF", "aloci-mdef-outlier", mdef_norm, relation.getDBIDs());
  OutlierScoreMeta scoreMeta = new QuotientOutlierScoreMeta(minmax.getMin(), minmax.getMax(), 0.0, Double.POSITIVE_INFINITY);
  OutlierResult result = new OutlierResult(scoreMeta, scoreResult);
  return result;
}
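The DoubleMinMax accumulator is the reason this method appears on this page: it collects the observed score range in a single pass so that the QuotientOutlierScoreMeta can report the actual minimum and maximum of the normalized MDEF scores. A minimal, self-contained sketch of that accumulator pattern, with made-up score values:

import de.lmu.ifi.dbs.elki.math.DoubleMinMax;

public class MinMaxSketch {
  public static void main(String[] args) {
    // Hypothetical per-object scores; in ALOCI these are the per-object
    // maxima of the normalized MDEF over all levels.
    double[] scores = { 0.0, 0.7, 2.4, 1.1 };
    DoubleMinMax minmax = new DoubleMinMax();
    for (double s : scores) {
      minmax.put(s); // one pass tracks both extremes
    }
    // The observed range is then handed to the score metadata, e.g.
    // new QuotientOutlierScoreMeta(minmax.getMin(), minmax.getMax(), 0.0, Double.POSITIVE_INFINITY).
    System.out.println(minmax.getMin() + " .. " + minmax.getMax()); // prints 0.0 .. 2.4
  }
}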
Use of de.lmu.ifi.dbs.elki.math.DoubleMinMax in project elki by elki-project.
The class FlexibleLOF, method doRunInTime().
/**
 * Performs the Generalized LOF_SCORE algorithm on the given database and
 * returns a {@link FlexibleLOF.LOFResult} encapsulating information that may
 * be needed by an OnlineLOF algorithm.
 *
 * @param ids Object ids
 * @param kNNRefer the kNN query w.r.t. reference neighborhood distance
 *        function
 * @param kNNReach the kNN query w.r.t. reachability distance function
 * @param stepprog Progress logger
 * @return LOF result
 */
protected LOFResult<O> doRunInTime(DBIDs ids, KNNQuery<O> kNNRefer, KNNQuery<O> kNNReach, StepProgress stepprog) {
  // Assert we got something
  if (kNNRefer == null) {
    throw new AbortException("No kNN queries supported by database for reference neighborhood distance function.");
  }
  if (kNNReach == null) {
    throw new AbortException("No kNN queries supported by database for reachability distance function.");
  }
  // Compute LRDs
  LOG.beginStep(stepprog, 2, "Computing LRDs.");
  WritableDoubleDataStore lrds = DataStoreUtil.makeDoubleStorage(ids, DataStoreFactory.HINT_HOT | DataStoreFactory.HINT_TEMP);
  computeLRDs(kNNReach, ids, lrds);
  // Compute the LOF_SCORE of each database object.
  LOG.beginStep(stepprog, 3, "Computing LOFs.");
  WritableDoubleDataStore lofs = DataStoreUtil.makeDoubleStorage(ids, DataStoreFactory.HINT_STATIC);
  // Track the maximum value for normalization.
  DoubleMinMax lofminmax = new DoubleMinMax();
  computeLOFs(kNNRefer, ids, lrds, lofs, lofminmax);
  LOG.setCompleted(stepprog);
  // Build result representation.
  DoubleRelation scoreResult = new MaterializedDoubleRelation("Local Outlier Factor", "lof-outlier", lofs, ids);
  OutlierScoreMeta scoreMeta = new QuotientOutlierScoreMeta(lofminmax.getMin(), lofminmax.getMax(), 0.0, Double.POSITIVE_INFINITY, 1.0);
  OutlierResult result = new OutlierResult(scoreMeta, scoreResult);
  return new LOFResult<>(result, kNNRefer, kNNReach, lrds, lofs);
}
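The helper methods computeLRDs and computeLOFs are not included in this excerpt. To show how the lofminmax accumulator gets filled, here is a hedged sketch of a computeLOFs-style loop; it is not the actual FlexibleLOF implementation, the k parameter stands in for the class's reference neighborhood size, edge cases such as zero or infinite LRDs are simplified, and it is meant to live inside a FlexibleLOF-like class using the same ELKI iterator and data-store types visible in the snippets on this page.

// Sketch only: fill `lofs` with the ratio of the average neighbor LRD to the
// point's own LRD, and track the observed range with DoubleMinMax.
private void computeLOFsSketch(KNNQuery<O> knnRefer, int k, DBIDs ids, DoubleDataStore lrds, WritableDoubleDataStore lofs, DoubleMinMax lofminmax) {
  for (DBIDIter iter = ids.iter(); iter.valid(); iter.advance()) {
    final double lrdp = lrds.doubleValue(iter);
    double sum = 0;
    int count = 0;
    KNNList neighbors = knnRefer.getKNNForDBID(iter, k);
    for (DoubleDBIDListIter neighbor = neighbors.iter(); neighbor.valid(); neighbor.advance()) {
      if (DBIDUtil.equal(neighbor, iter)) {
        continue; // skip the query point itself
      }
      sum += lrds.doubleValue(neighbor);
      count++;
    }
    // LOF is the average neighbor LRD divided by the point's own LRD.
    double lof = (lrdp > 0 && count > 0) ? sum / (count * lrdp) : 1.0;
    lofs.putDouble(iter, lof);
    lofminmax.put(lof); // the range feeds the QuotientOutlierScoreMeta above
  }
}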
Use of de.lmu.ifi.dbs.elki.math.DoubleMinMax in project elki by elki-project.
The class KDEOS, method run().
/**
 * Run the KDEOS outlier detection algorithm.
 *
 * @param database Database to query
 * @param rel Relation to process
 * @return Outlier detection result
 */
public OutlierResult run(Database database, Relation<O> rel) {
  final DBIDs ids = rel.getDBIDs();
  LOG.verbose("Running kNN preprocessor.");
  KNNQuery<O> knnq = DatabaseUtil.precomputedKNNQuery(database, rel, getDistanceFunction(), kmax + 1);
  // Initialize store for densities
  WritableDataStore<double[]> densities = DataStoreUtil.makeStorage(ids, DataStoreFactory.HINT_HOT | DataStoreFactory.HINT_TEMP, double[].class);
  estimateDensities(rel, knnq, ids, densities);
  // Compute scores:
  WritableDoubleDataStore kofs = DataStoreUtil.makeDoubleStorage(ids, DataStoreFactory.HINT_DB);
  DoubleMinMax minmax = new DoubleMinMax();
  computeOutlierScores(knnq, ids, densities, kofs, minmax);
  DoubleRelation scoreres = new MaterializedDoubleRelation("Kernel Density Estimation Outlier Scores", "kdeos-outlier", kofs, ids);
  OutlierScoreMeta meta = new ProbabilisticOutlierScore(minmax.getMin(), minmax.getMax());
  return new OutlierResult(meta, scoreres);
}
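Here the scores are reported with a ProbabilisticOutlierScore meta, so minmax.getMin() and minmax.getMax() merely document the observed range. If one nevertheless wanted to rescale an arbitrary score store into [0, 1] using a range collected this way, a generic post-processing sketch (not part of KDEOS; it reuses the variable names from the method above) would look like this:

// Generic min-max rescaling after the range has been collected.
double min = minmax.getMin(), max = minmax.getMax(), range = max - min;
for (DBIDIter iter = ids.iter(); iter.valid(); iter.advance()) {
  double raw = kofs.doubleValue(iter);
  // Map the observed range linearly onto [0, 1]; a degenerate range maps to 0.5.
  kofs.putDouble(iter, range > 0 ? (raw - min) / range : 0.5);
}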
Use of de.lmu.ifi.dbs.elki.math.DoubleMinMax in project elki by elki-project.
The class LDOF, method run().
/**
 * Run the algorithm.
 *
 * @param database Database to process
 * @param relation Relation to process
 * @return Outlier result
 */
public OutlierResult run(Database database, Relation<O> relation) {
  DistanceQuery<O> distFunc = database.getDistanceQuery(relation, getDistanceFunction());
  KNNQuery<O> knnQuery = database.getKNNQuery(distFunc, k);
  // Track the maximum value for normalization.
  DoubleMinMax ldofminmax = new DoubleMinMax();
  // Store for the LDOF values.
  WritableDoubleDataStore ldofs = DataStoreUtil.makeDoubleStorage(relation.getDBIDs(), DataStoreFactory.HINT_HOT | DataStoreFactory.HINT_TEMP);
  // Compute the LDOF score of each database object.
  if (LOG.isVerbose()) {
    LOG.verbose("Computing LDOFs");
  }
  FiniteProgress progressLDOFs = LOG.isVerbose() ? new FiniteProgress("LDOF for objects", relation.size(), LOG) : null;
  Mean dxp = new Mean(), Dxp = new Mean();
  for (DBIDIter iditer = relation.iterDBIDs(); iditer.valid(); iditer.advance()) {
    KNNList neighbors = knnQuery.getKNNForDBID(iditer, k);
    dxp.reset();
    Dxp.reset();
    DoubleDBIDListIter neighbor1 = neighbors.iter(), neighbor2 = neighbors.iter();
    for (; neighbor1.valid(); neighbor1.advance()) {
      // Skip the point itself.
      if (DBIDUtil.equal(neighbor1, iditer)) {
        continue;
      }
      dxp.put(neighbor1.doubleValue());
      for (neighbor2.seek(neighbor1.getOffset() + 1); neighbor2.valid(); neighbor2.advance()) {
        // Skip the point itself.
        if (DBIDUtil.equal(neighbor2, iditer)) {
          continue;
        }
        Dxp.put(distFunc.distance(neighbor1, neighbor2));
      }
    }
    double ldof = dxp.getMean() / Dxp.getMean();
    if (Double.isNaN(ldof) || Double.isInfinite(ldof)) {
      ldof = 1.0;
    }
    ldofs.putDouble(iditer, ldof);
    // Update the maximum.
    ldofminmax.put(ldof);
    LOG.incrementProcessed(progressLDOFs);
  }
  LOG.ensureCompleted(progressLDOFs);
  // Build result representation.
  DoubleRelation scoreResult = new MaterializedDoubleRelation("LDOF Outlier Score", "ldof-outlier", ldofs, relation.getDBIDs());
  OutlierScoreMeta scoreMeta = new QuotientOutlierScoreMeta(ldofminmax.getMin(), ldofminmax.getMax(), 0.0, Double.POSITIVE_INFINITY, LDOF_BASELINE);
  return new OutlierResult(scoreMeta, scoreResult);
}
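The LDOF value computed above is the ratio of two means: dxp, the mean distance from the point to its k nearest neighbors, and Dxp, the mean pairwise distance among those neighbors. A tiny worked example of that ratio with made-up distances, using the same ELKI Mean class:

import de.lmu.ifi.dbs.elki.math.Mean;

public class LdofRatioSketch {
  public static void main(String[] args) {
    Mean dxp = new Mean(), Dxp = new Mean();
    for (double d : new double[] { 4.0, 5.0, 6.0 }) {
      dxp.put(d); // distances from the point to its neighbors
    }
    for (double d : new double[] { 1.0, 1.5, 2.0 }) {
      Dxp.put(d); // pairwise distances among the neighbors
    }
    // 5.0 / 1.5 = 3.33...: the point is much farther from its neighbors
    // than they are from each other, i.e. it looks like an outlier.
    System.out.println(dxp.getMean() / Dxp.getMean());
  }
}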
Use of de.lmu.ifi.dbs.elki.math.DoubleMinMax in project elki by elki-project.
The class ExternalDoubleOutlierScore, method run().
/**
 * Run the algorithm.
 *
 * @param database Database to use
 * @param relation Relation to use
 * @return Result
 */
public OutlierResult run(Database database, Relation<?> relation) {
  WritableDoubleDataStore scores = DataStoreUtil.makeDoubleStorage(relation.getDBIDs(), DataStoreFactory.HINT_STATIC);
  DoubleMinMax minmax = new DoubleMinMax();
  try (//
      InputStream in = FileUtil.tryGzipInput(new FileInputStream(file));
      TokenizedReader reader = CSVReaderFormat.DEFAULT_FORMAT.makeReader()) {
    Tokenizer tokenizer = reader.getTokenizer();
    CharSequence buf = reader.getBuffer();
    Matcher mi = idpattern.matcher(buf), ms = scorepattern.matcher(buf);
    reader.reset(in);
    while (reader.nextLineExceptComments()) {
      Integer id = null;
      double score = Double.NaN;
      for (; /* initialized by nextLineExceptComments */
          tokenizer.valid(); tokenizer.advance()) {
        mi.region(tokenizer.getStart(), tokenizer.getEnd());
        ms.region(tokenizer.getStart(), tokenizer.getEnd());
        final boolean mif = mi.find();
        final boolean msf = ms.find();
        if (mif && msf) {
          throw new AbortException("ID pattern and score pattern both match value: " + tokenizer.getSubstring());
        }
        if (mif) {
          if (id != null) {
            throw new AbortException("ID pattern matched twice: previous value " + id + " second value: " + tokenizer.getSubstring());
          }
          id = ParseUtil.parseIntBase10(buf, mi.end(), tokenizer.getEnd());
        }
        if (msf) {
          if (!Double.isNaN(score)) {
            throw new AbortException("Score pattern matched twice: previous value " + score + " second value: " + tokenizer.getSubstring());
          }
          score = ParseUtil.parseDouble(buf, ms.end(), tokenizer.getEnd());
        }
      }
      if (id != null && !Double.isNaN(score)) {
        scores.putDouble(DBIDUtil.importInteger(id), score);
        minmax.put(score);
      } else if (id == null && Double.isNaN(score)) {
        LOG.warning("Line matched neither ID nor score nor comment: " + reader.getLineNumber());
      } else {
        throw new AbortException("Line matched only the ID or only the score pattern: " + reader.getLineNumber());
      }
    }
  } catch (IOException e) {
    throw new AbortException("Could not load outlier scores: " + e.getMessage() + " when loading " + file, e);
  }
  OutlierScoreMeta meta;
  if (inverted) {
    meta = new InvertedOutlierScoreMeta(minmax.getMin(), minmax.getMax());
  } else {
    meta = new BasicOutlierScoreMeta(minmax.getMin(), minmax.getMax());
  }
  DoubleRelation scoresult = new MaterializedDoubleRelation("External Outlier", "external-outlier", scores, relation.getDBIDs());
  OutlierResult or = new OutlierResult(meta, scoresult);
  // Apply scaling
  if (scaling instanceof OutlierScalingFunction) {
    ((OutlierScalingFunction) scaling).prepare(or);
  }
  DoubleMinMax mm = new DoubleMinMax();
  for (DBIDIter iditer = relation.iterDBIDs(); iditer.valid(); iditer.advance()) {
    double val = scoresult.doubleValue(iditer);
    val = scaling.getScaled(val);
    scores.putDouble(iditer, val);
    mm.put(val);
  }
  meta = new BasicOutlierScoreMeta(mm.getMin(), mm.getMax());
  or = new OutlierResult(meta, scoresult);
  return or;
}
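The token loop above matches every token of a line against both the ID pattern and the score pattern, and parses the value that follows the match; each line must yield exactly one ID and one score. The same idea in a self-contained form, using plain java.util.regex (the "ID=" and "score=" prefixes are hypothetical examples; in the class above, idpattern and scorepattern are fields of the algorithm):

import java.util.regex.Matcher;
import java.util.regex.Pattern;

public class PatternSketch {
  public static void main(String[] args) {
    Pattern idpattern = Pattern.compile("^ID="), scorepattern = Pattern.compile("^score=");
    String[] tokens = { "ID=42", "score=0.93" }; // tokens of one input line
    Integer id = null;
    double score = Double.NaN;
    for (String tok : tokens) {
      Matcher mi = idpattern.matcher(tok), ms = scorepattern.matcher(tok);
      if (mi.find()) {
        id = Integer.parseInt(tok.substring(mi.end())); // text after the ID prefix
      } else if (ms.find()) {
        score = Double.parseDouble(tok.substring(ms.end())); // text after the score prefix
      }
    }
    System.out.println(id + " -> " + score); // 42 -> 0.93
  }
}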