use of de.lmu.ifi.dbs.elki.database.ids.DBIDIter in project elki by elki-project.
the class IDOS method computeIDOS.
/**
* Computes all IDOS scores.
*
* @param ids the DBIDs to process
* @param knnQ the KNN query
* @param intDims Precomputed intrinsic dimensionalities
* @param idosminmax Output of minimum and maximum, for metadata
* @return ID scores
*/
protected DoubleDataStore computeIDOS(DBIDs ids, KNNQuery<O> knnQ, DoubleDataStore intDims, DoubleMinMax idosminmax) {
WritableDoubleDataStore ldms = DataStoreUtil.makeDoubleStorage(ids, DataStoreFactory.HINT_STATIC);
FiniteProgress prog = LOG.isVerbose() ? new FiniteProgress("ID Outlier Scores for objects", ids.size(), LOG) : null;
for (DBIDIter iter = ids.iter(); iter.valid(); iter.advance()) {
final KNNList neighbors = knnQ.getKNNForDBID(iter, k_r);
double sum = 0.;
int cnt = 0;
for (DoubleDBIDListIter neighbor = neighbors.iter(); neighbor.valid(); neighbor.advance()) {
if (DBIDUtil.equal(iter, neighbor)) {
continue;
}
final double id = intDims.doubleValue(neighbor);
sum += id > 0 ? 1.0 / id : 0.;
if (++cnt == k_r) {
// Always stop after at most k_r elements.
break;
}
}
final double id_q = intDims.doubleValue(iter);
final double idos = id_q > 0 ? id_q * sum / cnt : 0.;
ldms.putDouble(iter, idos);
idosminmax.put(idos);
LOG.incrementProcessed(prog);
}
LOG.ensureCompleted(prog);
return ldms;
}
use of de.lmu.ifi.dbs.elki.database.ids.DBIDIter in project elki by elki-project.
the class IntrinsicDimensionalityOutlier method run.
/**
* Run the algorithm
*
* @param database Database
* @param relation Data relation
* @return Outlier result
*/
public OutlierResult run(Database database, Relation<O> relation) {
final DistanceQuery<O> distanceQuery = database.getDistanceQuery(relation, getDistanceFunction());
final KNNQuery<O> knnQuery = database.getKNNQuery(distanceQuery, k + 1);
FiniteProgress prog = LOG.isVerbose() ? new FiniteProgress("kNN distance for objects", relation.size(), LOG) : null;
DoubleMinMax minmax = new DoubleMinMax();
WritableDoubleDataStore id_score = DataStoreUtil.makeDoubleStorage(relation.getDBIDs(), DataStoreFactory.HINT_STATIC);
for (DBIDIter iditer = relation.iterDBIDs(); iditer.valid(); iditer.advance()) {
double id = 0.;
try {
id = estimator.estimate(knnQuery, iditer, k + 1);
} catch (ArithmeticException e) {
id = 0.;
}
id_score.putDouble(iditer, id);
minmax.put(id);
LOG.incrementProcessed(prog);
}
LOG.ensureCompleted(prog);
DoubleRelation scoreres = new MaterializedDoubleRelation("Intrinsic dimensionality", "id-score", id_score, relation.getDBIDs());
OutlierScoreMeta meta = new BasicOutlierScoreMeta(minmax.getMin(), minmax.getMax(), 0.0, Double.POSITIVE_INFINITY, 0.0);
return new OutlierResult(meta, scoreres);
}
use of de.lmu.ifi.dbs.elki.database.ids.DBIDIter in project elki by elki-project.
the class ALOCI method run.
public OutlierResult run(Database database, Relation<O> relation) {
final int dim = RelationUtil.dimensionality(relation);
final Random random = rnd.getSingleThreadedRandom();
FiniteProgress progressPreproc = LOG.isVerbose() ? new FiniteProgress("Build aLOCI quadtress", g, LOG) : null;
// Compute extend of dataset.
double[] min, max;
{
double[][] hbbs = RelationUtil.computeMinMax(relation);
min = hbbs[0];
max = hbbs[1];
double maxd = 0;
for (int i = 0; i < dim; i++) {
maxd = MathUtil.max(maxd, max[i] - min[i]);
}
// Enlarge bounding box to have equal lengths.
for (int i = 0; i < dim; i++) {
double diff = (maxd - (max[i] - min[i])) * .5;
min[i] -= diff;
max[i] += diff;
}
}
List<ALOCIQuadTree> qts = new ArrayList<>(g);
double[] nshift = new double[dim];
ALOCIQuadTree qt = new ALOCIQuadTree(min, max, nshift, nmin, relation);
qts.add(qt);
LOG.incrementProcessed(progressPreproc);
/*
* create the remaining g-1 shifted QuadTrees. This not clearly described in
* the paper and therefore implemented in a way that achieves good results
* with the test data.
*/
for (int shift = 1; shift < g; shift++) {
double[] svec = new double[dim];
for (int i = 0; i < dim; i++) {
svec[i] = random.nextDouble() * (max[i] - min[i]);
}
qt = new ALOCIQuadTree(min, max, svec, nmin, relation);
qts.add(qt);
LOG.incrementProcessed(progressPreproc);
}
LOG.ensureCompleted(progressPreproc);
// aLOCI main loop: evaluate
FiniteProgress progressLOCI = LOG.isVerbose() ? new FiniteProgress("Compute aLOCI scores", relation.size(), LOG) : null;
WritableDoubleDataStore mdef_norm = DataStoreUtil.makeDoubleStorage(relation.getDBIDs(), DataStoreFactory.HINT_STATIC);
DoubleMinMax minmax = new DoubleMinMax();
for (DBIDIter iditer = relation.iterDBIDs(); iditer.valid(); iditer.advance()) {
final O obj = relation.get(iditer);
double maxmdefnorm = 0;
// For each level
for (int l = 0; ; l++) {
// Find the closest C_i
Node ci = null;
for (int i = 0; i < g; i++) {
Node ci2 = qts.get(i).findClosestNode(obj, l);
if (ci2.getLevel() != l) {
continue;
}
// TODO: always use manhattan?
if (ci == null || distFunc.distance(ci, obj) > distFunc.distance(ci2, obj)) {
ci = ci2;
}
}
// LOG.debug("level:" + (ci != null ? ci.getLevel() : -1) +" l:"+l);
if (ci == null) {
// no matching tree for this level.
break;
}
// Find the closest C_j
Node cj = null;
for (int i = 0; i < g; i++) {
Node cj2 = qts.get(i).findClosestNode(ci, l - alpha);
// TODO: allow higher levels or not?
if (cj != null && cj2.getLevel() < cj.getLevel()) {
continue;
}
// TODO: always use manhattan?
if (cj == null || distFunc.distance(cj, ci) > distFunc.distance(cj2, ci)) {
cj = cj2;
}
}
// LOG.debug("level:" + (cj != null ? cj.getLevel() : -1) +" l:"+l);
if (cj == null) {
// no matching tree for this level.
continue;
}
double mdefnorm = calculate_MDEF_norm(cj, ci);
// LOG.warning("level:" + ci.getLevel() + "/" + cj.getLevel() +
// " mdef: " + mdefnorm);
maxmdefnorm = MathUtil.max(maxmdefnorm, mdefnorm);
}
// Store results
mdef_norm.putDouble(iditer, maxmdefnorm);
minmax.put(maxmdefnorm);
LOG.incrementProcessed(progressLOCI);
}
LOG.ensureCompleted(progressLOCI);
DoubleRelation scoreResult = new MaterializedDoubleRelation("aLOCI normalized MDEF", "aloci-mdef-outlier", mdef_norm, relation.getDBIDs());
OutlierScoreMeta scoreMeta = new QuotientOutlierScoreMeta(minmax.getMin(), minmax.getMax(), 0.0, Double.POSITIVE_INFINITY);
OutlierResult result = new OutlierResult(scoreMeta, scoreResult);
return result;
}
use of de.lmu.ifi.dbs.elki.database.ids.DBIDIter in project elki by elki-project.
the class LDOF method run.
/**
* Run the algorithm
*
* @param database Database to process
* @param relation Relation to process
* @return Outlier result
*/
public OutlierResult run(Database database, Relation<O> relation) {
DistanceQuery<O> distFunc = database.getDistanceQuery(relation, getDistanceFunction());
KNNQuery<O> knnQuery = database.getKNNQuery(distFunc, k);
// track the maximum value for normalization
DoubleMinMax ldofminmax = new DoubleMinMax();
// compute the ldof values
WritableDoubleDataStore ldofs = DataStoreUtil.makeDoubleStorage(relation.getDBIDs(), DataStoreFactory.HINT_HOT | DataStoreFactory.HINT_TEMP);
// compute LOF_SCORE of each db object
if (LOG.isVerbose()) {
LOG.verbose("Computing LDOFs");
}
FiniteProgress progressLDOFs = LOG.isVerbose() ? new FiniteProgress("LDOF for objects", relation.size(), LOG) : null;
Mean dxp = new Mean(), Dxp = new Mean();
for (DBIDIter iditer = relation.iterDBIDs(); iditer.valid(); iditer.advance()) {
KNNList neighbors = knnQuery.getKNNForDBID(iditer, k);
dxp.reset();
Dxp.reset();
DoubleDBIDListIter neighbor1 = neighbors.iter(), neighbor2 = neighbors.iter();
for (; neighbor1.valid(); neighbor1.advance()) {
// skip the point itself
if (DBIDUtil.equal(neighbor1, iditer)) {
continue;
}
dxp.put(neighbor1.doubleValue());
for (neighbor2.seek(neighbor1.getOffset() + 1); neighbor2.valid(); neighbor2.advance()) {
// skip the point itself
if (DBIDUtil.equal(neighbor2, iditer)) {
continue;
}
Dxp.put(distFunc.distance(neighbor1, neighbor2));
}
}
double ldof = dxp.getMean() / Dxp.getMean();
if (Double.isNaN(ldof) || Double.isInfinite(ldof)) {
ldof = 1.0;
}
ldofs.putDouble(iditer, ldof);
// update maximum
ldofminmax.put(ldof);
LOG.incrementProcessed(progressLDOFs);
}
LOG.ensureCompleted(progressLDOFs);
// Build result representation.
DoubleRelation scoreResult = new MaterializedDoubleRelation("LDOF Outlier Score", "ldof-outlier", ldofs, relation.getDBIDs());
OutlierScoreMeta scoreMeta = new QuotientOutlierScoreMeta(ldofminmax.getMin(), ldofminmax.getMax(), 0.0, Double.POSITIVE_INFINITY, LDOF_BASELINE);
return new OutlierResult(scoreMeta, scoreResult);
}
use of de.lmu.ifi.dbs.elki.database.ids.DBIDIter in project elki by elki-project.
the class ExternalDoubleOutlierScore method run.
/**
* Run the algorithm.
*
* @param database Database to use
* @param relation Relation to use
* @return Result
*/
public OutlierResult run(Database database, Relation<?> relation) {
WritableDoubleDataStore scores = DataStoreUtil.makeDoubleStorage(relation.getDBIDs(), DataStoreFactory.HINT_STATIC);
DoubleMinMax minmax = new DoubleMinMax();
try (//
InputStream in = FileUtil.tryGzipInput(new FileInputStream(file));
TokenizedReader reader = CSVReaderFormat.DEFAULT_FORMAT.makeReader()) {
Tokenizer tokenizer = reader.getTokenizer();
CharSequence buf = reader.getBuffer();
Matcher mi = idpattern.matcher(buf), ms = scorepattern.matcher(buf);
reader.reset(in);
while (reader.nextLineExceptComments()) {
Integer id = null;
double score = Double.NaN;
for (; /* initialized by nextLineExceptComments */
tokenizer.valid(); tokenizer.advance()) {
mi.region(tokenizer.getStart(), tokenizer.getEnd());
ms.region(tokenizer.getStart(), tokenizer.getEnd());
final boolean mif = mi.find();
final boolean msf = ms.find();
if (mif && msf) {
throw new AbortException("ID pattern and score pattern both match value: " + tokenizer.getSubstring());
}
if (mif) {
if (id != null) {
throw new AbortException("ID pattern matched twice: previous value " + id + " second value: " + tokenizer.getSubstring());
}
id = ParseUtil.parseIntBase10(buf, mi.end(), tokenizer.getEnd());
}
if (msf) {
if (!Double.isNaN(score)) {
throw new AbortException("Score pattern matched twice: previous value " + score + " second value: " + tokenizer.getSubstring());
}
score = ParseUtil.parseDouble(buf, ms.end(), tokenizer.getEnd());
}
}
if (id != null && !Double.isNaN(score)) {
scores.putDouble(DBIDUtil.importInteger(id), score);
minmax.put(score);
} else if (id == null && Double.isNaN(score)) {
LOG.warning("Line did not match either ID nor score nor comment: " + reader.getLineNumber());
} else {
throw new AbortException("Line matched only ID or only SCORE patterns: " + reader.getLineNumber());
}
}
} catch (IOException e) {
throw new AbortException("Could not load outlier scores: " + e.getMessage() + " when loading " + file, e);
}
OutlierScoreMeta meta;
if (inverted) {
meta = new InvertedOutlierScoreMeta(minmax.getMin(), minmax.getMax());
} else {
meta = new BasicOutlierScoreMeta(minmax.getMin(), minmax.getMax());
}
DoubleRelation scoresult = new MaterializedDoubleRelation("External Outlier", "external-outlier", scores, relation.getDBIDs());
OutlierResult or = new OutlierResult(meta, scoresult);
// Apply scaling
if (scaling instanceof OutlierScalingFunction) {
((OutlierScalingFunction) scaling).prepare(or);
}
DoubleMinMax mm = new DoubleMinMax();
for (DBIDIter iditer = relation.iterDBIDs(); iditer.valid(); iditer.advance()) {
double val = scoresult.doubleValue(iditer);
val = scaling.getScaled(val);
scores.putDouble(iditer, val);
mm.put(val);
}
meta = new BasicOutlierScoreMeta(mm.getMin(), mm.getMax());
or = new OutlierResult(meta, scoresult);
return or;
}
Aggregations