use of de.lmu.ifi.dbs.elki.database.ids.KNNList in project elki by elki-project.
the class KNNWeightOutlier method run.
/**
 * Runs the algorithm in the timed evaluation part: every object is scored
 * with the sum of the distances to its k nearest neighbors, not counting the
 * object itself.
 *
 * @param database Database context
 * @param relation Data relation
 * @return Outlier result consisting of the score relation and its metadata
 */
public OutlierResult run(Database database, Relation<O> relation) {
  final DistanceQuery<O> distanceQuery = database.getDistanceQuery(relation, getDistanceFunction());
  // One extra neighbor is requested to account for the query point.
  final int kWithQuery = k + 1;
  final KNNQuery<O> knnQuery = database.getKNNQuery(distanceQuery, kWithQuery);

  final FiniteProgress prog = LOG.isVerbose() ? new FiniteProgress("Compute kNN weights", relation.size(), LOG) : null;
  final DoubleMinMax minmax = new DoubleMinMax();
  final WritableDoubleDataStore scores = DataStoreUtil.makeDoubleStorage(relation.getDBIDs(), DataStoreFactory.HINT_STATIC);

  for (DBIDIter iditer = relation.iterDBIDs(); iditer.valid(); iditer.advance()) {
    final KNNList knn = knnQuery.getKNNForDBID(iditer, kWithQuery);
    // Accumulated distance and number of (non-self) neighbors consumed so far.
    double distSum = 0;
    int used = 0;
    for (DoubleDBIDListIter neighbor = knn.iter(); used < k && neighbor.valid(); neighbor.advance()) {
      if (!DBIDUtil.equal(iditer, neighbor)) {
        distSum += neighbor.doubleValue();
        ++used;
      }
    }
    // Fewer than k neighbors were available (approximative index, or k larger
    // than the data set): the weight is reported as infinite.
    final double weight = (used < k) ? Double.POSITIVE_INFINITY : distSum;
    scores.putDouble(iditer, weight);
    minmax.put(weight);
    LOG.incrementProcessed(prog);
  }
  LOG.ensureCompleted(prog);

  final DoubleRelation res = new MaterializedDoubleRelation("kNN weight Outlier Score", "knnw-outlier", scores, relation.getDBIDs());
  final OutlierScoreMeta meta = new BasicOutlierScoreMeta(minmax.getMin(), minmax.getMax(), 0., Double.POSITIVE_INFINITY, 0.);
  return new OutlierResult(meta, res);
}
use of de.lmu.ifi.dbs.elki.database.ids.KNNList in project elki by elki-project.
the class IDOS method computeIDOS.
/**
 * Computes all IDOS scores.
 *
 * For each object, the score is its own intrinsic dimensionality multiplied by
 * the mean of the inverse intrinsic dimensionalities of its (at most k_r)
 * reference neighbors, the object itself being excluded. Neighbors with a
 * non-positive intrinsic dimensionality contribute 0 to the sum. Objects with
 * a non-positive intrinsic dimensionality, or without any reference neighbor,
 * receive the score 0.
 *
 * @param ids the DBIDs to process
 * @param knnQ the KNN query
 * @param intDims Precomputed intrinsic dimensionalities
 * @param idosminmax Output of minimum and maximum, for metadata
 * @return ID scores
 */
protected DoubleDataStore computeIDOS(DBIDs ids, KNNQuery<O> knnQ, DoubleDataStore intDims, DoubleMinMax idosminmax) {
  WritableDoubleDataStore ldms = DataStoreUtil.makeDoubleStorage(ids, DataStoreFactory.HINT_STATIC);
  FiniteProgress prog = LOG.isVerbose() ? new FiniteProgress("ID Outlier Scores for objects", ids.size(), LOG) : null;
  for (DBIDIter iter = ids.iter(); iter.valid(); iter.advance()) {
    final KNNList neighbors = knnQ.getKNNForDBID(iter, k_r);
    // Sum of inverse intrinsic dimensionalities, and number of neighbors used.
    double sum = 0.;
    int cnt = 0;
    for (DoubleDBIDListIter neighbor = neighbors.iter(); neighbor.valid(); neighbor.advance()) {
      // Skip the query object itself.
      if (DBIDUtil.equal(iter, neighbor)) {
        continue;
      }
      final double id = intDims.doubleValue(neighbor);
      sum += id > 0 ? 1.0 / id : 0.;
      if (++cnt == k_r) {
        // Always stop after at most k_r elements.
        break;
      }
    }
    final double id_q = intDims.doubleValue(iter);
    // Guard cnt > 0: with no neighbor besides the object itself, sum / cnt
    // would be 0.0 / 0 = NaN and a NaN score would end up in the result.
    final double idos = (id_q > 0 && cnt > 0) ? id_q * sum / cnt : 0.;
    ldms.putDouble(iter, idos);
    idosminmax.put(idos);
    LOG.incrementProcessed(prog);
  }
  LOG.ensureCompleted(prog);
  return ldms;
}
use of de.lmu.ifi.dbs.elki.database.ids.KNNList in project elki by elki-project.
the class LDOF method run.
/**
 * Run the LDOF algorithm: for every object, the mean distance to its nearest
 * neighbors is divided by the mean pairwise distance among those neighbors.
 *
 * @param database Database to process
 * @param relation Relation to process
 * @return Outlier result
 */
public OutlierResult run(Database database, Relation<O> relation) {
  final DistanceQuery<O> distFunc = database.getDistanceQuery(relation, getDistanceFunction());
  final KNNQuery<O> knnQuery = database.getKNNQuery(distFunc, k);
  // Minimum and maximum score, needed for the result metadata.
  final DoubleMinMax ldofminmax = new DoubleMinMax();
  // Storage for the LDOF value of each object.
  final WritableDoubleDataStore ldofs = DataStoreUtil.makeDoubleStorage(relation.getDBIDs(), DataStoreFactory.HINT_HOT | DataStoreFactory.HINT_TEMP);

  if (LOG.isVerbose()) {
    LOG.verbose("Computing LDOFs");
  }
  final FiniteProgress progressLDOFs = LOG.isVerbose() ? new FiniteProgress("LDOF for objects", relation.size(), LOG) : null;

  // Reused accumulators: mean distance object->neighbor, and mean distance
  // between pairs of neighbors.
  final Mean knnMean = new Mean(), pairMean = new Mean();
  for (DBIDIter iditer = relation.iterDBIDs(); iditer.valid(); iditer.advance()) {
    final KNNList neighbors = knnQuery.getKNNForDBID(iditer, k);
    knnMean.reset();
    pairMean.reset();
    final DoubleDBIDListIter outer = neighbors.iter(), inner = neighbors.iter();
    for (; outer.valid(); outer.advance()) {
      // The object itself does not count as its own neighbor.
      if (!DBIDUtil.equal(outer, iditer)) {
        knnMean.put(outer.doubleValue());
        // Visit every later neighbor, so that each pair is seen once.
        for (inner.seek(outer.getOffset() + 1); inner.valid(); inner.advance()) {
          if (!DBIDUtil.equal(inner, iditer)) {
            pairMean.put(distFunc.distance(outer, inner));
          }
        }
      }
    }
    final double quotient = knnMean.getMean() / pairMean.getMean();
    // An undefined or unbounded quotient is replaced by 1.0.
    final double ldof = (Double.isNaN(quotient) || Double.isInfinite(quotient)) ? 1.0 : quotient;
    ldofs.putDouble(iditer, ldof);
    ldofminmax.put(ldof);
    LOG.incrementProcessed(progressLDOFs);
  }
  LOG.ensureCompleted(progressLDOFs);

  // Wrap the scores into the result representation.
  final DoubleRelation scoreResult = new MaterializedDoubleRelation("LDOF Outlier Score", "ldof-outlier", ldofs, relation.getDBIDs());
  final OutlierScoreMeta scoreMeta = new QuotientOutlierScoreMeta(ldofminmax.getMin(), ldofminmax.getMax(), 0.0, Double.POSITIVE_INFINITY, LDOF_BASELINE);
  return new OutlierResult(scoreMeta, scoreResult);
}
use of de.lmu.ifi.dbs.elki.database.ids.KNNList in project elki by elki-project.
the class LOF method computeLRD.
/**
 * Compute the local reachability density of a single object.
 *
 * The reachability distance towards a neighbor is the larger of the distance
 * to that neighbor and the neighbor's own kNN distance; the density is the
 * number of neighbors divided by the sum of these reachability distances.
 *
 * @param knnq kNN Query
 * @param curr Current object
 * @return Local Reachability Density, or positive infinity if the sum of
 *         reachability distances is not positive
 */
protected double computeLRD(KNNQuery<O> knnq, DBIDIter curr) {
  final KNNList neighbors = knnq.getKNNForDBID(curr, k);
  double reachSum = 0.0;
  int used = 0;
  for (DoubleDBIDListIter it = neighbors.iter(); it.valid(); it.advance()) {
    // The object itself is not one of its neighbors.
    if (!DBIDUtil.equal(curr, it)) {
      final double neighborKDist = knnq.getKNNForDBID(it, k).getKNNDistance();
      reachSum += MathUtil.max(it.doubleValue(), neighborKDist);
      used++;
    }
  }
  // Avoid division by 0
  if (reachSum > 0) {
    return used / reachSum;
  }
  return Double.POSITIVE_INFINITY;
}
use of de.lmu.ifi.dbs.elki.database.ids.KNNList in project elki by elki-project.
the class CTLuGLSBackwardSearchAlgorithm method singleIteration.
/**
 * Run a single iteration of the GLS-SOD modeling step.
 *
 * Builds the design matrix X (quadratic terms of the two spatial coordinates),
 * the attribute matrix Y and the neighborhood matrix F, estimates the
 * regression parameters, and returns the object with the largest squared
 * standardized residual.
 *
 * @param relationx Geo relation
 * @param relationy Attribute relation
 * @return Top outlier and associated score
 */
private Pair<DBIDVar, Double> singleIteration(Relation<V> relationx, Relation<? extends NumberVector> relationy) {
  final int dim = RelationUtil.dimensionality(relationx);
  final int dimy = RelationUtil.dimensionality(relationy);
  assert (dim == 2);
  // k + 1, because the query object is skipped when collecting neighbors below.
  KNNQuery<V> knnQuery = QueryUtil.getKNNQuery(relationx, getDistanceFunction(), k + 1);
  // We need stable indexed DBIDs
  ArrayModifiableDBIDs ids = DBIDUtil.newArray(relationx.getDBIDs());
  // Sort, so we can do a binary search below.
  ids.sort();
  // init F, X, Y
  double[][] X = new double[ids.size()][6];
  double[][] F = new double[ids.size()][ids.size()];
  double[][] Y = new double[ids.size()][dimy];
  {
    int i = 0;
    for (DBIDIter id = ids.iter(); id.valid(); id.advance(), i++) {
      // Fill the data matrix
      {
        V vec = relationx.get(id);
        double la = vec.doubleValue(0);
        double lo = vec.doubleValue(1);
        X[i][0] = 1.0;
        X[i][1] = la;
        X[i][2] = lo;
        X[i][3] = la * lo;
        X[i][4] = la * la;
        X[i][5] = lo * lo;
      }
      // Copy the attribute values into row i of Y.
      {
        final NumberVector vecy = relationy.get(id);
        for (int d = 0; d < dimy; d++) {
          double idy = vecy.doubleValue(d);
          Y[i][d] = idy;
        }
      }
      // Fill the neighborhood matrix F:
      {
        KNNList neighbors = knnQuery.getKNNForDBID(id, k + 1);
        ModifiableDBIDs neighborhood = DBIDUtil.newArray(neighbors.size());
        for (DBIDIter neighbor = neighbors.iter(); neighbor.valid(); neighbor.advance()) {
          if (DBIDUtil.equal(id, neighbor)) {
            continue;
          }
          neighborhood.add(neighbor);
        }
        // Weight object itself positively.
        F[i][i] = 1.0;
        // Must be a floating-point division: the integer expression
        // -1 / size truncates to 0 for every neighborhood larger than one
        // (and throws for an empty one), which left the neighbor weights unset.
        final double nweight = -1.0 / neighborhood.size();
        // Map each neighbor back to its index position in the sorted ids.
        for (DBIDIter iter = neighborhood.iter(); iter.valid(); iter.advance()) {
          int pos = ids.binarySearch(iter);
          assert (pos >= 0);
          F[pos][i] = nweight;
        }
      }
    }
  }
  // Estimate the parameter beta
  // Common term that we can save recomputing.
  double[][] common = times(transposeTimesTranspose(X, F), F);
  double[][] b = times(inverse(times(common, X)), times(common, Y));
  // Estimate sigma_0 and sigma:
  // sigma_sum_square = sigma_0*sigma_0 + sigma*sigma
  double[][] sigmaMat = times(F, minusEquals(times(X, b), times(F, Y)));
  final double sigma_sum_square = normF(sigmaMat) / (relationx.size() - 6 - 1);
  final double norm = 1 / FastMath.sqrt(sigma_sum_square);
  // calculate the absolute values of standard residuals
  double[][] E = timesEquals(times(F, minus(Y, times(X, b))), norm);
  // Find the object whose residual row has the largest sum of squares.
  DBIDVar worstid = DBIDUtil.newVar();
  double worstscore = Double.NEGATIVE_INFINITY;
  int i = 0;
  for (DBIDIter id = ids.iter(); id.valid(); id.advance(), i++) {
    double err = squareSum(getRow(E, i));
    if (err > worstscore) {
      worstscore = err;
      worstid.set(id);
    }
  }
  return new Pair<>(worstid, FastMath.sqrt(worstscore));
}
Aggregations