use of de.lmu.ifi.dbs.elki.math.MeanVariance in project elki by elki-project.
the class LOCI method run.
/**
* Run the algorithm
*
* @param database Database to process
* @param relation Relation to process
* @return Outlier result
*/
public OutlierResult run(Database database, Relation<O> relation) {
DistanceQuery<O> distFunc = database.getDistanceQuery(relation, getDistanceFunction());
RangeQuery<O> rangeQuery = database.getRangeQuery(distFunc);
DBIDs ids = relation.getDBIDs();
// LOCI preprocessing step
WritableDataStore<DoubleIntArrayList> interestingDistances = DataStoreUtil.makeStorage(relation.getDBIDs(), DataStoreFactory.HINT_TEMP | DataStoreFactory.HINT_SORTED, DoubleIntArrayList.class);
precomputeInterestingRadii(ids, rangeQuery, interestingDistances);
// LOCI main step
FiniteProgress progressLOCI = LOG.isVerbose() ? new FiniteProgress("LOCI scores", relation.size(), LOG) : null;
WritableDoubleDataStore mdef_norm = DataStoreUtil.makeDoubleStorage(relation.getDBIDs(), DataStoreFactory.HINT_STATIC);
WritableDoubleDataStore mdef_radius = DataStoreUtil.makeDoubleStorage(relation.getDBIDs(), DataStoreFactory.HINT_STATIC);
DoubleMinMax minmax = new DoubleMinMax();
// Shared instance, to save allocations.
MeanVariance mv_n_r_alpha = new MeanVariance();
for (DBIDIter iditer = ids.iter(); iditer.valid(); iditer.advance()) {
final DoubleIntArrayList cdist = interestingDistances.get(iditer);
final double maxdist = cdist.getDouble(cdist.size() - 1);
final int maxneig = cdist.getInt(cdist.size() - 1);
double maxmdefnorm = 0.0;
double maxnormr = 0;
if (maxneig >= nmin) {
// Compute the largest neighborhood we will need.
DoubleDBIDList maxneighbors = rangeQuery.getRangeForDBID(iditer, maxdist);
// For any critical distance, compute the normalized MDEF score.
for (int i = 0, size = cdist.size(); i < size; i++) {
// Only start when minimum size is fulfilled
if (cdist.getInt(i) < nmin) {
continue;
}
final double r = cdist.getDouble(i);
final double alpha_r = alpha * r;
// compute n(p_i, \alpha * r) from list (note: alpha_r is not cdist!)
final int n_alphar = cdist.getInt(cdist.find(alpha_r));
// compute \hat{n}(p_i, r, \alpha) and the corresponding \simga_{MDEF}
mv_n_r_alpha.reset();
for (DoubleDBIDListIter neighbor = maxneighbors.iter(); neighbor.valid(); neighbor.advance()) {
// Stop at radius r
if (neighbor.doubleValue() > r) {
break;
}
DoubleIntArrayList cdist2 = interestingDistances.get(neighbor);
int rn_alphar = cdist2.getInt(cdist2.find(alpha_r));
mv_n_r_alpha.put(rn_alphar);
}
// We only use the average and standard deviation
final double nhat_r_alpha = mv_n_r_alpha.getMean();
final double sigma_nhat_r_alpha = mv_n_r_alpha.getNaiveStddev();
// Redundant divisions by nhat_r_alpha removed.
final double mdef = nhat_r_alpha - n_alphar;
final double sigmamdef = sigma_nhat_r_alpha;
final double mdefnorm = mdef / sigmamdef;
if (mdefnorm > maxmdefnorm) {
maxmdefnorm = mdefnorm;
maxnormr = r;
}
}
} else {
// FIXME: when nmin was not fulfilled - what is the proper value then?
maxmdefnorm = Double.POSITIVE_INFINITY;
maxnormr = maxdist;
}
mdef_norm.putDouble(iditer, maxmdefnorm);
mdef_radius.putDouble(iditer, maxnormr);
minmax.put(maxmdefnorm);
LOG.incrementProcessed(progressLOCI);
}
LOG.ensureCompleted(progressLOCI);
DoubleRelation scoreResult = new MaterializedDoubleRelation("LOCI normalized MDEF", "loci-mdef-outlier", mdef_norm, relation.getDBIDs());
OutlierScoreMeta scoreMeta = new QuotientOutlierScoreMeta(minmax.getMin(), minmax.getMax(), 0.0, Double.POSITIVE_INFINITY, 0.0);
OutlierResult result = new OutlierResult(scoreMeta, scoreResult);
result.addChildResult(new MaterializedDoubleRelation("LOCI MDEF Radius", "loci-critical-radius", mdef_radius, relation.getDBIDs()));
return result;
}
use of de.lmu.ifi.dbs.elki.math.MeanVariance in project elki by elki-project.
the class CTLuMedianAlgorithm method run.
/**
* Main method.
*
* @param database Database
* @param nrel Neighborhood relation
* @param relation Data relation (1d!)
* @return Outlier detection result
*/
public OutlierResult run(Database database, Relation<N> nrel, Relation<? extends NumberVector> relation) {
final NeighborSetPredicate npred = getNeighborSetPredicateFactory().instantiate(database, nrel);
WritableDoubleDataStore scores = DataStoreUtil.makeDoubleStorage(relation.getDBIDs(), DataStoreFactory.HINT_STATIC);
MeanVariance mv = new MeanVariance();
for (DBIDIter iditer = relation.iterDBIDs(); iditer.valid(); iditer.advance()) {
DBIDs neighbors = npred.getNeighborDBIDs(iditer);
final double median;
{
double[] fi = new double[neighbors.size()];
// calculate and store Median of neighborhood
int c = 0;
for (DBIDIter iter = neighbors.iter(); iter.valid(); iter.advance()) {
if (DBIDUtil.equal(iditer, iter)) {
continue;
}
fi[c] = relation.get(iter).doubleValue(0);
c++;
}
if (c > 0) {
median = QuickSelect.median(fi, 0, c);
} else {
median = relation.get(iditer).doubleValue(0);
}
}
double h = relation.get(iditer).doubleValue(0) - median;
scores.putDouble(iditer, h);
mv.put(h);
}
// Normalize scores
final double mean = mv.getMean();
final double stddev = mv.getNaiveStddev();
DoubleMinMax minmax = new DoubleMinMax();
for (DBIDIter iditer = relation.iterDBIDs(); iditer.valid(); iditer.advance()) {
double score = Math.abs((scores.doubleValue(iditer) - mean) / stddev);
minmax.put(score);
scores.putDouble(iditer, score);
}
DoubleRelation scoreResult = new MaterializedDoubleRelation("MO", "Median-outlier", scores, relation.getDBIDs());
OutlierScoreMeta scoreMeta = new BasicOutlierScoreMeta(minmax.getMin(), minmax.getMax(), 0.0, Double.POSITIVE_INFINITY, 0);
OutlierResult or = new OutlierResult(scoreMeta, scoreResult);
or.addChildResult(npred);
return or;
}
use of de.lmu.ifi.dbs.elki.math.MeanVariance in project elki by elki-project.
the class ABOD method run.
/**
* Run ABOD on the data set.
*
* @param relation Relation to process
* @return Outlier detection result
*/
public OutlierResult run(Database db, Relation<V> relation) {
ArrayDBIDs ids = DBIDUtil.ensureArray(relation.getDBIDs());
// Build a kernel matrix, to make O(n^3) slightly less bad.
SimilarityQuery<V> sq = db.getSimilarityQuery(relation, kernelFunction);
KernelMatrix kernelMatrix = new KernelMatrix(sq, relation, ids);
WritableDoubleDataStore abodvalues = DataStoreUtil.makeDoubleStorage(ids, DataStoreFactory.HINT_STATIC);
DoubleMinMax minmaxabod = new DoubleMinMax();
MeanVariance s = new MeanVariance();
DBIDArrayIter pA = ids.iter(), pB = ids.iter(), pC = ids.iter();
for (; pA.valid(); pA.advance()) {
final double abof = computeABOF(kernelMatrix, pA, pB, pC, s);
minmaxabod.put(abof);
abodvalues.putDouble(pA, abof);
}
// Build result representation.
DoubleRelation scoreResult = new MaterializedDoubleRelation("Angle-Based Outlier Degree", "abod-outlier", abodvalues, relation.getDBIDs());
OutlierScoreMeta scoreMeta = new InvertedOutlierScoreMeta(minmaxabod.getMin(), minmaxabod.getMax(), 0.0, Double.POSITIVE_INFINITY);
return new OutlierResult(scoreMeta, scoreResult);
}
use of de.lmu.ifi.dbs.elki.math.MeanVariance in project elki by elki-project.
the class LBABOD method run.
/**
* Run LB-ABOD on the data set.
*
* @param relation Relation to process
* @return Outlier detection result
*/
@Override
public OutlierResult run(Database db, Relation<V> relation) {
ArrayDBIDs ids = DBIDUtil.ensureArray(relation.getDBIDs());
DBIDArrayIter pB = ids.iter(), pC = ids.iter();
SimilarityQuery<V> sq = db.getSimilarityQuery(relation, kernelFunction);
KernelMatrix kernelMatrix = new KernelMatrix(sq, relation, ids);
// Output storage.
WritableDoubleDataStore abodvalues = DataStoreUtil.makeDoubleStorage(ids, DataStoreFactory.HINT_STATIC);
DoubleMinMax minmaxabod = new DoubleMinMax();
double max = 0.;
// Storage for squared distances (will be reused!)
WritableDoubleDataStore sqDists = DataStoreUtil.makeDoubleStorage(ids, DataStoreFactory.HINT_TEMP | DataStoreFactory.HINT_HOT);
// Nearest neighbor heap (will be reused!)
KNNHeap nn = DBIDUtil.newHeap(k);
// Priority queue for candidates
ModifiableDoubleDBIDList candidates = DBIDUtil.newDistanceDBIDList(relation.size());
// get Candidate Ranking
for (DBIDIter pA = relation.iterDBIDs(); pA.valid(); pA.advance()) {
// Compute nearest neighbors and distances.
nn.clear();
double simAA = kernelMatrix.getSimilarity(pA, pA);
// Sum of 1./(|AB|) and 1./(|AB|^2); for computing R2.
double sumid = 0., sumisqd = 0.;
for (pB.seek(0); pB.valid(); pB.advance()) {
if (DBIDUtil.equal(pB, pA)) {
continue;
}
double simBB = kernelMatrix.getSimilarity(pB, pB);
double simAB = kernelMatrix.getSimilarity(pA, pB);
double sqdAB = simAA + simBB - simAB - simAB;
sqDists.putDouble(pB, sqdAB);
final double isqdAB = 1. / sqdAB;
sumid += FastMath.sqrt(isqdAB);
sumisqd += isqdAB;
// Update heap
nn.insert(sqdAB, pB);
}
// Compute FastABOD approximation, adjust for lower bound.
// LB-ABOF is defined via a numerically unstable formula.
// Variance as E(X^2)-E(X)^2 suffers from catastrophic cancellation!
// TODO: ensure numerical precision!
double nnsum = 0., nnsumsq = 0., nnsumisqd = 0.;
KNNList nl = nn.toKNNList();
DoubleDBIDListIter iB = nl.iter(), iC = nl.iter();
for (; iB.valid(); iB.advance()) {
double sqdAB = iB.doubleValue();
double simAB = kernelMatrix.getSimilarity(pA, iB);
if (!(sqdAB > 0.)) {
continue;
}
for (iC.seek(iB.getOffset() + 1); iC.valid(); iC.advance()) {
double sqdAC = iC.doubleValue();
double simAC = kernelMatrix.getSimilarity(pA, iC);
if (!(sqdAC > 0.)) {
continue;
}
// Exploit bilinearity of scalar product:
// <B-A, C-A> = <B, C-A> - <A,C-A>
// = <B,C> - <B,A> - <A,C> + <A,A>
double simBC = kernelMatrix.getSimilarity(iB, iC);
double numerator = simBC - simAB - simAC + simAA;
double sqweight = 1. / (sqdAB * sqdAC);
double weight = FastMath.sqrt(sqweight);
double val = numerator * sqweight;
nnsum += val * weight;
nnsumsq += val * val * weight;
nnsumisqd += sqweight;
}
}
// Remaining weight, term R2:
double r2 = sumisqd * sumisqd - 2. * nnsumisqd;
double tmp = (2. * nnsum + r2) / (sumid * sumid);
double lbabof = 2. * nnsumsq / (sumid * sumid) - tmp * tmp;
// Track maximum?
if (lbabof > max) {
max = lbabof;
}
abodvalues.putDouble(pA, lbabof);
candidates.add(lbabof, pA);
}
// Put maximum from approximate values.
minmaxabod.put(max);
candidates.sort();
// refine Candidates
int refinements = 0;
DoubleMinHeap topscores = new DoubleMinHeap(l);
MeanVariance s = new MeanVariance();
for (DoubleDBIDListIter pA = candidates.iter(); pA.valid(); pA.advance()) {
// Stop refining
if (topscores.size() >= k && pA.doubleValue() > topscores.peek()) {
break;
}
final double abof = computeABOF(kernelMatrix, pA, pB, pC, s);
// Store refined score:
abodvalues.putDouble(pA, abof);
minmaxabod.put(abof);
// Update the heap tracking the top scores.
if (topscores.size() < k) {
topscores.add(abof);
} else {
if (topscores.peek() > abof) {
topscores.replaceTopElement(abof);
}
}
refinements += 1;
}
if (LOG.isStatistics()) {
LoggingConfiguration.setVerbose(Level.VERYVERBOSE);
LOG.statistics(new LongStatistic("lb-abod.refinements", refinements));
}
// Build result representation.
DoubleRelation scoreResult = new MaterializedDoubleRelation("Angle-based Outlier Detection", "abod-outlier", abodvalues, ids);
OutlierScoreMeta scoreMeta = new InvertedOutlierScoreMeta(minmaxabod.getMin(), minmaxabod.getMax(), 0.0, Double.POSITIVE_INFINITY);
return new OutlierResult(scoreMeta, scoreResult);
}
use of de.lmu.ifi.dbs.elki.math.MeanVariance in project elki by elki-project.
the class CTLuScatterplotOutlier method run.
/**
* Main method.
*
* @param database Database
* @param nrel Neighborhood relation
* @param relation Data relation (1d!)
* @return Outlier detection result
*/
public OutlierResult run(Database database, Relation<N> nrel, Relation<? extends NumberVector> relation) {
final NeighborSetPredicate npred = getNeighborSetPredicateFactory().instantiate(database, nrel);
WritableDoubleDataStore means = DataStoreUtil.makeDoubleStorage(relation.getDBIDs(), DataStoreFactory.HINT_TEMP);
// Calculate average of neighborhood for each object and perform a linear
// regression using the covariance matrix
CovarianceMatrix covm = new CovarianceMatrix(2);
for (DBIDIter iditer = relation.iterDBIDs(); iditer.valid(); iditer.advance()) {
final double local = relation.get(iditer).doubleValue(0);
// Compute mean of neighbors
Mean mean = new Mean();
DBIDs neighbors = npred.getNeighborDBIDs(iditer);
for (DBIDIter iter = neighbors.iter(); iter.valid(); iter.advance()) {
if (DBIDUtil.equal(iditer, iter)) {
continue;
}
mean.put(relation.get(iter).doubleValue(0));
}
final double m;
if (mean.getCount() > 0) {
m = mean.getMean();
} else {
// if object id has no neighbors ==> avg = non-spatial attribute of id
m = local;
}
// Store the mean for the score calculation
means.putDouble(iditer, m);
covm.put(new double[] { local, m });
}
// Finalize covariance matrix, compute linear regression
final double slope, inter;
{
double[] meanv = covm.getMeanVector();
double[][] fmat = covm.destroyToSampleMatrix();
final double covxx = fmat[0][0], covxy = fmat[0][1];
slope = covxy / covxx;
inter = meanv[1] - slope * meanv[0];
}
// calculate mean and variance for error
WritableDoubleDataStore scores = DataStoreUtil.makeDoubleStorage(relation.getDBIDs(), DataStoreFactory.HINT_STATIC);
MeanVariance mv = new MeanVariance();
for (DBIDIter iditer = relation.iterDBIDs(); iditer.valid(); iditer.advance()) {
// Compute the error from the linear regression
double y_i = relation.get(iditer).doubleValue(0);
double e = means.doubleValue(iditer) - (slope * y_i + inter);
scores.putDouble(iditer, e);
mv.put(e);
}
// Normalize scores
DoubleMinMax minmax = new DoubleMinMax();
{
final double mean = mv.getMean();
final double variance = mv.getNaiveStddev();
for (DBIDIter iditer = relation.iterDBIDs(); iditer.valid(); iditer.advance()) {
double score = Math.abs((scores.doubleValue(iditer) - mean) / variance);
minmax.put(score);
scores.putDouble(iditer, score);
}
}
// build representation
DoubleRelation scoreResult = new MaterializedDoubleRelation("SPO", "Scatterplot-Outlier", scores, relation.getDBIDs());
OutlierScoreMeta scoreMeta = new BasicOutlierScoreMeta(minmax.getMin(), minmax.getMax(), 0.0, Double.POSITIVE_INFINITY, 0);
OutlierResult or = new OutlierResult(scoreMeta, scoreResult);
or.addChildResult(npred);
return or;
}
Aggregations