use of de.lmu.ifi.dbs.elki.result.outlier.OutlierResult in project elki by elki-project.
the class CTLuMedianMultipleAttributes method run.
/**
* Run the algorithm
*
* @param database Database
* @param spatial Spatial relation
* @param attributes Attributes relation
* @return Outlier detection result
*/
public OutlierResult run(Database database, Relation<N> spatial, Relation<O> attributes) {
  final int dim = RelationUtil.dimensionality(attributes);
  if (LOG.isDebugging()) {
    LOG.debug("Dimensionality: " + dim);
  }
  final NeighborSetPredicate npred = getNeighborSetPredicateFactory().instantiate(database, spatial);
  CovarianceMatrix covmaker = new CovarianceMatrix(dim);
  WritableDataStore<double[]> deltas = DataStoreUtil.makeStorage(attributes.getDBIDs(), DataStoreFactory.HINT_TEMP, double[].class);
  for (DBIDIter iditer = attributes.iterDBIDs(); iditer.valid(); iditer.advance()) {
    final O obj = attributes.get(iditer);
    final DBIDs neighbors = npred.getNeighborDBIDs(iditer);
    // Compute the median vector
    final double[] median = new double[dim];
    {
      double[][] data = new double[dim][neighbors.size()];
      int i = 0;
      // Load data
      for (DBIDIter iter = neighbors.iter(); iter.valid(); iter.advance()) {
        // TODO: skip object itself within neighbors?
        O nobj = attributes.get(iter);
        for (int d = 0; d < dim; d++) {
          data[d][i] = nobj.doubleValue(d);
        }
        i++;
      }
      for (int d = 0; d < dim; d++) {
        median[d] = QuickSelect.median(data[d]);
      }
    }
    // Delta vector "h"
    double[] delta = minusEquals(obj.toArray(), median);
    deltas.put(iditer, delta);
    covmaker.put(delta);
  }
  // Finalize covariance matrix:
  double[] mean = covmaker.getMeanVector();
  double[][] cmati = inverse(covmaker.destroyToSampleMatrix());
  DoubleMinMax minmax = new DoubleMinMax();
  WritableDoubleDataStore scores = DataStoreUtil.makeDoubleStorage(attributes.getDBIDs(), DataStoreFactory.HINT_STATIC);
  for (DBIDIter iditer = attributes.iterDBIDs(); iditer.valid(); iditer.advance()) {
    final double score = mahalanobisDistance(cmati, deltas.get(iditer), mean);
    minmax.put(score);
    scores.putDouble(iditer, score);
  }
  DoubleRelation scoreResult = new MaterializedDoubleRelation("Median multiple attributes outlier", "median-outlier", scores, attributes.getDBIDs());
  OutlierScoreMeta scoreMeta = new BasicOutlierScoreMeta(minmax.getMin(), minmax.getMax(), 0.0, Double.POSITIVE_INFINITY, 0);
  OutlierResult or = new OutlierResult(scoreMeta, scoreResult);
  or.addChildResult(npred);
  return or;
}
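Here the outlier score is the Mahalanobis distance of each delta vector from the mean delta, under the inverted sample covariance of all deltas. A minimal self-contained sketch of that quadratic form, assuming the inverse covariance covInv has already been computed (the helper name mahalanobis is hypothetical, standing in for the static VMath import used above):

// Sketch: squared Mahalanobis distance (x - mean)^T * covInv * (x - mean),
// where covInv is the precomputed inverse sample covariance matrix.
static double mahalanobis(double[][] covInv, double[] x, double[] mean) {
  final int dim = x.length;
  double[] diff = new double[dim];
  for (int d = 0; d < dim; d++) {
    diff[d] = x[d] - mean[d];
  }
  double agg = 0.;
  for (int i = 0; i < dim; i++) {
    double tmp = 0.;
    for (int j = 0; j < dim; j++) {
      tmp += covInv[i][j] * diff[j];
    }
    agg += diff[i] * tmp;
  }
  return agg;
}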
use of de.lmu.ifi.dbs.elki.result.outlier.OutlierResult in project elki by elki-project.
the class CTLuZTestOutlier method run.
/**
* Main method.
*
* @param database Database
* @param nrel Neighborhood relation
* @param relation Data relation (1d!)
* @return Outlier detection result
*/
public OutlierResult run(Database database, Relation<N> nrel, Relation<? extends NumberVector> relation) {
  final NeighborSetPredicate npred = getNeighborSetPredicateFactory().instantiate(database, nrel);
  WritableDoubleDataStore scores = DataStoreUtil.makeDoubleStorage(relation.getDBIDs(), DataStoreFactory.HINT_STATIC);
  MeanVariance zmv = new MeanVariance();
  for (DBIDIter iditer = relation.iterDBIDs(); iditer.valid(); iditer.advance()) {
    DBIDs neighbors = npred.getNeighborDBIDs(iditer);
    // Compute Mean of neighborhood
    Mean localmean = new Mean();
    for (DBIDIter iter = neighbors.iter(); iter.valid(); iter.advance()) {
      if (DBIDUtil.equal(iditer, iter)) {
        continue;
      }
      localmean.put(relation.get(iter).doubleValue(0));
    }
    final double localdiff;
    if (localmean.getCount() > 0) {
      localdiff = relation.get(iditer).doubleValue(0) - localmean.getMean();
    } else {
      localdiff = 0.0;
    }
    scores.putDouble(iditer, localdiff);
    zmv.put(localdiff);
  }
  // Normalize scores using mean and variance
  DoubleMinMax minmax = new DoubleMinMax();
  for (DBIDIter iditer = relation.iterDBIDs(); iditer.valid(); iditer.advance()) {
    double score = Math.abs(scores.doubleValue(iditer) - zmv.getMean()) / zmv.getSampleStddev();
    minmax.put(score);
    scores.putDouble(iditer, score);
  }
  // Wrap result
  DoubleRelation scoreResult = new MaterializedDoubleRelation("ZTest", "Z Test score", scores, relation.getDBIDs());
  OutlierScoreMeta scoreMeta = new BasicOutlierScoreMeta(minmax.getMin(), minmax.getMax(), 0.0, Double.POSITIVE_INFINITY, 0);
  OutlierResult or = new OutlierResult(scoreMeta, scoreResult);
  or.addChildResult(npred);
  return or;
}
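The second loop standardizes the raw local differences into absolute z-scores, |diff - mean| / stddev. The same normalization in isolation as a two-pass sketch (ELKI's MeanVariance accumulates mean and variance incrementally instead; zNormalize is a hypothetical helper):

// Sketch: two-pass absolute z-score normalization; assumes raw.length > 1.
static double[] zNormalize(double[] raw) {
  final int n = raw.length;
  double sum = 0., sqsum = 0.;
  for (double v : raw) {
    sum += v;
    sqsum += v * v;
  }
  double mean = sum / n;
  // Sample variance (n - 1 denominator), matching getSampleStddev().
  double stddev = Math.sqrt((sqsum - n * mean * mean) / (n - 1));
  double[] scores = new double[n];
  for (int i = 0; i < n; i++) {
    scores[i] = Math.abs(raw[i] - mean) / stddev;
  }
  return scores;
}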
use of de.lmu.ifi.dbs.elki.result.outlier.OutlierResult in project elki by elki-project.
the class KNNSOS method run.
/**
* Run the algorithm.
*
* @param relation data relation
* @return outlier detection result
*/
public OutlierResult run(Relation<O> relation) {
  // Query size
  final int k1 = k + 1;
  final double perplexity = k / 3.;
  KNNQuery<O> knnq = relation.getKNNQuery(getDistanceFunction(), k1);
  final double logPerp = FastMath.log(perplexity);
  double[] p = new double[k + 10];
  FiniteProgress prog = LOG.isVerbose() ? new FiniteProgress("KNNSOS scores", relation.size(), LOG) : null;
  WritableDoubleDataStore scores = DataStoreUtil.makeDoubleStorage(relation.getDBIDs(), DataStoreFactory.HINT_HOT | DataStoreFactory.HINT_DB, 1.);
  for (DBIDIter it = relation.iterDBIDs(); it.valid(); it.advance()) {
    KNNList knns = knnq.getKNNForDBID(it, k1);
    if (p.length < knns.size() + 1) {
      p = new double[knns.size() + 10];
    }
    final DoubleDBIDListIter ki = knns.iter();
    // Compute affinities
    SOS.computePi(it, ki, p, perplexity, logPerp);
    // Normalization factor:
    double s = SOS.sumOfProbabilities(it, ki, p);
    if (s > 0) {
      ISOS.nominateNeighbors(it, ki, p, 1. / s, scores);
    }
    LOG.incrementProcessed(prog);
  }
  LOG.ensureCompleted(prog);
  // Find minimum and maximum.
  DoubleMinMax minmax = ISOS.transformScores(scores, relation.getDBIDs(), logPerp, phi);
  DoubleRelation scoreres = new MaterializedDoubleRelation("kNN Stochastic Outlier Selection", "knnsos-outlier", scores, relation.getDBIDs());
  OutlierScoreMeta meta = new ProbabilisticOutlierScore(minmax.getMin(), minmax.getMax(), 0.);
  return new OutlierResult(meta, scoreres);
}
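SOS.computePi calibrates a per-point bandwidth so that the entropy of the neighbor affinity distribution matches log(perplexity). A sketch of that calibration idea — the standard perplexity binary search, not ELKI's actual implementation; computeAffinities is a hypothetical helper:

// Sketch: find beta so that the entropy of p_j ∝ exp(-beta * d_j)
// equals logPerp, then return the normalized affinities.
static double[] computeAffinities(double[] dists, double logPerp) {
  final int n = dists.length;
  double[] p = new double[n];
  double beta = 1., betaMin = 0., betaMax = Double.POSITIVE_INFINITY;
  for (int round = 0; round < 50; round++) {
    double sum = 0.;
    for (int j = 0; j < n; j++) {
      sum += p[j] = Math.exp(-beta * dists[j]);
    }
    // Shannon entropy (in nats): H = beta * E[d] + log(sum).
    double h = 0.;
    for (int j = 0; j < n; j++) {
      h += beta * dists[j] * p[j];
    }
    h = h / sum + Math.log(sum);
    if (Math.abs(h - logPerp) < 1e-5) {
      break;
    }
    if (h > logPerp) { // Distribution too flat: sharpen by increasing beta.
      betaMin = beta;
      beta = betaMax < Double.POSITIVE_INFINITY ? (beta + betaMax) * .5 : beta * 2.;
    } else { // Distribution too peaked: soften by decreasing beta.
      betaMax = beta;
      beta = (beta + betaMin) * .5;
    }
  }
  double sum = 0.;
  for (double v : p) {
    sum += v;
  }
  for (int j = 0; j < n; j++) {
    p[j] /= sum; // Normalize to a probability distribution.
  }
  return p;
}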
use of de.lmu.ifi.dbs.elki.result.outlier.OutlierResult in project elki by elki-project.
the class COP method run.
/**
* Process a single relation.
*
* @param relation Relation to process
* @return Outlier detection result
*/
public OutlierResult run(Relation<V> relation) {
  final DBIDs ids = relation.getDBIDs();
  KNNQuery<V> knnQuery = QueryUtil.getKNNQuery(relation, getDistanceFunction(), k + 1);
  final int dim = RelationUtil.dimensionality(relation);
  if (k <= dim + 1) {
    LOG.warning("PCA is underspecified with a too low k! k should be much larger than " + dim);
  }
  WritableDoubleDataStore cop_score = DataStoreUtil.makeDoubleStorage(ids, DataStoreFactory.HINT_HOT | DataStoreFactory.HINT_STATIC);
  WritableDataStore<double[]> cop_err_v = null;
  WritableIntegerDataStore cop_dim = null;
  if (models) {
    cop_err_v = DataStoreUtil.makeStorage(ids, DataStoreFactory.HINT_HOT | DataStoreFactory.HINT_STATIC, double[].class);
    cop_dim = DataStoreUtil.makeIntegerStorage(ids, DataStoreFactory.HINT_HOT | DataStoreFactory.HINT_STATIC, -1);
  }
  // compute neighbors of each db object
  FiniteProgress prog = LOG.isVerbose() ? new FiniteProgress("Correlation Outlier Probabilities", relation.size(), LOG) : null;
  for (DBIDIter id = ids.iter(); id.valid(); id.advance()) {
    KNNList neighbors = knnQuery.getKNNForDBID(id, k + 1);
    ModifiableDBIDs nids = DBIDUtil.newHashSet(neighbors);
    // Do not use query object
    nids.remove(id);
    double[] centroid = Centroid.make(relation, nids).getArrayRef();
    double[] relative = minusEquals(relation.get(id).toArray(), centroid);
    PCAResult pcares = pca.processIds(nids, relation);
    double[][] evecs = pcares.getEigenvectors();
    double[] projected = transposeTimes(evecs, relative);
    double[] evs = pcares.getEigenvalues();
    double min = Double.POSITIVE_INFINITY;
    int vdim = dim;
    switch(dist) {
    case CHISQUARED: {
      double sqdevs = 0;
      for (int d = 0; d < dim; d++) {
        // Scale with Stddev
        double dev = projected[d];
        // Accumulate
        sqdevs += dev * dev / evs[d];
        // Evaluate
        double score = 1 - ChiSquaredDistribution.cdf(sqdevs, d + 1);
        if (score < min) {
          min = score;
          vdim = d + 1;
        }
      }
      break;
    }
    case GAMMA: {
      double[][] dists = new double[dim][nids.size()];
      int j = 0;
      double[] srel = new double[dim];
      for (DBIDIter s = nids.iter(); s.valid() && j < nids.size(); s.advance()) {
        V vec = relation.get(s);
        for (int d = 0; d < dim; d++) {
          srel[d] = vec.doubleValue(d) - centroid[d];
        }
        double[] serr = transposeTimes(evecs, srel);
        double sqdist = 0.0;
        for (int d = 0; d < dim; d++) {
          double serrd = serr[d];
          sqdist += serrd * serrd / evs[d];
          dists[d][j] = sqdist;
        }
        j++;
      }
      double sqdevs = 0;
      for (int d = 0; d < dim; d++) {
        // Scale with Stddev
        final double dev = projected[d];
        // Accumulate
        sqdevs += dev * dev / evs[d];
        // Sort, so we can trim the top 15% below.
        Arrays.sort(dists[d]);
        // Evaluate
        double score = 1 - GammaChoiWetteEstimator.STATIC.estimate(dists[d], SHORTENED_ARRAY).cdf(sqdevs);
        if (score < min) {
          min = score;
          vdim = d + 1;
        }
      }
      break;
    }
    }
    // Normalize the value
    final double prob = expect * (1 - min) / (expect + min);
    // Construct the error vector:
    for (int d = vdim; d < dim; d++) {
      projected[d] = 0.;
    }
    double[] ev = timesEquals(times(evecs, projected), -1 * prob);
    cop_score.putDouble(id, prob);
    if (models) {
      cop_err_v.put(id, ev);
      cop_dim.putInt(id, dim + 1 - vdim);
    }
    LOG.incrementProcessed(prog);
  }
  LOG.ensureCompleted(prog);
  // combine results.
  DoubleRelation scoreResult = new MaterializedDoubleRelation("Correlation Outlier Probabilities", COP_SCORES, cop_score, ids);
  OutlierScoreMeta scoreMeta = new ProbabilisticOutlierScore();
  OutlierResult result = new OutlierResult(scoreMeta, scoreResult);
  if (models) {
    result.addChildResult(new MaterializedRelation<>("Local Dimensionality", COP_DIM, TypeUtil.INTEGER, cop_dim, ids));
    result.addChildResult(new MaterializedRelation<>("Error vectors", COP_ERRORVEC, TypeUtil.DOUBLE_ARRAY, cop_err_v, ids));
  }
  return result;
}
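In the CHISQUARED branch, the eigenvalue-normalized squared deviations over the first d + 1 components are tested against a chi-squared distribution with d + 1 degrees of freedom, and the dimensionality with the smallest tail probability wins. The same branch in isolation, sketched with Apache Commons Math as a stand-in for ELKI's own ChiSquaredDistribution.cdf (an assumption; copChiSquaredScore is a hypothetical helper):

import org.apache.commons.math3.distribution.ChiSquaredDistribution;

// Sketch of the CHISQUARED scoring branch: projected holds the deviations
// in the local eigenbasis, evs the corresponding eigenvalues (variances).
static double copChiSquaredScore(double[] projected, double[] evs) {
  double min = Double.POSITIVE_INFINITY;
  double sqdevs = 0.;
  for (int d = 0; d < projected.length; d++) {
    double dev = projected[d];
    sqdevs += dev * dev / evs[d]; // Accumulate variance-normalized deviations.
    // Tail probability under chi-squared with d + 1 degrees of freedom.
    double score = 1 - new ChiSquaredDistribution(d + 1).cumulativeProbability(sqdevs);
    min = Math.min(score, min);
  }
  return min;
}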
use of de.lmu.ifi.dbs.elki.result.outlier.OutlierResult in project elki by elki-project.
the class EMOutlier method run.
/**
* Runs the algorithm in the timed evaluation part.
*
* @param database Database to process
* @param relation Relation to process
* @return Outlier result
*/
public OutlierResult run(Database database, Relation<V> relation) {
  emClustering.setSoft(true);
  Clustering<?> emresult = emClustering.run(database, relation);
  Relation<double[]> soft = null;
  for (It<Relation<double[]>> iter = emresult.getHierarchy().iterChildren(emresult).filter(Relation.class); iter.valid(); iter.advance()) {
    if (iter.get().getDataTypeInformation() == EM.SOFT_TYPE) {
      soft = iter.get();
    }
  }
  double globmax = 0.0;
  WritableDoubleDataStore emo_score = DataStoreUtil.makeDoubleStorage(relation.getDBIDs(), DataStoreFactory.HINT_TEMP | DataStoreFactory.HINT_HOT);
  for (DBIDIter iditer = relation.iterDBIDs(); iditer.valid(); iditer.advance()) {
    // Score: one minus the largest cluster membership probability.
    double maxProb = Double.POSITIVE_INFINITY;
    double[] probs = soft.get(iditer);
    for (double prob : probs) {
      maxProb = Math.min(1. - prob, maxProb);
    }
    emo_score.putDouble(iditer, maxProb);
    globmax = Math.max(maxProb, globmax);
  }
  DoubleRelation scoreres = new MaterializedDoubleRelation("EM outlier scores", "em-outlier", emo_score, relation.getDBIDs());
  OutlierScoreMeta meta = new ProbabilisticOutlierScore(0.0, globmax);
  // combine results.
  OutlierResult result = new OutlierResult(meta, scoreres);
  // TODO: add a keep-EM flag?
  result.addChildResult(emresult);
  return result;
}
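Despite its name, maxProb accumulates the minimum of 1 - prob over all clusters, i.e. one minus the strongest soft cluster assignment. The same per-point computation as a standalone sketch (emOutlierScore is a hypothetical helper):

// Sketch: EM outlier score of a point from its soft cluster memberships.
static double emOutlierScore(double[] clusterProbs) {
  double best = 0.;
  for (double prob : clusterProbs) {
    best = Math.max(prob, best); // Strongest cluster assignment.
  }
  return 1. - best;
}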