use of de.lmu.ifi.dbs.elki.algorithm.outlier.spatial.neighborhood.NeighborSetPredicate in project elki by elki-project.
the class CTLuMedianAlgorithm method run.
/**
* Main method.
*
* @param database Database
* @param nrel Neighborhood relation
* @param relation Data relation (1d!)
* @return Outlier detection result
*/
public OutlierResult run(Database database, Relation<N> nrel, Relation<? extends NumberVector> relation) {
final NeighborSetPredicate npred = getNeighborSetPredicateFactory().instantiate(database, nrel);
WritableDoubleDataStore scores = DataStoreUtil.makeDoubleStorage(relation.getDBIDs(), DataStoreFactory.HINT_STATIC);
MeanVariance mv = new MeanVariance();
for (DBIDIter iditer = relation.iterDBIDs(); iditer.valid(); iditer.advance()) {
DBIDs neighbors = npred.getNeighborDBIDs(iditer);
final double median;
{
double[] fi = new double[neighbors.size()];
// calculate and store Median of neighborhood
int c = 0;
for (DBIDIter iter = neighbors.iter(); iter.valid(); iter.advance()) {
if (DBIDUtil.equal(iditer, iter)) {
continue;
}
fi[c] = relation.get(iter).doubleValue(0);
c++;
}
if (c > 0) {
median = QuickSelect.median(fi, 0, c);
} else {
median = relation.get(iditer).doubleValue(0);
}
}
double h = relation.get(iditer).doubleValue(0) - median;
scores.putDouble(iditer, h);
mv.put(h);
}
// Normalize scores
final double mean = mv.getMean();
final double stddev = mv.getNaiveStddev();
DoubleMinMax minmax = new DoubleMinMax();
for (DBIDIter iditer = relation.iterDBIDs(); iditer.valid(); iditer.advance()) {
double score = Math.abs((scores.doubleValue(iditer) - mean) / stddev);
minmax.put(score);
scores.putDouble(iditer, score);
}
DoubleRelation scoreResult = new MaterializedDoubleRelation("MO", "Median-outlier", scores, relation.getDBIDs());
OutlierScoreMeta scoreMeta = new BasicOutlierScoreMeta(minmax.getMin(), minmax.getMax(), 0.0, Double.POSITIVE_INFINITY, 0);
OutlierResult or = new OutlierResult(scoreMeta, scoreResult);
or.addChildResult(npred);
return or;
}
use of de.lmu.ifi.dbs.elki.algorithm.outlier.spatial.neighborhood.NeighborSetPredicate in project elki by elki-project.
the class CTLuScatterplotOutlier method run.
/**
* Main method.
*
* @param database Database
* @param nrel Neighborhood relation
* @param relation Data relation (1d!)
* @return Outlier detection result
*/
public OutlierResult run(Database database, Relation<N> nrel, Relation<? extends NumberVector> relation) {
final NeighborSetPredicate npred = getNeighborSetPredicateFactory().instantiate(database, nrel);
WritableDoubleDataStore means = DataStoreUtil.makeDoubleStorage(relation.getDBIDs(), DataStoreFactory.HINT_TEMP);
// Calculate average of neighborhood for each object and perform a linear
// regression using the covariance matrix
CovarianceMatrix covm = new CovarianceMatrix(2);
for (DBIDIter iditer = relation.iterDBIDs(); iditer.valid(); iditer.advance()) {
final double local = relation.get(iditer).doubleValue(0);
// Compute mean of neighbors
Mean mean = new Mean();
DBIDs neighbors = npred.getNeighborDBIDs(iditer);
for (DBIDIter iter = neighbors.iter(); iter.valid(); iter.advance()) {
if (DBIDUtil.equal(iditer, iter)) {
continue;
}
mean.put(relation.get(iter).doubleValue(0));
}
final double m;
if (mean.getCount() > 0) {
m = mean.getMean();
} else {
// if object id has no neighbors ==> avg = non-spatial attribute of id
m = local;
}
// Store the mean for the score calculation
means.putDouble(iditer, m);
covm.put(new double[] { local, m });
}
// Finalize covariance matrix, compute linear regression
final double slope, inter;
{
double[] meanv = covm.getMeanVector();
double[][] fmat = covm.destroyToSampleMatrix();
final double covxx = fmat[0][0], covxy = fmat[0][1];
slope = covxy / covxx;
inter = meanv[1] - slope * meanv[0];
}
// calculate mean and variance for error
WritableDoubleDataStore scores = DataStoreUtil.makeDoubleStorage(relation.getDBIDs(), DataStoreFactory.HINT_STATIC);
MeanVariance mv = new MeanVariance();
for (DBIDIter iditer = relation.iterDBIDs(); iditer.valid(); iditer.advance()) {
// Compute the error from the linear regression
double y_i = relation.get(iditer).doubleValue(0);
double e = means.doubleValue(iditer) - (slope * y_i + inter);
scores.putDouble(iditer, e);
mv.put(e);
}
// Normalize scores
DoubleMinMax minmax = new DoubleMinMax();
{
final double mean = mv.getMean();
final double variance = mv.getNaiveStddev();
for (DBIDIter iditer = relation.iterDBIDs(); iditer.valid(); iditer.advance()) {
double score = Math.abs((scores.doubleValue(iditer) - mean) / variance);
minmax.put(score);
scores.putDouble(iditer, score);
}
}
// build representation
DoubleRelation scoreResult = new MaterializedDoubleRelation("SPO", "Scatterplot-Outlier", scores, relation.getDBIDs());
OutlierScoreMeta scoreMeta = new BasicOutlierScoreMeta(minmax.getMin(), minmax.getMax(), 0.0, Double.POSITIVE_INFINITY, 0);
OutlierResult or = new OutlierResult(scoreMeta, scoreResult);
or.addChildResult(npred);
return or;
}
use of de.lmu.ifi.dbs.elki.algorithm.outlier.spatial.neighborhood.NeighborSetPredicate in project elki by elki-project.
the class CTLuMedianMultipleAttributes method run.
/**
* Run the algorithm
*
* @param database Database
* @param spatial Spatial relation
* @param attributes Attributes relation
* @return Outlier detection result
*/
public OutlierResult run(Database database, Relation<N> spatial, Relation<O> attributes) {
final int dim = RelationUtil.dimensionality(attributes);
if (LOG.isDebugging()) {
LOG.debug("Dimensionality: " + dim);
}
final NeighborSetPredicate npred = getNeighborSetPredicateFactory().instantiate(database, spatial);
CovarianceMatrix covmaker = new CovarianceMatrix(dim);
WritableDataStore<double[]> deltas = DataStoreUtil.makeStorage(attributes.getDBIDs(), DataStoreFactory.HINT_TEMP, double[].class);
for (DBIDIter iditer = attributes.iterDBIDs(); iditer.valid(); iditer.advance()) {
final O obj = attributes.get(iditer);
final DBIDs neighbors = npred.getNeighborDBIDs(iditer);
// Compute the median vector
final double[] median = new double[dim];
{
double[][] data = new double[dim][neighbors.size()];
int i = 0;
// Load data
for (DBIDIter iter = neighbors.iter(); iter.valid(); iter.advance()) {
// TODO: skip object itself within neighbors?
O nobj = attributes.get(iter);
for (int d = 0; d < dim; d++) {
data[d][i] = nobj.doubleValue(d);
}
i++;
}
for (int d = 0; d < dim; d++) {
median[d] = QuickSelect.median(data[d]);
}
}
// Delta vector "h"
double[] delta = minusEquals(obj.toArray(), median);
deltas.put(iditer, delta);
covmaker.put(delta);
}
// Finalize covariance matrix:
double[] mean = covmaker.getMeanVector();
double[][] cmati = inverse(covmaker.destroyToSampleMatrix());
DoubleMinMax minmax = new DoubleMinMax();
WritableDoubleDataStore scores = DataStoreUtil.makeDoubleStorage(attributes.getDBIDs(), DataStoreFactory.HINT_STATIC);
for (DBIDIter iditer = attributes.iterDBIDs(); iditer.valid(); iditer.advance()) {
final double score = mahalanobisDistance(cmati, deltas.get(iditer), mean);
minmax.put(score);
scores.putDouble(iditer, score);
}
DoubleRelation scoreResult = new MaterializedDoubleRelation("Median multiple attributes outlier", "median-outlier", scores, attributes.getDBIDs());
OutlierScoreMeta scoreMeta = new BasicOutlierScoreMeta(minmax.getMin(), minmax.getMax(), 0.0, Double.POSITIVE_INFINITY, 0);
OutlierResult or = new OutlierResult(scoreMeta, scoreResult);
or.addChildResult(npred);
return or;
}
use of de.lmu.ifi.dbs.elki.algorithm.outlier.spatial.neighborhood.NeighborSetPredicate in project elki by elki-project.
the class CTLuZTestOutlier method run.
/**
* Main method.
*
* @param database Database
* @param nrel Neighborhood relation
* @param relation Data relation (1d!)
* @return Outlier detection result
*/
public OutlierResult run(Database database, Relation<N> nrel, Relation<? extends NumberVector> relation) {
final NeighborSetPredicate npred = getNeighborSetPredicateFactory().instantiate(database, nrel);
WritableDoubleDataStore scores = DataStoreUtil.makeDoubleStorage(relation.getDBIDs(), DataStoreFactory.HINT_STATIC);
MeanVariance zmv = new MeanVariance();
for (DBIDIter iditer = relation.iterDBIDs(); iditer.valid(); iditer.advance()) {
DBIDs neighbors = npred.getNeighborDBIDs(iditer);
// Compute Mean of neighborhood
Mean localmean = new Mean();
for (DBIDIter iter = neighbors.iter(); iter.valid(); iter.advance()) {
if (DBIDUtil.equal(iditer, iter)) {
continue;
}
localmean.put(relation.get(iter).doubleValue(0));
}
final double localdiff;
if (localmean.getCount() > 0) {
localdiff = relation.get(iditer).doubleValue(0) - localmean.getMean();
} else {
localdiff = 0.0;
}
scores.putDouble(iditer, localdiff);
zmv.put(localdiff);
}
// Normalize scores using mean and variance
DoubleMinMax minmax = new DoubleMinMax();
for (DBIDIter iditer = relation.iterDBIDs(); iditer.valid(); iditer.advance()) {
double score = Math.abs(scores.doubleValue(iditer) - zmv.getMean()) / zmv.getSampleStddev();
minmax.put(score);
scores.putDouble(iditer, score);
}
// Wrap result
DoubleRelation scoreResult = new MaterializedDoubleRelation("ZTest", "Z Test score", scores, relation.getDBIDs());
OutlierScoreMeta scoreMeta = new BasicOutlierScoreMeta(minmax.getMin(), minmax.getMax(), 0.0, Double.POSITIVE_INFINITY, 0);
OutlierResult or = new OutlierResult(scoreMeta, scoreResult);
or.addChildResult(npred);
return or;
}
use of de.lmu.ifi.dbs.elki.algorithm.outlier.spatial.neighborhood.NeighborSetPredicate in project elki by elki-project.
the class CTLuMoranScatterplotOutlier method run.
/**
* Main method.
*
* @param database Database
* @param nrel Neighborhood relation
* @param relation Data relation (1d!)
* @return Outlier detection result
*/
public OutlierResult run(Database database, Relation<N> nrel, Relation<? extends NumberVector> relation) {
final NeighborSetPredicate npred = getNeighborSetPredicateFactory().instantiate(database, nrel);
// Compute the global mean and variance
MeanVariance globalmv = new MeanVariance();
for (DBIDIter iditer = relation.iterDBIDs(); iditer.valid(); iditer.advance()) {
globalmv.put(relation.get(iditer).doubleValue(0));
}
DoubleMinMax minmax = new DoubleMinMax();
WritableDoubleDataStore scores = DataStoreUtil.makeDoubleStorage(relation.getDBIDs(), DataStoreFactory.HINT_STATIC);
// calculate neighborhood average of normalized attribute values.
for (DBIDIter iditer = relation.iterDBIDs(); iditer.valid(); iditer.advance()) {
// Compute global z score
final double globalZ = (relation.get(iditer).doubleValue(0) - globalmv.getMean()) / globalmv.getNaiveStddev();
// Compute local average z score
Mean localm = new Mean();
for (DBIDIter iter = npred.getNeighborDBIDs(iditer).iter(); iter.valid(); iter.advance()) {
if (DBIDUtil.equal(iditer, iter)) {
continue;
}
localm.put((relation.get(iter).doubleValue(0) - globalmv.getMean()) / globalmv.getNaiveStddev());
}
// if neighors.size == 0
final double localZ;
if (localm.getCount() > 0) {
localZ = localm.getMean();
} else {
// if s has no neighbors => Wzi = zi
localZ = globalZ;
}
// compute score
// Note: in the original moran scatterplot, any object with a score < 0 would be an outlier.
final double score = Math.max(-globalZ * localZ, 0);
minmax.put(score);
scores.putDouble(iditer, score);
}
DoubleRelation scoreResult = new MaterializedDoubleRelation("MoranOutlier", "Moran Scatterplot Outlier", scores, relation.getDBIDs());
OutlierScoreMeta scoreMeta = new BasicOutlierScoreMeta(minmax.getMin(), minmax.getMax(), Double.NEGATIVE_INFINITY, Double.POSITIVE_INFINITY, 0);
OutlierResult or = new OutlierResult(scoreMeta, scoreResult);
or.addChildResult(npred);
return or;
}
Aggregations