Use of de.lmu.ifi.dbs.elki.result.outlier.BasicOutlierScoreMeta in project elki by elki-project.
Class CTLuMeanMultipleAttributes, method run:
/**
 * Run the algorithm
 *
 * @param database Database
 * @param spatial Spatial relation
 * @param attributes Numerical attributes
 * @return Outlier detection result
 */
public OutlierResult run(Database database, Relation<N> spatial, Relation<O> attributes) {
  if (LOG.isDebugging()) {
    LOG.debug("Dimensionality: " + RelationUtil.dimensionality(attributes));
  }
  final NeighborSetPredicate npred = getNeighborSetPredicateFactory().instantiate(database, spatial);
  CovarianceMatrix covmaker = new CovarianceMatrix(RelationUtil.dimensionality(attributes));
  WritableDataStore<double[]> deltas = DataStoreUtil.makeStorage(attributes.getDBIDs(), DataStoreFactory.HINT_TEMP, double[].class);
  for (DBIDIter iditer = attributes.iterDBIDs(); iditer.valid(); iditer.advance()) {
    final O obj = attributes.get(iditer);
    final DBIDs neighbors = npred.getNeighborDBIDs(iditer);
    // TODO: remove object itself from neighbors?
    // Mean vector "g"
    double[] mean = Centroid.make(attributes, neighbors).getArrayRef();
    // Delta vector "h"
    double[] delta = minusEquals(obj.toArray(), mean);
    deltas.put(iditer, delta);
    covmaker.put(delta);
  }
  // Finalize covariance matrix:
  double[] mean = covmaker.getMeanVector();
  double[][] cmati = inverse(covmaker.destroyToSampleMatrix());
  DoubleMinMax minmax = new DoubleMinMax();
  WritableDoubleDataStore scores = DataStoreUtil.makeDoubleStorage(attributes.getDBIDs(), DataStoreFactory.HINT_STATIC);
  for (DBIDIter iditer = attributes.iterDBIDs(); iditer.valid(); iditer.advance()) {
    final double score = mahalanobisDistance(cmati, deltas.get(iditer), mean);
    minmax.put(score);
    scores.putDouble(iditer, score);
  }
  DoubleRelation scoreResult = new MaterializedDoubleRelation("mean multiple attributes spatial outlier", "mean-multipleattributes-outlier", scores, attributes.getDBIDs());
  OutlierScoreMeta scoreMeta = new BasicOutlierScoreMeta(minmax.getMin(), minmax.getMax(), 0.0, Double.POSITIVE_INFINITY, 0);
  OutlierResult or = new OutlierResult(scoreMeta, scoreResult);
  or.addChildResult(npred);
  return or;
}
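For reference, the score computed in the second loop is the Mahalanobis distance of each local difference vector with respect to the mean and sample covariance accumulated by covmaker. A sketch in LaTeX notation (the symbols h_i, g, \mu_h and \Sigma are introduced here for illustration and do not appear in the code):

\[ h_i = x_i - g(N(i)), \qquad \mathrm{score}(o_i) = (h_i - \mu_h)^{\top} \Sigma^{-1} (h_i - \mu_h) \]

where x_i is the attribute vector of o_i, g(N(i)) is the centroid of its spatial neighbors, and \mu_h, \Sigma are the mean and sample covariance of all h vectors.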
Use of de.lmu.ifi.dbs.elki.result.outlier.BasicOutlierScoreMeta in project elki by elki-project.
Class CTLuRandomWalkEC, method run:
/**
 * Run the algorithm.
 *
 * @param spatial Spatial neighborhood relation
 * @param relation Attribute value relation
 * @return Outlier result
 */
public OutlierResult run(Relation<P> spatial, Relation<? extends NumberVector> relation) {
  DistanceQuery<P> distFunc = getDistanceFunction().instantiate(spatial);
  WritableDataStore<double[]> similarityVectors = DataStoreUtil.makeStorage(spatial.getDBIDs(), DataStoreFactory.HINT_TEMP, double[].class);
  WritableDataStore<DBIDs> neighbors = DataStoreUtil.makeStorage(spatial.getDBIDs(), DataStoreFactory.HINT_TEMP, DBIDs.class);
  // Make a static IDs array for matrix column indexing
  ArrayDBIDs ids = DBIDUtil.ensureArray(relation.getDBIDs());
  // construct the relation Matrix of the ec-graph
  double[][] E = new double[ids.size()][ids.size()];
  KNNHeap heap = DBIDUtil.newHeap(k);
  {
    int i = 0;
    for (DBIDIter id = ids.iter(); id.valid(); id.advance(), i++) {
      final double val = relation.get(id).doubleValue(0);
      assert (heap.size() == 0);
      int j = 0;
      for (DBIDIter n = ids.iter(); n.valid(); n.advance(), j++) {
        if (i == j) {
          continue;
        }
        final double e;
        final double distance = distFunc.distance(id, n);
        heap.insert(distance, n);
        if (distance == 0) {
          LOG.warning("Zero distances are not supported - skipping: " + DBIDUtil.toString(id) + " " + DBIDUtil.toString(n));
          e = 0;
        } else {
          double diff = Math.abs(val - relation.get(n).doubleValue(0));
          double exp = FastMath.exp(FastMath.pow(diff, alpha));
          // Implementation note: not inverting exp worked a lot better.
          // Therefore we diverge from the article here.
          e = exp / distance;
        }
        E[j][i] = e;
      }
      // Convert kNN Heap into DBID array
      ModifiableDBIDs nids = DBIDUtil.newArray(heap.size());
      while (heap.size() > 0) {
        nids.add(heap.poll());
      }
      neighbors.put(id, nids);
    }
  }
  // Column-normalize the matrix; also do the -c multiplication in this process.
  for (int i = 0; i < E[0].length; i++) {
    double sum = 0.0;
    for (int j = 0; j < E.length; j++) {
      sum += E[j][i];
    }
    if (sum == 0) {
      sum = 1.0;
    }
    for (int j = 0; j < E.length; j++) {
      E[j][i] = -c * E[j][i] / sum;
    }
  }
  // Add identity matrix. The diagonal should still be 0s, so this is trivial.
  assert (E.length == E[0].length);
  for (int col = 0; col < E[0].length; col++) {
    assert (E[col][col] == 0.0);
    E[col][col] = 1.0;
  }
  E = timesEquals(inverse(E), 1 - c);
  // Split the matrix into columns
  {
    int i = 0;
    for (DBIDIter id = ids.iter(); id.valid(); id.advance(), i++) {
      // Note: matrix times ith unit vector = ith column
      double[] sim = getCol(E, i);
      similarityVectors.put(id, sim);
    }
  }
  E = null;
  // compute the relevance scores between specified Object and its neighbors
  DoubleMinMax minmax = new DoubleMinMax();
  WritableDoubleDataStore scores = DataStoreUtil.makeDoubleStorage(spatial.getDBIDs(), DataStoreFactory.HINT_STATIC);
  for (DBIDIter id = ids.iter(); id.valid(); id.advance()) {
    double gmean = 1.0;
    int cnt = 0;
    for (DBIDIter iter = neighbors.get(id).iter(); iter.valid(); iter.advance()) {
      if (DBIDUtil.equal(id, iter)) {
        continue;
      }
      double sim = VMath.angle(similarityVectors.get(id), similarityVectors.get(iter));
      gmean *= sim;
      cnt++;
    }
    final double score = FastMath.pow(gmean, 1.0 / cnt);
    minmax.put(score);
    scores.putDouble(id, score);
  }
  DoubleRelation scoreResult = new MaterializedDoubleRelation("randomwalkec", "RandomWalkEC", scores, relation.getDBIDs());
  OutlierScoreMeta scoreMeta = new BasicOutlierScoreMeta(minmax.getMin(), minmax.getMax(), 0.0, Double.POSITIVE_INFINITY, 0.0);
  return new OutlierResult(scoreMeta, scoreResult);
}
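To summarize what the code above builds: each matrix entry combines attribute difference and spatial distance, the columns are normalized and scaled by -c, the diagonal is set to one, and the inverse is rescaled by 1 - c; the final score is the geometric mean of the similarities (VMath.angle) between a point's column vector and those of its k nearest spatial neighbors. A sketch in LaTeX notation (symbols chosen here for illustration, not taken from the code):

\[ E_{ji} = \frac{\exp\!\big(|v_i - v_j|^{\alpha}\big)}{d(o_i, o_j)}, \qquad W = (1 - c)\,\big(I - c\,\hat{E}\big)^{-1}, \qquad \mathrm{score}(o_i) = \Big(\prod_{o_j \in kNN(o_i)} \mathrm{sim}(w_i, w_j)\Big)^{1/\mathrm{cnt}} \]

where \hat{E} is the column-normalized matrix, w_i is the i-th column of W, sim is the angle-based vector similarity used in the code, and cnt is the number of neighbors actually aggregated.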
Use of de.lmu.ifi.dbs.elki.result.outlier.BasicOutlierScoreMeta in project elki by elki-project.
Class LibSVMOneClassOutlierDetection, method run:
/**
 * Run one-class SVM.
 *
 * @param relation Data relation
 * @return Outlier result.
 */
public OutlierResult run(Relation<V> relation) {
  final int dim = RelationUtil.dimensionality(relation);
  final ArrayDBIDs ids = DBIDUtil.ensureArray(relation.getDBIDs());
  svm.svm_set_print_string_function(LOG_HELPER);
  svm_parameter param = new svm_parameter();
  param.svm_type = svm_parameter.ONE_CLASS;
  param.kernel_type = svm_parameter.LINEAR;
  param.degree = 3;
  switch(kernel) {
  case LINEAR:
    param.kernel_type = svm_parameter.LINEAR;
    break;
  case QUADRATIC:
    param.kernel_type = svm_parameter.POLY;
    param.degree = 2;
    break;
  case CUBIC:
    param.kernel_type = svm_parameter.POLY;
    param.degree = 3;
    break;
  case RBF:
    param.kernel_type = svm_parameter.RBF;
    break;
  case SIGMOID:
    param.kernel_type = svm_parameter.SIGMOID;
    break;
  default:
    throw new AbortException("Invalid kernel parameter: " + kernel);
  }
  // TODO: expose additional parameters to the end user!
  param.nu = nu;
  param.coef0 = 0.;
  param.cache_size = 10000;
  param.C = 1;
  // not used by one-class?
  param.eps = 1e-4;
  // not used by one-class?
  param.p = 0.1;
  param.shrinking = 0;
  param.probability = 0;
  param.nr_weight = 0;
  param.weight_label = new int[0];
  param.weight = new double[0];
  param.gamma = 1. / dim;
  // Transform data:
  svm_problem prob = new svm_problem();
  prob.l = relation.size();
  prob.x = new svm_node[prob.l][];
  prob.y = new double[prob.l];
  {
    DBIDIter iter = ids.iter();
    for (int i = 0; i < prob.l && iter.valid(); iter.advance(), i++) {
      V vec = relation.get(iter);
      // TODO: support compact sparse vectors, too!
      svm_node[] x = new svm_node[dim];
      for (int d = 0; d < dim; d++) {
        x[d] = new svm_node();
        x[d].index = d + 1;
        x[d].value = vec.doubleValue(d);
      }
      prob.x[i] = x;
      prob.y[i] = +1;
    }
  }
  if (LOG.isVerbose()) {
    LOG.verbose("Training one-class SVM...");
  }
  String err = svm.svm_check_parameter(prob, param);
  if (err != null) {
    LOG.warning("svm_check_parameter: " + err);
  }
  svm_model model = svm.svm_train(prob, param);
  if (LOG.isVerbose()) {
    LOG.verbose("Predicting...");
  }
  WritableDoubleDataStore scores = DataStoreUtil.makeDoubleStorage(relation.getDBIDs(), DataStoreFactory.HINT_DB);
  DoubleMinMax mm = new DoubleMinMax();
  {
    DBIDIter iter = ids.iter();
    double[] buf = new double[svm.svm_get_nr_class(model)];
    for (int i = 0; i < prob.l && iter.valid(); iter.advance(), i++) {
      V vec = relation.get(iter);
      svm_node[] x = new svm_node[dim];
      for (int d = 0; d < dim; d++) {
        x[d] = new svm_node();
        x[d].index = d + 1;
        x[d].value = vec.doubleValue(d);
      }
      svm.svm_predict_values(model, x, buf);
      double score = -buf[0]; // / param.gamma; // Heuristic rescaling, sorry.
      // Unfortunately, libsvm one-class currently yields a binary decision.
      scores.putDouble(iter, score);
      mm.put(score);
    }
  }
  DoubleRelation scoreResult = new MaterializedDoubleRelation("One-Class SVM Decision", "svm-outlier", scores, ids);
  OutlierScoreMeta scoreMeta = new BasicOutlierScoreMeta(mm.getMin(), mm.getMax(), Double.NEGATIVE_INFINITY, Double.POSITIVE_INFINITY, 0.);
  return new OutlierResult(scoreMeta, scoreResult);
}
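For the one-class case, the value written into buf[0] by svm_predict_values is the unthresholded decision value of the trained model; the code negates it so that larger scores indicate more outlying points. A sketch in LaTeX notation of the usual one-class SVM formulation (Schölkopf et al.) this corresponds to; the symbols below follow the common libsvm convention and are not part of the code above:

\[ f(x) = \sum_{i} \alpha_i\, K(x_i, x) - \rho, \qquad \mathrm{score}(x) = -f(x) \]

where the coefficients \alpha_i and the offset \rho are learned by svm_train, and K is the kernel selected in the switch statement.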
Use of de.lmu.ifi.dbs.elki.result.outlier.BasicOutlierScoreMeta in project elki by elki-project.
Class KNNDD, method run:
/**
 * Runs the algorithm in the timed evaluation part.
 *
 * @param relation Data relation
 * @return Outlier result
 */
public OutlierResult run(Relation<O> relation) {
  final DistanceQuery<O> distanceQuery = relation.getDistanceQuery(getDistanceFunction());
  final KNNQuery<O> knnQuery = relation.getKNNQuery(distanceQuery, k);
  FiniteProgress prog = LOG.isVerbose() ? new FiniteProgress("kNN distance for objects", relation.size(), LOG) : null;
  WritableDoubleDataStore knnDist = DataStoreUtil.makeDoubleStorage(relation.getDBIDs(), DataStoreFactory.HINT_HOT | DataStoreFactory.HINT_TEMP);
  WritableDBIDDataStore neighbor = DataStoreUtil.makeDBIDStorage(relation.getDBIDs(), DataStoreFactory.HINT_HOT | DataStoreFactory.HINT_TEMP);
  DBIDVar var = DBIDUtil.newVar();
  // Find nearest neighbors, and store the distances.
  for (DBIDIter it = relation.iterDBIDs(); it.valid(); it.advance()) {
    final KNNList knn = knnQuery.getKNNForDBID(it, k);
    knnDist.putDouble(it, knn.getKNNDistance());
    neighbor.put(it, knn.assignVar(knn.size() - 1, var));
    LOG.incrementProcessed(prog);
  }
  LOG.ensureCompleted(prog);
  prog = LOG.isVerbose() ? new FiniteProgress("kNN distance descriptor", relation.size(), LOG) : null;
  WritableDoubleDataStore scores = DataStoreUtil.makeDoubleStorage(relation.getDBIDs(), DataStoreFactory.HINT_DB);
  DoubleMinMax minmax = new DoubleMinMax();
  for (DBIDIter it = relation.iterDBIDs(); it.valid(); it.advance()) {
    // kNN distance of the object itself
    double d = knnDist.doubleValue(it);
    // kNN distance of its stored neighbor
    double nd = knnDist.doubleValue(neighbor.assignVar(it, var));
    double knndd = nd > 0 ? d / nd : d > 0 ? Double.POSITIVE_INFINITY : 1.;
    scores.put(it, knndd);
    minmax.put(knndd);
    LOG.incrementProcessed(prog);
  }
  LOG.ensureCompleted(prog);
  DoubleRelation scoreres = new MaterializedDoubleRelation("kNN Data Descriptor", "knndd-outlier", scores, relation.getDBIDs());
  OutlierScoreMeta meta = new BasicOutlierScoreMeta(minmax.getMin(), minmax.getMax(), 0., Double.POSITIVE_INFINITY, 1.);
  return new OutlierResult(meta, scoreres);
}
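In compact form, the score compares an object's k-nearest-neighbor distance to the kNN distance of the neighbor stored for it (the last entry of its kNN list). A sketch in LaTeX notation, with symbols introduced here for illustration:

\[ \mathrm{score}(o) = \frac{d_k(o)}{d_k(\mathrm{nn}_k(o))} \]

where d_k denotes the kNN distance and nn_k(o) that stored neighbor; as in the code, the score falls back to positive infinity when the neighbor's distance is zero but the object's is not, and to 1 when both are zero.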
Use of de.lmu.ifi.dbs.elki.result.outlier.BasicOutlierScoreMeta in project elki by elki-project.
Class GaussianModel, method run:
/**
 * Run the algorithm
 *
 * @param relation Data relation
 * @return Outlier result
 */
public OutlierResult run(Relation<V> relation) {
  DoubleMinMax mm = new DoubleMinMax();
  // resulting scores
  WritableDoubleDataStore oscores = DataStoreUtil.makeDoubleStorage(relation.getDBIDs(), DataStoreFactory.HINT_TEMP | DataStoreFactory.HINT_HOT);
  // Compute mean and covariance Matrix
  CovarianceMatrix temp = CovarianceMatrix.make(relation);
  double[] mean = temp.getMeanVector(relation).toArray();
  // debugFine(mean.toString());
  double[][] covarianceMatrix = temp.destroyToPopulationMatrix();
  // debugFine(covarianceMatrix.toString());
  double[][] covarianceTransposed = inverse(covarianceMatrix);
  // Normalization factors for Gaussian PDF
  double det = new LUDecomposition(covarianceMatrix).det();
  final double fakt = 1.0 / FastMath.sqrt(MathUtil.powi(MathUtil.TWOPI, RelationUtil.dimensionality(relation)) * det);
  // for each object compute Mahalanobis distance
  for (DBIDIter iditer = relation.iterDBIDs(); iditer.valid(); iditer.advance()) {
    double[] x = minusEquals(relation.get(iditer).toArray(), mean);
    // Gaussian PDF
    final double mDist = transposeTimesTimes(x, covarianceTransposed, x);
    final double prob = fakt * FastMath.exp(-mDist * .5);
    mm.put(prob);
    oscores.putDouble(iditer, prob);
  }
  final OutlierScoreMeta meta;
  if (invert) {
    double max = mm.getMax() != 0 ? mm.getMax() : 1.;
    for (DBIDIter iditer = relation.iterDBIDs(); iditer.valid(); iditer.advance()) {
      oscores.putDouble(iditer, (max - oscores.doubleValue(iditer)) / max);
    }
    meta = new BasicOutlierScoreMeta(0.0, 1.0);
  } else {
    meta = new InvertedOutlierScoreMeta(mm.getMin(), mm.getMax(), 0.0, Double.POSITIVE_INFINITY);
  }
  DoubleRelation res = new MaterializedDoubleRelation("Gaussian Model Outlier Score", "gaussian-model-outlier", oscores, relation.getDBIDs());
  return new OutlierResult(meta, res);
}
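The prob value computed above is the multivariate Gaussian density \((2\pi)^{-d/2}\,|\Sigma|^{-1/2}\exp\!\big(-\tfrac{1}{2}(x-\mu)^{\top}\Sigma^{-1}(x-\mu)\big)\), optionally rescaled to (max - prob) / max when invert is set. Below is a minimal standalone sketch in plain Java of the same density computation, assuming the mean, inverse covariance and determinant have already been estimated; the method name and signature are illustrative, not part of the ELKI API.

/**
 * Standalone sketch: Gaussian density score for a single vector (illustrative only).
 */
static double gaussianDensity(double[] x, double[] mean, double[][] covInv, double det) {
  final int d = x.length;
  // Difference to the mean
  double[] diff = new double[d];
  for (int i = 0; i < d; i++) {
    diff[i] = x[i] - mean[i];
  }
  // Mahalanobis distance: diff^T * covInv * diff
  double mdist = 0.;
  for (int i = 0; i < d; i++) {
    double tmp = 0.;
    for (int j = 0; j < d; j++) {
      tmp += covInv[i][j] * diff[j];
    }
    mdist += diff[i] * tmp;
  }
  // Normalization factor, analogous to "fakt" above
  final double norm = 1.0 / Math.sqrt(Math.pow(2 * Math.PI, d) * det);
  // Low densities indicate outliers (hence the inverted score meta above)
  return norm * Math.exp(-0.5 * mdist);
}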