use of de.lmu.ifi.dbs.elki.database.datastore.WritableDoubleDataStore in project elki by elki-project.
the class OutRankS1 method run.
@Override
public OutlierResult run(Database database) {
DBIDs ids = database.getRelation(TypeUtil.ANY).getDBIDs();
// Run the primary algorithm
Clustering<? extends SubspaceModel> clustering = clusteralg.run(database);
WritableDoubleDataStore score = DataStoreUtil.makeDoubleStorage(ids, DataStoreFactory.HINT_HOT);
for (DBIDIter iter = ids.iter(); iter.valid(); iter.advance()) {
score.putDouble(iter, 0);
}
int maxdim = 0, maxsize = 0;
// Find maximum dimensionality and cluster size
for (Cluster<? extends SubspaceModel> cluster : clustering.getAllClusters()) {
maxsize = Math.max(maxsize, cluster.size());
maxdim = Math.max(maxdim, BitsUtil.cardinality(cluster.getModel().getDimensions()));
}
// Iterate over all clusters:
DoubleMinMax minmax = new DoubleMinMax();
for (Cluster<? extends SubspaceModel> cluster : clustering.getAllClusters()) {
double relsize = cluster.size() / (double) maxsize;
double reldim = BitsUtil.cardinality(cluster.getModel().getDimensions()) / (double) maxdim;
// Process objects in the cluster
for (DBIDIter iter = cluster.getIDs().iter(); iter.valid(); iter.advance()) {
double newscore = score.doubleValue(iter) + alpha * relsize + (1 - alpha) * reldim;
score.putDouble(iter, newscore);
minmax.put(newscore);
}
}
DoubleRelation scoreResult = new MaterializedDoubleRelation("OutRank-S1", "OUTRANK_S1", score, ids);
OutlierScoreMeta meta = new InvertedOutlierScoreMeta(minmax.getMin(), minmax.getMax(), 0, Double.POSITIVE_INFINITY);
OutlierResult res = new OutlierResult(meta, scoreResult);
res.addChildResult(clustering);
return res;
}
use of de.lmu.ifi.dbs.elki.database.datastore.WritableDoubleDataStore in project elki by elki-project.
the class KMeansOutlierDetection method run.
/**
* Run the outlier detection algorithm.
*
* @param database Database
* @param relation Relation
* @return Outlier detection result
*/
public OutlierResult run(Database database, Relation<O> relation) {
DistanceFunction<? super O> df = clusterer.getDistanceFunction();
DistanceQuery<O> dq = database.getDistanceQuery(relation, df);
// TODO: improve ELKI api to ensure we're using the same DBIDs!
Clustering<?> c = clusterer.run(database, relation);
WritableDoubleDataStore scores = DataStoreUtil.makeDoubleStorage(relation.getDBIDs(), DataStoreFactory.HINT_DB);
DoubleMinMax mm = new DoubleMinMax();
@SuppressWarnings("unchecked") NumberVector.Factory<O> factory = (NumberVector.Factory<O>) RelationUtil.assumeVectorField(relation).getFactory();
List<? extends Cluster<?>> clusters = c.getAllClusters();
for (Cluster<?> cluster : clusters) {
// FIXME: use a primitive distance function on number vectors instead.
O mean = factory.newNumberVector(ModelUtil.getPrototype(cluster.getModel(), relation));
for (DBIDIter iter = cluster.getIDs().iter(); iter.valid(); iter.advance()) {
double dist = dq.distance(mean, iter);
scores.put(iter, dist);
mm.put(dist);
}
}
// Build result representation.
DoubleRelation scoreResult = new MaterializedDoubleRelation("KMeans outlier scores", "kmeans-outlier", scores, relation.getDBIDs());
OutlierScoreMeta scoreMeta = new BasicOutlierScoreMeta(mm.getMin(), mm.getMax(), 0., Double.POSITIVE_INFINITY, 0.);
return new OutlierResult(scoreMeta, scoreResult);
}
use of de.lmu.ifi.dbs.elki.database.datastore.WritableDoubleDataStore in project elki by elki-project.
the class DistanceStddevOutlier method run.
/**
* Run the outlier detection algorithm
*
* @param database Database to use
* @param relation Relation to analyze
* @return Outlier score result
*/
public OutlierResult run(Database database, Relation<O> relation) {
// Get a nearest neighbor query on the relation.
KNNQuery<O> knnq = QueryUtil.getKNNQuery(relation, getDistanceFunction(), k);
// Output data storage
WritableDoubleDataStore scores = DataStoreUtil.makeDoubleStorage(relation.getDBIDs(), DataStoreFactory.HINT_DB);
// Track minimum and maximum scores
DoubleMinMax minmax = new DoubleMinMax();
// Iterate over all objects
for (DBIDIter iter = relation.iterDBIDs(); iter.valid(); iter.advance()) {
KNNList neighbors = knnq.getKNNForDBID(iter, k);
// Aggregate distances
MeanVariance mv = new MeanVariance();
for (DoubleDBIDListIter neighbor = neighbors.iter(); neighbor.valid(); neighbor.advance()) {
// Skip the object itself. The 0 is not very informative.
if (DBIDUtil.equal(iter, neighbor)) {
continue;
}
mv.put(neighbor.doubleValue());
}
// Store score
scores.putDouble(iter, mv.getSampleStddev());
}
// Wrap the result in the standard containers
// Actual min-max, theoretical min-max!
OutlierScoreMeta meta = new BasicOutlierScoreMeta(minmax.getMin(), minmax.getMax(), 0, Double.POSITIVE_INFINITY);
DoubleRelation rel = new MaterializedDoubleRelation(relation.getDBIDs(), "stddev-outlier", scores);
return new OutlierResult(meta, rel);
}
use of de.lmu.ifi.dbs.elki.database.datastore.WritableDoubleDataStore in project elki by elki-project.
the class TrivialGeneratedOutlier method run.
/**
* Run the algorithm
*
* @param models Model relation
* @param vecs Vector relation
* @param labels Label relation
* @return Outlier result
*/
public OutlierResult run(Relation<Model> models, Relation<NumberVector> vecs, Relation<?> labels) {
WritableDoubleDataStore scores = DataStoreUtil.makeDoubleStorage(models.getDBIDs(), DataStoreFactory.HINT_HOT);
HashSet<GeneratorSingleCluster> generators = new HashSet<>();
for (DBIDIter iditer = models.iterDBIDs(); iditer.valid(); iditer.advance()) {
Model model = models.get(iditer);
if (model instanceof GeneratorSingleCluster) {
generators.add((GeneratorSingleCluster) model);
}
}
if (generators.isEmpty()) {
LOG.warning("No generator models found for dataset - all points will be considered outliers.");
}
for (GeneratorSingleCluster gen : generators) {
for (int i = 0; i < gen.getDim(); i++) {
Distribution dist = gen.getDistribution(i);
if (!(dist instanceof NormalDistribution)) {
throw new AbortException("TrivialGeneratedOutlier currently only supports normal distributions, got: " + dist);
}
}
}
for (DBIDIter iditer = models.iterDBIDs(); iditer.valid(); iditer.advance()) {
double score = 1.;
double[] v = vecs.get(iditer).toArray();
for (GeneratorSingleCluster gen : generators) {
double[] tv = v;
// Transform backwards
if (gen.getTransformation() != null) {
tv = gen.getTransformation().applyInverse(v);
}
final int dim = tv.length;
double lensq = 0.0;
int norm = 0;
for (int i = 0; i < dim; i++) {
Distribution dist = gen.getDistribution(i);
if (dist instanceof NormalDistribution) {
NormalDistribution d = (NormalDistribution) dist;
double delta = (tv[i] - d.getMean()) / d.getStddev();
lensq += delta * delta;
norm += 1;
} else {
throw new AbortException("TrivialGeneratedOutlier currently only supports normal distributions, got: " + dist);
}
}
if (norm > 0.) {
// The squared distances are ChiSquared distributed
score = Math.min(score, ChiSquaredDistribution.cdf(lensq, norm));
} else {
score = 0.;
}
}
if (expect < 1) {
score = expect * score / (1 - score + expect);
}
scores.putDouble(iditer, score);
}
DoubleRelation scoreres = new MaterializedDoubleRelation("Model outlier scores", "model-outlier", scores, models.getDBIDs());
OutlierScoreMeta meta = new ProbabilisticOutlierScore(0., 1.);
return new OutlierResult(meta, scoreres);
}
use of de.lmu.ifi.dbs.elki.database.datastore.WritableDoubleDataStore in project elki by elki-project.
the class FarthestSumPointsInitialMeans method chooseInitialMeans.
@Override
public <T extends NumberVector> double[][] chooseInitialMeans(Database database, Relation<T> relation, int k, NumberVectorDistanceFunction<? super T> distanceFunction) {
// Get a distance query
DistanceQuery<T> distQ = database.getDistanceQuery(relation, distanceFunction);
DBIDs ids = relation.getDBIDs();
WritableDoubleDataStore store = DataStoreUtil.makeDoubleStorage(ids, DataStoreFactory.HINT_HOT | DataStoreFactory.HINT_TEMP, 0.);
// Chose first mean
List<T> means = new ArrayList<>(k);
DBIDRef first = DBIDUtil.randomSample(ids, rnd);
T prevmean = relation.get(first);
means.add(prevmean);
// Find farthest object each.
DBIDVar best = DBIDUtil.newVar(first);
for (int i = (dropfirst ? 0 : 1); i < k; i++) {
double maxdist = Double.NEGATIVE_INFINITY;
for (DBIDIter it = ids.iter(); it.valid(); it.advance()) {
final double prev = store.doubleValue(it);
if (prev != prev) {
// NaN: already chosen!
continue;
}
double dsum = prev + distQ.distance(prevmean, it);
// Don't store distance to first mean, when it will be dropped below.
if (i > 0) {
store.putDouble(it, dsum);
}
if (dsum > maxdist) {
maxdist = dsum;
best.set(it);
}
}
// Add new mean (and drop the initial mean when desired)
if (i == 0) {
// Remove temporary first element.
means.clear();
}
// So it won't be chosen twice.
store.putDouble(best, Double.NaN);
prevmean = relation.get(best);
means.add(prevmean);
}
// Explicitly destroy temporary data.
store.destroy();
return unboxVectors(means);
}
Aggregations