Use of de.lmu.ifi.dbs.elki.database.ids.KNNList in project elki by elki-project.
The class VarianceOfVolume, method computeVOVs.
/**
* Compute variance of volumes.
*
* @param knnq KNN query
* @param ids IDs to process
* @param vols Volumes
* @param vovs Variance of Volume storage
* @param vovminmax Score minimum/maximum tracker
*/
private void computeVOVs(KNNQuery<O> knnq, DBIDs ids, DoubleDataStore vols, WritableDoubleDataStore vovs, DoubleMinMax vovminmax) {
  FiniteProgress prog = LOG.isVerbose() ? new FiniteProgress("Variance of Volume", ids.size(), LOG) : null;
  boolean warned = false;
  for (DBIDIter iter = ids.iter(); iter.valid(); iter.advance()) {
    KNNList knns = knnq.getKNNForDBID(iter, k);
    DoubleDBIDListIter it = knns.iter();
    double vbar = 0.;
    for (; it.valid(); it.advance()) {
      vbar += vols.doubleValue(it);
    }
    // Average
    vbar /= knns.size();
    double vov = 0.;
    for (it.seek(0); it.valid(); it.advance()) {
      double v = vols.doubleValue(it) - vbar;
      vov += v * v;
    }
    if (!(vov < Double.POSITIVE_INFINITY) && !warned) {
      LOG.warning("Variance of Volumes has hit double precision limits, results are not reliable.");
      warned = true;
    }
    vov = (knns.size() > 1 && vov < Double.POSITIVE_INFINITY) ? vov / (knns.size() - 1) : Double.POSITIVE_INFINITY;
    vovs.putDouble(iter, vov);
    // update minimum and maximum
    vovminmax.put(vov);
    LOG.incrementProcessed(prog);
  }
  LOG.ensureCompleted(prog);
}
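The two-pass pattern above (one pass for the mean, then it.seek(0) to rewind the same iterator for the squared deviations) works for any per-neighbor statistic. As a minimal sketch, a hypothetical helper (not part of ELKI) that averages the neighbor distances stored directly in a KNNList:

private static double meanNeighborDistance(KNNList knns) {
  double sum = 0.;
  for (DoubleDBIDListIter it = knns.iter(); it.valid(); it.advance()) {
    // it.doubleValue() is the distance of the current neighbor
    sum += it.doubleValue();
  }
  return knns.size() > 0 ? sum / knns.size() : Double.NaN;
}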
Use of de.lmu.ifi.dbs.elki.database.ids.KNNList in project elki by elki-project.
The class KNNSOS, method run.
/**
* Run the algorithm.
*
* @param relation data relation
* @return outlier detection result
*/
public OutlierResult run(Relation<O> relation) {
  // Query size
  final int k1 = k + 1;
  final double perplexity = k / 3.;
  KNNQuery<O> knnq = relation.getKNNQuery(getDistanceFunction(), k1);
  final double logPerp = FastMath.log(perplexity);
  double[] p = new double[k + 10];
  FiniteProgress prog = LOG.isVerbose() ? new FiniteProgress("KNNSOS scores", relation.size(), LOG) : null;
  WritableDoubleDataStore scores = DataStoreUtil.makeDoubleStorage(relation.getDBIDs(), DataStoreFactory.HINT_HOT | DataStoreFactory.HINT_DB, 1.);
  for (DBIDIter it = relation.iterDBIDs(); it.valid(); it.advance()) {
    KNNList knns = knnq.getKNNForDBID(it, k1);
    if (p.length < knns.size() + 1) {
      p = new double[knns.size() + 10];
    }
    final DoubleDBIDListIter ki = knns.iter();
    // Compute affinities
    SOS.computePi(it, ki, p, perplexity, logPerp);
    // Normalization factor:
    double s = SOS.sumOfProbabilities(it, ki, p);
    if (s > 0) {
      ISOS.nominateNeighbors(it, ki, p, 1. / s, scores);
    }
    LOG.incrementProcessed(prog);
  }
  LOG.ensureCompleted(prog);
  // Find minimum and maximum.
  DoubleMinMax minmax = ISOS.transformScores(scores, relation.getDBIDs(), logPerp, phi);
  DoubleRelation scoreres = new MaterializedDoubleRelation("kNN Stochastic Outlier Selection", "knnsos-outlier", scores, relation.getDBIDs());
  OutlierScoreMeta meta = new ProbabilisticOutlierScore(minmax.getMin(), minmax.getMax(), 0.);
  return new OutlierResult(meta, scoreres);
}
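The query uses k1 = k + 1 because the query object is part of the relation, so it typically appears in its own neighbor list with distance zero. A minimal sketch of skipping the query point while iterating, assuming knnq, it, and k as in the method above:

KNNList knns = knnq.getKNNForDBID(it, k + 1);
for (DoubleDBIDListIter ki = knns.iter(); ki.valid(); ki.advance()) {
  if (DBIDUtil.equal(ki, it)) {
    continue; // skip the query object itself
  }
  // process neighbor ki; its distance is ki.doubleValue()
}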
Use of de.lmu.ifi.dbs.elki.database.ids.KNNList in project elki by elki-project.
The class COP, method run.
/**
* Process a single relation.
*
* @param relation Relation to process
* @return Outlier detection result
*/
public OutlierResult run(Relation<V> relation) {
  final DBIDs ids = relation.getDBIDs();
  KNNQuery<V> knnQuery = QueryUtil.getKNNQuery(relation, getDistanceFunction(), k + 1);
  final int dim = RelationUtil.dimensionality(relation);
  if (k <= dim + 1) {
    LOG.warning("PCA is underspecified with a too low k! k should be much larger than " + dim);
  }
  WritableDoubleDataStore cop_score = DataStoreUtil.makeDoubleStorage(ids, DataStoreFactory.HINT_HOT | DataStoreFactory.HINT_STATIC);
  WritableDataStore<double[]> cop_err_v = null;
  WritableIntegerDataStore cop_dim = null;
  if (models) {
    cop_err_v = DataStoreUtil.makeStorage(ids, DataStoreFactory.HINT_HOT | DataStoreFactory.HINT_STATIC, double[].class);
    cop_dim = DataStoreUtil.makeIntegerStorage(ids, DataStoreFactory.HINT_HOT | DataStoreFactory.HINT_STATIC, -1);
  }
  // compute neighbors of each db object
  FiniteProgress prog = LOG.isVerbose() ? new FiniteProgress("Correlation Outlier Probabilities", relation.size(), LOG) : null;
  for (DBIDIter id = ids.iter(); id.valid(); id.advance()) {
    KNNList neighbors = knnQuery.getKNNForDBID(id, k + 1);
    ModifiableDBIDs nids = DBIDUtil.newHashSet(neighbors);
    // Do not use query object
    nids.remove(id);
    double[] centroid = Centroid.make(relation, nids).getArrayRef();
    double[] relative = minusEquals(relation.get(id).toArray(), centroid);
    PCAResult pcares = pca.processIds(nids, relation);
    double[][] evecs = pcares.getEigenvectors();
    double[] projected = transposeTimes(evecs, relative);
    double[] evs = pcares.getEigenvalues();
    double min = Double.POSITIVE_INFINITY;
    int vdim = dim;
    switch (dist) {
    case CHISQUARED: {
      double sqdevs = 0;
      for (int d = 0; d < dim; d++) {
        // Scale with Stddev
        double dev = projected[d];
        // Accumulate
        sqdevs += dev * dev / evs[d];
        // Evaluate
        double score = 1 - ChiSquaredDistribution.cdf(sqdevs, d + 1);
        if (score < min) {
          min = score;
          vdim = d + 1;
        }
      }
      break;
    }
    case GAMMA: {
      double[][] dists = new double[dim][nids.size()];
      int j = 0;
      double[] srel = new double[dim];
      for (DBIDIter s = nids.iter(); s.valid() && j < nids.size(); s.advance()) {
        V vec = relation.get(s);
        for (int d = 0; d < dim; d++) {
          srel[d] = vec.doubleValue(d) - centroid[d];
        }
        double[] serr = transposeTimes(evecs, srel);
        double sqdist = 0.0;
        for (int d = 0; d < dim; d++) {
          double serrd = serr[d];
          sqdist += serrd * serrd / evs[d];
          dists[d][j] = sqdist;
        }
        j++;
      }
      double sqdevs = 0;
      for (int d = 0; d < dim; d++) {
        // Scale with Stddev
        final double dev = projected[d];
        // Accumulate
        sqdevs += dev * dev / evs[d];
        // Sort, so we can trim the top 15% below.
        Arrays.sort(dists[d]);
        // Evaluate
        double score = 1 - GammaChoiWetteEstimator.STATIC.estimate(dists[d], SHORTENED_ARRAY).cdf(sqdevs);
        if (score < min) {
          min = score;
          vdim = d + 1;
        }
      }
      break;
    }
    }
    // Normalize the value
    final double prob = expect * (1 - min) / (expect + min);
    // Construct the error vector:
    for (int d = vdim; d < dim; d++) {
      projected[d] = 0.;
    }
    double[] ev = timesEquals(times(evecs, projected), -1 * prob);
    cop_score.putDouble(id, prob);
    if (models) {
      cop_err_v.put(id, ev);
      cop_dim.putInt(id, dim + 1 - vdim);
    }
    LOG.incrementProcessed(prog);
  }
  LOG.ensureCompleted(prog);
  // combine results.
  DoubleRelation scoreResult = new MaterializedDoubleRelation("Correlation Outlier Probabilities", COP_SCORES, cop_score, ids);
  OutlierScoreMeta scoreMeta = new ProbabilisticOutlierScore();
  OutlierResult result = new OutlierResult(scoreMeta, scoreResult);
  if (models) {
    result.addChildResult(new MaterializedRelation<>("Local Dimensionality", COP_DIM, TypeUtil.INTEGER, cop_dim, ids));
    result.addChildResult(new MaterializedRelation<>("Error vectors", COP_ERRORVEC, TypeUtil.DOUBLE_ARRAY, cop_err_v, ids));
  }
  return result;
}
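For context, a sketch of hypothetical caller code (not from ELKI) that reads the per-object probabilities back out of the returned OutlierResult:

// cop is an assumed, already-configured COP instance
OutlierResult result = cop.run(relation);
DoubleRelation copScores = result.getScores();
for (DBIDIter iter = copScores.iterDBIDs(); iter.valid(); iter.advance()) {
  // correlation outlier probability of the current object
  double prob = copScores.doubleValue(iter);
}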
Use of de.lmu.ifi.dbs.elki.database.ids.KNNList in project elki by elki-project.
The class MaterializedKNNAndRKNNPreprocessorTest, method testKNNQueries.
private void testKNNQueries(Relation<DoubleVector> rep, KNNQuery<DoubleVector> lin_knn_query, KNNQuery<DoubleVector> preproc_knn_query, int k) {
  ArrayDBIDs sample = DBIDUtil.ensureArray(rep.getDBIDs());
  List<? extends KNNList> lin_knn_ids = lin_knn_query.getKNNForBulkDBIDs(sample, k);
  List<? extends KNNList> preproc_knn_ids = preproc_knn_query.getKNNForBulkDBIDs(sample, k);
  for (int i = 0; i < rep.size(); i++) {
    KNNList lin_knn = lin_knn_ids.get(i);
    KNNList pre_knn = preproc_knn_ids.get(i);
    DoubleDBIDListIter lin = lin_knn.iter(), pre = pre_knn.iter();
    // Note: advancing only the list iterators here; incrementing the outer
    // index i as well would skip most of the kNN lists being compared.
    for (; lin.valid() && pre.valid(); lin.advance(), pre.advance()) {
      assertTrue(DBIDUtil.equal(lin, pre) || lin.doubleValue() == pre.doubleValue());
    }
    assertEquals("kNN sizes do not agree.", lin_knn.size(), pre_knn.size());
    for (int j = 0; j < lin_knn.size(); j++) {
      assertTrue("kNNs of linear scan and preprocessor do not match!", DBIDUtil.equal(lin_knn.get(j), pre_knn.get(j)));
      assertEquals("kNNs of linear scan and preprocessor do not match!", lin_knn.get(j).doubleValue(), pre_knn.get(j).doubleValue(), 0.);
    }
  }
}
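The positional checks rely on KNNList being sorted by ascending distance. A hypothetical extra assertion (not part of this test) that makes the invariant explicit:

private static void assertSortedByDistance(KNNList knn) {
  double prev = Double.NEGATIVE_INFINITY;
  for (DoubleDBIDListIter it = knn.iter(); it.valid(); it.advance()) {
    assertTrue("KNNList not sorted by ascending distance!", it.doubleValue() >= prev);
    prev = it.doubleValue();
  }
}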
Use of de.lmu.ifi.dbs.elki.database.ids.KNNList in project elki by elki-project.
The class DeLiClu, method run.
public ClusterOrder run(Database database, Relation<NV> relation) {
  Collection<DeLiCluTreeIndex<NV>> indexes = ResultUtil.filterResults(database.getHierarchy(), relation, DeLiCluTreeIndex.class);
  if (indexes.size() != 1) {
    throw new MissingPrerequisitesException("DeLiClu found " + indexes.size() + " DeLiCluTree indexes. DeLiClu needs a special index to operate, therefore you need to add this index to your database.");
  }
  DeLiCluTreeIndex<NV> index = indexes.iterator().next();
  if (!(getDistanceFunction() instanceof SpatialPrimitiveDistanceFunction<?>)) {
    throw new IllegalArgumentException("Distance Function must be an instance of " + SpatialPrimitiveDistanceFunction.class.getName());
  }
  @SuppressWarnings("unchecked")
  SpatialPrimitiveDistanceFunction<NV> distFunction = (SpatialPrimitiveDistanceFunction<NV>) getDistanceFunction();
  // first do the knn-Join
  if (LOG.isVerbose()) {
    LOG.verbose("knnJoin...");
  }
  Relation<KNNList> knns = knnJoin.run(relation);
  DBIDs ids = relation.getDBIDs();
  final int size = ids.size();
  FiniteProgress progress = LOG.isVerbose() ? new FiniteProgress("DeLiClu", size, LOG) : null;
  ClusterOrder clusterOrder = new ClusterOrder(ids, "DeLiClu Clustering", "deliclu-clustering");
  heap = new UpdatableHeap<>();
  // add start object to cluster order and (root, root) to priority queue
  DBID startID = DBIDUtil.deref(ids.iter());
  clusterOrder.add(startID, Double.POSITIVE_INFINITY, null);
  int numHandled = 1;
  index.setHandled(startID, relation.get(startID));
  SpatialDirectoryEntry rootEntry = (SpatialDirectoryEntry) index.getRootEntry();
  SpatialObjectPair spatialObjectPair = new SpatialObjectPair(0., rootEntry, rootEntry, true);
  heap.add(spatialObjectPair);
  while (numHandled < size) {
    if (heap.isEmpty()) {
      throw new AbortException("DeLiClu heap was empty when it shouldn't have been.");
    }
    SpatialObjectPair dataPair = heap.poll();
    if (dataPair.isExpandable) {
      // pair of nodes
      expandNodes(index, distFunction, dataPair, knns);
    }
    else {
      // pair of objects: set handled
      LeafEntry e1 = (LeafEntry) dataPair.entry1;
      LeafEntry e2 = (LeafEntry) dataPair.entry2;
      final DBID e1id = e1.getDBID();
      IndexTreePath<DeLiCluEntry> path = index.setHandled(e1id, relation.get(e1id));
      if (path == null) {
        throw new RuntimeException("snh: parent(" + e1id + ") = null!!!");
      }
      // add to cluster order
      clusterOrder.add(e1id, dataPair.distance, e2.getDBID());
      numHandled++;
      // reinsert expanded leafs
      reinsertExpanded(distFunction, index, path, knns);
      if (progress != null) {
        progress.setProcessed(numHandled, LOG);
      }
    }
  }
  LOG.ensureCompleted(progress);
  return clusterOrder;
}
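Since the knn-join result is a Relation<KNNList>, the k-nearest-neighbor distance of any object is available without issuing further queries. A minimal sketch, assuming knns and ids as in the method above:

for (DBIDIter it = ids.iter(); it.valid(); it.advance()) {
  KNNList nn = knns.get(it);
  // distance to the k-th nearest neighbor of this object
  double kdist = nn.getKNNDistance();
}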