use of de.lmu.ifi.dbs.elki.database.ids.DBIDArrayIter in project elki by elki-project.
the class AbstractHDBSCAN method convertToPointerRepresentation.
/**
* Convert spanning tree to a pointer representation.
*
* Note: the heap must use the correct encoding of indexes.
*
* @param ids IDs indexed
* @param heap Heap
* @param pi Parent array
* @param lambda Distance array
*/
protected void convertToPointerRepresentation(ArrayDBIDs ids, DoubleLongHeap heap, WritableDBIDDataStore pi, WritableDoubleDataStore lambda) {
final Logging LOG = getLogger();
// Initialize parent array:
for (DBIDArrayIter iter = ids.iter(); iter.valid(); iter.advance()) {
// Initialize
pi.put(iter, iter);
}
DBIDVar p = DBIDUtil.newVar(), q = DBIDUtil.newVar(), n = DBIDUtil.newVar();
FiniteProgress pprog = LOG.isVerbose() ? new FiniteProgress("Converting MST to pointer representation", heap.size(), LOG) : null;
while (!heap.isEmpty()) {
final double dist = heap.peekKey();
final long pair = heap.peekValue();
final int i = (int) (pair >>> 31), j = (int) (pair & 0x7FFFFFFFL);
ids.assignVar(i, p);
// Follow p to its parent.
while (!DBIDUtil.equal(p, pi.assignVar(p, n))) {
p.set(n);
}
// Follow q to its parent.
ids.assignVar(j, q);
while (!DBIDUtil.equal(q, pi.assignVar(q, n))) {
q.set(n);
}
// By definition of the pointer representation, the largest element in
// each cluster is the cluster lead.
// The extraction methods currently rely on this!
int c = DBIDUtil.compare(p, q);
if (c < 0) {
// p joins q:
pi.put(p, q);
lambda.put(p, dist);
} else {
assert (c != 0) : "This should never happen!";
// q joins p:
pi.put(q, p);
lambda.put(q, dist);
}
heap.poll();
LOG.incrementProcessed(pprog);
}
LOG.ensureCompleted(pprog);
// does not fulfill the property that the last element has the largest id.
for (DBIDArrayIter iter = ids.iter(); iter.valid(); iter.advance()) {
double d = lambda.doubleValue(iter);
// Parent:
pi.assignVar(iter, p);
q.set(p);
// Follow parent while tied.
while (d >= lambda.doubleValue(q) && !DBIDUtil.equal(q, pi.assignVar(q, n))) {
q.set(n);
}
if (!DBIDUtil.equal(p, q)) {
if (LOG.isDebuggingFinest()) {
LOG.finest("Correcting parent: " + p + " -> " + q);
}
pi.put(iter, q);
}
}
}
use of de.lmu.ifi.dbs.elki.database.ids.DBIDArrayIter in project elki by elki-project.
the class ABOD method run.
/**
* Run ABOD on the data set.
*
* @param relation Relation to process
* @return Outlier detection result
*/
public OutlierResult run(Database db, Relation<V> relation) {
ArrayDBIDs ids = DBIDUtil.ensureArray(relation.getDBIDs());
// Build a kernel matrix, to make O(n^3) slightly less bad.
SimilarityQuery<V> sq = db.getSimilarityQuery(relation, kernelFunction);
KernelMatrix kernelMatrix = new KernelMatrix(sq, relation, ids);
WritableDoubleDataStore abodvalues = DataStoreUtil.makeDoubleStorage(ids, DataStoreFactory.HINT_STATIC);
DoubleMinMax minmaxabod = new DoubleMinMax();
MeanVariance s = new MeanVariance();
DBIDArrayIter pA = ids.iter(), pB = ids.iter(), pC = ids.iter();
for (; pA.valid(); pA.advance()) {
final double abof = computeABOF(kernelMatrix, pA, pB, pC, s);
minmaxabod.put(abof);
abodvalues.putDouble(pA, abof);
}
// Build result representation.
DoubleRelation scoreResult = new MaterializedDoubleRelation("Angle-Based Outlier Degree", "abod-outlier", abodvalues, relation.getDBIDs());
OutlierScoreMeta scoreMeta = new InvertedOutlierScoreMeta(minmaxabod.getMin(), minmaxabod.getMax(), 0.0, Double.POSITIVE_INFINITY);
return new OutlierResult(scoreMeta, scoreResult);
}
use of de.lmu.ifi.dbs.elki.database.ids.DBIDArrayIter in project elki by elki-project.
the class LBABOD method run.
/**
* Run LB-ABOD on the data set.
*
* @param relation Relation to process
* @return Outlier detection result
*/
@Override
public OutlierResult run(Database db, Relation<V> relation) {
ArrayDBIDs ids = DBIDUtil.ensureArray(relation.getDBIDs());
DBIDArrayIter pB = ids.iter(), pC = ids.iter();
SimilarityQuery<V> sq = db.getSimilarityQuery(relation, kernelFunction);
KernelMatrix kernelMatrix = new KernelMatrix(sq, relation, ids);
// Output storage.
WritableDoubleDataStore abodvalues = DataStoreUtil.makeDoubleStorage(ids, DataStoreFactory.HINT_STATIC);
DoubleMinMax minmaxabod = new DoubleMinMax();
double max = 0.;
// Storage for squared distances (will be reused!)
WritableDoubleDataStore sqDists = DataStoreUtil.makeDoubleStorage(ids, DataStoreFactory.HINT_TEMP | DataStoreFactory.HINT_HOT);
// Nearest neighbor heap (will be reused!)
KNNHeap nn = DBIDUtil.newHeap(k);
// Priority queue for candidates
ModifiableDoubleDBIDList candidates = DBIDUtil.newDistanceDBIDList(relation.size());
// get Candidate Ranking
for (DBIDIter pA = relation.iterDBIDs(); pA.valid(); pA.advance()) {
// Compute nearest neighbors and distances.
nn.clear();
double simAA = kernelMatrix.getSimilarity(pA, pA);
// Sum of 1./(|AB|) and 1./(|AB|^2); for computing R2.
double sumid = 0., sumisqd = 0.;
for (pB.seek(0); pB.valid(); pB.advance()) {
if (DBIDUtil.equal(pB, pA)) {
continue;
}
double simBB = kernelMatrix.getSimilarity(pB, pB);
double simAB = kernelMatrix.getSimilarity(pA, pB);
double sqdAB = simAA + simBB - simAB - simAB;
sqDists.putDouble(pB, sqdAB);
final double isqdAB = 1. / sqdAB;
sumid += FastMath.sqrt(isqdAB);
sumisqd += isqdAB;
// Update heap
nn.insert(sqdAB, pB);
}
// Compute FastABOD approximation, adjust for lower bound.
// LB-ABOF is defined via a numerically unstable formula.
// Variance as E(X^2)-E(X)^2 suffers from catastrophic cancellation!
// TODO: ensure numerical precision!
double nnsum = 0., nnsumsq = 0., nnsumisqd = 0.;
KNNList nl = nn.toKNNList();
DoubleDBIDListIter iB = nl.iter(), iC = nl.iter();
for (; iB.valid(); iB.advance()) {
double sqdAB = iB.doubleValue();
double simAB = kernelMatrix.getSimilarity(pA, iB);
if (!(sqdAB > 0.)) {
continue;
}
for (iC.seek(iB.getOffset() + 1); iC.valid(); iC.advance()) {
double sqdAC = iC.doubleValue();
double simAC = kernelMatrix.getSimilarity(pA, iC);
if (!(sqdAC > 0.)) {
continue;
}
// Exploit bilinearity of scalar product:
// <B-A, C-A> = <B, C-A> - <A,C-A>
// = <B,C> - <B,A> - <A,C> + <A,A>
double simBC = kernelMatrix.getSimilarity(iB, iC);
double numerator = simBC - simAB - simAC + simAA;
double sqweight = 1. / (sqdAB * sqdAC);
double weight = FastMath.sqrt(sqweight);
double val = numerator * sqweight;
nnsum += val * weight;
nnsumsq += val * val * weight;
nnsumisqd += sqweight;
}
}
// Remaining weight, term R2:
double r2 = sumisqd * sumisqd - 2. * nnsumisqd;
double tmp = (2. * nnsum + r2) / (sumid * sumid);
double lbabof = 2. * nnsumsq / (sumid * sumid) - tmp * tmp;
// Track maximum?
if (lbabof > max) {
max = lbabof;
}
abodvalues.putDouble(pA, lbabof);
candidates.add(lbabof, pA);
}
// Put maximum from approximate values.
minmaxabod.put(max);
candidates.sort();
// refine Candidates
int refinements = 0;
DoubleMinHeap topscores = new DoubleMinHeap(l);
MeanVariance s = new MeanVariance();
for (DoubleDBIDListIter pA = candidates.iter(); pA.valid(); pA.advance()) {
// Stop refining
if (topscores.size() >= k && pA.doubleValue() > topscores.peek()) {
break;
}
final double abof = computeABOF(kernelMatrix, pA, pB, pC, s);
// Store refined score:
abodvalues.putDouble(pA, abof);
minmaxabod.put(abof);
// Update the heap tracking the top scores.
if (topscores.size() < k) {
topscores.add(abof);
} else {
if (topscores.peek() > abof) {
topscores.replaceTopElement(abof);
}
}
refinements += 1;
}
if (LOG.isStatistics()) {
LoggingConfiguration.setVerbose(Level.VERYVERBOSE);
LOG.statistics(new LongStatistic("lb-abod.refinements", refinements));
}
// Build result representation.
DoubleRelation scoreResult = new MaterializedDoubleRelation("Angle-based Outlier Detection", "abod-outlier", abodvalues, ids);
OutlierScoreMeta scoreMeta = new InvertedOutlierScoreMeta(minmaxabod.getMin(), minmaxabod.getMax(), 0.0, Double.POSITIVE_INFINITY);
return new OutlierResult(scoreMeta, scoreResult);
}
use of de.lmu.ifi.dbs.elki.database.ids.DBIDArrayIter in project elki by elki-project.
the class SilhouetteOutlierDetection method run.
@Override
public OutlierResult run(Database database) {
Relation<O> relation = database.getRelation(getDistanceFunction().getInputTypeRestriction());
DistanceQuery<O> dq = database.getDistanceQuery(relation, getDistanceFunction());
// TODO: improve ELKI api to ensure we're using the same DBIDs!
Clustering<?> c = clusterer.run(database);
WritableDoubleDataStore scores = DataStoreUtil.makeDoubleStorage(relation.getDBIDs(), DataStoreFactory.HINT_DB);
DoubleMinMax mm = new DoubleMinMax();
List<? extends Cluster<?>> clusters = c.getAllClusters();
for (Cluster<?> cluster : clusters) {
if (cluster.size() <= 1 || cluster.isNoise()) {
switch(noiseOption) {
case IGNORE_NOISE:
case TREAT_NOISE_AS_SINGLETONS:
// As suggested in Rousseeuw, we use 0 for singletons.
for (DBIDIter iter = cluster.getIDs().iter(); iter.valid(); iter.advance()) {
scores.put(iter, 0.);
}
mm.put(0.);
continue;
case MERGE_NOISE:
// Treat as cluster below
break;
}
}
ArrayDBIDs ids = DBIDUtil.ensureArray(cluster.getIDs());
// temporary storage.
double[] as = new double[ids.size()];
DBIDArrayIter it1 = ids.iter(), it2 = ids.iter();
for (it1.seek(0); it1.valid(); it1.advance()) {
// a: In-cluster distances
// Already computed distances
double a = as[it1.getOffset()];
for (it2.seek(it1.getOffset() + 1); it2.valid(); it2.advance()) {
final double dist = dq.distance(it1, it2);
a += dist;
as[it2.getOffset()] += dist;
}
a /= (ids.size() - 1);
// b: other clusters:
double min = Double.POSITIVE_INFINITY;
for (Cluster<?> ocluster : clusters) {
if (ocluster == /* yes, reference identity */
cluster) {
continue;
}
if (ocluster.isNoise()) {
switch(noiseOption) {
case IGNORE_NOISE:
continue;
case MERGE_NOISE:
// No special treatment
break;
case TREAT_NOISE_AS_SINGLETONS:
// Treat noise cluster as singletons:
for (DBIDIter it3 = ocluster.getIDs().iter(); it3.valid(); it3.advance()) {
double dist = dq.distance(it1, it3);
if (dist < min) {
min = dist;
}
}
continue;
}
}
final DBIDs oids = ocluster.getIDs();
double b = 0.;
for (DBIDIter it3 = oids.iter(); it3.valid(); it3.advance()) {
b += dq.distance(it1, it3);
}
b /= oids.size();
if (b < min) {
min = b;
}
}
final double score = (min - a) / Math.max(min, a);
scores.put(it1, score);
mm.put(score);
}
}
// Build result representation.
DoubleRelation scoreResult = new MaterializedDoubleRelation("Silhouette Coefficients", "silhouette-outlier", scores, relation.getDBIDs());
OutlierScoreMeta scoreMeta = new InvertedOutlierScoreMeta(mm.getMin(), mm.getMax(), -1., 1., .5);
return new OutlierResult(scoreMeta, scoreResult);
}
use of de.lmu.ifi.dbs.elki.database.ids.DBIDArrayIter in project elki by elki-project.
the class TSNE method run.
public Relation<DoubleVector> run(Relation<O> relation) {
AffinityMatrix pij = affinity.computeAffinityMatrix(relation, EARLY_EXAGGERATION);
// Create initial solution.
final int size = pij.size();
double[][] sol = randomInitialSolution(size, dim, random.getSingleThreadedRandom());
projectedDistances.setLong(0L);
optimizetSNE(pij, sol);
LOG.statistics(projectedDistances);
// Remove the original (unprojected) data unless configured otherwise.
removePreviousRelation(relation);
// Transform into output data format.
DBIDs ids = relation.getDBIDs();
WritableDataStore<DoubleVector> proj = DataStoreFactory.FACTORY.makeStorage(ids, DataStoreFactory.HINT_DB | DataStoreFactory.HINT_SORTED, DoubleVector.class);
VectorFieldTypeInformation<DoubleVector> otype = new VectorFieldTypeInformation<>(DoubleVector.FACTORY, dim);
for (DBIDArrayIter it = pij.iterDBIDs(); it.valid(); it.advance()) {
proj.put(it, DoubleVector.wrap(sol[it.getOffset()]));
}
return new MaterializedRelation<>("tSNE", "t-SNE", otype, proj, ids);
}
Aggregations