use of de.lmu.ifi.dbs.elki.database.ids.DoubleDBIDList in project elki by elki-project.
the class AbstractIndexStructureTest method testExactEuclidean.
/**
 * Actual test routine: builds a database from the test data set, then runs a
 * kNN query and a range query for the query point and compares the results
 * against the expected distances and vectors.
 *
 * @param inputparams Parameterization used to build the database; the fixed
 *        DBID filter is added to it by this method
 * @param expectKNNQuery Class the returned kNN query must be assignable to;
 *        {@code null} skips the kNN part of the test
 * @param expectRangeQuery Class the returned range query must be assignable
 *        to; {@code null} skips the range part of the test
 */
protected void testExactEuclidean(ListParameterization inputparams, Class<?> expectKNNQuery, Class<?> expectRangeQuery) {
  // Use a fixed DBID - historically, we used 1 indexed - to reduce random
  // variation in results due to different hash codes everywhere.
  inputparams.addParameter(AbstractDatabaseConnection.Parameterizer.FILTERS_ID, new FixedDBIDsFilter(1));
  Database db = AbstractSimpleAlgorithmTest.makeSimpleDatabase(dataset, shoulds, inputparams);
  Relation<DoubleVector> rep = db.getRelation(TypeUtil.DOUBLE_VECTOR_FIELD);
  DistanceQuery<DoubleVector> dist = db.getDistanceQuery(rep, EuclideanDistanceFunction.STATIC);

  if (expectKNNQuery != null) {
    // Get the k nearest neighbors of the query point.
    DoubleVector dv = DoubleVector.wrap(querypoint);
    KNNQuery<DoubleVector> knnq = db.getKNNQuery(dist, k);
    assertTrue("Returned knn query is not of expected class: expected " + expectKNNQuery + " got " + knnq.getClass(), expectKNNQuery.isAssignableFrom(knnq.getClass()));
    KNNList ids = knnq.getKNNForObject(dv, k);
    assertExpectedNeighbors(ids.size(), ids.iter(), rep, dist);
  }

  if (expectRangeQuery != null) {
    // Do a range query around the query point.
    DoubleVector dv = DoubleVector.wrap(querypoint);
    RangeQuery<DoubleVector> rangeq = db.getRangeQuery(dist, eps);
    assertTrue("Returned range query is not of expected class: expected " + expectRangeQuery + " got " + rangeq.getClass(), expectRangeQuery.isAssignableFrom(rangeq.getClass()));
    DoubleDBIDList ids = rangeq.getRangeForObject(dv, eps);
    assertExpectedNeighbors(ids.size(), ids.iter(), rep, dist);
  }
}

/**
 * Verify that a query result matches the expected neighbors, in order.
 *
 * @param size Number of entries in the query result
 * @param res Iterator over the query result
 * @param rep Relation used to look up the vector of each result entry
 * @param dist Distance query used to compare result and expected vectors
 */
private void assertExpectedNeighbors(int size, DoubleDBIDListIter res, Relation<DoubleVector> rep, DistanceQuery<DoubleVector> dist) {
  // Sizes are integers: compare them exactly rather than with a float delta.
  // Checking the size first also keeps the index below within bounds.
  assertEquals("Result size does not match expectation!", shouldd.length, size);
  for (int i = 0; res.valid(); res.advance(), i++) {
    // Verify distance
    assertEquals("Expected distance doesn't match.", shouldd[i], res.doubleValue(), 1e-6);
    // Verify vector
    DoubleVector c = rep.get(res);
    DoubleVector c2 = DoubleVector.wrap(shouldc[i]);
    assertEquals("Expected vector doesn't match: " + c.toString(), 0.0, dist.distance(c, c2), 1e-15);
  }
}
use of de.lmu.ifi.dbs.elki.database.ids.DoubleDBIDList in project elki by elki-project.
the class Leader method run.
/**
 * Perform leader clustering on the given relation.
 *
 * Iterates over the objects; every object that has not been assigned yet
 * becomes the prototype of a new cluster, which receives all not yet assigned
 * objects returned by a range query with the threshold radius.
 *
 * @param relation Data set
 * @return Clustering result
 */
public Clustering<PrototypeModel<O>> run(Relation<O> relation) {
  final RangeQuery<O> rangeQuery = relation.getRangeQuery(getDistanceFunction(), threshold);
  final ModifiableDBIDs assigned = DBIDUtil.newHashSet(relation.size());
  final Clustering<PrototypeModel<O>> result = new Clustering<>("Prototype clustering", "prototype-clustering");
  int numQueries = 0;
  FiniteProgress progress = null;
  if (LOG.isVerbose()) {
    progress = new FiniteProgress("Leader clustering", relation.size(), LOG);
  }

  DBIDIter leader = relation.iterDBIDs();
  // Stop early once every object has been assigned to some cluster.
  while (leader.valid() && assigned.size() < relation.size()) {
    if (!assigned.contains(leader)) {
      final DoubleDBIDList inRange = rangeQuery.getRangeForDBID(leader, threshold);
      ++numQueries;
      final ModifiableDBIDs members = DBIDUtil.newArray(inRange.size());
      for (DBIDIter candidate = inRange.iter(); candidate.valid(); candidate.advance()) {
        // Only objects not claimed by an earlier leader join this cluster.
        if (!assigned.add(candidate)) {
          continue;
        }
        LOG.incrementProcessed(progress);
        members.add(candidate);
      }
      assert (members.size() > 0 && members.contains(leader));
      final PrototypeModel<O> model = new SimplePrototypeModel<>(relation.get(leader));
      result.addToplevelCluster(new Cluster<>(members, model));
    }
    leader.advance();
  }

  LOG.statistics(new LongStatistic(this.getClass().getName() + ".queries", numQueries));
  LOG.ensureCompleted(progress);
  return result;
}
use of de.lmu.ifi.dbs.elki.database.ids.DoubleDBIDList in project elki by elki-project.
the class DBSCAN method expandCluster.
/**
 * DBSCAN-function expandCluster.
 *
 * Runs a range query for the start object. If it has fewer than
 * {@code minpts} neighbors it is recorded as noise; otherwise a new cluster
 * is grown from it by repeatedly querying the seed objects until the seed
 * list is exhausted.
 *
 * Border-Objects become members of the first possible cluster.
 *
 * @param relation Database relation to run on
 * @param rangeQuery Range query to use
 * @param startObjectID potential seed of a new potential cluster
 * @param seeds Array to store the current seeds
 * @param objprog Number of objects processed (may be {@code null})
 * @param clusprog Number of clusters found (may be {@code null})
 */
protected void expandCluster(Relation<O> relation, RangeQuery<O> rangeQuery, DBIDRef startObjectID, ArrayModifiableDBIDs seeds, FiniteProgress objprog, IndefiniteProgress clusprog) {
  final DoubleDBIDList startNeighbors = rangeQuery.getRangeForDBID(startObjectID, epsilon);
  final int startCount = startNeighbors.size();
  ncounter += startCount;

  if (startCount < minpts) {
    // The start object is not a core object: record it as noise and stop.
    noise.add(startObjectID);
    processedIDs.add(startObjectID);
    if (objprog != null) {
      objprog.incrementProcessed(LOG);
    }
    return;
  }

  // The start object is a core object: open a new cluster around it.
  final ModifiableDBIDs members = DBIDUtil.newArray();
  members.add(startObjectID);
  processedIDs.add(startObjectID);

  // The seed list is expected to arrive empty; clear it regardless.
  assert (seeds.size() == 0);
  seeds.clear();
  processNeighbors(startNeighbors.iter(), members, seeds);

  // Expand the cluster until no seeds remain.
  for (final DBIDVar current = DBIDUtil.newVar(); !seeds.isEmpty();) {
    final DoubleDBIDList reachable = rangeQuery.getRangeForDBID(seeds.pop(current), epsilon);
    final int reachableCount = reachable.size();
    ncounter += reachableCount;
    // Only core objects contribute their neighborhood to the cluster.
    if (reachableCount >= minpts) {
      processNeighbors(reachable.iter(), members, seeds);
    }
    if (objprog != null) {
      objprog.incrementProcessed(LOG);
    }
  }

  resultList.add(members);
  if (clusprog != null) {
    clusprog.setProcessed(resultList.size(), LOG);
  }
}
use of de.lmu.ifi.dbs.elki.database.ids.DoubleDBIDList in project elki by elki-project.
the class KMeansMinusMinus method meansWithTreshhold.
/**
 * Returns the mean vectors of the given clusters in the given database,
 * ignoring every cluster entry whose stored double value is at or above the
 * given threshold.
 *
 * If no entry of a cluster remains below the threshold, the previous mean of
 * that cluster is kept.
 *
 * @param clusters the clusters to compute the means
 * @param means the recent means
 * @param database the database containing the vectors
 * @param tresh threshold; entries with a value {@code >= tresh} are skipped
 *        (presumably the value is the distance to the cluster center; verify
 *        against the caller)
 * @return the mean vectors of the given clusters in the given database
 */
protected double[][] meansWithTreshhold(List<? extends ModifiableDoubleDBIDList> clusters, double[][] means, Relation<V> database, Double tresh) {
  // TODO: use Kahan summation for better numerical precision?
  double[][] newMeans = new double[k][];
  for (int i = 0; i < k; i++) {
    DoubleDBIDList list = clusters.get(i);
    double[] sum = null;
    int count = 0;
    for (DoubleDBIDListIter iter = list.iter(); iter.valid(); iter.advance()) {
      if (iter.doubleValue() >= tresh) {
        // Excluded by the threshold.
        continue;
      }
      NumberVector vec = database.get(iter);
      if (sum == null) {
        // Initialize the sum with the first retained vector. It must not be
        // added again below, or it would be counted twice in the mean.
        // NOTE(review): relies on toArray() returning a fresh array, as the
        // previous code already did by modifying it in place.
        sum = vec.toArray();
      }
      else {
        // Accumulate every further retained vector.
        for (int j = 0; j < sum.length; j++) {
          sum[j] += vec.doubleValue(j);
        }
      }
      count++;
    }
    // Keep the old mean when nothing was retained (also avoids dividing by 0).
    newMeans[i] = (sum != null) ? VMath.timesEquals(sum, 1.0 / count) : means[i];
  }
  return newMeans;
}
use of de.lmu.ifi.dbs.elki.database.ids.DoubleDBIDList in project elki by elki-project.
the class OUTRES method outresScore.
/**
 * Main loop of OUTRES. Run for each object.
 *
 * Starting at dimension {@code s}, each dimension not yet contained in the
 * subspace is added in turn. If the resulting subspace has more than two
 * neighbors and passes the relevance test, the score is updated and the
 * search recurses into the higher dimensions. The dimension is removed from
 * the subspace again before the next one is tried.
 *
 * @param s start dimension
 * @param subspace Current subspace
 * @param id Current object ID
 * @param kernel Kernel
 * @return Score
 */
public double outresScore(final int s, long[] subspace, DBIDRef id, KernelDensityEstimator kernel) {
  // The score starts at 1.0 and is only ever multiplied.
  double result = 1.0;
  final SubspaceEuclideanDistanceFunction subspaceDist = new SubspaceEuclideanDistanceFunction(subspace);
  final MeanVariance densityStats = new MeanVariance();
  for (int dim = s; dim < kernel.dim; dim++) {
    // Skip dimensions that already belong to the subspace.
    if (BitsUtil.get(subspace, dim)) {
      continue;
    }
    // Temporarily extend the subspace by this dimension.
    BitsUtil.setI(subspace, dim);
    subspaceDist.setSelectedDimensions(subspace);
    final double eps = kernel.adjustedEps(kernel.dim);
    // Query with a larger window, to also get neighbors of neighbors
    // Subspace euclidean is metric!
    final double window = eps * 2.;
    final RangeQuery<V> query = QueryUtil.getRangeQuery(kernel.relation, subspaceDist, window);
    final DoubleDBIDList candidates = query.getRangeForDBID(id, window);
    final DoubleDBIDList neigh = refineRange(candidates, eps);
    // Needs more than two neighbors, and the subspace must pass the
    // relevance test (only evaluated when there are enough neighbors).
    if (neigh.size() > 2 && relevantSubspace(subspace, neigh, kernel)) {
      final double density = kernel.subspaceDensity(subspace, neigh);
      // Compute mean and standard deviation for densities of neighbors.
      densityStats.reset();
      for (DoubleDBIDListIter it = neigh.iter(); it.valid(); it.advance()) {
        final DoubleDBIDList secondLevel = subsetNeighborhoodQuery(candidates, it, subspaceDist, eps, kernel);
        densityStats.put(kernel.subspaceDensity(subspace, secondLevel));
      }
      // NOTE(review): a zero sample standard deviation makes this NaN or
      // infinite - confirm that this is intended.
      final double deviation = (densityStats.getMean() - density) / (2. * densityStats.getSampleStddev());
      // High deviation:
      if (deviation >= 1) {
        result *= (density / deviation);
      }
      // Recursion into the remaining dimensions.
      result *= outresScore(dim + 1, subspace, id, kernel);
    }
    // Restore the subspace before trying the next dimension.
    BitsUtil.clearI(subspace, dim);
  }
  return result;
}
Aggregations