Use of de.lmu.ifi.dbs.elki.database.ids.DBIDRange in project elki by elki-project.
Class WeightedQuickUnionStaticDBIDsTest, method testWorstCase.
/**
 * Worst-case with 10 nodes, from Sedgewick.
 *
 * We don't test runtime, but this is an interesting case nevertheless.
 * The unions first build pairs, then merge pairs into groups of four, then
 * into a group of eight, and finally attach the remaining pair, so that all
 * ten nodes end up in a single component.
 */
@Test
public void testWorstCase() {
  final int size = 10;
  DBIDRange range = DBIDUtil.generateStaticDBIDRange(size);
  UnionFind uf = new WeightedQuickUnionStaticDBIDs(range);
  // Two independent iterators, so that two positions can be addressed at once.
  DBIDArrayIter i1 = range.iter(), i2 = range.iter();
  // Initially, every node is its own component.
  assertFalse(uf.isConnected(i1.seek(0), i2.seek(1)));
  uf.union(i1.seek(0), i2.seek(1));
  assertTrue(uf.isConnected(i1.seek(0), i2.seek(1)));
  uf.union(i1.seek(2), i2.seek(3));
  // {0,1} and {2,3} are still separate.
  assertFalse(uf.isConnected(i1.seek(0), i2.seek(2)));
  uf.union(i1.seek(5), i2.seek(4));
  uf.union(i1.seek(7), i2.seek(6));
  uf.union(i1.seek(8), i2.seek(9));
  // Merge pairs into groups of four.
  uf.union(i1.seek(1), i2.seek(3));
  assertTrue(uf.isConnected(i1.seek(0), i2.seek(2)));
  uf.union(i1.seek(4), i2.seek(6));
  assertTrue(uf.isConnected(i1.seek(5), i2.seek(7)));
  // Merge the two groups of four into {0..7}.
  uf.union(i1.seek(3), i2.seek(7));
  assertTrue(uf.isConnected(i1.seek(0), i2.seek(4)));
  // {8,9} is still separate.
  assertFalse(uf.isConnected(i1.seek(0), i2.seek(9)));
  uf.union(i1.seek(0), i2.seek(9));
  // Now all ten nodes (including 8 and 9) must be pairwise connected.
  for (int i = 0; i < size; i++) {
    for (int j = 0; j < size; j++) {
      assertTrue(uf.isConnected(i1.seek(i), i2.seek(j)));
    }
  }
}
Use of de.lmu.ifi.dbs.elki.database.ids.DBIDRange in project elki by elki-project.
Class WeightedQuickUnionStaticDBIDsTest, method testBruteForce.
/**
 * Randomized cross-check: the union-find structure is compared against a
 * naive component-label array after every union, until a single component
 * remains.
 */
@Test
public void testBruteForce() {
  final int size = 100;
  final Random rnd = new Random(0L);
  final DBIDRange ids = DBIDUtil.generateStaticDBIDRange(size);
  final UnionFind unionFind = new WeightedQuickUnionStaticDBIDs(ids);
  final DBIDArrayIter left = ids.iter(), right = ids.iter();
  // Reference model: labels[x] is the component label of object x.
  final int[] labels = new int[size];
  for (int idx = 0; idx < labels.length; idx++) {
    labels[idx] = idx;
  }
  int components = size;
  while (components > 1) {
    // Draw the larger offset first, then a strictly smaller one.
    final int hi = rnd.nextInt(size - 1) + 1;
    final int lo = rnd.nextInt(hi);
    final int labelLo = labels[lo];
    final int labelHi = labels[hi];
    left.seek(lo);
    right.seek(hi);
    final boolean connected = unionFind.isConnected(left, right);
    assertEquals(labelLo == labelHi, connected);
    // Union unconditionally, also when already connected.
    unionFind.union(left, right);
    if (labelLo != labelHi) {
      // Relabel the whole component of lo to the label of hi.
      for (int idx = 0; idx < size; idx++) {
        if (labels[idx] == labelLo) {
          labels[idx] = labelHi;
        }
      }
      components--;
    }
    assertEquals(components, unionFind.getRoots().size());
  }
}
Use of de.lmu.ifi.dbs.elki.database.ids.DBIDRange in project elki by elki-project.
Class WeightedQuickUnionStaticDBIDsTest, method testRoots.
/**
 * Check that the number of roots matches the number of components while
 * eight objects are merged step by step into one.
 */
@Test
public void testRoots() {
  final DBIDRange ids = DBIDUtil.generateStaticDBIDRange(8);
  final UnionFind unionFind = new WeightedQuickUnionStaticDBIDs(ids);
  final DBIDArrayIter left = ids.iter();
  final DBIDArrayIter right = ids.iter();

  // Two pairs formed: 8 - 2 = 6 components.
  unionFind.union(left.seek(0), right.seek(1));
  unionFind.union(left.seek(2), right.seek(3));
  assertEquals(6, unionFind.getRoots().size());

  // Pairs joined to {0,1,2,3}: 5 components.
  unionFind.union(left.seek(0), right.seek(2));
  assertEquals(5, unionFind.getRoots().size());

  // Second half merged to {4,5,6,7}: 2 components.
  unionFind.union(left.seek(4), right.seek(5));
  unionFind.union(left.seek(6), right.seek(7));
  unionFind.union(left.seek(4), right.seek(6));
  assertEquals(2, unionFind.getRoots().size());

  // Everything merged: a single root remains.
  unionFind.union(left.seek(0), right.seek(4));
  assertEquals(1, unionFind.getRoots().size());
}
Use of de.lmu.ifi.dbs.elki.database.ids.DBIDRange in project elki by elki-project.
Class RangeQueryBenchmarkAlgorithm, method run.
/**
 * Run the algorithm, with a separate query set.
 *
 * The query set must contain a number vector column with one more dimension
 * than the relation: the first {@code dim} values are used as the query
 * coordinates, and the last value is used as the query radius.
 *
 * @param database Database
 * @param relation Relation
 * @return Null result
 * @throws AbortException when no query set was given
 * @throws IncompatibleDataException when the query set has no column of the
 *         expected vector type
 */
public Result run(Database database, Relation<O> relation) {
  if (queries == null) {
    throw new AbortException("A query set is required for this 'run' method.");
  }
  // Get a distance and range query instance.
  DistanceQuery<O> distQuery = database.getDistanceQuery(relation, getDistanceFunction());
  RangeQuery<O> rangeQuery = database.getRangeQuery(distQuery);
  NumberVector.Factory<O> ofactory = RelationUtil.getNumberVectorFactory(relation);
  int dim = RelationUtil.dimensionality(relation);
  // Separate query set: vectors with exactly dim + 1 values (coordinates + radius).
  TypeInformation res = VectorFieldTypeInformation.typeRequest(NumberVector.class, dim + 1, dim + 1);
  MultipleObjectsBundle bundle = queries.loadData();
  // Use the first column with a compatible type.
  int col = -1;
  for (int i = 0; i < bundle.metaLength(); i++) {
    if (res.isAssignableFromType(bundle.meta(i))) {
      col = i;
      break;
    }
  }
  if (col < 0) {
    // Report both the expected type and the types actually present.
    StringBuilder msg = new StringBuilder();
    msg.append("No compatible data type in query input was found. Expected: ");
    msg.append(res.toString());
    msg.append(" have: ");
    for (int i = 0; i < bundle.metaLength(); i++) {
      if (i > 0) {
        msg.append(' ');
      }
      msg.append(bundle.meta(i).toString());
    }
    throw new IncompatibleDataException(msg.toString());
  }
  // Random sampling is a bit of hack, sorry.
  // But currently, we don't (yet) have an "integer random sample" function.
  // So we sample from a temporary DBID range and map the IDs back to offsets.
  DBIDRange sids = DBIDUtil.generateStaticDBIDRange(bundle.dataLength());
  final DBIDs sample = DBIDUtil.randomSample(sids, sampling, random);
  FiniteProgress prog = LOG.isVeryVerbose() ? new FiniteProgress("Range queries", sample.size(), LOG) : null;
  int hash = 0;
  MeanVariance mv = new MeanVariance();
  // Reused buffer for the query coordinates.
  double[] buf = new double[dim];
  for (DBIDIter iditer = sample.iter(); iditer.valid(); iditer.advance()) {
    // Offset of the sampled ID within the query bundle.
    int off = sids.binarySearch(iditer);
    assert (off >= 0);
    NumberVector o = (NumberVector) bundle.data(off, col);
    for (int i = 0; i < dim; i++) {
      buf[i] = o.doubleValue(i);
    }
    O v = ofactory.newNumberVector(buf);
    // The extra last component is the query radius.
    double r = o.doubleValue(dim);
    DoubleDBIDList rres = rangeQuery.getRangeForObject(v, r);
    // Fold the result IDs into a checksum that is reported with the statistics.
    int ichecksum = 0;
    for (DBIDIter it = rres.iter(); it.valid(); it.advance()) {
      ichecksum += DBIDUtil.asInteger(it);
    }
    hash = Util.mixHashCodes(hash, ichecksum);
    mv.put(rres.size());
    LOG.incrementProcessed(prog);
  }
  LOG.ensureCompleted(prog);
  if (LOG.isStatistics()) {
    LOG.statistics("Result hashcode: " + hash);
    LOG.statistics("Mean number of results: " + mv.getMean() + " +- " + mv.getNaiveStddev());
  }
  return null;
}
Use of de.lmu.ifi.dbs.elki.database.ids.DBIDRange in project elki by elki-project.
Class ValidateApproximativeKNNIndex, method run.
/**
 * Run the algorithm.
 *
 * Compares an accelerated kNN query against a reference kNN query on a random
 * sample of query points, and logs result size, recall and k-distance error
 * statistics. Query points are taken from the relation itself (optionally
 * filtered by a label pattern), or from a separate query set if one is given
 * and no pattern is set.
 *
 * @param database Database
 * @param relation Relation
 * @return Null result
 * @throws AbortException when no accelerated query is available, or when the
 *         query set contains no column of a compatible type
 */
public Result run(Database database, Relation<O> relation) {
  // Get a distance and kNN query instance.
  DistanceQuery<O> distQuery = database.getDistanceQuery(relation, getDistanceFunction());
  // Approximate query:
  KNNQuery<O> knnQuery = database.getKNNQuery(distQuery, k, DatabaseQuery.HINT_OPTIMIZED_ONLY);
  if (knnQuery == null || knnQuery instanceof LinearScanQuery) {
    throw new AbortException("Expected an accelerated query, but got a linear scan -- index is not used.");
  }
  // Exact query:
  KNNQuery<O> truekNNQuery;
  if (forcelinear) {
    truekNNQuery = QueryUtil.getLinearScanKNNQuery(distQuery);
  } else {
    truekNNQuery = database.getKNNQuery(distQuery, k, DatabaseQuery.HINT_EXACT);
  }
  if (knnQuery.getClass().equals(truekNNQuery.getClass())) {
    LOG.warning("Query classes are the same. This experiment may be invalid!");
  }
  // Statistics shared by both query modes.
  MeanVariance mv = new MeanVariance(), mvrec = new MeanVariance();
  MeanVariance mvdist = new MeanVariance(), mvdaerr = new MeanVariance(), mvdrerr = new MeanVariance();
  int misses = 0;
  // No query set - use original database.
  if (queries == null || pattern != null) {
    // Relation to filter on
    Relation<String> lrel = (pattern != null) ? DatabaseUtil.guessLabelRepresentation(database) : null;
    final DBIDs sample = DBIDUtil.randomSample(relation.getDBIDs(), sampling, random);
    FiniteProgress prog = LOG.isVeryVerbose() ? new FiniteProgress("kNN queries", sample.size(), LOG) : null;
    for (DBIDIter iditer = sample.iter(); iditer.valid(); iditer.advance()) {
      // Skip objects whose label does not match the pattern (if any).
      if (pattern == null || pattern.matcher(lrel.get(iditer)).find()) {
        // Query index:
        KNNList knns = knnQuery.getKNNForDBID(iditer, k);
        // Query reference:
        KNNList trueknns = truekNNQuery.getKNNForDBID(iditer, k);
        if (recordQueryQuality(knns, trueknns, mv, mvrec, mvdist, mvdaerr, mvdrerr)) {
          misses++;
        }
      }
      // Progress counts skipped objects, too.
      LOG.incrementProcessed(prog);
    }
    LOG.ensureCompleted(prog);
  } else {
    // Separate query set.
    TypeInformation res = getDistanceFunction().getInputTypeRestriction();
    MultipleObjectsBundle bundle = queries.loadData();
    // Use the first column with a compatible type.
    int col = -1;
    for (int i = 0; i < bundle.metaLength(); i++) {
      if (res.isAssignableFromType(bundle.meta(i))) {
        col = i;
        break;
      }
    }
    if (col < 0) {
      throw new AbortException("No compatible data type in query input was found. Expected: " + res.toString());
    }
    // Random sampling is a bit of hack, sorry.
    // But currently, we don't (yet) have an "integer random sample" function.
    // So we sample from a temporary DBID range and map the IDs back to offsets.
    DBIDRange sids = DBIDUtil.generateStaticDBIDRange(bundle.dataLength());
    final DBIDs sample = DBIDUtil.randomSample(sids, sampling, random);
    FiniteProgress prog = LOG.isVeryVerbose() ? new FiniteProgress("kNN queries", sample.size(), LOG) : null;
    for (DBIDIter iditer = sample.iter(); iditer.valid(); iditer.advance()) {
      // Offset of the sampled ID within the query bundle.
      int off = sids.binarySearch(iditer);
      assert (off >= 0);
      @SuppressWarnings("unchecked") O o = (O) bundle.data(off, col);
      // Query index:
      KNNList knns = knnQuery.getKNNForObject(o, k);
      // Query reference:
      KNNList trueknns = truekNNQuery.getKNNForObject(o, k);
      if (recordQueryQuality(knns, trueknns, mv, mvrec, mvdist, mvdaerr, mvdrerr)) {
        misses++;
      }
      LOG.incrementProcessed(prog);
    }
    LOG.ensureCompleted(prog);
  }
  logQualityStatistics(mv, mvrec, mvdist, mvdaerr, mvdrerr, misses);
  return null;
}

/**
 * Record the quality of one approximate kNN result against the reference
 * result.
 *
 * @param knns Result of the approximate (index) query
 * @param trueknns Result of the reference query
 * @param mv Collects the adjusted result size
 * @param mvrec Collects the recall
 * @param mvdist Collects the approximate k-distance
 * @param mvdaerr Collects the absolute k-distance error
 * @param mvdrerr Collects the relative k-distance error
 * @return {@code true} if the approximate query returned less than k objects
 */
private boolean recordQueryQuality(KNNList knns, KNNList trueknns, MeanVariance mv, MeanVariance mvrec, MeanVariance mvdist, MeanVariance mvdaerr, MeanVariance mvdrerr) {
  // Put adjusted knn size:
  mv.put(knns.size() * k / (double) trueknns.size());
  // Put recall:
  mvrec.put(DBIDUtil.intersectionSize(knns, trueknns) / (double) trueknns.size());
  if (knns.size() < k) {
    // Less than k objects.
    return true;
  }
  double kdist = knns.getKNNDistance();
  final double tdist = trueknns.getKNNDistance();
  // Only record distance errors when the true k-distance is positive
  // (the relative error divides by it).
  if (tdist > 0.0) {
    mvdist.put(kdist);
    mvdaerr.put(kdist - tdist);
    mvdrerr.put(kdist / tdist);
  }
  return false;
}

/**
 * Log the collected quality statistics, if statistics logging is enabled.
 *
 * @param mv Adjusted result sizes
 * @param mvrec Recall values
 * @param mvdist Approximate k-distances
 * @param mvdaerr Absolute k-distance errors
 * @param mvdrerr Relative k-distance errors
 * @param misses Number of queries that returned less than k objects
 */
private void logQualityStatistics(MeanVariance mv, MeanVariance mvrec, MeanVariance mvdist, MeanVariance mvdaerr, MeanVariance mvdrerr, int misses) {
  if (!LOG.isStatistics()) {
    return;
  }
  LOG.statistics("Mean number of results: " + mv.getMean() + " +- " + mv.getNaiveStddev());
  LOG.statistics("Recall of true results: " + mvrec.getMean() + " +- " + mvrec.getNaiveStddev());
  if (mvdist.getCount() > 0) {
    LOG.statistics("Mean k-distance: " + mvdist.getMean() + " +- " + mvdist.getNaiveStddev());
    LOG.statistics("Mean absolute k-error: " + mvdaerr.getMean() + " +- " + mvdaerr.getNaiveStddev());
    LOG.statistics("Mean relative k-error: " + mvdrerr.getMean() + " +- " + mvdrerr.getNaiveStddev());
  }
  if (misses > 0) {
    LOG.statistics(String.format("Number of queries that returned less than k=%d objects: %d (%.2f%%)", k, misses, misses * 100. / mv.getCount()));
  }
}
Aggregations