Search in sources :

Example 11 with DBIDRange

use of de.lmu.ifi.dbs.elki.database.ids.DBIDRange in project elki by elki-project.

the class WeightedQuickUnionStaticDBIDsTest method testWorstCase.

/**
 * Worst-case with 10 nodes, from Sedgewick.
 *
 * We don't test runtime, but this is an interesting case nevertheless.
 *
 * The union sequence first builds the pairs {0,1}, {2,3}, {4,5}, {6,7} and
 * {8,9}, then merges them step by step until all 10 elements form a single
 * component.
 */
@Test
public void testWorstCase() {
    final int size = 10;
    DBIDRange range = DBIDUtil.generateStaticDBIDRange(size);
    UnionFind uf = new WeightedQuickUnionStaticDBIDs(range);
    // Two independent iterators, used as cursors for the two operands.
    DBIDArrayIter i1 = range.iter(), i2 = range.iter();
    assertFalse(uf.isConnected(i1.seek(0), i2.seek(1)));
    uf.union(i1.seek(0), i2.seek(1));
    assertTrue(uf.isConnected(i1.seek(0), i2.seek(1)));
    uf.union(i1.seek(2), i2.seek(3));
    assertFalse(uf.isConnected(i1.seek(0), i2.seek(2)));
    uf.union(i1.seek(5), i2.seek(4));
    uf.union(i1.seek(7), i2.seek(6));
    uf.union(i1.seek(8), i2.seek(9));
    // Merge pairs into {0,1,2,3} and {4,5,6,7}.
    uf.union(i1.seek(1), i2.seek(3));
    assertTrue(uf.isConnected(i1.seek(0), i2.seek(2)));
    uf.union(i1.seek(4), i2.seek(6));
    assertTrue(uf.isConnected(i1.seek(5), i2.seek(7)));
    // Merge the two groups of four; {8,9} is still separate.
    uf.union(i1.seek(3), i2.seek(7));
    assertTrue(uf.isConnected(i1.seek(0), i2.seek(4)));
    assertFalse(uf.isConnected(i1.seek(0), i2.seek(9)));
    uf.union(i1.seek(0), i2.seek(9));
    // The last union attached {8,9}, so every pair of the full range must
    // now be connected (not only the first eight elements).
    for (int i = 0; i < size; i++) {
        for (int j = 0; j < size; j++) {
            assertTrue(uf.isConnected(i1.seek(i), i2.seek(j)));
        }
    }
}
Also used : DBIDRange(de.lmu.ifi.dbs.elki.database.ids.DBIDRange) DBIDArrayIter(de.lmu.ifi.dbs.elki.database.ids.DBIDArrayIter) Test(org.junit.Test)

Example 12 with DBIDRange

use of de.lmu.ifi.dbs.elki.database.ids.DBIDRange in project elki by elki-project.

the class WeightedQuickUnionStaticDBIDsTest method testBruteForce.

/**
 * Randomized comparison of the union-find structure against a naive
 * component labeling kept in a plain array, until only one component is
 * left.
 */
@Test
public void testBruteForce() {
    final int size = 100;
    final Random r = new Random(0L);
    DBIDRange range = DBIDUtil.generateStaticDBIDRange(size);
    UnionFind uf = new WeightedQuickUnionStaticDBIDs(range);
    DBIDArrayIter i1 = range.iter(), i2 = range.iter();
    // Reference labeling: initially every element is its own component.
    int[] label = new int[size];
    for (int idx = 0; idx < label.length; idx++) {
        label[idx] = idx;
    }
    int remaining = size;
    while (remaining > 1) {
        // Draw the larger offset first, then a strictly smaller one.
        final int hi = r.nextInt(size - 1) + 1;
        final int lo = r.nextInt(hi);
        final int from = label[lo];
        final int to = label[hi];
        // Both structures must agree on connectivity before the union.
        final boolean connected = uf.isConnected(i1.seek(lo), i2.seek(hi));
        assertEquals(from == to, connected);
        // Union unconditionally; the iterators still point at lo and hi.
        uf.union(i1, i2);
        if (from != to) {
            // Relabel the whole component in the reference array.
            for (int idx = 0; idx < size; idx++) {
                if (label[idx] == from) {
                    label[idx] = to;
                }
            }
            remaining--;
        }
        assertEquals(remaining, uf.getRoots().size());
    }
}
Also used : Random(java.util.Random) DBIDRange(de.lmu.ifi.dbs.elki.database.ids.DBIDRange) DBIDArrayIter(de.lmu.ifi.dbs.elki.database.ids.DBIDArrayIter) Test(org.junit.Test)

Example 13 with DBIDRange

use of de.lmu.ifi.dbs.elki.database.ids.DBIDRange in project elki by elki-project.

the class WeightedQuickUnionStaticDBIDsTest method testRoots.

/**
 * Check the number of roots reported while 8 elements are merged
 * step by step into a single component.
 */
@Test
public void testRoots() {
    final DBIDRange ids = DBIDUtil.generateStaticDBIDRange(8);
    final UnionFind unionFind = new WeightedQuickUnionStaticDBIDs(ids);
    final DBIDArrayIter left = ids.iter();
    final DBIDArrayIter right = ids.iter();

    // Two pairs: 8 - 2 = 6 components.
    unionFind.union(left.seek(0), right.seek(1));
    unionFind.union(left.seek(2), right.seek(3));
    assertEquals(6, unionFind.getRoots().size());

    // Join the two pairs: 5 components.
    unionFind.union(left.seek(0), right.seek(2));
    assertEquals(5, unionFind.getRoots().size());

    // Build the second group of four: 2 components.
    unionFind.union(left.seek(4), right.seek(5));
    unionFind.union(left.seek(6), right.seek(7));
    unionFind.union(left.seek(4), right.seek(6));
    assertEquals(2, unionFind.getRoots().size());

    // Join both groups: a single component remains.
    unionFind.union(left.seek(0), right.seek(4));
    assertEquals(1, unionFind.getRoots().size());
}
Also used : DBIDRange(de.lmu.ifi.dbs.elki.database.ids.DBIDRange) DBIDArrayIter(de.lmu.ifi.dbs.elki.database.ids.DBIDArrayIter) Test(org.junit.Test)

Example 14 with DBIDRange

use of de.lmu.ifi.dbs.elki.database.ids.DBIDRange in project elki by elki-project.

the class RangeQueryBenchmarkAlgorithm method run.

/**
 * Run the algorithm, with a separate query set.
 *
 * Each query object is read as a vector with {@code dim + 1} components:
 * the first {@code dim} values are the query coordinates, the last value is
 * used as the query radius.
 *
 * @param database Database
 * @param relation Relation
 * @return Null result
 * @throws AbortException if no query set was configured
 * @throws IncompatibleDataException if the query input contains no column
 *         of the expected vector type
 */
public Result run(Database database, Relation<O> relation) {
    if (queries == null) {
        throw new AbortException("A query set is required for this 'run' method.");
    }
    // Get a distance and range query instance.
    DistanceQuery<O> distQuery = database.getDistanceQuery(relation, getDistanceFunction());
    RangeQuery<O> rangeQuery = database.getRangeQuery(distQuery);
    NumberVector.Factory<O> ofactory = RelationUtil.getNumberVectorFactory(relation);
    int dim = RelationUtil.dimensionality(relation);
    // Separate query set: vectors with one extra component (the radius).
    TypeInformation res = VectorFieldTypeInformation.typeRequest(NumberVector.class, dim + 1, dim + 1);
    MultipleObjectsBundle bundle = queries.loadData();
    // Find the first column of the bundle that has a compatible type.
    int col = -1;
    for (int i = 0; i < bundle.metaLength(); i++) {
        if (res.isAssignableFromType(bundle.meta(i))) {
            col = i;
            break;
        }
    }
    if (col < 0) {
        // Report the expected type and all types actually present.
        StringBuilder buf = new StringBuilder();
        buf.append("No compatible data type in query input was found. Expected: ");
        buf.append(res.toString());
        buf.append(" have: ");
        for (int i = 0; i < bundle.metaLength(); i++) {
            if (i > 0) {
                buf.append(' ');
            }
            buf.append(bundle.meta(i).toString());
        }
        throw new IncompatibleDataException(buf.toString());
    }
    // Random sampling is a bit of hack, sorry.
    // But currently, we don't (yet) have an "integer random sample" function.
    DBIDRange sids = DBIDUtil.generateStaticDBIDRange(bundle.dataLength());
    final DBIDs sample = DBIDUtil.randomSample(sids, sampling, random);
    // This benchmark issues range queries, so label the progress accordingly.
    FiniteProgress prog = LOG.isVeryVerbose() ? new FiniteProgress("range queries", sample.size(), LOG) : null;
    int hash = 0;
    MeanVariance mv = new MeanVariance();
    // Reusable buffer for the coordinate part of each query vector.
    double[] buf = new double[dim];
    for (DBIDIter iditer = sample.iter(); iditer.valid(); iditer.advance()) {
        // Map the sampled id back to its offset in the query bundle.
        int off = sids.binarySearch(iditer);
        assert (off >= 0);
        NumberVector o = (NumberVector) bundle.data(off, col);
        for (int i = 0; i < dim; i++) {
            buf[i] = o.doubleValue(i);
        }
        O v = ofactory.newNumberVector(buf);
        // The extra (last) component is the query radius.
        double r = o.doubleValue(dim);
        DoubleDBIDList rres = rangeQuery.getRangeForObject(v, r);
        // Fold the result ids into a checksum, mixed into the overall hash.
        int ichecksum = 0;
        for (DBIDIter it = rres.iter(); it.valid(); it.advance()) {
            ichecksum += DBIDUtil.asInteger(it);
        }
        hash = Util.mixHashCodes(hash, ichecksum);
        mv.put(rres.size());
        LOG.incrementProcessed(prog);
    }
    LOG.ensureCompleted(prog);
    if (LOG.isStatistics()) {
        LOG.statistics("Result hashcode: " + hash);
        LOG.statistics("Mean number of results: " + mv.getMean() + " +- " + mv.getNaiveStddev());
    }
    return null;
}
Also used : DBIDs(de.lmu.ifi.dbs.elki.database.ids.DBIDs) FiniteProgress(de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress) MultipleObjectsBundle(de.lmu.ifi.dbs.elki.datasource.bundle.MultipleObjectsBundle) VectorFieldTypeInformation(de.lmu.ifi.dbs.elki.data.type.VectorFieldTypeInformation) TypeInformation(de.lmu.ifi.dbs.elki.data.type.TypeInformation) DBIDIter(de.lmu.ifi.dbs.elki.database.ids.DBIDIter) MeanVariance(de.lmu.ifi.dbs.elki.math.MeanVariance) NumberVector(de.lmu.ifi.dbs.elki.data.NumberVector) IncompatibleDataException(de.lmu.ifi.dbs.elki.utilities.exceptions.IncompatibleDataException) DBIDRange(de.lmu.ifi.dbs.elki.database.ids.DBIDRange) DoubleDBIDList(de.lmu.ifi.dbs.elki.database.ids.DoubleDBIDList) AbortException(de.lmu.ifi.dbs.elki.utilities.exceptions.AbortException)

Example 15 with DBIDRange

use of de.lmu.ifi.dbs.elki.database.ids.DBIDRange in project elki by elki-project.

the class ValidateApproximativeKNNIndex method run.

/**
 * Run the algorithm.
 *
 * Compares the results of an accelerated (index-based) kNN query with a
 * reference kNN query, either on a sample of the database itself or on a
 * sample of a separate query set, and logs result size, recall and
 * k-distance error statistics.
 *
 * @param database Database
 * @param relation Relation
 * @return Null result
 * @throws AbortException if no accelerated query is available, or if the
 *         query input contains no column of a compatible type
 */
public Result run(Database database, Relation<O> relation) {
    // Get a distance and kNN query instance.
    DistanceQuery<O> distQuery = database.getDistanceQuery(relation, getDistanceFunction());
    // Approximate query:
    KNNQuery<O> knnQuery = database.getKNNQuery(distQuery, k, DatabaseQuery.HINT_OPTIMIZED_ONLY);
    if (knnQuery == null || knnQuery instanceof LinearScanQuery) {
        throw new AbortException("Expected an accelerated query, but got a linear scan -- index is not used.");
    }
    // Exact query:
    KNNQuery<O> truekNNQuery;
    if (forcelinear) {
        truekNNQuery = QueryUtil.getLinearScanKNNQuery(distQuery);
    } else {
        truekNNQuery = database.getKNNQuery(distQuery, k, DatabaseQuery.HINT_EXACT);
    }
    if (knnQuery.getClass().equals(truekNNQuery.getClass())) {
        LOG.warning("Query classes are the same. This experiment may be invalid!");
    }
    // No query set - use original database.
    if (queries == null || pattern != null) {
        // Relation to filter on
        Relation<String> lrel = (pattern != null) ? DatabaseUtil.guessLabelRepresentation(database) : null;
        final DBIDs sample = DBIDUtil.randomSample(relation.getDBIDs(), sampling, random);
        FiniteProgress prog = LOG.isVeryVerbose() ? new FiniteProgress("kNN queries", sample.size(), LOG) : null;
        MeanVariance mv = new MeanVariance(), mvrec = new MeanVariance();
        MeanVariance mvdist = new MeanVariance(), mvdaerr = new MeanVariance(), mvdrerr = new MeanVariance();
        int misses = 0;
        for (DBIDIter iditer = sample.iter(); iditer.valid(); iditer.advance()) {
            // Only query objects whose label matches the pattern (if any).
            if (pattern == null || pattern.matcher(lrel.get(iditer)).find()) {
                // Query index:
                KNNList knns = knnQuery.getKNNForDBID(iditer, k);
                // Query reference:
                KNNList trueknns = truekNNQuery.getKNNForDBID(iditer, k);
                // Put adjusted knn size:
                mv.put(knns.size() * k / (double) trueknns.size());
                // Put recall:
                mvrec.put(DBIDUtil.intersectionSize(knns, trueknns) / (double) trueknns.size());
                if (knns.size() >= k) {
                    double kdist = knns.getKNNDistance();
                    final double tdist = trueknns.getKNNDistance();
                    // Skip zero reference distances: relative error undefined.
                    if (tdist > 0.0) {
                        mvdist.put(kdist);
                        mvdaerr.put(kdist - tdist);
                        mvdrerr.put(kdist / tdist);
                    }
                } else {
                    // Less than k objects.
                    misses++;
                }
            }
            LOG.incrementProcessed(prog);
        }
        LOG.ensureCompleted(prog);
        logValidationStatistics(mv, mvrec, mvdist, mvdaerr, mvdrerr, misses);
    } else {
        // Separate query set.
        TypeInformation res = getDistanceFunction().getInputTypeRestriction();
        MultipleObjectsBundle bundle = queries.loadData();
        // Find the first column of the bundle that has a compatible type.
        int col = -1;
        for (int i = 0; i < bundle.metaLength(); i++) {
            if (res.isAssignableFromType(bundle.meta(i))) {
                col = i;
                break;
            }
        }
        if (col < 0) {
            throw new AbortException("No compatible data type in query input was found. Expected: " + res.toString());
        }
        // Random sampling is a bit of hack, sorry.
        // But currently, we don't (yet) have an "integer random sample" function.
        DBIDRange sids = DBIDUtil.generateStaticDBIDRange(bundle.dataLength());
        final DBIDs sample = DBIDUtil.randomSample(sids, sampling, random);
        FiniteProgress prog = LOG.isVeryVerbose() ? new FiniteProgress("kNN queries", sample.size(), LOG) : null;
        MeanVariance mv = new MeanVariance(), mvrec = new MeanVariance();
        MeanVariance mvdist = new MeanVariance(), mvdaerr = new MeanVariance(), mvdrerr = new MeanVariance();
        int misses = 0;
        for (DBIDIter iditer = sample.iter(); iditer.valid(); iditer.advance()) {
            // Map the sampled id back to its offset in the query bundle.
            int off = sids.binarySearch(iditer);
            assert (off >= 0);
            @SuppressWarnings("unchecked") O o = (O) bundle.data(off, col);
            // Query index:
            KNNList knns = knnQuery.getKNNForObject(o, k);
            // Query reference:
            KNNList trueknns = truekNNQuery.getKNNForObject(o, k);
            // Put adjusted knn size:
            mv.put(knns.size() * k / (double) trueknns.size());
            // Put recall:
            mvrec.put(DBIDUtil.intersectionSize(knns, trueknns) / (double) trueknns.size());
            if (knns.size() >= k) {
                double kdist = knns.getKNNDistance();
                final double tdist = trueknns.getKNNDistance();
                // Skip zero reference distances: relative error undefined.
                if (tdist > 0.0) {
                    mvdist.put(kdist);
                    mvdaerr.put(kdist - tdist);
                    mvdrerr.put(kdist / tdist);
                }
            } else {
                // Less than k objects.
                misses++;
            }
            LOG.incrementProcessed(prog);
        }
        LOG.ensureCompleted(prog);
        // Same statistics as for the database sample, including the mean
        // k-distance that was previously collected here but never reported.
        logValidationStatistics(mv, mvrec, mvdist, mvdaerr, mvdrerr, misses);
    }
    return null;
}

/**
 * Log the aggregated statistics of a validation run, if statistics logging
 * is enabled.
 *
 * @param mv Adjusted result sizes
 * @param mvrec Recall values
 * @param mvdist k-distances of the approximate results
 * @param mvdaerr Absolute k-distance errors
 * @param mvdrerr Relative k-distance errors
 * @param misses Number of queries that returned less than k objects
 */
private void logValidationStatistics(MeanVariance mv, MeanVariance mvrec, MeanVariance mvdist, MeanVariance mvdaerr, MeanVariance mvdrerr, int misses) {
    if (!LOG.isStatistics()) {
        return;
    }
    LOG.statistics("Mean number of results: " + mv.getMean() + " +- " + mv.getNaiveStddev());
    LOG.statistics("Recall of true results: " + mvrec.getMean() + " +- " + mvrec.getNaiveStddev());
    if (mvdist.getCount() > 0) {
        LOG.statistics("Mean k-distance: " + mvdist.getMean() + " +- " + mvdist.getNaiveStddev());
        LOG.statistics("Mean absolute k-error: " + mvdaerr.getMean() + " +- " + mvdaerr.getNaiveStddev());
        LOG.statistics("Mean relative k-error: " + mvdrerr.getMean() + " +- " + mvdrerr.getNaiveStddev());
    }
    if (misses > 0) {
        LOG.statistics(String.format("Number of queries that returned less than k=%d objects: %d (%.2f%%)", k, misses, misses * 100. / mv.getCount()));
    }
}
Also used : DBIDs(de.lmu.ifi.dbs.elki.database.ids.DBIDs) FiniteProgress(de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress) MultipleObjectsBundle(de.lmu.ifi.dbs.elki.datasource.bundle.MultipleObjectsBundle) TypeInformation(de.lmu.ifi.dbs.elki.data.type.TypeInformation) DBIDIter(de.lmu.ifi.dbs.elki.database.ids.DBIDIter) MeanVariance(de.lmu.ifi.dbs.elki.math.MeanVariance) KNNList(de.lmu.ifi.dbs.elki.database.ids.KNNList) DBIDRange(de.lmu.ifi.dbs.elki.database.ids.DBIDRange) LinearScanQuery(de.lmu.ifi.dbs.elki.database.query.LinearScanQuery) AbortException(de.lmu.ifi.dbs.elki.utilities.exceptions.AbortException)

Aggregations

DBIDRange (de.lmu.ifi.dbs.elki.database.ids.DBIDRange)24 DBIDArrayIter (de.lmu.ifi.dbs.elki.database.ids.DBIDArrayIter)13 FiniteProgress (de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress)8 AbortException (de.lmu.ifi.dbs.elki.utilities.exceptions.AbortException)8 Test (org.junit.Test)8 DBIDIter (de.lmu.ifi.dbs.elki.database.ids.DBIDIter)6 DBIDs (de.lmu.ifi.dbs.elki.database.ids.DBIDs)6 Random (java.util.Random)4 NumberVector (de.lmu.ifi.dbs.elki.data.NumberVector)3 TypeInformation (de.lmu.ifi.dbs.elki.data.type.TypeInformation)3 Database (de.lmu.ifi.dbs.elki.database.Database)3 KNNList (de.lmu.ifi.dbs.elki.database.ids.KNNList)3 MultipleObjectsBundle (de.lmu.ifi.dbs.elki.datasource.bundle.MultipleObjectsBundle)3 MeanVariance (de.lmu.ifi.dbs.elki.math.MeanVariance)3 DoubleVector (de.lmu.ifi.dbs.elki.data.DoubleVector)2 StaticArrayDatabase (de.lmu.ifi.dbs.elki.database.StaticArrayDatabase)2 Relation (de.lmu.ifi.dbs.elki.database.relation.Relation)2 ArrayAdapterDatabaseConnection (de.lmu.ifi.dbs.elki.datasource.ArrayAdapterDatabaseConnection)2 DatabaseConnection (de.lmu.ifi.dbs.elki.datasource.DatabaseConnection)2 OnDiskUpperTriangleMatrix (de.lmu.ifi.dbs.elki.persistent.OnDiskUpperTriangleMatrix)2