Search in sources :

Example 6 with DoubleDBIDList

use of de.lmu.ifi.dbs.elki.database.ids.DoubleDBIDList in project elki by elki-project.

the class AbstractIndexStructureTest method testExactEuclidean.

/**
 * Actual test routine: builds the test database, then checks that the kNN and
 * range queries it returns are of the expected classes and yield the
 * reference distances and vectors for the query point.
 *
 * @param inputparams Parameterization used to build the database; a fixed
 *        DBID filter is added to it
 * @param expectKNNQuery Expected (super-)class of the kNN query, or
 *        {@code null} to skip the kNN test
 * @param expectRangeQuery Expected (super-)class of the range query, or
 *        {@code null} to skip the range test
 */
protected void testExactEuclidean(ListParameterization inputparams, Class<?> expectKNNQuery, Class<?> expectRangeQuery) {
    // Use a fixed DBID - historically, we used 1 indexed - to reduce random
    // variation in results due to different hash codes everywhere.
    inputparams.addParameter(AbstractDatabaseConnection.Parameterizer.FILTERS_ID, new FixedDBIDsFilter(1));
    Database db = AbstractSimpleAlgorithmTest.makeSimpleDatabase(dataset, shoulds, inputparams);
    Relation<DoubleVector> rep = db.getRelation(TypeUtil.DOUBLE_VECTOR_FIELD);
    DistanceQuery<DoubleVector> dist = db.getDistanceQuery(rep, EuclideanDistanceFunction.STATIC);
    if (expectKNNQuery != null) {
        // Retrieve the k nearest neighbors of the query point
        DoubleVector dv = DoubleVector.wrap(querypoint);
        KNNQuery<DoubleVector> knnq = db.getKNNQuery(dist, k);
        assertTrue("Returned knn query is not of expected class: expected " + expectKNNQuery + " got " + knnq.getClass(), expectKNNQuery.isAssignableFrom(knnq.getClass()));
        KNNList ids = knnq.getKNNForObject(dv, k);
        verifyNeighbors(ids.size(), ids.iter(), rep, dist);
    }
    if (expectRangeQuery != null) {
        // Do a range query
        DoubleVector dv = DoubleVector.wrap(querypoint);
        RangeQuery<DoubleVector> rangeq = db.getRangeQuery(dist, eps);
        assertTrue("Returned range query is not of expected class: expected " + expectRangeQuery + " got " + rangeq.getClass(), expectRangeQuery.isAssignableFrom(rangeq.getClass()));
        DoubleDBIDList ids = rangeq.getRangeForObject(dv, eps);
        verifyNeighbors(ids.size(), ids.iter(), rep, dist);
    }
}

/**
 * Compare a query result against the reference distances ({@code shouldd})
 * and reference vectors ({@code shouldc}), position by position.
 *
 * @param size Number of entries in the query result
 * @param res Iterator over the query result
 * @param rep Relation used to look up the result vectors
 * @param dist Distance query used to compare result and reference vectors
 */
private void verifyNeighbors(int size, DoubleDBIDListIter res, Relation<DoubleVector> rep, DistanceQuery<DoubleVector> dist) {
    // Sizes are integers: compare them exactly instead of via a double delta.
    assertEquals("Result size does not match expectation!", shouldd.length, size);
    // verify that the neighbors match.
    for (int i = 0; res.valid(); res.advance(), i++) {
        // Verify distance
        assertEquals("Expected distance doesn't match.", shouldd[i], res.doubleValue(), 1e-6);
        // verify vector
        DoubleVector c = rep.get(res);
        DoubleVector c2 = DoubleVector.wrap(shouldc[i]);
        assertEquals("Expected vector doesn't match: " + c.toString(), 0.0, dist.distance(c, c2), 1e-15);
    }
}
Also used : DoubleDBIDListIter(de.lmu.ifi.dbs.elki.database.ids.DoubleDBIDListIter) FixedDBIDsFilter(de.lmu.ifi.dbs.elki.datasource.filter.FixedDBIDsFilter) KNNList(de.lmu.ifi.dbs.elki.database.ids.KNNList) Database(de.lmu.ifi.dbs.elki.database.Database) DoubleDBIDList(de.lmu.ifi.dbs.elki.database.ids.DoubleDBIDList) DoubleVector(de.lmu.ifi.dbs.elki.data.DoubleVector)

Example 7 with DoubleDBIDList

use of de.lmu.ifi.dbs.elki.database.ids.DoubleDBIDList in project elki by elki-project.

the class Leader method run.

/**
 * Perform leader clustering on the given relation.
 *
 * Objects are visited in iteration order. Every object that has not yet been
 * assigned issues a range query with the threshold radius; all results that
 * were not assigned before form a new top-level cluster whose prototype is
 * the querying object.
 *
 * @param relation Data set
 * @return Clustering result
 */
public Clustering<PrototypeModel<O>> run(Relation<O> relation) {
    RangeQuery<O> rangeQuery = relation.getRangeQuery(getDistanceFunction(), threshold);
    ModifiableDBIDs assigned = DBIDUtil.newHashSet(relation.size());
    Clustering<PrototypeModel<O>> result = new Clustering<>("Prototype clustering", "prototype-clustering");
    int numQueries = 0;
    FiniteProgress progress = null;
    if (LOG.isVerbose()) {
        progress = new FiniteProgress("Leader clustering", relation.size(), LOG);
    }
    DBIDIter leader = relation.iterDBIDs();
    // Stop as soon as the iterator is exhausted or everything is assigned.
    while (leader.valid() && assigned.size() < relation.size()) {
        if (!assigned.contains(leader)) {
            DoubleDBIDList inRange = rq(rangeQuery, leader);
            numQueries++;
            ModifiableDBIDs members = DBIDUtil.newArray(inRange.size());
            for (DBIDIter candidate = inRange.iter(); candidate.valid(); candidate.advance()) {
                // Only objects not claimed by an earlier leader join this cluster.
                if (!assigned.add(candidate)) {
                    continue;
                }
                LOG.incrementProcessed(progress);
                members.add(candidate);
            }
            assert (members.size() > 0 && members.contains(leader));
            PrototypeModel<O> model = new SimplePrototypeModel<>(relation.get(leader));
            result.addToplevelCluster(new Cluster<>(members, model));
        }
        leader.advance();
    }
    LOG.statistics(new LongStatistic(this.getClass().getName() + ".queries", numQueries));
    LOG.ensureCompleted(progress);
    return result;
}

/**
 * Run the threshold range query for one leader candidate.
 *
 * @param rangeQuery Range query to use
 * @param leader Object to query for
 * @return Objects within the threshold radius of the leader
 */
private DoubleDBIDList rq(RangeQuery<O> rangeQuery, DBIDIter leader) {
    return rangeQuery.getRangeForDBID(leader, threshold);
}
Also used : FiniteProgress(de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress) Clustering(de.lmu.ifi.dbs.elki.data.Clustering) DBIDIter(de.lmu.ifi.dbs.elki.database.ids.DBIDIter) LongStatistic(de.lmu.ifi.dbs.elki.logging.statistics.LongStatistic) DoubleDBIDList(de.lmu.ifi.dbs.elki.database.ids.DoubleDBIDList) SimplePrototypeModel(de.lmu.ifi.dbs.elki.data.model.SimplePrototypeModel) ModifiableDBIDs(de.lmu.ifi.dbs.elki.database.ids.ModifiableDBIDs) PrototypeModel(de.lmu.ifi.dbs.elki.data.model.PrototypeModel) SimplePrototypeModel(de.lmu.ifi.dbs.elki.data.model.SimplePrototypeModel)

Example 8 with DoubleDBIDList

use of de.lmu.ifi.dbs.elki.database.ids.DoubleDBIDList in project elki by elki-project.

the class DBSCAN method expandCluster.

/**
 * DBSCAN-function expandCluster.
 *
 * Queries the epsilon-neighborhood of the start object. With fewer than
 * {@code minpts} neighbors the object is recorded as noise; otherwise a new
 * cluster is grown from it by repeatedly querying the seeds.
 *
 * Border-Objects become members of the first possible cluster.
 *
 * @param relation Database relation to run on
 * @param rangeQuery Range query to use
 * @param startObjectID potential seed of a new potential cluster
 * @param seeds Array to store the current seeds
 * @param objprog Number of objects processed (may be {@code null})
 * @param clusprog Number of clusters found (may be {@code null})
 */
protected void expandCluster(Relation<O> relation, RangeQuery<O> rangeQuery, DBIDRef startObjectID, ArrayModifiableDBIDs seeds, FiniteProgress objprog, IndefiniteProgress clusprog) {
    DoubleDBIDList neighborhood = rangeQuery.getRangeForDBID(startObjectID, epsilon);
    ncounter += neighborhood.size();

    final boolean startIsCore = neighborhood.size() >= minpts;
    if (!startIsCore) {
        // Not a core object: record as noise and stop here.
        noise.add(startObjectID);
        processedIDs.add(startObjectID);
        if (objprog != null) {
            objprog.incrementProcessed(LOG);
        }
        return;
    }

    // The start object is a core object and opens a new cluster.
    ModifiableDBIDs members = DBIDUtil.newArray();
    members.add(startObjectID);
    processedIDs.add(startObjectID);

    // The seed list is expected to arrive empty; clear it regardless.
    assert (seeds.size() == 0);
    seeds.clear();
    processNeighbors(neighborhood.iter(), members, seeds);

    // Grow the cluster: only core objects contribute their neighbors.
    DBIDVar current = DBIDUtil.newVar();
    while (!seeds.isEmpty()) {
        neighborhood = rangeQuery.getRangeForDBID(seeds.pop(current), epsilon);
        ncounter += neighborhood.size();
        if (neighborhood.size() >= minpts) {
            processNeighbors(neighborhood.iter(), members, seeds);
        }
        if (objprog != null) {
            objprog.incrementProcessed(LOG);
        }
    }

    resultList.add(members);
    if (clusprog != null) {
        clusprog.setProcessed(resultList.size(), LOG);
    }
}
Also used : DBIDVar(de.lmu.ifi.dbs.elki.database.ids.DBIDVar) DoubleDBIDList(de.lmu.ifi.dbs.elki.database.ids.DoubleDBIDList) ArrayModifiableDBIDs(de.lmu.ifi.dbs.elki.database.ids.ArrayModifiableDBIDs) ModifiableDBIDs(de.lmu.ifi.dbs.elki.database.ids.ModifiableDBIDs)

Example 9 with DoubleDBIDList

use of de.lmu.ifi.dbs.elki.database.ids.DoubleDBIDList in project elki by elki-project.

the class KMeansMinusMinus method meansWithTreshhold.

/**
 * Returns the mean vectors of the given clusters in the given database,
 * ignoring every cluster member whose stored double value is at or above the
 * threshold.
 *
 * If no member of a cluster remains below the threshold, the previous mean of
 * that cluster is kept.
 *
 * @param clusters the clusters to compute the means
 * @param means the recent means
 * @param database the database containing the vectors
 * @param tresh threshold; entries with a value {@code >= tresh} are skipped
 *        (presumably the value is the distance to the assigned mean - verify
 *        against the caller)
 * @return the mean vectors of the given clusters in the given database
 */
protected double[][] meansWithTreshhold(List<? extends ModifiableDoubleDBIDList> clusters, double[][] means, Relation<V> database, Double tresh) {
    // TODO: use Kahan summation for better numerical precision?
    double[][] newMeans = new double[k][];
    for (int i = 0; i < k; i++) {
        DoubleDBIDList list = clusters.get(i);
        double[] raw = null;
        int count = 0;
        for (DoubleDBIDListIter iter = list.iter(); iter.valid(); iter.advance()) {
            if (iter.doubleValue() >= tresh) {
                continue;
            }
            NumberVector vec = database.get(iter);
            if (raw == null) {
                // Initialize the running sum with the first retained vector.
                // It must NOT be added again below, or it would be counted twice.
                raw = vec.toArray();
            } else {
                // Update with remaining instances
                for (int j = 0; j < raw.length; j++) {
                    raw[j] += vec.doubleValue(j);
                }
            }
            count++;
        }
        // raw != null implies count >= 1, so the division is safe.
        newMeans[i] = (raw != null) ? VMath.timesEquals(raw, 1.0 / count) : means[i];
    }
    return newMeans;
}
Also used : DoubleDBIDListIter(de.lmu.ifi.dbs.elki.database.ids.DoubleDBIDListIter) NumberVector(de.lmu.ifi.dbs.elki.data.NumberVector) ModifiableDoubleDBIDList(de.lmu.ifi.dbs.elki.database.ids.ModifiableDoubleDBIDList) DoubleDBIDList(de.lmu.ifi.dbs.elki.database.ids.DoubleDBIDList)

Example 10 with DoubleDBIDList

use of de.lmu.ifi.dbs.elki.database.ids.DoubleDBIDList in project elki by elki-project.

the class OUTRES method outresScore.

/**
 * Main loop of OUTRES. Run for each object.
 *
 * Starting at dimension {@code s}, each dimension not yet in the subspace is
 * added temporarily. If the object's neighborhood in the extended subspace
 * has more than two members and the subspace passes the relevance test, the
 * score is updated and the search recurses with the next dimension. The
 * added dimension is removed again before the next one is tried.
 *
 * @param s start dimension
 * @param subspace Current subspace
 * @param id Current object ID
 * @param kernel Kernel
 * @return Score
 */
public double outresScore(final int s, long[] subspace, DBIDRef id, KernelDensityEstimator kernel) {
    // Initial score is 1.0
    double result = 1.0;
    final SubspaceEuclideanDistanceFunction distance = new SubspaceEuclideanDistanceFunction(subspace);
    MeanVariance stats = new MeanVariance();
    for (int dim = s; dim < kernel.dim; dim++) {
        if (BitsUtil.get(subspace, dim)) {
            // with i=0?
            continue;
        }
        // Temporarily extend the subspace by this dimension.
        BitsUtil.setI(subspace, dim);
        distance.setSelectedDimensions(subspace);
        final double adjustedEps = kernel.adjustedEps(kernel.dim);
        // Query with a larger window, to also get neighbors of neighbors
        // Subspace euclidean is metric!
        final double window = 2. * adjustedEps;
        RangeQuery<V> query = QueryUtil.getRangeQuery(kernel.relation, distance, window);
        DoubleDBIDList candidates = query.getRangeForDBID(id, window);
        DoubleDBIDList neighborhood = refineRange(candidates, adjustedEps);
        // Size check first; the relevance test only runs when it passes.
        if (neighborhood.size() > 2 && relevantSubspace(subspace, neighborhood, kernel)) {
            final double density = kernel.subspaceDensity(subspace, neighborhood);
            // Compute mean and standard deviation for densities of neighbors.
            stats.reset();
            DoubleDBIDListIter member = neighborhood.iter();
            while (member.valid()) {
                DoubleDBIDList memberNeighbors = subsetNeighborhoodQuery(candidates, member, distance, adjustedEps, kernel);
                stats.put(kernel.subspaceDensity(subspace, memberNeighbors));
                member.advance();
            }
            final double deviation = (stats.getMean() - density) / (2. * stats.getSampleStddev());
            // High deviation:
            if (deviation >= 1) {
                result *= (density / deviation);
            }
            // Recursion
            result *= outresScore(dim + 1, subspace, id, kernel);
        }
        // Undo the temporary extension before trying the next dimension.
        BitsUtil.clearI(subspace, dim);
    }
    return result;
}
Also used : MeanVariance(de.lmu.ifi.dbs.elki.math.MeanVariance) DoubleDBIDListIter(de.lmu.ifi.dbs.elki.database.ids.DoubleDBIDListIter) SubspaceEuclideanDistanceFunction(de.lmu.ifi.dbs.elki.distance.distancefunction.subspace.SubspaceEuclideanDistanceFunction) ModifiableDoubleDBIDList(de.lmu.ifi.dbs.elki.database.ids.ModifiableDoubleDBIDList) DoubleDBIDList(de.lmu.ifi.dbs.elki.database.ids.DoubleDBIDList)

Aggregations

DoubleDBIDList (de.lmu.ifi.dbs.elki.database.ids.DoubleDBIDList)19 DBIDIter (de.lmu.ifi.dbs.elki.database.ids.DBIDIter)13 FiniteProgress (de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress)12 DoubleDBIDListIter (de.lmu.ifi.dbs.elki.database.ids.DoubleDBIDListIter)9 NumberVector (de.lmu.ifi.dbs.elki.data.NumberVector)4 DBIDs (de.lmu.ifi.dbs.elki.database.ids.DBIDs)4 ModifiableDBIDs (de.lmu.ifi.dbs.elki.database.ids.ModifiableDBIDs)4 MeanVariance (de.lmu.ifi.dbs.elki.math.MeanVariance)4 WritableDoubleDataStore (de.lmu.ifi.dbs.elki.database.datastore.WritableDoubleDataStore)3 KNNList (de.lmu.ifi.dbs.elki.database.ids.KNNList)3 ModifiableDoubleDBIDList (de.lmu.ifi.dbs.elki.database.ids.ModifiableDoubleDBIDList)3 Duration (de.lmu.ifi.dbs.elki.logging.statistics.Duration)3 AbortException (de.lmu.ifi.dbs.elki.utilities.exceptions.AbortException)3 Clustering (de.lmu.ifi.dbs.elki.data.Clustering)2 DoubleVector (de.lmu.ifi.dbs.elki.data.DoubleVector)2 Database (de.lmu.ifi.dbs.elki.database.Database)2 ArrayModifiableDBIDs (de.lmu.ifi.dbs.elki.database.ids.ArrayModifiableDBIDs)2 DoubleRelation (de.lmu.ifi.dbs.elki.database.relation.DoubleRelation)2 MaterializedDoubleRelation (de.lmu.ifi.dbs.elki.database.relation.MaterializedDoubleRelation)2 FixedDBIDsFilter (de.lmu.ifi.dbs.elki.datasource.filter.FixedDBIDsFilter)2