Search in sources :

Example 6 with ModifiableDBIDs

use of de.lmu.ifi.dbs.elki.database.ids.ModifiableDBIDs in project elki by elki-project.

the class KMeansElkan method initialAssignToNearestCluster.

/**
 * Perform the initial assignment: every object is put into the cluster of its
 * nearest mean. No bounds are consulted here; the distance to each of the k
 * means is computed, and these distances are used to fill the lower bounds
 * and the upper bound of the object.
 *
 * @param relation Data
 * @param means Current means
 * @param sums Per-cluster coordinate sums; the object's coordinates are added
 *        to the row of the chosen cluster
 * @param clusters Current clusters; the object is added to the chosen one
 * @param assignment Cluster assignment, written for every object
 * @param upper Upper bounds, set to the distance to the chosen mean
 * @param lower Lower bounds; entry j is set to the distance to mean j
 * @return Number of changes (i.e. relation size)
 */
private int initialAssignToNearestCluster(Relation<V> relation, double[][] means, double[][] sums, List<ModifiableDBIDs> clusters, WritableIntegerDataStore assignment, WritableDoubleDataStore upper, WritableDataStore<double[]> lower) {
    assert (k == means.length);
    final boolean needSqrt = distanceFunction.isSquared();
    for (DBIDIter iter = relation.iterDBIDs(); iter.valid(); iter.advance()) {
        final V vec = relation.get(iter);
        final double[] bounds = lower.get(iter);
        // Exhaustive scan over all means, remembering the closest one.
        int nearest = -1;
        double mindist = Double.POSITIVE_INFINITY;
        for (int c = 0; c < k; c++) {
            double d = distanceFunction.distance(vec, DoubleVector.wrap(means[c]));
            if (needSqrt) {
                // Bounds are kept as plain (non-squared) distances.
                d = FastMath.sqrt(d);
            }
            bounds[c] = d;
            if (d < mindist) {
                mindist = d;
                nearest = c;
            }
        }
        // Record the membership and the (tight) upper bound.
        clusters.get(nearest).add(iter);
        assignment.putInt(iter, nearest);
        upper.putDouble(iter, mindist);
        // Accumulate the object into the coordinate sum of its cluster.
        final double[] sum = sums[nearest];
        for (int dim = 0; dim < vec.getDimensionality(); dim++) {
            sum[dim] += vec.doubleValue(dim);
        }
    }
    // Every object counts as changed in the initial pass.
    return relation.size();
}
Also used : ModifiableDBIDs(de.lmu.ifi.dbs.elki.database.ids.ModifiableDBIDs) DBIDIter(de.lmu.ifi.dbs.elki.database.ids.DBIDIter)

Example 7 with ModifiableDBIDs

use of de.lmu.ifi.dbs.elki.database.ids.ModifiableDBIDs in project elki by elki-project.

the class KMeansElkan method assignToNearestCluster.

/**
 * Reassign objects, but only if their bounds indicate it is necessary to do
 * so.
 * <p>
 * An object is skipped entirely when its stored upper bound does not exceed
 * {@code sep} of its current cluster. Otherwise each other mean j is
 * considered only if the upper bound exceeds both the object's lower bound
 * for j and {@code cdist[cur][j]}. The exact distance to the current mean is
 * computed at most once per object, and only when the first such candidate is
 * encountered.
 *
 * @param relation Data
 * @param means Current means
 * @param sums New means (coordinate sums; adjusted in place when an object
 *        moves between clusters)
 * @param clusters Current clusters
 * @param assignment Cluster assignment
 * @param sep Separation of means
 * @param cdist Center-to-center distances
 * @param upper Upper bounds
 * @param lower Lower bounds
 * @return Number of objects that were reassigned to a different cluster
 */
private int assignToNearestCluster(Relation<V> relation, double[][] means, double[][] sums, List<ModifiableDBIDs> clusters, WritableIntegerDataStore assignment, double[] sep, double[][] cdist, WritableDoubleDataStore upper, WritableDataStore<double[]> lower) {
    assert (k == means.length);
    final boolean issquared = distanceFunction.isSquared();
    int changed = 0;
    for (DBIDIter it = relation.iterDBIDs(); it.valid(); it.advance()) {
        final int orig = assignment.intValue(it);
        double u = upper.doubleValue(it);
        // Upper bound check (#2): the object cannot change cluster, skip it
        // without computing any distance.
        if (u <= sep[orig]) {
            continue;
        }
        // Elkan's r(x): true while u is only the stored bound and has not yet
        // been replaced by the exact distance to the current mean.
        boolean recompute_u = true;
        V fv = relation.get(it);
        // Array of lower bounds for this object; entries are overwritten in
        // place below (no explicit write-back to the store is done here).
        double[] l = lower.get(it);
        // Check all (other) means:
        int cur = orig;
        for (int j = 0; j < k; j++) {
            if (orig == j || u <= l[j] || u <= cdist[cur][j]) {
                // Condition #3 i-iii not satisfied
                continue;
            }
            if (recompute_u) {
                // Need to update bound? #3a
                // cur still equals orig here, because cur only changes after
                // a distance computation further below.
                u = distanceFunction.distance(fv, DoubleVector.wrap(means[cur]));
                // Bounds are kept as non-squared distances.
                u = issquared ? FastMath.sqrt(u) : u;
                upper.putDouble(it, u);
                // Once only
                recompute_u = false;
                // Re-test the pruning conditions with the tightened bound.
                if (u <= l[j] || u <= cdist[cur][j]) {
                    // #3b
                    continue;
                }
            }
            // Pruning failed: compute the exact distance to mean j and store
            // it as the new lower bound.
            double dist = distanceFunction.distance(fv, DoubleVector.wrap(means[j]));
            dist = issquared ? FastMath.sqrt(dist) : dist;
            l[j] = dist;
            if (dist < u) {
                // Mean j is strictly closer; it becomes the current candidate.
                cur = j;
                u = dist;
            }
        }
        // Object is to be reassigned.
        if (cur != orig) {
            // Remember bound.
            upper.putDouble(it, u);
            ModifiableDBIDs newc = clusters.get(cur);
            newc.add(it);
            assignment.putInt(it, cur);
            double[] newmean = sums[cur];
            ModifiableDBIDs oldc = clusters.get(orig);
            oldc.remove(it);
            double[] oldmean = sums[orig];
            // Move the object's coordinates from the old sum to the new sum.
            for (int d = 0; d < fv.getDimensionality(); d++) {
                final double v = fv.doubleValue(d);
                newmean[d] += v;
                oldmean[d] -= v;
            }
            ++changed;
        }
    }
    return changed;
}
Also used : ModifiableDBIDs(de.lmu.ifi.dbs.elki.database.ids.ModifiableDBIDs) DBIDIter(de.lmu.ifi.dbs.elki.database.ids.DBIDIter)

Example 8 with ModifiableDBIDs

use of de.lmu.ifi.dbs.elki.database.ids.ModifiableDBIDs in project elki by elki-project.

the class ChengAndChurch method biclustering.

/**
 * Extract {@code n} biclusters one after another. Each round resets the
 * candidate, runs multiple node deletion, single node deletion and node
 * addition (logged as "Alg 2", "Alg 1" and "Alg 3"), calls
 * {@code maskMatrix} on the working matrix and adds the resulting rows as a
 * top-level cluster. Objects that end up in no bicluster are collected in one
 * additional cluster over all columns.
 *
 * @return the clustering holding all extracted biclusters
 */
@Override
public Clustering<BiclusterWithInversionsModel> biclustering() {
    final double[][] matrix = RelationUtil.relationAsMatrix(relation, rowIDs);
    final BiclusterCandidate candidate = new BiclusterCandidate(getRowDim(), getColDim());
    final Clustering<BiclusterWithInversionsModel> clustering = new Clustering<>("Cheng-and-Church", "Cheng and Church Biclustering");
    // Begin with every object unassigned; members of found biclusters are
    // removed as we go.
    final ModifiableDBIDs unassigned = DBIDUtil.newHashSet(relation.getDBIDs());
    FiniteProgress progress = null;
    if (LOG.isVerbose()) {
        progress = new FiniteProgress("Extracting Cluster", n, LOG);
    }
    for (int round = 1; round <= n; round++) {
        candidate.reset();

        multipleNodeDeletion(matrix, candidate);
        if (LOG.isVeryVerbose()) {
            LOG.veryverbose("Residue after Alg 2: " + candidate.residue + " " + candidate.rowcard + "x" + candidate.colcard);
        }

        singleNodeDeletion(matrix, candidate);
        if (LOG.isVeryVerbose()) {
            LOG.veryverbose("Residue after Alg 1: " + candidate.residue + " " + candidate.rowcard + "x" + candidate.colcard);
        }

        nodeAddition(matrix, candidate);
        if (LOG.isVeryVerbose()) {
            LOG.veryverbose("Residue after Alg 3: " + candidate.residue + " " + candidate.rowcard + "x" + candidate.colcard);
        }

        candidate.maskMatrix(matrix, dist);

        // Turn the candidate into a cluster and take its rows out of the
        // unassigned set.
        final BiclusterWithInversionsModel bicluster = new BiclusterWithInversionsModel(colsBitsetToIDs(candidate.cols), rowsBitsetToIDs(candidate.irow));
        final ArrayDBIDs members = rowsBitsetToIDs(candidate.rows);
        unassigned.removeDBIDs(members);
        clustering.addToplevelCluster(new Cluster<>(members, bicluster));

        if (LOG.isVerbose()) {
            LOG.verbose("Score of bicluster " + round + ": " + candidate.residue + "\n");
            LOG.verbose("Number of rows: " + candidate.rowcard + "\n");
            LOG.verbose("Number of columns: " + candidate.colcard + "\n");
        }
        LOG.incrementProcessed(progress);
    }
    // Whatever is left over goes into one more cluster spanning all columns
    // (the boolean flag presumably marks it as noise - confirm against
    // the Cluster constructor).
    if (!unassigned.isEmpty()) {
        final long[] everyColumn = BitsUtil.ones(getColDim());
        final BiclusterWithInversionsModel fullModel = new BiclusterWithInversionsModel(colsBitsetToIDs(everyColumn), DBIDUtil.EMPTYDBIDS);
        clustering.addToplevelCluster(new Cluster<>(unassigned, true, fullModel));
    }
    LOG.ensureCompleted(progress);
    return clustering;
}
Also used : BiclusterWithInversionsModel(de.lmu.ifi.dbs.elki.data.model.BiclusterWithInversionsModel) FiniteProgress(de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress) ArrayDBIDs(de.lmu.ifi.dbs.elki.database.ids.ArrayDBIDs) ModifiableDBIDs(de.lmu.ifi.dbs.elki.database.ids.ModifiableDBIDs) Clustering(de.lmu.ifi.dbs.elki.data.Clustering)

Example 9 with ModifiableDBIDs

use of de.lmu.ifi.dbs.elki.database.ids.ModifiableDBIDs in project elki by elki-project.

the class MkAppTree method insertAll.

/**
 * Bulk-inserts the given entries into this MkApp-Tree and afterwards adjusts
 * the approximated kNN distances using one batch kNN query over the inserted
 * routing objects.
 *
 * @param entries the entries to be inserted; nothing happens for an empty
 *        list
 */
@Override
public void insertAll(List<MkAppEntry> entries) {
    if (entries.isEmpty()) {
        return;
    }
    if (LOG.isDebugging()) {
        LOG.debugFine("insert " + entries + "\n");
    }
    // Lazy set-up, driven by the first entry.
    if (!initialized) {
        initialize(entries.get(0));
    }
    // Insert entry by entry, collecting the routing object ids on the way.
    final ModifiableDBIDs inserted = DBIDUtil.newArray(entries.size());
    for (final MkAppEntry e : entries) {
        inserted.add(e.getRoutingObjectID());
        super.insert(e, false);
    }
    // Batch kNN query with kmax + 1 neighbors for everything just inserted,
    // then propagate the result into the approximated kNN distances.
    final Map<DBID, KNNList> neighbors = batchNN(getRoot(), inserted, settings.kmax + 1);
    adjustApproximatedKNNDistances(getRootEntry(), neighbors);
    if (EXTRA_INTEGRITY_CHECKS) {
        getRoot().integrityCheck(this, getRootEntry());
    }
}
Also used : KNNList(de.lmu.ifi.dbs.elki.database.ids.KNNList) DBID(de.lmu.ifi.dbs.elki.database.ids.DBID) ArrayModifiableDBIDs(de.lmu.ifi.dbs.elki.database.ids.ArrayModifiableDBIDs) ModifiableDBIDs(de.lmu.ifi.dbs.elki.database.ids.ModifiableDBIDs)

Example 10 with ModifiableDBIDs

use of de.lmu.ifi.dbs.elki.database.ids.ModifiableDBIDs in project elki by elki-project.

the class MkCoPTree method reverseKNNQuery.

/**
 * Answers a reverse k-nearest neighbor query for the given object. Results
 * found directly by the tree traversal are combined with those candidates
 * whose own kNN list (obtained by a batch kNN query) contains the query
 * object; the combined list is sorted before it is returned.
 *
 * @param id the query object id
 * @param k the number of nearest neighbors to be returned
 * @return the sorted list of query results
 * @throws IllegalArgumentException if {@code k} exceeds {@code settings.kmax}
 */
@Override
public DoubleDBIDList reverseKNNQuery(DBIDRef id, int k) {
    if (k > settings.kmax) {
        throw new IllegalArgumentException("Parameter k has to be less or equal than parameter kmax of the MCop-Tree!");
    }
    final ModifiableDoubleDBIDList answer = DBIDUtil.newDistanceDBIDList();
    final ModifiableDBIDs unverified = DBIDUtil.newArray();
    doReverseKNNQuery(k, id, answer, unverified);

    // Refinement: fetch the real kNN lists of all remaining candidates.
    final Map<DBID, KNNList> neighborhoods = batchNN(getRoot(), unverified, k);
    answer.sort();
    for (DBIDIter cand = unverified.iter(); cand.valid(); cand.advance()) {
        final DBID candId = DBIDUtil.deref(cand);
        final KNNList neighbors = neighborhoods.get(candId);
        // A candidate is a true hit iff the query object is among its kNN.
        for (DoubleDBIDListIter nn = neighbors.iter(); nn.valid(); nn.advance()) {
            if (!DBIDUtil.equal(id, nn)) {
                continue;
            }
            answer.add(nn.doubleValue(), candId);
            break;
        }
    }
    // Newly added hits were appended, so sort once more.
    answer.sort();
    return answer;
}
Also used : DoubleDBIDListIter(de.lmu.ifi.dbs.elki.database.ids.DoubleDBIDListIter) ModifiableDoubleDBIDList(de.lmu.ifi.dbs.elki.database.ids.ModifiableDoubleDBIDList) KNNList(de.lmu.ifi.dbs.elki.database.ids.KNNList) DBID(de.lmu.ifi.dbs.elki.database.ids.DBID) ModifiableDBIDs(de.lmu.ifi.dbs.elki.database.ids.ModifiableDBIDs) DBIDIter(de.lmu.ifi.dbs.elki.database.ids.DBIDIter)

Aggregations

ModifiableDBIDs (de.lmu.ifi.dbs.elki.database.ids.ModifiableDBIDs)80 DBIDIter (de.lmu.ifi.dbs.elki.database.ids.DBIDIter)44 Clustering (de.lmu.ifi.dbs.elki.data.Clustering)30 ArrayList (java.util.ArrayList)30 DBIDs (de.lmu.ifi.dbs.elki.database.ids.DBIDs)28 ArrayModifiableDBIDs (de.lmu.ifi.dbs.elki.database.ids.ArrayModifiableDBIDs)18 FiniteProgress (de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress)15 WritableIntegerDataStore (de.lmu.ifi.dbs.elki.database.datastore.WritableIntegerDataStore)14 IndefiniteProgress (de.lmu.ifi.dbs.elki.logging.progress.IndefiniteProgress)14 DoubleStatistic (de.lmu.ifi.dbs.elki.logging.statistics.DoubleStatistic)12 LongStatistic (de.lmu.ifi.dbs.elki.logging.statistics.LongStatistic)12 Model (de.lmu.ifi.dbs.elki.data.model.Model)11 DBID (de.lmu.ifi.dbs.elki.database.ids.DBID)11 KMeansModel (de.lmu.ifi.dbs.elki.data.model.KMeansModel)10 StringStatistic (de.lmu.ifi.dbs.elki.logging.statistics.StringStatistic)10 Cluster (de.lmu.ifi.dbs.elki.data.Cluster)9 WritableDoubleDataStore (de.lmu.ifi.dbs.elki.database.datastore.WritableDoubleDataStore)9 HashSetModifiableDBIDs (de.lmu.ifi.dbs.elki.database.ids.HashSetModifiableDBIDs)8 KNNList (de.lmu.ifi.dbs.elki.database.ids.KNNList)8 ClusterModel (de.lmu.ifi.dbs.elki.data.model.ClusterModel)7