Search in sources :

Example 81 with WritableDoubleDataStore

use of de.lmu.ifi.dbs.elki.database.datastore.WritableDoubleDataStore in project elki by elki-project.

the class AbstractHDBSCAN method computeCoreDists.

/**
 * Compute the core distances for all objects.
 *
 * @param ids Objects
 * @param knnQ kNN query
 * @param minPts Minimum neighborhood size
 * @return Data store with core distances
 */
protected WritableDoubleDataStore computeCoreDists(DBIDs ids, KNNQuery<O> knnQ, int minPts) {
    final Logging LOG = getLogger();
    final WritableDoubleDataStore coredists = DataStoreUtil.makeDoubleStorage(ids, DataStoreFactory.HINT_HOT | DataStoreFactory.HINT_DB);
    FiniteProgress cprog = LOG.isVerbose() ? new FiniteProgress("Computing core sizes", ids.size(), LOG) : null;
    for (DBIDIter iter = ids.iter(); iter.valid(); iter.advance()) {
        coredists.put(iter, knnQ.getKNNForDBID(iter, minPts).getKNNDistance());
        LOG.incrementProcessed(cprog);
    }
    LOG.ensureCompleted(cprog);
    return coredists;
}
Also used : Logging(de.lmu.ifi.dbs.elki.logging.Logging) WritableDoubleDataStore(de.lmu.ifi.dbs.elki.database.datastore.WritableDoubleDataStore) FiniteProgress(de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress) DBIDIter(de.lmu.ifi.dbs.elki.database.ids.DBIDIter)

Example 82 with WritableDoubleDataStore

use of de.lmu.ifi.dbs.elki.database.datastore.WritableDoubleDataStore in project elki by elki-project.

the class PROCLUS method greedy.

/**
 * Returns a piercing set of k medoids from the specified sample set.
 *
 * @param distFunc the distance function
 * @param sampleSet the sample set
 * @param m the number of medoids to be returned
 * @param random random number generator
 * @return a piercing set of m medoids from the specified sample set
 */
private ArrayDBIDs greedy(DistanceQuery<V> distFunc, DBIDs sampleSet, int m, Random random) {
    ArrayModifiableDBIDs medoids = DBIDUtil.newArray(m);
    ArrayModifiableDBIDs s = DBIDUtil.newArray(sampleSet);
    DBIDArrayIter iter = s.iter();
    DBIDVar m_i = DBIDUtil.newVar();
    int size = s.size();
    // Move a random element to the end, then pop()
    s.swap(random.nextInt(size), --size);
    medoids.add(s.pop(m_i));
    if (LOG.isDebugging()) {
        LOG.debugFiner("medoids " + medoids.toString());
    }
    // To track the current worst element:
    int worst = -1;
    double worstd = Double.NEGATIVE_INFINITY;
    // compute distances between each point in S and m_i
    WritableDoubleDataStore distances = DataStoreUtil.makeDoubleStorage(s, DataStoreFactory.HINT_HOT | DataStoreFactory.HINT_TEMP);
    for (iter.seek(0); iter.getOffset() < size; iter.advance()) {
        final double dist = distFunc.distance(iter, m_i);
        distances.putDouble(iter, dist);
        if (dist > worstd) {
            worstd = dist;
            worst = iter.getOffset();
        }
    }
    for (int i = 1; i < m; i++) {
        // choose medoid m_i to be far from previous medoids
        s.swap(worst, --size);
        medoids.add(s.pop(m_i));
        // compute distances of each point to closest medoid; track worst.
        worst = -1;
        worstd = Double.NEGATIVE_INFINITY;
        for (iter.seek(0); iter.getOffset() < size; iter.advance()) {
            double dist_new = distFunc.distance(iter, m_i);
            double dist_old = distances.doubleValue(iter);
            double dist = (dist_new < dist_old) ? dist_new : dist_old;
            distances.putDouble(iter, dist);
            if (dist > worstd) {
                worstd = dist;
                worst = iter.getOffset();
            }
        }
        if (LOG.isDebugging()) {
            LOG.debugFiner("medoids " + medoids.toString());
        }
    }
    return medoids;
}
Also used : DBIDVar(de.lmu.ifi.dbs.elki.database.ids.DBIDVar) ArrayModifiableDBIDs(de.lmu.ifi.dbs.elki.database.ids.ArrayModifiableDBIDs) WritableDoubleDataStore(de.lmu.ifi.dbs.elki.database.datastore.WritableDoubleDataStore) DBIDArrayIter(de.lmu.ifi.dbs.elki.database.ids.DBIDArrayIter)

Example 83 with WritableDoubleDataStore

use of de.lmu.ifi.dbs.elki.database.datastore.WritableDoubleDataStore in project elki by elki-project.

the class KMeansHamerly method run.

@Override
public Clustering<KMeansModel> run(Database database, Relation<V> relation) {
    if (relation.size() <= 0) {
        return new Clustering<>("k-Means Clustering", "kmeans-clustering");
    }
    // Choose initial means
    if (LOG.isStatistics()) {
        LOG.statistics(new StringStatistic(KEY + ".initialization", initializer.toString()));
    }
    double[][] means = initializer.chooseInitialMeans(database, relation, k, getDistanceFunction());
    // Setup cluster assignment store
    List<ModifiableDBIDs> clusters = new ArrayList<>();
    for (int i = 0; i < k; i++) {
        clusters.add(DBIDUtil.newHashSet((int) (relation.size() * 2. / k)));
    }
    WritableIntegerDataStore assignment = DataStoreUtil.makeIntegerStorage(relation.getDBIDs(), DataStoreFactory.HINT_TEMP | DataStoreFactory.HINT_HOT, -1);
    // Hamerly bounds
    WritableDoubleDataStore upper = DataStoreUtil.makeDoubleStorage(relation.getDBIDs(), DataStoreFactory.HINT_TEMP | DataStoreFactory.HINT_HOT, Double.POSITIVE_INFINITY);
    WritableDoubleDataStore lower = DataStoreUtil.makeDoubleStorage(relation.getDBIDs(), DataStoreFactory.HINT_TEMP | DataStoreFactory.HINT_HOT, 0.);
    // Storage for updated means:
    final int dim = means[0].length;
    double[][] sums = new double[k][dim];
    // Separation of means / distance moved.
    double[] sep = new double[k];
    IndefiniteProgress prog = LOG.isVerbose() ? new IndefiniteProgress("K-Means iteration", LOG) : null;
    LongStatistic rstat = LOG.isStatistics() ? new LongStatistic(KEY + ".reassignments") : null;
    int iteration = 0;
    for (; maxiter <= 0 || iteration < maxiter; iteration++) {
        LOG.incrementProcessed(prog);
        int changed;
        if (iteration == 0) {
            changed = initialAssignToNearestCluster(relation, means, sums, clusters, assignment, upper, lower);
        } else {
            recomputeSeperation(means, sep);
            changed = assignToNearestCluster(relation, means, sums, clusters, assignment, sep, upper, lower);
        }
        if (rstat != null) {
            rstat.setLong(changed);
            LOG.statistics(rstat);
        }
        // Stop if no cluster assignment changed.
        if (changed == 0) {
            break;
        }
        // Recompute means.
        for (int i = 0; i < k; i++) {
            final int s = clusters.get(i).size();
            timesEquals(sums[i], s > 0 ? 1. / s : 1.);
        }
        double delta = maxMoved(means, sums, sep);
        updateBounds(relation, assignment, upper, lower, sep, delta);
        for (int i = 0; i < k; i++) {
            final int s = clusters.get(i).size();
            System.arraycopy(sums[i], 0, means[i], 0, dim);
            // Restore to sum for next iteration
            timesEquals(sums[i], s > 0 ? s : 1.);
        }
    }
    LOG.setCompleted(prog);
    if (LOG.isStatistics()) {
        LOG.statistics(new LongStatistic(KEY + ".iterations", iteration));
    }
    upper.destroy();
    lower.destroy();
    // Wrap result
    double totalvariance = 0.;
    Clustering<KMeansModel> result = new Clustering<>("k-Means Clustering", "kmeans-clustering");
    for (int i = 0; i < clusters.size(); i++) {
        DBIDs ids = clusters.get(i);
        if (ids.size() == 0) {
            continue;
        }
        double[] mean = means[i];
        double varsum = 0.;
        if (varstat) {
            DoubleVector mvec = DoubleVector.wrap(mean);
            for (DBIDIter it = ids.iter(); it.valid(); it.advance()) {
                varsum += distanceFunction.distance(mvec, relation.get(it));
            }
            totalvariance += varsum;
        }
        KMeansModel model = new KMeansModel(mean, varsum);
        result.addToplevelCluster(new Cluster<>(ids, model));
    }
    if (LOG.isStatistics() && varstat) {
        LOG.statistics(new DoubleStatistic(this.getClass().getName() + ".variance-sum", totalvariance));
    }
    return result;
}
Also used : WritableIntegerDataStore(de.lmu.ifi.dbs.elki.database.datastore.WritableIntegerDataStore) KMeansModel(de.lmu.ifi.dbs.elki.data.model.KMeansModel) WritableDoubleDataStore(de.lmu.ifi.dbs.elki.database.datastore.WritableDoubleDataStore) DBIDs(de.lmu.ifi.dbs.elki.database.ids.DBIDs) ModifiableDBIDs(de.lmu.ifi.dbs.elki.database.ids.ModifiableDBIDs) ArrayList(java.util.ArrayList) Clustering(de.lmu.ifi.dbs.elki.data.Clustering) DBIDIter(de.lmu.ifi.dbs.elki.database.ids.DBIDIter) DoubleStatistic(de.lmu.ifi.dbs.elki.logging.statistics.DoubleStatistic) StringStatistic(de.lmu.ifi.dbs.elki.logging.statistics.StringStatistic) IndefiniteProgress(de.lmu.ifi.dbs.elki.logging.progress.IndefiniteProgress) LongStatistic(de.lmu.ifi.dbs.elki.logging.statistics.LongStatistic) ModifiableDBIDs(de.lmu.ifi.dbs.elki.database.ids.ModifiableDBIDs) DoubleVector(de.lmu.ifi.dbs.elki.data.DoubleVector)

Example 84 with WritableDoubleDataStore

use of de.lmu.ifi.dbs.elki.database.datastore.WritableDoubleDataStore in project elki by elki-project.

the class FarthestPointsInitialMeans method chooseInitialMeans.

@Override
public <T extends NumberVector> double[][] chooseInitialMeans(Database database, Relation<T> relation, int k, NumberVectorDistanceFunction<? super T> distanceFunction) {
    // Get a distance query
    DistanceQuery<T> distQ = database.getDistanceQuery(relation, distanceFunction);
    DBIDs ids = relation.getDBIDs();
    WritableDoubleDataStore store = DataStoreUtil.makeDoubleStorage(ids, DataStoreFactory.HINT_HOT | DataStoreFactory.HINT_TEMP, Double.POSITIVE_INFINITY);
    // Chose first mean
    double[][] means = new double[k][];
    DBIDRef first = DBIDUtil.randomSample(ids, rnd);
    T prevmean = relation.get(first);
    means[0] = prevmean.toArray();
    // Find farthest object each.
    DBIDVar best = DBIDUtil.newVar(first);
    for (int i = (dropfirst ? 0 : 1); i < k; i++) {
        double maxdist = Double.NEGATIVE_INFINITY;
        for (DBIDIter it = ids.iter(); it.valid(); it.advance()) {
            final double prev = store.doubleValue(it);
            if (prev != prev) {
                // NaN: already chosen!
                continue;
            }
            double val = Math.min(prev, distQ.distance(prevmean, it));
            // Don't store distance to first mean, when it will be dropped below.
            if (i > 0) {
                store.putDouble(it, val);
            }
            if (val > maxdist) {
                maxdist = val;
                best.set(it);
            }
        }
        // Add new mean (and drop the initial mean when desired)
        // So it won't be chosen twice.
        store.putDouble(best, Double.NaN);
        prevmean = relation.get(best);
        means[i] = prevmean.toArray();
    }
    // Explicitly destroy temporary data.
    store.destroy();
    return means;
}
Also used : DBIDVar(de.lmu.ifi.dbs.elki.database.ids.DBIDVar) WritableDoubleDataStore(de.lmu.ifi.dbs.elki.database.datastore.WritableDoubleDataStore) DBIDRef(de.lmu.ifi.dbs.elki.database.ids.DBIDRef) ArrayModifiableDBIDs(de.lmu.ifi.dbs.elki.database.ids.ArrayModifiableDBIDs) DBIDs(de.lmu.ifi.dbs.elki.database.ids.DBIDs) DBIDIter(de.lmu.ifi.dbs.elki.database.ids.DBIDIter)

Example 85 with WritableDoubleDataStore

use of de.lmu.ifi.dbs.elki.database.datastore.WritableDoubleDataStore in project elki by elki-project.

the class PAMInitialMeans method chooseInitialMedoids.

@Override
public DBIDs chooseInitialMedoids(int k, DBIDs ids, DistanceQuery<? super O> distQ) {
    ArrayModifiableDBIDs medids = DBIDUtil.newArray(k);
    DBIDVar bestid = DBIDUtil.newVar();
    // We need three temporary storage arrays:
    WritableDoubleDataStore mindist, bestd, tempd;
    mindist = DataStoreUtil.makeDoubleStorage(ids, DataStoreFactory.HINT_HOT | DataStoreFactory.HINT_TEMP);
    bestd = DataStoreUtil.makeDoubleStorage(ids, DataStoreFactory.HINT_HOT | DataStoreFactory.HINT_TEMP);
    tempd = DataStoreUtil.makeDoubleStorage(ids, DataStoreFactory.HINT_HOT | DataStoreFactory.HINT_TEMP);
    // First mean is chosen by having the smallest distance sum to all others.
    {
        double best = Double.POSITIVE_INFINITY;
        FiniteProgress prog = LOG.isVerbose() ? new FiniteProgress("Choosing initial mean", ids.size(), LOG) : null;
        for (DBIDIter iter = ids.iter(); iter.valid(); iter.advance()) {
            double sum = 0, d;
            for (DBIDIter iter2 = ids.iter(); iter2.valid(); iter2.advance()) {
                sum += d = distQ.distance(iter, iter2);
                tempd.putDouble(iter2, d);
            }
            if (sum < best) {
                best = sum;
                bestid.set(iter);
                // Swap mindist and newd:
                WritableDoubleDataStore temp = mindist;
                mindist = tempd;
                tempd = temp;
            }
            LOG.incrementProcessed(prog);
        }
        LOG.ensureCompleted(prog);
        medids.add(bestid);
    }
    assert (mindist != null);
    // Subsequent means optimize the full criterion.
    FiniteProgress prog = LOG.isVerbose() ? new FiniteProgress("Choosing initial centers", k, LOG) : null;
    // First one was just chosen.
    LOG.incrementProcessed(prog);
    for (int i = 1; i < k; i++) {
        double best = Double.POSITIVE_INFINITY;
        bestid.unset();
        for (DBIDIter iter = ids.iter(); iter.valid(); iter.advance()) {
            if (medids.contains(iter)) {
                continue;
            }
            double sum = 0., v;
            for (DBIDIter iter2 = ids.iter(); iter2.valid(); iter2.advance()) {
                sum += v = MathUtil.min(distQ.distance(iter, iter2), mindist.doubleValue(iter2));
                tempd.put(iter2, v);
            }
            if (sum < best) {
                best = sum;
                bestid.set(iter);
                // Swap bestd and newd:
                WritableDoubleDataStore temp = bestd;
                bestd = tempd;
                tempd = temp;
            }
        }
        if (!bestid.isSet()) {
            throw new AbortException("No median found that improves the criterion function?!? Too many infinite distances.");
        }
        medids.add(bestid);
        // Swap bestd and mindist:
        WritableDoubleDataStore temp = bestd;
        bestd = mindist;
        mindist = temp;
        LOG.incrementProcessed(prog);
    }
    LOG.ensureCompleted(prog);
    mindist.destroy();
    bestd.destroy();
    tempd.destroy();
    return medids;
}
Also used : DBIDVar(de.lmu.ifi.dbs.elki.database.ids.DBIDVar) ArrayModifiableDBIDs(de.lmu.ifi.dbs.elki.database.ids.ArrayModifiableDBIDs) WritableDoubleDataStore(de.lmu.ifi.dbs.elki.database.datastore.WritableDoubleDataStore) FiniteProgress(de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress) DBIDIter(de.lmu.ifi.dbs.elki.database.ids.DBIDIter) AbortException(de.lmu.ifi.dbs.elki.utilities.exceptions.AbortException)

Aggregations

WritableDoubleDataStore (de.lmu.ifi.dbs.elki.database.datastore.WritableDoubleDataStore)90 DBIDIter (de.lmu.ifi.dbs.elki.database.ids.DBIDIter)70 MaterializedDoubleRelation (de.lmu.ifi.dbs.elki.database.relation.MaterializedDoubleRelation)70 OutlierResult (de.lmu.ifi.dbs.elki.result.outlier.OutlierResult)70 OutlierScoreMeta (de.lmu.ifi.dbs.elki.result.outlier.OutlierScoreMeta)70 DoubleRelation (de.lmu.ifi.dbs.elki.database.relation.DoubleRelation)68 DoubleMinMax (de.lmu.ifi.dbs.elki.math.DoubleMinMax)61 DBIDs (de.lmu.ifi.dbs.elki.database.ids.DBIDs)43 BasicOutlierScoreMeta (de.lmu.ifi.dbs.elki.result.outlier.BasicOutlierScoreMeta)35 FiniteProgress (de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress)33 KNNList (de.lmu.ifi.dbs.elki.database.ids.KNNList)20 InvertedOutlierScoreMeta (de.lmu.ifi.dbs.elki.result.outlier.InvertedOutlierScoreMeta)13 ArrayModifiableDBIDs (de.lmu.ifi.dbs.elki.database.ids.ArrayModifiableDBIDs)12 DoubleDBIDListIter (de.lmu.ifi.dbs.elki.database.ids.DoubleDBIDListIter)12 StepProgress (de.lmu.ifi.dbs.elki.logging.progress.StepProgress)12 ProbabilisticOutlierScore (de.lmu.ifi.dbs.elki.result.outlier.ProbabilisticOutlierScore)12 QuotientOutlierScoreMeta (de.lmu.ifi.dbs.elki.result.outlier.QuotientOutlierScoreMeta)12 ArrayList (java.util.ArrayList)11 ArrayDBIDs (de.lmu.ifi.dbs.elki.database.ids.ArrayDBIDs)10 NeighborSetPredicate (de.lmu.ifi.dbs.elki.algorithm.outlier.spatial.neighborhood.NeighborSetPredicate)9